1474 files changed, 117084 insertions, 29987 deletions
diff --git a/libavcodec/012v.c b/libavcodec/012v.c
new file mode 100644
index 0000000..c2b6a35
--- /dev/null
+++ b/libavcodec/012v.c
@@ -0,0 +1,159 @@
+/*
+ * 012v decoder
+ *
+ * Copyright (C) 2012 Carl Eugen Hoyos
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "internal.h"
+#include "libavutil/intreadwrite.h"
+
+static av_cold int zero12v_decode_init(AVCodecContext *avctx)
+{
+    /* streams tagged 'a12v' additionally trigger a "transparency" sample request */
+    const int is_a12v = avctx->codec_tag == MKTAG('a', '1', '2', 'v');
+
+    avctx->bits_per_raw_sample = 10;
+    avctx->pix_fmt             = AV_PIX_FMT_YUV422P16;
+    if (is_a12v)
+        avpriv_request_sample(avctx, "transparency");
+    return 0;
+}
+
+/**
+ * Decode one 012v packet into a planar 4:2:2 frame.
+ *
+ * Each little-endian 32-bit word holds three 10-bit samples, every one of
+ * them stored left-aligned in a 16-bit output element (mask 0xFFC0). Four
+ * consecutive words carry the samples in the order UYV, YUY, VYU, YVY.
+ *
+ * @param avctx     codec context; width, height and codec_tag are read
+ * @param data      output AVFrame, its buffers come from ff_get_buffer()
+ * @param got_frame set to 1 once the frame has been written
+ * @param avpkt     input packet
+ * @return avpkt->size on success, a negative error code otherwise
+ */
+static int zero12v_decode_frame(AVCodecContext *avctx, void *data,
+                                int *got_frame, AVPacket *avpkt)
+{
+    int line, ret;
+    const int width = avctx->width;
+    AVFrame *pic = data;
+    uint16_t *y, *u, *v;
+    const uint8_t *line_end, *src = avpkt->data;
+    int stride = avctx->width * 8 / 3; /* default row size: 8 bytes per 3 pixels */
+
+    /* height is used as a divisor below, so reject it here together with width */
+    if (width <= 1 || avctx->height <= 0) {
+        av_log(avctx, AV_LOG_ERROR, "Dimensions %dx%d not supported.\n",
+               width, avctx->height);
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* '012v' only: take the row size from the packet when it divides evenly */
+    if (   avctx->codec_tag == MKTAG('0', '1', '2', 'v')
+        && avpkt->size % avctx->height == 0
+        && avpkt->size / avctx->height * 3 >= width * 8)
+        stride = avpkt->size / avctx->height;
+
+    if (avpkt->size < avctx->height * stride) {
+        av_log(avctx, AV_LOG_ERROR, "Packet too small: %d instead of %d\n",
+               avpkt->size, avctx->height * stride);
+        return AVERROR_INVALIDDATA;
+    }
+
+    if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
+        return ret;
+
+    pic->pict_type = AV_PICTURE_TYPE_I;
+    pic->key_frame = 1;
+
+    line_end = avpkt->data + stride;
+    for (line = 0; line < avctx->height; line++) {
+        y = (uint16_t *)(pic->data[0] + line * pic->linesize[0]);
+        u = (uint16_t *)(pic->data[1] + line * pic->linesize[1]);
+        v = (uint16_t *)(pic->data[2] + line * pic->linesize[2]);
+        while (1) { /* NOTE(review): reads/stores are only bounded by line_end - confirm */
+            uint32_t t = AV_RL32(src);
+            src += 4;
+            *u++ = t <<  6 & 0xFFC0;
+            *y++ = t >>  4 & 0xFFC0;
+            *v++ = t >> 14 & 0xFFC0;
+            /* row exhausted: store 0x80 as the next luma and skip one input byte */
+            if (src >= line_end - 1) {
+                *y = 0x80;
+                src++;
+                break;
+            }
+
+            t = AV_RL32(src);
+            src += 4;
+            *y++ = t <<  6 & 0xFFC0;
+            *u++ = t >>  4 & 0xFFC0;
+            *y++ = t >> 14 & 0xFFC0;
+            if (src >= line_end - 2) {
+                if (!(width & 1)) {
+                    *y = 0x80;
+                    src += 2;
+                }
+                break;
+            }
+
+            t = AV_RL32(src);
+            src += 4;
+            *v++ = t <<  6 & 0xFFC0;
+            *y++ = t >>  4 & 0xFFC0;
+            *u++ = t >> 14 & 0xFFC0;
+
+            if (src >= line_end - 1) {
+                *y = 0x80;
+                src++;
+                break;
+            }
+
+            t = AV_RL32(src);
+            src += 4;
+            *y++ = t <<  6 & 0xFFC0;
+            *v++ = t >>  4 & 0xFFC0;
+            *y++ = t >> 14 & 0xFFC0;
+
+            if (src >= line_end - 2) {
+                if (width & 1) {
+                    *y = 0x80;
+                    src += 2;
+                }
+                break;
+            }
+        }
+        line_end += stride;
+    }
+
+    *got_frame = 1;
+
+    return avpkt->size;
+}
+
+AVCodec ff_zero12v_decoder = { /* codec descriptor wiring up the 012v callbacks */
+    .name           = "012v",
+    .long_name      = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_012V,
+    .init           = zero12v_decode_init,  /* sets pix_fmt and bits_per_raw_sample */
+    .decode         = zero12v_decode_frame, /* unpacks one packet into one frame */
+    .capabilities   = CODEC_CAP_DR1,
+};
diff --git a/libavcodec/4xm.c b/libavcodec/4xm.c
index 36d9dc1..3d3bc56 100644
--- a/libavcodec/4xm.c
+++ b/libavcodec/4xm.c
@@ -2,20 +2,20 @@
  * 4XM codec
  * Copyright (c) 2003 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,6 +26,7 @@
 
 #include <inttypes.h>
 
+#include "libavutil/avassert.h"
 #include "libavutil/frame.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/intreadwrite.h"
@@ -36,6 +37,7 @@
 #include "get_bits.h"
 #include "internal.h"
 
+
 #define BLOCK_TYPE_VLC_BITS 5
 #define ACDC_VLC_BITS 9
 
@@ -289,7 +291,7 @@ static void init_mv(FourXContext *f, int linesize)
     }
 #endif
 
-static inline void mcdc(uint16_t *dst, uint16_t *src, int log2w,
+static inline void mcdc(uint16_t *dst, const uint16_t *src, int log2w,
                         int h, int stride, int scale, unsigned dc)
 {
     int i;
@@ -333,7 +335,7 @@ static inline void mcdc(uint16_t *dst, uint16_t *src, int log2w,
         }
         break;
     default:
-        break;
+        av_assert0(0);
     }
 }
 
@@ -351,8 +353,7 @@ static int decode_p_block(FourXContext *f, uint16_t *dst, uint16_t *src,
     int scale   = 1;
     unsigned dc = 0;
 
-    if (code < 0 || code > 6 || log2w < 0)
-        return AVERROR_INVALIDDATA;
+    av_assert0(code >= 0 && code <= 6 && log2w >= 0);
 
     if (code == 1) {
         log2h--;
@@ -369,24 +370,42 @@ static int decode_p_block(FourXContext *f, uint16_t *dst, uint16_t *src,
                               src + (1 << log2w),
                               log2w, log2h, stride);
     } else if (code == 6) {
+        if (bytestream2_get_bytes_left(&f->g2) < 4) {
+            av_log(f->avctx, AV_LOG_ERROR, "wordstream overread\n");
+            return AVERROR_INVALIDDATA;
+        }
         if (log2w) {
-            dst[0]      = bytestream2_get_le16(&f->g2);
-            dst[1]      = bytestream2_get_le16(&f->g2);
+            dst[0]      = bytestream2_get_le16u(&f->g2);
+            dst[1]      = bytestream2_get_le16u(&f->g2);
         } else {
-            dst[0]      = bytestream2_get_le16(&f->g2);
-            dst[stride] = bytestream2_get_le16(&f->g2);
+            dst[0]      = bytestream2_get_le16u(&f->g2);
+            dst[stride] = bytestream2_get_le16u(&f->g2);
         }
         return 0;
     }
 
+    if ((code&3)==0 && bytestream2_get_bytes_left(&f->g) < 1) {
+        av_log(f->avctx, AV_LOG_ERROR, "bytestream overread\n");
+        return AVERROR_INVALIDDATA;
+    }
+
     if (code == 0) {
         src  += f->mv[bytestream2_get_byte(&f->g)];
     } else if (code == 3 && f->version >= 2) {
         return 0;
     } else if (code == 4) {
         src  += f->mv[bytestream2_get_byte(&f->g)];
+        if (bytestream2_get_bytes_left(&f->g2) < 2){
+            av_log(f->avctx, AV_LOG_ERROR, "wordstream overread\n");
+            return AVERROR_INVALIDDATA;
+        }
         dc    = bytestream2_get_le16(&f->g2);
     } else if (code == 5) {
+        if (bytestream2_get_bytes_left(&f->g2) < 2){
+            av_log(f->avctx, AV_LOG_ERROR, "wordstream overread\n");
+            return AVERROR_INVALIDDATA;
+        }
+        av_assert0(start <= src && src <= end);
         scale = 0;
         dc    = bytestream2_get_le16(&f->g2);
     }
@@ -415,9 +434,9 @@ static int decode_p_frame(FourXContext *f, const uint8_t *buf, int length)
     src = f->last_frame_buffer;
 
     if (f->version > 1) {
-        if (length < 20)
-            return AVERROR_INVALIDDATA;
         extra           = 20;
+        if (length < extra)
+            return AVERROR_INVALIDDATA;
         bitstream_size  = AV_RL32(buf + 8);
         wordstream_size = AV_RL32(buf + 12);
         bytestream_size = AV_RL32(buf + 16);
@@ -428,24 +447,21 @@ static int decode_p_frame(FourXContext *f, const uint8_t *buf, int length)
         bytestream_size = FFMAX(length - bitstream_size - wordstream_size, 0);
     }
 
-    if (bitstream_size + bytestream_size + wordstream_size + extra != length
-        || bitstream_size  > (1 << 26)
-        || bytestream_size > (1 << 26)
-        || wordstream_size > (1 << 26)) {
-        av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n",
-               bitstream_size, bytestream_size, wordstream_size,
-               bitstream_size + bytestream_size + wordstream_size - length);
+    if (bitstream_size > length || bitstream_size >= INT_MAX/8 ||
+        bytestream_size > length - bitstream_size ||
+        wordstream_size > length - bytestream_size - bitstream_size ||
+        extra > length - bytestream_size - bitstream_size - wordstream_size) {
+        av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n", bitstream_size, bytestream_size, wordstream_size,
+        bitstream_size+ bytestream_size+ wordstream_size - length);
         return AVERROR_INVALIDDATA;
     }
 
-    av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
-                   bitstream_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    av_fast_padded_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
+                          bitstream_size);
     if (!f->bitstream_buffer)
         return AVERROR(ENOMEM);
     f->bbdsp.bswap_buf(f->bitstream_buffer, (const uint32_t *) (buf + extra),
                        bitstream_size / 4);
-    memset((uint8_t*)f->bitstream_buffer + bitstream_size,
-           0, FF_INPUT_BUFFER_PADDING_SIZE);
     init_get_bits(&f->gb, f->bitstream_buffer, 8 * bitstream_size);
 
     wordstream_offset = extra + bitstream_size;
@@ -476,10 +492,17 @@ static int decode_i_block(FourXContext *f, int16_t *block)
 {
     int code, i, j, level, val;
 
+    if (get_bits_left(&f->gb) < 2){
+        av_log(f->avctx, AV_LOG_ERROR, "%d bits left before decode_i_block()\n", get_bits_left(&f->gb));
+        return -1;
+    }
+
     /* DC coef */
     val = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
-    if (val >> 4)
+    if (val >> 4) {
         av_log(f->avctx, AV_LOG_ERROR, "error dc run != 0\n");
+        return AVERROR_INVALIDDATA;
+    }
 
     if (val)
         val = get_xbits(&f->gb, val);
@@ -497,7 +520,12 @@ static int decode_i_block(FourXContext *f, int16_t *block)
         if (code == 0xf0) {
             i += 16;
         } else {
-            level = get_xbits(&f->gb, code & 0xf);
+            if (code & 0xf) {
+                level = get_xbits(&f->gb, code & 0xf);
+            } else {
+                av_log(f->avctx, AV_LOG_ERROR, "0 coeff\n");
+                return AVERROR_INVALIDDATA;
+            }
             i    += code >> 4;
             if (i >= 64) {
                 av_log(f->avctx, AV_LOG_ERROR, "run %d oveflow\n", i);
@@ -577,7 +605,7 @@ static int decode_i_mb(FourXContext *f)
 
 static const uint8_t *read_huffman_tables(FourXContext *f,
                                           const uint8_t * const buf,
-                                          int len)
+                                          int buf_size)
 {
     int frequency[512] = { 0 };
     uint8_t flag[512];
@@ -586,6 +614,7 @@ static const uint8_t *read_huffman_tables(FourXContext *f,
     int bits_tab[257];
     int start, end;
     const uint8_t *ptr = buf;
+    const uint8_t *ptr_end = buf + buf_size;
     int j;
 
     memset(up, -1, sizeof(up));
@@ -595,10 +624,10 @@ static const uint8_t *read_huffman_tables(FourXContext *f,
     for (;;) {
         int i;
 
-        len -= end - start + 1;
-
-        if (end < start || len < 0)
+        if (ptr_end - ptr < FFMAX(end - start + 1, 0) + 1) {
+            av_log(f->avctx, AV_LOG_ERROR, "invalid data in read_huffman_tables\n");
             return NULL;
+        }
 
         for (i = start; i <= end; i++)
             frequency[i] = *ptr++;
@@ -606,9 +635,6 @@ static const uint8_t *read_huffman_tables(FourXContext *f,
         if (start == 0)
             break;
 
-        if (--len < 0)
-            return NULL;
-
         end = *ptr++;
     }
     frequency[256] = 1;
@@ -616,6 +642,11 @@ static const uint8_t *read_huffman_tables(FourXContext *f,
     while ((ptr - buf) & 3)
         ptr++; // 4byte align
 
+    if (ptr > ptr_end) {
+        av_log(f->avctx, AV_LOG_ERROR, "ptr overflow in read_huffman_tables\n");
+        return NULL;
+    }
+
     for (j = 257; j < 512; j++) {
         int min_freq[2] = { 256 * 256, 256 * 256 };
         int smallest[2] = { 0, 0 };
@@ -684,6 +715,7 @@ static int decode_i2_frame(FourXContext *f, const uint8_t *buf, int length)
     const int height = f->avctx->height;
     const int mbs    = (FFALIGN(width, 16) >> 4) * (FFALIGN(height, 16) >> 4);
     uint16_t *dst    = f->frame_buffer;
+    const uint8_t *buf_end = buf + length;
     GetByteContext g3;
 
     if (length < mbs * 8) {
@@ -695,6 +727,8 @@ static int decode_i2_frame(FourXContext *f, const uint8_t *buf, int length)
     for (y = 0; y < height; y += 16) {
         for (x = 0; x < width; x += 16) {
             unsigned int color[4] = { 0 }, bits;
+            if (buf_end - buf < 8)
+                return -1;
             // warning following is purely guessed ...
             color[0] = bytestream2_get_le16u(&g3);
             color[1] = bytestream2_get_le16u(&g3);
@@ -728,7 +762,6 @@ static int decode_i_frame(FourXContext *f, const uint8_t *buf, int length)
     const int width  = f->avctx->width;
     const int height = f->avctx->height;
     const unsigned int bitstream_size = AV_RL32(buf);
-    int token_count av_unused;
     unsigned int prestream_size;
     const uint8_t *prestream;
 
@@ -740,7 +773,6 @@ static int decode_i_frame(FourXContext *f, const uint8_t *buf, int length)
         return AVERROR_INVALIDDATA;
     }
 
-    token_count    =     AV_RL32(buf + bitstream_size + 8);
     prestream_size = 4 * AV_RL32(buf + bitstream_size + 4);
     prestream      =             buf + bitstream_size + 12;
 
@@ -757,18 +789,18 @@ static int decode_i_frame(FourXContext *f, const uint8_t *buf, int length)
         return AVERROR_INVALIDDATA;
     }
 
+    av_assert0(prestream <= buf + length);
+
     init_get_bits(&f->gb, buf + 4, 8 * bitstream_size);
 
     prestream_size = length + buf - prestream;
 
-    av_fast_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
-                   prestream_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    av_fast_padded_malloc(&f->bitstream_buffer, &f->bitstream_buffer_size,
+                          prestream_size);
     if (!f->bitstream_buffer)
         return AVERROR(ENOMEM);
     f->bbdsp.bswap_buf(f->bitstream_buffer, (const uint32_t *) prestream,
                        prestream_size / 4);
-    memset((uint8_t*)f->bitstream_buffer + prestream_size,
-           0, FF_INPUT_BUFFER_PADDING_SIZE);
     init_get_bits(&f->pre_gb, f->bitstream_buffer, 8 * prestream_size);
 
     f->last_dc = 0 * 128 * 8 * 8;
@@ -800,11 +832,7 @@ static int decode_frame(AVCodecContext *avctx, void *data,
     if (buf_size < 20)
         return AVERROR_INVALIDDATA;
 
-    if (avctx->width % 16 || avctx->height % 16) {
-        av_log(avctx, AV_LOG_ERROR,
-               "Dimensions non-multiple of 16 are invalid.\n");
-        return AVERROR_INVALIDDATA;
-    }
+    av_assert0(avctx->width % 16 == 0 && avctx->height % 16 == 0);
 
     if (buf_size < AV_RL32(buf + 4) + 8) {
         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %"PRIu32"\n",
@@ -820,12 +848,19 @@ static int decode_frame(AVCodecContext *avctx, void *data,
         const int data_size  = buf_size - 20;
         CFrameBuffer *cfrm;
 
-        if (data_size < 0)
+        if (f->version <= 1) {
+            av_log(f->avctx, AV_LOG_ERROR, "cfrm in version %d\n", f->version);
             return AVERROR_INVALIDDATA;
+        }
 
         id         = AV_RL32(buf + 12);
         whole_size = AV_RL32(buf + 16);
 
+        if (data_size < 0 || whole_size < 0) {
+            av_log(f->avctx, AV_LOG_ERROR, "sizes invalid\n");
+            return AVERROR_INVALIDDATA;
+        }
+
         for (i = 0; i < CFRAME_BUFFER_COUNT; i++)
             if (f->cfrm[i].id && f->cfrm[i].id < avctx->frame_number)
                 av_log(f->avctx, AV_LOG_ERROR, "lost c frame %d\n",
@@ -844,11 +879,14 @@ static int decode_frame(AVCodecContext *avctx, void *data,
         }
         cfrm = &f->cfrm[i];
 
+        if (data_size > UINT_MAX -  cfrm->size - FF_INPUT_BUFFER_PADDING_SIZE)
+            return AVERROR_INVALIDDATA;
+
         cfrm->data = av_fast_realloc(cfrm->data, &cfrm->allocated_size,
                                      cfrm->size + data_size + FF_INPUT_BUFFER_PADDING_SIZE);
         // explicit check needed as memcpy below might not catch a NULL
         if (!cfrm->data) {
-            av_log(f->avctx, AV_LOG_ERROR, "realloc failure");
+            av_log(f->avctx, AV_LOG_ERROR, "realloc failure\n");
             return AVERROR(ENOMEM);
         }
 
@@ -875,24 +913,27 @@ static int decode_frame(AVCodecContext *avctx, void *data,
         frame_size = buf_size - 12;
     }
 
-
-    if ((ret = ff_get_buffer(avctx, picture, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, picture, 0)) < 0)
         return ret;
-    }
 
     if (frame_4cc == AV_RL32("ifr2")) {
         picture->pict_type = AV_PICTURE_TYPE_I;
-        if ((ret = decode_i2_frame(f, buf - 4, frame_size + 4)) < 0)
+        if ((ret = decode_i2_frame(f, buf - 4, frame_size + 4)) < 0) {
+            av_log(f->avctx, AV_LOG_ERROR, "decode i2 frame failed\n");
             return ret;
+        }
     } else if (frame_4cc == AV_RL32("ifrm")) {
         picture->pict_type = AV_PICTURE_TYPE_I;
-        if ((ret = decode_i_frame(f, buf, frame_size)) < 0)
+        if ((ret = decode_i_frame(f, buf, frame_size)) < 0) {
+            av_log(f->avctx, AV_LOG_ERROR, "decode i frame failed\n");
             return ret;
+        }
     } else if (frame_4cc == AV_RL32("pfrm") || frame_4cc == AV_RL32("pfr2")) {
         picture->pict_type = AV_PICTURE_TYPE_P;
-        if ((ret = decode_p_frame(f, buf, frame_size)) < 0)
+        if ((ret = decode_p_frame(f, buf, frame_size)) < 0) {
+            av_log(f->avctx, AV_LOG_ERROR, "decode p frame failed\n");
             return ret;
+        }
     } else if (frame_4cc == AV_RL32("snd_")) {
         av_log(avctx, AV_LOG_ERROR, "ignoring snd_ chunk length:%d\n",
                buf_size);
@@ -942,6 +983,10 @@ static av_cold int decode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_ERROR, "extradata wrong or missing\n");
         return AVERROR_INVALIDDATA;
     }
+    if((avctx->width % 16) || (avctx->height % 16)) {
+        av_log(avctx, AV_LOG_ERROR, "unsupported width/height\n");
+        return AVERROR_INVALIDDATA;
+    }
 
     ret = av_image_check_size(avctx->width, avctx->height, 0, avctx);
     if (ret < 0)
diff --git a/libavcodec/8bps.c b/libavcodec/8bps.c
index 3fd15e0..e00bdfc 100644
--- a/libavcodec/8bps.c
+++ b/libavcodec/8bps.c
@@ -2,20 +2,20 @@
  * Quicktime Planar RGB (8BPS) Video Decoder
  * Copyright (C) 2003 Roberto Togni
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,7 +27,7 @@
  *
  * Supports: PAL8 (RGB 8bpp, paletted)
  *         : BGR24 (RGB 24bpp) (can also output it as RGB32)
- *         : RGB32 (RGB 32bpp, 4th plane is probably alpha and it's ignored)
+ *         : RGB32 (RGB 32bpp, 4th plane is alpha)
  *
  */
 
@@ -66,27 +66,18 @@ static int decode_frame(AVCodecContext *avctx, void *data,
     unsigned int dlen, p, row;
     const unsigned char *lp, *dp, *ep;
     unsigned char count;
-    unsigned int px_inc;
     unsigned int planes     = c->planes;
     unsigned char *planemap = c->planemap;
     int ret;
 
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     ep = encoded + buf_size;
 
     /* Set data pointer after line lengths */
     dp = encoded + planes * (height << 1);
 
-    /* Ignore alpha plane, don't know what to do with it */
-    if (planes == 4)
-        planes--;
-
-    px_inc = planes + (avctx->pix_fmt == AV_PIX_FMT_RGB32);
-
     for (p = 0; p < planes; p++) {
         /* Lines length pointer for this plane */
         lp = encoded + p * (height << 1);
@@ -105,21 +96,21 @@ static int decode_frame(AVCodecContext *avctx, void *data,
                 if ((count = *dp++) <= 127) {
                     count++;
                     dlen -= count + 1;
-                    if (pixptr_end - pixptr < count * px_inc)
+                    if (pixptr_end - pixptr < count * planes)
                         break;
                     if (ep - dp < count)
                         return AVERROR_INVALIDDATA;
                     while (count--) {
                         *pixptr = *dp++;
-                        pixptr += px_inc;
+                        pixptr += planes;
                     }
                 } else {
                     count = 257 - count;
-                    if (pixptr_end - pixptr < count * px_inc)
+                    if (pixptr_end - pixptr < count * planes)
                         break;
                     while (count--) {
                         *pixptr = *dp;
-                        pixptr += px_inc;
+                        pixptr += planes;
                     }
                     dp++;
                     dlen -= 2;
@@ -180,7 +171,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
         c->planemap[0] = HAVE_BIGENDIAN ? 1 : 2; // 1st plane is red
         c->planemap[1] = HAVE_BIGENDIAN ? 2 : 1; // 2nd plane is green
         c->planemap[2] = HAVE_BIGENDIAN ? 3 : 0; // 3rd plane is blue
-        c->planemap[3] = HAVE_BIGENDIAN ? 0 : 3; // 4th plane is alpha???
+        c->planemap[3] = HAVE_BIGENDIAN ? 0 : 3; // 4th plane is alpha
     }
     return 0;
 }
diff --git a/libavcodec/8svx.c b/libavcodec/8svx.c
index 11fbf19..eff525c 100644
--- a/libavcodec/8svx.c
+++ b/libavcodec/8svx.c
@@ -1,21 +1,21 @@
 /*
- * 8SVX audio decoder
  * Copyright (C) 2008 Jaikrishnan Menon
+ * Copyright (C) 2011 Stefano Sabatini
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,8 +26,18 @@
  *
  * supports: fibonacci delta encoding
  *         : exponential encoding
+ *
+ * For more information about the 8SVX format:
+ * http://netghost.narod.ru/gff/vendspec/iff/iff.txt
+ * http://sox.sourceforge.net/AudioFormats-11.html
+ * http://aminet.net/package/mus/misc/wavepak
+ * http://amigan.1emu.net/reg/8SVX.txt
+ *
+ * Samples can be found here:
+ * http://aminet.net/mods/smpl/
  */
 
+#include "libavutil/avassert.h"
 #include "avcodec.h"
 #include "internal.h"
 #include "libavutil/common.h"
@@ -44,18 +54,17 @@ typedef struct EightSvxContext {
     int data_idx;
 } EightSvxContext;
 
-static const int8_t fibonacci[16]   = { -34, -21, -13,  -8, -5, -3, -2, -1,
-                                          0,   1,   2,   3,  5,  8, 13, 21 };
-static const int8_t exponential[16] = { -128, -64, -32, -16, -8, -4, -2, -1,
-                                           0,   1,   2,   4,  8, 16, 32, 64 };
+static const int8_t fibonacci[16]   = { -34,  -21, -13,  -8, -5, -3, -2, -1, 0, 1, 2, 3, 5, 8,  13, 21 };
+static const int8_t exponential[16] = { -128, -64, -32, -16, -8, -4, -2, -1, 0, 1, 2, 4, 8, 16, 32, 64 };
 
-#define MAX_FRAME_SIZE 32768
+#define MAX_FRAME_SIZE 2048
 
 /**
  * Delta decode the compressed values in src, and put the resulting
  * decoded samples in dst.
  *
  * @param[in,out] state starting value. it is saved for use in the next call.
+ * @param table delta sequence table
  */
 static void delta_decode(uint8_t *dst, const uint8_t *src, int src_size,
                          uint8_t *state, const int8_t *table)
@@ -73,12 +82,6 @@ static void delta_decode(uint8_t *dst, const uint8_t *src, int src_size,
     *state = val;
 }
 
-static void raw_decode(uint8_t *dst, const int8_t *src, int src_size)
-{
-    while (src_size--)
-        *dst++ = *src++ + 128;
-}
-
 /** decode a frame */
 static int eightsvx_decode_frame(AVCodecContext *avctx, void *data,
                                  int *got_frame_ptr, AVPacket *avpkt)
@@ -87,27 +90,23 @@ static int eightsvx_decode_frame(AVCodecContext *avctx, void *data,
     AVFrame *frame       = data;
     int buf_size;
     int ch, ret;
-    int is_compr = (avctx->codec_id != AV_CODEC_ID_PCM_S8_PLANAR);
+    int hdr_size = 2;
 
-    /* for the first packet, copy data to buffer */
-    if (avpkt->data) {
-        int hdr_size  = is_compr ? 2 : 0;
-        int chan_size = (avpkt->size - hdr_size * avctx->channels) / avctx->channels;
+    /* decode and interleave the first packet */
+    if (!esc->data[0] && avpkt) {
+        int chan_size = avpkt->size / avctx->channels - hdr_size;
 
-        if (avpkt->size < hdr_size * avctx->channels) {
-            av_log(avctx, AV_LOG_ERROR, "packet size is too small\n");
-            return AVERROR(EINVAL);
+        if (avpkt->size % avctx->channels) {
+            av_log(avctx, AV_LOG_WARNING, "Packet with odd size, ignoring last byte\n");
         }
-        if (esc->data[0]) {
-            av_log(avctx, AV_LOG_ERROR, "unexpected data after first packet\n");
+        if (avpkt->size < (hdr_size + 1) * avctx->channels) {
+            av_log(avctx, AV_LOG_ERROR, "packet size is too small\n");
             return AVERROR(EINVAL);
         }
 
-        if (is_compr) {
         esc->fib_acc[0] = avpkt->data[1] + 128;
         if (avctx->channels == 2)
             esc->fib_acc[1] = avpkt->data[2+chan_size+1] + 128;
-        }
 
         esc->data_idx  = 0;
         esc->data_size = chan_size;
@@ -136,50 +135,37 @@ static int eightsvx_decode_frame(AVCodecContext *avctx, void *data,
     }
 
     /* get output buffer */
-    frame->nb_samples = buf_size * (is_compr + 1);
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    frame->nb_samples = buf_size * 2;
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     for (ch = 0; ch < avctx->channels; ch++) {
-        if (is_compr) {
-            delta_decode(frame->data[ch], &esc->data[ch][esc->data_idx],
-                         buf_size, &esc->fib_acc[ch], esc->table);
-        } else {
-            raw_decode(frame->data[ch], &esc->data[ch][esc->data_idx],
-                       buf_size);
-        }
+        delta_decode(frame->data[ch], &esc->data[ch][esc->data_idx],
+                     buf_size, &esc->fib_acc[ch], esc->table);
     }
 
     esc->data_idx += buf_size;
 
     *got_frame_ptr = 1;
 
-    return avpkt->size;
+    return ((avctx->frame_number == 0)*hdr_size + buf_size)*avctx->channels;
 }
 
-/** initialize 8svx decoder */
 static av_cold int eightsvx_decode_init(AVCodecContext *avctx)
 {
     EightSvxContext *esc = avctx->priv_data;
 
     if (avctx->channels < 1 || avctx->channels > 2) {
         av_log(avctx, AV_LOG_ERROR, "8SVX does not support more than 2 channels\n");
-        return AVERROR(EINVAL);
+        return AVERROR_INVALIDDATA;
     }
 
-    switch(avctx->codec->id) {
-        case AV_CODEC_ID_8SVX_FIB:
-          esc->table = fibonacci;
-          break;
-        case AV_CODEC_ID_8SVX_EXP:
-          esc->table = exponential;
-          break;
-        case AV_CODEC_ID_PCM_S8_PLANAR:
-            break;
-        default:
-          return -1;
+    switch (avctx->codec->id) {
+    case AV_CODEC_ID_8SVX_FIB: esc->table = fibonacci;    break;
+    case AV_CODEC_ID_8SVX_EXP: esc->table = exponential;  break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Invalid codec id %d.\n", avctx->codec->id);
+        return AVERROR_INVALIDDATA;
     }
     avctx->sample_fmt = AV_SAMPLE_FMT_U8P;
 
@@ -192,10 +178,13 @@ static av_cold int eightsvx_decode_close(AVCodecContext *avctx)
 
     av_freep(&esc->data[0]);
     av_freep(&esc->data[1]);
+    esc->data_size = 0;
+    esc->data_idx = 0;
 
     return 0;
 }
 
+#if CONFIG_EIGHTSVX_FIB_DECODER
 AVCodec ff_eightsvx_fib_decoder = {
   .name           = "8svx_fib",
   .long_name      = NULL_IF_CONFIG_SMALL("8SVX fibonacci"),
@@ -203,13 +192,14 @@ AVCodec ff_eightsvx_fib_decoder = {
   .id             = AV_CODEC_ID_8SVX_FIB,
   .priv_data_size = sizeof (EightSvxContext),
   .init           = eightsvx_decode_init,
-  .close          = eightsvx_decode_close,
   .decode         = eightsvx_decode_frame,
-  .capabilities   = CODEC_CAP_DELAY | CODEC_CAP_DR1,
+  .close          = eightsvx_decode_close,
+  .capabilities   = CODEC_CAP_DR1,
   .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_U8P,
                                                     AV_SAMPLE_FMT_NONE },
 };
-
+#endif
+#if CONFIG_EIGHTSVX_EXP_DECODER
 AVCodec ff_eightsvx_exp_decoder = {
   .name           = "8svx_exp",
   .long_name      = NULL_IF_CONFIG_SMALL("8SVX exponential"),
@@ -217,23 +207,10 @@ AVCodec ff_eightsvx_exp_decoder = {
   .id             = AV_CODEC_ID_8SVX_EXP,
   .priv_data_size = sizeof (EightSvxContext),
   .init           = eightsvx_decode_init,
-  .close          = eightsvx_decode_close,
   .decode         = eightsvx_decode_frame,
-  .capabilities   = CODEC_CAP_DELAY | CODEC_CAP_DR1,
+  .close          = eightsvx_decode_close,
+  .capabilities   = CODEC_CAP_DR1,
   .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_U8P,
                                                     AV_SAMPLE_FMT_NONE },
 };
-
-AVCodec ff_pcm_s8_planar_decoder = {
-    .name           = "pcm_s8_planar",
-    .long_name      = NULL_IF_CONFIG_SMALL("PCM signed 8-bit planar"),
-    .type           = AVMEDIA_TYPE_AUDIO,
-    .id             = AV_CODEC_ID_PCM_S8_PLANAR,
-    .priv_data_size = sizeof(EightSvxContext),
-    .init           = eightsvx_decode_init,
-    .close          = eightsvx_decode_close,
-    .decode         = eightsvx_decode_frame,
-    .capabilities   = CODEC_CAP_DELAY | CODEC_CAP_DR1,
-    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_U8P,
-                                                      AV_SAMPLE_FMT_NONE },
-};
+#endif
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 7d19e6e..c73eb83 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1,9 +1,12 @@
+include $(SUBDIR)../config.mak
+
 NAME = avcodec
 
 HEADERS = avcodec.h                                                     \
           avfft.h                                                       \
           dv_profile.h                                                  \
           dxva2.h                                                       \
+          old_codec_ids.h                                               \
           vaapi.h                                                       \
           vda.h                                                         \
           vdpau.h                                                       \
@@ -11,6 +14,8 @@ HEADERS = avcodec.h                                                     \
           xvmc.h                                                        \
 
 OBJS = allcodecs.o                                                      \
+       audioconvert.o                                                   \
+       avdct.o                                                          \
        avpacket.o                                                       \
        avpicture.o                                                      \
        bitstream.o                                                      \
@@ -19,11 +24,12 @@ OBJS = allcodecs.o                                                      \
        dv_profile.o                                                     \
        fmtconvert.o                                                     \
        imgconvert.o                                                     \
-       log2_tab.o                                                       \
        mathtables.o                                                     \
        options.o                                                        \
        parser.o                                                         \
        raw.o                                                            \
+       resample.o                                                       \
+       resample2.o                                                      \
        utils.o                                                          \
 
 # parts needed for many different codecs
@@ -34,13 +40,16 @@ OBJS-$(CONFIG_AUDIODSP)                += audiodsp.o
 OBJS-$(CONFIG_BLOCKDSP)                += blockdsp.o
 OBJS-$(CONFIG_BSWAPDSP)                += bswapdsp.o
 OBJS-$(CONFIG_CABAC)                   += cabac.o
+OBJS-$(CONFIG_CRYSTALHD)               += crystalhd.o
 OBJS-$(CONFIG_DCT)                     += dct.o dct32_fixed.o dct32_float.o
 OBJS-$(CONFIG_DXVA2)                   += dxva2.o
 OBJS-$(CONFIG_ERROR_RESILIENCE)        += error_resilience.o
+OBJS-$(CONFIG_EXIF)                    += exif.o tiff_common.o
 OBJS-$(CONFIG_FDCTDSP)                 += fdctdsp.o faandct.o           \
                                           jfdctfst.o jfdctint.o
 FFT-OBJS-$(CONFIG_HARDCODED_TABLES)    += cos_tables.o cos_fixed_tables.o
 OBJS-$(CONFIG_FFT)                     += avfft.o fft_fixed.o fft_float.o \
+                                          fft_fixed_32.o fft_init_table.o \
                                           $(FFT-OBJS-yes)
 OBJS-$(CONFIG_GOLOMB)                  += golomb.o
 OBJS-$(CONFIG_H263DSP)                 += h263dsp.o
@@ -57,10 +66,12 @@ OBJS-$(CONFIG_IDCTDSP)                 += idctdsp.o faanidct.o          \
 OBJS-$(CONFIG_IIRFILTER)               += iirfilter.o
 OBJS-$(CONFIG_INTRAX8)                 += intrax8.o intrax8dsp.o
 OBJS-$(CONFIG_LIBXVID)                 += libxvid_rc.o
+OBJS-$(CONFIG_LLAUDDSP)                += lossless_audiodsp.o
+OBJS-$(CONFIG_LLVIDDSP)                += lossless_videodsp.o
 OBJS-$(CONFIG_LPC)                     += lpc.o
 OBJS-$(CONFIG_LSP)                     += lsp.o
-OBJS-$(CONFIG_MDCT)                    += mdct_fixed.o mdct_float.o
-OBJS-$(CONFIG_ME_CMP)                  += me_cmp.o
+OBJS-$(CONFIG_MDCT)                    += mdct_fixed.o mdct_float.o mdct_fixed_32.o
+OBJS-$(CONFIG_ME_CMP)                  += me_cmp.o dsputil_compat.o
 OBJS-$(CONFIG_MPEG_ER)                 += mpeg_er.o
 OBJS-$(CONFIG_MPEGAUDIO)               += mpegaudio.o mpegaudiodata.o   \
                                           mpegaudiodecheader.o
@@ -78,6 +89,7 @@ OBJS-$(CONFIG_QPELDSP)                 += qpeldsp.o
 OBJS-$(CONFIG_RANGECODER)              += rangecoder.o
 RDFT-OBJS-$(CONFIG_HARDCODED_TABLES)   += sin_tables.o
 OBJS-$(CONFIG_RDFT)                    += rdft.o $(RDFT-OBJS-yes)
+OBJS-$(CONFIG_SHARED)                  += log2_tab.o
 OBJS-$(CONFIG_SINEWIN)                 += sinewin.o
 OBJS-$(CONFIG_STARTCODE)               += startcode.o
 OBJS-$(CONFIG_TPELDSP)                 += tpeldsp.o
@@ -88,6 +100,7 @@ OBJS-$(CONFIG_VIDEODSP)                += videodsp.o
 OBJS-$(CONFIG_VP3DSP)                  += vp3dsp.o
 
 # decoders/encoders
+OBJS-$(CONFIG_ZERO12V_DECODER)         += 012v.o
 OBJS-$(CONFIG_A64MULTI_ENCODER)        += a64multienc.o elbg.o
 OBJS-$(CONFIG_A64MULTI5_ENCODER)       += a64multienc.o elbg.o
 OBJS-$(CONFIG_AAC_DECODER)             += aacdec.o aactab.o aacsbr.o aacps.o \
@@ -97,7 +110,8 @@ OBJS-$(CONFIG_AAC_ENCODER)             += aacenc.o aaccoder.o    \
                                           aacpsy.o aactab.o      \
                                           psymodel.o mpeg4audio.o kbdwin.o
 OBJS-$(CONFIG_AASC_DECODER)            += aasc.o msrledec.o
-OBJS-$(CONFIG_AC3_DECODER)             += ac3dec.o ac3dec_data.o ac3.o kbdwin.o
+OBJS-$(CONFIG_AC3_DECODER)             += ac3dec_float.o ac3dec_data.o ac3.o kbdwin.o
+OBJS-$(CONFIG_AC3_FIXED_DECODER)       += ac3dec_fixed.o ac3dec_data.o ac3.o kbdwin.o
 OBJS-$(CONFIG_AC3_ENCODER)             += ac3enc_float.o ac3enc.o ac3tab.o \
                                           ac3.o kbdwin.o
 OBJS-$(CONFIG_AC3_FIXED_ENCODER)       += ac3enc_fixed.o ac3enc.o ac3tab.o ac3.o
@@ -115,10 +129,16 @@ OBJS-$(CONFIG_AMRWB_DECODER)           += amrwbdec.o celp_filters.o   \
                                           celp_math.o acelp_filters.o \
                                           acelp_vectors.o             \
                                           acelp_pitch_delay.o
+OBJS-$(CONFIG_AMV_ENCODER)             += mjpegenc.o mjpeg.o mjpegenc_common.o \
+                                          mpegvideo_enc.o motion_est.o \
+                                          ratecontrol.o mpeg12data.o   \
+                                          mpegvideo.o
 OBJS-$(CONFIG_ANM_DECODER)             += anm.o
 OBJS-$(CONFIG_ANSI_DECODER)            += ansi.o cga_data.o
 OBJS-$(CONFIG_APE_DECODER)             += apedec.o
-OBJS-$(CONFIG_ASS_DECODER)             += assdec.o ass.o
+OBJS-$(CONFIG_SSA_DECODER)             += assdec.o ass.o ass_split.o
+OBJS-$(CONFIG_SSA_ENCODER)             += assenc.o ass.o
+OBJS-$(CONFIG_ASS_DECODER)             += assdec.o ass.o ass_split.o
 OBJS-$(CONFIG_ASS_ENCODER)             += assenc.o ass.o
 OBJS-$(CONFIG_ASV1_DECODER)            += asvdec.o asv.o mpeg12data.o
 OBJS-$(CONFIG_ASV1_ENCODER)            += asvenc.o asv.o mpeg12data.o
@@ -130,12 +150,20 @@ OBJS-$(CONFIG_ATRAC3P_DECODER)         += atrac3plusdec.o atrac3plus.o \
                                           atrac3plusdsp.o atrac.o
 OBJS-$(CONFIG_AURA_DECODER)            += cyuv.o
 OBJS-$(CONFIG_AURA2_DECODER)           += aura.o
+OBJS-$(CONFIG_AVRN_DECODER)            += avrndec.o mjpegdec.o mjpeg.o
+OBJS-$(CONFIG_AVRP_DECODER)            += r210dec.o
+OBJS-$(CONFIG_AVRP_ENCODER)            += r210enc.o
 OBJS-$(CONFIG_AVS_DECODER)             += avs.o
+OBJS-$(CONFIG_AVUI_DECODER)            += avuidec.o
+OBJS-$(CONFIG_AVUI_ENCODER)            += avuienc.o
+OBJS-$(CONFIG_AYUV_DECODER)            += v408dec.o
+OBJS-$(CONFIG_AYUV_ENCODER)            += v408enc.o
 OBJS-$(CONFIG_BETHSOFTVID_DECODER)     += bethsoftvideo.o
 OBJS-$(CONFIG_BFI_DECODER)             += bfi.o
 OBJS-$(CONFIG_BINK_DECODER)            += bink.o binkdsp.o
 OBJS-$(CONFIG_BINKAUDIO_DCT_DECODER)   += binkaudio.o wma.o wma_common.o
 OBJS-$(CONFIG_BINKAUDIO_RDFT_DECODER)  += binkaudio.o wma.o wma_common.o
+OBJS-$(CONFIG_BINTEXT_DECODER)         += bintext.o cga_data.o
 OBJS-$(CONFIG_BMP_DECODER)             += bmp.o msrledec.o
 OBJS-$(CONFIG_BMP_ENCODER)             += bmpenc.o
 OBJS-$(CONFIG_BMV_AUDIO_DECODER)       += bmvaudio.o
@@ -147,21 +175,30 @@ OBJS-$(CONFIG_CAVS_DECODER)            += cavs.o cavsdec.o cavsdsp.o \
 OBJS-$(CONFIG_CDGRAPHICS_DECODER)      += cdgraphics.o
 OBJS-$(CONFIG_CDXL_DECODER)            += cdxl.o
 OBJS-$(CONFIG_CINEPAK_DECODER)         += cinepak.o
+OBJS-$(CONFIG_CINEPAK_ENCODER)         += cinepakenc.o elbg.o
 OBJS-$(CONFIG_CLJR_DECODER)            += cljrdec.o
 OBJS-$(CONFIG_CLJR_ENCODER)            += cljrenc.o
 OBJS-$(CONFIG_CLLC_DECODER)            += cllc.o
 OBJS-$(CONFIG_COOK_DECODER)            += cook.o
 OBJS-$(CONFIG_COMFORTNOISE_DECODER)    += cngdec.o celp_filters.o
 OBJS-$(CONFIG_COMFORTNOISE_ENCODER)    += cngenc.o
+OBJS-$(CONFIG_CPIA_DECODER)            += cpia.o
 OBJS-$(CONFIG_CSCD_DECODER)            += cscd.o
 OBJS-$(CONFIG_CYUV_DECODER)            += cyuv.o
 OBJS-$(CONFIG_DCA_DECODER)             += dcadec.o dca.o dcadsp.o      \
                                           synth_filter.o
+OBJS-$(CONFIG_DCA_ENCODER)             += dcaenc.o dca.o
+OBJS-$(CONFIG_DIRAC_DECODER)           += diracdec.o dirac.o diracdsp.o \
+                                          dirac_arith.o mpeg12data.o dirac_dwt.o
 OBJS-$(CONFIG_DFA_DECODER)             += dfa.o
 OBJS-$(CONFIG_DNXHD_DECODER)           += dnxhddec.o dnxhddata.o
 OBJS-$(CONFIG_DNXHD_ENCODER)           += dnxhdenc.o dnxhddata.o
 OBJS-$(CONFIG_DPX_DECODER)             += dpx.o
 OBJS-$(CONFIG_DPX_ENCODER)             += dpxenc.o
+OBJS-$(CONFIG_DSD_LSBF_DECODER)        += dsddec.o
+OBJS-$(CONFIG_DSD_MSBF_DECODER)        += dsddec.o
+OBJS-$(CONFIG_DSD_LSBF_PLANAR_DECODER) += dsddec.o
+OBJS-$(CONFIG_DSD_MSBF_PLANAR_DECODER) += dsddec.o
 OBJS-$(CONFIG_DSICINAUDIO_DECODER)     += dsicinaudio.o
 OBJS-$(CONFIG_DSICINVIDEO_DECODER)     += dsicinvideo.o
 OBJS-$(CONFIG_DVBSUB_DECODER)          += dvbsubdec.o
@@ -185,14 +222,17 @@ OBJS-$(CONFIG_EIGHTSVX_EXP_DECODER)    += 8svx.o
 OBJS-$(CONFIG_EIGHTSVX_FIB_DECODER)    += 8svx.o
 OBJS-$(CONFIG_ESCAPE124_DECODER)       += escape124.o
 OBJS-$(CONFIG_ESCAPE130_DECODER)       += escape130.o
+OBJS-$(CONFIG_EVRC_DECODER)            += evrcdec.o acelp_vectors.o lsp.o
 OBJS-$(CONFIG_EXR_DECODER)             += exr.o
 OBJS-$(CONFIG_FFV1_DECODER)            += ffv1dec.o ffv1.o
 OBJS-$(CONFIG_FFV1_ENCODER)            += ffv1enc.o ffv1.o
+OBJS-$(CONFIG_FFWAVESYNTH_DECODER)     += ffwavesynth.o
 OBJS-$(CONFIG_FIC_DECODER)             += fic.o
 OBJS-$(CONFIG_FLAC_DECODER)            += flacdec.o flacdata.o flac.o flacdsp.o
-OBJS-$(CONFIG_FLAC_ENCODER)            += flacenc.o flacdata.o flac.o flacdsp.o
+OBJS-$(CONFIG_FLAC_ENCODER)            += flacenc.o flacdata.o flac.o flacdsp.o vorbis_data.o
 OBJS-$(CONFIG_FLASHSV_DECODER)         += flashsv.o
 OBJS-$(CONFIG_FLASHSV_ENCODER)         += flashsvenc.o
+OBJS-$(CONFIG_FLASHSV2_ENCODER)        += flashsv2enc.o
 OBJS-$(CONFIG_FLASHSV2_DECODER)        += flashsv.o
 OBJS-$(CONFIG_FLIC_DECODER)            += flicvideo.o
 OBJS-$(CONFIG_FOURXM_DECODER)          += 4xm.o
@@ -200,7 +240,9 @@ OBJS-$(CONFIG_FRAPS_DECODER)           += fraps.o
 OBJS-$(CONFIG_FRWU_DECODER)            += frwu.o
 OBJS-$(CONFIG_G2M_DECODER)             += g2meet.o mjpeg.o
 OBJS-$(CONFIG_G723_1_DECODER)          += g723_1.o acelp_vectors.o \
-                                          celp_filters.o
+                                          celp_filters.o celp_math.o
+OBJS-$(CONFIG_G723_1_ENCODER)          += g723_1.o acelp_vectors.o celp_math.o
+OBJS-$(CONFIG_G729_DECODER)            += g729dec.o lsp.o celp_math.o acelp_filters.o acelp_pitch_delay.o acelp_vectors.o g729postfilter.o
 OBJS-$(CONFIG_GIF_DECODER)             += gifdec.o lzw.o
 OBJS-$(CONFIG_GIF_ENCODER)             += gif.o lzwenc.o
 OBJS-$(CONFIG_GSM_DECODER)             += gsmdec.o gsmdec_data.o msgsmdec.o
@@ -216,6 +258,7 @@ OBJS-$(CONFIG_H264_DECODER)            += h264.o h264_cabac.o h264_cavlc.o \
                                           h264_direct.o h264_loopfilter.o  \
                                           h264_mb.o h264_picture.o h264_ps.o \
                                           h264_refs.o h264_sei.o h264_slice.o
+OBJS-$(CONFIG_H264_VDA_DECODER)        += vda_h264_dec.o
 OBJS-$(CONFIG_HEVC_DECODER)            += hevc.o hevc_mvs.o hevc_ps.o hevc_sei.o \
                                           hevc_cabac.o hevc_refs.o hevcpred.o    \
                                           hevcdsp.o hevc_filter.o
@@ -223,6 +266,7 @@ OBJS-$(CONFIG_HNM4_VIDEO_DECODER)      += hnm4video.o
 OBJS-$(CONFIG_HUFFYUV_DECODER)         += huffyuv.o huffyuvdec.o
 OBJS-$(CONFIG_HUFFYUV_ENCODER)         += huffyuv.o huffyuvenc.o
 OBJS-$(CONFIG_IDCIN_DECODER)           += idcinvideo.o
+OBJS-$(CONFIG_IDF_DECODER)             += bintext.o cga_data.o
 OBJS-$(CONFIG_IFF_BYTERUN1_DECODER)    += iff.o
 OBJS-$(CONFIG_IFF_ILBM_DECODER)        += iff.o
 OBJS-$(CONFIG_IMC_DECODER)             += imc.o
@@ -232,6 +276,9 @@ OBJS-$(CONFIG_INDEO4_DECODER)          += indeo4.o ivi_common.o ivi_dsp.o
 OBJS-$(CONFIG_INDEO5_DECODER)          += indeo5.o ivi_common.o ivi_dsp.o
 OBJS-$(CONFIG_INTERPLAY_DPCM_DECODER)  += dpcm.o
 OBJS-$(CONFIG_INTERPLAY_VIDEO_DECODER) += interplayvideo.o
+OBJS-$(CONFIG_JACOSUB_DECODER)         += jacosubdec.o ass.o
+OBJS-$(CONFIG_JPEG2000_ENCODER)        += j2kenc.o mqcenc.o mqc.o jpeg2000.o \
+                                          jpeg2000dwt.o
 OBJS-$(CONFIG_JPEG2000_DECODER)        += jpeg2000dec.o jpeg2000.o      \
                                           jpeg2000dwt.o mqcdec.o mqc.o
 OBJS-$(CONFIG_JPEGLS_DECODER)          += jpeglsdec.o jpegls.o
@@ -247,6 +294,7 @@ OBJS-$(CONFIG_MACE6_DECODER)           += mace.o
 OBJS-$(CONFIG_MDEC_DECODER)            += mdec.o mpeg12.o mpeg12data.o
 OBJS-$(CONFIG_METASOUND_DECODER)       += metasound.o metasound_data.o \
                                           twinvq.o
+OBJS-$(CONFIG_MICRODVD_DECODER)        += microdvddec.o ass.o
 OBJS-$(CONFIG_MIMIC_DECODER)           += mimic.o
 OBJS-$(CONFIG_MJPEG_DECODER)           += mjpegdec.o mjpeg.o
 OBJS-$(CONFIG_MJPEG_ENCODER)           += mjpegenc.o mjpeg.o mjpegenc_common.o
@@ -254,10 +302,14 @@ OBJS-$(CONFIG_MJPEGB_DECODER)          += mjpegbdec.o
 OBJS-$(CONFIG_MLP_DECODER)             += mlpdec.o mlpdsp.o
 OBJS-$(CONFIG_MMVIDEO_DECODER)         += mmvideo.o
 OBJS-$(CONFIG_MOTIONPIXELS_DECODER)    += motionpixels.o
+OBJS-$(CONFIG_MOVTEXT_DECODER)         += movtextdec.o ass.o
+OBJS-$(CONFIG_MOVTEXT_ENCODER)         += movtextenc.o ass_split.o
 OBJS-$(CONFIG_MP1_DECODER)             += mpegaudiodec_fixed.o
 OBJS-$(CONFIG_MP1FLOAT_DECODER)        += mpegaudiodec_float.o
 OBJS-$(CONFIG_MP2_DECODER)             += mpegaudiodec_fixed.o
-OBJS-$(CONFIG_MP2_ENCODER)             += mpegaudioenc.o mpegaudio.o \
+OBJS-$(CONFIG_MP2_ENCODER)             += mpegaudioenc_float.o mpegaudio.o \
+                                          mpegaudiodata.o mpegaudiodsp_data.o
+OBJS-$(CONFIG_MP2FIXED_ENCODER)        += mpegaudioenc_fixed.o mpegaudio.o \
                                           mpegaudiodata.o mpegaudiodsp_data.o
 OBJS-$(CONFIG_MP2FLOAT_DECODER)        += mpegaudiodec_float.o
 OBJS-$(CONFIG_MP3_DECODER)             += mpegaudiodec_fixed.o
@@ -268,12 +320,13 @@ OBJS-$(CONFIG_MP3ON4_DECODER)          += mpegaudiodec_fixed.o mpeg4audio.o
 OBJS-$(CONFIG_MP3ON4FLOAT_DECODER)     += mpegaudiodec_float.o mpeg4audio.o
 OBJS-$(CONFIG_MPC7_DECODER)            += mpc7.o mpc.o
 OBJS-$(CONFIG_MPC8_DECODER)            += mpc8.o mpc.o
-OBJS-$(CONFIG_MPEG_XVMC_DECODER)       += mpegvideo_xvmc.o
+OBJS-$(CONFIG_MPEGVIDEO_DECODER)       += mpeg12dec.o mpeg12.o mpeg12data.o
 OBJS-$(CONFIG_MPEG1VIDEO_DECODER)      += mpeg12dec.o mpeg12.o mpeg12data.o
 OBJS-$(CONFIG_MPEG1VIDEO_ENCODER)      += mpeg12enc.o mpeg12.o
 OBJS-$(CONFIG_MPEG2VIDEO_DECODER)      += mpeg12dec.o mpeg12.o mpeg12data.o
 OBJS-$(CONFIG_MPEG2VIDEO_ENCODER)      += mpeg12enc.o mpeg12.o
 OBJS-$(CONFIG_MPEG4_DECODER)           += xvididct.o
+OBJS-$(CONFIG_MPL2_DECODER)            += mpl2dec.o ass.o
 OBJS-$(CONFIG_MSMPEG4V1_DECODER)       += msmpeg4dec.o msmpeg4.o msmpeg4data.o
 OBJS-$(CONFIG_MSMPEG4V2_DECODER)       += msmpeg4dec.o msmpeg4.o msmpeg4data.o
 OBJS-$(CONFIG_MSMPEG4V2_ENCODER)       += msmpeg4enc.o msmpeg4.o msmpeg4data.o
@@ -284,6 +337,7 @@ OBJS-$(CONFIG_MSA1_DECODER)            += mss3.o mss34dsp.o
 OBJS-$(CONFIG_MSS1_DECODER)            += mss1.o mss12.o
 OBJS-$(CONFIG_MSS2_DECODER)            += mss2.o mss12.o mss2dsp.o
 OBJS-$(CONFIG_MSVIDEO1_DECODER)        += msvideo1.o
+OBJS-$(CONFIG_MSVIDEO1_ENCODER)        += msvideo1enc.o elbg.o
 OBJS-$(CONFIG_MSZH_DECODER)            += lcldec.o
 OBJS-$(CONFIG_MTS2_DECODER)            += mss4.o mss34dsp.o
 OBJS-$(CONFIG_MVC1_DECODER)            += mvcdec.o
@@ -310,12 +364,16 @@ OBJS-$(CONFIG_PGMYUV_DECODER)          += pnmdec.o pnm.o
 OBJS-$(CONFIG_PGMYUV_ENCODER)          += pnmenc.o
 OBJS-$(CONFIG_PGSSUB_DECODER)          += pgssubdec.o
 OBJS-$(CONFIG_PICTOR_DECODER)          += pictordec.o cga_data.o
+OBJS-$(CONFIG_PJS_DECODER)             += textdec.o ass.o
 OBJS-$(CONFIG_PNG_DECODER)             += png.o pngdec.o pngdsp.o
 OBJS-$(CONFIG_PNG_ENCODER)             += png.o pngenc.o
 OBJS-$(CONFIG_PPM_DECODER)             += pnmdec.o pnm.o
 OBJS-$(CONFIG_PPM_ENCODER)             += pnmenc.o
-OBJS-$(CONFIG_PRORES_DECODER)          += proresdec.o proresdata.o proresdsp.o
-OBJS-$(CONFIG_PRORES_ENCODER)          += proresenc.o proresdata.o
+OBJS-$(CONFIG_PRORES_DECODER)          += proresdec2.o proresdsp.o proresdata.o
+OBJS-$(CONFIG_PRORES_LGPL_DECODER)     += proresdec_lgpl.o proresdsp.o proresdata.o
+OBJS-$(CONFIG_PRORES_ENCODER)          += proresenc_anatoliy.o
+OBJS-$(CONFIG_PRORES_AW_ENCODER)       += proresenc_anatoliy.o
+OBJS-$(CONFIG_PRORES_KS_ENCODER)       += proresenc_kostya.o proresdata.o
 OBJS-$(CONFIG_PTX_DECODER)             += ptx.o
 OBJS-$(CONFIG_QCELP_DECODER)           += qcelpdec.o                     \
                                           celp_filters.o acelp_vectors.o \
@@ -326,13 +384,16 @@ OBJS-$(CONFIG_QPEG_DECODER)            += qpeg.o
 OBJS-$(CONFIG_QTRLE_DECODER)           += qtrle.o
 OBJS-$(CONFIG_QTRLE_ENCODER)           += qtrleenc.o
 OBJS-$(CONFIG_R10K_DECODER)            += r210dec.o
+OBJS-$(CONFIG_R10K_ENCODER)            += r210enc.o
 OBJS-$(CONFIG_R210_DECODER)            += r210dec.o
+OBJS-$(CONFIG_R210_ENCODER)            += r210enc.o
 OBJS-$(CONFIG_RA_144_DECODER)          += ra144dec.o ra144.o celp_filters.o
 OBJS-$(CONFIG_RA_144_ENCODER)          += ra144enc.o ra144.o celp_filters.o
 OBJS-$(CONFIG_RA_288_DECODER)          += ra288.o celp_filters.o
 OBJS-$(CONFIG_RALF_DECODER)            += ralf.o
 OBJS-$(CONFIG_RAWVIDEO_DECODER)        += rawdec.o
 OBJS-$(CONFIG_RAWVIDEO_ENCODER)        += rawenc.o
+OBJS-$(CONFIG_REALTEXT_DECODER)        += realtextdec.o ass.o
 OBJS-$(CONFIG_RL2_DECODER)             += rl2.o
 OBJS-$(CONFIG_ROQ_DECODER)             += roqvideodec.o roqvideo.o
 OBJS-$(CONFIG_ROQ_ENCODER)             += roqvideoenc.o roqvideo.o elbg.o
@@ -345,7 +406,9 @@ OBJS-$(CONFIG_RV20_DECODER)            += rv10.o
 OBJS-$(CONFIG_RV20_ENCODER)            += rv20enc.o
 OBJS-$(CONFIG_RV30_DECODER)            += rv30.o rv34.o rv30dsp.o rv34dsp.o
 OBJS-$(CONFIG_RV40_DECODER)            += rv40.o rv34.o rv34dsp.o rv40dsp.o
+OBJS-$(CONFIG_SAMI_DECODER)            += samidec.o ass.o
 OBJS-$(CONFIG_S302M_DECODER)           += s302m.o
+OBJS-$(CONFIG_S302M_ENCODER)           += s302menc.o
 OBJS-$(CONFIG_SANM_DECODER)            += sanm.o
 OBJS-$(CONFIG_SGI_DECODER)             += sgidec.o
 OBJS-$(CONFIG_SGI_ENCODER)             += sgienc.o rle.o
@@ -358,29 +421,45 @@ OBJS-$(CONFIG_SIPR_DECODER)            += sipr.o acelp_pitch_delay.o \
 OBJS-$(CONFIG_SMACKAUD_DECODER)        += smacker.o
 OBJS-$(CONFIG_SMACKER_DECODER)         += smacker.o
 OBJS-$(CONFIG_SMC_DECODER)             += smc.o
+OBJS-$(CONFIG_SMVJPEG_DECODER)         += smvjpegdec.o
+OBJS-$(CONFIG_SNOW_DECODER)            += snowdec.o snow.o snow_dwt.o
+OBJS-$(CONFIG_SNOW_ENCODER)            += snowenc.o snow.o snow_dwt.o             \
+                                          h263.o ituh263enc.o
 OBJS-$(CONFIG_SOL_DPCM_DECODER)        += dpcm.o
+OBJS-$(CONFIG_SONIC_DECODER)           += sonic.o
+OBJS-$(CONFIG_SONIC_ENCODER)           += sonic.o
+OBJS-$(CONFIG_SONIC_LS_ENCODER)        += sonic.o
 OBJS-$(CONFIG_SP5X_DECODER)            += sp5xdec.o
 OBJS-$(CONFIG_SRT_DECODER)             += srtdec.o ass.o
+OBJS-$(CONFIG_SRT_ENCODER)             += srtenc.o ass_split.o
+OBJS-$(CONFIG_SUBRIP_DECODER)          += srtdec.o ass.o
+OBJS-$(CONFIG_SUBRIP_ENCODER)          += srtenc.o ass_split.o
+OBJS-$(CONFIG_SUBVIEWER1_DECODER)      += textdec.o ass.o
+OBJS-$(CONFIG_SUBVIEWER_DECODER)       += subviewerdec.o ass.o
 OBJS-$(CONFIG_SUNRAST_DECODER)         += sunrast.o
 OBJS-$(CONFIG_SUNRAST_ENCODER)         += sunrastenc.o
 OBJS-$(CONFIG_SVQ1_DECODER)            += svq1dec.o svq1.o svq13.o h263.o
 OBJS-$(CONFIG_SVQ1_ENCODER)            += svq1enc.o svq1.o    \
                                           h263.o ituh263enc.o
 OBJS-$(CONFIG_SVQ3_DECODER)            += svq3.o svq13.o mpegutils.o
+OBJS-$(CONFIG_TEXT_DECODER)            += textdec.o ass.o
 OBJS-$(CONFIG_TAK_DECODER)             += takdec.o tak.o
 OBJS-$(CONFIG_TARGA_DECODER)           += targa.o
 OBJS-$(CONFIG_TARGA_ENCODER)           += targaenc.o rle.o
+OBJS-$(CONFIG_TARGA_Y216_DECODER)      += targa_y216dec.o
 OBJS-$(CONFIG_THEORA_DECODER)          += xiph.o
 OBJS-$(CONFIG_TIERTEXSEQVIDEO_DECODER) += tiertexseqv.o
-OBJS-$(CONFIG_TIFF_DECODER)            += tiff.o lzw.o faxcompr.o
-OBJS-$(CONFIG_TIFF_ENCODER)            += tiffenc.o rle.o lzwenc.o
+OBJS-$(CONFIG_TIFF_DECODER)            += tiff.o lzw.o faxcompr.o tiff_data.o tiff_common.o
+OBJS-$(CONFIG_TIFF_ENCODER)            += tiffenc.o rle.o lzwenc.o tiff_data.o
 OBJS-$(CONFIG_TMV_DECODER)             += tmv.o cga_data.o
+OBJS-$(CONFIG_TRUEHD_DECODER)          += mlpdec.o mlpdsp.o
 OBJS-$(CONFIG_TRUEMOTION1_DECODER)     += truemotion1.o
 OBJS-$(CONFIG_TRUEMOTION2_DECODER)     += truemotion2.o
 OBJS-$(CONFIG_TRUESPEECH_DECODER)      += truespeech.o
 OBJS-$(CONFIG_TSCC_DECODER)            += tscc.o msrledec.o
 OBJS-$(CONFIG_TSCC2_DECODER)           += tscc2.o
-OBJS-$(CONFIG_TTA_DECODER)             += tta.o
+OBJS-$(CONFIG_TTA_DECODER)             += tta.o ttadata.o ttadsp.o
+OBJS-$(CONFIG_TTA_ENCODER)             += ttaenc.o ttadata.o
 OBJS-$(CONFIG_TWINVQ_DECODER)          += twinvqdec.o twinvq.o
 OBJS-$(CONFIG_TXD_DECODER)             += txd.o s3tc.o
 OBJS-$(CONFIG_ULTI_DECODER)            += ulti.o
@@ -388,13 +467,18 @@ OBJS-$(CONFIG_UTVIDEO_DECODER)         += utvideodec.o utvideo.o
 OBJS-$(CONFIG_UTVIDEO_ENCODER)         += utvideoenc.o utvideo.o
 OBJS-$(CONFIG_V210_DECODER)            += v210dec.o
 OBJS-$(CONFIG_V210_ENCODER)            += v210enc.o
+OBJS-$(CONFIG_V308_DECODER)            += v308dec.o
+OBJS-$(CONFIG_V308_ENCODER)            += v308enc.o
+OBJS-$(CONFIG_V408_DECODER)            += v408dec.o
+OBJS-$(CONFIG_V408_ENCODER)            += v408enc.o
 OBJS-$(CONFIG_V410_DECODER)            += v410dec.o
 OBJS-$(CONFIG_V410_ENCODER)            += v410enc.o
 OBJS-$(CONFIG_V210X_DECODER)           += v210x.o
 OBJS-$(CONFIG_VB_DECODER)              += vb.o
 OBJS-$(CONFIG_VBLE_DECODER)            += vble.o
 OBJS-$(CONFIG_VC1_DECODER)             += vc1dec.o vc1.o vc1data.o vc1dsp.o \
-                                          msmpeg4dec.o msmpeg4.o msmpeg4data.o
+                                          msmpeg4dec.o msmpeg4.o msmpeg4data.o \
+                                          wmv2dsp.o startcode.o
 OBJS-$(CONFIG_VCR1_DECODER)            += vcr1.o
 OBJS-$(CONFIG_VMDAUDIO_DECODER)        += vmdaudio.o
 OBJS-$(CONFIG_VMDVIDEO_DECODER)        += vmdvideo.o
@@ -410,11 +494,15 @@ OBJS-$(CONFIG_VP6_DECODER)             += vp6.o vp56.o vp56data.o vp56dsp.o \
                                           vp6dsp.o vp56rac.o
 OBJS-$(CONFIG_VP7_DECODER)             += vp8.o vp8dsp.o vp56rac.o
 OBJS-$(CONFIG_VP8_DECODER)             += vp8.o vp8dsp.o vp56rac.o
-OBJS-$(CONFIG_VP9_DECODER)             += vp9.o vp9data.o vp9dsp.o \
-                                          vp9block.o vp9prob.o vp9mvs.o vp56rac.o
+OBJS-$(CONFIG_VP9_DECODER)             += vp9.o vp9dsp.o vp56rac.o
+OBJS-$(CONFIG_VPLAYER_DECODER)         += textdec.o ass.o
 OBJS-$(CONFIG_VQA_DECODER)             += vqavideo.o
 OBJS-$(CONFIG_WAVPACK_DECODER)         += wavpack.o
-OBJS-$(CONFIG_WEBP_DECODER)            += webp.o
+OBJS-$(CONFIG_WAVPACK_ENCODER)         += wavpackenc.o
+OBJS-$(CONFIG_WEBP_DECODER)            += vp8.o vp8dsp.o vp56rac.o
+OBJS-$(CONFIG_WEBP_DECODER)            += webp.o exif.o tiff_common.o
+OBJS-$(CONFIG_WEBVTT_DECODER)          += webvttdec.o ass.o
+OBJS-$(CONFIG_WEBVTT_ENCODER)          += webvttenc.o ass_split.o
 OBJS-$(CONFIG_WMALOSSLESS_DECODER)     += wmalosslessdec.o wma_common.o
 OBJS-$(CONFIG_WMAPRO_DECODER)          += wmaprodec.o wma.o wma_common.o
 OBJS-$(CONFIG_WMAV1_DECODER)           += wmadec.o wma.o wma_common.o aactab.o
@@ -425,6 +513,7 @@ OBJS-$(CONFIG_WMAVOICE_DECODER)        += wmavoice.o \
                                           celp_filters.o \
                                           acelp_vectors.o acelp_filters.o
 OBJS-$(CONFIG_WMV1_DECODER)            += msmpeg4dec.o msmpeg4.o msmpeg4data.o
+OBJS-$(CONFIG_WMV1_ENCODER)            += msmpeg4enc.o
 OBJS-$(CONFIG_WMV2_DECODER)            += wmv2dec.o wmv2.o wmv2dsp.o \
                                           msmpeg4dec.o msmpeg4.o msmpeg4data.o
 OBJS-$(CONFIG_WMV2_ENCODER)            += wmv2enc.o wmv2.o wmv2dsp.o \
@@ -434,14 +523,21 @@ OBJS-$(CONFIG_WS_SND1_DECODER)         += ws-snd1.o
 OBJS-$(CONFIG_XAN_DPCM_DECODER)        += dpcm.o
 OBJS-$(CONFIG_XAN_WC3_DECODER)         += xan.o
 OBJS-$(CONFIG_XAN_WC4_DECODER)         += xxan.o
+OBJS-$(CONFIG_XBIN_DECODER)            += bintext.o cga_data.o
 OBJS-$(CONFIG_XBM_DECODER)             += xbmdec.o
 OBJS-$(CONFIG_XBM_ENCODER)             += xbmenc.o
+OBJS-$(CONFIG_XFACE_DECODER)           += xfacedec.o xface.o
+OBJS-$(CONFIG_XFACE_ENCODER)           += xfaceenc.o xface.o
 OBJS-$(CONFIG_XL_DECODER)              += xl.o
 OBJS-$(CONFIG_XSUB_DECODER)            += xsubdec.o
 OBJS-$(CONFIG_XSUB_ENCODER)            += xsubenc.o
 OBJS-$(CONFIG_XWD_DECODER)             += xwddec.o
 OBJS-$(CONFIG_XWD_ENCODER)             += xwdenc.o
+OBJS-$(CONFIG_Y41P_DECODER)            += y41pdec.o
+OBJS-$(CONFIG_Y41P_ENCODER)            += y41penc.o
 OBJS-$(CONFIG_YOP_DECODER)             += yop.o
+OBJS-$(CONFIG_YUV4_DECODER)            += yuv4dec.o
+OBJS-$(CONFIG_YUV4_ENCODER)            += yuv4enc.o
 OBJS-$(CONFIG_ZEROCODEC_DECODER)       += zerocodec.o
 OBJS-$(CONFIG_ZLIB_DECODER)            += lcldec.o
 OBJS-$(CONFIG_ZLIB_ENCODER)            += lclenc.o
@@ -466,12 +562,16 @@ OBJS-$(CONFIG_PCM_MULAW_DECODER)          += pcm.o
 OBJS-$(CONFIG_PCM_MULAW_ENCODER)          += pcm.o
 OBJS-$(CONFIG_PCM_S8_DECODER)             += pcm.o
 OBJS-$(CONFIG_PCM_S8_ENCODER)             += pcm.o
-OBJS-$(CONFIG_PCM_S8_PLANAR_DECODER)      += 8svx.o
+OBJS-$(CONFIG_PCM_S8_PLANAR_DECODER)      += pcm.o
+OBJS-$(CONFIG_PCM_S8_PLANAR_ENCODER)      += pcm.o
 OBJS-$(CONFIG_PCM_S16BE_DECODER)          += pcm.o
 OBJS-$(CONFIG_PCM_S16BE_ENCODER)          += pcm.o
+OBJS-$(CONFIG_PCM_S16BE_PLANAR_DECODER)   += pcm.o
+OBJS-$(CONFIG_PCM_S16BE_PLANAR_ENCODER)   += pcm.o
 OBJS-$(CONFIG_PCM_S16LE_DECODER)          += pcm.o
 OBJS-$(CONFIG_PCM_S16LE_ENCODER)          += pcm.o
 OBJS-$(CONFIG_PCM_S16LE_PLANAR_DECODER)   += pcm.o
+OBJS-$(CONFIG_PCM_S16LE_PLANAR_ENCODER)   += pcm.o
 OBJS-$(CONFIG_PCM_S24BE_DECODER)          += pcm.o
 OBJS-$(CONFIG_PCM_S24BE_ENCODER)          += pcm.o
 OBJS-$(CONFIG_PCM_S24DAUD_DECODER)        += pcm.o
@@ -479,11 +579,13 @@ OBJS-$(CONFIG_PCM_S24DAUD_ENCODER)        += pcm.o
 OBJS-$(CONFIG_PCM_S24LE_DECODER)          += pcm.o
 OBJS-$(CONFIG_PCM_S24LE_ENCODER)          += pcm.o
 OBJS-$(CONFIG_PCM_S24LE_PLANAR_DECODER)   += pcm.o
+OBJS-$(CONFIG_PCM_S24LE_PLANAR_ENCODER)   += pcm.o
 OBJS-$(CONFIG_PCM_S32BE_DECODER)          += pcm.o
 OBJS-$(CONFIG_PCM_S32BE_ENCODER)          += pcm.o
 OBJS-$(CONFIG_PCM_S32LE_DECODER)          += pcm.o
 OBJS-$(CONFIG_PCM_S32LE_ENCODER)          += pcm.o
 OBJS-$(CONFIG_PCM_S32LE_PLANAR_DECODER)   += pcm.o
+OBJS-$(CONFIG_PCM_S32LE_PLANAR_ENCODER)   += pcm.o
 OBJS-$(CONFIG_PCM_U8_DECODER)             += pcm.o
 OBJS-$(CONFIG_PCM_U8_ENCODER)             += pcm.o
 OBJS-$(CONFIG_PCM_U16BE_DECODER)          += pcm.o
@@ -503,7 +605,9 @@ OBJS-$(CONFIG_PCM_ZORK_DECODER)           += pcm.o
 OBJS-$(CONFIG_ADPCM_4XM_DECODER)          += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_ADX_DECODER)          += adxdec.o adx.o
 OBJS-$(CONFIG_ADPCM_ADX_ENCODER)          += adxenc.o adx.o
+OBJS-$(CONFIG_ADPCM_AFC_DECODER)          += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_CT_DECODER)           += adpcm.o adpcm_data.o
+OBJS-$(CONFIG_ADPCM_DTK_DECODER)          += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_EA_DECODER)           += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_EA_MAXIS_XA_DECODER)  += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_EA_R1_DECODER)        += adpcm.o adpcm_data.o
@@ -514,6 +618,7 @@ OBJS-$(CONFIG_ADPCM_G722_DECODER)         += g722.o g722dec.o
 OBJS-$(CONFIG_ADPCM_G722_ENCODER)         += g722.o g722enc.o
 OBJS-$(CONFIG_ADPCM_G726_DECODER)         += g726.o
 OBJS-$(CONFIG_ADPCM_G726_ENCODER)         += g726.o
+OBJS-$(CONFIG_ADPCM_G726LE_DECODER)       += g726.o
 OBJS-$(CONFIG_ADPCM_IMA_AMV_DECODER)      += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_IMA_APC_DECODER)      += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_IMA_DK3_DECODER)      += adpcm.o adpcm_data.o
@@ -521,8 +626,10 @@ OBJS-$(CONFIG_ADPCM_IMA_DK4_DECODER)      += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_IMA_EA_EACS_DECODER)  += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_IMA_EA_SEAD_DECODER)  += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_IMA_ISS_DECODER)      += adpcm.o adpcm_data.o
+OBJS-$(CONFIG_ADPCM_IMA_OKI_DECODER)      += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_IMA_QT_DECODER)       += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_IMA_QT_ENCODER)       += adpcmenc.o adpcm_data.o
+OBJS-$(CONFIG_ADPCM_IMA_RAD_DECODER)      += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_IMA_SMJPEG_DECODER)   += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_IMA_WAV_DECODER)      += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_IMA_WAV_ENCODER)      += adpcmenc.o adpcm_data.o
@@ -539,63 +646,76 @@ OBJS-$(CONFIG_ADPCM_VIMA_DECODER)         += vima.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_XA_DECODER)           += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_YAMAHA_DECODER)       += adpcm.o adpcm_data.o
 OBJS-$(CONFIG_ADPCM_YAMAHA_ENCODER)       += adpcmenc.o adpcm_data.o
+OBJS-$(CONFIG_VIMA_DECODER)               += vima.o adpcm_data.o
 
 # hardware accelerators
-OBJS-$(CONFIG_H263_VAAPI_HWACCEL)         += vaapi_mpeg4.o
+OBJS-$(CONFIG_H263_VAAPI_HWACCEL)         += vaapi_mpeg4.o vaapi_mpeg.o
 OBJS-$(CONFIG_H263_VDPAU_HWACCEL)         += vdpau_mpeg4.o
 OBJS-$(CONFIG_H264_DXVA2_HWACCEL)         += dxva2_h264.o
 OBJS-$(CONFIG_H264_VAAPI_HWACCEL)         += vaapi_h264.o
 OBJS-$(CONFIG_H264_VDA_HWACCEL)           += vda_h264.o
 OBJS-$(CONFIG_H264_VDPAU_HWACCEL)         += vdpau_h264.o
 OBJS-$(CONFIG_MPEG1_VDPAU_HWACCEL)        += vdpau_mpeg12.o
+OBJS-$(CONFIG_MPEG1_XVMC_HWACCEL)         += mpegvideo_xvmc.o
 OBJS-$(CONFIG_MPEG2_DXVA2_HWACCEL)        += dxva2_mpeg2.o
-OBJS-$(CONFIG_MPEG2_VAAPI_HWACCEL)        += vaapi_mpeg2.o
+OBJS-$(CONFIG_MPEG2_VAAPI_HWACCEL)        += vaapi_mpeg2.o vaapi_mpeg.o
 OBJS-$(CONFIG_MPEG2_VDPAU_HWACCEL)        += vdpau_mpeg12.o
-OBJS-$(CONFIG_MPEG4_VAAPI_HWACCEL)        += vaapi_mpeg4.o
+OBJS-$(CONFIG_MPEG2_XVMC_HWACCEL)         += mpegvideo_xvmc.o
+OBJS-$(CONFIG_MPEG4_VAAPI_HWACCEL)        += vaapi_mpeg4.o vaapi_mpeg.o
 OBJS-$(CONFIG_MPEG4_VDPAU_HWACCEL)        += vdpau_mpeg4.o
 OBJS-$(CONFIG_VC1_DXVA2_HWACCEL)          += dxva2_vc1.o
-OBJS-$(CONFIG_VC1_VAAPI_HWACCEL)          += vaapi_vc1.o
+OBJS-$(CONFIG_VC1_VAAPI_HWACCEL)          += vaapi_vc1.o vaapi_mpeg.o
 OBJS-$(CONFIG_VC1_VDPAU_HWACCEL)          += vdpau_vc1.o
 
 # libavformat dependencies
 OBJS-$(CONFIG_ADTS_MUXER)              += mpeg4audio.o
 OBJS-$(CONFIG_CAF_DEMUXER)             += mpeg4audio.o mpegaudiodata.o  \
                                           ac3tab.o
-OBJS-$(CONFIG_FLAC_DEMUXER)            += flac.o flacdata.o             \
+OBJS-$(CONFIG_FLAC_DEMUXER)            += flac.o flacdata.o vorbis_data.o \
                                           vorbis_parser.o xiph.o
-OBJS-$(CONFIG_FLAC_MUXER)              += flac.o flacdata.o
+OBJS-$(CONFIG_FLAC_MUXER)              += flac.o flacdata.o vorbis_data.o
 OBJS-$(CONFIG_FLV_DEMUXER)             += mpeg4audio.o
 OBJS-$(CONFIG_GXF_DEMUXER)             += mpeg12data.o
 OBJS-$(CONFIG_IFF_DEMUXER)             += iff.o
 OBJS-$(CONFIG_ISMV_MUXER)              += mpeg4audio.o mpegaudiodata.o
 OBJS-$(CONFIG_LATM_MUXER)              += mpeg4audio.o
-OBJS-$(CONFIG_MATROSKA_AUDIO_MUXER)    += xiph.o mpeg4audio.o           \
+OBJS-$(CONFIG_MATROSKA_AUDIO_MUXER)    += xiph.o mpeg4audio.o vorbis_data.o \
                                           flac.o flacdata.o
 OBJS-$(CONFIG_MATROSKA_DEMUXER)        += mpeg4audio.o mpegaudiodata.o  \
                                           vorbis_parser.o xiph.o
 OBJS-$(CONFIG_MATROSKA_MUXER)          += mpeg4audio.o mpegaudiodata.o  \
-                                          flac.o flacdata.o xiph.o
+                                          flac.o flacdata.o vorbis_data.o xiph.o
 OBJS-$(CONFIG_MP2_MUXER)               += mpegaudiodata.o mpegaudiodecheader.o
 OBJS-$(CONFIG_MP3_MUXER)               += mpegaudiodata.o mpegaudiodecheader.o
 OBJS-$(CONFIG_MOV_DEMUXER)             += mpeg4audio.o mpegaudiodata.o ac3tab.o
 OBJS-$(CONFIG_MOV_MUXER)               += mpeg4audio.o mpegaudiodata.o
 OBJS-$(CONFIG_MPEGTS_MUXER)            += mpeg4audio.o
 OBJS-$(CONFIG_MPEGTS_DEMUXER)          += mpeg4audio.o mpegaudiodata.o
+OBJS-$(CONFIG_MXF_MUXER)               += dnxhddata.o
 OBJS-$(CONFIG_NUT_MUXER)               += mpegaudiodata.o
+OBJS-$(CONFIG_OGA_MUXER)               += xiph.o flac.o flacdata.o
 OBJS-$(CONFIG_OGG_DEMUXER)             += xiph.o flac.o flacdata.o     \
                                           mpeg12data.o vorbis_parser.o \
-                                          dirac.o
-OBJS-$(CONFIG_OGG_MUXER)               += xiph.o flac.o flacdata.o
+                                          dirac.o vorbis_data.o
+OBJS-$(CONFIG_OGG_MUXER)               += xiph.o flac.o flacdata.o \
+                                          vorbis_data.o
 OBJS-$(CONFIG_RTP_MUXER)               += mpeg4audio.o xiph.o
 OBJS-$(CONFIG_RTPDEC)                  += mjpeg.o
 OBJS-$(CONFIG_SPDIF_DEMUXER)           += aacadtsdec.o mpeg4audio.o
 OBJS-$(CONFIG_SPDIF_MUXER)             += dca.o
 OBJS-$(CONFIG_TAK_DEMUXER)             += tak.o
 OBJS-$(CONFIG_WEBM_MUXER)              += mpeg4audio.o mpegaudiodata.o  \
-                                          xiph.o flac.o flacdata.o
+                                          xiph.o flac.o flacdata.o \
+                                          vorbis_data.o
+OBJS-$(CONFIG_WEBM_DASH_MANIFEST_DEMUXER) += vorbis_parser.o xiph.o
 OBJS-$(CONFIG_WTV_DEMUXER)             += mpeg4audio.o mpegaudiodata.o
 
+# libavfilter dependencies
+OBJS-$(CONFIG_ELBG_FILTER)             += elbg.o
+
 # external codec libraries
+OBJS-$(CONFIG_LIBAACPLUS_ENCODER)         += libaacplus.o
+OBJS-$(CONFIG_LIBCELT_DECODER)            += libcelt_dec.o
 OBJS-$(CONFIG_LIBFAAC_ENCODER)            += libfaac.o
 OBJS-$(CONFIG_LIBFDK_AAC_DECODER)         += libfdk-aacdec.o
 OBJS-$(CONFIG_LIBFDK_AAC_ENCODER)         += libfdk-aacenc.o
@@ -619,14 +739,19 @@ OBJS-$(CONFIG_LIBSCHROEDINGER_DECODER)    += libschroedingerdec.o \
                                              libschroedinger.o
 OBJS-$(CONFIG_LIBSCHROEDINGER_ENCODER)    += libschroedingerenc.o \
                                              libschroedinger.o
+OBJS-$(CONFIG_LIBSHINE_ENCODER)           += libshine.o
 OBJS-$(CONFIG_LIBSPEEX_DECODER)           += libspeexdec.o
 OBJS-$(CONFIG_LIBSPEEX_ENCODER)           += libspeexenc.o
+OBJS-$(CONFIG_LIBSTAGEFRIGHT_H264_DECODER)+= libstagefright.o
 OBJS-$(CONFIG_LIBTHEORA_ENCODER)          += libtheoraenc.o
 OBJS-$(CONFIG_LIBTWOLAME_ENCODER)         += libtwolame.o
+OBJS-$(CONFIG_LIBUTVIDEO_DECODER)         += libutvideodec.o
+OBJS-$(CONFIG_LIBUTVIDEO_ENCODER)         += libutvideoenc.o
 OBJS-$(CONFIG_LIBVO_AACENC_ENCODER)       += libvo-aacenc.o mpeg4audio.o
 OBJS-$(CONFIG_LIBVO_AMRWBENC_ENCODER)     += libvo-amrwbenc.o
-OBJS-$(CONFIG_LIBVORBIS_ENCODER)          += libvorbis.o \
-                                             vorbis_data.o vorbis_parser.o
+OBJS-$(CONFIG_LIBVORBIS_DECODER)          += libvorbisdec.o
+OBJS-$(CONFIG_LIBVORBIS_ENCODER)          += libvorbisenc.o \
+                                             vorbis_data.o vorbis_parser.o xiph.o
 OBJS-$(CONFIG_LIBVPX_VP8_DECODER)         += libvpxdec.o
 OBJS-$(CONFIG_LIBVPX_VP8_ENCODER)         += libvpxenc.o
 OBJS-$(CONFIG_LIBVPX_VP9_DECODER)         += libvpxdec.o libvpx.o
@@ -637,6 +762,7 @@ OBJS-$(CONFIG_LIBX264_ENCODER)            += libx264.o
 OBJS-$(CONFIG_LIBX265_ENCODER)            += libx265.o
 OBJS-$(CONFIG_LIBXAVS_ENCODER)            += libxavs.o
 OBJS-$(CONFIG_LIBXVID_ENCODER)            += libxvid.o
+OBJS-$(CONFIG_LIBZVBI_TELETEXT_DECODER)   += libzvbi-teletextdec.o
 
 # parsers
 OBJS-$(CONFIG_AAC_PARSER)              += aac_parser.o aac_ac3_parser.o \
@@ -651,9 +777,12 @@ OBJS-$(CONFIG_COOK_PARSER)             += cook_parser.o
 OBJS-$(CONFIG_DCA_PARSER)              += dca_parser.o dca.o
 OBJS-$(CONFIG_DIRAC_PARSER)            += dirac_parser.o
 OBJS-$(CONFIG_DNXHD_PARSER)            += dnxhd_parser.o
+OBJS-$(CONFIG_DPX_PARSER)              += dpx_parser.o
 OBJS-$(CONFIG_DVBSUB_PARSER)           += dvbsub_parser.o
+OBJS-$(CONFIG_DVD_NAV_PARSER)          += dvd_nav_parser.o
 OBJS-$(CONFIG_DVDSUB_PARSER)           += dvdsub_parser.o
-OBJS-$(CONFIG_FLAC_PARSER)             += flac_parser.o flacdata.o flac.o
+OBJS-$(CONFIG_FLAC_PARSER)             += flac_parser.o flacdata.o flac.o \
+                                          vorbis_data.o
 OBJS-$(CONFIG_GSM_PARSER)              += gsm_parser.o
 OBJS-$(CONFIG_H261_PARSER)             += h261_parser.o
 OBJS-$(CONFIG_H263_PARSER)             += h263_parser.o
@@ -664,6 +793,7 @@ OBJS-$(CONFIG_MLP_PARSER)              += mlp_parser.o mlp.o
 OBJS-$(CONFIG_MPEG4VIDEO_PARSER)       += mpeg4video_parser.o h263.o \
                                           mpeg4videodec.o mpeg4video.o \
                                           ituh263dec.o h263dec.o
+OBJS-$(CONFIG_PNG_PARSER)              += png_parser.o
 OBJS-$(CONFIG_MPEGAUDIO_PARSER)        += mpegaudio_parser.o \
                                           mpegaudiodecheader.o mpegaudiodata.o
 OBJS-$(CONFIG_MPEGVIDEO_PARSER)        += mpegvideo_parser.o    \
@@ -674,12 +804,13 @@ OBJS-$(CONFIG_PNM_PARSER)              += pnm_parser.o pnm.o
 OBJS-$(CONFIG_RV30_PARSER)             += rv34_parser.o
 OBJS-$(CONFIG_RV40_PARSER)             += rv34_parser.o
 OBJS-$(CONFIG_TAK_PARSER)              += tak_parser.o tak.o
-OBJS-$(CONFIG_VC1_PARSER)              += vc1_parser.o vc1.o vc1data.o \
+OBJS-$(CONFIG_VC1_PARSER)              += vc1_parser.o vc1.o vc1data.o vc1dsp.o \
                                           msmpeg4.o msmpeg4data.o mpeg4video.o \
-                                          h263.o
+                                          h263.o startcode.o
 OBJS-$(CONFIG_VORBIS_PARSER)           += vorbis_parser.o xiph.o
 OBJS-$(CONFIG_VP3_PARSER)              += vp3_parser.o
 OBJS-$(CONFIG_VP8_PARSER)              += vp8_parser.o
+OBJS-$(CONFIG_VP9_PARSER)              += vp9_parser.o
 
 # bitstream filters
 OBJS-$(CONFIG_AAC_ADTSTOASC_BSF)          += aac_adtstoasc_bsf.o aacadtsdec.o \
@@ -691,6 +822,8 @@ OBJS-$(CONFIG_IMX_DUMP_HEADER_BSF)        += imx_dump_header_bsf.o
 OBJS-$(CONFIG_MJPEG2JPEG_BSF)             += mjpeg2jpeg_bsf.o mjpeg.o
 OBJS-$(CONFIG_MJPEGA_DUMP_HEADER_BSF)     += mjpega_dump_header_bsf.o
 OBJS-$(CONFIG_MOV2TEXTSUB_BSF)            += movsub_bsf.o
+OBJS-$(CONFIG_MP3_HEADER_DECOMPRESS_BSF)  += mp3_header_decompress_bsf.o \
+                                             mpegaudiodata.o
 OBJS-$(CONFIG_NOISE_BSF)                  += noise_bsf.o
 OBJS-$(CONFIG_REMOVE_EXTRADATA_BSF)       += remove_extradata_bsf.o
 OBJS-$(CONFIG_TEXT2MOVSUB_BSF)            += movsub_bsf.o
@@ -699,32 +832,48 @@ OBJS-$(CONFIG_TEXT2MOVSUB_BSF)            += movsub_bsf.o
 OBJS-$(HAVE_LIBC_MSVCRT)               += file_open.o
 OBJS-$(HAVE_THREADS)                   += pthread.o pthread_slice.o pthread_frame.o
 
+OBJS-$(CONFIG_FRAME_THREAD_ENCODER)    += frame_thread_encoder.o
+
+# Windows resource file
+SLIBOBJS-$(HAVE_GNU_WINDRES)           += avcodecres.o
+
 SKIPHEADERS                            += %_tablegen.h                  \
                                           %_tables.h                    \
                                           aac_tablegen_decl.h           \
                                           fft-internal.h                \
+                                          libutvideo.h                  \
+                                          old_codec_ids.h               \
                                           tableprint.h                  \
                                           $(ARCH)/vp56_arith.h          \
 
 SKIPHEADERS-$(CONFIG_DXVA2)            += dxva2.h dxva2_internal.h
 SKIPHEADERS-$(CONFIG_LIBSCHROEDINGER)  += libschroedinger.h
-SKIPHEADERS-$(CONFIG_MPEG_XVMC_DECODER) += xvmc.h
+SKIPHEADERS-$(CONFIG_LIBUTVIDEO)       += libutvideo.h
+SKIPHEADERS-$(CONFIG_XVMC)             += xvmc.h
 SKIPHEADERS-$(CONFIG_VAAPI)            += vaapi_internal.h
 SKIPHEADERS-$(CONFIG_VDA)              += vda.h vda_internal.h
 SKIPHEADERS-$(CONFIG_VDPAU)            += vdpau.h vdpau_internal.h
 
-TESTPROGS-$(CONFIG_FFT)                   += fft fft-fixed
+TESTPROGS = imgconvert                                                  \
+
+TESTPROGS-$(CONFIG_CABAC)                 += cabac
+TESTPROGS-$(CONFIG_FFT)                   += fft fft-fixed fft-fixed32
 TESTPROGS-$(CONFIG_IDCTDSP)               += dct
 TESTPROGS-$(CONFIG_IIRFILTER)             += iirfilter
+TESTPROGS-$(HAVE_MMX)                     += motion
 TESTPROGS-$(CONFIG_GOLOMB)                += golomb
 TESTPROGS-$(CONFIG_RANGECODER)            += rangecoder
+TESTPROGS-$(CONFIG_SNOW_ENCODER)          += snowenc
 
 TESTOBJS = dctref.o
 
+TOOLS = fourcc2pixfmt
+
 HOSTPROGS = aac_tablegen                                                \
             aacps_tablegen                                              \
             cbrt_tablegen                                               \
             cos_tablegen                                                \
+            dsd_tablegen                                                \
             dv_tablegen                                                 \
             motionpixels_tablegen                                       \
             mpegaudio_tablegen                                          \
@@ -749,7 +898,7 @@ else
 $(SUBDIR)%_tablegen$(HOSTEXESUF): HOSTCFLAGS += -DCONFIG_SMALL=0
 endif
 
-GEN_HEADERS = cbrt_tables.h aacps_tables.h aac_tables.h dv_tables.h     \
+GEN_HEADERS = cbrt_tables.h aacps_tables.h aac_tables.h dsd_tables.h dv_tables.h     \
               sinewin_tables.h mpegaudio_tables.h motionpixels_tables.h \
               pcm_tables.h qdm2_tables.h
 GEN_HEADERS := $(addprefix $(SUBDIR), $(GEN_HEADERS))
@@ -761,6 +910,7 @@ ifdef CONFIG_HARDCODED_TABLES
 $(SUBDIR)aacdec.o: $(SUBDIR)cbrt_tables.h
 $(SUBDIR)aacps.o: $(SUBDIR)aacps_tables.h
 $(SUBDIR)aactab.o: $(SUBDIR)aac_tables.h
+$(SUBDIR)dsddec.o: $(SUBDIR)dsd_tables.h
 $(SUBDIR)dvenc.o: $(SUBDIR)dv_tables.h
 $(SUBDIR)sinewin.o: $(SUBDIR)sinewin_tables.h
 $(SUBDIR)mpegaudiodec_fixed.o: $(SUBDIR)mpegaudio_tables.h
diff --git a/libavcodec/a64colors.h b/libavcodec/a64colors.h
index d977426..a9cdb6f 100644
--- a/libavcodec/a64colors.h
+++ b/libavcodec/a64colors.h
@@ -2,20 +2,20 @@
  * a64 video encoder - c64 colors in rgb (Pepto)
  * Copyright (c) 2009 Tobias Bindhammer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/a64enc.h b/libavcodec/a64enc.h
deleted file mode 100644
index 65c1d30..0000000
--- a/libavcodec/a64enc.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * a64 video encoder - basic headers
- * Copyright (c) 2009 Tobias Bindhammer
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file
- * a64 video encoder - basic headers
- */
-
-#ifndef AVCODEC_A64ENC_H
-#define AVCODEC_A64ENC_H
-
-#include "libavutil/lfg.h"
-#include "avcodec.h"
-
-#define C64XRES 320
-#define C64YRES 200
-
-typedef struct A64Context {
-    /* variables for multicolor modes */
-    AVLFG randctx;
-    int mc_lifetime;
-    int mc_use_5col;
-    unsigned mc_frame_counter;
-    int *mc_meta_charset;
-    int *mc_charmap;
-    int *mc_best_cb;
-    int mc_luma_vals[5];
-    uint8_t *mc_charset;
-    uint8_t *mc_colram;
-    uint8_t *mc_palette;
-    int mc_pal_size;
-
-    /* pts of the next packet that will be output */
-    int64_t next_pts;
-} A64Context;
-
-#endif /* AVCODEC_A64ENC_H */
diff --git a/libavcodec/a64multienc.c b/libavcodec/a64multienc.c
index 786074f..fc00d3f 100644
--- a/libavcodec/a64multienc.c
+++ b/libavcodec/a64multienc.c
@@ -2,20 +2,20 @@
  * a64 video encoder - multicolor modes
  * Copyright (c) 2009 Tobias Bindhammer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -24,7 +24,6 @@
  * a64 video encoder - multicolor modes
  */
 
-#include "a64enc.h"
 #include "a64colors.h"
 #include "a64tables.h"
 #include "elbg.h"
@@ -37,6 +36,28 @@
 #define INTERLACED    1
 #define CROP_SCREENS  1
 
+#define C64XRES 320
+#define C64YRES 200
+
+typedef struct A64Context { /* private encoder state, obtained via avctx->priv_data */
+    /* variables for multicolor modes */
+    AVLFG randctx;              /* PRNG state handed to avpriv_init_elbg()/avpriv_do_elbg() */
+    int mc_lifetime;            /* scales buffer and packet sizes; 0 skips the encode branch */
+    int mc_use_5col;            /* presumably selects the 5-colour (colram) mode -- TODO confirm */
+    unsigned mc_frame_counter;
+    int *mc_meta_charset;       /* mc_lifetime * 32000 ints, see a64multi_encode_init() */
+    int *mc_charmap;            /* mc_lifetime * 1000 ints, zeroed; passed to the ELBG calls */
+    int *mc_best_cb;            /* CHARSET_CHARS * 32 ints */
+    int mc_luma_vals[5];
+    uint8_t *mc_charset;        /* 0x800 * (INTERLACED + 1) bytes */
+    uint8_t *mc_colram;         /* CHARSET_CHARS bytes, zeroed */
+    uint8_t *mc_palette;
+    int mc_pal_size;
+
+    /* pts of the next packet that will be output */
+    int64_t next_pts;
+} A64Context;
+
 /* gray gradient */
 static const int mc_colors[5]={0x0,0xb,0xc,0xf,0x1};
 
@@ -166,11 +187,11 @@ static av_cold int a64multi_close_encoder(AVCodecContext *avctx)
 {
     A64Context *c = avctx->priv_data;
     av_frame_free(&avctx->coded_frame);
-    av_free(c->mc_meta_charset);
-    av_free(c->mc_best_cb);
-    av_free(c->mc_charset);
-    av_free(c->mc_charmap);
-    av_free(c->mc_colram);
+    av_freep(&c->mc_meta_charset);
+    av_freep(&c->mc_best_cb);
+    av_freep(&c->mc_charset);
+    av_freep(&c->mc_charmap);
+    av_freep(&c->mc_colram);
     return 0;
 }
 
@@ -199,9 +220,9 @@ static av_cold int a64multi_encode_init(AVCodecContext *avctx)
                            a64_palette[mc_colors[a]][2] * 0.11;
     }
 
-    if (!(c->mc_meta_charset = av_malloc(32000 * c->mc_lifetime * sizeof(int))) ||
+    if (!(c->mc_meta_charset = av_malloc_array(c->mc_lifetime, 32000 * sizeof(int))) ||
        !(c->mc_best_cb       = av_malloc(CHARSET_CHARS * 32 * sizeof(int)))     ||
-       !(c->mc_charmap       = av_mallocz(1000 * c->mc_lifetime * sizeof(int))) ||
+       !(c->mc_charmap       = av_mallocz_array(c->mc_lifetime, 1000 * sizeof(int))) ||
        !(c->mc_colram        = av_mallocz(CHARSET_CHARS * sizeof(uint8_t)))     ||
        !(c->mc_charset       = av_malloc(0x800 * (INTERLACED+1) * sizeof(uint8_t)))) {
         av_log(avctx, AV_LOG_ERROR, "Failed to allocate buffer memory.\n");
@@ -260,7 +281,7 @@ static int a64multi_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     int b_width;
 
     int req_size, ret;
-    uint8_t *buf;
+    uint8_t *buf = NULL;
 
     int *charmap     = c->mc_charmap;
     uint8_t *colram  = c->mc_colram;
@@ -314,15 +335,13 @@ static int a64multi_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         /* any frames to encode? */
         if (c->mc_lifetime) {
             req_size = charset_size + c->mc_lifetime*(screen_size + colram_size);
-            if ((ret = ff_alloc_packet(pkt, req_size)) < 0) {
-                av_log(avctx, AV_LOG_ERROR, "Error getting output packet of size %d.\n", req_size);
+            if ((ret = ff_alloc_packet2(avctx, pkt, req_size)) < 0)
                 return ret;
-            }
             buf = pkt->data;
 
             /* calc optimal new charset + charmaps */
-            ff_init_elbg(meta, 32, 1000 * c->mc_lifetime, best_cb, CHARSET_CHARS, 50, charmap, &c->randctx);
-            ff_do_elbg  (meta, 32, 1000 * c->mc_lifetime, best_cb, CHARSET_CHARS, 50, charmap, &c->randctx);
+            avpriv_init_elbg(meta, 32, 1000 * c->mc_lifetime, best_cb, CHARSET_CHARS, 50, charmap, &c->randctx);
+            avpriv_do_elbg  (meta, 32, 1000 * c->mc_lifetime, best_cb, CHARSET_CHARS, 50, charmap, &c->randctx);
 
             /* create colorram map and a c64 readable charset */
             render_charset(avctx, charset, colram);
@@ -332,7 +351,6 @@ static int a64multi_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
             /* advance pointers */
             buf      += charset_size;
-            charset  += charset_size;
         }
 
         /* write x frames to buf */
@@ -376,6 +394,7 @@ static int a64multi_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     return 0;
 }
 
+#if CONFIG_A64MULTI_ENCODER
 AVCodec ff_a64multi_encoder = {
     .name           = "a64multi",
     .long_name      = NULL_IF_CONFIG_SMALL("Multicolor charset for Commodore 64"),
@@ -388,7 +407,8 @@ AVCodec ff_a64multi_encoder = {
     .pix_fmts       = (const enum AVPixelFormat[]) {AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE},
     .capabilities   = CODEC_CAP_DELAY,
 };
-
+#endif
+#if CONFIG_A64MULTI5_ENCODER
 AVCodec ff_a64multi5_encoder = {
     .name           = "a64multi5",
     .long_name      = NULL_IF_CONFIG_SMALL("Multicolor charset for Commodore 64, extended with 5th color (colram)"),
@@ -401,3 +421,4 @@ AVCodec ff_a64multi5_encoder = {
     .pix_fmts       = (const enum AVPixelFormat[]) {AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE},
     .capabilities   = CODEC_CAP_DELAY,
 };
+#endif
diff --git a/libavcodec/a64tables.h b/libavcodec/a64tables.h
index b95c5ce..a955ef4 100644
--- a/libavcodec/a64tables.h
+++ b/libavcodec/a64tables.h
@@ -2,20 +2,20 @@
  * a64 video encoder - tables used by a64 encoders
  * Copyright (c) 2009 Tobias Bindhammer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aac.h b/libavcodec/aac.h
index 375e6b1..1bcd95c 100644
--- a/libavcodec/aac.h
+++ b/libavcodec/aac.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
  * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -81,7 +81,7 @@ enum BandType {
     INTENSITY_BT   = 15,    ///< Scalefactor data are intensity stereo positions.
 };
 
-#define IS_CODEBOOK_UNSIGNED(x) ((x - 1) & 10)
+#define IS_CODEBOOK_UNSIGNED(x) (((x) - 1) & 10)
 
 enum ChannelPosition {
     AAC_CHANNEL_OFF   = 0,
@@ -259,7 +259,8 @@ typedef struct ChannelElement {
 /**
  * main AAC context
  */
-typedef struct AACContext {
+struct AACContext {
+    AVClass        *class;
     AVCodecContext *avctx;
     AVFrame *frame;
 
@@ -303,9 +304,31 @@ typedef struct AACContext {
     SingleChannelElement *output_element[MAX_CHANNELS]; ///< Points to each SingleChannelElement
     /** @} */
 
+
+    /**
+     * @name Japanese DTV specific extension
+     * @{
+     */
+    int force_dmono_mode;///< 0->not dmono, 1->use first channel, 2->use second channel
+    int dmono_mode;      ///< 0->not dmono, 1->use first channel, 2->use second channel
+    /** @} */
+
     DECLARE_ALIGNED(32, float, temp)[128];
 
     OutputConfiguration oc[2];
-} AACContext;
+    int warned_num_aac_frames;
+
+    /* aacdec functions pointers */
+    void (*imdct_and_windowing)(AACContext *ac, SingleChannelElement *sce);
+    void (*apply_ltp)(AACContext *ac, SingleChannelElement *sce);
+    void (*apply_tns)(float coef[1024], TemporalNoiseShaping *tns,
+                      IndividualChannelStream *ics, int decode);
+    void (*windowing_and_mdct_ltp)(AACContext *ac, float *out,
+                                   float *in, IndividualChannelStream *ics);
+    void (*update_ltp)(AACContext *ac, SingleChannelElement *sce);
+
+};
+
+void ff_aacdec_init_mips(AACContext *c);
 
 #endif /* AVCODEC_AAC_H */
diff --git a/libavcodec/aac_ac3_parser.c b/libavcodec/aac_ac3_parser.c
index d3da9b7..7fefda5 100644
--- a/libavcodec/aac_ac3_parser.c
+++ b/libavcodec/aac_ac3_parser.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2003 Fabrice Bellard
  * Copyright (c) 2003 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aac_ac3_parser.h b/libavcodec/aac_ac3_parser.h
index 99286f0..c2506a5 100644
--- a/libavcodec/aac_ac3_parser.h
+++ b/libavcodec/aac_ac3_parser.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2003 Fabrice Bellard
  * Copyright (c) 2003 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aac_adtstoasc_bsf.c b/libavcodec/aac_adtstoasc_bsf.c
index bec6e7f..37ba5c1 100644
--- a/libavcodec/aac_adtstoasc_bsf.c
+++ b/libavcodec/aac_adtstoasc_bsf.c
@@ -2,20 +2,20 @@
  * MPEG-2/4 AAC ADTS to MPEG-4 Audio Specific Configuration bitstream filter
  * Copyright (c) 2009 Alex Converse <alex.converse@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -87,6 +87,7 @@ static int aac_adtstoasc_filter(AVBitStreamFilterContext *bsfc,
             buf_size -= get_bits_count(&gb)/8;
             buf      += get_bits_count(&gb)/8;
         }
+        av_free(avctx->extradata);
         avctx->extradata_size = 2 + pce_size;
         avctx->extradata = av_mallocz(avctx->extradata_size + FF_INPUT_BUFFER_PADDING_SIZE);
 
@@ -112,7 +113,7 @@ static int aac_adtstoasc_filter(AVBitStreamFilterContext *bsfc,
 }
 
 AVBitStreamFilter ff_aac_adtstoasc_bsf = {
-    "aac_adtstoasc",
-    sizeof(AACBSFContext),
-    aac_adtstoasc_filter,
+    .name           = "aac_adtstoasc",
+    .priv_data_size = sizeof(AACBSFContext),
+    .filter         = aac_adtstoasc_filter,
 };
diff --git a/libavcodec/aac_parser.c b/libavcodec/aac_parser.c
index fdaa5f8..ab6ca4e 100644
--- a/libavcodec/aac_parser.c
+++ b/libavcodec/aac_parser.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2003 Fabrice Bellard
  * Copyright (c) 2003 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aac_tablegen.c b/libavcodec/aac_tablegen.c
index b2c6c95..33a179f 100644
--- a/libavcodec/aac_tablegen.c
+++ b/libavcodec/aac_tablegen.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aac_tablegen.h b/libavcodec/aac_tablegen.h
index 8a05ec5..1c19a15 100644
--- a/libavcodec/aac_tablegen.h
+++ b/libavcodec/aac_tablegen.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aac_tablegen_decl.h b/libavcodec/aac_tablegen_decl.h
index a5fd1cf..5105dae 100644
--- a/libavcodec/aac_tablegen_decl.h
+++ b/libavcodec/aac_tablegen_decl.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aacadtsdec.c b/libavcodec/aacadtsdec.c
index 2994bce..d0814ac 100644
--- a/libavcodec/aacadtsdec.c
+++ b/libavcodec/aacadtsdec.c
@@ -4,20 +4,20 @@
  * Copyright (c) 2003 Michael Niedermayer
  * Copyright (c) 2009 Alex Converse
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aacadtsdec.h b/libavcodec/aacadtsdec.h
index 6319efc..d0584ef 100644
--- a/libavcodec/aacadtsdec.h
+++ b/libavcodec/aacadtsdec.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2003 Fabrice Bellard
  * Copyright (c) 2003 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index 35b98a9..37548aa 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -2,20 +2,20 @@
  * AAC coefficients encoder
  * Copyright (C) 2008-2009 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -506,7 +506,7 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
             idx = cb;
     ppos = max_sfb;
     while (ppos > 0) {
-        assert(idx >= 0);
+        av_assert1(idx >= 0);
         cb = idx;
         stackrun[stack_len] = path[ppos][cb].run;
         stackcb [stack_len] = cb;
@@ -776,7 +776,6 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx,
         do {
             int prev = -1;
             tbits = 0;
-            fflag = 0;
             for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
                 start = w*128;
                 for (g = 0;  g < sce->ics.num_swb; g++) {
@@ -875,7 +874,7 @@ static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s,
     } else {
         for (w = 0; w < 8; w++) {
             const float *coeffs = sce->coeffs + w*128;
-            start = 0;
+            curband = start = 0;
             for (i = 0; i < 128; i++) {
                 if (i - start >= sce->ics.swb_sizes[curband]) {
                     start += sce->ics.swb_sizes[curband];
@@ -953,7 +952,6 @@ static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s,
             }
             sce->zeroes[w*16+g] = 0;
             scf  = prev_scf = av_clip(SCALE_ONE_POS - SCALE_DIV_512 - log2f(1/maxq[w*16+g])*16/3, 60, 218);
-            step = 16;
             for (;;) {
                 float dist = 0.0f;
                 int quant_max;
@@ -1112,26 +1110,26 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe,
     }
 }
 
-AACCoefficientsEncoder ff_aac_coders[] = {
-    {
+AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
+    [AAC_CODER_FAAC] = {
         search_for_quantizers_faac,
         encode_window_bands_info,
         quantize_and_encode_band,
         search_for_ms,
     },
-    {
+    [AAC_CODER_ANMR] = {
         search_for_quantizers_anmr,
         encode_window_bands_info,
         quantize_and_encode_band,
         search_for_ms,
     },
-    {
+    [AAC_CODER_TWOLOOP] = {
         search_for_quantizers_twoloop,
         codebook_trellis_rate,
         quantize_and_encode_band,
         search_for_ms,
     },
-    {
+    [AAC_CODER_FAST] = {
         search_for_quantizers_fast,
         encode_window_bands_info,
         quantize_and_encode_band,
diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c
index 4bdf52f..10c509b 100644
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -8,20 +8,20 @@
  * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
  * Copyright (c) 2010      Janne Grunau <janne-libav@jannau.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -74,6 +74,7 @@
  * N                    SinuSoidal Coding (Transient, Sinusoid, Noise)
  * Y                    Parametric Stereo
  * N                    Direct Stream Transfer
+ * Y                    Enhanced AAC Low Delay (ER AAC ELD)
  *
  * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
  *       - HE AAC v2 comprises LC AAC with Spectral Band Replication and
@@ -81,6 +82,7 @@
  */
 
 #include "libavutil/float_dsp.h"
+#include "libavutil/opt.h"
 #include "avcodec.h"
 #include "internal.h"
 #include "get_bits.h"
@@ -108,12 +110,18 @@
 
 #if ARCH_ARM
 #   include "arm/aac.h"
+#elif ARCH_MIPS
+#   include "mips/aacdec_mips.h"
 #endif
 
 static VLC vlc_scalefactors;
 static VLC vlc_spectral[11];
 
-static const char overread_err[] = "Input buffer exhausted before END element found\n";
+static int output_configure(AACContext *ac,
+                            uint8_t layout_map[MAX_ELEM_ID*4][3], int tags,
+                            enum OCStatus oc_type, int get_new_frame);
+
+#define overread_err "Input buffer exhausted before END element found\n"
 
 static int count_channels(uint8_t (*layout)[3], int tags)
 {
@@ -130,7 +138,7 @@ static int count_channels(uint8_t (*layout)[3], int tags)
 /**
  * Check for the channel element in the current channel position configuration.
  * If it exists, make sure the appropriate element is allocated and map the
- * channel order to match the internal Libav channel layout.
+ * channel order to match the internal FFmpeg channel layout.
  *
  * @param   che_pos current channel position configuration
  * @param   type channel element type
@@ -152,6 +160,10 @@ static av_cold int che_configure(AACContext *ac,
             ff_aac_sbr_ctx_init(ac, &ac->che[type][id]->sbr);
         }
         if (type != TYPE_CCE) {
+            if (*channels >= MAX_CHANNELS - (type == TYPE_CPE || (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1))) {
+                av_log(ac->avctx, AV_LOG_ERROR, "Too many channels\n");
+                return AVERROR_INVALIDDATA;
+            }
             ac->output_element[(*channels)++] = &ac->che[type][id]->ch[0];
             if (type == TYPE_CPE ||
                 (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1)) {
@@ -184,11 +196,12 @@ static int frame_configure_elements(AVCodecContext *avctx)
 
     /* get output buffer */
     av_frame_unref(ac->frame);
+    if (!avctx->channels)
+        return 1;
+
     ac->frame->nb_samples = 2048;
-    if ((ret = ff_get_buffer(avctx, ac->frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, ac->frame, 0)) < 0)
         return ret;
-    }
 
     /* map output channel pointers to AVFrame data */
     for (ch = 0; ch < avctx->channels; ch++) {
@@ -427,6 +440,8 @@ static void pop_output_configuration(AACContext *ac) {
         ac->oc[1] = ac->oc[0];
         ac->avctx->channels = ac->oc[1].channels;
         ac->avctx->channel_layout = ac->oc[1].channel_layout;
+        output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags,
+                         ac->oc[1].status, 0);
     }
 }
 
@@ -472,7 +487,8 @@ static int output_configure(AACContext *ac,
     }
 
     memcpy(ac->tag_che_map, ac->che, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
-    avctx->channel_layout = ac->oc[1].channel_layout = layout;
+    if (layout) avctx->channel_layout = layout;
+                            ac->oc[1].channel_layout = layout;
     avctx->channels       = ac->oc[1].channels       = channels;
     ac->oc[1].status = oc_type;
 
@@ -484,6 +500,23 @@ static int output_configure(AACContext *ac,
     return 0;
 }
 
+/** Zero the saved[] buffer of both channels in every non-NULL channel element. */
+static void flush(AVCodecContext *avctx)
+{
+    AACContext *ac = avctx->priv_data;
+    int type, id, ch;
+
+    for (type = 3; type >= 0; type--) {
+        for (id = 0; id < MAX_ELEM_ID; id++) {
+            ChannelElement *elem = ac->che[type][id];
+            if (!elem)
+                continue; // nothing stored in this slot
+            for (ch = 0; ch < 2; ch++)
+                memset(elem->ch[ch].saved, 0, sizeof(elem->ch[ch].saved));
+        }
+    }
+}
+
 /**
  * Set up channel positions based on a default channel configuration
  * as specified in table 1.17.
@@ -504,6 +537,25 @@ static int set_default_channel_config(AVCodecContext *avctx,
     *tags = tags_per_config[channel_config];
     memcpy(layout_map, aac_channel_layout_map[channel_config - 1],
            *tags * sizeof(*layout_map));
+
+    /*
+     * AAC specification has 7.1(wide) as a default layout for 8-channel streams.
+     * However, at least Nero AAC encoder encodes 7.1 streams using the default
+     * channel config 7, mapping the side channels of the original audio stream
+     * to the second AAC_CHANNEL_FRONT pair in the AAC stream. Similarly, e.g. FAAD
+     * decodes the second AAC_CHANNEL_FRONT pair as side channels, therefore decoding
+     * the incorrect streams as if they were correct (and as the encoder intended).
+     *
+     * As actual intended 7.1(wide) streams are very rare, default to assuming a
+     * 7.1 layout was intended.
+     */
+    if (channel_config == 7 && avctx->strict_std_compliance < FF_COMPLIANCE_STRICT) {
+        av_log(avctx, AV_LOG_INFO, "Assuming an incorrectly encoded 7.1 channel layout"
+               " instead of a spec-compliant 7.1(wide) layout, use -strict %d to decode"
+               " according to the specification instead.\n", FF_COMPLIANCE_STRICT);
+        layout_map[2][2] = AAC_CHANNEL_SIDE;
+    }
+
     return 0;
 }
 
@@ -521,6 +573,8 @@ static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
         int layout_map_tags;
         push_output_configuration(ac);
 
+        av_log(ac->avctx, AV_LOG_DEBUG, "mono with CPE\n");
+
         if (set_default_channel_config(ac->avctx, layout_map,
                                        &layout_map_tags, 2) < 0)
             return NULL;
@@ -538,6 +592,8 @@ static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
         int layout_map_tags;
         push_output_configuration(ac);
 
+        av_log(ac->avctx, AV_LOG_DEBUG, "stereo with SCE\n");
+
         if (set_default_channel_config(ac->avctx, layout_map,
                                        &layout_map_tags, 1) < 0)
             return NULL;
@@ -624,6 +680,8 @@ static void decode_channel_map(uint8_t layout_map[][3],
         case AAC_CHANNEL_LFE:
             syn_ele = TYPE_LFE;
             break;
+        default:
+            av_assert0(0);
         }
         layout_map[0][0] = syn_ele;
         layout_map[0][1] = get_bits(gb, 4);
@@ -669,6 +727,10 @@ static int decode_pce(AVCodecContext *avctx, MPEG4AudioConfig *m4ac,
     if (get_bits1(gb))
         skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
 
+    if (get_bits_left(gb) < 4 * (num_front + num_side + num_back + num_lfe + num_assoc_data + num_cc)) {
+        av_log(avctx, AV_LOG_ERROR, "decode_pce: " overread_err);
+        return -1;
+    }
     decode_channel_map(layout_map       , AAC_CHANNEL_FRONT, gb, num_front);
     tags = num_front;
     decode_channel_map(layout_map + tags, AAC_CHANNEL_SIDE,  gb, num_side);
@@ -688,7 +750,7 @@ static int decode_pce(AVCodecContext *avctx, MPEG4AudioConfig *m4ac,
     /* comment field, first byte is length */
     comment_len = get_bits(gb, 8) * 8;
     if (get_bits_left(gb) < comment_len) {
-        av_log(avctx, AV_LOG_ERROR, overread_err);
+        av_log(avctx, AV_LOG_ERROR, "decode_pce: " overread_err);
         return AVERROR_INVALIDDATA;
     }
     skip_bits_long(gb, comment_len);
@@ -862,9 +924,9 @@ static int decode_audio_specific_config(AACContext *ac,
     GetBitContext gb;
     int i, ret;
 
-    av_dlog(avctx, "extradata size %d\n", avctx->extradata_size);
-    for (i = 0; i < avctx->extradata_size; i++)
-        av_dlog(avctx, "%02x ", avctx->extradata[i]);
+    av_dlog(avctx, "audio specific config size %d\n", bit_size >> 3);
+    for (i = 0; i < bit_size >> 3; i++)
+        av_dlog(avctx, "%02x ", data[i]);
     av_dlog(avctx, "\n");
 
     if ((ret = init_get_bits(&gb, data, bit_size)) < 0)
@@ -928,7 +990,7 @@ static int decode_audio_specific_config(AACContext *ac,
  *
  * @return  Returns a 32-bit pseudorandom integer
  */
-static av_always_inline int lcg_random(int previous_val)
+static av_always_inline int lcg_random(unsigned previous_val)
 {
     union { unsigned u; int s; } v = { previous_val * 1664525u + 1013904223 };
     return v.s;
@@ -982,6 +1044,8 @@ static void reset_predictor_group(PredictorState *ps, int group_num)
                                     sizeof(ff_aac_spectral_codes[num][0]), \
         size);
 
+static void aacdec_init(AACContext *ac);
+
 static av_cold int aac_decode_init(AVCodecContext *avctx)
 {
     AACContext *ac = avctx->priv_data;
@@ -990,6 +1054,8 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
     ac->avctx = avctx;
     ac->oc[1].m4ac.sample_rate = avctx->sample_rate;
 
+    aacdec_init(ac);
+
     avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
 
     if (avctx->extradata_size > 0) {
@@ -1028,6 +1094,11 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
         }
     }
 
+    if (avctx->channels > MAX_CHANNELS) {
+        av_log(avctx, AV_LOG_ERROR, "Too many channels\n");
+        return AVERROR_INVALIDDATA;
+    }
+
     AAC_INIT_VLC_STATIC( 0, 304);
     AAC_INIT_VLC_STATIC( 1, 270);
     AAC_INIT_VLC_STATIC( 2, 550);
@@ -1088,7 +1159,7 @@ static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
         align_get_bits(gb);
 
     if (get_bits_left(gb) < 8 * count) {
-        av_log(ac->avctx, AV_LOG_ERROR, overread_err);
+        av_log(ac->avctx, AV_LOG_ERROR, "skip_data_stream_element: "overread_err);
         return AVERROR_INVALIDDATA;
     }
     skip_bits_long(gb, 8 * count);
@@ -1192,13 +1263,13 @@ static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
         if (ics->predictor_present) {
             if (aot == AOT_AAC_MAIN) {
                 if (decode_prediction(ac, ics, gb)) {
-                    return AVERROR_INVALIDDATA;
+                    goto fail;
                 }
             } else if (aot == AOT_AAC_LC ||
                        aot == AOT_ER_AAC_LC) {
                 av_log(ac->avctx, AV_LOG_ERROR,
                        "Prediction is not allowed in AAC-LC.\n");
-                return AVERROR_INVALIDDATA;
+                goto fail;
             } else {
                 if (aot == AOT_ER_AAC_LD) {
                     av_log(ac->avctx, AV_LOG_ERROR,
@@ -1216,10 +1287,13 @@ static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
                "Number of scalefactor bands in group (%d) "
                "exceeds limit (%d).\n",
                ics->max_sfb, ics->num_swb);
-        return AVERROR_INVALIDDATA;
+        goto fail;
     }
 
     return 0;
+fail:
+    ics->max_sfb = 0;
+    return AVERROR_INVALIDDATA;
 }
 
 /**
@@ -1250,7 +1324,7 @@ static int decode_band_types(AACContext *ac, enum BandType band_type[120],
                 sect_len_incr = get_bits(gb, bits);
                 sect_end += sect_len_incr;
                 if (get_bits_left(gb) < 0) {
-                    av_log(ac->avctx, AV_LOG_ERROR, overread_err);
+                    av_log(ac->avctx, AV_LOG_ERROR, "decode_band_types: "overread_err);
                     return AVERROR_INVALIDDATA;
                 }
                 if (sect_end > ics->max_sfb) {
@@ -1352,12 +1426,12 @@ static int decode_pulses(Pulse *pulse, GetBitContext *gb,
         return -1;
     pulse->pos[0]    = swb_offset[pulse_swb];
     pulse->pos[0]   += get_bits(gb, 5);
-    if (pulse->pos[0] > 1023)
+    if (pulse->pos[0] >= swb_offset[num_swb])
         return -1;
     pulse->amp[0]    = get_bits(gb, 4);
     for (i = 1; i < pulse->num_pulse; i++) {
         pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
-        if (pulse->pos[i] > 1023)
+        if (pulse->pos[i] >= swb_offset[num_swb])
             return -1;
         pulse->amp[i] = get_bits(gb, 4);
     }
@@ -1422,7 +1496,7 @@ static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
              idx++)
             cpe->ms_mask[idx] = get_bits1(gb);
     } else if (ms_present == 2) {
-        memset(cpe->ms_mask, 1, cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb * sizeof(cpe->ms_mask[0]));
+        memset(cpe->ms_mask, 1,  sizeof(cpe->ms_mask[0]) * cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb);
     }
 }
 
@@ -1872,7 +1946,7 @@ static int decode_ics(AACContext *ac, SingleChannelElement *sce,
             avpriv_request_sample(ac->avctx, "SSR");
             return AVERROR_PATCHWELCOME;
         }
-        // I see no textual basis in the spec for this occuring after SSR gain
+        // I see no textual basis in the spec for this occurring after SSR gain
         // control, but this is what both reference and real implmentations do
         if (tns->present && er_syntax)
             if (decode_tns(ac, tns, gb, ics) < 0)
@@ -2156,6 +2230,32 @@ static int decode_dynamic_range(DynamicRangeControl *che_drc,
     return n;
 }
 
+/** Scan a len-bit fill payload for a "libfaac <major>.<minor>" tag; if found,
+ *  set skip_samples to 1024. Consumes all len bits and always returns 0. */
+static int decode_fill(AACContext *ac, GetBitContext *gb, int len) {
+    uint8_t buf[256];
+    int i, major, minor;
+
+    if (len < 13+7*8) // shorter than the 13 skipped bits plus 7 bytes of text
+        goto unknown;
+
+    skip_bits(gb, 13); len -= 13;
+
+    // copy whole bytes only, always leaving room for the terminating NUL
+    for(i=0; i+1<sizeof(buf) && len>=8; i++, len-=8)
+        buf[i] = get_bits(gb, 8);
+    buf[i] = 0;
+
+    if (ac->avctx->debug & FF_DEBUG_PICT_INFO)
+        av_log(ac->avctx, AV_LOG_DEBUG, "FILL:%s\n", buf);
+    if (sscanf((const char *)buf, "libfaac %d.%d", &major, &minor) == 2)
+        ac->avctx->internal->skip_samples = 1024;
+
+unknown:
+    skip_bits_long(gb, len); // discard every bit that was not consumed above
+    return 0;
+}
+
 /**
  * Decode extension data (incomplete); reference: table 4.51.
  *
@@ -2199,6 +2299,8 @@ static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
         res = decode_dynamic_range(&ac->che_drc, gb);
         break;
     case EXT_FILL:
+        decode_fill(ac, gb, 8 * cnt - 4);
+        break;
     case EXT_FILL_DATA:
     case EXT_DATA_ELEMENT:
     default:
@@ -2221,7 +2323,7 @@ static void apply_tns(float coef[1024], TemporalNoiseShaping *tns,
     int w, filt, m, i;
     int bottom, top, order, start, end, size, inc;
     float lpc[TNS_MAX_ORDER];
-    float tmp[TNS_MAX_ORDER + 1];
+    float tmp[TNS_MAX_ORDER+1];
 
     for (w = 0; w < ics->num_windows; w++) {
         bottom = ics->num_swb;
@@ -2313,10 +2415,10 @@ static void apply_ltp(AACContext *ac, SingleChannelElement *sce)
             predTime[i] = sce->ltp_state[i + 2048 - ltp->lag] * ltp->coef;
         memset(&predTime[i], 0, (2048 - i) * sizeof(float));
 
-        windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics);
+        ac->windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics);
 
         if (sce->tns.present)
-            apply_tns(predFreq, &sce->tns, &sce->ics, 0);
+            ac->apply_tns(predFreq, &sce->tns, &sce->ics, 0);
 
         for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
             if (ltp->used[sfb])
@@ -2463,7 +2565,7 @@ static void imdct_and_windowing_eld(AACContext *ac, SingleChannelElement *sce)
     // Inverse transform, mapped to the conventional IMDCT by
     // Chivukula, R.K.; Reznik, Y.A.; Devarajan, V.,
     // "Efficient algorithms for MPEG-4 AAC-ELD, AAC-LD and AAC-LC filterbanks,"
-    // Audio, Language and Image Processing, 2008. ICALIP 2008. International Conference on
+    // International Conference on Audio, Language and Image Processing, ICALIP 2008.
     // URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=4590245&isnumber=4589950
     for (i = 0; i < n2; i+=2) {
         float temp;
@@ -2609,7 +2711,7 @@ static void spectral_to_sample(AACContext *ac)
         imdct_and_window = imdct_and_windowing_eld;
         break;
     default:
-        imdct_and_window = imdct_and_windowing;
+        imdct_and_window = ac->imdct_and_windowing;
     }
     for (type = 3; type >= 0; type--) {
         for (i = 0; i < MAX_ELEM_ID; i++) {
@@ -2620,25 +2722,25 @@ static void spectral_to_sample(AACContext *ac)
                 if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
                     if (che->ch[0].ics.predictor_present) {
                         if (che->ch[0].ics.ltp.present)
-                            apply_ltp(ac, &che->ch[0]);
+                            ac->apply_ltp(ac, &che->ch[0]);
                         if (che->ch[1].ics.ltp.present && type == TYPE_CPE)
-                            apply_ltp(ac, &che->ch[1]);
+                            ac->apply_ltp(ac, &che->ch[1]);
                     }
                 }
                 if (che->ch[0].tns.present)
-                    apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
+                    ac->apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
                 if (che->ch[1].tns.present)
-                    apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
+                    ac->apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
                 if (type <= TYPE_CPE)
                     apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling);
                 if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
                     imdct_and_window(ac, &che->ch[0]);
                     if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
-                        update_ltp(ac, &che->ch[0]);
+                        ac->update_ltp(ac, &che->ch[0]);
                     if (type == TYPE_CPE) {
                         imdct_and_window(ac, &che->ch[1]);
                         if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
-                            update_ltp(ac, &che->ch[1]);
+                            ac->update_ltp(ac, &che->ch[1]);
                     }
                     if (ac->oc[1].m4ac.sbr > 0) {
                         ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
@@ -2660,10 +2762,12 @@ static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
 
     size = avpriv_aac_parse_header(gb, &hdr_info);
     if (size > 0) {
-        if (hdr_info.num_aac_frames != 1) {
+        if (!ac->warned_num_aac_frames && hdr_info.num_aac_frames != 1) {
+            // This is 2 for "VLB " audio in NSV files.
+            // See samples/nsv/vlb_audio.
             avpriv_report_missing_feature(ac->avctx,
                                           "More than one AAC RDB per ADTS frame");
-            return AVERROR_PATCHWELCOME;
+            ac->warned_num_aac_frames = 1;
         }
         push_output_configuration(ac);
         if (hdr_info.chan_config) {
@@ -2679,6 +2783,21 @@ static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
                 return ret;
         } else {
             ac->oc[1].m4ac.chan_config = 0;
+            /**
+             * Dual mono frames in Japanese DTV can have chan_config 0
+             * WITHOUT specifying a PCE.
+             * Thus, set dual mono as the default.
+             */
+            if (ac->dmono_mode && ac->oc[0].status == OC_NONE) {
+                layout_map_tags = 2;
+                layout_map[0][0] = layout_map[1][0] = TYPE_SCE;
+                layout_map[0][2] = layout_map[1][2] = AAC_CHANNEL_FRONT;
+                layout_map[0][1] = 0;
+                layout_map[1][1] = 1;
+                if (output_configure(ac, layout_map, layout_map_tags,
+                                     OC_TRIAL_FRAME, 0))
+                    return -7;
+            }
         }
         ac->oc[1].m4ac.sample_rate     = hdr_info.sample_rate;
         ac->oc[1].m4ac.sampling_index  = hdr_info.sampling_index;
@@ -2761,13 +2880,14 @@ static int aac_decode_er_frame(AVCodecContext *avctx, void *data,
 }
 
 static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
-                                int *got_frame_ptr, GetBitContext *gb)
+                                int *got_frame_ptr, GetBitContext *gb, AVPacket *avpkt)
 {
     AACContext *ac = avctx->priv_data;
     ChannelElement *che = NULL, *che_prev = NULL;
     enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
     int err, elem_id;
     int samples = 0, multiplier, audio_found = 0, pce_found = 0;
+    int is_dmono, sce_count = 0;
 
     ac->frame = data;
 
@@ -2810,6 +2930,7 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
         case TYPE_SCE:
             err = decode_ics(ac, &che->ch[0], gb, 0, 0);
             audio_found = 1;
+            sce_count++;
             break;
 
         case TYPE_CPE:
@@ -2842,9 +2963,10 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
             if (pce_found) {
                 av_log(avctx, AV_LOG_ERROR,
                        "Not evaluating a further program_config_element as this construct is dubious at best.\n");
-                pop_output_configuration(ac);
             } else {
                 err = output_configure(ac, layout_map, tags, OC_TRIAL_PCE, 1);
+                if (!err)
+                    ac->oc[1].m4ac.chan_config = 0;
                 pce_found = 1;
             }
             break;
@@ -2854,7 +2976,7 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
             if (elem_id == 15)
                 elem_id += get_bits(gb, 8) - 1;
             if (get_bits_left(gb) < 8 * elem_id) {
-                    av_log(avctx, AV_LOG_ERROR, overread_err);
+                    av_log(avctx, AV_LOG_ERROR, "TYPE_FIL: "overread_err);
                     err = AVERROR_INVALIDDATA;
                     goto fail;
             }
@@ -2892,12 +3014,31 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
         ac->oc[1].status = OC_LOCKED;
     }
 
+    if (multiplier) {
+        int side_size;
+        const uint8_t *side = av_packet_get_side_data(avpkt, AV_PKT_DATA_SKIP_SAMPLES, &side_size);
+        if (side && side_size>=4)
+            AV_WL32(side, 2*AV_RL32(side));
+    }
+
+    *got_frame_ptr = !!samples;
     if (samples) {
         ac->frame->nb_samples = samples;
         ac->frame->sample_rate = avctx->sample_rate;
-    }
+    } else
+        av_frame_unref(ac->frame);
     *got_frame_ptr = !!samples;
 
+    /* for dual-mono audio (SCE + SCE) */
+    is_dmono = ac->dmono_mode && sce_count == 2 &&
+               ac->oc[1].channel_layout == (AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT);
+    if (is_dmono) {
+        if (ac->dmono_mode == 1)
+            ((AVFrame *)data)->data[1] =((AVFrame *)data)->data[0];
+        else if (ac->dmono_mode == 2)
+            ((AVFrame *)data)->data[0] =((AVFrame *)data)->data[1];
+    }
+
     return 0;
 fail:
     pop_output_configuration(ac);
@@ -2918,8 +3059,12 @@ static int aac_decode_frame(AVCodecContext *avctx, void *data,
     const uint8_t *new_extradata = av_packet_get_side_data(avpkt,
                                        AV_PKT_DATA_NEW_EXTRADATA,
                                        &new_extradata_size);
+    int jp_dualmono_size;
+    const uint8_t *jp_dualmono   = av_packet_get_side_data(avpkt,
+                                       AV_PKT_DATA_JP_DUALMONO,
+                                       &jp_dualmono_size);
 
-    if (new_extradata) {
+    if (new_extradata && 0) {
         av_free(avctx->extradata);
         avctx->extradata = av_mallocz(new_extradata_size +
                                       FF_INPUT_BUFFER_PADDING_SIZE);
@@ -2936,6 +3081,15 @@ static int aac_decode_frame(AVCodecContext *avctx, void *data,
         }
     }
 
+    ac->dmono_mode = 0;
+    if (jp_dualmono && jp_dualmono_size > 0)
+        ac->dmono_mode =  1 + *jp_dualmono;
+    if (ac->force_dmono_mode >= 0)
+        ac->dmono_mode = ac->force_dmono_mode;
+
+    if (INT_MAX / 8 <= buf_size)
+        return AVERROR_INVALIDDATA;
+
     if ((err = init_get_bits(&gb, buf, buf_size * 8)) < 0)
         return err;
 
@@ -2947,7 +3101,7 @@ static int aac_decode_frame(AVCodecContext *avctx, void *data,
         err = aac_decode_er_frame(avctx, data, got_frame_ptr, &gb);
         break;
     default:
-        err = aac_decode_frame_int(avctx, data, got_frame_ptr, &gb);
+        err = aac_decode_frame_int(avctx, data, got_frame_ptr, &gb, avpkt);
     }
     if (err < 0)
         return err;
@@ -2985,7 +3139,7 @@ static av_cold int aac_decode_close(AVCodecContext *avctx)
 
 struct LATMContext {
     AACContext aac_ctx;     ///< containing AACContext
-    int initialized;        ///< initilized after a valid extradata was seen
+    int initialized;        ///< initialized after a valid extradata was seen
 
     // parser data
     int audio_mux_version_A; ///< LATM syntax version
@@ -3034,7 +3188,11 @@ static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
         ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
         ac->oc[1].m4ac.chan_config != m4ac.chan_config) {
 
-        av_log(avctx, AV_LOG_INFO, "audio config changed\n");
+        if(latmctx->initialized) {
+            av_log(avctx, AV_LOG_INFO, "audio config changed\n");
+        } else {
+            av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n");
+        }
         latmctx->initialized = 0;
 
         esize = (bits_consumed+7) / 8;
@@ -3077,9 +3235,9 @@ static int read_stream_mux_config(struct LATMContext *latmctx,
             return AVERROR_PATCHWELCOME;
         }
 
-        // for each program (which there is only on in DVB)
+        // for each program (of which there is only one in DVB)
 
-        // for each layer (which there is only on in DVB)
+        // for each layer (of which there is only one in DVB)
         if (get_bits(gb, 3)) {                   // numLayer
             avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple layers");
             return AVERROR_PATCHWELCOME;
@@ -3192,7 +3350,7 @@ static int latm_decode_frame(AVCodecContext *avctx, void *out,
     int                 muxlength, err;
     GetBitContext       gb;
 
-    if ((err = init_get_bits(&gb, avpkt->data, avpkt->size * 8)) < 0)
+    if ((err = init_get_bits8(&gb, avpkt->data, avpkt->size)) < 0)
         return err;
 
     // check for LOAS sync word
@@ -3200,7 +3358,7 @@ static int latm_decode_frame(AVCodecContext *avctx, void *out,
         return AVERROR_INVALIDDATA;
 
     muxlength = get_bits(&gb, 13) + 3;
-    // not enough data, the parser should have sorted this
+    // not enough data, the parser should have sorted this out
     if (muxlength > avpkt->size)
         return AVERROR_INVALIDDATA;
 
@@ -3230,7 +3388,7 @@ static int latm_decode_frame(AVCodecContext *avctx, void *out,
         return AVERROR_INVALIDDATA;
     }
 
-    if ((err = aac_decode_frame_int(avctx, out, got_frame_ptr, &gb)) < 0)
+    if ((err = aac_decode_frame_int(avctx, out, got_frame_ptr, &gb, avpkt)) < 0)
         return err;
 
     return muxlength;
@@ -3247,6 +3405,40 @@ static av_cold int latm_decode_init(AVCodecContext *avctx)
     return ret;
 }
 
+static void aacdec_init(AACContext *c)
+{
+    c->imdct_and_windowing                      = imdct_and_windowing;
+    c->apply_ltp                                = apply_ltp;
+    c->apply_tns                                = apply_tns;
+    c->windowing_and_mdct_ltp                   = windowing_and_mdct_ltp;
+    c->update_ltp                               = update_ltp;
+
+    if(ARCH_MIPS)
+        ff_aacdec_init_mips(c);
+}
+/**
+ * AVOptions for Japanese DTV specific extensions (ADTS only)
+ */
+#define AACDEC_FLAGS (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM)
+static const AVOption options[] = {
+    {"dual_mono_mode", "Select the channel to decode for dual mono",
+     offsetof(AACContext, force_dmono_mode), AV_OPT_TYPE_INT, {.i64=-1}, -1, 2,
+     AACDEC_FLAGS, "dual_mono_mode"},
+
+    {"auto", "autoselection",            0, AV_OPT_TYPE_CONST, {.i64=-1}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
+    {"main", "Select Main/Left channel", 0, AV_OPT_TYPE_CONST, {.i64= 1}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
+    {"sub" , "Select Sub/Right channel", 0, AV_OPT_TYPE_CONST, {.i64= 2}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
+    {"both", "Select both channels",     0, AV_OPT_TYPE_CONST, {.i64= 0}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
+
+    {NULL},
+};
+
+static const AVClass aac_decoder_class = {
+    .class_name = "AAC decoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
 
 AVCodec ff_aac_decoder = {
     .name            = "aac",
@@ -3262,6 +3454,8 @@ AVCodec ff_aac_decoder = {
     },
     .capabilities    = CODEC_CAP_CHANNEL_CONF | CODEC_CAP_DR1,
     .channel_layouts = aac_channel_layout,
+    .flush = flush,
+    .priv_class      = &aac_decoder_class,
 };
 
 /*
@@ -3283,4 +3477,5 @@ AVCodec ff_aac_latm_decoder = {
     },
     .capabilities    = CODEC_CAP_CHANNEL_CONF | CODEC_CAP_DR1,
     .channel_layouts = aac_channel_layout,
+    .flush = flush,
 };
diff --git a/libavcodec/aacdectab.h b/libavcodec/aacdectab.h
index 4c23f2d..4a12b4f 100644
--- a/libavcodec/aacdectab.h
+++ b/libavcodec/aacdectab.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
  * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index 55aa2f1..499aefb 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -2,20 +2,20 @@
  * AAC encoder
  * Copyright (C) 2008 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -145,7 +145,7 @@ static const uint8_t aac_chan_configs[6][5] = {
 };
 
 /**
- * Table to remap channels from Libav's default order to AAC order.
+ * Table to remap channels from libavcodec's default order to AAC order.
  */
 static const uint8_t aac_chan_maps[AAC_MAX_CHANNELS][AAC_MAX_CHANNELS] = {
     { 0 },
@@ -384,8 +384,7 @@ static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
         for (i = 0; i < sce->ics.max_sfb; i++) {
             if (!sce->zeroes[w*16 + i]) {
                 diff = sce->sf_idx[w*16 + i] - off + SCALE_DIFF_ZERO;
-                if (diff < 0 || diff > 120)
-                    av_log(avctx, AV_LOG_ERROR, "Scalefactor difference is too big to be coded\n");
+                av_assert0(diff >= 0 && diff <= 120);
                 off = sce->sf_idx[w*16 + i];
                 put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
             }
@@ -478,7 +477,7 @@ static void put_bitstream_info(AACEncContext *s, const char *name)
 
 /*
  * Copy input samples.
- * Channels are reordered from Libav's default order to AAC order.
+ * Channels are reordered from libavcodec's default order to AAC order.
  */
 static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
 {
@@ -571,11 +570,8 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
         }
         start_ch += chans;
     }
-    if ((ret = ff_alloc_packet(avpkt, 768 * s->channels))) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+    if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels)) < 0)
         return ret;
-    }
-
     do {
         int frame_bits;
 
@@ -768,9 +764,12 @@ static av_cold int aac_encode_init(AVCodecContext *avctx)
     if (ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths, s->chan_map[0], grouping))
         goto fail;
     s->psypp = ff_psy_preprocess_init(avctx);
-    s->coder = &ff_aac_coders[2];
+    s->coder = &ff_aac_coders[s->options.aac_coder];
+
+    if (HAVE_MIPSDSPR1)
+        ff_aac_coder_init_mips(s);
 
-    s->lambda = avctx->global_quality ? avctx->global_quality : 120;
+    s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
 
     ff_aac_tableinit();
 
@@ -792,6 +791,11 @@ static const AVOption aacenc_options[] = {
         {"auto",     "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.i64 = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
         {"ms_off",   "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.i64 =  0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
         {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.i64 =  1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
+    {"aac_coder", "", offsetof(AACEncContext, options.aac_coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "aac_coder"},
+        {"faac",     "FAAC-inspired method",      0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAAC},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
+        {"anmr",     "ANMR method",               0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
+        {"twoloop",  "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
+        {"fast",     "Constant quantizer",        0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
     {NULL}
 };
 
@@ -802,6 +806,13 @@ static const AVClass aacenc_class = {
     LIBAVUTIL_VERSION_INT,
 };
 
+/* duplicated from avpriv_mpeg4audio_sample_rates to avoid shared build
+ * failures */
+static const int mpeg4audio_sample_rates[16] = {
+    96000, 88200, 64000, 48000, 44100, 32000,
+    24000, 22050, 16000, 12000, 11025, 8000, 7350
+};
+
 AVCodec ff_aac_encoder = {
     .name           = "aac",
     .long_name      = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
@@ -811,6 +822,7 @@ AVCodec ff_aac_encoder = {
     .init           = aac_encode_init,
     .encode2        = aac_encode_frame,
     .close          = aac_encode_end,
+    .supported_samplerates = mpeg4audio_sample_rates,
     .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY |
                       CODEC_CAP_EXPERIMENTAL,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h
index dec445c..ecd6811 100644
--- a/libavcodec/aacenc.h
+++ b/libavcodec/aacenc.h
@@ -2,20 +2,20 @@
  * AAC encoder
  * Copyright (C) 2008 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -30,8 +30,18 @@
 #include "audio_frame_queue.h"
 #include "psymodel.h"
 
+typedef enum AACCoder {
+    AAC_CODER_FAAC = 0,
+    AAC_CODER_ANMR,
+    AAC_CODER_TWOLOOP,
+    AAC_CODER_FAST,
+
+    AAC_CODER_NB,
+}AACCoder;
+
 typedef struct AACEncOptions {
     int stereo_mode;
+    int aac_coder;
 } AACEncOptions;
 
 struct AACEncContext;
@@ -82,4 +92,6 @@ typedef struct AACEncContext {
 
 extern float ff_aac_pow34sf_tab[428];
 
+void ff_aac_coder_init_mips(AACEncContext *c);
+
 #endif /* AVCODEC_AACENC_H */
diff --git a/libavcodec/aacps.c b/libavcodec/aacps.c
index 8f55c7f..20012f9 100644
--- a/libavcodec/aacps.c
+++ b/libavcodec/aacps.c
@@ -2,20 +2,20 @@
  * MPEG-4 Parametric Stereo decoding functions
  * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -139,7 +139,7 @@ static int ps_read_extension_data(GetBitContext *gb, PSContext *ps, int ps_exten
     return get_bits_count(gb) - count;
 }
 
-static void ipdopd_reset(int8_t *opd_hist, int8_t *ipd_hist)
+static void ipdopd_reset(int8_t *ipd_hist, int8_t *opd_hist)
 {
     int i;
     for (i = 0; i < PS_MAX_NR_IPDOPD; i++) {
@@ -236,6 +236,7 @@ int ff_ps_read_data(AVCodecContext *avctx, GetBitContext *gb_host, PSContext *ps
     if (!ps->num_env || ps->border_position[ps->num_env] < numQMFSlots - 1) {
         //Create a fake envelope
         int source = ps->num_env ? ps->num_env - 1 : ps->num_env_old - 1;
+        int b;
         if (source >= 0 && source != ps->num_env) {
             if (ps->enable_iid) {
                 memcpy(ps->iid_par+ps->num_env, ps->iid_par+source, sizeof(ps->iid_par[0]));
@@ -248,6 +249,22 @@ int ff_ps_read_data(AVCodecContext *avctx, GetBitContext *gb_host, PSContext *ps
                 memcpy(ps->opd_par+ps->num_env, ps->opd_par+source, sizeof(ps->opd_par[0]));
             }
         }
+        if (ps->enable_iid){
+            for (b = 0; b < ps->nr_iid_par; b++) {
+                if (FFABS(ps->iid_par[ps->num_env][b]) > 7 + 8 * ps->iid_quant) {
+                    av_log(avctx, AV_LOG_ERROR, "iid_par invalid\n");
+                    goto err;
+                }
+            }
+        }
+        /* Validate the fake envelope's ICC indices over the ICC (not IID) band count. */
+        if (ps->enable_icc) {
+            for (b = 0; b < ps->nr_icc_par; b++)
+                if (ps->icc_par[ps->num_env][b] > 7U) {
+                    av_log(avctx, AV_LOG_ERROR, "icc_par invalid\n");
+                    goto err;
+                }
+        }
         ps->num_env++;
         ps->border_position[ps->num_env] = numQMFSlots - 1;
     }
@@ -415,6 +432,7 @@ static void hybrid_synthesis(PSDSPContext *dsp, float out[2][38][64],
 #define DECAY_SLOPE      0.05f
 /// Number of frequency bands that can be addressed by the parameter index, b(k)
 static const int   NR_PAR_BANDS[]      = { 20, 34 };
+static const int   NR_IPDOPD_BANDS[]   = { 11, 17 };
 /// Number of frequency bands that can be addressed by the sub subband index, k
 static const int   NR_BANDS[]          = { 71, 91 };
 /// Start frequency band for the all-pass filter decay slope
@@ -606,7 +624,6 @@ static void map_val_20_to_34(float par[PS_MAX_NR_IIDICC])
     par[ 3] =  par[ 2];
     par[ 2] =  par[ 1];
     par[ 1] = (par[ 0] + par[ 1]) * 0.5f;
-    par[ 0] =  par[ 0];
 }
 
 static void decorrelation(PSContext *ps, float (*out)[32][2], const float (*s)[32][2], int is34)
@@ -811,7 +828,8 @@ static void stereo_processing(PSContext *ps, float (*l)[32][2], float (*r)[32][2
             h12 = H_LUT[iid_mapped[e][b] + 7 + 23 * ps->iid_quant][icc_mapped[e][b]][1];
             h21 = H_LUT[iid_mapped[e][b] + 7 + 23 * ps->iid_quant][icc_mapped[e][b]][2];
             h22 = H_LUT[iid_mapped[e][b] + 7 + 23 * ps->iid_quant][icc_mapped[e][b]][3];
-            if (!PS_BASELINE && ps->enable_ipdopd && b < ps->nr_ipdopd_par) {
+
+            if (!PS_BASELINE && ps->enable_ipdopd && b < NR_IPDOPD_BANDS[is34]) {
                 //The spec say says to only run this smoother when enable_ipdopd
                 //is set but the reference decoder appears to run it constantly
                 float h11i, h12i, h21i, h22i;
diff --git a/libavcodec/aacps.h b/libavcodec/aacps.h
index e8a195a..29323ff 100644
--- a/libavcodec/aacps.h
+++ b/libavcodec/aacps.h
@@ -2,20 +2,20 @@
  * MPEG-4 Parametric Stereo definitions and declarations
  * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aacps_tablegen.c b/libavcodec/aacps_tablegen.c
index 537b6ba..47d4205 100644
--- a/libavcodec/aacps_tablegen.c
+++ b/libavcodec/aacps_tablegen.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aacps_tablegen.h b/libavcodec/aacps_tablegen.h
index a53f9fa..9df38ff 100644
--- a/libavcodec/aacps_tablegen.h
+++ b/libavcodec/aacps_tablegen.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aacpsdata.c b/libavcodec/aacpsdata.c
index 675bd8e..7431cae 100644
--- a/libavcodec/aacpsdata.c
+++ b/libavcodec/aacpsdata.c
@@ -2,20 +2,20 @@
  * MPEG-4 Parametric Stereo data tables
  * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aacpsdsp.c b/libavcodec/aacpsdsp.c
index 88e731f..5dc1a6a 100644
--- a/libavcodec/aacpsdsp.c
+++ b/libavcodec/aacpsdsp.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -211,4 +211,6 @@ av_cold void ff_psdsp_init(PSDSPContext *s)
 
     if (ARCH_ARM)
         ff_psdsp_init_arm(s);
+    if (ARCH_MIPS)
+        ff_psdsp_init_mips(s);
 }
diff --git a/libavcodec/aacpsdsp.h b/libavcodec/aacpsdsp.h
index dc380b1..0ef3023 100644
--- a/libavcodec/aacpsdsp.h
+++ b/libavcodec/aacpsdsp.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2012 Mans Rullgard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -49,5 +49,6 @@ typedef struct PSDSPContext {
 
 void ff_psdsp_init(PSDSPContext *s);
 void ff_psdsp_init_arm(PSDSPContext *s);
+void ff_psdsp_init_mips(PSDSPContext *s);
 
 #endif /* LIBAVCODEC_AACPSDSP_H */
diff --git a/libavcodec/aacpsy.c b/libavcodec/aacpsy.c
index 66cf6d5..9eeb836 100644
--- a/libavcodec/aacpsy.c
+++ b/libavcodec/aacpsy.c
@@ -2,20 +2,20 @@
  * AAC encoder psychoacoustic model
  * Copyright (C) 2008 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,6 +25,8 @@
  */
 
 #include "libavutil/attributes.h"
+#include "libavutil/libm.h"
+
 #include "avcodec.h"
 #include "aactab.h"
 #include "psymodel.h"
@@ -216,6 +218,10 @@ static const float psy_fir_coeffs[] = {
     -5.52212e-17 * 2, -0.313819 * 2
 };
 
+#if ARCH_MIPS
+#   include "mips/aacpsy_mips.h"
+#endif /* ARCH_MIPS */
+
 /**
  * Calculate the ABR attack threshold from the above LAME psymodel table.
  */
@@ -294,7 +300,7 @@ static av_cold int psy_3gpp_init(FFPsyContext *ctx) {
     int i, j, g, start;
     float prev, minscale, minath, minsnr, pe_min;
     const int chan_bitrate = ctx->avctx->bit_rate / ctx->avctx->channels;
-    const int bandwidth    = ctx->avctx->cutoff ? ctx->avctx->cutoff : ctx->avctx->sample_rate / 2;
+    const int bandwidth    = ctx->avctx->cutoff ? ctx->avctx->cutoff : AAC_CUTOFF(ctx->avctx);
     const float num_bark   = calc_bark((float)bandwidth);
 
     ctx->model_priv_data = av_mallocz(sizeof(AacPsyContext));
@@ -335,7 +341,7 @@ static av_cold int psy_3gpp_init(FFPsyContext *ctx) {
             coeff->spread_low[1] = pow(10.0, -bark_width * en_spread_low);
             coeff->spread_hi [1] = pow(10.0, -bark_width * en_spread_hi);
             pe_min = bark_pe * bark_width;
-            minsnr = pow(2.0f, pe_min / band_sizes[g]) - 1.5f;
+            minsnr = exp2(pe_min / band_sizes[g]) - 1.5f;
             coeff->min_snr = av_clipf(1.0f / minsnr, PSY_SNR_25DB, PSY_SNR_1DB);
         }
         start = 0;
@@ -348,7 +354,7 @@ static av_cold int psy_3gpp_init(FFPsyContext *ctx) {
         }
     }
 
-    pctx->ch = av_mallocz(sizeof(AacPsyChannel) * ctx->avctx->channels);
+    pctx->ch = av_mallocz_array(ctx->avctx->channels, sizeof(AacPsyChannel));
 
     lame_window_init(pctx, ctx->avctx);
 
@@ -526,8 +532,11 @@ static float calc_reduction_3gpp(float a, float desired_pe, float pe,
 {
     float thr_avg, reduction;
 
-    thr_avg   = powf(2.0f, (a - pe) / (4.0f * active_lines));
-    reduction = powf(2.0f, (a - desired_pe) / (4.0f * active_lines)) - thr_avg;
+    if(active_lines == 0.0)
+        return 0;
+
+    thr_avg   = exp2f((a - pe) / (4.0f * active_lines));
+    reduction = exp2f((a - desired_pe) / (4.0f * active_lines)) - thr_avg;
 
     return FFMAX(reduction, 0.0f);
 }
@@ -538,8 +547,10 @@ static float calc_reduced_thr_3gpp(AacPsyBand *band, float min_snr,
     float thr = band->thr;
 
     if (band->energy > thr) {
-        thr = powf(thr, 0.25f) + reduction;
-        thr = powf(thr, 4.0f);
+        thr = sqrtf(thr);
+        thr = sqrtf(thr) + reduction;
+        thr *= thr;
+        thr *= thr;
 
         /* This deviates from the 3GPP spec to match the reference encoder.
          * It performs min(thr_reduced, max(thr, energy/min_snr)) only for bands
@@ -555,6 +566,52 @@ static float calc_reduced_thr_3gpp(AacPsyBand *band, float min_snr,
     return thr;
 }
 
+#ifndef calc_thr_3gpp
+/* Band energies, initial thresholds and nz_lines - 5.4.2 "Threshold Calculation" */
+static void calc_thr_3gpp(const FFPsyWindowInfo *wi, const int num_bands, AacPsyChannel *pch,
+                          const uint8_t *band_sizes, const float *coefs)
+{
+    const float *line = coefs; /* advances across all windows, never rewinds */
+    int win, sfb, k;
+    for (win = 0; win < wi->num_windows*16; win += 16) {
+        for (sfb = 0; sfb < num_bands; sfb++) {
+            AacPsyBand *band = &pch->band[win+sfb];
+            float form_factor = 0.0f, inv_rms = 0;
+            band->energy = 0.0f;
+            for (k = 0; k < band_sizes[sfb]; k++) {
+                band->energy += line[k] * line[k];
+                form_factor  += sqrtf(fabs(line[k]));
+            }
+            if (band->energy > 0) /* otherwise inv_rms stays 0 */
+                inv_rms = sqrtf((float)band_sizes[sfb] / band->energy);
+            band->thr      = band->energy * 0.001258925f;
+            band->nz_lines = form_factor * sqrtf(inv_rms);
+
+            line += band_sizes[sfb];
+        }
+    }
+}
+#endif /* calc_thr_3gpp */
+
+#ifndef psy_hp_filter
+/* FIR high-pass producing AAC_BLOCK_SIZE_LONG samples; even/odd taps sum separately. */
+static void psy_hp_filter(const float *firbuf, float *hpfsmpl, const float *psy_fir_coeffs)
+{
+    int n, tap, half = (PSY_LAME_FIR_LEN - 1) / 2;
+    for (n = 0; n < AAC_BLOCK_SIZE_LONG; n++) {
+        const float *in = firbuf + n;
+        float even = in[half], odd = 0.0;
+        for (tap = 0; tap < half - 1; tap += 2) {
+            even += psy_fir_coeffs[tap] * (in[tap] + in[PSY_LAME_FIR_LEN - tap]);
+            odd  += psy_fir_coeffs[tap + 1] * (in[tap + 1] + in[PSY_LAME_FIR_LEN - tap - 1]);
+        }
+        /* NOTE: The LAME psymodel expects its input in the range -32768 to 32768.
+         *       Tuning this for normalized floats would be difficult. */
+        hpfsmpl[n] = (even + odd) * 32768.0f;
+    }
+}
+#endif /* psy_hp_filter */
+
 /**
  * Calculate band thresholds as suggested in 3GPP TS26.403
  */
@@ -563,9 +620,8 @@ static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel,
 {
     AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
     AacPsyChannel *pch  = &pctx->ch[channel];
-    int start = 0;
     int i, w, g;
-    float desired_bits, desired_pe, delta_pe, reduction, spread_en[128] = {0};
+    float desired_bits, desired_pe, delta_pe, reduction= NAN, spread_en[128] = {0};
     float a = 0.0f, active_lines = 0.0f, norm_fac = 0.0f;
     float pe = pctx->chan_bitrate > 32000 ? 0.0f : FFMAX(50.0f, 100.0f - pctx->chan_bitrate * 100.0f / 32000.0f);
     const int      num_bands   = ctx->num_bands[wi->num_windows == 8];
@@ -574,22 +630,8 @@ static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel,
     const float avoid_hole_thr = wi->num_windows == 8 ? PSY_3GPP_AH_THR_SHORT : PSY_3GPP_AH_THR_LONG;
 
     //calculate energies, initial thresholds and related values - 5.4.2 "Threshold Calculation"
-    for (w = 0; w < wi->num_windows*16; w += 16) {
-        for (g = 0; g < num_bands; g++) {
-            AacPsyBand *band = &pch->band[w+g];
+    calc_thr_3gpp(wi, num_bands, pch, band_sizes, coefs);
 
-            float form_factor = 0.0f;
-            band->energy = 0.0f;
-            for (i = 0; i < band_sizes[g]; i++) {
-                band->energy += coefs[start+i] * coefs[start+i];
-                form_factor  += sqrtf(fabs(coefs[start+i]));
-            }
-            band->thr      = band->energy * 0.001258925f;
-            band->nz_lines = form_factor / powf(band->energy / band_sizes[g], 0.25f);
-
-            start += band_sizes[g];
-        }
-    }
     //modify thresholds and energies - spread, threshold in quiet, pre-echo control
     for (w = 0; w < wi->num_windows*16; w += 16) {
         AacPsyBand *bands = &pch->band[w];
@@ -705,7 +747,7 @@ static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel,
                         float delta_sfb_pe = band->norm_fac * norm_fac * delta_pe;
                         float thr = band->thr;
 
-                        thr *= powf(2.0f, delta_sfb_pe / band->active_lines);
+                        thr *= exp2f(delta_sfb_pe / band->active_lines);
                         if (thr > coeffs[g].min_snr * band->energy && band->avoid_holes == PSY_3GPP_AH_INACTIVE)
                             thr = FFMAX(band->thr, coeffs[g].min_snr * band->energy);
                         band->thr = thr;
@@ -795,21 +837,10 @@ static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx, const float *audio,
         float energy_subshort[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS];
         float energy_short[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
         const float *firbuf = la + (AAC_BLOCK_SIZE_SHORT/4 - PSY_LAME_FIR_LEN);
-        int j, att_sum = 0;
+        int att_sum = 0;
 
         /* LAME comment: apply high pass filter of fs/4 */
-        for (i = 0; i < AAC_BLOCK_SIZE_LONG; i++) {
-            float sum1, sum2;
-            sum1 = firbuf[i + (PSY_LAME_FIR_LEN - 1) / 2];
-            sum2 = 0.0;
-            for (j = 0; j < ((PSY_LAME_FIR_LEN - 1) / 2) - 1; j += 2) {
-                sum1 += psy_fir_coeffs[j] * (firbuf[i + j] + firbuf[i + PSY_LAME_FIR_LEN - j]);
-                sum2 += psy_fir_coeffs[j + 1] * (firbuf[i + j + 1] + firbuf[i + PSY_LAME_FIR_LEN - j - 1]);
-            }
-            /* NOTE: The LAME psymodel expects its input in the range -32768 to
-             * 32768. Tuning this for normalized floats would be difficult. */
-            hpfsmpl[i] = (sum1 + sum2) * 32768.0f;
-        }
+        psy_hp_filter(firbuf, hpfsmpl, psy_fir_coeffs);
 
         /* Calculate the energies of each sub-shortblock */
         for (i = 0; i < PSY_LAME_NUM_SUBBLOCKS; i++) {
diff --git a/libavcodec/aacsbr.c b/libavcodec/aacsbr.c
index 4d2ac6c..290fb81 100644
--- a/libavcodec/aacsbr.c
+++ b/libavcodec/aacsbr.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2008-2009 Robert Swain ( rob opendot cl )
  * Copyright (c) 2009-2010 Alex Converse <alex.converse@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -35,13 +35,19 @@
 #include "sbrdsp.h"
 #include "libavutil/internal.h"
 #include "libavutil/libm.h"
+#include "libavutil/avassert.h"
 
 #include <stdint.h>
 #include <float.h>
+#include <math.h>
 
 #define ENVELOPE_ADJUSTMENT_OFFSET 2
 #define NOISE_FLOOR_OFFSET 6.0f
 
+#if ARCH_MIPS
+#include "mips/aacsbr_mips.h"
+#endif /* ARCH_MIPS */
+
 /**
  * SBR VLC tables
  */
@@ -85,6 +91,8 @@ static const int8_t vlc_sbr_lav[10] =
 #define SBR_VLC_ROW(name) \
     { name ## _codes, name ## _bits, sizeof(name ## _codes), sizeof(name ## _codes[0]) }
 
+static void aacsbr_func_ptr_init(AACSBRContext *c);
+
 av_cold void ff_aac_sbr_init(void)
 {
     int n;
@@ -140,6 +148,8 @@ static void sbr_turnoff(SpectralBandReplication *sbr) {
 
 av_cold void ff_aac_sbr_ctx_init(AACContext *ac, SpectralBandReplication *sbr)
 {
+    if(sbr->mdct.mdct_bits)
+        return;
     sbr->kx[0] = sbr->kx[1];
     sbr_turnoff(sbr);
     sbr->data[0].synthesis_filterbank_samples_offset = SBR_SYNTHESIS_BUF_SIZE - (1280 - 128);
@@ -151,6 +161,7 @@ av_cold void ff_aac_sbr_ctx_init(AACContext *ac, SpectralBandReplication *sbr)
     ff_mdct_init(&sbr->mdct_ana, 7, 1, -2.0 * 32768.0);
     ff_ps_ctx_init(&sbr->ps);
     ff_sbrdsp_init(&sbr->dsp);
+    aacsbr_func_ptr_init(&sbr->c);
 }
 
 av_cold void ff_aac_sbr_ctx_close(SpectralBandReplication *sbr)
@@ -334,9 +345,6 @@ static int sbr_make_f_master(AACContext *ac, SpectralBandReplication *sbr,
     } else
         temp = 5000;
 
-    start_min = ((temp << 7) + (sbr->sample_rate >> 1)) / sbr->sample_rate;
-    stop_min  = ((temp << 8) + (sbr->sample_rate >> 1)) / sbr->sample_rate;
-
     switch (sbr->sample_rate) {
     case 16000:
         sbr_offset_ptr = sbr_offset[0];
@@ -362,6 +370,9 @@ static int sbr_make_f_master(AACContext *ac, SpectralBandReplication *sbr,
         return -1;
     }
 
+    start_min = ((temp << 7) + (sbr->sample_rate >> 1)) / sbr->sample_rate;
+    stop_min  = ((temp << 8) + (sbr->sample_rate >> 1)) / sbr->sample_rate;
+
     sbr->k[0] = start_min + sbr_offset_ptr[spectrum->bs_start_freq];
 
     if (spectrum->bs_stop_freq < 14) {
@@ -388,6 +399,8 @@ static int sbr_make_f_master(AACContext *ac, SpectralBandReplication *sbr,
         max_qmf_subbands = 35;
     } else if (sbr->sample_rate >= 48000)
         max_qmf_subbands = 32;
+    else
+        av_assert0(0);
 
     if (sbr->k[2] - sbr->k[0] > max_qmf_subbands) {
         av_log(ac->avctx, AV_LOG_ERROR,
@@ -549,7 +562,7 @@ static int sbr_hf_calc_npatches(AACContext *ac, SpectralBandReplication *sbr)
             k = sbr->n_master;
     } while (sb != sbr->kx[1] + sbr->m[1]);
 
-    if (sbr->patch_num_subbands[sbr->num_patches-1] < 3 && sbr->num_patches > 1)
+    if (sbr->num_patches > 1 && sbr->patch_num_subbands[sbr->num_patches-1] < 3)
         sbr->num_patches--;
 
     return 0;
@@ -743,7 +756,7 @@ static int read_sbr_grid(AACContext *ac, SpectralBandReplication *sbr,
         if (ch_data->bs_frame_class == FIXFIX) {
             idx = ch_data->bs_num_env >> 1;
         } else if (ch_data->bs_frame_class & 1) { // FIXVAR or VARVAR
-            idx = ch_data->bs_num_env - FFMAX(bs_pointer - 1, 1);
+            idx = ch_data->bs_num_env - FFMAX((int)bs_pointer - 1, 1);
         } else { // VARFIX
             if (!bs_pointer)
                 idx = 1;
@@ -928,7 +941,9 @@ static void read_sbr_extension(AACContext *ac, SpectralBandReplication *sbr,
         }
         break;
     default:
-        avpriv_request_sample(ac->avctx, "Reserved SBR extensions");
+        // some files contain 0-padding
+        if (bs_extension_id || *num_bits_left > 16 || show_bits(gb, *num_bits_left))
+            avpriv_request_sample(ac->avctx, "Reserved SBR extensions");
         skip_bits_long(gb, *num_bits_left); // bs_fill_bits
         *num_bits_left = 0;
         break;
@@ -1117,7 +1132,12 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
             for (k = 0; k < sbr->n[sbr->data[0].bs_freq_res[e]]; k++) {
                 float temp1 = exp2f(sbr->data[0].env_facs[e][k] * alpha + 7.0f);
                 float temp2 = exp2f((pan_offset - sbr->data[1].env_facs[e][k]) * alpha);
-                float fac   = temp1 / (1.0f + temp2);
+                float fac;
+                if (temp1 > 1E20) {
+                    av_log(NULL, AV_LOG_ERROR, "envelope scalefactor overflow in dequant\n");
+                    temp1 = 1;
+                }
+                fac   = temp1 / (1.0f + temp2);
                 sbr->data[0].env_facs[e][k] = fac;
                 sbr->data[1].env_facs[e][k] = fac * temp2;
             }
@@ -1126,7 +1146,12 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
             for (k = 0; k < sbr->n_q; k++) {
                 float temp1 = exp2f(NOISE_FLOOR_OFFSET - sbr->data[0].noise_facs[e][k] + 1);
                 float temp2 = exp2f(12 - sbr->data[1].noise_facs[e][k]);
-                float fac   = temp1 / (1.0f + temp2);
+                float fac;
+                if (temp1 > 1E20) {
+                    av_log(NULL, AV_LOG_ERROR, "envelope scalefactor overflow in dequant\n");
+                    temp1 = 1;
+                }
+                fac = temp1 / (1.0f + temp2);
                 sbr->data[0].noise_facs[e][k] = fac;
                 sbr->data[1].noise_facs[e][k] = fac * temp2;
             }
@@ -1135,9 +1160,15 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
         for (ch = 0; ch < (id_aac == TYPE_CPE) + 1; ch++) {
             float alpha = sbr->data[ch].bs_amp_res ? 1.0f : 0.5f;
             for (e = 1; e <= sbr->data[ch].bs_num_env; e++)
-                for (k = 0; k < sbr->n[sbr->data[ch].bs_freq_res[e]]; k++)
+                for (k = 0; k < sbr->n[sbr->data[ch].bs_freq_res[e]]; k++){
                     sbr->data[ch].env_facs[e][k] =
                         exp2f(alpha * sbr->data[ch].env_facs[e][k] + 6.0f);
+                    if (sbr->data[ch].env_facs[e][k] > 1E20) {
+                        av_log(NULL, AV_LOG_ERROR, "envelope scalefactor overflow in dequant\n");
+                        sbr->data[ch].env_facs[e][k] = 1;
+                    }
+                }
+
             for (e = 1; e <= sbr->data[ch].bs_num_noise; e++)
                 for (k = 0; k < sbr->n_q; k++)
                     sbr->data[ch].noise_facs[e][k] =
@@ -1152,6 +1183,7 @@ static void sbr_dequant(SpectralBandReplication *sbr, int id_aac)
  * @param   x       pointer to the beginning of the first sample window
  * @param   W       array of complex-valued samples split into subbands
  */
+#ifndef sbr_qmf_analysis
 static void sbr_qmf_analysis(AVFloatDSPContext *dsp, FFTContext *mdct,
                              SBRDSPContext *sbrdsp, const float *in, float *x,
                              float z[320], float W[2][32][32][2], int buf_idx)
@@ -1169,11 +1201,13 @@ static void sbr_qmf_analysis(AVFloatDSPContext *dsp, FFTContext *mdct,
         x += 32;
     }
 }
+#endif
 
 /**
  * Synthesis QMF Bank (14496-3 sp04 p206) and Downsampled Synthesis QMF Bank
  * (14496-3 sp04 p206)
  */
+#ifndef sbr_qmf_synthesis
 static void sbr_qmf_synthesis(FFTContext *mdct,
                               SBRDSPContext *sbrdsp, AVFloatDSPContext *dsp,
                               float *out, float X[2][38][64],
@@ -1219,6 +1253,7 @@ static void sbr_qmf_synthesis(FFTContext *mdct,
         out += 64 >> div;
     }
 }
+#endif
 
 /** High Frequency Generation (14496-3 sp04 p214+) and Inverse Filtering
  * (14496-3 sp04 p214)
@@ -1572,10 +1607,6 @@ static void sbr_hf_assemble(float Y1[38][64][2],
         0.11516383427084,
         0.03183050093751,
     };
-    static const int8_t phi[2][4] = {
-        {  1,  0, -1,  0}, // real
-        {  0,  1,  0, -1}, // imaginary
-    };
     float (*g_temp)[48] = ch_data->g_temp, (*q_temp)[48] = ch_data->q_temp;
     int indexnoise = ch_data->f_indexnoise;
     int indexsine  = ch_data->f_indexsine;
@@ -1599,7 +1630,6 @@ static void sbr_hf_assemble(float Y1[38][64][2],
 
     for (e = 0; e < ch_data->bs_num_env; e++) {
         for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
-            int phi_sign = (1 - 2*(kx & 1));
             LOCAL_ALIGNED_16(float, g_filt_tab, [48]);
             LOCAL_ALIGNED_16(float, q_filt_tab, [48]);
             float *g_filt, *q_filt;
@@ -1629,13 +1659,17 @@ static void sbr_hf_assemble(float Y1[38][64][2],
                                                    q_filt, indexnoise,
                                                    kx, m_max);
             } else {
-                for (m = 0; m < m_max; m++) {
-                    Y1[i][m + kx][0] +=
-                        sbr->s_m[e][m] * phi[0][indexsine];
-                    Y1[i][m + kx][1] +=
-                        sbr->s_m[e][m] * (phi[1][indexsine] * phi_sign);
-                    phi_sign = -phi_sign;
+                int idx = indexsine&1;
+                int A = (1-((indexsine+(kx & 1))&2));
+                int B = (A^(-idx)) + idx;
+                float *out = &Y1[i][kx][idx];
+                float *in  = sbr->s_m[e];
+                for (m = 0; m+1 < m_max; m+=2) {
+                    out[2*m  ] += in[m  ] * A;
+                    out[2*m+2] += in[m+1] * B;
                 }
+                if(m_max&1)
+                    out[2*m  ] += in[m  ] * A;
             }
             indexnoise = (indexnoise + m_max) & 0x1ff;
             indexsine = (indexsine + 1) & 3;
@@ -1668,13 +1702,13 @@ void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int id_aac,
         sbr_qmf_analysis(&ac->fdsp, &sbr->mdct_ana, &sbr->dsp, ch ? R : L, sbr->data[ch].analysis_filterbank_samples,
                          (float*)sbr->qmf_filter_scratch,
                          sbr->data[ch].W, sbr->data[ch].Ypos);
-        sbr_lf_gen(ac, sbr, sbr->X_low,
-                   (const float (*)[32][32][2]) sbr->data[ch].W,
-                   sbr->data[ch].Ypos);
+        sbr->c.sbr_lf_gen(ac, sbr, sbr->X_low,
+                          (const float (*)[32][32][2]) sbr->data[ch].W,
+                          sbr->data[ch].Ypos);
         sbr->data[ch].Ypos ^= 1;
         if (sbr->start) {
-            sbr_hf_inverse_filter(&sbr->dsp, sbr->alpha0, sbr->alpha1,
-                                  (const float (*)[40][2]) sbr->X_low, sbr->k[0]);
+            sbr->c.sbr_hf_inverse_filter(&sbr->dsp, sbr->alpha0, sbr->alpha1,
+                                         (const float (*)[40][2]) sbr->X_low, sbr->k[0]);
             sbr_chirp(sbr, &sbr->data[ch]);
             sbr_hf_gen(ac, sbr, sbr->X_high,
                        (const float (*)[40][2]) sbr->X_low,
@@ -1688,7 +1722,7 @@ void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int id_aac,
             if (!err) {
                 sbr_env_estimate(sbr->e_curr, sbr->X_high, sbr, &sbr->data[ch]);
                 sbr_gain_calc(ac, sbr, &sbr->data[ch], sbr->data[ch].e_a);
-                sbr_hf_assemble(sbr->data[ch].Y[sbr->data[ch].Ypos],
+                sbr->c.sbr_hf_assemble(sbr->data[ch].Y[sbr->data[ch].Ypos],
                                 (const float (*)[40][2]) sbr->X_high,
                                 sbr, &sbr->data[ch],
                                 sbr->data[ch].e_a);
@@ -1696,7 +1730,7 @@ void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int id_aac,
         }
 
         /* synthesis */
-        sbr_x_gen(sbr, sbr->X[ch],
+        sbr->c.sbr_x_gen(sbr, sbr->X[ch],
                   (const float (*)[64][2]) sbr->data[ch].Y[1-sbr->data[ch].Ypos],
                   (const float (*)[64][2]) sbr->data[ch].Y[  sbr->data[ch].Ypos],
                   (const float (*)[40][2]) sbr->X_low, ch);
@@ -1723,3 +1757,14 @@ void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int id_aac,
                           &sbr->data[1].synthesis_filterbank_samples_offset,
                           downsampled);
 }
+
+/* Install this file's default routines, then call the MIPS hook if ARCH_MIPS. */
+static void aacsbr_func_ptr_init(AACSBRContext *c)
+{
+    c->sbr_hf_inverse_filter = sbr_hf_inverse_filter;
+    c->sbr_x_gen             = sbr_x_gen;
+    c->sbr_hf_assemble       = sbr_hf_assemble;
+    c->sbr_lf_gen            = sbr_lf_gen;
+    if (ARCH_MIPS)
+        ff_aacsbr_func_ptr_init_mips(c);
+}
diff --git a/libavcodec/aacsbr.h b/libavcodec/aacsbr.h
index 9bc5e29..f5e33ab 100644
--- a/libavcodec/aacsbr.h
+++ b/libavcodec/aacsbr.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2008-2009 Robert Swain ( rob opendot cl )
  * Copyright (c) 2010      Alex Converse <alex.converse@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -46,4 +46,6 @@ int ff_decode_sbr_extension(AACContext *ac, SpectralBandReplication *sbr,
 void ff_sbr_apply(AACContext *ac, SpectralBandReplication *sbr, int id_aac,
                   float* L, float *R);
 
+void ff_aacsbr_func_ptr_init_mips(AACSBRContext *c);
+
 #endif /* AVCODEC_AACSBR_H */
diff --git a/libavcodec/aacsbrdata.h b/libavcodec/aacsbrdata.h
index f309059..12575ee 100644
--- a/libavcodec/aacsbrdata.h
+++ b/libavcodec/aacsbrdata.h
@@ -2,20 +2,20 @@
  * AAC Spectral Band Replication decoding data
  * Copyright (c) 2008-2009 Robert Swain ( rob opendot cl )
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -352,7 +352,7 @@ static DECLARE_ALIGNED(32, float, sbr_qmf_window_us)[640] = {
      0.8537385600,
 };
 
-/* First two entries repeated at end to simplify SIMD implementations. */
+/* First eight entries repeated at end to simplify SIMD implementations. */
 const DECLARE_ALIGNED(16, float, ff_sbr_noise_table)[][2] = {
 {-0.99948153278296, -0.59483417516607}, { 0.97113454393991, -0.67528515225647},
 { 0.14130051758487, -0.95090983575689}, {-0.47005496701697, -0.37340549728647},
@@ -610,7 +610,11 @@ const DECLARE_ALIGNED(16, float, ff_sbr_noise_table)[][2] = {
 {-0.93412041758744,  0.41374052024363}, { 0.96063943315511,  0.93116709541280},
 { 0.97534253457837,  0.86150930812689}, { 0.99642466504163,  0.70190043427512},
 {-0.94705089665984, -0.29580042814306}, { 0.91599807087376, -0.98147830385781},
+// Start of duplicated table
 {-0.99948153278296, -0.59483417516607}, { 0.97113454393991, -0.67528515225647},
+{ 0.14130051758487, -0.95090983575689}, {-0.47005496701697, -0.37340549728647},
+{ 0.80705063769351,  0.29653668284408}, {-0.38981478896926,  0.89572605717087},
+{-0.01053049862020, -0.66959058036166}, {-0.91266367957293, -0.11522938140034},
 };
 
 #endif /* AVCODEC_AACSBRDATA_H */
diff --git a/libavcodec/aactab.c b/libavcodec/aactab.c
index ee9a735..eb882e8 100644
--- a/libavcodec/aactab.c
+++ b/libavcodec/aactab.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
  * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aactab.h b/libavcodec/aactab.h
index d19b3fd..5ed8508 100644
--- a/libavcodec/aactab.h
+++ b/libavcodec/aactab.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
  * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aandcttab.c b/libavcodec/aandcttab.c
index 0c5b573..97013d2 100644
--- a/libavcodec/aandcttab.c
+++ b/libavcodec/aandcttab.c
@@ -1,24 +1,24 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
  * @file
- * AAN (Arai Agui Aakajima) (I)DCT tables
+ * AAN (Arai, Agui and Nakajima) (I)DCT tables
  */
 
 #include <stdint.h>
diff --git a/libavcodec/aandcttab.h b/libavcodec/aandcttab.h
index daccb7b..b0a2f44 100644
--- a/libavcodec/aandcttab.h
+++ b/libavcodec/aandcttab.h
@@ -1,24 +1,24 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
  * @file
- * AAN (Arai Agui Nakajima) (I)DCT tables
+ * AAN (Arai, Agui and Nakajima) (I)DCT tables
  */
 
 #ifndef AVCODEC_AANDCTTAB_H
diff --git a/libavcodec/aarch64/asm-offsets.h b/libavcodec/aarch64/asm-offsets.h
index 45b5c40..8defd7c 100644
--- a/libavcodec/aarch64/asm-offsets.h
+++ b/libavcodec/aarch64/asm-offsets.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aarch64/cabac.h b/libavcodec/aarch64/cabac.h
index e12953e..6b9b77e 100644
--- a/libavcodec/aarch64/cabac.h
+++ b/libavcodec/aarch64/cabac.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aarch64/fft_init_aarch64.c b/libavcodec/aarch64/fft_init_aarch64.c
index 589e82d..8514d3b 100644
--- a/libavcodec/aarch64/fft_init_aarch64.c
+++ b/libavcodec/aarch64/fft_init_aarch64.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aarch64/fft_neon.S b/libavcodec/aarch64/fft_neon.S
index 5f88bed..54c13a4 100644
--- a/libavcodec/aarch64/fft_neon.S
+++ b/libavcodec/aarch64/fft_neon.S
@@ -8,20 +8,20 @@
  * This algorithm (though not any of the implementation details) is
  * based on libdjbfft by D. J. Bernstein.
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aarch64/h264chroma_init_aarch64.c b/libavcodec/aarch64/h264chroma_init_aarch64.c
index c7679ab..2af62be 100644
--- a/libavcodec/aarch64/h264chroma_init_aarch64.c
+++ b/libavcodec/aarch64/h264chroma_init_aarch64.c
@@ -2,20 +2,20 @@
  * ARM NEON optimised H.264 chroma functions
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aarch64/h264cmc_neon.S b/libavcodec/aarch64/h264cmc_neon.S
index d1025c7..486079f 100644
--- a/libavcodec/aarch64/h264cmc_neon.S
+++ b/libavcodec/aarch64/h264cmc_neon.S
@@ -2,20 +2,20 @@
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  * Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c
index b106f11..ed5e4bd 100644
--- a/libavcodec/aarch64/h264dsp_init_aarch64.c
+++ b/libavcodec/aarch64/h264dsp_init_aarch64.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aarch64/h264dsp_neon.S b/libavcodec/aarch64/h264dsp_neon.S
index 9b4610a..4ec35f2 100644
--- a/libavcodec/aarch64/h264dsp_neon.S
+++ b/libavcodec/aarch64/h264dsp_neon.S
@@ -2,20 +2,20 @@
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  * Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aarch64/h264idct_neon.S b/libavcodec/aarch64/h264idct_neon.S
index 99c2cb5..04b5a47 100644
--- a/libavcodec/aarch64/h264idct_neon.S
+++ b/libavcodec/aarch64/h264idct_neon.S
@@ -2,20 +2,20 @@
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  * Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aarch64/h264qpel_init_aarch64.c b/libavcodec/aarch64/h264qpel_init_aarch64.c
index 4beb11b..0578c01 100644
--- a/libavcodec/aarch64/h264qpel_init_aarch64.c
+++ b/libavcodec/aarch64/h264qpel_init_aarch64.c
@@ -2,20 +2,20 @@
  * ARM NEON optimised DSP functions
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aarch64/h264qpel_neon.S b/libavcodec/aarch64/h264qpel_neon.S
index 731dc06..d27cfac 100644
--- a/libavcodec/aarch64/h264qpel_neon.S
+++ b/libavcodec/aarch64/h264qpel_neon.S
@@ -2,20 +2,20 @@
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  * Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aarch64/hpeldsp_init_aarch64.c b/libavcodec/aarch64/hpeldsp_init_aarch64.c
index 6bc4c09..144ae2b 100644
--- a/libavcodec/aarch64/hpeldsp_init_aarch64.c
+++ b/libavcodec/aarch64/hpeldsp_init_aarch64.c
@@ -2,20 +2,20 @@
  * ARM NEON optimised DSP functions
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aarch64/hpeldsp_neon.S b/libavcodec/aarch64/hpeldsp_neon.S
index 2978290..a491c17 100644
--- a/libavcodec/aarch64/hpeldsp_neon.S
+++ b/libavcodec/aarch64/hpeldsp_neon.S
@@ -3,20 +3,20 @@
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  * Copyright (c) 2013 Janne Grunau <janne-libav@jannau.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aarch64/mdct_neon.S b/libavcodec/aarch64/mdct_neon.S
index bccd832..1fd199c 100644
--- a/libavcodec/aarch64/mdct_neon.S
+++ b/libavcodec/aarch64/mdct_neon.S
@@ -3,20 +3,20 @@
  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
  * Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aarch64/mpegaudiodsp_init.c b/libavcodec/aarch64/mpegaudiodsp_init.c
index a8b2baf..b945146 100644
--- a/libavcodec/aarch64/mpegaudiodsp_init.c
+++ b/libavcodec/aarch64/mpegaudiodsp_init.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aarch64/mpegaudiodsp_neon.S b/libavcodec/aarch64/mpegaudiodsp_neon.S
index 808576a..733fc84 100644
--- a/libavcodec/aarch64/mpegaudiodsp_neon.S
+++ b/libavcodec/aarch64/mpegaudiodsp_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aarch64/neon.S b/libavcodec/aarch64/neon.S
index f1072b7..619aec6 100644
--- a/libavcodec/aarch64/neon.S
+++ b/libavcodec/aarch64/neon.S
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aarch64/neontest.c b/libavcodec/aarch64/neontest.c
index 0414829..6e41f37 100644
--- a/libavcodec/aarch64/neontest.c
+++ b/libavcodec/aarch64/neontest.c
@@ -2,20 +2,20 @@
  * check NEON registers for clobbers
  * Copyright (c) 2013 Martin Storsjo
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aarch64/opus_imdct_init.c b/libavcodec/aarch64/opus_imdct_init.c
index 1a776dc..3fa9a11 100644
--- a/libavcodec/aarch64/opus_imdct_init.c
+++ b/libavcodec/aarch64/opus_imdct_init.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aarch64/opus_imdct_neon.S b/libavcodec/aarch64/opus_imdct_neon.S
index 5f6c502..6234309 100644
--- a/libavcodec/aarch64/opus_imdct_neon.S
+++ b/libavcodec/aarch64/opus_imdct_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aarch64/rv40dsp_init_aarch64.c b/libavcodec/aarch64/rv40dsp_init_aarch64.c
index 0bb404f..764bc1e 100644
--- a/libavcodec/aarch64/rv40dsp_init_aarch64.c
+++ b/libavcodec/aarch64/rv40dsp_init_aarch64.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aarch64/vc1dsp_init_aarch64.c b/libavcodec/aarch64/vc1dsp_init_aarch64.c
index 11cd81e..e59e55e 100644
--- a/libavcodec/aarch64/vc1dsp_init_aarch64.c
+++ b/libavcodec/aarch64/vc1dsp_init_aarch64.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aarch64/videodsp.S b/libavcodec/aarch64/videodsp.S
index 7ce5a7d..24067cc 100644
--- a/libavcodec/aarch64/videodsp.S
+++ b/libavcodec/aarch64/videodsp.S
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aarch64/videodsp_init.c b/libavcodec/aarch64/videodsp_init.c
index 59b697d..6f667a6 100644
--- a/libavcodec/aarch64/videodsp_init.c
+++ b/libavcodec/aarch64/videodsp_init.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aarch64/vorbisdsp_init.c b/libavcodec/aarch64/vorbisdsp_init.c
index 3559b54..c796f95 100644
--- a/libavcodec/aarch64/vorbisdsp_init.c
+++ b/libavcodec/aarch64/vorbisdsp_init.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aarch64/vorbisdsp_neon.S b/libavcodec/aarch64/vorbisdsp_neon.S
index 11f71f1..e76feeb 100644
--- a/libavcodec/aarch64/vorbisdsp_neon.S
+++ b/libavcodec/aarch64/vorbisdsp_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aasc.c b/libavcodec/aasc.c
index 468e394..38658f8 100644
--- a/libavcodec/aasc.c
+++ b/libavcodec/aasc.c
@@ -2,20 +2,20 @@
  * Autodesk RLE Decoder
  * Copyright (C) 2005 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -36,15 +36,39 @@ typedef struct AascContext {
     AVCodecContext *avctx;
     GetByteContext gb;
     AVFrame *frame;
+
+    uint32_t palette[AVPALETTE_COUNT];
+    int palette_size;
 } AascContext;
 
 static av_cold int aasc_decode_init(AVCodecContext *avctx)
 {
     AascContext *s = avctx->priv_data;
+    uint8_t *ptr;
+    int i;
 
     s->avctx = avctx;
-
-    avctx->pix_fmt = AV_PIX_FMT_BGR24;
+    switch (avctx->bits_per_coded_sample) {
+    case 8:
+        avctx->pix_fmt = AV_PIX_FMT_PAL8;
+
+        ptr = avctx->extradata;
+        s->palette_size = FFMIN(avctx->extradata_size, AVPALETTE_SIZE);
+        for (i = 0; i < s->palette_size / 4; i++) {
+            s->palette[i] = 0xFFU << 24 | AV_RL32(ptr);
+            ptr += 4;
+        }
+        break;
+    case 16:
+        avctx->pix_fmt = AV_PIX_FMT_RGB555LE;
+        break;
+    case 24:
+        avctx->pix_fmt = AV_PIX_FMT_BGR24;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth: %d\n", avctx->bits_per_coded_sample);
+        return -1;
+    }
 
     s->frame = av_frame_alloc();
     if (!s->frame)
@@ -60,27 +84,35 @@ static int aasc_decode_frame(AVCodecContext *avctx,
     const uint8_t *buf = avpkt->data;
     int buf_size       = avpkt->size;
     AascContext *s     = avctx->priv_data;
-    int compr, i, stride, ret;
+    int compr, i, stride, psize, ret;
 
-    if (buf_size < 4)
+    if (buf_size < 4) {
+        av_log(avctx, AV_LOG_ERROR, "frame too short\n");
         return AVERROR_INVALIDDATA;
+    }
 
-    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0)
         return ret;
-    }
 
     compr     = AV_RL32(buf);
     buf      += 4;
     buf_size -= 4;
+    psize = avctx->bits_per_coded_sample / 8;
+    switch (avctx->codec_tag) {
+    case MKTAG('A', 'A', 'S', '4'):
+        bytestream2_init(&s->gb, buf - 4, buf_size + 4);
+        ff_msrle_decode(avctx, (AVPicture*)s->frame, 8, &s->gb);
+        break;
+    case MKTAG('A', 'A', 'S', 'C'):
     switch (compr) {
     case 0:
-        stride = (avctx->width * 3 + 3) & ~3;
+        stride = (avctx->width * psize + psize) & ~psize;
         if (buf_size < stride * avctx->height)
             return AVERROR_INVALIDDATA;
         for (i = avctx->height - 1; i >= 0; i--) {
-            memcpy(s->frame->data[0] + i * s->frame->linesize[0], buf, avctx->width * 3);
+            memcpy(s->frame->data[0] + i * s->frame->linesize[0], buf, avctx->width * psize);
             buf += stride;
+            buf_size -= stride;
         }
         break;
     case 1:
@@ -91,6 +123,14 @@ static int aasc_decode_frame(AVCodecContext *avctx,
         av_log(avctx, AV_LOG_ERROR, "Unknown compression type %d\n", compr);
         return AVERROR_INVALIDDATA;
     }
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unknown FourCC: %X\n", avctx->codec_tag);
+        return -1;
+    }
+
+    if (avctx->pix_fmt == AV_PIX_FMT_PAL8)
+        memcpy(s->frame->data[1], s->palette, s->palette_size);
 
     *got_frame = 1;
     if ((ret = av_frame_ref(data, s->frame)) < 0)
diff --git a/libavcodec/ac3.c b/libavcodec/ac3.c
index 99e5b50..29e132f 100644
--- a/libavcodec/ac3.c
+++ b/libavcodec/ac3.c
@@ -2,20 +2,20 @@
  * Common code between the AC-3 encoder and decoder
  * Copyright (c) 2000 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ac3.h b/libavcodec/ac3.h
index f2cb6c3..542f79d 100644
--- a/libavcodec/ac3.h
+++ b/libavcodec/ac3.h
@@ -2,20 +2,20 @@
  * Common code between the AC-3 encoder and decoder
  * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -39,6 +39,8 @@
 #define AC3_CRITICAL_BANDS 50
 #define AC3_MAX_CPL_BANDS  18
 
+#include "libavutil/opt.h"
+#include "avcodec.h"
 #include "ac3tab.h"
 
 /* exponent encoding strategy */
@@ -49,6 +51,52 @@
 #define EXP_D25   2
 #define EXP_D45   3
 
+#ifndef USE_FIXED
+#define USE_FIXED 0
+#endif
+
+#if USE_FIXED
+
+#define FFT_FLOAT 0
+
+#define FIXR(a)                 ((int)((a) * 0 + 0.5))
+#define FIXR12(a)               ((int)((a) * 4096 + 0.5))
+#define FIXR15(a)               ((int)((a) * 32768 + 0.5))
+#define ROUND15(x)              ((x) + 16384) >> 15
+
+#define AC3_RENAME(x)           x ## _fixed
+#define AC3_NORM(norm)          (1<<24)/(norm)
+#define AC3_MUL(a,b)            ((((int64_t) (a)) * (b))>>12)
+#define AC3_RANGE(x)            (x)
+#define AC3_DYNAMIC_RANGE(x)    (x)
+#define AC3_SPX_BLEND(x)        (x)
+#define AC3_DYNAMIC_RANGE1      0
+
+#define INTFLOAT                int
+#define SHORTFLOAT              int16_t
+
+#else /* USE_FIXED */
+
+#define FIXR(x)                 ((float)(x))
+#define FIXR12(x)               ((float)(x))
+#define FIXR15(x)               ((float)(x))
+#define ROUND15(x)              (x)
+
+#define AC3_RENAME(x)           x
+#define AC3_NORM(norm)          (1.0f/(norm))
+#define AC3_MUL(a,b)            ((a) * (b))
+#define AC3_RANGE(x)            (dynamic_range_tab[(x)])
+#define AC3_DYNAMIC_RANGE(x)    (powf(x,  s->drc_scale))
+#define AC3_SPX_BLEND(x)        (x)* (1.0f/32)
+#define AC3_DYNAMIC_RANGE1      1.0f
+
+#define INTFLOAT                float
+#define SHORTFLOAT              float
+
+#endif /* USE_FIXED */
+
+#define AC3_LEVEL(x)            ROUND15((x) * FIXR15(0.7071067811865476))
+
 /* pre-defined gain values */
 #define LEVEL_PLUS_3DB          1.4142135623730950
 #define LEVEL_PLUS_1POINT5DB    1.1892071150027209
@@ -140,7 +188,9 @@ typedef struct AC3HeaderInfo {
     int surround_mix_level;                 ///< Surround mix level index
     uint16_t channel_map;
     int num_blocks;                         ///< number of audio blocks
+#if AV_HAVE_INCOMPATIBLE_LIBAV_ABI
     int dolby_surround_mode;
+#endif
     /** @} */
 
     /** @name Derived values
@@ -153,6 +203,9 @@ typedef struct AC3HeaderInfo {
     uint16_t frame_size;
     uint64_t channel_layout;
     /** @} */
+#if !AV_HAVE_INCOMPATIBLE_LIBAV_ABI
+    int dolby_surround_mode;
+#endif
 } AC3HeaderInfo;
 
 typedef enum {
diff --git a/libavcodec/ac3_parser.c b/libavcodec/ac3_parser.c
index 5ea09f8..dd6d77c 100644
--- a/libavcodec/ac3_parser.c
+++ b/libavcodec/ac3_parser.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2003 Fabrice Bellard
  * Copyright (c) 2003 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -47,9 +47,16 @@ static const uint8_t center_levels[4] = { 4, 5, 6, 5 };
 static const uint8_t surround_levels[4] = { 4, 6, 7, 6 };
 
 
-int avpriv_ac3_parse_header(GetBitContext *gbc, AC3HeaderInfo *hdr)
+int avpriv_ac3_parse_header2(GetBitContext *gbc, AC3HeaderInfo **phdr)
 {
     int frame_size_code;
+    AC3HeaderInfo *hdr;
+
+    if (!*phdr)
+        *phdr = av_mallocz(sizeof(AC3HeaderInfo));
+    if (!*phdr)
+        return AVERROR(ENOMEM);
+    hdr = *phdr;
 
     memset(hdr, 0, sizeof(*hdr));
 
@@ -144,6 +151,15 @@ int avpriv_ac3_parse_header(GetBitContext *gbc, AC3HeaderInfo *hdr)
     return 0;
 }
 
+int avpriv_ac3_parse_header(GetBitContext *gbc, AC3HeaderInfo *hdr)
+{
+    AC3HeaderInfo tmp, *ptmp = &tmp;
+    int ret = avpriv_ac3_parse_header2(gbc, &ptmp);
+
+    memcpy(hdr, ptmp, ((intptr_t)&tmp.channel_layout) - ((intptr_t)&tmp) + sizeof(uint64_t));
+    return ret;
+}
+
 static int ac3_sync(uint64_t state, AACAC3ParseContext *hdr_info,
         int *need_next_header, int *new_frame_start)
 {
@@ -152,11 +168,11 @@ static int ac3_sync(uint64_t state, AACAC3ParseContext *hdr_info,
         uint64_t u64;
         uint8_t  u8[8];
     } tmp = { av_be2ne64(state) };
-    AC3HeaderInfo hdr;
+    AC3HeaderInfo hdr, *phdr = &hdr;
     GetBitContext gbc;
 
     init_get_bits(&gbc, tmp.u8+8-AC3_HEADER_SIZE, 54);
-    err = avpriv_ac3_parse_header(&gbc, &hdr);
+    err = avpriv_ac3_parse_header2(&gbc, &phdr);
 
     if(err < 0)
         return 0;
diff --git a/libavcodec/ac3_parser.h b/libavcodec/ac3_parser.h
index 9322550..f37387d 100644
--- a/libavcodec/ac3_parser.h
+++ b/libavcodec/ac3_parser.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2003 Fabrice Bellard
  * Copyright (c) 2003 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -31,11 +31,14 @@
  * Parse the header up to the lfeon element, which is the first 52 or 54 bits
  * depending on the audio coding mode.
  * @param[in]  gbc BitContext containing the first 54 bits of the frame.
- * @param[out] hdr Pointer to struct where header info is written.
+ * @param[out] hdr Pointer to a pointer to the struct where header info is written.
+ *                 The struct is allocated if *hdr is NULL.
  * @return Returns 0 on success, -1 if there is a sync word mismatch,
  * -2 if the bsid (version) element is invalid, -3 if the fscod (sample rate)
  * element is invalid, or -4 if the frmsizecod (bit rate) element is invalid.
  */
+int avpriv_ac3_parse_header2(GetBitContext *gbc, AC3HeaderInfo **hdr);
+
 int avpriv_ac3_parse_header(GetBitContext *gbc, AC3HeaderInfo *hdr);
 
 #endif /* AVCODEC_AC3_PARSER_H */
diff --git a/libavcodec/ac3dec.c b/libavcodec/ac3dec.c
index 4876ac0..72c4185 100644
--- a/libavcodec/ac3dec.c
+++ b/libavcodec/ac3dec.c
@@ -7,20 +7,20 @@
  * Copyright (c) 2007-2008 Bartlomiej Wolowiec <bartek.wolowiec@gmail.com>
  * Copyright (c) 2007 Justin Ruggles <justin.ruggles@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -180,14 +180,23 @@ static av_cold int ac3_decode_init(AVCodecContext *avctx)
     ac3_tables_init();
     ff_mdct_init(&s->imdct_256, 8, 1, 1.0);
     ff_mdct_init(&s->imdct_512, 9, 1, 1.0);
-    ff_kbd_window_init(s->window, 5.0, 256);
+    AC3_RENAME(ff_kbd_window_init)(s->window, 5.0, 256);
     ff_bswapdsp_init(&s->bdsp);
+
+#if (USE_FIXED)
+    s->fdsp = avpriv_alloc_fixed_dsp(avctx->flags & CODEC_FLAG_BITEXACT);
+#else
     avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
+#endif
+
     ff_ac3dsp_init(&s->ac3dsp, avctx->flags & CODEC_FLAG_BITEXACT);
     ff_fmt_convert_init(&s->fmt_conv, avctx);
     av_lfg_init(&s->dith_state, 0);
 
-    avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
+    if (USE_FIXED)
+        avctx->sample_fmt = AV_SAMPLE_FMT_S16P;
+    else
+        avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
 
     /* allow downmixing to stereo or mono */
 #if FF_API_REQUEST_CHANNELS
@@ -275,10 +284,10 @@ static int ac3_parse_header(AC3DecodeContext *s)
  */
 static int parse_frame_header(AC3DecodeContext *s)
 {
-    AC3HeaderInfo hdr;
+    AC3HeaderInfo hdr, *phdr=&hdr;
     int err;
 
-    err = avpriv_ac3_parse_header(&s->gbc, &hdr);
+    err = avpriv_ac3_parse_header2(&s->gbc, &phdr);
     if (err)
         return err;
 
@@ -346,40 +355,45 @@ static void set_downmix_coeffs(AC3DecodeContext *s)
     float cmix = gain_levels[s->  center_mix_level];
     float smix = gain_levels[s->surround_mix_level];
     float norm0, norm1;
+    float downmix_coeffs[AC3_MAX_CHANNELS][2];
 
     for (i = 0; i < s->fbw_channels; i++) {
-        s->downmix_coeffs[i][0] = gain_levels[ac3_default_coeffs[s->channel_mode][i][0]];
-        s->downmix_coeffs[i][1] = gain_levels[ac3_default_coeffs[s->channel_mode][i][1]];
+        downmix_coeffs[i][0] = gain_levels[ac3_default_coeffs[s->channel_mode][i][0]];
+        downmix_coeffs[i][1] = gain_levels[ac3_default_coeffs[s->channel_mode][i][1]];
     }
     if (s->channel_mode > 1 && s->channel_mode & 1) {
-        s->downmix_coeffs[1][0] = s->downmix_coeffs[1][1] = cmix;
+        downmix_coeffs[1][0] = downmix_coeffs[1][1] = cmix;
     }
     if (s->channel_mode == AC3_CHMODE_2F1R || s->channel_mode == AC3_CHMODE_3F1R) {
         int nf = s->channel_mode - 2;
-        s->downmix_coeffs[nf][0] = s->downmix_coeffs[nf][1] = smix * LEVEL_MINUS_3DB;
+        downmix_coeffs[nf][0] = downmix_coeffs[nf][1] = smix * LEVEL_MINUS_3DB;
     }
     if (s->channel_mode == AC3_CHMODE_2F2R || s->channel_mode == AC3_CHMODE_3F2R) {
         int nf = s->channel_mode - 4;
-        s->downmix_coeffs[nf][0] = s->downmix_coeffs[nf+1][1] = smix;
+        downmix_coeffs[nf][0] = downmix_coeffs[nf+1][1] = smix;
     }
 
     /* renormalize */
     norm0 = norm1 = 0.0;
     for (i = 0; i < s->fbw_channels; i++) {
-        norm0 += s->downmix_coeffs[i][0];
-        norm1 += s->downmix_coeffs[i][1];
+        norm0 += downmix_coeffs[i][0];
+        norm1 += downmix_coeffs[i][1];
     }
     norm0 = 1.0f / norm0;
     norm1 = 1.0f / norm1;
     for (i = 0; i < s->fbw_channels; i++) {
-        s->downmix_coeffs[i][0] *= norm0;
-        s->downmix_coeffs[i][1] *= norm1;
+        downmix_coeffs[i][0] *= norm0;
+        downmix_coeffs[i][1] *= norm1;
     }
 
     if (s->output_mode == AC3_CHMODE_MONO) {
         for (i = 0; i < s->fbw_channels; i++)
-            s->downmix_coeffs[i][0] = (s->downmix_coeffs[i][0] +
-                                       s->downmix_coeffs[i][1]) * LEVEL_MINUS_3DB;
+            downmix_coeffs[i][0] = (downmix_coeffs[i][0] +
+                                    downmix_coeffs[i][1]) * LEVEL_MINUS_3DB;
+    }
+    for (i = 0; i < s->fbw_channels; i++) {
+        s->downmix_coeffs[i][0] = FIXR12(downmix_coeffs[i][0]);
+        s->downmix_coeffs[i][1] = FIXR12(downmix_coeffs[i][1]);
     }
 }
 
@@ -483,7 +497,7 @@ static void ac3_decode_transform_coeffs_ch(AC3DecodeContext *s, int ch_index, ma
         case 0:
             /* random noise with approximate range of -0.707 to 0.707 */
             if (dither)
-                mantissa = (av_lfg_get(&s->dith_state) / 362) - 5932275;
+                mantissa = (((av_lfg_get(&s->dith_state)>>8)*181)>>8) - 5931008;
             else
                 mantissa = 0;
             break;
@@ -530,6 +544,10 @@ static void ac3_decode_transform_coeffs_ch(AC3DecodeContext *s, int ch_index, ma
             break;
         default: /* 6 to 15 */
             /* Shift mantissa and sign-extend it. */
+            if (bap > 15) {
+                av_log(s->avctx, AV_LOG_ERROR, "bap %d is invalid in plain AC-3\n", bap);
+                bap = 15;
+            }
             mantissa = get_sbits(gbc, quantization_tab[bap]);
             mantissa <<= 24 - quantization_tab[bap];
             break;
@@ -643,20 +661,30 @@ static inline void do_imdct(AC3DecodeContext *s, int channels)
     for (ch = 1; ch <= channels; ch++) {
         if (s->block_switch[ch]) {
             int i;
-            float *x = s->tmp_output + 128;
+            FFTSample *x = s->tmp_output + 128;
             for (i = 0; i < 128; i++)
                 x[i] = s->transform_coeffs[ch][2 * i];
             s->imdct_256.imdct_half(&s->imdct_256, s->tmp_output, x);
+#if USE_FIXED
+            s->fdsp->vector_fmul_window_scaled(s->outptr[ch - 1], s->delay[ch - 1],
+                                       s->tmp_output, s->window, 128, 8);
+#else
             s->fdsp.vector_fmul_window(s->outptr[ch - 1], s->delay[ch - 1],
                                        s->tmp_output, s->window, 128);
+#endif
             for (i = 0; i < 128; i++)
                 x[i] = s->transform_coeffs[ch][2 * i + 1];
             s->imdct_256.imdct_half(&s->imdct_256, s->delay[ch - 1], x);
         } else {
             s->imdct_512.imdct_half(&s->imdct_512, s->tmp_output, s->transform_coeffs[ch]);
+#if USE_FIXED
+            s->fdsp->vector_fmul_window_scaled(s->outptr[ch - 1], s->delay[ch - 1],
+                                       s->tmp_output, s->window, 128, 8);
+#else
             s->fdsp.vector_fmul_window(s->outptr[ch - 1], s->delay[ch - 1],
                                        s->tmp_output, s->window, 128);
-            memcpy(s->delay[ch - 1], s->tmp_output + 128, 128 * sizeof(float));
+#endif
+            memcpy(s->delay[ch - 1], s->tmp_output + 128, 128 * sizeof(FFTSample));
         }
     }
 }
@@ -791,13 +819,13 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
         if (get_bits1(gbc)) {
             /* Allow asymmetric application of DRC when drc_scale > 1.
                Amplification of quiet sounds is enhanced */
-            float range = dynamic_range_tab[get_bits(gbc, 8)];
+            INTFLOAT range = AC3_RANGE(get_bits(gbc, 8));
             if (range > 1.0 || s->drc_scale <= 1.0)
-                s->dynamic_range[i] = powf(range, s->drc_scale);
+                s->dynamic_range[i] = AC3_DYNAMIC_RANGE(range);
             else
                 s->dynamic_range[i] = range;
         } else if (blk == 0) {
-            s->dynamic_range[i] = 1.0f;
+            s->dynamic_range[i] = AC3_DYNAMIC_RANGE1;
         }
     } while (i--);
 
@@ -823,6 +851,9 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
             if (start_subband > 7)
                 start_subband += start_subband - 7;
             end_subband    = get_bits(gbc, 3) + 5;
+#if USE_FIXED
+            s->spx_dst_end_freq = end_freq_inv_tab[end_subband];
+#endif
             if (end_subband   > 7)
                 end_subband   += end_subband   - 7;
             dst_start_freq = dst_start_freq * 12 + 25;
@@ -843,7 +874,8 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
 
             s->spx_dst_start_freq = dst_start_freq;
             s->spx_src_start_freq = src_start_freq;
-            s->spx_dst_end_freq   = dst_end_freq;
+            if (!USE_FIXED)
+                s->spx_dst_end_freq   = dst_end_freq;
 
             decode_band_structure(gbc, blk, s->eac3, 0,
                                   start_subband, end_subband,
@@ -863,18 +895,40 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
         for (ch = 1; ch <= fbw_channels; ch++) {
             if (s->channel_uses_spx[ch]) {
                 if (s->first_spx_coords[ch] || get_bits1(gbc)) {
-                    float spx_blend;
+                    INTFLOAT spx_blend;
                     int bin, master_spx_coord;
 
                     s->first_spx_coords[ch] = 0;
-                    spx_blend = get_bits(gbc, 5) * (1.0f/32);
+                    spx_blend = AC3_SPX_BLEND(get_bits(gbc, 5));
                     master_spx_coord = get_bits(gbc, 2) * 3;
 
                     bin = s->spx_src_start_freq;
                     for (bnd = 0; bnd < s->num_spx_bands; bnd++) {
                         int bandsize;
                         int spx_coord_exp, spx_coord_mant;
-                        float nratio, sblend, nblend, spx_coord;
+                        INTFLOAT nratio, sblend, nblend;
+#if USE_FIXED
+                        int64_t accu;
+                        /* calculate blending factors */
+                        bandsize = s->spx_band_sizes[bnd];
+                        accu = (int64_t)((bin << 23) + (bandsize << 22)) * s->spx_dst_end_freq;
+                        nratio = (int)(accu >> 32);
+                        nratio -= spx_blend << 18;
+
+                        if (nratio < 0) {
+                            nblend = 0;
+                            sblend = 0x800000;
+                        } else if (nratio > 0x7fffff) {
+                            nblend = 0x800000;
+                            sblend = 0;
+                        } else {
+                            nblend = fixed_sqrt(nratio, 23);
+                            accu = (int64_t)nblend * 1859775393;
+                            nblend = (int)((accu + (1<<29)) >> 30);
+                            sblend = fixed_sqrt(0x800000 - nratio, 23);
+                        }
+#else
+                        float spx_coord;
 
                         /* calculate blending factors */
                         bandsize = s->spx_band_sizes[bnd];
@@ -883,6 +937,7 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
                         nblend = sqrtf(3.0f * nratio); // noise is scaled by sqrt(3)
                                                        // to give unity variance
                         sblend = sqrtf(1.0f - nratio);
+#endif
                         bin += bandsize;
 
                         /* decode spx coordinates */
@@ -891,11 +946,18 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
                         if (spx_coord_exp == 15) spx_coord_mant <<= 1;
                         else                     spx_coord_mant += 4;
                         spx_coord_mant <<= (25 - spx_coord_exp - master_spx_coord);
-                        spx_coord = spx_coord_mant * (1.0f / (1 << 23));
 
                         /* multiply noise and signal blending factors by spx coordinate */
+#if USE_FIXED
+                        accu = (int64_t)nblend * spx_coord_mant;
+                        s->spx_noise_blend[ch][bnd]  = (int)((accu + (1<<22)) >> 23);
+                        accu = (int64_t)sblend * spx_coord_mant;
+                        s->spx_signal_blend[ch][bnd] = (int)((accu + (1<<22)) >> 23);
+#else
+                        spx_coord = spx_coord_mant * (1.0f / (1 << 23));
                         s->spx_noise_blend [ch][bnd] = nblend * spx_coord;
                         s->spx_signal_blend[ch][bnd] = sblend * spx_coord;
+#endif
                     }
                 }
             } else {
@@ -1252,14 +1314,19 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
 
     /* apply scaling to coefficients (headroom, dynrng) */
     for (ch = 1; ch <= s->channels; ch++) {
-        float gain = 1.0 / 4194304.0f;
-        if (s->channel_mode == AC3_CHMODE_DUALMONO) {
-            gain *= s->dynamic_range[2 - ch];
+        INTFLOAT gain;
+        if(s->channel_mode == AC3_CHMODE_DUALMONO) {
+            gain = s->dynamic_range[2-ch];
         } else {
-            gain *= s->dynamic_range[0];
+            gain = s->dynamic_range[0];
         }
+#if USE_FIXED
+        scale_coefs(s->transform_coeffs[ch], s->fixed_coeffs[ch], gain, 256);
+#else
+        gain *= 1.0 / 4194304.0f;
         s->fmt_conv.int32_to_float_fmul_scalar(s->transform_coeffs[ch],
                                                s->fixed_coeffs[ch], gain, 256);
+#endif
     }
 
     /* apply spectral extension to high frequency bins */
@@ -1284,19 +1351,24 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
         do_imdct(s, s->channels);
 
         if (downmix_output) {
+#if USE_FIXED
+            ac3_downmix_c_fixed16(s->outptr, s->downmix_coeffs,
+                              s->out_channels, s->fbw_channels, 256);
+#else
             s->ac3dsp.downmix(s->outptr, s->downmix_coeffs,
                               s->out_channels, s->fbw_channels, 256);
+#endif
         }
     } else {
         if (downmix_output) {
-            s->ac3dsp.downmix(s->xcfptr + 1, s->downmix_coeffs,
-                              s->out_channels, s->fbw_channels, 256);
+            s->ac3dsp.AC3_RENAME(downmix)(s->xcfptr + 1, s->downmix_coeffs,
+                                          s->out_channels, s->fbw_channels, 256);
         }
 
         if (downmix_output && !s->downmixed) {
             s->downmixed = 1;
-            s->ac3dsp.downmix(s->dlyptr, s->downmix_coeffs, s->out_channels,
-                              s->fbw_channels, 128);
+            s->ac3dsp.AC3_RENAME(downmix)(s->dlyptr, s->downmix_coeffs,
+                                          s->out_channels, s->fbw_channels, 128);
         }
 
         do_imdct(s, s->out_channels);
@@ -1317,7 +1389,7 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data,
     AC3DecodeContext *s = avctx->priv_data;
     int blk, ch, err, ret;
     const uint8_t *channel_map;
-    const float *output[AC3_MAX_CHANNELS];
+    const SHORTFLOAT *output[AC3_MAX_CHANNELS];
     enum AVMatrixEncoding matrix_encoding;
     AVDownmixInfo *downmix_info;
 
@@ -1375,7 +1447,7 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data,
         if (s->frame_size > buf_size) {
             av_log(avctx, AV_LOG_ERROR, "incomplete frame\n");
             err = AAC_AC3_PARSE_ERROR_FRAME_SIZE;
-        } else if (avctx->err_recognition & AV_EF_CRCCHECK) {
+        } else if (avctx->err_recognition & (AV_EF_CRCCHECK|AV_EF_CAREFUL)) {
             /* check for crc mismatch */
             if (av_crc(av_crc_get_table(AV_CRC_16_ANSI), 0, &buf[2],
                        s->frame_size - 2)) {
@@ -1409,6 +1481,10 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data,
             s->output_mode  = AC3_CHMODE_STEREO;
         }
 
+        s->loro_center_mix_level   = gain_levels[s->  center_mix_level];
+        s->loro_surround_mix_level = gain_levels[s->surround_mix_level];
+        s->ltrt_center_mix_level   = LEVEL_MINUS_3DB;
+        s->ltrt_surround_mix_level = LEVEL_MINUS_3DB;
         /* set downmixing coefficients if needed */
         if (s->channels != s->out_channels && !((s->output_mode & AC3_OUTPUT_LFEON) &&
                 s->fbw_channels == s->out_channels)) {
@@ -1430,19 +1506,18 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = s->num_blocks * AC3_BLOCK_SIZE;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     /* decode the audio blocks */
     channel_map = ff_ac3_dec_channel_map[s->output_mode & ~AC3_OUTPUT_LFEON][s->lfe_on];
+    for (ch = 0; ch < AC3_MAX_CHANNELS; ch++) {
+        output[ch] = s->output[ch];
+        s->outptr[ch] = s->output[ch];
+    }
     for (ch = 0; ch < s->channels; ch++) {
         if (ch < s->out_channels)
-            s->outptr[channel_map[ch]] = (float *)frame->data[ch];
-        else
-            s->outptr[ch] = s->output[ch];
-        output[ch] = s->output[ch];
+            s->outptr[channel_map[ch]] = (SHORTFLOAT *)frame->data[ch];
     }
     for (blk = 0; blk < s->num_blocks; blk++) {
         if (!err && decode_audio_block(s, blk)) {
@@ -1451,16 +1526,20 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data,
         }
         if (err)
             for (ch = 0; ch < s->out_channels; ch++)
-                memcpy(s->outptr[channel_map[ch]], output[ch], sizeof(**output) * AC3_BLOCK_SIZE);
+                memcpy(((SHORTFLOAT*)frame->data[ch]) + AC3_BLOCK_SIZE*blk, output[ch], AC3_BLOCK_SIZE*sizeof(SHORTFLOAT));
         for (ch = 0; ch < s->out_channels; ch++)
             output[ch] = s->outptr[channel_map[ch]];
-        for (ch = 0; ch < s->out_channels; ch++)
-            s->outptr[ch] += AC3_BLOCK_SIZE;
+        for (ch = 0; ch < s->out_channels; ch++) {
+            if (!ch || channel_map[ch])
+                s->outptr[channel_map[ch]] += AC3_BLOCK_SIZE;
+        }
     }
 
+    av_frame_set_decode_error_flags(frame, err ? FF_DECODE_ERROR_INVALID_BITSTREAM : 0);
+
     /* keep last block for error concealment in next frame */
     for (ch = 0; ch < s->out_channels; ch++)
-        memcpy(s->output[ch], output[ch], sizeof(**output) * AC3_BLOCK_SIZE);
+        memcpy(s->output[ch], output[ch], AC3_BLOCK_SIZE*sizeof(SHORTFLOAT));
 
     /*
      * AVMatrixEncoding
@@ -1531,59 +1610,12 @@ static av_cold int ac3_decode_end(AVCodecContext *avctx)
     AC3DecodeContext *s = avctx->priv_data;
     ff_mdct_end(&s->imdct_512);
     ff_mdct_end(&s->imdct_256);
+#if (USE_FIXED)
+    av_freep(&s->fdsp);
+#endif
 
     return 0;
 }
 
 #define OFFSET(x) offsetof(AC3DecodeContext, x)
 #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM)
-static const AVOption options[] = {
-    { "drc_scale", "percentage of dynamic range compression to apply", OFFSET(drc_scale), AV_OPT_TYPE_FLOAT, {.dbl = 1.0}, 0.0, 6.0, PAR },
-    { NULL},
-};
-
-static const AVClass ac3_decoder_class = {
-    .class_name = "AC3 decoder",
-    .item_name  = av_default_item_name,
-    .option     = options,
-    .version    = LIBAVUTIL_VERSION_INT,
-};
-
-AVCodec ff_ac3_decoder = {
-    .name           = "ac3",
-    .long_name      = NULL_IF_CONFIG_SMALL("ATSC A/52A (AC-3)"),
-    .type           = AVMEDIA_TYPE_AUDIO,
-    .id             = AV_CODEC_ID_AC3,
-    .priv_data_size = sizeof (AC3DecodeContext),
-    .init           = ac3_decode_init,
-    .close          = ac3_decode_end,
-    .decode         = ac3_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
-    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
-                                                      AV_SAMPLE_FMT_NONE },
-    .priv_class     = &ac3_decoder_class,
-};
-
-#if CONFIG_EAC3_DECODER
-static const AVClass eac3_decoder_class = {
-    .class_name = "E-AC3 decoder",
-    .item_name  = av_default_item_name,
-    .option     = options,
-    .version    = LIBAVUTIL_VERSION_INT,
-};
-
-AVCodec ff_eac3_decoder = {
-    .name           = "eac3",
-    .long_name      = NULL_IF_CONFIG_SMALL("ATSC A/52B (AC-3, E-AC-3)"),
-    .type           = AVMEDIA_TYPE_AUDIO,
-    .id             = AV_CODEC_ID_EAC3,
-    .priv_data_size = sizeof (AC3DecodeContext),
-    .init           = ac3_decode_init,
-    .close          = ac3_decode_end,
-    .decode         = ac3_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
-    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
-                                                      AV_SAMPLE_FMT_NONE },
-    .priv_class     = &eac3_decoder_class,
-};
-#endif
diff --git a/libavcodec/ac3dec.h b/libavcodec/ac3dec.h
index babd0a7..3ac44ee 100644
--- a/libavcodec/ac3dec.h
+++ b/libavcodec/ac3dec.h
@@ -2,20 +2,20 @@
  * Common code between the AC-3 and E-AC-3 decoders
  * Copyright (c) 2007 Bartlomiej Wolowiec <bartek.wolowiec@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -51,6 +51,7 @@
 #define AVCODEC_AC3DEC_H
 
 #include "libavutil/float_dsp.h"
+#include "libavutil/fixed_dsp.h"
 #include "libavutil/lfg.h"
 #include "ac3.h"
 #include "ac3dsp.h"
@@ -97,6 +98,12 @@ typedef struct AC3DecodeContext {
     int dolby_headphone_mode;               ///< dolby headphone mode                   (dheadphonmod)
 ///@}
 
+    int preferred_stereo_downmix;
+    float ltrt_center_mix_level;
+    float ltrt_surround_mix_level;
+    float loro_center_mix_level;
+    float loro_surround_mix_level;
+
 ///@name Frame syntax parameters
     int snr_offset_strategy;                ///< SNR offset strategy                    (snroffststr)
     int block_switch_syntax;                ///< block switch syntax enabled            (blkswe)
@@ -132,8 +139,8 @@ typedef struct AC3DecodeContext {
     int num_spx_bands;                          ///< number of spx bands                    (nspxbnds)
     uint8_t spx_band_sizes[SPX_MAX_BANDS];      ///< number of bins in each spx band
     uint8_t first_spx_coords[AC3_MAX_CHANNELS]; ///< first spx coordinates states           (firstspxcos)
-    float spx_noise_blend[AC3_MAX_CHANNELS][SPX_MAX_BANDS]; ///< spx noise blending factor  (nblendfact)
-    float spx_signal_blend[AC3_MAX_CHANNELS][SPX_MAX_BANDS];///< spx signal blending factor (sblendfact)
+    INTFLOAT spx_noise_blend[AC3_MAX_CHANNELS][SPX_MAX_BANDS]; ///< spx noise blending factor  (nblendfact)
+    INTFLOAT spx_signal_blend[AC3_MAX_CHANNELS][SPX_MAX_BANDS];///< spx signal blending factor (sblendfact)
 ///@}
 
 ///@name Adaptive hybrid transform
@@ -145,15 +152,15 @@ typedef struct AC3DecodeContext {
     int fbw_channels;                           ///< number of full-bandwidth channels
     int channels;                               ///< number of total channels
     int lfe_ch;                                 ///< index of LFE channel
-    float downmix_coeffs[AC3_MAX_CHANNELS][2];  ///< stereo downmix coefficients
+    SHORTFLOAT downmix_coeffs[AC3_MAX_CHANNELS][2];  ///< stereo downmix coefficients
     int downmixed;                              ///< indicates if coeffs are currently downmixed
     int output_mode;                            ///< output channel configuration
     int out_channels;                           ///< number of output channels
 ///@}
 
 ///@name Dynamic range
-    float dynamic_range[2];                 ///< dynamic range
-    float drc_scale;                        ///< percentage of dynamic range compression to be applied
+    INTFLOAT dynamic_range[2];                 ///< dynamic range
+    INTFLOAT drc_scale;                        ///< percentage of dynamic range compression to be applied
 ///@}
 
 ///@name Bandwidth
@@ -201,22 +208,26 @@ typedef struct AC3DecodeContext {
 
 ///@name Optimization
     BswapDSPContext bdsp;
+#if USE_FIXED
+    AVFixedDSPContext *fdsp;
+#else
     AVFloatDSPContext fdsp;
+#endif
     AC3DSPContext ac3dsp;
     FmtConvertContext fmt_conv;             ///< optimized conversion functions
 ///@}
 
-    float *outptr[AC3_MAX_CHANNELS];
-    float *xcfptr[AC3_MAX_CHANNELS];
-    float *dlyptr[AC3_MAX_CHANNELS];
+    SHORTFLOAT *outptr[AC3_MAX_CHANNELS];
+    INTFLOAT *xcfptr[AC3_MAX_CHANNELS];
+    INTFLOAT *dlyptr[AC3_MAX_CHANNELS];
 
 ///@name Aligned arrays
-    DECLARE_ALIGNED(16, int32_t, fixed_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS];     ///< fixed-point transform coefficients
-    DECLARE_ALIGNED(32, float, transform_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS];   ///< transform coefficients
-    DECLARE_ALIGNED(32, float, delay)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE];             ///< delay - added to the next block
-    DECLARE_ALIGNED(32, float, window)[AC3_BLOCK_SIZE];                              ///< window coefficients
-    DECLARE_ALIGNED(32, float, tmp_output)[AC3_BLOCK_SIZE];                          ///< temporary storage for output before windowing
-    DECLARE_ALIGNED(32, float, output)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE];            ///< output after imdct transform and windowing
+    DECLARE_ALIGNED(16, int,   fixed_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS];       ///< fixed-point transform coefficients
+    DECLARE_ALIGNED(32, INTFLOAT, transform_coeffs)[AC3_MAX_CHANNELS][AC3_MAX_COEFS];   ///< transform coefficients
+    DECLARE_ALIGNED(32, INTFLOAT, delay)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE];             ///< delay - added to the next block
+    DECLARE_ALIGNED(32, INTFLOAT, window)[AC3_BLOCK_SIZE];                              ///< window coefficients
+    DECLARE_ALIGNED(32, INTFLOAT, tmp_output)[AC3_BLOCK_SIZE];                          ///< temporary storage for output before windowing
+    DECLARE_ALIGNED(32, SHORTFLOAT, output)[AC3_MAX_CHANNELS][AC3_BLOCK_SIZE];            ///< output after imdct transform and windowing
     DECLARE_ALIGNED(32, uint8_t, input_buffer)[AC3_FRAME_BUFFER_SIZE + FF_INPUT_BUFFER_PADDING_SIZE]; ///< temp buffer to prevent overread
 ///@}
 } AC3DecodeContext;
diff --git a/libavcodec/ac3dec_data.c b/libavcodec/ac3dec_data.c
index 272a963..d0a9b1e 100644
--- a/libavcodec/ac3dec_data.c
+++ b/libavcodec/ac3dec_data.c
@@ -2,20 +2,20 @@
  * AC-3 and E-AC-3 decoder tables
  * Copyright (c) 2007 Bartlomiej Wolowiec <bartek.wolowiec@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ac3dec_data.h b/libavcodec/ac3dec_data.h
index c0a584e..975b52e 100644
--- a/libavcodec/ac3dec_data.h
+++ b/libavcodec/ac3dec_data.h
@@ -2,20 +2,20 @@
  * AC-3 and E-AC-3 decoder tables
  * Copyright (c) 2007 Bartlomiej Wolowiec <bartek.wolowiec@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ac3dec_fixed.c b/libavcodec/ac3dec_fixed.c
new file mode 100644
index 0000000..c6cbeb9
--- /dev/null
+++ b/libavcodec/ac3dec_fixed.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Stanislav Ocovaj (socovaj@mips.com)
+ *
+ * AC3 fixed-point decoder for MIPS platforms
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define FFT_FLOAT 0
+#define USE_FIXED 1
+#define FFT_FIXED_32 1
+#include "ac3dec.h"
+
+
+/**
+ * Table for center mix levels
+ * reference: Section 5.4.2.4 cmixlev
+ */
+static const uint8_t center_levels[4] = { 4, 5, 6, 5 };
+
+/**
+ * Table for surround mix levels
+ * reference: Section 5.4.2.5 surmixlev
+ */
+static const uint8_t surround_levels[4] = { 4, 6, 7, 6 };
+
+static const int end_freq_inv_tab[8] = /* NOTE(review): assumes the only user is ac3dec.c, included later in this file - confirm */
+{
+    50529027, 44278013, 39403370, 32292987, 27356480, 23729101, 20951060, 18755316
+};
+
+/**
+ * Scale a block of fixed-point coefficients by the gain encoded in dynrng.
+ *
+ * dst[k] = (src[k] * mul + round) >> shift, where mul (32..63) comes from
+ * the low 5 bits of dynrng and shift (1..8) from its bits 5-7 read as a
+ * signed 3-bit exponent; round is half of 1 << shift.
+ *
+ * @param dst    output coefficients
+ * @param src    input coefficients
+ * @param dynrng gain word; only bits 0-7 are looked at
+ * @param len    number of coefficients; handled in groups of 8, so a
+ *               length that is not a multiple of 8 is rounded up
+ */
+static void scale_coefs (
+    int32_t *dst,
+    const int32_t *src,
+    int dynrng,
+    int len)
+{
+    int i, j, shift, round;
+    int16_t mul;
+    int exponent;
+
+    mul = (dynrng & 0x1f) + 0x20;
+
+    /*
+     * Sign-extend the 3-bit exponent held in bits 5-7. The previous
+     * (dynrng << 24) >> 29 form shifted a set bit 7 into the sign bit of a
+     * signed int, which is undefined behavior in C.
+     */
+    exponent = ((unsigned)dynrng >> 5) & 7;
+    exponent = (exponent ^ 4) - 4;
+
+    shift = 4 - exponent;
+    round = 1 << (shift - 1);
+
+    /*
+     * Keep the original granularity: every iteration of the outer loop
+     * converts one complete group of 8 coefficients.
+     */
+    for (i = 0; i < len; i += 8) {
+        for (j = 0; j < 8; j++) {
+            int temp = src[i + j] * mul + round;
+            dst[i + j] = temp >> shift;
+        }
+    }
+}
+
+/**
+ * Downmix 16-bit fixed-point samples in place to stereo or mono. This is the
+ * variant used by the fixed-point decoder; the one for 32-bit samples is
+ * ac3_downmix_c_fixed() in ac3dsp.c.
+ */
+static void ac3_downmix_c_fixed16(int16_t **samples, int16_t (*matrix)[2],
+                                  int out_ch, int in_ch, int len)
+{
+    int pos, chan;
+    int left, right;
+
+    if (out_ch != 1 && out_ch != 2)
+        return;                         /* other layouts are left untouched */
+
+    for (pos = 0; pos < len; pos++) {
+        left = right = 0;
+        for (chan = 0; chan < in_ch; chan++) {
+            const int sample = samples[chan][pos];
+            left += sample * matrix[chan][0];
+            if (out_ch == 2)
+                right += sample * matrix[chan][1];
+        }
+        /* add 2048 before dropping 12 bits to round the weighted sum */
+        samples[0][pos] = (left + 2048) >> 12;
+        if (out_ch == 2)
+            samples[1][pos] = (right + 2048) >> 12;
+    }
+}
+
+#include "ac3dec.c"
+
+static const AVOption options[] = { /* private option table of the fixed-point decoder */
+    { NULL},                        /* terminator only: no options are defined here */
+};
+
+static const AVClass ac3_decoder_class = { /* AVClass referenced by ff_ac3_fixed_decoder.priv_class */
+    .class_name = "Fixed-Point AC-3 Decoder",
+    .item_name  = av_default_item_name,
+    .option     = options,                 /* the terminator-only table defined just above */
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_ac3_fixed_decoder = { /* codec descriptor for the fixed-point AC-3 decoder */
+    .name           = "ac3_fixed",
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_AC3,
+    .priv_data_size = sizeof (AC3DecodeContext),
+    .init           = ac3_decode_init,  /* NOTE(review): presumably defined in the ac3dec.c included above - confirm */
+    .close          = ac3_decode_end,
+    .decode         = ac3_decode_frame,
+    .capabilities   = CODEC_CAP_DR1,
+    .long_name      = NULL_IF_CONFIG_SMALL("ATSC A/52A (AC-3)"),
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P, /* only output format offered */
+                                                      AV_SAMPLE_FMT_NONE }, /* list terminator */
+    .priv_class     = &ac3_decoder_class,
+};
diff --git a/libavcodec/ac3dec_float.c b/libavcodec/ac3dec_float.c
new file mode 100644
index 0000000..7108921
--- /dev/null
+++ b/libavcodec/ac3dec_float.c
@@ -0,0 +1,89 @@
+/*
+ * AC-3 Audio Decoder
+ * This code was developed as part of Google Summer of Code 2006.
+ * E-AC-3 support was added as part of Google Summer of Code 2007.
+ *
+ * Copyright (c) 2006 Kartikey Mahendra BHATT (bhattkm at gmail dot com)
+ * Copyright (c) 2007-2008 Bartlomiej Wolowiec <bartek.wolowiec@gmail.com>
+ * Copyright (c) 2007 Justin Ruggles <justin.ruggles@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * Floating-point AC-3 / E-AC-3 decoder: includes the shared code in ac3dec.c.
+ */
+#include "ac3dec.h"
+#include "ac3dec.c"
+
+static const AVOption options[] = { /* private options shared by the float AC-3 and E-AC-3 decoders */
+    { "drc_scale", "percentage of dynamic range compression to apply", OFFSET(drc_scale), AV_OPT_TYPE_FLOAT, {.dbl = 1.0}, 0.0, 6.0, PAR },
+    /* Downmix-related fields below: every default is -1 and the flags field is 0, unlike drc_scale (PAR). */
+{"dmix_mode", "Preferred Stereo Downmix Mode", OFFSET(preferred_stereo_downmix), AV_OPT_TYPE_INT, {.i64 = -1 }, -1, 2, 0, "dmix_mode"},
+{"ltrt_cmixlev",   "Lt/Rt Center Mix Level",   OFFSET(ltrt_center_mix_level),    AV_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, 0},
+{"ltrt_surmixlev", "Lt/Rt Surround Mix Level", OFFSET(ltrt_surround_mix_level),  AV_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, 0},
+{"loro_cmixlev",   "Lo/Ro Center Mix Level",   OFFSET(loro_center_mix_level),    AV_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, 0},
+{"loro_surmixlev", "Lo/Ro Surround Mix Level", OFFSET(loro_surround_mix_level),  AV_OPT_TYPE_FLOAT, {.dbl = -1.0 }, -1.0, 2.0, 0},
+    /* table terminator */
+    { NULL},
+};
+
+static const AVClass ac3_decoder_class = { /* AVClass referenced by ff_ac3_decoder.priv_class */
+    .class_name = "AC3 decoder",
+    .item_name  = av_default_item_name,
+    .option     = options,                 /* the option table defined just above */
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_ac3_decoder = { /* codec descriptor for the floating-point AC-3 decoder */
+    .name           = "ac3",
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_AC3,
+    .priv_data_size = sizeof (AC3DecodeContext),
+    .init           = ac3_decode_init,  /* NOTE(review): presumably defined in the ac3dec.c included above - confirm */
+    .close          = ac3_decode_end,
+    .decode         = ac3_decode_frame,
+    .capabilities   = CODEC_CAP_DR1,
+    .long_name      = NULL_IF_CONFIG_SMALL("ATSC A/52A (AC-3)"),
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP, /* only output format offered */
+                                                      AV_SAMPLE_FMT_NONE }, /* list terminator */
+    .priv_class     = &ac3_decoder_class,
+};
+
+#if CONFIG_EAC3_DECODER
+static const AVClass eac3_decoder_class = { /* AVClass referenced by ff_eac3_decoder.priv_class */
+    .class_name = "E-AC3 decoder",
+    .item_name  = av_default_item_name,
+    .option     = options,                  /* same option table as ac3_decoder_class */
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_eac3_decoder = { /* codec descriptor for the E-AC-3 decoder; only built when CONFIG_EAC3_DECODER is set */
+    .name           = "eac3",
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_EAC3,
+    .priv_data_size = sizeof (AC3DecodeContext),
+    .init           = ac3_decode_init,  /* same init/close/decode callbacks as ff_ac3_decoder */
+    .close          = ac3_decode_end,
+    .decode         = ac3_decode_frame,
+    .capabilities   = CODEC_CAP_DR1,
+    .long_name      = NULL_IF_CONFIG_SMALL("ATSC A/52B (AC-3, E-AC-3)"),
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP, /* only output format offered */
+                                                      AV_SAMPLE_FMT_NONE }, /* list terminator */
+    .priv_class     = &eac3_decoder_class,
+};
+#endif
diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
index 25bd6e3..b746817 100644
--- a/libavcodec/ac3dsp.c
+++ b/libavcodec/ac3dsp.c
@@ -2,20 +2,20 @@
  * AC-3 DSP functions
  * Copyright (c) 2011 Justin Ruggles
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -172,6 +172,48 @@ static void ac3_extract_exponents_c(uint8_t *exp, int32_t *coef, int nb_coefs)
     }
 }
 
+/* Accumulate squared left, right, sum (l+r) and difference (l-r) values. */
+static void ac3_sum_square_butterfly_int32_c(int64_t sum[4],
+                                             const int32_t *coef0,
+                                             const int32_t *coef1,
+                                             int len)
+{
+    int k;
+
+    for (k = 0; k < 4; k++)
+        sum[k] = 0;
+
+    for (k = 0; k < len; k++) {
+        int left = coef0[k], right = coef1[k];
+        int mid = left + right, side = left - right;
+        MAC64(sum[0], left, left);
+        MAC64(sum[1], right, right);
+        MAC64(sum[2], mid, mid);
+        MAC64(sum[3], side, side);
+    }
+}
+
+/* Accumulate squared left, right, sum (l+r) and difference (l-r) values. */
+static void ac3_sum_square_butterfly_float_c(float sum[4],
+                                             const float *coef0,
+                                             const float *coef1,
+                                             int len)
+{
+    int k;
+
+    for (k = 0; k < 4; k++)
+        sum[k] = 0;
+
+    for (k = 0; k < len; k++) {
+        float left = coef0[k], right = coef1[k];
+        float mid = left + right, side = left - right;
+        sum[0] += left * left;
+        sum[1] += right * right;
+        sum[2] += mid * mid;
+        sum[3] += side * side;
+    }
+}
+
 static void ac3_downmix_c(float **samples, float (*matrix)[2],
                           int out_ch, int in_ch, int len)
 {
@@ -197,6 +239,31 @@ static void ac3_downmix_c(float **samples, float (*matrix)[2],
     }
 }
 
+/* Downmix 32-bit fixed-point samples in place to stereo or mono. */
+static void ac3_downmix_c_fixed(int32_t **samples, int16_t (*matrix)[2],
+                                int out_ch, int in_ch, int len)
+{
+    int pos, chan;
+    int64_t left, right;
+
+    if (out_ch != 1 && out_ch != 2)
+        return;
+
+    for (pos = 0; pos < len; pos++) {
+        left = right = 0;
+        for (chan = 0; chan < in_ch; chan++) {
+            const int64_t sample = samples[chan][pos];
+            left += sample * matrix[chan][0];
+            if (out_ch == 2)
+                right += sample * matrix[chan][1];
+        }
+        /* round: add half of 1 << 12 before shifting the sum down */
+        samples[0][pos] = (left + 2048) >> 12;
+        if (out_ch == 2)
+            samples[1][pos] = (right + 2048) >> 12;
+    }
+}
+
 static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                  const int16_t *window, unsigned int len)
 {
@@ -221,11 +288,16 @@ av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact)
     c->update_bap_counts = ac3_update_bap_counts_c;
     c->compute_mantissa_size = ac3_compute_mantissa_size_c;
     c->extract_exponents = ac3_extract_exponents_c;
+    c->sum_square_butterfly_int32 = ac3_sum_square_butterfly_int32_c;
+    c->sum_square_butterfly_float = ac3_sum_square_butterfly_float_c;
     c->downmix = ac3_downmix_c;
+    c->downmix_fixed = ac3_downmix_c_fixed;
     c->apply_window_int16 = apply_window_int16_c;
 
     if (ARCH_ARM)
         ff_ac3dsp_init_arm(c, bit_exact);
     if (ARCH_X86)
         ff_ac3dsp_init_x86(c, bit_exact);
+    if (ARCH_MIPS)
+        ff_ac3dsp_init_mips(c, bit_exact);
 }
diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h
index 6ca0c5b..ed98c8c 100644
--- a/libavcodec/ac3dsp.h
+++ b/libavcodec/ac3dsp.h
@@ -2,20 +2,20 @@
  * AC-3 DSP functions
  * Copyright (c) 2011 Justin Ruggles
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -126,9 +126,18 @@ typedef struct AC3DSPContext {
 
     void (*extract_exponents)(uint8_t *exp, int32_t *coef, int nb_coefs);
 
+    void (*sum_square_butterfly_int32)(int64_t sum[4], const int32_t *coef0,
+                                       const int32_t *coef1, int len);
+
+    void (*sum_square_butterfly_float)(float sum[4], const float *coef0,
+                                       const float *coef1, int len);
+
     void (*downmix)(float **samples, float (*matrix)[2], int out_ch,
                     int in_ch, int len);
 
+    void (*downmix_fixed)(int32_t **samples, int16_t (*matrix)[2], int out_ch,
+                          int in_ch, int len);
+
     /**
      * Apply symmetric window in 16-bit fixed-point.
      * @param output destination array
@@ -147,5 +156,6 @@ typedef struct AC3DSPContext {
 void ff_ac3dsp_init    (AC3DSPContext *c, int bit_exact);
 void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact);
 void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact);
+void ff_ac3dsp_init_mips(AC3DSPContext *c, int bit_exact);
 
 #endif /* AVCODEC_AC3DSP_H */
diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 5c02e7f..3ae44ff 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -4,20 +4,20 @@
  * Copyright (c) 2006-2010 Justin Ruggles <justin.ruggles@gmail.com>
  * Copyright (c) 2006-2010 Prakash Punnoor <prakash@punnoor.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -273,7 +273,7 @@ void ff_ac3_apply_rematrixing(AC3EncodeContext *s)
     int nb_coefs;
     int blk, bnd, i;
     int start, end;
-    uint8_t *flags;
+    uint8_t *flags = NULL;
 
     if (!s->rematrixing_enabled)
         return;
@@ -1210,14 +1210,11 @@ static void quantize_mantissas_blk_ch(AC3Mant *s, int32_t *fixed_coef,
     int i;
 
     for (i = start_freq; i < end_freq; i++) {
-        int v;
         int c = fixed_coef[i];
         int e = exp[i];
-        int b = bap[i];
-        switch (b) {
-        case 0:
-            v = 0;
-            break;
+        int v = bap[i];
+        if (v)
+        switch (v) {
         case 1:
             v = sym_quant(c, e, 3);
             switch (s->mant1_cnt) {
@@ -1286,7 +1283,7 @@ static void quantize_mantissas_blk_ch(AC3Mant *s, int32_t *fixed_coef,
             v = asym_quant(c, e, 16);
             break;
         default:
-            v = asym_quant(c, e, b - 1);
+            v = asym_quant(c, e, v - 1);
             break;
         }
         qmant[i] = v;
@@ -1386,7 +1383,7 @@ static void ac3_output_frame_header(AC3EncodeContext *s)
  */
 static void output_audio_block(AC3EncodeContext *s, int blk)
 {
-    int ch, i, baie, bnd, got_cpl, ch0;
+    int ch, i, baie, bnd, got_cpl, av_uninit(ch0);
     AC3Block *block = &s->blocks[blk];
 
     /* block switching */
@@ -2022,6 +2019,7 @@ av_cold int ff_ac3_encode_close(AVCodecContext *avctx)
     AC3EncodeContext *s = avctx->priv_data;
 
     av_freep(&s->windowed_samples);
+    if (s->planar_samples)
     for (ch = 0; ch < s->channels; ch++)
         av_freep(&s->planar_samples[ch]);
     av_freep(&s->planar_samples);
@@ -2250,7 +2248,7 @@ static av_cold int validate_options(AC3EncodeContext *s)
  */
 static av_cold void set_bandwidth(AC3EncodeContext *s)
 {
-    int blk, ch, cpl_start;
+    int blk, ch, av_uninit(cpl_start);
 
     if (s->cutoff) {
         /* calculate bandwidth based on user-specified cutoff frequency */
diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h
index 30c15d0..9c9a7ce 100644
--- a/libavcodec/ac3enc.h
+++ b/libavcodec/ac3enc.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2000 Fabrice Bellard
  * Copyright (c) 2006-2010 Justin Ruggles <justin.ruggles@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ac3enc_fixed.c b/libavcodec/ac3enc_fixed.c
index 2bb82ef..9d39026 100644
--- a/libavcodec/ac3enc_fixed.c
+++ b/libavcodec/ac3enc_fixed.c
@@ -4,20 +4,20 @@
  * Copyright (c) 2006-2010 Justin Ruggles <justin.ruggles@gmail.com>
  * Copyright (c) 2006-2010 Prakash Punnoor <prakash@punnoor.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -35,8 +35,13 @@
 
 #define AC3ENC_TYPE AC3ENC_TYPE_AC3_FIXED
 #include "ac3enc_opts_template.c"
-static const AVClass ac3enc_class = { "Fixed-Point AC-3 Encoder", av_default_item_name,
-                                      ac3_options, LIBAVUTIL_VERSION_INT };
+
+static const AVClass ac3enc_class = {
+    .class_name = "Fixed-Point AC-3 Encoder",
+    .item_name  = av_default_item_name,
+    .option     = ac3_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
 
 #include "ac3enc_template.c"
 
@@ -97,6 +102,12 @@ static void scale_coefficients(AC3EncodeContext *s)
     }
 }
 
+static void sum_square_butterfly(AC3EncodeContext *s, int64_t sum[4],
+                                 const int32_t *coef0, const int32_t *coef1,
+                                 int len)
+{
+    s->ac3dsp.sum_square_butterfly_int32(sum, coef0, coef1, len);
+}
 
 /*
  * Clip MDCT coefficients to allowable range.
diff --git a/libavcodec/ac3enc_float.c b/libavcodec/ac3enc_float.c
index d106d1b..fa6e509 100644
--- a/libavcodec/ac3enc_float.c
+++ b/libavcodec/ac3enc_float.c
@@ -4,20 +4,20 @@
  * Copyright (c) 2006-2010 Justin Ruggles <justin.ruggles@gmail.com>
  * Copyright (c) 2006-2010 Prakash Punnoor <prakash@punnoor.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -36,8 +36,12 @@
 
 #define AC3ENC_TYPE AC3ENC_TYPE_AC3
 #include "ac3enc_opts_template.c"
-static const AVClass ac3enc_class = { "AC-3 Encoder", av_default_item_name,
-                                      ac3_options, LIBAVUTIL_VERSION_INT };
+static const AVClass ac3enc_class = {
+    .class_name = "AC-3 Encoder",
+    .item_name  = av_default_item_name,
+    .option     = ac3_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
 
 #include "ac3enc_template.c"
 
@@ -68,7 +72,7 @@ av_cold int ff_ac3_float_mdct_init(AC3EncodeContext *s)
     n  = 1 << 9;
     n2 = n >> 1;
 
-    window = av_malloc(n * sizeof(*window));
+    window = av_malloc_array(n, sizeof(*window));
     if (!window) {
         av_log(s->avctx, AV_LOG_ERROR, "Cannot allocate memory.\n");
         return AVERROR(ENOMEM);
@@ -104,6 +108,12 @@ static void scale_coefficients(AC3EncodeContext *s)
                                chan_size * (s->channels + cpl));
 }
 
+static void sum_square_butterfly(AC3EncodeContext *s, float sum[4],
+                                 const float *coef0, const float *coef1,
+                                 int len)
+{
+    s->ac3dsp.sum_square_butterfly_float(sum, coef0, coef1, len);
+}
 
 /*
  * Clip MDCT coefficients to allowable range.
diff --git a/libavcodec/ac3enc_opts_template.c b/libavcodec/ac3enc_opts_template.c
index 339a08f..a252be9 100644
--- a/libavcodec/ac3enc_opts_template.c
+++ b/libavcodec/ac3enc_opts_template.c
@@ -2,20 +2,20 @@
  * AC-3 encoder options
  * Copyright (c) 2011 Justin Ruggles <justin.ruggles@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ac3enc_template.c b/libavcodec/ac3enc_template.c
index 79b4946..192d16f 100644
--- a/libavcodec/ac3enc_template.c
+++ b/libavcodec/ac3enc_template.c
@@ -4,20 +4,20 @@
  * Copyright (c) 2006-2011 Justin Ruggles <justin.ruggles@gmail.com>
  * Copyright (c) 2006-2010 Prakash Punnoor <prakash@punnoor.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -47,6 +47,9 @@ static void clip_coefficients(AudioDSPContext *adsp, CoefType *coef,
 
 static CoefType calc_cpl_coord(CoefSumType energy_ch, CoefSumType energy_cpl);
 
+static void sum_square_butterfly(AC3EncodeContext *s, CoefSumType sum[4],
+                                 const CoefType *coef0, const CoefType *coef1,
+                                 int len);
 
 int AC3_NAME(allocate_sample_buffers)(AC3EncodeContext *s)
 {
@@ -70,7 +73,7 @@ alloc_fail:
 
 /*
  * Copy input samples.
- * Channels are reordered from Libav's default order to AC-3 order.
+ * Channels are reordered from FFmpeg's default order to AC-3 order.
  */
 static void copy_input_samples(AC3EncodeContext *s, SampleType **samples)
 {
@@ -133,7 +136,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
 #else
     int32_t (*fixed_cpl_coords)[AC3_MAX_CHANNELS][16] = cpl_coords;
 #endif
-    int blk, ch, bnd, i, j;
+    int av_uninit(blk), ch, bnd, i, j;
     CoefSumType energy[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16] = {{{0}}};
     int cpl_start, num_cpl_coefs;
 
@@ -336,8 +339,8 @@ static void apply_channel_coupling(AC3EncodeContext *s)
 static void compute_rematrixing_strategy(AC3EncodeContext *s)
 {
     int nb_coefs;
-    int blk, bnd, i;
-    AC3Block *block, *block0;
+    int blk, bnd;
+    AC3Block *block, *block0 = NULL;
 
     if (s->channel_mode != AC3_CHMODE_STEREO)
         return;
@@ -361,20 +364,12 @@ static void compute_rematrixing_strategy(AC3EncodeContext *s)
         }
 
         for (bnd = 0; bnd < block->num_rematrixing_bands; bnd++) {
-            /* calculate calculate sum of squared coeffs for one band in one block */
+            /* calculate sum of squared coeffs for one band in one block */
             int start = ff_ac3_rematrix_band_tab[bnd];
             int end   = FFMIN(nb_coefs, ff_ac3_rematrix_band_tab[bnd+1]);
-            CoefSumType sum[4] = {0,};
-            for (i = start; i < end; i++) {
-                CoefType lt = block->mdct_coef[1][i];
-                CoefType rt = block->mdct_coef[2][i];
-                CoefType md = lt + rt;
-                CoefType sd = lt - rt;
-                MAC_COEF(sum[0], lt, lt);
-                MAC_COEF(sum[1], rt, rt);
-                MAC_COEF(sum[2], md, md);
-                MAC_COEF(sum[3], sd, sd);
-            }
+            CoefSumType sum[4];
+            sum_square_butterfly(s, sum, block->mdct_coef[1] + start,
+                                 block->mdct_coef[2] + start, end - start);
 
             /* compare sums to determine if rematrixing will be used for this band */
             if (FFMIN(sum[2], sum[3]) < FFMIN(sum[0], sum[1]))
@@ -443,10 +438,8 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, AVPacket *avpkt,
 
     ff_ac3_quantize_mantissas(s);
 
-    if ((ret = ff_alloc_packet(avpkt, s->frame_size))) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+    if ((ret = ff_alloc_packet2(avctx, avpkt, s->frame_size)) < 0)
         return ret;
-    }
     ff_ac3_output_frame(s, avpkt->data);
 
     if (frame->pts != AV_NOPTS_VALUE)
diff --git a/libavcodec/ac3tab.c b/libavcodec/ac3tab.c
index 3cd07f9..d62d8bf 100644
--- a/libavcodec/ac3tab.c
+++ b/libavcodec/ac3tab.c
@@ -2,20 +2,20 @@
  * AC-3 tables
  * copyright (c) 2001 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -116,7 +116,7 @@ const uint8_t ff_ac3_enc_channel_map[8][2][6] = {
 };
 
 /**
- * Table to remap channels from from AC-3 order to SMPTE order.
+ * Table to remap channels from AC-3 order to SMPTE order.
  * [channel_mode][lfe][ch]
  */
 const uint8_t ff_ac3_dec_channel_map[8][2][6] = {
diff --git a/libavcodec/ac3tab.h b/libavcodec/ac3tab.h
index 83edec5..74cbd9e 100644
--- a/libavcodec/ac3tab.h
+++ b/libavcodec/ac3tab.h
@@ -2,20 +2,20 @@
  * AC-3 tables
  * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/acelp_filters.c b/libavcodec/acelp_filters.c
index 93bec65..9ab758b 100644
--- a/libavcodec/acelp_filters.c
+++ b/libavcodec/acelp_filters.c
@@ -3,25 +3,26 @@
  *
  * Copyright (c) 2008 Vladimir Voroshilov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include <inttypes.h>
 
+#include "libavutil/avassert.h"
 #include "libavutil/common.h"
 #include "avcodec.h"
 #include "acelp_filters.h"
@@ -46,7 +47,7 @@ void ff_acelp_interpolate(int16_t* out, const int16_t* in,
 {
     int n, i;
 
-    assert(frac_pos >= 0 && frac_pos < precision);
+    av_assert1(frac_pos >= 0 && frac_pos < precision);
 
     for (n = 0; n < length; n++) {
         int idx = 0;
@@ -143,3 +144,12 @@ void ff_tilt_compensation(float *mem, float tilt, float *samples, int size)
     samples[0] -= tilt * *mem;
     *mem = new_tilt_mem;
 }
+
+void ff_acelp_filter_init(ACELPFContext *c)
+{
+    c->acelp_interpolatef                      = ff_acelp_interpolatef;
+    c->acelp_apply_order_2_transfer_function   = ff_acelp_apply_order_2_transfer_function;
+
+    if(HAVE_MIPSFPU)
+        ff_acelp_filter_init_mips(c);
+}
diff --git a/libavcodec/acelp_filters.h b/libavcodec/acelp_filters.h
index 6a9ebd9..7a3061b 100644
--- a/libavcodec/acelp_filters.h
+++ b/libavcodec/acelp_filters.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2008 Vladimir Voroshilov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,6 +25,39 @@
 
 #include <stdint.h>
 
+typedef struct ACELPFContext {
+    /**
+     * Floating point version of ff_acelp_interpolate()
+     */
+    void (*acelp_interpolatef)(float *out, const float *in,
+                            const float *filter_coeffs, int precision,
+                            int frac_pos, int filter_length, int length);
+
+    /**
+     * Apply an order 2 rational transfer function in-place.
+     *
+     * @param out output buffer for filtered speech samples
+     * @param in input buffer containing speech data (may be the same as out)
+     * @param zero_coeffs z^-1 and z^-2 coefficients of the numerator
+     * @param pole_coeffs z^-1 and z^-2 coefficients of the denominator
+     * @param gain scale factor for final output
+     * @param mem intermediate values used by filter (should be 0 initially)
+     * @param n number of samples (should be a multiple of eight)
+     */
+    void (*acelp_apply_order_2_transfer_function)(float *out, const float *in,
+                                                  const float zero_coeffs[2],
+                                                  const float pole_coeffs[2],
+                                                  float gain,
+                                                  float mem[2], int n);
+
+}ACELPFContext;
+
+/**
+ * Initialize ACELPFContext.
+ */
+void ff_acelp_filter_init(ACELPFContext *c);
+void ff_acelp_filter_init_mips(ACELPFContext *c);
+
 /**
  * low-pass Finite Impulse Response filter coefficients.
  *
@@ -76,7 +109,7 @@ void ff_acelp_interpolatef(float *out, const float *in,
  *
  * The filter has a cut-off frequency of 1/80 of the sampling freq
  *
- * @note Two items before the top of the out buffer must contain two items from the
+ * @note Two items before the top of the in buffer must contain two items from the
  *       tail of the previous subframe.
  *
  * @remark It is safe to pass the same array in in and out parameters.
diff --git a/libavcodec/acelp_pitch_delay.c b/libavcodec/acelp_pitch_delay.c
index 1965772..3ecec01 100644
--- a/libavcodec/acelp_pitch_delay.c
+++ b/libavcodec/acelp_pitch_delay.c
@@ -3,25 +3,26 @@
  *
  * Copyright (c) 2008 Vladimir Voroshilov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "libavutil/common.h"
 #include "libavutil/float_dsp.h"
+#include "libavutil/libm.h"
 #include "libavutil/mathematics.h"
 #include "avcodec.h"
 #include "acelp_pitch_delay.h"
@@ -107,9 +108,20 @@ int16_t ff_acelp_decode_gain_code(
     for(i=0; i<ma_pred_order; i++)
         mr_energy += quant_energy[i] * ma_prediction_coeff[i];
 
+#ifdef G729_BITEXACT
+    mr_energy += (((-6165LL * ff_log2(dsp->scalarproduct_int16(fc_v, fc_v, subframe_size, 0))) >> 3) & ~0x3ff);
+
+    mr_energy = (5439 * (mr_energy >> 15)) >> 8;           // (0.15) = (0.15) * (7.23)
+
+    return bidir_sal(
+               ((ff_exp2(mr_energy & 0x7fff) + 16) >> 5) * (gain_corr_factor >> 1),
+               (mr_energy >> 15) - 25
+           );
+#else
     mr_energy = gain_corr_factor * exp(M_LN10 / (20 << 23) * mr_energy) /
                 sqrt(adsp->scalarproduct_int16(fc_v, fc_v, subframe_size));
     return mr_energy >> 12;
+#endif
 }
 
 float ff_amr_set_fixed_gain(float fixed_gain_factor, float fixed_mean_energy,
diff --git a/libavcodec/acelp_pitch_delay.h b/libavcodec/acelp_pitch_delay.h
index 7b5b33d..2aade2f 100644
--- a/libavcodec/acelp_pitch_delay.h
+++ b/libavcodec/acelp_pitch_delay.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2008 Vladimir Voroshilov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/acelp_vectors.c b/libavcodec/acelp_vectors.c
index 0c660ac..86851a3 100644
--- a/libavcodec/acelp_vectors.c
+++ b/libavcodec/acelp_vectors.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2008 Vladimir Voroshilov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -50,6 +50,26 @@ const uint8_t ff_fc_2pulses_9bits_track1_gray[16] =
   28, 26,
 };
 
+const uint8_t ff_fc_2pulses_9bits_track2_gray[32] =
+{
+  0,  2,
+  5,  4,
+  12, 10,
+  7,  9,
+  25, 24,
+  20, 22,
+  14, 15,
+  19, 17,
+  36, 31,
+  21, 26,
+  1,  6,
+  16, 11,
+  27, 29,
+  32, 30,
+  39, 37,
+  34, 35,
+};
+
 const uint8_t ff_fc_4pulses_8bits_tracks_13[16] =
 {
   0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75,
@@ -219,11 +239,12 @@ void ff_set_fixed_vector(float *out, const AMRFixed *in, float scale, int size)
         int x   = in->x[i], repeats = !((in->no_repeat_mask >> i) & 1);
         float y = in->y[i] * scale;
 
-        do {
-            out[x] += y;
-            y *= in->pitch_fac;
-            x += in->pitch_lag;
-        } while (x < size && repeats);
+        if (in->pitch_lag > 0)
+            do {
+                out[x] += y;
+                y *= in->pitch_fac;
+                x += in->pitch_lag;
+            } while (x < size && repeats);
     }
 }
 
@@ -234,9 +255,18 @@ void ff_clear_fixed_vector(float *out, const AMRFixed *in, int size)
     for (i=0; i < in->n; i++) {
         int x  = in->x[i], repeats = !((in->no_repeat_mask >> i) & 1);
 
-        do {
-            out[x] = 0.0;
-            x += in->pitch_lag;
-        } while (x < size && repeats);
+        if (in->pitch_lag > 0)
+            do {
+                out[x] = 0.0;
+                x += in->pitch_lag;
+            } while (x < size && repeats);
     }
 }
+
+void ff_acelp_vectors_init(ACELPVContext *c)
+{
+    c->weighted_vector_sumf   = ff_weighted_vector_sumf;
+
+    if(HAVE_MIPSFPU)
+        ff_acelp_vectors_init_mips(c);
+}
diff --git a/libavcodec/acelp_vectors.h b/libavcodec/acelp_vectors.h
index d6226bf..fae834d 100644
--- a/libavcodec/acelp_vectors.h
+++ b/libavcodec/acelp_vectors.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2008 Vladimir Voroshilov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,6 +25,30 @@
 
 #include <stdint.h>
 
+typedef struct ACELPVContext {
+    /**
+     * float implementation of weighted sum of two vectors.
+     * @param[out] out result of addition
+     * @param in_a first vector
+     * @param in_b second vector
+     * @param weight_coeff_a first vector weight coefficient
+     * @param weight_coeff_b second vector weight coefficient
+     * @param length vectors length (should be a multiple of two)
+     *
+     * @note It is safe to pass the same buffer for out and in_a or in_b.
+     */
+    void (*weighted_vector_sumf)(float *out, const float *in_a, const float *in_b,
+                                 float weight_coeff_a, float weight_coeff_b,
+                                 int length);
+
+}ACELPVContext;
+
+/**
+ * Initialize ACELPVContext.
+ */
+void ff_acelp_vectors_init(ACELPVContext *c);
+void ff_acelp_vectors_init_mips(ACELPVContext *c);
+
 /** Sparse representation for the algebraic codebook (fixed) vector */
 typedef struct AMRFixed {
     int      n;
@@ -82,6 +106,37 @@ extern const uint8_t ff_fc_2pulses_9bits_track1[16];
 extern const uint8_t ff_fc_2pulses_9bits_track1_gray[16];
 
 /**
+ * Track|Pulse|        Positions
+ * -----------------------------------------
+ *  2   | 1   | 0, 7, 14, 20, 27, 34,  1, 21
+ *      |     | 2, 9, 15, 22, 29, 35,  6, 26
+ *      |     | 4,10, 17, 24, 30, 37, 11, 31
+ *      |     | 5,12, 19, 25, 32, 39, 16, 36
+ * -----------------------------------------
+ *
+ * @remark Track in the table should be read top-to-bottom, left-to-right.
+ *
+ * @note (EE.1) This table (from the reference code) does not comply with
+ *              the specification.
+ *              The specification contains the following table:
+ *
+ * Track|Pulse|        Positions
+ * -----------------------------------------
+ *  2   | 1   | 0, 5, 10, 15, 20, 25, 30, 35
+ *      |     | 1, 6, 11, 16, 21, 26, 31, 36
+ *      |     | 2, 7, 12, 17, 22, 27, 32, 37
+ *      |     | 4, 9, 14, 19, 24, 29, 34, 39
+ *
+ * -----------------------------------------
+ *
+ * @note (EE.2) Reference G.729D code also uses gray decoding for each
+ *              pulse index before looking up the value in the table.
+ *
+ * Used in G.729 @@6.4k (with gray coding)
+ */
+extern const uint8_t ff_fc_2pulses_9bits_track2_gray[32];
+
+/**
  * b60 hamming windowed sinc function coefficients
  */
 extern const float ff_b60_sinc[61];
diff --git a/libavcodec/adpcm.c b/libavcodec/adpcm.c
index c6bc4d0..e31242f 100644
--- a/libavcodec/adpcm.c
+++ b/libavcodec/adpcm.c
@@ -13,25 +13,24 @@
  * MAXIS EA ADPCM decoder by Robert Marston (rmarston@gmail.com)
  * THP ADPCM decoder by Marco Gerards (mgerards@xs4all.nl)
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include "avcodec.h"
 #include "get_bits.h"
-#include "put_bits.h"
 #include "bytestream.h"
 #include "adpcm.h"
 #include "adpcm_data.h"
@@ -96,13 +95,16 @@ static av_cold int adpcm_decode_init(AVCodecContext * avctx)
     unsigned int max_channels = 2;
 
     switch(avctx->codec->id) {
+    case AV_CODEC_ID_ADPCM_DTK:
     case AV_CODEC_ID_ADPCM_EA:
         min_channels = 2;
         break;
+    case AV_CODEC_ID_ADPCM_AFC:
     case AV_CODEC_ID_ADPCM_EA_R1:
     case AV_CODEC_ID_ADPCM_EA_R2:
     case AV_CODEC_ID_ADPCM_EA_R3:
     case AV_CODEC_ID_ADPCM_EA_XAS:
+    case AV_CODEC_ID_ADPCM_THP:
         max_channels = 6;
         break;
     }
@@ -116,10 +118,8 @@ static av_cold int adpcm_decode_init(AVCodecContext * avctx)
         c->status[0].step = c->status[1].step = 511;
         break;
     case AV_CODEC_ID_ADPCM_IMA_WAV:
-        if (avctx->bits_per_coded_sample != 4) {
-            av_log(avctx, AV_LOG_ERROR, "Only 4-bit ADPCM IMA WAV files are supported\n");
-            return -1;
-        }
+        if (avctx->bits_per_coded_sample < 2 || avctx->bits_per_coded_sample > 5)
+            return AVERROR_INVALIDDATA;
         break;
     case AV_CODEC_ID_ADPCM_IMA_APC:
         if (avctx->extradata && avctx->extradata_size >= 8) {
@@ -145,6 +145,8 @@ static av_cold int adpcm_decode_init(AVCodecContext * avctx)
         case AV_CODEC_ID_ADPCM_EA_R3:
         case AV_CODEC_ID_ADPCM_EA_XAS:
         case AV_CODEC_ID_ADPCM_THP:
+        case AV_CODEC_ID_ADPCM_AFC:
+        case AV_CODEC_ID_ADPCM_DTK:
             avctx->sample_fmt = AV_SAMPLE_FMT_S16P;
             break;
         case AV_CODEC_ID_ADPCM_IMA_WS:
@@ -184,6 +186,29 @@ static inline short adpcm_ima_expand_nibble(ADPCMChannelStatus *c, char nibble,
     return (short)c->predictor;
 }
 
+static inline int16_t adpcm_ima_wav_expand_nibble(ADPCMChannelStatus *c, GetBitContext *gb, int bps)
+{
+    int nibble, step_index, predictor, sign, delta, diff, step, shift;
+    /* Decode one bps-bit IMA WAV code read from gb; updates c and returns the new predictor. */
+    shift = bps - 1; /* bit position of the sign bit inside the code */
+    nibble = get_bits_le(gb, bps);
+    step = ff_adpcm_step_table[c->step_index];
+    step_index = c->step_index + ff_adpcm_index_tables[bps - 2][nibble]; /* 4 tables, so bps is expected in 2..5 */
+    step_index = av_clip(step_index, 0, 88);
+    /* Split the code into sign and magnitude, then scale the current step by the magnitude. */
+    sign = nibble & (1 << shift);
+    delta = nibble & ((1 << shift) - 1);
+    diff = ((2 * delta + 1) * step) >> shift;
+    predictor = c->predictor;
+    if (sign) predictor -= diff;
+    else predictor += diff;
+    /* Store the saturated predictor and the clipped step index for the next code. */
+    c->predictor = av_clip_int16(predictor);
+    c->step_index = step_index;
+    /* c->predictor was just clipped to the int16_t range, so the cast does not truncate. */
+    return (int16_t)c->predictor;
+}
+
 static inline int adpcm_ima_qt_expand_nibble(ADPCMChannelStatus *c, int nibble, int shift)
 {
     int step_index;
@@ -225,6 +250,27 @@ static inline short adpcm_ms_expand_nibble(ADPCMChannelStatus *c, int nibble)
     return c->sample1;
 }
 
+static inline short adpcm_ima_oki_expand_nibble(ADPCMChannelStatus *c, int nibble)
+{
+    int step_index, predictor, sign, delta, diff, step;
+    /* Decode one 4-bit OKI code; updates c and returns the predictor scaled by 16. */
+    step = ff_adpcm_oki_step_table[c->step_index];
+    step_index = c->step_index + ff_adpcm_index_table[(unsigned)nibble];
+    step_index = av_clip(step_index, 0, 48);
+    /* Bit 3 of the code is the sign, bits 0..2 are the magnitude. */
+    sign = nibble & 8;
+    delta = nibble & 7;
+    diff = ((2 * delta + 1) * step) >> 3;
+    predictor = c->predictor;
+    if (sign) predictor -= diff;
+    else predictor += diff;
+    /* The predictor is kept within -2048..2047; the step index was clipped to 0..48 above. */
+    c->predictor = av_clip(predictor, -2048, 2047);
+    c->step_index = step_index;
+    /* Multiply instead of shifting: left-shifting a negative int is undefined behaviour. */
+    return c->predictor * 16;
+}
+
 static inline short adpcm_ct_expand_nibble(ADPCMChannelStatus *c, char nibble)
 {
     int sign, delta, diff;
@@ -298,11 +344,9 @@ static int xa_decode(AVCodecContext *avctx, int16_t *out0, int16_t *out1,
     for(i=0;i<4;i++) {
         shift  = 12 - (in[4+i*2] & 15);
         filter = in[4+i*2] >> 4;
-        if (filter > 4) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "Invalid XA-ADPCM filter %d (max. allowed is 4)\n",
-                   filter);
-            return AVERROR_INVALIDDATA;
+        if (filter >= FF_ARRAY_ELEMS(xa_adpcm_table)) {
+            avpriv_request_sample(avctx, "unknown XA-ADPCM filter %d", filter);
+            filter=0;
         }
         f0 = xa_adpcm_table[filter][0];
         f1 = xa_adpcm_table[filter][1];
@@ -329,12 +373,11 @@ static int xa_decode(AVCodecContext *avctx, int16_t *out0, int16_t *out1,
 
         shift  = 12 - (in[5+i*2] & 15);
         filter = in[5+i*2] >> 4;
-        if (filter > 4) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "Invalid XA-ADPCM filter %d (max. allowed is 4)\n",
-                   filter);
-            return AVERROR_INVALIDDATA;
+        if (filter >= FF_ARRAY_ELEMS(xa_adpcm_table)) {
+            avpriv_request_sample(avctx, "unknown XA-ADPCM filter %d", filter);
+            filter=0;
         }
+
         f0 = xa_adpcm_table[filter][0];
         f1 = xa_adpcm_table[filter][1];
 
@@ -428,9 +471,11 @@ static void adpcm_swf_decode(AVCodecContext *avctx, const uint8_t *buf, int buf_
  * @param[out] coded_samples set to the number of samples as coded in the
  *                           packet, or 0 if the codec does not encode the
  *                           number of samples in each frame.
+ * @param[out] approx_nb_samples set to non-zero if the number of samples
+ *                               returned is an approximation.
  */
 static int get_nb_samples(AVCodecContext *avctx, GetByteContext *gb,
-                          int buf_size, int *coded_samples)
+                          int buf_size, int *coded_samples, int *approx_nb_samples)
 {
     ADPCMDecodeContext *s = avctx->priv_data;
     int nb_samples        = 0;
@@ -439,6 +484,10 @@ static int get_nb_samples(AVCodecContext *avctx, GetByteContext *gb,
     int header_size;
 
     *coded_samples = 0;
+    *approx_nb_samples = 0;
+
+    if(ch <= 0)
+        return 0;
 
     switch (avctx->codec->id) {
     /* constant, only check buf_size */
@@ -456,6 +505,7 @@ static int get_nb_samples(AVCodecContext *avctx, GetByteContext *gb,
     case AV_CODEC_ID_ADPCM_CT:
     case AV_CODEC_ID_ADPCM_IMA_APC:
     case AV_CODEC_ID_ADPCM_IMA_EA_SEAD:
+    case AV_CODEC_ID_ADPCM_IMA_OKI:
     case AV_CODEC_ID_ADPCM_IMA_WS:
     case AV_CODEC_ID_ADPCM_YAMAHA:
         nb_samples = buf_size * 2 / ch;
@@ -470,7 +520,7 @@ static int get_nb_samples(AVCodecContext *avctx, GetByteContext *gb,
         case AV_CODEC_ID_ADPCM_4XM:
         case AV_CODEC_ID_ADPCM_IMA_ISS:     header_size = 4 * ch;      break;
         case AV_CODEC_ID_ADPCM_IMA_AMV:     header_size = 8;           break;
-        case AV_CODEC_ID_ADPCM_IMA_SMJPEG:  header_size = 4;           break;
+        case AV_CODEC_ID_ADPCM_IMA_SMJPEG:  header_size = 4 * ch;      break;
     }
     if (header_size > 0)
         return (buf_size - header_size) * 2 / ch;
@@ -514,6 +564,7 @@ static int get_nb_samples(AVCodecContext *avctx, GetByteContext *gb,
         *coded_samples -= *coded_samples % 28;
         nb_samples      = (buf_size - header_size) * 2 / ch;
         nb_samples     -= nb_samples % 28;
+        *approx_nb_samples = 1;
         break;
     case AV_CODEC_ID_ADPCM_IMA_DK3:
         if (avctx->block_align > 0)
@@ -525,11 +576,20 @@ static int get_nb_samples(AVCodecContext *avctx, GetByteContext *gb,
             buf_size = FFMIN(buf_size, avctx->block_align);
         nb_samples = 1 + (buf_size - 4 * ch) * 2 / ch;
         break;
+    case AV_CODEC_ID_ADPCM_IMA_RAD:
+        if (avctx->block_align > 0)
+            buf_size = FFMIN(buf_size, avctx->block_align);
+        nb_samples = (buf_size - 4 * ch) * 2 / ch;
+        break;
     case AV_CODEC_ID_ADPCM_IMA_WAV:
+    {
+        int bsize = ff_adpcm_ima_block_sizes[avctx->bits_per_coded_sample - 2];
+        int bsamples = ff_adpcm_ima_block_samples[avctx->bits_per_coded_sample - 2];
         if (avctx->block_align > 0)
             buf_size = FFMIN(buf_size, avctx->block_align);
-        nb_samples = 1 + (buf_size - 4 * ch) / (4 * ch) * 8;
+        nb_samples = 1 + (buf_size - 4 * ch) / (bsize * ch) * bsamples;
         break;
+    }
     case AV_CODEC_ID_ADPCM_MS:
         if (avctx->block_align > 0)
             buf_size = FFMIN(buf_size, avctx->block_align);
@@ -566,15 +626,25 @@ static int get_nb_samples(AVCodecContext *avctx, GetByteContext *gb,
         break;
     }
     case AV_CODEC_ID_ADPCM_THP:
+        if (avctx->extradata) {
+            nb_samples = buf_size / (8 * ch) * 14;
+            break;
+        }
         has_coded_samples = 1;
         bytestream2_skip(gb, 4); // channel size
         *coded_samples  = bytestream2_get_be32(gb);
         *coded_samples -= *coded_samples % 14;
-        nb_samples      = (buf_size - 80) / (8 * ch) * 14;
+        nb_samples      = (buf_size - (8 + 36 * ch)) / (8 * ch) * 14;
+        break;
+    case AV_CODEC_ID_ADPCM_AFC:
+        nb_samples = buf_size / (9 * ch) * 16;
         break;
     case AV_CODEC_ID_ADPCM_XA:
         nb_samples = (buf_size / 128) * 224 / ch;
         break;
+    case AV_CODEC_ID_ADPCM_DTK:
+        nb_samples = buf_size / (16 * ch) * 28;
+        break;
     }
 
     /* validate coded sample count */
@@ -597,11 +667,11 @@ static int adpcm_decode_frame(AVCodecContext *avctx, void *data,
     int16_t **samples_p;
     int st; /* stereo */
     int count1, count2;
-    int nb_samples, coded_samples, ret;
+    int nb_samples, coded_samples, approx_nb_samples, ret;
     GetByteContext gb;
 
     bytestream2_init(&gb, buf, buf_size);
-    nb_samples = get_nb_samples(avctx, &gb, buf_size, &coded_samples);
+    nb_samples = get_nb_samples(avctx, &gb, buf_size, &coded_samples, &approx_nb_samples);
     if (nb_samples <= 0) {
         av_log(avctx, AV_LOG_ERROR, "invalid number of samples in packet\n");
         return AVERROR_INVALIDDATA;
@@ -609,17 +679,15 @@ static int adpcm_decode_frame(AVCodecContext *avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = nb_samples;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     samples = (short *)frame->data[0];
     samples_p = (int16_t **)frame->extended_data;
 
     /* use coded_samples when applicable */
     /* it is always <= nb_samples, so the output buffer will be large enough */
     if (coded_samples) {
-        if (coded_samples != nb_samples)
+        if (!approx_nb_samples && coded_samples != nb_samples)
             av_log(avctx, AV_LOG_WARNING, "mismatch in coded sample count\n");
         frame->nb_samples = nb_samples = coded_samples;
     }
@@ -681,6 +749,23 @@ static int adpcm_decode_frame(AVCodecContext *avctx, void *data,
             }
         }
 
+        if (avctx->bits_per_coded_sample != 4) {
+            int samples_per_block = ff_adpcm_ima_block_samples[avctx->bits_per_coded_sample - 2];
+            GetBitContext g;
+
+            init_get_bits8(&g, gb.buffer, bytestream2_get_bytes_left(&gb));
+            for (n = 0; n < (nb_samples - 1) / samples_per_block; n++) {
+                for (i = 0; i < avctx->channels; i++) {
+                    cs = &c->status[i];
+                    samples = &samples_p[i][1 + n * samples_per_block];
+                    for (m = 0; m < samples_per_block; m++) {
+                        samples[m] = adpcm_ima_wav_expand_nibble(cs, &g,
+                                          avctx->bits_per_coded_sample);
+                    }
+                }
+            }
+            bytestream2_skip(&gb, avctx->block_align - avctx->channels * 4);
+        } else {
         for (n = 0; n < (nb_samples - 1) / 8; n++) {
             for (i = 0; i < avctx->channels; i++) {
                 cs = &c->status[i];
@@ -692,6 +777,7 @@ static int adpcm_decode_frame(AVCodecContext *avctx, void *data,
                 }
             }
         }
+        }
         break;
     case AV_CODEC_ID_ADPCM_4XM:
         for (i = 0; i < avctx->channels; i++)
@@ -770,7 +856,7 @@ static int adpcm_decode_frame(AVCodecContext *avctx, void *data,
                 return AVERROR_INVALIDDATA;
             }
         }
-        for (n = (nb_samples >> (1 - st)) - 1; n > 0; n--) {
+        for (n = (nb_samples - 1) >> (1 - st); n > 0; n--) {
             int v = bytestream2_get_byteu(&gb);
             *samples++ = adpcm_ima_expand_nibble(&c->status[0 ], v >> 4  , 3);
             *samples++ = adpcm_ima_expand_nibble(&c->status[st], v & 0x0F, 3);
@@ -835,6 +921,9 @@ static int adpcm_decode_frame(AVCodecContext *avctx, void *data,
             *samples++ = c->status[0].predictor + c->status[1].predictor;
             *samples++ = c->status[0].predictor - c->status[1].predictor;
         }
+
+        if ((bytestream2_tell(&gb) & 1))
+            bytestream2_skip(&gb, 1);
         break;
     }
     case AV_CODEC_ID_ADPCM_IMA_ISS:
@@ -871,6 +960,38 @@ static int adpcm_decode_frame(AVCodecContext *avctx, void *data,
             *samples++ = adpcm_ima_expand_nibble(&c->status[st], v & 0x0F, 3);
         }
         break;
+    case AV_CODEC_ID_ADPCM_IMA_OKI:
+        while (bytestream2_get_bytes_left(&gb) > 0) {
+            int v = bytestream2_get_byteu(&gb);
+            *samples++ = adpcm_ima_oki_expand_nibble(&c->status[0],  v >> 4  );
+            *samples++ = adpcm_ima_oki_expand_nibble(&c->status[st], v & 0x0F);
+        }
+        break;
+    case AV_CODEC_ID_ADPCM_IMA_RAD:
+        for (channel = 0; channel < avctx->channels; channel++) {
+            cs = &c->status[channel];
+            cs->step_index = sign_extend(bytestream2_get_le16u(&gb), 16);
+            cs->predictor  = sign_extend(bytestream2_get_le16u(&gb), 16);
+            if (cs->step_index > 88u){
+                av_log(avctx, AV_LOG_ERROR, "ERROR: step_index[%d] = %i\n",
+                       channel, cs->step_index);
+                return AVERROR_INVALIDDATA;
+            }
+        }
+        for (n = 0; n < nb_samples / 2; n++) {
+            int byte[2];
+
+            byte[0] = bytestream2_get_byteu(&gb);
+            if (st)
+                byte[1] = bytestream2_get_byteu(&gb);
+            for(channel = 0; channel < avctx->channels; channel++) {
+                *samples++ = adpcm_ima_expand_nibble(&c->status[channel], byte[channel] & 0x0F, 3);
+            }
+            for(channel = 0; channel < avctx->channels; channel++) {
+                *samples++ = adpcm_ima_expand_nibble(&c->status[channel], byte[channel] >> 4  , 3);
+            }
+        }
+        break;
     case AV_CODEC_ID_ADPCM_IMA_WS:
         if (c->vqa_version == 3) {
             for (channel = 0; channel < avctx->channels; channel++) {
@@ -946,6 +1067,9 @@ static int adpcm_decode_frame(AVCodecContext *avctx, void *data,
         /* Each EA ADPCM frame has a 12-byte header followed by 30-byte pieces,
            each coding 28 stereo samples. */
 
+        if(avctx->channels != 2)
+            return AVERROR_INVALIDDATA;
+
         current_left_sample   = sign_extend(bytestream2_get_le16u(&gb), 16);
         previous_left_sample  = sign_extend(bytestream2_get_le16u(&gb), 16);
         current_right_sample  = sign_extend(bytestream2_get_le16u(&gb), 16);
@@ -1131,16 +1255,9 @@ static int adpcm_decode_frame(AVCodecContext *avctx, void *data,
         }
         break;
     case AV_CODEC_ID_ADPCM_IMA_AMV:
-    case AV_CODEC_ID_ADPCM_IMA_SMJPEG:
-        if (avctx->codec->id == AV_CODEC_ID_ADPCM_IMA_AMV) {
-            c->status[0].predictor = sign_extend(bytestream2_get_le16u(&gb), 16);
-            c->status[0].step_index = bytestream2_get_le16u(&gb);
-            bytestream2_skipu(&gb, 4);
-        } else {
-            c->status[0].predictor = sign_extend(bytestream2_get_be16u(&gb), 16);
-            c->status[0].step_index = bytestream2_get_byteu(&gb);
-            bytestream2_skipu(&gb, 1);
-        }
+        c->status[0].predictor = sign_extend(bytestream2_get_le16u(&gb), 16);
+        c->status[0].step_index = bytestream2_get_le16u(&gb);
+        bytestream2_skipu(&gb, 4);
         if (c->status[0].step_index > 88u) {
             av_log(avctx, AV_LOG_ERROR, "ERROR: step_index = %i\n",
                    c->status[0].step_index);
@@ -1148,18 +1265,29 @@ static int adpcm_decode_frame(AVCodecContext *avctx, void *data,
         }
 
         for (n = nb_samples >> (1 - st); n > 0; n--) {
-            int hi, lo, v = bytestream2_get_byteu(&gb);
+            int v = bytestream2_get_byteu(&gb);
 
-            if (avctx->codec->id == AV_CODEC_ID_ADPCM_IMA_AMV) {
-                hi = v & 0x0F;
-                lo = v >> 4;
-            } else {
-                lo = v & 0x0F;
-                hi = v >> 4;
+            *samples++ = adpcm_ima_expand_nibble(&c->status[0], v >> 4, 3);
+            *samples++ = adpcm_ima_expand_nibble(&c->status[0], v & 0xf, 3);
+        }
+        break;
+    case AV_CODEC_ID_ADPCM_IMA_SMJPEG:
+        for (i = 0; i < avctx->channels; i++) {
+            c->status[i].predictor = sign_extend(bytestream2_get_be16u(&gb), 16);
+            c->status[i].step_index = bytestream2_get_byteu(&gb);
+            bytestream2_skipu(&gb, 1);
+            if (c->status[i].step_index > 88u) {
+                av_log(avctx, AV_LOG_ERROR, "ERROR: step_index = %i\n",
+                       c->status[i].step_index);
+                return AVERROR_INVALIDDATA;
             }
+        }
 
-            *samples++ = adpcm_ima_expand_nibble(&c->status[0], lo, 3);
-            *samples++ = adpcm_ima_expand_nibble(&c->status[0], hi, 3);
+        for (n = nb_samples >> (1 - st); n > 0; n--) {
+            int v = bytestream2_get_byteu(&gb);
+
+            *samples++ = adpcm_ima_qt_expand_nibble(&c->status[0 ], v >> 4, 3);
+            *samples++ = adpcm_ima_qt_expand_nibble(&c->status[st], v & 0xf, 3);
         }
         break;
     case AV_CODEC_ID_ADPCM_CT:
@@ -1189,7 +1317,7 @@ static int adpcm_decode_frame(AVCodecContext *avctx, void *data,
                                                        byte & 0x0F, 4, 0);
             }
         } else if (avctx->codec->id == AV_CODEC_ID_ADPCM_SBPRO_3) {
-            for (n = nb_samples / 3; n > 0; n--) {
+            for (n = (nb_samples<<st) / 3; n > 0; n--) {
                 int byte = bytestream2_get_byteu(&gb);
                 *samples++ = adpcm_sbpro_expand_nibble(&c->status[0],
                                                         byte >> 5        , 3, 0);
@@ -1223,22 +1351,88 @@ static int adpcm_decode_frame(AVCodecContext *avctx, void *data,
             *samples++ = adpcm_yamaha_expand_nibble(&c->status[st], v >> 4  );
         }
         break;
+    case AV_CODEC_ID_ADPCM_AFC:
+    {
+        int samples_per_block;
+        int blocks;
+
+        if (avctx->extradata && avctx->extradata_size == 1 && avctx->extradata[0]) {
+            samples_per_block = avctx->extradata[0] / 16;
+            blocks = nb_samples / avctx->extradata[0];
+        } else {
+            samples_per_block = nb_samples / 16;
+            blocks = 1;
+        }
+
+        for (m = 0; m < blocks; m++) {
+        for (channel = 0; channel < avctx->channels; channel++) {
+            int prev1 = c->status[channel].sample1;
+            int prev2 = c->status[channel].sample2;
+
+            samples = samples_p[channel] + m * 16;
+            /* Read in every sample for this channel.  */
+            for (i = 0; i < samples_per_block; i++) {
+                int byte = bytestream2_get_byteu(&gb);
+                int scale = 1 << (byte >> 4);
+                int index = byte & 0xf;
+                int factor1 = ff_adpcm_afc_coeffs[0][index];
+                int factor2 = ff_adpcm_afc_coeffs[1][index];
+
+                /* Decode 16 samples.  */
+                for (n = 0; n < 16; n++) {
+                    int32_t sampledat;
+
+                    if (n & 1) {
+                        sampledat = sign_extend(byte, 4);
+                    } else {
+                        byte = bytestream2_get_byteu(&gb);
+                        sampledat = sign_extend(byte >> 4, 4);
+                    }
+
+                    sampledat = ((prev1 * factor1 + prev2 * factor2) +
+                                 ((sampledat * scale) << 11)) >> 11;
+                    *samples = av_clip_int16(sampledat);
+                    prev2 = prev1;
+                    prev1 = *samples++;
+                }
+            }
+
+            c->status[channel].sample1 = prev1;
+            c->status[channel].sample2 = prev2;
+        }
+        }
+        bytestream2_seek(&gb, 0, SEEK_END);
+        break;
+    }
     case AV_CODEC_ID_ADPCM_THP:
     {
-        int table[2][16];
-        int prev[2][2];
+        int table[6][16];
         int ch;
 
-        for (i = 0; i < 2; i++)
+        if (avctx->extradata) {
+            GetByteContext tb;
+            if (avctx->extradata_size < 32 * avctx->channels) {
+                av_log(avctx, AV_LOG_ERROR, "Missing coeff table\n");
+                return AVERROR_INVALIDDATA;
+            }
+
+            bytestream2_init(&tb, avctx->extradata, avctx->extradata_size);
+            for (i = 0; i < avctx->channels; i++)
+                for (n = 0; n < 16; n++)
+                    table[i][n] = sign_extend(bytestream2_get_be16u(&tb), 16);
+        } else {
+        for (i = 0; i < avctx->channels; i++)
             for (n = 0; n < 16; n++)
                 table[i][n] = sign_extend(bytestream2_get_be16u(&gb), 16);
 
         /* Initialize the previous sample.  */
-        for (i = 0; i < 2; i++)
-            for (n = 0; n < 2; n++)
-                prev[i][n] = sign_extend(bytestream2_get_be16u(&gb), 16);
+        for (i = 0; i < avctx->channels; i++) {
+            c->status[i].sample1 = sign_extend(bytestream2_get_be16u(&gb), 16);
+            c->status[i].sample2 = sign_extend(bytestream2_get_be16u(&gb), 16);
+        }
+        }
 
-        for (ch = 0; ch <= st; ch++) {
+        for (ch = 0; ch < avctx->channels; ch++) {
             samples = samples_p[ch];
 
             /* Read in every sample for this channel.  */
@@ -1260,21 +1454,74 @@ static int adpcm_decode_frame(AVCodecContext *avctx, void *data,
                         sampledat = sign_extend(byte >> 4, 4);
                     }
 
-                    sampledat = ((prev[ch][0]*factor1
-                                + prev[ch][1]*factor2) >> 11) + (sampledat << exp);
+                    sampledat = ((c->status[ch].sample1 * factor1
+                                + c->status[ch].sample2 * factor2) >> 11) + (sampledat << exp);
                     *samples = av_clip_int16(sampledat);
-                    prev[ch][1] = prev[ch][0];
-                    prev[ch][0] = *samples++;
+                    c->status[ch].sample2 = c->status[ch].sample1;
+                    c->status[ch].sample1 = *samples++;
                 }
             }
         }
         break;
     }
+    case AV_CODEC_ID_ADPCM_DTK:
+        for (channel = 0; channel < avctx->channels; channel++) {
+            samples = samples_p[channel];
+
+            /* Read in every sample for this channel.  */
+            for (i = 0; i < nb_samples / 28; i++) {
+                int byte, header;
+                if (channel)
+                    bytestream2_skipu(&gb, 1);
+                header = bytestream2_get_byteu(&gb);
+                bytestream2_skipu(&gb, 3 - channel);
+
+                /* Decode 28 samples.  */
+                for (n = 0; n < 28; n++) {
+                    int32_t sampledat, prev;
+
+                    switch (header >> 4) {
+                    case 1:
+                        prev = (c->status[channel].sample1 * 0x3c);
+                        break;
+                    case 2:
+                        prev = (c->status[channel].sample1 * 0x73) - (c->status[channel].sample2 * 0x34);
+                        break;
+                    case 3:
+                        prev = (c->status[channel].sample1 * 0x62) - (c->status[channel].sample2 * 0x37);
+                        break;
+                    default:
+                        prev = 0;
+                    }
+
+                    prev = av_clip((prev + 0x20) >> 6, -0x200000, 0x1fffff);
+
+                    byte = bytestream2_get_byteu(&gb);
+                    if (!channel)
+                        sampledat = sign_extend(byte, 4);
+                    else
+                        sampledat = sign_extend(byte >> 4, 4);
+
+                    sampledat = (((sampledat << 12) >> (header & 0xf)) << 6) + prev;
+                    *samples++ = av_clip_int16(sampledat >> 6);
+                    c->status[channel].sample2 = c->status[channel].sample1;
+                    c->status[channel].sample1 = sampledat;
+                }
+            }
+            if (!channel)
+                bytestream2_seek(&gb, 0, SEEK_SET);
+        }
+        break;
 
     default:
         return -1;
     }
 
+    if (avpkt->size && bytestream2_tell(&gb) == 0) {
+        av_log(avctx, AV_LOG_ERROR, "Nothing consumed\n");
+        return AVERROR_INVALIDDATA;
+    }
+
     *got_frame_ptr = 1;
 
     return bytestream2_tell(&gb);
@@ -1304,7 +1551,9 @@ AVCodec ff_ ## name_ ## _decoder = {                        \
 
 /* Note: Do not forget to add new entries to the Makefile as well. */
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_4XM,         sample_fmts_s16p, adpcm_4xm,         "ADPCM 4X Movie");
+ADPCM_DECODER(AV_CODEC_ID_ADPCM_AFC,         sample_fmts_s16p, adpcm_afc,         "ADPCM Nintendo Gamecube AFC");
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_CT,          sample_fmts_s16,  adpcm_ct,          "ADPCM Creative Technology");
+ADPCM_DECODER(AV_CODEC_ID_ADPCM_DTK,         sample_fmts_s16p, adpcm_dtk,         "ADPCM Nintendo Gamecube DTK");
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_EA,          sample_fmts_s16,  adpcm_ea,          "ADPCM Electronic Arts");
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_EA_MAXIS_XA, sample_fmts_s16,  adpcm_ea_maxis_xa, "ADPCM Electronic Arts Maxis CDROM XA");
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_EA_R1,       sample_fmts_s16p, adpcm_ea_r1,       "ADPCM Electronic Arts R1");
@@ -1318,7 +1567,9 @@ ADPCM_DECODER(AV_CODEC_ID_ADPCM_IMA_DK4,     sample_fmts_s16,  adpcm_ima_dk4,
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_IMA_EA_EACS, sample_fmts_s16,  adpcm_ima_ea_eacs, "ADPCM IMA Electronic Arts EACS");
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_IMA_EA_SEAD, sample_fmts_s16,  adpcm_ima_ea_sead, "ADPCM IMA Electronic Arts SEAD");
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_IMA_ISS,     sample_fmts_s16,  adpcm_ima_iss,     "ADPCM IMA Funcom ISS");
+ADPCM_DECODER(AV_CODEC_ID_ADPCM_IMA_OKI,     sample_fmts_s16,  adpcm_ima_oki,     "ADPCM IMA Dialogic OKI");
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_IMA_QT,      sample_fmts_s16p, adpcm_ima_qt,      "ADPCM IMA QuickTime");
+ADPCM_DECODER(AV_CODEC_ID_ADPCM_IMA_RAD,     sample_fmts_s16,  adpcm_ima_rad,     "ADPCM IMA Radical");
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_IMA_SMJPEG,  sample_fmts_s16,  adpcm_ima_smjpeg,  "ADPCM IMA Loki SDL MJPEG");
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_IMA_WAV,     sample_fmts_s16p, adpcm_ima_wav,     "ADPCM IMA WAV");
 ADPCM_DECODER(AV_CODEC_ID_ADPCM_IMA_WS,      sample_fmts_both, adpcm_ima_ws,      "ADPCM IMA Westwood");
diff --git a/libavcodec/adpcm.h b/libavcodec/adpcm.h
index 16facb6..f43a28c 100644
--- a/libavcodec/adpcm.h
+++ b/libavcodec/adpcm.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2001-2003 The ffmpeg Project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -38,8 +38,8 @@ typedef struct ADPCMChannelStatus {
     int prev_sample;
 
     /* MS version */
-    int16_t sample1;
-    int16_t sample2;
+    int sample1;
+    int sample2;
     int coeff1;
     int coeff2;
     int idelta;
diff --git a/libavcodec/adpcm_data.c b/libavcodec/adpcm_data.c
index 3bc5de2..2109285 100644
--- a/libavcodec/adpcm_data.c
+++ b/libavcodec/adpcm_data.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2001-2003 The ffmpeg Project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,12 +27,33 @@
 
 /* ff_adpcm_step_table[] and ff_adpcm_index_table[] are from the ADPCM
    reference source */
-/* This is the index table: */
+static const int8_t adpcm_index_table2[4] = { /* step-index deltas for 2-bit codes */
+    -1,  2,
+    -1,  2, /* second half repeats the first: same deltas when the top (sign) bit is set */
+};
+
+static const int8_t adpcm_index_table3[8] = { /* step-index deltas for 3-bit codes */
+    -1, -1,  1,  2,
+    -1, -1,  1,  2, /* second half repeats the first: same deltas when the top (sign) bit is set */
+};
+
 const int8_t ff_adpcm_index_table[16] = {
     -1, -1, -1, -1, 2, 4, 6, 8,
     -1, -1, -1, -1, 2, 4, 6, 8,
 };
 
+static const int8_t adpcm_index_table5[32] = { /* step-index deltas for 5-bit codes */
+    -1, -1, -1, -1, -1, -1, -1, -1, 1, 2, 4, 6, 8, 10, 13, 16,
+    -1, -1, -1, -1, -1, -1, -1, -1, 1, 2, 4, 6, 8, 10, 13, 16, /* repeats the first row for codes with the top bit set */
+};
+
+const int8_t * const ff_adpcm_index_tables[4] = { /* indexed by (bits per code - 2) */
+    &adpcm_index_table2[0],   /* 2-bit codes */
+    &adpcm_index_table3[0],   /* 3-bit codes */
+    &ff_adpcm_index_table[0], /* 4-bit codes: the regular 16-entry table */
+    &adpcm_index_table5[0],   /* 5-bit codes */
+};
+
 /**
  * This is the step table. Note that many programs use slight deviations from
  * this table, but such deviations are negligible:
@@ -49,6 +70,14 @@ const int16_t ff_adpcm_step_table[89] = {
     15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767
 };
 
+const int16_t ff_adpcm_oki_step_table[49] = { /* step sizes for the IMA OKI decoder, indexed by step_index (0..48) */
+     16,  17,  19,  21,   23,   25,   28,   31,   34,  37,
+     41,  45,  50,  55,   60,   66,   73,   80,   88,  97,
+    107, 118, 130, 143,  157,  173,  190,  209,  230, 253,
+    279, 307, 337, 371,  408,  449,  494,  544,  598, 658,
+    724, 796, 876, 963, 1060, 1166, 1282, 1411, 1552
+};
+
 /* These are for MS-ADPCM */
 /* ff_adpcm_AdaptationTable[], ff_adpcm_AdaptCoeff1[], and
    ff_adpcm_AdaptCoeff2[] are from libsndfile */
@@ -76,3 +105,8 @@ const int8_t ff_adpcm_yamaha_difflookup[] = {
      1,  3,  5,  7,  9,  11,  13,  15,
     -1, -3, -5, -7, -9, -11, -13, -15
 };
+
+const int16_t ff_adpcm_afc_coeffs[2][16] = { /* AFC predictor coefficients: row 0 weights the previous sample, row 1 the one before it */
+    { 0, 2048, 0, 1024, 4096, 3584, 3072, 4608, 4200, 4800, 5120, 2048, 1024, 64512, 64512, 63488 },
+    { 0, 0, 2048, 1024, 63488, 64000, 64512, 62976, 63288, 63236, 62464, 63488, 64512, 1024, 0, 0 }
+}; /* NOTE(review): entries above 32767 do not fit int16_t; presumably meant as negatives via wraparound (64512 -> -1024) - confirm */
diff --git a/libavcodec/adpcm_data.h b/libavcodec/adpcm_data.h
index a46cb5b..6589bc5 100644
--- a/libavcodec/adpcm_data.h
+++ b/libavcodec/adpcm_data.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2001-2003 The ffmpeg Project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -28,12 +28,18 @@
 
 #include <stdint.h>
 
+static const uint8_t ff_adpcm_ima_block_sizes[4]   = {  4, 12, 4, 20 };
+static const uint8_t ff_adpcm_ima_block_samples[4] = { 16, 32, 8, 32 };
+
+extern const int8_t * const ff_adpcm_index_tables[4];
 extern const int8_t  ff_adpcm_index_table[16];
 extern const int16_t ff_adpcm_step_table[89];
+extern const int16_t ff_adpcm_oki_step_table[49];
 extern const int16_t ff_adpcm_AdaptationTable[];
 extern const uint8_t ff_adpcm_AdaptCoeff1[];
 extern const int8_t  ff_adpcm_AdaptCoeff2[];
 extern const int16_t ff_adpcm_yamaha_indexscale[];
 extern const int8_t  ff_adpcm_yamaha_difflookup[];
+extern const int16_t ff_adpcm_afc_coeffs[2][16];
 
 #endif /* AVCODEC_ADPCM_DATA_H */
diff --git a/libavcodec/adpcmenc.c b/libavcodec/adpcmenc.c
index 341dda4..c3e4d0f 100644
--- a/libavcodec/adpcmenc.c
+++ b/libavcodec/adpcmenc.c
@@ -5,25 +5,24 @@
  * fringe ADPCM codecs (e.g., DK3, DK4, Westwood)
  *   by Mike Melanson (melanson@pcisys.net)
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "avcodec.h"
-#include "get_bits.h"
 #include "put_bits.h"
 #include "bytestream.h"
 #include "adpcm.h"
@@ -59,6 +58,8 @@ typedef struct ADPCMEncodeContext {
 
 #define FREEZE_INTERVAL 128
 
+static av_cold int adpcm_encode_close(AVCodecContext *avctx);
+
 static av_cold int adpcm_encode_init(AVCodecContext *avctx)
 {
     ADPCMEncodeContext *s = avctx->priv_data;
@@ -100,6 +101,7 @@ static av_cold int adpcm_encode_init(AVCodecContext *avctx)
         /* seems frame_size isn't taken into account...
            have to buffer the samples :-( */
         avctx->block_align = BLKSIZE;
+        avctx->bits_per_coded_sample = 4;
         break;
     case AV_CODEC_ID_ADPCM_IMA_QT:
         avctx->frame_size  = 64;
@@ -108,8 +110,8 @@ static av_cold int adpcm_encode_init(AVCodecContext *avctx)
     case AV_CODEC_ID_ADPCM_MS:
         /* each 16 bits sample gives one nibble
            and we have 7 bytes per channel overhead */
-        avctx->frame_size = (BLKSIZE - 7 * avctx->channels) * 2 /
-                             avctx->channels + 2;
+        avctx->frame_size = (BLKSIZE - 7 * avctx->channels) * 2 / avctx->channels + 2;
+        avctx->bits_per_coded_sample = 4;
         avctx->block_align    = BLKSIZE;
         if (!(avctx->extradata = av_malloc(32 + FF_INPUT_BUFFER_PADDING_SIZE)))
             goto error;
@@ -144,10 +146,7 @@ static av_cold int adpcm_encode_init(AVCodecContext *avctx)
 
     return 0;
 error:
-    av_freep(&s->paths);
-    av_freep(&s->node_buf);
-    av_freep(&s->nodep_buf);
-    av_freep(&s->trellis_hash);
+    adpcm_encode_close(avctx);
     return ret;
 }
 
@@ -180,24 +179,27 @@ static inline uint8_t adpcm_ima_qt_compress_sample(ADPCMChannelStatus *c,
                                                    int16_t sample)
 {
     int delta  = sample - c->prev_sample;
-    int mask, step = ff_adpcm_step_table[c->step_index];
-    int diff   = step >> 3;
-    int nibble = 0;
+    int diff, step = ff_adpcm_step_table[c->step_index];
+    int nibble = 8*(delta < 0);
 
-    if (delta < 0) {
-        nibble = 8;
-        delta  = -delta;
-    }
+    delta= abs(delta);
+    diff = delta + (step >> 3);
 
-    for (mask = 4; mask;) {
-        if (delta >= step) {
-            nibble |= mask;
-            delta  -= step;
-            diff   += step;
-        }
-        step >>= 1;
-        mask >>= 1;
+    if (delta >= step) {
+        nibble |= 4;
+        delta  -= step;
+    }
+    step >>= 1;
+    if (delta >= step) {
+        nibble |= 2;
+        delta  -= step;
     }
+    step >>= 1;
+    if (delta >= step) {
+        nibble |= 1;
+        delta  -= step;
+    }
+    diff -= delta;
 
     if (nibble & 8)
         c->prev_sample -= diff;
@@ -330,7 +332,7 @@ static void adpcm_compress_trellis(AVCodecContext *avctx,
                     uint8_t *h;\
                     dec_sample = av_clip_int16(dec_sample);\
                     d = sample - dec_sample;\
-                    ssd = nodes[j]->ssd + d*d;\
+                    ssd = nodes[j]->ssd + d*(unsigned)d;\
                     /* Check for wraparound, skip such samples completely. \
                      * Note, changing ssd to a 64 bit variable would be \
                      * simpler, avoiding this check, but it's slower on \
@@ -365,7 +367,7 @@ static void adpcm_compress_trellis(AVCodecContext *avctx,
                     *h = generation;\
                     u  = nodes_next[pos];\
                     if (!u) {\
-                        assert(pathn < FREEZE_INTERVAL << avctx->trellis);\
+                        av_assert1(pathn < FREEZE_INTERVAL << avctx->trellis);\
                         u = t++;\
                         nodes_next[pos] = u;\
                         u->path = pathn++;\
@@ -484,10 +486,8 @@ static int adpcm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
         pkt_size = (2 + avctx->channels * (22 + 4 * (frame->nb_samples - 1)) + 7) / 8;
     else
         pkt_size = avctx->block_align;
-    if ((ret = ff_alloc_packet(avpkt, pkt_size))) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+    if ((ret = ff_alloc_packet2(avctx, avpkt, pkt_size)) < 0)
         return ret;
-    }
     dst = avpkt->data;
 
     switch(avctx->codec->id) {
diff --git a/libavcodec/adx.c b/libavcodec/adx.c
index 4d412d5..9c5bedc 100644
--- a/libavcodec/adx.c
+++ b/libavcodec/adx.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2011  Justin Ruggles
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/adx.h b/libavcodec/adx.h
index 9328111..ca59a2d 100644
--- a/libavcodec/adx.h
+++ b/libavcodec/adx.h
@@ -2,20 +2,20 @@
  * ADX ADPCM codecs
  * Copyright (c) 2001,2003 BERO
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/adx_parser.c b/libavcodec/adx_parser.c
index 706e242..1fa718f 100644
--- a/libavcodec/adx_parser.c
+++ b/libavcodec/adx_parser.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2011  Justin Ruggles
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/adxdec.c b/libavcodec/adxdec.c
index 14fddf5..5115ced 100644
--- a/libavcodec/adxdec.c
+++ b/libavcodec/adxdec.c
@@ -2,20 +2,20 @@
  * ADX ADPCM codecs
  * Copyright (c) 2001,2003 BERO
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -101,6 +101,7 @@ static int adx_decode_frame(AVCodecContext *avctx, void *data,
     int16_t **samples;
     int samples_offset;
     const uint8_t *buf  = avpkt->data;
+    const uint8_t *buf_end = buf + avpkt->size;
     int num_blocks, ch, ret;
 
     if (c->eof) {
@@ -141,16 +142,14 @@ static int adx_decode_frame(AVCodecContext *avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = num_blocks * BLOCK_SAMPLES;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     samples = (int16_t **)frame->extended_data;
     samples_offset = 0;
 
     while (num_blocks--) {
         for (ch = 0; ch < c->channels; ch++) {
-            if (adx_decode(c, samples[ch], samples_offset, buf, ch)) {
+            if (buf_end - buf < BLOCK_SIZE || adx_decode(c, samples[ch], samples_offset, buf, ch)) {
                 c->eof = 1;
                 buf = avpkt->data + avpkt->size;
                 break;
@@ -158,9 +157,11 @@ static int adx_decode_frame(AVCodecContext *avctx, void *data,
             buf_size -= BLOCK_SIZE;
             buf      += BLOCK_SIZE;
         }
-        samples_offset += BLOCK_SAMPLES;
+        if (!c->eof)
+            samples_offset += BLOCK_SAMPLES;
     }
 
+    frame->nb_samples = samples_offset;
     *got_frame_ptr = 1;
 
     return buf - avpkt->data;
diff --git a/libavcodec/adxenc.c b/libavcodec/adxenc.c
index e730811..05e3245 100644
--- a/libavcodec/adxenc.c
+++ b/libavcodec/adxenc.c
@@ -2,20 +2,20 @@
  * ADX ADPCM codecs
  * Copyright (c) 2001,2003 BERO
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -133,10 +133,8 @@ static int adx_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     int ch, out_size, ret;
 
     out_size = BLOCK_SIZE * avctx->channels + !c->header_parsed * HEADER_SIZE;
-    if ((ret = ff_alloc_packet(avpkt, out_size)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+    if ((ret = ff_alloc_packet2(avctx, avpkt, out_size)) < 0)
         return ret;
-    }
     dst = avpkt->data;
 
     if (!c->header_parsed) {
diff --git a/libavcodec/aic.c b/libavcodec/aic.c
index dac9d8b..00be08b 100644
--- a/libavcodec/aic.c
+++ b/libavcodec/aic.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2013 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -152,6 +152,7 @@ typedef struct AICContext {
     int16_t        *data_ptr[NUM_BANDS];
 
     DECLARE_ALIGNED(16, int16_t, block)[64];
+    DECLARE_ALIGNED(16, uint8_t, quant_matrix)[64];
 } AICContext;
 
 static int aic_decode_header(AICContext *ctx, const uint8_t *src, int size)
@@ -203,7 +204,8 @@ static int aic_decode_coeffs(GetBitContext *gb, int16_t *dst,
     int has_skips, coeff_type, coeff_bits, skip_type, skip_bits;
     const int num_coeffs = aic_num_band_coeffs[band];
     const uint8_t *scan = aic_scan[band | force_chroma];
-    int mb, idx, val;
+    int mb, idx;
+    unsigned val;
 
     has_skips  = get_bits1(gb);
     coeff_type = get_bits1(gb);
@@ -217,14 +219,14 @@ static int aic_decode_coeffs(GetBitContext *gb, int16_t *dst,
             idx = -1;
             do {
                 GET_CODE(val, skip_type, skip_bits);
-                if (val < 0)
+                if (val >= 0x10000)
                     return AVERROR_INVALIDDATA;
                 idx += val + 1;
                 if (idx >= num_coeffs)
                     break;
                 GET_CODE(val, coeff_type, coeff_bits);
                 val++;
-                if (val >= 0x10000 || val < 0)
+                if (val >= 0x10000)
                     return AVERROR_INVALIDDATA;
                 dst[scan[idx]] = val;
             } while (idx < num_coeffs - 1);
@@ -234,7 +236,7 @@ static int aic_decode_coeffs(GetBitContext *gb, int16_t *dst,
         for (mb = 0; mb < slice_width; mb++) {
             for (idx = 0; idx < num_coeffs; idx++) {
                 GET_CODE(val, coeff_type, coeff_bits);
-                if (val >= 0x10000 || val < 0)
+                if (val >= 0x10000)
                     return AVERROR_INVALIDDATA;
                 dst[scan[idx]] = val;
             }
@@ -286,7 +288,7 @@ static void recombine_block_il(int16_t *dst, const uint8_t *scan,
     }
 }
 
-static void unquant_block(int16_t *block, int q)
+static void unquant_block(int16_t *block, int q, uint8_t *quant_matrix)
 {
     int i;
 
@@ -294,7 +296,7 @@ static void unquant_block(int16_t *block, int q)
         int val  = (uint16_t)block[i];
         int sign = val & 1;
 
-        block[i] = (((val >> 1) ^ -sign) * q * aic_quant_matrix[i] >> 4)
+        block[i] = (((val >> 1) ^ -sign) * q * quant_matrix[i] >> 4)
                    + sign;
     }
 }
@@ -335,7 +337,7 @@ static int aic_decode_slice(AICContext *ctx, int mb_x, int mb_y,
             else
                 recombine_block_il(ctx->block, ctx->scantable.permutated,
                                    &base_y, &ext_y, blk);
-            unquant_block(ctx->block, ctx->quant);
+            unquant_block(ctx->block, ctx->quant, ctx->quant_matrix);
             ctx->idsp.idct(ctx->block);
 
             if (!ctx->interlaced) {
@@ -352,7 +354,7 @@ static int aic_decode_slice(AICContext *ctx, int mb_x, int mb_y,
         for (blk = 0; blk < 2; blk++) {
             recombine_block(ctx->block, ctx->scantable.permutated,
                             &base_c, &ext_c);
-            unquant_block(ctx->block, ctx->quant);
+            unquant_block(ctx->block, ctx->quant, ctx->quant_matrix);
             ctx->idsp.idct(ctx->block);
             ctx->idsp.put_signed_pixels_clamped(ctx->block, C[blk],
                                                 ctx->frame->linesize[blk + 1]);
@@ -430,6 +432,8 @@ static av_cold int aic_decode_init(AVCodecContext *avctx)
     for (i = 0; i < 64; i++)
         scan[i] = i;
     ff_init_scantable(ctx->idsp.idct_permutation, &ctx->scantable, scan);
+    for (i = 0; i < 64; i++)
+        ctx->quant_matrix[ctx->idsp.idct_permutation[i]] = aic_quant_matrix[i];
 
     ctx->mb_width  = FFALIGN(avctx->width,  16) >> 4;
     ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;
diff --git a/libavcodec/alac.c b/libavcodec/alac.c
index 5272f84..103d14e 100644
--- a/libavcodec/alac.c
+++ b/libavcodec/alac.c
@@ -2,20 +2,20 @@
  * ALAC (Apple Lossless Audio Codec) decoder
  * Copyright (c) 2005 David Hammerton
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -36,8 +36,8 @@
  *  8bit  compatible version   (0)
  *  8bit  sample size
  *  8bit  history mult         (40)
- *  8bit  initial history      (14)
- *  8bit  rice param limit     (10)
+ *  8bit  initial history      (10)
+ *  8bit  rice param limit     (14)
  *  8bit  channels
  * 16bit  maxRun               (255)
  * 32bit  max coded frame size (0 means unknown)
@@ -52,6 +52,7 @@
 #include "get_bits.h"
 #include "bytestream.h"
 #include "internal.h"
+#include "thread.h"
 #include "unary.h"
 #include "mathops.h"
 #include "alac_data.h"
@@ -75,6 +76,8 @@ typedef struct {
 
     int extra_bits;     /**< number of extra bits beyond 16-bit */
     int nb_samples;     /**< number of samples in the current frame */
+
+    int direct_output;
 } ALACContext;
 
 static inline unsigned int decode_scalar(GetBitContext *gb, int k, int bps)
@@ -99,7 +102,7 @@ static inline unsigned int decode_scalar(GetBitContext *gb, int k, int bps)
     return x;
 }
 
-static void rice_decompress(ALACContext *alac, int32_t *output_buffer,
+static int rice_decompress(ALACContext *alac, int32_t *output_buffer,
                             int nb_samples, int bps, int rice_history_mult)
 {
     int i;
@@ -110,6 +113,9 @@ static void rice_decompress(ALACContext *alac, int32_t *output_buffer,
         int k;
         unsigned int x;
 
+        if (get_bits_left(&alac->gb) <= 0) /* no input bits remain: report bad data instead of decoding on */
+            return AVERROR_INVALIDDATA;
+
         /* calculate rice param and decode next value */
         k = av_log2((history >> 9) + 3);
         k = FFMIN(k, alac->rice_limit);
@@ -150,6 +156,7 @@ static void rice_decompress(ALACContext *alac, int32_t *output_buffer,
             history = 0;
         }
     }
+    return 0;
 }
 
 static inline int sign_only(int v)
@@ -186,7 +193,7 @@ static void lpc_prediction(int32_t *error_buffer, int32_t *buffer_out,
     }
 
     /* read warm-up samples */
-    for (i = 1; i <= lpc_order; i++)
+    for (i = 1; i <= lpc_order && i < nb_samples; i++)
         buffer_out[i] = sign_extend(buffer_out[i - 1] + error_buffer[i], bps);
 
     /* NOTE: 4 and 8 are very common cases that could be optimized. */
@@ -265,7 +272,7 @@ static int decode_element(AVCodecContext *avctx, AVFrame *frame, int ch_index,
 
     alac->extra_bits = get_bits(&alac->gb, 2) << 3;
     bps = alac->sample_size - alac->extra_bits + channels - 1;
-    if (bps > 32) {
+    if (bps > 32U) {
         av_log(avctx, AV_LOG_ERROR, "bps is unsupported: %d\n", bps);
         return AVERROR_PATCHWELCOME;
     }
@@ -283,19 +290,18 @@ static int decode_element(AVCodecContext *avctx, AVFrame *frame, int ch_index,
         return AVERROR_INVALIDDATA;
     }
     if (!alac->nb_samples) {
+        ThreadFrame tframe = { .f = frame };
         /* get output buffer */
         frame->nb_samples = output_samples;
-        if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        if ((ret = ff_thread_get_buffer(avctx, &tframe, 0)) < 0)
             return ret;
-        }
     } else if (output_samples != alac->nb_samples) {
         av_log(avctx, AV_LOG_ERROR, "sample count mismatch: %"PRIu32" != %d\n",
                output_samples, alac->nb_samples);
         return AVERROR_INVALIDDATA;
     }
     alac->nb_samples = output_samples;
-    if (alac->sample_size > 16) {
+    if (alac->direct_output) {
         for (ch = 0; ch < channels; ch++)
             alac->output_samples_buffer[ch] = (int32_t *)frame->extended_data[ch_index + ch];
     }
@@ -326,14 +332,18 @@ static int decode_element(AVCodecContext *avctx, AVFrame *frame, int ch_index,
 
         if (alac->extra_bits) {
             for (i = 0; i < alac->nb_samples; i++) {
+                if (get_bits_left(&alac->gb) <= 0) /* input ran out before every sample's extra bits were read */
+                    return AVERROR_INVALIDDATA;
                 for (ch = 0; ch < channels; ch++)
                     alac->extra_bits_buffer[ch][i] = get_bits(&alac->gb, alac->extra_bits);
             }
         }
         for (ch = 0; ch < channels; ch++) {
-            rice_decompress(alac, alac->predict_error_buffer[ch],
+            int ret=rice_decompress(alac, alac->predict_error_buffer[ch],
                             alac->nb_samples, bps,
                             rice_history_mult[ch] * alac->rice_history_mult / 4);
+            if(ret<0)
+                return ret;
 
             /* adaptive FIR filter */
             if (prediction_type[ch] == 15) {
@@ -358,6 +368,8 @@ static int decode_element(AVCodecContext *avctx, AVFrame *frame, int ch_index,
     } else {
         /* not compressed, easy case */
         for (i = 0; i < alac->nb_samples; i++) {
+            if (get_bits_left(&alac->gb) <= 0) /* input ran out before all uncompressed samples were read */
+                return AVERROR_INVALIDDATA;
             for (ch = 0; ch < channels; ch++) {
                 alac->output_samples_buffer[ch][i] =
                          get_sbits_long(&alac->gb, alac->sample_size);
@@ -378,6 +390,7 @@ static int decode_element(AVCodecContext *avctx, AVFrame *frame, int ch_index,
                           alac->extra_bits, channels, alac->nb_samples);
     }
 
+    if(av_sample_fmt_is_planar(avctx->sample_fmt)) {
     switch(alac->sample_size) {
     case 16: {
         for (ch = 0; ch < channels; ch++) {
@@ -393,6 +406,37 @@ static int decode_element(AVCodecContext *avctx, AVFrame *frame, int ch_index,
         }}
         break;
     }
+    }else{
+        switch(alac->sample_size) { /* copy this element's channels into the single buffer at extended_data[0] */
+        case 16: {
+            int16_t *outbuffer = ((int16_t *)frame->extended_data[0]) + ch_index; /* start at this element's first channel */
+            for (i = 0; i < alac->nb_samples; i++) {
+                for (ch = 0; ch < channels; ch++)
+                    *outbuffer++ = alac->output_samples_buffer[ch][i];
+                outbuffer += alac->channels - channels; /* step over the channels this element does not write */
+            }
+            }
+            break;
+        case 24: {
+            int32_t *outbuffer = ((int32_t *)frame->extended_data[0]) + ch_index; /* start at this element's first channel */
+            for (i = 0; i < alac->nb_samples; i++) {
+                for (ch = 0; ch < channels; ch++) /* unsigned multiply: left-shifting a negative sample is undefined */
+                    *outbuffer++ = alac->output_samples_buffer[ch][i] * 256U;
+                outbuffer += alac->channels - channels; /* step over the channels this element does not write */
+            }
+            }
+            break;
+        case 32: {
+            int32_t *outbuffer = ((int32_t *)frame->extended_data[0]) + ch_index; /* start at this element's first channel */
+            for (i = 0; i < alac->nb_samples; i++) {
+                for (ch = 0; ch < channels; ch++)
+                    *outbuffer++ = alac->output_samples_buffer[ch][i];
+                outbuffer += alac->channels - channels; /* step over the channels this element does not write */
+            }
+            }
+            break;
+        }
+    }
 
     return 0;
 }
@@ -406,7 +450,8 @@ static int alac_decode_frame(AVCodecContext *avctx, void *data,
     int channels;
     int ch, ret, got_end;
 
-    init_get_bits(&alac->gb, avpkt->data, avpkt->size * 8);
+    if ((ret = init_get_bits8(&alac->gb, avpkt->data, avpkt->size)) < 0)
+        return ret;
 
     got_end = 0;
     alac->nb_samples = 0;
@@ -418,7 +463,7 @@ static int alac_decode_frame(AVCodecContext *avctx, void *data,
             break;
         }
         if (element > TYPE_CPE && element != TYPE_LFE) {
-            av_log(avctx, AV_LOG_ERROR, "syntax element unsupported: %d", element);
+            av_log(avctx, AV_LOG_ERROR, "syntax element unsupported: %d\n", element);
             return AVERROR_PATCHWELCOME;
         }
 
@@ -447,7 +492,10 @@ static int alac_decode_frame(AVCodecContext *avctx, void *data,
                avpkt->size * 8 - get_bits_count(&alac->gb));
     }
 
-    *got_frame_ptr = 1;
+    if (alac->channels == ch)
+        *got_frame_ptr = 1;
+    else
+        av_log(avctx, AV_LOG_WARNING, "Failed to decode all channels\n");
 
     return avpkt->size;
 }
@@ -459,7 +507,7 @@ static av_cold int alac_decode_close(AVCodecContext *avctx)
     int ch;
     for (ch = 0; ch < FFMIN(alac->channels, 2); ch++) {
         av_freep(&alac->predict_error_buffer[ch]);
-        if (alac->sample_size == 16)
+        if (!alac->direct_output)
             av_freep(&alac->output_samples_buffer[ch]);
         av_freep(&alac->extra_bits_buffer[ch]);
     }
@@ -476,7 +524,8 @@ static int allocate_buffers(ALACContext *alac)
         FF_ALLOC_OR_GOTO(alac->avctx, alac->predict_error_buffer[ch],
                          buf_size, buf_alloc_fail);
 
-        if (alac->sample_size == 16) {
+        alac->direct_output = alac->sample_size > 16 && av_sample_fmt_is_planar(alac->avctx->sample_fmt);
+        if (!alac->direct_output) {
             FF_ALLOC_OR_GOTO(alac->avctx, alac->output_samples_buffer[ch],
                              buf_size, buf_alloc_fail);
         }
@@ -524,24 +573,26 @@ static int alac_set_info(ALACContext *alac)
 static av_cold int alac_decode_init(AVCodecContext * avctx)
 {
     int ret;
+    int req_packed;
     ALACContext *alac = avctx->priv_data;
     alac->avctx = avctx;
 
     /* initialize from the extradata */
     if (alac->avctx->extradata_size < ALAC_EXTRADATA_SIZE) {
-        av_log(avctx, AV_LOG_ERROR, "alac: extradata is too small\n");
+        av_log(avctx, AV_LOG_ERROR, "extradata is too small\n");
         return AVERROR_INVALIDDATA;
     }
     if (alac_set_info(alac)) {
-        av_log(avctx, AV_LOG_ERROR, "alac: set_info failed\n");
+        av_log(avctx, AV_LOG_ERROR, "set_info failed\n");
         return -1;
     }
 
+    req_packed = LIBAVCODEC_VERSION_MAJOR < 55 && !av_sample_fmt_is_planar(avctx->request_sample_fmt);
     switch (alac->sample_size) {
-    case 16: avctx->sample_fmt = AV_SAMPLE_FMT_S16P;
+    case 16: avctx->sample_fmt = req_packed ? AV_SAMPLE_FMT_S16 : AV_SAMPLE_FMT_S16P;
              break;
     case 24:
-    case 32: avctx->sample_fmt = AV_SAMPLE_FMT_S32P;
+    case 32: avctx->sample_fmt = req_packed ? AV_SAMPLE_FMT_S32 : AV_SAMPLE_FMT_S32P;
              break;
     default: avpriv_request_sample(avctx, "Sample depth %d", alac->sample_size);
              return AVERROR_PATCHWELCOME;
@@ -557,7 +608,7 @@ static av_cold int alac_decode_init(AVCodecContext * avctx)
         else
             avctx->channels = alac->channels;
     }
-    if (avctx->channels > ALAC_MAX_CHANNELS) {
+    if (avctx->channels > ALAC_MAX_CHANNELS || avctx->channels <= 0 ) {
         av_log(avctx, AV_LOG_ERROR, "Unsupported channel count: %d\n",
                avctx->channels);
         return AVERROR_PATCHWELCOME;
@@ -572,6 +623,13 @@ static av_cold int alac_decode_init(AVCodecContext * avctx)
     return 0;
 }
 
+static int init_thread_copy(AVCodecContext *avctx)
+{
+    ALACContext *const thread_ctx = avctx->priv_data; /* private state attached to avctx */
+    thread_ctx->avctx = avctx;           /* make the state point back at its own context */
+    return allocate_buffers(thread_ctx); /* allocation status is returned unchanged */
+}
+
 AVCodec ff_alac_decoder = {
     .name           = "alac",
     .long_name      = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),
@@ -581,5 +639,6 @@ AVCodec ff_alac_decoder = {
     .init           = alac_decode_init,
     .close          = alac_decode_close,
     .decode         = alac_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .init_thread_copy = ONLY_IF_THREADS_ENABLED(init_thread_copy),
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
 };
diff --git a/libavcodec/alac_data.c b/libavcodec/alac_data.c
index 9e13119..0bcb06c 100644
--- a/libavcodec/alac_data.c
+++ b/libavcodec/alac_data.c
@@ -1,20 +1,20 @@
 /*
  * ALAC encoder and decoder common data
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/alac_data.h b/libavcodec/alac_data.h
index ebb1f33..650d6dc 100644
--- a/libavcodec/alac_data.h
+++ b/libavcodec/alac_data.h
@@ -1,20 +1,20 @@
 /*
  * ALAC encoder and decoder common data
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/alacenc.c b/libavcodec/alacenc.c
index 401f26f..bc68a06 100644
--- a/libavcodec/alacenc.c
+++ b/libavcodec/alacenc.c
@@ -2,20 +2,20 @@
  * ALAC audio encoder
  * Copyright (c) 2008  Jaikrishnan Menon <realityman@gmx.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -274,7 +274,7 @@ static void alac_linear_predictor(AlacEncodeContext *s, int ch)
         // generate warm-up samples
         residual[0] = samples[0];
         for (i = 1; i <= lpc.lpc_order; i++)
-            residual[i] = samples[i] - samples[i-1];
+            residual[i] = sign_extend(samples[i] - samples[i-1], s->write_sample_size);
 
         // perform lpc on remaining samples
         for (i = lpc.lpc_order + 1; i < s->frame_size; i++) {
@@ -483,7 +483,6 @@ static av_cold int alac_encode_close(AVCodecContext *avctx)
     ff_lpc_end(&s->lpc_ctx);
     av_freep(&avctx->extradata);
     avctx->extradata_size = 0;
-    av_freep(&avctx->coded_frame);
     return 0;
 }
 
@@ -579,12 +578,6 @@ static av_cold int alac_encode_init(AVCodecContext *avctx)
         goto error;
     }
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame) {
-        ret = AVERROR(ENOMEM);
-        goto error;
-    }
-
     s->avctx = avctx;
 
     if ((ret = ff_lpc_init(&s->lpc_ctx, avctx->frame_size,
@@ -613,10 +606,8 @@ static int alac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     else
         max_frame_size = s->max_coded_frame_size;
 
-    if ((ret = ff_alloc_packet(avpkt, 2 * max_frame_size))) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+    if ((ret = ff_alloc_packet2(avctx, avpkt, 2 * max_frame_size)) < 0)
         return ret;
-    }
 
     /* use verbatim mode for compression_level 0 */
     if (s->compression_level) {
diff --git a/libavcodec/aliaspixdec.c b/libavcodec/aliaspixdec.c
index 8969e17..bdc4c72 100644
--- a/libavcodec/aliaspixdec.c
+++ b/libavcodec/aliaspixdec.c
@@ -2,20 +2,20 @@
  * Alias PIX image decoder
  * Copyright (C) 2014 Vittorio Giovara <vittorio.giovara@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aliaspixenc.c b/libavcodec/aliaspixenc.c
index 47e0612..1fcea08 100644
--- a/libavcodec/aliaspixenc.c
+++ b/libavcodec/aliaspixenc.c
@@ -2,20 +2,20 @@
  * Alias PIX image encoder
  * Copyright (C) 2014 Vittorio Giovara <vittorio.giovara@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index bd74e0b..7650543 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -2,20 +2,20 @@
  * Provide registration of all codecs, parsers and bitstream filters for libavcodec.
  * Copyright (c) 2002 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -81,7 +81,9 @@ void avcodec_register_all(void)
     REGISTER_HWACCEL(H264_VDA,          h264_vda);
     REGISTER_HWACCEL(H264_VDA_OLD,      h264_vda_old);
     REGISTER_HWACCEL(H264_VDPAU,        h264_vdpau);
+    REGISTER_HWACCEL(MPEG1_XVMC,        mpeg1_xvmc);
     REGISTER_HWACCEL(MPEG1_VDPAU,       mpeg1_vdpau);
+    REGISTER_HWACCEL(MPEG2_XVMC,        mpeg2_xvmc);
     REGISTER_HWACCEL(MPEG2_DXVA2,       mpeg2_dxva2);
     REGISTER_HWACCEL(MPEG2_VAAPI,       mpeg2_vaapi);
     REGISTER_HWACCEL(MPEG2_VDPAU,       mpeg2_vdpau);
@@ -100,14 +102,18 @@ void avcodec_register_all(void)
     REGISTER_DECODER(AASC,              aasc);
     REGISTER_DECODER(AIC,               aic);
     REGISTER_ENCDEC (ALIAS_PIX,         alias_pix);
-    REGISTER_DECODER(AMV,               amv);
+    REGISTER_ENCDEC (AMV,               amv);
     REGISTER_DECODER(ANM,               anm);
     REGISTER_DECODER(ANSI,              ansi);
     REGISTER_ENCDEC (ASV1,              asv1);
     REGISTER_ENCDEC (ASV2,              asv2);
     REGISTER_DECODER(AURA,              aura);
     REGISTER_DECODER(AURA2,             aura2);
+    REGISTER_ENCDEC (AVRP,              avrp);
+    REGISTER_DECODER(AVRN,              avrn);
     REGISTER_DECODER(AVS,               avs);
+    REGISTER_ENCDEC (AVUI,              avui);
+    REGISTER_ENCDEC (AYUV,              ayuv);
     REGISTER_DECODER(BETHSOFTVID,       bethsoftvid);
     REGISTER_DECODER(BFI,               bfi);
     REGISTER_DECODER(BINK,              bink);
@@ -118,13 +124,15 @@ void avcodec_register_all(void)
     REGISTER_DECODER(CAVS,              cavs);
     REGISTER_DECODER(CDGRAPHICS,        cdgraphics);
     REGISTER_DECODER(CDXL,              cdxl);
-    REGISTER_DECODER(CINEPAK,           cinepak);
+    REGISTER_ENCDEC (CINEPAK,           cinepak);
     REGISTER_ENCDEC (CLJR,              cljr);
     REGISTER_DECODER(CLLC,              cllc);
     REGISTER_ENCDEC (COMFORTNOISE,      comfortnoise);
+    REGISTER_DECODER(CPIA,              cpia);
     REGISTER_DECODER(CSCD,              cscd);
     REGISTER_DECODER(CYUV,              cyuv);
     REGISTER_DECODER(DFA,               dfa);
+    REGISTER_DECODER(DIRAC,             dirac);
     REGISTER_ENCDEC (DNXHD,             dnxhd);
     REGISTER_ENCDEC (DPX,               dpx);
     REGISTER_DECODER(DSICINVIDEO,       dsicinvideo);
@@ -146,7 +154,7 @@ void avcodec_register_all(void)
     REGISTER_ENCDEC (FFVHUFF,           ffvhuff);
     REGISTER_DECODER(FIC,               fic);
     REGISTER_ENCDEC (FLASHSV,           flashsv);
-    REGISTER_DECODER(FLASHSV2,          flashsv2);
+    REGISTER_ENCDEC (FLASHSV2,          flashsv2);
     REGISTER_DECODER(FLIC,              flic);
     REGISTER_ENCDEC (FLV,               flv);
     REGISTER_DECODER(FOURXM,            fourxm);
@@ -157,8 +165,11 @@ void avcodec_register_all(void)
     REGISTER_ENCDEC (H261,              h261);
     REGISTER_ENCDEC (H263,              h263);
     REGISTER_DECODER(H263I,             h263i);
-    REGISTER_ENCODER(H263P,             h263p);
+    REGISTER_ENCDEC (H263P,             h263p);
     REGISTER_DECODER(H264,              h264);
+    REGISTER_DECODER(H264_CRYSTALHD,    h264_crystalhd);
+    REGISTER_DECODER(H264_VDA,          h264_vda);
+    REGISTER_DECODER(H264_VDPAU,        h264_vdpau);
     REGISTER_DECODER(HEVC,              hevc);
     REGISTER_DECODER(HNM4_VIDEO,        hnm4_video);
     REGISTER_ENCDEC (HUFFYUV,           huffyuv);
@@ -170,7 +181,7 @@ void avcodec_register_all(void)
     REGISTER_DECODER(INDEO4,            indeo4);
     REGISTER_DECODER(INDEO5,            indeo5);
     REGISTER_DECODER(INTERPLAY_VIDEO,   interplay_video);
-    REGISTER_DECODER(JPEG2000,          jpeg2000);
+    REGISTER_ENCDEC (JPEG2000,          jpeg2000);
     REGISTER_ENCDEC (JPEGLS,            jpegls);
     REGISTER_DECODER(JV,                jv);
     REGISTER_DECODER(KGV1,              kgv1);
@@ -190,14 +201,21 @@ void avcodec_register_all(void)
     REGISTER_ENCDEC (MPEG1VIDEO,        mpeg1video);
     REGISTER_ENCDEC (MPEG2VIDEO,        mpeg2video);
     REGISTER_ENCDEC (MPEG4,             mpeg4);
+    REGISTER_DECODER(MPEG4_CRYSTALHD,   mpeg4_crystalhd);
+    REGISTER_DECODER(MPEG4_VDPAU,       mpeg4_vdpau);
+    REGISTER_DECODER(MPEGVIDEO,         mpegvideo);
+    REGISTER_DECODER(MPEG_VDPAU,        mpeg_vdpau);
+    REGISTER_DECODER(MPEG1_VDPAU,       mpeg1_vdpau);
+    REGISTER_DECODER(MPEG2_CRYSTALHD,   mpeg2_crystalhd);
     REGISTER_DECODER(MSA1,              msa1);
+    REGISTER_DECODER(MSMPEG4_CRYSTALHD, msmpeg4_crystalhd);
     REGISTER_DECODER(MSMPEG4V1,         msmpeg4v1);
     REGISTER_ENCDEC (MSMPEG4V2,         msmpeg4v2);
     REGISTER_ENCDEC (MSMPEG4V3,         msmpeg4v3);
     REGISTER_DECODER(MSRLE,             msrle);
     REGISTER_DECODER(MSS1,              mss1);
     REGISTER_DECODER(MSS2,              mss2);
-    REGISTER_DECODER(MSVIDEO1,          msvideo1);
+    REGISTER_ENCDEC (MSVIDEO1,          msvideo1);
     REGISTER_DECODER(MSZH,              mszh);
     REGISTER_DECODER(MTS2,              mts2);
     REGISTER_DECODER(MVC1,              mvc1);
@@ -214,12 +232,15 @@ void avcodec_register_all(void)
     REGISTER_ENCDEC (PNG,               png);
     REGISTER_ENCDEC (PPM,               ppm);
     REGISTER_ENCDEC (PRORES,            prores);
+    REGISTER_ENCODER(PRORES_AW,         prores_aw);
+    REGISTER_ENCODER(PRORES_KS,         prores_ks);
+    REGISTER_DECODER(PRORES_LGPL,       prores_lgpl);
     REGISTER_DECODER(PTX,               ptx);
     REGISTER_DECODER(QDRAW,             qdraw);
     REGISTER_DECODER(QPEG,              qpeg);
     REGISTER_ENCDEC (QTRLE,             qtrle);
-    REGISTER_DECODER(R10K,              r10k);
-    REGISTER_DECODER(R210,              r210);
+    REGISTER_ENCDEC (R10K,              r10k);
+    REGISTER_ENCDEC (R210,              r210);
     REGISTER_ENCDEC (RAWVIDEO,          rawvideo);
     REGISTER_DECODER(RL2,               rl2);
     REGISTER_ENCDEC (ROQ,               roq);
@@ -228,19 +249,21 @@ void avcodec_register_all(void)
     REGISTER_ENCDEC (RV20,              rv20);
     REGISTER_DECODER(RV30,              rv30);
     REGISTER_DECODER(RV40,              rv40);
-    REGISTER_DECODER(S302M,             s302m);
+    REGISTER_ENCDEC (S302M,             s302m);
     REGISTER_DECODER(SANM,              sanm);
     REGISTER_ENCDEC (SGI,               sgi);
     REGISTER_DECODER(SGIRLE,            sgirle);
     REGISTER_DECODER(SMACKER,           smacker);
     REGISTER_DECODER(SMC,               smc);
+    REGISTER_DECODER(SMVJPEG,           smvjpeg);
+    REGISTER_ENCDEC (SNOW,              snow);
     REGISTER_DECODER(SP5X,              sp5x);
     REGISTER_ENCDEC (SUNRAST,           sunrast);
     REGISTER_ENCDEC (SVQ1,              svq1);
     REGISTER_DECODER(SVQ3,              svq3);
     REGISTER_ENCDEC (TARGA,             targa);
+    REGISTER_DECODER(TARGA_Y216,        targa_y216);
     REGISTER_DECODER(THEORA,            theora);
-    REGISTER_ENCODER(LIBTWOLAME,        libtwolame);
     REGISTER_DECODER(THP,               thp);
     REGISTER_DECODER(TIERTEXSEQVIDEO,   tiertexseqvideo);
     REGISTER_ENCDEC (TIFF,              tiff);
@@ -254,10 +277,14 @@ void avcodec_register_all(void)
     REGISTER_ENCDEC (UTVIDEO,           utvideo);
     REGISTER_ENCDEC (V210,              v210);
     REGISTER_DECODER(V210X,             v210x);
+    REGISTER_ENCDEC (V308,              v308);
+    REGISTER_ENCDEC (V408,              v408);
     REGISTER_ENCDEC (V410,              v410);
     REGISTER_DECODER(VB,                vb);
     REGISTER_DECODER(VBLE,              vble);
     REGISTER_DECODER(VC1,               vc1);
+    REGISTER_DECODER(VC1_CRYSTALHD,     vc1_crystalhd);
+    REGISTER_DECODER(VC1_VDPAU,         vc1_vdpau);
     REGISTER_DECODER(VC1IMAGE,          vc1image);
     REGISTER_DECODER(VCR1,              vcr1);
     REGISTER_DECODER(VMDVIDEO,          vmdvideo);
@@ -275,14 +302,20 @@ void avcodec_register_all(void)
     REGISTER_ENCDEC (WMV1,              wmv1);
     REGISTER_ENCDEC (WMV2,              wmv2);
     REGISTER_DECODER(WMV3,              wmv3);
+    REGISTER_DECODER(WMV3_CRYSTALHD,    wmv3_crystalhd);
+    REGISTER_DECODER(WMV3_VDPAU,        wmv3_vdpau);
     REGISTER_DECODER(WMV3IMAGE,         wmv3image);
     REGISTER_DECODER(WNV1,              wnv1);
     REGISTER_DECODER(XAN_WC3,           xan_wc3);
     REGISTER_DECODER(XAN_WC4,           xan_wc4);
     REGISTER_ENCDEC (XBM,               xbm);
+    REGISTER_ENCDEC (XFACE,             xface);
     REGISTER_DECODER(XL,                xl);
     REGISTER_ENCDEC (XWD,               xwd);
+    REGISTER_ENCDEC (Y41P,              y41p);
     REGISTER_DECODER(YOP,               yop);
+    REGISTER_ENCDEC (YUV4,              yuv4);
+    REGISTER_DECODER(ZERO12V,           zero12v);
     REGISTER_DECODER(ZEROCODEC,         zerocodec);
     REGISTER_ENCDEC (ZLIB,              zlib);
     REGISTER_ENCDEC (ZMBV,              zmbv);
@@ -291,7 +324,7 @@ void avcodec_register_all(void)
     REGISTER_ENCDEC (AAC,               aac);
     REGISTER_DECODER(AAC_LATM,          aac_latm);
     REGISTER_ENCDEC (AC3,               ac3);
-    REGISTER_ENCODER(AC3_FIXED,         ac3_fixed);
+    REGISTER_ENCDEC (AC3_FIXED,         ac3_fixed);
     REGISTER_ENCDEC (ALAC,              alac);
     REGISTER_DECODER(ALS,               als);
     REGISTER_DECODER(AMRNB,             amrnb);
@@ -304,11 +337,18 @@ void avcodec_register_all(void)
     REGISTER_DECODER(BINKAUDIO_RDFT,    binkaudio_rdft);
     REGISTER_DECODER(BMV_AUDIO,         bmv_audio);
     REGISTER_DECODER(COOK,              cook);
-    REGISTER_DECODER(DCA,               dca);
+    REGISTER_ENCDEC (DCA,               dca);
+    REGISTER_DECODER(DSD_LSBF,          dsd_lsbf);
+    REGISTER_DECODER(DSD_MSBF,          dsd_msbf);
+    REGISTER_DECODER(DSD_LSBF_PLANAR,   dsd_lsbf_planar);
+    REGISTER_DECODER(DSD_MSBF_PLANAR,   dsd_msbf_planar);
     REGISTER_DECODER(DSICINAUDIO,       dsicinaudio);
     REGISTER_ENCDEC (EAC3,              eac3);
+    REGISTER_DECODER(EVRC,              evrc);
+    REGISTER_DECODER(FFWAVESYNTH,       ffwavesynth);
     REGISTER_ENCDEC (FLAC,              flac);
-    REGISTER_DECODER(G723_1,            g723_1);
+    REGISTER_ENCDEC (G723_1,            g723_1);
+    REGISTER_DECODER(G729,              g729);
     REGISTER_DECODER(GSM,               gsm);
     REGISTER_DECODER(GSM_MS,            gsm_ms);
     REGISTER_DECODER(IAC,               iac);
@@ -321,6 +361,7 @@ void avcodec_register_all(void)
     REGISTER_DECODER(MP1FLOAT,          mp1float);
     REGISTER_ENCDEC (MP2,               mp2);
     REGISTER_DECODER(MP2FLOAT,          mp2float);
+    REGISTER_ENCODER(MP2FIXED,          mp2fixed);
     REGISTER_DECODER(MP3,               mp3);
     REGISTER_DECODER(MP3FLOAT,          mp3float);
     REGISTER_DECODER(MP3ADU,            mp3adu);
@@ -341,14 +382,16 @@ void avcodec_register_all(void)
     REGISTER_DECODER(SHORTEN,           shorten);
     REGISTER_DECODER(SIPR,              sipr);
     REGISTER_DECODER(SMACKAUD,          smackaud);
+    REGISTER_ENCDEC (SONIC,             sonic);
+    REGISTER_ENCODER(SONIC_LS,          sonic_ls);
     REGISTER_DECODER(TAK,               tak);
     REGISTER_DECODER(TRUEHD,            truehd);
     REGISTER_DECODER(TRUESPEECH,        truespeech);
-    REGISTER_DECODER(TTA,               tta);
+    REGISTER_ENCDEC (TTA,               tta);
     REGISTER_DECODER(TWINVQ,            twinvq);
     REGISTER_DECODER(VMDAUDIO,          vmdaudio);
     REGISTER_ENCDEC (VORBIS,            vorbis);
-    REGISTER_DECODER(WAVPACK,           wavpack);
+    REGISTER_ENCDEC (WAVPACK,           wavpack);
     REGISTER_DECODER(WMALOSSLESS,       wmalossless);
     REGISTER_DECODER(WMAPRO,            wmapro);
     REGISTER_ENCDEC (WMAV1,             wmav1);
@@ -367,17 +410,18 @@ void avcodec_register_all(void)
     REGISTER_DECODER(PCM_LXF,           pcm_lxf);
     REGISTER_ENCDEC (PCM_MULAW,         pcm_mulaw);
     REGISTER_ENCDEC (PCM_S8,            pcm_s8);
-    REGISTER_DECODER(PCM_S8_PLANAR,     pcm_s8_planar);
+    REGISTER_ENCDEC (PCM_S8_PLANAR,     pcm_s8_planar);
     REGISTER_ENCDEC (PCM_S16BE,         pcm_s16be);
+    REGISTER_ENCDEC (PCM_S16BE_PLANAR,  pcm_s16be_planar);
     REGISTER_ENCDEC (PCM_S16LE,         pcm_s16le);
-    REGISTER_DECODER(PCM_S16LE_PLANAR,  pcm_s16le_planar);
+    REGISTER_ENCDEC (PCM_S16LE_PLANAR,  pcm_s16le_planar);
     REGISTER_ENCDEC (PCM_S24BE,         pcm_s24be);
     REGISTER_ENCDEC (PCM_S24DAUD,       pcm_s24daud);
     REGISTER_ENCDEC (PCM_S24LE,         pcm_s24le);
-    REGISTER_DECODER(PCM_S24LE_PLANAR,  pcm_s24le_planar);
+    REGISTER_ENCDEC (PCM_S24LE_PLANAR,  pcm_s24le_planar);
     REGISTER_ENCDEC (PCM_S32BE,         pcm_s32be);
     REGISTER_ENCDEC (PCM_S32LE,         pcm_s32le);
-    REGISTER_DECODER(PCM_S32LE_PLANAR,  pcm_s32le_planar);
+    REGISTER_ENCDEC (PCM_S32LE_PLANAR,  pcm_s32le_planar);
     REGISTER_ENCDEC (PCM_U8,            pcm_u8);
     REGISTER_ENCDEC (PCM_U16BE,         pcm_u16be);
     REGISTER_ENCDEC (PCM_U16LE,         pcm_u16le);
@@ -385,7 +429,7 @@ void avcodec_register_all(void)
     REGISTER_ENCDEC (PCM_U24LE,         pcm_u24le);
     REGISTER_ENCDEC (PCM_U32BE,         pcm_u32be);
     REGISTER_ENCDEC (PCM_U32LE,         pcm_u32le);
-    REGISTER_DECODER(PCM_ZORK ,         pcm_zork);
+    REGISTER_DECODER(PCM_ZORK,          pcm_zork);
 
     /* DPCM codecs */
     REGISTER_DECODER(INTERPLAY_DPCM,    interplay_dpcm);
@@ -396,7 +440,9 @@ void avcodec_register_all(void)
     /* ADPCM codecs */
     REGISTER_DECODER(ADPCM_4XM,         adpcm_4xm);
     REGISTER_ENCDEC (ADPCM_ADX,         adpcm_adx);
+    REGISTER_DECODER(ADPCM_AFC,         adpcm_afc);
     REGISTER_DECODER(ADPCM_CT,          adpcm_ct);
+    REGISTER_DECODER(ADPCM_DTK,         adpcm_dtk);
     REGISTER_DECODER(ADPCM_EA,          adpcm_ea);
     REGISTER_DECODER(ADPCM_EA_MAXIS_XA, adpcm_ea_maxis_xa);
     REGISTER_DECODER(ADPCM_EA_R1,       adpcm_ea_r1);
@@ -405,6 +451,7 @@ void avcodec_register_all(void)
     REGISTER_DECODER(ADPCM_EA_XAS,      adpcm_ea_xas);
     REGISTER_ENCDEC (ADPCM_G722,        adpcm_g722);
     REGISTER_ENCDEC (ADPCM_G726,        adpcm_g726);
+    REGISTER_DECODER(ADPCM_G726LE,      adpcm_g726le);
     REGISTER_DECODER(ADPCM_IMA_AMV,     adpcm_ima_amv);
     REGISTER_DECODER(ADPCM_IMA_APC,     adpcm_ima_apc);
     REGISTER_DECODER(ADPCM_IMA_DK3,     adpcm_ima_dk3);
@@ -412,7 +459,9 @@ void avcodec_register_all(void)
     REGISTER_DECODER(ADPCM_IMA_EA_EACS, adpcm_ima_ea_eacs);
     REGISTER_DECODER(ADPCM_IMA_EA_SEAD, adpcm_ima_ea_sead);
     REGISTER_DECODER(ADPCM_IMA_ISS,     adpcm_ima_iss);
+    REGISTER_DECODER(ADPCM_IMA_OKI,     adpcm_ima_oki);
     REGISTER_ENCDEC (ADPCM_IMA_QT,      adpcm_ima_qt);
+    REGISTER_DECODER(ADPCM_IMA_RAD,     adpcm_ima_rad);
     REGISTER_DECODER(ADPCM_IMA_SMJPEG,  adpcm_ima_smjpeg);
     REGISTER_ENCDEC (ADPCM_IMA_WAV,     adpcm_ima_wav);
     REGISTER_DECODER(ADPCM_IMA_WS,      adpcm_ima_ws);
@@ -425,16 +474,32 @@ void avcodec_register_all(void)
     REGISTER_DECODER(ADPCM_VIMA,        adpcm_vima);
     REGISTER_DECODER(ADPCM_XA,          adpcm_xa);
     REGISTER_ENCDEC (ADPCM_YAMAHA,      adpcm_yamaha);
+    REGISTER_DECODER(VIMA,              vima);
 
     /* subtitles */
+    REGISTER_ENCDEC (SSA,               ssa);
     REGISTER_ENCDEC (ASS,               ass);
     REGISTER_ENCDEC (DVBSUB,            dvbsub);
     REGISTER_ENCDEC (DVDSUB,            dvdsub);
+    REGISTER_DECODER(JACOSUB,           jacosub);
+    REGISTER_DECODER(MICRODVD,          microdvd);
+    REGISTER_ENCDEC (MOVTEXT,           movtext);
+    REGISTER_DECODER(MPL2,              mpl2);
     REGISTER_DECODER(PGSSUB,            pgssub);
-    REGISTER_DECODER(SRT,               srt);
+    REGISTER_DECODER(PJS,               pjs);
+    REGISTER_DECODER(REALTEXT,          realtext);
+    REGISTER_DECODER(SAMI,              sami);
+    REGISTER_ENCDEC (SRT,               srt);
+    REGISTER_ENCDEC (SUBRIP,            subrip);
+    REGISTER_DECODER(SUBVIEWER,         subviewer);
+    REGISTER_DECODER(SUBVIEWER1,        subviewer1);
+    REGISTER_DECODER(TEXT,              text);
+    REGISTER_DECODER(VPLAYER,           vplayer);
+    REGISTER_ENCDEC (WEBVTT,            webvtt);
     REGISTER_ENCDEC (XSUB,              xsub);
 
     /* external libraries */
+    REGISTER_DECODER(LIBCELT,           libcelt);
     REGISTER_ENCODER(LIBFAAC,           libfaac);
     REGISTER_ENCDEC (LIBFDK_AAC,        libfdk_aac);
     REGISTER_ENCDEC (LIBGSM,            libgsm);
@@ -446,19 +511,31 @@ void avcodec_register_all(void)
     REGISTER_ENCDEC (LIBOPENJPEG,       libopenjpeg);
     REGISTER_ENCDEC (LIBOPUS,           libopus);
     REGISTER_ENCDEC (LIBSCHROEDINGER,   libschroedinger);
+    REGISTER_ENCODER(LIBSHINE,          libshine);
     REGISTER_ENCDEC (LIBSPEEX,          libspeex);
+    REGISTER_DECODER(LIBSTAGEFRIGHT_H264, libstagefright_h264);
     REGISTER_ENCODER(LIBTHEORA,         libtheora);
+    REGISTER_ENCODER(LIBTWOLAME,        libtwolame);
+    REGISTER_ENCDEC (LIBUTVIDEO,        libutvideo);
     REGISTER_ENCODER(LIBVO_AACENC,      libvo_aacenc);
     REGISTER_ENCODER(LIBVO_AMRWBENC,    libvo_amrwbenc);
-    REGISTER_ENCODER(LIBVORBIS,         libvorbis);
+    REGISTER_ENCDEC (LIBVORBIS,         libvorbis);
     REGISTER_ENCDEC (LIBVPX_VP8,        libvpx_vp8);
     REGISTER_ENCDEC (LIBVPX_VP9,        libvpx_vp9);
     REGISTER_ENCODER(LIBWAVPACK,        libwavpack);
     REGISTER_ENCODER(LIBWEBP,           libwebp);
     REGISTER_ENCODER(LIBX264,           libx264);
+    REGISTER_ENCODER(LIBX264RGB,        libx264rgb);
     REGISTER_ENCODER(LIBX265,           libx265);
     REGISTER_ENCODER(LIBXAVS,           libxavs);
     REGISTER_ENCODER(LIBXVID,           libxvid);
+    REGISTER_DECODER(LIBZVBI_TELETEXT,  libzvbi_teletext);
+    REGISTER_ENCODER(LIBAACPLUS,        libaacplus);
+
+    /* text */
+    REGISTER_DECODER(BINTEXT,           bintext);
+    REGISTER_DECODER(XBIN,              xbin);
+    REGISTER_DECODER(IDF,               idf);
 
     /* parsers */
     REGISTER_PARSER(AAC,                aac);
@@ -471,8 +548,10 @@ void avcodec_register_all(void)
     REGISTER_PARSER(DCA,                dca);
     REGISTER_PARSER(DIRAC,              dirac);
     REGISTER_PARSER(DNXHD,              dnxhd);
+    REGISTER_PARSER(DPX,                dpx);
     REGISTER_PARSER(DVBSUB,             dvbsub);
     REGISTER_PARSER(DVDSUB,             dvdsub);
+    REGISTER_PARSER(DVD_NAV,            dvd_nav);
     REGISTER_PARSER(FLAC,               flac);
     REGISTER_PARSER(GSM,                gsm);
     REGISTER_PARSER(H261,               h261);
@@ -494,6 +573,7 @@ void avcodec_register_all(void)
     REGISTER_PARSER(VORBIS,             vorbis);
     REGISTER_PARSER(VP3,                vp3);
     REGISTER_PARSER(VP8,                vp8);
+    REGISTER_PARSER(VP9,                vp9);
 
     /* bitstream filters */
     REGISTER_BSF(AAC_ADTSTOASC,         aac_adtstoasc);
@@ -503,6 +583,7 @@ void avcodec_register_all(void)
     REGISTER_BSF(IMX_DUMP_HEADER,       imx_dump_header);
     REGISTER_BSF(MJPEG2JPEG,            mjpeg2jpeg);
     REGISTER_BSF(MJPEGA_DUMP_HEADER,    mjpega_dump_header);
+    REGISTER_BSF(MP3_HEADER_DECOMPRESS, mp3_header_decompress);
     REGISTER_BSF(MOV2TEXTSUB,           mov2textsub);
     REGISTER_BSF(NOISE,                 noise);
     REGISTER_BSF(REMOVE_EXTRADATA,      remove_extradata);
diff --git a/libavcodec/alpha/Makefile b/libavcodec/alpha/Makefile
new file mode 100644
index 0000000..796d976
--- /dev/null
+++ b/libavcodec/alpha/Makefile
@@ -0,0 +1,10 @@
+OBJS-$(CONFIG_BLOCKDSP)                 += alpha/blockdsp_alpha.o
+OBJS-$(CONFIG_ME_CMP)                   += alpha/me_cmp_alpha.o         \
+                                           alpha/me_cmp_mvi_asm.o
+OBJS-$(CONFIG_HPELDSP)                  += alpha/hpeldsp_alpha.o        \
+                                           alpha/hpeldsp_alpha_asm.o
+OBJS-$(CONFIG_IDCTDSP)                  += alpha/idctdsp_alpha.o        \
+                                           alpha/idctdsp_alpha_asm.o    \
+                                           alpha/simple_idct_alpha.o
+OBJS-$(CONFIG_MPEGVIDEO)                += alpha/mpegvideo_alpha.o
+OBJS-$(CONFIG_PIXBLOCKDSP)              += alpha/pixblockdsp_alpha.o
diff --git a/libavcodec/alpha/asm.h b/libavcodec/alpha/asm.h
new file mode 100644
index 0000000..827721e
--- /dev/null
+++ b/libavcodec/alpha/asm.h
@@ -0,0 +1,186 @@
+/*
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ALPHA_ASM_H
+#define AVCODEC_ALPHA_ASM_H
+
+#include <inttypes.h>
+
+#include "libavutil/common.h"
+
+#if AV_GCC_VERSION_AT_LEAST(2,96)       /* branch hints through __builtin_expect */
+# define likely(x)      __builtin_expect((x) != 0, 1)
+# define unlikely(x)    __builtin_expect((x) != 0, 0)
+#else                                   /* no hint: the condition is used as-is */
+# define likely(x)      (x)
+# define unlikely(x)    (x)
+#endif
+
+#define AMASK_BWX (1 << 0) /* Query bits for amask(), one per optional      */
+#define AMASK_FIX (1 << 1) /* extension.  amask() clears the bits of the    */
+#define AMASK_CIX (1 << 2) /* features that are present, so a zero result   */
+#define AMASK_MVI (1 << 8) /* for a single bit means "feature available".   */
+
+static inline uint64_t BYTE_VEC(uint64_t x)
+{
+    /* OR x with itself at every byte offset; for x < 0x100 this fills all
+       eight bytes of the result with x.  */
+    uint64_t two  = x   | (x   << 8);
+    uint64_t four = two | (two << 16);
+    return four | (four << 32);
+}
+static inline uint64_t WORD_VEC(uint64_t x)
+{
+    /* OR x with itself at every 16-bit offset (replicates x if x < 0x10000). */
+    uint64_t two = x | (x << 16);
+    return two | (two << 32);
+}
+
+#define sextw(x) ((int16_t) (x))        /* value of x converted to a signed 16-bit word */
+
+#ifdef __GNUC__
+#define ldq(p) /* 64-bit load via a union that also holds the pointee type */ \
+    (((const union {                                            \
+        uint64_t __l;                                           \
+        __typeof__(*(p)) __s[sizeof (uint64_t) / sizeof *(p)];  \
+    } *) (p))->__l)
+#define ldl(p) /* as ldq(), but a signed 32-bit load */          \
+    (((const union {                                            \
+        int32_t __l;                                            \
+        __typeof__(*(p)) __s[sizeof (int32_t) / sizeof *(p)];   \
+    } *) (p))->__l)
+#define stq(l, p) /* 64-bit store of l to p through the same kind of union */ \
+    do {                                                                \
+        (((union {                                                      \
+            uint64_t __l;                                               \
+            __typeof__(*(p)) __s[sizeof (uint64_t) / sizeof *(p)];      \
+        } *) (p))->__l) = l;                                            \
+    } while (0)
+#define stl(l, p) /* as stq(), but a 32-bit store */                    \
+    do {                                                                \
+        (((union {                                                      \
+            int32_t __l;                                                \
+            __typeof__(*(p)) __s[sizeof (int32_t) / sizeof *(p)];       \
+        } *) (p))->__l) = l;                                            \
+    } while (0)
+struct unaligned_long { uint64_t l; } __attribute__((packed)); /* packed: usable at any address */
+#define ldq_u(p)        (*(const uint64_t *) (((uint64_t) (p)) & ~7ul)) /* loads the aligned quadword containing p */
+#define uldq(a)         (((const struct unaligned_long *) (a))->l) /* 64-bit load from a possibly unaligned address */
+
+#if AV_GCC_VERSION_AT_LEAST(3,3)        /* GCC >= 3.3: use the compiler builtins */
+#define prefetch(p)     __builtin_prefetch((p), 0, 1)  /* rw = 0, locality = 1 */
+#define prefetch_en(p)  __builtin_prefetch((p), 0, 0)  /* rw = 0, locality = 0 */
+#define prefetch_m(p)   __builtin_prefetch((p), 1, 1)  /* rw = 1, locality = 1 */
+#define prefetch_men(p) __builtin_prefetch((p), 1, 0)  /* rw = 1, locality = 0 */
+#define cmpbge          __builtin_alpha_cmpbge
+/* The second operand is cast to uint64_t to avoid warnings.  */
+#define extql(a, b)     __builtin_alpha_extql(a, (uint64_t) (b))
+#define extwl(a, b)     __builtin_alpha_extwl(a, (uint64_t) (b))
+#define extqh(a, b)     __builtin_alpha_extqh(a, (uint64_t) (b))
+#define zap             __builtin_alpha_zap
+#define zapnot          __builtin_alpha_zapnot
+#define amask           __builtin_alpha_amask
+#define implver         __builtin_alpha_implver
+#define rpcc            __builtin_alpha_rpcc
+#else                                   /* older GCC: same operations as inline asm */
+#define prefetch(p)     __asm__ volatile("ldl $31,%0"  : : "m"(*(const char *) (p)) : "memory")
+#define prefetch_en(p)  __asm__ volatile("ldq $31,%0"  : : "m"(*(const char *) (p)) : "memory")
+#define prefetch_m(p)   __asm__ volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory")
+#define prefetch_men(p) __asm__ volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory")
+#define cmpbge(a, b) ({ uint64_t __r; __asm__ ("cmpbge  %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
+#define extql(a, b)  ({ uint64_t __r; __asm__ ("extql   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
+#define extwl(a, b)  ({ uint64_t __r; __asm__ ("extwl   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
+#define extqh(a, b)  ({ uint64_t __r; __asm__ ("extqh   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
+#define zap(a, b)    ({ uint64_t __r; __asm__ ("zap     %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
+#define zapnot(a, b) ({ uint64_t __r; __asm__ ("zapnot  %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
+#define amask(a)     ({ uint64_t __r; __asm__ ("amask   %1,%0"      : "=r" (__r) : "rI"  (a));           __r; })
+#define implver()    ({ uint64_t __r; __asm__ ("implver %0"         : "=r" (__r));                       __r; })
+#define rpcc()       ({ uint64_t __r; __asm__ volatile ("rpcc %0"   : "=r" (__r));                       __r; })
+#endif
+#define wh64(p) __asm__ volatile("wh64 (%0)" : : "r"(p) : "memory") /* emits wh64 on address p; declared as clobbering memory */
+
+#if AV_GCC_VERSION_AT_LEAST(3,3) && defined(__alpha_max__) /* MVI builtins usable directly */
+#define minub8  __builtin_alpha_minub8
+#define minsb8  __builtin_alpha_minsb8
+#define minuw4  __builtin_alpha_minuw4
+#define minsw4  __builtin_alpha_minsw4
+#define maxub8  __builtin_alpha_maxub8
+#define maxsb8  __builtin_alpha_maxsb8
+#define maxuw4  __builtin_alpha_maxuw4
+#define maxsw4  __builtin_alpha_maxsw4
+#define perr    __builtin_alpha_perr
+#define pklb    __builtin_alpha_pklb
+#define pkwb    __builtin_alpha_pkwb
+#define unpkbl  __builtin_alpha_unpkbl
+#define unpkbw  __builtin_alpha_unpkbw
+#else /* inline asm; each statement switches the assembler to ".arch ev6" first */
+#define minub8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minub8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define minsb8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minsb8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define minuw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minuw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define minsw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minsw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define maxub8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxub8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define maxsb8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxsb8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define maxuw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxuw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define maxsw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxsw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define perr(a, b)   ({ uint64_t __r; __asm__ (".arch ev6; perr    %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; })
+#define pklb(a)      ({ uint64_t __r; __asm__ (".arch ev6; pklb    %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; })
+#define pkwb(a)      ({ uint64_t __r; __asm__ (".arch ev6; pkwb    %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; })
+#define unpkbl(a)    ({ uint64_t __r; __asm__ (".arch ev6; unpkbl  %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; })
+#define unpkbw(a)    ({ uint64_t __r; __asm__ (".arch ev6; unpkbw  %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; })
+#endif
+
+#elif defined(__DECC)           /* Digital/Compaq/hp "ccc" compiler */
+
+#include <c_asm.h>
+#define ldq(p) (*(const uint64_t *) (p))                       /* plain dereference, no union wrapper */
+#define ldl(p) (*(const int32_t *)  (p))
+#define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0)
+#define stl(l, p) do { *(int32_t *)  (p) = (l); } while (0)
+#define ldq_u(a)     asm ("ldq_u   %v0,0(%a0)", a)             /* uses the ldq_u instruction itself */
+#define uldq(a)      (*(const __unaligned uint64_t *) (a))     /* __unaligned-qualified access */
+#define cmpbge(a, b) asm ("cmpbge  %a0,%a1,%v0", a, b)
+#define extql(a, b)  asm ("extql   %a0,%a1,%v0", a, b)
+#define extwl(a, b)  asm ("extwl   %a0,%a1,%v0", a, b)
+#define extqh(a, b)  asm ("extqh   %a0,%a1,%v0", a, b)
+#define zap(a, b)    asm ("zap     %a0,%a1,%v0", a, b)
+#define zapnot(a, b) asm ("zapnot  %a0,%a1,%v0", a, b)
+#define amask(a)     asm ("amask   %a0,%v0", a)
+#define implver()    asm ("implver %v0")
+#define rpcc()       asm ("rpcc           %v0")
+#define minub8(a, b) asm ("minub8  %a0,%a1,%v0", a, b)         /* MVI operations from here to unpkbw */
+#define minsb8(a, b) asm ("minsb8  %a0,%a1,%v0", a, b)
+#define minuw4(a, b) asm ("minuw4  %a0,%a1,%v0", a, b)
+#define minsw4(a, b) asm ("minsw4  %a0,%a1,%v0", a, b)
+#define maxub8(a, b) asm ("maxub8  %a0,%a1,%v0", a, b)
+#define maxsb8(a, b) asm ("maxsb8  %a0,%a1,%v0", a, b)
+#define maxuw4(a, b) asm ("maxuw4  %a0,%a1,%v0", a, b)
+#define maxsw4(a, b) asm ("maxsw4  %a0,%a1,%v0", a, b)
+#define perr(a, b)   asm ("perr    %a0,%a1,%v0", a, b)
+#define pklb(a)      asm ("pklb    %a0,%v0", a)
+#define pkwb(a)      asm ("pkwb    %a0,%v0", a)
+#define unpkbl(a)    asm ("unpkbl  %a0,%v0", a)
+#define unpkbw(a)    asm ("unpkbw  %a0,%v0", a)
+#define wh64(a)      asm ("wh64    %a0", a)
+
+#else
+#error "Unknown compiler!"
+#endif
+
+#endif /* AVCODEC_ALPHA_ASM_H */
diff --git a/libavcodec/alpha/blockdsp_alpha.c b/libavcodec/alpha/blockdsp_alpha.c
new file mode 100644
index 0000000..ded439d
--- /dev/null
+++ b/libavcodec/alpha/blockdsp_alpha.c
@@ -0,0 +1,51 @@
+/*
+ * Alpha optimised block operations
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavcodec/blockdsp.h"
+#include "asm.h"
+
+static void clear_blocks_axp(int16_t *blocks) {
+    /* Zero 6 * 64 int16_t coefficients with 64-bit stores, eight
+     * quadwords (64 bytes) per pass.  The buffer is written through a
+     * uint64_t pointer, so it presumably has to be 8-byte aligned --
+     * confirm against the callers.  */
+    uint64_t *q = (uint64_t *) blocks;
+    int bytes_left = sizeof(int16_t) * 6 * 64;
+    int i;
+
+    do {
+        for (i = 0; i < 8; i++)
+            q[i] = 0;
+
+        q          += 8;
+        bytes_left -= 8 * 8;
+    } while (bytes_left);
+}
+
+av_cold void ff_blockdsp_init_alpha(BlockDSPContext *c, unsigned high_bit_depth)
+{
+    if (high_bit_depth)     /* the Alpha routine is installed for low bit depth only */
+        return;
+    c->clear_blocks = clear_blocks_axp;
+}
diff --git a/libavcodec/alpha/hpeldsp_alpha.c b/libavcodec/alpha/hpeldsp_alpha.c
new file mode 100644
index 0000000..8d54807
--- /dev/null
+++ b/libavcodec/alpha/hpeldsp_alpha.c
@@ -0,0 +1,213 @@
+/*
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavcodec/hpeldsp.h"
+#include "hpeldsp_alpha.h"
+#include "asm.h"
+
+static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
+{   /* Bytewise (a + b) >> 1, rounding down; 0xfe keeps bits inside their byte. */
+    return (((b ^ a) & BYTE_VEC(0xfe)) >> 1) + (b & a);
+}
+
+static inline uint64_t avg2(uint64_t a, uint64_t b)
+{   /* Bytewise (a + b + 1) >> 1, rounding up; 0xfe keeps bits inside their byte. */
+    return (b | a) - (((b ^ a) & BYTE_VEC(0xfe)) >> 1);
+}
+
+#if 0
+/* Compiled out, kept as reference: the XY2 routines basically utilize
+   this scheme, but reuse parts in each iteration.  */
+static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
+{
+    uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)   /* upper six bits of every byte, pre-divided by 4 */
+                + ((l2 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l3 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l4 & ~BYTE_VEC(0x03)) >> 2);
+    uint64_t r2 = ((  (l1 & BYTE_VEC(0x03))       /* lower two bits, summed with a rounder of 2 */
+                    + (l2 & BYTE_VEC(0x03))
+                    + (l3 & BYTE_VEC(0x03))
+                    + (l4 & BYTE_VEC(0x03))
+                    + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
+    return r1 + r2;
+}
+#endif
+
+#define OP(LOAD, STORE) /* one quadword per row, h rows; needs h >= 1 */ \
+    do {                                        \
+        STORE(LOAD(pixels), block);             \
+        pixels += line_size;                    \
+        block += line_size;                     \
+    } while (--h)
+
+#define OP_X2(LOAD, STORE) /* AVG2 of each row with itself one byte on */ \
+    do {                                                        \
+        uint64_t pix1, pix2;                                    \
+        /* pix2 = pix1 moved down a byte, pixels[8] on top */   \
+        pix1 = LOAD(pixels);                                    \
+        pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56);        \
+        STORE(AVG2(pix1, pix2), block);                         \
+        pixels += line_size;                                    \
+        block += line_size;                                     \
+    } while (--h)
+
+#define OP_Y2(LOAD, STORE) /* AVG2 of each row with the next; reads h + 1 rows */ \
+    do {                                        \
+        uint64_t pix = LOAD(pixels);            \
+        do {                                    \
+            uint64_t next_pix;                  \
+            /* the row loaded here is reused */ \
+            pixels += line_size;                \
+            next_pix = LOAD(pixels);            \
+            STORE(AVG2(pix, next_pix), block);  \
+            block += line_size;                 \
+            pix = next_pix;                     \
+        } while (--h);                          \
+    } while (0)
+
+#define OP_XY2(LOAD, STORE) /* 2x2 average; reads h + 1 rows, 9 bytes each */ \
+    do {                                                                    \
+        uint64_t pix1 = LOAD(pixels);                                       \
+        uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56);           \
+        uint64_t pix_l = (pix1 & BYTE_VEC(0x03))                            \
+                       + (pix2 & BYTE_VEC(0x03));                           \
+        uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2)                    \
+                       + ((pix2 & ~BYTE_VEC(0x03)) >> 2);                   \
+        /* pix_l: low 2 bits per byte; pix_h: high 6 bits, already / 4. */  \
+        do {                                                                \
+            uint64_t npix1, npix2;                                          \
+            uint64_t npix_l, npix_h;                                        \
+            uint64_t avg;                                                   \
+            /* Same horizontal sums for the next row. */                    \
+            pixels += line_size;                                            \
+            npix1 = LOAD(pixels);                                           \
+            npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56);              \
+            npix_l = (npix1 & BYTE_VEC(0x03))                               \
+                   + (npix2 & BYTE_VEC(0x03));                              \
+            npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2)                       \
+                   + ((npix2 & ~BYTE_VEC(0x03)) >> 2);                      \
+            avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \
+                + pix_h + npix_h;                                           \
+            STORE(avg, block);                                              \
+            /* The next row's sums become the current row's. */             \
+            block += line_size;                                             \
+            pix_l = npix_l;                                                 \
+            pix_h = npix_h;                                                 \
+        } while (--h);                                                      \
+    } while (0)
+
+#define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) /* defines an 8- and a 16-wide function */ \
+static void OPNAME ## _pixels ## SUFF ## _axp                               \
+        (uint8_t *restrict block, const uint8_t *restrict pixels,           \
+         ptrdiff_t line_size, int h)                                        \
+{                                                                           \
+    if ((size_t) pixels & 0x7) { /* source not 8-byte aligned */            \
+        OPKIND(uldq, STORE);                                                \
+    } else {                                                                \
+        OPKIND(ldq, STORE);                                                 \
+    }                                                                       \
+}                                                                           \
+/* 16-wide variant: the 8-wide function on two adjacent columns. */         \
+static void OPNAME ## _pixels16 ## SUFF ## _axp                             \
+        (uint8_t *restrict block, const uint8_t *restrict pixels,           \
+         ptrdiff_t line_size, int h)                                        \
+{                                                                           \
+    OPNAME ## _pixels ## SUFF ## _axp(block,     pixels,     line_size, h); \
+    OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \
+}
+
+#define PIXOP(OPNAME, STORE) /* all four variants */ \
+    MAKE_OP(OPNAME, ,     OP,     STORE)        \
+    MAKE_OP(OPNAME, _x2,  OP_X2,  STORE)        \
+    MAKE_OP(OPNAME, _y2,  OP_Y2,  STORE)        \
+    MAKE_OP(OPNAME, _xy2, OP_XY2, STORE)
+
+/* Rounding primitives.  PIXOP() expands to whole function definitions, so no ';' follows it.  */
+#define AVG2 avg2
+#define AVG4 avg4               /* not referenced by the OP_* macros; avg4 itself is compiled out */
+#define AVG4_ROUNDER BYTE_VEC(0x02)
+#define STORE(l, b) stq(l, b)   /* put: overwrite the destination */
+PIXOP(put, STORE)
+
+#undef STORE
+#define STORE(l, b) stq(AVG2(l, ldq(b)), b)     /* avg: AVG2 with what the destination holds */
+PIXOP(avg, STORE)
+
+/* Not rounding primitives.  */
+#undef AVG2
+#undef AVG4
+#undef AVG4_ROUNDER
+#undef STORE
+#define AVG2 avg2_no_rnd
+#define AVG4 avg4_no_rnd        /* not referenced; no avg4_no_rnd is defined in this file */
+#define AVG4_ROUNDER BYTE_VEC(0x01)
+#define STORE(l, b) stq(l, b)   /* put: overwrite the destination */
+PIXOP(put_no_rnd, STORE)
+
+#undef STORE
+#define STORE(l, b) stq(AVG2(l, ldq(b)), b)     /* avg: AVG2 with what the destination holds */
+PIXOP(avg_no_rnd, STORE)
+
+static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
+                                 ptrdiff_t line_size, int h)
+{   /* A 16-byte-wide copy is done as two adjacent 8-byte-wide column copies. */
+    for (int x = 0; x < 16; x += 8)
+        put_pixels_axp_asm(block + x, pixels + x, line_size, h);
+}
+
+av_cold void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags)
+{
+    c->put_pixels_tab[0][0] = put_pixels16_axp_asm;         /* row [0]: the pixels16 functions */
+    c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
+    c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
+    c->put_pixels_tab[0][3] = put_pixels16_xy2_axp;
+
+    c->put_pixels_tab[1][0] = put_pixels_axp_asm;           /* row [1]: the 8-wide functions */
+    c->put_pixels_tab[1][1] = put_pixels_x2_axp;
+    c->put_pixels_tab[1][2] = put_pixels_y2_axp;
+    c->put_pixels_tab[1][3] = put_pixels_xy2_axp;
+
+    c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm;  /* entry 0 reuses the plain copy */
+    c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp;
+    c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp;
+    c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp;
+
+    c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm;    /* entry 0 reuses the plain copy */
+    c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp;
+    c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp;
+    c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp;
+
+    c->avg_pixels_tab[0][0] = avg_pixels16_axp;
+    c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp;
+    c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp;
+    c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp;
+
+    c->avg_pixels_tab[1][0] = avg_pixels_axp;
+    c->avg_pixels_tab[1][1] = avg_pixels_x2_axp;
+    c->avg_pixels_tab[1][2] = avg_pixels_y2_axp;
+    c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp;
+
+    c->avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels16_axp;  /* single row, pixels16 functions only */
+    c->avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels16_x2_axp;
+    c->avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels16_y2_axp;
+    c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy2_axp;
+}
diff --git a/libavcodec/alpha/hpeldsp_alpha.h b/libavcodec/alpha/hpeldsp_alpha.h
new file mode 100644
index 0000000..985182c
--- /dev/null
+++ b/libavcodec/alpha/hpeldsp_alpha.h
@@ -0,0 +1,28 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ALPHA_HPELDSP_ALPHA_H
+#define AVCODEC_ALPHA_HPELDSP_ALPHA_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
+                        ptrdiff_t line_size, int h); /* in hpeldsp_alpha_asm.S; h: positive multiple of 4 */
+
+#endif /* AVCODEC_ALPHA_HPELDSP_ALPHA_H */
diff --git a/libavcodec/alpha/hpeldsp_alpha_asm.S b/libavcodec/alpha/hpeldsp_alpha_asm.S
new file mode 100644
index 0000000..df386c4
--- /dev/null
+++ b/libavcodec/alpha/hpeldsp_alpha_asm.S
@@ -0,0 +1,125 @@
+/*
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * These functions are scheduled for pca56. They should work
+ * reasonably on ev6, though.
+ */
+
+#include "regdef.h"
+
+
+        .set noat
+        .set noreorder
+        .arch pca56
+        .text
+
+/************************************************************************
+ * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
+ *                         ptrdiff_t line_size, int h)
+ * Four 8-byte rows are copied per loop pass: h must be a positive multiple of 4. */
+        .align 6
+        .globl put_pixels_axp_asm
+        .ent put_pixels_axp_asm
+put_pixels_axp_asm:
+        .frame sp, 0, ra
+        .prologue 0
+
+        and     a1, 7, t0               # low three address bits of a1 (pixels, presumably)
+        beq     t0, $aligned            # zero: take the aligned loop
+
+        .align 4
+$unaligned:
+        ldq_u   t0, 0(a1)               # the two aligned quadwords around row 0
+        ldq_u   t1, 8(a1)
+        addq    a1, a2, a1              # advance the source by a2 (line_size, presumably)
+        nop
+
+        ldq_u   t2, 0(a1)
+        ldq_u   t3, 8(a1)
+        addq    a1, a2, a1
+        nop
+
+        ldq_u   t4, 0(a1)
+        ldq_u   t5, 8(a1)
+        addq    a1, a2, a1
+        nop
+
+        ldq_u   t6, 0(a1)
+        ldq_u   t7, 8(a1)
+        extql   t0, a1, t0              # NOTE(review): a1 was advanced since the load; looks like this needs line_size % 8 == 0 -- confirm
+        addq    a1, a2, a1
+
+        extqh   t1, a1, t1
+        addq    a0, a2, t8              # t8, t9, ta: destination rows 1, 2 and 3
+        extql   t2, a1, t2
+        addq    t8, a2, t9
+
+        extqh   t3, a1, t3
+        addq    t9, a2, ta
+        extql   t4, a1, t4
+        or      t0, t1, t0              # combine the extql and extqh parts of row 0
+
+        extqh   t5, a1, t5
+        or      t2, t3, t2
+        extql   t6, a1, t6
+        or      t4, t5, t4
+
+        extqh   t7, a1, t7
+        or      t6, t7, t6
+        stq     t0, 0(a0)
+        stq     t2, 0(t8)
+
+        stq     t4, 0(t9)
+        subq    a3, 4, a3               # four rows done
+        stq     t6, 0(ta)
+        addq    ta, a2, a0              # destination for the next pass
+
+        bne     a3, $unaligned          # repeat until the row counter is exactly zero
+        ret
+
+        .align 4
+$aligned:
+        ldq     t0, 0(a1)               # four plain quadword loads, one per row
+        addq    a1, a2, a1
+        ldq     t1, 0(a1)
+        addq    a1, a2, a1
+
+        ldq     t2, 0(a1)
+        addq    a1, a2, a1
+        ldq     t3, 0(a1)
+
+        addq    a0, a2, t4              # t4, t5, t6: destination rows 1, 2 and 3
+        addq    a1, a2, a1
+        addq    t4, a2, t5
+        subq    a3, 4, a3               # four rows done
+
+        stq     t0, 0(a0)
+        addq    t5, a2, t6
+        stq     t1, 0(t4)
+        addq    t6, a2, a0              # destination for the next pass
+
+        stq     t2, 0(t5)
+        stq     t3, 0(t6)
+
+        bne     a3, $aligned            # repeat until the row counter is exactly zero
+        ret
+        .end put_pixels_axp_asm
diff --git a/libavcodec/alpha/idctdsp_alpha.c b/libavcodec/alpha/idctdsp_alpha.c
new file mode 100644
index 0000000..1050697
--- /dev/null
+++ b/libavcodec/alpha/idctdsp_alpha.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavcodec/idctdsp.h"
+#include "idctdsp_alpha.h"
+#include "asm.h"
+
+void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
+                                int line_size);    /* defined in idctdsp_alpha_asm.S */
+void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
+                                int line_size);    /* presumably also in idctdsp_alpha_asm.S */
+
+void (*put_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
+                                 int line_size);   /* assigned in ff_idctdsp_init_alpha() */
+void (*add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
+                                 int line_size);   /* assigned in ff_idctdsp_init_alpha() */
+
+#if 0
+/* These functions were the base for the optimized assembler routines,
+   and remain here (compiled out) for documentation purposes.  */
+static void put_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
+                                   ptrdiff_t line_size)
+{
+    int i = 8;                          /* eight rows of eight coefficients */
+    uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
+
+    do {
+        uint64_t shorts0, shorts1;
+
+        shorts0 = ldq(block);                   /* coefficients 0..3 of the row */
+        shorts0 = maxsw4(shorts0, 0);           /* presumably: lower clamp of each 16-bit word at 0 */
+        shorts0 = minsw4(shorts0, clampmask);   /* presumably: upper clamp of each word at 0x00ff */
+        stl(pkwb(shorts0), pixels);             /* 32-bit store of the pkwb() result */
+
+        shorts1 = ldq(block + 4);               /* coefficients 4..7 of the row */
+        shorts1 = maxsw4(shorts1, 0);
+        shorts1 = minsw4(shorts1, clampmask);
+        stl(pkwb(shorts1), pixels + 4);
+
+        pixels += line_size;
+        block += 8;
+    } while (--i);
+}
+
+void add_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
+                            ptrdiff_t line_size)
+{
+    int h = 8;                          /* eight rows of eight coefficients */
+    /* Keep this function a leaf function by generating the constants
+       manually (mainly for the hack value ;-).  */
+    uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
+    uint64_t signmask  = zap(-1, 0x33); /* 0xffff0000ffff0000 */
+    signmask ^= signmask >> 1;  /* 0x8000800080008000 */
+
+    do {
+        uint64_t shorts0, pix0, signs0;
+        uint64_t shorts1, pix1, signs1;
+
+        shorts0 = ldq(block);
+        shorts1 = ldq(block + 4);
+
+        pix0    = unpkbw(ldl(pixels));  /* four destination pixels, unpacked by unpkbw() */
+        /* Signed subword add (MMX paddw).  */
+        signs0  = shorts0 & signmask;   /* set the sign bits aside ... */
+        shorts0 &= ~signmask;
+        shorts0 += pix0;                /* ... add without them ... */
+        shorts0 ^= signs0;              /* ... then fold them back in */
+        /* Clamp. */
+        shorts0 = maxsw4(shorts0, 0);
+        shorts0 = minsw4(shorts0, clampmask);
+
+        /* Next 4.  */
+        pix1    = unpkbw(ldl(pixels + 4));
+        signs1  = shorts1 & signmask;
+        shorts1 &= ~signmask;
+        shorts1 += pix1;
+        shorts1 ^= signs1;
+        shorts1 = maxsw4(shorts1, 0);
+        shorts1 = minsw4(shorts1, clampmask);
+
+        stl(pkwb(shorts0), pixels);     /* both halves are stored after both were read */
+        stl(pkwb(shorts1), pixels + 4);
+
+        pixels += line_size;
+        block += 8;
+    } while (--h);
+}
+#endif
+
+av_cold void ff_idctdsp_init_alpha(IDCTDSPContext *c, AVCodecContext *avctx,
+                                   unsigned high_bit_depth)
+{
+    /* A zero amask() result means the queried feature (MVI) is present.  */
+    if (!amask(AMASK_MVI)) {
+        c->add_pixels_clamped = add_pixels_clamped_mvi_asm;
+        c->put_pixels_clamped = put_pixels_clamped_mvi_asm;
+    }
+    /* Export whichever clamp routines the context now holds.  */
+    add_pixels_clamped_axp_p = c->add_pixels_clamped;
+    put_pixels_clamped_axp_p = c->put_pixels_clamped;
+    if (high_bit_depth || avctx->lowres)
+        return;
+    if (avctx->idct_algo != FF_IDCT_AUTO &&
+        avctx->idct_algo != FF_IDCT_SIMPLEALPHA)
+        return;
+    c->idct     = ff_simple_idct_axp;
+    c->idct_add = ff_simple_idct_add_axp;
+    c->idct_put = ff_simple_idct_put_axp;
+}
diff --git a/libavcodec/alpha/idctdsp_alpha.h b/libavcodec/alpha/idctdsp_alpha.h
new file mode 100644
index 0000000..e52cd80
--- /dev/null
+++ b/libavcodec/alpha/idctdsp_alpha.h
@@ -0,0 +1,34 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ALPHA_IDCTDSP_ALPHA_H
+#define AVCODEC_ALPHA_IDCTDSP_ALPHA_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+extern void (*put_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
+                                        int line_size);    /* defined in idctdsp_alpha.c */
+extern void (*add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
+                                        int line_size);    /* defined in idctdsp_alpha.c */
+
+void ff_simple_idct_axp(int16_t *block);                                    /* installed as c->idct */
+void ff_simple_idct_put_axp(uint8_t *dest, int line_size, int16_t *block);  /* installed as c->idct_put */
+void ff_simple_idct_add_axp(uint8_t *dest, int line_size, int16_t *block);  /* installed as c->idct_add */
+
+#endif /* AVCODEC_ALPHA_IDCTDSP_ALPHA_H */
diff --git a/libavcodec/alpha/idctdsp_alpha_asm.S b/libavcodec/alpha/idctdsp_alpha_asm.S
new file mode 100644
index 0000000..e3a8364
--- /dev/null
+++ b/libavcodec/alpha/idctdsp_alpha_asm.S
@@ -0,0 +1,167 @@
+/*
+ * Alpha optimized IDCT-related routines
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * These functions are scheduled for pca56. They should work
+ * reasonably on ev6, though.
+ */
+
+#include "regdef.h"
+
+        .set noat
+        .set noreorder
+        .arch pca56
+        .text
+
+/************************************************************************
+ * void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
+ *                                 int line_size)
+ */
+        .align 6
+        .globl put_pixels_clamped_mvi_asm
+        .ent put_pixels_clamped_mvi_asm
+put_pixels_clamped_mvi_asm:
+        .frame sp, 0, ra
+        .prologue 0
+        /* a0 = block, a1 = pixels, a2 = line_size; 8 rows, two per pass */
+        lda     t8, -1          /* all ones */
+        lda     t9, 8           # loop counter
+        zap     t8, 0xaa, t8    # 00ff00ff00ff00ff
+
+        .align 4
+1:      ldq     t0,  0(a0)      /* first row, coefficients 0-3 */
+        ldq     t1,  8(a0)      /* first row, coefficients 4-7 */
+        ldq     t2, 16(a0)      /* second row, coefficients 0-3 */
+        ldq     t3, 24(a0)      /* second row, coefficients 4-7 */
+
+        maxsw4  t0, zero, t0    /* clamp each word to >= 0 */
+        subq    t9, 2, t9       /* two rows handled per pass */
+        maxsw4  t1, zero, t1
+        lda     a0, 32(a0)      /* block += 16 */
+
+        maxsw4  t2, zero, t2
+        addq    a1, a2, ta      /* ta = address of the second row */
+        maxsw4  t3, zero, t3
+        minsw4  t0, t8, t0      /* clamp each word to <= 255 */
+
+        minsw4  t1, t8, t1
+        minsw4  t2, t8, t2
+        minsw4  t3, t8, t3
+        pkwb    t0, t0          /* pack four words into four bytes */
+
+        pkwb    t1, t1
+        pkwb    t2, t2
+        pkwb    t3, t3
+        stl     t0, 0(a1)       /* first row, pixels 0-3 */
+
+        stl     t1, 4(a1)       /* first row, pixels 4-7 */
+        addq    ta, a2, a1      /* pixels += 2 * line_size */
+        stl     t2, 0(ta)       /* second row, pixels 0-3 */
+        stl     t3, 4(ta)       /* second row, pixels 4-7 */
+
+        bne     t9, 1b          /* until all 8 rows are done */
+        ret
+        .end put_pixels_clamped_mvi_asm
+
+/************************************************************************
+ * void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
+ *                                 int line_size)
+ */
+        .align 6
+        .globl add_pixels_clamped_mvi_asm
+        .ent add_pixels_clamped_mvi_asm
+add_pixels_clamped_mvi_asm:
+        .frame sp, 0, ra
+        .prologue 0
+        /* a0 = block, a1 = pixels, a2 = line_size; 8 rows, two per pass */
+        lda     t1, -1          /* all ones */
+        lda     th, 8           /* row counter */
+        zap     t1, 0x33, tg    /* 0xffff0000ffff0000 */
+        nop
+
+        srl     tg, 1, t0       /* 0x7fff80007fff8000 */
+        xor     tg, t0, tg      # 0x8000800080008000
+        zap     t1, 0xaa, tf    # 0x00ff00ff00ff00ff
+        /* per quarter: ops 1-4 = carry-safe 4x16-bit add, 5-6 = clamp to 0..255, 7 = pack, 8 = store */
+        .align 4
+1:      ldl     t1, 0(a1)       # pix0 (try to hit cache line soon)
+        ldl     t4, 4(a1)       # pix1
+        addq    a1, a2, te      # pixels += line_size
+        ldq     t0, 0(a0)       # shorts0
+
+        ldl     t7, 0(te)       # pix2 (try to hit cache line soon)
+        ldl     ta, 4(te)       # pix3
+        ldq     t3, 8(a0)       # shorts1
+        ldq     t6, 16(a0)      # shorts2
+
+        ldq     t9, 24(a0)      # shorts3
+        unpkbw  t1, t1          # 0 0 (quarter/op no.)
+        and     t0, tg, t2      # 0 1
+        unpkbw  t4, t4          # 1 0
+
+        bic     t0, tg, t0      # 0 2
+        unpkbw  t7, t7          # 2 0
+        and     t3, tg, t5      # 1 1
+        addq    t0, t1, t0      # 0 3
+
+        xor     t0, t2, t0      # 0 4
+        unpkbw  ta, ta          # 3 0
+        and     t6, tg, t8      # 2 1
+        maxsw4  t0, zero, t0    # 0 5
+
+        bic     t3, tg, t3      # 1 2
+        bic     t6, tg, t6      # 2 2
+        minsw4  t0, tf, t0      # 0 6
+        addq    t3, t4, t3      # 1 3
+
+        pkwb    t0, t0          # 0 7
+        xor     t3, t5, t3      # 1 4
+        maxsw4  t3, zero, t3    # 1 5
+        addq    t6, t7, t6      # 2 3
+
+        xor     t6, t8, t6      # 2 4
+        and     t9, tg, tb      # 3 1
+        minsw4  t3, tf, t3      # 1 6
+        bic     t9, tg, t9      # 3 2
+
+        maxsw4  t6, zero, t6    # 2 5
+        addq    t9, ta, t9      # 3 3
+        stl     t0, 0(a1)       # 0 8
+        minsw4  t6, tf, t6      # 2 6
+
+        xor     t9, tb, t9      # 3 4
+        maxsw4  t9, zero, t9    # 3 5
+        lda     a0, 32(a0)      # block += 16;
+        pkwb    t3, t3          # 1 7
+
+        minsw4  t9, tf, t9      # 3 6
+        subq    th, 2, th       /* two rows handled per pass */
+        pkwb    t6, t6          # 2 7
+        pkwb    t9, t9          # 3 7
+
+        stl     t3, 4(a1)       # 1 8
+        addq    te, a2, a1      # pixels += line_size
+        stl     t6, 0(te)       # 2 8
+        stl     t9, 4(te)       # 3 8
+
+        bne     th, 1b          /* until all 8 rows are done */
+        ret
+        .end add_pixels_clamped_mvi_asm
diff --git a/libavcodec/alpha/me_cmp_alpha.c b/libavcodec/alpha/me_cmp_alpha.c
new file mode 100644
index 0000000..8f36019
--- /dev/null
+++ b/libavcodec/alpha/me_cmp_alpha.c
@@ -0,0 +1,317 @@
+/*
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavcodec/me_cmp.h"
+#include "asm.h"
+
+int pix_abs16x16_mvi_asm(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
+
+static inline uint64_t avg2(uint64_t a, uint64_t b)
+{   /* (a + b + 1) >> 1 per byte, assuming BYTE_VEC fills every byte lane -- confirm in asm.h */
+    return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
+}
+
+static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
+{
+    /* Per-byte (l1 + l2 + l3 + l4 + 2) >> 2, computed in two parts so that
+     * no byte lane can overflow into its neighbour.  */
+    const uint64_t low2 = BYTE_VEC(0x03);
+    /* Upper six bits of every byte, divided by four before summing.  */
+    uint64_t hi = ((l1 & ~low2) >> 2) + ((l2 & ~low2) >> 2)
+                + ((l3 & ~low2) >> 2) + ((l4 & ~low2) >> 2);
+    /* Low two bits of every byte, summed, rounded, then divided by four.  */
+    uint64_t lo = (l1 & low2) + (l2 & low2) + (l3 & low2) + (l4 & low2);
+    lo = ((lo + BYTE_VEC(0x02)) >> 2) & low2;
+    return hi + lo;
+}
+
+static int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    /* Sum of absolute differences over h rows of 8 pixels each.  The
+     * alignment test on pix2 is made once, outside the row loops; v is
+     * not used.  */
+    int sad = 0;
+
+    if (!((size_t) pix2 & 0x7)) {
+        /* pix2 is 8-byte aligned: plain quadword loads on both sides.  */
+        do {
+            uint64_t cur = ldq(pix1);
+            uint64_t ref = ldq(pix2);
+
+            sad  += perr(cur, ref);
+            pix1 += line_size;
+            pix2 += line_size;
+        } while (--h);
+        return sad;
+    }
+
+    /* uldq() works only when pix2 is actually unaligned.  */
+    do {                        /* one row of 8 pixels per pass */
+        uint64_t cur = ldq(pix1);
+        uint64_t ref = uldq(pix2);
+
+        sad  += perr(cur, ref);
+        pix1 += line_size;
+        pix2 += line_size;
+    } while (--h);
+    return sad;
+}
+
+#if 0                           /* disabled: replaced by pix_abs16x16_mvi_asm */
+int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
+{
+    int result = 0;
+    int h = 16;
+
+    if ((size_t) pix2 & 0x7) {
+        /* works only when pix2 is actually unaligned */
+        do {                    /* do 16 pixel a time */
+            uint64_t p1_l, p1_r, p2_l, p2_r;
+            uint64_t t;
+
+            p1_l  = ldq(pix1);
+            p1_r  = ldq(pix1 + 8);
+            t     = ldq_u(pix2 + 8);
+            p2_l  = extql(ldq_u(pix2), pix2) | extqh(t, pix2);
+            p2_r  = extql(t, pix2) | extqh(ldq_u(pix2 + 16), pix2);
+            pix1 += line_size;
+            pix2 += line_size;
+
+            result += perr(p1_l, p2_l)
+                    + perr(p1_r, p2_r);
+        } while (--h);
+    } else {
+        do {
+            uint64_t p1_l, p1_r, p2_l, p2_r;
+
+            p1_l = ldq(pix1);
+            p1_r = ldq(pix1 + 8);
+            p2_l = ldq(pix2);
+            p2_r = ldq(pix2 + 8);
+            pix1 += line_size;
+            pix2 += line_size;
+
+            result += perr(p1_l, p2_l)
+                    + perr(p1_r, p2_r);
+        } while (--h);
+    }
+
+    return result;
+}
+#endif
+
+static int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int result = 0;
+    uint64_t disalign = (size_t) pix2 & 0x7;
+    /* SAD of pix1 against avg2(pix2[x], pix2[x + 1]), 16 pixels wide, h rows; one case per pix2 alignment */
+    switch (disalign) {
+    case 0:                     /* pix2 aligned: the x + 1 copy is built by shifting */
+        do {
+            uint64_t p1_l, p1_r, p2_l, p2_r;
+            uint64_t l, r;
+
+            p1_l = ldq(pix1);
+            p1_r = ldq(pix1 + 8);
+            l    = ldq(pix2);
+            r    = ldq(pix2 + 8);
+            p2_l = avg2(l, (l >> 8) | ((uint64_t) r << 56));
+            p2_r = avg2(r, (r >> 8) | ((uint64_t) pix2[16] << 56));
+            pix1 += line_size;
+            pix2 += line_size;
+
+            result += perr(p1_l, p2_l)
+                    + perr(p1_r, p2_r);
+        } while (--h);
+        break;
+    case 7:
+        /* |.......l|lllllllr|rrrrrrr*|
+           This case is special because disalign1 would be 8, which
+           gets treated as 0 by extqh.  At least it is a bit faster
+           that way :)  */
+        do {
+            uint64_t p1_l, p1_r, p2_l, p2_r;
+            uint64_t l, m, r;
+
+            p1_l = ldq(pix1);
+            p1_r = ldq(pix1 + 8);
+            l     = ldq_u(pix2);
+            m     = ldq_u(pix2 + 8);
+            r     = ldq_u(pix2 + 16);
+            p2_l  = avg2(extql(l, disalign) | extqh(m, disalign), m); /* m alone is the x + 1 copy */
+            p2_r  = avg2(extql(m, disalign) | extqh(r, disalign), r); /* likewise r */
+            pix1 += line_size;
+            pix2 += line_size;
+
+            result += perr(p1_l, p2_l)
+                    + perr(p1_r, p2_r);
+        } while (--h);
+        break;
+    default:                    /* disalign 1..6: extract at disalign and at disalign + 1 */
+        do {
+            uint64_t disalign1 = disalign + 1;
+            uint64_t p1_l, p1_r, p2_l, p2_r;
+            uint64_t l, m, r;
+
+            p1_l  = ldq(pix1);
+            p1_r  = ldq(pix1 + 8);
+            l     = ldq_u(pix2);
+            m     = ldq_u(pix2 + 8);
+            r     = ldq_u(pix2 + 16);
+            p2_l  = avg2(extql(l, disalign) | extqh(m, disalign),
+                         extql(l, disalign1) | extqh(m, disalign1));
+            p2_r  = avg2(extql(m, disalign) | extqh(r, disalign),
+                         extql(m, disalign1) | extqh(r, disalign1));
+            pix1 += line_size;
+            pix2 += line_size;
+
+            result += perr(p1_l, p2_l)
+                    + perr(p1_r, p2_r);
+        } while (--h);
+        break;
+    }
+    return result;
+}
+
+static int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int sad = 0;
+    /* SAD of pix1 against the average of vertically adjacent pix2 rows (16 wide, h rows). */
+    if ((size_t) pix2 & 0x7) {
+        /* Unaligned pix2: build each row from three ldq_u loads.  */
+        uint64_t mid   = ldq_u(pix2 + 8);
+        uint64_t top_l = extql(ldq_u(pix2), pix2) | extqh(mid, pix2);
+        uint64_t top_r = extql(mid, pix2) | extqh(ldq_u(pix2 + 16), pix2);
+
+        do {
+            uint64_t cur_l = ldq(pix1);
+            uint64_t cur_r = ldq(pix1 + 8);
+            uint64_t bot_l, bot_r;
+
+            /* Fetch the row below; it is averaged with the row above.  */
+            pix2 += line_size;
+            mid   = ldq_u(pix2 + 8);
+            bot_l = extql(ldq_u(pix2), pix2) | extqh(mid, pix2);
+            bot_r = extql(mid, pix2) | extqh(ldq_u(pix2 + 16), pix2);
+
+            sad += perr(cur_l, avg2(top_l, bot_l))
+                 + perr(cur_r, avg2(top_r, bot_r));
+
+            /* The lower row becomes the upper row of the next pass.  */
+            pix1 += line_size;
+            top_l = bot_l;
+            top_r = bot_r;
+        } while (--h);
+    } else {
+        uint64_t top_l = ldq(pix2);
+        uint64_t top_r = ldq(pix2 + 8);
+
+        do {
+            uint64_t cur_l = ldq(pix1);
+            uint64_t cur_r = ldq(pix1 + 8);
+            uint64_t bot_l, bot_r;
+
+            pix2 += line_size;
+            bot_l = ldq(pix2);
+            bot_r = ldq(pix2 + 8);
+
+            sad += perr(cur_l, avg2(top_l, bot_l))
+                 + perr(cur_r, avg2(top_r, bot_r));
+
+            pix1 += line_size;
+            top_l = bot_l;
+            top_r = bot_r;
+        } while (--h);
+    }
+    return sad;
+}
+
+static int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    /* SAD of pix1 against pix2 averaged over 2x2 neighbourhoods (half-pel in
+     * x and y), 16 pixels wide and h rows high.  A pix2 row is held as two
+     * quadwords plus pixel 16, the latter already shifted into the top byte
+     * (p2_x) so it can be ORed into the right half shifted down by one
+     * pixel.  pix2 is read for rows 0..h, pix1 only for rows 0..h-1.  */
+    int result = 0;
+    uint64_t p2_l, p2_r, p2_x;
+
+    if ((size_t) pix2 & 0x7) { /* could be optimized a lot */
+        p2_l = uldq(pix2);
+        p2_r = uldq(pix2 + 8);
+        p2_x = (uint64_t) pix2[16] << 56;
+    } else {
+        p2_l = ldq(pix2);
+        p2_r = ldq(pix2 + 8);
+        p2_x = ldq(pix2 + 16) << 56;
+    }
+
+    do {
+        uint64_t p1_l, p1_r;
+        uint64_t np2_l, np2_r, np2_x;
+
+        /* Load the pix1 row that is compared in this pass.  Prefetching the
+         * following row instead would read one row past the end of the
+         * block on the final pass.  */
+        p1_l = ldq(pix1);
+        p1_r = ldq(pix1 + 8);
+        pix1 += line_size;
+        pix2 += line_size;
+
+        if ((size_t) pix2 & 0x7) { /* could be optimized a lot */
+            np2_l = uldq(pix2);
+            np2_r = uldq(pix2 + 8);
+            np2_x = (uint64_t) pix2[16] << 56;
+        } else {
+            np2_l = ldq(pix2);
+            np2_r = ldq(pix2 + 8);
+            np2_x = ldq(pix2 + 16) << 56;
+        }
+
+        result += perr(p1_l,
+                       avg4( p2_l, ( p2_l >> 8) | ((uint64_t)  p2_r << 56),
+                            np2_l, (np2_l >> 8) | ((uint64_t) np2_r << 56)))
+                + perr(p1_r,
+                       avg4( p2_r, ( p2_r >> 8) | ((uint64_t)  p2_x),
+                            np2_r, (np2_r >> 8) | ((uint64_t) np2_x)));
+
+        /* The lower pix2 row is the upper row of the next pass.  */
+        p2_l = np2_l;
+        p2_r = np2_r;
+        p2_x = np2_x;
+    } while (--h);
+    return result;
+}
+
+av_cold void ff_me_cmp_init_alpha(MECmpContext *c, AVCodecContext *avctx)
+{
+    /* amask() clears the bits of present features; non-zero means no MVI.  */
+    if (amask(AMASK_MVI) != 0)
+        return;
+    c->sad[0]        = pix_abs16x16_mvi_asm;
+    c->sad[1]        = pix_abs8x8_mvi;
+    c->pix_abs[0][0] = pix_abs16x16_mvi_asm;
+    c->pix_abs[1][0] = pix_abs8x8_mvi;
+    c->pix_abs[0][1] = pix_abs16x16_x2_mvi;
+    c->pix_abs[0][2] = pix_abs16x16_y2_mvi;
+    c->pix_abs[0][3] = pix_abs16x16_xy2_mvi;
+}
diff --git a/libavcodec/alpha/me_cmp_mvi_asm.S b/libavcodec/alpha/me_cmp_mvi_asm.S
new file mode 100644
index 0000000..2399085
--- /dev/null
+++ b/libavcodec/alpha/me_cmp_mvi_asm.S
@@ -0,0 +1,179 @@
+/*
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "regdef.h"
+
+/* Some nicer register names.  */
+#define ta t10
+#define tb t11
+#define tc t12
+#define td AT
+/* Danger: these overlap with the argument list and the return value */
+#define te a5
+#define tf a4
+#define tg a3
+#define th v0
+
+        .set noat
+        .set noreorder
+        .arch pca56
+        .text
+
+/*****************************************************************************
+ * int pix_abs16x16_mvi_asm(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+ *
+ * This code is written with a pca56 in mind. For ev6, one should
+ * really take the increased latency of 3 cycles for MVI instructions
+ * into account.
+ *
+ * It is important to keep the loading and first use of a register as
+ * far apart as possible, because if a register is accessed before it
+ * has been fetched from memory, the CPU will stall.
+ */
+        .align 4
+        .globl pix_abs16x16_mvi_asm
+        .ent pix_abs16x16_mvi_asm
+pix_abs16x16_mvi_asm:
+        .frame sp, 0, ra, 0
+        .prologue 0
+        /* a1 = pix1, a2 = pix2, a3 = line_size, a4 = h; the SAD is returned in v0 */
+        and     a2, 7, t0       /* misalignment of pix2 */
+        clr     v0              /* result = 0 */
+        beq     t0, $aligned
+        .align 4
+$unaligned:
+        /* Registers:
+           line 0:
+           t0:  left_u -> left lo -> left
+           t1:  mid
+           t2:  right_u -> right hi -> right
+           t3:  ref left
+           t4:  ref right
+           line 1:
+           t5:  left_u -> left lo -> left
+           t6:  mid
+           t7:  right_u -> right hi -> right
+           t8:  ref left
+           t9:  ref right
+           temp:
+           ta:  left hi
+           tb:  right lo
+           tc:  error left
+           td:  error right  */
+
+        /* load line 0 */
+        ldq_u   t0, 0(a2)       # left_u
+        ldq_u   t1, 8(a2)       # mid
+        ldq_u   t2, 16(a2)      # right_u
+        ldq     t3, 0(a1)       # ref left
+        ldq     t4, 8(a1)       # ref right
+        addq    a1, a3, a1      # pix1
+        addq    a2, a3, a2      # pix2
+        /* load line 1 */
+        ldq_u   t5, 0(a2)       # left_u
+        ldq_u   t6, 8(a2)       # mid
+        ldq_u   t7, 16(a2)      # right_u
+        ldq     t8, 0(a1)       # ref left
+        ldq     t9, 8(a1)       # ref right
+        addq    a1, a3, a1      # pix1
+        addq    a2, a3, a2      # pix2
+        /* calc line 0 */
+        extql   t0, a2, t0      # left lo
+        extqh   t1, a2, ta      # left hi
+        extql   t1, a2, tb      # right lo
+        or      t0, ta, t0      # left
+        extqh   t2, a2, t2      # right hi
+        perr    t3, t0, tc      # error left
+        or      t2, tb, t2      # right
+        perr    t4, t2, td      # error right
+        addq    v0, tc, v0      # add error left
+        addq    v0, td, v0      # add error right
+        /* calc line 1 */
+        extql   t5, a2, t5      # left lo
+        extqh   t6, a2, ta      # left hi
+        extql   t6, a2, tb      # right lo
+        or      t5, ta, t5      # left
+        extqh   t7, a2, t7      # right hi
+        perr    t8, t5, tc      # error left
+        or      t7, tb, t7      # right
+        perr    t9, t7, td      # error right
+        addq    v0, tc, v0      # add error left
+        addq    v0, td, v0      # add error right
+        /* loop */
+        subq    a4,  2, a4      # h -= 2
+        bne     a4, $unaligned  /* ends only at exactly 0: h must be a multiple of 2 */
+        ret
+
+        .align 4
+$aligned:
+        /* load line 0 */
+        ldq     t0, 0(a2)       # left
+        ldq     t1, 8(a2)       # right
+        addq    a2, a3, a2      # pix2
+        ldq     t2, 0(a1)       # ref left
+        ldq     t3, 8(a1)       # ref right
+        addq    a1, a3, a1      # pix1
+        /* load line 1 */
+        ldq     t4, 0(a2)       # left
+        ldq     t5, 8(a2)       # right
+        addq    a2, a3, a2      # pix2
+        ldq     t6, 0(a1)       # ref left
+        ldq     t7, 8(a1)       # ref right
+        addq    a1, a3, a1      # pix1
+        /* load line 2 */
+        ldq     t8, 0(a2)       # left
+        ldq     t9, 8(a2)       # right
+        addq    a2, a3, a2      # pix2
+        ldq     ta, 0(a1)       # ref left
+        ldq     tb, 8(a1)       # ref right
+        addq    a1, a3, a1      # pix1
+        /* load line 3 */
+        ldq     tc, 0(a2)       # left
+        ldq     td, 8(a2)       # right
+        addq    a2, a3, a2      # pix2
+        ldq     te, 0(a1)       # ref left
+        ldq     a0, 8(a1)       # ref right
+        /* calc line 0 */
+        perr    t0, t2, t0      # error left
+        addq    a1, a3, a1      # pix1
+        perr    t1, t3, t1      # error right
+        addq    v0, t0, v0      # add error left
+        /* calc line 1 */
+        perr    t4, t6, t0      # error left
+        addq    v0, t1, v0      # add error right
+        perr    t5, t7, t1      # error right
+        addq    v0, t0, v0      # add error left
+        /* calc line 2 */
+        perr    t8, ta, t0      # error left
+        addq    v0, t1, v0      # add error right
+        perr    t9, tb, t1      # error right
+        addq    v0, t0, v0      # add error left
+        /* calc line 3 */
+        perr    tc, te, t0      # error left
+        addq    v0, t1, v0      # add error right
+        perr    td, a0, t1      # error right
+        addq    v0, t0, v0      # add error left
+        addq    v0, t1, v0      # add error right
+        /* loop */
+        subq    a4,  4, a4      # h -= 4
+        bne     a4, $aligned    /* ends only at exactly 0: h must be a multiple of 4 */
+        ret
+        .end pix_abs16x16_mvi_asm
diff --git a/libavcodec/alpha/mpegvideo_alpha.c b/libavcodec/alpha/mpegvideo_alpha.c
new file mode 100644
index 0000000..7ba0f2e
--- /dev/null
+++ b/libavcodec/alpha/mpegvideo_alpha.c
@@ -0,0 +1,110 @@
+/*
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavcodec/mpegvideo.h"
+#include "asm.h"
+
+static void dct_unquantize_h263_axp(int16_t *block, int n_coeffs,
+                                    uint64_t qscale, uint64_t qadd)
+{
+    uint64_t qmul = qscale << 1;
+    uint64_t correction = WORD_VEC(qmul * 255 >> 8);
+    int i;
+    /* Unquantize coefficients 0..n_coeffs of block in place, one quadword (four levels) per pass. */
+    qadd = WORD_VEC(qadd);      /* presumably qadd copied into all four words -- confirm in asm.h */
+
+    for(i = 0; i <= n_coeffs; block += 4, i += 4) {
+        uint64_t levels, negmask, zeros, add, sub;
+
+        levels = ldq(block);
+        if (levels == 0)        /* all four levels are zero: nothing to do */
+            continue;
+
+#ifdef __alpha_max__
+        /* I don't think the speed difference justifies runtime
+           detection.  */
+        negmask = maxsw4(levels, -1); /* negative -> ffff (-1) */
+        negmask = minsw4(negmask, 0); /* positive -> 0000 (0) */
+#else
+        negmask = cmpbge(WORD_VEC(0x7fff), levels);
+        negmask &= (negmask >> 1) | (1 << 7);
+        negmask = zap(-1, negmask);
+#endif
+
+        zeros = cmpbge(0, levels);
+        zeros &= zeros >> 1;
+        /* zeros |= zeros << 1 is not needed since qadd <= 255, so
+           zapping the lower byte suffices.  */
+
+        levels *= qmul;         /* one 64-bit multiply covers all four words */
+        levels -= correction & (negmask << 16); /* NOTE(review): looks like it removes what a negative word leaks into the next one -- confirm */
+
+        add = qadd & ~negmask;  /* qadd for the non-negative words */
+        sub = qadd &  negmask;  /* qadd for the negative words */
+        /* Set qadd to 0 for levels == 0.  */
+        add = zap(add, zeros);
+        levels += add;
+        levels -= sub;
+
+        stq(levels, block);
+    }
+}
+
+static void dct_unquantize_h263_intra_axp(MpegEncContext *s, int16_t *block,
+                                    int n, int qscale)
+{
+    /* The DC coefficient gets its own scaling: keep a copy, let the common
+     * routine run over the whole block, then store the scaled DC back.  */
+    int16_t dc = block[0];
+    uint64_t qadd = 0;
+    int last;
+
+    if (!s->h263_aic) {
+        /* n < 4 selects y_dc_scale, any other block index c_dc_scale.  */
+        if (n < 4)
+            dc *= s->y_dc_scale;
+        else
+            dc *= s->c_dc_scale;
+        qadd = (qscale - 1) | 1;
+    }
+
+    /* With ac_pred set all 64 coefficients are processed.  */
+    last = s->ac_pred ? 63
+                      : s->inter_scantable.raster_end[s->block_last_index[n]];
+
+    dct_unquantize_h263_axp(block, last, qscale, qadd);
+
+    block[0] = dc;
+}
+
+static void dct_unquantize_h263_inter_axp(MpegEncContext *s, int16_t *block,
+                                    int n, int qscale)
+{   /* No DC special case here; qadd is always the odd value (qscale - 1) | 1. */
+    int n_coeffs = s->inter_scantable.raster_end[s->block_last_index[n]];
+    dct_unquantize_h263_axp(block, n_coeffs, qscale, (qscale - 1) | 1);
+}
+
+av_cold void ff_MPV_common_init_axp(MpegEncContext *s)
+{   /* Installs both H.263 unquantizers unconditionally (no amask() feature test here). */
+    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_axp;
+    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_axp;
+}
diff --git a/libavcodec/alpha/pixblockdsp_alpha.c b/libavcodec/alpha/pixblockdsp_alpha.c
new file mode 100644
index 0000000..4e49c9b
--- /dev/null
+++ b/libavcodec/alpha/pixblockdsp_alpha.c
@@ -0,0 +1,78 @@
+/*
+ * SIMD-optimized pixel operations
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavcodec/pixblockdsp.h"
+#include "asm.h"
+
+static void get_pixels_mvi(int16_t *restrict block,
+                           const uint8_t *restrict pixels, int line_size)
+{
+    /* Widen an 8x8 block of bytes to 16-bit words, one row per pass.  */
+    int row;
+
+    for (row = 0; row < 8; row++) {
+        const uint64_t eight = ldq(pixels);     /* one row of 8 pixels */
+        /* unpkbw() spreads four bytes into four words: low half, then high. */
+        stq(unpkbw(eight),       block);
+        stq(unpkbw(eight >> 32), block + 4);
+
+        pixels += line_size;
+        block  += 8;
+    }
+}
+
+static void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2,
+                            int stride) {
+    int h = 8;
+    uint64_t mask = 0x4040;
+    /* block = s1 - s2 as 16-bit words: 8 rows of 8 pixels, one row per pass */
+    mask |= mask << 16;
+    mask |= mask << 32;
+    do {
+        uint64_t x, y, c, d, a;
+        uint64_t signs;
+
+        x = ldq(s1);
+        y = ldq(s2);
+        c = cmpbge(x, y);       /* bit i set where byte i of x >= byte i of y */
+        d = x - y;              /* 64-bit subtract: borrows cross byte lanes */
+        a = zap(mask, c);       /* We use 0x4040404040404040 here...  */
+        d += 4 * a;             /* ...so we can use s4addq here.      */
+        signs = zap(-1, c);     /* 0xff in every byte where x < y */
+        /* 4 * 0x40 = 0x100 repays the borrow taken from the next byte; signs fill the high bytes */
+        stq(unpkbw(d)       | (unpkbw(signs)       << 8), block);
+        stq(unpkbw(d >> 32) | (unpkbw(signs >> 32) << 8), block + 4);
+
+        s1 += stride;
+        s2 += stride;
+        block += 8;
+    } while (--h);
+}
+
+av_cold void ff_pixblockdsp_init_alpha(PixblockDSPContext *c, AVCodecContext *avctx,
+                                       unsigned high_bit_depth)
+{
+    if (amask(AMASK_MVI) != 0)  /* non-zero: MVI not available */
+        return;
+    if (!high_bit_depth)        /* get_pixels_mvi reads one byte per sample */
+        c->get_pixels = get_pixels_mvi;
+    c->diff_pixels = diff_pixels_mvi;
+}
diff --git a/libavcodec/alpha/regdef.h b/libavcodec/alpha/regdef.h
new file mode 100644
index 0000000..f05577a
--- /dev/null
+++ b/libavcodec/alpha/regdef.h
@@ -0,0 +1,77 @@
+/*
+ * Alpha optimized DSP utils
+ * copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Some BSDs don't seem to have regdef.h... sigh  */
+#ifndef AVCODEC_ALPHA_REGDEF_H
+#define AVCODEC_ALPHA_REGDEF_H
+
+#define v0      $0      /* function return value */
+
+#define t0      $1      /* temporary registers (caller-saved) */
+#define t1      $2
+#define t2      $3
+#define t3      $4
+#define t4      $5
+#define t5      $6
+#define t6      $7
+#define t7      $8
+
+#define s0      $9      /* saved-registers (callee-saved registers) */
+#define s1      $10
+#define s2      $11
+#define s3      $12
+#define s4      $13
+#define s5      $14
+#define s6      $15
+#define fp      s6      /* frame-pointer (s6 in frame-less procedures) */
+
+#define a0      $16     /* argument registers (caller-saved) */
+#define a1      $17
+#define a2      $18
+#define a3      $19
+#define a4      $20
+#define a5      $21
+
+#define t8      $22     /* more temps (caller-saved) */
+#define t9      $23
+#define t10     $24
+#define t11     $25
+#define ra      $26     /* return address register */
+#define t12     $27
+
+#define pv      t12     /* procedure-variable register */
+#define AT      $at     /* assembler temporary */
+#define gp      $29     /* global pointer */
+#define sp      $30     /* stack pointer */
+#define zero    $31     /* reads as zero, writes are noops */
+
+/* Some nicer register names.  */
+#define ta t10
+#define tb t11
+#define tc t12
+#define td AT
+/* Danger: these overlap with the argument list and the return value */
+#define te a5
+#define tf a4
+#define tg a3
+#define th v0
+
+#endif /* AVCODEC_ALPHA_REGDEF_H */
diff --git a/libavcodec/alpha/simple_idct_alpha.c b/libavcodec/alpha/simple_idct_alpha.c
new file mode 100644
index 0000000..04be0ce
--- /dev/null
+++ b/libavcodec/alpha/simple_idct_alpha.c
@@ -0,0 +1,303 @@
+/*
+ * Simple IDCT (Alpha optimized)
+ *
+ * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * based upon some commented-out C code from mpeg2dec (idct_mmx.c
+ * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
+ *
+ * Alpha optimizations by Måns Rullgård <mans@mansr.com>
+ *                     and Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "idctdsp_alpha.h"
+#include "asm.h"
+
+// Wi = cos(i * M_PI / 16) * sqrt(2) * (1 << 14), rounded to an integer
+// W4 is actually exactly 16384, but using 16383 works around
+// accumulating rounding errors for some encoders
+#define W1 22725
+#define W2 21407
+#define W3 19266
+#define W4 16383
+#define W5 12873
+#define W6  8867
+#define W7  4520
+#define ROW_SHIFT 11    /* right shift applied to the row-pass outputs */
+#define COL_SHIFT 20    /* right shift applied to the column-pass outputs */
+
+/* In-place row pass. Returns 0: all entries 0, 1: only first entry nonzero, 2: otherwise  */
+static inline int idct_row(int16_t *row)
+{
+    int a0, a1, a2, a3, b0, b1, b2, b3, t;
+    uint64_t l, r, t2;
+    l = ldq(row);               /* row[0..3] */
+    r = ldq(row + 4);           /* row[4..7] */
+
+    if (l == 0 && r == 0)
+        return 0;               /* nothing is written for an all-zero row */
+
+    a0 = W4 * sextw(l) + (1 << (ROW_SHIFT - 1));    /* includes the rounding bias */
+
+    if (((l & ~0xffffUL) | r) == 0) {   /* everything above the low 16 bits of l is zero */
+        a0 >>= ROW_SHIFT;
+        t2 = (uint16_t) a0;
+        t2 |= t2 << 16;         /* replicate the 16-bit result ... */
+        t2 |= t2 << 32;         /* ... into all four lanes of the quadword */
+
+        stq(t2, row);
+        stq(t2, row + 4);
+        return 1;
+    }
+
+    a1 = a0;                    /* a0..a3 collect the even-indexed inputs */
+    a2 = a0;
+    a3 = a0;
+
+    t = extwl(l, 4);            /* row[2] */
+    if (t != 0) {
+        t = sextw(t);
+        a0 += W2 * t;
+        a1 += W6 * t;
+        a2 -= W6 * t;
+        a3 -= W2 * t;
+    }
+
+    t = extwl(r, 0);            /* row[4] */
+    if (t != 0) {
+        t = sextw(t);
+        a0 += W4 * t;
+        a1 -= W4 * t;
+        a2 -= W4 * t;
+        a3 += W4 * t;
+    }
+
+    t = extwl(r, 4);            /* row[6] */
+    if (t != 0) {
+        t = sextw(t);
+        a0 += W6 * t;
+        a1 -= W2 * t;
+        a2 += W2 * t;
+        a3 -= W6 * t;
+    }
+
+    t = extwl(l, 2);            /* row[1]; b0..b3 collect the odd-indexed inputs */
+    if (t != 0) {
+        t = sextw(t);
+        b0 = W1 * t;
+        b1 = W3 * t;
+        b2 = W5 * t;
+        b3 = W7 * t;
+    } else {
+        b0 = 0;
+        b1 = 0;
+        b2 = 0;
+        b3 = 0;
+    }
+
+    t = extwl(l, 6);            /* row[3] */
+    if (t) {
+        t = sextw(t);
+        b0 += W3 * t;
+        b1 -= W7 * t;
+        b2 -= W1 * t;
+        b3 -= W5 * t;
+    }
+
+
+    t = extwl(r, 2);            /* row[5] */
+    if (t) {
+        t = sextw(t);
+        b0 += W5 * t;
+        b1 -= W1 * t;
+        b2 += W7 * t;
+        b3 += W3 * t;
+    }
+
+    t = extwl(r, 6);            /* row[7] */
+    if (t) {
+        t = sextw(t);
+        b0 += W7 * t;
+        b1 -= W5 * t;
+        b2 += W3 * t;
+        b3 -= W1 * t;
+    }
+
+    row[0] = (a0 + b0) >> ROW_SHIFT;    /* outputs k and 7 - k use the same a/b pair */
+    row[1] = (a1 + b1) >> ROW_SHIFT;
+    row[2] = (a2 + b2) >> ROW_SHIFT;
+    row[3] = (a3 + b3) >> ROW_SHIFT;
+    row[4] = (a3 - b3) >> ROW_SHIFT;
+    row[5] = (a2 - b2) >> ROW_SHIFT;
+    row[6] = (a1 - b1) >> ROW_SHIFT;
+    row[7] = (a0 - b0) >> ROW_SHIFT;
+
+    return 2;
+}
+
+static inline void idct_col(int16_t *col)
+{   /* in-place column pass over col[0], col[8], ..., col[56] */
+    int a0, a1, a2, a3, b0, b1, b2, b3;
+
+    col[0] += (1 << (COL_SHIFT - 1)) / W4;  /* rounding bias, scaled back up by W4 below */
+
+    a0 = W4 * col[8 * 0];       /* a0..a3 collect the even-indexed inputs */
+    a1 = W4 * col[8 * 0];
+    a2 = W4 * col[8 * 0];
+    a3 = W4 * col[8 * 0];
+
+    if (col[8 * 2]) {           /* zero inputs skip their multiplies */
+        a0 += W2 * col[8 * 2];
+        a1 += W6 * col[8 * 2];
+        a2 -= W6 * col[8 * 2];
+        a3 -= W2 * col[8 * 2];
+    }
+
+    if (col[8 * 4]) {
+        a0 += W4 * col[8 * 4];
+        a1 -= W4 * col[8 * 4];
+        a2 -= W4 * col[8 * 4];
+        a3 += W4 * col[8 * 4];
+    }
+
+    if (col[8 * 6]) {
+        a0 += W6 * col[8 * 6];
+        a1 -= W2 * col[8 * 6];
+        a2 += W2 * col[8 * 6];
+        a3 -= W6 * col[8 * 6];
+    }
+
+    if (col[8 * 1]) {           /* b0..b3 collect the odd-indexed inputs */
+        b0 = W1 * col[8 * 1];
+        b1 = W3 * col[8 * 1];
+        b2 = W5 * col[8 * 1];
+        b3 = W7 * col[8 * 1];
+    } else {
+        b0 = 0;
+        b1 = 0;
+        b2 = 0;
+        b3 = 0;
+    }
+
+    if (col[8 * 3]) {
+        b0 += W3 * col[8 * 3];
+        b1 -= W7 * col[8 * 3];
+        b2 -= W1 * col[8 * 3];
+        b3 -= W5 * col[8 * 3];
+    }
+
+    if (col[8 * 5]) {
+        b0 += W5 * col[8 * 5];
+        b1 -= W1 * col[8 * 5];
+        b2 += W7 * col[8 * 5];
+        b3 += W3 * col[8 * 5];
+    }
+
+    if (col[8 * 7]) {
+        b0 += W7 * col[8 * 7];
+        b1 -= W5 * col[8 * 7];
+        b2 += W3 * col[8 * 7];
+        b3 -= W1 * col[8 * 7];
+    }
+
+    col[8 * 0] = (a0 + b0) >> COL_SHIFT;    /* all inputs were read above, so */
+    col[8 * 7] = (a0 - b0) >> COL_SHIFT;    /* overwriting col[] here is safe */
+    col[8 * 1] = (a1 + b1) >> COL_SHIFT;
+    col[8 * 6] = (a1 - b1) >> COL_SHIFT;
+    col[8 * 2] = (a2 + b2) >> COL_SHIFT;
+    col[8 * 5] = (a2 - b2) >> COL_SHIFT;
+    col[8 * 3] = (a3 + b3) >> COL_SHIFT;
+    col[8 * 4] = (a3 - b3) >> COL_SHIFT;
+}
+
+/* Column pass for the case where only row 0 is nonzero after the row
+   pass: every output row is then the same scaled, rounded row 0.  */
+static inline void idct_col2(int16_t *col)
+{
+    uint64_t lo, hi;
+    int16_t *dst;
+    int k;
+
+    for (k = 0; k < 8; k++) {
+        int dc = (col[k] + (1 << (COL_SHIFT - 1)) / W4) * W4;
+
+        col[k] = dc >> COL_SHIFT;
+    }
+
+    /* Row 0 is done; copy its two quadwords into rows 1..7. */
+    lo = ldq(col);
+    hi = ldq(col + 4);
+
+    for (dst = col + 8; dst < col + 64; dst += 8) {
+        stq(lo, dst);
+        stq(hi, dst + 4);
+    }
+}
+
+void ff_simple_idct_axp(int16_t *block)
+{
+    int16_t *row;
+    int n;
+    int only_first_row = 1;     /* rows 1..7 were reported as all zero */
+    int flat_rows      = 1;     /* no row was reported as "otherwise" (2) */
+
+    for (n = 0; n < 8; n++) {
+        int kind = idct_row(block + 8 * n);
+
+        if (kind == 2)
+            flat_rows = 0;
+        if (n && kind > 0)
+            only_first_row = 0;
+    }
+
+    if (only_first_row) {
+        idct_col2(block);
+    } else if (!flat_rows) {
+        for (n = 0; n < 8; n++)
+            idct_col(block + n);
+    } else {
+        /* Transform column 0 only, then fill each row with its first entry. */
+        idct_col(block);
+        for (row = block; row < block + 64; row += 16) {
+            uint64_t even = (uint16_t) row[0];
+            uint64_t odd  = (uint16_t) row[8];
+
+            even |= even << 16;
+            even |= even << 32;
+            odd  |= odd  << 16;
+            odd  |= odd  << 32;
+            stq(even, row);
+            stq(even, row + 4);
+            stq(odd,  row + 8);
+            stq(odd,  row + 12);
+        }
+    }
+}
+
+void ff_simple_idct_put_axp(uint8_t *dest, int line_size, int16_t *block)
+{
+    ff_simple_idct_axp(block);  /* transform block in place */
+    put_pixels_clamped_axp_p(block, dest, line_size);  /* presumably stores the clamped result to dest */
+}
+
+void ff_simple_idct_add_axp(uint8_t *dest, int line_size, int16_t *block)
+{
+    ff_simple_idct_axp(block);  /* transform block in place */
+    add_pixels_clamped_axp_p(block, dest, line_size);  /* presumably adds the result onto dest, clamped */
+}
diff --git a/libavcodec/alsdec.c b/libavcodec/alsdec.c
index b1965a8..cfece44 100644
--- a/libavcodec/alsdec.c
+++ b/libavcodec/alsdec.c
@@ -1,28 +1,28 @@
 /*
  * MPEG-4 ALS decoder
- * Copyright (c) 2009 Thilo Borgmann <thilo.borgmann _at_ googlemail.com>
+ * Copyright (c) 2009 Thilo Borgmann <thilo.borgmann _at_ mail.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
  * @file
  * MPEG-4 ALS decoder
- * @author Thilo Borgmann <thilo.borgmann _at_ googlemail.com>
+ * @author Thilo Borgmann <thilo.borgmann _at_ mail.de>
  */
 
 #include <inttypes.h>
@@ -199,6 +199,7 @@ typedef struct {
     unsigned int cur_frame_length;  ///< length of the current frame to decode
     unsigned int frame_id;          ///< the frame ID / number of the current frame
     unsigned int js_switch;         ///< if true, joint-stereo decoding is enforced
+    unsigned int cs_switch;         ///< if true, channel rearrangement is done
     unsigned int num_blocks;        ///< number of blocks used in the current frame
     unsigned int s_max;             ///< maximum Rice parameter allowed in entropy coding
     uint8_t *bgmc_lut;              ///< pointer at lookup tables used for BGMC
@@ -281,12 +282,14 @@ static av_cold int read_specific_config(ALSDecContext *ctx)
     GetBitContext gb;
     uint64_t ht_size;
     int i, config_offset;
-    MPEG4AudioConfig m4ac;
+    MPEG4AudioConfig m4ac = {0};
     ALSSpecificConfig *sconf = &ctx->sconf;
     AVCodecContext *avctx    = ctx->avctx;
     uint32_t als_id, header_size, trailer_size;
+    int ret;
 
-    init_get_bits(&gb, avctx->extradata, avctx->extradata_size * 8);
+    if ((ret = init_get_bits8(&gb, avctx->extradata, avctx->extradata_size)) < 0)
+        return ret;
 
     config_offset = avpriv_mpeg4audio_get_config(&m4ac, avctx->extradata,
                                                  avctx->extradata_size * 8, 1);
@@ -305,7 +308,7 @@ static av_cold int read_specific_config(ALSDecContext *ctx)
     skip_bits_long(&gb, 32); // sample rate already known
     sconf->samples              = get_bits_long(&gb, 32);
     avctx->channels             = m4ac.channels;
-    skip_bits(&gb, 16);      // number of channels already knwon
+    skip_bits(&gb, 16);      // number of channels already known
     skip_bits(&gb, 3);       // skip file_type
     sconf->resolution           = get_bits(&gb, 3);
     sconf->floating             = get_bits1(&gb);
@@ -352,13 +355,21 @@ static av_cold int read_specific_config(ALSDecContext *ctx)
         if (!(sconf->chan_pos = av_malloc(avctx->channels * sizeof(*sconf->chan_pos))))
             return AVERROR(ENOMEM);
 
-        for (i = 0; i < avctx->channels; i++)
-            sconf->chan_pos[i] = get_bits(&gb, chan_pos_bits);
+        ctx->cs_switch = 1;
+
+        for (i = 0; i < avctx->channels; i++) {
+            int idx;
+
+            idx = get_bits(&gb, chan_pos_bits);
+            if (idx >= avctx->channels) {
+                av_log(avctx, AV_LOG_WARNING, "Invalid channel reordering.\n");
+                ctx->cs_switch = 0;
+                break;
+            }
+            sconf->chan_pos[idx] = i;
+        }
 
         align_get_bits(&gb);
-        // TODO: use this to actually do channel sorting
-    } else {
-        sconf->chan_sort = 0;
     }
 
 
@@ -392,7 +403,7 @@ static av_cold int read_specific_config(ALSDecContext *ctx)
         if (get_bits_left(&gb) < 32)
             return AVERROR_INVALIDDATA;
 
-        if (avctx->err_recognition & AV_EF_CRCCHECK) {
+        if (avctx->err_recognition & (AV_EF_CRCCHECK|AV_EF_CAREFUL)) {
             ctx->crc_table = av_crc_get_table(AV_CRC_32_IEEE_LE);
             ctx->crc       = 0xFFFFFFFF;
             ctx->crc_org   = ~get_bits_long(&gb, 32);
@@ -428,7 +439,6 @@ static int check_specific_config(ALSDecContext *ctx)
 
     MISSING_ERR(sconf->floating,  "Floating point decoding",     AVERROR_PATCHWELCOME);
     MISSING_ERR(sconf->rlslms,    "Adaptive RLS-LMS prediction", AVERROR_PATCHWELCOME);
-    MISSING_ERR(sconf->chan_sort, "Channel sorting",             0);
 
     return error;
 }
@@ -551,12 +561,15 @@ static void get_block_sizes(ALSDecContext *ctx, unsigned int *div_blocks,
 
 /** Read the block data for a constant block
  */
-static void read_const_block_data(ALSDecContext *ctx, ALSBlockData *bd)
+static int read_const_block_data(ALSDecContext *ctx, ALSBlockData *bd)
 {
     ALSSpecificConfig *sconf = &ctx->sconf;
     AVCodecContext *avctx    = ctx->avctx;
     GetBitContext *gb        = &ctx->gb;
 
+    if (bd->block_length <= 0)
+        return AVERROR_INVALIDDATA;
+
     *bd->raw_samples = 0;
     *bd->const_block = get_bits1(gb);    // 1 = constant value, 0 = zero block (silence)
     bd->js_blocks    = get_bits1(gb);
@@ -571,6 +584,8 @@ static void read_const_block_data(ALSDecContext *ctx, ALSBlockData *bd)
 
     // ensure constant block decoding by reusing this field
     *bd->const_block = 1;
+
+    return 0;
 }
 
 
@@ -669,13 +684,17 @@ static int read_var_block_data(ALSDecContext *ctx, ALSBlockData *bd)
             *bd->opt_order       = get_bits(gb, opt_order_length);
             if (*bd->opt_order > sconf->max_order) {
                 *bd->opt_order = sconf->max_order;
-                av_log(avctx, AV_LOG_ERROR, "Predictor order too large!\n");
+                av_log(avctx, AV_LOG_ERROR, "Predictor order too large.\n");
                 return AVERROR_INVALIDDATA;
             }
         } else {
             *bd->opt_order = sconf->max_order;
         }
-
+        if (*bd->opt_order > bd->block_length) {
+            *bd->opt_order = bd->block_length;
+            av_log(avctx, AV_LOG_ERROR, "Predictor order too large.\n");
+            return AVERROR_INVALIDDATA;
+        }
         opt_order = *bd->opt_order;
 
         if (opt_order) {
@@ -706,7 +725,7 @@ static int read_var_block_data(ALSDecContext *ctx, ALSBlockData *bd)
                     quant_cof[k] = decode_rice(gb, rice_param) + offset;
                     if (quant_cof[k] < -64 || quant_cof[k] > 63) {
                         av_log(avctx, AV_LOG_ERROR,
-                               "quant_cof %"PRIu32" is out of range\n",
+                               "quant_cof %"PRIu32" is out of range.\n",
                                quant_cof[k]);
                         return AVERROR_INVALIDDATA;
                     }
@@ -964,7 +983,7 @@ static int decode_var_block_data(ALSDecContext *ctx, ALSBlockData *bd)
  */
 static int read_block(ALSDecContext *ctx, ALSBlockData *bd)
 {
-    int ret = 0;
+    int ret;
     GetBitContext *gb        = &ctx->gb;
 
     *bd->shift_lsbs = 0;
@@ -972,7 +991,7 @@ static int read_block(ALSDecContext *ctx, ALSBlockData *bd)
     if (get_bits1(gb)) {
         ret = read_var_block_data(ctx, bd);
     } else {
-        read_const_block_data(ctx, bd);
+        ret = read_const_block_data(ctx, bd);
     }
 
     return ret;
@@ -1026,8 +1045,8 @@ static void zero_remaining(unsigned int b, unsigned int b_max,
 {
     unsigned int count = 0;
 
-    for (; b < b_max; b++)
-        count += div_blocks[b];
+    while (b < b_max)
+        count += div_blocks[b++];
 
     if (count)
         memset(buf, 0, sizeof(*buf) * count);
@@ -1132,7 +1151,7 @@ static int decode_blocks(ALSDecContext *ctx, unsigned int ra_frame,
         // reconstruct joint-stereo blocks
         if (bd[0].js_blocks) {
             if (bd[1].js_blocks)
-                av_log(ctx->avctx, AV_LOG_WARNING, "Invalid channel pair!\n");
+                av_log(ctx->avctx, AV_LOG_WARNING, "Invalid channel pair.\n");
 
             for (s = 0; s < div_blocks[b]; s++)
                 bd[0].raw_samples[s] = bd[1].raw_samples[s] - bd[0].raw_samples[s];
@@ -1180,7 +1199,7 @@ static int read_channel_data(ALSDecContext *ctx, ALSChannelData *cd, int c)
         current->master_channel = get_bits_long(gb, av_ceil_log2(channels));
 
         if (current->master_channel >= channels) {
-            av_log(ctx->avctx, AV_LOG_ERROR, "Invalid master channel!\n");
+            av_log(ctx->avctx, AV_LOG_ERROR, "Invalid master channel.\n");
             return AVERROR_INVALIDDATA;
         }
 
@@ -1205,7 +1224,7 @@ static int read_channel_data(ALSDecContext *ctx, ALSChannelData *cd, int c)
     }
 
     if (entries == channels) {
-        av_log(ctx->avctx, AV_LOG_ERROR, "Damaged channel data!\n");
+        av_log(ctx->avctx, AV_LOG_ERROR, "Damaged channel data.\n");
         return AVERROR_INVALIDDATA;
     }
 
@@ -1237,7 +1256,7 @@ static int revert_channel_correlation(ALSDecContext *ctx, ALSBlockData *bd,
     }
 
     if (dep == channels) {
-        av_log(ctx->avctx, AV_LOG_WARNING, "Invalid channel correlation!\n");
+        av_log(ctx->avctx, AV_LOG_WARNING, "Invalid channel correlation.\n");
         return AVERROR_INVALIDDATA;
     }
 
@@ -1252,14 +1271,16 @@ static int revert_channel_correlation(ALSDecContext *ctx, ALSBlockData *bd,
     bd->quant_cof   = ctx->quant_cof[c];
     bd->raw_samples = ctx->raw_samples[c] + offset;
 
-    dep = 0;
-    while (!ch[dep].stop_flag) {
+    for (dep = 0; !ch[dep].stop_flag; dep++) {
         unsigned int smp;
         unsigned int begin = 1;
         unsigned int end   = bd->block_length - 1;
         int64_t y;
         int32_t *master = ctx->raw_samples[ch[dep].master_channel] + offset;
 
+        if (ch[dep].master_channel == c)
+            continue;
+
         if (ch[dep].time_diff_flag) {
             int t = ch[dep].time_diff_index;
 
@@ -1291,8 +1312,6 @@ static int revert_channel_correlation(ALSDecContext *ctx, ALSBlockData *bd,
                 bd->raw_samples[smp] += y >> 7;
             }
         }
-
-        dep++;
     }
 
     return 0;
@@ -1367,7 +1386,7 @@ static int read_frame_data(ALSDecContext *ctx, unsigned int ra_frame)
 
         for (c = 0; c < avctx->channels; c++)
             if (ctx->chan_data[c] < ctx->chan_data_buffer) {
-                av_log(ctx->avctx, AV_LOG_ERROR, "Invalid channel data!\n");
+                av_log(ctx->avctx, AV_LOG_ERROR, "Invalid channel data.\n");
                 return AVERROR_INVALIDDATA;
             }
 
@@ -1423,6 +1442,7 @@ static int read_frame_data(ALSDecContext *ctx, unsigned int ra_frame)
                 bd.lpc_cof     = ctx->lpc_cof[c];
                 bd.quant_cof   = ctx->quant_cof[c];
                 bd.raw_samples = ctx->raw_samples[c] + offset;
+
                 if ((ret = decode_block(ctx, &bd)) < 0)
                     return ret;
             }
@@ -1482,19 +1502,23 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame_ptr,
 
     /* get output buffer */
     frame->nb_samples = ctx->cur_frame_length;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     // transform decoded frame into output format
-    #define INTERLEAVE_OUTPUT(bps)                                 \
-    {                                                              \
-        int##bps##_t *dest = (int##bps##_t*)frame->data[0];        \
-        shift = bps - ctx->avctx->bits_per_raw_sample;             \
-        for (sample = 0; sample < ctx->cur_frame_length; sample++) \
-            for (c = 0; c < avctx->channels; c++)                  \
-                *dest++ = ctx->raw_samples[c][sample] << shift;    \
+    #define INTERLEAVE_OUTPUT(bps)                                                   \
+    {                                                                                \
+        int##bps##_t *dest = (int##bps##_t*)frame->data[0];                          \
+        shift = bps - ctx->avctx->bits_per_raw_sample;                               \
+        if (!ctx->cs_switch) {                                                       \
+            for (sample = 0; sample < ctx->cur_frame_length; sample++)               \
+                for (c = 0; c < avctx->channels; c++)                                \
+                    *dest++ = ctx->raw_samples[c][sample] << shift;                  \
+        } else {                                                                     \
+            for (sample = 0; sample < ctx->cur_frame_length; sample++)               \
+                for (c = 0; c < avctx->channels; c++)                                \
+                    *dest++ = ctx->raw_samples[sconf->chan_pos[c]][sample] << shift; \
+        }                                                                            \
     }
 
     if (ctx->avctx->bits_per_raw_sample <= 16) {
@@ -1504,7 +1528,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame_ptr,
     }
 
     // update CRC
-    if (sconf->crc_enabled && (avctx->err_recognition & AV_EF_CRCCHECK)) {
+    if (sconf->crc_enabled && (avctx->err_recognition & (AV_EF_CRCCHECK|AV_EF_CAREFUL))) {
         int swap = HAVE_BIGENDIAN != sconf->msb_first;
 
         if (ctx->avctx->bits_per_raw_sample == 24) {
@@ -1744,7 +1768,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
 
     // allocate crc buffer
     if (HAVE_BIGENDIAN != sconf->msb_first && sconf->crc_enabled &&
-        (avctx->err_recognition & AV_EF_CRCCHECK)) {
+        (avctx->err_recognition & (AV_EF_CRCCHECK|AV_EF_CAREFUL))) {
         ctx->crc_buffer = av_malloc(sizeof(*ctx->crc_buffer) *
                                     ctx->cur_frame_length *
                                     avctx->channels *
diff --git a/libavcodec/amr.h b/libavcodec/amr.h
index 676c963..1ac73ab 100644
--- a/libavcodec/amr.h
+++ b/libavcodec/amr.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2010 Marcelo Galvao Povoa
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/amrnbdata.h b/libavcodec/amrnbdata.h
index b7d1b89..435fd99 100644
--- a/libavcodec/amrnbdata.h
+++ b/libavcodec/amrnbdata.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2006-2007 Robert Swain
  * Copyright (c) 2009 Colin McQuillan
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -1655,10 +1655,10 @@ static const float ir_filter_medium[AMR_SUBFRAME_SIZE] = {
  0.016998,  0.023804, -0.041779,  0.025696,  0.019989,
 };
 
-static const float *ir_filters_lookup[2]           = {
+static const float * const ir_filters_lookup[2]           = {
     ir_filter_strong,           ir_filter_medium
 };
-static const float *ir_filters_lookup_MODE_7k95[2] = {
+static const float * const ir_filters_lookup_MODE_7k95[2] = {
     ir_filter_strong_MODE_7k95, ir_filter_medium
 };
 
diff --git a/libavcodec/amrnbdec.c b/libavcodec/amrnbdec.c
index 7692cf0..43ddb62 100644
--- a/libavcodec/amrnbdec.c
+++ b/libavcodec/amrnbdec.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2006-2007 Robert Swain
  * Copyright (c) 2009 Colin McQuillan
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -47,6 +47,8 @@
 #include "libavutil/float_dsp.h"
 #include "avcodec.h"
 #include "libavutil/common.h"
+#include "libavutil/avassert.h"
+#include "celp_math.h"
 #include "celp_filters.h"
 #include "acelp_filters.h"
 #include "acelp_vectors.h"
@@ -84,7 +86,7 @@
 /** Maximum sharpening factor
  *
  * The specification says 0.8, which should be 13107, but the reference C code
- * uses 13017 instead. (Amusingly the same applies to SHARP_MAX in bitexact G.729.)
+ * uses 13017 instead. (Amusingly the same applies to SHARP_MAX in g729dec.c.)
  */
 #define SHARP_MAX 0.79449462890625
 
@@ -136,6 +138,11 @@ typedef struct AMRContext {
 
     float samples_in[LP_FILTER_ORDER + AMR_SUBFRAME_SIZE]; ///< floating point samples
 
+    ACELPFContext                     acelpf_ctx; ///< context for filters for ACELP-based codecs
+    ACELPVContext                     acelpv_ctx; ///< context for vector operations for ACELP-based codecs
+    CELPFContext                       celpf_ctx; ///< context for filters for CELP-based codecs
+    CELPMContext                       celpm_ctx; ///< context for fixed point math operations
+
 } AMRContext;
 
 /** Double version of ff_weighted_vector_sumf() */
@@ -162,7 +169,8 @@ static av_cold int amrnb_decode_init(AVCodecContext *avctx)
 
     avctx->channels       = 1;
     avctx->channel_layout = AV_CH_LAYOUT_MONO;
-    avctx->sample_rate    = 8000;
+    if (!avctx->sample_rate)
+        avctx->sample_rate = 8000;
     avctx->sample_fmt     = AV_SAMPLE_FMT_FLT;
 
     // p->excitation always points to the same position in p->excitation_buf
@@ -176,6 +184,11 @@ static av_cold int amrnb_decode_init(AVCodecContext *avctx)
     for (i = 0; i < 4; i++)
         p->prediction_error[i] = MIN_ENERGY;
 
+    ff_acelp_filter_init(&p->acelpf_ctx);
+    ff_acelp_vectors_init(&p->acelpv_ctx);
+    ff_celp_filter_init(&p->celpf_ctx);
+    ff_celp_math_init(&p->celpm_ctx);
+
     return 0;
 }
 
@@ -219,15 +232,16 @@ static enum Mode unpack_bitstream(AMRContext *p, const uint8_t *buf,
  * Interpolate the LSF vector (used for fixed gain smoothing).
  * The interpolation is done over all four subframes even in MODE_12k2.
  *
+ * @param[in]     ctx       The Context
  * @param[in,out] lsf_q     LSFs in [0,1] for each subframe
  * @param[in]     lsf_new   New LSFs in [0,1] for subframe 4
  */
-static void interpolate_lsf(float lsf_q[4][LP_FILTER_ORDER], float *lsf_new)
+static void interpolate_lsf(ACELPVContext *ctx, float lsf_q[4][LP_FILTER_ORDER], float *lsf_new)
 {
     int i;
 
     for (i = 0; i < 4; i++)
-        ff_weighted_vector_sumf(lsf_q[i], lsf_q[3], lsf_new,
+        ctx->weighted_vector_sumf(lsf_q[i], lsf_q[3], lsf_new,
                                 0.25 * (3 - i), 0.25 * (i + 1),
                                 LP_FILTER_ORDER);
 }
@@ -271,7 +285,7 @@ static void lsf2lsp_for_mode12k2(AMRContext *p, double lsp[LP_FILTER_ORDER],
     ff_set_min_dist_lsf(lsf_q, MIN_LSF_SPACING, LP_FILTER_ORDER);
 
     if (update)
-        interpolate_lsf(p->lsf_q, lsf_q);
+        interpolate_lsf(&p->acelpv_ctx, p->lsf_q, lsf_q);
 
     ff_acelp_lsf2lspd(lsp, lsf_q, LP_FILTER_ORDER);
 }
@@ -334,7 +348,7 @@ static void lsf2lsp_3(AMRContext *p)
     ff_set_min_dist_lsf(lsf_q, MIN_LSF_SPACING, LP_FILTER_ORDER);
 
     // store data for computing the next frame's LSFs
-    interpolate_lsf(p->lsf_q, lsf_q);
+    interpolate_lsf(&p->acelpv_ctx, p->lsf_q, lsf_q);
     memcpy(p->prev_lsf_r, lsf_r, LP_FILTER_ORDER * sizeof(*lsf_r));
 
     ff_acelp_lsf2lspd(p->lsp[3], lsf_q, LP_FILTER_ORDER);
@@ -400,7 +414,8 @@ static void decode_pitch_vector(AMRContext *p,
 
     /* Calculate the pitch vector by interpolating the past excitation at the
        pitch lag using a b60 hamming windowed sinc function.   */
-    ff_acelp_interpolatef(p->excitation, p->excitation + 1 - pitch_lag_int,
+    p->acelpf_ctx.acelp_interpolatef(p->excitation,
+                          p->excitation + 1 - pitch_lag_int,
                           ff_b60_sinc, 6,
                           pitch_lag_frac + 6 - 6*(pitch_lag_frac > 0),
                           10, AMR_SUBFRAME_SIZE);
@@ -484,7 +499,7 @@ static void decode_8_pulses_31bits(const int16_t *fixed_index,
 static void decode_fixed_sparse(AMRFixed *fixed_sparse, const uint16_t *pulses,
                                 const enum Mode mode, const int subframe)
 {
-    assert(MODE_4k75 <= mode && mode <= MODE_12k2);
+    av_assert1(MODE_4k75 <= (signed)mode && mode <= MODE_12k2);
 
     if (mode == MODE_12k2) {
         ff_decode_10_pulses_35bits(pulses, fixed_sparse, gray_decode, 5, 3);
@@ -785,12 +800,12 @@ static int synthesis(AMRContext *p, float *lpc,
         for (i = 0; i < AMR_SUBFRAME_SIZE; i++)
             p->pitch_vector[i] *= 0.25;
 
-    ff_weighted_vector_sumf(excitation, p->pitch_vector, fixed_vector,
+    p->acelpv_ctx.weighted_vector_sumf(excitation, p->pitch_vector, fixed_vector,
                             p->pitch_gain[4], fixed_gain, AMR_SUBFRAME_SIZE);
 
     // emphasize pitch vector contribution
     if (p->pitch_gain[4] > 0.5 && !overflow) {
-        float energy = avpriv_scalarproduct_float_c(excitation, excitation,
+        float energy = p->celpm_ctx.dot_productf(excitation, excitation,
                                                     AMR_SUBFRAME_SIZE);
         float pitch_factor =
             p->pitch_gain[4] *
@@ -805,7 +820,8 @@ static int synthesis(AMRContext *p, float *lpc,
                                                 AMR_SUBFRAME_SIZE);
     }
 
-    ff_celp_lp_synthesis_filterf(samples, lpc, excitation, AMR_SUBFRAME_SIZE,
+    p->celpf_ctx.celp_lp_synthesis_filterf(samples, lpc, excitation,
+                                 AMR_SUBFRAME_SIZE,
                                  LP_FILTER_ORDER);
 
     // detect overflow
@@ -851,10 +867,11 @@ static void update_state(AMRContext *p)
 /**
  * Get the tilt factor of a formant filter from its transfer function
  *
+ * @param p     The Context
  * @param lpc_n LP_FILTER_ORDER coefficients of the numerator
  * @param lpc_d LP_FILTER_ORDER coefficients of the denominator
  */
-static float tilt_factor(float *lpc_n, float *lpc_d)
+static float tilt_factor(AMRContext *p, float *lpc_n, float *lpc_d)
 {
     float rh0, rh1; // autocorrelation at lag 0 and 1
 
@@ -864,11 +881,12 @@ static float tilt_factor(float *lpc_n, float *lpc_d)
 
     hf[0] = 1.0;
     memcpy(hf + 1, lpc_n, sizeof(float) * LP_FILTER_ORDER);
-    ff_celp_lp_synthesis_filterf(hf, lpc_d, hf, AMR_TILT_RESPONSE,
+    p->celpf_ctx.celp_lp_synthesis_filterf(hf, lpc_d, hf,
+                                 AMR_TILT_RESPONSE,
                                  LP_FILTER_ORDER);
 
-    rh0 = avpriv_scalarproduct_float_c(hf, hf,     AMR_TILT_RESPONSE);
-    rh1 = avpriv_scalarproduct_float_c(hf, hf + 1, AMR_TILT_RESPONSE - 1);
+    rh0 = p->celpm_ctx.dot_productf(hf, hf,     AMR_TILT_RESPONSE);
+    rh1 = p->celpm_ctx.dot_productf(hf, hf + 1, AMR_TILT_RESPONSE - 1);
 
     // The spec only specifies this check for 12.2 and 10.2 kbit/s
     // modes. But in the ref source the tilt is always non-negative.
@@ -888,7 +906,7 @@ static void postfilter(AMRContext *p, float *lpc, float *buf_out)
     int i;
     float *samples          = p->samples_in + LP_FILTER_ORDER; // Start of input
 
-    float speech_gain       = avpriv_scalarproduct_float_c(samples, samples,
+    float speech_gain       = p->celpm_ctx.dot_productf(samples, samples,
                                                            AMR_SUBFRAME_SIZE);
 
     float pole_out[AMR_SUBFRAME_SIZE + LP_FILTER_ORDER];  // Output of pole filter
@@ -909,16 +927,16 @@ static void postfilter(AMRContext *p, float *lpc, float *buf_out)
     }
 
     memcpy(pole_out, p->postfilter_mem, sizeof(float) * LP_FILTER_ORDER);
-    ff_celp_lp_synthesis_filterf(pole_out + LP_FILTER_ORDER, lpc_d, samples,
+    p->celpf_ctx.celp_lp_synthesis_filterf(pole_out + LP_FILTER_ORDER, lpc_d, samples,
                                  AMR_SUBFRAME_SIZE, LP_FILTER_ORDER);
     memcpy(p->postfilter_mem, pole_out + AMR_SUBFRAME_SIZE,
            sizeof(float) * LP_FILTER_ORDER);
 
-    ff_celp_lp_zero_synthesis_filterf(buf_out, lpc_n,
+    p->celpf_ctx.celp_lp_zero_synthesis_filterf(buf_out, lpc_n,
                                       pole_out + LP_FILTER_ORDER,
                                       AMR_SUBFRAME_SIZE, LP_FILTER_ORDER);
 
-    ff_tilt_compensation(&p->tilt_mem, tilt_factor(lpc_n, lpc_d), buf_out,
+    ff_tilt_compensation(&p->tilt_mem, tilt_factor(p, lpc_n, lpc_d), buf_out,
                          AMR_SUBFRAME_SIZE);
 
     ff_adaptive_gain_control(buf_out, buf_out, speech_gain, AMR_SUBFRAME_SIZE,
@@ -945,10 +963,8 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = AMR_BLOCK_SIZE;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     buf_out = (float *)frame->data[0];
 
     p->cur_frame_mode = unpack_bitstream(p, buf, buf_size);
@@ -957,7 +973,8 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data,
         return AVERROR_INVALIDDATA;
     }
     if (p->cur_frame_mode == MODE_DTX) {
-        avpriv_request_sample(avctx, "dtx mode");
+        avpriv_report_missing_feature(avctx, "dtx mode");
+        av_log(avctx, AV_LOG_INFO, "Note: libopencore_amrnb supports dtx\n");
         return AVERROR_PATCHWELCOME;
     }
 
@@ -995,7 +1012,7 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data,
 
         p->fixed_gain[4] =
             ff_amr_set_fixed_gain(fixed_gain_factor,
-                                  avpriv_scalarproduct_float_c(p->fixed_vector,
+                       p->celpm_ctx.dot_productf(p->fixed_vector,
                                                                p->fixed_vector,
                                                                AMR_SUBFRAME_SIZE) /
                                   AMR_SUBFRAME_SIZE,
@@ -1041,7 +1058,8 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data,
         update_state(p);
     }
 
-    ff_acelp_apply_order_2_transfer_function(buf_out, buf_out, highpass_zeros,
+    p->acelpf_ctx.acelp_apply_order_2_transfer_function(buf_out,
+                                             buf_out, highpass_zeros,
                                              highpass_poles,
                                              highpass_gain * AMR_SAMPLE_SCALE,
                                              p->high_pass_mem, AMR_BLOCK_SIZE);
@@ -1052,7 +1070,7 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data,
      * for fixed_gain_smooth.
      * The specification has an incorrect formula: the reference decoder uses
      * qbar(n-1) rather than qbar(n) in section 6.1(4) equation 71. */
-    ff_weighted_vector_sumf(p->lsf_avg, p->lsf_avg, p->lsf_q[3],
+    p->acelpv_ctx.weighted_vector_sumf(p->lsf_avg, p->lsf_avg, p->lsf_q[3],
                             0.84, 0.16, LP_FILTER_ORDER);
 
     *got_frame_ptr = 1;
diff --git a/libavcodec/amrwbdata.h b/libavcodec/amrwbdata.h
index 81f8b47..8390582 100644
--- a/libavcodec/amrwbdata.h
+++ b/libavcodec/amrwbdata.h
@@ -2,20 +2,20 @@
  * AMR wideband data and definitions
  * Copyright (c) 2010 Marcelo Galvao Povoa
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -1805,7 +1805,7 @@ static const float ir_filter_mid[64] = {
     -7.501221e-02,  2.920532e-02,  1.660156e-02,  7.751465e-02
 };
 
-static const float *ir_filters_lookup[2] = {
+static const float * const ir_filters_lookup[2] = {
     ir_filter_str, ir_filter_mid
 };
 
diff --git a/libavcodec/amrwbdec.c b/libavcodec/amrwbdec.c
index 668c84d..bf668bb 100644
--- a/libavcodec/amrwbdec.c
+++ b/libavcodec/amrwbdec.c
@@ -2,20 +2,20 @@
  * AMR wideband decoder
  * Copyright (c) 2010 Marcelo Galvao Povoa
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -32,6 +32,7 @@
 #include "avcodec.h"
 #include "lsp.h"
 #include "celp_filters.h"
+#include "celp_math.h"
 #include "acelp_filters.h"
 #include "acelp_vectors.h"
 #include "acelp_pitch_delay.h"
@@ -41,6 +42,7 @@
 #include "amr.h"
 
 #include "amrwbdata.h"
+#include "mips/amrwbdec_mips.h"
 
 typedef struct {
     AMRWBFrame                             frame; ///< AMRWB parameters decoded from bitstream
@@ -84,6 +86,11 @@ typedef struct {
 
     AVLFG                                   prng; ///< random number generator for white noise excitation
     uint8_t                          first_frame; ///< flag active during decoding of the first frame
+    ACELPFContext                     acelpf_ctx; ///< context for filters for ACELP-based codecs
+    ACELPVContext                     acelpv_ctx; ///< context for vector operations for ACELP-based codecs
+    CELPFContext                       celpf_ctx; ///< context for filters for CELP-based codecs
+    CELPMContext                       celpm_ctx; ///< context for fixed point math operations
+
 } AMRWBContext;
 
 static av_cold int amrwb_decode_init(AVCodecContext *avctx)
@@ -98,7 +105,8 @@ static av_cold int amrwb_decode_init(AVCodecContext *avctx)
 
     avctx->channels       = 1;
     avctx->channel_layout = AV_CH_LAYOUT_MONO;
-    avctx->sample_rate    = 16000;
+    if (!avctx->sample_rate)
+        avctx->sample_rate = 16000;
     avctx->sample_fmt     = AV_SAMPLE_FMT_FLT;
 
     av_lfg_init(&ctx->prng, 1);
@@ -112,6 +120,11 @@ static av_cold int amrwb_decode_init(AVCodecContext *avctx)
     for (i = 0; i < 4; i++)
         ctx->prediction_error[i] = MIN_ENERGY;
 
+    ff_acelp_filter_init(&ctx->acelpf_ctx);
+    ff_acelp_vectors_init(&ctx->acelpv_ctx);
+    ff_celp_filter_init(&ctx->celpf_ctx);
+    ff_celp_math_init(&ctx->celpm_ctx);
+
     return 0;
 }
 
@@ -326,7 +339,8 @@ static void decode_pitch_vector(AMRWBContext *ctx,
 
     /* Calculate the pitch vector by interpolating the past excitation at the
        pitch lag using a hamming windowed sinc function */
-    ff_acelp_interpolatef(exc, exc + 1 - pitch_lag_int,
+    ctx->acelpf_ctx.acelp_interpolatef(exc,
+                          exc + 1 - pitch_lag_int,
                           ac_inter, 4,
                           pitch_lag_frac + (pitch_lag_frac > 0 ? 0 : 4),
                           LP_ORDER, AMRWB_SFR_SIZE + 1);
@@ -585,16 +599,18 @@ static void pitch_sharpening(AMRWBContext *ctx, float *fixed_vector)
  *
  * @param[in] p_vector, f_vector   Pitch and fixed excitation vectors
  * @param[in] p_gain, f_gain       Pitch and fixed gains
+ * @param[in] ctx                  The context
  */
 // XXX: There is something wrong with the precision here! The magnitudes
 // of the energies are not correct. Please check the reference code carefully
 static float voice_factor(float *p_vector, float p_gain,
-                          float *f_vector, float f_gain)
+                          float *f_vector, float f_gain,
+                          CELPMContext *ctx)
 {
-    double p_ener = (double) avpriv_scalarproduct_float_c(p_vector, p_vector,
+    double p_ener = (double) ctx->dot_productf(p_vector, p_vector,
                                                           AMRWB_SFR_SIZE) *
                     p_gain * p_gain;
-    double f_ener = (double) avpriv_scalarproduct_float_c(f_vector, f_vector,
+    double f_ener = (double) ctx->dot_productf(f_vector, f_vector,
                                                           AMRWB_SFR_SIZE) *
                     f_gain * f_gain;
 
@@ -758,13 +774,13 @@ static void synthesis(AMRWBContext *ctx, float *lpc, float *excitation,
                       float fixed_gain, const float *fixed_vector,
                       float *samples)
 {
-    ff_weighted_vector_sumf(excitation, ctx->pitch_vector, fixed_vector,
+    ctx->acelpv_ctx.weighted_vector_sumf(excitation, ctx->pitch_vector, fixed_vector,
                             ctx->pitch_gain[0], fixed_gain, AMRWB_SFR_SIZE);
 
     /* emphasize pitch vector contribution in low bitrate modes */
     if (ctx->pitch_gain[0] > 0.5 && ctx->fr_cur_mode <= MODE_8k85) {
         int i;
-        float energy = avpriv_scalarproduct_float_c(excitation, excitation,
+        float energy = ctx->celpm_ctx.dot_productf(excitation, excitation,
                                                     AMRWB_SFR_SIZE);
 
         // XXX: Weird part in both ref code and spec. A unknown parameter
@@ -778,7 +794,7 @@ static void synthesis(AMRWBContext *ctx, float *lpc, float *excitation,
                                                 energy, AMRWB_SFR_SIZE);
     }
 
-    ff_celp_lp_synthesis_filterf(samples, lpc, excitation,
+    ctx->celpf_ctx.celp_lp_synthesis_filterf(samples, lpc, excitation,
                                  AMRWB_SFR_SIZE, LP_ORDER);
 }
 
@@ -810,8 +826,9 @@ static void de_emphasis(float *out, float *in, float m, float mem[1])
  * @param[out] out                 Buffer for interpolated signal
  * @param[in]  in                  Current signal data (length 0.8*o_size)
  * @param[in]  o_size              Output signal length
+ * @param[in] ctx                  The context
  */
-static void upsample_5_4(float *out, const float *in, int o_size)
+static void upsample_5_4(float *out, const float *in, int o_size, CELPMContext *ctx)
 {
     const float *in0 = in - UPS_FIR_SIZE + 1;
     int i, j, k;
@@ -824,7 +841,7 @@ static void upsample_5_4(float *out, const float *in, int o_size)
         i++;
 
         for (k = 1; k < 5; k++) {
-            out[i] = avpriv_scalarproduct_float_c(in0 + int_part,
+            out[i] = ctx->dot_productf(in0 + int_part,
                                                   upsample_fir[4 - frac_part],
                                                   UPS_MEM_SIZE);
             int_part++;
@@ -852,8 +869,8 @@ static float find_hb_gain(AMRWBContext *ctx, const float *synth,
     if (ctx->fr_cur_mode == MODE_23k85)
         return qua_hb_gain[hb_idx] * (1.0f / (1 << 14));
 
-    tilt = avpriv_scalarproduct_float_c(synth, synth + 1, AMRWB_SFR_SIZE - 1) /
-           avpriv_scalarproduct_float_c(synth, synth, AMRWB_SFR_SIZE);
+    tilt = ctx->celpm_ctx.dot_productf(synth, synth + 1, AMRWB_SFR_SIZE - 1) /
+           ctx->celpm_ctx.dot_productf(synth, synth, AMRWB_SFR_SIZE);
 
     /* return gain bounded by [0.1, 1.0] */
     return av_clipf((1.0 - FFMAX(0.0, tilt)) * (1.25 - 0.25 * wsp), 0.1, 1.0);
@@ -872,7 +889,7 @@ static void scaled_hb_excitation(AMRWBContext *ctx, float *hb_exc,
                                  const float *synth_exc, float hb_gain)
 {
     int i;
-    float energy = avpriv_scalarproduct_float_c(synth_exc, synth_exc,
+    float energy = ctx->celpm_ctx.dot_productf(synth_exc, synth_exc,
                                                 AMRWB_SFR_SIZE);
 
     /* Generate a white-noise excitation */
@@ -1003,7 +1020,7 @@ static void hb_synthesis(AMRWBContext *ctx, int subframe, float *samples,
         float e_isf[LP_ORDER_16k]; // ISF vector for extrapolation
         double e_isp[LP_ORDER_16k];
 
-        ff_weighted_vector_sumf(e_isf, isf_past, isf, isfp_inter[subframe],
+        ctx->acelpv_ctx.weighted_vector_sumf(e_isf, isf_past, isf, isfp_inter[subframe],
                                 1.0 - isfp_inter[subframe], LP_ORDER);
 
         extrapolate_isf(e_isf);
@@ -1017,7 +1034,7 @@ static void hb_synthesis(AMRWBContext *ctx, int subframe, float *samples,
         lpc_weighting(hb_lpc, ctx->lp_coef[subframe], 0.6, LP_ORDER);
     }
 
-    ff_celp_lp_synthesis_filterf(samples, hb_lpc, exc, AMRWB_SFR_SIZE_16k,
+    ctx->celpf_ctx.celp_lp_synthesis_filterf(samples, hb_lpc, exc, AMRWB_SFR_SIZE_16k,
                                  (mode == MODE_6k60) ? LP_ORDER_16k : LP_ORDER);
 }
 
@@ -1032,6 +1049,8 @@ static void hb_synthesis(AMRWBContext *ctx, int subframe, float *samples,
  *
  * @remark It is safe to pass the same array in in and out parameters
  */
+
+#ifndef hb_fir_filter
 static void hb_fir_filter(float *out, const float fir_coef[HB_FIR_SIZE + 1],
                           float mem[HB_FIR_SIZE], const float *in)
 {
@@ -1049,6 +1068,7 @@ static void hb_fir_filter(float *out, const float fir_coef[HB_FIR_SIZE + 1],
 
     memcpy(mem, data + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE * sizeof(float));
 }
+#endif /* hb_fir_filter */
 
 /**
  * Update context state before the next subframe.
@@ -1092,10 +1112,8 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = 4 * AMRWB_SFR_SIZE_16k;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     buf_out = (float *)frame->data[0];
 
     header_size      = decode_mime_header(ctx, buf);
@@ -1166,7 +1184,7 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data,
 
         ctx->fixed_gain[0] =
             ff_amr_set_fixed_gain(fixed_gain_factor,
-                                  avpriv_scalarproduct_float_c(ctx->fixed_vector,
+                                  ctx->celpm_ctx.dot_productf(ctx->fixed_vector,
                                                                ctx->fixed_vector,
                                                                AMRWB_SFR_SIZE) /
                                   AMRWB_SFR_SIZE,
@@ -1175,7 +1193,8 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data,
 
         /* Calculate voice factor and store tilt for next subframe */
         voice_fac      = voice_factor(ctx->pitch_vector, ctx->pitch_gain[0],
-                                      ctx->fixed_vector, ctx->fixed_gain[0]);
+                                      ctx->fixed_vector, ctx->fixed_gain[0],
+                                      &ctx->celpm_ctx);
         ctx->tilt_coef = voice_fac * 0.25 + 0.25;
 
         /* Construct current excitation */
@@ -1201,15 +1220,15 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data,
         de_emphasis(&ctx->samples_up[UPS_MEM_SIZE],
                     &ctx->samples_az[LP_ORDER], PREEMPH_FAC, ctx->demph_mem);
 
-        ff_acelp_apply_order_2_transfer_function(&ctx->samples_up[UPS_MEM_SIZE],
+        ctx->acelpf_ctx.acelp_apply_order_2_transfer_function(&ctx->samples_up[UPS_MEM_SIZE],
             &ctx->samples_up[UPS_MEM_SIZE], hpf_zeros, hpf_31_poles,
             hpf_31_gain, ctx->hpf_31_mem, AMRWB_SFR_SIZE);
 
         upsample_5_4(sub_buf, &ctx->samples_up[UPS_FIR_SIZE],
-                     AMRWB_SFR_SIZE_16k);
+                     AMRWB_SFR_SIZE_16k, &ctx->celpm_ctx);
 
         /* High frequency band (6.4 - 7.0 kHz) generation part */
-        ff_acelp_apply_order_2_transfer_function(hb_samples,
+        ctx->acelpf_ctx.acelp_apply_order_2_transfer_function(hb_samples,
             &ctx->samples_up[UPS_MEM_SIZE], hpf_zeros, hpf_400_poles,
             hpf_400_gain, ctx->hpf_400_mem, AMRWB_SFR_SIZE);
 
diff --git a/libavcodec/anm.c b/libavcodec/anm.c
index 3d5affb..79a87dd 100644
--- a/libavcodec/anm.c
+++ b/libavcodec/anm.c
@@ -2,20 +2,20 @@
  * Deluxe Paint Animation decoder
  * Copyright (c) 2009 Peter Ross
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -117,10 +117,8 @@ static int decode_frame(AVCodecContext *avctx,
     uint8_t *dst, *dst_end;
     int count, ret;
 
-    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0){
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0)
         return ret;
-    }
     dst     = s->frame->data[0];
     dst_end = s->frame->data[0] + s->frame->linesize[0]*avctx->height;
 
@@ -128,11 +126,11 @@ static int decode_frame(AVCodecContext *avctx,
 
     if (bytestream2_get_byte(&s->gb) != 0x42) {
         avpriv_request_sample(avctx, "Unknown record type");
-        return buf_size;
+        return AVERROR_INVALIDDATA;
     }
     if (bytestream2_get_byte(&s->gb)) {
         avpriv_request_sample(avctx, "Padding bytes");
-        return buf_size;
+        return AVERROR_PATCHWELCOME;
     }
     bytestream2_skip(&s->gb, 2);
 
diff --git a/libavcodec/ansi.c b/libavcodec/ansi.c
index 95b5be4..45c307f 100644
--- a/libavcodec/ansi.c
+++ b/libavcodec/ansi.c
@@ -2,20 +2,20 @@
  * ASCII/ANSI art decoder
  * Copyright (c) 2010 Peter Ross <pross@xvid.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,6 +27,7 @@
 #include "libavutil/common.h"
 #include "libavutil/frame.h"
 #include "libavutil/lfg.h"
+#include "libavutil/xga_font_data.h"
 #include "avcodec.h"
 #include "cga_data.h"
 #include "internal.h"
@@ -60,6 +61,7 @@ typedef struct {
     int attributes;       /**< attribute flags */
     int fg;               /**< foreground color */
     int bg;               /**< background color */
+    int first_frame;
 
     /* ansi parser state machine */
     enum {
@@ -83,7 +85,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
         return AVERROR(ENOMEM);
 
     /* defaults */
-    s->font        = ff_vga16_font;
+    s->font        = avpriv_vga16_font;
     s->font_height = 16;
     s->fg          = DEFAULT_FG_COLOR;
     s->bg          = DEFAULT_BG_COLOR;
@@ -94,12 +96,27 @@ static av_cold int decode_init(AVCodecContext *avctx)
     return 0;
 }
 
+static void set_palette(uint32_t *pal) /* writes 256 entries to pal: 16 copied + 216 colour-cube + 24 gray */
+{
+    int r, g, b;
+    memcpy(pal, ff_cga_palette, 16 * 4); /* entries 0-15: first 16 * 4 bytes of ff_cga_palette, copied verbatim */
+    pal += 16;                           /* continue writing right after the 16 copied entries */
+#define COLOR(x) ((x) * 40 + 55) /* cube level 0..5 -> 55, 95, 135, 175, 215, 255 */
+    for (r = 0; r < 6; r++)              /* entries 16-231: 6 * 6 * 6 combinations, b varies fastest */
+        for (g = 0; g < 6; g++)
+            for (b = 0; b < 6; b++)
+                *pal++ = 0xFF000000 | (COLOR(r) << 16) | (COLOR(g) << 8) | COLOR(b); /* top byte forced to 0xFF, then r, g, b bytes */
+#define GRAY(x) ((x) * 10 + 8) /* gray step 0..23 -> 8, 18, ..., 238 */
+    for (g = 0; g < 24; g++)             /* entries 232-255: same value in all three lower bytes */
+        *pal++ = 0xFF000000 | (GRAY(g) << 16) | (GRAY(g) << 8) | GRAY(g);
+}
+
 static void hscroll(AVCodecContext *avctx)
 {
     AnsiContext *s = avctx->priv_data;
     int i;
 
-    if (s->y < avctx->height - s->font_height) {
+    if (s->y <= avctx->height - 2*s->font_height) {
         s->y += s->font_height;
         return;
     }
@@ -152,7 +169,7 @@ static void draw_char(AVCodecContext *avctx, int c)
     ff_draw_pc_font(s->frame->data[0] + s->y * s->frame->linesize[0] + s->x,
                     s->frame->linesize[0], s->font, s->font_height, c, fg, bg);
     s->x += FONT_WIDTH;
-    if (s->x >= avctx->width) {
+    if (s->x > avctx->width - FONT_WIDTH) {
         s->x = 0;
         hscroll(avctx);
     }
@@ -166,8 +183,8 @@ static int execute_code(AVCodecContext * avctx, int c)
 {
     AnsiContext *s = avctx->priv_data;
     int ret, i;
-    int width = 0;
-    int height = 0;
+    int width  = avctx->width;
+    int height = avctx->height;
 
     switch(c) {
     case 'A': //Cursor Up
@@ -193,19 +210,19 @@ static int execute_code(AVCodecContext * avctx, int c)
             s->args[0] = DEFAULT_SCREEN_MODE;
         switch(s->args[0]) {
         case 0: case 1: case 4: case 5: case 13: case 19: //320x200 (25 rows)
-            s->font = ff_cga_font;
+            s->font = avpriv_cga_font;
             s->font_height = 8;
             width  = 40<<3;
             height = 25<<3;
             break;
         case 2: case 3: //640x400 (25 rows)
-            s->font = ff_vga16_font;
+            s->font = avpriv_vga16_font;
             s->font_height = 16;
             width  = 80<<3;
             height = 25<<4;
             break;
         case 6: case 14: //640x200 (25 rows)
-            s->font = ff_cga_font;
+            s->font = avpriv_cga_font;
             s->font_height = 8;
             width  = 80<<3;
             height = 25<<3;
@@ -213,13 +230,13 @@ static int execute_code(AVCodecContext * avctx, int c)
         case 7: //set line wrapping
             break;
         case 15: case 16: //640x350 (43 rows)
-            s->font = ff_cga_font;
+            s->font = avpriv_cga_font;
             s->font_height = 8;
             width  = 80<<3;
             height = 43<<3;
             break;
         case 17: case 18: //640x480 (60 rows)
-            s->font = ff_cga_font;
+            s->font = avpriv_cga_font;
             s->font_height = 8;
             width  = 80<<3;
             height = 60<<4;
@@ -227,20 +244,19 @@ static int execute_code(AVCodecContext * avctx, int c)
         default:
             avpriv_request_sample(avctx, "Unsupported screen mode");
         }
-        if (width != 0 && height != 0 &&
-            (width != avctx->width || height != avctx->height)) {
+        s->x = av_clip(s->x, 0, width  - FONT_WIDTH);
+        s->y = av_clip(s->y, 0, height - s->font_height);
+        if (width != avctx->width || height != avctx->height) {
             av_frame_unref(s->frame);
             ret = ff_set_dimensions(avctx, width, height);
             if (ret < 0)
                 return ret;
-            ret = ff_get_buffer(avctx, s->frame, AV_GET_BUFFER_FLAG_REF);
-            if (ret < 0) {
-                av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+            if ((ret = ff_get_buffer(avctx, s->frame,
+                                     AV_GET_BUFFER_FLAG_REF)) < 0)
                 return ret;
-            }
             s->frame->pict_type           = AV_PICTURE_TYPE_I;
             s->frame->palette_has_changed = 1;
-            memcpy(s->frame->data[1], ff_cga_palette, 16 * 4);
+            set_palette((uint32_t *)s->frame->data[1]);
             erase_screen(avctx);
         } else if (c == 'l') {
             erase_screen(avctx);
@@ -288,12 +304,20 @@ static int execute_code(AVCodecContext * avctx, int c)
                 s->bg = DEFAULT_BG_COLOR;
             } else if (m == 1 || m == 2 || m == 4 || m == 5 || m == 7 || m == 8) {
                 s->attributes |= 1 << (m - 1);
-            } else if (m >= 30 && m <= 38) {
+            } else if (m >= 30 && m <= 37) {
                 s->fg = ansi_to_cga[m - 30];
+            } else if (m == 38 && i + 2 < FFMIN(s->nb_args, MAX_NB_ARGS) && s->args[i + 1] == 5 && s->args[i + 2] < 256) {
+                int index = s->args[i + 2];
+                s->fg = index < 16 ? ansi_to_cga[index] : index;
+                i += 2;
             } else if (m == 39) {
                 s->fg = ansi_to_cga[DEFAULT_FG_COLOR];
             } else if (m >= 40 && m <= 47) {
                 s->bg = ansi_to_cga[m - 40];
+            } else if (m == 48 && i + 2 < FFMIN(s->nb_args, MAX_NB_ARGS) && s->args[i + 1] == 5 && s->args[i + 2] < 256) {
+                int index = s->args[i + 2];
+                s->bg = index < 16 ? ansi_to_cga[index] : index;
+                i += 2;
             } else if (m == 49) {
                 s->fg = ansi_to_cga[DEFAULT_BG_COLOR];
             } else {
@@ -317,6 +341,8 @@ static int execute_code(AVCodecContext * avctx, int c)
         avpriv_request_sample(avctx, "Unknown escape code");
         break;
     }
+    s->x = av_clip(s->x, 0, avctx->width  - FONT_WIDTH);
+    s->y = av_clip(s->y, 0, avctx->height - s->font_height);
     return 0;
 }
 
@@ -330,19 +356,21 @@ static int decode_frame(AVCodecContext *avctx,
     const uint8_t *buf_end   = buf+buf_size;
     int ret, i, count;
 
-    ret = ff_reget_buffer(avctx, s->frame);
-    if (ret < 0){
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0)
         return ret;
-    }
     if (!avctx->frame_number) {
-        memset(s->frame->data[0], 0, avctx->height * FFABS(s->frame->linesize[0]));
+        for (i=0; i<avctx->height; i++)
+            memset(s->frame->data[0]+ i*s->frame->linesize[0], 0, avctx->width);
         memset(s->frame->data[1], 0, AVPALETTE_SIZE);
     }
 
     s->frame->pict_type           = AV_PICTURE_TYPE_I;
     s->frame->palette_has_changed = 1;
-    memcpy(s->frame->data[1], ff_cga_palette, 16 * 4);
+    set_palette((uint32_t *)s->frame->data[1]);
+    if (!s->first_frame) {
+        erase_screen(avctx);
+        s->first_frame = 1;
+    }
 
     while(buf < buf_end) {
         switch(s->state) {
@@ -381,7 +409,7 @@ static int decode_frame(AVCodecContext *avctx,
             if (buf[0] == '[') {
                 s->state   = STATE_CODE;
                 s->nb_args = 0;
-                s->args[0] = 0;
+                s->args[0] = -1;
             } else {
                 s->state = STATE_NORMAL;
                 draw_char(avctx, 0x1B);
@@ -392,8 +420,8 @@ static int decode_frame(AVCodecContext *avctx,
             switch(buf[0]) {
             case '0': case '1': case '2': case '3': case '4':
             case '5': case '6': case '7': case '8': case '9':
-                if (s->nb_args < MAX_NB_ARGS)
-                    s->args[s->nb_args] = s->args[s->nb_args] * 10 + buf[0] - '0';
+                if (s->nb_args < MAX_NB_ARGS && s->args[s->nb_args] < 6553)
+                    s->args[s->nb_args] = FFMAX(s->args[s->nb_args], 0) * 10 + buf[0] - '0';
                 break;
             case ';':
                 s->nb_args++;
@@ -409,7 +437,7 @@ static int decode_frame(AVCodecContext *avctx,
             default:
                 if (s->nb_args > MAX_NB_ARGS)
                     av_log(avctx, AV_LOG_WARNING, "args overflow (%i)\n", s->nb_args);
-                if (s->nb_args < MAX_NB_ARGS && s->args[s->nb_args])
+                if (s->nb_args < MAX_NB_ARGS && s->args[s->nb_args] >= 0)
                     s->nb_args++;
                 if ((ret = execute_code(avctx, buf[0])) < 0)
                     return ret;
diff --git a/libavcodec/apedec.c b/libavcodec/apedec.c
index 344c85b..383b7fe 100644
--- a/libavcodec/apedec.c
+++ b/libavcodec/apedec.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2007 Benjamin Zores <ben@geexbox.org>
  *  based upon libdemac from Dave Chapman.
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,7 +25,7 @@
 #include "libavutil/avassert.h"
 #include "libavutil/channel_layout.h"
 #include "libavutil/opt.h"
-#include "apedsp.h"
+#include "lossless_audiodsp.h"
 #include "avcodec.h"
 #include "bswapdsp.h"
 #include "bytestream.h"
@@ -137,7 +137,7 @@ typedef struct APEContext {
     AVClass *class;                          ///< class for AVOptions
     AVCodecContext *avctx;
     BswapDSPContext bdsp;
-    APEDSPContext adsp;
+    LLAudDSPContext adsp;
     int channels;
     int samples;                             ///< samples left to decode in current frame
     int bps;
@@ -212,19 +212,6 @@ static av_cold int ape_decode_close(AVCodecContext *avctx)
     return 0;
 }
 
-static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2,
-                                              const int16_t *v3,
-                                              int order, int mul)
-{
-    int res = 0;
-
-    while (order--) {
-        res   += *v1 * *v2++;
-        *v1++ += mul * *v3++;
-    }
-    return res;
-}
-
 static av_cold int ape_decode_init(AVCodecContext *avctx)
 {
     APEContext *s = avctx->priv_data;
@@ -263,6 +250,7 @@ static av_cold int ape_decode_init(AVCodecContext *avctx)
     av_log(avctx, AV_LOG_DEBUG, "Compression Level: %d - Flags: %d\n",
            s->compression_level, s->flags);
     if (s->compression_level % 1000 || s->compression_level > COMPRESSION_LEVEL_INSANE ||
+        !s->compression_level ||
         (s->fileversion < 3930 && s->compression_level == COMPRESSION_LEVEL_INSANE)) {
         av_log(avctx, AV_LOG_ERROR, "Incorrect compression level %d\n",
                s->compression_level);
@@ -305,16 +293,8 @@ static av_cold int ape_decode_init(AVCodecContext *avctx)
         s->predictor_decode_stereo = predictor_decode_stereo_3950;
     }
 
-    s->adsp.scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
-
-    if (ARCH_ARM)
-        ff_apedsp_init_arm(&s->adsp);
-    if (ARCH_PPC)
-        ff_apedsp_init_ppc(&s->adsp);
-    if (ARCH_X86)
-        ff_apedsp_init_x86(&s->adsp);
-
     ff_bswapdsp_init(&s->bdsp);
+    ff_llauddsp_init(&s->adsp);
     avctx->channel_layout = (avctx->channels==2) ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO;
 
     return 0;
@@ -512,9 +492,12 @@ static inline int ape_decode_value_3860(APEContext *ctx, GetBitContext *gb,
 
     if (!rice->k)
         x = overflow;
-    else
+    else if(rice->k <= MIN_CACHE_BITS) {
         x = (overflow << rice->k) + get_bits(gb, rice->k);
-
+    } else {
+        av_log(ctx->avctx, AV_LOG_ERROR, "Too many bits: %d\n", rice->k);
+        return AVERROR_INVALIDDATA;
+    }
     rice->ksum += x - (rice->ksum + 8 >> 4);
     if (rice->ksum < (rice->k ? 1 << (rice->k + 4) : 0))
         rice->k--;
@@ -541,9 +524,13 @@ static inline int ape_decode_value_3900(APEContext *ctx, APERice *rice)
     } else
         tmpk = (rice->k < 1) ? 0 : rice->k - 1;
 
-    if (tmpk <= 16 || ctx->fileversion < 3910)
+    if (tmpk <= 16 || ctx->fileversion < 3910) {
+        if (tmpk > 23) {
+            av_log(ctx->avctx, AV_LOG_ERROR, "Too many bits: %d\n", tmpk);
+            return AVERROR_INVALIDDATA;
+        }
         x = range_decode_bits(ctx, tmpk);
-    else if (tmpk <= 32) {
+    } else if (tmpk <= 31) {
         x = range_decode_bits(ctx, 16);
         x |= (range_decode_bits(ctx, tmpk - 16) << 16);
     } else {
@@ -619,10 +606,14 @@ static void decode_array_0000(APEContext *ctx, GetBitContext *gb,
         rice->ksum += out[i];
     }
     rice->k = av_log2(rice->ksum / 10) + 1;
+    if (rice->k >= 24)
+        return;
     for (; i < 64; i++) {
         out[i] = get_rice_ook(&ctx->gb, rice->k);
         rice->ksum += out[i];
         rice->k = av_log2(rice->ksum / ((i + 1) * 2)) + 1;
+        if (rice->k >= 24)
+            return;
     }
     ksummax = 1 << rice->k + 7;
     ksummin = rice->k ? (1 << rice->k + 6) : 0;
@@ -922,7 +913,7 @@ static void long_filter_high_3800(int32_t *buffer, int order, int shift,
         sign = APESIGN(buffer[i]);
         for (j = 0; j < order; j++) {
             dotprod += delay[j] * coeffs[j];
-            coeffs[j] -= (((delay[j] >> 30) & 2) - 1) * sign;
+            coeffs[j] += ((delay[j] >> 31) | 1) * sign;
         }
         buffer[i] -= dotprod >> shift;
         for (j = 0; j < order - 1; j++)
@@ -942,7 +933,7 @@ static void long_filter_ehigh_3830(int32_t *buffer, int length)
         sign = APESIGN(buffer[i]);
         for (j = 7; j >= 0; j--) {
             dotprod += delay[j] * coeffs[j];
-            coeffs[j] -= (((delay[j] >> 30) & 2) - 1) * sign;
+            coeffs[j] += ((delay[j] >> 31) | 1) * sign;
         }
         for (j = 7; j > 0; j--)
             delay[j] = delay[j - 1];
@@ -1449,7 +1440,7 @@ static int ape_decode_frame(AVCodecContext *avctx, void *data,
         }
         if (s->fileversion < 3950) // previous versions overread two bytes
             buf_size += 2;
-        av_fast_malloc(&s->data, &s->data_size, buf_size);
+        av_fast_padded_malloc(&s->data, &s->data_size, buf_size);
         if (!s->data)
             return AVERROR(ENOMEM);
         s->bdsp.bswap_buf((uint32_t *) s->data, (const uint32_t *) buf,
@@ -1472,7 +1463,8 @@ static int ape_decode_frame(AVCodecContext *avctx, void *data,
             }
             s->ptr += offset;
         } else {
-            init_get_bits(&s->gb, s->ptr, (s->data_end - s->ptr) * 8);
+            if ((ret = init_get_bits8(&s->gb, s->ptr, s->data_end - s->ptr)) < 0)
+                return ret;
             if (s->fileversion > 3800)
                 skip_bits_long(&s->gb, offset * 8);
             else
@@ -1491,7 +1483,6 @@ static int ape_decode_frame(AVCodecContext *avctx, void *data,
             av_log(avctx, AV_LOG_ERROR, "Error reading frame header\n");
             return AVERROR_INVALIDDATA;
         }
-
     }
 
     if (!s->data) {
@@ -1516,10 +1507,8 @@ static int ape_decode_frame(AVCodecContext *avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = blockstodecode;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     s->error=0;
 
@@ -1563,7 +1552,7 @@ static int ape_decode_frame(AVCodecContext *avctx, void *data,
 
     *got_frame_ptr = 1;
 
-    return (s->samples == 0) ? avpkt->size : 0;
+    return !s->samples ? avpkt->size : 0;
 }
 
 static void ape_flush(AVCodecContext *avctx)
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index 6c2eb99..3daf211 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -18,6 +18,7 @@ OBJS-$(CONFIG_IDCTDSP)                 += arm/idctdsp_init_arm.o        \
                                           arm/idctdsp_arm.o             \
                                           arm/jrevdct_arm.o             \
                                           arm/simple_idct_arm.o
+OBJS-$(CONFIG_LLAUDDSP)                += arm/lossless_audiodsp_init_arm.o
 OBJS-$(CONFIG_ME_CMP)                  += arm/me_cmp_init_arm.o
 OBJS-$(CONFIG_MPEGAUDIODSP)            += arm/mpegaudiodsp_init_arm.o
 OBJS-$(CONFIG_MPEGVIDEO)               += arm/mpegvideo_arm.o
@@ -29,10 +30,10 @@ OBJS-$(CONFIG_VP3DSP)                  += arm/vp3dsp_init_arm.o
 
 OBJS-$(CONFIG_AAC_DECODER)             += arm/aacpsdsp_init_arm.o       \
                                           arm/sbrdsp_init_arm.o
-OBJS-$(CONFIG_APE_DECODER)             += arm/apedsp_init_arm.o
 OBJS-$(CONFIG_DCA_DECODER)             += arm/dcadsp_init_arm.o
 OBJS-$(CONFIG_FLAC_DECODER)            += arm/flacdsp_init_arm.o        \
                                           arm/flacdsp_arm.o
+OBJS-$(CONFIG_FLAC_ENCODER)            += arm/flacdsp_init_arm.o
 OBJS-$(CONFIG_MLP_DECODER)             += arm/mlpdsp_init_arm.o
 OBJS-$(CONFIG_VC1_DECODER)             += arm/vc1dsp_init_arm.o
 OBJS-$(CONFIG_VORBIS_DECODER)          += arm/vorbisdsp_init_arm.o
@@ -65,6 +66,8 @@ ARMV6-OBJS-$(CONFIG_PIXBLOCKDSP)       += arm/pixblockdsp_armv6.o
 
 ARMV6-OBJS-$(CONFIG_MLP_DECODER)       += arm/mlpdsp_armv6.o
 ARMV6-OBJS-$(CONFIG_STARTCODE)         += arm/startcode_armv6.o
+ARMV6-OBJS-$(CONFIG_VC1_DECODER)       += arm/startcode_armv6.o
+ARMV6-OBJS-$(CONFIG_VC1_PARSER)        += arm/startcode_armv6.o
 ARMV6-OBJS-$(CONFIG_VP7_DECODER)       += arm/vp8_armv6.o               \
                                           arm/vp8dsp_init_armv6.o       \
                                           arm/vp8dsp_armv6.o
@@ -110,7 +113,7 @@ NEON-OBJS-$(CONFIG_VP3DSP)             += arm/vp3dsp_neon.o
 
 NEON-OBJS-$(CONFIG_AAC_DECODER)        += arm/aacpsdsp_neon.o           \
                                           arm/sbrdsp_neon.o
-NEON-OBJS-$(CONFIG_APE_DECODER)        += arm/apedsp_neon.o
+NEON-OBJS-$(CONFIG_LLAUDDSP)           += arm/lossless_audiodsp_neon.o
 NEON-OBJS-$(CONFIG_DCA_DECODER)        += arm/dcadsp_neon.o             \
                                           arm/synth_filter_neon.o
 NEON-OBJS-$(CONFIG_RV30_DECODER)       += arm/rv34dsp_neon.o
diff --git a/libavcodec/arm/aac.h b/libavcodec/arm/aac.h
index 4f143cb..cafa881 100644
--- a/libavcodec/arm/aac.h
+++ b/libavcodec/arm/aac.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/aacpsdsp_init_arm.c b/libavcodec/arm/aacpsdsp_init_arm.c
index 6326376..e04787c 100644
--- a/libavcodec/arm/aacpsdsp_init_arm.c
+++ b/libavcodec/arm/aacpsdsp_init_arm.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2012 Mans Rullgard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/aacpsdsp_neon.S b/libavcodec/arm/aacpsdsp_neon.S
index fb00900..a93bbfe 100644
--- a/libavcodec/arm/aacpsdsp_neon.S
+++ b/libavcodec/arm/aacpsdsp_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2012 Mans Rullgard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/ac3dsp_arm.S b/libavcodec/arm/ac3dsp_arm.S
index ed8eb37..1aea190 100644
--- a/libavcodec/arm/ac3dsp_arm.S
+++ b/libavcodec/arm/ac3dsp_arm.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/ac3dsp_armv6.S b/libavcodec/arm/ac3dsp_armv6.S
index 2028d0b..1d2563d 100644
--- a/libavcodec/arm/ac3dsp_armv6.S
+++ b/libavcodec/arm/ac3dsp_armv6.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/ac3dsp_init_arm.c b/libavcodec/arm/ac3dsp_init_arm.c
index a48353a..a3c32ff 100644
--- a/libavcodec/arm/ac3dsp_init_arm.c
+++ b/libavcodec/arm/ac3dsp_init_arm.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -33,6 +33,14 @@ void ff_float_to_fixed24_neon(int32_t *dst, const float *src, unsigned int len);
 void ff_ac3_extract_exponents_neon(uint8_t *exp, int32_t *coef, int nb_coefs);
 void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src,
                                 const int16_t *window, unsigned n);
+void ff_ac3_sum_square_butterfly_int32_neon(int64_t sum[4],
+                                            const int32_t *coef0,
+                                            const int32_t *coef1,
+                                            int len);
+void ff_ac3_sum_square_butterfly_float_neon(float sum[4],
+                                            const float *coef0,
+                                            const float *coef1,
+                                            int len);
 
 void ff_ac3_bit_alloc_calc_bap_armv6(int16_t *mask, int16_t *psd,
                                      int start, int end,
@@ -59,5 +67,7 @@ av_cold void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact)
         c->float_to_fixed24      = ff_float_to_fixed24_neon;
         c->extract_exponents     = ff_ac3_extract_exponents_neon;
         c->apply_window_int16    = ff_apply_window_int16_neon;
+        c->sum_square_butterfly_int32 = ff_ac3_sum_square_butterfly_int32_neon;
+        c->sum_square_butterfly_float = ff_ac3_sum_square_butterfly_float_neon;
     }
 }
diff --git a/libavcodec/arm/ac3dsp_neon.S b/libavcodec/arm/ac3dsp_neon.S
index f97b190..89d0ae8 100644
--- a/libavcodec/arm/ac3dsp_neon.S
+++ b/libavcodec/arm/ac3dsp_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -131,3 +131,47 @@ function ff_apply_window_int16_neon, export=1
 
         pop             {r4,pc}
 endfunc
+
+function ff_ac3_sum_square_butterfly_int32_neon, export=1
+        vmov.i64        q0,  #0                 @ q0: running sum of coef0^2 (two 64-bit lanes)
+        vmov.i64        q1,  #0                 @ q1: running sum of coef1^2
+        vmov.i64        q2,  #0                 @ q2: running sum of (coef0 + coef1)^2
+        vmov.i64        q3,  #0                 @ q3: running sum of (coef0 - coef1)^2
+1:                                              @ loop: two int32 coefficients from each input per pass
+        vld1.32         {d16},    [r1]!         @ d16 = next 2 values via r1 (coef0 per the C prototype order), post-increment
+        vld1.32         {d17},    [r2]!         @ d17 = next 2 values via r2 (coef1 per the C prototype order), post-increment
+        vadd.s32        d18, d16, d17           @ d18 = coef0 + coef1, 32-bit per lane
+        vsub.s32        d19, d16, d17           @ d19 = coef0 - coef1, 32-bit per lane
+        vmlal.s32       q0,  d16, d16           @ widening multiply-accumulate: q0 += coef0 * coef0
+        vmlal.s32       q1,  d17, d17           @ q1 += coef1 * coef1
+        vmlal.s32       q2,  d18, d18           @ q2 += (coef0 + coef1)^2
+        vmlal.s32       q3,  d19, d19           @ q3 += (coef0 - coef1)^2
+        subs            r3,  r3,  #2            @ r3 (len) -= 2 and set flags; assumes len is a positive even count - TODO confirm callers
+        bgt             1b                      @ repeat while the remaining count is above zero
+        vadd.s64        d0,  d0,  d1            @ fold both lanes of q0 -> d0 (first output)
+        vadd.s64        d1,  d2,  d3            @ fold both lanes of q1 -> d1 (second output); old d1 was consumed above
+        vadd.s64        d2,  d4,  d5            @ fold both lanes of q2 -> d2 (third output); old d2 was consumed above
+        vadd.s64        d3,  d6,  d7            @ fold both lanes of q3 -> d3 (fourth output)
+        vst1.64         {q0-q1},  [r0]          @ write the four 64-bit sums to the buffer in r0 (sum[4])
+        bx              lr                      @ return to caller
+endfunc
+
+function ff_ac3_sum_square_butterfly_float_neon, export=1
+        vmov.f32        q0,  #0.0               @ clear d0/d1: partial sums of coef0^2 and coef1^2
+        vmov.f32        q1,  #0.0               @ clear d2/d3: partial sums of (coef0 + coef1)^2 and (coef0 - coef1)^2
+1:                                              @ loop: two floats from each input per pass
+        vld1.32         {d16},    [r1]!         @ d16 = next 2 floats via r1 (coef0 per the C prototype order), post-increment
+        vld1.32         {d17},    [r2]!         @ d17 = next 2 floats via r2 (coef1 per the C prototype order), post-increment
+        vadd.f32        d18, d16, d17           @ d18 = coef0 + coef1
+        vsub.f32        d19, d16, d17           @ d19 = coef0 - coef1
+        vmla.f32        d0,  d16, d16           @ d0 += coef0 * coef0, per lane
+        vmla.f32        d1,  d17, d17           @ d1 += coef1 * coef1, per lane
+        vmla.f32        d2,  d18, d18           @ d2 += (coef0 + coef1)^2, per lane
+        vmla.f32        d3,  d19, d19           @ d3 += (coef0 - coef1)^2, per lane
+        subs            r3,  r3,  #2            @ r3 (len) -= 2 and set flags; assumes len is a positive even count - TODO confirm callers
+        bgt             1b                      @ repeat while the remaining count is above zero
+        vpadd.f32       d0,  d0,  d1            @ pairwise add: d0 = { lanes of d0 summed, lanes of d1 summed } = first two outputs
+        vpadd.f32       d1,  d2,  d3            @ pairwise add: d1 = { lanes of d2 summed, lanes of d3 summed } = last two outputs
+        vst1.32         {q0},     [r0]          @ write the four float sums to the buffer in r0 (sum[4])
+        bx              lr                      @ return to caller
+endfunc
diff --git a/libavcodec/arm/asm-offsets.h b/libavcodec/arm/asm-offsets.h
index 0ea2f04..a2174b0 100644
--- a/libavcodec/arm/asm-offsets.h
+++ b/libavcodec/arm/asm-offsets.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2010 Mans Rullgard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/audiodsp_arm.h b/libavcodec/arm/audiodsp_arm.h
index e97e804..213660d 100644
--- a/libavcodec/arm/audiodsp_arm.h
+++ b/libavcodec/arm/audiodsp_arm.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/audiodsp_init_arm.c b/libavcodec/arm/audiodsp_init_arm.c
index ea9ec3c..74aa52a 100644
--- a/libavcodec/arm/audiodsp_init_arm.c
+++ b/libavcodec/arm/audiodsp_init_arm.c
@@ -1,20 +1,20 @@
 /*
  * ARM optimized audio functions
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/audiodsp_init_neon.c b/libavcodec/arm/audiodsp_init_neon.c
index af53272..f7bd162 100644
--- a/libavcodec/arm/audiodsp_init_neon.c
+++ b/libavcodec/arm/audiodsp_init_neon.c
@@ -2,20 +2,20 @@
  * ARM NEON optimised audio functions
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/audiodsp_neon.S b/libavcodec/arm/audiodsp_neon.S
index dfb998d..ab32cef 100644
--- a/libavcodec/arm/audiodsp_neon.S
+++ b/libavcodec/arm/audiodsp_neon.S
@@ -2,20 +2,20 @@
  * ARM NEON optimised audio functions
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/blockdsp_arm.h b/libavcodec/arm/blockdsp_arm.h
index 6d9c2c3..2688d36 100644
--- a/libavcodec/arm/blockdsp_arm.h
+++ b/libavcodec/arm/blockdsp_arm.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/blockdsp_init_arm.c b/libavcodec/arm/blockdsp_init_arm.c
index a0c0367..3b86a71 100644
--- a/libavcodec/arm/blockdsp_init_arm.c
+++ b/libavcodec/arm/blockdsp_init_arm.c
@@ -1,20 +1,20 @@
 /*
  * ARM optimized block operations
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/blockdsp_init_neon.c b/libavcodec/arm/blockdsp_init_neon.c
index 5081cf0..62b51fc 100644
--- a/libavcodec/arm/blockdsp_init_neon.c
+++ b/libavcodec/arm/blockdsp_init_neon.c
@@ -2,20 +2,20 @@
  * ARM NEON optimised block operations
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/blockdsp_neon.S b/libavcodec/arm/blockdsp_neon.S
index 98df2c6..9fc63cb 100644
--- a/libavcodec/arm/blockdsp_neon.S
+++ b/libavcodec/arm/blockdsp_neon.S
@@ -2,20 +2,20 @@
  * ARM NEON optimised block functions
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/cabac.h b/libavcodec/arm/cabac.h
index 6ff5f1a..fdbf86b 100644
--- a/libavcodec/arm/cabac.h
+++ b/libavcodec/arm/cabac.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -59,12 +59,18 @@ static av_always_inline int get_cabac_inline_arm(CABACContext *c,
         "tst        %[r_c]        , %[r_c]                      \n\t"
         "bne        2f                                          \n\t"
         "ldr        %[r_c]        , [%[c], %[byte]]             \n\t"
+#if UNCHECKED_BITSTREAM_READER
+        "ldrh       %[tmp]        , [%[r_c]]                    \n\t"
+        "add        %[r_c]        , %[r_c]      , #2            \n\t"
+        "str        %[r_c]        , [%[c], %[byte]]             \n\t"
+#else
         "ldr        %[r_b]        , [%[c], %[end]]              \n\t"
         "ldrh       %[tmp]        , [%[r_c]]                    \n\t"
         "cmp        %[r_c]        , %[r_b]                      \n\t"
         "itt        lt                                          \n\t"
         "addlt      %[r_c]        , %[r_c]      , #2            \n\t"
         "strlt      %[r_c]        , [%[c], %[byte]]             \n\t"
+#endif
         "sub        %[r_c]        , %[low]      , #1            \n\t"
         "add        %[r_b]        , %[tables]   , %[norm_off]   \n\t"
         "eor        %[r_c]        , %[low]      , %[r_c]        \n\t"
diff --git a/libavcodec/arm/dca.h b/libavcodec/arm/dca.h
index 4aed576..6e87111 100644
--- a/libavcodec/arm/dca.h
+++ b/libavcodec/arm/dca.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,7 +27,7 @@
 #include "libavcodec/dcadsp.h"
 #include "libavcodec/mathops.h"
 
-#if HAVE_ARMV6_INLINE && AV_GCC_VERSION_AT_LEAST(4,4)
+#if HAVE_ARMV6_INLINE && AV_GCC_VERSION_AT_LEAST(4,4) && !CONFIG_THUMB
 
 #define decode_blockcodes decode_blockcodes
 static inline int decode_blockcodes(int code1, int code2, int levels,
@@ -35,46 +35,44 @@ static inline int decode_blockcodes(int code1, int code2, int levels,
 {
     int32_t v0, v1, v2, v3, v4, v5;
 
-    __asm__ ("smmul   %8,  %14, %18           \n"
-             "smmul   %11, %15, %18           \n"
-             "smlabb  %14, %8,  %17, %14      \n"
-             "smlabb  %15, %11, %17, %15      \n"
-             "smmul   %9,  %8,  %18           \n"
-             "smmul   %12, %11, %18           \n"
-             "sub     %14, %14, %16, lsr #1   \n"
-             "sub     %15, %15, %16, lsr #1   \n"
-             "smlabb  %8,  %9,  %17, %8       \n"
-             "smlabb  %11, %12, %17, %11      \n"
-             "smmul   %10, %9,  %18           \n"
-             "smmul   %13, %12, %18           \n"
-             "str     %14, %0                 \n"
-             "str     %15, %4                 \n"
-             "sub     %8,  %8,  %16, lsr #1   \n"
-             "sub     %11, %11, %16, lsr #1   \n"
-             "smlabb  %9,  %10, %17, %9       \n"
-             "smlabb  %12, %13, %17, %12      \n"
-             "smmul   %14, %10, %18           \n"
-             "smmul   %15, %13, %18           \n"
-             "str     %8,  %1                 \n"
-             "str     %11, %5                 \n"
-             "sub     %9,  %9,  %16, lsr #1   \n"
-             "sub     %12, %12, %16, lsr #1   \n"
-             "smlabb  %10, %14, %17, %10      \n"
-             "smlabb  %13, %15, %17, %13      \n"
-             "str     %9,  %2                 \n"
-             "str     %12, %6                 \n"
-             "sub     %10, %10, %16, lsr #1   \n"
-             "sub     %13, %13, %16, lsr #1   \n"
-             "str     %10, %3                 \n"
-             "str     %13, %7                 \n"
-             : "=m"(values[0]), "=m"(values[1]),
-               "=m"(values[2]), "=m"(values[3]),
-               "=m"(values[4]), "=m"(values[5]),
-               "=m"(values[6]), "=m"(values[7]),
-               "=&r"(v0), "=&r"(v1), "=&r"(v2),
+    __asm__ ("smmul   %0,  %6,  %10           \n"
+             "smmul   %3,  %7,  %10           \n"
+             "smlabb  %6,  %0,  %9,  %6       \n"
+             "smlabb  %7,  %3,  %9,  %7       \n"
+             "smmul   %1,  %0,  %10           \n"
+             "smmul   %4,  %3,  %10           \n"
+             "sub     %6,  %6,  %8,  lsr #1   \n"
+             "sub     %7,  %7,  %8,  lsr #1   \n"
+             "smlabb  %0,  %1,  %9,  %0       \n"
+             "smlabb  %3,  %4,  %9,  %3       \n"
+             "smmul   %2,  %1,  %10           \n"
+             "smmul   %5,  %4,  %10           \n"
+             "str     %6,  [%11, #0]          \n"
+             "str     %7,  [%11, #16]         \n"
+             "sub     %0,  %0,  %8,  lsr #1   \n"
+             "sub     %3,  %3,  %8,  lsr #1   \n"
+             "smlabb  %1,  %2,  %9,  %1       \n"
+             "smlabb  %4,  %5,  %9,  %4       \n"
+             "smmul   %6,  %2,  %10           \n"
+             "smmul   %7,  %5,  %10           \n"
+             "str     %0,  [%11, #4]          \n"
+             "str     %3,  [%11, #20]         \n"
+             "sub     %1,  %1,  %8,  lsr #1   \n"
+             "sub     %4,  %4,  %8,  lsr #1   \n"
+             "smlabb  %2,  %6,  %9,  %2       \n"
+             "smlabb  %5,  %7,  %9,  %5       \n"
+             "str     %1,  [%11, #8]          \n"
+             "str     %4,  [%11, #24]         \n"
+             "sub     %2,  %2,  %8,  lsr #1   \n"
+             "sub     %5,  %5,  %8,  lsr #1   \n"
+             "str     %2,  [%11, #12]         \n"
+             "str     %5,  [%11, #28]         \n"
+             : "=&r"(v0), "=&r"(v1), "=&r"(v2),
                "=&r"(v3), "=&r"(v4), "=&r"(v5),
                "+&r"(code1), "+&r"(code2)
-             : "r"(levels - 1), "r"(-levels), "r"(ff_inverse[levels]));
+             : "r"(levels - 1), "r"(-levels),
+               "r"(ff_inverse[levels]), "r"(values)
+             : "memory");
 
     return code1 | code2;
 }
diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/dcadsp_init_arm.c
index 5400484..a549515 100644
--- a/libavcodec/arm/dcadsp_init_arm.c
+++ b/libavcodec/arm/dcadsp_init_arm.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/dcadsp_neon.S b/libavcodec/arm/dcadsp_neon.S
index 70580cd..cdc4136 100644
--- a/libavcodec/arm/dcadsp_neon.S
+++ b/libavcodec/arm/dcadsp_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/dcadsp_vfp.S b/libavcodec/arm/dcadsp_vfp.S
index c9114d4..2e09f0e 100644
--- a/libavcodec/arm/dcadsp_vfp.S
+++ b/libavcodec/arm/dcadsp_vfp.S
@@ -2,20 +2,20 @@
  * Copyright (c) 2013 RISC OS Open Ltd
  * Author: Ben Avison <bavison@riscosopen.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/dct-test.c b/libavcodec/arm/dct-test.c
index 70e5c1c..f9076b3 100644
--- a/libavcodec/arm/dct-test.c
+++ b/libavcodec/arm/dct-test.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/fft_fixed_init_arm.c b/libavcodec/arm/fft_fixed_init_arm.c
index 2f749e4..b60bb9f 100644
--- a/libavcodec/arm/fft_fixed_init_arm.c
+++ b/libavcodec/arm/fft_fixed_init_arm.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -33,7 +33,9 @@ av_cold void ff_fft_fixed_init_arm(FFTContext *s)
 
     if (have_neon(cpu_flags)) {
         s->fft_permutation = FF_FFT_PERM_SWAP_LSBS;
+#if CONFIG_FFT
         s->fft_calc        = ff_fft_fixed_calc_neon;
+#endif
 
 #if CONFIG_MDCT
         if (!s->inverse && s->nbits >= 3) {
diff --git a/libavcodec/arm/fft_fixed_neon.S b/libavcodec/arm/fft_fixed_neon.S
index faddc00..57a8cfb 100644
--- a/libavcodec/arm/fft_fixed_neon.S
+++ b/libavcodec/arm/fft_fixed_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/fft_init_arm.c b/libavcodec/arm/fft_init_arm.c
index bc143c1..5087f5f 100644
--- a/libavcodec/arm/fft_init_arm.c
+++ b/libavcodec/arm/fft_init_arm.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -48,8 +48,10 @@ av_cold void ff_fft_init_arm(FFTContext *s)
     }
 
     if (have_neon(cpu_flags)) {
+#if CONFIG_FFT
         s->fft_permute  = ff_fft_permute_neon;
         s->fft_calc     = ff_fft_calc_neon;
+#endif
 #if CONFIG_MDCT
         s->imdct_calc   = ff_imdct_calc_neon;
         s->imdct_half   = ff_imdct_half_neon;
diff --git a/libavcodec/arm/fft_neon.S b/libavcodec/arm/fft_neon.S
index c4d8918..8b9ae2a 100644
--- a/libavcodec/arm/fft_neon.S
+++ b/libavcodec/arm/fft_neon.S
@@ -7,20 +7,20 @@
  * This algorithm (though not any of the implementation details) is
  * based on libdjbfft by D. J. Bernstein.
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/fft_vfp.S b/libavcodec/arm/fft_vfp.S
index 130d529..1abe45a 100644
--- a/libavcodec/arm/fft_vfp.S
+++ b/libavcodec/arm/fft_vfp.S
@@ -2,20 +2,20 @@
  * Copyright (c) 2013 RISC OS Open Ltd
  * Author: Ben Avison <bavison@riscosopen.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/flacdsp_arm.S b/libavcodec/arm/flacdsp_arm.S
index d4441da..f8861c5 100644
--- a/libavcodec/arm/flacdsp_arm.S
+++ b/libavcodec/arm/flacdsp_arm.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2012 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/flacdsp_init_arm.c b/libavcodec/arm/flacdsp_init_arm.c
index 0530cf7..9ddb268 100644
--- a/libavcodec/arm/flacdsp_init_arm.c
+++ b/libavcodec/arm/flacdsp_init_arm.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2012 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,6 +27,6 @@ void ff_flac_lpc_16_arm(int32_t *samples, const int coeffs[32], int order,
 av_cold void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt,
                                  int bps)
 {
-    if (bps <= 16)
+    if (bps <= 16 && CONFIG_FLAC_DECODER)
         c->lpc = ff_flac_lpc_16_arm;
 }
diff --git a/libavcodec/arm/fmtconvert_init_arm.c b/libavcodec/arm/fmtconvert_init_arm.c
index 7c5bd91..37319ed 100644
--- a/libavcodec/arm/fmtconvert_init_arm.c
+++ b/libavcodec/arm/fmtconvert_init_arm.c
@@ -1,20 +1,20 @@
 /*
  * ARM optimized Format Conversion Utils
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/fmtconvert_neon.S b/libavcodec/arm/fmtconvert_neon.S
index e11e82c..4662e28 100644
--- a/libavcodec/arm/fmtconvert_neon.S
+++ b/libavcodec/arm/fmtconvert_neon.S
@@ -2,20 +2,20 @@
  * ARM NEON optimised Format Conversion Utils
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/fmtconvert_vfp.S b/libavcodec/arm/fmtconvert_vfp.S
index 4e43f42..b14af45 100644
--- a/libavcodec/arm/fmtconvert_vfp.S
+++ b/libavcodec/arm/fmtconvert_vfp.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2013 RISC OS Open Ltd <bavison@riscosopen.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/fmtconvert_vfp_armv6.S b/libavcodec/arm/fmtconvert_vfp_armv6.S
index fb12de1..7b012bc 100644
--- a/libavcodec/arm/fmtconvert_vfp_armv6.S
+++ b/libavcodec/arm/fmtconvert_vfp_armv6.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/h264chroma_init_arm.c b/libavcodec/arm/h264chroma_init_arm.c
index 6f36553..13f7e0d 100644
--- a/libavcodec/arm/h264chroma_init_arm.c
+++ b/libavcodec/arm/h264chroma_init_arm.c
@@ -2,20 +2,20 @@
  * ARM NEON optimised H.264 chroma functions
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/h264cmc_neon.S b/libavcodec/arm/h264cmc_neon.S
index ee7011b..77ed3c0 100644
--- a/libavcodec/arm/h264cmc_neon.S
+++ b/libavcodec/arm/h264cmc_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/h264dsp_init_arm.c b/libavcodec/arm/h264dsp_init_arm.c
index 7afd350..f7aee1f 100644
--- a/libavcodec/arm/h264dsp_init_arm.c
+++ b/libavcodec/arm/h264dsp_init_arm.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -72,11 +72,14 @@ void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset,
 static av_cold void h264dsp_init_neon(H264DSPContext *c, const int bit_depth,
                                       const int chroma_format_idc)
 {
+#if HAVE_NEON
     if (bit_depth == 8) {
         c->h264_v_loop_filter_luma   = ff_h264_v_loop_filter_luma_neon;
         c->h264_h_loop_filter_luma   = ff_h264_h_loop_filter_luma_neon;
+        if(chroma_format_idc == 1){
         c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
         c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
+        }
 
         c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon;
         c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon;
@@ -96,6 +99,7 @@ static av_cold void h264dsp_init_neon(H264DSPContext *c, const int bit_depth,
         c->h264_idct8_dc_add    = ff_h264_idct8_dc_add_neon;
         c->h264_idct8_add4      = ff_h264_idct8_add4_neon;
     }
+#endif // HAVE_NEON
 }
 
 av_cold void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth,
diff --git a/libavcodec/arm/h264dsp_neon.S b/libavcodec/arm/h264dsp_neon.S
index 5e75565..274a547 100644
--- a/libavcodec/arm/h264dsp_neon.S
+++ b/libavcodec/arm/h264dsp_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/h264idct_neon.S b/libavcodec/arm/h264idct_neon.S
index f588f3e..4f68bdb 100644
--- a/libavcodec/arm/h264idct_neon.S
+++ b/libavcodec/arm/h264idct_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/h264pred_init_arm.c b/libavcodec/arm/h264pred_init_arm.c
index bbfe63f..6ba7592 100644
--- a/libavcodec/arm/h264pred_init_arm.c
+++ b/libavcodec/arm/h264pred_init_arm.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -49,11 +49,12 @@ static av_cold void h264_pred_init_neon(H264PredContext *h, int codec_id,
                                         const int bit_depth,
                                         const int chroma_format_idc)
 {
+#if HAVE_NEON
     const int high_depth = bit_depth > 8;
 
     if (high_depth)
         return;
-
+    if(chroma_format_idc == 1){
     h->pred8x8[VERT_PRED8x8     ] = ff_pred8x8_vert_neon;
     h->pred8x8[HOR_PRED8x8      ] = ff_pred8x8_hor_neon;
     if (codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8)
@@ -69,6 +70,7 @@ static av_cold void h264_pred_init_neon(H264PredContext *h, int codec_id,
         h->pred8x8[ALZHEIMER_DC_L00_PRED8x8] = ff_pred8x8_l00_dc_neon;
         h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8] = ff_pred8x8_0l0_dc_neon;
     }
+    }
 
     h->pred16x16[DC_PRED8x8     ] = ff_pred16x16_dc_neon;
     h->pred16x16[VERT_PRED8x8   ] = ff_pred16x16_vert_neon;
@@ -79,6 +81,7 @@ static av_cold void h264_pred_init_neon(H264PredContext *h, int codec_id,
     if (codec_id != AV_CODEC_ID_SVQ3 && codec_id != AV_CODEC_ID_RV40 &&
         codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8)
         h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_neon;
+#endif // HAVE_NEON
 }
 
 av_cold void ff_h264_pred_init_arm(H264PredContext *h, int codec_id,
diff --git a/libavcodec/arm/h264pred_neon.S b/libavcodec/arm/h264pred_neon.S
index 332f94b..4dc47ba 100644
--- a/libavcodec/arm/h264pred_neon.S
+++ b/libavcodec/arm/h264pred_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/h264qpel_init_arm.c b/libavcodec/arm/h264qpel_init_arm.c
index 01615b5..71237be 100644
--- a/libavcodec/arm/h264qpel_init_arm.c
+++ b/libavcodec/arm/h264qpel_init_arm.c
@@ -2,20 +2,20 @@
  * ARM NEON optimised DSP functions
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/h264qpel_neon.S b/libavcodec/arm/h264qpel_neon.S
index 6c51250..21336c6 100644
--- a/libavcodec/arm/h264qpel_neon.S
+++ b/libavcodec/arm/h264qpel_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/hpeldsp_arm.S b/libavcodec/arm/hpeldsp_arm.S
index 0f8092e..219f793 100644
--- a/libavcodec/arm/hpeldsp_arm.S
+++ b/libavcodec/arm/hpeldsp_arm.S
@@ -2,20 +2,20 @@
 @ ARMv4-optimized halfpel functions
 @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
 @
-@ This file is part of Libav.
+@ This file is part of FFmpeg.
 @
-@ Libav is free software; you can redistribute it and/or
+@ FFmpeg is free software; you can redistribute it and/or
 @ modify it under the terms of the GNU Lesser General Public
 @ License as published by the Free Software Foundation; either
 @ version 2.1 of the License, or (at your option) any later version.
 @
-@ Libav is distributed in the hope that it will be useful,
+@ FFmpeg is distributed in the hope that it will be useful,
 @ but WITHOUT ANY WARRANTY; without even the implied warranty of
 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 @ Lesser General Public License for more details.
 @
 @ You should have received a copy of the GNU Lesser General Public
-@ License along with Libav; if not, write to the Free Software
+@ License along with FFmpeg; if not, write to the Free Software
 @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 @
 
diff --git a/libavcodec/arm/hpeldsp_arm.h b/libavcodec/arm/hpeldsp_arm.h
index a864152..5f3c774 100644
--- a/libavcodec/arm/hpeldsp_arm.h
+++ b/libavcodec/arm/hpeldsp_arm.h
@@ -1,18 +1,20 @@
 /*
- * This file is part of Libav.
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
  *
- * Libav is free software; you can redistribute it and/or
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/hpeldsp_armv6.S b/libavcodec/arm/hpeldsp_armv6.S
index f1abc32..a8bd459 100644
--- a/libavcodec/arm/hpeldsp_armv6.S
+++ b/libavcodec/arm/hpeldsp_armv6.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/hpeldsp_init_arm.c b/libavcodec/arm/hpeldsp_init_arm.c
index 6390660..1977b13 100644
--- a/libavcodec/arm/hpeldsp_init_arm.c
+++ b/libavcodec/arm/hpeldsp_init_arm.c
@@ -2,20 +2,20 @@
  * ARM-optimized halfpel functions
  * Copyright (c) 2001 Lionel Ulmer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/hpeldsp_init_armv6.c b/libavcodec/arm/hpeldsp_init_armv6.c
index 67a500d..967a8e0 100644
--- a/libavcodec/arm/hpeldsp_init_armv6.c
+++ b/libavcodec/arm/hpeldsp_init_armv6.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/hpeldsp_init_neon.c b/libavcodec/arm/hpeldsp_init_neon.c
index 76d4eaf..d9feadd 100644
--- a/libavcodec/arm/hpeldsp_init_neon.c
+++ b/libavcodec/arm/hpeldsp_init_neon.c
@@ -2,20 +2,20 @@
  * ARM NEON optimised DSP functions
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/hpeldsp_neon.S b/libavcodec/arm/hpeldsp_neon.S
index 90bc3cb..cf4a6cf 100644
--- a/libavcodec/arm/hpeldsp_neon.S
+++ b/libavcodec/arm/hpeldsp_neon.S
@@ -2,20 +2,20 @@
  * ARM NEON optimised DSP functions
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/idct.h b/libavcodec/arm/idct.h
index 168d64b..39cef3a 100644
--- a/libavcodec/arm/idct.h
+++ b/libavcodec/arm/idct.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/idctdsp_arm.S b/libavcodec/arm/idctdsp_arm.S
index 34f467e..e8333c4 100644
--- a/libavcodec/arm/idctdsp_arm.S
+++ b/libavcodec/arm/idctdsp_arm.S
@@ -2,20 +2,20 @@
 @ ARMv4-optimized IDCT functions
 @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
 @
-@ This file is part of Libav.
+@ This file is part of FFmpeg.
 @
-@ Libav is free software; you can redistribute it and/or
+@ FFmpeg is free software; you can redistribute it and/or
 @ modify it under the terms of the GNU Lesser General Public
 @ License as published by the Free Software Foundation; either
 @ version 2.1 of the License, or (at your option) any later version.
 @
-@ Libav is distributed in the hope that it will be useful,
+@ FFmpeg is distributed in the hope that it will be useful,
 @ but WITHOUT ANY WARRANTY; without even the implied warranty of
 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 @ Lesser General Public License for more details.
 @
 @ You should have received a copy of the GNU Lesser General Public
-@ License along with Libav; if not, write to the Free Software
+@ License along with FFmpeg; if not, write to the Free Software
 @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 @
 
diff --git a/libavcodec/arm/idctdsp_arm.h b/libavcodec/arm/idctdsp_arm.h
index 9012b82..d7bc5cd 100644
--- a/libavcodec/arm/idctdsp_arm.h
+++ b/libavcodec/arm/idctdsp_arm.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/idctdsp_armv6.S b/libavcodec/arm/idctdsp_armv6.S
index c180d73..a6e77d6 100644
--- a/libavcodec/arm/idctdsp_armv6.S
+++ b/libavcodec/arm/idctdsp_armv6.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/idctdsp_init_arm.c b/libavcodec/arm/idctdsp_init_arm.c
index 0b32df0..578697e 100644
--- a/libavcodec/arm/idctdsp_init_arm.c
+++ b/libavcodec/arm/idctdsp_init_arm.c
@@ -2,20 +2,20 @@
  * ARM-optimized IDCT functions
  * Copyright (c) 2001 Lionel Ulmer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -70,8 +70,8 @@ av_cold void ff_idctdsp_init_arm(IDCTDSPContext *c, AVCodecContext *avctx,
     ff_put_pixels_clamped = c->put_pixels_clamped;
     ff_add_pixels_clamped = c->add_pixels_clamped;
 
-    if (!high_bit_depth) {
-        if (avctx->idct_algo == FF_IDCT_AUTO ||
+    if (!avctx->lowres && !high_bit_depth) {
+        if ((avctx->idct_algo == FF_IDCT_AUTO && !(avctx->flags & CODEC_FLAG_BITEXACT)) ||
             avctx->idct_algo == FF_IDCT_ARM) {
             c->idct_put  = j_rev_dct_arm_put;
             c->idct_add  = j_rev_dct_arm_add;
diff --git a/libavcodec/arm/idctdsp_init_armv5te.c b/libavcodec/arm/idctdsp_init_armv5te.c
index 251165d..3d881e1 100644
--- a/libavcodec/arm/idctdsp_init_armv5te.c
+++ b/libavcodec/arm/idctdsp_init_armv5te.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -29,8 +29,9 @@
 av_cold void ff_idctdsp_init_armv5te(IDCTDSPContext *c, AVCodecContext *avctx,
                                      unsigned high_bit_depth)
 {
-    if (!high_bit_depth &&
+    if (!avctx->lowres && !high_bit_depth &&
         (avctx->idct_algo == FF_IDCT_AUTO ||
+         avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
          avctx->idct_algo == FF_IDCT_SIMPLEARMV5TE)) {
         c->idct_put  = ff_simple_idct_put_armv5te;
         c->idct_add  = ff_simple_idct_add_armv5te;
diff --git a/libavcodec/arm/idctdsp_init_armv6.c b/libavcodec/arm/idctdsp_init_armv6.c
index 8f0c49b..648f1fd 100644
--- a/libavcodec/arm/idctdsp_init_armv6.c
+++ b/libavcodec/arm/idctdsp_init_armv6.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -32,8 +32,8 @@ void ff_add_pixels_clamped_armv6(const int16_t *block, uint8_t *pixels,
 av_cold void ff_idctdsp_init_armv6(IDCTDSPContext *c, AVCodecContext *avctx,
                                    unsigned high_bit_depth)
 {
-    if (!high_bit_depth) {
-        if (avctx->idct_algo == FF_IDCT_AUTO ||
+    if (!avctx->lowres && !high_bit_depth) {
+        if ((avctx->idct_algo == FF_IDCT_AUTO && !(avctx->flags & CODEC_FLAG_BITEXACT)) ||
             avctx->idct_algo == FF_IDCT_SIMPLEARMV6) {
             c->idct_put  = ff_simple_idct_put_armv6;
             c->idct_add  = ff_simple_idct_add_armv6;
diff --git a/libavcodec/arm/idctdsp_init_neon.c b/libavcodec/arm/idctdsp_init_neon.c
index c94f7b6..80c391c 100644
--- a/libavcodec/arm/idctdsp_init_neon.c
+++ b/libavcodec/arm/idctdsp_init_neon.c
@@ -2,20 +2,20 @@
  * ARM-NEON-optimized IDCT functions
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -34,8 +34,9 @@ void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int);
 av_cold void ff_idctdsp_init_neon(IDCTDSPContext *c, AVCodecContext *avctx,
                                   unsigned high_bit_depth)
 {
-    if (!high_bit_depth) {
+    if (!avctx->lowres && !high_bit_depth) {
         if (avctx->idct_algo == FF_IDCT_AUTO ||
+            avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
             avctx->idct_algo == FF_IDCT_SIMPLENEON) {
             c->idct_put  = ff_simple_idct_put_neon;
             c->idct_add  = ff_simple_idct_add_neon;
diff --git a/libavcodec/arm/idctdsp_neon.S b/libavcodec/arm/idctdsp_neon.S
index 7095879..1911a33 100644
--- a/libavcodec/arm/idctdsp_neon.S
+++ b/libavcodec/arm/idctdsp_neon.S
@@ -2,20 +2,20 @@
  * ARM-NEON-optimized IDCT functions
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/int_neon.S b/libavcodec/arm/int_neon.S
index 42f3739..72c4c77 100644
--- a/libavcodec/arm/int_neon.S
+++ b/libavcodec/arm/int_neon.S
@@ -1,21 +1,21 @@
 /*
  * ARM NEON optimised integer operations
- * Copyright (c) 2009 Kostya Shishkov
+ * Copyright (c) 2009 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -35,7 +35,7 @@ function ff_scalarproduct_int16_neon, export=1
         vmlal.s16       q2,  d18,  d22
         vmlal.s16       q3,  d19,  d23
         subs            r2,  r2,   #16
-        bne             1b
+        bgt             1b
 
         vpadd.s32       d16, d0,   d1
         vpadd.s32       d17, d2,   d3
@@ -48,3 +48,4 @@ function ff_scalarproduct_int16_neon, export=1
         vmov.32         r0,  d3[0]
         bx              lr
 endfunc
+
diff --git a/libavcodec/arm/apedsp_init_arm.c b/libavcodec/arm/lossless_audiodsp_init_arm.c
index 47ea034..981a39a 100644
--- a/libavcodec/arm/apedsp_init_arm.c
+++ b/libavcodec/arm/lossless_audiodsp_init_arm.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -23,12 +23,12 @@
 #include "libavutil/attributes.h"
 #include "libavutil/cpu.h"
 #include "libavutil/arm/cpu.h"
-#include "libavcodec/apedsp.h"
+#include "libavcodec/lossless_audiodsp.h"
 
 int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2,
                                              const int16_t *v3, int len, int mul);
 
-av_cold void ff_apedsp_init_arm(APEDSPContext *c)
+av_cold void ff_llauddsp_init_arm(LLAudDSPContext *c)
 {
     int cpu_flags = av_get_cpu_flags();
 
diff --git a/libavcodec/arm/apedsp_neon.S b/libavcodec/arm/lossless_audiodsp_neon.S
index 7cfbf43..ba7c45f 100644
--- a/libavcodec/arm/apedsp_neon.S
+++ b/libavcodec/arm/lossless_audiodsp_neon.S
@@ -2,20 +2,20 @@
  * ARM NEON optimised integer operations
  * Copyright (c) 2009 Kostya Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -47,7 +47,7 @@ function ff_scalarproduct_and_madd_int16_neon, export=1
         vst1.16         {q10},     [r12,:128]!
         subs            r3,  r3,   #16
         vst1.16         {q13},     [r12,:128]!
-        bne             1b
+        bgt             1b
 
         vpadd.s32       d16, d0,   d1
         vpadd.s32       d17, d2,   d3
diff --git a/libavcodec/arm/mathops.h b/libavcodec/arm/mathops.h
index 45ac67d..dc57c55 100644
--- a/libavcodec/arm/mathops.h
+++ b/libavcodec/arm/mathops.h
@@ -2,20 +2,20 @@
  * simple math operations
  * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/mdct_fixed_neon.S b/libavcodec/arm/mdct_fixed_neon.S
index c77be59..365c5e7 100644
--- a/libavcodec/arm/mdct_fixed_neon.S
+++ b/libavcodec/arm/mdct_fixed_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/mdct_neon.S b/libavcodec/arm/mdct_neon.S
index bfe259c..a6952fa 100644
--- a/libavcodec/arm/mdct_neon.S
+++ b/libavcodec/arm/mdct_neon.S
@@ -2,20 +2,20 @@
  * ARM NEON optimised MDCT
  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/mdct_vfp.S b/libavcodec/arm/mdct_vfp.S
index f3fe668..43f6d14 100644
--- a/libavcodec/arm/mdct_vfp.S
+++ b/libavcodec/arm/mdct_vfp.S
@@ -2,20 +2,20 @@
  * Copyright (c) 2013 RISC OS Open Ltd
  * Author: Ben Avison <bavison@riscosopen.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/me_cmp_armv6.S b/libavcodec/arm/me_cmp_armv6.S
index 436e20d..fa5a823 100644
--- a/libavcodec/arm/me_cmp_armv6.S
+++ b/libavcodec/arm/me_cmp_armv6.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/me_cmp_init_arm.c b/libavcodec/arm/me_cmp_init_arm.c
index 819d901..eb48b38 100644
--- a/libavcodec/arm/me_cmp_init_arm.c
+++ b/libavcodec/arm/me_cmp_init_arm.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/mlpdsp_armv5te.S b/libavcodec/arm/mlpdsp_armv5te.S
index cf7d367..069e76a 100644
--- a/libavcodec/arm/mlpdsp_armv5te.S
+++ b/libavcodec/arm/mlpdsp_armv5te.S
@@ -2,20 +2,20 @@
  * Copyright (c) 2014 RISC OS Open Ltd
  * Author: Ben Avison <bavison@riscosopen.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/mlpdsp_armv6.S b/libavcodec/arm/mlpdsp_armv6.S
index 3c88021..d98f807 100644
--- a/libavcodec/arm/mlpdsp_armv6.S
+++ b/libavcodec/arm/mlpdsp_armv6.S
@@ -2,20 +2,20 @@
  * Copyright (c) 2014 RISC OS Open Ltd
  * Author: Ben Avison <bavison@riscosopen.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/mlpdsp_init_arm.c b/libavcodec/arm/mlpdsp_init_arm.c
index 4cdd10c..34a5f61 100644
--- a/libavcodec/arm/mlpdsp_init_arm.c
+++ b/libavcodec/arm/mlpdsp_init_arm.c
@@ -2,20 +2,20 @@
  * Copyright (c) 2014 RISC OS Open Ltd
  * Author: Ben Avison <bavison@riscosopen.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/mpegaudiodsp_fixed_armv6.S b/libavcodec/arm/mpegaudiodsp_fixed_armv6.S
index 49bd0bc..977abb6 100644
--- a/libavcodec/arm/mpegaudiodsp_fixed_armv6.S
+++ b/libavcodec/arm/mpegaudiodsp_fixed_armv6.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/mpegaudiodsp_init_arm.c b/libavcodec/arm/mpegaudiodsp_init_arm.c
index e73aee6..98e0c8a 100644
--- a/libavcodec/arm/mpegaudiodsp_init_arm.c
+++ b/libavcodec/arm/mpegaudiodsp_init_arm.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2011 Mans Rullgard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/mpegvideo_arm.c b/libavcodec/arm/mpegvideo_arm.c
index 7567127..5f5473d 100644
--- a/libavcodec/arm/mpegvideo_arm.c
+++ b/libavcodec/arm/mpegvideo_arm.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2002 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/mpegvideo_arm.h b/libavcodec/arm/mpegvideo_arm.h
index 226ba69..78e07e1 100644
--- a/libavcodec/arm/mpegvideo_arm.h
+++ b/libavcodec/arm/mpegvideo_arm.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/mpegvideo_armv5te.c b/libavcodec/arm/mpegvideo_armv5te.c
index 2066cbc..a572290 100644
--- a/libavcodec/arm/mpegvideo_armv5te.c
+++ b/libavcodec/arm/mpegvideo_armv5te.c
@@ -2,24 +2,25 @@
  * Optimization of some functions from mpegvideo.c for armv5te
  * Copyright (c) 2007 Siarhei Siamashka <ssvb@users.sourceforge.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
 #include "libavcodec/avcodec.h"
 #include "libavcodec/mpegvideo.h"
 #include "mpegvideo_arm.h"
@@ -55,7 +56,7 @@ static void dct_unquantize_h263_intra_armv5te(MpegEncContext *s,
     int level, qmul, qadd;
     int nCoeffs;
 
-    assert(s->block_last_index[n]>=0);
+    av_assert2(s->block_last_index[n]>=0);
 
     qmul = qscale << 1;
 
@@ -84,7 +85,7 @@ static void dct_unquantize_h263_inter_armv5te(MpegEncContext *s,
     int qmul, qadd;
     int nCoeffs;
 
-    assert(s->block_last_index[n]>=0);
+    av_assert2(s->block_last_index[n]>=0);
 
     qadd = (qscale - 1) | 1;
     qmul = qscale << 1;
diff --git a/libavcodec/arm/mpegvideo_armv5te_s.S b/libavcodec/arm/mpegvideo_armv5te_s.S
index 4426e15..8687d6b 100644
--- a/libavcodec/arm/mpegvideo_armv5te_s.S
+++ b/libavcodec/arm/mpegvideo_armv5te_s.S
@@ -2,20 +2,20 @@
  * Optimization of some functions from mpegvideo.c for armv5te
  * Copyright (c) 2007 Siarhei Siamashka <ssvb@users.sourceforge.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/mpegvideo_neon.S b/libavcodec/arm/mpegvideo_neon.S
index 3e1f7b5..1889d7a 100644
--- a/libavcodec/arm/mpegvideo_neon.S
+++ b/libavcodec/arm/mpegvideo_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2010 Mans Rullgard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/mpegvideoencdsp_armv6.S b/libavcodec/arm/mpegvideoencdsp_armv6.S
index 99db501..ab0dad7 100644
--- a/libavcodec/arm/mpegvideoencdsp_armv6.S
+++ b/libavcodec/arm/mpegvideoencdsp_armv6.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/mpegvideoencdsp_init_arm.c b/libavcodec/arm/mpegvideoencdsp_init_arm.c
index ab9ba3e..4bfe835 100644
--- a/libavcodec/arm/mpegvideoencdsp_init_arm.c
+++ b/libavcodec/arm/mpegvideoencdsp_init_arm.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/neon.S b/libavcodec/arm/neon.S
index 716a607..787bc4b 100644
--- a/libavcodec/arm/neon.S
+++ b/libavcodec/arm/neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/neontest.c b/libavcodec/arm/neontest.c
index b77bcd7..a3b5b8e 100644
--- a/libavcodec/arm/neontest.c
+++ b/libavcodec/arm/neontest.c
@@ -2,20 +2,20 @@
  * check NEON registers for clobbers
  * Copyright (c) 2013 Martin Storsjo
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/pixblockdsp_armv6.S b/libavcodec/arm/pixblockdsp_armv6.S
index 4c925a4..b10ea78 100644
--- a/libavcodec/arm/pixblockdsp_armv6.S
+++ b/libavcodec/arm/pixblockdsp_armv6.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/pixblockdsp_init_arm.c b/libavcodec/arm/pixblockdsp_init_arm.c
index f20769b..b77c523 100644
--- a/libavcodec/arm/pixblockdsp_init_arm.c
+++ b/libavcodec/arm/pixblockdsp_init_arm.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/rdft_neon.S b/libavcodec/arm/rdft_neon.S
index 7d01d53..781d976 100644
--- a/libavcodec/arm/rdft_neon.S
+++ b/libavcodec/arm/rdft_neon.S
@@ -2,20 +2,20 @@
  * ARM NEON optimised RDFT
  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/rv34dsp_init_arm.c b/libavcodec/arm/rv34dsp_init_arm.c
index 5ce787b..8bfe90b 100644
--- a/libavcodec/arm/rv34dsp_init_arm.c
+++ b/libavcodec/arm/rv34dsp_init_arm.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2011 Janne Grunau <janne-libav@jannau.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/rv34dsp_neon.S b/libavcodec/arm/rv34dsp_neon.S
index a29123f..3d4a83d 100644
--- a/libavcodec/arm/rv34dsp_neon.S
+++ b/libavcodec/arm/rv34dsp_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2011 Janne Grunau <janne-libav@jannau.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/rv40dsp_init_arm.c b/libavcodec/arm/rv40dsp_init_arm.c
index df3e461..c24854d 100644
--- a/libavcodec/arm/rv40dsp_init_arm.c
+++ b/libavcodec/arm/rv40dsp_init_arm.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2011 Janne Grunau <janne-libav@jannau.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/rv40dsp_neon.S b/libavcodec/arm/rv40dsp_neon.S
index 6bd45eb..099f88c 100644
--- a/libavcodec/arm/rv40dsp_neon.S
+++ b/libavcodec/arm/rv40dsp_neon.S
@@ -2,20 +2,20 @@
  * Copyright (c) 2011 Janne Grunau <janne-libav@jannau.net>
  * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/sbrdsp_init_arm.c b/libavcodec/arm/sbrdsp_init_arm.c
index 4da7967..4fb69f9 100644
--- a/libavcodec/arm/sbrdsp_init_arm.c
+++ b/libavcodec/arm/sbrdsp_init_arm.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2012 Mans Rullgard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/sbrdsp_neon.S b/libavcodec/arm/sbrdsp_neon.S
index 610397f..e66abd6 100644
--- a/libavcodec/arm/sbrdsp_neon.S
+++ b/libavcodec/arm/sbrdsp_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2012 Mans Rullgard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/simple_idct_arm.S b/libavcodec/arm/simple_idct_arm.S
index bf9ee3d..c630073 100644
--- a/libavcodec/arm/simple_idct_arm.S
+++ b/libavcodec/arm/simple_idct_arm.S
@@ -4,22 +4,22 @@
  * Author: Frederic Boulay <dilb@handhelds.org>
  *
  * The function defined in this file is derived from the simple_idct function
- * from the libavcodec library part of the Libav project.
+ * from the libavcodec library part of the FFmpeg project.
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/simple_idct_armv5te.S b/libavcodec/arm/simple_idct_armv5te.S
index bf509ee..d1f10b7 100644
--- a/libavcodec/arm/simple_idct_armv5te.S
+++ b/libavcodec/arm/simple_idct_armv5te.S
@@ -4,20 +4,20 @@
  * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
  * Copyright (c) 2006 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/simple_idct_armv6.S b/libavcodec/arm/simple_idct_armv6.S
index 6072346..79cf5d4 100644
--- a/libavcodec/arm/simple_idct_armv6.S
+++ b/libavcodec/arm/simple_idct_armv6.S
@@ -4,20 +4,20 @@
  * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
  * Copyright (c) 2007 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/simple_idct_neon.S b/libavcodec/arm/simple_idct_neon.S
index a1cde8d..c3e573c 100644
--- a/libavcodec/arm/simple_idct_neon.S
+++ b/libavcodec/arm/simple_idct_neon.S
@@ -6,20 +6,20 @@
  * Based on Simple IDCT
  * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/startcode.h b/libavcodec/arm/startcode.h
index d7996c1..cf25d9d 100644
--- a/libavcodec/arm/startcode.h
+++ b/libavcodec/arm/startcode.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/startcode_armv6.S b/libavcodec/arm/startcode_armv6.S
index 64078b2..a46f009 100644
--- a/libavcodec/arm/startcode_armv6.S
+++ b/libavcodec/arm/startcode_armv6.S
@@ -2,20 +2,20 @@
  * Copyright (c) 2013 RISC OS Open Ltd
  * Author: Ben Avison <bavison@riscosopen.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/synth_filter_neon.S b/libavcodec/arm/synth_filter_neon.S
index 62bb667..5417be7 100644
--- a/libavcodec/arm/synth_filter_neon.S
+++ b/libavcodec/arm/synth_filter_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/synth_filter_vfp.S b/libavcodec/arm/synth_filter_vfp.S
index 5d79e50..596734c 100644
--- a/libavcodec/arm/synth_filter_vfp.S
+++ b/libavcodec/arm/synth_filter_vfp.S
@@ -2,20 +2,20 @@
  * Copyright (c) 2013 RISC OS Open Ltd
  * Author: Ben Avison <bavison@riscosopen.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/vc1dsp.h b/libavcodec/arm/vc1dsp.h
index 30f059f..cd01ac5 100644
--- a/libavcodec/arm/vc1dsp.h
+++ b/libavcodec/arm/vc1dsp.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/vc1dsp_init_arm.c b/libavcodec/arm/vc1dsp_init_arm.c
index 6d4eb79..4a84848 100644
--- a/libavcodec/arm/vc1dsp_init_arm.c
+++ b/libavcodec/arm/vc1dsp_init_arm.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -23,10 +23,14 @@
 #include "libavcodec/vc1dsp.h"
 #include "vc1dsp.h"
 
+int ff_startcode_find_candidate_armv6(const uint8_t *buf, int size); /* prototype only; NOTE(review): presumably implemented in startcode_armv6.S -- confirm */
+
 av_cold void ff_vc1dsp_init_arm(VC1DSPContext *dsp)
 {
     int cpu_flags = av_get_cpu_flags();
 
+    if (have_armv6(cpu_flags)) /* ARMv6 reported by the CPU flags: install the ARMv6 start-code candidate search */
+        dsp->vc1_find_start_code_candidate = ff_startcode_find_candidate_armv6;
     if (have_neon(cpu_flags))
         ff_vc1dsp_init_neon(dsp);
 }
diff --git a/libavcodec/arm/vc1dsp_init_neon.c b/libavcodec/arm/vc1dsp_init_neon.c
index 9ded7a2..bb873e6 100644
--- a/libavcodec/arm/vc1dsp_init_neon.c
+++ b/libavcodec/arm/vc1dsp_init_neon.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -37,40 +37,38 @@ void ff_vc1_inv_trans_4x4_dc_neon(uint8_t *dest, int linesize, int16_t *block);
 void ff_put_pixels8x8_neon(uint8_t *block, const uint8_t *pixels,
                            ptrdiff_t line_size, int rnd);
 
-void ff_put_vc1_mspel_mc10_neon(uint8_t *dst, const uint8_t *src,
-                                ptrdiff_t stride, int rnd);
-void ff_put_vc1_mspel_mc20_neon(uint8_t *dst, const uint8_t *src,
-                                ptrdiff_t stride, int rnd);
-void ff_put_vc1_mspel_mc30_neon(uint8_t *dst, const uint8_t *src,
-                                ptrdiff_t stride, int rnd);
-
-void ff_put_vc1_mspel_mc01_neon(uint8_t *dst, const uint8_t *src,
-                                ptrdiff_t stride, int rnd);
-void ff_put_vc1_mspel_mc02_neon(uint8_t *dst, const uint8_t *src,
-                                ptrdiff_t stride, int rnd);
-void ff_put_vc1_mspel_mc03_neon(uint8_t *dst, const uint8_t *src,
-                                ptrdiff_t stride, int rnd);
-
-void ff_put_vc1_mspel_mc11_neon(uint8_t *dst, const uint8_t *src,
-                                ptrdiff_t stride, int rnd);
-void ff_put_vc1_mspel_mc12_neon(uint8_t *dst, const uint8_t *src,
-                                ptrdiff_t stride, int rnd);
-void ff_put_vc1_mspel_mc13_neon(uint8_t *dst, const uint8_t *src,
-                                ptrdiff_t stride, int rnd);
-
-void ff_put_vc1_mspel_mc21_neon(uint8_t *dst, const uint8_t *src,
-                                ptrdiff_t stride, int rnd);
-void ff_put_vc1_mspel_mc22_neon(uint8_t *dst, const uint8_t *src,
-                                ptrdiff_t stride, int rnd);
-void ff_put_vc1_mspel_mc23_neon(uint8_t *dst, const uint8_t *src,
-                                ptrdiff_t stride, int rnd);
-
-void ff_put_vc1_mspel_mc31_neon(uint8_t *dst, const uint8_t *src,
-                                ptrdiff_t stride, int rnd);
-void ff_put_vc1_mspel_mc32_neon(uint8_t *dst, const uint8_t *src,
-                                ptrdiff_t stride, int rnd);
-void ff_put_vc1_mspel_mc33_neon(uint8_t *dst, const uint8_t *src,
-                                ptrdiff_t stride, int rnd);
+#define DECL_PUT(X, Y) /* declares the mcXY NEON put function and defines a static _16_ wrapper around it */ \
+void ff_put_vc1_mspel_mc##X##Y##_neon(uint8_t *dst, const uint8_t *src, \
+                                      ptrdiff_t stride, int rnd); \
+static void ff_put_vc1_mspel_mc##X##Y##_16_neon(uint8_t *dst, const uint8_t *src, \
+                                         ptrdiff_t stride, int rnd) \
+{ \
+  ff_put_vc1_mspel_mc##X##Y##_neon(dst+0, src+0, stride, rnd); /* first call: unmodified pointers */ \
+  ff_put_vc1_mspel_mc##X##Y##_neon(dst+8, src+8, stride, rnd); /* second call: both pointers 8 bytes further */ \
+  dst += 8*stride; src += 8*stride; /* advance both pointers by 8 strides */ \
+  ff_put_vc1_mspel_mc##X##Y##_neon(dst+0, src+0, stride, rnd); /* repeat the same pair of calls at the advanced position */ \
+  ff_put_vc1_mspel_mc##X##Y##_neon(dst+8, src+8, stride, rnd); \
+}
+
+DECL_PUT(1, 0)
+DECL_PUT(2, 0)
+DECL_PUT(3, 0)
+
+DECL_PUT(0, 1)
+DECL_PUT(0, 2)
+DECL_PUT(0, 3)
+
+DECL_PUT(1, 1)
+DECL_PUT(1, 2)
+DECL_PUT(1, 3)
+
+DECL_PUT(2, 1)
+DECL_PUT(2, 2)
+DECL_PUT(2, 3)
+
+DECL_PUT(3, 1)
+DECL_PUT(3, 2)
+DECL_PUT(3, 3)
 
 void ff_put_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, int stride, int h,
                                 int x, int y);
@@ -81,6 +79,10 @@ void ff_put_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride, int h,
 void ff_avg_vc1_chroma_mc4_neon(uint8_t *dst, uint8_t *src, int stride, int h,
                                 int x, int y);
 
+#define FN_ASSIGN(X, Y) /* two statements, no do{}while(0): use only inside a braced block; caller adds the final ';' */ \
+    dsp->put_vc1_mspel_pixels_tab[0][X+4*Y] = ff_put_vc1_mspel_mc##X##Y##_16_neon; /* table 0 gets the _16_ wrapper */ \
+    dsp->put_vc1_mspel_pixels_tab[1][X+4*Y] = ff_put_vc1_mspel_mc##X##Y##_neon /* table 1 gets the plain NEON function */
+
 av_cold void ff_vc1dsp_init_neon(VC1DSPContext *dsp)
 {
     dsp->vc1_inv_trans_8x8 = ff_vc1_inv_trans_8x8_neon;
@@ -92,23 +94,26 @@ av_cold void ff_vc1dsp_init_neon(VC1DSPContext *dsp)
     dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_neon;
     dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_neon;
 
-    dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_pixels8x8_neon;
+    dsp->put_vc1_mspel_pixels_tab[1][ 0] = ff_put_pixels8x8_neon;
     if (HAVE_AS_DN_DIRECTIVE) {
-    dsp->put_vc1_mspel_pixels_tab[ 1] = ff_put_vc1_mspel_mc10_neon;
-    dsp->put_vc1_mspel_pixels_tab[ 2] = ff_put_vc1_mspel_mc20_neon;
-    dsp->put_vc1_mspel_pixels_tab[ 3] = ff_put_vc1_mspel_mc30_neon;
-    dsp->put_vc1_mspel_pixels_tab[ 4] = ff_put_vc1_mspel_mc01_neon;
-    dsp->put_vc1_mspel_pixels_tab[ 5] = ff_put_vc1_mspel_mc11_neon;
-    dsp->put_vc1_mspel_pixels_tab[ 6] = ff_put_vc1_mspel_mc21_neon;
-    dsp->put_vc1_mspel_pixels_tab[ 7] = ff_put_vc1_mspel_mc31_neon;
-    dsp->put_vc1_mspel_pixels_tab[ 8] = ff_put_vc1_mspel_mc02_neon;
-    dsp->put_vc1_mspel_pixels_tab[ 9] = ff_put_vc1_mspel_mc12_neon;
-    dsp->put_vc1_mspel_pixels_tab[10] = ff_put_vc1_mspel_mc22_neon;
-    dsp->put_vc1_mspel_pixels_tab[11] = ff_put_vc1_mspel_mc32_neon;
-    dsp->put_vc1_mspel_pixels_tab[12] = ff_put_vc1_mspel_mc03_neon;
-    dsp->put_vc1_mspel_pixels_tab[13] = ff_put_vc1_mspel_mc13_neon;
-    dsp->put_vc1_mspel_pixels_tab[14] = ff_put_vc1_mspel_mc23_neon;
-    dsp->put_vc1_mspel_pixels_tab[15] = ff_put_vc1_mspel_mc33_neon;
+    FN_ASSIGN(1, 0);
+    FN_ASSIGN(2, 0);
+    FN_ASSIGN(3, 0);
+
+    FN_ASSIGN(0, 1);
+    FN_ASSIGN(1, 1);
+    FN_ASSIGN(2, 1);
+    FN_ASSIGN(3, 1);
+
+    FN_ASSIGN(0, 2);
+    FN_ASSIGN(1, 2);
+    FN_ASSIGN(2, 2);
+    FN_ASSIGN(3, 2);
+
+    FN_ASSIGN(0, 3);
+    FN_ASSIGN(1, 3);
+    FN_ASSIGN(2, 3);
+    FN_ASSIGN(3, 3);
     }
 
     dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_neon;
diff --git a/libavcodec/arm/vc1dsp_neon.S b/libavcodec/arm/vc1dsp_neon.S
index fa87ede..c4f4db9 100644
--- a/libavcodec/arm/vc1dsp_neon.S
+++ b/libavcodec/arm/vc1dsp_neon.S
@@ -4,20 +4,20 @@
  * Copyright (c) 2010 Rob Clark <rob@ti.com>
  * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/videodsp_arm.h b/libavcodec/arm/videodsp_arm.h
index a708759..112cbb8 100644
--- a/libavcodec/arm/videodsp_arm.h
+++ b/libavcodec/arm/videodsp_arm.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/videodsp_armv5te.S b/libavcodec/arm/videodsp_armv5te.S
index bbd0a61..55bcce5 100644
--- a/libavcodec/arm/videodsp_armv5te.S
+++ b/libavcodec/arm/videodsp_armv5te.S
@@ -2,20 +2,20 @@
 @ ARMv5te-optimized core video DSP functions
 @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
 @
-@ This file is part of Libav.
+@ This file is part of FFmpeg.
 @
-@ Libav is free software; you can redistribute it and/or
+@ FFmpeg is free software; you can redistribute it and/or
 @ modify it under the terms of the GNU Lesser General Public
 @ License as published by the Free Software Foundation; either
 @ version 2.1 of the License, or (at your option) any later version.
 @
-@ Libav is distributed in the hope that it will be useful,
+@ FFmpeg is distributed in the hope that it will be useful,
 @ but WITHOUT ANY WARRANTY; without even the implied warranty of
 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 @ Lesser General Public License for more details.
 @
 @ You should have received a copy of the GNU Lesser General Public
-@ License along with Libav; if not, write to the Free Software
+@ License along with FFmpeg; if not, write to the Free Software
 @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 @
 
diff --git a/libavcodec/arm/videodsp_init_arm.c b/libavcodec/arm/videodsp_init_arm.c
index 20c6e4a..a89abb2 100644
--- a/libavcodec/arm/videodsp_init_arm.c
+++ b/libavcodec/arm/videodsp_init_arm.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2012 Ronald S. Bultje
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/videodsp_init_armv5te.c b/libavcodec/arm/videodsp_init_armv5te.c
index 832191f..1ea1f34 100644
--- a/libavcodec/arm/videodsp_init_armv5te.c
+++ b/libavcodec/arm/videodsp_init_armv5te.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2012 Ronald S. Bultje
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,5 +27,7 @@ void ff_prefetch_arm(uint8_t *mem, ptrdiff_t stride, int h);
 
 av_cold void ff_videodsp_init_armv5te(VideoDSPContext *ctx, int bpc)
 {
+#if HAVE_ARMV5TE_EXTERNAL
     ctx->prefetch = ff_prefetch_arm;
+#endif
 }
diff --git a/libavcodec/arm/vorbisdsp_init_arm.c b/libavcodec/arm/vorbisdsp_init_arm.c
index 853ba2d..f4b3d80 100644
--- a/libavcodec/arm/vorbisdsp_init_arm.c
+++ b/libavcodec/arm/vorbisdsp_init_arm.c
@@ -2,20 +2,20 @@
  * ARM NEON optimised DSP functions
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/vorbisdsp_neon.S b/libavcodec/arm/vorbisdsp_neon.S
index 7df876c..79ce54f 100644
--- a/libavcodec/arm/vorbisdsp_neon.S
+++ b/libavcodec/arm/vorbisdsp_neon.S
@@ -2,20 +2,20 @@
  * ARM NEON optimised DSP functions
  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/vp3dsp_init_arm.c b/libavcodec/arm/vp3dsp_init_arm.c
index 11e1f1c..d924636 100644
--- a/libavcodec/arm/vp3dsp_init_arm.c
+++ b/libavcodec/arm/vp3dsp_init_arm.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/vp3dsp_neon.S b/libavcodec/arm/vp3dsp_neon.S
index 58bd97d..2942d48 100644
--- a/libavcodec/arm/vp3dsp_neon.S
+++ b/libavcodec/arm/vp3dsp_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2009 David Conrad
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/vp56_arith.h b/libavcodec/arm/vp56_arith.h
index 6bc9456..feb1247 100644
--- a/libavcodec/arm/vp56_arith.h
+++ b/libavcodec/arm/vp56_arith.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2010 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/vp6dsp_init_arm.c b/libavcodec/arm/vp6dsp_init_arm.c
index 4ec41ed..ed68321 100644
--- a/libavcodec/arm/vp6dsp_init_arm.c
+++ b/libavcodec/arm/vp6dsp_init_arm.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/vp6dsp_neon.S b/libavcodec/arm/vp6dsp_neon.S
index 10b4d0f..03dd28d 100644
--- a/libavcodec/arm/vp6dsp_neon.S
+++ b/libavcodec/arm/vp6dsp_neon.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/vp8.h b/libavcodec/arm/vp8.h
index 93b2788..965342d 100644
--- a/libavcodec/arm/vp8.h
+++ b/libavcodec/arm/vp8.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/vp8_armv6.S b/libavcodec/arm/vp8_armv6.S
index 3863dc3..e7d25a4 100644
--- a/libavcodec/arm/vp8_armv6.S
+++ b/libavcodec/arm/vp8_armv6.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2010 Mans Rullgard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/vp8dsp.h b/libavcodec/arm/vp8dsp.h
index 0d55e0f..7281d0b 100644
--- a/libavcodec/arm/vp8dsp.h
+++ b/libavcodec/arm/vp8dsp.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/vp8dsp_armv6.S b/libavcodec/arm/vp8dsp_armv6.S
index 03100cd..1adcbbd 100644
--- a/libavcodec/arm/vp8dsp_armv6.S
+++ b/libavcodec/arm/vp8dsp_armv6.S
@@ -5,20 +5,20 @@
  * Copyright (c) 2010 Rob Clark <rob@ti.com>
  * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  * This code was partially ported from libvpx, which uses this license:
diff --git a/libavcodec/arm/vp8dsp_init_arm.c b/libavcodec/arm/vp8dsp_init_arm.c
index aa77dba..8b80176 100644
--- a/libavcodec/arm/vp8dsp_init_arm.c
+++ b/libavcodec/arm/vp8dsp_init_arm.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/vp8dsp_init_armv6.c b/libavcodec/arm/vp8dsp_init_armv6.c
index febe4e7..a5bcd73 100644
--- a/libavcodec/arm/vp8dsp_init_armv6.c
+++ b/libavcodec/arm/vp8dsp_init_armv6.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/vp8dsp_init_neon.c b/libavcodec/arm/vp8dsp_init_neon.c
index 2b6c775..53f1f23 100644
--- a/libavcodec/arm/vp8dsp_init_neon.c
+++ b/libavcodec/arm/vp8dsp_init_neon.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/arm/vp8dsp_neon.S b/libavcodec/arm/vp8dsp_neon.S
index 544332c..5319346 100644
--- a/libavcodec/arm/vp8dsp_neon.S
+++ b/libavcodec/arm/vp8dsp_neon.S
@@ -4,20 +4,20 @@
  * Copyright (c) 2010 Rob Clark <rob@ti.com>
  * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ass.c b/libavcodec/ass.c
index 098050a..a5b5ae5 100644
--- a/libavcodec/ass.c
+++ b/libavcodec/ass.c
@@ -1,66 +1,52 @@
 /*
- * SSA/ASS common funtions
+ * SSA/ASS common functions
  * Copyright (c) 2010  Aurelien Jacobs <aurel@gnuage.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "avcodec.h"
 #include "ass.h"
+#include "libavutil/avassert.h"
 #include "libavutil/avstring.h"
+#include "libavutil/bprint.h"
 #include "libavutil/common.h"
 
-/**
- * Generate a suitable AVCodecContext.subtitle_header for SUBTITLE_ASS.
- *
- * @param avctx pointer to the AVCodecContext
- * @param font name of the default font face to use
- * @param font_size default font size to use
- * @param color default text color to use (ABGR)
- * @param back_color default background color to use (ABGR)
- * @param bold 1 for bold text, 0 for normal text
- * @param italic 1 for italic text, 0 for normal text
- * @param underline 1 for underline text, 0 for normal text
- * @param alignment position of the text (left, center, top...), defined after
- *                  the layout of the numpad (1-3 sub, 4-6 mid, 7-9 top)
- * @return >= 0 on success otherwise an error code <0
- */
-static int ass_subtitle_header(AVCodecContext *avctx,
-                               const char *font, int font_size,
-                               int color, int back_color,
-                               int bold, int italic, int underline,
-                               int alignment)
+int ff_ass_subtitle_header(AVCodecContext *avctx,
+                           const char *font, int font_size,
+                           int color, int back_color,
+                           int bold, int italic, int underline,
+                           int alignment)
 {
-    char header[512];
-
-    snprintf(header, sizeof(header),
+    avctx->subtitle_header = av_asprintf(
              "[Script Info]\r\n"
              "ScriptType: v4.00+\r\n"
+             "PlayResX: 384\r\n"
+             "PlayResY: 288\r\n"
              "\r\n"
              "[V4+ Styles]\r\n"
              "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, AlphaLevel, Encoding\r\n"
              "Style: Default,%s,%d,&H%x,&H%x,&H%x,&H%x,%d,%d,%d,1,1,0,%d,10,10,10,0,0\r\n"
              "\r\n"
              "[Events]\r\n"
-             "Format: Layer, Start, End, Text\r\n",
+             "Format: Layer, Start, End, Style, Text\r\n",
              font, font_size, color, color, back_color, back_color,
              -bold, -italic, -underline, alignment);
 
-    avctx->subtitle_header = av_strdup(header);
     if (!avctx->subtitle_header)
         return AVERROR(ENOMEM);
     avctx->subtitle_header_size = strlen(avctx->subtitle_header);
@@ -69,7 +55,7 @@ static int ass_subtitle_header(AVCodecContext *avctx,
 
 int ff_ass_subtitle_header_default(AVCodecContext *avctx)
 {
-    return ass_subtitle_header(avctx, ASS_DEFAULT_FONT,
+    return ff_ass_subtitle_header(avctx, ASS_DEFAULT_FONT,
                                ASS_DEFAULT_FONT_SIZE,
                                ASS_DEFAULT_COLOR,
                                ASS_DEFAULT_BACK_COLOR,
@@ -79,47 +65,127 @@ int ff_ass_subtitle_header_default(AVCodecContext *avctx)
                                ASS_DEFAULT_ALIGNMENT);
 }
 
-void ff_ass_init(AVSubtitle *sub)
+static void insert_ts(AVBPrint *buf, int ts) /* appends "H:MM:SS.CC," to buf */
 {
-    memset(sub, 0, sizeof(*sub));
-}
+    if (ts == -1) { /* -1 is written as the fixed timestamp below */
+        av_bprintf(buf, "9:59:59.99,");
+    } else {
+        int h, m, s;
 
-static int ts_to_string(char *str, int strlen, int ts)
-{
-    int h, m, s;
-    h = ts/360000;  ts -= 360000*h;
-    m = ts/  6000;  ts -=   6000*m;
-    s = ts/   100;  ts -=    100*s;
-    return snprintf(str, strlen, "%d:%02d:%02d.%02d", h, m, s, ts);
+        h = ts/360000;  ts -= 360000*h; /* 360000 hundredths of a second per hour */
+        m = ts/  6000;  ts -=   6000*m; /* 6000 hundredths of a second per minute */
+        s = ts/   100;  ts -=    100*s; /* ts now holds the remaining hundredths */
+        av_bprintf(buf, "%d:%02d:%02d.%02d,", h, m, s, ts);
+    }
 }
 
-int ff_ass_add_rect(AVSubtitle *sub, const char *dialog,
-                    int ts_start, int ts_end, int raw)
+int ff_ass_bprint_dialog(AVBPrint *buf, const char *dialog,
+                         int ts_start, int duration, int raw)
 {
-    int len = 0, dlen, duration = ts_end - ts_start;
-    char s_start[16], s_end[16], header[48] = {0};
-    AVSubtitleRect **rects;
+    int dlen;
 
-    if (!raw) {
-        ts_to_string(s_start, sizeof(s_start), ts_start);
-        ts_to_string(s_end,   sizeof(s_end),   ts_end  );
-        len = snprintf(header, sizeof(header), "Dialogue: 0,%s,%s,",
-                       s_start, s_end);
+    if (!raw || raw == 2) { /* raw == 1 gets no generated "Dialogue:" prefix */
+        long int layer = 0;
+
+        if (raw == 2) {
+            /* skip ReadOrder: everything up to and including the first comma */
+            dialog = strchr(dialog, ',');
+            if (!dialog)
+                return AVERROR_INVALIDDATA;
+            dialog++;
+
+            /* extract Layer or Marked: a decimal number that must be followed by a comma */
+            layer = strtol(dialog, (char**)&dialog, 10);
+            if (*dialog != ',')
+                return AVERROR_INVALIDDATA;
+            dialog++;
+        }
+        av_bprintf(buf, "Dialogue: %ld,", layer);
+        insert_ts(buf, ts_start);
+        insert_ts(buf, duration == -1 ? -1 : ts_start + duration); /* end time; a duration of -1 is passed on as -1 */
+        if (raw != 2)
+            av_bprintf(buf, "Default,"); /* generated lines always name the "Default" style */
     }
 
     dlen = strcspn(dialog, "\n");
     dlen += dialog[dlen] == '\n';
 
+    av_bprintf(buf, "%.*s", dlen, dialog);
+    if (raw == 2)
+        av_bprintf(buf, "\r\n");
+
+    return dlen; /* length of the line copied from dialog, a trailing '\n' included */
+}
+
+/* Add dialog to sub as a new SUBTITLE_ASS rect; returns the chars read from dialog, or <0 on error. */
+int ff_ass_add_rect(AVSubtitle *sub, const char *dialog,
+                    int ts_start, int duration, int raw)
+{
+    AVBPrint buf;
+    int ret, dlen;
+    AVSubtitleRect **rects;
+    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
+    if ((ret = ff_ass_bprint_dialog(&buf, dialog, ts_start, duration, raw)) < 0)
+        goto err;
+    dlen = ret;
+    if (!av_bprint_is_complete(&buf)) /* an incomplete buffer is reported as ENOMEM */
+        goto errnomem;
     rects = av_realloc(sub->rects, (sub->num_rects+1) * sizeof(*sub->rects));
     if (!rects)
-        return AVERROR(ENOMEM);
+        goto errnomem;
     sub->rects = rects;
     sub->end_display_time = FFMAX(sub->end_display_time, 10 * duration);
     rects[sub->num_rects]       = av_mallocz(sizeof(*rects[0]));
+    if (!rects[sub->num_rects]) /* never dereference a failed allocation */
+        goto errnomem;
     rects[sub->num_rects]->type = SUBTITLE_ASS;
-    rects[sub->num_rects]->ass  = av_malloc(len + dlen + 1);
-    strcpy (rects[sub->num_rects]->ass      , header);
-    av_strlcpy(rects[sub->num_rects]->ass + len, dialog, dlen + 1);
+    if ((ret = av_bprint_finalize(&buf, &rects[sub->num_rects]->ass)) < 0)
+        goto err;
     sub->num_rects++;
     return dlen;
+
+errnomem:
+    ret = AVERROR(ENOMEM);
+err:
+    av_bprint_finalize(&buf, NULL);
+    return ret;
+}
+
+/* Append p (at most size bytes, stopping at the first NUL) to buf as ASS event text, followed by CRLF. */
+void ff_ass_bprint_text_event(AVBPrint *buf, const char *p, int size,
+                             const char *linebreaks, int keep_ass_markup)
+{
+    const char *p_end = p + size;
+
+    for (; p < p_end && *p; p++) {
+        /* forced custom line breaks, not accounted as "normal" EOL */
+        if (linebreaks && strchr(linebreaks, *p)) {
+            av_bprintf(buf, "\\N");
+
+        /* standard ASS escaping so random characters don't get mis-interpreted
+         * as ASS */
+        } else if (!keep_ass_markup && strchr("{}\\", *p)) {
+            av_bprintf(buf, "\\%c", *p);
+
+        /* some packets might end abruptly (no \0 at the end, like for example
+         * in some cases of demuxing from a classic video container), some
+         * might be terminated with \n or \r\n which we have to remove (for
+         * consistency with those who haven't), and we also have to deal with
+         * evil cases such as \r at the end of the buffer (and no \0 terminated
+         * character) */
+        } else if (p[0] == '\n') {
+            /* some stuff left so we can insert a line break */
+            if (p < p_end - 1)
+                av_bprintf(buf, "\\N");
+        } else if (p[0] == '\r' && p < p_end - 1 && p[1] == '\n') {
+            /* \r followed by a \n, we can skip it. We don't insert the \N yet
+             * because we don't know if it is followed by more text */
+            continue;
+
+        /* finally, a sane character */
+        } else {
+            av_bprint_chars(buf, *p, 1);
+        }
+    }
+    av_bprintf(buf, "\r\n"); /* every event ends with CRLF */
 }
diff --git a/libavcodec/ass.h b/libavcodec/ass.h
index 594b5f3..2df38e6 100644
--- a/libavcodec/ass.h
+++ b/libavcodec/ass.h
@@ -1,21 +1,21 @@
 /*
- * SSA/ASS common funtions
+ * SSA/ASS common functions
  * Copyright (c) 2010  Aurelien Jacobs <aurel@gnuage.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -23,6 +23,7 @@
 #define AVCODEC_ASS_H
 
 #include "avcodec.h"
+#include "libavutil/bprint.h"
 
 /**
  * @name Default values for ASS style
@@ -39,6 +40,27 @@
 /** @} */
 
 /**
+ * Generate a suitable AVCodecContext.subtitle_header for SUBTITLE_ASS.
+ *
+ * @param avctx pointer to the AVCodecContext
+ * @param font name of the default font face to use
+ * @param font_size default font size to use
+ * @param color default text color to use (ABGR)
+ * @param back_color default background color to use (ABGR)
+ * @param bold 1 for bold text, 0 for normal text
+ * @param italic 1 for italic text, 0 for normal text
+ * @param underline 1 for underline text, 0 for normal text
+ * @param alignment position of the text (left, center, top...), defined after
+ *                  the layout of the numpad (1-3 sub, 4-6 mid, 7-9 top)
+ * @return >= 0 on success otherwise an error code <0
+ */
+int ff_ass_subtitle_header(AVCodecContext *avctx,
+                           const char *font, int font_size,
+                           int color, int back_color,
+                           int bold, int italic, int underline,
+                           int alignment);
+
+/**
  * Generate a suitable AVCodecContext.subtitle_header for SUBTITLE_ASS
  * with default style.
  *
@@ -48,20 +70,38 @@
 int ff_ass_subtitle_header_default(AVCodecContext *avctx);
 
 /**
- * Initialize an AVSubtitle structure for use with ff_ass_add_rect().
+ * Add an ASS dialog line to an AVSubtitle as a new AVSubtitleRect.
  *
  * @param sub pointer to the AVSubtitle
+ * @param dialog ASS dialog to add to sub
+ * @param ts_start start timestamp for this dialog (in 1/100 second unit)
+ * @param duration duration for this dialog (in 1/100 second unit), can be -1
+ *                 to last until the end of the presentation
+ * @param raw when set to 2, it indicates that dialog contains an ASS
+ *                           dialog line as muxed in Matroska
+ *            when set to 1, it indicates that dialog contains a whole SSA
+ *                           dialog line which should be copied as is.
+ *            when set to 0, it indicates that dialog contains only the Text
+ *                           part of the ASS dialog line, the rest of the line
+ *                           will be generated.
+ * @return number of characters read from dialog. It can be less than the whole
+ *         length of dialog, if dialog contains several lines of text.
+ *         A negative value indicates an error.
  */
-void ff_ass_init(AVSubtitle *sub);
+int ff_ass_add_rect(AVSubtitle *sub, const char *dialog,
+                    int ts_start, int duration, int raw);
 
 /**
- * Add an ASS dialog line to an AVSubtitle as a new AVSubtitleRect.
+ * Add an ASS dialog line to an AVBPrint buffer.
  *
- * @param sub pointer to the AVSubtitle
+ * @param buf pointer to an initialized AVBPrint buffer
  * @param dialog ASS dialog to add to sub
  * @param ts_start start timestamp for this dialog (in 1/100 second unit)
- * @param ts_end end timestamp for this dialog (in 1/100 second unit)
- * @param raw when set to 1, it indicates that dialog contains a whole ASS
+ * @param duration duration for this dialog (in 1/100 second unit), can be -1
+ *                 to last until the end of the presentation
+ * @param raw when set to 2, it indicates that dialog contains an ASS
+ *                           dialog line as muxed in Matroska
+ *            when set to 1, it indicates that dialog contains a whole SSA
  *                           dialog line which should be copied as is.
  *            when set to 0, it indicates that dialog contains only the Text
  *                           part of the ASS dialog line, the rest of the line
@@ -70,7 +110,19 @@ void ff_ass_init(AVSubtitle *sub);
  *         length of dialog, if dialog contains several lines of text.
  *         A negative value indicates an error.
  */
-int ff_ass_add_rect(AVSubtitle *sub, const char *dialog,
-                    int ts_start, int ts_end, int raw);
+int ff_ass_bprint_dialog(AVBPrint *buf, const char *dialog,
+                         int ts_start, int duration, int raw);
 
+/**
+ * Escape a text subtitle using ASS syntax into an AVBPrint buffer.
+ * Newline characters will be escaped to \N.
+ *
+ * @param buf pointer to an initialized AVBPrint buffer
+ * @param p source text
+ * @param size size of the source text
+ * @param linebreaks additional newline chars, which will be escaped to \N
+ * @param keep_ass_markup braces and backslash will not be escaped if set
+ */
+void ff_ass_bprint_text_event(AVBPrint *buf, const char *p, int size,
+                             const char *linebreaks, int keep_ass_markup);
 #endif /* AVCODEC_ASS_H */
diff --git a/libavcodec/ass_split.c b/libavcodec/ass_split.c
new file mode 100644
index 0000000..413e9c8
--- /dev/null
+++ b/libavcodec/ass_split.c
@@ -0,0 +1,476 @@
+/*
+ * SSA/ASS splitting functions
+ * Copyright (c) 2010  Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "ass_split.h"
+
+typedef enum {        /* value kinds of a section field; each one indexes convert_func[] */
+    ASS_STR,          /* handled by convert_str() */
+    ASS_INT,          /* handled by convert_int() */
+    ASS_FLT,          /* handled by convert_flt() */
+    ASS_COLOR,        /* handled by convert_color() */
+    ASS_TIMESTAMP,    /* handled by convert_timestamp() */
+    ASS_ALGN,         /* handled by convert_alignment() */
+} ASSFieldType;
+
+typedef struct {      /* describes one named field of a section */
+    const char *name; /* field name as written in the script */
+    int type;         /* an ASSFieldType value, selects the converter */
+    int offset;       /* byte offset of the destination inside the section's struct */
+} ASSFields;
+
+typedef struct {               /* describes how one script section is parsed */
+    const char *section;       /* name compared with the text of a "[...]" line */
+    const char *format_header; /* keyword of the line listing the field order, if any */
+    const char *fields_header; /* keyword of each line carrying field values, if any */
+    int         size;          /* size in bytes of one array element of this section */
+    int         offset;        /* offset of the section's data within ASS */
+    int         offset_count;  /* offset of the element counter within ASS */
+    ASSFields   fields[10];    /* known fields, terminated by a zeroed entry */
+} ASSSection;
+
+static const ASSSection ass_sections[] = { /* every section this parser understands */
+    { .section       = "Script Info", /* no headers: read as "Name: value" lines */
+      .offset        = offsetof(ASS, script_info),
+      .fields = {{"ScriptType", ASS_STR, offsetof(ASSScriptInfo, script_type)},
+                 {"Collisions", ASS_STR, offsetof(ASSScriptInfo, collisions) },
+                 {"PlayResX",   ASS_INT, offsetof(ASSScriptInfo, play_res_x) },
+                 {"PlayResY",   ASS_INT, offsetof(ASSScriptInfo, play_res_y) },
+                 {"Timer",      ASS_FLT, offsetof(ASSScriptInfo, timer)      },
+                 {0},
+        }
+    },
+    { .section       = "V4+ Styles", /* fills the same ASS.styles array as "V4 Styles" */
+      .format_header = "Format",
+      .fields_header = "Style",
+      .size          = sizeof(ASSStyle),
+      .offset        = offsetof(ASS, styles),
+      .offset_count  = offsetof(ASS, styles_count),
+      .fields = {{"Name",         ASS_STR,  offsetof(ASSStyle, name)         },
+                 {"Fontname",     ASS_STR,  offsetof(ASSStyle, font_name)    },
+                 {"Fontsize",     ASS_INT,  offsetof(ASSStyle, font_size)    },
+                 {"PrimaryColour",ASS_COLOR,offsetof(ASSStyle, primary_color)},
+                 {"BackColour",   ASS_COLOR,offsetof(ASSStyle, back_color)   },
+                 {"Bold",         ASS_INT,  offsetof(ASSStyle, bold)         },
+                 {"Italic",       ASS_INT,  offsetof(ASSStyle, italic)       },
+                 {"Underline",    ASS_INT,  offsetof(ASSStyle, underline)    },
+                 {"Alignment",    ASS_INT,  offsetof(ASSStyle, alignment)    },
+                 {0},
+        }
+    },
+    { .section       = "V4 Styles",
+      .format_header = "Format",
+      .fields_header = "Style",
+      .size          = sizeof(ASSStyle),
+      .offset        = offsetof(ASS, styles),
+      .offset_count  = offsetof(ASS, styles_count),
+      .fields = {{"Name",         ASS_STR,  offsetof(ASSStyle, name)         },
+                 {"Fontname",     ASS_STR,  offsetof(ASSStyle, font_name)    },
+                 {"Fontsize",     ASS_INT,  offsetof(ASSStyle, font_size)    },
+                 {"PrimaryColour",ASS_COLOR,offsetof(ASSStyle, primary_color)},
+                 {"BackColour",   ASS_COLOR,offsetof(ASSStyle, back_color)   },
+                 {"Bold",         ASS_INT,  offsetof(ASSStyle, bold)         },
+                 {"Italic",       ASS_INT,  offsetof(ASSStyle, italic)       },
+                 {"Alignment",    ASS_ALGN, offsetof(ASSStyle, alignment)    }, /* ASS_ALGN: value is converted to the V4+ form */
+                 {0},
+        }
+    },
+    { .section       = "Events", /* one ASSDialog per "Dialogue" line */
+      .format_header = "Format",
+      .fields_header = "Dialogue",
+      .size          = sizeof(ASSDialog),
+      .offset        = offsetof(ASS, dialogs),
+      .offset_count  = offsetof(ASS, dialogs_count),
+      .fields = {{"Layer",  ASS_INT,        offsetof(ASSDialog, layer)       },
+                 {"Start",  ASS_TIMESTAMP,  offsetof(ASSDialog, start)       },
+                 {"End",    ASS_TIMESTAMP,  offsetof(ASSDialog, end)         },
+                 {"Style",  ASS_STR,        offsetof(ASSDialog, style)       },
+                 {"Text",   ASS_STR,        offsetof(ASSDialog, text)        },
+                 {0},
+        }
+    },
+};
+
+/* Converter signature: parse the text at buf and store the resulting value at dest. */
+typedef int (*ASSConvertFunc)(void *dest, const char *buf, int len);
+
+static int convert_str(void *dest, const char *buf, int len) /* dest is used as a char ** */
+{
+    char *str = av_malloc(len + 1);
+    if (str) {
+        memcpy(str, buf, len);
+        str[len] = 0;
+        if (*(void **)dest)
+            av_free(*(void **)dest); /* drop the string stored previously */
+        *(char **)dest = str;
+    }
+    return !str; /* NOTE(review): 0 on success, 1 on failure - the opposite of the other converters */
+}
+static int convert_int(void *dest, const char *buf, int len) /* dest is used as an int *; len is unused */
+{
+    return sscanf(buf, "%d", (int *)dest) == 1; /* 1 if a decimal integer was stored, else 0 */
+}
+static int convert_flt(void *dest, const char *buf, int len) /* dest is used as a float *; len is unused */
+{
+    return sscanf(buf, "%f", (float *)dest) == 1; /* 1 if a float was stored, else 0 */
+}
+static int convert_color(void *dest, const char *buf, int len) /* dest is used as an int *; len is unused */
+{
+    return sscanf(buf, "&H%8x", (int *)dest) == 1 || /* "&H" followed by up to 8 hex digits ... */
+           sscanf(buf, "%d",    (int *)dest) == 1;   /* ... or else a decimal number */
+}
+static int convert_timestamp(void *dest, const char *buf, int len) /* dest is used as an int *; len is unused */
+{
+    int c, h, m, s, cs;
+    if ((c = sscanf(buf, "%d:%02d:%02d.%02d", &h, &m, &s, &cs)) == 4)
+        *(int *)dest = 360000*h + 6000*m + 100*s + cs; /* stored in hundredths of a second */
+    return c == 4; /* dest is left untouched unless all four parts were read */
+}
+static int convert_alignment(void *dest, const char *buf, int len) /* dest is used as an int *; len is unused */
+{
+    int a;
+    if (sscanf(buf, "%d", &a) == 1) {
+        /* convert V4 Style alignment to V4+ Style */
+        *(int *)dest = a + ((a&4) >> 1) - 5*!!(a&8); /* adds 2 when bit 4 is set, subtracts 5 when bit 8 is set */
+        return 1;
+    }
+    return 0; /* no number could be read; dest is left untouched */
+}
+
+static const ASSConvertFunc convert_func[] = { /* converter for each ASSFieldType value */
+    [ASS_STR]       = convert_str,
+    [ASS_INT]       = convert_int,
+    [ASS_FLT]       = convert_flt,
+    [ASS_COLOR]     = convert_color,
+    [ASS_TIMESTAMP] = convert_timestamp,
+    [ASS_ALGN]      = convert_alignment,
+};
+
+
+struct ASSSplitContext {
+    ASS ass;                                        /* the data parsed so far */
+    int current_section;                            /* index into ass_sections, -1 outside of a known section */
+    int field_number[FF_ARRAY_ELEMS(ass_sections)]; /* per section: number of entries in field_order */
+    int *field_order[FF_ARRAY_ELEMS(ass_sections)]; /* per section: fields[] index of each Format column, -1 if unknown */
+};
+
+/* Append one zeroed element to the current section's array; returns it, or NULL if the reallocation fails. */
+static uint8_t *realloc_section_array(ASSSplitContext *ctx)
+{
+    const ASSSection *section = &ass_sections[ctx->current_section];
+    int *count = (int *)((uint8_t *)&ctx->ass + section->offset_count);
+    void **section_ptr = (void **)((uint8_t *)&ctx->ass + section->offset);
+    uint8_t *tmp = av_realloc(*section_ptr, (*count+1)*section->size);
+    if (!tmp)
+        return NULL; /* *section_ptr and *count are not modified */
+    *section_ptr = tmp;
+    tmp += *count * section->size; /* address of the new last element */
+    memset(tmp, 0, section->size);
+    (*count)++;
+    return tmp;
+}
+
+static inline int is_eol(char buf) /* nonzero for CR, LF and the terminating NUL */
+{
+    return buf == '\r' || buf == '\n' || buf == 0;
+}
+
+static inline const char *skip_space(const char *buf) /* returns the first character that is not ' ' */
+{
+    while (*buf == ' ') /* only the space character is skipped, not tabs */
+        buf++;
+    return buf;
+}
+
+/* Parse the lines of ctx->current_section from buf; returns the unparsed rest, or NULL on error. */
+static const char *ass_split_section(ASSSplitContext *ctx, const char *buf)
+{
+    const ASSSection *section = &ass_sections[ctx->current_section];
+    int *number = &ctx->field_number[ctx->current_section];
+    int *order = ctx->field_order[ctx->current_section];
+    int *tmp, i, len;
+    while (buf && *buf) {
+        if (buf[0] == '[') { /* next section header: leave it to the caller */
+            ctx->current_section = -1;
+            break;
+        }
+        if (buf[0] == ';' || (buf[0] == '!' && buf[1] == ':')) {
+            /* skip comments */
+        } else if (section->format_header && !order) { /* no field order yet: this must be the Format line */
+            len = strlen(section->format_header);
+            if (strncmp(buf, section->format_header, len) || buf[len] != ':')
+                return NULL;
+            buf += len + 1;
+            while (!is_eol(*buf)) {
+                buf = skip_space(buf);
+                len = strcspn(buf, ", \r\n");
+                if (!(tmp = av_realloc(order, (*number + 1) * sizeof(*order))))
+                    return NULL;
+                /* store the array in ctx right away so that an error return cannot leak it */
+                ctx->field_order[ctx->current_section] = order = tmp;
+                order[*number] = -1; /* -1 marks a column this parser does not know */
+                for (i=0; section->fields[i].name; i++)
+                    if (!strncmp(buf, section->fields[i].name, len)) {
+                        order[*number] = i;
+                        break;
+                    }
+                (*number)++;
+                buf = skip_space(buf + len + (buf[len] == ','));
+            }
+        } else if (section->fields_header) { /* one new array element per matching line */
+            len = strlen(section->fields_header);
+            if (!strncmp(buf, section->fields_header, len) && buf[len] == ':') {
+                uint8_t *ptr, *struct_ptr = realloc_section_array(ctx);
+                if (!struct_ptr)  return NULL;
+                buf += len + 1;
+                for (i=0; !is_eol(*buf) && i < *number; i++) {
+                    int last = i == *number - 1;
+                    buf = skip_space(buf);
+                    len = strcspn(buf, last ? "\r\n" : ",\r\n"); /* the last column may itself contain commas */
+                    if (order[i] >= 0) {
+                        ASSFieldType type = section->fields[order[i]].type;
+                        ptr = struct_ptr + section->fields[order[i]].offset;
+                        convert_func[type](ptr, buf, len);
+                    }
+                    buf += len;
+                    if (!last && *buf) buf++;
+                    buf = skip_space(buf);
+                }
+            }
+        } else { /* "Name: value" lines stored directly in the section's struct */
+            len = strcspn(buf, ":\r\n");
+            if (buf[len] == ':') {
+                for (i=0; section->fields[i].name; i++)
+                    if (!strncmp(buf, section->fields[i].name, len)) {
+                        ASSFieldType type = section->fields[i].type;
+                        uint8_t *ptr = (uint8_t *)&ctx->ass + section->offset;
+                        ptr += section->fields[i].offset;
+                        buf = skip_space(buf + len + 1);
+                        convert_func[type](ptr, buf, strcspn(buf, "\r\n"));
+                        break;
+                    }
+            }
+        }
+        buf += strcspn(buf, "\n"); /* advance to the start of the next line */
+        buf += !!*buf;
+    }
+    return buf;
+}
+
+/* Parse buf, handing every known "[Section]" to ass_split_section(); returns 0 or AVERROR_INVALIDDATA. */
+static int ass_split(ASSSplitContext *ctx, const char *buf)
+{
+    char c, section[16];
+    int i;
+
+    if (ctx->current_section >= 0) /* ctx is still inside a section: continue parsing it */
+        buf = ass_split_section(ctx, buf);
+    while (buf && *buf) {
+        if (sscanf(buf, "[%15[0-9A-Za-z+ ]]%c", section, &c) == 2) { /* "[name]" followed by at least one more character */
+            buf += strcspn(buf, "\n");
+            buf += !!*buf;
+            for (i=0; i<FF_ARRAY_ELEMS(ass_sections); i++)
+                if (!strcmp(section, ass_sections[i].section)) {
+                    ctx->current_section = i;
+                    buf = ass_split_section(ctx, buf);
+                }
+        } else { /* any other line is skipped */
+            buf += strcspn(buf, "\n");
+            buf += !!*buf;
+        }
+    }
+    return buf ? 0 : AVERROR_INVALIDDATA; /* buf is NULL only when ass_split_section() failed */
+}
+
+ASSSplitContext *ff_ass_split(const char *buf)
+{
+    /* Returns a new context holding the parsed script, or NULL on allocation or parse failure. */
+    ASSSplitContext *ctx = av_mallocz(sizeof(*ctx));
+    if (!ctx)  return NULL;   /* never dereference a failed allocation */
+    ctx->current_section = -1;
+    if (ass_split(ctx, buf) >= 0)  return ctx;
+    ff_ass_split_free(ctx);
+    return NULL;
+}
+
+/* Free the strings of one section, reset its element count and, for array sections, free the array. */
+static void free_section(ASSSplitContext *ctx, const ASSSection *section)
+{
+    uint8_t *ptr = (uint8_t *)&ctx->ass + section->offset;
+    int i, j, *count, c = 1;
+    if (section->format_header) { /* array section: the data is reached through a pointer */
+        ptr   = *(void **)ptr;
+        count = (int *)((uint8_t *)&ctx->ass + section->offset_count);
+    } else
+        count = &c; /* the section is a single struct embedded in ASS */
+
+    if (ptr)
+        for (i=0; i<*count; i++, ptr += section->size)
+            for (j=0; section->fields[j].name; j++) {
+                const ASSFields *field = &section->fields[j];
+                if (field->type == ASS_STR) /* only string fields own memory */
+                    av_freep(ptr + field->offset);
+            }
+    *count = 0;
+
+    if (section->format_header)
+        av_freep((uint8_t *)&ctx->ass + section->offset);
+}
+
+ASSDialog *ff_ass_split_dialog(ASSSplitContext *ctx, const char *buf,
+                               int cache, int *number)
+{
+    ASSDialog *dialog = NULL;
+    int i, count;
+    if (!cache) /* without caching, the dialogs parsed earlier are freed first */
+        for (i=0; i<FF_ARRAY_ELEMS(ass_sections); i++)
+            if (!strcmp(ass_sections[i].section, "Events")) {
+                free_section(ctx, &ass_sections[i]);
+                break;
+            }
+    count = ctx->ass.dialogs_count; /* dialogs stored before this call */
+    if (ass_split(ctx, buf) == 0)
+        dialog = ctx->ass.dialogs + count; /* first dialog added by this call */
+    if (number)
+        *number = ctx->ass.dialogs_count - count; /* how many dialogs this call added */
+    return dialog; /* NULL if parsing failed; NOTE(review): not a valid element when no dialog was added */
+}
+
+void ff_ass_split_free(ASSSplitContext *ctx) /* frees ctx and everything it owns; NULL is accepted */
+{
+    if (ctx) {
+        int i;
+        for (i=0; i<FF_ARRAY_ELEMS(ass_sections); i++) {
+            free_section(ctx, &ass_sections[i]);
+            av_freep(&(ctx->field_order[i])); /* the column order recorded from the Format line */
+        }
+        av_free(ctx);
+    }
+}
+
+
+/* Walk the text and override codes in buf, invoking the matching callbacks; returns 0 or AVERROR_INVALIDDATA. */
+int ff_ass_split_override_codes(const ASSCodesCallbacks *callbacks, void *priv,
+                                const char *buf)
+{
+    const char *text = NULL;
+    char new_line[2];
+    int text_len = 0;
+    while (buf && *buf) {
+        if (text && callbacks->text && /* flush the pending text run before a line break or an override block */
+            (sscanf(buf, "\\%1[nN]", new_line) == 1 ||
+             !strncmp(buf, "{\\", 2))) {
+            callbacks->text(priv, text, text_len);
+            text = NULL;
+        }
+        if (sscanf(buf, "\\%1[nN]", new_line) == 1) {
+            if (callbacks->new_line)
+                callbacks->new_line(priv, new_line[0] == 'N');
+            buf += 2;
+        } else if (!strncmp(buf, "{\\", 2)) {
+            buf++;
+            while (*buf == '\\') { /* one iteration per code inside the braces */
+                char style[2], c[2], sep[2], c_num[2] = "0", tmp[128] = {0};
+                unsigned int color = 0xFFFFFFFF;
+                int len, size = -1, an = -1, alpha = -1;
+                int x1, y1, x2, y2, t1 = -1, t2 = -1;
+                if (sscanf(buf, "\\%1[bisu]%1[01\\}]%n", style, c, &len) > 1) {
+                    int close = c[0] == '0' ? 1 : c[0] == '1' ? 0 : -1;
+                    len += close != -1;
+                    if (callbacks->style)
+                        callbacks->style(priv, style[0], close);
+                } else if (sscanf(buf, "\\c%1[\\}]%n", sep, &len) > 0 ||
+                           sscanf(buf, "\\c&H%X&%1[\\}]%n", &color, sep, &len) > 1 ||
+                           sscanf(buf, "\\%1[1234]c%1[\\}]%n", c_num, sep, &len) > 1 ||
+                           sscanf(buf, "\\%1[1234]c&H%X&%1[\\}]%n", c_num, &color, sep, &len) > 2) {
+                    if (callbacks->color)
+                        callbacks->color(priv, color, c_num[0] - '0');
+                } else if (sscanf(buf, "\\alpha%1[\\}]%n", sep, &len) > 0 ||
+                           sscanf(buf, "\\alpha&H%2X&%1[\\}]%n", &alpha, sep, &len) > 1 ||
+                           sscanf(buf, "\\%1[1234]a%1[\\}]%n", c_num, sep, &len) > 1 ||
+                           sscanf(buf, "\\%1[1234]a&H%2X&%1[\\}]%n", c_num, &alpha, sep, &len) > 2) {
+                    if (callbacks->alpha)
+                        callbacks->alpha(priv, alpha, c_num[0] - '0');
+                } else if (sscanf(buf, "\\fn%1[\\}]%n", sep, &len) > 0 ||
+                           sscanf(buf, "\\fn%127[^\\}]%1[\\}]%n", tmp, sep, &len) > 1) {
+                    if (callbacks->font_name)
+                        callbacks->font_name(priv, tmp[0] ? tmp : NULL);
+                } else if (sscanf(buf, "\\fs%1[\\}]%n", sep, &len) > 0 ||
+                           sscanf(buf, "\\fs%u%1[\\}]%n", &size, sep, &len) > 1) {
+                    if (callbacks->font_size)
+                        callbacks->font_size(priv, size);
+                } else if (sscanf(buf, "\\a%1[\\}]%n", sep, &len) > 0 ||
+                           sscanf(buf, "\\a%2u%1[\\}]%n", &an, sep, &len) > 1 ||
+                           sscanf(buf, "\\an%1[\\}]%n", sep, &len) > 0 ||
+                           sscanf(buf, "\\an%1u%1[\\}]%n", &an, sep, &len) > 1) {
+                    if (an != -1 && buf[2] != 'n') /* value came from \a rather than \an: remap it */
+                        an = (an&3) + (an&4 ? 6 : an&8 ? 3 : 0);
+                    if (callbacks->alignment)
+                        callbacks->alignment(priv, an);
+                } else if (sscanf(buf, "\\r%1[\\}]%n", sep, &len) > 0 ||
+                           sscanf(buf, "\\r%127[^\\}]%1[\\}]%n", tmp, sep, &len) > 1) {
+                    if (callbacks->cancel_overrides)
+                        callbacks->cancel_overrides(priv, tmp);
+                } else if (sscanf(buf, "\\move(%d,%d,%d,%d)%1[\\}]%n", &x1, &y1, &x2, &y2, sep, &len) > 4 ||
+                           sscanf(buf, "\\move(%d,%d,%d,%d,%d,%d)%1[\\}]%n", &x1, &y1, &x2, &y2, &t1, &t2, sep, &len) > 6) {
+                    if (callbacks->move)
+                        callbacks->move(priv, x1, y1, x2, y2, t1, t2);
+                } else if (sscanf(buf, "\\pos(%d,%d)%1[\\}]%n", &x1, &y1, sep, &len) > 2) {
+                    if (callbacks->move) /* a position is reported as a move with identical end points */
+                        callbacks->move(priv, x1, y1, x1, y1, -1, -1);
+                } else if (sscanf(buf, "\\org(%d,%d)%1[\\}]%n", &x1, &y1, sep, &len) > 2) {
+                    if (callbacks->origin)
+                        callbacks->origin(priv, x1, y1);
+                } else {
+                    len = strcspn(buf+1, "\\}") + 2;  /* skip unknown code */
+                }
+                buf += len - 1; /* leaves buf on the separator that ended this code */
+            }
+            if (*buf++ != '}') /* an override block has to be closed */
+                return AVERROR_INVALIDDATA;
+        } else {
+            if (!text) { /* start a new run of plain text */
+                text = buf;
+                text_len = 1;
+            } else
+                text_len++;
+            buf++;
+        }
+    }
+    if (text && callbacks->text)
+        callbacks->text(priv, text, text_len);
+    if (callbacks->end)
+        callbacks->end(priv);
+    return 0;
+}
+
+/* Look up a style by name ("Default" when style is NULL or empty); returns NULL when there is no match. */
+ASSStyle *ff_ass_style_get(ASSSplitContext *ctx, const char *style)
+{
+    ASS *ass = &ctx->ass;
+    int i;
+    if (!style || !*style)
+        style = "Default";
+    for (i=0; i<ass->styles_count; i++) /* name is NULL when the Style line had no Name field */
+        if (ass->styles[i].name && !strcmp(ass->styles[i].name, style))
+            return ass->styles + i;
+    return NULL;
+}
diff --git a/libavcodec/ass_split.h b/libavcodec/ass_split.h
new file mode 100644
index 0000000..06c1ce3
--- /dev/null
+++ b/libavcodec/ass_split.h
@@ -0,0 +1,172 @@
+/*
+ * SSA/ASS splitting functions
+ * Copyright (c) 2010  Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ASS_SPLIT_H
+#define AVCODEC_ASS_SPLIT_H
+
+/**
+ * fields extracted from the [Script Info] section
+ */
+typedef struct {
+    char *script_type;    /**< SSA script format version (e.g. v4.00) */
+    char *collisions;     /**< how subtitles are moved to prevent collisions */
+    int   play_res_x;     /**< video width that ASS coords are referring to */
+    int   play_res_y;     /**< video height that ASS coords are referring to */
+    float timer;          /**< time multiplier to apply to SSA clock (in %) */
+} ASSScriptInfo;
+
+/**
+ * fields extracted from the [V4(+) Styles] section
+ */
+typedef struct {
+    char *name;           /**< name of the style (case sensitive) */
+    char *font_name;      /**< font face (case sensitive) */
+    int   font_size;      /**< font height */
+    int   primary_color;  /**< color that a subtitle will normally appear in */
+    int   back_color;     /**< color of the subtitle outline or shadow */
+    int   bold;           /**< whether text is bold (1) or not (0) */
+    int   italic;         /**< whether text is italic (1) or not (0) */
+    int   underline;      /**< whether text is underlined (1) or not (0) */
+    int   alignment;      /**< position of the text (left, center, top...),
+                               defined after the layout of the numpad
+                               (1-3 sub, 4-6 mid, 7-9 top) */
+} ASSStyle;
+
+/**
+ * fields extracted from the [Events] section
+ */
+typedef struct {
+    int   layer;    /**< higher numbered layers are drawn over lower numbered */
+    int   start;    /**< start time of the dialog in centiseconds */
+    int   end;      /**< end time of the dialog in centiseconds */
+    char *style;    /**< name of the ASSStyle to use with this dialog */
+    char *text;     /**< actual text which will be displayed as a subtitle,
+                         can include style override control codes (see
+                         ff_ass_split_override_codes()) */
+} ASSDialog;
+
+/**
+ * structure containing the whole split ASS data
+ */
+typedef struct {
+    ASSScriptInfo script_info;   /**< general information about the SSA script*/
+    ASSStyle     *styles;        /**< array of split out styles */
+    int           styles_count;  /**< number of ASSStyle in the styles array */
+    ASSDialog    *dialogs;       /**< array of split out dialogs */
+    int           dialogs_count; /**< number of ASSDialog in the dialogs array*/
+} ASS;
+
+/**
+ * This struct can be cast to ASS to access the split data.
+ */
+typedef struct ASSSplitContext ASSSplitContext;
+
+/**
+ * Split a full ASS file or an ASS header from a string buffer and store
+ * the split structure in a newly allocated context.
+ *
+ * @param buf String containing the ASS formatted data.
+ * @return Newly allocated struct containing split data.
+ */
+ASSSplitContext *ff_ass_split(const char *buf);
+
+/**
+ * Split one or several ASS "Dialogue" lines from a string buffer and store
+ * them in an already initialized context.
+ *
+ * @param ctx Context previously initialized by ff_ass_split().
+ * @param buf String containing the ASS "Dialogue" lines.
+ * @param cache Set to 1 to keep all the previously split ASSDialog in
+ *              the context, or set to 0 to free all the previously split
+ *              ASSDialog.
+ * @param number If not NULL, the pointed integer will be set to the number
+ *               of split ASSDialog.
+ * @return Pointer to the first split ASSDialog.
+ */
+ASSDialog *ff_ass_split_dialog(ASSSplitContext *ctx, const char *buf,
+                               int cache, int *number);
+
+/**
+ * Free all the memory allocated for an ASSSplitContext.
+ *
+ * @param ctx Context previously initialized by ff_ass_split().
+ */
+void ff_ass_split_free(ASSSplitContext *ctx);
+
+
+/**
+ * Set of callback functions corresponding to each override code that can
+ * be encountered in a "Dialogue" Text field.
+ */
+typedef struct {
+    /**
+     * @defgroup ass_styles    ASS styles
+     * @{
+     */
+    void (*text)(void *priv, const char *text, int len);
+    void (*new_line)(void *priv, int forced);
+    void (*style)(void *priv, char style, int close);
+    void (*color)(void *priv, unsigned int /* color */, unsigned int color_id);
+    void (*alpha)(void *priv, int alpha, int alpha_id);
+    void (*font_name)(void *priv, const char *name);
+    void (*font_size)(void *priv, int size);
+    void (*alignment)(void *priv, int alignment);
+    void (*cancel_overrides)(void *priv, const char *style);
+    /** @} */
+
+    /**
+     * @defgroup ass_functions    ASS functions
+     * @{
+     */
+    void (*move)(void *priv, int x1, int y1, int x2, int y2, int t1, int t2);
+    void (*origin)(void *priv, int x, int y);
+    /** @} */
+
+    /**
+     * @defgroup ass_end    end of Dialogue Event
+     * @{
+     */
+    void (*end)(void *priv);
+    /** @} */
+} ASSCodesCallbacks;
+
+/**
+ * Split override codes out of an ASS "Dialogue" Text field.
+ *
+ * @param callbacks Set of callback functions called for each override code
+ *                  encountered.
+ * @param priv Opaque pointer passed to the callback functions.
+ * @param buf The ASS "Dialogue" Text field to split.
+ * @return >= 0 on success, otherwise an error code < 0
+ */
+int ff_ass_split_override_codes(const ASSCodesCallbacks *callbacks, void *priv,
+                                const char *buf);
+
+/**
+ * Find an ASSStyle structure by its name.
+ *
+ * @param ctx Context previously initialized by ff_ass_split().
+ * @param style name of the style to search for.
+ * @return the ASSStyle corresponding to style, or NULL if style can't be found
+ */
+ASSStyle *ff_ass_style_get(ASSSplitContext *ctx, const char *style);
+
+#endif /* AVCODEC_ASS_SPLIT_H */
diff --git a/libavcodec/assdec.c b/libavcodec/assdec.c
index 7a69582..11dbde0 100644
--- a/libavcodec/assdec.c
+++ b/libavcodec/assdec.c
@@ -2,20 +2,20 @@
  * SSA/ASS decoder
  * Copyright (c) 2010  Aurelien Jacobs <aurel@gnuage.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -23,29 +23,45 @@
 
 #include "avcodec.h"
 #include "ass.h"
+#include "ass_split.h"
 #include "libavutil/internal.h"
 #include "libavutil/mem.h"
 
 static av_cold int ass_decode_init(AVCodecContext *avctx)
 {
-    avctx->subtitle_header = av_malloc(avctx->extradata_size);
-    if (!avctx->extradata)
+    avctx->subtitle_header = av_malloc(avctx->extradata_size + 1);
+    if (!avctx->subtitle_header)
         return AVERROR(ENOMEM);
     memcpy(avctx->subtitle_header, avctx->extradata, avctx->extradata_size);
+    avctx->subtitle_header[avctx->extradata_size] = 0;
     avctx->subtitle_header_size = avctx->extradata_size;
+    avctx->priv_data = ff_ass_split(avctx->extradata);
+    if(!avctx->priv_data)
+        return -1;
     return 0;
 }
 
-static int ass_decode_frame(AVCodecContext *avctx, void *data, int *got_sub_ptr,
+static int ass_decode_close(AVCodecContext *avctx)
+{
+    ff_ass_split_free(avctx->priv_data);
+    avctx->priv_data = NULL;
+    return 0;
+}
+
+#if CONFIG_SSA_DECODER
+static int ssa_decode_frame(AVCodecContext *avctx, void *data, int *got_sub_ptr,
                             AVPacket *avpkt)
 {
     const char *ptr = avpkt->data;
     int len, size = avpkt->size;
 
-    ff_ass_init(data);
-
     while (size > 0) {
-        len = ff_ass_add_rect(data, ptr, 0, 0/* FIXME: duration */, 1);
+        int duration;
+        ASSDialog *dialog = ff_ass_split_dialog(avctx->priv_data, ptr, 0, NULL);
+        if (!dialog)
+            return AVERROR_INVALIDDATA;
+        duration = dialog->end - dialog->start;
+        len = ff_ass_add_rect(data, ptr, 0, duration, 1);
         if (len < 0)
             return len;
         ptr  += len;
@@ -56,11 +72,49 @@ static int ass_decode_frame(AVCodecContext *avctx, void *data, int *got_sub_ptr,
     return avpkt->size;
 }
 
-AVCodec ff_ass_decoder = {
-    .name         = "ass",
+AVCodec ff_ssa_decoder = {
+    .name         = "ssa",
     .long_name    = NULL_IF_CONFIG_SMALL("SSA (SubStation Alpha) subtitle"),
     .type         = AVMEDIA_TYPE_SUBTITLE,
     .id           = AV_CODEC_ID_SSA,
     .init         = ass_decode_init,
+    .decode       = ssa_decode_frame,
+    .close        = ass_decode_close,
+};
+#endif
+
+#if CONFIG_ASS_DECODER
+static int ass_decode_frame(AVCodecContext *avctx, void *data, int *got_sub_ptr,
+                            AVPacket *avpkt)
+{
+    int ret;
+    AVSubtitle *sub = data;
+    const char *ptr = avpkt->data;
+    static const AVRational ass_tb = {1, 100};
+    const int ts_start    = av_rescale_q(avpkt->pts,      avctx->time_base, ass_tb);
+    const int ts_duration = av_rescale_q(avpkt->duration, avctx->time_base, ass_tb);
+
+    if (avpkt->size <= 0)
+        return avpkt->size;
+
+    ret = ff_ass_add_rect(sub, ptr, ts_start, ts_duration, 2);
+    if (ret < 0) {
+        if (ret == AVERROR_INVALIDDATA)
+            av_log(avctx, AV_LOG_ERROR, "Invalid ASS packet\n");
+        return ret;
+    }
+
+    *got_sub_ptr = avpkt->size > 0;
+    return avpkt->size;
+}
+
+AVCodec ff_ass_decoder = {
+    .name         = "ass",
+    .long_name    = NULL_IF_CONFIG_SMALL("ASS (Advanced SubStation Alpha) subtitle"),
+    .type         = AVMEDIA_TYPE_SUBTITLE,
+    .id           = AV_CODEC_ID_ASS,
+    .init         = ass_decode_init,
     .decode       = ass_decode_frame,
+    .close        = ass_decode_close,
 };
+#endif
diff --git a/libavcodec/assenc.c b/libavcodec/assenc.c
index caf266e..5dc3b09 100644
--- a/libavcodec/assenc.c
+++ b/libavcodec/assenc.c
@@ -2,37 +2,44 @@
  * SSA/ASS encoder
  * Copyright (c) 2010  Aurelien Jacobs <aurel@gnuage.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include <string.h>
 
 #include "avcodec.h"
+#include "ass_split.h"
+#include "ass.h"
 #include "libavutil/avstring.h"
 #include "libavutil/internal.h"
 #include "libavutil/mem.h"
 
+typedef struct {
+    int id; ///< current event id, ReadOrder field
+} ASSEncodeContext;
+
 static av_cold int ass_encode_init(AVCodecContext *avctx)
 {
-    avctx->extradata = av_malloc(avctx->subtitle_header_size);
+    avctx->extradata = av_malloc(avctx->subtitle_header_size + 1);
     if (!avctx->extradata)
         return AVERROR(ENOMEM);
     memcpy(avctx->extradata, avctx->subtitle_header, avctx->subtitle_header_size);
     avctx->extradata_size = avctx->subtitle_header_size;
+    avctx->extradata[avctx->extradata_size] = 0;
     return 0;
 }
 
@@ -40,15 +47,54 @@ static int ass_encode_frame(AVCodecContext *avctx,
                             unsigned char *buf, int bufsize,
                             const AVSubtitle *sub)
 {
+    ASSEncodeContext *s = avctx->priv_data;
     int i, len, total_len = 0;
 
     for (i=0; i<sub->num_rects; i++) {
+        char ass_line[2048];
+        const char *ass = sub->rects[i]->ass;
+
         if (sub->rects[i]->type != SUBTITLE_ASS) {
             av_log(avctx, AV_LOG_ERROR, "Only SUBTITLE_ASS type supported.\n");
             return -1;
         }
 
-        len = av_strlcpy(buf+total_len, sub->rects[i]->ass, bufsize-total_len);
+        if (strncmp(ass, "Dialogue: ", 10)) {
+            av_log(avctx, AV_LOG_ERROR, "AVSubtitle rectangle ass \"%s\""
+                   " does not look like a SSA markup\n", ass);
+            return AVERROR_INVALIDDATA;
+        }
+
+        if (avctx->codec->id == AV_CODEC_ID_ASS) {
+            long int layer;
+            char *p;
+
+            if (i > 0) {
+                av_log(avctx, AV_LOG_ERROR, "ASS encoder supports only one "
+                       "ASS rectangle field.\n");
+                return AVERROR_INVALIDDATA;
+            }
+
+            ass += 10; // skip "Dialogue: "
+            /* parse Layer field. If it's a Marked field, the content
+             * will be "Marked=N" instead of the layer num, so we will
+             * have layer=0, which is fine. */
+            layer = strtol(ass, &p, 10);
+
+#define SKIP_ENTRY(ptr) do {        \
+    char *sep = strchr(ptr, ',');   \
+    if (sep)                        \
+        ptr = sep + 1;              \
+} while (0)
+
+            SKIP_ENTRY(p); // skip layer or marked
+            SKIP_ENTRY(p); // skip start timestamp
+            SKIP_ENTRY(p); // skip end timestamp
+            snprintf(ass_line, sizeof(ass_line), "%d,%ld,%s", ++s->id, layer, p);
+            ass_line[strcspn(ass_line, "\r\n")] = 0;
+            ass = ass_line;
+        }
+        len = av_strlcpy(buf+total_len, ass, bufsize-total_len);
 
         if (len > bufsize-total_len-1) {
             av_log(avctx, AV_LOG_ERROR, "Buffer too small for ASS event.\n");
@@ -61,11 +107,26 @@ static int ass_encode_frame(AVCodecContext *avctx,
     return total_len;
 }
 
-AVCodec ff_ass_encoder = {
-    .name         = "ass",
+#if CONFIG_SSA_ENCODER
+AVCodec ff_ssa_encoder = {
+    .name         = "ssa",
     .long_name    = NULL_IF_CONFIG_SMALL("SSA (SubStation Alpha) subtitle"),
     .type         = AVMEDIA_TYPE_SUBTITLE,
     .id           = AV_CODEC_ID_SSA,
     .init         = ass_encode_init,
     .encode_sub   = ass_encode_frame,
+    .priv_data_size = sizeof(ASSEncodeContext),
+};
+#endif
+
+#if CONFIG_ASS_ENCODER
+AVCodec ff_ass_encoder = {
+    .name         = "ass",
+    .long_name    = NULL_IF_CONFIG_SMALL("ASS (Advanced SubStation Alpha) subtitle"),
+    .type         = AVMEDIA_TYPE_SUBTITLE,
+    .id           = AV_CODEC_ID_ASS,
+    .init         = ass_encode_init,
+    .encode_sub   = ass_encode_frame,
+    .priv_data_size = sizeof(ASSEncodeContext),
 };
+#endif
diff --git a/libavcodec/asv.c b/libavcodec/asv.c
index 71c5e5f..e5999c9 100644
--- a/libavcodec/asv.c
+++ b/libavcodec/asv.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2003 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/asv.h b/libavcodec/asv.h
index 9ae3737..a0e8fef 100644
--- a/libavcodec/asv.h
+++ b/libavcodec/asv.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2003 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/asvdec.c b/libavcodec/asvdec.c
index 252f88a..2329875 100644
--- a/libavcodec/asvdec.c
+++ b/libavcodec/asvdec.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2003 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -29,7 +29,6 @@
 #include "asv.h"
 #include "avcodec.h"
 #include "blockdsp.h"
-#include "put_bits.h"
 #include "idctdsp.h"
 #include "internal.h"
 #include "mathops.h"
@@ -212,10 +211,8 @@ static int decode_frame(AVCodecContext *avctx,
     AVFrame * const p     = data;
     int mb_x, mb_y, ret;
 
-    if ((ret = ff_get_buffer(avctx, p, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
         return ret;
-    }
     p->pict_type = AV_PICTURE_TYPE_I;
     p->key_frame = 1;
 
@@ -278,8 +275,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
     int i;
 
     if (avctx->extradata_size < 1) {
-        av_log(avctx, AV_LOG_ERROR, "No extradata provided\n");
-        return AVERROR_INVALIDDATA;
+        av_log(avctx, AV_LOG_WARNING, "No extradata provided\n");
     }
 
     ff_asv_common_init(avctx);
@@ -289,8 +285,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
     ff_init_scantable(a->idsp.idct_permutation, &a->scantable, ff_asv_scantab);
     avctx->pix_fmt = AV_PIX_FMT_YUV420P;
 
-    a->inv_qscale = avctx->extradata[0];
-    if (a->inv_qscale == 0) {
+    if (avctx->extradata_size < 1 || (a->inv_qscale = avctx->extradata[0]) == 0) {
         av_log(avctx, AV_LOG_ERROR, "illegal qscale 0\n");
         if (avctx->codec_id == AV_CODEC_ID_ASV1)
             a->inv_qscale = 6;
@@ -317,6 +312,7 @@ static av_cold int decode_end(AVCodecContext *avctx)
     return 0;
 }
 
+#if CONFIG_ASV1_DECODER
 AVCodec ff_asv1_decoder = {
     .name           = "asv1",
     .long_name      = NULL_IF_CONFIG_SMALL("ASUS V1"),
@@ -328,7 +324,9 @@ AVCodec ff_asv1_decoder = {
     .decode         = decode_frame,
     .capabilities   = CODEC_CAP_DR1,
 };
+#endif
 
+#if CONFIG_ASV2_DECODER
 AVCodec ff_asv2_decoder = {
     .name           = "asv2",
     .long_name      = NULL_IF_CONFIG_SMALL("ASUS V2"),
@@ -340,4 +338,5 @@ AVCodec ff_asv2_decoder = {
     .decode         = decode_frame,
     .capabilities   = CODEC_CAP_DR1,
 };
+#endif
 
diff --git a/libavcodec/asvenc.c b/libavcodec/asvenc.c
index 9944ffa..02cf2db 100644
--- a/libavcodec/asvenc.c
+++ b/libavcodec/asvenc.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2003 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -29,6 +29,7 @@
 #include "asv.h"
 #include "avcodec.h"
 #include "fdctdsp.h"
+#include "internal.h"
 #include "mathops.h"
 #include "mpeg12data.h"
 
@@ -115,7 +116,7 @@ static inline void asv2_encode_block(ASV1Context *a, int16_t block[64]){
         if( (block[index + 1] = (block[index + 1]*a->q_intra_matrix[index + 1] + (1<<15))>>16) ) ccp |= 2;
         if( (block[index + 9] = (block[index + 9]*a->q_intra_matrix[index + 9] + (1<<15))>>16) ) ccp |= 1;
 
-        assert(i || ccp<8);
+        av_assert2(i || ccp<8);
         if(i) put_bits(&a->pb, ff_asv_ac_ccp_tab[ccp][1], ff_asv_ac_ccp_tab[ccp][0]);
         else  put_bits(&a->pb, ff_asv_dc_ccp_tab[ccp][1], ff_asv_dc_ccp_tab[ccp][0]);
 
@@ -181,13 +182,52 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     int size, ret;
     int mb_x, mb_y;
 
-    if (!pkt->data &&
-        (ret = av_new_packet(pkt, a->mb_height*a->mb_width*MAX_MB_SIZE +
-                                  FF_MIN_BUFFER_SIZE)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
+    if (pict->width % 16 || pict->height % 16) {
+        AVFrame *clone = av_frame_alloc();
+        int i;
+
+        if (!clone)
+            return AVERROR(ENOMEM);
+        clone->format = pict->format;
+        clone->width  = FFALIGN(pict->width, 16);
+        clone->height = FFALIGN(pict->height, 16);
+        ret = av_frame_get_buffer(clone, 32);
+        if (ret < 0) {
+            av_frame_free(&clone);
+            return ret;
+        }
+
+        ret = av_frame_copy(clone, pict);
+        if (ret < 0) {
+            av_frame_free(&clone);
+            return ret;
+        }
+
+        for (i = 0; i<3; i++) {
+            int x, y;
+            int w  = FF_CEIL_RSHIFT(pict->width, !!i);
+            int h  = FF_CEIL_RSHIFT(pict->height, !!i);
+            int w2 = FF_CEIL_RSHIFT(clone->width, !!i);
+            int h2 = FF_CEIL_RSHIFT(clone->height, !!i);
+            for (y=0; y<h; y++)
+                for (x=w; x<w2; x++)
+                    clone->data[i][x + y*clone->linesize[i]] =
+                        clone->data[i][w - 1 + y*clone->linesize[i]];
+            for (y=h; y<h2; y++)
+                for (x=0; x<w2; x++)
+                    clone->data[i][x + y*clone->linesize[i]] =
+                        clone->data[i][x + (h-1)*clone->linesize[i]];
+        }
+        ret = encode_frame(avctx, pkt, clone, got_packet);
+
+        av_frame_free(&clone);
         return ret;
     }
 
+    if ((ret = ff_alloc_packet2(avctx, pkt, a->mb_height*a->mb_width*MAX_MB_SIZE +
+                                  FF_MIN_BUFFER_SIZE)) < 0)
+        return ret;
+
     init_put_bits(&a->pb, pkt->data, pkt->size);
 
     for(mb_y=0; mb_y<a->mb_height2; mb_y++){
@@ -241,17 +281,11 @@ static av_cold int encode_init(AVCodecContext *avctx){
     int i;
     const int scale= avctx->codec_id == AV_CODEC_ID_ASV1 ? 1 : 2;
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
-    avctx->coded_frame->key_frame = 1;
-
     ff_asv_common_init(avctx);
     ff_fdctdsp_init(&a->fdsp, avctx);
     ff_pixblockdsp_init(&a->pdsp, avctx);
 
-    if(avctx->global_quality == 0) avctx->global_quality= 4*FF_QUALITY_SCALE;
+    if(avctx->global_quality <= 0) avctx->global_quality= 4*FF_QUALITY_SCALE;
 
     a->inv_qscale= (32*scale*FF_QUALITY_SCALE +  avctx->global_quality/2) / avctx->global_quality;
 
@@ -267,12 +301,6 @@ static av_cold int encode_init(AVCodecContext *avctx){
 
     return 0;
 }
-static av_cold int asv_encode_close(AVCodecContext *avctx)
-{
-    av_frame_free(&avctx->coded_frame);
-
-    return 0;
-}
 
 #if CONFIG_ASV1_ENCODER
 AVCodec ff_asv1_encoder = {
@@ -283,7 +311,6 @@ AVCodec ff_asv1_encoder = {
     .priv_data_size = sizeof(ASV1Context),
     .init           = encode_init,
     .encode2        = encode_frame,
-    .close          = asv_encode_close,
     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P,
                                                     AV_PIX_FMT_NONE },
 };
@@ -298,7 +325,6 @@ AVCodec ff_asv2_encoder = {
     .priv_data_size = sizeof(ASV1Context),
     .init           = encode_init,
     .encode2        = encode_frame,
-    .close          = asv_encode_close,
     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P,
                                                     AV_PIX_FMT_NONE },
 };
diff --git a/libavcodec/atrac.c b/libavcodec/atrac.c
index f36db9e..12e8997 100644
--- a/libavcodec/atrac.c
+++ b/libavcodec/atrac.c
@@ -4,20 +4,20 @@
  * Copyright (c) 2006-2013 Maxim Poliakovski
  * Copyright (c) 2006-2008 Benjamin Larsson
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -124,7 +124,8 @@ void ff_atrac_gain_compensation(AtracGCContext *gctx, float *in, float *prev,
     memcpy(prev, &in[num_samples], num_samples * sizeof(float));
 }
 
-void ff_atrac_iqmf (float *inlo, float *inhi, unsigned int nIn, float *pOut, float *delayBuf, float *temp)
+void ff_atrac_iqmf(float *inlo, float *inhi, unsigned int nIn, float *pOut,
+                   float *delayBuf, float *temp)
 {
     int   i, j;
     float   *p1, *p3;
diff --git a/libavcodec/atrac.h b/libavcodec/atrac.h
index 8909323..05208bb 100644
--- a/libavcodec/atrac.h
+++ b/libavcodec/atrac.h
@@ -4,20 +4,20 @@
  * Copyright (c) 2009-2013 Maxim Poliakovski
  * Copyright (c) 2009 Benjamin Larsson
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -91,6 +91,7 @@ void ff_atrac_gain_compensation(AtracGCContext *gctx, float *in, float *prev,
  * @param delayBuf  delayBuf buffer
  * @param temp      temp buffer
  */
-void ff_atrac_iqmf (float *inlo, float *inhi, unsigned int nIn, float *pOut, float *delayBuf, float *temp);
+void ff_atrac_iqmf(float *inlo, float *inhi, unsigned int nIn, float *pOut,
+                   float *delayBuf, float *temp);
 
 #endif /* AVCODEC_ATRAC_H */
diff --git a/libavcodec/atrac1.c b/libavcodec/atrac1.c
index 9a89785..d059d75 100644
--- a/libavcodec/atrac1.c
+++ b/libavcodec/atrac1.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2009 Maxim Poliakovski
  * Copyright (c) 2009 Benjamin Larsson
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -242,7 +242,7 @@ static int at1_unpack_dequant(GetBitContext* gb, AT1SUCtx* su,
                      */
                     spec[pos+i] = get_sbits(gb, word_len) * scale_factor * max_quant;
                 }
-            } else { /* word_len = 0 -> empty BFU, zero all specs in the emty BFU */
+            } else { /* word_len = 0 -> empty BFU, zero all specs in the empty BFU */
                 memset(&spec[pos], 0, num_specs * sizeof(float));
             }
         }
@@ -287,10 +287,8 @@ static int atrac1_decode_frame(AVCodecContext *avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = AT1_SU_SAMPLES;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     for (ch = 0; ch < avctx->channels; ch++) {
         AT1SUCtx* su = &q->SUs[ch];
@@ -343,6 +341,11 @@ static av_cold int atrac1_decode_init(AVCodecContext *avctx)
         return AVERROR(EINVAL);
     }
 
+    if (avctx->block_align <= 0) {
+        av_log(avctx, AV_LOG_ERROR, "Unsupported block align.\n");
+        return AVERROR_PATCHWELCOME;
+    }
+
     /* Init the mdct transforms */
     if ((ret = ff_mdct_init(&q->mdct_ctx[0], 6, 1, -1.0/ (1 << 15))) ||
         (ret = ff_mdct_init(&q->mdct_ctx[1], 8, 1, -1.0/ (1 << 15))) ||
diff --git a/libavcodec/atrac1data.h b/libavcodec/atrac1data.h
index d4b8cd0..62c218b 100644
--- a/libavcodec/atrac1data.h
+++ b/libavcodec/atrac1data.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2009 Maxim Poliakovski
  * Copyright (c) 2009 Benjamin Larsson
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -43,7 +43,7 @@ static const uint8_t bfu_bands_t[4]  = {0, 20, 36, 52};
  */
 static const uint8_t specs_per_bfu[52] = {
      8,  8,  8,  8,  4,  4,  4,  4,  8,  8,  8,  8,  6,  6,  6,  6, 6, 6, 6, 6, // low band
-     6,  6,  6,  6,  7,  7,  7,  7,  9,  9,  9,  9, 10, 10, 10, 10,             // midle band
+     6,  6,  6,  6,  7,  7,  7,  7,  9,  9,  9,  9, 10, 10, 10, 10,             // middle band
     12, 12, 12, 12, 12, 12, 12, 12, 20, 20, 20, 20, 20, 20, 20, 20              // high band
 };
 
diff --git a/libavcodec/atrac3.c b/libavcodec/atrac3.c
index 76fd0d1..435f1ac 100644
--- a/libavcodec/atrac3.c
+++ b/libavcodec/atrac3.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2006-2008 Maxim Poliakovski
  * Copyright (c) 2006-2008 Benjamin Larsson
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -38,6 +38,7 @@
 
 #include "libavutil/attributes.h"
 #include "libavutil/float_dsp.h"
+#include "libavutil/libm.h"
 #include "avcodec.h"
 #include "bytestream.h"
 #include "fft.h"
@@ -105,8 +106,8 @@ typedef struct ATRAC3Context {
     int scrambled_stream;
     //@}
 
-    AtracGCContext  gainc_ctx;
-    FFTContext mdct_ctx;
+    AtracGCContext    gainc_ctx;
+    FFTContext        mdct_ctx;
     FmtConvertContext fmt_conv;
     AVFloatDSPContext fdsp;
 } ATRAC3Context;
@@ -409,17 +410,17 @@ static int decode_tonal_components(GetBitContext *gb,
 static int decode_gain_control(GetBitContext *gb, GainBlock *block,
                                int num_bands)
 {
-    int i, j;
+    int b, j;
     int *level, *loc;
 
     AtracGainInfo *gain = block->g_block;
 
-    for (i = 0; i <= num_bands; i++) {
-        gain[i].num_points    = get_bits(gb, 3);
-        level                 = gain[i].lev_code;
-        loc                   = gain[i].loc_code;
+    for (b = 0; b <= num_bands; b++) {
+        gain[b].num_points = get_bits(gb, 3);
+        level              = gain[b].lev_code;
+        loc                = gain[b].loc_code;
 
-        for (j = 0; j < gain[i].num_points; j++) {
+        for (j = 0; j < gain[b].num_points; j++) {
             level[j] = get_bits(gb, 4);
             loc[j]   = get_bits(gb, 5);
             if (j && loc[j] <= loc[j - 1])
@@ -428,8 +429,8 @@ static int decode_gain_control(GetBitContext *gb, GainBlock *block,
     }
 
     /* Clear the unused blocks. */
-    for (; i < 4 ; i++)
-        gain[i].num_points = 0;
+    for (; b < 4 ; b++)
+        gain[b].num_points = 0;
 
     return 0;
 }
@@ -520,7 +521,7 @@ static void reverse_matrixing(float *su1, float *su2, int *prev_code,
             }
             break;
         default:
-            assert(0);
+            av_assert1(0);
         }
     }
 }
@@ -675,7 +676,7 @@ static int decode_frame(AVCodecContext *avctx, const uint8_t *databuf,
 
 
         /* set the bitstream reader at the start of the second Sound Unit*/
-        init_get_bits(&q->gb, ptr1, (avctx->block_align - i) * 8);
+        init_get_bits8(&q->gb, ptr1, q->decoded_bytes_buffer + avctx->block_align - ptr1);
 
         /* Fill the Weighting coeffs delay buffer */
         memmove(q->weighting_delay, &q->weighting_delay[2],
@@ -749,10 +750,8 @@ static int atrac3_decode_frame(AVCodecContext *avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = SAMPLES_PER_FRAME;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     /* Check if we need to descramble and what buffer to pass on. */
     if (q->scrambled_stream) {
@@ -773,7 +772,7 @@ static int atrac3_decode_frame(AVCodecContext *avctx, void *data,
     return avctx->block_align;
 }
 
-static av_cold void atrac3_init_static_data(AVCodec *codec)
+static av_cold void atrac3_init_static_data(void)
 {
     int i;
 
@@ -793,6 +792,7 @@ static av_cold void atrac3_init_static_data(AVCodec *codec)
 
 static av_cold int atrac3_decode_init(AVCodecContext *avctx)
 {
+    static int static_init_done;
     int i, ret;
     int version, delay, samples_per_frame, frame_factor;
     const uint8_t *edata_ptr = avctx->extradata;
@@ -803,6 +803,10 @@ static av_cold int atrac3_decode_init(AVCodecContext *avctx)
         return AVERROR(EINVAL);
     }
 
+    if (!static_init_done)
+        atrac3_init_static_data();
+    static_init_done = 1;
+
     /* Take care of the codec-specific extradata. */
     if (avctx->extradata_size == 14) {
         /* Parse the extradata, WAV format */
@@ -831,7 +835,7 @@ static av_cold int atrac3_decode_init(AVCodecContext *avctx)
                    avctx->channels, frame_factor);
             return AVERROR_INVALIDDATA;
         }
-    } else if (avctx->extradata_size == 10) {
+    } else if (avctx->extradata_size == 12 || avctx->extradata_size == 10) {
         /* Parse the extradata, RM format. */
         version                = bytestream_get_be32(&edata_ptr);
         samples_per_frame      = bytestream_get_be16(&edata_ptr);
@@ -868,8 +872,10 @@ static av_cold int atrac3_decode_init(AVCodecContext *avctx)
     if (q->coding_mode == STEREO)
         av_log(avctx, AV_LOG_DEBUG, "Normal stereo detected.\n");
     else if (q->coding_mode == JOINT_STEREO) {
-        if (avctx->channels != 2)
+        if (avctx->channels != 2) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid coding mode\n");
             return AVERROR_INVALIDDATA;
+        }
         av_log(avctx, AV_LOG_DEBUG, "Joint stereo detected.\n");
     } else {
         av_log(avctx, AV_LOG_ERROR, "Unknown channel coding mode %x!\n",
@@ -912,7 +918,7 @@ static av_cold int atrac3_decode_init(AVCodecContext *avctx)
     avpriv_float_dsp_init(&q->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
     ff_fmt_convert_init(&q->fmt_conv, avctx);
 
-    q->units = av_mallocz(sizeof(*q->units) * avctx->channels);
+    q->units = av_mallocz_array(avctx->channels, sizeof(*q->units));
     if (!q->units) {
         atrac3_decode_close(avctx);
         return AVERROR(ENOMEM);
@@ -928,7 +934,6 @@ AVCodec ff_atrac3_decoder = {
     .id               = AV_CODEC_ID_ATRAC3,
     .priv_data_size   = sizeof(ATRAC3Context),
     .init             = atrac3_decode_init,
-    .init_static_data = atrac3_init_static_data,
     .close            = atrac3_decode_close,
     .decode           = atrac3_decode_frame,
     .capabilities     = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
diff --git a/libavcodec/atrac3data.h b/libavcodec/atrac3data.h
index 4f5c122..5d91274 100644
--- a/libavcodec/atrac3data.h
+++ b/libavcodec/atrac3data.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2006-2007 Maxim Poliakovski
  * Copyright (c) 2006-2007 Benjamin Larsson
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/atrac3plus.c b/libavcodec/atrac3plus.c
index f337fab..08c90cd 100644
--- a/libavcodec/atrac3plus.c
+++ b/libavcodec/atrac3plus.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2010-2013 Maxim Poliakovski
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -80,7 +80,7 @@ static av_cold void build_canonical_huff(const uint8_t *cb, const uint8_t *xlat,
     *tab_offset += 1 << max_len;
 }
 
-av_cold void ff_atrac3p_init_vlcs(AVCodec *codec)
+av_cold void ff_atrac3p_init_vlcs(void)
 {
     int i, wl_vlc_offs, ct_vlc_offs, sf_vlc_offs, tab_offset;
 
diff --git a/libavcodec/atrac3plus.h b/libavcodec/atrac3plus.h
index e56c444..1b001fa 100644
--- a/libavcodec/atrac3plus.h
+++ b/libavcodec/atrac3plus.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2010-2013 Maxim Poliakovski
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -155,10 +155,8 @@ typedef struct Atrac3pChanUnitCtx {
 
 /**
  * Initialize VLC tables for bitstream parsing.
- *
- * @param[in]   codec    ptr to the AVCodec
  */
-void ff_atrac3p_init_vlcs(AVCodec *codec);
+void ff_atrac3p_init_vlcs(void);
 
 /**
  * Decode bitstream data of a channel unit.
@@ -169,8 +167,8 @@ void ff_atrac3p_init_vlcs(AVCodec *codec);
  * @param[in]     avctx         ptr to the AVCodecContext
  * @return result code: 0 = OK, otherwise - error code
  */
-int  ff_atrac3p_decode_channel_unit(GetBitContext *gb, Atrac3pChanUnitCtx *ctx,
-                                    int num_channels, AVCodecContext *avctx);
+int ff_atrac3p_decode_channel_unit(GetBitContext *gb, Atrac3pChanUnitCtx *ctx,
+                                   int num_channels, AVCodecContext *avctx);
 
 /**
  * Initialize IMDCT transform.
diff --git a/libavcodec/atrac3plus_data.h b/libavcodec/atrac3plus_data.h
index 5026a59..2a107ee 100644
--- a/libavcodec/atrac3plus_data.h
+++ b/libavcodec/atrac3plus_data.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2010-2013 Maxim Poliakovski
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/atrac3plusdec.c b/libavcodec/atrac3plusdec.c
index ddbfb53..3a6b3cf 100644
--- a/libavcodec/atrac3plusdec.c
+++ b/libavcodec/atrac3plusdec.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2010-2013 Maxim Poliakovski
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -148,6 +148,8 @@ static av_cold int atrac3p_decode_init(AVCodecContext *avctx)
         return AVERROR(EINVAL);
     }
 
+    ff_atrac3p_init_vlcs();
+
     avpriv_float_dsp_init(&ctx->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
 
     /* initialize IPQF */
@@ -164,8 +166,8 @@ static av_cold int atrac3p_decode_init(AVCodecContext *avctx)
 
     ctx->my_channel_layout = avctx->channel_layout;
 
-    ctx->ch_units = av_mallocz(sizeof(*ctx->ch_units) *
-                               ctx->num_channel_blocks);
+    ctx->ch_units = av_mallocz_array(ctx->num_channel_blocks, sizeof(*ctx->ch_units));
+
     if (!ctx->ch_units) {
         atrac3p_decode_close(avctx);
         return AVERROR(ENOMEM);
@@ -383,13 +385,12 @@ static int atrac3p_decode_frame(AVCodecContext *avctx, void *data,
 }
 
 AVCodec ff_atrac3p_decoder = {
-    .name             = "atrac3plus",
-    .long_name        = NULL_IF_CONFIG_SMALL("ATRAC3+ (Adaptive TRansform Acoustic Coding 3+)"),
-    .type             = AVMEDIA_TYPE_AUDIO,
-    .id               = AV_CODEC_ID_ATRAC3P,
-    .priv_data_size   = sizeof(ATRAC3PContext),
-    .init             = atrac3p_decode_init,
-    .init_static_data = ff_atrac3p_init_vlcs,
-    .close            = atrac3p_decode_close,
-    .decode           = atrac3p_decode_frame,
+    .name           = "atrac3plus",
+    .long_name      = NULL_IF_CONFIG_SMALL("ATRAC3+ (Adaptive TRansform Acoustic Coding 3+)"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_ATRAC3P,
+    .priv_data_size = sizeof(ATRAC3PContext),
+    .init           = atrac3p_decode_init,
+    .close          = atrac3p_decode_close,
+    .decode         = atrac3p_decode_frame,
 };
diff --git a/libavcodec/atrac3plusdsp.c b/libavcodec/atrac3plusdsp.c
index 468f098..3522af1 100644
--- a/libavcodec/atrac3plusdsp.c
+++ b/libavcodec/atrac3plusdsp.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2010-2013 Maxim Poliakovski
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/audio_frame_queue.c b/libavcodec/audio_frame_queue.c
index 0a8b25c..1220345 100644
--- a/libavcodec/audio_frame_queue.c
+++ b/libavcodec/audio_frame_queue.c
@@ -2,110 +2,72 @@
  * Audio Frame Queue
  * Copyright (c) 2012 Justin Ruggles
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "libavutil/attributes.h"
 #include "libavutil/common.h"
-#include "libavutil/mathematics.h"
-#include "internal.h"
 #include "audio_frame_queue.h"
+#include "internal.h"
+#include "libavutil/avassert.h"
 
 av_cold void ff_af_queue_init(AVCodecContext *avctx, AudioFrameQueue *afq)
 {
-    afq->avctx             = avctx;
-    afq->next_pts          = AV_NOPTS_VALUE;
+    afq->avctx = avctx;
     afq->remaining_delay   = avctx->delay;
     afq->remaining_samples = avctx->delay;
-    afq->frame_queue       = NULL;
-}
-
-static void delete_next_frame(AudioFrameQueue *afq)
-{
-    AudioFrame *f = afq->frame_queue;
-    if (f) {
-        afq->frame_queue = f->next;
-        f->next = NULL;
-        av_freep(&f);
-    }
+    afq->frame_count       = 0;
 }
 
 void ff_af_queue_close(AudioFrameQueue *afq)
 {
-    /* remove/free any remaining frames */
-    while (afq->frame_queue)
-        delete_next_frame(afq);
+    if(afq->frame_count)
+        av_log(afq->avctx, AV_LOG_WARNING, "%d frames left in the queue on closing\n", afq->frame_count);
+    av_freep(&afq->frames);
     memset(afq, 0, sizeof(*afq));
 }
 
-#ifdef DEBUG
-static void af_queue_log_state(AudioFrameQueue *afq)
-{
-    AudioFrame *f;
-    av_dlog(afq->avctx, "remaining delay   = %d\n", afq->remaining_delay);
-    av_dlog(afq->avctx, "remaining samples = %d\n", afq->remaining_samples);
-    av_dlog(afq->avctx, "frames:\n");
-    f = afq->frame_queue;
-    while (f) {
-        av_dlog(afq->avctx, "  [ pts=%9"PRId64" duration=%d ]\n",
-                f->pts, f->duration);
-        f = f->next;
-    }
-}
-#endif /* DEBUG */
-
 int ff_af_queue_add(AudioFrameQueue *afq, const AVFrame *f)
 {
-    AudioFrame *new_frame;
-    AudioFrame *queue_end = afq->frame_queue;
-
-    /* find the end of the queue */
-    while (queue_end && queue_end->next)
-        queue_end = queue_end->next;
-
-    /* allocate new frame queue entry */
-    if (!(new_frame = av_malloc(sizeof(*new_frame))))
+    AudioFrame *new = av_fast_realloc(afq->frames, &afq->frame_alloc, sizeof(*afq->frames)*(afq->frame_count+1));
+    if(!new)
         return AVERROR(ENOMEM);
+    afq->frames = new;
+    new += afq->frame_count;
 
     /* get frame parameters */
-    new_frame->next = NULL;
-    new_frame->duration = f->nb_samples;
+    new->duration = f->nb_samples;
+    new->duration += afq->remaining_delay;
     if (f->pts != AV_NOPTS_VALUE) {
-        new_frame->pts = av_rescale_q(f->pts,
+        new->pts = av_rescale_q(f->pts,
                                       afq->avctx->time_base,
                                       (AVRational){ 1, afq->avctx->sample_rate });
-        afq->next_pts = new_frame->pts + new_frame->duration;
+        new->pts -= afq->remaining_delay;
+        if(afq->frame_count && new[-1].pts >= new->pts)
+            av_log(afq->avctx, AV_LOG_WARNING, "Queue input is backward in time\n");
     } else {
-        new_frame->pts = AV_NOPTS_VALUE;
-        afq->next_pts  = AV_NOPTS_VALUE;
+        new->pts = AV_NOPTS_VALUE;
     }
-
-    /* add new frame to the end of the queue */
-    if (!queue_end)
-        afq->frame_queue = new_frame;
-    else
-        queue_end->next = new_frame;
+    afq->remaining_delay = 0;
 
     /* add frame sample count */
     afq->remaining_samples += f->nb_samples;
 
-#ifdef DEBUG
-    af_queue_log_state(afq);
-#endif
+    afq->frame_count++;
 
     return 0;
 }
@@ -115,50 +77,37 @@ void ff_af_queue_remove(AudioFrameQueue *afq, int nb_samples, int64_t *pts,
 {
     int64_t out_pts = AV_NOPTS_VALUE;
     int removed_samples = 0;
+    int i;
 
-#ifdef DEBUG
-    af_queue_log_state(afq);
-#endif
-
-    /* get output pts from the next frame or generated pts */
-    if (afq->frame_queue) {
-        if (afq->frame_queue->pts != AV_NOPTS_VALUE)
-            out_pts = afq->frame_queue->pts - afq->remaining_delay;
-    } else {
-        if (afq->next_pts != AV_NOPTS_VALUE)
-            out_pts = afq->next_pts - afq->remaining_delay;
+    if (afq->frame_count || afq->frame_alloc) {
+        if (afq->frames->pts != AV_NOPTS_VALUE)
+            out_pts = afq->frames->pts;
     }
-    if (pts) {
-        if (out_pts != AV_NOPTS_VALUE)
-            *pts = ff_samples_to_time_base(afq->avctx, out_pts);
-        else
-            *pts = AV_NOPTS_VALUE;
-    }
-
-    /* if the delay is larger than the packet duration, we use up delay samples
-       for the output packet and leave all frames in the queue */
-    if (afq->remaining_delay >= nb_samples) {
-        removed_samples      += nb_samples;
-        afq->remaining_delay -= nb_samples;
-    }
-    /* remove frames from the queue until we have enough to cover the
-       requested number of samples or until the queue is empty */
-    while (removed_samples < nb_samples && afq->frame_queue) {
-        removed_samples += afq->frame_queue->duration;
-        delete_next_frame(afq);
+    if(!afq->frame_count)
+        av_log(afq->avctx, AV_LOG_WARNING, "Trying to remove %d samples, but the queue is empty\n", nb_samples);
+    if (pts)
+        *pts = ff_samples_to_time_base(afq->avctx, out_pts);
+
+    for(i=0; nb_samples && i<afq->frame_count; i++){
+        int n= FFMIN(afq->frames[i].duration, nb_samples);
+        afq->frames[i].duration -= n;
+        nb_samples              -= n;
+        removed_samples         += n;
+        if(afq->frames[i].pts != AV_NOPTS_VALUE)
+            afq->frames[i].pts      += n;
     }
     afq->remaining_samples -= removed_samples;
-
-    /* if there are no frames left and we have room for more samples, use
-       any remaining delay samples */
-    if (removed_samples < nb_samples && afq->remaining_samples > 0) {
-        int add_samples = FFMIN(afq->remaining_samples,
-                                nb_samples - removed_samples);
-        removed_samples        += add_samples;
-        afq->remaining_samples -= add_samples;
+    i -= i && afq->frames[i-1].duration;
+    memmove(afq->frames, afq->frames + i, sizeof(*afq->frames) * (afq->frame_count - i));
+    afq->frame_count -= i;
+
+    if(nb_samples){
+        av_assert0(!afq->frame_count);
+        av_assert0(afq->remaining_samples == afq->remaining_delay);
+        if(afq->frames && afq->frames[0].pts != AV_NOPTS_VALUE)
+            afq->frames[0].pts += nb_samples;
+        av_log(afq->avctx, AV_LOG_DEBUG, "Trying to remove %d more samples than there are in the queue\n", nb_samples);
     }
-    if (removed_samples > nb_samples)
-        av_log(afq->avctx, AV_LOG_WARNING, "frame_size is too large\n");
     if (duration)
         *duration = ff_samples_to_time_base(afq->avctx, removed_samples);
 }
diff --git a/libavcodec/audio_frame_queue.h b/libavcodec/audio_frame_queue.h
index 4a29770..2e317bb 100644
--- a/libavcodec/audio_frame_queue.h
+++ b/libavcodec/audio_frame_queue.h
@@ -2,20 +2,20 @@
  * Audio Frame Queue
  * Copyright (c) 2012 Justin Ruggles
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,15 +27,15 @@
 typedef struct AudioFrame {
     int64_t pts;
     int duration;
-    struct AudioFrame *next;
 } AudioFrame;
 
 typedef struct AudioFrameQueue {
     AVCodecContext *avctx;
-    int64_t next_pts;
     int remaining_delay;
     int remaining_samples;
-    AudioFrame *frame_queue;
+    AudioFrame *frames;
+    unsigned frame_count;
+    unsigned frame_alloc;
 } AudioFrameQueue;
 
 /**
diff --git a/libavcodec/audioconvert.c b/libavcodec/audioconvert.c
new file mode 100644
index 0000000..5e46fae
--- /dev/null
+++ b/libavcodec/audioconvert.c
@@ -0,0 +1,120 @@
+/*
+ * audio conversion
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * audio conversion
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "libavutil/avstring.h"
+#include "libavutil/common.h"
+#include "libavutil/libm.h"
+#include "libavutil/samplefmt.h"
+#include "avcodec.h"
+#include "audioconvert.h"
+
+#if FF_API_AUDIO_CONVERT
+
+struct AVAudioConvert {
+    int in_channels, out_channels; /* channel counts given to av_audio_convert_alloc() */
+    int fmt_pair;                  /* out_fmt + AV_SAMPLE_FMT_NB*in_fmt, selects the conversion */
+};
+
+AVAudioConvert *av_audio_convert_alloc(enum AVSampleFormat out_fmt, int out_channels,
+                                       enum AVSampleFormat in_fmt, int in_channels,
+                                       const float *matrix, int flags)
+{
+    AVAudioConvert *conv;
+    /* FIXME: differing channel counts are not supported; matrix and flags are unused */
+    if (out_channels != in_channels)
+        return NULL;
+    if (!(conv = av_malloc(sizeof(*conv))))
+        return NULL;
+    conv->in_channels  = in_channels;
+    conv->out_channels = out_channels;
+    conv->fmt_pair     = AV_SAMPLE_FMT_NB*in_fmt + out_fmt;
+    return conv;
+}
+
+void av_audio_convert_free(AVAudioConvert *ctx)
+{
+    av_free(ctx); /* the context is one av_malloc() block holding only ints */
+}
+
+int av_audio_convert(AVAudioConvert *ctx,
+                           void * const out[6], const int out_stride[6],
+                     const void * const  in[6], const int  in_stride[6], int len)
+{
+    int ch;
+    /* Converts len samples per output channel; returns 0, or -1 if fmt_pair has no conversion. */
+    //FIXME optimize common cases
+    /* Buffers are walked as bytes: pi advances by in_stride[ch], po by out_stride[ch]. */
+    for(ch=0; ch<ctx->out_channels; ch++){
+        const int is=  in_stride[ch];
+        const int os= out_stride[ch];
+        const uint8_t *pi=  in[ch];
+        uint8_t *po= out[ch];
+        uint8_t *end= po + os*len;
+        if(!out[ch]) /* channels without an output buffer are skipped */
+            continue;
+        /* CONV: when fmt_pair matches (ofmt, ifmt), store expr as otype until po reaches end. */
+#define CONV(ofmt, otype, ifmt, expr)\
+if(ctx->fmt_pair == ofmt + AV_SAMPLE_FMT_NB*ifmt){\
+    do{\
+        *(otype*)po = expr; pi += is; po += os;\
+    }while(po < end);\
+}
+        /* NOTE(review): the do/while stores one sample before testing po < end, even if len is 0. */
+//FIXME put things below under ifdefs so we do not waste space for cases no codec will need
+//FIXME rounding ?
+        /* The chain below is grouped by input format: U8, S16, S32, FLT, DBL. */
+             CONV(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_U8 ,  *(const uint8_t*)pi)
+        else CONV(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)<<8)
+        else CONV(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)<<24)
+        else CONV(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)*(1.0 / (1<<7)))
+        else CONV(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)*(1.0 / (1<<7)))
+        else CONV(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S16, (*(const int16_t*)pi>>8) + 0x80)
+        else CONV(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S16,  *(const int16_t*)pi)
+        else CONV(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S16,  *(const int16_t*)pi<<16)
+        else CONV(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S16,  *(const int16_t*)pi*(1.0 / (1<<15)))
+        else CONV(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S16,  *(const int16_t*)pi*(1.0 / (1<<15)))
+        else CONV(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S32, (*(const int32_t*)pi>>24) + 0x80)
+        else CONV(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S32,  *(const int32_t*)pi>>16)
+        else CONV(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S32,  *(const int32_t*)pi)
+        else CONV(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S32,  *(const int32_t*)pi*(1.0 / (1U<<31)))
+        else CONV(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S32,  *(const int32_t*)pi*(1.0 / (1U<<31)))
+        else CONV(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_FLT, av_clip_uint8(  lrintf(*(const float*)pi * (1<<7)) + 0x80))
+        else CONV(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, av_clip_int16(  lrintf(*(const float*)pi * (1<<15))))
+        else CONV(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, av_clipl_int32(llrintf(*(const float*)pi * (1U<<31))))
+        else CONV(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_FLT, *(const float*)pi)
+        else CONV(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_FLT, *(const float*)pi)
+        else CONV(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_DBL, av_clip_uint8(  lrint(*(const double*)pi * (1<<7)) + 0x80))
+        else CONV(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, av_clip_int16(  lrint(*(const double*)pi * (1<<15))))
+        else CONV(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, av_clipl_int32(llrint(*(const double*)pi * (1U<<31))))
+        else CONV(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_DBL, *(const double*)pi)
+        else CONV(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_DBL, *(const double*)pi)
+        else return -1;
+    }
+    return 0;
+}
+
+#endif /* FF_API_AUDIO_CONVERT */
diff --git a/libavcodec/audioconvert.h b/libavcodec/audioconvert.h
new file mode 100644
index 0000000..556ab31
--- /dev/null
+++ b/libavcodec/audioconvert.h
@@ -0,0 +1,84 @@
+/*
+ * audio conversion
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2008 Peter Ross
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AUDIOCONVERT_H
+#define AVCODEC_AUDIOCONVERT_H
+
+/**
+ * @file
+ * Audio format conversion routines
+ * This interface is deprecated and will be dropped in a future
+ * version. You should use the libswresample library instead.
+ */
+
+#if FF_API_AUDIO_CONVERT
+
+#include "libavutil/cpu.h"
+#include "avcodec.h"
+#include "libavutil/channel_layout.h"
+
+struct AVAudioConvert;
+typedef struct AVAudioConvert AVAudioConvert;
+
+/**
+ * Create an audio sample format converter context
+ * @param out_fmt Output sample format
+ * @param out_channels Number of output channels
+ * @param in_fmt Input sample format
+ * @param in_channels Number of input channels
+ * @param[in] matrix Channel mixing matrix (of dimension in_channels*out_channels). Set to NULL to ignore.
+ * @param flags See AV_CPU_FLAG_xx
+ * @return NULL on error
+ * @deprecated See libswresample
+ */
+
+attribute_deprecated
+AVAudioConvert *av_audio_convert_alloc(enum AVSampleFormat out_fmt, int out_channels,
+                                       enum AVSampleFormat in_fmt, int in_channels,
+                                       const float *matrix, int flags);
+
+/**
+ * Free audio sample format converter context
+ * @deprecated See libswresample
+ */
+
+attribute_deprecated
+void av_audio_convert_free(AVAudioConvert *ctx);
+
+/**
+ * Convert between audio sample formats
+ * @param[in] out array of output buffers for each channel. set to NULL to ignore processing of the given channel.
+ * @param[in] out_stride distance between consecutive output samples (measured in bytes)
+ * @param[in] in array of input buffers for each channel
+ * @param[in] in_stride distance between consecutive input samples (measured in bytes)
+ * @param len length of the audio frame (measured in samples)
+ * @deprecated See libswresample
+ */
+
+attribute_deprecated
+int av_audio_convert(AVAudioConvert *ctx,
+                           void * const out[6], const int out_stride[6],
+                     const void * const  in[6], const int  in_stride[6], int len);
+
+#endif /* FF_API_AUDIO_CONVERT */
+
+#endif /* AVCODEC_AUDIOCONVERT_H */
diff --git a/libavcodec/audiodsp.c b/libavcodec/audiodsp.c
index f7e6167..85b5a74 100644
--- a/libavcodec/audiodsp.c
+++ b/libavcodec/audiodsp.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/audiodsp.h b/libavcodec/audiodsp.h
index 58205a1..b55bf85 100644
--- a/libavcodec/audiodsp.h
+++ b/libavcodec/audiodsp.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/aura.c b/libavcodec/aura.c
index a32c18b..8d0f16a 100644
--- a/libavcodec/aura.c
+++ b/libavcodec/aura.c
@@ -1,20 +1,20 @@
 /*
  * Aura 2 decoder
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -59,10 +59,8 @@ static int aura_decode_frame(AVCodecContext *avctx,
     /* pixel data starts 48 bytes in, after 3x16-byte tables */
     buf += 48;
 
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     Y = frame->data[0];
     U = frame->data[1];
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 3569ac1..efe94ee 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -1,20 +1,20 @@
 /*
  * copyright (c) 2001 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -33,6 +33,7 @@
 #include "libavutil/avutil.h"
 #include "libavutil/buffer.h"
 #include "libavutil/cpu.h"
+#include "libavutil/channel_layout.h"
 #include "libavutil/dict.h"
 #include "libavutil/frame.h"
 #include "libavutil/log.h"
@@ -97,7 +98,8 @@
  *
  * If you add a codec ID to this list, add it so that
  * 1. no value of a existing codec ID changes (that would break ABI),
- * 2. it is as close as possible to similar codecs.
+ * 2. Give it a value which when taken as ASCII is recognized uniquely by a human as this specific codec.
+ *    This ensures that 2 forks can independently add AVCodecIDs without producing conflicts.
  *
  * After adding new codec IDs, do not forget to add an entry to the codec
  * descriptor list and bump libavcodec minor version.
@@ -278,21 +280,50 @@ enum AVCodecID {
     AV_CODEC_ID_MSS2,
     AV_CODEC_ID_VP9,
     AV_CODEC_ID_AIC,
-    AV_CODEC_ID_ESCAPE130,
-    AV_CODEC_ID_G2M,
-    AV_CODEC_ID_WEBP,
+    AV_CODEC_ID_ESCAPE130_DEPRECATED,
+    AV_CODEC_ID_G2M_DEPRECATED,
+    AV_CODEC_ID_WEBP_DEPRECATED,
     AV_CODEC_ID_HNM4_VIDEO,
-    AV_CODEC_ID_HEVC,
+    AV_CODEC_ID_HEVC_DEPRECATED,
     AV_CODEC_ID_FIC,
     AV_CODEC_ID_ALIAS_PIX,
-    AV_CODEC_ID_BRENDER_PIX,
-    AV_CODEC_ID_PAF_VIDEO,
-    AV_CODEC_ID_EXR,
-    AV_CODEC_ID_VP7,
-    AV_CODEC_ID_SANM,
-    AV_CODEC_ID_SGIRLE,
-    AV_CODEC_ID_MVC1,
-    AV_CODEC_ID_MVC2,
+    AV_CODEC_ID_BRENDER_PIX_DEPRECATED,
+    AV_CODEC_ID_PAF_VIDEO_DEPRECATED,
+    AV_CODEC_ID_EXR_DEPRECATED,
+    AV_CODEC_ID_VP7_DEPRECATED,
+    AV_CODEC_ID_SANM_DEPRECATED,
+    AV_CODEC_ID_SGIRLE_DEPRECATED,
+    AV_CODEC_ID_MVC1_DEPRECATED,
+    AV_CODEC_ID_MVC2_DEPRECATED,
+
+    AV_CODEC_ID_BRENDER_PIX= MKBETAG('B','P','I','X'),
+    AV_CODEC_ID_Y41P       = MKBETAG('Y','4','1','P'),
+    AV_CODEC_ID_ESCAPE130  = MKBETAG('E','1','3','0'),
+    AV_CODEC_ID_EXR        = MKBETAG('0','E','X','R'),
+    AV_CODEC_ID_AVRP       = MKBETAG('A','V','R','P'),
+
+    AV_CODEC_ID_012V       = MKBETAG('0','1','2','V'),
+    AV_CODEC_ID_G2M        = MKBETAG( 0 ,'G','2','M'),
+    AV_CODEC_ID_AVUI       = MKBETAG('A','V','U','I'),
+    AV_CODEC_ID_AYUV       = MKBETAG('A','Y','U','V'),
+    AV_CODEC_ID_TARGA_Y216 = MKBETAG('T','2','1','6'),
+    AV_CODEC_ID_V308       = MKBETAG('V','3','0','8'),
+    AV_CODEC_ID_V408       = MKBETAG('V','4','0','8'),
+    AV_CODEC_ID_YUV4       = MKBETAG('Y','U','V','4'),
+    AV_CODEC_ID_SANM       = MKBETAG('S','A','N','M'),
+    AV_CODEC_ID_PAF_VIDEO  = MKBETAG('P','A','F','V'),
+    AV_CODEC_ID_AVRN       = MKBETAG('A','V','R','n'),
+    AV_CODEC_ID_CPIA       = MKBETAG('C','P','I','A'),
+    AV_CODEC_ID_XFACE      = MKBETAG('X','F','A','C'),
+    AV_CODEC_ID_SGIRLE     = MKBETAG('S','G','I','R'),
+    AV_CODEC_ID_MVC1       = MKBETAG('M','V','C','1'),
+    AV_CODEC_ID_MVC2       = MKBETAG('M','V','C','2'),
+    AV_CODEC_ID_SNOW       = MKBETAG('S','N','O','W'),
+    AV_CODEC_ID_WEBP       = MKBETAG('W','E','B','P'),
+    AV_CODEC_ID_SMVJPEG    = MKBETAG('S','M','V','J'),
+    AV_CODEC_ID_HEVC       = MKBETAG('H','2','6','5'),
+#define AV_CODEC_ID_H265 AV_CODEC_ID_HEVC
+    AV_CODEC_ID_VP7        = MKBETAG('V','P','7','0'),
 
     /* various PCM "codecs" */
     AV_CODEC_ID_FIRST_AUDIO = 0x10000,     ///< A dummy id pointing at the start of audio codecs
@@ -324,8 +355,11 @@ enum AVCodecID {
     AV_CODEC_ID_PCM_LXF,
     AV_CODEC_ID_S302M,
     AV_CODEC_ID_PCM_S8_PLANAR,
-    AV_CODEC_ID_PCM_S24LE_PLANAR,
-    AV_CODEC_ID_PCM_S32LE_PLANAR,
+    AV_CODEC_ID_PCM_S24LE_PLANAR_DEPRECATED,
+    AV_CODEC_ID_PCM_S32LE_PLANAR_DEPRECATED,
+    AV_CODEC_ID_PCM_S24LE_PLANAR = MKBETAG(24,'P','S','P'),
+    AV_CODEC_ID_PCM_S32LE_PLANAR = MKBETAG(32,'P','S','P'),
+    AV_CODEC_ID_PCM_S16BE_PLANAR = MKBETAG('P','S','P',16),
 
     /* various ADPCM codecs */
     AV_CODEC_ID_ADPCM_IMA_QT = 0x11000,
@@ -358,7 +392,14 @@ enum AVCodecID {
     AV_CODEC_ID_ADPCM_IMA_ISS,
     AV_CODEC_ID_ADPCM_G722,
     AV_CODEC_ID_ADPCM_IMA_APC,
-    AV_CODEC_ID_ADPCM_VIMA,
+    AV_CODEC_ID_ADPCM_VIMA_DEPRECATED,
+    AV_CODEC_ID_ADPCM_VIMA = MKBETAG('V','I','M','A'),
+    AV_CODEC_ID_VIMA       = MKBETAG('V','I','M','A'),
+    AV_CODEC_ID_ADPCM_AFC  = MKBETAG('A','F','C',' '),
+    AV_CODEC_ID_ADPCM_IMA_OKI = MKBETAG('O','K','I',' '),
+    AV_CODEC_ID_ADPCM_DTK  = MKBETAG('D','T','K',' '),
+    AV_CODEC_ID_ADPCM_IMA_RAD = MKBETAG('R','A','D',' '),
+    AV_CODEC_ID_ADPCM_G726LE = MKBETAG('6','2','7','G'),
 
     /* AMR */
     AV_CODEC_ID_AMR_NB = 0x12000,
@@ -438,12 +479,24 @@ enum AVCodecID {
     AV_CODEC_ID_RALF,
     AV_CODEC_ID_IAC,
     AV_CODEC_ID_ILBC,
-    AV_CODEC_ID_OPUS,
+    AV_CODEC_ID_OPUS_DEPRECATED,
     AV_CODEC_ID_COMFORT_NOISE,
-    AV_CODEC_ID_TAK,
+    AV_CODEC_ID_TAK_DEPRECATED,
     AV_CODEC_ID_METASOUND,
-    AV_CODEC_ID_PAF_AUDIO,
+    AV_CODEC_ID_PAF_AUDIO_DEPRECATED,
     AV_CODEC_ID_ON2AVC,
+    AV_CODEC_ID_FFWAVESYNTH = MKBETAG('F','F','W','S'),
+    AV_CODEC_ID_SONIC       = MKBETAG('S','O','N','C'),
+    AV_CODEC_ID_SONIC_LS    = MKBETAG('S','O','N','L'),
+    AV_CODEC_ID_PAF_AUDIO   = MKBETAG('P','A','F','A'),
+    AV_CODEC_ID_OPUS        = MKBETAG('O','P','U','S'),
+    AV_CODEC_ID_TAK         = MKBETAG('t','B','a','K'),
+    AV_CODEC_ID_EVRC        = MKBETAG('s','e','v','c'),
+    AV_CODEC_ID_SMV         = MKBETAG('s','s','m','v'),
+    AV_CODEC_ID_DSD_LSBF    = MKBETAG('D','S','D','L'),
+    AV_CODEC_ID_DSD_MSBF    = MKBETAG('D','S','D','M'),
+    AV_CODEC_ID_DSD_LSBF_PLANAR = MKBETAG('D','S','D','1'),
+    AV_CODEC_ID_DSD_MSBF_PLANAR = MKBETAG('D','S','D','8'),
 
     /* subtitle codecs */
     AV_CODEC_ID_FIRST_SUBTITLE = 0x17000,          ///< A dummy ID pointing at the start of subtitle codecs.
@@ -456,10 +509,32 @@ enum AVCodecID {
     AV_CODEC_ID_HDMV_PGS_SUBTITLE,
     AV_CODEC_ID_DVB_TELETEXT,
     AV_CODEC_ID_SRT,
+    AV_CODEC_ID_MICRODVD   = MKBETAG('m','D','V','D'),
+    AV_CODEC_ID_EIA_608    = MKBETAG('c','6','0','8'),
+    AV_CODEC_ID_JACOSUB    = MKBETAG('J','S','U','B'),
+    AV_CODEC_ID_SAMI       = MKBETAG('S','A','M','I'),
+    AV_CODEC_ID_REALTEXT   = MKBETAG('R','T','X','T'),
+    AV_CODEC_ID_SUBVIEWER1 = MKBETAG('S','b','V','1'),
+    AV_CODEC_ID_SUBVIEWER  = MKBETAG('S','u','b','V'),
+    AV_CODEC_ID_SUBRIP     = MKBETAG('S','R','i','p'),
+    AV_CODEC_ID_WEBVTT     = MKBETAG('W','V','T','T'),
+    AV_CODEC_ID_MPL2       = MKBETAG('M','P','L','2'),
+    AV_CODEC_ID_VPLAYER    = MKBETAG('V','P','l','r'),
+    AV_CODEC_ID_PJS        = MKBETAG('P','h','J','S'),
+    AV_CODEC_ID_ASS        = MKBETAG('A','S','S',' '),  ///< ASS as defined in Matroska
 
     /* other specific kind of codecs (generally used for attachments) */
     AV_CODEC_ID_FIRST_UNKNOWN = 0x18000,           ///< A dummy ID pointing at the start of various fake codecs.
     AV_CODEC_ID_TTF = 0x18000,
+    AV_CODEC_ID_BINTEXT    = MKBETAG('B','T','X','T'),
+    AV_CODEC_ID_XBIN       = MKBETAG('X','B','I','N'),
+    AV_CODEC_ID_IDF        = MKBETAG( 0 ,'I','D','F'),
+    AV_CODEC_ID_OTF        = MKBETAG( 0 ,'O','T','F'),
+    AV_CODEC_ID_SMPTE_KLV  = MKBETAG('K','L','V','A'),
+    AV_CODEC_ID_DVD_NAV    = MKBETAG('D','N','A','V'),
+    AV_CODEC_ID_TIMED_ID3  = MKBETAG('T','I','D','3'),
+    AV_CODEC_ID_BIN_DATA   = MKBETAG('D','A','T','A'),
+
 
     AV_CODEC_ID_PROBE = 0x19000, ///< codec_id is not known (like AV_CODEC_ID_NONE) but lavf should attempt to identify it
 
@@ -468,6 +543,10 @@ enum AVCodecID {
     AV_CODEC_ID_MPEG4SYSTEMS = 0x20001, /**< _FAKE_ codec to indicate a MPEG-4 Systems
                                 * stream (only used by libavformat) */
     AV_CODEC_ID_FFMETADATA = 0x21000,   ///< Dummy codec for streams containing only metadata information.
+
+#if FF_API_CODEC_ID
+#include "old_codec_ids.h"
+#endif
 };
 
 /**
@@ -492,6 +571,13 @@ typedef struct AVCodecDescriptor {
      * Codec properties, a combination of AV_CODEC_PROP_* flags.
      */
     int             props;
+
+    /**
+     * MIME type(s) associated with the codec.
+     * May be NULL; if not, a NULL-terminated array of MIME types.
+     * The first item is always non-NULL and is the preferred MIME type.
+     */
+    const char *const *mime_types;
 } AVCodecDescriptor;
 
 /**
@@ -519,6 +605,16 @@ typedef struct AVCodecDescriptor {
  * equal.
  */
 #define AV_CODEC_PROP_REORDER       (1 << 3)
+/**
+ * Subtitle codec is bitmap based
+ * Decoded AVSubtitle data can be read from the AVSubtitleRect->pict field.
+ */
+#define AV_CODEC_PROP_BITMAP_SUB    (1 << 16)
+/**
+ * Subtitle codec is text based.
+ * Decoded AVSubtitle data can be read from the AVSubtitleRect->ass field.
+ */
+#define AV_CODEC_PROP_TEXT_SUB      (1 << 17)
 
 /**
  * @ingroup lavc_decoding
@@ -528,7 +624,7 @@ typedef struct AVCodecDescriptor {
  * Note: If the first 23 bits of the additional bytes are not 0, then damaged
  * MPEG bitstreams could cause overread and segfault.
  */
-#define FF_INPUT_BUFFER_PADDING_SIZE 8
+#define FF_INPUT_BUFFER_PADDING_SIZE 32
 
 /**
  * @ingroup lavc_encoding
@@ -552,6 +648,7 @@ enum Motion_Est_ID {
     ME_HEX,         ///< hexagon based search
     ME_UMH,         ///< uneven multi-hexagon search
     ME_TESA,        ///< transformed exhaustive search algorithm
+    ME_ITER=50,     ///< iterative search
 };
 
 /**
@@ -564,6 +661,7 @@ enum AVDiscard{
     AVDISCARD_DEFAULT =  0, ///< discard useless packets like 0 size packets in avi
     AVDISCARD_NONREF  =  8, ///< discard all non reference
     AVDISCARD_BIDIR   = 16, ///< discard all bidirectional frames
+    AVDISCARD_NONINTRA= 24, ///< discard all non intra frames
     AVDISCARD_NONKEY  = 32, ///< discard all frames except keyframes
     AVDISCARD_ALL     = 48, ///< discard all
 };
@@ -664,9 +762,11 @@ typedef struct RcOverride{
 #define CODEC_FLAG2_FAST          0x00000001 ///< Allow non spec compliant speedup tricks.
 #define CODEC_FLAG2_NO_OUTPUT     0x00000004 ///< Skip bitstream encoding.
 #define CODEC_FLAG2_LOCAL_HEADER  0x00000008 ///< Place global headers at every keyframe instead of in extradata.
+#define CODEC_FLAG2_DROP_FRAME_TIMECODE 0x00002000 ///< timecode is in drop frame format. DEPRECATED!!!!
 #define CODEC_FLAG2_IGNORE_CROP   0x00010000 ///< Discard cropping information from SPS.
 
 #define CODEC_FLAG2_CHUNKS        0x00008000 ///< Input bitstream might be truncated at a packet boundaries instead of only at frame boundaries.
+#define CODEC_FLAG2_SHOW_ALL      0x00400000 ///< Show all frames before the first keyframe
 
 /* Unsupported options :
  *              Syntax Arithmetic coding (SAC)
@@ -684,7 +784,13 @@ typedef struct RcOverride{
 #define CODEC_CAP_DR1             0x0002
 #define CODEC_CAP_TRUNCATED       0x0008
 #if FF_API_XVMC
-/* Codec can export data for HW decoding (XvMC). */
+/* Codec can export data for HW decoding. This flag indicates that
+ * the codec would call get_format() with list that might contain HW accelerated
+ * pixel formats (XvMC, VDPAU, VAAPI, etc). The application can pick any of them
+ * including raw image format.
+ * The application can use the passed context to determine bitstream version,
+ * chroma format, resolution etc.
+ */
 #define CODEC_CAP_HWACCEL         0x0010
 #endif /* FF_API_XVMC */
 /**
@@ -769,6 +875,14 @@ typedef struct RcOverride{
  * Audio encoder supports receiving a different number of samples in each call.
  */
 #define CODEC_CAP_VARIABLE_FRAME_SIZE 0x10000
+/**
+ * Codec is intra only.
+ */
+#define CODEC_CAP_INTRA_ONLY       0x40000000
+/**
+ * Codec is lossless.
+ */
+#define CODEC_CAP_LOSSLESS         0x80000000
 
 #if FF_API_MB_TYPE
 //The following defines may change, don't expect compatibility if you use them.
@@ -911,6 +1025,70 @@ enum AVPacketSideDataType {
      * See libavutil/display.h for a detailed description of the data.
      */
     AV_PKT_DATA_DISPLAYMATRIX,
+
+    /**
+     * Recommends skipping the specified number of samples
+     * @code
+     * u32le number of samples to skip from start of this packet
+     * u32le number of samples to skip from end of this packet
+     * u8    reason for start skip
+     * u8    reason for end   skip (0=padding silence, 1=convergence)
+     * @endcode
+     */
+    AV_PKT_DATA_SKIP_SAMPLES=70,
+
+    /**
+     * An AV_PKT_DATA_JP_DUALMONO side data packet indicates that
+     * the packet may contain "dual mono" audio specific to Japanese DTV
+     * and if it is true, recommends only the selected channel to be used.
+     * @code
+     * u8    selected channels (0=main/left, 1=sub/right, 2=both)
+     * @endcode
+     */
+    AV_PKT_DATA_JP_DUALMONO,
+
+    /**
+     * A list of zero terminated key/value strings. There is no end marker for
+     * the list, so it is required to rely on the side data size to stop.
+     */
+    AV_PKT_DATA_STRINGS_METADATA,
+
+    /**
+     * Subtitle event position
+     * @code
+     * u32le x1
+     * u32le y1
+     * u32le x2
+     * u32le y2
+     * @endcode
+     */
+    AV_PKT_DATA_SUBTITLE_POSITION,
+
+    /**
+     * Data found in BlockAdditional element of matroska container. There is
+     * no end marker for the data, so it is required to rely on the side data
+     * size to recognize the end. 8 byte id (as found in BlockAddId) followed
+     * by data.
+     */
+    AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL,
+
+    /**
+     * The optional first identifier line of a WebVTT cue.
+     */
+    AV_PKT_DATA_WEBVTT_IDENTIFIER,
+
+    /**
+     * The optional settings (rendering instructions) that immediately
+     * follow the timestamp specifier of a WebVTT cue.
+     */
+    AV_PKT_DATA_WEBVTT_SETTINGS,
+
+    /**
+     * A list of zero terminated key/value strings. There is no end marker for
+     * the list, so it is required to rely on the side data size to stop. This
+     * side data includes updated metadata which appeared in the stream.
+     */
+    AV_PKT_DATA_METADATA_UPDATE,
 };
 
 typedef struct AVPacketSideData {
@@ -927,7 +1105,7 @@ typedef struct AVPacketSideData {
  * For video, it should typically contain one compressed frame. For audio it may
  * contain several compressed frames.
  *
- * AVPacket is one of the few structs in Libav, whose size is a part of public
+ * AVPacket is one of the few structs in FFmpeg, whose size is a part of public
  * ABI. Thus it may be allocated on stack and no new fields can be added to it
  * without libavcodec and libavformat major bump.
  *
@@ -1039,6 +1217,8 @@ enum AVFieldOrder {
  * New fields can be added to the end with minor version bumps.
  * Removal, reordering and changes to existing fields require a major
  * version bump.
+ * Please use AVOptions (av_opt* / av_set/get*()) to access these fields from user
+ * applications.
  * sizeof(AVCodecContext) must not be used outside libav*.
  */
 typedef struct AVCodecContext {
@@ -1150,7 +1330,7 @@ typedef struct AVCodecContext {
      * rv10: additional flags
      * mpeg4: global headers (they can be in the bitstream or here)
      * The allocated memory should be FF_INPUT_BUFFER_PADDING_SIZE bytes larger
-     * than extradata_size to avoid prolems if it is read with the bitstream reader.
+     * than extradata_size to avoid problems if it is read with the bitstream reader.
      * The bytewise contents of extradata must not depend on the architecture or CPU endianness.
      * - encoding: Set/allocated/freed by libavcodec.
      * - decoding: Set/allocated/freed by user.
@@ -1180,6 +1360,11 @@ typedef struct AVCodecContext {
     /**
      * Codec delay.
      *
+     * Encoding: Number of frames delay there will be from the encoder input to
+     *           the decoder output. (we assume the decoder matches the spec)
+     * Decoding: Number of frames delay in addition to what a standard decoder
+     *           as specified in the spec would produce.
+     *
      * Video:
      *   Number of frames the decoded output will be delayed relative to the
      *   encoded input.
@@ -1219,7 +1404,7 @@ typedef struct AVCodecContext {
 
     /**
      * Bitstream width / height, may be different from width/height e.g. when
-     * the decoded frame is cropped before being output.
+     * the decoded frame is cropped before being output or lowres is enabled.
      * - encoding: unused
      * - decoding: May be set by the user before opening the decoder if known
      *             e.g. from the container. During decoding, the decoder may
@@ -1241,7 +1426,7 @@ typedef struct AVCodecContext {
     /**
      * Pixel format, see AV_PIX_FMT_xxx.
      * May be set by the demuxer if known from headers.
-     * May be overriden by the decoder if it knows better.
+     * May be overridden by the decoder if it knows better.
      * - encoding: Set by user.
      * - decoding: Set by user if known, overridden by libavcodec if known
      */
@@ -1250,7 +1435,7 @@ typedef struct AVCodecContext {
     /**
      * Motion estimation algorithm used for video coding.
      * 1 (zero), 2 (full), 3 (log), 4 (phods), 5 (epzs), 6 (x1), 7 (hex),
-     * 8 (umh), 10 (tesa) [7, 8, 10 are x264 specific]
+     * 8 (umh), 9 (tesa), 50 (iter) [7, 8, 9 are x264 specific, 50 is snow specific]
      * - encoding: MUST be set by user.
      * - decoding: unused
      */
@@ -1457,6 +1642,8 @@ typedef struct AVCodecContext {
 #define FF_CMP_VSAD   8
 #define FF_CMP_VSSE   9
 #define FF_CMP_NSSE   10
+#define FF_CMP_W53    11
+#define FF_CMP_W97    12
 #define FF_CMP_DCTMAX 13
 #define FF_CMP_DCT264 14
 #define FF_CMP_CHROMA 256
@@ -1562,7 +1749,7 @@ typedef struct AVCodecContext {
      * XVideo Motion Acceleration
      * - encoding: forbidden
      * - decoding: set by decoder
-     * @deprecated XvMC support is slated for removal.
+     * @deprecated XvMC doesn't need it anymore.
      */
     attribute_deprecated int xvmc_acceleration;
 #endif /* FF_API_XVMC */
@@ -1777,7 +1964,7 @@ typedef struct AVCodecContext {
 
     /** Field order
      * - encoding: set by libavcodec
-     * - decoding: Set by libavcodec
+     * - decoding: Set by user.
      */
     enum AVFieldOrder field_order;
 
@@ -1841,7 +2028,7 @@ typedef struct AVCodecContext {
     /**
      * Audio channel layout.
      * - encoding: set by user.
-     * - decoding: set by libavcodec.
+     * - decoding: set by user, may be overwritten by libavcodec.
      */
     uint64_t channel_layout;
 
@@ -1860,9 +2047,10 @@ typedef struct AVCodecContext {
     enum AVAudioServiceType audio_service_type;
 
     /**
-     * Used to request a sample format from the decoder.
-     * - encoding: unused.
+     * desired sample format
+     * - encoding: Not used.
      * - decoding: Set by user.
+     * Decoder will decode to this format if it can.
      */
     enum AVSampleFormat request_sample_fmt;
 
@@ -2016,6 +2204,8 @@ typedef struct AVCodecContext {
      * avcodec_align_dimensions2() should be used to find the required width and
      * height, as they normally need to be rounded up to the next multiple of 16.
      *
+     * Some decoders do not support linesizes changing between frames.
+     *
      * If frame multithreading is used and thread_safe_callbacks is set,
      * this callback may be called from a different thread, but not from more
      * than one at once. Does not need to be reentrant.
@@ -2082,7 +2272,7 @@ typedef struct AVCodecContext {
 
     /**
      * ratecontrol qmin qmax limiting method
-     * 0-> clipping, 1-> use a nice continuous function to limit qscale wthin qmin/qmax.
+     * 0-> clipping, 1-> use a nice continuous function to limit qscale within qmin/qmax.
      * - encoding: Set by user.
      * - decoding: unused
      */
@@ -2116,7 +2306,7 @@ typedef struct AVCodecContext {
     /**
      * maximum bitrate
      * - encoding: Set by user.
-     * - decoding: unused
+     * - decoding: Set by libavcodec.
      */
     int rc_max_rate;
 
@@ -2179,14 +2369,14 @@ typedef struct AVCodecContext {
     int context_model;
 
     /**
-     * minimum Lagrange multipler
+     * minimum Lagrange multiplier
      * - encoding: Set by user.
      * - decoding: unused
      */
     int lmin;
 
     /**
-     * maximum Lagrange multipler
+     * maximum Lagrange multiplier
      * - encoding: Set by user.
      * - decoding: unused
      */
@@ -2240,9 +2430,9 @@ typedef struct AVCodecContext {
     int max_prediction_order;
 
     /**
-     * GOP timecode frame start number, in non drop frame format
-     * - encoding: Set by user.
-     * - decoding: unused
+     * GOP timecode frame start number
+     * - encoding: Set by user, in non drop frame format
+     * - decoding: Set by libavcodec (timecode in the 25 bits format, -1 if unset)
      */
     int64_t timecode_frame_start;
 
@@ -2347,6 +2537,7 @@ typedef struct AVCodecContext {
     int error_concealment;
 #define FF_EC_GUESS_MVS   1
 #define FF_EC_DEBLOCK     2
+#define FF_EC_FAVOR_INTER 256
 
     /**
      * debug
@@ -2375,17 +2566,20 @@ typedef struct AVCodecContext {
 #define FF_DEBUG_MMCO        0x00000800
 #define FF_DEBUG_BUGS        0x00001000
 #if FF_API_DEBUG_MV
-#define FF_DEBUG_VIS_QP      0x00002000
-#define FF_DEBUG_VIS_MB_TYPE 0x00004000
+#define FF_DEBUG_VIS_QP      0x00002000 ///< only access through AVOptions from outside libavcodec
+#define FF_DEBUG_VIS_MB_TYPE 0x00004000 ///< only access through AVOptions from outside libavcodec
 #endif
 #define FF_DEBUG_BUFFERS     0x00008000
 #define FF_DEBUG_THREADS     0x00010000
+#define FF_DEBUG_NOMC        0x01000000
 
 #if FF_API_DEBUG_MV
     /**
-     * @deprecated this option does not have any effect
+     * debug
+     * Code outside libavcodec should access this field using AVOptions
+     * - encoding: Set by user.
+     * - decoding: Set by user.
      */
-    attribute_deprecated
     int debug_mv;
 #define FF_DEBUG_VIS_MV_P_FOR  0x00000001 //visualize forward predicted MVs of P frames
 #define FF_DEBUG_VIS_MV_B_FOR  0x00000002 //visualize forward predicted MVs of B frames
@@ -2406,9 +2600,15 @@ typedef struct AVCodecContext {
  * decoder returning an error.
  */
 #define AV_EF_CRCCHECK  (1<<0)
-#define AV_EF_BITSTREAM (1<<1)
-#define AV_EF_BUFFER    (1<<2)
-#define AV_EF_EXPLODE   (1<<3)
+#define AV_EF_BITSTREAM (1<<1)          ///< detect bitstream specification deviations
+#define AV_EF_BUFFER    (1<<2)          ///< detect improper bitstream length
+#define AV_EF_EXPLODE   (1<<3)          ///< abort decoding on minor error detection
+
+#define AV_EF_IGNORE_ERR (1<<15)        ///< ignore errors and continue
+#define AV_EF_CAREFUL    (1<<16)        ///< consider things that violate the spec, are fast to calculate and have not been seen in the wild as errors
+#define AV_EF_COMPLIANT  (1<<17)        ///< consider all spec non compliances as errors
+#define AV_EF_AGGRESSIVE (1<<18)        ///< consider things that a sane encoder should not do as an error
+
 
     /**
      * opaque 64bit number (generally a PTS) that will be reordered and
@@ -2430,8 +2630,8 @@ typedef struct AVCodecContext {
      * Hardware accelerator context.
      * For some hardware accelerators, a global context needs to be
      * provided by the user. In that case, this holds display-dependent
-     * data Libav cannot instantiate itself. Please refer to the
-     * Libav HW accelerator documentation to know how to fill this
+     * data FFmpeg cannot instantiate itself. Please refer to the
+     * FFmpeg HW accelerator documentation to know how to fill this
      * is. e.g. for VA API, this is a struct vaapi_context.
      * - encoding: unused
      * - decoding: Set by user
@@ -2490,6 +2690,7 @@ typedef struct AVCodecContext {
 #if FF_API_ARCH_ALPHA
 #define FF_IDCT_SIMPLEALPHA   23
 #endif
+#define FF_IDCT_SIMPLEAUTO    128
 
     /**
      * bits per sample/pixel from the demuxer (needed for huffyuv).
@@ -2510,10 +2711,10 @@ typedef struct AVCodecContext {
      * low resolution decoding, 1-> 1/2 size, 2->1/4 size
      * - encoding: unused
      * - decoding: Set by user.
-     *
-     * @deprecated use decoder private options instead
+     * Code outside libavcodec should access this field using:
+     * av_codec_{get,set}_lowres(avctx)
      */
-    attribute_deprecated int lowres;
+     int lowres;
 #endif
 
     /**
@@ -2600,7 +2801,7 @@ typedef struct AVCodecContext {
 #endif
 
     /**
-     * noise vs. sse weight for the nsse comparsion function
+     * noise vs. sse weight for the nsse comparison function
      * - encoding: Set by user.
      * - decoding: unused
      */
@@ -2688,6 +2889,7 @@ typedef struct AVCodecContext {
 #define FF_PROFILE_HEVC_MAIN                        1
 #define FF_PROFILE_HEVC_MAIN_10                     2
 #define FF_PROFILE_HEVC_MAIN_STILL_PICTURE          3
+#define FF_PROFILE_HEVC_REXT                        4
 
     /**
      * level
@@ -2698,21 +2900,21 @@ typedef struct AVCodecContext {
 #define FF_LEVEL_UNKNOWN -99
 
     /**
-     *
+     * Skip loop filtering for selected frames.
      * - encoding: unused
      * - decoding: Set by user.
      */
     enum AVDiscard skip_loop_filter;
 
     /**
-     *
+     * Skip IDCT/dequantization for selected frames.
      * - encoding: unused
      * - decoding: Set by user.
      */
     enum AVDiscard skip_idct;
 
     /**
-     *
+     * Skip decoding for selected frames.
      * - encoding: unused
      * - decoding: Set by user.
      */
@@ -2768,8 +2970,123 @@ typedef struct AVCodecContext {
      * use AVOptions to set this field.
      */
     int side_data_only_packets;
+
+    /**
+     * Timebase in which pkt_dts/pts and AVPacket.dts/pts are.
+     * Code outside libavcodec should access this field using:
+     * av_codec_{get,set}_pkt_timebase(avctx)
+     * - encoding: unused.
+     * - decoding: set by user.
+     */
+    AVRational pkt_timebase;
+
+    /**
+     * AVCodecDescriptor
+     * Code outside libavcodec should access this field using:
+     * av_codec_{get,set}_codec_descriptor(avctx)
+     * - encoding: unused.
+     * - decoding: set by libavcodec.
+     */
+    const AVCodecDescriptor *codec_descriptor;
+
+#if !FF_API_LOWRES
+    /**
+     * low resolution decoding, 1-> 1/2 size, 2->1/4 size
+     * - encoding: unused
+     * - decoding: Set by user.
+     * Code outside libavcodec should access this field using:
+     * av_codec_{get,set}_lowres(avctx)
+     */
+     int lowres;
+#endif
+
+    /**
+     * Current statistics for PTS correction.
+     * - decoding: maintained and used by libavcodec, not intended to be used by user apps
+     * - encoding: unused
+     */
+    int64_t pts_correction_num_faulty_pts; ///< Number of incorrect PTS values so far
+    int64_t pts_correction_num_faulty_dts; ///< Number of incorrect DTS values so far
+    int64_t pts_correction_last_pts;       ///< PTS of the last frame
+    int64_t pts_correction_last_dts;       ///< DTS of the last frame
+
+    /**
+     * Character encoding of the input subtitles file.
+     * - decoding: set by user
+     * - encoding: unused
+     */
+    char *sub_charenc;
+
+    /**
+     * Subtitles character encoding mode. Formats or codecs might be adjusting
+     * this setting (if they are doing the conversion themselves for instance).
+     * - decoding: set by libavcodec
+     * - encoding: unused
+     */
+    int sub_charenc_mode;
+#define FF_SUB_CHARENC_MODE_DO_NOTHING  -1  ///< do nothing (demuxer outputs a stream supposed to be already in UTF-8, or the codec is bitmap for instance)
+#define FF_SUB_CHARENC_MODE_AUTOMATIC    0  ///< libavcodec will select the mode itself
+#define FF_SUB_CHARENC_MODE_PRE_DECODER  1  ///< the AVPacket data needs to be recoded to UTF-8 before being fed to the decoder, requires iconv
+
+    /**
+     * Skip processing alpha if supported by codec.
+     * Note that if the format uses pre-multiplied alpha (common with VP6,
+     * and recommended due to better video quality/compression)
+     * the image will look as if alpha-blended onto a black background.
+     * However for formats that do not use pre-multiplied alpha
+     * there might be serious artefacts (though e.g. libswscale currently
+     * assumes pre-multiplied alpha anyway).
+     * Code outside libavcodec should access this field using AVOptions
+     *
+     * - decoding: set by user
+     * - encoding: unused
+     */
+    int skip_alpha;
+
+    /**
+     * Number of samples to skip after a discontinuity
+     * - decoding: unused
+     * - encoding: set by libavcodec
+     */
+    int seek_preroll;
+
+#if !FF_API_DEBUG_MV
+    /**
+     * debug motion vectors
+     * Code outside libavcodec should access this field using AVOptions
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+    int debug_mv;
+#define FF_DEBUG_VIS_MV_P_FOR  0x00000001 //visualize forward predicted MVs of P frames
+#define FF_DEBUG_VIS_MV_B_FOR  0x00000002 //visualize forward predicted MVs of B frames
+#define FF_DEBUG_VIS_MV_B_BACK 0x00000004 //visualize backward predicted MVs of B frames
+#endif
+
+    /**
+     * custom intra quantization matrix
+     * Code outside libavcodec should access this field using av_codec_{get,set}_chroma_intra_matrix()
+     * - encoding: Set by user, can be NULL.
+     * - decoding: unused.
+     */
+    uint16_t *chroma_intra_matrix;
 } AVCodecContext;
 
+AVRational av_codec_get_pkt_timebase         (const AVCodecContext *avctx);
+void       av_codec_set_pkt_timebase         (AVCodecContext *avctx, AVRational val);
+
+const AVCodecDescriptor *av_codec_get_codec_descriptor(const AVCodecContext *avctx);
+void                     av_codec_set_codec_descriptor(AVCodecContext *avctx, const AVCodecDescriptor *desc);
+
+int  av_codec_get_lowres(const AVCodecContext *avctx);
+void av_codec_set_lowres(AVCodecContext *avctx, int val);
+
+int  av_codec_get_seek_preroll(const AVCodecContext *avctx);
+void av_codec_set_seek_preroll(AVCodecContext *avctx, int val);
+
+uint16_t *av_codec_get_chroma_intra_matrix(const AVCodecContext *avctx);
+void av_codec_set_chroma_intra_matrix(AVCodecContext *avctx, uint16_t *val);
+
 /**
  * AVProfile.
  */
@@ -2811,7 +3128,7 @@ typedef struct AVCodec {
     const enum AVSampleFormat *sample_fmts; ///< array of supported sample formats, or NULL if unknown, array is terminated by -1
     const uint64_t *channel_layouts;         ///< array of support channel layouts, or NULL if unknown. array is terminated by 0
 #if FF_API_LOWRES
-    attribute_deprecated uint8_t max_lowres; ///< maximum value for lowres supported by the decoder
+    uint8_t max_lowres;                     ///< maximum value for lowres supported by the decoder, no direct access, use av_codec_get_max_lowres()
 #endif
     const AVClass *priv_class;              ///< AVClass for the private context
     const AVProfile *profiles;              ///< array of recognized profiles, or NULL if unknown, array is terminated by {FF_PROFILE_UNKNOWN}
@@ -2879,6 +3196,10 @@ typedef struct AVCodec {
     void (*flush)(AVCodecContext *);
 } AVCodec;
 
+int av_codec_get_max_lowres(const AVCodec *codec);
+
+struct MpegEncContext;
+
 /**
  * AVHWAccel.
  */
@@ -2952,6 +3273,7 @@ typedef struct AVHWAccel {
      *
      * Meaningful slice information (codec specific) is guaranteed to
      * be parsed at this point. This function is mandatory.
+     * The only exception is XvMC, which works at the MB level.
      *
      * @param avctx the codec context
      * @param buf the slice data buffer base
@@ -2981,6 +3303,17 @@ typedef struct AVHWAccel {
     int frame_priv_data_size;
 
     /**
+     * Called for every Macroblock in a slice.
+     *
+     * XvMC uses it to replace the ff_MPV_decode_mb().
+     * Instead of decoding to raw picture, MB parameters are
+     * stored in an array provided by the video driver.
+     *
+     * @param s the mpeg context
+     */
+    void (*decode_mb)(struct MpegEncContext *s);
+
+    /**
      * Initialize the hwaccel private data.
      *
      * This will be called from ff_get_format(), after hwaccel and
@@ -3012,11 +3345,13 @@ typedef struct AVHWAccel {
  */
 
 /**
- * four components are given, that's all.
- * the last component is alpha
+ * Picture data structure.
+ *
+ * Up to four components can be stored into it, the last component is
+ * alpha.
  */
 typedef struct AVPicture {
-    uint8_t *data[AV_NUM_DATA_POINTERS];
+    uint8_t *data[AV_NUM_DATA_POINTERS];    ///< pointers to the image data planes
     int linesize[AV_NUM_DATA_POINTERS];     ///< number of bytes per line
 } AVPicture;
 
@@ -3024,9 +3359,6 @@ typedef struct AVPicture {
  * @}
  */
 
-#define AVPALETTE_SIZE 1024
-#define AVPALETTE_COUNT 256
-
 enum AVSubtitleType {
     SUBTITLE_NONE,
 
@@ -3065,10 +3397,11 @@ typedef struct AVSubtitleRect {
 
     /**
      * 0 terminated ASS/SSA compatible event line.
-     * The pressentation of this is unaffected by the other values in this
+     * The presentation of this is unaffected by the other values in this
      * struct.
      */
     char *ass;
+
     int flags;
 } AVSubtitleRect;
 
@@ -3167,13 +3500,29 @@ int avcodec_get_context_defaults3(AVCodecContext *s, const AVCodec *codec);
 const AVClass *avcodec_get_class(void);
 
 /**
+ * Get the AVClass for AVFrame. It can be used in combination with
+ * AV_OPT_SEARCH_FAKE_OBJ for examining options.
+ *
+ * @see av_opt_find().
+ */
+const AVClass *avcodec_get_frame_class(void);
+
+/**
+ * Get the AVClass for AVSubtitleRect. It can be used in combination with
+ * AV_OPT_SEARCH_FAKE_OBJ for examining options.
+ *
+ * @see av_opt_find().
+ */
+const AVClass *avcodec_get_subtitle_rect_class(void);
+
+/**
  * Copy the settings of the source AVCodecContext into the destination
  * AVCodecContext. The resulting destination codec context will be
  * unopened, i.e. you are required to call avcodec_open2() before you
  * can use this AVCodecContext to decode/encode video/audio data.
  *
  * @param dest target codec context, should be initialized with
- *             avcodec_alloc_context3(), but otherwise uninitialized
+ *             avcodec_alloc_context3(NULL), but otherwise uninitialized
  * @param src source codec context
  * @return AVERROR() on error (e.g. memory allocation error), 0 on success
  */
@@ -3344,6 +3693,20 @@ int av_packet_from_data(AVPacket *pkt, uint8_t *data, int size);
 int av_dup_packet(AVPacket *pkt);
 
 /**
+ * Copy packet, including contents
+ *
+ * @return 0 on success, negative AVERROR on fail
+ */
+int av_copy_packet(AVPacket *dst, const AVPacket *src);
+
+/**
+ * Copy packet side data
+ *
+ * @return 0 on success, negative AVERROR on fail
+ */
+int av_copy_packet_side_data(AVPacket *dst, const AVPacket *src);
+
+/**
  * Free a packet.
  *
  * @param pkt packet to free
@@ -3383,6 +3746,29 @@ int av_packet_shrink_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
 uint8_t* av_packet_get_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
                                  int *size);
 
+int av_packet_merge_side_data(AVPacket *pkt);
+
+int av_packet_split_side_data(AVPacket *pkt);
+
+/**
+ * Pack a dictionary for use in side_data.
+ *
+ * @param dict The dictionary to pack.
+ * @param size pointer to store the size of the returned data
+ * @return pointer to data if successful, NULL otherwise
+ */
+uint8_t *av_packet_pack_dictionary(AVDictionary *dict, int *size);
+/**
+ * Unpack a dictionary from side_data.
+ *
+ * @param data data from side_data
+ * @param size size of the data
+ * @param dict the metadata storage dictionary
+ * @return 0 on success, < 0 on failure
+ */
+int av_packet_unpack_dictionary(const uint8_t *data, int size, AVDictionary **dict);
+
+
 /**
  * Convenience function to free all the side data stored.
  * All the other fields stay untouched.
@@ -3407,7 +3793,7 @@ void av_packet_free_side_data(AVPacket *pkt);
  *
  * @return 0 on success, a negative AVERROR on error.
  */
-int av_packet_ref(AVPacket *dst, AVPacket *src);
+int av_packet_ref(AVPacket *dst, const AVPacket *src);
 
 /**
  * Wipe the packet.
@@ -3529,6 +3915,88 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height,
                                int linesize_align[AV_NUM_DATA_POINTERS]);
 
 /**
+ * Converts AVChromaLocation to swscale x/y chroma position.
+ *
+ * The positions represent the chroma (0,0) position in a coordinate system
+ * with luma (0,0) representing the origin and luma (1,1) representing 256,256
+ *
+ * @param xpos  horizontal chroma sample position
+ * @param ypos  vertical   chroma sample position
+ */
+int avcodec_enum_to_chroma_pos(int *xpos, int *ypos, enum AVChromaLocation pos);
+
+/**
+ * Converts swscale x/y chroma position to AVChromaLocation.
+ *
+ * The positions represent the chroma (0,0) position in a coordinate system
+ * with luma (0,0) representing the origin and luma (1,1) representing 256,256
+ *
+ * @param xpos  horizontal chroma sample position
+ * @param ypos  vertical   chroma sample position
+ */
+enum AVChromaLocation avcodec_chroma_pos_to_enum(int xpos, int ypos);
+
+#if FF_API_OLD_DECODE_AUDIO
+/**
+ * Wrapper function which calls avcodec_decode_audio4.
+ *
+ * @deprecated Use avcodec_decode_audio4 instead.
+ *
+ * Decode the audio frame of size avpkt->size from avpkt->data into samples.
+ * Some decoders may support multiple frames in a single AVPacket, such
+ * decoders would then just decode the first frame. In this case,
+ * avcodec_decode_audio3 has to be called again with an AVPacket that contains
+ * the remaining data in order to decode the second frame etc.
+ * If no frame
+ * could be outputted, frame_size_ptr is zero. Otherwise, it is the
+ * decompressed frame size in bytes.
+ *
+ * @warning You must set frame_size_ptr to the allocated size of the
+ * output buffer before calling avcodec_decode_audio3().
+ *
+ * @warning The input buffer must be FF_INPUT_BUFFER_PADDING_SIZE larger than
+ * the actual read bytes because some optimized bitstream readers read 32 or 64
+ * bits at once and could read over the end.
+ *
+ * @warning The end of the input buffer avpkt->data should be set to 0 to ensure that
+ * no overreading happens for damaged MPEG streams.
+ *
+ * @warning You must not provide a custom get_buffer() when using
+ * avcodec_decode_audio3().  Doing so will override it with
+ * avcodec_default_get_buffer.  Use avcodec_decode_audio4() instead,
+ * which does allow the application to provide a custom get_buffer().
+ *
+ * @note You might have to align the input buffer avpkt->data and output buffer
+ * samples. The alignment requirements depend on the CPU: On some CPUs it isn't
+ * necessary at all, on others it won't work at all if not aligned and on others
+ * it will work but it will have an impact on performance.
+ *
+ * In practice, avpkt->data should have 4 byte alignment at minimum and
+ * samples should be 16 byte aligned unless the CPU doesn't need it
+ * (AltiVec and SSE do).
+ *
+ * @note Codecs which have the CODEC_CAP_DELAY capability set have a delay
+ * between input and output, these need to be fed with avpkt->data=NULL,
+ * avpkt->size=0 at the end to return the remaining frames.
+ *
+ * @param avctx the codec context
+ * @param[out] samples the output buffer, sample type in avctx->sample_fmt
+ *                     If the sample format is planar, each channel plane will
+ *                     be the same size, with no padding between channels.
+ * @param[in,out] frame_size_ptr the output buffer size in bytes
+ * @param[in] avpkt The input AVPacket containing the input buffer.
+ *            You can create such packet with av_init_packet() and by then setting
+ *            data and size, some decoders might in addition need other fields.
+ *            All decoders are designed to use the least fields possible though.
+ * @return On error a negative value is returned, otherwise the number of bytes
+ * used or zero if no frame data was decompressed (used) from the input AVPacket.
+ */
+attribute_deprecated int avcodec_decode_audio3(AVCodecContext *avctx, int16_t *samples,
+                         int *frame_size_ptr,
+                         AVPacket *avpkt);
+#endif
+
+/**
  * Decode the audio frame of size avpkt->size from avpkt->data into frame.
  *
  * Some decoders may support multiple frames in a single AVPacket. Such
@@ -3577,7 +4045,7 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height,
  *         AVPacket is returned.
  */
 int avcodec_decode_audio4(AVCodecContext *avctx, AVFrame *frame,
-                          int *got_frame_ptr, AVPacket *avpkt);
+                          int *got_frame_ptr, const AVPacket *avpkt);
 
 /**
  * Decode the video frame of size avpkt->size from avpkt->data into picture.
@@ -3610,7 +4078,7 @@ int avcodec_decode_audio4(AVCodecContext *avctx, AVFrame *frame,
  *             next call to this function or until closing or flushing the
  *             decoder. The caller may not write to it.
  *
- * @param[in] avpkt The input AVpacket containing the input buffer.
+ * @param[in] avpkt The input AVPacket containing the input buffer.
  *            You can create such packet with av_init_packet() and by then setting
  *            data and size, some decoders might in addition need other fields like
  *            flags&AV_PKT_FLAG_KEY. All decoders are designed to use the least
@@ -3621,7 +4089,7 @@ int avcodec_decode_audio4(AVCodecContext *avctx, AVFrame *frame,
  */
 int avcodec_decode_video2(AVCodecContext *avctx, AVFrame *picture,
                          int *got_picture_ptr,
-                         AVPacket *avpkt);
+                         const AVPacket *avpkt);
 
 /**
  * Decode a subtitle message.
@@ -3633,9 +4101,17 @@ int avcodec_decode_video2(AVCodecContext *avctx, AVFrame *picture,
  * and reusing a get_buffer written for video codecs would probably perform badly
  * due to a potentially very different allocation pattern.
  *
+ * Some decoders (those marked with CODEC_CAP_DELAY) have a delay between input
+ * and output. This means that for some packets they will not immediately
+ * produce decoded output and need to be flushed at the end of decoding to get
+ * all the decoded data. Flushing is done by calling this function with packets
+ * with avpkt->data set to NULL and avpkt->size set to 0 until it stops
+ * returning subtitles. It is safe to flush even those decoders that are not
+ * marked with CODEC_CAP_DELAY, then no subtitles will be returned.
+ *
  * @param avctx the codec context
- * @param[out] sub The AVSubtitle in which the decoded subtitle will be stored, must be
-                   freed with avsubtitle_free if *got_sub_ptr is set.
+ * @param[out] sub The preallocated AVSubtitle in which the decoded subtitle will be stored,
+ *                 must be freed with avsubtitle_free if *got_sub_ptr is set.
  * @param[in,out] got_sub_ptr Zero if no subtitle could be decompressed, otherwise, it is nonzero.
  * @param[in] avpkt The input AVPacket containing the input buffer.
  */
@@ -3693,6 +4169,7 @@ typedef struct AVCodecParserContext {
 #define PARSER_FLAG_ONCE                      0x0002
 /// Set if the parser has a valid file offset
 #define PARSER_FLAG_FETCHED_OFFSET            0x0004
+#define PARSER_FLAG_USE_CODEC_TS              0x1000
 
     int64_t offset;      ///< byte offset from starting packet start
     int64_t cur_frame_end[AV_PARSER_PTS_NB];
@@ -3865,7 +4342,7 @@ int av_parser_parse2(AVCodecParserContext *s,
 
 /**
  * @return 0 if the output buffer is a subset of the input, 1 if it is allocated and must be freed
- * @deprecated use AVBitstreamFilter
+ * @deprecated use AVBitStreamFilter
  */
 int av_parser_change(AVCodecParserContext *s,
                      AVCodecContext *avctx,
@@ -3899,6 +4376,36 @@ AVCodec *avcodec_find_encoder(enum AVCodecID id);
  */
 AVCodec *avcodec_find_encoder_by_name(const char *name);
 
+#if FF_API_OLD_ENCODE_AUDIO
+/**
+ * Encode an audio frame from samples into buf.
+ *
+ * @deprecated Use avcodec_encode_audio2 instead.
+ *
+ * @note The output buffer should be at least FF_MIN_BUFFER_SIZE bytes large.
+ * However, for codecs with avctx->frame_size equal to 0 (e.g. PCM) the user
+ * will know how much space is needed because it depends on the value passed
+ * in buf_size as described below. In that case a lower value can be used.
+ *
+ * @param avctx the codec context
+ * @param[out] buf the output buffer
+ * @param[in] buf_size the output buffer size
+ * @param[in] samples the input buffer containing the samples
+ * The number of samples read from this buffer is frame_size*channels,
+ * both of which are defined in avctx.
+ * For codecs which have avctx->frame_size equal to 0 (e.g. PCM) the number of
+ * samples read from samples is equal to:
+ * buf_size * 8 / (avctx->channels * av_get_bits_per_sample(avctx->codec_id))
+ * This also implies that av_get_bits_per_sample() must not return 0 for these
+ * codecs.
+ * @return On error a negative value is returned, on success zero or the number
+ * of bytes used to encode the data read from the input buffer.
+ */
+int attribute_deprecated avcodec_encode_audio(AVCodecContext *avctx,
+                                              uint8_t *buf, int buf_size,
+                                              const short *samples);
+#endif
+
 /**
  * Encode a frame of audio.
  *
@@ -3912,11 +4419,12 @@ AVCodec *avcodec_find_encoder_by_name(const char *name);
  *                  The user can supply an output buffer by setting
  *                  avpkt->data and avpkt->size prior to calling the
  *                  function, but if the size of the user-provided data is not
- *                  large enough, encoding will fail. All other AVPacket fields
- *                  will be reset by the encoder using av_init_packet(). If
- *                  avpkt->data is NULL, the encoder will allocate it.
- *                  The encoder will set avpkt->size to the size of the
- *                  output packet.
+ *                  large enough, encoding will fail. If avpkt->data and
+ *                  avpkt->size are set, avpkt->destruct must also be set. All
+ *                  other AVPacket fields will be reset by the encoder using
+ *                  av_init_packet(). If avpkt->data is NULL, the encoder will
+ *                  allocate it. The encoder will set avpkt->size to the size
+ *                  of the output packet.
  *
  *                  If this function fails or produces no output, avpkt will be
  *                  freed using av_free_packet() (i.e. avpkt->destruct will be
@@ -3940,6 +4448,26 @@ AVCodec *avcodec_find_encoder_by_name(const char *name);
 int avcodec_encode_audio2(AVCodecContext *avctx, AVPacket *avpkt,
                           const AVFrame *frame, int *got_packet_ptr);
 
+#if FF_API_OLD_ENCODE_VIDEO
+/**
+ * @deprecated use avcodec_encode_video2() instead.
+ *
+ * Encode a video frame from pict into buf.
+ * The input picture should be
+ * stored using a specific format, namely avctx.pix_fmt.
+ *
+ * @param avctx the codec context
+ * @param[out] buf the output buffer for the bitstream of encoded frame
+ * @param[in] buf_size the size of the output buffer in bytes
+ * @param[in] pict the input picture to encode
+ * @return On error a negative value is returned, on success zero or the number
+ * of bytes used from the output buffer.
+ */
+attribute_deprecated
+int avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf, int buf_size,
+                         const AVFrame *pict);
+#endif
+
 /**
  * Encode a frame of video.
  *
@@ -3985,21 +4513,121 @@ int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size,
  * @}
  */
 
+#if FF_API_AVCODEC_RESAMPLE
+/**
+ * @defgroup lavc_resample Audio resampling
+ * @ingroup libavc
+ * @deprecated use libswresample instead
+ *
+ * @{
+ */
+struct ReSampleContext;
+struct AVResampleContext;
+
+typedef struct ReSampleContext ReSampleContext;
+
+/**
+ *  Initialize audio resampling context.
+ *
+ * @param output_channels  number of output channels
+ * @param input_channels   number of input channels
+ * @param output_rate      output sample rate
+ * @param input_rate       input sample rate
+ * @param sample_fmt_out   requested output sample format
+ * @param sample_fmt_in    input sample format
+ * @param filter_length    length of each FIR filter in the filterbank relative to the cutoff frequency
+ * @param log2_phase_count log2 of the number of entries in the polyphase filterbank
+ * @param linear           if 1 then the used FIR filter will be linearly interpolated
+ *                         between the 2 closest, if 0 the closest will be used
+ * @param cutoff           cutoff frequency, 1.0 corresponds to half the output sampling rate
+ * @return allocated ReSampleContext, NULL if error occurred
+ */
+attribute_deprecated
+ReSampleContext *av_audio_resample_init(int output_channels, int input_channels,
+                                        int output_rate, int input_rate,
+                                        enum AVSampleFormat sample_fmt_out,
+                                        enum AVSampleFormat sample_fmt_in,
+                                        int filter_length, int log2_phase_count,
+                                        int linear, double cutoff);
+
+attribute_deprecated
+int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples);
+
+/**
+ * Free resample context.
+ *
+ * @param s a non-NULL pointer to a resample context previously
+ *          created with av_audio_resample_init()
+ */
+attribute_deprecated
+void audio_resample_close(ReSampleContext *s);
+
+
+/**
+ * Initialize an audio resampler.
+ * Note, if either rate is not an integer then simply scale both rates up so they are.
+ * @param filter_length length of each FIR filter in the filterbank relative to the cutoff freq
+ * @param log2_phase_count log2 of the number of entries in the polyphase filterbank
+ * @param linear If 1 then the used FIR filter will be linearly interpolated
+ *               between the 2 closest, if 0 the closest will be used
+ * @param cutoff cutoff frequency, 1.0 corresponds to half the output sampling rate
+ */
+attribute_deprecated
+struct AVResampleContext *av_resample_init(int out_rate, int in_rate, int filter_length, int log2_phase_count, int linear, double cutoff);
+
+/**
+ * Resample an array of samples using a previously configured context.
+ * @param src an array of unconsumed samples
+ * @param consumed the number of samples of src which have been consumed are returned here
+ * @param src_size the number of unconsumed samples available
+ * @param dst_size the amount of space in samples available in dst
+ * @param update_ctx If this is 0 then the context will not be modified, that way several channels can be resampled with the same context.
+ * @return the number of samples written in dst or -1 if an error occurred
+ */
+attribute_deprecated
+int av_resample(struct AVResampleContext *c, short *dst, short *src, int *consumed, int src_size, int dst_size, int update_ctx);
+
+
+/**
+ * Compensate samplerate/timestamp drift. The compensation is done by changing
+ * the resampler parameters, so no audible clicks or similar distortions occur
+ * @param compensation_distance distance in output samples over which the compensation should be performed
+ * @param sample_delta number of output samples which should be output less
+ *
+ * example: av_resample_compensate(c, 10, 500)
+ * here instead of 510 samples only 500 samples would be output
+ *
+ * note, due to rounding the actual compensation might be slightly different,
+ * especially if the compensation_distance is large and the in_rate used during init is small
+ */
+attribute_deprecated
+void av_resample_compensate(struct AVResampleContext *c, int sample_delta, int compensation_distance);
+attribute_deprecated
+void av_resample_close(struct AVResampleContext *c);
+
+/**
+ * @}
+ */
+#endif
+
 /**
  * @addtogroup lavc_picture
  * @{
  */
 
 /**
- * Allocate memory for a picture.  Call avpicture_free() to free it.
+ * Allocate memory for the pixels of a picture and setup the AVPicture
+ * fields for it.
+ *
+ * Call avpicture_free() to free it.
  *
- * @see avpicture_fill()
+ * @param picture            the picture structure to be filled in
+ * @param pix_fmt            the pixel format of the picture
+ * @param width              the width of the picture
+ * @param height             the height of the picture
+ * @return zero if successful, a negative error code otherwise
  *
- * @param picture the picture to be filled in
- * @param pix_fmt the format of the picture
- * @param width the width of the picture
- * @param height the height of the picture
- * @return zero if successful, a negative value if not
+ * @see av_image_alloc(), avpicture_fill()
  */
 int avpicture_alloc(AVPicture *picture, enum AVPixelFormat pix_fmt, int width, int height);
 
@@ -4013,58 +4641,64 @@ int avpicture_alloc(AVPicture *picture, enum AVPixelFormat pix_fmt, int width, i
 void avpicture_free(AVPicture *picture);
 
 /**
- * Fill in the AVPicture fields.
- * The fields of the given AVPicture are filled in by using the 'ptr' address
- * which points to the image data buffer. Depending on the specified picture
- * format, one or multiple image data pointers and line sizes will be set.
- * If a planar format is specified, several pointers will be set pointing to
- * the different picture planes and the line sizes of the different planes
- * will be stored in the lines_sizes array.
- * Call with ptr == NULL to get the required size for the ptr buffer.
+ * Setup the picture fields based on the specified image parameters
+ * and the provided image data buffer.
  *
- * To allocate the buffer and fill in the AVPicture fields in one call,
+ * The picture fields are filled in by using the image data buffer
+ * pointed to by ptr.
+ *
+ * If ptr is NULL, the function will fill only the picture linesize
+ * array and return the required size for the image buffer.
+ *
+ * To allocate an image buffer and fill the picture data in one call,
  * use avpicture_alloc().
  *
- * @param picture AVPicture whose fields are to be filled in
- * @param ptr Buffer which will contain or contains the actual image data
- * @param pix_fmt The format in which the picture data is stored.
- * @param width the width of the image in pixels
- * @param height the height of the image in pixels
- * @return size of the image data in bytes
+ * @param picture       the picture to be filled in
+ * @param ptr           buffer where the image data is stored, or NULL
+ * @param pix_fmt       the pixel format of the image
+ * @param width         the width of the image in pixels
+ * @param height        the height of the image in pixels
+ * @return the size in bytes required for ptr, a negative error code
+ * in case of failure
+ *
+ * @see av_image_fill_arrays()
  */
-int avpicture_fill(AVPicture *picture, uint8_t *ptr,
+int avpicture_fill(AVPicture *picture, const uint8_t *ptr,
                    enum AVPixelFormat pix_fmt, int width, int height);
 
 /**
  * Copy pixel data from an AVPicture into a buffer.
- * The data is stored compactly, without any gaps for alignment or padding
- * which may be applied by avpicture_fill().
  *
- * @see avpicture_get_size()
+ * avpicture_get_size() can be used to compute the required size for
+ * the buffer to fill.
  *
- * @param[in] src AVPicture containing image data
- * @param[in] pix_fmt The format in which the picture data is stored.
- * @param[in] width the width of the image in pixels.
- * @param[in] height the height of the image in pixels.
- * @param[out] dest A buffer into which picture data will be copied.
- * @param[in] dest_size The size of 'dest'.
- * @return The number of bytes written to dest, or a negative value (error code) on error.
+ * @param src        source picture with filled data
+ * @param pix_fmt    picture pixel format
+ * @param width      picture width
+ * @param height     picture height
+ * @param dest       destination buffer
+ * @param dest_size  destination buffer size in bytes
+ * @return the number of bytes written to dest, or a negative value
+ * (error code) on error, for example if the destination buffer is not
+ * big enough
+ *
+ * @see av_image_copy_to_buffer()
  */
-int avpicture_layout(const AVPicture* src, enum AVPixelFormat pix_fmt,
+int avpicture_layout(const AVPicture *src, enum AVPixelFormat pix_fmt,
                      int width, int height,
                      unsigned char *dest, int dest_size);
 
 /**
  * Calculate the size in bytes that a picture of the given width and height
  * would occupy if stored in the given picture format.
- * Note that this returns the size of a compact representation as generated
- * by avpicture_layout(), which can be smaller than the size required for e.g.
- * avpicture_fill().
  *
- * @param pix_fmt the given picture format
- * @param width the width of the image
- * @param height the height of the image
- * @return Image data size in bytes or -1 on error (e.g. too large dimensions).
+ * @param pix_fmt    picture pixel format
+ * @param width      picture width
+ * @param height     picture height
+ * @return the computed picture buffer size or a negative error code
+ * in case of error
+ *
+ * @see av_image_get_buffer_size().
  */
 int avpicture_get_size(enum AVPixelFormat pix_fmt, int width, int height);
 
@@ -4079,7 +4713,7 @@ int avpicture_deinterlace(AVPicture *dst, const AVPicture *src,
                           enum AVPixelFormat pix_fmt, int width, int height);
 #endif
 /**
- * Copy image src to dst. Wraps av_picture_data_copy() above.
+ * Copy image src to dst. Wraps av_image_copy().
  */
 void av_picture_copy(AVPicture *dst, const AVPicture *src,
                      enum AVPixelFormat pix_fmt, int width, int height);
@@ -4117,10 +4751,21 @@ int av_picture_pad(AVPicture *dst, const AVPicture *src, int height, int width,
  */
 
 /**
- * @deprecated Use av_pix_fmt_get_chroma_sub_sample
+ * Utility function to access log2_chroma_w and log2_chroma_h from
+ * the pixel format AVPixFmtDescriptor.
+ *
+ * This function asserts that pix_fmt is valid. See av_pix_fmt_get_chroma_sub_sample
+ * for one that returns a failure code and continues in case of invalid
+ * pix_fmts.
+ *
+ * @param[in]  pix_fmt the pixel format
+ * @param[out] h_shift store log2_chroma_w
+ * @param[out] v_shift store log2_chroma_h
+ *
+ * @see av_pix_fmt_get_chroma_sub_sample
  */
 
-void attribute_deprecated avcodec_get_chroma_sub_sample(enum AVPixelFormat pix_fmt, int *h_shift, int *v_shift);
+void avcodec_get_chroma_sub_sample(enum AVPixelFormat pix_fmt, int *h_shift, int *v_shift);
 
 /**
  * Return a value representing the fourCC code associated to the
@@ -4129,29 +4774,8 @@ void attribute_deprecated avcodec_get_chroma_sub_sample(enum AVPixelFormat pix_f
  */
 unsigned int avcodec_pix_fmt_to_codec_tag(enum AVPixelFormat pix_fmt);
 
-#define FF_LOSS_RESOLUTION  0x0001 /**< loss due to resolution change */
-#define FF_LOSS_DEPTH       0x0002 /**< loss due to color depth change */
-#define FF_LOSS_COLORSPACE  0x0004 /**< loss due to color space conversion */
-#define FF_LOSS_ALPHA       0x0008 /**< loss of alpha bits */
-#define FF_LOSS_COLORQUANT  0x0010 /**< loss due to color quantization */
-#define FF_LOSS_CHROMA      0x0020 /**< loss of chroma (e.g. RGB to gray conversion) */
-
-/**
- * Compute what kind of losses will occur when converting from one specific
- * pixel format to another.
- * When converting from one pixel format to another, information loss may occur.
- * For example, when converting from RGB24 to GRAY, the color information will
- * be lost. Similarly, other losses occur when converting from some formats to
- * other formats. These losses can involve loss of chroma, but also loss of
- * resolution, loss of color depth, loss due to the color space conversion, loss
- * of the alpha bits or loss due to color quantization.
- * avcodec_get_fix_fmt_loss() informs you about the various types of losses
- * which will occur when converting from one pixel format to another.
- *
- * @param[in] dst_pix_fmt destination pixel format
- * @param[in] src_pix_fmt source pixel format
- * @param[in] has_alpha Whether the source pixel format alpha channel is used.
- * @return Combination of flags informing you what kind of losses will occur.
+/**
+ * @deprecated see av_get_pix_fmt_loss()
  */
 int avcodec_get_pix_fmt_loss(enum AVPixelFormat dst_pix_fmt, enum AVPixelFormat src_pix_fmt,
                              int has_alpha);
@@ -4161,7 +4785,7 @@ int avcodec_get_pix_fmt_loss(enum AVPixelFormat dst_pix_fmt, enum AVPixelFormat
  * format.  When converting from one pixel format to another, information loss
  * may occur.  For example, when converting from RGB24 to GRAY, the color
  * information will be lost. Similarly, other losses occur when converting from
- * some formats to other formats. avcodec_find_best_pix_fmt2() searches which of
+ * some formats to other formats. avcodec_find_best_pix_fmt_of_2() searches which of
  * the given pixel formats should be used to suffer the least amount of loss.
  * The pixel formats from which it chooses one, are determined by the
  * pix_fmt_list parameter.
@@ -4173,9 +4797,26 @@ int avcodec_get_pix_fmt_loss(enum AVPixelFormat dst_pix_fmt, enum AVPixelFormat
  * @param[out] loss_ptr Combination of flags informing you what kind of losses will occur.
  * @return The best pixel format to convert to or -1 if none was found.
  */
-enum AVPixelFormat avcodec_find_best_pix_fmt2(enum AVPixelFormat *pix_fmt_list,
+enum AVPixelFormat avcodec_find_best_pix_fmt_of_list(const enum AVPixelFormat *pix_fmt_list,
+                                            enum AVPixelFormat src_pix_fmt,
+                                            int has_alpha, int *loss_ptr);
+
+/**
+ * @deprecated see av_find_best_pix_fmt_of_2()
+ */
+enum AVPixelFormat avcodec_find_best_pix_fmt_of_2(enum AVPixelFormat dst_pix_fmt1, enum AVPixelFormat dst_pix_fmt2,
+                                            enum AVPixelFormat src_pix_fmt, int has_alpha, int *loss_ptr);
+
+attribute_deprecated
+#if AV_HAVE_INCOMPATIBLE_LIBAV_ABI
+enum AVPixelFormat avcodec_find_best_pix_fmt2(const enum AVPixelFormat *pix_fmt_list,
                                               enum AVPixelFormat src_pix_fmt,
                                               int has_alpha, int *loss_ptr);
+#else
+enum AVPixelFormat avcodec_find_best_pix_fmt2(enum AVPixelFormat dst_pix_fmt1, enum AVPixelFormat dst_pix_fmt2,
+                                            enum AVPixelFormat src_pix_fmt, int has_alpha, int *loss_ptr);
+#endif
+
 
 enum AVPixelFormat avcodec_default_get_format(struct AVCodecContext *s, const enum AVPixelFormat * fmt);
 
@@ -4218,7 +4859,12 @@ int avcodec_default_execute2(AVCodecContext *c, int (*func)(AVCodecContext *c2,
 //FIXME func typedef
 
 /**
- * Fill audio frame data and linesize.
+ * Fill AVFrame audio data and linesize pointers.
+ *
+ * The buffer buf must be a preallocated buffer with a size big enough
+ * to contain the specified samples amount. The filled AVFrame data
+ * pointers will point to this buffer.
+ *
  * AVFrame extended_data channel pointers are allocated if necessary for
  * planar audio.
  *
@@ -4231,7 +4877,9 @@ int avcodec_default_execute2(AVCodecContext *c, int (*func)(AVCodecContext *c2,
  * @param buf         buffer to use for frame data
  * @param buf_size    size of buffer
  * @param align       plane size sample alignment (0 = default)
- * @return            0 on success, negative error code on failure
+ * @return            >=0 on success, negative error code on failure
+ * @todo return the size in bytes required to store the samples in
+ * case of success, at the next libavutil bump
  */
 int avcodec_fill_audio_frame(AVFrame *frame, int nb_channels,
                              enum AVSampleFormat sample_fmt, const uint8_t *buf,
@@ -4257,6 +4905,14 @@ void avcodec_flush_buffers(AVCodecContext *avctx);
 int av_get_bits_per_sample(enum AVCodecID codec_id);
 
 /**
+ * Return the PCM codec associated with a sample format.
+ * @param be  endianness, 0 for little, 1 for big,
+ *            -1 (or anything else) for native
+ * @return  AV_CODEC_ID_PCM_* or AV_CODEC_ID_NONE
+ */
+enum AVCodecID av_get_pcm_codec(enum AVSampleFormat fmt, int be);
+
+/**
  * Return codec bits per sample.
  * Only return non-zero if the bits per sample is exactly correct, not an
  * approximation.
@@ -4296,28 +4952,98 @@ typedef struct AVBitStreamFilter {
     struct AVBitStreamFilter *next;
 } AVBitStreamFilter;
 
+/**
+ * Register a bitstream filter.
+ *
+ * The filter will be accessible to the application code through
+ * av_bitstream_filter_next() or can be directly initialized with
+ * av_bitstream_filter_init().
+ *
+ * @see avcodec_register_all()
+ */
 void av_register_bitstream_filter(AVBitStreamFilter *bsf);
+
+/**
+ * Create and initialize a bitstream filter context given a bitstream
+ * filter name.
+ *
+ * The returned context must be freed with av_bitstream_filter_close().
+ *
+ * @param name    the name of the bitstream filter
+ * @return a bitstream filter context if a matching filter was found
+ * and successfully initialized, NULL otherwise
+ */
 AVBitStreamFilterContext *av_bitstream_filter_init(const char *name);
+
+/**
+ * Filter bitstream.
+ *
+ * This function filters the buffer buf with size buf_size, and places the
+ * filtered buffer in the buffer pointed to by poutbuf.
+ *
+ * The output buffer must be freed by the caller.
+ *
+ * @param bsfc            bitstream filter context created by av_bitstream_filter_init()
+ * @param avctx           AVCodecContext accessed by the filter, may be NULL.
+ *                        If specified, this must point to the encoder context of the
+ *                        output stream the packet is sent to.
+ * @param args            arguments which specify the filter configuration, may be NULL
+ * @param poutbuf         pointer which is updated to point to the filtered buffer
+ * @param poutbuf_size    pointer which is updated to the filtered buffer size in bytes
+ * @param buf             buffer containing the data to filter
+ * @param buf_size        size in bytes of buf
+ * @param keyframe        set to non-zero if the buffer to filter corresponds to a key-frame packet data
+ * @return >= 0 in case of success, or a negative error code in case of failure
+ *
+ * If the return value is positive, an output buffer is allocated and
+ * is available in *poutbuf, and is distinct from the input buffer.
+ *
+ * If the return value is 0, the output buffer is not allocated and
+ * should be considered identical to the input buffer, or in case
+ * *poutbuf was set it points to the input buffer (not necessarily to
+ * its starting address).
+ */
 int av_bitstream_filter_filter(AVBitStreamFilterContext *bsfc,
                                AVCodecContext *avctx, const char *args,
                                uint8_t **poutbuf, int *poutbuf_size,
                                const uint8_t *buf, int buf_size, int keyframe);
+
+/**
+ * Release bitstream filter context.
+ *
+ * @param bsf the bitstream filter context created with
+ * av_bitstream_filter_init(), can be NULL
+ */
 void av_bitstream_filter_close(AVBitStreamFilterContext *bsf);
 
+/**
+ * If f is NULL, return the first registered bitstream filter,
+ * if f is non-NULL, return the next registered bitstream filter
+ * after f, or NULL if f is the last one.
+ *
+ * This function can be used to iterate over all registered bitstream
+ * filters.
+ */
 AVBitStreamFilter *av_bitstream_filter_next(const AVBitStreamFilter *f);
 
 /* memory */
 
 /**
- * Allocate a buffer with padding, reusing the given one if large enough.
- *
  * Same behaviour av_fast_malloc but the buffer has additional
- * FF_INPUT_PADDING_SIZE at the end which will always memset to 0.
+ * FF_INPUT_BUFFER_PADDING_SIZE at the end which will always be 0.
  *
+ * In addition the whole buffer will initially and after resizes
+ * be 0-initialized so that no uninitialized data will ever appear.
  */
 void av_fast_padded_malloc(void *ptr, unsigned int *size, size_t min_size);
 
 /**
+ * Same behaviour as av_fast_padded_malloc(), except that the buffer will always
+ * be 0-initialized after the call.
+ */
+void av_fast_padded_mallocz(void *ptr, unsigned int *size, size_t min_size);
+
+/**
  * Encode extradata length to a buffer. Used by xiph codecs.
  *
  * @param s buffer to write to; must be at least (v/255+1) bytes long
@@ -4329,7 +5055,7 @@ unsigned int av_xiphlacing(unsigned char *s, unsigned int v);
 #if FF_API_MISSING_SAMPLE
 /**
  * Log a generic warning message about a missing feature. This function is
- * intended to be used internally by Libav (libavcodec, libavformat, etc.)
+ * intended to be used internally by FFmpeg (libavcodec, libavformat, etc.)
  * only, and would normally not be used by applications.
  * @param[in] avc a pointer to an arbitrary struct of which the first field is
  * a pointer to an AVClass struct
@@ -4345,7 +5071,7 @@ void av_log_missing_feature(void *avc, const char *feature, int want_sample);
 
 /**
  * Log a generic warning message asking for a sample. This function is
- * intended to be used internally by Libav (libavcodec, libavformat, etc.)
+ * intended to be used internally by FFmpeg (libavcodec, libavformat, etc.)
  * only, and would normally not be used by applications.
  * @param[in] avc a pointer to an arbitrary struct of which the first field is
  * a pointer to an AVClass struct
@@ -4385,7 +5111,7 @@ enum AVLockOp {
  * lockmgr should store/get a pointer to a user allocated mutex. It's
  * NULL upon AV_LOCK_CREATE and != NULL for all other ops.
  *
- * @param cb User defined callback. Note: Libav may invoke calls to this
+ * @param cb User defined callback. Note: FFmpeg may invoke calls to this
  *           callback during the call to av_lockmgr_register().
  *           Thus, the application must be prepared to handle that.
  *           If cb is set to NULL the lockmgr will be unregistered.
@@ -4400,6 +5126,12 @@ int av_lockmgr_register(int (*cb)(void **mutex, enum AVLockOp op));
 enum AVMediaType avcodec_get_type(enum AVCodecID codec_id);
 
 /**
+ * Get the name of a codec.
+ * @return  a static string identifying the codec; never NULL
+ */
+const char *avcodec_get_name(enum AVCodecID id);
+
+/**
  * @return a positive value if s is open (i.e. avcodec_open2() was called on it
  * with no corresponding avcodec_close()), 0 otherwise.
  */
diff --git a/libavcodec/avcodecres.rc b/libavcodec/avcodecres.rc
new file mode 100644
index 0000000..4b69686
--- /dev/null
+++ b/libavcodec/avcodecres.rc
@@ -0,0 +1,55 @@
+/*
+ * Windows resource file for libavcodec
+ *
+ * Copyright (C) 2012 James Almer
+ * Copyright (C) 2013 Tiancheng "Timothy" Gu
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <windows.h>
+#include "libavcodec/version.h"
+#include "libavutil/ffversion.h"
+#include "config.h"
+
+1 VERSIONINFO
+FILEVERSION     LIBAVCODEC_VERSION_MAJOR, LIBAVCODEC_VERSION_MINOR, LIBAVCODEC_VERSION_MICRO, 0
+PRODUCTVERSION  LIBAVCODEC_VERSION_MAJOR, LIBAVCODEC_VERSION_MINOR, LIBAVCODEC_VERSION_MICRO, 0
+FILEFLAGSMASK   VS_FFI_FILEFLAGSMASK
+FILEOS          VOS_NT_WINDOWS32
+FILETYPE        VFT_DLL
+{
+    BLOCK "StringFileInfo"
+    {
+        BLOCK "040904B0"
+        {
+            VALUE "CompanyName",      "FFmpeg Project"
+            VALUE "FileDescription",  "FFmpeg codec library"
+            VALUE "FileVersion",      AV_STRINGIFY(LIBAVCODEC_VERSION)
+            VALUE "InternalName",     "libavcodec"
+            VALUE "LegalCopyright",   "Copyright (C) 2000-" AV_STRINGIFY(CONFIG_THIS_YEAR) " FFmpeg Project"
+            VALUE "OriginalFilename", "avcodec" BUILDSUF "-" AV_STRINGIFY(LIBAVCODEC_VERSION_MAJOR) SLIBSUF
+            VALUE "ProductName",      "FFmpeg"
+            VALUE "ProductVersion",   FFMPEG_VERSION
+        }
+    }
+
+    BLOCK "VarFileInfo"
+    {
+        VALUE "Translation", 0x0409, 0x04B0
+    }
+}
diff --git a/libavcodec/avdct.c b/libavcodec/avdct.c
new file mode 100644
index 0000000..2521256
--- /dev/null
+++ b/libavcodec/avdct.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2014 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "idctdsp.h"
+#include "fdctdsp.h"
+#include "pixblockdsp.h"
+#include "avdct.h"
+
+#define OFFSET(x) offsetof(AVDCT,x)
+#define DEFAULT 0 //should be NAN but it does not work as it is not a constant in glibc as required by ANSI/ISO C
+//these names are too long to be readable
+#define V AV_OPT_FLAG_VIDEO_PARAM
+#define A AV_OPT_FLAG_AUDIO_PARAM
+#define E AV_OPT_FLAG_ENCODING_PARAM
+#define D AV_OPT_FLAG_DECODING_PARAM
+
+static const AVOption avdct_options[] = {
+{"dct", "DCT algorithm", OFFSET(dct_algo), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, V|E, "dct"},
+{"auto", "autoselect a good one (default)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_AUTO }, INT_MIN, INT_MAX, V|E, "dct"},
+{"fastint", "fast integer (experimental / for debugging)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_FASTINT }, INT_MIN, INT_MAX, V|E, "dct"},
+{"int", "accurate integer", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_INT }, INT_MIN, INT_MAX, V|E, "dct"},
+{"mmx", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_MMX }, INT_MIN, INT_MAX, V|E, "dct"},
+{"altivec", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_ALTIVEC }, INT_MIN, INT_MAX, V|E, "dct"},
+{"faan", "floating point AAN DCT (experimental / for debugging)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_FAAN }, INT_MIN, INT_MAX, V|E, "dct"},
+
+{"idct", "select IDCT implementation", OFFSET(idct_algo), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, V|E|D, "idct"},
+{"auto", "autoselect a good one (default)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_AUTO }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"int", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_INT }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simple", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLE }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simplemmx", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEMMX }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"arm", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_ARM }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"altivec", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_ALTIVEC }, INT_MIN, INT_MAX, V|E|D, "idct"},
+#if FF_API_ARCH_SH4
+{"sh4", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SH4 }, INT_MIN, INT_MAX, V|E|D, "idct"},
+#endif
+{"simplearm", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARM }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simplearmv5te", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARMV5TE }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simplearmv6", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEARMV6 }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simpleneon", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLENEON }, INT_MIN, INT_MAX, V|E|D, "idct"},
+#if FF_API_ARCH_ALPHA
+{"simplealpha", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEALPHA }, INT_MIN, INT_MAX, V|E|D, "idct"},
+#endif
+{"ipp", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_IPP }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"xvidmmx", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_XVIDMMX }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"faani", "floating point AAN IDCT (experimental / for debugging)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_FAAN }, INT_MIN, INT_MAX, V|D|E, "idct"},
+{"simpleauto", "experimental / for debugging", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEAUTO }, INT_MIN, INT_MAX, V|E|D, "idct"},
+{NULL},
+};
+
+static const AVClass avdct_class = {
+    .class_name              = "AVDCT",
+    .option                  = avdct_options,
+    .version                 = LIBAVUTIL_VERSION_INT,
+};
+
+const AVClass *avcodec_dct_get_class(void)
+{
+    return &avdct_class; /* same static class that avcodec_dct_alloc() stores in av_class */
+}
+
+AVDCT *avcodec_dct_alloc(void)
+{
+    AVDCT *dsp = av_mallocz(sizeof(AVDCT));
+    /* Allocation failure is reported to the caller as NULL. */
+    if (!dsp)
+        return NULL;
+    /* Attach the option class, then let av_opt_set_defaults() initialize the context's options. */
+    dsp->av_class = &avdct_class;
+    av_opt_set_defaults(dsp);
+
+    return dsp;
+}
+
+int avcodec_dct_init(AVDCT *dsp)
+{
+    AVCodecContext *avctx = avcodec_alloc_context3(NULL);
+    /* avctx is a temporary context, released again before returning; ENOMEM if it cannot be allocated. */
+    if (!avctx)
+        return AVERROR(ENOMEM);
+    /* Hand the algorithms requested on dsp to the ff_*_init() calls below via avctx. */
+    avctx->idct_algo = dsp->idct_algo;
+    avctx->dct_algo  = dsp->dct_algo;
+
+#define COPY(src, name) memcpy(&dsp->name, &src.name, sizeof(dsp->name))
+
+#if CONFIG_IDCTDSP
+    {
+        IDCTDSPContext idsp;
+        ff_idctdsp_init(&idsp, avctx);
+        COPY(idsp, idct);
+        COPY(idsp, idct_permutation);
+    }
+#endif
+
+#if CONFIG_FDCTDSP
+    {
+        FDCTDSPContext fdsp;
+        ff_fdctdsp_init(&fdsp, avctx);
+        COPY(fdsp, fdct);
+    }
+#endif
+
+#if CONFIG_PIXBLOCKDSP
+    {
+        PixblockDSPContext pdsp;
+        ff_pixblockdsp_init(&pdsp, avctx);
+        COPY(pdsp, get_pixels);
+    }
+#endif
+    /* Members belonging to a disabled CONFIG_* block above are not written by this function. */
+    avcodec_close(avctx);
+    av_free(avctx);
+
+    return 0;
+}
diff --git a/libavcodec/avdct.h b/libavcodec/avdct.h
new file mode 100644
index 0000000..4190203
--- /dev/null
+++ b/libavcodec/avdct.h
@@ -0,0 +1,82 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AVDCT_H
+#define AVCODEC_AVDCT_H
+
+#include "libavutil/opt.h"
+
+/**
+ * AVDCT context.
+ * @note function pointers can be NULL if the specific features have been
+ *       disabled at build time.
+ */
+typedef struct AVDCT {
+    const AVClass *av_class;
+    /** Copied in by avcodec_dct_init() when built with CONFIG_IDCTDSP. */
+    void (*idct)(int16_t *block /* align 16 */);
+
+    /**
+     * IDCT input permutation.
+     * Several optimized IDCTs need a permuted input (relative to the
+     * normal order of the reference IDCT).
+     * This permutation must be performed before the idct_put/add.
+     * Note, normally this can be merged with the zigzag/alternate scan<br>
+     * An example to avoid confusion:
+     * - (->decode coeffs -> zigzag reorder -> dequant -> reference IDCT -> ...)
+     * - (x -> reference DCT -> reference IDCT -> x)
+     * - (x -> reference DCT -> simple_mmx_perm = idct_permutation
+     *    -> simple_idct_mmx -> x)
+     * - (-> decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant
+     *    -> simple_idct_mmx -> ...)
+     */
+    uint8_t idct_permutation[64];
+    /** Copied in by avcodec_dct_init() when built with CONFIG_FDCTDSP. */
+    void (*fdct)(int16_t *block /* align 16 */);
+
+
+    /**
+     * DCT algorithm.
+     * must use AVOptions to set this field.
+     */
+    int dct_algo;
+
+    /**
+     * IDCT algorithm.
+     * must use AVOptions to set this field.
+     */
+    int idct_algo;
+    /** Copied in by avcodec_dct_init() when built with CONFIG_PIXBLOCKDSP. */
+    void (*get_pixels)(int16_t *block /* align 16 */,
+                       const uint8_t *pixels /* align 8 */,
+                       int line_size);
+} AVDCT;
+
+/**
+ * Allocates an AVDCT context.
+ * This needs to be initialized with avcodec_dct_init() after optionally
+ * configuring it with AVOptions.
+ *
+ * To free it use av_free()
+ */
+AVDCT *avcodec_dct_alloc(void);
+int avcodec_dct_init(AVDCT *);
+
+const AVClass *avcodec_dct_get_class(void);
+
+#endif /* AVCODEC_AVDCT_H */
diff --git a/libavcodec/avfft.c b/libavcodec/avfft.c
index 513f57e..2200f37 100644
--- a/libavcodec/avfft.c
+++ b/libavcodec/avfft.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,7 +27,7 @@
 
 FFTContext *av_fft_init(int nbits, int inverse)
 {
-    FFTContext *s = av_malloc(sizeof(*s));
+    FFTContext *s = av_mallocz(sizeof(*s));
 
     if (s && ff_fft_init(s, nbits, inverse))
         av_freep(&s);
diff --git a/libavcodec/avfft.h b/libavcodec/avfft.h
index e2e727d..0c0f9b8 100644
--- a/libavcodec/avfft.h
+++ b/libavcodec/avfft.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/avpacket.c b/libavcodec/avpacket.c
index 25eabdb..a87e8e3 100644
--- a/libavcodec/avpacket.c
+++ b/libavcodec/avpacket.c
@@ -2,20 +2,20 @@
  * AVPacket functions for libavcodec
  * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,6 +27,9 @@
 #include "libavutil/mathematics.h"
 #include "libavutil/mem.h"
 #include "avcodec.h"
+#include "bytestream.h"
+#include "internal.h"
+
 #if FF_API_DESTRUCT_PACKET
 
 void av_destruct_packet(AVPacket *pkt)
@@ -187,43 +190,55 @@ do {                                         \
         dst = data;                                                     \
     } while (0)
 
-int av_dup_packet(AVPacket *pkt)
+/* Makes duplicates of data, side_data, but does not copy any other fields */
+static int copy_packet_data(AVPacket *pkt, const AVPacket *src, int dup)
 {
-    AVPacket tmp_pkt;
-
-FF_DISABLE_DEPRECATION_WARNINGS
-    if (!pkt->buf && pkt->data
-#if FF_API_DESTRUCT_PACKET
-        && !pkt->destruct
-#endif
-        ) {
-FF_ENABLE_DEPRECATION_WARNINGS
-        tmp_pkt = *pkt;
-
-        pkt->data      = NULL;
-        pkt->side_data = NULL;
-        DUP_DATA(pkt->data, tmp_pkt.data, pkt->size, 1, ALLOC_BUF);
+    pkt->data      = NULL;
+    pkt->side_data = NULL;
+    if (pkt->buf) {
+        AVBufferRef *ref = av_buffer_ref(src->buf);
+        if (!ref)
+            return AVERROR(ENOMEM);
+        pkt->buf  = ref;
+        pkt->data = ref->data;
+    } else {
+        DUP_DATA(pkt->data, src->data, pkt->size, 1, ALLOC_BUF);
+    }
 #if FF_API_DESTRUCT_PACKET
 FF_DISABLE_DEPRECATION_WARNINGS
-        pkt->destruct = dummy_destruct_packet;
+    pkt->destruct = dummy_destruct_packet;
 FF_ENABLE_DEPRECATION_WARNINGS
 #endif
+    if (pkt->side_data_elems && dup)
+        pkt->side_data = src->side_data;
+    if (pkt->side_data_elems && !dup) {
+        return av_copy_packet_side_data(pkt, src);
+    }
+    return 0;
 
-        if (pkt->side_data_elems) {
-            int i;
+failed_alloc:
+    av_free_packet(pkt);
+    return AVERROR(ENOMEM);
+}
 
-            DUP_DATA(pkt->side_data, tmp_pkt.side_data,
-                     pkt->side_data_elems * sizeof(*pkt->side_data), 0, ALLOC_MALLOC);
+int av_copy_packet_side_data(AVPacket *pkt, const AVPacket *src)
+{
+    if (src->side_data_elems) {
+        int i;
+        DUP_DATA(pkt->side_data, src->side_data,
+                src->side_data_elems * sizeof(*src->side_data), 0, ALLOC_MALLOC);
+        if (src != pkt) {
             memset(pkt->side_data, 0,
-                   pkt->side_data_elems * sizeof(*pkt->side_data));
-            for (i = 0; i < pkt->side_data_elems; i++) {
-                DUP_DATA(pkt->side_data[i].data, tmp_pkt.side_data[i].data,
-                         tmp_pkt.side_data[i].size, 1, ALLOC_MALLOC);
-                pkt->side_data[i].size = tmp_pkt.side_data[i].size;
-                pkt->side_data[i].type = tmp_pkt.side_data[i].type;
-            }
+                   src->side_data_elems * sizeof(*src->side_data));
+        }
+        for (i = 0; i < src->side_data_elems; i++) {
+            DUP_DATA(pkt->side_data[i].data, src->side_data[i].data,
+                    src->side_data[i].size, 1, ALLOC_MALLOC);
+            pkt->side_data[i].size = src->side_data[i].size;
+            pkt->side_data[i].type = src->side_data[i].type;
         }
     }
+    pkt->side_data_elems = src->side_data_elems;
     return 0;
 
 failed_alloc:
@@ -231,6 +246,29 @@ failed_alloc:
     return AVERROR(ENOMEM);
 }
 
+int av_dup_packet(AVPacket *pkt)
+{
+    AVPacket tmp_pkt;
+    /* Duplicate only when pkt has data but no buf (and, with FF_API_DESTRUCT_PACKET, no destruct); otherwise return 0 untouched. */
+FF_DISABLE_DEPRECATION_WARNINGS
+    if (!pkt->buf && pkt->data
+#if FF_API_DESTRUCT_PACKET
+        && !pkt->destruct
+#endif
+        ) {
+FF_ENABLE_DEPRECATION_WARNINGS
+        tmp_pkt = *pkt; /* snapshot: copy_packet_data() resets pkt->data and pkt->side_data before copying */
+        return copy_packet_data(pkt, &tmp_pkt, 1); /* dup=1: the side_data pointer is carried over, not deep-copied */
+    }
+    return 0;
+}
+
+int av_copy_packet(AVPacket *dst, const AVPacket *src)
+{
+    *dst = *src; /* start from a member-wise copy of src */
+    return copy_packet_data(dst, src, 0); /* re-reference or duplicate the payload; dup=0 copies side data via av_copy_packet_side_data() */
+}
+
 void av_packet_free_side_data(AVPacket *pkt)
 {
     int i;
@@ -274,7 +312,7 @@ uint8_t *av_packet_new_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
     if (!pkt->side_data)
         return NULL;
 
-    pkt->side_data[elems].data = av_malloc(size + FF_INPUT_BUFFER_PADDING_SIZE);
+    pkt->side_data[elems].data = av_mallocz(size + FF_INPUT_BUFFER_PADDING_SIZE);
     if (!pkt->side_data[elems].data)
         return NULL;
     pkt->side_data[elems].size = size;
@@ -299,6 +337,150 @@ uint8_t *av_packet_get_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
     return NULL;
 }
 
+#define FF_MERGE_MARKER 0x8c4d9d108e25e9feULL /* 64-bit tag written after merged side data and checked for before splitting it back out */
+
+int av_packet_merge_side_data(AVPacket *pkt){ /* append side data to the payload; returns 1 if merged, 0 if there was no side data, <0 on error */
+    if(pkt->side_data_elems){
+        AVBufferRef *buf;
+        int i;
+        uint8_t *p; /* write cursor into the new buffer */
+        uint64_t size= pkt->size + 8LL + FF_INPUT_BUFFER_PADDING_SIZE; /* payload + 8-byte marker + padding */
+        AVPacket old= *pkt; /* keeps the original pointers for copying from and freeing */
+        for (i=0; i<old.side_data_elems; i++) {
+            size += old.side_data[i].size + 5LL; /* element bytes + 4-byte size + 1-byte type */
+        }
+        if (size > INT_MAX)
+            return AVERROR(EINVAL);
+        buf = av_buffer_alloc(size);
+        if (!buf)
+            return AVERROR(ENOMEM);
+        pkt->buf = buf;
+        pkt->data = p = buf->data;
+#if FF_API_DESTRUCT_PACKET
+FF_DISABLE_DEPRECATION_WARNINGS
+        pkt->destruct = dummy_destruct_packet;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+        pkt->size = size - FF_INPUT_BUFFER_PADDING_SIZE; /* the padding is not counted in pkt->size */
+        bytestream_put_buffer(&p, old.data, old.size);
+        for (i=old.side_data_elems-1; i>=0; i--) { /* written last-to-first, so element 0 ends up next to the marker */
+            bytestream_put_buffer(&p, old.side_data[i].data, old.side_data[i].size);
+            bytestream_put_be32(&p, old.side_data[i].size);
+            *p++ = old.side_data[i].type | ((i==old.side_data_elems-1)*128); /* bit 7 is set only on the first element written */
+        }
+        bytestream_put_be64(&p, FF_MERGE_MARKER);
+        av_assert0(p-pkt->data == pkt->size); /* everything counted above must have been written */
+        memset(p, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+        av_free_packet(&old);
+        pkt->side_data_elems = 0;
+        pkt->side_data = NULL;
+        return 1;
+    }
+    return 0;
+}
+
+int av_packet_split_side_data(AVPacket *pkt){ /* undo av_packet_merge_side_data(); returns 1 if split, 0 if nothing (valid) to split, <0 on allocation failure */
+    if (!pkt->side_data_elems && pkt->size >12 && AV_RB64(pkt->data + pkt->size - 8) == FF_MERGE_MARKER){
+        int i;
+        unsigned int size;
+        uint8_t *p = pkt->data + pkt->size - 8 - 5; /* 5-byte trailer (be32 size + type) just before the marker */
+        for (i=1; ; i++){ /* pass 1: validate the chain and count elements */
+            size = AV_RB32(p);
+            if (size>INT_MAX - 5 || p - pkt->data < size) /* keeps size+5 from overflowing and the element inside the packet */
+                return 0;
+            if (p[4]&128) /* bit 7 marks the last trailer in the chain */
+                break;
+            if (p - pkt->data < size + 5) /* the next trailer would start before pkt->data */
+                return 0;
+            p-= size+5;
+        }
+
+        pkt->side_data = av_malloc_array(i, sizeof(*pkt->side_data));
+        if (!pkt->side_data)
+            return AVERROR(ENOMEM);
+
+        p= pkt->data + pkt->size - 8 - 5;
+        for (i=0; ; i++){ /* pass 2: copy each element out; bounds were established by pass 1 */
+            size= AV_RB32(p);
+            av_assert0(size<=INT_MAX && p - pkt->data >= size);
+            pkt->side_data[i].data = av_mallocz(size + FF_INPUT_BUFFER_PADDING_SIZE);
+            pkt->side_data[i].size = size;
+            pkt->side_data[i].type = p[4]&127; /* low 7 bits carry the type */
+            if (!pkt->side_data[i].data)
+                return AVERROR(ENOMEM);
+            memcpy(pkt->side_data[i].data, p-size, size); /* element bytes sit directly before their trailer */
+            pkt->size -= size + 5;
+            if(p[4]&128)
+                break;
+            p-= size+5;
+        }
+        pkt->size -= 8; /* drop the marker as well */
+        pkt->side_data_elems = i+1;
+        return 1;
+    }
+    return 0;
+}
+
+uint8_t *av_packet_pack_dictionary(AVDictionary *dict, int *size) /* flatten dict into one buffer of "key\0value\0" records; the byte count goes to *size */
+{
+    AVDictionaryEntry *t = NULL;
+    uint8_t *data = NULL;
+    *size = 0;
+
+    if (!dict)
+        return NULL;
+
+    while ((t = av_dict_get(dict, "", t, AV_DICT_IGNORE_SUFFIX))) { /* visit each entry av_dict_get() yields */
+        const size_t keylen   = strlen(t->key);
+        const size_t valuelen = strlen(t->value);
+        const size_t new_size = *size + keylen + 1 + valuelen + 1; /* both strings plus their NUL terminators */
+        uint8_t *const new_data = av_realloc(data, new_size);
+
+        if (!new_data)
+            goto fail;
+        data = new_data; /* adopt the new pointer first so the fail path frees the current buffer */
+        if (new_size > INT_MAX) /* *size is an int, so a larger total cannot be reported */
+            goto fail;
+
+        memcpy(data + *size, t->key, keylen + 1);
+        memcpy(data + *size + keylen + 1, t->value, valuelen + 1);
+
+        *size = new_size;
+    }
+
+    return data; /* still NULL if the loop never ran */
+
+fail:
+    av_freep(&data);
+    *size = 0;
+    return NULL;
+}
+
+int av_packet_unpack_dictionary(const uint8_t *data, int size, AVDictionary **dict) /* parse "key\0value\0" records from data into *dict; 0 or a negative error code */
+{
+    const uint8_t *end = data + size;
+    int ret = 0;
+
+    if (!dict || !data || !size) /* nothing to do: reported as success */
+        return ret;
+    if (size && end[-1]) /* the buffer must end with a NUL byte */
+        return AVERROR_INVALIDDATA;
+    while (data < end) {
+        const uint8_t *key = data;
+        const uint8_t *val = data + strlen(key) + 1; /* value starts right after the key's terminator */
+
+        if (val >= end) /* key with no value left inside the buffer */
+            return AVERROR_INVALIDDATA;
+
+        ret = av_dict_set(dict, key, val, 0);
+        if (ret < 0)
+            break;
+        data = val + strlen(val) + 1; /* advance to the next record */
+    }
+
+    return ret;
+}
+
 int av_packet_shrink_side_data(AVPacket *pkt, enum AVPacketSideDataType type,
                                int size)
 {
@@ -352,7 +534,7 @@ void av_packet_unref(AVPacket *pkt)
     pkt->size = 0;
 }
 
-int av_packet_ref(AVPacket *dst, AVPacket *src)
+int av_packet_ref(AVPacket *dst, const AVPacket *src)
 {
     int ret;
 
diff --git a/libavcodec/avpicture.c b/libavcodec/avpicture.c
index a50bbc4..a6f89ef 100644
--- a/libavcodec/avpicture.c
+++ b/libavcodec/avpicture.c
@@ -2,20 +2,20 @@
  * AVPicture management routines
  * Copyright (c) 2001, 2002, 2003 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -31,71 +31,24 @@
 #include "libavutil/imgutils.h"
 #include "libavutil/colorspace.h"
 
-int avpicture_fill(AVPicture *picture, uint8_t *ptr,
+int avpicture_fill(AVPicture *picture, const uint8_t *ptr,
                    enum AVPixelFormat pix_fmt, int width, int height)
 {
-    int ret;
-
-    if ((ret = av_image_check_size(width, height, 0, NULL)) < 0)
-        return ret;
-
-    if ((ret = av_image_fill_linesizes(picture->linesize, pix_fmt, width)) < 0)
-        return ret;
-
-    return av_image_fill_pointers(picture->data, pix_fmt,
-                                  height, ptr, picture->linesize);
+    return av_image_fill_arrays(picture->data, picture->linesize,
+                                ptr, pix_fmt, width, height, 1);
 }
 
-int avpicture_layout(const AVPicture* src, enum AVPixelFormat pix_fmt,
-                     int width, int height,
+int avpicture_layout(const AVPicture* src, enum AVPixelFormat pix_fmt, int width, int height,
                      unsigned char *dest, int dest_size)
 {
-    int i, j, nb_planes = 0, linesizes[4];
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
-    int size = avpicture_get_size(pix_fmt, width, height);
-
-    if (size > dest_size || size < 0)
-        return AVERROR(EINVAL);
-
-    for (i = 0; i < desc->nb_components; i++)
-        nb_planes = FFMAX(desc->comp[i].plane, nb_planes);
-
-    nb_planes++;
-
-    av_image_fill_linesizes(linesizes, pix_fmt, width);
-    for (i = 0; i < nb_planes; i++) {
-        int h, shift = (i == 1 || i == 2) ? desc->log2_chroma_h : 0;
-        const unsigned char *s = src->data[i];
-        h = (height + (1 << shift) - 1) >> shift;
-
-        for (j = 0; j < h; j++) {
-            memcpy(dest, s, linesizes[i]);
-            dest += linesizes[i];
-            s += src->linesize[i];
-        }
-    }
-
-    if (desc->flags & AV_PIX_FMT_FLAG_PAL)
-        memcpy((unsigned char *)(((size_t)dest + 3) & ~3),
-               src->data[1], 256 * 4);
-
-    return size;
+    return av_image_copy_to_buffer(dest, dest_size,
+                                   (const uint8_t * const*)src->data, src->linesize,
+                                   pix_fmt, width, height, 1);
 }
 
 int avpicture_get_size(enum AVPixelFormat pix_fmt, int width, int height)
 {
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
-    AVPicture dummy_pict;
-    int ret;
-
-    if (!desc)
-        return AVERROR(EINVAL);
-    if ((ret = av_image_check_size(width, height, 0, NULL)) < 0)
-        return ret;
-    if (desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL)
-        // do not include palette for these pseudo-paletted formats
-        return width * height;
-    return avpicture_fill(&dummy_pict, NULL, pix_fmt, width, height);
+    return av_image_get_buffer_size(pix_fmt, width, height, 1);
 }
 
 int avpicture_alloc(AVPicture *picture,
@@ -119,7 +72,7 @@ void avpicture_free(AVPicture *picture)
 void av_picture_copy(AVPicture *dst, const AVPicture *src,
                      enum AVPixelFormat pix_fmt, int width, int height)
 {
-    av_image_copy(dst->data, dst->linesize, src->data,
+    av_image_copy(dst->data, dst->linesize, (const uint8_t **)src->data,
                   src->linesize, pix_fmt, width, height);
 }
 
diff --git a/libavcodec/avr32/mathops.h b/libavcodec/avr32/mathops.h
index 528b7ad..85f42b5 100644
--- a/libavcodec/avr32/mathops.h
+++ b/libavcodec/avr32/mathops.h
@@ -2,20 +2,20 @@
  * Simple math operations
  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/avrndec.c b/libavcodec/avrndec.c
new file mode 100644
index 0000000..7a50a5c
--- /dev/null
+++ b/libavcodec/avrndec.c
@@ -0,0 +1,131 @@
+/*
+ * AVRn decoder
+ * Copyright (c) 2012 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "internal.h"
+#include "mjpeg.h"
+#include "mjpegdec.h"
+#include "libavutil/imgutils.h"
+
+typedef struct { /* private decoder state for the AVRn decoder */
+    MJpegDecodeContext mjpeg_ctx;
+    int is_mjpeg; /* nonzero: init/decode/close are delegated to the ff_mjpeg_* functions */
+    int interlace; //FIXME use frame.interlaced_frame
+    int tff; /* picks which row of each output row pair receives the first field's line */
+} AVRnContext;
+
+static av_cold int init(AVCodecContext *avctx) /* choose MJPEG or raw UYVY422 mode from extradata; 0 or a negative error code */
+{
+    AVRnContext *a = avctx->priv_data;
+    int ret;
+
+    // Support "Resolution 1:1" for Avid AVI Codec
+    a->is_mjpeg = avctx->extradata_size < 31 || memcmp(&avctx->extradata[28], "1:1", 3); /* raw mode only when bytes 28..30 read "1:1" */
+
+    if(!a->is_mjpeg && avctx->lowres) {
+        av_log(avctx, AV_LOG_ERROR, "lowres is not possible with rawvideo\n");
+        return AVERROR(EINVAL);
+    }
+
+    if(a->is_mjpeg)
+        return ff_mjpeg_decode_init(avctx);
+
+    if ((ret = av_image_check_size(avctx->width, avctx->height, 0, avctx)) < 0)
+        return ret;
+
+    avctx->pix_fmt = AV_PIX_FMT_UYVY422;
+
+    if(avctx->extradata_size >= 9 && avctx->extradata[4]+28 < avctx->extradata_size) { /* guarantees ndx + 24 below is inside extradata */
+        int ndx = avctx->extradata[4] + 4;
+        a->interlace = !memcmp(avctx->extradata + ndx, "1:1(", 4); /* the "1:1(" tag at ndx enables interlaced handling */
+        if(a->interlace) {
+            a->tff = avctx->extradata[ndx + 24] == 1;
+        }
+    }
+
+    return 0;
+}
+
+static av_cold int end(AVCodecContext *avctx) /* close callback; always returns 0 */
+{
+    AVRnContext *a = avctx->priv_data;
+
+    if(a->is_mjpeg) /* only the MJPEG mode is torn down here */
+        ff_mjpeg_decode_end(avctx);
+
+    return 0;
+}
+
+static int decode_frame(AVCodecContext *avctx, void *data, /* data is used as the output AVFrame */
+                        int *got_frame, AVPacket *avpkt)
+{
+    AVRnContext *a = avctx->priv_data;
+    AVFrame *p = data;
+    const uint8_t *buf = avpkt->data;
+    int buf_size       = avpkt->size;
+    int y, ret, true_height;
+
+    if(a->is_mjpeg) /* MJPEG streams are handed off unchanged */
+        return ff_mjpeg_decode_frame(avctx, data, got_frame, avpkt);
+
+    true_height    = buf_size / (2*avctx->width); /* whole rows of 2*width bytes contained in the packet */
+
+    if(buf_size < 2*avctx->width * avctx->height) {
+        av_log(avctx, AV_LOG_ERROR, "packet too small\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
+        return ret;
+    p->pict_type= AV_PICTURE_TYPE_I;
+    p->key_frame= 1;
+
+    if(a->interlace) {
+        buf += (true_height - avctx->height)*avctx->width; /* skip half of the excess rows' bytes before the first field */
+        for(y = 0; y < avctx->height-1; y+=2) { /* one output row pair per iteration */
+            memcpy(p->data[0] + (y+ a->tff)*p->linesize[0], buf                             , 2*avctx->width);
+            memcpy(p->data[0] + (y+!a->tff)*p->linesize[0], buf + avctx->width*true_height+4, 2*avctx->width); /* other field's line, width*true_height+4 bytes further on */
+            buf += 2*avctx->width;
+        }
+    } else {
+        buf += (true_height - avctx->height)*avctx->width*2; /* skip all excess rows at the start */
+        for(y = 0; y < avctx->height; y++) {
+            memcpy(p->data[0] + y*p->linesize[0], buf, 2*avctx->width);
+            buf += 2*avctx->width;
+        }
+    }
+
+    *got_frame      = 1;
+    return buf_size; /* the whole packet is reported as consumed */
+}
+
+AVCodec ff_avrn_decoder = { /* codec table entry wiring up the callbacks defined in this file */
+    .name           = "avrn",
+    .long_name      = NULL_IF_CONFIG_SMALL("Avid AVI Codec"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_AVRN,
+    .priv_data_size = sizeof(AVRnContext),
+    .init           = init,
+    .close          = end,
+    .decode         = decode_frame,
+    .capabilities   = CODEC_CAP_DR1,
+    .max_lowres     = 3, /* init() rejects lowres when the stream is raw video */
+};
diff --git a/libavcodec/avs.c b/libavcodec/avs.c
index 53e3320..c4eaf20 100644
--- a/libavcodec/avs.c
+++ b/libavcodec/avs.c
@@ -2,20 +2,20 @@
  * AVS video decoder.
  * Copyright (c) 2006  Aurelien Jacobs <aurel@gnuage.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -58,12 +58,10 @@ avs_decode_frame(AVCodecContext * avctx,
     int i, j, x, y, stride, ret, vect_w = 3, vect_h = 3;
     AvsVideoSubType sub_type;
     AvsBlockType type;
-    GetBitContext change_map;
+    GetBitContext change_map = {0}; //init to silence warning
 
-    if ((ret = ff_reget_buffer(avctx, p)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, p)) < 0)
         return ret;
-    }
     p->pict_type = AV_PICTURE_TYPE_P;
     p->key_frame = 0;
 
@@ -85,8 +83,10 @@ avs_decode_frame(AVCodecContext * avctx,
         if (first >= 256 || last > 256 || buf_end - buf < 4 + 4 + 3 * (last - first))
             return AVERROR_INVALIDDATA;
         buf += 4;
-        for (i=first; i<last; i++, buf+=3)
+        for (i=first; i<last; i++, buf+=3) {
             pal[i] = (buf[0] << 18) | (buf[1] << 10) | (buf[2] << 2);
+            pal[i] |= 0xFFU << 24 | (pal[i] >> 6) & 0x30303;
+        }
 
         sub_type = buf[0];
         type = buf[1];
diff --git a/libavcodec/avuidec.c b/libavcodec/avuidec.c
new file mode 100644
index 0000000..7fb644c
--- /dev/null
+++ b/libavcodec/avuidec.c
@@ -0,0 +1,130 @@
+/*
+ * AVID Meridien decoder
+ *
+ * Copyright (c) 2012 Carl Eugen Hoyos
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "internal.h"
+#include "libavutil/intreadwrite.h"
+
+static av_cold int avui_decode_init(AVCodecContext *avctx) /* fix the output pixel format; always returns 0 */
+{
+    avctx->pix_fmt = AV_PIX_FMT_YUVA422P;
+    return 0;
+}
+
+static int avui_decode_frame(AVCodecContext *avctx, void *data, /* data is used as the output AVFrame */
+                             int *got_frame, AVPacket *avpkt)
+{
+    int ret;
+    AVFrame *pic = data;
+    const uint8_t *src = avpkt->data, *extradata = avctx->extradata;
+    const uint8_t *srca; /* read cursor for the alpha bytes */
+    uint8_t *y, *u, *v, *a;
+    int transparent, interlaced = 1, skip, opaque_length, i, j, k; /* interlaced stays 1 unless extradata says otherwise */
+    uint32_t extradata_size = avctx->extradata_size;
+
+    while (extradata_size >= 24) { /* walk size-prefixed atoms looking for the APRG tag */
+        uint32_t atom_size = AV_RB32(extradata);
+        if (!memcmp(&extradata[4], "APRGAPRG0001", 12)) {
+            interlaced = extradata[19] != 1; /* byte 19 == 1 means not interlaced */
+            break;
+        }
+        if (atom_size && atom_size <= extradata_size) {
+            extradata      += atom_size;
+            extradata_size -= atom_size;
+        } else {
+            break; /* zero or oversized atom: stop scanning */
+        }
+    }
+    if (avctx->height == 486) {
+        skip = 10;
+    } else {
+        skip = 16;
+    }
+    opaque_length = 2 * avctx->width * (avctx->height + skip) + 4 * interlaced; /* minimum packet size for the Y/U/V data */
+    if (avpkt->size < opaque_length) {
+        av_log(avctx, AV_LOG_ERROR, "Insufficient input data.\n");
+        return AVERROR(EINVAL);
+    }
+    transparent = avctx->bits_per_coded_sample == 32 &&
+                  avpkt->size >= opaque_length * 2 + 4; /* alpha is read only if the packet is big enough to hold it */
+    srca = src + opaque_length + 5;
+
+    if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
+        return ret;
+
+    pic->key_frame = 1;
+    pic->pict_type = AV_PICTURE_TYPE_I;
+
+    if (!interlaced) { /* together with the skip inside the loop this jumps 2 * width * skip bytes */
+        src  += avctx->width * skip;
+        srca += avctx->width * skip;
+    }
+
+    for (i = 0; i < interlaced + 1; i++) { /* one pass per field (a single pass when not interlaced) */
+        src  += avctx->width * skip;
+        srca += avctx->width * skip;
+        if (interlaced && avctx->height == 486) { /* for 486-line interlaced input the first pass fills the odd rows */
+            y = pic->data[0] + (1 - i) * pic->linesize[0];
+            u = pic->data[1] + (1 - i) * pic->linesize[1];
+            v = pic->data[2] + (1 - i) * pic->linesize[2];
+            a = pic->data[3] + (1 - i) * pic->linesize[3];
+        } else {
+            y = pic->data[0] + i * pic->linesize[0];
+            u = pic->data[1] + i * pic->linesize[1];
+            v = pic->data[2] + i * pic->linesize[2];
+            a = pic->data[3] + i * pic->linesize[3];
+        }
+
+        for (j = 0; j < avctx->height >> interlaced; j++) {
+            for (k = 0; k < avctx->width >> 1; k++) { /* four source bytes (U, Y, V, Y) per pixel pair */
+                u[    k    ] = *src++;
+                y[2 * k    ] = *src++;
+                a[2 * k    ] = 0xFF - (transparent ? *srca++ : 0); /* 0xFF when no alpha is read */
+                srca++; /* when transparent this makes two bytes consumed per pixel, only the first being used */
+                v[    k    ] = *src++;
+                y[2 * k + 1] = *src++;
+                a[2 * k + 1] = 0xFF - (transparent ? *srca++ : 0);
+                srca++;
+            }
+
+            y += (interlaced + 1) * pic->linesize[0]; /* step over the other field's row when interlaced */
+            u += (interlaced + 1) * pic->linesize[1];
+            v += (interlaced + 1) * pic->linesize[2];
+            a += (interlaced + 1) * pic->linesize[3];
+        }
+        src  += 4; /* four bytes are skipped after each pass */
+        srca += 4;
+    }
+    *got_frame       = 1;
+
+    return avpkt->size; /* the whole packet is reported as consumed */
+}
+
+AVCodec ff_avui_decoder = { /* codec table entry wiring up the decoder callbacks defined in this file */
+    .name         = "avui",
+    .long_name    = NULL_IF_CONFIG_SMALL("Avid Meridien Uncompressed"),
+    .type         = AVMEDIA_TYPE_VIDEO,
+    .id           = AV_CODEC_ID_AVUI,
+    .init         = avui_decode_init,
+    .decode       = avui_decode_frame,
+    .capabilities = CODEC_CAP_DR1,
+};
diff --git a/libavcodec/avuienc.c b/libavcodec/avuienc.c
new file mode 100644
index 0000000..700b8cb
--- /dev/null
+++ b/libavcodec/avuienc.c
@@ -0,0 +1,113 @@
+/*
+ * AVID Meridien encoder
+ *
+ * Copyright (c) 2012 Carl Eugen Hoyos
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "internal.h"
+
+static av_cold int avui_encode_init(AVCodecContext *avctx)
+{
+    avctx->coded_frame = av_frame_alloc();
+
+    if (avctx->width != 720 || avctx->height != 486 && avctx->height != 576) {
+        av_log(avctx, AV_LOG_ERROR, "Only 720x486 and 720x576 are supported.\n");
+        return AVERROR(EINVAL);
+    }
+    if (!avctx->coded_frame) {
+        av_log(avctx, AV_LOG_ERROR, "Could not allocate frame.\n");
+        return AVERROR(ENOMEM);
+    }
+    if (!(avctx->extradata = av_mallocz(24 + FF_INPUT_BUFFER_PADDING_SIZE)))
+        return AVERROR(ENOMEM);
+    avctx->extradata_size = 24;
+    memcpy(avctx->extradata, "\0\0\0\x18""APRGAPRG0001", 16);
+    if (avctx->field_order > AV_FIELD_PROGRESSIVE) {
+        avctx->extradata[19] = 2;
+    } else {
+        avctx->extradata[19] = 1;
+    }
+
+
+    return 0;
+}
+
+static int avui_encode_frame(AVCodecContext *avctx, AVPacket *pkt, /* copies the rows of pic->data[0] into pkt; 0 or a negative error code */
+                             const AVFrame *pic, int *got_packet)
+{
+    uint8_t *dst;
+    int i, j, skip, ret, size, interlaced;
+
+    interlaced = avctx->field_order > AV_FIELD_PROGRESSIVE;
+
+    if (avctx->height == 486) {
+        skip = 10;
+    } else {
+        skip = 16;
+    }
+    size = 2 * avctx->width * (avctx->height + skip) + 8 * interlaced; /* picture rows plus skip rows, plus 8 extra bytes when interlaced */
+    if ((ret = ff_alloc_packet2(avctx, pkt, size)) < 0)
+        return ret;
+    dst = pkt->data;
+    if (!interlaced) { /* together with the skip inside the loop this jumps 2 * width * skip bytes */
+        dst += avctx->width * skip;
+    }
+
+    avctx->coded_frame->key_frame = 1;
+    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+
+    for (i = 0; i <= interlaced; i++) { /* one pass per field (a single pass when progressive) */
+        uint8_t *src;
+        if (interlaced && avctx->height == 486) { /* for 486-line interlaced input the first pass reads the odd rows */
+            src = pic->data[0] + (1 - i) * pic->linesize[0];
+        } else {
+            src = pic->data[0] + i * pic->linesize[0];
+        }
+        dst += avctx->width * skip + 4 * i; /* the second pass additionally skips four bytes */
+        for (j = 0; j < avctx->height; j += interlaced + 1) {
+            memcpy(dst, src, avctx->width * 2);
+            src += (interlaced + 1) * pic->linesize[0]; /* step over the other field's row when interlaced */
+            dst += avctx->width * 2;
+        }
+    }
+
+    pkt->flags |= AV_PKT_FLAG_KEY;
+    *got_packet = 1;
+    return 0;
+}
+
+static av_cold int avui_encode_close(AVCodecContext *avctx) /* release the frame allocated in avui_encode_init(); always returns 0 */
+{
+    av_frame_free(&avctx->coded_frame); /* a frame obtained from av_frame_alloc() is released with av_frame_free() */
+
+    return 0;
+}
+
+AVCodec ff_avui_encoder = { /* codec table entry wiring up the encoder callbacks defined in this file */
+    .name         = "avui",
+    .long_name    = NULL_IF_CONFIG_SMALL("Avid Meridien Uncompressed"),
+    .type         = AVMEDIA_TYPE_VIDEO,
+    .id           = AV_CODEC_ID_AVUI,
+    .init         = avui_encode_init,
+    .encode2      = avui_encode_frame,
+    .close        = avui_encode_close,
+    .capabilities = CODEC_CAP_EXPERIMENTAL,
+    .pix_fmts     = (const enum AVPixelFormat[]){ AV_PIX_FMT_UYVY422, AV_PIX_FMT_NONE }, /* list terminated by AV_PIX_FMT_NONE */
+};
diff --git a/libavcodec/bethsoftvideo.c b/libavcodec/bethsoftvideo.c
index 7e93a27..37cd22e 100644
--- a/libavcodec/bethsoftvideo.c
+++ b/libavcodec/bethsoftvideo.c
@@ -2,20 +2,20 @@
  * Bethesda VID video decoder
  * Copyright (C) 2007 Nicholas Tung
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -59,7 +59,8 @@ static int set_palette(BethsoftvidContext *ctx)
         return AVERROR_INVALIDDATA;
 
     for(a = 0; a < 256; a++){
-        palette[a] = bytestream2_get_be24u(&ctx->g) * 4;
+        palette[a] = 0xFFU << 24 | bytestream2_get_be24u(&ctx->g) * 4;
+        palette[a] |= palette[a] >> 6 & 0x30303;
     }
     ctx->frame->palette_has_changed = 1;
     return 0;
@@ -78,10 +79,8 @@ static int bethsoftvid_decode_frame(AVCodecContext *avctx,
     int code, ret;
     int yoffset;
 
-    if ((ret = ff_reget_buffer(avctx, vid->frame)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, vid->frame)) < 0)
         return ret;
-    }
     wrap_to_next_line = vid->frame->linesize[0] - avctx->width;
 
     if (avpkt->side_data_elems > 0 &&
diff --git a/libavcodec/bethsoftvideo.h b/libavcodec/bethsoftvideo.h
index 5cbbdfd..d5b5d0a 100644
--- a/libavcodec/bethsoftvideo.h
+++ b/libavcodec/bethsoftvideo.h
@@ -2,20 +2,20 @@
  * Bethesda VID video decoder
  * Copyright (C) 2007 Nicholas Tung
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/bfi.c b/libavcodec/bfi.c
index 75b6710..c7ac378 100644
--- a/libavcodec/bfi.c
+++ b/libavcodec/bfi.c
@@ -2,20 +2,20 @@
  * Brute Force & Ignorance (BFI) video decoder
  * Copyright (c) 2008 Sisir Koppaka
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -34,6 +34,7 @@
 typedef struct BFIContext {
     AVCodecContext *avctx;
     uint8_t *dst;
+    uint32_t pal[256];
 } BFIContext;
 
 static av_cold int bfi_decode_init(AVCodecContext *avctx)
@@ -41,6 +42,8 @@ static av_cold int bfi_decode_init(AVCodecContext *avctx)
     BFIContext *bfi = avctx->priv_data;
     avctx->pix_fmt  = AV_PIX_FMT_PAL8;
     bfi->dst        = av_mallocz(avctx->width * avctx->height);
+    if (!bfi->dst)
+        return AVERROR(ENOMEM);
     return 0;
 }
 
@@ -57,10 +60,8 @@ static int bfi_decode_frame(AVCodecContext *avctx, void *data,
     uint32_t *pal;
     int i, j, ret, height = avctx->height;
 
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     bytestream2_init(&g, avpkt->data, buf_size);
 
@@ -76,16 +77,19 @@ static int bfi_decode_frame(AVCodecContext *avctx, void *data,
         pal = (uint32_t *)frame->data[1];
         for (i = 0; i < avctx->extradata_size / 3; i++) {
             int shift = 16;
-            *pal = 0;
+            *pal = 0xFFU << 24;
             for (j = 0; j < 3; j++, shift -= 8)
                 *pal += ((avctx->extradata[i * 3 + j] << 2) |
                          (avctx->extradata[i * 3 + j] >> 4)) << shift;
             pal++;
         }
+        memcpy(bfi->pal, frame->data[1], sizeof(bfi->pal));
         frame->palette_has_changed = 1;
     } else {
         frame->pict_type = AV_PICTURE_TYPE_P;
         frame->key_frame = 0;
+        frame->palette_has_changed = 0;
+        memcpy(frame->data[1], bfi->pal, sizeof(bfi->pal));
     }
 
     bytestream2_skip(&g, 4); // Unpacked size, not required.
diff --git a/libavcodec/bfin/README b/libavcodec/bfin/README
new file mode 100644
index 0000000..afb3461
--- /dev/null
+++ b/libavcodec/bfin/README
@@ -0,0 +1,6 @@
+BFIN optimizations have been removed in
+commit 880e2aa23645ed9871c66ee1cbd00f93c72d2d73
+The last revision with the optimizations is fa4e17c14035ebf43130fb369e1728cdd98d0b72
+
+If you want to maintain these (or other) BFIN optimizations in ffmpeg, then please
+contact ffmpeg-devel@ffmpeg.org
diff --git a/libavcodec/bgmc.c b/libavcodec/bgmc.c
index c7f732e..1a6817b 100644
--- a/libavcodec/bgmc.c
+++ b/libavcodec/bgmc.c
@@ -1,28 +1,28 @@
 /*
  * Block Gilbert-Moore decoder
- * Copyright (c) 2010 Thilo Borgmann <thilo.borgmann _at_ googlemail.com>
+ * Copyright (c) 2010 Thilo Borgmann <thilo.borgmann _at_ mail.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
  * @file
  * Block Gilbert-Moore decoder as used by MPEG-4 ALS
- * @author Thilo Borgmann <thilo.borgmann _at_ googlemail.com>
+ * @author Thilo Borgmann <thilo.borgmann _at_ mail.de>
  */
 
 #include "libavutil/attributes.h"
@@ -460,8 +460,8 @@ static uint8_t *bgmc_lut_getp(uint8_t *lut, int *lut_status, int delta)
 av_cold int ff_bgmc_init(AVCodecContext *avctx,
                          uint8_t **cf_lut, int **cf_lut_status)
 {
-    *cf_lut        = av_malloc(sizeof(*cf_lut)        * LUT_BUFF * 16 * LUT_SIZE);
-    *cf_lut_status = av_malloc(sizeof(*cf_lut_status) * LUT_BUFF);
+    *cf_lut        = av_malloc(sizeof(**cf_lut)        * LUT_BUFF * 16 * LUT_SIZE);
+    *cf_lut_status = av_malloc(sizeof(**cf_lut_status) * LUT_BUFF);
 
     if (!*cf_lut || !*cf_lut_status) {
         ff_bgmc_end(cf_lut, cf_lut_status);
@@ -469,7 +469,7 @@ av_cold int ff_bgmc_init(AVCodecContext *avctx,
         return AVERROR(ENOMEM);
     } else {
         // initialize lut_status buffer to a value never used to compare against
-        memset(*cf_lut_status, -1, sizeof(*cf_lut_status) * LUT_BUFF);
+        memset(*cf_lut_status, -1, sizeof(**cf_lut_status) * LUT_BUFF);
     }
 
     return 0;
diff --git a/libavcodec/bgmc.h b/libavcodec/bgmc.h
index 3d5b490..4893736 100644
--- a/libavcodec/bgmc.h
+++ b/libavcodec/bgmc.h
@@ -1,28 +1,28 @@
 /*
  * Block Gilbert-Moore decoder
- * Copyright (c) 2010 Thilo Borgmann <thilo.borgmann _at_ googlemail.com>
+ * Copyright (c) 2010 Thilo Borgmann <thilo.borgmann _at_ mail.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
  * @file
  * Block Gilbert-Moore decoder header
- * @author Thilo Borgmann <thilo.borgmann _at_ googlemail.com>
+ * @author Thilo Borgmann <thilo.borgmann _at_ mail.de>
  */
 
 
diff --git a/libavcodec/bink.c b/libavcodec/bink.c
index e34585b..bc3d25c 100644
--- a/libavcodec/bink.c
+++ b/libavcodec/bink.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2009 Konstantin Shishkov
  * Copyright (C) 2011 Peter Ross <pross@xvid.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -120,6 +120,7 @@ typedef struct BinkContext {
     int            version;              ///< internal Bink file version
     int            has_alpha;
     int            swap_planes;
+    unsigned       frame_num;
 
     Bundle         bundle[BINKB_NB_SRC]; ///< bundles for decoding all data types
     Tree           col_high[16];         ///< trees for decoding high nibble in "colours" data type
@@ -143,7 +144,7 @@ enum BlockTypes {
 };
 
 /**
- * Initialize length length in all bundles.
+ * Initialize length in all bundles.
  *
  * @param c     decoder context
  * @param width plane width
@@ -174,7 +175,7 @@ static void init_lengths(BinkContext *c, int width, int bw)
  *
  * @param c decoder context
  */
-static av_cold void init_bundles(BinkContext *c)
+static av_cold int init_bundles(BinkContext *c)
 {
     int bw, bh, blocks;
     int i;
@@ -184,9 +185,13 @@ static av_cold void init_bundles(BinkContext *c)
     blocks = bw * bh;
 
     for (i = 0; i < BINKB_NB_SRC; i++) {
-        c->bundle[i].data = av_malloc(blocks * 64);
+        c->bundle[i].data = av_mallocz(blocks * 64);
+        if (!c->bundle[i].data)
+            return AVERROR(ENOMEM);
         c->bundle[i].data_end = c->bundle[i].data + blocks * 64;
     }
+
+    return 0;
 }
 
 /**
@@ -679,11 +684,12 @@ static int read_dct_coeffs(GetBitContext *gb, int32_t block[64], const uint8_t *
         quant_idx = get_bits(gb, 4);
     } else {
         quant_idx = q;
+        if (quant_idx > 15U) {
+            av_log(NULL, AV_LOG_ERROR, "quant_index %d out of range\n", quant_idx);
+            return AVERROR_INVALIDDATA;
+        }
     }
 
-    if (quant_idx >= 16)
-        return AVERROR_INVALIDDATA;
-
     quant = quant_matrices[quant_idx];
 
     block[0] = (block[0] * quant[0]) >> 11;
@@ -866,7 +872,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
                 memset(dctblock, 0, sizeof(*dctblock) * 64);
                 dctblock[0] = binkb_get_value(c, BINKB_SRC_INTRA_DC);
                 qp = binkb_get_value(c, BINKB_SRC_INTRA_Q);
-                read_dct_coeffs(gb, dctblock, bink_scan, binkb_intra_quant, qp);
+                read_dct_coeffs(gb, dctblock, bink_scan, (const int32_t (*)[64])binkb_intra_quant, qp);
                 c->binkdsp.idct_put(dst, stride, dctblock);
                 break;
             case 3:
@@ -899,7 +905,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
                 memset(dctblock, 0, sizeof(*dctblock) * 64);
                 dctblock[0] = binkb_get_value(c, BINKB_SRC_INTER_DC);
                 qp = binkb_get_value(c, BINKB_SRC_INTER_Q);
-                read_dct_coeffs(gb, dctblock, bink_scan, binkb_inter_quant, qp);
+                read_dct_coeffs(gb, dctblock, bink_scan, (const int32_t (*)[64])binkb_inter_quant, qp);
                 c->binkdsp.idct_add(dst, stride, dctblock);
                 break;
             case 5:
@@ -1136,6 +1142,11 @@ static int bink_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
                 xoff = get_value(c, BINK_SRC_X_OFF);
                 yoff = get_value(c, BINK_SRC_Y_OFF);
                 ref = prev + xoff + yoff * stride;
+                if (ref < ref_start || ref > ref_end) {
+                    av_log(c->avctx, AV_LOG_ERROR, "Copy out of bounds @%d, %d\n",
+                           bx*8 + xoff, by*8 + yoff);
+                    return -1;
+                }
                 c->hdsp.put_pixels_tab[1][0](dst, ref, stride, 8);
                 memset(dctblock, 0, sizeof(*dctblock) * 64);
                 dctblock[0] = get_value(c, BINK_SRC_INTER_DC);
@@ -1177,15 +1188,11 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
     int bits_count = pkt->size << 3;
 
     if (c->version > 'b') {
-        if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
             return ret;
-        }
     } else {
-        if ((ret = ff_reget_buffer(avctx, c->last)) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+        if ((ret = ff_reget_buffer(avctx, c->last)) < 0)
             return ret;
-        }
         if ((ret = av_frame_ref(frame, c->last)) < 0)
             return ret;
     }
@@ -1200,6 +1207,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
     if (c->version >= 'i')
         skip_bits_long(&gb, 32);
 
+    c->frame_num++;
+
     for (plane = 0; plane < 3; plane++) {
         plane_idx = (!plane || !c->swap_planes) ? plane : (plane ^ 3);
 
@@ -1208,7 +1217,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
                 return ret;
         } else {
             if ((ret = binkb_decode_plane(c, frame, &gb, plane_idx,
-                                          !avctx->frame_number, !!plane)) < 0)
+                                          c->frame_num == 1, !!plane)) < 0)
                 return ret;
         }
         if (get_bits_count(&gb) >= bits_count)
@@ -1234,41 +1243,28 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
 static av_cold void binkb_calc_quant(void)
 {
     uint8_t inv_bink_scan[64];
-    double s[64];
+    static const int s[64]={
+        1073741824,1489322693,1402911301,1262586814,1073741824, 843633538, 581104888, 296244703,
+        1489322693,2065749918,1945893874,1751258219,1489322693,1170153332, 806015634, 410903207,
+        1402911301,1945893874,1832991949,1649649171,1402911301,1102260336, 759250125, 387062357,
+        1262586814,1751258219,1649649171,1484645031,1262586814, 992008094, 683307060, 348346918,
+        1073741824,1489322693,1402911301,1262586814,1073741824, 843633538, 581104888, 296244703,
+         843633538,1170153332,1102260336, 992008094, 843633538, 662838617, 456571181, 232757969,
+         581104888, 806015634, 759250125, 683307060, 581104888, 456571181, 314491699, 160326478,
+         296244703, 410903207, 387062357, 348346918, 296244703, 232757969, 160326478,  81733730,
+    };
     int i, j;
-
-    for (j = 0; j < 8; j++) {
-        for (i = 0; i < 8; i++) {
-            if (j && j != 4)
-               if (i && i != 4)
-                   s[j*8 + i] = cos(j * M_PI/16.0) * cos(i * M_PI/16.0) * 2.0;
-               else
-                   s[j*8 + i] = cos(j * M_PI/16.0) * sqrt(2.0);
-            else
-               if (i && i != 4)
-                   s[j*8 + i] = cos(i * M_PI/16.0) * sqrt(2.0);
-               else
-                   s[j*8 + i] = 1.0;
-        }
-    }
-
+#define C (1LL<<30)
     for (i = 0; i < 64; i++)
         inv_bink_scan[bink_scan[i]] = i;
 
     for (j = 0; j < 16; j++) {
         for (i = 0; i < 64; i++) {
             int k = inv_bink_scan[i];
-            if (s[i] == 1.0) {
-                binkb_intra_quant[j][k] = (1L << 12) * binkb_intra_seed[i] *
-                                          binkb_num[j]/binkb_den[j];
-                binkb_inter_quant[j][k] = (1L << 12) * binkb_inter_seed[i] *
-                                          binkb_num[j]/binkb_den[j];
-            } else {
-                binkb_intra_quant[j][k] = (1L << 12) * binkb_intra_seed[i] * s[i] *
-                                          binkb_num[j]/(double)binkb_den[j];
-                binkb_inter_quant[j][k] = (1L << 12) * binkb_inter_seed[i] * s[i] *
-                                          binkb_num[j]/(double)binkb_den[j];
-            }
+            binkb_intra_quant[j][k] = binkb_intra_seed[i] * (int64_t)s[i] *
+                                        binkb_num[j]/(binkb_den[j] * (C>>12));
+            binkb_inter_quant[j][k] = binkb_inter_seed[i] * (int64_t)s[i] *
+                                        binkb_num[j]/(binkb_den[j] * (C>>12));
         }
     }
 }
@@ -1314,7 +1310,10 @@ static av_cold int decode_init(AVCodecContext *avctx)
     ff_hpeldsp_init(&c->hdsp, avctx->flags);
     ff_binkdsp_init(&c->binkdsp);
 
-    init_bundles(c);
+    if ((ret = init_bundles(c)) < 0) {
+        free_bundles(c);
+        return ret;
+    }
 
     if (c->version == 'b') {
         if (!binkb_initialised) {
@@ -1336,6 +1335,13 @@ static av_cold int decode_end(AVCodecContext *avctx)
     return 0;
 }
 
+static void flush(AVCodecContext *avctx)
+{ /* Flush callback: restart the per-stream frame counter. */
+    BinkContext * const c = avctx->priv_data;
+
+    c->frame_num = 0; /* decode_frame() increments before use, so the next frame is seen as frame_num == 1 (the "first frame" case passed to binkb_decode_plane) */
+}
+
 AVCodec ff_bink_decoder = {
     .name           = "binkvideo",
     .long_name      = NULL_IF_CONFIG_SMALL("Bink video"),
@@ -1345,5 +1351,6 @@ AVCodec ff_bink_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
+    .flush          = flush,
     .capabilities   = CODEC_CAP_DR1,
 };
diff --git a/libavcodec/binkaudio.c b/libavcodec/binkaudio.c
index ddaa613..ffa32d1 100644
--- a/libavcodec/binkaudio.c
+++ b/libavcodec/binkaudio.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2007-2011 Peter Ross (pross@xvid.org)
  * Copyright (c) 2009 Daniel Verkamp (daniel@drv.nu)
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -82,14 +82,14 @@ static av_cold int decode_init(AVCodecContext *avctx)
         frame_len_bits = 11;
     }
 
-    if (avctx->channels > MAX_CHANNELS) {
-        av_log(avctx, AV_LOG_ERROR, "too many channels: %d\n", avctx->channels);
-        return -1;
+    if (avctx->channels < 1 || avctx->channels > MAX_CHANNELS) {
+        av_log(avctx, AV_LOG_ERROR, "invalid number of channels: %d\n", avctx->channels);
+        return AVERROR_INVALIDDATA;
     }
     avctx->channel_layout = avctx->channels == 1 ? AV_CH_LAYOUT_MONO :
                                                    AV_CH_LAYOUT_STEREO;
 
-    s->version_b = avctx->extradata && avctx->extradata[3] == 'b';
+    s->version_b = avctx->extradata_size >= 4 && avctx->extradata[3] == 'b';
 
     if (avctx->codec->id == AV_CODEC_ID_BINKAUDIO_RDFT) {
         // audio is already interleaved for the RDFT format variant
@@ -306,9 +306,11 @@ static int decode_frame(AVCodecContext *avctx, void *data,
         buf = av_realloc(s->packet_buffer, avpkt->size + FF_INPUT_BUFFER_PADDING_SIZE);
         if (!buf)
             return AVERROR(ENOMEM);
+        memset(buf + avpkt->size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
         s->packet_buffer = buf;
         memcpy(s->packet_buffer, avpkt->data, avpkt->size);
-        init_get_bits(gb, s->packet_buffer, avpkt->size * 8);
+        if ((ret = init_get_bits8(gb, s->packet_buffer, avpkt->size)) < 0)
+            return ret;
         consumed = avpkt->size;
 
         /* skip reported size */
@@ -317,10 +319,8 @@ static int decode_frame(AVCodecContext *avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = s->frame_len;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     if (decode_block(s, (float **)frame->extended_data,
                      avctx->codec->id == AV_CODEC_ID_BINKAUDIO_DCT)) {
diff --git a/libavcodec/binkdata.h b/libavcodec/binkdata.h
index 3da6b7e..57619be 100644
--- a/libavcodec/binkdata.h
+++ b/libavcodec/binkdata.h
@@ -2,20 +2,20 @@
  * Bink video decoder
  * Copyright (C) 2009 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/binkdsp.c b/libavcodec/binkdsp.c
index 0dfe12c..9d70e23 100644
--- a/libavcodec/binkdsp.c
+++ b/libavcodec/binkdsp.c
@@ -2,20 +2,20 @@
  * Bink DSP routines
  * Copyright (c) 2009 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -129,7 +129,7 @@ static void scale_block_c(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align
     }
 }
 
-static void add_pixels8_c(uint8_t *restrict pixels, int16_t *block,
+static void add_pixels8_c(uint8_t *av_restrict pixels, int16_t *block,
                           int line_size)
 {
     int i;
diff --git a/libavcodec/binkdsp.h b/libavcodec/binkdsp.h
index 418afb9..f319d1f 100644
--- a/libavcodec/binkdsp.h
+++ b/libavcodec/binkdsp.h
@@ -2,20 +2,20 @@
  * Bink DSP routines
  * Copyright (c) 2009 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/bintext.c b/libavcodec/bintext.c
new file mode 100644
index 0000000..97fceb1
--- /dev/null
+++ b/libavcodec/bintext.c
@@ -0,0 +1,258 @@
+/*
+ * Binary text decoder
+ * eXtended BINary text (XBIN) decoder
+ * iCEDraw File decoder
+ * Copyright (c) 2010 Peter Ross (pross@xvid.org)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Binary text decoder
+ * eXtended BINary text (XBIN) decoder
+ * iCEDraw File decoder
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "libavutil/xga_font_data.h"
+#include "avcodec.h"
+#include "cga_data.h"
+#include "bintext.h"
+#include "internal.h"
+
+typedef struct XbinContext { /* decoder state shared by the bintext, xbin and idf decoders */
+    AVFrame *frame;       /* canvas: allocated in decode_init(), re-acquired with ff_reget_buffer() per packet */
+    int palette[16];      /* 16 colour entries with the top byte forced to 0xFF; copied to frame->data[1] on decode */
+    int flags;            /* BINTEXT_PALETTE / BINTEXT_FONT bits from extradata[1]; 0 without extradata */
+    int font_height;      /* extradata[0], or 8 without extradata; number of frame rows per text line */
+    const uint8_t *font;  /* glyph data: tail of extradata (BINTEXT_FONT) or a built-in CGA/VGA16 font */
+    int x, y;             /* cursor: byte offset within the row and row index into frame->data[0] */
+} XbinContext;
+
+static av_cold int decode_init(AVCodecContext *avctx)
+{ /* Parse the optional extradata (font height, flags, palette, font) and allocate the canvas frame. */
+    XbinContext *s = avctx->priv_data;
+    uint8_t *p;
+    int i;
+
+    avctx->pix_fmt = AV_PIX_FMT_PAL8;
+    p = avctx->extradata;
+    if (p) {
+        s->font_height = p[0]; /* NOTE(review): p[0] and p[1] are read before extradata_size is validated below */
+        s->flags = p[1];
+        p += 2; /* p now points at the optional palette / font payload */
+        if(avctx->extradata_size < 2 + (!!(s->flags & BINTEXT_PALETTE))*3*16 /* 2 header bytes + optional 16 * 3 palette bytes */
+                                     + (!!(s->flags & BINTEXT_FONT))*s->font_height*256) { /* + optional font_height * 256 font bytes */
+            av_log(avctx, AV_LOG_ERROR, "not enough extradata\n");
+            return AVERROR_INVALIDDATA;
+        }
+    } else {
+        s->font_height = 8; /* defaults when the caller supplies no extradata */
+        s->flags = 0;
+    }
+
+    if ((s->flags & BINTEXT_PALETTE)) {
+        for (i = 0; i < 16; i++) {
+            s->palette[i] = 0xFF000000 | (AV_RB24(p) << 2) | ((AV_RB24(p) >> 4) & 0x30303); /* scale each component by <<2 and copy its upper bits into the low two bits (presumably 6-bit input -- TODO confirm); top byte forced to 0xFF */
+            p += 3; /* three bytes per palette entry */
+        }
+    } else {
+        for (i = 0; i < 16; i++)
+            s->palette[i] = 0xFF000000 | ff_cga_palette[i]; /* built-in palette, top byte forced to 0xFF */
+    }
+
+    if ((s->flags & BINTEXT_FONT)) {
+        s->font = p; /* font data follows the header (and the palette, if present) inside extradata */
+    } else {
+        switch(s->font_height) {
+        default:
+            av_log(avctx, AV_LOG_WARNING, "font height %i not supported\n", s->font_height);
+            s->font_height = 8; /* deliberately no break: fall through to the height-8 font */
+        case 8:
+            s->font = avpriv_cga_font;
+            break;
+        case 16:
+            s->font = avpriv_vga16_font;
+            break;
+        }
+    }
+
+    s->frame = av_frame_alloc(); /* released in decode_end() */
+    if (!s->frame)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+#define DEFAULT_BG_COLOR 0 /* byte value written into the text row cleared by hscroll() */
+av_unused static void hscroll(AVCodecContext *avctx) /* marked av_unused; despite the name it moves/scrolls by one text row vertically */
+{
+    XbinContext *s = avctx->priv_data;
+    if (s->y < avctx->height - s->font_height) {
+        s->y += s->font_height; /* still room below: just move the cursor down one text row */
+    } else {
+        memmove(s->frame->data[0], s->frame->data[0] + s->font_height*s->frame->linesize[0], /* shift the image up by font_height rows */
+            (avctx->height - s->font_height)*s->frame->linesize[0]);
+        memset(s->frame->data[0] + (avctx->height - s->font_height)*s->frame->linesize[0], /* clear the last font_height rows */
+            DEFAULT_BG_COLOR, s->font_height * s->frame->linesize[0]);
+    }
+}
+
+#define FONT_WIDTH 8
+
+/**
+ * Draw character c with attribute byte a at the cursor (s->x, s->y), then advance the cursor, wrapping to the next text row.
+ */
+static void draw_char(AVCodecContext *avctx, int c, int a)
+{
+    XbinContext *s = avctx->priv_data;
+    if (s->y > avctx->height - s->font_height) /* no room left for a full text row: silently drop the character */
+        return;
+    ff_draw_pc_font(s->frame->data[0] + s->y * s->frame->linesize[0] + s->x,
+                    s->frame->linesize[0], s->font, s->font_height, c,
+                    a & 0x0F, a >> 4); /* low and high nibble of the attribute (presumably foreground, background -- confirm against ff_draw_pc_font) */
+    s->x += FONT_WIDTH;
+    if (s->x > avctx->width - FONT_WIDTH) { /* next glyph would not fit on this row: wrap */
+        s->x = 0;
+        s->y += s->font_height;
+    }
+}
+
+static int decode_frame(AVCodecContext *avctx,
+                            void *data, int *got_frame,
+                            AVPacket *avpkt)
+{ /* Render one packet of character/attribute data onto the canvas and return a reference to it in data. */
+    XbinContext *s = avctx->priv_data;
+    const uint8_t *buf = avpkt->data;
+    int buf_size = avpkt->size;
+    const uint8_t *buf_end = buf+buf_size;
+    int ret;
+
+    s->x = s->y = 0; /* every packet is drawn starting from the top-left corner */
+    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0)
+        return ret;
+    s->frame->pict_type           = AV_PICTURE_TYPE_I;
+    s->frame->palette_has_changed = 1;
+    memcpy(s->frame->data[1], s->palette, 16 * 4); /* 16 palette entries of 4 bytes each */
+
+    if (avctx->codec_id == AV_CODEC_ID_XBIN) {
+        while (buf + 2 < buf_end) { /* needs the run header byte plus at least two more bytes */
+            int i,c,a;
+            int type  = *buf >> 6; /* top two bits select the run type */
+            int count = (*buf & 0x3F) + 1; /* low six bits: run length, 1..64 */
+            buf++;
+            switch (type) {
+            case 0: //no compression
+                for (i = 0; i < count && buf + 1 < buf_end; i++) {
+                    draw_char(avctx, buf[0], buf[1]);
+                    buf += 2;
+                }
+                break;
+            case 1: //character compression
+                c = *buf++; /* one character, followed by up to count attribute bytes */
+                for (i = 0; i < count && buf < buf_end; i++)
+                    draw_char(avctx, c, *buf++);
+                break;
+            case 2: //attribute compression
+                a = *buf++; /* one attribute, followed by up to count character bytes */
+                for (i = 0; i < count && buf < buf_end; i++)
+                    draw_char(avctx, *buf++, a);
+                break;
+            case 3: //character/attribute compression
+                c = *buf++;
+                a = *buf++;
+                for (i = 0; i < count && buf < buf_end; i++) /* NOTE(review): buf is not advanced here, so a run whose two bytes end exactly at buf_end draws nothing */
+                    draw_char(avctx, c, a);
+                break;
+            }
+        }
+    } else if (avctx->codec_id == AV_CODEC_ID_IDF) {
+        while (buf + 2 < buf_end) {
+            if (AV_RL16(buf) == 1) { /* 16-bit little-endian value 1 introduces a 6-byte repeat record */
+               int i;
+               if (buf + 6 > buf_end)
+                   break;
+               for (i = 0; i < buf[2]; i++) /* buf[2] is the repeat count; buf[3] is not read */
+                   draw_char(avctx, buf[4], buf[5]);
+               buf += 6;
+            } else {
+               draw_char(avctx, buf[0], buf[1]);
+               buf += 2;
+            }
+        }
+    } else { /* any other codec id: plain character/attribute byte pairs */
+        while (buf + 1 < buf_end) {
+            draw_char(avctx, buf[0], buf[1]);
+            buf += 2;
+        }
+    }
+
+    if ((ret = av_frame_ref(data, s->frame)) < 0) /* hand out a reference; s->frame stays owned by the context */
+        return ret;
+    *got_frame      = 1;
+    return buf_size; /* the whole packet is always reported as consumed */
+}
+
+static av_cold int decode_end(AVCodecContext *avctx)
+{ /* Close callback: release the decoder's canvas frame. */
+    XbinContext *s = avctx->priv_data;
+
+    av_frame_free(&s->frame); /* frees the frame allocated by av_frame_alloc() in decode_init() */
+
+    return 0;
+}
+
+#if CONFIG_BINTEXT_DECODER
+AVCodec ff_bintext_decoder = {
+    .name           = "bintext",
+    .long_name      = NULL_IF_CONFIG_SMALL("Binary text"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_BINTEXT,
+    .priv_data_size = sizeof(XbinContext),
+    .init           = decode_init,
+    .close          = decode_end,
+    .decode         = decode_frame, /* same callbacks as the xbin and idf decoders; decode_frame() branches on avctx->codec_id */
+    .capabilities   = CODEC_CAP_DR1,
+};
+#endif /* CONFIG_BINTEXT_DECODER */
+#if CONFIG_XBIN_DECODER
+AVCodec ff_xbin_decoder = {
+    .name           = "xbin",
+    .long_name      = NULL_IF_CONFIG_SMALL("eXtended BINary text"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_XBIN,
+    .priv_data_size = sizeof(XbinContext),
+    .init           = decode_init,
+    .close          = decode_end,
+    .decode         = decode_frame, /* same callbacks as the bintext and idf decoders; decode_frame() takes its run-length branch for AV_CODEC_ID_XBIN */
+    .capabilities   = CODEC_CAP_DR1,
+};
+#endif /* CONFIG_XBIN_DECODER */
+#if CONFIG_IDF_DECODER
+AVCodec ff_idf_decoder = {
+    .name           = "idf",
+    .long_name      = NULL_IF_CONFIG_SMALL("iCEDraw text"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_IDF,
+    .priv_data_size = sizeof(XbinContext),
+    .init           = decode_init,
+    .close          = decode_end,
+    .decode         = decode_frame, /* same callbacks as the bintext and xbin decoders; decode_frame() takes its repeat-record branch for AV_CODEC_ID_IDF */
+    .capabilities   = CODEC_CAP_DR1,
+};
+#endif /* CONFIG_IDF_DECODER */
diff --git a/libavcodec/bintext.h b/libavcodec/bintext.h
new file mode 100644
index 0000000..21428ba
--- /dev/null
+++ b/libavcodec/bintext.h
@@ -0,0 +1,37 @@
+/*
+ * Binary text decoder
+ * Copyright (c) 2010 Peter Ross (pross@xvid.org)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Binary text decoder
+ */
+
+#ifndef AVCODEC_BINTEXT_H
+#define AVCODEC_BINTEXT_H
+
+/* flag values passed between avformat and avcodec;
+ * while these are identical to the XBIN flags, they are also used
+ * for the BINTEXT and IDF decoders.
+ */
+#define BINTEXT_PALETTE  0x1
+#define BINTEXT_FONT     0x2
+
+#endif /* AVCODEC_BINTEXT_H */
diff --git a/libavcodec/bit_depth_template.c b/libavcodec/bit_depth_template.c
index 27e658b..8018489 100644
--- a/libavcodec/bit_depth_template.c
+++ b/libavcodec/bit_depth_template.c
@@ -1,23 +1,24 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "mathops.h"
 #include "rnd_avg.h"
+#include "libavutil/intreadwrite.h"
 
 #ifndef BIT_DEPTH
 #define BIT_DEPTH 8
@@ -71,7 +72,7 @@
 #   define pixel4 uint32_t
 #   define dctcoef int16_t
 
-#   define INIT_CLIP const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
+#   define INIT_CLIP
 #   define no_rnd_avg_pixel4 no_rnd_avg32
 #   define    rnd_avg_pixel4    rnd_avg32
 #   define AV_RN2P  AV_RN16
@@ -83,7 +84,7 @@
 #   define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
 
 #   define av_clip_pixel(a) av_clip_uint8(a)
-#   define CLIP(a) cm[a]
+#   define CLIP(a) av_clip_uint8(a)
 #endif
 
 #define FUNC3(a, b, c)  a ## _ ## b ## c
diff --git a/libavcodec/bitstream.c b/libavcodec/bitstream.c
index 8e9f657..d041643 100644
--- a/libavcodec/bitstream.c
+++ b/libavcodec/bitstream.c
@@ -6,20 +6,20 @@
  *
  * alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -28,6 +28,8 @@
  * bitstream api.
  */
 
+#include "libavutil/atomic.h"
+#include "libavutil/avassert.h"
 #include "avcodec.h"
 #include "mathops.h"
 #include "get_bits.h"
@@ -106,17 +108,16 @@ static int alloc_table(VLC *vlc, int size, int use_static)
 
     vlc->table_size += size;
     if (vlc->table_size > vlc->table_allocated) {
-        int err;
         if (use_static)
-            return AVERROR_BUG;
+            abort(); // cannot do anything, init_vlc() is used with too little memory
         vlc->table_allocated += (1 << vlc->bits);
-        if ((err = av_reallocp(&vlc->table,
-                               sizeof(VLC_TYPE) * 2 *
-                               vlc->table_allocated)) < 0) {
+        vlc->table = av_realloc_f(vlc->table, vlc->table_allocated, sizeof(VLC_TYPE) * 2);
+        if (!vlc->table) {
             vlc->table_allocated = 0;
             vlc->table_size = 0;
-            return err;
+            return AVERROR(ENOMEM);
         }
+        memset(vlc->table + vlc->table_allocated - (1 << vlc->bits), 0, sizeof(VLC_TYPE) * 2 << vlc->bits);
     }
     return index;
 }
@@ -162,19 +163,16 @@ static int build_table(VLC *vlc, int table_nb_bits, int nb_codes,
     int table_size, table_index, index, code_prefix, symbol, subtable_bits;
     int i, j, k, n, nb, inc;
     uint32_t code;
-    VLC_TYPE (*table)[2];
+    volatile VLC_TYPE (* volatile table)[2]; // the double volatile is needed to prevent an internal compiler error in gcc 4.2
 
     table_size = 1 << table_nb_bits;
+    if (table_nb_bits > 30)
+       return -1;
     table_index = alloc_table(vlc, table_size, flags & INIT_VLC_USE_NEW_STATIC);
     av_dlog(NULL, "new table index=%d size=%d\n", table_index, table_size);
     if (table_index < 0)
         return table_index;
-    table = &vlc->table[table_index];
-
-    for (i = 0; i < table_size; i++) {
-        table[i][1] = 0; //bits
-        table[i][0] = -1; //codes
-    }
+    table = (volatile VLC_TYPE (*)[2])&vlc->table[table_index];
 
     /* first pass: map codes and compute auxiliary table sizes */
     for (i = 0; i < nb_codes; i++) {
@@ -192,8 +190,9 @@ static int build_table(VLC *vlc, int table_nb_bits, int nb_codes,
                 inc = 1 << n;
             }
             for (k = 0; k < nb; k++) {
+                int bits = table[j][1];
                 av_dlog(NULL, "%4x: code=%d n=%d\n", j, i, n);
-                if (table[j][1] /*bits*/ != 0) {
+                if (bits != 0 && bits != n) {
                     av_log(NULL, AV_LOG_ERROR, "incorrect codes\n");
                     return AVERROR_INVALIDDATA;
                 }
@@ -228,11 +227,17 @@ static int build_table(VLC *vlc, int table_nb_bits, int nb_codes,
             if (index < 0)
                 return index;
             /* note: realloc has been done, so reload tables */
-            table = &vlc->table[table_index];
+            table = (volatile VLC_TYPE (*)[2])&vlc->table[table_index];
             table[j][0] = index; //code
             i = k-1;
         }
     }
+
+    for (i = 0; i < table_size; i++) {
+        if (table[i][1] == 0) //bits
+            table[i][0] = -1; //codes
+    }
+
     return table_index;
 }
 
@@ -263,7 +268,7 @@ static int build_table(VLC *vlc, int table_nb_bits, int nb_codes,
    'use_static' should be set to 1 for tables, which should be freed
    with av_free_static(), 0 if ff_free_vlc() will be used.
 */
-int ff_init_vlc_sparse(VLC *vlc, int nb_bits, int nb_codes,
+int ff_init_vlc_sparse(VLC *vlc_arg, int nb_bits, int nb_codes,
                        const void *bits, int bits_wrap, int bits_size,
                        const void *codes, int codes_wrap, int codes_size,
                        const void *symbols, int symbols_wrap, int symbols_size,
@@ -271,42 +276,56 @@ int ff_init_vlc_sparse(VLC *vlc, int nb_bits, int nb_codes,
 {
     VLCcode *buf;
     int i, j, ret;
+    VLCcode localbuf[1500]; // the maximum currently needed is 1296 by rv34
+    VLC localvlc, *vlc;
 
+    vlc = vlc_arg;
     vlc->bits = nb_bits;
     if (flags & INIT_VLC_USE_NEW_STATIC) {
-        if (vlc->table_size && vlc->table_size == vlc->table_allocated) {
-            return 0;
-        } else if (vlc->table_size) {
-            return AVERROR_BUG;
-        }
+        av_assert0(nb_codes + 1 <= FF_ARRAY_ELEMS(localbuf));
+        buf = localbuf;
+        localvlc = *vlc_arg;
+        vlc = &localvlc;
+        vlc->table_size = 0;
     } else {
         vlc->table           = NULL;
         vlc->table_allocated = 0;
         vlc->table_size      = 0;
-    }
 
-    av_dlog(NULL, "build table nb_codes=%d\n", nb_codes);
+        buf = av_malloc_array((nb_codes + 1), sizeof(VLCcode));
+        if (!buf)
+            return AVERROR(ENOMEM);
+    }
 
-    buf = av_malloc((nb_codes + 1) * sizeof(VLCcode));
-    if (!buf)
-        return AVERROR(ENOMEM);
 
-    assert(symbols_size <= 2 || !symbols);
+    av_assert0(symbols_size <= 2 || !symbols);
     j = 0;
-#define COPY(condition)                                                     \
+#define COPY(condition)\
     for (i = 0; i < nb_codes; i++) {                                        \
         GET_DATA(buf[j].bits, bits, i, bits_wrap, bits_size);               \
         if (!(condition))                                                   \
             continue;                                                       \
+        if (buf[j].bits > 3*nb_bits || buf[j].bits>32) {                    \
+            av_log(NULL, AV_LOG_ERROR, "Too long VLC (%d) in init_vlc\n", buf[j].bits);\
+            if (!(flags & INIT_VLC_USE_NEW_STATIC))                         \
+                av_free(buf);                                               \
+            return -1;                                                      \
+        }                                                                   \
         GET_DATA(buf[j].code, codes, i, codes_wrap, codes_size);            \
+        if (buf[j].code >= (1LL<<buf[j].bits)) {                            \
+            av_log(NULL, AV_LOG_ERROR, "Invalid code in init_vlc\n");       \
+            if (!(flags & INIT_VLC_USE_NEW_STATIC))                         \
+                av_free(buf);                                               \
+            return -1;                                                      \
+        }                                                                   \
         if (flags & INIT_VLC_LE)                                            \
             buf[j].code = bitswap_32(buf[j].code);                          \
         else                                                                \
             buf[j].code <<= 32 - buf[j].bits;                               \
         if (symbols)                                                        \
             GET_DATA(buf[j].symbol, symbols, i, symbols_wrap, symbols_size) \
-            else                                                            \
-                buf[j].symbol = i;                                          \
+        else                                                                \
+            buf[j].symbol = i;                                              \
         j++;                                                                \
     }
     COPY(buf[j].bits > nb_bits);
@@ -317,15 +336,19 @@ int ff_init_vlc_sparse(VLC *vlc, int nb_bits, int nb_codes,
 
     ret = build_table(vlc, nb_bits, nb_codes, buf, flags);
 
-    av_free(buf);
-    if (ret < 0) {
-        av_freep(&vlc->table);
-        return ret;
+    if (flags & INIT_VLC_USE_NEW_STATIC) {
+        if(vlc->table_size != vlc->table_allocated)
+            av_log(NULL, AV_LOG_ERROR, "needed %d had %d\n", vlc->table_size, vlc->table_allocated);
+
+        av_assert0(ret >= 0);
+        *vlc_arg = *vlc;
+    } else {
+        av_free(buf);
+        if (ret < 0) {
+            av_freep(&vlc->table);
+            return ret;
+        }
     }
-    if ((flags & INIT_VLC_USE_NEW_STATIC) &&
-        vlc->table_size != vlc->table_allocated)
-        av_log(NULL, AV_LOG_ERROR, "needed %d had %d\n",
-               vlc->table_size, vlc->table_allocated);
     return 0;
 }
 
diff --git a/libavcodec/bitstream_filter.c b/libavcodec/bitstream_filter.c
index f524d3e..751b90d 100644
--- a/libavcodec/bitstream_filter.c
+++ b/libavcodec/bitstream_filter.c
@@ -1,26 +1,27 @@
 /*
  * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include <string.h>
 
 #include "avcodec.h"
+#include "libavutil/atomic.h"
 #include "libavutil/mem.h"
 
 static AVBitStreamFilter *first_bitstream_filter = NULL;
@@ -35,8 +36,9 @@ AVBitStreamFilter *av_bitstream_filter_next(const AVBitStreamFilter *f)
 
 void av_register_bitstream_filter(AVBitStreamFilter *bsf)
 {
-    bsf->next              = first_bitstream_filter;
-    first_bitstream_filter = bsf;
+    do {
+        bsf->next = first_bitstream_filter;
+    } while(bsf->next != avpriv_atomic_ptr_cas((void * volatile *)&first_bitstream_filter, bsf->next, bsf));
 }
 
 AVBitStreamFilterContext *av_bitstream_filter_init(const char *name)
@@ -59,6 +61,8 @@ AVBitStreamFilterContext *av_bitstream_filter_init(const char *name)
 
 void av_bitstream_filter_close(AVBitStreamFilterContext *bsfc)
 {
+    if (!bsfc)
+        return;
     if (bsfc->filter->close)
         bsfc->filter->close(bsfc);
     av_freep(&bsfc->priv_data);
diff --git a/libavcodec/blockdsp.c b/libavcodec/blockdsp.c
index e3d2ca1..f5259f6 100644
--- a/libavcodec/blockdsp.c
+++ b/libavcodec/blockdsp.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -65,6 +65,8 @@ av_cold void ff_blockdsp_init(BlockDSPContext *c, AVCodecContext *avctx)
     c->fill_block_tab[0] = fill_block16_c;
     c->fill_block_tab[1] = fill_block8_c;
 
+    if (ARCH_ALPHA)
+        ff_blockdsp_init_alpha(c, high_bit_depth);
     if (ARCH_ARM)
         ff_blockdsp_init_arm(c, high_bit_depth);
     if (ARCH_PPC)
diff --git a/libavcodec/blockdsp.h b/libavcodec/blockdsp.h
index 32c671c..c7ad265 100644
--- a/libavcodec/blockdsp.h
+++ b/libavcodec/blockdsp.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -40,6 +40,7 @@ typedef struct BlockDSPContext {
 
 void ff_blockdsp_init(BlockDSPContext *c, AVCodecContext *avctx);
 
+void ff_blockdsp_init_alpha(BlockDSPContext *c, unsigned high_bit_depth);
 void ff_blockdsp_init_arm(BlockDSPContext *c, unsigned high_bit_depth);
 void ff_blockdsp_init_ppc(BlockDSPContext *c, unsigned high_bit_depth);
 #if FF_API_XVMC
diff --git a/libavcodec/bmp.c b/libavcodec/bmp.c
index 15c33a0..458fd0c 100644
--- a/libavcodec/bmp.c
+++ b/libavcodec/bmp.c
@@ -2,20 +2,20 @@
  * BMP image format decoder
  * Copyright (c) 2005 Mans Rullgard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -40,7 +40,8 @@ static int bmp_decode_frame(AVCodecContext *avctx,
     BiCompression comp;
     unsigned int ihsize;
     int i, j, n, linesize, ret;
-    uint32_t rgb[3];
+    uint32_t rgb[3] = {0};
+    uint32_t alpha = 0;
     uint8_t *ptr;
     int dsize;
     const uint8_t *buf0 = buf;
@@ -69,7 +70,7 @@ static int bmp_decode_frame(AVCodecContext *avctx,
 
     hsize  = bytestream_get_le32(&buf); /* header size */
     ihsize = bytestream_get_le32(&buf); /* more header size */
-    if (ihsize + 14 > hsize) {
+    if (ihsize + 14LL > hsize) {
         av_log(avctx, AV_LOG_ERROR, "invalid header size %u\n", hsize);
         return AVERROR_INVALIDDATA;
     }
@@ -86,7 +87,8 @@ static int bmp_decode_frame(AVCodecContext *avctx,
     }
 
     switch (ihsize) {
-    case  40: // windib v3
+    case  40: // windib
+    case  56: // windib v3
     case  64: // OS/2 v2
     case 108: // windib v4
     case 124: // windib v5
@@ -110,7 +112,7 @@ static int bmp_decode_frame(AVCodecContext *avctx,
 
     depth = bytestream_get_le16(&buf);
 
-    if (ihsize == 40)
+    if (ihsize >= 40)
         comp = bytestream_get_le32(&buf);
     else
         comp = BMP_RGB;
@@ -126,6 +128,8 @@ static int bmp_decode_frame(AVCodecContext *avctx,
         rgb[0] = bytestream_get_le32(&buf);
         rgb[1] = bytestream_get_le32(&buf);
         rgb[2] = bytestream_get_le32(&buf);
+        if (ihsize > 40)
+        alpha = bytestream_get_le32(&buf);
     }
 
     avctx->width  = width;
@@ -136,21 +140,21 @@ static int bmp_decode_frame(AVCodecContext *avctx,
     switch (depth) {
     case 32:
         if (comp == BMP_BITFIELDS) {
-            rgb[0] = (rgb[0] >> 15) & 3;
-            rgb[1] = (rgb[1] >> 15) & 3;
-            rgb[2] = (rgb[2] >> 15) & 3;
-
-            if (rgb[0] + rgb[1] + rgb[2] != 3 ||
-                rgb[0] == rgb[1] || rgb[0] == rgb[2] || rgb[1] == rgb[2]) {
-                break;
+            if (rgb[0] == 0xFF000000 && rgb[1] == 0x00FF0000 && rgb[2] == 0x0000FF00)
+                avctx->pix_fmt = alpha ? AV_PIX_FMT_ABGR : AV_PIX_FMT_0BGR;
+            else if (rgb[0] == 0x00FF0000 && rgb[1] == 0x0000FF00 && rgb[2] == 0x000000FF)
+                avctx->pix_fmt = alpha ? AV_PIX_FMT_BGRA : AV_PIX_FMT_BGR0;
+            else if (rgb[0] == 0x0000FF00 && rgb[1] == 0x00FF0000 && rgb[2] == 0xFF000000)
+                avctx->pix_fmt = alpha ? AV_PIX_FMT_ARGB : AV_PIX_FMT_0RGB;
+            else if (rgb[0] == 0x000000FF && rgb[1] == 0x0000FF00 && rgb[2] == 0x00FF0000)
+                avctx->pix_fmt = alpha ? AV_PIX_FMT_RGBA : AV_PIX_FMT_RGB0;
+            else {
+                av_log(avctx, AV_LOG_ERROR, "Unknown bitfields %0X %0X %0X\n", rgb[0], rgb[1], rgb[2]);
+                return AVERROR(EINVAL);
             }
         } else {
-            rgb[0] = 2;
-            rgb[1] = 1;
-            rgb[2] = 0;
+            avctx->pix_fmt = AV_PIX_FMT_BGRA;
         }
-
-        avctx->pix_fmt = AV_PIX_FMT_BGR24;
         break;
     case 24:
         avctx->pix_fmt = AV_PIX_FMT_BGR24;
@@ -199,10 +203,8 @@ static int bmp_decode_frame(AVCodecContext *avctx,
         return AVERROR_INVALIDDATA;
     }
 
-    if ((ret = ff_get_buffer(avctx, p, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
         return ret;
-    }
     p->pict_type = AV_PICTURE_TYPE_I;
     p->key_frame = 1;
 
@@ -210,7 +212,7 @@ static int bmp_decode_frame(AVCodecContext *avctx,
     dsize = buf_size - hsize;
 
     /* Line size in file multiple of 4 */
-    n = ((avctx->width * depth) / 8 + 3) & ~3;
+    n = ((avctx->width * depth + 31) / 8) & ~3;
 
     if (n * avctx->height > dsize && comp != BMP_RLE4 && comp != BMP_RLE8) {
         av_log(avctx, AV_LOG_ERROR, "not enough data (%d < %d)\n",
@@ -246,20 +248,26 @@ static int bmp_decode_frame(AVCodecContext *avctx,
             } else if (t) {
                 colors = t;
             }
+        } else {
+            colors = FFMIN(256, (hsize-ihsize-14) / 3);
         }
         buf = buf0 + 14 + ihsize; //palette location
         // OS/2 bitmap, 3 bytes per palette entry
         if ((hsize-ihsize-14) < (colors << 2)) {
+            if ((hsize-ihsize-14) < colors * 3) {
+                av_log(avctx, AV_LOG_ERROR, "palette doesn't fit in packet\n");
+                return AVERROR_INVALIDDATA;
+            }
             for (i = 0; i < colors; i++)
-                ((uint32_t*)p->data[1])[i] = bytestream_get_le24(&buf);
+                ((uint32_t*)p->data[1])[i] = (0xFFU<<24) | bytestream_get_le24(&buf);
         } else {
             for (i = 0; i < colors; i++)
-                ((uint32_t*)p->data[1])[i] = bytestream_get_le32(&buf);
+                ((uint32_t*)p->data[1])[i] = 0xFFU << 24 | bytestream_get_le32(&buf);
         }
         buf = buf0 + hsize;
     }
     if (comp == BMP_RLE4 || comp == BMP_RLE8) {
-        if (height < 0) {
+        if (comp == BMP_RLE8 && height < 0) {
             p->data[0]    +=  p->linesize[0] * (avctx->height - 1);
             p->linesize[0] = -p->linesize[0];
         }
@@ -290,6 +298,7 @@ static int bmp_decode_frame(AVCodecContext *avctx,
             break;
         case 8:
         case 24:
+        case 32:
             for (i = 0; i < avctx->height; i++) {
                 memcpy(ptr, buf, n);
                 buf += n;
@@ -319,23 +328,6 @@ static int bmp_decode_frame(AVCodecContext *avctx,
                 ptr += linesize;
             }
             break;
-        case 32:
-            for (i = 0; i < avctx->height; i++) {
-                const uint8_t *src = buf;
-                uint8_t *dst       = ptr;
-
-                for (j = 0; j < avctx->width; j++) {
-                    dst[0] = src[rgb[2]];
-                    dst[1] = src[rgb[1]];
-                    dst[2] = src[rgb[0]];
-                    dst += 3;
-                    src += 4;
-                }
-
-                buf += n;
-                ptr += linesize;
-            }
-            break;
         default:
             av_log(avctx, AV_LOG_ERROR, "BMP decoder is broken\n");
             return AVERROR_INVALIDDATA;
diff --git a/libavcodec/bmp.h b/libavcodec/bmp.h
index a472f59..fb21090 100644
--- a/libavcodec/bmp.h
+++ b/libavcodec/bmp.h
@@ -2,20 +2,20 @@
  * internals for BMP codecs
  * Copyright (c) 2005 Mans Rullgard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/bmp_parser.c b/libavcodec/bmp_parser.c
index b85dd8b..eae8ae0 100644
--- a/libavcodec/bmp_parser.c
+++ b/libavcodec/bmp_parser.c
@@ -2,20 +2,20 @@
  * BMP parser
  * Copyright (c) 2012 Paul B Mahol
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/bmpenc.c b/libavcodec/bmpenc.c
index a14fc61..2a1956d 100644
--- a/libavcodec/bmpenc.c
+++ b/libavcodec/bmpenc.c
@@ -3,24 +3,25 @@
  * Copyright (c) 2006, 2007 Michel Bardiaux
  * Copyright (c) 2009 Daniel Verkamp <daniel at drv.nu>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "libavutil/imgutils.h"
+#include "libavutil/avassert.h"
 #include "avcodec.h"
 #include "bytestream.h"
 #include "bmp.h"
@@ -32,6 +33,9 @@ static const uint32_t rgb444_masks[]  = { 0x0F00, 0x00F0, 0x000F };
 
 static av_cold int bmp_encode_init(AVCodecContext *avctx){
     switch (avctx->pix_fmt) {
+    case AV_PIX_FMT_BGRA:
+        avctx->bits_per_coded_sample = 32;
+        break;
     case AV_PIX_FMT_BGR24:
         avctx->bits_per_coded_sample = 24;
         break;
@@ -53,7 +57,7 @@ static av_cold int bmp_encode_init(AVCodecContext *avctx){
         break;
     default:
         av_log(avctx, AV_LOG_INFO, "unsupported pixel format\n");
-        return -1;
+        return AVERROR(EINVAL);
     }
 
     avctx->coded_frame = av_frame_alloc();
@@ -69,6 +73,7 @@ static int bmp_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     const AVFrame * const p = pict;
     int n_bytes_image, n_bytes_per_row, n_bytes, i, n, hsize, ret;
     const uint32_t *pal = NULL;
+    uint32_t palette256[256];
     int pad_bytes_per_row, pal_entries = 0, compression = BMP_RGB;
     int bit_count = avctx->bits_per_coded_sample;
     uint8_t *ptr, *buf;
@@ -91,7 +96,10 @@ static int bmp_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     case AV_PIX_FMT_RGB4_BYTE:
     case AV_PIX_FMT_BGR4_BYTE:
     case AV_PIX_FMT_GRAY8:
-        avpriv_set_systematic_pal2((uint32_t*)p->data[1], avctx->pix_fmt);
+        av_assert1(bit_count == 8);
+        avpriv_set_systematic_pal2(palette256, avctx->pix_fmt);
+        pal = palette256;
+        break;
     case AV_PIX_FMT_PAL8:
         pal = (uint32_t *)p->data[1];
         break;
@@ -110,10 +118,8 @@ static int bmp_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 #define SIZE_BITMAPINFOHEADER 40
     hsize = SIZE_BITMAPFILEHEADER + SIZE_BITMAPINFOHEADER + (pal_entries << 2);
     n_bytes = n_bytes_image + hsize;
-    if ((ret = ff_alloc_packet(pkt, n_bytes)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
+    if ((ret = ff_alloc_packet2(avctx, pkt, n_bytes)) < 0)
         return ret;
-    }
     buf = pkt->data;
     bytestream_put_byte(&buf, 'B');                   // BITMAPFILEHEADER.bfType
     bytestream_put_byte(&buf, 'M');                   // do.
@@ -172,8 +178,8 @@ AVCodec ff_bmp_encoder = {
     .encode2        = bmp_encode_frame,
     .close          = bmp_encode_close,
     .pix_fmts       = (const enum AVPixelFormat[]){
-        AV_PIX_FMT_BGR24,
-        AV_PIX_FMT_RGB555, AV_PIX_FMT_RGB444, AV_PIX_FMT_RGB565,
+        AV_PIX_FMT_BGRA, AV_PIX_FMT_BGR24,
+        AV_PIX_FMT_RGB565, AV_PIX_FMT_RGB555, AV_PIX_FMT_RGB444,
         AV_PIX_FMT_RGB8, AV_PIX_FMT_BGR8, AV_PIX_FMT_RGB4_BYTE, AV_PIX_FMT_BGR4_BYTE, AV_PIX_FMT_GRAY8, AV_PIX_FMT_PAL8,
         AV_PIX_FMT_MONOBLACK,
         AV_PIX_FMT_NONE
diff --git a/libavcodec/bmvaudio.c b/libavcodec/bmvaudio.c
index 0f8c224..0e473df 100644
--- a/libavcodec/bmvaudio.c
+++ b/libavcodec/bmvaudio.c
@@ -2,20 +2,20 @@
  * Discworld II BMV audio decoder
  * Copyright (c) 2011 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -58,10 +58,8 @@ static int bmv_aud_decode_frame(AVCodecContext *avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = total_blocks * 32;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     output_samples = (int16_t *)frame->data[0];
 
     for (blocks = 0; blocks < total_blocks; blocks++) {
diff --git a/libavcodec/bmvvideo.c b/libavcodec/bmvvideo.c
index ebc8e7a..5143b2a 100644
--- a/libavcodec/bmvvideo.c
+++ b/libavcodec/bmvvideo.c
@@ -2,23 +2,24 @@
  * Discworld II BMV video decoder
  * Copyright (c) 2011 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/avassert.h"
 #include "libavutil/common.h"
 
 #include "avcodec.h"
@@ -100,6 +101,8 @@ static int decode_bmv_frame(const uint8_t *source, int src_len, uint8_t *frame,
         }
         if (!(val & 0xC)) {
             for (;;) {
+                if(shift>22)
+                    return -1;
                 if (!read_two_nibbles) {
                     if (src < source || src >= source_end)
                         return AVERROR_INVALIDDATA;
@@ -133,6 +136,7 @@ static int decode_bmv_frame(const uint8_t *source, int src_len, uint8_t *frame,
         }
         advance_mode = val & 1;
         len = (val >> 1) - 1;
+        av_assert0(len>0);
         mode += 1 + advance_mode;
         if (mode >= 4)
             mode -= 3;
@@ -185,8 +189,6 @@ static int decode_bmv_frame(const uint8_t *source, int src_len, uint8_t *frame,
                 memset(dst, val, len);
             }
             break;
-        default:
-            break;
         }
         if (dst == dst_end)
             return 0;
@@ -227,7 +229,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             return AVERROR_INVALIDDATA;
         }
         for (i = 0; i < 256; i++)
-            c->pal[i] = bytestream_get_be24(&c->stream);
+            c->pal[i] = 0xFFU << 24 | bytestream_get_be24(&c->stream);
     }
     if (type & BMV_SCROLL) {
         if (c->stream - pkt->data > pkt->size - 2) {
@@ -241,10 +243,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         scr_off = 0;
     }
 
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     if (decode_bmv_frame(c->stream, pkt->size - (c->stream - pkt->data), c->frame, scr_off)) {
         av_log(avctx, AV_LOG_ERROR, "Error decoding frame data\n");
@@ -276,6 +276,11 @@ static av_cold int decode_init(AVCodecContext *avctx)
     c->avctx = avctx;
     avctx->pix_fmt = AV_PIX_FMT_PAL8;
 
+    if (avctx->width != SCREEN_WIDE || avctx->height != SCREEN_HIGH) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid dimension %dx%d\n", avctx->width, avctx->height);
+        return AVERROR_INVALIDDATA;
+    }
+
     c->frame = c->frame_base + 640;
 
     return 0;
diff --git a/libavcodec/brenderpix.c b/libavcodec/brenderpix.c
index 25aebed..02d922f 100644
--- a/libavcodec/brenderpix.c
+++ b/libavcodec/brenderpix.c
@@ -2,20 +2,20 @@
  * BRender PIX (.pix) image decoder
  * Copyright (c) 2012 Aleksi Nurmi
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -134,7 +134,7 @@ static int pix_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
 {
     AVFrame *frame = data;
 
-    int ret, i, j;
+    int ret, i;
     GetByteContext gb;
 
     unsigned int bytes_pp;
@@ -142,6 +142,7 @@ static int pix_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     unsigned int chunk_type;
     unsigned int data_len;
     unsigned int bytes_per_scanline;
+    unsigned int bytes_left;
     PixHeader hdr;
 
     bytestream2_init(&gb, avpkt->data, avpkt->size);
@@ -168,7 +169,7 @@ static int pix_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     ret = pix_decode_header(&hdr, &gb);
     if (ret < 0) {
         av_log(avctx, AV_LOG_ERROR, "Invalid header length.\n");
-        return AVERROR_INVALIDDATA;
+        return ret;
     }
     switch (hdr.format) {
     case 3:
@@ -187,7 +188,10 @@ static int pix_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         avctx->pix_fmt = AV_PIX_FMT_RGB24;
         bytes_pp = 3;
         break;
-    case 7: // XRGB
+    case 7:
+        avctx->pix_fmt = AV_PIX_FMT_0RGB;
+        bytes_pp = 4;
+        break;
     case 8: // ARGB
         avctx->pix_fmt = AV_PIX_FMT_ARGB;
         bytes_pp = 4;
@@ -219,22 +223,21 @@ static int pix_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         ret = pix_decode_header(&palhdr, &gb);
         if (ret < 0) {
             av_log(avctx, AV_LOG_ERROR, "Invalid palette header length.\n");
-            return AVERROR_INVALIDDATA;
+            return ret;
         }
         if (palhdr.format != 7)
             avpriv_request_sample(avctx, "Palette not in RGB format");
 
         chunk_type = bytestream2_get_be32(&gb);
         data_len = bytestream2_get_be32(&gb);
-        if (chunk_type != IMAGE_DATA_CHUNK ||
-            bytestream2_get_bytes_left(&gb) < data_len) {
+        bytestream2_skip(&gb, 8);
+        if (chunk_type != IMAGE_DATA_CHUNK || data_len != 1032 ||
+            bytestream2_get_bytes_left(&gb) < 1032) {
             av_log(avctx, AV_LOG_ERROR, "Invalid palette data.\n");
             return AVERROR_INVALIDDATA;
         }
-
         // palette data is surrounded by 8 null bytes (both top and bottom)
-        bytestream2_skip(&gb, 8);
-        // convert to machine endian format (ARGB)
+        // convert 0RGB to machine endian format (ARGB32)
         for (i = 0; i < 256; ++i)
             *pal_out++ = (0xFFU << 24) | bytestream2_get_be32u(&gb);
         bytestream2_skip(&gb, 8);
@@ -259,9 +262,10 @@ static int pix_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
 
     // read the image data to the buffer
     bytes_per_scanline = bytes_pp * hdr.width;
-    if (chunk_type != IMAGE_DATA_CHUNK ||
-        data_len < bytes_per_scanline * hdr.height ||
-        bytestream2_get_bytes_left(&gb) < data_len) {
+    bytes_left = bytestream2_get_bytes_left(&gb);
+
+    if (chunk_type != IMAGE_DATA_CHUNK || data_len != bytes_left ||
+        bytes_left / bytes_per_scanline < hdr.height) {
         av_log(avctx, AV_LOG_ERROR, "Invalid image data.\n");
         return AVERROR_INVALIDDATA;
     }
@@ -271,12 +275,6 @@ static int pix_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                         bytes_per_scanline,
                         bytes_per_scanline, hdr.height);
 
-    // make alpha opaque for XRGB
-    if (hdr.format == 7)
-        for (j = 0; j < frame->height; j++)
-            for (i = 0; i < frame->linesize[0]; i += 4)
-                frame->data[0][j * frame->linesize[0] + i] = 0xFF;
-
     frame->pict_type = AV_PICTURE_TYPE_I;
     frame->key_frame = 1;
     *got_frame = 1;
diff --git a/libavcodec/bswapdsp.c b/libavcodec/bswapdsp.c
index 6700cfd..a6e1ec0 100644
--- a/libavcodec/bswapdsp.c
+++ b/libavcodec/bswapdsp.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/bswapdsp.h b/libavcodec/bswapdsp.h
index fd10a88..f167d77 100644
--- a/libavcodec/bswapdsp.h
+++ b/libavcodec/bswapdsp.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/bytestream.h b/libavcodec/bytestream.h
index 3eab225..c2cb601 100644
--- a/libavcodec/bytestream.h
+++ b/libavcodec/bytestream.h
@@ -3,20 +3,20 @@
  * copyright (c) 2006 Baptiste Coudurier <baptiste.coudurier@free.fr>
  * Copyright (c) 2012 Aneesh Dogra (lionaneesh) <lionaneesh@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,6 +26,7 @@
 #include <stdint.h>
 #include <string.h>
 
+#include "libavutil/avassert.h"
 #include "libavutil/common.h"
 #include "libavutil/intreadwrite.h"
 
@@ -131,6 +132,7 @@ static av_always_inline void bytestream2_init(GetByteContext *g,
                                               const uint8_t *buf,
                                               int buf_size)
 {
+    av_assert0(buf_size >= 0);
     g->buffer       = buf;
     g->buffer_start = buf;
     g->buffer_end   = buf + buf_size;
@@ -140,6 +142,7 @@ static av_always_inline void bytestream2_init_writer(PutByteContext *p,
                                                      uint8_t *buf,
                                                      int buf_size)
 {
+    av_assert0(buf_size >= 0);
     p->buffer       = buf;
     p->buffer_start = buf;
     p->buffer_end   = buf + buf_size;
diff --git a/libavcodec/c93.c b/libavcodec/c93.c
index c7cc682..ad3fa3b 100644
--- a/libavcodec/c93.c
+++ b/libavcodec/c93.c
@@ -2,20 +2,20 @@
  * Interplay C93 video decoder
  * Copyright (c) 2007 Anssi Hannula <anssi.hannula@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -133,12 +133,13 @@ static int decode_frame(AVCodecContext *avctx, void *data,
     uint8_t *out;
     int stride, ret, i, x, y, b, bt = 0;
 
+    if ((ret = ff_set_dimensions(avctx, WIDTH, HEIGHT)) < 0)
+        return ret;
+
     c93->currentpic ^= 1;
 
-    if ((ret = ff_reget_buffer(avctx, newpic)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, newpic)) < 0)
         return ret;
-    }
 
     stride = newpic->linesize[0];
 
@@ -176,7 +177,14 @@ static int decode_frame(AVCodecContext *avctx, void *data,
             case C93_4X4_FROM_PREV:
                 for (j = 0; j < 8; j += 4) {
                     for (i = 0; i < 8; i += 4) {
-                        offset = bytestream2_get_le16(&gb);
+                        int offset = bytestream2_get_le16(&gb);
+                        int from_x = offset % WIDTH;
+                        int from_y = offset / WIDTH;
+                        if (block_type == C93_4X4_FROM_CURR && from_y == y+j &&
+                            (FFABS(from_x - x-i) < 4 || FFABS(from_x - x-i) > WIDTH-4)) {
+                            avpriv_request_sample(avctx, "block overlap %d %d %d %d\n", from_x, x+i, from_y, y+j);
+                            return AVERROR_INVALIDDATA;
+                        }
                         if ((ret = copy_block(avctx, &out[j*stride+i],
                                               copy_from, offset, 4, stride)) < 0)
                             return ret;
@@ -236,7 +244,7 @@ static int decode_frame(AVCodecContext *avctx, void *data,
     if (b & C93_HAS_PALETTE) {
         uint32_t *palette = (uint32_t *) newpic->data[1];
         for (i = 0; i < 256; i++) {
-            palette[i] = bytestream2_get_be24(&gb);
+            palette[i] = 0xFFU << 24 | bytestream2_get_be24(&gb);
         }
         newpic->palette_has_changed = 1;
     } else {
diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c
index b6f56f0..65579d8 100644
--- a/libavcodec/cabac.c
+++ b/libavcodec/cabac.c
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,6 +27,7 @@
 #include <string.h>
 
 #include "libavutil/common.h"
+#include "libavutil/timer.h"
 #include "get_bits.h"
 #include "cabac.h"
 #include "cabac_functions.h"
@@ -106,6 +107,19 @@ static const uint8_t last_coeff_flag_offset_8x8[63] = {
  *
  * @param buf_size size of buf in bits
  */
+void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size){ // reset the encoder fields of c and attach buf as the output of c->pb
+    init_put_bits(&c->pb, buf, buf_size); // NOTE(review): buf_size is forwarded unchanged; confirm the unit init_put_bits expects
+
+    c->low= 0;
+    c->range= 0x1FE;
+    c->outstanding_count= 0; // no deferred bits yet (the counter is consumed by put_cabac_bit)
+    c->pb.bit_left++; //avoids firstBitFlag
+}
+
+/**
+ *
+ * @param buf_size size of buf in bytes
+ */
 void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size){
     c->bytestream_start=
     c->bytestream= buf;
@@ -134,7 +148,6 @@ void ff_init_cabac_states(void)
             ff_h264_lps_range[j*2*64+2*i+0]=
             ff_h264_lps_range[j*2*64+2*i+1]= lps_range[i][j];
         }
-
         ff_h264_mlps_state[128 + 2 * i + 0] = 2 * mps_state[i] + 0;
         ff_h264_mlps_state[128 + 2 * i + 1] = 2 * mps_state[i] + 1;
 
@@ -152,3 +165,151 @@ void ff_init_cabac_states(void)
 
     initialized = 1;
 }
+
+#ifdef TEST
+#define SIZE 10240
+
+#include "libavutil/lfg.h"
+#include "avcodec.h"
+
+static inline void put_cabac_bit(CABACContext *c, int b){ // write b, then write every deferred bit as the complement of b
+    put_bits(&c->pb, 1, b);
+    for(;c->outstanding_count; c->outstanding_count--){ // the loop leaves outstanding_count at zero
+        put_bits(&c->pb, 1, 1-b);
+    }
+}
+
+static inline void renorm_cabac_encoder(CABACContext *c){ // double range and low until range >= 0x100, writing or deferring one bit per step
+    while(c->range < 0x100){
+        //FIXME optimize
+        if(c->low<0x100){ // low < 0x100: write a 0 bit
+            put_cabac_bit(c, 0);
+        }else if(c->low<0x200){ // 0x100 <= low < 0x200: bit value not decided yet, only count it
+            c->outstanding_count++;
+            c->low -= 0x100;
+        }else{ // low >= 0x200: write a 1 bit
+            put_cabac_bit(c, 1);
+            c->low -= 0x200;
+        }
+
+        c->range+= c->range;
+        c->low += c->low;
+    }
+}
+
+static void put_cabac(CABACContext *c, uint8_t * const state, int bit){ // encode one bin with the context byte *state, which is updated in place
+    int RangeLPS= ff_h264_lps_range[2*(c->range&0xC0) + *state]; // table index built from bits 6-7 of range and the current state
+
+    if(bit == ((*state)&1)){ // bit equals the low bit of the state: only range shrinks
+        c->range -= RangeLPS;
+        *state    = ff_h264_mlps_state[128 + *state];
+    }else{ // bit differs: low is moved up and range becomes RangeLPS
+        c->low += c->range - RangeLPS;
+        c->range = RangeLPS;
+        *state= ff_h264_mlps_state[127 - *state];
+    }
+
+    renorm_cabac_encoder(c);
+}
+
+/** Encode one bin without a context state: low is doubled, range is left unchanged.
+ * @param bit 0 -> write zero bit, !=0 write one bit
+ */
+static void put_cabac_bypass(CABACContext *c, int bit){
+    c->low += c->low;
+
+    if(bit){
+        c->low += c->range;
+    }
+//FIXME optimize
+    if(c->low<0x200){ // low < 0x200: write a 0 bit
+        put_cabac_bit(c, 0);
+    }else if(c->low<0x400){ // 0x200 <= low < 0x400: defer the bit, only count it
+        c->outstanding_count++;
+        c->low -= 0x200;
+    }else{ // low >= 0x400: write a 1 bit
+        put_cabac_bit(c, 1);
+        c->low -= 0x400;
+    }
+}
+
+/**
+ * Encode the terminate bin; when bit is nonzero the final bits are written and c->pb is flushed.
+ * @return the number of bytes written
+ */
+static int put_cabac_terminate(CABACContext *c, int bit){
+    c->range -= 2;
+
+    if(!bit){
+        renorm_cabac_encoder(c);
+    }else{
+        c->low += c->range;
+        c->range= 2;
+
+        renorm_cabac_encoder(c);
+
+        av_assert0(c->low <= 0x1FF);
+        put_cabac_bit(c, c->low>>9); // NOTE(review): with low <= 0x1FF asserted above, low>>9 is always 0 here
+        put_bits(&c->pb, 2, ((c->low>>7)&3)|1);
+
+        flush_put_bits(&c->pb); //FIXME FIXME FIXME XXX wrong
+    }
+
+    return (put_bits_count(&c->pb)+7)>>3; // bit count rounded up to whole bytes
+}
+
+int main(void){ // TEST-only round trip: encode SIZE bypass bins and SIZE context bins, then decode and compare
+    CABACContext c;
+    uint8_t b[9*SIZE]; // buffer the encoder writes to and the decoder reads from
+    uint8_t r[9*SIZE]; // input symbols; only the first SIZE entries are filled and used
+    int i;
+    uint8_t state[10]= {0}; // only state[0] is used, as the single context byte
+    AVLFG prng;
+
+    av_lfg_init(&prng, 1);
+    ff_init_cabac_encoder(&c, b, SIZE); // NOTE(review): passes SIZE although b holds 9*SIZE bytes -- confirm intended
+    ff_init_cabac_states();
+
+    for(i=0; i<SIZE; i++){
+        if(2*i<SIZE) r[i] = av_lfg_get(&prng) % 7; // first half: pseudo-random values (only bit 0 is encoded below)
+        else         r[i] = (i>>8)&1; // second half: value toggles every 256 entries
+    }
+
+    for(i=0; i<SIZE; i++){
+START_TIMER
+        put_cabac_bypass(&c, r[i]&1);
+STOP_TIMER("put_cabac_bypass")
+    }
+
+    for(i=0; i<SIZE; i++){
+START_TIMER
+        put_cabac(&c, state, r[i]&1);
+STOP_TIMER("put_cabac")
+    }
+
+    put_cabac_terminate(&c, 1);
+
+    ff_init_cabac_decoder(&c, b, SIZE); // reuse the same context, now reading back from b
+
+    memset(state, 0, sizeof(state)); // decoder must start from the same context state as the encoder did
+
+    for(i=0; i<SIZE; i++){
+START_TIMER
+        if( (r[i]&1) != get_cabac_bypass(&c) )
+            av_log(NULL, AV_LOG_ERROR, "CABAC bypass failure at %d\n", i);
+STOP_TIMER("get_cabac_bypass")
+    }
+
+    for(i=0; i<SIZE; i++){
+START_TIMER
+        if( (r[i]&1) != get_cabac_noinline(&c, state) )
+            av_log(NULL, AV_LOG_ERROR, "CABAC failure at %d\n", i);
+STOP_TIMER("get_cabac")
+    }
+    if(!get_cabac_terminate(&c))
+        av_log(NULL, AV_LOG_ERROR, "where's the Terminator?\n");
+
+    return 0; // mismatches are only logged; the exit status is 0 regardless
+}
+
+#endif /* TEST */
diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h
index 426f338..eb6b521 100644
--- a/libavcodec/cabac.h
+++ b/libavcodec/cabac.h
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -43,11 +43,14 @@ extern uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63];
 typedef struct CABACContext{
     int low;
     int range;
+    int outstanding_count; // count of deferred bits; zeroed by ff_init_cabac_encoder()
     const uint8_t *bytestream_start;
     const uint8_t *bytestream;
     const uint8_t *bytestream_end;
+    PutBitContext pb; // bit writer set up by ff_init_cabac_encoder()
 }CABACContext;
 
+void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size);
 void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size);
 void ff_init_cabac_states(void);
 
diff --git a/libavcodec/cabac_functions.h b/libavcodec/cabac_functions.h
index 4b8f1bc..d7d6d7d 100644
--- a/libavcodec/cabac_functions.h
+++ b/libavcodec/cabac_functions.h
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -54,7 +54,9 @@ static void refill(CABACContext *c){
         c->low+= c->bytestream[0]<<1;
 #endif
     c->low -= CABAC_MASK;
+#if !UNCHECKED_BITSTREAM_READER
     if (c->bytestream < c->bytestream_end)
+#endif
         c->bytestream += CABAC_BITS / 8;
 }
 
@@ -82,7 +84,9 @@ static void refill2(CABACContext *c){
 #endif
 
     c->low += x<<i;
+#if !UNCHECKED_BITSTREAM_READER
     if (c->bytestream < c->bytestream_end)
+#endif
         c->bytestream += CABAC_BITS/8;
 }
 
diff --git a/libavcodec/cavs.c b/libavcodec/cavs.c
index 2be50a7..a41a8aa 100644
--- a/libavcodec/cavs.c
+++ b/libavcodec/cavs.c
@@ -2,20 +2,20 @@
  * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
  * Copyright (c) 2006  Stefan Gehrer <stefan.gehrer@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -74,15 +74,16 @@ static inline int get_bs(cavs_vector *mvP, cavs_vector *mvQ, int b)
 {
     if ((mvP->ref == REF_INTRA) || (mvQ->ref == REF_INTRA))
         return 2;
-    if ((abs(mvP->x - mvQ->x) >= 4) || (abs(mvP->y - mvQ->y) >= 4))
+    if((abs(mvP->x - mvQ->x) >= 4) ||
+       (abs(mvP->y - mvQ->y) >= 4) ||
+       (mvP->ref != mvQ->ref))
         return 1;
     if (b) {
         mvP += MV_BWD_OFFS;
         mvQ += MV_BWD_OFFS;
-        if ((abs(mvP->x - mvQ->x) >= 4) || (abs(mvP->y - mvQ->y) >= 4))
-            return 1;
-    } else {
-        if (mvP->ref != mvQ->ref)
+        if((abs(mvP->x - mvQ->x) >= 4) ||
+           (abs(mvP->y - mvQ->y) >= 4) ||
+           (mvP->ref != mvQ->ref))
             return 1;
     }
     return 0;
@@ -148,6 +149,8 @@ void ff_cavs_filter(AVSContext *h, enum cavs_mb mb_type)
                 qp_avg = (h->qp + h->left_qp + 1) >> 1;
                 SET_PARAMS;
                 h->cdsp.cavs_filter_lv(h->cy, h->l_stride, alpha, beta, tc, bs[0], bs[1]);
+                qp_avg = (ff_cavs_chroma_qp[h->qp] + ff_cavs_chroma_qp[h->left_qp] + 1) >> 1;
+                SET_PARAMS;
                 h->cdsp.cavs_filter_cv(h->cu, h->c_stride, alpha, beta, tc, bs[0], bs[1]);
                 h->cdsp.cavs_filter_cv(h->cv, h->c_stride, alpha, beta, tc, bs[0], bs[1]);
             }
@@ -160,6 +163,8 @@ void ff_cavs_filter(AVSContext *h, enum cavs_mb mb_type)
                 qp_avg = (h->qp + h->top_qp[h->mbx] + 1) >> 1;
                 SET_PARAMS;
                 h->cdsp.cavs_filter_lh(h->cy, h->l_stride, alpha, beta, tc, bs[4], bs[5]);
+                qp_avg = (ff_cavs_chroma_qp[h->qp] + ff_cavs_chroma_qp[h->top_qp[h->mbx]] + 1) >> 1;
+                SET_PARAMS;
                 h->cdsp.cavs_filter_ch(h->cu, h->c_stride, alpha, beta, tc, bs[4], bs[5]);
                 h->cdsp.cavs_filter_ch(h->cv, h->c_stride, alpha, beta, tc, bs[4], bs[5]);
             }
@@ -233,9 +238,14 @@ void ff_cavs_load_intra_pred_chroma(AVSContext *h)
     /* extend borders by one pixel */
     h->left_border_u[9]              = h->left_border_u[8];
     h->left_border_v[9]              = h->left_border_v[8];
-    h->top_border_u[h->mbx * 10 + 9] = h->top_border_u[h->mbx * 10 + 8];
-    h->top_border_v[h->mbx * 10 + 9] = h->top_border_v[h->mbx * 10 + 8];
-    if (h->mbx && h->mby) {
+    if(h->flags & C_AVAIL) {
+        h->top_border_u[h->mbx*10 + 9] = h->top_border_u[h->mbx*10 + 11];
+        h->top_border_v[h->mbx*10 + 9] = h->top_border_v[h->mbx*10 + 11];
+    } else {
+        h->top_border_u[h->mbx * 10 + 9] = h->top_border_u[h->mbx * 10 + 8];
+        h->top_border_v[h->mbx * 10 + 9] = h->top_border_v[h->mbx * 10 + 8];
+    }
+    if((h->flags & A_AVAIL) && (h->flags & B_AVAIL)) {
         h->top_border_u[h->mbx * 10] = h->left_border_u[0] = h->topleft_border_u;
         h->top_border_v[h->mbx * 10] = h->left_border_v[0] = h->topleft_border_v;
     } else {
@@ -527,7 +537,7 @@ void ff_cavs_inter(AVSContext *h, enum cavs_mb mb_type)
 static inline void scale_mv(AVSContext *h, int *d_x, int *d_y,
                             cavs_vector *src, int distp)
 {
-    int den = h->scale_den[src->ref];
+    int den = h->scale_den[FFMAX(src->ref, 0)];
 
     *d_x = (src->x * distp * den + 256 + (src->x >> 31)) >> 9;
     *d_y = (src->y * distp * den + 256 + (src->y >> 31)) >> 9;
@@ -574,7 +584,7 @@ void ff_cavs_mv(AVSContext *h, enum cavs_mv_loc nP, enum cavs_mv_loc nC,
 
     mvP->ref  = ref;
     mvP->dist = h->dist[mvP->ref];
-    if (mvC->ref == NOT_AVAIL)
+    if (mvC->ref == NOT_AVAIL || (nP == MV_FWD_X3) || (nP == MV_BWD_X3 ))
         mvC = &h->mv[nP - 5];  // set to top-left (mvD)
     if (mode == MV_PRED_PSKIP &&
         (mvA->ref == NOT_AVAIL ||
@@ -704,7 +714,7 @@ int ff_cavs_next_mb(AVSContext *h)
  *
  ****************************************************************************/
 
-void ff_cavs_init_pic(AVSContext *h)
+int ff_cavs_init_pic(AVSContext *h)
 {
     int i;
 
@@ -725,6 +735,8 @@ void ff_cavs_init_pic(AVSContext *h)
     h->luma_scan[3]   = 8 * h->l_stride + 8;
     h->mbx            = h->mby = h->mbidx = 0;
     h->flags          = 0;
+
+    return 0;
 }
 
 /*****************************************************************************
@@ -742,16 +754,16 @@ void ff_cavs_init_top_lines(AVSContext *h)
 {
     /* alloc top line of predictors */
     h->top_qp       = av_mallocz(h->mb_width);
-    h->top_mv[0]    = av_mallocz((h->mb_width * 2 + 1) * sizeof(cavs_vector));
-    h->top_mv[1]    = av_mallocz((h->mb_width * 2 + 1) * sizeof(cavs_vector));
-    h->top_pred_Y   = av_mallocz(h->mb_width * 2 * sizeof(*h->top_pred_Y));
-    h->top_border_y = av_mallocz((h->mb_width + 1) * 16);
-    h->top_border_u = av_mallocz(h->mb_width * 10);
-    h->top_border_v = av_mallocz(h->mb_width * 10);
+    h->top_mv[0]    = av_mallocz_array(h->mb_width * 2 + 1,  sizeof(cavs_vector));
+    h->top_mv[1]    = av_mallocz_array(h->mb_width * 2 + 1,  sizeof(cavs_vector));
+    h->top_pred_Y   = av_mallocz_array(h->mb_width * 2,  sizeof(*h->top_pred_Y));
+    h->top_border_y = av_mallocz_array(h->mb_width + 1,  16);
+    h->top_border_u = av_mallocz_array(h->mb_width,  10);
+    h->top_border_v = av_mallocz_array(h->mb_width,  10);
 
     /* alloc space for co-located MVs and types */
-    h->col_mv        = av_mallocz(h->mb_width * h->mb_height * 4 *
-                                  sizeof(cavs_vector));
+    h->col_mv        = av_mallocz_array(h->mb_width * h->mb_height,
+                                        4 * sizeof(cavs_vector));
     h->col_type_base = av_mallocz(h->mb_width * h->mb_height);
     h->block         = av_mallocz(64 * sizeof(int16_t));
 }
diff --git a/libavcodec/cavs.h b/libavcodec/cavs.h
index cfae055..12f3962 100644
--- a/libavcodec/cavs.h
+++ b/libavcodec/cavs.h
@@ -2,20 +2,20 @@
  * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
  * Copyright (c) 2006  Stefan Gehrer <stefan.gehrer@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -216,6 +216,7 @@ typedef struct AVSContext {
     int luma_scan[4];
     int qp;
     int qp_fixed;
+    int pic_qp_fixed;
     int cbp;
     ScanTable scantable;
 
@@ -241,6 +242,7 @@ typedef struct AVSContext {
     int16_t *block;
 } AVSContext;
 
+extern const uint8_t     ff_cavs_chroma_qp[64];
 extern const uint8_t     ff_cavs_partition_flags[30];
 extern const cavs_vector ff_cavs_intra_mv;
 extern const cavs_vector ff_cavs_dir_mv;
@@ -269,7 +271,7 @@ void ff_cavs_mv(AVSContext *h, enum cavs_mv_loc nP, enum cavs_mv_loc nC,
                 enum cavs_mv_pred mode, enum cavs_block size, int ref);
 void ff_cavs_init_mb(AVSContext *h);
 int  ff_cavs_next_mb(AVSContext *h);
-void ff_cavs_init_pic(AVSContext *h);
+int ff_cavs_init_pic(AVSContext *h);
 void ff_cavs_init_top_lines(AVSContext *h);
 int ff_cavs_init(AVCodecContext *avctx);
 int ff_cavs_end (AVCodecContext *avctx);
diff --git a/libavcodec/cavs_parser.c b/libavcodec/cavs_parser.c
index 84f647c..6067a39 100644
--- a/libavcodec/cavs_parser.c
+++ b/libavcodec/cavs_parser.c
@@ -2,20 +2,20 @@
  * Chinese AVS video (AVS1-P2, JiZhun profile) parser.
  * Copyright (c) 2006  Stefan Gehrer <stefan.gehrer@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/cavsdata.c b/libavcodec/cavsdata.c
index 4e4a131..2835a4b 100644
--- a/libavcodec/cavsdata.c
+++ b/libavcodec/cavsdata.c
@@ -2,20 +2,20 @@
  * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
  * Copyright (c) 2006  Stefan Gehrer <stefan.gehrer@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -54,6 +54,13 @@ const uint8_t ff_cavs_partition_flags[30] = {
                       SPLITH|SPLITV, //B_8X8 = 29
 };
 
+const uint8_t ff_cavs_chroma_qp[64] = {
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 43, 44, 44,
+  45, 45, 46, 46, 47, 47, 48, 48, 48, 49, 49, 49, 50, 50, 50, 51
+};
+
 /** mark block as "no prediction from this direction"
     e.g. forward motion vector in BWD partition */
 const cavs_vector ff_cavs_dir_mv   = {0,0,1,REF_DIR};
diff --git a/libavcodec/cavsdec.c b/libavcodec/cavsdec.c
index fbbd048..34b65e6 100644
--- a/libavcodec/cavsdec.c
+++ b/libavcodec/cavsdec.c
@@ -2,20 +2,20 @@
  * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
  * Copyright (c) 2006  Stefan Gehrer <stefan.gehrer@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,6 +25,7 @@
  * @author Stefan Gehrer <stefan.gehrer@gmx.de>
  */
 
+#include "libavutil/avassert.h"
 #include "avcodec.h"
 #include "get_bits.h"
 #include "golomb.h"
@@ -51,13 +52,6 @@ static const uint8_t cbp_tab[64][2] = {
 
 static const uint8_t scan3x3[4] = { 4, 5, 7, 8 };
 
-static const uint8_t cavs_chroma_qp[64] = {
-   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
-  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
-  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 43, 44, 44,
-  45, 45, 46, 46, 47, 47, 48, 48, 48, 49, 49, 49, 50, 50, 50, 51
-};
-
 static const uint8_t dequant_shift[64] = {
   14, 14, 14, 14, 14, 14, 14, 14,
   13, 13, 13, 13, 13, 13, 13, 13,
@@ -509,11 +503,15 @@ static inline void mv_pred_sym(AVSContext *h, cavs_vector *src,
 /** kth-order exponential golomb code */
 static inline int get_ue_code(GetBitContext *gb, int order)
 {
+    unsigned ret = get_ue_golomb(gb);
+    if (ret >= ((1U<<31)>>order)) {
+        av_log(NULL, AV_LOG_ERROR, "get_ue_code: value too larger\n");
+        return AVERROR_INVALIDDATA;
+    }
     if (order) {
-        int ret = get_ue_golomb(gb) << order;
-        return ret + get_bits(gb, order);
+        return (ret<<order) + get_bits(gb, order);
     }
-    return get_ue_golomb(gb);
+    return ret;
 }
 
 static inline int dequant(AVSContext *h, int16_t *level_buf, uint8_t *run_buf,
@@ -550,29 +548,32 @@ static int decode_residual_block(AVSContext *h, GetBitContext *gb,
                                  const struct dec_2dvlc *r, int esc_golomb_order,
                                  int qp, uint8_t *dst, int stride)
 {
-    int i, level_code, esc_code, level, run, mask, ret;
+    int i, esc_code, level, mask, ret;
+    unsigned int level_code, run;
     int16_t level_buf[65];
     uint8_t run_buf[65];
     int16_t *block = h->block;
 
-    for (i = 0;i < 65; i++) {
+    for (i = 0; i < 65; i++) {
         level_code = get_ue_code(gb, r->golomb_order);
         if (level_code >= ESCAPE_CODE) {
             run      = ((level_code - ESCAPE_CODE) >> 1) + 1;
+            if(run > 64) {
+                av_log(h->avctx, AV_LOG_ERROR, "run %d is too large\n", run);
+                return AVERROR_INVALIDDATA;
+            }
             esc_code = get_ue_code(gb, esc_golomb_order);
             level    = esc_code + (run > r->max_run ? 1 : r->level_add[run]);
             while (level > r->inc_limit)
                 r++;
             mask  = -(level_code & 1);
             level = (level ^ mask) - mask;
-        } else if (level_code >= 0) {
+        } else {
             level = r->rltab[level_code][0];
             if (!level) //end of block signal
                 break;
             run = r->rltab[level_code][1];
             r  += r->rltab[level_code][2];
-        } else {
-            break;
         }
         level_buf[i] = level;
         run_buf[i]   = run;
@@ -590,10 +591,10 @@ static inline void decode_residual_chroma(AVSContext *h)
 {
     if (h->cbp & (1 << 4))
         decode_residual_block(h, &h->gb, chroma_dec, 0,
-                              cavs_chroma_qp[h->qp], h->cu, h->c_stride);
+                              ff_cavs_chroma_qp[h->qp], h->cu, h->c_stride);
     if (h->cbp & (1 << 5))
         decode_residual_block(h, &h->gb, chroma_dec, 0,
-                              cavs_chroma_qp[h->qp], h->cv, h->c_stride);
+                              ff_cavs_chroma_qp[h->qp], h->cv, h->c_stride);
 }
 
 static inline int decode_residual_inter(AVSContext *h)
@@ -602,7 +603,7 @@ static inline int decode_residual_inter(AVSContext *h)
 
     /* get coded block pattern */
     int cbp = get_ue_golomb(&h->gb);
-    if (cbp > 63 || cbp < 0) {
+    if (cbp > 63U) {
         av_log(h->avctx, AV_LOG_ERROR, "illegal inter cbp %d\n", cbp);
         return AVERROR_INVALIDDATA;
     }
@@ -673,7 +674,7 @@ static int decode_mb_i(AVSContext *h, int cbp_code)
     /* get coded block pattern */
     if (h->cur.f->pict_type == AV_PICTURE_TYPE_I)
         cbp_code = get_ue_golomb(gb);
-    if (cbp_code > 63 || cbp_code < 0) {
+    if (cbp_code > 63U) {
         av_log(h->avctx, AV_LOG_ERROR, "illegal intra cbp\n");
         return AVERROR_INVALIDDATA;
     }
@@ -760,7 +761,7 @@ static void decode_mb_p(AVSContext *h, enum cavs_mb mb_type)
     h->col_type_base[h->mbidx] = mb_type;
 }
 
-static void decode_mb_b(AVSContext *h, enum cavs_mb mb_type)
+static int decode_mb_b(AVSContext *h, enum cavs_mb mb_type)
 {
     int block;
     enum cavs_sub_mb sub_type[4];
@@ -797,6 +798,8 @@ static void decode_mb_b(AVSContext *h, enum cavs_mb mb_type)
         ff_cavs_mv(h, MV_BWD_X0, MV_BWD_C2, MV_PRED_MEDIAN, BLK_16X16, 0);
         break;
     case B_8X8:
+#define TMP_UNUSED_INX  7
+        flags = 0;
         for (block = 0; block < 4; block++)
             sub_type[block] = get_bits(&h->gb, 2);
         for (block = 0; block < 4; block++) {
@@ -804,11 +807,30 @@ static void decode_mb_b(AVSContext *h, enum cavs_mb mb_type)
             case B_SUB_DIRECT:
                 if (!h->col_type_base[h->mbidx]) {
                     /* intra MB at co-location, do in-plane prediction */
-                    ff_cavs_mv(h, mv_scan[block], mv_scan[block] - 3,
-                               MV_PRED_BSKIP, BLK_8X8, 1);
-                    ff_cavs_mv(h, mv_scan[block] + MV_BWD_OFFS,
-                               mv_scan[block] - 3 + MV_BWD_OFFS,
-                               MV_PRED_BSKIP, BLK_8X8, 0);
+                    if(flags==0) {
+                        // If the co-located MB is an intra MB, the current block size is 16x16.
+                        // AVS standard section 9.9.1
+                        if(block>0){
+                            h->mv[TMP_UNUSED_INX              ] = h->mv[MV_FWD_X0              ];
+                            h->mv[TMP_UNUSED_INX + MV_BWD_OFFS] = h->mv[MV_FWD_X0 + MV_BWD_OFFS];
+                        }
+                        ff_cavs_mv(h, MV_FWD_X0, MV_FWD_C2,
+                                   MV_PRED_BSKIP, BLK_8X8, 1);
+                        ff_cavs_mv(h, MV_FWD_X0+MV_BWD_OFFS,
+                                   MV_FWD_C2+MV_BWD_OFFS,
+                                   MV_PRED_BSKIP, BLK_8X8, 0);
+                        if(block>0) {
+                            flags = mv_scan[block];
+                            h->mv[flags              ] = h->mv[MV_FWD_X0              ];
+                            h->mv[flags + MV_BWD_OFFS] = h->mv[MV_FWD_X0 + MV_BWD_OFFS];
+                            h->mv[MV_FWD_X0              ] = h->mv[TMP_UNUSED_INX              ];
+                            h->mv[MV_FWD_X0 + MV_BWD_OFFS] = h->mv[TMP_UNUSED_INX + MV_BWD_OFFS];
+                        } else
+                            flags = MV_FWD_X0;
+                    } else {
+                        h->mv[mv_scan[block]              ] = h->mv[flags              ];
+                        h->mv[mv_scan[block] + MV_BWD_OFFS] = h->mv[flags + MV_BWD_OFFS];
+                    }
                 } else
                     mv_pred_direct(h, &h->mv[mv_scan[block]],
                                    &h->col_mv[h->mbidx * 4 + block]);
@@ -824,6 +846,7 @@ static void decode_mb_b(AVSContext *h, enum cavs_mb mb_type)
                 break;
             }
         }
+#undef TMP_UNUSED_INX
         for (block = 0; block < 4; block++) {
             if (sub_type[block] == B_SUB_BWD)
                 ff_cavs_mv(h, mv_scan[block] + MV_BWD_OFFS,
@@ -832,7 +855,11 @@ static void decode_mb_b(AVSContext *h, enum cavs_mb mb_type)
         }
         break;
     default:
-        assert((mb_type > B_SYM_16X16) && (mb_type < B_8X8));
+        if (mb_type <= B_SYM_16X16) {
+            av_log(h->avctx, AV_LOG_ERROR, "Invalid mb_type %d in B frame\n", mb_type);
+            return AVERROR_INVALIDDATA;
+        }
+        av_assert2(mb_type < B_8X8);
         flags = ff_cavs_partition_flags[mb_type];
         if (mb_type & 1) { /* 16x8 macroblock types */
             if (flags & FWD0)
@@ -867,6 +894,8 @@ static void decode_mb_b(AVSContext *h, enum cavs_mb mb_type)
     if (mb_type != B_SKIP)
         decode_residual_inter(h);
     ff_cavs_filter(h, mb_type);
+
+    return 0;
 }
 
 /*****************************************************************************
@@ -879,12 +908,18 @@ static inline int decode_slice_header(AVSContext *h, GetBitContext *gb)
 {
     if (h->stc > 0xAF)
         av_log(h->avctx, AV_LOG_ERROR, "unexpected start code 0x%02x\n", h->stc);
+
+    if (h->stc >= h->mb_height) {
+        av_log(h->avctx, AV_LOG_ERROR, "stc 0x%02x is too large\n", h->stc);
+        return AVERROR_INVALIDDATA;
+    }
+
     h->mby   = h->stc;
     h->mbidx = h->mby * h->mb_width;
 
     /* mark top macroblocks as unavailable */
     h->flags &= ~(B_AVAIL | C_AVAIL);
-    if ((h->mby == 0) && (!h->qp_fixed)) {
+    if (!h->pic_qp_fixed) {
         h->qp_fixed = get_bits1(gb);
         h->qp       = get_bits(gb, 6);
     }
@@ -977,16 +1012,17 @@ static int decode_pic(AVSContext *h)
             return AVERROR(ENOMEM);
     }
 
-    ff_cavs_init_pic(h);
+    if ((ret = ff_cavs_init_pic(h)) < 0)
+        return ret;
     h->cur.poc = get_bits(&h->gb, 8) * 2;
 
     /* get temporal distances and MV scaling factors */
     if (h->cur.f->pict_type != AV_PICTURE_TYPE_B) {
-        h->dist[0] = (h->cur.poc - h->DPB[0].poc  + 512) % 512;
+        h->dist[0] = (h->cur.poc - h->DPB[0].poc) & 511;
     } else {
-        h->dist[0] = (h->DPB[0].poc  - h->cur.poc + 512) % 512;
+        h->dist[0] = (h->DPB[0].poc  - h->cur.poc) & 511;
     }
-    h->dist[1] = (h->cur.poc - h->DPB[1].poc  + 512) % 512;
+    h->dist[1] = (h->cur.poc - h->DPB[1].poc) & 511;
     h->scale_den[0] = h->dist[0] ? 512/h->dist[0] : 0;
     h->scale_den[1] = h->dist[1] ? 512/h->dist[1] : 0;
     if (h->cur.f->pict_type == AV_PICTURE_TYPE_B) {
@@ -1006,6 +1042,7 @@ static int decode_pic(AVSContext *h)
         skip_bits1(&h->gb);     //advanced_pred_mode_disable
     skip_bits1(&h->gb);        //top_field_first
     skip_bits1(&h->gb);        //repeat_first_field
+    h->pic_qp_fixed =
     h->qp_fixed = get_bits1(&h->gb);
     h->qp       = get_bits(&h->gb, 6);
     if (h->cur.f->pict_type == AV_PICTURE_TYPE_I) {
@@ -1093,6 +1130,10 @@ static int decode_seq_header(AVSContext *h)
                                       "Width/height changing in CAVS");
         return AVERROR_PATCHWELCOME;
     }
+    if (width <= 0 || height <= 0) {
+        av_log(h->avctx, AV_LOG_ERROR, "Dimensions invalid\n");
+        return AVERROR_INVALIDDATA;
+    }
     h->width  = width;
     h->height = height;
 
@@ -1140,12 +1181,17 @@ static int cavs_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         return 0;
     }
 
+    h->stc = 0;
+
     buf_ptr = buf;
     buf_end = buf + buf_size;
     for(;;) {
         buf_ptr = avpriv_find_start_code(buf_ptr, buf_end, &stc);
-        if ((stc & 0xFFFFFE00) || buf_ptr == buf_end)
+        if ((stc & 0xFFFFFE00) || buf_ptr == buf_end) {
+            if (!h->stc)
+                av_log(h->avctx, AV_LOG_WARNING, "no frame decoded\n");
             return FFMAX(0, buf_ptr - buf);
+        }
         input_size = (buf_end - buf_ptr) * 8;
         switch (stc) {
         case CAVS_START_CODE:
@@ -1168,8 +1214,8 @@ static int cavs_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                 break;
             *got_frame = 1;
             if (h->cur.f->pict_type != AV_PICTURE_TYPE_B) {
-                if (h->DPB[1].f->data[0]) {
-                    if ((ret = av_frame_ref(data, h->DPB[1].f)) < 0)
+                if (h->DPB[!h->low_delay].f->data[0]) {
+                    if ((ret = av_frame_ref(data, h->DPB[!h->low_delay].f)) < 0)
                         return ret;
                 } else {
                     *got_frame = 0;
diff --git a/libavcodec/cavsdsp.c b/libavcodec/cavsdsp.c
index cc78989..91f6d73 100644
--- a/libavcodec/cavsdsp.c
+++ b/libavcodec/cavsdsp.c
@@ -5,20 +5,20 @@
  *
  * Copyright (c) 2006  Stefan Gehrer <stefan.gehrer@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/cavsdsp.h b/libavcodec/cavsdsp.h
index 248afd5..847f5c4 100644
--- a/libavcodec/cavsdsp.h
+++ b/libavcodec/cavsdsp.h
@@ -2,20 +2,20 @@
  * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
  * Copyright (c) 2006  Stefan Gehrer <stefan.gehrer@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/cbrt_tablegen.c b/libavcodec/cbrt_tablegen.c
index e92c0f1..e0a8e63 100644
--- a/libavcodec/cbrt_tablegen.c
+++ b/libavcodec/cbrt_tablegen.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2010 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/cbrt_tablegen.h b/libavcodec/cbrt_tablegen.h
index 60d900a..0db64fc 100644
--- a/libavcodec/cbrt_tablegen.h
+++ b/libavcodec/cbrt_tablegen.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2010 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -42,7 +42,7 @@ static void cbrt_tableinit(void)
                 float f;
                 uint32_t i;
             } f;
-            f.f = powf(i, 1.0 / 3.0) * i;
+            f.f = pow(i, 1.0 / 3.0) * i;
             cbrt_tab[i] = f.i;
         }
     }
diff --git a/libavcodec/cdgraphics.c b/libavcodec/cdgraphics.c
index b8a6fb8..b7a8fa7 100644
--- a/libavcodec/cdgraphics.c
+++ b/libavcodec/cdgraphics.c
@@ -2,20 +2,20 @@
  * CD Graphics Video Decoder
  * Copyright (c) 2009 Michael Tison
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -119,7 +119,7 @@ static void cdg_load_palette(CDGraphicsContext *cc, uint8_t *data, int low)
         r = ((color >> 8) & 0x000F) * 17;
         g = ((color >> 4) & 0x000F) * 17;
         b = ((color     ) & 0x000F) * 17;
-        palette[i + array_offset] = r << 16 | g << 8 | b;
+        palette[i + array_offset] = 0xFFU << 24 | r << 16 | g << 8 | b;
     }
     cc->frame->palette_has_changed = 1;
 }
@@ -265,7 +265,7 @@ static int cdg_decode_frame(AVCodecContext *avctx,
     int buf_size       = avpkt->size;
     int ret;
     uint8_t command, inst;
-    uint8_t cdg_data[CDG_DATA_SIZE];
+    uint8_t cdg_data[CDG_DATA_SIZE] = {0};
     AVFrame *frame = data;
     CDGraphicsContext *cc = avctx->priv_data;
 
@@ -273,20 +273,25 @@ static int cdg_decode_frame(AVCodecContext *avctx,
         av_log(avctx, AV_LOG_ERROR, "buffer too small for decoder\n");
         return AVERROR(EINVAL);
     }
+    if (buf_size > CDG_HEADER_SIZE + CDG_DATA_SIZE) {
+        av_log(avctx, AV_LOG_ERROR, "buffer too big for decoder\n");
+        return AVERROR(EINVAL);
+    }
 
-    ret = ff_reget_buffer(avctx, cc->frame);
-    if (ret) {
-        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, cc->frame)) < 0)
         return ret;
-    }
-    if (!avctx->frame_number)
+    if (!avctx->frame_number) {
         memset(cc->frame->data[0], 0, cc->frame->linesize[0] * avctx->height);
+        memset(cc->frame->data[1], 0, AVPALETTE_SIZE);
+    }
 
     command = bytestream_get_byte(&buf);
     inst    = bytestream_get_byte(&buf);
     inst    &= CDG_MASK;
     buf += 2;  /// skipping 2 unneeded bytes
-    bytestream_get_buffer(&buf, cdg_data, buf_size - CDG_HEADER_SIZE);
+
+    if (buf_size > CDG_HEADER_SIZE)
+        bytestream_get_buffer(&buf, cdg_data, buf_size - CDG_HEADER_SIZE);
 
     if ((command & CDG_MASK) == CDG_COMMAND) {
         switch (inst) {
@@ -327,11 +332,8 @@ static int cdg_decode_frame(AVCodecContext *avctx,
                 return AVERROR(EINVAL);
             }
 
-            ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF);
-            if (ret) {
-                av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+            if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
                 return ret;
-            }
 
             cdg_scroll(cc, cdg_data, frame, inst == CDG_INST_SCROLL_COPY);
             av_frame_unref(cc->frame);
diff --git a/libavcodec/cdxl.c b/libavcodec/cdxl.c
index 80a3671..13ad57c 100644
--- a/libavcodec/cdxl.c
+++ b/libavcodec/cdxl.c
@@ -2,23 +2,31 @@
  * CDXL video decoder
  * Copyright (c) 2011-2012 Paul B Mahol
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+/**
+ * @file
+ * Commodore CDXL video decoder
+ * @author Paul B Mahol
+ */
+
+#define UNCHECKED_BITSTREAM_READER 1
+
 #include "libavutil/intreadwrite.h"
 #include "libavutil/imgutils.h"
 #include "avcodec.h"
@@ -26,8 +34,8 @@
 #include "internal.h"
 
 #define BIT_PLANAR   0x00
-#define BYTE_PLANAR  0x20
-#define CHUNKY       0x40
+#define CHUNKY       0x20
+#define BYTE_PLANAR  0x40
 #define BIT_LINE     0x80
 #define BYTE_LINE    0xC0
 
@@ -63,7 +71,7 @@ static void import_palette(CDXLVideoContext *c, uint32_t *new_palette)
         unsigned r   = ((rgb >> 8) & 0xF) * 0x11;
         unsigned g   = ((rgb >> 4) & 0xF) * 0x11;
         unsigned b   =  (rgb       & 0xF) * 0x11;
-        AV_WN32(&new_palette[i], (r << 16) | (g << 8) | b);
+        AV_WN32(&new_palette[i], (0xFFU << 24) | (r << 16) | (g << 8) | b);
     }
 }
 
@@ -115,6 +123,7 @@ static void cdxl_decode_rgb(CDXLVideoContext *c, AVFrame *frame)
 {
     uint32_t *new_palette = (uint32_t *)frame->data[1];
 
+    memset(frame->data[1], 0, AVPALETTE_SIZE);
     import_palette(c, new_palette);
     import_format(c, frame->linesize[0], frame->data[0]);
 }
@@ -255,10 +264,8 @@ static int cdxl_decode_frame(AVCodecContext *avctx, void *data,
         return AVERROR_PATCHWELCOME;
     }
 
-    if ((ret = ff_get_buffer(avctx, p, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
         return ret;
-    }
     p->pict_type = AV_PICTURE_TYPE_I;
 
     if (encoding) {
@@ -282,7 +289,7 @@ static av_cold int cdxl_decode_end(AVCodecContext *avctx)
 {
     CDXLVideoContext *c = avctx->priv_data;
 
-    av_free(c->new_video);
+    av_freep(&c->new_video);
 
     return 0;
 }
diff --git a/libavcodec/celp_filters.c b/libavcodec/celp_filters.c
index 381ffa4..a81fd88 100644
--- a/libavcodec/celp_filters.c
+++ b/libavcodec/celp_filters.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2008 Vladimir Voroshilov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -24,6 +24,7 @@
 
 #include "avcodec.h"
 #include "celp_filters.h"
+#include "libavutil/avassert.h"
 #include "libavutil/common.h"
 
 void ff_celp_convolve_circ(int16_t* fc_out, const int16_t* fc_in,
@@ -104,6 +105,8 @@ void ff_celp_lp_synthesis_filterf(float *out, const float *filter_coeffs,
     c -= filter_coeffs[1] * filter_coeffs[0];
     c -= filter_coeffs[0] * b;
 
+    av_assert2((filter_length&1)==0 && filter_length>=4);
+
     old_out0 = out[-4];
     old_out1 = out[-3];
     old_out2 = out[-2];
@@ -133,7 +136,7 @@ void ff_celp_lp_synthesis_filterf(float *out, const float *filter_coeffs,
         out2 -= val * old_out2;
         out3 -= val * old_out3;
 
-        for (i = 5; i <= filter_length; i += 2) {
+        for (i = 5; i < filter_length; i += 2) {
             old_out3 = out[-i];
             val = filter_coeffs[i-1];
 
@@ -205,3 +208,12 @@ void ff_celp_lp_zero_synthesis_filterf(float *out, const float *filter_coeffs,
             out[n] += filter_coeffs[i-1] * in[n-i];
     }
 }
+
+void ff_celp_filter_init(CELPFContext *c)
+{
+    c->celp_lp_synthesis_filterf        = ff_celp_lp_synthesis_filterf;
+    c->celp_lp_zero_synthesis_filterf   = ff_celp_lp_zero_synthesis_filterf;
+
+    if(HAVE_MIPSFPU)
+        ff_celp_filter_init_mips(c);
+}
diff --git a/libavcodec/celp_filters.h b/libavcodec/celp_filters.h
index c328258..f644ec3 100644
--- a/libavcodec/celp_filters.h
+++ b/libavcodec/celp_filters.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2008 Vladimir Voroshilov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,6 +25,55 @@
 
 #include <stdint.h>
 
+typedef struct CELPFContext { /* function pointers to the float CELP LP filters */
+    /**
+     * LP synthesis filter.
+     * @param[out] out pointer to output buffer
+     *        - the array out[-filter_length, -1] must
+     *        contain the previous result of this filter
+     * @param filter_coeffs filter coefficients.
+     * @param in input signal
+     * @param buffer_length amount of data to process
+     * @param filter_length filter length (10 for 10th order LP filter). Must be
+     *                      even and at least 4.
+     *
+     * @note Output buffer must contain filter_length samples of past
+     *       speech data before pointer.
+     *
+     * Routine applies 1/A(z) filter to given speech data.
+     */
+    void (*celp_lp_synthesis_filterf)(float *out, const float *filter_coeffs,
+                                      const float *in, int buffer_length,
+                                      int filter_length);
+
+    /**
+     * LP zero synthesis filter.
+     * @param[out] out pointer to output buffer
+     * @param filter_coeffs filter coefficients.
+     * @param in input signal
+     *        - the array in[-filter_length, -1] must
+     *        contain the previous input of this filter
+     * @param buffer_length amount of data to process (should be a multiple of eight)
+     * @param filter_length filter length (10 for 10th order LP filter;
+     *                                      should be a multiple of two)
+     *
+     * @note Input buffer must contain filter_length samples of past
+     *       input data before pointer.
+     *
+     * Routine applies A(z) filter to given speech data.
+     */
+    void (*celp_lp_zero_synthesis_filterf)(float *out, const float *filter_coeffs,
+                                           const float *in, int buffer_length,
+                                           int filter_length);
+
+}CELPFContext;
+
+/**
+ * Initialize CELPFContext by filling in its filter function pointers.
+ */
+void ff_celp_filter_init(CELPFContext *c);
+void ff_celp_filter_init_mips(CELPFContext *c);
+
 /**
  * Circularly convolve fixed vector with a phase dispersion impulse
  *        response filter (D.6.2 of G.729 and 6.1.5 of AMR).
diff --git a/libavcodec/celp_math.c b/libavcodec/celp_math.c
index a9ebef6..a96b1ae 100644
--- a/libavcodec/celp_math.c
+++ b/libavcodec/celp_math.c
@@ -3,28 +3,29 @@
  *
  * Copyright (c) 2008 Vladimir Voroshilov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include <inttypes.h>
 #include <limits.h>
-#include <assert.h>
 
+#include "libavutil/avassert.h"
 #include "avcodec.h"
+#include "mathops.h"
 #include "celp_math.h"
 #include "libavutil/common.h"
 
@@ -48,7 +49,7 @@ int ff_exp2(uint16_t power)
 {
     unsigned int result= exp2a[power>>10] + 0x10000;
 
-    assert(power <= 0x7fff);
+    av_assert2(power <= 0x7fff);
 
     result= (result<<3) + ((result*exp2b[(power>>5)&31])>>17);
     return result + ((result*(power&31)*89)>>22);
@@ -61,10 +62,17 @@ int ff_exp2(uint16_t power)
  */
 static const uint16_t tab_log2[33] =
 {
+#ifdef G729_BITEXACT
+      0,   1455,   2866,   4236,   5568,   6863,   8124,   9352,
+  10549,  11716,  12855,  13967,  15054,  16117,  17156,  18172,
+  19167,  20142,  21097,  22033,  22951,  23852,  24735,  25603,
+  26455,  27291,  28113,  28922,  29716,  30497,  31266,  32023,  32767,
+#else
       4,   1459,   2870,   4240,   5572,   6867,   8127,   9355,
   10552,  11719,  12858,  13971,  15057,  16120,  17158,  18175,
   19170,  20145,  21100,  22036,  22954,  23854,  24738,  25605,
   26457,  27294,  28116,  28924,  29719,  30500,  31269,  32025,  32769,
+#endif
 };
 
 int ff_log2_q15(uint32_t value)
@@ -86,3 +94,33 @@ int ff_log2_q15(uint32_t value)
 
     return (power_int << 15) + value;
 }
+
+int64_t ff_dot_product(const int16_t *a, const int16_t *b, int length)
+{
+    int i;
+    int64_t sum = 0; /* 64-bit accumulator for products of 16-bit inputs */
+
+    for (i = 0; i < length; i++)
+        sum += MUL16(a[i], b[i]); /* MUL16: presumably from mathops.h -- confirm */
+
+    return sum; /* stays 0 when length <= 0: the loop body never runs */
+}
+
+float ff_dot_productf(const float* a, const float* b, int length)
+{
+    float sum = 0; /* accumulator is declared float, like the inputs */
+    int i;
+
+    for(i=0; i<length; i++)
+        sum += a[i] * b[i]; /* products are added in increasing index order */
+
+    return sum; /* stays 0 when length <= 0: the loop body never runs */
+}
+
+void ff_celp_math_init(CELPMContext *c)
+{
+    c->dot_productf   = ff_dot_productf; /* the C implementation defined in this file */
+
+    if(HAVE_MIPSFPU) /* NOTE(review): presumably a build-config macro -- confirm */
+        ff_celp_math_init_mips(c); /* gets the same c; its effect is not visible here */
+}
diff --git a/libavcodec/celp_math.h b/libavcodec/celp_math.h
index ed3f8c0..18d3ad9 100644
--- a/libavcodec/celp_math.h
+++ b/libavcodec/celp_math.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2008 Vladimir Voroshilov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,6 +25,25 @@
 
 #include <stdint.h>
 
+typedef struct CELPMContext { /* function pointers to the CELP math routines */
+    /**
+     * Return the dot product of two float arrays.
+     * @param a input data array
+     * @param b input data array
+     * @param length number of elements read from each array
+     *
+     * @return dot product = sum of elementwise products
+     */
+    float (*dot_productf)(const float* a, const float* b, int length);
+
+}CELPMContext;
+
+/**
+ * Initialize CELPMContext by filling in its dot_productf function pointer.
+ */
+void ff_celp_math_init(CELPMContext *c);
+void ff_celp_math_init_mips(CELPMContext *c);
+
 /**
  * fixed-point implementation of exp2(x) in [0; 1] domain.
  * @param power argument to exp2, 0 <= power <= 0x7fff
@@ -55,4 +74,24 @@ static inline int bidir_sal(int value, int offset)
     else           return value <<  offset;
 }
 
+/**
+ * returns the dot product of 2 int16_t vectors.
+ * @param a input data array
+ * @param b input data array
+ * @param length number of elements
+ *
+ * @return dot product = sum of elementwise products
+ */
+int64_t ff_dot_product(const int16_t *a, const int16_t *b, int length);
+
+/**
+ * Return the dot product of 2 float vectors.
+ * @param a input data array
+ * @param b input data array
+ * @param length number of elements
+ *
+ * @return dot product = sum of elementwise products
+ */
+float ff_dot_productf(const float* a, const float* b, int length);
+
 #endif /* AVCODEC_CELP_MATH_H */
diff --git a/libavcodec/cga_data.c b/libavcodec/cga_data.c
index 2c63ff2..023a86b 100644
--- a/libavcodec/cga_data.c
+++ b/libavcodec/cga_data.c
@@ -1,435 +1,46 @@
 /*
  * CGA/EGA/VGA ROM data
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
  * @file
  * CGA/EGA/VGA ROM data
+ * @note fonts are in libavutil/xga_font_data.[ch]
  */
 
 #include <stdint.h>
 #include "cga_data.h"
 
-const uint8_t ff_cga_font[2048] = {
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x81, 0xa5, 0x81, 0xbd, 0x99, 0x81, 0x7e,
- 0x7e, 0xff, 0xdb, 0xff, 0xc3, 0xe7, 0xff, 0x7e, 0x6c, 0xfe, 0xfe, 0xfe, 0x7c, 0x38, 0x10, 0x00,
- 0x10, 0x38, 0x7c, 0xfe, 0x7c, 0x38, 0x10, 0x00, 0x38, 0x7c, 0x38, 0xfe, 0xfe, 0x7c, 0x38, 0x7c,
- 0x10, 0x10, 0x38, 0x7c, 0xfe, 0x7c, 0x38, 0x7c, 0x00, 0x00, 0x18, 0x3c, 0x3c, 0x18, 0x00, 0x00,
- 0xff, 0xff, 0xe7, 0xc3, 0xc3, 0xe7, 0xff, 0xff, 0x00, 0x3c, 0x66, 0x42, 0x42, 0x66, 0x3c, 0x00,
- 0xff, 0xc3, 0x99, 0xbd, 0xbd, 0x99, 0xc3, 0xff, 0x0f, 0x07, 0x0f, 0x7d, 0xcc, 0xcc, 0xcc, 0x78,
- 0x3c, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x18, 0x3f, 0x33, 0x3f, 0x30, 0x30, 0x70, 0xf0, 0xe0,
- 0x7f, 0x63, 0x7f, 0x63, 0x63, 0x67, 0xe6, 0xc0, 0x99, 0x5a, 0x3c, 0xe7, 0xe7, 0x3c, 0x5a, 0x99,
- 0x80, 0xe0, 0xf8, 0xfe, 0xf8, 0xe0, 0x80, 0x00, 0x02, 0x0e, 0x3e, 0xfe, 0x3e, 0x0e, 0x02, 0x00,
- 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x66, 0x00,
- 0x7f, 0xdb, 0xdb, 0x7b, 0x1b, 0x1b, 0x1b, 0x00, 0x3e, 0x63, 0x38, 0x6c, 0x6c, 0x38, 0xcc, 0x78,
- 0x00, 0x00, 0x00, 0x00, 0x7e, 0x7e, 0x7e, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x7e, 0x3c, 0x18, 0xff,
- 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00,
- 0x00, 0x18, 0x0c, 0xfe, 0x0c, 0x18, 0x00, 0x00, 0x00, 0x30, 0x60, 0xfe, 0x60, 0x30, 0x00, 0x00,
- 0x00, 0x00, 0xc0, 0xc0, 0xc0, 0xfe, 0x00, 0x00, 0x00, 0x24, 0x66, 0xff, 0x66, 0x24, 0x00, 0x00,
- 0x00, 0x18, 0x3c, 0x7e, 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0xff, 0x7e, 0x3c, 0x18, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x78, 0x78, 0x30, 0x30, 0x00, 0x30, 0x00,
- 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6c, 0x6c, 0xfe, 0x6c, 0xfe, 0x6c, 0x6c, 0x00,
- 0x30, 0x7c, 0xc0, 0x78, 0x0c, 0xf8, 0x30, 0x00, 0x00, 0xc6, 0xcc, 0x18, 0x30, 0x66, 0xc6, 0x00,
- 0x38, 0x6c, 0x38, 0x76, 0xdc, 0xcc, 0x76, 0x00, 0x60, 0x60, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x18, 0x30, 0x60, 0x60, 0x60, 0x30, 0x18, 0x00, 0x60, 0x30, 0x18, 0x18, 0x18, 0x30, 0x60, 0x00,
- 0x00, 0x66, 0x3c, 0xff, 0x3c, 0x66, 0x00, 0x00, 0x00, 0x30, 0x30, 0xfc, 0x30, 0x30, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x30, 0x60, 0x00, 0x00, 0x00, 0xfc, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x30, 0x00, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x80, 0x00,
- 0x7c, 0xc6, 0xce, 0xde, 0xf6, 0xe6, 0x7c, 0x00, 0x30, 0x70, 0x30, 0x30, 0x30, 0x30, 0xfc, 0x00,
- 0x78, 0xcc, 0x0c, 0x38, 0x60, 0xcc, 0xfc, 0x00, 0x78, 0xcc, 0x0c, 0x38, 0x0c, 0xcc, 0x78, 0x00,
- 0x1c, 0x3c, 0x6c, 0xcc, 0xfe, 0x0c, 0x1e, 0x00, 0xfc, 0xc0, 0xf8, 0x0c, 0x0c, 0xcc, 0x78, 0x00,
- 0x38, 0x60, 0xc0, 0xf8, 0xcc, 0xcc, 0x78, 0x00, 0xfc, 0xcc, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x00,
- 0x78, 0xcc, 0xcc, 0x78, 0xcc, 0xcc, 0x78, 0x00, 0x78, 0xcc, 0xcc, 0x7c, 0x0c, 0x18, 0x70, 0x00,
- 0x00, 0x30, 0x30, 0x00, 0x00, 0x30, 0x30, 0x00, 0x00, 0x30, 0x30, 0x00, 0x00, 0x30, 0x30, 0x60,
- 0x18, 0x30, 0x60, 0xc0, 0x60, 0x30, 0x18, 0x00, 0x00, 0x00, 0xfc, 0x00, 0x00, 0xfc, 0x00, 0x00,
- 0x60, 0x30, 0x18, 0x0c, 0x18, 0x30, 0x60, 0x00, 0x78, 0xcc, 0x0c, 0x18, 0x30, 0x00, 0x30, 0x00,
- 0x7c, 0xc6, 0xde, 0xde, 0xde, 0xc0, 0x78, 0x00, 0x30, 0x78, 0xcc, 0xcc, 0xfc, 0xcc, 0xcc, 0x00,
- 0xfc, 0x66, 0x66, 0x7c, 0x66, 0x66, 0xfc, 0x00, 0x3c, 0x66, 0xc0, 0xc0, 0xc0, 0x66, 0x3c, 0x00,
- 0xf8, 0x6c, 0x66, 0x66, 0x66, 0x6c, 0xf8, 0x00, 0xfe, 0x62, 0x68, 0x78, 0x68, 0x62, 0xfe, 0x00,
- 0xfe, 0x62, 0x68, 0x78, 0x68, 0x60, 0xf0, 0x00, 0x3c, 0x66, 0xc0, 0xc0, 0xce, 0x66, 0x3e, 0x00,
- 0xcc, 0xcc, 0xcc, 0xfc, 0xcc, 0xcc, 0xcc, 0x00, 0x78, 0x30, 0x30, 0x30, 0x30, 0x30, 0x78, 0x00,
- 0x1e, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0x78, 0x00, 0xe6, 0x66, 0x6c, 0x78, 0x6c, 0x66, 0xe6, 0x00,
- 0xf0, 0x60, 0x60, 0x60, 0x62, 0x66, 0xfe, 0x00, 0xc6, 0xee, 0xfe, 0xfe, 0xd6, 0xc6, 0xc6, 0x00,
- 0xc6, 0xe6, 0xf6, 0xde, 0xce, 0xc6, 0xc6, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0x6c, 0x38, 0x00,
- 0xfc, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xf0, 0x00, 0x78, 0xcc, 0xcc, 0xcc, 0xdc, 0x78, 0x1c, 0x00,
- 0xfc, 0x66, 0x66, 0x7c, 0x6c, 0x66, 0xe6, 0x00, 0x78, 0xcc, 0xe0, 0x70, 0x1c, 0xcc, 0x78, 0x00,
- 0xfc, 0xb4, 0x30, 0x30, 0x30, 0x30, 0x78, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xfc, 0x00,
- 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x78, 0x30, 0x00, 0xc6, 0xc6, 0xc6, 0xd6, 0xfe, 0xee, 0xc6, 0x00,
- 0xc6, 0xc6, 0x6c, 0x38, 0x38, 0x6c, 0xc6, 0x00, 0xcc, 0xcc, 0xcc, 0x78, 0x30, 0x30, 0x78, 0x00,
- 0xfe, 0xc6, 0x8c, 0x18, 0x32, 0x66, 0xfe, 0x00, 0x78, 0x60, 0x60, 0x60, 0x60, 0x60, 0x78, 0x00,
- 0xc0, 0x60, 0x30, 0x18, 0x0c, 0x06, 0x02, 0x00, 0x78, 0x18, 0x18, 0x18, 0x18, 0x18, 0x78, 0x00,
- 0x10, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
- 0x30, 0x30, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0x76, 0x00,
- 0xe0, 0x60, 0x60, 0x7c, 0x66, 0x66, 0xdc, 0x00, 0x00, 0x00, 0x78, 0xcc, 0xc0, 0xcc, 0x78, 0x00,
- 0x1c, 0x0c, 0x0c, 0x7c, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x78, 0xcc, 0xfc, 0xc0, 0x78, 0x00,
- 0x38, 0x6c, 0x60, 0xf0, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc, 0x7c, 0x0c, 0xf8,
- 0xe0, 0x60, 0x6c, 0x76, 0x66, 0x66, 0xe6, 0x00, 0x30, 0x00, 0x70, 0x30, 0x30, 0x30, 0x78, 0x00,
- 0x0c, 0x00, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0x78, 0xe0, 0x60, 0x66, 0x6c, 0x78, 0x6c, 0xe6, 0x00,
- 0x70, 0x30, 0x30, 0x30, 0x30, 0x30, 0x78, 0x00, 0x00, 0x00, 0xcc, 0xfe, 0xfe, 0xd6, 0xc6, 0x00,
- 0x00, 0x00, 0xf8, 0xcc, 0xcc, 0xcc, 0xcc, 0x00, 0x00, 0x00, 0x78, 0xcc, 0xcc, 0xcc, 0x78, 0x00,
- 0x00, 0x00, 0xdc, 0x66, 0x66, 0x7c, 0x60, 0xf0, 0x00, 0x00, 0x76, 0xcc, 0xcc, 0x7c, 0x0c, 0x1e,
- 0x00, 0x00, 0xdc, 0x76, 0x66, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x7c, 0xc0, 0x78, 0x0c, 0xf8, 0x00,
- 0x10, 0x30, 0x7c, 0x30, 0x30, 0x34, 0x18, 0x00, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00,
- 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0x78, 0x30, 0x00, 0x00, 0x00, 0xc6, 0xd6, 0xfe, 0xfe, 0x6c, 0x00,
- 0x00, 0x00, 0xc6, 0x6c, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0xf8,
- 0x00, 0x00, 0xfc, 0x98, 0x30, 0x64, 0xfc, 0x00, 0x1c, 0x30, 0x30, 0xe0, 0x30, 0x30, 0x1c, 0x00,
- 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, 0x18, 0x00, 0xe0, 0x30, 0x30, 0x1c, 0x30, 0x30, 0xe0, 0x00,
- 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0x00,
- 0x78, 0xcc, 0xc0, 0xcc, 0x78, 0x18, 0x0c, 0x78, 0x00, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0x7e, 0x00,
- 0x1c, 0x00, 0x78, 0xcc, 0xfc, 0xc0, 0x78, 0x00, 0x7e, 0xc3, 0x3c, 0x06, 0x3e, 0x66, 0x3f, 0x00,
- 0xcc, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0x7e, 0x00, 0xe0, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0x7e, 0x00,
- 0x30, 0x30, 0x78, 0x0c, 0x7c, 0xcc, 0x7e, 0x00, 0x00, 0x00, 0x78, 0xc0, 0xc0, 0x78, 0x0c, 0x38,
- 0x7e, 0xc3, 0x3c, 0x66, 0x7e, 0x60, 0x3c, 0x00, 0xcc, 0x00, 0x78, 0xcc, 0xfc, 0xc0, 0x78, 0x00,
- 0xe0, 0x00, 0x78, 0xcc, 0xfc, 0xc0, 0x78, 0x00, 0xcc, 0x00, 0x70, 0x30, 0x30, 0x30, 0x78, 0x00,
- 0x7c, 0xc6, 0x38, 0x18, 0x18, 0x18, 0x3c, 0x00, 0xe0, 0x00, 0x70, 0x30, 0x30, 0x30, 0x78, 0x00,
- 0xc6, 0x38, 0x6c, 0xc6, 0xfe, 0xc6, 0xc6, 0x00, 0x30, 0x30, 0x00, 0x78, 0xcc, 0xfc, 0xcc, 0x00,
- 0x1c, 0x00, 0xfc, 0x60, 0x78, 0x60, 0xfc, 0x00, 0x00, 0x00, 0x7f, 0x0c, 0x7f, 0xcc, 0x7f, 0x00,
- 0x3e, 0x6c, 0xcc, 0xfe, 0xcc, 0xcc, 0xce, 0x00, 0x78, 0xcc, 0x00, 0x78, 0xcc, 0xcc, 0x78, 0x00,
- 0x00, 0xcc, 0x00, 0x78, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0xe0, 0x00, 0x78, 0xcc, 0xcc, 0x78, 0x00,
- 0x78, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0x7e, 0x00, 0x00, 0xe0, 0x00, 0xcc, 0xcc, 0xcc, 0x7e, 0x00,
- 0x00, 0xcc, 0x00, 0xcc, 0xcc, 0x7c, 0x0c, 0xf8, 0xc3, 0x18, 0x3c, 0x66, 0x66, 0x3c, 0x18, 0x00,
- 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x18, 0x18, 0x7e, 0xc0, 0xc0, 0x7e, 0x18, 0x18,
- 0x38, 0x6c, 0x64, 0xf0, 0x60, 0xe6, 0xfc, 0x00, 0xcc, 0xcc, 0x78, 0xfc, 0x30, 0xfc, 0x30, 0x30,
- 0xf8, 0xcc, 0xcc, 0xfa, 0xc6, 0xcf, 0xc6, 0xc7, 0x0e, 0x1b, 0x18, 0x3c, 0x18, 0x18, 0xd8, 0x70,
- 0x1c, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0x7e, 0x00, 0x38, 0x00, 0x70, 0x30, 0x30, 0x30, 0x78, 0x00,
- 0x00, 0x1c, 0x00, 0x78, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x1c, 0x00, 0xcc, 0xcc, 0xcc, 0x7e, 0x00,
- 0x00, 0xf8, 0x00, 0xf8, 0xcc, 0xcc, 0xcc, 0x00, 0xfc, 0x00, 0xcc, 0xec, 0xfc, 0xdc, 0xcc, 0x00,
- 0x3c, 0x6c, 0x6c, 0x3e, 0x00, 0x7e, 0x00, 0x00, 0x38, 0x6c, 0x6c, 0x38, 0x00, 0x7c, 0x00, 0x00,
- 0x30, 0x00, 0x30, 0x60, 0xc0, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00, 0xfc, 0xc0, 0xc0, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0xfc, 0x0c, 0x0c, 0x00, 0x00, 0xc3, 0xc6, 0xcc, 0xde, 0x33, 0x66, 0xcc, 0x0f,
- 0xc3, 0xc6, 0xcc, 0xdb, 0x37, 0x6f, 0xcf, 0x03, 0x18, 0x18, 0x00, 0x18, 0x18, 0x18, 0x18, 0x00,
- 0x00, 0x33, 0x66, 0xcc, 0x66, 0x33, 0x00, 0x00, 0x00, 0xcc, 0x66, 0x33, 0x66, 0xcc, 0x00, 0x00,
- 0x22, 0x88, 0x22, 0x88, 0x22, 0x88, 0x22, 0x88, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa,
- 0xdb, 0x77, 0xdb, 0xee, 0xdb, 0x77, 0xdb, 0xee, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, 0x18, 0x18, 0x18,
- 0x36, 0x36, 0x36, 0x36, 0xf6, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x36, 0x36, 0x36,
- 0x00, 0x00, 0xf8, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x36, 0x36, 0xf6, 0x06, 0xf6, 0x36, 0x36, 0x36,
- 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0xfe, 0x06, 0xf6, 0x36, 0x36, 0x36,
- 0x36, 0x36, 0xf6, 0x06, 0xfe, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, 0xfe, 0x00, 0x00, 0x00,
- 0x18, 0x18, 0xf8, 0x18, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x18, 0x18,
- 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, 0x37, 0x36, 0x36, 0x36,
- 0x36, 0x36, 0x37, 0x30, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x30, 0x37, 0x36, 0x36, 0x36,
- 0x36, 0x36, 0xf7, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xf7, 0x36, 0x36, 0x36,
- 0x36, 0x36, 0x37, 0x30, 0x37, 0x36, 0x36, 0x36, 0x00, 0x00, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00,
- 0x36, 0x36, 0xf7, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x18, 0x18, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00,
- 0x36, 0x36, 0x36, 0x36, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff, 0x18, 0x18, 0x18,
- 0x00, 0x00, 0x00, 0x00, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x3f, 0x00, 0x00, 0x00,
- 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18,
- 0x00, 0x00, 0x00, 0x00, 0x3f, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff, 0x36, 0x36, 0x36,
- 0x18, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x18, 0x18, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
- 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x76, 0xdc, 0xc8, 0xdc, 0x76, 0x00, 0x00, 0x78, 0xcc, 0xf8, 0xcc, 0xf8, 0xc0, 0xc0,
- 0x00, 0xfc, 0xcc, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0xfe, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00,
- 0xfc, 0xcc, 0x60, 0x30, 0x60, 0xcc, 0xfc, 0x00, 0x00, 0x00, 0x7e, 0xd8, 0xd8, 0xd8, 0x70, 0x00,
- 0x00, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0xc0, 0x00, 0x76, 0xdc, 0x18, 0x18, 0x18, 0x18, 0x00,
- 0xfc, 0x30, 0x78, 0xcc, 0xcc, 0x78, 0x30, 0xfc, 0x38, 0x6c, 0xc6, 0xfe, 0xc6, 0x6c, 0x38, 0x00,
- 0x38, 0x6c, 0xc6, 0xc6, 0x6c, 0x6c, 0xee, 0x00, 0x1c, 0x30, 0x18, 0x7c, 0xcc, 0xcc, 0x78, 0x00,
- 0x00, 0x00, 0x7e, 0xdb, 0xdb, 0x7e, 0x00, 0x00, 0x06, 0x0c, 0x7e, 0xdb, 0xdb, 0x7e, 0x60, 0xc0,
- 0x38, 0x60, 0xc0, 0xf8, 0xc0, 0x60, 0x38, 0x00, 0x78, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x00,
- 0x00, 0xfc, 0x00, 0xfc, 0x00, 0xfc, 0x00, 0x00, 0x30, 0x30, 0xfc, 0x30, 0x30, 0x00, 0xfc, 0x00,
- 0x60, 0x30, 0x18, 0x30, 0x60, 0x00, 0xfc, 0x00, 0x18, 0x30, 0x60, 0x30, 0x18, 0x00, 0xfc, 0x00,
- 0x0e, 0x1b, 0x1b, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xd8, 0xd8, 0x70,
- 0x30, 0x30, 0x00, 0xfc, 0x00, 0x30, 0x30, 0x00, 0x00, 0x76, 0xdc, 0x00, 0x76, 0xdc, 0x00, 0x00,
- 0x38, 0x6c, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x0f, 0x0c, 0x0c, 0x0c, 0xec, 0x6c, 0x3c, 0x1c,
- 0x78, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x70, 0x18, 0x30, 0x60, 0x78, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x3c, 0x3c, 0x3c, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-};
-
-const uint8_t ff_vga16_font[4096] = {
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x7e, 0x81, 0xa5, 0x81, 0x81, 0xbd, 0x99, 0x81, 0x81, 0x7e, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x7e, 0xff, 0xdb, 0xff, 0xff, 0xc3, 0xe7, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x6c, 0xfe, 0xfe, 0xfe, 0xfe, 0x7c, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x7c, 0xfe, 0x7c, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x18, 0x3c, 0x3c, 0xe7, 0xe7, 0xe7, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x18, 0x3c, 0x7e, 0xff, 0xff, 0x7e, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe7, 0xc3, 0xc3, 0xe7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x42, 0x42, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xc3, 0x99, 0xbd, 0xbd, 0x99, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0x00, 0x00, 0x1e, 0x0e, 0x1a, 0x32, 0x78, 0xcc, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x3c, 0x66, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x3f, 0x33, 0x3f, 0x30, 0x30, 0x30, 0x30, 0x70, 0xf0, 0xe0, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x7f, 0x63, 0x7f, 0x63, 0x63, 0x63, 0x63, 0x67, 0xe7, 0xe6, 0xc0, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x18, 0x18, 0xdb, 0x3c, 0xe7, 0x3c, 0xdb, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfe, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x02, 0x06, 0x0e, 0x1e, 0x3e, 0xfe, 0x3e, 0x1e, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x7f, 0xdb, 0xdb, 0xdb, 0x7b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x7c, 0xc6, 0x60, 0x38, 0x6c, 0xc6, 0xc6, 0x6c, 0x38, 0x0c, 0xc6, 0x7c, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0xfe, 0xfe, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x0c, 0xfe, 0x0c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x60, 0xfe, 0x60, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xc0, 0xc0, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x66, 0xff, 0x66, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x38, 0x7c, 0x7c, 0xfe, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0x7c, 0x7c, 0x38, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x66, 0x66, 0x66, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x6c, 0x6c, 0xfe, 0x6c, 0x6c, 0x6c, 0xfe, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00,
- 0x18, 0x18, 0x7c, 0xc6, 0xc2, 0xc0, 0x7c, 0x06, 0x06, 0x86, 0xc6, 0x7c, 0x18, 0x18, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0xc2, 0xc6, 0x0c, 0x18, 0x30, 0x60, 0xc6, 0x86, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x38, 0x6c, 0x6c, 0x38, 0x76, 0xdc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x30, 0x30, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x18, 0x0c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x30, 0x18, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x3c, 0xff, 0x3c, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x02, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x3c, 0x66, 0xc3, 0xc3, 0xdb, 0xdb, 0xc3, 0xc3, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x18, 0x38, 0x78, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x7c, 0xc6, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x7c, 0xc6, 0x06, 0x06, 0x3c, 0x06, 0x06, 0x06, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x0c, 0x1c, 0x3c, 0x6c, 0xcc, 0xfe, 0x0c, 0x0c, 0x0c, 0x1e, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xfe, 0xc0, 0xc0, 0xc0, 0xfc, 0x06, 0x06, 0x06, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x38, 0x60, 0xc0, 0xc0, 0xfc, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xfe, 0xc6, 0x06, 0x06, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x06, 0x06, 0x0c, 0x78, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x06, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0x0c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xde, 0xde, 0xde, 0xdc, 0xc0, 0x7c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x66, 0x66, 0x66, 0x66, 0xfc, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xc0, 0xc0, 0xc2, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xf8, 0x6c, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x6c, 0xf8, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, 0x60, 0x62, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xde, 0xc6, 0xc6, 0x66, 0x3a, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x1e, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xe6, 0x66, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xf0, 0x60, 0x60, 0x60, 0x60, 0x60, 0x60, 0x62, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xc3, 0xe7, 0xff, 0xff, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xd6, 0xde, 0x7c, 0x0c, 0x0e, 0x00, 0x00,
- 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x6c, 0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0x60, 0x38, 0x0c, 0x06, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xff, 0xdb, 0x99, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x18, 0x3c, 0x66, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xff, 0xc3, 0x86, 0x0c, 0x18, 0x30, 0x60, 0xc1, 0xc3, 0xff, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x3c, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x3c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x80, 0xc0, 0xe0, 0x70, 0x38, 0x1c, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x3c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x3c, 0x00, 0x00, 0x00, 0x00,
- 0x10, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00,
- 0x30, 0x30, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xe0, 0x60, 0x60, 0x78, 0x6c, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc0, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x1c, 0x0c, 0x0c, 0x3c, 0x6c, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0xcc, 0x78, 0x00,
- 0x00, 0x00, 0xe0, 0x60, 0x60, 0x6c, 0x76, 0x66, 0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x18, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x06, 0x06, 0x00, 0x0e, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x66, 0x66, 0x3c, 0x00,
- 0x00, 0x00, 0xe0, 0x60, 0x60, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xdb, 0xdb, 0xdb, 0xdb, 0xdb, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xf0, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0x0c, 0x1e, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x76, 0x66, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0x60, 0x38, 0x0c, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x10, 0x30, 0x30, 0xfc, 0x30, 0x30, 0x30, 0x30, 0x36, 0x1c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0x3c, 0x66, 0xc3, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0xf8, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xcc, 0x18, 0x30, 0x60, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x0e, 0x18, 0x18, 0x18, 0x70, 0x18, 0x18, 0x18, 0x18, 0x0e, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x70, 0x18, 0x18, 0x18, 0x0e, 0x18, 0x18, 0x18, 0x18, 0x70, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xc0, 0xc2, 0x66, 0x3c, 0x0c, 0x06, 0x7c, 0x00, 0x00,
- 0x00, 0x00, 0xcc, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x0c, 0x18, 0x30, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x10, 0x38, 0x6c, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xcc, 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x60, 0x30, 0x18, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x38, 0x6c, 0x38, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x60, 0x60, 0x66, 0x3c, 0x0c, 0x06, 0x3c, 0x00, 0x00, 0x00,
- 0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xc6, 0x00, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x60, 0x30, 0x18, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x66, 0x00, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x18, 0x3c, 0x66, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x60, 0x30, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0xc6, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
- 0x38, 0x6c, 0x38, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
- 0x18, 0x30, 0x60, 0x00, 0xfe, 0x66, 0x60, 0x7c, 0x60, 0x60, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0x3b, 0x1b, 0x7e, 0xd8, 0xdc, 0x77, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x3e, 0x6c, 0xcc, 0xcc, 0xfe, 0xcc, 0xcc, 0xcc, 0xcc, 0xce, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xc6, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x60, 0x30, 0x18, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x30, 0x78, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x60, 0x30, 0x18, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xc6, 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0x78, 0x00,
- 0x00, 0xc6, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0xc6, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x18, 0x18, 0x7e, 0xc3, 0xc0, 0xc0, 0xc0, 0xc3, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xe6, 0xfc, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0xfc, 0x66, 0x66, 0x7c, 0x62, 0x66, 0x6f, 0x66, 0x66, 0x66, 0xf3, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x0e, 0x1b, 0x18, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0xd8, 0x70, 0x00, 0x00,
- 0x00, 0x18, 0x30, 0x60, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x0c, 0x18, 0x30, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x18, 0x30, 0x60, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x18, 0x30, 0x60, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x76, 0xdc, 0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00,
- 0x76, 0xdc, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x3c, 0x6c, 0x6c, 0x3e, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x38, 0x6c, 0x6c, 0x38, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x30, 0x30, 0x00, 0x30, 0x30, 0x60, 0xc0, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, 0x60, 0xce, 0x9b, 0x06, 0x0c, 0x1f, 0x00, 0x00,
- 0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, 0x66, 0xce, 0x96, 0x3e, 0x06, 0x06, 0x00, 0x00,
- 0x00, 0x00, 0x18, 0x18, 0x00, 0x18, 0x18, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x6c, 0xd8, 0x6c, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0xd8, 0x6c, 0x36, 0x6c, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44,
- 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa,
- 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x36, 0x36, 0x36, 0x36, 0x36, 0xf6, 0x06, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
- 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x06, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
- 0x36, 0x36, 0x36, 0x36, 0x36, 0xf6, 0x06, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
- 0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x30, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
- 0x36, 0x36, 0x36, 0x36, 0x36, 0xf7, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
- 0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x36, 0x36, 0x36, 0x36, 0x36, 0xf7, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
- 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
- 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
- 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0xd8, 0xd8, 0xd8, 0xdc, 0x76, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x78, 0xcc, 0xcc, 0xcc, 0xd8, 0xcc, 0xc6, 0xc6, 0xc6, 0xcc, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0xfe, 0xc6, 0xc6, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0xfe, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0xfe, 0xc6, 0x60, 0x30, 0x18, 0x30, 0x60, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xd8, 0xd8, 0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xc0, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x7e, 0x18, 0x3c, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0x6c, 0x6c, 0x6c, 0x6c, 0xee, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x1e, 0x30, 0x18, 0x0c, 0x3e, 0x66, 0x66, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xdb, 0xdb, 0xdb, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x03, 0x06, 0x7e, 0xdb, 0xdb, 0xf3, 0x7e, 0x60, 0xc0, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x1c, 0x30, 0x60, 0x60, 0x7c, 0x60, 0x60, 0x60, 0x30, 0x1c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x30, 0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x0e, 0x1b, 0x1b, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
- 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x7e, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x00, 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x38, 0x6c, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x0f, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0xec, 0x6c, 0x6c, 0x3c, 0x1c, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0xd8, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x70, 0xd8, 0x30, 0x60, 0xc8, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
-};
-
 const uint32_t ff_cga_palette[16] = {
-    0x000000, 0x0000AA, 0x00AA00, 0x00AAAA, 0xAA0000, 0xAA00AA, 0xAA5500, 0xAAAAAA,
-    0x555555, 0x5555FF, 0x55FF55, 0x55FFFF, 0xFF5555, 0xFF55FF, 0xFFFF55, 0xFFFFFF,
+    0xFF000000, 0xFF0000AA, 0xFF00AA00, 0xFF00AAAA, 0xFFAA0000, 0xFFAA00AA, 0xFFAA5500, 0xFFAAAAAA,
+    0xFF555555, 0xFF5555FF, 0xFF55FF55, 0xFF55FFFF, 0xFFFF5555, 0xFFFF55FF, 0xFFFFFF55, 0xFFFFFFFF,
 };
 
 const uint32_t ff_ega_palette[64] = {
-    0x000000, 0x0000AA, 0x00AA00, 0x00AAAA, 0xAA0000, 0xAA00AA, 0xAAAA00, 0xAAAAAA,
-    0x000055, 0x0000FF, 0x00AA55, 0x00AAFF, 0xAA0055, 0xAA00FF, 0xAAAA55, 0xAAAAFF,
-    0x005500, 0x0055AA, 0x00FF00, 0x00FFAA, 0xAA5500, 0xAA55AA, 0xAAFF00, 0xAAFFAA,
-    0x005555, 0x0055FF, 0x00FF55, 0x00FFFF, 0xAA5555, 0xAA55FF, 0xAAFF55, 0xAAFFFF,
-    0x550000, 0x5500AA, 0x55AA00, 0x55AAAA, 0xFF0000, 0xFF00AA, 0xFFAA00, 0xFFAAAA,
-    0x550055, 0x5500FF, 0x55AA55, 0x55AAFF, 0xFF0055, 0xFF00FF, 0xFFAA55, 0xFFAAFF,
-    0x555500, 0x5555AA, 0x55FF00, 0x55FFAA, 0xFF5500, 0xFF55AA, 0xFFFF00, 0xFFFFAA,
-    0x555555, 0x5555FF, 0x55FF55, 0x55FFFF, 0xFF5555, 0xFF55FF, 0xFFFF55, 0xFFFFFF
+    0xFF000000, 0xFF0000AA, 0xFF00AA00, 0xFF00AAAA, 0xFFAA0000, 0xFFAA00AA, 0xFFAAAA00, 0xFFAAAAAA,
+    0xFF000055, 0xFF0000FF, 0xFF00AA55, 0xFF00AAFF, 0xFFAA0055, 0xFFAA00FF, 0xFFAAAA55, 0xFFAAAAFF,
+    0xFF005500, 0xFF0055AA, 0xFF00FF00, 0xFF00FFAA, 0xFFAA5500, 0xFFAA55AA, 0xFFAAFF00, 0xFFAAFFAA,
+    0xFF005555, 0xFF0055FF, 0xFF00FF55, 0xFF00FFFF, 0xFFAA5555, 0xFFAA55FF, 0xFFAAFF55, 0xFFAAFFFF,
+    0xFF550000, 0xFF5500AA, 0xFF55AA00, 0xFF55AAAA, 0xFFFF0000, 0xFFFF00AA, 0xFFFFAA00, 0xFFFFAAAA,
+    0xFF550055, 0xFF5500FF, 0xFF55AA55, 0xFF55AAFF, 0xFFFF0055, 0xFFFF00FF, 0xFFFFAA55, 0xFFFFAAFF,
+    0xFF555500, 0xFF5555AA, 0xFF55FF00, 0xFF55FFAA, 0xFFFF5500, 0xFFFF55AA, 0xFFFFFF00, 0xFFFFFFAA,
+    0xFF555555, 0xFF5555FF, 0xFF55FF55, 0xFF55FFFF, 0xFFFF5555, 0xFFFF55FF, 0xFFFFFF55, 0xFFFFFFFF
 };
 
 void ff_draw_pc_font(uint8_t *dst, int linesize, const uint8_t *font, int font_height, int ch, int fg, int bg)
diff --git a/libavcodec/cga_data.h b/libavcodec/cga_data.h
index 2149cfd..3f5281a 100644
--- a/libavcodec/cga_data.h
+++ b/libavcodec/cga_data.h
@@ -1,26 +1,27 @@
 /*
  * CGA/EGA/VGA ROM data
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
  * @file
  * CGA/EGA/VGA ROM data
+ * @note fonts are in libavutil/xga_font_data.[ch]
  */
 
 #ifndef AVCODEC_CGA_DATA_H
@@ -28,8 +29,6 @@
 
 #include <stdint.h>
 
-extern const uint8_t ff_cga_font[2048];
-extern const uint8_t ff_vga16_font[4096];
 extern const uint32_t ff_cga_palette[16];
 extern const uint32_t ff_ega_palette[64];
 
diff --git a/libavcodec/chomp_bsf.c b/libavcodec/chomp_bsf.c
index 9ed7496..2b93fa9 100644
--- a/libavcodec/chomp_bsf.c
+++ b/libavcodec/chomp_bsf.c
@@ -2,20 +2,20 @@
  * Chomp bitstream filter
  * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -41,7 +41,6 @@ static int chomp_filter(AVBitStreamFilterContext *bsfc,
  * This filter removes a string of NULL bytes from the end of a packet.
  */
 AVBitStreamFilter ff_chomp_bsf = {
-    "chomp",
-    0,
-    chomp_filter,
+    .name   = "chomp",
+    .filter = chomp_filter,
 };
diff --git a/libavcodec/cinepak.c b/libavcodec/cinepak.c
index caf14cb..082d0b2 100644
--- a/libavcodec/cinepak.c
+++ b/libavcodec/cinepak.c
@@ -2,20 +2,20 @@
  * Cinepak Video Decoder
  * Copyright (C) 2003 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -28,6 +28,9 @@
  *   http://www.csse.monash.edu.au/~timf/
  * @see For more information on the quirky data inside Sega FILM/CPK files, visit:
  *   http://wiki.multimedia.cx/index.php?title=Sega_FILM
+ *
+ * Cinepak colorspace support (c) 2013 Rl, Aetey Global Technologies AB
+ * @author Cinepak colorspace, Rl, Aetey Global Technologies AB
  */
 
 #include <stdio.h>
@@ -40,10 +43,7 @@
 #include "internal.h"
 
 
-typedef struct {
-    uint8_t  y0, y1, y2, y3;
-    uint8_t  u, v;
-} cvid_codebook;
+typedef uint8_t cvid_codebook[12];
 
 #define MAX_STRIPS      32
 
@@ -79,12 +79,14 @@ static void cinepak_decode_codebook (cvid_codebook *codebook,
     const uint8_t *eod = (data + size);
     uint32_t flag, mask;
     int      i, n;
+    uint8_t *p;
 
     /* check if this chunk contains 4- or 6-element vectors */
     n    = (chunk_id & 0x04) ? 4 : 6;
     flag = 0;
     mask = 0;
 
+    p = codebook[0];
     for (i=0; i < 256; i++) {
         if ((chunk_id & 0x01) && !(mask >>= 1)) {
             if ((data + 4) > eod)
@@ -96,28 +98,33 @@ static void cinepak_decode_codebook (cvid_codebook *codebook,
         }
 
         if (!(chunk_id & 0x01) || (flag & mask)) {
+            int k, kk;
+
             if ((data + n) > eod)
                 break;
 
+            for (k = 0; k < 4; ++k) {
+                int r = *data++;
+                for (kk = 0; kk < 3; ++kk)
+                    *p++ = r;
+            }
             if (n == 6) {
-                codebook[i].y0 = *data++;
-                codebook[i].y1 = *data++;
-                codebook[i].y2 = *data++;
-                codebook[i].y3 = *data++;
-                codebook[i].u  = 128 + *data++;
-                codebook[i].v  = 128 + *data++;
-            } else {
-                /* this codebook type indicates either greyscale or
-                 * palettized video; if palettized, U & V components will
-                 * not be used so it is safe to set them to 128 for the
-                 * benefit of greyscale rendering in YUV420P */
-                codebook[i].y0 = *data++;
-                codebook[i].y1 = *data++;
-                codebook[i].y2 = *data++;
-                codebook[i].y3 = *data++;
-                codebook[i].u  = 128;
-                codebook[i].v  = 128;
+                int r, g, b, u, v;
+                u = *(int8_t *)data++;
+                v = *(int8_t *)data++;
+                p -= 12;
+                for(k=0; k<4; ++k) {
+                    r = *p++ + v*2;
+                    g = *p++ - (u/2) - v;
+                    b = *p   + u*2;
+                    p -= 2;
+                    *p++ = av_clip_uint8(r);
+                    *p++ = av_clip_uint8(g);
+                    *p++ = av_clip_uint8(b);
+                }
             }
+        } else {
+            p += 12;
         }
     }
 }
@@ -127,25 +134,31 @@ static int cinepak_decode_vectors (CinepakContext *s, cvid_strip *strip,
 {
     const uint8_t   *eod = (data + size);
     uint32_t         flag, mask;
-    cvid_codebook   *codebook;
+    uint8_t         *cb0, *cb1, *cb2, *cb3;
     unsigned int     x, y;
-    uint32_t         iy[4];
-    uint32_t         iu[2];
-    uint32_t         iv[2];
+    char            *ip0, *ip1, *ip2, *ip3;
 
     flag = 0;
     mask = 0;
 
     for (y=strip->y1; y < strip->y2; y+=4) {
 
-        iy[0] = strip->x1 + (y * s->frame->linesize[0]);
-        iy[1] = iy[0] + s->frame->linesize[0];
-        iy[2] = iy[1] + s->frame->linesize[0];
-        iy[3] = iy[2] + s->frame->linesize[0];
-        iu[0] = (strip->x1/2) + ((y/2) * s->frame->linesize[1]);
-        iu[1] = iu[0] + s->frame->linesize[1];
-        iv[0] = (strip->x1/2) + ((y/2) * s->frame->linesize[2]);
-        iv[1] = iv[0] + s->frame->linesize[2];
+/* handle a y dimension that is not a multiple of 4; such streams exist */
+        ip0 = ip1 = ip2 = ip3 = s->frame->data[0] +
+          (s->palette_video?strip->x1:strip->x1*3) + (y * s->frame->linesize[0]);
+        if(s->avctx->height - y > 1) {
+            ip1 = ip0 + s->frame->linesize[0];
+            if(s->avctx->height - y > 2) {
+                ip2 = ip1 + s->frame->linesize[0];
+                if(s->avctx->height - y > 3) {
+                    ip3 = ip2 + s->frame->linesize[0];
+                }
+            }
+        }
+/* to get the correct picture when the height is not a multiple of 4, fill
+ * each block from the bottom up: rows that do not exist alias the top row,
+ * which may thus be written more than once but ends up with the correct
+ * data in place (this avoids in-loop checking) */
 
         for (x=strip->x1; x < strip->x2; x+=4) {
             if ((chunk_id & 0x01) && !(mask >>= 1)) {
@@ -168,97 +181,82 @@ static int cinepak_decode_vectors (CinepakContext *s, cvid_strip *strip,
                 }
 
                 if ((chunk_id & 0x02) || (~flag & mask)) {
+                    uint8_t *p;
                     if (data >= eod)
                         return AVERROR_INVALIDDATA;
 
-                    codebook = &strip->v1_codebook[*data++];
-                    s->frame->data[0][iy[0] + 0] = codebook->y0;
-                    s->frame->data[0][iy[0] + 1] = codebook->y0;
-                    s->frame->data[0][iy[1] + 0] = codebook->y0;
-                    s->frame->data[0][iy[1] + 1] = codebook->y0;
-                    if (!s->palette_video) {
-                        s->frame->data[1][iu[0]] = codebook->u;
-                        s->frame->data[2][iv[0]] = codebook->v;
-                    }
-
-                    s->frame->data[0][iy[0] + 2] = codebook->y1;
-                    s->frame->data[0][iy[0] + 3] = codebook->y1;
-                    s->frame->data[0][iy[1] + 2] = codebook->y1;
-                    s->frame->data[0][iy[1] + 3] = codebook->y1;
-                    if (!s->palette_video) {
-                        s->frame->data[1][iu[0] + 1] = codebook->u;
-                        s->frame->data[2][iv[0] + 1] = codebook->v;
-                    }
-
-                    s->frame->data[0][iy[2] + 0] = codebook->y2;
-                    s->frame->data[0][iy[2] + 1] = codebook->y2;
-                    s->frame->data[0][iy[3] + 0] = codebook->y2;
-                    s->frame->data[0][iy[3] + 1] = codebook->y2;
-                    if (!s->palette_video) {
-                        s->frame->data[1][iu[1]] = codebook->u;
-                        s->frame->data[2][iv[1]] = codebook->v;
-                    }
-
-                    s->frame->data[0][iy[2] + 2] = codebook->y3;
-                    s->frame->data[0][iy[2] + 3] = codebook->y3;
-                    s->frame->data[0][iy[3] + 2] = codebook->y3;
-                    s->frame->data[0][iy[3] + 3] = codebook->y3;
-                    if (!s->palette_video) {
-                        s->frame->data[1][iu[1] + 1] = codebook->u;
-                        s->frame->data[2][iv[1] + 1] = codebook->v;
+                    p = strip->v1_codebook[*data++];
+                    if (s->palette_video) {
+                        ip3[0] = ip3[1] = ip2[0] = ip2[1] = p[6];
+                        ip3[2] = ip3[3] = ip2[2] = ip2[3] = p[9];
+                        ip1[0] = ip1[1] = ip0[0] = ip0[1] = p[0];
+                        ip1[2] = ip1[3] = ip0[2] = ip0[3] = p[3];
+                    } else {
+                        p += 6;
+                        memcpy(ip3 + 0, p, 3); memcpy(ip3 + 3, p, 3);
+                        memcpy(ip2 + 0, p, 3); memcpy(ip2 + 3, p, 3);
+                        p += 3; /* ... + 9 */
+                        memcpy(ip3 + 6, p, 3); memcpy(ip3 + 9, p, 3);
+                        memcpy(ip2 + 6, p, 3); memcpy(ip2 + 9, p, 3);
+                        p -= 9; /* ... + 0 */
+                        memcpy(ip1 + 0, p, 3); memcpy(ip1 + 3, p, 3);
+                        memcpy(ip0 + 0, p, 3); memcpy(ip0 + 3, p, 3);
+                        p += 3; /* ... + 3 */
+                        memcpy(ip1 + 6, p, 3); memcpy(ip1 + 9, p, 3);
+                        memcpy(ip0 + 6, p, 3); memcpy(ip0 + 9, p, 3);
                     }
 
                 } else if (flag & mask) {
                     if ((data + 4) > eod)
                         return AVERROR_INVALIDDATA;
 
-                    codebook = &strip->v4_codebook[*data++];
-                    s->frame->data[0][iy[0] + 0] = codebook->y0;
-                    s->frame->data[0][iy[0] + 1] = codebook->y1;
-                    s->frame->data[0][iy[1] + 0] = codebook->y2;
-                    s->frame->data[0][iy[1] + 1] = codebook->y3;
-                    if (!s->palette_video) {
-                        s->frame->data[1][iu[0]] = codebook->u;
-                        s->frame->data[2][iv[0]] = codebook->v;
-                    }
-
-                    codebook = &strip->v4_codebook[*data++];
-                    s->frame->data[0][iy[0] + 2] = codebook->y0;
-                    s->frame->data[0][iy[0] + 3] = codebook->y1;
-                    s->frame->data[0][iy[1] + 2] = codebook->y2;
-                    s->frame->data[0][iy[1] + 3] = codebook->y3;
-                    if (!s->palette_video) {
-                        s->frame->data[1][iu[0] + 1] = codebook->u;
-                        s->frame->data[2][iv[0] + 1] = codebook->v;
-                    }
-
-                    codebook = &strip->v4_codebook[*data++];
-                    s->frame->data[0][iy[2] + 0] = codebook->y0;
-                    s->frame->data[0][iy[2] + 1] = codebook->y1;
-                    s->frame->data[0][iy[3] + 0] = codebook->y2;
-                    s->frame->data[0][iy[3] + 1] = codebook->y3;
-                    if (!s->palette_video) {
-                        s->frame->data[1][iu[1]] = codebook->u;
-                        s->frame->data[2][iv[1]] = codebook->v;
-                    }
-
-                    codebook = &strip->v4_codebook[*data++];
-                    s->frame->data[0][iy[2] + 2] = codebook->y0;
-                    s->frame->data[0][iy[2] + 3] = codebook->y1;
-                    s->frame->data[0][iy[3] + 2] = codebook->y2;
-                    s->frame->data[0][iy[3] + 3] = codebook->y3;
-                    if (!s->palette_video) {
-                        s->frame->data[1][iu[1] + 1] = codebook->u;
-                        s->frame->data[2][iv[1] + 1] = codebook->v;
+                    cb0 = strip->v4_codebook[*data++];
+                    cb1 = strip->v4_codebook[*data++];
+                    cb2 = strip->v4_codebook[*data++];
+                    cb3 = strip->v4_codebook[*data++];
+                    if (s->palette_video) {
+                        uint8_t *p;
+                        p = ip3;
+                        *p++ = cb2[6];
+                        *p++ = cb2[9];
+                        *p++ = cb3[6];
+                        *p   = cb3[9];
+                        p = ip2;
+                        *p++ = cb2[0];
+                        *p++ = cb2[3];
+                        *p++ = cb3[0];
+                        *p   = cb3[3];
+                        p = ip1;
+                        *p++ = cb0[6];
+                        *p++ = cb0[9];
+                        *p++ = cb1[6];
+                        *p   = cb1[9];
+                        p = ip0;
+                        *p++ = cb0[0];
+                        *p++ = cb0[3];
+                        *p++ = cb1[0];
+                        *p   = cb1[3];
+                    } else {
+                        memcpy(ip3 + 0, cb2 + 6, 6);
+                        memcpy(ip3 + 6, cb3 + 6, 6);
+                        memcpy(ip2 + 0, cb2 + 0, 6);
+                        memcpy(ip2 + 6, cb3 + 0, 6);
+                        memcpy(ip1 + 0, cb0 + 6, 6);
+                        memcpy(ip1 + 6, cb1 + 6, 6);
+                        memcpy(ip0 + 0, cb0 + 0, 6);
+                        memcpy(ip0 + 6, cb1 + 0, 6);
                     }
 
                 }
             }
 
-            iy[0] += 4;  iy[1] += 4;
-            iy[2] += 4;  iy[3] += 4;
-            iu[0] += 2;  iu[1] += 2;
-            iv[0] += 2;  iv[1] += 2;
+            if (s->palette_video) {
+                ip0 += 4;  ip1 += 4;
+                ip2 += 4;  ip3 += 4;
+            } else {
+                ip0 += 12;  ip1 += 12;
+                ip2 += 12;  ip3 += 12;
+            }
         }
     }
 
@@ -362,15 +360,23 @@ static int cinepak_decode (CinepakContext *s)
 
     num_strips = FFMIN(num_strips, MAX_STRIPS);
 
+    s->frame->key_frame = 0;
+
     for (i=0; i < num_strips; i++) {
         if ((s->data + 12) > eod)
             return AVERROR_INVALIDDATA;
 
         s->strips[i].id = s->data[0];
-        s->strips[i].y1 = y0;
-        s->strips[i].x1 = 0;
-        s->strips[i].y2 = y0 + AV_RB16 (&s->data[8]);
-        s->strips[i].x2 = s->avctx->width;
+/* zero y1 means "relative to the previous stripe" */
+        if (!(s->strips[i].y1 = AV_RB16 (&s->data[4])))
+            s->strips[i].y2 = (s->strips[i].y1 = y0) + AV_RB16 (&s->data[8]);
+        else
+            s->strips[i].y2 = AV_RB16 (&s->data[8]);
+        s->strips[i].x1 = AV_RB16 (&s->data[6]);
+        s->strips[i].x2 = AV_RB16 (&s->data[10]);
+
+        if (s->strips[i].id == 0x10)
+            s->frame->key_frame = 1;
 
         strip_size = AV_RB24 (&s->data[1]) - 12;
         if (strip_size < 0)
@@ -403,12 +409,13 @@ static av_cold int cinepak_decode_init(AVCodecContext *avctx)
     s->avctx = avctx;
     s->width = (avctx->width + 3) & ~3;
     s->height = (avctx->height + 3) & ~3;
+
     s->sega_film_skip_bytes = -1;  /* uninitialized state */
 
     // check for paletted data
     if (avctx->bits_per_coded_sample != 8) {
         s->palette_video = 0;
-        avctx->pix_fmt = AV_PIX_FMT_YUV420P;
+        avctx->pix_fmt = AV_PIX_FMT_RGB24;
     } else {
         s->palette_video = 1;
         avctx->pix_fmt = AV_PIX_FMT_PAL8;
@@ -432,10 +439,8 @@ static int cinepak_decode_frame(AVCodecContext *avctx,
     s->data = buf;
     s->size = buf_size;
 
-    if ((ret = ff_reget_buffer(avctx, s->frame))) {
-        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0)
         return ret;
-    }
 
     if (s->palette_video) {
         const uint8_t *pal = av_packet_get_side_data(avpkt, AV_PKT_DATA_PALETTE, NULL);
@@ -445,7 +450,9 @@ static int cinepak_decode_frame(AVCodecContext *avctx,
         }
     }
 
-    cinepak_decode(s);
+    if ((ret = cinepak_decode(s)) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "cinepak_decode failed\n");
+    }
 
     if (s->palette_video)
         memcpy (s->frame->data[1], s->pal, AVPALETTE_SIZE);
diff --git a/libavcodec/cinepakenc.c b/libavcodec/cinepakenc.c
new file mode 100644
index 0000000..7277345
--- /dev/null
+++ b/libavcodec/cinepakenc.c
@@ -0,0 +1,1335 @@
+/*
+ * Cinepak encoder (c) 2011 Tomas Härdin
+ * http://titan.codemill.se/~tomhar/cinepakenc.patch
+ *
+ * Fixes and improvements, vintage decoders compatibility
+ *  (c) 2013, 2014 Rl, Aetey Global Technologies AB
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+ * TODO:
+ * - optimize: color space conversion, ...
+ * - implement options to set the min/max number of strips?
+ * MAYBE:
+ * - "optimally" split the frame into several non-regular areas
+ *   using a separate codebook pair for each area and approximating
+ *   the area by several rectangular strips (generally not full width ones)
+ *   (use quadtree splitting? a simple fixed-granularity grid?)
+ *
+ *
+ * version 2014-01-23 Rl
+ * - added option handling for flexibility
+ *
+ * version 2014-01-21 Rl
+ * - believe it or not, now we get even smaller files, with better quality
+ *   (which means I missed an optimization earlier :)
+ *
+ * version 2014-01-20 Rl
+ * - made the encoder compatible with vintage decoders
+ *   and added some yet unused code for possible future
+ *   incremental codebook updates
+ * - fixed a small memory leak
+ *
+ * version 2013-04-28 Rl
+ * - bugfixed codebook optimization logic
+ *
+ * version 2013-02-14 Rl
+ * "Valentine's Day" version:
+ * - made strip division more robust
+ * - minimized bruteforcing the number of strips,
+ *   (costs some R/D but speeds up compression a lot), the heuristic
+ *   assumption is that score as a function of the number of strips has
+ *   one wide minimum which moves slowly, of course not fully true
+ * - simplified codebook generation,
+ *   the old code was meant for other optimizations than we actually do
+ * - optimized the codebook generation / error estimation for MODE_MC
+ *
+ * version 2013-02-12 Rl
+ * - separated codebook training sets, avoided the transfer of wasted bytes,
+ *   which yields both better quality and smaller files
+ * - now using the correct colorspace (TODO: move conversion to libswscale)
+ *
+ * version 2013-02-08 Rl
+ * - fixes/optimization in multistrip encoding and codebook size choice,
+ *   quality/bitrate is now better than that of the binary proprietary encoder
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "avcodec.h"
+#include "libavutil/lfg.h"
+#include "elbg.h"
+#include "internal.h"
+
+#include "libavutil/avassert.h"
+#include "libavutil/opt.h"
+
+#define CVID_HEADER_SIZE 10
+#define STRIP_HEADER_SIZE 12
+#define CHUNK_HEADER_SIZE 4
+
+#define MB_SIZE 4           //4x4 MBs
+#define MB_AREA (MB_SIZE*MB_SIZE)
+
+#define VECTOR_MAX 6        //six or four entries per vector depending on format
+#define CODEBOOK_MAX 256    //size of a codebook
+
+#define MAX_STRIPS  32      //Note: having fewer choices regarding the number of strips speeds up encoding (obviously)
+#define MIN_STRIPS  1       //Note: having more strips speeds up encoding the frame (this is less obvious)
+// MAX_STRIPS limits the maximum quality you can reach
+//            when you want high quality on high resolutions,
+// MIN_STRIPS limits the minimum efficiently encodable bit rate
+//            on low resolutions
+// the numbers are only used for brute force optimization for the first frame,
+// for the following frames they are adaptively readjusted
+// NOTE the decoder in ffmpeg has its own arbitrary limitation on the number
+// of strips, currently 32
+
+/* Per-strip coding mode; it selects which vector chunk encode_mode() emits. */
+typedef enum {
+    MODE_V1_ONLY = 0,   // every MB is a single V1 index (chunk 0x32)
+    MODE_V1_V4,         // each MB is either V1 or V4 (chunk 0x30)
+    MODE_MC,            // each MB is skipped, V1 or V4 (chunk 0x31)
+
+    MODE_COUNT,         // number of modes listed above, not a mode itself
+} CinepakMode;
+
+/* How one 4x4 macroblock is coded. */
+typedef enum {
+    ENC_V1,         // one index into the V1 codebook
+    ENC_V4,         // four indices into the V4 codebook
+    ENC_SKIP,       // block is copied from the last frame (selected in MODE_MC only)
+
+    ENC_UNCERTAIN   // no definite encoding; this is what CERTAIN() tests against
+} mb_encoding;
+
+/* Rate/distortion state of one 4x4 macroblock (one entry of CinepakEncContext.mb). */
+typedef struct {
+    int v1_vector;                  //index into v1 codebook
+    int v1_error;                   //error when using V1 encoding
+    int v4_vector[4];               //indices into v4 codebooks
+    int v4_error;                   //error when using V4 encoding
+    int skip_error;                 //error when block is skipped (aka copied from last frame)
+    mb_encoding best_encoding;      //last result from calculate_mode_score()
+} mb_info;
+
+/*
+ * Codebooks and coding mode of one strip.
+ * A codebook entry occupies entry_size consecutive ints
+ * (6 for AV_PIX_FMT_RGB24 input, 4 otherwise).
+ */
+typedef struct {
+    int v1_codebook[CODEBOOK_MAX*VECTOR_MAX];   //V1 entries, indexed as vector*entry_size + component
+    int v4_codebook[CODEBOOK_MAX*VECTOR_MAX];   //V4 entries, same layout
+    int v1_size;                                //number of entries in v1_codebook that get written out
+    int v4_size;                                //number of entries in v4_codebook that get written out
+    CinepakMode mode;                           //how the MBs of this strip are coded
+} strip_info;
+
+/* Private encoder state; allocated fields are set up in cinepak_encode_init(). */
+typedef struct {
+    const AVClass *class;                       //class carrying the private options below
+    AVCodecContext *avctx;                      //owning codec context
+    //pict_bufs[0..3] back last_frame, best_frame, scratch_frame and input_frame
+    //(in that order); strip_buf and frame_buf are byte buffers sized in init
+    unsigned char *pict_bufs[4], *strip_buf, *frame_buf;
+    AVFrame *last_frame;                        //source of skipped (copied) MBs
+    AVFrame *best_frame;                        //NOTE(review): presumably the best reconstruction so far - confirm
+    AVFrame *scratch_frame;                     //reconstruction target of encode_mode()
+    AVFrame *input_frame;                       //only allocated for AV_PIX_FMT_RGB24 input
+    enum AVPixelFormat pix_fmt;                 //copy of avctx->pix_fmt
+    int w, h;                                   //copy of avctx->width / avctx->height
+    int frame_buf_size;                         //size of frame_buf in bytes
+    int curframe, keyint;                       //curframe starts at 0, keyint is taken from avctx->keyint_min
+    AVLFG randctx;                              //random generator, seeded with 1 in init
+    uint64_t lambda;                            //rate weight: score = FF_LAMBDA_SCALE * error + lambda * bits
+    int *codebook_input;                        //training vectors, entry_size ints per vector
+    int *codebook_closest;                      //NOTE(review): presumably closest codebook entry per training vector - confirm
+    mb_info *mb;                                //MB RD state
+    int min_strips;          //the current limit
+    int max_strips;          //the current limit
+#ifdef CINEPAKENC_DEBUG
+    mb_info *best_mb;                           //TODO: remove. only used for printing stats
+    int num_v1_mode, num_v4_mode, num_mc_mode;
+    int num_v1_encs, num_v4_encs, num_skips;
+#endif
+// options
+    int max_extra_cb_iterations;                //"max_extra_cb_iterations" option
+    int skip_empty_cb;                          //"skip_empty_cb" option: omit codebook chunks of size 0
+    int min_min_strips;                         //"min_strips" option
+    int max_max_strips;                         //"max_strips" option
+    int strip_number_delta_range;               //"strip_number_adaptivity" option
+} CinepakEncContext;
+
+// OFFSET() locates an option's storage inside CinepakEncContext,
+// VE marks an option as a video encoding parameter
+#define OFFSET(x) offsetof(CinepakEncContext, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+// private encoder options; cinepak_encode_init() additionally rejects
+// min_strips > max_strips
+static const AVOption options[] = {
+    { "max_extra_cb_iterations", "Max extra codebook recalculation passes, more is better and slower", OFFSET(max_extra_cb_iterations), AV_OPT_TYPE_INT, { .i64 = 2 }, 0, INT_MAX, VE },
+    { "skip_empty_cb", "Avoid wasting bytes, ignore vintage MacOS decoder", OFFSET(skip_empty_cb), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
+    { "max_strips", "Limit strips/frame, vintage compatible is 1..3, otherwise the more the better", OFFSET(max_max_strips), AV_OPT_TYPE_INT, { .i64 = 3 }, MIN_STRIPS, MAX_STRIPS, VE },
+    { "min_strips", "Enforce min strips/frame, more is worse and faster, must be <= max_strips", OFFSET(min_min_strips), AV_OPT_TYPE_INT, { .i64 = MIN_STRIPS }, MIN_STRIPS, MAX_STRIPS, VE },
+    { "strip_number_adaptivity", "How fast the strip number adapts, more is slightly better, much slower", OFFSET(strip_number_delta_range), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_STRIPS-MIN_STRIPS, VE },
+    { NULL },
+};
+
+// AVClass that exposes the options[] table above under the name "cinepak"
+static const AVClass cinepak_class = {
+    .class_name = "cinepak",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+/**
+ * Validate the configuration and allocate all encoder buffers.
+ *
+ * @return 0 on success, AVERROR(EINVAL) when width/height are not multiples
+ *         of four or min_strips exceeds max_strips, AVERROR(ENOMEM) when an
+ *         allocation fails (everything allocated up to then is released)
+ */
+static av_cold int cinepak_encode_init(AVCodecContext *avctx)
+{
+    CinepakEncContext *s = avctx->priv_data;
+    const int is_rgb     = avctx->pix_fmt == AV_PIX_FMT_RGB24;
+    const int entry_size = is_rgb ? 6 : 4;   //values per codebook vector
+    const int num_bufs   = is_rgb ? 4 : 3;   //input_frame is only used for RGB24
+    AVFrame *frames[4];
+    int x, area, mb_count, strip_buf_size, frame_buf_size;
+
+    if ((avctx->width | avctx->height) & 3) {
+        av_log(avctx, AV_LOG_ERROR, "width and height must be multiples of four (got %ix%i)\n",
+                avctx->width, avctx->height);
+        return AVERROR(EINVAL);
+    }
+
+    if (s->min_min_strips > s->max_max_strips) {
+        av_log(avctx, AV_LOG_ERROR, "minimal number of strips can not exceed maximal (got %i and %i)\n",
+                s->min_min_strips, s->max_max_strips);
+        return AVERROR(EINVAL);
+    }
+
+    //nothing to release yet if the very first allocation fails
+    s->last_frame = av_frame_alloc();
+    if (!s->last_frame)
+        return AVERROR(ENOMEM);
+
+    s->best_frame = av_frame_alloc();
+    if (!s->best_frame)
+        goto enomem;
+
+    s->scratch_frame = av_frame_alloc();
+    if (!s->scratch_frame)
+        goto enomem;
+
+    if (is_rgb) {
+        s->input_frame = av_frame_alloc();
+        if (!s->input_frame)
+            goto enomem;
+    }
+
+    area = avctx->width * avctx->height;
+
+    s->codebook_input = av_malloc(sizeof(int) * entry_size * area >> 2);
+    if (!s->codebook_input)
+        goto enomem;
+
+    s->codebook_closest = av_malloc(sizeof(int) * area >> 2);
+    if (!s->codebook_closest)
+        goto enomem;
+
+    for (x = 0; x < num_bufs; x++) {
+        s->pict_bufs[x] = av_malloc(entry_size * area >> 2);
+        if (!s->pict_bufs[x])
+            goto enomem;
+    }
+
+    mb_count = area / MB_AREA;
+
+    //the largest possible chunk is 0x31 with all MBs encoded in V4 mode
+    //and full codebooks being replaced in INTER mode,
+    // which is 34 bits per MB
+    //and 2*256 extra flag bits per strip
+    strip_buf_size = STRIP_HEADER_SIZE + 3 * CHUNK_HEADER_SIZE + 2 * VECTOR_MAX * CODEBOOK_MAX + 4 * (mb_count + (mb_count + 15) / 16) + (2 * CODEBOOK_MAX)/8;
+
+    frame_buf_size = CVID_HEADER_SIZE + s->max_max_strips * strip_buf_size;
+
+    s->strip_buf = av_malloc(strip_buf_size);
+    if (!s->strip_buf)
+        goto enomem;
+
+    s->frame_buf = av_malloc(frame_buf_size);
+    if (!s->frame_buf)
+        goto enomem;
+
+    s->mb = av_malloc_array(mb_count, sizeof(mb_info));
+    if (!s->mb)
+        goto enomem;
+
+#ifdef CINEPAKENC_DEBUG
+    s->best_mb = av_malloc_array(mb_count, sizeof(mb_info));
+    if (!s->best_mb)
+        goto enomem;
+#endif
+
+    av_lfg_init(&s->randctx, 1);
+    s->avctx          = avctx;
+    s->w              = avctx->width;
+    s->h              = avctx->height;
+    s->frame_buf_size = frame_buf_size;
+    s->curframe       = 0;
+    s->keyint         = avctx->keyint_min;
+    s->pix_fmt        = avctx->pix_fmt;
+
+    //set up AVFrames: frame number x lives in pict_bufs[x];
+    //for RGB24 input two quarter-size planes follow the full-size one
+    frames[0] = s->last_frame;
+    frames[1] = s->best_frame;
+    frames[2] = s->scratch_frame;
+    frames[3] = s->input_frame;
+
+    for (x = 0; x < num_bufs; x++) {
+        AVFrame *frame = frames[x];
+
+        frame->data[0]     = s->pict_bufs[x];
+        frame->linesize[0] = s->w;
+
+        if (is_rgb) {
+            frame->data[1]     = frame->data[0] + s->w * s->h;
+            frame->data[2]     = frame->data[1] + ((s->w * s->h) >> 2);
+            frame->linesize[1] = frame->linesize[2] = s->w >> 1;
+        }
+    }
+
+    s->min_strips = s->min_min_strips;
+    s->max_strips = s->max_max_strips;
+
+#ifdef CINEPAKENC_DEBUG
+    s->num_v1_mode = s->num_v4_mode = s->num_mc_mode = s->num_v1_encs = s->num_v4_encs = s->num_skips = 0;
+#endif
+
+    return 0;
+
+enomem:
+    av_frame_free(&s->last_frame);
+    av_frame_free(&s->best_frame);
+    av_frame_free(&s->scratch_frame);
+    if (is_rgb)
+        av_frame_free(&s->input_frame);
+    av_freep(&s->codebook_input);
+    av_freep(&s->codebook_closest);
+    av_freep(&s->strip_buf);
+    av_freep(&s->frame_buf);
+    av_freep(&s->mb);
+#ifdef CINEPAKENC_DEBUG
+    av_freep(&s->best_mb);
+#endif
+
+    for (x = 0; x < num_bufs; x++)
+        av_freep(&s->pict_bufs[x]);
+
+    return AVERROR(ENOMEM);
+}
+
+/**
+ * Compute the rate/distortion score of coding one strip in info->mode.
+ *
+ * The score starts with the cost in bits of the non-empty codebook chunks
+ * plus one chunk header, then adds the per-MB cost
+ * (lambda * bits + FF_LAMBDA_SCALE * error) of every MB in the strip.
+ *
+ * @param s      encoder context; s->mb[] supplies the per-MB errors and has
+ *               its best_encoding fields read and/or updated
+ * @param h      strip height in pixels, the strip holds s->w * h / MB_AREA MBs
+ * @param info   codebook sizes and mode of the strip
+ * @param report zero: pick the cheapest encoding for every MB;
+ *               nonzero: keep the encodings already stored in s->mb[],
+ *               except that MODE_MC may still turn a V1/V4 MB into ENC_SKIP
+ * @param training_set_v1_shrunk written only when report is nonzero and the
+ *               mode is MODE_V1_V4 (always 0) or MODE_MC (number of ENC_V1
+ *               MBs turned into ENC_SKIP)
+ * @param training_set_v4_shrunk same as above for the remaining
+ *               (non-V1, non-skip) MBs
+ * @param serr   only with CINEPAK_REPORT_SERR: receives the summed error of
+ *               the chosen encodings
+ * @return the score of the strip
+ */
+static int64_t calculate_mode_score(CinepakEncContext *s, int h, strip_info *info, int report, int *training_set_v1_shrunk, int *training_set_v4_shrunk
+#ifdef CINEPAK_REPORT_SERR
+, int64_t *serr
+#endif
+)
+{
+    //score = FF_LAMBDA_SCALE * error + lambda * bits
+    int x;
+    int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
+    int mb_count = s->w * h / MB_AREA;
+    mb_info *mb;
+    int64_t score1, score2, score3;
+    // byte count of the codebook chunks and one chunk header, "<< 3" turns it into bits
+    int64_t ret = s->lambda * ((info->v1_size ? CHUNK_HEADER_SIZE + info->v1_size * entry_size : 0) +
+                   (info->v4_size ? CHUNK_HEADER_SIZE + info->v4_size * entry_size : 0) +
+                   CHUNK_HEADER_SIZE) << 3;
+
+    //av_log(s->avctx, AV_LOG_INFO, "sizes %3i %3i -> %9lli score mb_count %i", info->v1_size, info->v4_size, (long long int)ret, mb_count);
+
+#ifdef CINEPAK_REPORT_SERR
+    *serr = 0;
+#endif
+
+    switch(info->mode) {
+    case MODE_V1_ONLY:
+        //one byte per MB
+        ret += s->lambda * 8 * mb_count;
+
+// while calculating we assume all blocks are ENC_V1
+        for(x = 0; x < mb_count; x++) {
+            mb = &s->mb[x];
+            ret += FF_LAMBDA_SCALE * mb->v1_error;
+#ifdef CINEPAK_REPORT_SERR
+            *serr += mb->v1_error;
+#endif
+// this function is never called for report in MODE_V1_ONLY
+//            if(!report)
+            mb->best_encoding = ENC_V1;
+        }
+
+        break;
+    case MODE_V1_V4:
+        //9 or 33 bits per MB
+        if(report) {
+// no moves between the corresponding training sets are allowed
+            *training_set_v1_shrunk = *training_set_v4_shrunk = 0;
+            for(x = 0; x < mb_count; x++) {
+                int mberr;
+                mb = &s->mb[x];
+                // cost of the encoding that was chosen earlier, whatever it is
+                if(mb->best_encoding == ENC_V1)
+                    score1 = s->lambda * 9  + FF_LAMBDA_SCALE * (mberr=mb->v1_error);
+                else
+                    score1 = s->lambda * 33 + FF_LAMBDA_SCALE * (mberr=mb->v4_error);
+                ret += score1;
+#ifdef CINEPAK_REPORT_SERR
+                *serr += mberr;
+#endif
+            }
+        } else { // find best mode per block
+            for(x = 0; x < mb_count; x++) {
+                mb = &s->mb[x];
+                score1 = s->lambda * 9  + FF_LAMBDA_SCALE * mb->v1_error;
+                score2 = s->lambda * 33 + FF_LAMBDA_SCALE * mb->v4_error;
+
+                // ties go to the cheaper-to-signal V1
+                if(score1 <= score2) {
+                    ret += score1;
+#ifdef CINEPAK_REPORT_SERR
+                    *serr += mb->v1_error;
+#endif
+                    mb->best_encoding = ENC_V1;
+                } else {
+                    ret += score2;
+#ifdef CINEPAK_REPORT_SERR
+                    *serr += mb->v4_error;
+#endif
+                    mb->best_encoding = ENC_V4;
+                }
+            }
+        }
+
+        break;
+    case MODE_MC:
+        //1, 10 or 34 bits per MB
+        if(report) {
+            int v1_shrunk = 0, v4_shrunk = 0;
+            for(x = 0; x < mb_count; x++) {
+                mb = &s->mb[x];
+// it is OK to move blocks to ENC_SKIP here
+// but not to any codebook encoding!
+                score1 = s->lambda * 1  + FF_LAMBDA_SCALE * mb->skip_error;
+                if(mb->best_encoding == ENC_SKIP) {
+                    ret += score1;
+#ifdef CINEPAK_REPORT_SERR
+                    *serr += mb->skip_error;
+#endif
+                } else if(mb->best_encoding == ENC_V1) {
+                    // skipping wins ties against V1
+                    if((score2=s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error) >= score1) {
+                        mb->best_encoding = ENC_SKIP;
+                        ++v1_shrunk;
+                        ret += score1;
+#ifdef CINEPAK_REPORT_SERR
+                        *serr += mb->skip_error;
+#endif
+                    } else {
+                        ret += score2;
+#ifdef CINEPAK_REPORT_SERR
+                        *serr += mb->v1_error;
+#endif
+                    }
+                } else {
+                    // skipping wins ties against V4 as well
+                    if((score3=s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error) >= score1) {
+                        mb->best_encoding = ENC_SKIP;
+                        ++v4_shrunk;
+                        ret += score1;
+#ifdef CINEPAK_REPORT_SERR
+                        *serr += mb->skip_error;
+#endif
+                    } else {
+                        ret += score3;
+#ifdef CINEPAK_REPORT_SERR
+                        *serr += mb->v4_error;
+#endif
+                    }
+                }
+            }
+            *training_set_v1_shrunk = v1_shrunk;
+            *training_set_v4_shrunk = v4_shrunk;
+        } else { // find best mode per block
+            for(x = 0; x < mb_count; x++) {
+                mb = &s->mb[x];
+                score1 = s->lambda * 1  + FF_LAMBDA_SCALE * mb->skip_error;
+                score2 = s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error;
+                score3 = s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error;
+
+                // preference on ties: skip, then V1, then V4
+                if(score1 <= score2 && score1 <= score3) {
+                    ret += score1;
+#ifdef CINEPAK_REPORT_SERR
+                    *serr += mb->skip_error;
+#endif
+                    mb->best_encoding = ENC_SKIP;
+                } else if(score2 <= score3) {
+                    ret += score2;
+#ifdef CINEPAK_REPORT_SERR
+                    *serr += mb->v1_error;
+#endif
+                    mb->best_encoding = ENC_V1;
+                } else {
+                    ret += score3;
+#ifdef CINEPAK_REPORT_SERR
+                    *serr += mb->v4_error;
+#endif
+                    mb->best_encoding = ENC_V4;
+                }
+            }
+        }
+
+        break;
+    }
+
+    return ret;
+}
+
+//writes a chunk header (type byte followed by the 24-bit big-endian size,
+//header included) to buf and returns the number of bytes written
+static int write_chunk_header(unsigned char *buf, int chunk_type, int chunk_size)
+{
+    const int size_with_header = chunk_size + CHUNK_HEADER_SIZE;
+
+    *buf = chunk_type;
+    AV_WB24(buf + 1, size_with_header);
+
+    return CHUNK_HEADER_SIZE;
+}
+
+/**
+ * Write one codebook chunk (header plus entries) to buf.
+ *
+ * @param s               encoder context, only pix_fmt is read
+ * @param codebook        entries, entry_size ints each (6 for RGB24 input, 4 otherwise)
+ * @param size            number of entries to write, may be 0 (header only)
+ * @param chunk_type_yuv  chunk id used for RGB24 input
+ * @param chunk_type_gray chunk id used otherwise
+ * @param buf             destination
+ * @return number of bytes written
+ */
+static int encode_codebook(CinepakEncContext *s, int *codebook, int size, int chunk_type_yuv, int chunk_type_gray, unsigned char *buf)
+{
+    int x, y, ret, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
+    int incremental_codebook_replacement_mode = 0; // hardcoded here,
+                // the compiler should notice that this is a constant -- rl
+
+    ret = write_chunk_header(buf,
+          s->pix_fmt == AV_PIX_FMT_RGB24 ?
+           chunk_type_yuv+(incremental_codebook_replacement_mode?1:0) :
+           chunk_type_gray+(incremental_codebook_replacement_mode?1:0),
+          entry_size * size
+           + (incremental_codebook_replacement_mode?(size+31)/32*4:0) );
+
+// we do codebook encoding according to the "intra" mode
+// but we keep the "dead" code for reference in case we will want
+// to use incremental codebook updates (which actually would give us
+// "kind of" motion compensation, especially in 1 strip/frame case) -- rl
+// (of course, the code will be not useful as-is)
+    if(incremental_codebook_replacement_mode) {
+        // the flag word is kept unsigned: storing 0x80000000 in an int and
+        // shifting it right while negative is implementation-defined
+        uint32_t flags = 0;
+        int flagsind = 0;   // offset of the flag word being filled
+        for(x = 0; x < size; x++) {
+            if(flags == 0) {
+                // start a new flag word, its place is reserved before the entries
+                flagsind = ret;
+                ret += 4;
+                flags = 0x80000000;
+            } else
+                flags = ((flags>>1) | 0x80000000);
+            // the values after the first four are stored with the top bit flipped
+            for(y = 0; y < entry_size; y++)
+                buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
+            if(flags == 0xffffffff) {
+                AV_WB32(&buf[flagsind], flags);
+                flags = 0;
+            }
+        }
+        // flush a partially filled flag word
+        if(flags)
+            AV_WB32(&buf[flagsind], flags);
+    } else
+        for(x = 0; x < size; x++)
+            for(y = 0; y < entry_size; y++)
+                buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
+
+    return ret;
+}
+
+//makes out refer to the part of in whose top left corner is at (x,y);
+//for RGB24 input the two extra planes are addressed at half resolution
+static void get_sub_picture(CinepakEncContext *s, int x, int y, AVPicture *in, AVPicture *out)
+{
+    int plane;
+
+    out->data[0]     = in->data[0] + x + y * in->linesize[0];
+    out->linesize[0] = in->linesize[0];
+
+    if(s->pix_fmt != AV_PIX_FMT_RGB24)
+        return;
+
+    for(plane = 1; plane <= 2; plane++) {
+        out->data[plane]     = in->data[plane] + (x >> 1) + (y >> 1) * in->linesize[plane];
+        out->linesize[plane] = in->linesize[plane];
+    }
+}
+
+//reconstructs a V1-coded MB into the 4x4 block sub_pict points at:
+//each of the first four values of the codebook entry fills one 2x2 quadrant
+//of plane 0, for RGB24 input values 4 and 5 fill the 2x2 blocks of planes 1 and 2
+static void decode_v1_vector(CinepakEncContext *s, AVPicture *sub_pict, int v1_vector, strip_info *info)
+{
+    const int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
+    const int *entry     = info->v1_codebook + v1_vector*entry_size;
+    int row, col, plane;
+
+    for(row = 0; row < MB_SIZE; row++)
+        for(col = 0; col < MB_SIZE; col++)
+            sub_pict->data[0][col + row*sub_pict->linesize[0]] = entry[(row >> 1)*2 + (col >> 1)];
+
+    if(s->pix_fmt == AV_PIX_FMT_RGB24) {
+        for(plane = 1; plane <= 2; plane++)
+            for(row = 0; row < MB_SIZE/2; row++)
+                for(col = 0; col < MB_SIZE/2; col++)
+                    sub_pict->data[plane][col + row*sub_pict->linesize[plane]] = entry[3 + plane];
+    }
+}
+
+//reconstructs a V4-coded MB into the 4x4 block sub_pict points at:
+//v4_vector[0..3] select the codebook entries of the top left, top right,
+//bottom left and bottom right 2x2 quadrant
+static void decode_v4_vector(CinepakEncContext *s, AVPicture *sub_pict, int *v4_vector, strip_info *info)
+{
+    const int is_rgb     = s->pix_fmt == AV_PIX_FMT_RGB24;
+    const int entry_size = is_rgb ? 6 : 4;
+    int quad;
+
+    for(quad = 0; quad < 4; quad++) {
+        const int *entry = info->v4_codebook + v4_vector[quad]*entry_size;
+        const int qx     = (quad & 1) << 1;     //0 or 2
+        const int qy     = quad & 2;            //0 or 2
+        uint8_t *top     = sub_pict->data[0] + qx +  qy   *sub_pict->linesize[0];
+        uint8_t *bottom  = sub_pict->data[0] + qx + (qy+1)*sub_pict->linesize[0];
+
+        top[0]    = entry[0];
+        top[1]    = entry[1];
+        bottom[0] = entry[2];
+        bottom[1] = entry[3];
+
+        if(is_rgb) {
+            sub_pict->data[1][(qx>>1) + (qy>>1)*sub_pict->linesize[1]] = entry[4];
+            sub_pict->data[2][(qx>>1) + (qy>>1)*sub_pict->linesize[2]] = entry[5];
+        }
+    }
+}
+
+//copies one 4x4 MB from b to a (plus the 2x2 blocks of planes 1 and 2 for RGB24 input)
+static void copy_mb(CinepakEncContext *s, AVPicture *a, AVPicture *b)
+{
+    const int planes = s->pix_fmt == AV_PIX_FMT_RGB24 ? 3 : 1;
+    int plane, row;
+
+    for(plane = 0; plane < planes; plane++) {
+        //plane 0 is full resolution, the others are half resolution
+        const int size = plane ? MB_SIZE/2 : MB_SIZE;
+        uint8_t *dst       = a->data[plane];
+        const uint8_t *src = b->data[plane];
+
+        for(row = 0; row < size; row++) {
+            memcpy(dst, src, size);
+            dst += a->linesize[plane];
+            src += b->linesize[plane];
+        }
+    }
+}
+
+/**
+ * Write the codebook chunks and the vector chunk of one strip and
+ * reconstruct the strip into scratch_pict.
+ *
+ * @param s            encoder context; s->mb[] holds the per-MB decisions
+ * @param h            strip height in pixels
+ * @param scratch_pict receives the reconstruction of every MB of the strip
+ * @param last_pict    source of the MBs that are skipped in MODE_MC
+ * @param info         codebooks and mode of the strip
+ * @param buf          destination for the chunks
+ * @return number of bytes written to buf
+ */
+static int encode_mode(CinepakEncContext *s, int h, AVPicture *scratch_pict, AVPicture *last_pict, strip_info *info, unsigned char *buf)
+{
+    int x, y, z, bits, temp_size, header_ofs, ret = 0, mb_count = s->w * h / MB_AREA;
+    int needs_extra_bit, should_write_temp;
+    // the flag word is unsigned because its top bit gets set:
+    // shifting a signed 1 into bit 31 is undefined behaviour
+    uint32_t flags;
+    unsigned char temp[64]; //32/2 = 16 V4 blocks at 4 B each -> 64 B
+    mb_info *mb;
+    AVPicture sub_scratch = {{0}}, sub_last = {{0}};
+
+    //encode codebooks
+////// MacOS vintage decoder compatibility dictates the presence of
+////// the codebook chunk even when the codebook is empty - pretty dumb...
+////// and also the certain order of the codebook chunks -- rl
+    if(info->v4_size || !s->skip_empty_cb)
+        ret += encode_codebook(s, info->v4_codebook, info->v4_size, 0x20, 0x24, buf + ret);
+
+    if(info->v1_size || !s->skip_empty_cb)
+        ret += encode_codebook(s, info->v1_codebook, info->v1_size, 0x22, 0x26, buf + ret);
+
+    //update scratch picture
+    for(z = y = 0; y < h; y += MB_SIZE) {
+        for(x = 0; x < s->w; x += MB_SIZE, z++) {
+            mb = &s->mb[z];
+
+            get_sub_picture(s, x, y, scratch_pict, &sub_scratch);
+
+            if(info->mode == MODE_MC && mb->best_encoding == ENC_SKIP) {
+                get_sub_picture(s, x, y, last_pict, &sub_last);
+                copy_mb(s, &sub_scratch, &sub_last);
+            } else if(info->mode == MODE_V1_ONLY || mb->best_encoding == ENC_V1)
+                decode_v1_vector(s, &sub_scratch, mb->v1_vector, info);
+            else
+                decode_v4_vector(s, &sub_scratch, mb->v4_vector, info);
+        }
+    }
+
+    switch(info->mode) {
+    case MODE_V1_ONLY:
+        //av_log(s->avctx, AV_LOG_INFO, "mb_count = %i\n", mb_count);
+        ret += write_chunk_header(buf + ret, 0x32, mb_count);
+
+        for(x = 0; x < mb_count; x++)
+            buf[ret++] = s->mb[x].v1_vector;
+
+        break;
+    case MODE_V1_V4:
+        //remember header position
+        header_ofs = ret;
+        ret += CHUNK_HEADER_SIZE;
+
+        //groups of up to 32 MBs: one flag word (MSB first, set bit = V4)
+        //followed by the vectors of the group
+        for(x = 0; x < mb_count; x += 32) {
+            flags = 0;
+            for(y = x; y < FFMIN(x+32, mb_count); y++)
+                if(s->mb[y].best_encoding == ENC_V4)
+                    flags |= 1U << (31 - y + x);
+
+            AV_WB32(&buf[ret], flags);
+            ret += 4;
+
+            for(y = x; y < FFMIN(x+32, mb_count); y++) {
+                mb = &s->mb[y];
+
+                if(mb->best_encoding == ENC_V1)
+                    buf[ret++] = mb->v1_vector;
+                else
+                    for(z = 0; z < 4; z++)
+                        buf[ret++] = mb->v4_vector[z];
+            }
+        }
+
+        write_chunk_header(buf + header_ofs, 0x30, ret - header_ofs - CHUNK_HEADER_SIZE);
+
+        break;
+    case MODE_MC:
+        //remember header position
+        header_ofs = ret;
+        ret += CHUNK_HEADER_SIZE;
+        flags = 0;
+        bits = temp_size = 0;
+
+        //every MB takes one flag bit (set = coded), coded MBs take a second
+        //one (set = V4); vectors wait in temp until their flag word is written
+        for(x = 0; x < mb_count; x++) {
+            mb = &s->mb[x];
+            flags |= (uint32_t)(mb->best_encoding != ENC_SKIP) << (31 - bits++);
+            needs_extra_bit = 0;
+            should_write_temp = 0;
+
+            if(mb->best_encoding != ENC_SKIP) {
+                if(bits < 32)
+                    flags |= (uint32_t)(mb->best_encoding == ENC_V4) << (31 - bits++);
+                else
+                    needs_extra_bit = 1;    //second bit goes into the next flag word
+            }
+
+            if(bits == 32) {
+                AV_WB32(&buf[ret], flags);
+                ret += 4;
+                flags = 0;
+                bits = 0;
+
+                if(mb->best_encoding == ENC_SKIP || needs_extra_bit) {
+                    //the current MB adds nothing that belongs before the next
+                    //flag word, so the pending vectors can go out right away
+                    memcpy(&buf[ret], temp, temp_size);
+                    ret += temp_size;
+                    temp_size = 0;
+                } else
+                    should_write_temp = 1;
+            }
+
+            if(needs_extra_bit) {
+                flags = (uint32_t)(mb->best_encoding == ENC_V4) << 31;
+                bits = 1;
+            }
+
+            if(mb->best_encoding == ENC_V1)
+                temp[temp_size++] = mb->v1_vector;
+            else if(mb->best_encoding == ENC_V4)
+                for(z = 0; z < 4; z++)
+                    temp[temp_size++] = mb->v4_vector[z];
+
+            if(should_write_temp) {
+                memcpy(&buf[ret], temp, temp_size);
+                ret += temp_size;
+                temp_size = 0;
+            }
+        }
+
+        //flush the last, partially filled flag word and its vectors
+        if(bits > 0) {
+            AV_WB32(&buf[ret], flags);
+            ret += 4;
+            memcpy(&buf[ret], temp, temp_size);
+            ret += temp_size;
+        }
+
+        write_chunk_header(buf + header_ofs, 0x31, ret - header_ofs - CHUNK_HEADER_SIZE);
+
+        break;
+    }
+
+    return ret;
+}
+
+//returns the sum of squared differences between the 4x4 MBs a and b
+//(including the 2x2 blocks of planes 1 and 2 for RGB24 input)
+static int compute_mb_distortion(CinepakEncContext *s, AVPicture *a, AVPicture *b)
+{
+    const int planes = s->pix_fmt == AV_PIX_FMT_RGB24 ? 3 : 1;
+    int plane, row, col, sum = 0;
+
+    for(plane = 0; plane < planes; plane++) {
+        //plane 0 is full resolution, the others are half resolution
+        const int size = plane ? MB_SIZE/2 : MB_SIZE;
+
+        for(row = 0; row < size; row++) {
+            const uint8_t *pa = a->data[plane] + row*a->linesize[plane];
+            const uint8_t *pb = b->data[plane] + row*b->linesize[plane];
+
+            for(col = 0; col < size; col++) {
+                const int diff = pa[col] - pb[col];
+                sum += diff*diff;
+            }
+        }
+    }
+
+    return sum;
+}
+
+// Train the V1 (v1mode != 0) or V4 (v1mode == 0) codebook of a strip of height h via
+// avpriv_init_elbg()/avpriv_do_elbg(), then store in every used macroblock its codebook
+// indices and the distortion they give. If encoding is not ENC_UNCERTAIN only macroblocks
+// whose best_encoding equals it are used. Returns the possibly reduced codebook size (0 if none).
+#define CERTAIN(x) ((x)!=ENC_UNCERTAIN)
+static int quantize(CinepakEncContext *s, int h, AVPicture *pict,
+                    int v1mode, strip_info *info,
+                    mb_encoding encoding)
+{
+    int x, y, i, j, k, x2, y2, x3, y3, plane, shift, mbn;
+    int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4; // components per codebook vector
+    int *codebook = v1mode ? info->v1_codebook : info->v4_codebook;
+    int size = v1mode ? info->v1_size : info->v4_size; // requested number of codebook entries
+    int64_t total_error = 0; // only reported by the disabled av_log() at the end
+    uint8_t vq_pict_buf[(MB_AREA*3)/2]; // one MB: MB_AREA samples of plane 0 plus two quarter-size planes
+    AVPicture sub_pict, vq_pict;
+// pass 1: collect the training vectors in s->codebook_input (i counts vectors, mbn macroblocks)
+    for(mbn = i = y = 0; y < h; y += MB_SIZE) {
+        for(x = 0; x < s->w; x += MB_SIZE, ++mbn) {
+            int *base;
+
+            if(CERTAIN(encoding)) {
+// train only on the blocks already known to be encoded this way
+               if(s->mb[mbn].best_encoding != encoding) continue;
+            }
+            base = s->codebook_input + i*entry_size;
+            if(v1mode) {
+                //subsample: one vector per MB, every component is the mean of a 2x2 group of samples
+                for(j = y2 = 0; y2 < entry_size; y2 += 2) {
+                    for(x2 = 0; x2 < 4; x2 += 2, j++) {
+                        plane = y2 < 4 ? 0 : 1 + (x2 >> 1); // y2 == 4 (entry_size 6 only): x2 selects plane 1 or 2
+                        shift = y2 < 4 ? 0 : 1;
+                        x3 = shift ? 0 : x2;
+                        y3 = shift ? 0 : y2;
+                        base[j] = (pict->data[plane][((x+x3) >> shift) +      ((y+y3) >> shift)      * pict->linesize[plane]] +
+                                   pict->data[plane][((x+x3) >> shift) + 1 +  ((y+y3) >> shift)      * pict->linesize[plane]] +
+                                   pict->data[plane][((x+x3) >> shift) +     (((y+y3) >> shift) + 1) * pict->linesize[plane]] +
+                                   pict->data[plane][((x+x3) >> shift) + 1 + (((y+y3) >> shift) + 1) * pict->linesize[plane]]) >> 2;
+                    }
+                }
+            } else {
+                //copy: four vectors per MB, one per 2x2 sub-block (4 plane-0 samples, then one sample of planes 1 and 2 if entry_size is 6)
+                for(j = y2 = 0; y2 < MB_SIZE; y2 += 2) {
+                    for(x2 = 0; x2 < MB_SIZE; x2 += 2) {
+                        for(k = 0; k < entry_size; k++, j++) {
+                            plane = k >= 4 ? k - 3 : 0;
+                            if(k >= 4) {
+                                x3 = (x+x2) >> 1;
+                                y3 = (y+y2) >> 1;
+                            } else {
+                                x3 = x + x2 + (k & 1);
+                                y3 = y + y2 + (k >> 1);
+                            }
+
+                            base[j] = pict->data[plane][x3 + y3*pict->linesize[plane]];
+                        }
+                    }
+                }
+            }
+            i += v1mode ? 1 : 4;
+        }
+    }
+//    if(i < mbn*(v1mode ? 1 : 4)) {
+//        av_log(s->avctx, AV_LOG_INFO, "reducing training set for %s from %i to %i (encoding %i)\n", v1mode?"v1":"v4", mbn*(v1mode ? 1 : 4), i, encoding);
+//    }
+
+    if(i == 0) // empty training set, nothing to do
+        return 0;
+    if(i < size) { // never ask for more codebook entries than there are training vectors
+        //av_log(s->avctx, (CERTAIN(encoding) ? AV_LOG_ERROR : AV_LOG_INFO), "WOULD WASTE: %s cbsize %i bigger than training set size %i (encoding %i)\n", v1mode?"v1":"v4", size, i, encoding);
+        size = i;
+    }
+
+    avpriv_init_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
+    avpriv_do_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
+
+    //setup vq_pict, which contains a single MB
+    vq_pict.data[0] = vq_pict_buf;
+    vq_pict.linesize[0] = MB_SIZE;
+    vq_pict.data[1] = &vq_pict_buf[MB_AREA];
+    vq_pict.data[2] = vq_pict.data[1] + (MB_AREA >> 2);
+    vq_pict.linesize[1] = vq_pict.linesize[2] = MB_SIZE >> 1;
+
+    //pass 2: copy indices and measure each block's distortion (i indexes s->codebook_closest, j the macroblocks)
+    for(i = j = y = 0; y < h; y += MB_SIZE) {
+        for(x = 0; x < s->w; x += MB_SIZE, j++) {
+            mb_info *mb = &s->mb[j];
+// skip uninteresting blocks if we know their preferred encoding
+            if(CERTAIN(encoding) && mb->best_encoding != encoding)
+                continue;
+
+            //point sub_pict to current MB
+            get_sub_picture(s, x, y, pict, &sub_pict);
+
+            if(v1mode) {
+                mb->v1_vector = s->codebook_closest[i];
+
+                //fill in vq_pict with V1 data
+                decode_v1_vector(s, &vq_pict, mb->v1_vector, info);
+
+                mb->v1_error = compute_mb_distortion(s, &sub_pict, &vq_pict);
+                total_error += mb->v1_error;
+            } else {
+                for(k = 0; k < 4; k++)
+                    mb->v4_vector[k] = s->codebook_closest[i+k];
+
+                //fill in vq_pict with V4 data
+                decode_v4_vector(s, &vq_pict, mb->v4_vector, info);
+
+                mb->v4_error = compute_mb_distortion(s, &sub_pict, &vq_pict);
+                total_error += mb->v4_error;
+            }
+            i += v1mode ? 1 : 4;
+        }
+    }
+// check that we did it right in the beginning of the function
+    av_assert0(i >= size); // training set is no smaller than the codebook
+    //av_log(s->avctx, AV_LOG_INFO, "isv1 %i size= %i i= %i error %lli\n", v1mode, size, i, (long long int)total_error);
+
+    return size;
+}
+
+// Record, for every macroblock of the strip, its distortion against the same block of last_pict.
+static void calculate_skip_errors(CinepakEncContext *s, int h, AVPicture *last_pict, AVPicture *pict, strip_info *info)
+{
+    AVPicture prev_mb, cur_mb;
+    mb_info *mb = s->mb;
+    int mb_x, mb_y;
+    for(mb_y = 0; mb_y < h; mb_y += MB_SIZE) {
+        for(mb_x = 0; mb_x < s->w; mb_x += MB_SIZE, mb++) {
+            get_sub_picture(s, mb_x, mb_y, last_pict, &prev_mb);
+            get_sub_picture(s, mb_x, mb_y, pict,      &cur_mb);
+            mb->skip_error = compute_mb_distortion(s, &prev_mb, &cur_mb);
+        }
+    }
+}
+
+// Write the strip header to buf; strip_size is the strip payload size without this header.
+static void write_strip_header(CinepakEncContext *s, int y, int h, int keyframe, unsigned char *buf, int strip_size)
+{
+    // Only intra strip coding is used (how much could be won otherwise? which part
+    // of a codebook should be updated?); keyframes differ only in that ENC_SKIP is
+    // disallowed on them -- rl (this choice used to be inverted: keyframe ? 0x11 : 0x10)
+    if(keyframe)
+        buf[0] = 0x10;
+    else
+        buf[0] = 0x11;
+    AV_WB24(buf + 1, strip_size + STRIP_HEADER_SIZE);
+    // vertical extent relative to the strip; absolute values (y and y+h) work too -- rl
+    AV_WB16(buf +  4, 0);
+    AV_WB16(buf +  6, 0);
+    AV_WB16(buf +  8, h);
+    AV_WB16(buf + 10, s->w);
+}
+
+// Rate/distortion search for one strip of height h: tries the codebook sizes and modes,
+// keeps the lowest-scoring encoding in s->strip_buf and copies it, strip header included,
+// to buf. Stores the winning score in *best_score; returns the number of bytes put in buf.
+static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture *last_pict, AVPicture *pict, AVPicture *scratch_pict, unsigned char *buf, int64_t *best_score
+#ifdef CINEPAK_REPORT_SERR
+, int64_t *best_serr
+#endif
+)
+{
+    int64_t score = 0;
+#ifdef CINEPAK_REPORT_SERR
+    int64_t serr;
+#endif
+    int best_size = 0; // 0 also means that no candidate has been encoded yet
+    strip_info info;
+// for codebook optimization:
+    int v1enough, v1_size, v4enough, v4_size;
+    int new_v1_size, new_v4_size;
+    int v1shrunk, v4shrunk;
+
+    if(!keyframe) // inter frames only: per-block distortion against last_pict
+        calculate_skip_errors(s, h, last_pict, pict, &info);
+
+    //try some powers of 4 for the size of the codebooks
+    //constrain the v4 codebook to be no bigger than the v1 one,
+    //(and no less than v1_size/4)
+    //thus making v1 preferable and possibly losing small details? should be ok
+#define SMALLEST_CODEBOOK 1
+    for(v1enough = 0, v1_size = SMALLEST_CODEBOOK; v1_size <= CODEBOOK_MAX && !v1enough; v1_size <<= 2) {
+        for(v4enough = 0, v4_size = 0; v4_size <= v1_size && !v4enough; v4_size = v4_size ? v4_size << 2 : v1_size >= SMALLEST_CODEBOOK << 2 ? v1_size >> 2 : SMALLEST_CODEBOOK) {
+            //try all modes
+            for(CinepakMode mode = 0; mode < MODE_COUNT; mode++) {
+                //don't allow MODE_MC in intra frames
+                if(keyframe && mode == MODE_MC)
+                    continue;
+
+                if(mode == MODE_V1_ONLY) {
+                    info.v1_size = v1_size;
+// the size may shrink even before optimizations if the input is short:
+                    info.v1_size = quantize(s, h, pict, 1, &info, ENC_UNCERTAIN);
+                    if(info.v1_size < v1_size)
+// too few eligible blocks, no sense in trying bigger sizes
+                        v1enough = 1;
+
+                    info.v4_size = 0;
+                } else { // mode != MODE_V1_ONLY
+                    // if v4 codebook is empty then only allow V1-only mode
+                    if(!v4_size)
+                        continue;
+
+                    if(mode == MODE_V1_V4) {
+                        info.v4_size = v4_size;
+                        info.v4_size = quantize(s, h, pict, 0, &info, ENC_UNCERTAIN);
+                        if(info.v4_size < v4_size)
+// too few eligible blocks, no sense in trying bigger sizes
+                            v4enough = 1;
+                    }
+                }
+
+                info.mode = mode;
+// choose the best encoding per block, based on current experience
+                score = calculate_mode_score(s, h, &info, 0,
+                                             &v1shrunk, &v4shrunk
+#ifdef CINEPAK_REPORT_SERR
+, &serr
+#endif
+);
+
+                if(mode != MODE_V1_ONLY){
+                    int extra_iterations_limit = s->max_extra_cb_iterations;
+// recompute the codebooks, omitting the extra blocks
+// we assume we _may_ come here with more blocks to encode than before
+                    info.v1_size = v1_size;
+                    new_v1_size = quantize(s, h, pict, 1, &info, ENC_V1);
+                    if(new_v1_size < info.v1_size){
+                        //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
+                        info.v1_size = new_v1_size;
+                    }
+// we assume we _may_ come here with more blocks to encode than before
+                    info.v4_size = v4_size;
+                    new_v4_size = quantize(s, h, pict, 0, &info, ENC_V4);
+                    if(new_v4_size < info.v4_size) {
+                        //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries at first iteration\n", mode, v1_size, v4_size, new_v4_size);
+                        info.v4_size = new_v4_size;
+                    }
+// calculate the resulting score
+// (do not move blocks to codebook encodings now, as some blocks may have
+// got bigger errors despite a smaller training set - but we do not
+// ever grow the training sets back)
+                    for(;;) {
+                        score = calculate_mode_score(s, h, &info, 1,
+                                                     &v1shrunk, &v4shrunk
+#ifdef CINEPAK_REPORT_SERR
+, &serr
+#endif
+);
+// do we have a reason to reiterate? if so, have we reached the limit?
+                        if((!v1shrunk && !v4shrunk) || !extra_iterations_limit--) break;
+// recompute the codebooks, omitting the extra blocks
+                        if(v1shrunk) {
+                            info.v1_size = v1_size;
+                            new_v1_size = quantize(s, h, pict, 1, &info, ENC_V1);
+                            if(new_v1_size < info.v1_size){
+                                //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
+                                info.v1_size = new_v1_size;
+                            }
+                        }
+                        if(v4shrunk) {
+                            info.v4_size = v4_size;
+                            new_v4_size = quantize(s, h, pict, 0, &info, ENC_V4);
+                            if(new_v4_size < info.v4_size) {
+                                //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries\n", mode, v1_size, v4_size, new_v4_size);
+                                info.v4_size = new_v4_size;
+                            }
+                        }
+                    }
+                }
+
+                //av_log(s->avctx, AV_LOG_INFO, "%3i %3i score = %lli\n", v1_size, v4_size, (long long int)score);
+
+                if(best_size == 0 || score < *best_score) { // first candidate, or a better one: keep it
+                    *best_score = score;
+#ifdef CINEPAK_REPORT_SERR
+                    *best_serr = serr;
+#endif
+                    best_size = encode_mode(s, h, scratch_pict, last_pict, &info, s->strip_buf + STRIP_HEADER_SIZE);
+
+                    //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18lli %i B", mode, info.v1_size, info.v4_size, (long long int)score, best_size);
+                    //av_log(s->avctx, AV_LOG_INFO, "\n");
+#ifdef CINEPAK_REPORT_SERR
+                    av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18lli %i B\n", mode, v1_size, v4_size, (long long int)serr, best_size);
+#endif
+
+#ifdef CINEPAKENC_DEBUG
+                    //save MB encoding choices (NOTE(review): mb_count is not declared in this function - confirm the debug build compiles)
+                    memcpy(s->best_mb, s->mb, mb_count*sizeof(mb_info));
+#endif
+                    //memcpy(strip_temp + STRIP_HEADER_SIZE, strip_temp, best_size);
+                    write_strip_header(s, y, h, keyframe, s->strip_buf, best_size);
+                }
+            }
+        }
+    }
+
+#ifdef CINEPAKENC_DEBUG
+    //gather stats. this will only work properly if MAX_STRIPS == 1 (NOTE(review): best_info is not declared in this function)
+    if(best_info.mode == MODE_V1_ONLY) {
+        s->num_v1_mode++;
+        s->num_v1_encs += s->w*h/MB_AREA;
+    } else {
+        if(best_info.mode == MODE_V1_V4)
+            s->num_v4_mode++;
+        else
+            s->num_mc_mode++;
+
+        int x;
+        for(x = 0; x < s->w*h/MB_AREA; x++)
+            if(s->best_mb[x].best_encoding == ENC_V1)
+                s->num_v1_encs++;
+            else if(s->best_mb[x].best_encoding == ENC_V4)
+                s->num_v4_encs++;
+            else
+                s->num_skips++;
+    }
+#endif
+
+    best_size += STRIP_HEADER_SIZE; // the header was written in front of the payload in s->strip_buf
+    memcpy(buf, s->strip_buf, best_size);
+
+    return best_size;
+}
+
+// Write the frame header for data_size bytes of strip data to buf; returns the header size.
+static int write_cvid_header(CinepakEncContext *s, unsigned char *buf, int num_strips, int data_size, int isakeyframe)
+{
+    buf[0] = !isakeyframe;   // flag byte: 0 on keyframes, 1 otherwise
+    AV_WB24(buf + 1, data_size + CVID_HEADER_SIZE);
+    AV_WB16(buf + 4, s->w);
+    AV_WB16(buf + 6, s->h);
+    AV_WB16(buf + 8, num_strips);
+    return CVID_HEADER_SIZE;
+}
+
+// Encode one frame: tries the strip counts from s->min_strips to s->max_strips, copies the
+// lowest-scoring encoding to buf and adapts the strip count range used for the next frame.
+// Returns the encoded size in bytes, or the negative value returned by a failed rd_strip().
+static int rd_frame(CinepakEncContext *s, const AVFrame *frame, int isakeyframe, unsigned char *buf, int buf_size)
+{
+    int num_strips, strip, i, y, nexty, size, temp_size;
+    AVPicture last_pict, pict, scratch_pict;
+    int64_t best_score = 0, score, score_temp;
+#ifdef CINEPAK_REPORT_SERR
+    int64_t best_serr = 0, serr, serr_temp;
+#endif
+    int best_nstrips = -1, best_size = -1; // mark as uninitialized
+
+    if(s->pix_fmt == AV_PIX_FMT_RGB24) {
+        int x;
+// build a copy of the given frame in the correct colorspace (into s->input_frame, 2x2 pixels at a time)
+        for(y = 0; y < s->h; y += 2) {
+            for(x = 0; x < s->w; x += 2) {
+                uint8_t *ir[2]; int32_t r, g, b, rr, gg, bb;
+                ir[0] = ((AVPicture*)frame)->data[0] + x*3 + y*((AVPicture*)frame)->linesize[0];
+                ir[1] = ir[0] + ((AVPicture*)frame)->linesize[0];
+                get_sub_picture(s, x, y, (AVPicture*)s->input_frame, &scratch_pict);
+                r = g = b = 0;
+                for(i=0; i<4; ++i) {
+                    int i1, i2;
+                    i1 = (i&1); i2 = (i>=2);
+                    rr = ir[i2][i1*3+0];
+                    gg = ir[i2][i1*3+1];
+                    bb = ir[i2][i1*3+2];
+                    r += rr; g += gg; b += bb;
+// using fixed point arithmetic for portable repeatability, scaling by 2^23
+// "Y"
+//                    rr = 0.2857*rr + 0.5714*gg + 0.1429*bb;
+                    rr = (2396625*rr + 4793251*gg + 1198732*bb) >> 23;
+                    if(      rr <   0) rr =   0;
+                    else if (rr > 255) rr = 255;
+                    scratch_pict.data[0][i1 + i2*scratch_pict.linesize[0]] = rr;
+                }
+// let us scale down as late as possible
+//                r /= 4; g /= 4; b /= 4;
+// "U" (r, g, b hold sums over the 2x2 pixels; the division by 4 is folded into the constants)
+//                rr = -0.1429*r - 0.2857*g + 0.4286*b;
+                rr = (-299683*r - 599156*g + 898839*b) >> 23;
+                if(      rr < -128) rr = -128;
+                else if (rr >  127) rr =  127;
+                scratch_pict.data[1][0] = rr + 128; // quantize needs unsigned
+// "V"
+//                rr = 0.3571*r - 0.2857*g - 0.0714*b;
+                rr = (748893*r - 599156*g - 149737*b) >> 23;
+                if(      rr < -128) rr = -128;
+                else if (rr >  127) rr =  127;
+                scratch_pict.data[2][0] = rr + 128; // quantize needs unsigned
+            }
+        }
+    }
+
+    //would be nice but quite certainly incompatible with vintage players:
+    // support encoding zero strips (meaning skip the whole frame)
+    for(num_strips = s->min_strips; num_strips <= s->max_strips && num_strips <= s->h / MB_SIZE; num_strips++) {
+        score = 0;
+        size = 0;
+#ifdef CINEPAK_REPORT_SERR
+        serr = 0;
+#endif
+
+        for(y = 0, strip = 1; y < s->h; strip++, y = nexty) {
+            int strip_height;
+
+            nexty = strip * s->h / num_strips; // <= s->h
+            //make nexty the next multiple of 4 if not already there
+            if(nexty & 3)
+                nexty += 4 - (nexty & 3);
+
+            strip_height = nexty - y;
+            if(strip_height <= 0) { // can this ever happen?
+                av_log(s->avctx, AV_LOG_INFO, "skipping zero height strip %i of %i\n", strip, num_strips);
+                continue;
+            }
+            if(s->pix_fmt == AV_PIX_FMT_RGB24) // RGB24 input is encoded from the converted copy built above
+                get_sub_picture(s, 0, y, (AVPicture*)s->input_frame,    &pict);
+            else
+                get_sub_picture(s, 0, y, (AVPicture*)frame,              &pict);
+            get_sub_picture(s, 0, y, (AVPicture*)s->last_frame,    &last_pict);
+            get_sub_picture(s, 0, y, (AVPicture*)s->scratch_frame, &scratch_pict);
+
+            if((temp_size = rd_strip(s, y, strip_height, isakeyframe, &last_pict, &pict, &scratch_pict, s->frame_buf + size + CVID_HEADER_SIZE, &score_temp
+#ifdef CINEPAK_REPORT_SERR
+, &serr_temp
+#endif
+)) < 0)
+                return temp_size;
+
+            score += score_temp;
+#ifdef CINEPAK_REPORT_SERR
+            serr += serr_temp;
+#endif
+            size += temp_size;
+            //av_log(s->avctx, AV_LOG_INFO, "strip %d, isakeyframe=%d", strip, isakeyframe);
+            //av_log(s->avctx, AV_LOG_INFO, "\n");
+        }
+
+        if(best_score == 0 || score < best_score) { // best_score == 0 means "no candidate yet"
+            best_score = score;
+#ifdef CINEPAK_REPORT_SERR
+            best_serr = serr;
+#endif
+            best_size = size + write_cvid_header(s, s->frame_buf, num_strips, size, isakeyframe);
+            //av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12lli, %i B\n", num_strips, (long long int)score, best_size);
+#ifdef CINEPAK_REPORT_SERR
+            av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12lli, %i B\n", num_strips, (long long int)serr, best_size);
+#endif
+            FFSWAP(AVFrame *, s->best_frame, s->scratch_frame);
+            memcpy(buf, s->frame_buf, best_size); // NOTE(review): buf_size is never read - confirm buf always has room for best_size bytes
+            best_nstrips = num_strips;
+        }
+// avoid trying too many strip numbers without a real reason
+// (this makes the processing of the very first frame faster)
+        if(num_strips - best_nstrips > 4)
+            break;
+    }
+
+    av_assert0(best_nstrips >= 0 && best_size >= 0);
+
+// let the number of strips slowly adapt to the changes in the contents,
+// compared to full bruteforcing every time this will occasionally lead
+// to some r/d performance loss but makes encoding up to several times faster
+    if(!s->strip_number_delta_range) {
+        if(best_nstrips == s->max_strips) { // let us try to step up
+            s->max_strips = best_nstrips + 1;
+            if(s->max_strips >= s->max_max_strips)
+                s->max_strips = s->max_max_strips;
+        } else { // try to step down
+            s->max_strips = best_nstrips;
+        }
+        s->min_strips = s->max_strips - 1;
+        if(s->min_strips < s->min_min_strips)
+            s->min_strips = s->min_min_strips;
+    } else {
+        s->max_strips = best_nstrips + s->strip_number_delta_range;
+        if(s->max_strips >= s->max_max_strips)
+            s->max_strips = s->max_max_strips;
+        s->min_strips = best_nstrips - s->strip_number_delta_range;
+        if(s->min_strips < s->min_min_strips)
+            s->min_strips = s->min_min_strips;
+    }
+
+    return best_size;
+}
+
+// Encode one frame into pkt; frame 0 of every keyint-frame cycle is coded as a keyframe.
+// Returns 0 on success, otherwise the negative error code of the failing step.
+static int cinepak_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
+                                const AVFrame *frame, int *got_packet)
+{
+    CinepakEncContext *s = avctx->priv_data;
+    int ret;
+
+    s->lambda = frame->quality ? frame->quality - 1 : 2 * FF_LAMBDA_SCALE;
+    if ((ret = ff_alloc_packet2(avctx, pkt, s->frame_buf_size)) < 0)
+        return ret;
+    // propagate rd_frame() failures instead of emitting a packet with a negative size
+    if ((ret = rd_frame(s, frame, (s->curframe == 0), pkt->data, s->frame_buf_size)) < 0)
+        return ret;
+    pkt->size = ret;
+    if (s->curframe == 0)
+        pkt->flags |= AV_PKT_FLAG_KEY;
+    *got_packet = 1;
+    FFSWAP(AVFrame *, s->last_frame, s->best_frame); // best_frame becomes the last_frame rd_frame() reads next time
+    if (++s->curframe >= s->keyint)
+        s->curframe = 0;
+    return 0;
+}
+
+// Release every frame and buffer owned by the encoder context; always returns 0.
+static av_cold int cinepak_encode_end(AVCodecContext *avctx)
+{
+    CinepakEncContext *s = avctx->priv_data;
+    const int is_rgb = avctx->pix_fmt == AV_PIX_FMT_RGB24;
+    const int nb_pict_bufs = is_rgb ? 4 : 3;
+    int n;
+
+    av_frame_free(&s->last_frame);
+    av_frame_free(&s->best_frame);
+    av_frame_free(&s->scratch_frame);
+    if (is_rgb) // input_frame is only released for RGB24 input
+        av_frame_free(&s->input_frame);
+    av_freep(&s->codebook_input);
+    av_freep(&s->codebook_closest);
+    av_freep(&s->strip_buf);
+    av_freep(&s->frame_buf);
+    av_freep(&s->mb);
+#ifdef CINEPAKENC_DEBUG
+    av_freep(&s->best_mb);
+#endif
+    for(n = 0; n < nb_pict_bufs; n++)
+        av_freep(&s->pict_bufs[n]);
+#ifdef CINEPAKENC_DEBUG
+    av_log(avctx, AV_LOG_INFO, "strip coding stats: %i V1 mode, %i V4 mode, %i MC mode (%i V1 encs, %i V4 encs, %i skips)\n",
+        s->num_v1_mode, s->num_v4_mode, s->num_mc_mode, s->num_v1_encs, s->num_v4_encs, s->num_skips);
+#endif
+    return 0;
+}
+
+AVCodec ff_cinepak_encoder = { // public encoder descriptor for Cinepak video
+    .name           = "cinepak",
+    .long_name      = NULL_IF_CONFIG_SMALL("Cinepak / CVID"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_CINEPAK,
+    .priv_data_size = sizeof(CinepakEncContext),
+    .priv_class     = &cinepak_class,
+    .init           = cinepak_encode_init,
+    .encode2        = cinepak_encode_frame,
+    .close          = cinepak_encode_end,
+    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_RGB24, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE },
+};
diff --git a/libavcodec/cljrdec.c b/libavcodec/cljrdec.c
index e74d1fa..68c8771 100644
--- a/libavcodec/cljrdec.c
+++ b/libavcodec/cljrdec.c
@@ -2,20 +2,20 @@
  * Cirrus Logic AccuPak (CLJR) decoder
  * Copyright (c) 2003 Alex Beregszaszi
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -43,16 +43,14 @@ static int decode_frame(AVCodecContext *avctx,
         return AVERROR_INVALIDDATA;
     }
 
-    if (buf_size < avctx->height * avctx->width) {
+    if (buf_size / avctx->height < avctx->width) {
         av_log(avctx, AV_LOG_ERROR,
                "Resolution larger than buffer size. Invalid header?\n");
         return AVERROR_INVALIDDATA;
     }
 
-    if ((ret = ff_get_buffer(avctx, p, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
         return ret;
-    }
     p->pict_type = AV_PICTURE_TYPE_I;
     p->key_frame = 1;
 
@@ -63,10 +61,10 @@ static int decode_frame(AVCodecContext *avctx,
         uint8_t *cb   = &p->data[1][y * p->linesize[1]];
         uint8_t *cr   = &p->data[2][y * p->linesize[2]];
         for (x = 0; x < avctx->width; x += 4) {
-            luma[3] = get_bits(&gb, 5) << 3;
-            luma[2] = get_bits(&gb, 5) << 3;
-            luma[1] = get_bits(&gb, 5) << 3;
-            luma[0] = get_bits(&gb, 5) << 3;
+            luma[3] = (get_bits(&gb, 5)*33) >> 2;
+            luma[2] = (get_bits(&gb, 5)*33) >> 2;
+            luma[1] = (get_bits(&gb, 5)*33) >> 2;
+            luma[0] = (get_bits(&gb, 5)*33) >> 2;
             luma += 4;
             *(cb++) = get_bits(&gb, 6) << 2;
             *(cr++) = get_bits(&gb, 6) << 2;
@@ -93,3 +91,4 @@ AVCodec ff_cljr_decoder = {
     .decode         = decode_frame,
     .capabilities   = CODEC_CAP_DR1,
 };
+
diff --git a/libavcodec/cljrenc.c b/libavcodec/cljrenc.c
index 2c31555..c672f80 100644
--- a/libavcodec/cljrenc.c
+++ b/libavcodec/cljrenc.c
@@ -2,20 +2,20 @@
  * Cirrus Logic AccuPak (CLJR) encoder
  * Copyright (c) 2003 Alex Beregszaszi
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,39 +25,39 @@
  */
 
 #include "libavutil/common.h"
+#include "libavutil/opt.h"
 
 #include "avcodec.h"
 #include "internal.h"
 #include "put_bits.h"
 
-static av_cold int encode_init(AVCodecContext *avctx)
-{
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-
-    return 0;
-}
-
-static av_cold int encode_close(AVCodecContext *avctx)
-{
-    av_frame_free(&avctx->coded_frame);
-    return 0;
-}
+typedef struct CLJRContext { // private context of the CLJR encoder (sized via priv_data_size)
+    AVClass        *avclass;
+    int             dither_type; // set through the "dither_type" option; selects the dither source in encode_frame()
+} CLJRContext;
 
 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                         const AVFrame *p, int *got_packet)
 {
+    CLJRContext *a = avctx->priv_data;
     PutBitContext pb;
     int x, y, ret;
+    uint32_t dither= avctx->frame_number;
+    static const uint32_t ordered_dither[2][2] =
+    {
+        { 0x10400000, 0x104F0000 },
+        { 0xCB2A0000, 0xCB250000 },
+    };
 
-    if ((ret = ff_alloc_packet(pkt, 32*avctx->height*avctx->width/4)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
-        return ret;
+    if (avctx->width%4 && avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL) {
+         av_log(avctx, AV_LOG_ERROR,
+                "Widths which are not a multiple of 4 might fail with some decoders, "
+                "use vstrict=-1 / -strict -1 to use %d anyway.\n", avctx->width);
+         return AVERROR_EXPERIMENTAL;
     }
 
-    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
-    avctx->coded_frame->key_frame = 1;
+    if ((ret = ff_alloc_packet2(avctx, pkt, 32*avctx->height*avctx->width/4)) < 0)
+        return ret;
 
     init_put_bits(&pb, pkt->data, pkt->size);
 
@@ -65,14 +65,25 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         uint8_t *luma = &p->data[0][y * p->linesize[0]];
         uint8_t *cb   = &p->data[1][y * p->linesize[1]];
         uint8_t *cr   = &p->data[2][y * p->linesize[2]];
+        uint8_t luma_tmp[4];
         for (x = 0; x < avctx->width; x += 4) {
-            put_bits(&pb, 5, luma[3] >> 3);
-            put_bits(&pb, 5, luma[2] >> 3);
-            put_bits(&pb, 5, luma[1] >> 3);
-            put_bits(&pb, 5, luma[0] >> 3);
+            switch (a->dither_type) {
+            case 0: dither = 0x492A0000;                       break;
+            case 1: dither = dither * 1664525 + 1013904223;    break;
+            case 2: dither = ordered_dither[ y&1 ][ (x>>2)&1 ];break;
+            }
+            if (x+3 >= avctx->width) {
+                memset(luma_tmp, 0, sizeof(luma_tmp));
+                memcpy(luma_tmp, luma, avctx->width - x);
+                luma = luma_tmp;
+            }
+            put_bits(&pb, 5, (249*(luma[3] +  (dither>>29)   )) >> 11);
+            put_bits(&pb, 5, (249*(luma[2] + ((dither>>26)&7))) >> 11);
+            put_bits(&pb, 5, (249*(luma[1] + ((dither>>23)&7))) >> 11);
+            put_bits(&pb, 5, (249*(luma[0] + ((dither>>20)&7))) >> 11);
             luma += 4;
-            put_bits(&pb, 6, *(cb++) >> 2);
-            put_bits(&pb, 6, *(cr++) >> 2);
+            put_bits(&pb, 6, (253*(*(cb++) + ((dither>>18)&3))) >> 10);
+            put_bits(&pb, 6, (253*(*(cr++) + ((dither>>16)&3))) >> 10);
         }
     }
 
@@ -84,14 +95,28 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     return 0;
 }
 
+#define OFFSET(x) offsetof(CLJRContext, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = { // private options of the CLJR encoder
+    { "dither_type",   "Dither type",   OFFSET(dither_type),        AV_OPT_TYPE_INT, { .i64=1 }, 0, 2, VE}, // encode_frame(): 0 = constant pattern, 1 = per-group pseudo-random update (default), 2 = 2x2 ordered table
+    { NULL },
+};
+// class through which the options above are exposed; referenced by ff_cljr_encoder.priv_class
+static const AVClass cljr_class = {
+    .class_name = "cljr encoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVCodec ff_cljr_encoder = {
     .name           = "cljr",
     .long_name      = NULL_IF_CONFIG_SMALL("Cirrus Logic AccuPak"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_CLJR,
-    .init           = encode_init,
+    .priv_data_size = sizeof(CLJRContext),
     .encode2        = encode_frame,
-    .close          = encode_close,
     .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV411P,
                                                    AV_PIX_FMT_NONE },
+    .priv_class     = &cljr_class,
 };
diff --git a/libavcodec/cllc.c b/libavcodec/cllc.c
index f081c68..9c710bb 100644
--- a/libavcodec/cllc.c
+++ b/libavcodec/cllc.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2012-2013 Derek Buitenhuis
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -412,11 +412,8 @@ static int cllc_decode_frame(AVCodecContext *avctx, void *data,
         avctx->pix_fmt             = AV_PIX_FMT_YUV422P;
         avctx->bits_per_raw_sample = 8;
 
-        ret = ff_get_buffer(avctx, pic, 0);
-        if (ret < 0) {
-            av_log(avctx, AV_LOG_ERROR, "Could not allocate buffer.\n");
+        if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
             return ret;
-        }
 
         ret = decode_yuv_frame(ctx, &gb, pic);
         if (ret < 0)
@@ -428,11 +425,8 @@ static int cllc_decode_frame(AVCodecContext *avctx, void *data,
         avctx->pix_fmt             = AV_PIX_FMT_RGB24;
         avctx->bits_per_raw_sample = 8;
 
-        ret = ff_get_buffer(avctx, pic, 0);
-        if (ret < 0) {
-            av_log(avctx, AV_LOG_ERROR, "Could not allocate buffer.\n");
+        if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
             return ret;
-        }
 
         ret = decode_rgb24_frame(ctx, &gb, pic);
         if (ret < 0)
@@ -443,11 +437,8 @@ static int cllc_decode_frame(AVCodecContext *avctx, void *data,
         avctx->pix_fmt             = AV_PIX_FMT_ARGB;
         avctx->bits_per_raw_sample = 8;
 
-        ret = ff_get_buffer(avctx, pic, 0);
-        if (ret < 0) {
-            av_log(avctx, AV_LOG_ERROR, "Could not allocate buffer.\n");
+        if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
             return ret;
-        }
 
         ret = decode_argb_frame(ctx, &gb, pic);
         if (ret < 0)
diff --git a/libavcodec/cngdec.c b/libavcodec/cngdec.c
index 89f5c81..855baaa 100644
--- a/libavcodec/cngdec.c
+++ b/libavcodec/cngdec.c
@@ -2,20 +2,20 @@
  * RFC 3389 comfort noise generator
  * Copyright (c) 2012 Martin Storsjo
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -59,12 +59,12 @@ static av_cold int cng_decode_init(AVCodecContext *avctx)
 
     p->order            = 12;
     avctx->frame_size   = 640;
-    p->refl_coef        = av_mallocz(p->order * sizeof(*p->refl_coef));
-    p->target_refl_coef = av_mallocz(p->order * sizeof(*p->target_refl_coef));
-    p->lpc_coef         = av_mallocz(p->order * sizeof(*p->lpc_coef));
-    p->filter_out       = av_mallocz((avctx->frame_size + p->order) *
+    p->refl_coef        = av_mallocz_array(p->order, sizeof(*p->refl_coef));
+    p->target_refl_coef = av_mallocz_array(p->order, sizeof(*p->target_refl_coef));
+    p->lpc_coef         = av_mallocz_array(p->order, sizeof(*p->lpc_coef));
+    p->filter_out       = av_mallocz_array(avctx->frame_size + p->order,
                                      sizeof(*p->filter_out));
-    p->excitation       = av_mallocz(avctx->frame_size * sizeof(*p->excitation));
+    p->excitation       = av_mallocz_array(avctx->frame_size, sizeof(*p->excitation));
     if (!p->refl_coef || !p->target_refl_coef || !p->lpc_coef ||
         !p->filter_out || !p->excitation) {
         cng_decode_close(avctx);
@@ -142,10 +142,8 @@ static int cng_decode_frame(AVCodecContext *avctx, void *data,
                                  p->excitation, avctx->frame_size, p->order);
 
     frame->nb_samples = avctx->frame_size;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     buf_out = (int16_t *)frame->data[0];
     for (i = 0; i < avctx->frame_size; i++)
         buf_out[i] = p->filter_out[i + p->order];
diff --git a/libavcodec/cngenc.c b/libavcodec/cngenc.c
index 98f3c4e..bf5f7bf 100644
--- a/libavcodec/cngenc.c
+++ b/libavcodec/cngenc.c
@@ -2,20 +2,20 @@
  * RFC 3389 comfort noise generator
  * Copyright (c) 2012 Martin Storsjo
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -56,8 +56,8 @@ static av_cold int cng_encode_init(AVCodecContext *avctx)
     p->order = 10;
     if ((ret = ff_lpc_init(&p->lpc, avctx->frame_size, p->order, FF_LPC_TYPE_LEVINSON)) < 0)
         return ret;
-    p->samples32 = av_malloc(avctx->frame_size * sizeof(*p->samples32));
-    p->ref_coef = av_malloc(p->order * sizeof(*p->ref_coef));
+    p->samples32 = av_malloc_array(avctx->frame_size, sizeof(*p->samples32));
+    p->ref_coef = av_malloc_array(p->order, sizeof(*p->ref_coef));
     if (!p->samples32 || !p->ref_coef) {
         cng_encode_close(avctx);
         return AVERROR(ENOMEM);
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index c21c57e..9e9728b 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -1,18 +1,21 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * This table was generated from the long and short names of AVCodecs;
+ * please see the respective codec sources for authorship.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -23,6 +26,8 @@
 #include "avcodec.h"
 #include "version.h"
 
+#define MT(...) (const char *const[]){ __VA_ARGS__, NULL }
+
 static const AVCodecDescriptor codec_descriptors[] = {
     /* video codecs */
     {
@@ -82,6 +87,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .name      = "mjpeg",
         .long_name = NULL_IF_CONFIG_SMALL("Motion JPEG"),
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+        .mime_types= MT("image/jpeg"),
     },
     {
         .id        = AV_CODEC_ID_MJPEGB,
@@ -399,6 +405,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .props     = AV_CODEC_PROP_LOSSLESS,
     },
     {
+        .id        = AV_CODEC_ID_SNOW,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "snow",
+        .long_name = NULL_IF_CONFIG_SMALL("Snow"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
         .id        = AV_CODEC_ID_TSCC,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "tscc",
@@ -516,6 +529,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .name      = "bmp",
         .long_name = NULL_IF_CONFIG_SMALL("BMP (Windows and OS/2 bitmap)"),
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/x-ms-bmp"),
     },
     {
         .id        = AV_CODEC_ID_CSCD,
@@ -587,6 +601,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("JPEG 2000"),
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
                      AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/jp2"),
     },
     {
         .id        = AV_CODEC_ID_VMNC,
@@ -1013,7 +1028,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "cdxl",
         .long_name = NULL_IF_CONFIG_SMALL("Commodore CDXL video"),
-        .props     = AV_CODEC_PROP_LOSSY,
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
     },
     {
         .id        = AV_CODEC_ID_ZEROCODEC,
@@ -1072,6 +1087,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
     },
     {
+        .id        = AV_CODEC_ID_Y41P,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "y41p",
+        .long_name = NULL_IF_CONFIG_SMALL("Uncompressed YUV 4:1:1 12-bit"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY,
+    },
+    {
         .id        = AV_CODEC_ID_ESCAPE130,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "escape130",
@@ -1079,6 +1101,88 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
+        .id        = AV_CODEC_ID_AVRP,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "avrp",
+        .long_name = NULL_IF_CONFIG_SMALL("Avid 1:1 10-bit RGB Packer"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY,
+    },
+    {
+        .id        = AV_CODEC_ID_012V,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "012v",
+        .long_name = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY,
+    },
+    {
+        .id        = AV_CODEC_ID_AVUI,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "avui",
+        .long_name = NULL_IF_CONFIG_SMALL("Avid Meridien Uncompressed"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY,
+    },
+    {
+        .id        = AV_CODEC_ID_AYUV,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "ayuv",
+        .long_name = NULL_IF_CONFIG_SMALL("Uncompressed packed MS 4:4:4:4"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY,
+    },
+    {
+        .id        = AV_CODEC_ID_TARGA_Y216,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "targa_y216",
+        .long_name = NULL_IF_CONFIG_SMALL("Pinnacle TARGA CineWave YUV16"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY,
+    },
+    {
+        .id        = AV_CODEC_ID_V308,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "v308",
+        .long_name = NULL_IF_CONFIG_SMALL("Uncompressed packed 4:4:4"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY,
+    },
+    {
+        .id        = AV_CODEC_ID_V408,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "v408",
+        .long_name = NULL_IF_CONFIG_SMALL("Uncompressed packed QT 4:4:4:4"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY,
+    },
+    {
+        .id        = AV_CODEC_ID_YUV4,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "yuv4",
+        .long_name = NULL_IF_CONFIG_SMALL("Uncompressed packed 4:2:0"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY,
+    },
+    {
+        .id        = AV_CODEC_ID_AVRN,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "avrn",
+        .long_name = NULL_IF_CONFIG_SMALL("Avid AVI Codec"),
+    },
+    {
+        .id        = AV_CODEC_ID_CPIA,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "cpia",
+        .long_name = NULL_IF_CONFIG_SMALL("CPiA video format"),
+    },
+    {
+        .id        = AV_CODEC_ID_XFACE,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "xface",
+        .long_name = NULL_IF_CONFIG_SMALL("X-face image"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SMVJPEG,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "smv",
+        .long_name = NULL_IF_CONFIG_SMALL("Sigmatel Motion Video"),
+    },
+
+    {
         .id        = AV_CODEC_ID_G2M,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "g2m",
@@ -1096,7 +1200,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .id        = AV_CODEC_ID_HEVC,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "hevc",
-        .long_name = NULL_IF_CONFIG_SMALL("HEVC (High Efficiency Video Coding)"),
+        .long_name = NULL_IF_CONFIG_SMALL("H.265 / HEVC (High Efficiency Video Coding)"),
         .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_REORDER,
     },
     {
@@ -1124,7 +1228,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .id        = AV_CODEC_ID_SANM,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "sanm",
-        .long_name = NULL_IF_CONFIG_SMALL("LucasArts SANM video"),
+        .long_name = NULL_IF_CONFIG_SMALL("LucasArts SANM/SMUSH video"),
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
@@ -1161,7 +1265,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .id        = AV_CODEC_ID_DPX,
         .type      = AVMEDIA_TYPE_VIDEO,
         .name      = "dpx",
-        .long_name = NULL_IF_CONFIG_SMALL("DPX image"),
+        .long_name = NULL_IF_CONFIG_SMALL("DPX (Digital Picture Exchange) image"),
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
     },
     {
@@ -1178,6 +1282,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .name      = "gif",
         .long_name = NULL_IF_CONFIG_SMALL("GIF (Graphics Interchange Format)"),
         .props     = AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/gif"),
     },
     {
         .id        = AV_CODEC_ID_JPEGLS,
@@ -1200,6 +1305,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .name      = "pam",
         .long_name = NULL_IF_CONFIG_SMALL("PAM (Portable AnyMap) image"),
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/x-portable-pixmap"),
     },
     {
         .id        = AV_CODEC_ID_PBM,
@@ -1214,6 +1320,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .name      = "pcx",
         .long_name = NULL_IF_CONFIG_SMALL("PC Paintbrush PCX image"),
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/x-pcx"),
     },
     {
         .id        = AV_CODEC_ID_PGM,
@@ -1235,6 +1342,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .name      = "png",
         .long_name = NULL_IF_CONFIG_SMALL("PNG (Portable Network Graphics) image"),
         .props     = AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/png"),
     },
     {
         .id        = AV_CODEC_ID_PPM,
@@ -1277,6 +1385,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .name      = "targa",
         .long_name = NULL_IF_CONFIG_SMALL("Truevision Targa image"),
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/x-targa", "image/x-tga"),
     },
     {
         .id        = AV_CODEC_ID_TIFF,
@@ -1284,6 +1393,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .name      = "tiff",
         .long_name = NULL_IF_CONFIG_SMALL("TIFF image"),
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/tiff"),
     },
     {
         .id        = AV_CODEC_ID_TXD,
@@ -1306,6 +1416,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("WebP"),
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
                      AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/webp"),
     },
     {
         .id        = AV_CODEC_ID_WMV3IMAGE,
@@ -1327,6 +1438,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .name      = "xwd",
         .long_name = NULL_IF_CONFIG_SMALL("XWD (X Window Dump) image"),
         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+        .mime_types= MT("image/x-xwindowdump"),
     },
 
     /* various PCM "codecs" */
@@ -1376,13 +1488,15 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .id        = AV_CODEC_ID_PCM_MULAW,
         .type      = AVMEDIA_TYPE_AUDIO,
         .name      = "pcm_mulaw",
-        .long_name = NULL_IF_CONFIG_SMALL("PCM mu-law"),
+        .long_name = NULL_IF_CONFIG_SMALL("PCM mu-law / G.711 mu-law"),
+        .props     = AV_CODEC_PROP_LOSSY,
     },
     {
         .id        = AV_CODEC_ID_PCM_ALAW,
         .type      = AVMEDIA_TYPE_AUDIO,
         .name      = "pcm_alaw",
-        .long_name = NULL_IF_CONFIG_SMALL("PCM A-law"),
+        .long_name = NULL_IF_CONFIG_SMALL("PCM A-law / G.711 A-law"),
+        .props     = AV_CODEC_PROP_LOSSY,
     },
     {
         .id        = AV_CODEC_ID_PCM_S32LE,
@@ -1455,10 +1569,17 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
+        .id        = AV_CODEC_ID_PCM_S16BE_PLANAR,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_s16be_planar",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 16-bit big-endian planar"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
         .id        = AV_CODEC_ID_PCM_S16LE_PLANAR,
         .type      = AVMEDIA_TYPE_AUDIO,
         .name      = "pcm_s16le_planar",
-        .long_name = NULL_IF_CONFIG_SMALL("PCM 16-bit little-endian planar"),
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 16-bit little-endian planar"),
         .props     = AV_CODEC_PROP_LOSSLESS,
     },
     {
@@ -1529,7 +1650,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .type      = AVMEDIA_TYPE_AUDIO,
         .name      = "s302m",
         .long_name = NULL_IF_CONFIG_SMALL("SMPTE 302M"),
-        .props     = AV_CODEC_PROP_LOSSY,
+        .props     = AV_CODEC_PROP_LOSSLESS,
     },
     {
         .id        = AV_CODEC_ID_PCM_S8_PLANAR,
@@ -1751,6 +1872,41 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
+        .id        = AV_CODEC_ID_ADPCM_AFC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_afc",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Nintendo Gamecube AFC"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_IMA_OKI,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ima_oki",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Dialogic OKI"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_DTK,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_dtk",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM Nintendo Gamecube DTK"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_IMA_RAD,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_ima_rad",
+        .long_name = NULL_IF_CONFIG_SMALL("ADPCM IMA Radical"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_G726LE,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_g726le",
+        .long_name = NULL_IF_CONFIG_SMALL("G.726 ADPCM little-endian"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
         .id        = AV_CODEC_ID_ADPCM_VIMA,
         .type      = AVMEDIA_TYPE_AUDIO,
         .name      = "adpcm_vima",
@@ -2251,6 +2407,24 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .props     = AV_CODEC_PROP_LOSSY,
     },
     {
+        .id        = AV_CODEC_ID_FFWAVESYNTH,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "wavesynth",
+        .long_name = NULL_IF_CONFIG_SMALL("Wave synthesis pseudo-codec"),
+    },
+    {
+        .id        = AV_CODEC_ID_SONIC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "sonic",
+        .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
+    },
+    {
+        .id        = AV_CODEC_ID_SONIC_LS,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "sonicls",
+        .long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"),
+    },
+    {
         .id        = AV_CODEC_ID_OPUS,
         .type      = AVMEDIA_TYPE_AUDIO,
         .name      = "opus",
@@ -2292,6 +2466,48 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("On2 Audio for Video Codec"),
         .props     = AV_CODEC_PROP_LOSSY,
     },
+    {
+        .id        = AV_CODEC_ID_EVRC,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "evrc",
+        .long_name = NULL_IF_CONFIG_SMALL("EVRC (Enhanced Variable Rate Codec)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SMV,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "smv",
+        .long_name = NULL_IF_CONFIG_SMALL("SMV (Selectable Mode Vocoder)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_DSD_LSBF,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "dsd_lsbf",
+        .long_name = NULL_IF_CONFIG_SMALL("DSD (Direct Stream Digital), least significant bit first"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_DSD_MSBF,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "dsd_msbf",
+        .long_name = NULL_IF_CONFIG_SMALL("DSD (Direct Stream Digital), most significant bit first"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_DSD_LSBF_PLANAR,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "dsd_lsbf_planar",
+        .long_name = NULL_IF_CONFIG_SMALL("DSD (Direct Stream Digital), least significant bit first, planar"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_DSD_MSBF_PLANAR,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "dsd_msbf_planar",
+        .long_name = NULL_IF_CONFIG_SMALL("DSD (Direct Stream Digital), most significant bit first, planar"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
 
     /* subtitle codecs */
     {
@@ -2299,42 +2515,56 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .type      = AVMEDIA_TYPE_SUBTITLE,
         .name      = "dvd_subtitle",
         .long_name = NULL_IF_CONFIG_SMALL("DVD subtitles"),
+        .props     = AV_CODEC_PROP_BITMAP_SUB,
     },
     {
         .id        = AV_CODEC_ID_DVB_SUBTITLE,
         .type      = AVMEDIA_TYPE_SUBTITLE,
         .name      = "dvb_subtitle",
         .long_name = NULL_IF_CONFIG_SMALL("DVB subtitles"),
+        .props     = AV_CODEC_PROP_BITMAP_SUB,
     },
     {
         .id        = AV_CODEC_ID_TEXT,
         .type      = AVMEDIA_TYPE_SUBTITLE,
         .name      = "text",
         .long_name = NULL_IF_CONFIG_SMALL("raw UTF-8 text"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
     },
     {
         .id        = AV_CODEC_ID_XSUB,
         .type      = AVMEDIA_TYPE_SUBTITLE,
         .name      = "xsub",
         .long_name = NULL_IF_CONFIG_SMALL("XSUB"),
+        .props     = AV_CODEC_PROP_BITMAP_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_ASS,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "ass",
+        .long_name = NULL_IF_CONFIG_SMALL("ASS (Advanced SSA) subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
     },
     {
         .id        = AV_CODEC_ID_SSA,
         .type      = AVMEDIA_TYPE_SUBTITLE,
         .name      = "ssa",
-        .long_name = NULL_IF_CONFIG_SMALL("SSA (SubStation Alpha) / ASS (Advanced SSA) subtitle"),
+        .long_name = NULL_IF_CONFIG_SMALL("SSA (SubStation Alpha) subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
     },
     {
         .id        = AV_CODEC_ID_MOV_TEXT,
         .type      = AVMEDIA_TYPE_SUBTITLE,
         .name      = "mov_text",
         .long_name = NULL_IF_CONFIG_SMALL("MOV text"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
     },
     {
         .id        = AV_CODEC_ID_HDMV_PGS_SUBTITLE,
         .type      = AVMEDIA_TYPE_SUBTITLE,
         .name      = "hdmv_pgs_subtitle",
         .long_name = NULL_IF_CONFIG_SMALL("HDMV Presentation Graphic Stream subtitles"),
+        .props     = AV_CODEC_PROP_BITMAP_SUB,
     },
     {
         .id        = AV_CODEC_ID_DVB_TELETEXT,
@@ -2346,7 +2576,270 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .id        = AV_CODEC_ID_SRT,
         .type      = AVMEDIA_TYPE_SUBTITLE,
         .name      = "srt",
-        .long_name = NULL_IF_CONFIG_SMALL("SubRip Text"),
+        .long_name = NULL_IF_CONFIG_SMALL("SubRip subtitle with embedded timing"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_SUBRIP,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "subrip",
+        .long_name = NULL_IF_CONFIG_SMALL("SubRip subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_MICRODVD,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "microdvd",
+        .long_name = NULL_IF_CONFIG_SMALL("MicroDVD subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_MPL2,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "mpl2",
+        .long_name = NULL_IF_CONFIG_SMALL("MPL2 subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_EIA_608,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "eia_608",
+        .long_name = NULL_IF_CONFIG_SMALL("EIA-608 closed captions"),
+    },
+    {
+        .id        = AV_CODEC_ID_JACOSUB,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "jacosub",
+        .long_name = NULL_IF_CONFIG_SMALL("JACOsub subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_PJS,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "pjs",
+        .long_name = NULL_IF_CONFIG_SMALL("PJS (Phoenix Japanimation Society) subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_SAMI,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "sami",
+        .long_name = NULL_IF_CONFIG_SMALL("SAMI subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_REALTEXT,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "realtext",
+        .long_name = NULL_IF_CONFIG_SMALL("RealText subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_SUBVIEWER1,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "subviewer1",
+        .long_name = NULL_IF_CONFIG_SMALL("SubViewer v1 subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_SUBVIEWER,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "subviewer",
+        .long_name = NULL_IF_CONFIG_SMALL("SubViewer subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_VPLAYER,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "vplayer",
+        .long_name = NULL_IF_CONFIG_SMALL("VPlayer subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+    {
+        .id        = AV_CODEC_ID_WEBVTT,
+        .type      = AVMEDIA_TYPE_SUBTITLE,
+        .name      = "webvtt",
+        .long_name = NULL_IF_CONFIG_SMALL("WebVTT subtitle"),
+        .props     = AV_CODEC_PROP_TEXT_SUB,
+    },
+
+    /* other kinds of codecs and pseudo-codecs */
+    {
+        .id        = AV_CODEC_ID_TTF,
+        .type      = AVMEDIA_TYPE_DATA,
+        .name      = "ttf",
+        .long_name = NULL_IF_CONFIG_SMALL("TrueType font"),
+        .mime_types= MT("application/x-truetype-font", "application/x-font"),
+    },
+    {
+        .id        = AV_CODEC_ID_BINTEXT,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "bintext",
+        .long_name = NULL_IF_CONFIG_SMALL("Binary text"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY,
+    },
+    {
+        .id        = AV_CODEC_ID_XBIN,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "xbin",
+        .long_name = NULL_IF_CONFIG_SMALL("eXtended BINary text"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY,
+    },
+    {
+        .id        = AV_CODEC_ID_IDF,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "idf",
+        .long_name = NULL_IF_CONFIG_SMALL("iCEDraw text"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY,
+    },
+    {
+        .id        = AV_CODEC_ID_OTF,
+        .type      = AVMEDIA_TYPE_DATA,
+        .name      = "otf",
+        .long_name = NULL_IF_CONFIG_SMALL("OpenType font"),
+        .mime_types= MT("application/vnd.ms-opentype"),
+    },
+    {
+        .id        = AV_CODEC_ID_SMPTE_KLV,
+        .type      = AVMEDIA_TYPE_DATA,
+        .name      = "klv",
+        .long_name = NULL_IF_CONFIG_SMALL("SMPTE 336M Key-Length-Value (KLV) metadata"),
+    },
+    {
+        .id        = AV_CODEC_ID_DVD_NAV,
+        .type      = AVMEDIA_TYPE_DATA,
+        .name      = "dvd_nav_packet",
+        .long_name = NULL_IF_CONFIG_SMALL("DVD Nav packet"),
+    },
+    {
+        .id        = AV_CODEC_ID_TIMED_ID3,
+        .type      = AVMEDIA_TYPE_DATA,
+        .name      = "timed_id3",
+        .long_name = NULL_IF_CONFIG_SMALL("timed ID3 metadata"),
+    },
+    {
+        .id        = AV_CODEC_ID_BIN_DATA,
+        .type      = AVMEDIA_TYPE_DATA,
+        .name      = "bin_data",
+        .long_name = NULL_IF_CONFIG_SMALL("binary data"),
+        .mime_types= MT("application/octet-stream"),
+    },
+
+    /* deprecated codec ids */
+    {
+        .id        = AV_CODEC_ID_BRENDER_PIX_DEPRECATED,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "brender_pix_deprecated",
+        .long_name = NULL_IF_CONFIG_SMALL("BRender PIX image (deprecated id)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_ESCAPE130_DEPRECATED,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "escape130_deprecated",
+        .long_name = NULL_IF_CONFIG_SMALL("Escape 130 (deprecated id)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_EXR_DEPRECATED,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "exr_deprecated",
+        .long_name = NULL_IF_CONFIG_SMALL("OpenEXR image (deprecated id)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
+                     AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_G2M_DEPRECATED,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "g2m_deprecated",
+        .long_name = NULL_IF_CONFIG_SMALL("Go2Meeting (deprecated id)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_HEVC_DEPRECATED,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "hevc_deprecated",
+        .long_name = NULL_IF_CONFIG_SMALL("H.265 / HEVC (High Efficiency Video Coding) (deprecated id)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_PAF_VIDEO_DEPRECATED,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "paf_video_deprecated",
+        .long_name = NULL_IF_CONFIG_SMALL("Amazing Studio Packed Animation File Video (deprecated id)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_SANM_DEPRECATED,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "sanm_deprecated",
+        .long_name = NULL_IF_CONFIG_SMALL("LucasArts SANM/SMUSH video (deprecated id)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_VP7_DEPRECATED,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "vp7_deprecated",
+        .long_name = NULL_IF_CONFIG_SMALL("On2 VP7 (deprecated id)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_WEBP_DEPRECATED,
+        .type      = AVMEDIA_TYPE_VIDEO,
+        .name      = "webp_deprecated",
+        .long_name = NULL_IF_CONFIG_SMALL("WebP (deprecated id)"),
+        .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSY |
+                     AV_CODEC_PROP_LOSSLESS,
+    },
+
+    {
+        .id        = AV_CODEC_ID_VIMA,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "vima",
+        .long_name = NULL_IF_CONFIG_SMALL("LucasArts VIMA audio (deprecated id)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_ADPCM_VIMA_DEPRECATED,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "adpcm_vima_deprecated",
+        .long_name = NULL_IF_CONFIG_SMALL("LucasArts VIMA audio (deprecated id)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_OPUS_DEPRECATED,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "opus_deprecated",
+        .long_name = NULL_IF_CONFIG_SMALL("Opus (Opus Interactive Audio Codec) (deprecated id)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_PAF_AUDIO_DEPRECATED,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "paf_audio_deprecated",
+        .long_name = NULL_IF_CONFIG_SMALL("Amazing Studio Packed Animation File Audio (deprecated id)"),
+        .props     = AV_CODEC_PROP_LOSSY,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_S24LE_PLANAR_DEPRECATED,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_s24le_planar_deprecated",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 24-bit little-endian planar (deprecated id)"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_PCM_S32LE_PLANAR_DEPRECATED,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "pcm_s32le_planar_deprecated",
+        .long_name = NULL_IF_CONFIG_SMALL("PCM signed 32-bit little-endian planar (deprecated id)"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
+    },
+    {
+        .id        = AV_CODEC_ID_TAK_DEPRECATED,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "tak_deprecated",
+        .long_name = NULL_IF_CONFIG_SMALL("TAK (Tom's lossless Audio Kompressor) (deprecated id)"),
+        .props     = AV_CODEC_PROP_LOSSLESS,
     },
 };
 
diff --git a/libavcodec/cook.c b/libavcodec/cook.c
index 2d77899..5860288 100644
--- a/libavcodec/cook.c
+++ b/libavcodec/cook.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2003 Sascha Sommer
  * Copyright (c) 2005 Benjamin Larsson
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -52,6 +52,7 @@
 #include "fft.h"
 #include "internal.h"
 #include "sinewin.h"
+#include "unary.h"
 
 #include "cookdata.h"
 
@@ -219,7 +220,7 @@ static av_cold int init_cook_mlt(COOKContext *q)
     int j, ret;
     int mlt_size = q->samples_per_channel;
 
-    if ((q->mlt_window = av_malloc(mlt_size * sizeof(*q->mlt_window))) == 0)
+    if ((q->mlt_window = av_malloc_array(mlt_size, sizeof(*q->mlt_window))) == 0)
         return AVERROR(ENOMEM);
 
     /* Initialize the MLT window: simple sine window. */
@@ -229,7 +230,7 @@ static av_cold int init_cook_mlt(COOKContext *q)
 
     /* Initialize the MDCT. */
     if ((ret = ff_mdct_init(&q->mdct_ctx, av_log2(mlt_size) + 1, 1, 1.0 / 32768.0))) {
-        av_free(q->mlt_window);
+        av_freep(&q->mlt_window);
         return ret;
     }
     av_log(q->avctx, AV_LOG_DEBUG, "MDCT initialized, order = %d.\n",
@@ -303,8 +304,8 @@ static av_cold int cook_decode_close(AVCodecContext *avctx)
     av_log(avctx, AV_LOG_DEBUG, "Deallocating memory.\n");
 
     /* Free allocated memory buffers. */
-    av_free(q->mlt_window);
-    av_free(q->decoded_bytes_buffer);
+    av_freep(&q->mlt_window);
+    av_freep(&q->decoded_bytes_buffer);
 
     /* Free the transform. */
     ff_mdct_end(&q->mdct_ctx);
@@ -332,11 +333,7 @@ static void decode_gain_info(GetBitContext *gb, int *gaininfo)
 {
     int i, n;
 
-    while (get_bits1(gb)) {
-        /* NOTHING */
-    }
-
-    n = get_bits_count(gb) - 1;     // amount of elements*2 to update
+    n = get_unary(gb, 0, get_bits_left(gb));     // amount of elements*2 to update
 
     i = 0;
     while (n--) {
@@ -397,7 +394,7 @@ static int decode_envelope(COOKContext *q, COOKSubpacket *p,
  * @param category              pointer to the category array
  * @param category_index        pointer to the category_index array
  */
-static void categorize(COOKContext *q, COOKSubpacket *p, int *quant_index_table,
+static void categorize(COOKContext *q, COOKSubpacket *p, const int *quant_index_table,
                        int *category, int *category_index)
 {
     int exp_idx, bias, tmpbias1, tmpbias2, bits_left, num_bits, index, v, i, j;
@@ -630,13 +627,17 @@ static int mono_decode(COOKContext *q, COOKSubpacket *p, float *mlt_buffer)
     int category_index[128] = { 0 };
     int category[128]       = { 0 };
     int quant_index_table[102];
-    int res;
+    int res, i;
 
     if ((res = decode_envelope(q, p, quant_index_table)) < 0)
         return res;
     q->num_vectors = get_bits(&q->gb, p->log2_numvector_size);
     categorize(q, p, quant_index_table, category, category_index);
     expand_category(q, category, category_index);
+    for (i=0; i<p->total_subbands; i++) {
+        if (category[i] > 7)
+            return AVERROR_INVALIDDATA;
+    }
     decode_vectors(q, p, category, quant_index_table, mlt_buffer);
 
     return 0;
@@ -736,7 +737,7 @@ static void imlt_gain(COOKContext *q, float *inbuffer,
  * @param q                 pointer to the COOKContext
  * @param decouple_tab      decoupling array
  */
-static void decouple_info(COOKContext *q, COOKSubpacket *p, int *decouple_tab)
+static int decouple_info(COOKContext *q, COOKSubpacket *p, int *decouple_tab)
 {
     int i;
     int vlc    = get_bits1(&q->gb);
@@ -745,7 +746,7 @@ static void decouple_info(COOKContext *q, COOKSubpacket *p, int *decouple_tab)
     int length = end - start + 1;
 
     if (start > end)
-        return;
+        return 0;
 
     if (vlc)
         for (i = 0; i < length; i++)
@@ -753,11 +754,18 @@ static void decouple_info(COOKContext *q, COOKSubpacket *p, int *decouple_tab)
                                                p->channel_coupling.table,
                                                p->channel_coupling.bits, 2);
     else
-        for (i = 0; i < length; i++)
-            decouple_tab[start + i] = get_bits(&q->gb, p->js_vlc_bits);
+        for (i = 0; i < length; i++) {
+            int v = get_bits(&q->gb, p->js_vlc_bits);
+            if (v == (1<<p->js_vlc_bits)-1) {
+                av_log(q->avctx, AV_LOG_ERROR, "decouple value too large\n");
+                return AVERROR_INVALIDDATA;
+            }
+            decouple_tab[start + i] = v;
+        }
+    return 0;
 }
 
-/*
+/**
  * function decouples a pair of signals from a single signal via multiplication.
  *
  * @param q                 pointer to the COOKContext
@@ -805,10 +813,10 @@ static int joint_decode(COOKContext *q, COOKSubpacket *p,
     /* Make sure the buffers are zeroed out. */
     memset(mlt_buffer_left,  0, 1024 * sizeof(*mlt_buffer_left));
     memset(mlt_buffer_right, 0, 1024 * sizeof(*mlt_buffer_right));
-    decouple_info(q, p, decouple_tab);
+    if ((res = decouple_info(q, p, decouple_tab)) < 0)
+        return res;
     if ((res = mono_decode(q, p, decode_buffer)) < 0)
         return res;
-
     /* The two channels are stored interleaved in decode_buffer. */
     for (i = 0; i < p->js_subband_start; i++) {
         for (j = 0; j < SUBBAND_SIZE; j++) {
@@ -927,7 +935,7 @@ static int decode_subpacket(COOKContext *q, COOKSubpacket *p,
                           p->mono_previous_buffer1,
                           outbuffer ? outbuffer[p->ch_idx] : NULL);
 
-    if (p->num_channels == 2)
+    if (p->num_channels == 2) {
         if (p->joint_stereo)
             mlt_compensate_output(q, q->decode_buffer_2, &p->gains1,
                                   p->mono_previous_buffer2,
@@ -936,6 +944,7 @@ static int decode_subpacket(COOKContext *q, COOKSubpacket *p,
             mlt_compensate_output(q, q->decode_buffer_2, &p->gains2,
                                   p->mono_previous_buffer2,
                                   outbuffer ? outbuffer[p->ch_idx + 1] : NULL);
+    }
 
     return 0;
 }
@@ -959,10 +968,8 @@ static int cook_decode_frame(AVCodecContext *avctx, void *data,
     /* get output buffer */
     if (q->discarded_packets >= 2) {
         frame->nb_samples = q->samples_per_channel;
-        if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
             return ret;
-        }
         samples = (float **)frame->extended_data;
     }
 
@@ -1046,7 +1053,7 @@ static av_cold int cook_decode_init(AVCodecContext *avctx)
     int extradata_size = avctx->extradata_size;
     int s = 0;
     unsigned int channel_mask = 0;
-    int samples_per_frame;
+    int samples_per_frame = 0;
     int ret;
     q->avctx = avctx;
 
@@ -1080,6 +1087,11 @@ static av_cold int cook_decode_init(AVCodecContext *avctx)
         if (extradata_size >= 8) {
             bytestream_get_be32(&edata_ptr);    // Unknown unused
             q->subpacket[s].js_subband_start = bytestream_get_be16(&edata_ptr);
+            if (q->subpacket[s].js_subband_start >= 51) {
+                av_log(avctx, AV_LOG_ERROR, "js_subband_start %d is too large\n", q->subpacket[s].js_subband_start);
+                return AVERROR_INVALIDDATA;
+            }
+
             q->subpacket[s].js_vlc_bits = bytestream_get_be16(&edata_ptr);
             extradata_size -= 8;
         }
@@ -1187,11 +1199,20 @@ static av_cold int cook_decode_init(AVCodecContext *avctx)
             avpriv_request_sample(avctx, "subbands > 50");
             return AVERROR_PATCHWELCOME;
         }
+        if (q->subpacket[s].subbands == 0) {
+            avpriv_request_sample(avctx, "subbands = 0");
+            return AVERROR_PATCHWELCOME;
+        }
         q->subpacket[s].gains1.now      = q->subpacket[s].gain_1;
         q->subpacket[s].gains1.previous = q->subpacket[s].gain_2;
         q->subpacket[s].gains2.now      = q->subpacket[s].gain_3;
         q->subpacket[s].gains2.previous = q->subpacket[s].gain_4;
 
+        if (q->num_subpackets + q->subpacket[s].num_channels > q->avctx->channels) {
+            av_log(avctx, AV_LOG_ERROR, "Too many subpackets %d for channels %d\n", q->num_subpackets, q->avctx->channels);
+            return AVERROR_INVALIDDATA;
+        }
+
         q->num_subpackets++;
         s++;
         if (s > MAX_SUBPACKETS) {
diff --git a/libavcodec/cook_parser.c b/libavcodec/cook_parser.c
index f140e90..6dbbfd8 100644
--- a/libavcodec/cook_parser.c
+++ b/libavcodec/cook_parser.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -40,11 +40,12 @@ static int cook_parse(AVCodecParserContext *s1, AVCodecContext *avctx,
 {
     CookParseContext *s = s1->priv_data;
 
-    if (s->duration)
-        s1->duration = s->duration;
-    else if (avctx->extradata && avctx->extradata_size >= 8 && avctx->channels)
+    if (!s->duration &&
+                avctx->extradata && avctx->extradata_size >= 8 && avctx->channels)
         s->duration = AV_RB16(avctx->extradata + 4) / avctx->channels;
 
+    s1->duration = s->duration;
+
     /* always return the full packet. this parser isn't doing any splitting or
        combining, only setting packet duration */
     *poutbuf      = buf;
diff --git a/libavcodec/cookdata.h b/libavcodec/cookdata.h
index c4c26fa..7b9cba3 100644
--- a/libavcodec/cookdata.h
+++ b/libavcodec/cookdata.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2003 Sascha Sommer
  * Copyright (c) 2005 Benjamin Larsson
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/copy_block.h b/libavcodec/copy_block.h
index 10718cc..9ed451f 100644
--- a/libavcodec/copy_block.h
+++ b/libavcodec/copy_block.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -23,6 +23,17 @@
 
 #include "libavutil/intreadwrite.h"
 
+/* Copy h rows from src to dst, one AV_COPY16U() per row, stepping each pointer by its stride. */
+static inline void copy_block2(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int rows_left;
+    for (rows_left = h; rows_left > 0; rows_left--) {
+        AV_COPY16U(dst, src);
+        dst += dstStride;
+        src += srcStride;
+    }
+}
+
 static inline void copy_block4(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
 {
     int i;
diff --git a/libavcodec/cos_tablegen.c b/libavcodec/cos_tablegen.c
index 92b8295..9af83f4 100644
--- a/libavcodec/cos_tablegen.c
+++ b/libavcodec/cos_tablegen.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -61,7 +61,7 @@ int main(int argc, char *argv[])
     printf("#include \"libavcodec/%s\"\n", do_sin ? "rdft.h" : "fft.h");
     for (i = 4; i <= BITS; i++) {
         int m = 1 << i;
-        double freq = 2*M_PI/m;
+        double freq = 2*3.14159265358979323846/m;
         printf("%s(%i) = {\n   ", do_sin ? "SINTABLE" : "COSTABLE", m);
         for (j = 0; j < m/2 - 1; j++) {
             int idx = j > m/4 ? m/2 - j : j;
diff --git a/libavcodec/cpia.c b/libavcodec/cpia.c
new file mode 100644
index 0000000..9036cb3
--- /dev/null
+++ b/libavcodec/cpia.c
@@ -0,0 +1,233 @@
+/*
+ * CPiA video decoder.
+ * Copyright (c) 2010 Hans de Goede <hdegoede@redhat.com>
+ *
+ * This decoder is based on the LGPL code available at
+ * https://v4l4j.googlecode.com/svn/v4l4j/trunk/libvideo/libv4lconvert/cpia1.c
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "get_bits.h"
+#include "internal.h"
+
+
+#define FRAME_HEADER_SIZE 64    /**< Bytes of header preceding the line data */
+#define MAGIC_0         0x19    /**< First header byte */
+#define MAGIC_1         0x68    /**< Second header byte */
+#define SUBSAMPLE_420      0    /**< header[17] value: 4:2:0 subsampling */
+#define SUBSAMPLE_422      1    /**< header[17] value: 4:2:2 subsampling (rejected as unsupported) */
+#define YUVORDER_YUYV      0    /**< header[18] value: YUYV byte order */
+#define YUVORDER_UYVY      1    /**< header[18] value: UYVY byte order (rejected as unsupported) */
+#define NOT_COMPRESSED     0    /**< header[28] value: frame is decoded as a key frame */
+#define COMPRESSED         1    /**< header[28] value: bytes with bit 0 set are pixel skip counts */
+#define NO_DECIMATION      0    /**< header[29] value: decimation off */
+#define DECIMATION_ENAB    1    /**< header[29] value: decimation on (rejected as unsupported) */
+#define EOL             0xfd    /**< End Of Line marker */
+#define EOI             0xff    /**< End Of Image marker */
+
+
+typedef struct {
+    AVFrame *frame;     /**< Reused for every packet; allocated in cpia_decode_init(), freed in cpia_decode_end() */
+} CpiaContext;
+
+
+/**
+ * Decode one CPiA packet into the context's reusable frame and output it.
+ *
+ * Truncated or malformed line data flags the frame with
+ * FF_DECODE_ERROR_INVALID_BITSTREAM and ends decoding early; the frame is
+ * still output. Returns avpkt->size, or a negative error code on failure.
+ */
+static int cpia_decode_frame(AVCodecContext *avctx,
+                             void *data, int *got_frame, AVPacket* avpkt)
+{
+    CpiaContext* const cpia = avctx->priv_data;
+    int i,j,ret;
+    uint8_t* const header = avpkt->data;
+    uint8_t* src;
+    int src_size;
+    uint16_t linelength;
+    uint8_t skip;
+    AVFrame *frame = cpia->frame;
+    uint8_t *y, *u, *v, *y_end, *u_end, *v_end;
+
+    // Check header
+    if ( avpkt->size < FRAME_HEADER_SIZE
+      || header[0] != MAGIC_0 || header[1] != MAGIC_1
+      || (header[17] != SUBSAMPLE_420 && header[17] != SUBSAMPLE_422)
+      || (header[18] != YUVORDER_YUYV && header[18] != YUVORDER_UYVY)
+      || (header[28] != NOT_COMPRESSED && header[28] != COMPRESSED)
+      || (header[29] != NO_DECIMATION && header[29] != DECIMATION_ENAB)
+    ) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid header!\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    // currently unsupported properties
+    if (header[17] == SUBSAMPLE_422) {
+        avpriv_report_missing_feature(avctx, "4:2:2 subsampling");
+        return AVERROR_PATCHWELCOME;
+    }
+    if (header[18] == YUVORDER_UYVY) {
+        avpriv_report_missing_feature(avctx, "YUV byte order UYVY");
+        return AVERROR_PATCHWELCOME;
+    }
+    if (header[29] == DECIMATION_ENAB) {
+        avpriv_report_missing_feature(avctx, "Decimation");
+        return AVERROR_PATCHWELCOME;
+    }
+
+    src = header + FRAME_HEADER_SIZE;
+    src_size = avpkt->size - FRAME_HEADER_SIZE;
+
+    if (header[28] == NOT_COMPRESSED) {
+        frame->pict_type = AV_PICTURE_TYPE_I;
+        frame->key_frame = 1;
+    } else {
+        frame->pict_type = AV_PICTURE_TYPE_P;
+        frame->key_frame = 0;
+    }
+
+    // Get buffer filled with previous frame
+    if ((ret = ff_reget_buffer(avctx, frame)) < 0)
+        return ret;
+
+    for (i = 0; i < frame->height;
+         i++, src += linelength, src_size -= linelength + 2) {
+        /* Each line costs 2 length bytes plus linelength payload bytes (see
+         * the loop step); make sure all of that is inside the packet. */
+        if (src_size < 2 || src_size - 2 < AV_RL16(src)) {
+            av_frame_set_decode_error_flags(frame, FF_DECODE_ERROR_INVALID_BITSTREAM);
+            av_log(avctx, AV_LOG_WARNING, "Frame ended unexpectedly!\n");
+            break;
+        }
+        // Read line length, two byte little endian
+        linelength = AV_RL16(src);
+        src += 2;
+
+        if (src[linelength - 1] != EOL) {
+            av_frame_set_decode_error_flags(frame, FF_DECODE_ERROR_INVALID_BITSTREAM);
+            av_log(avctx, AV_LOG_WARNING, "Wrong line length %d or line not terminated properly (found 0x%02x)!\n", linelength, src[linelength - 1]);
+            break;
+        }
+
+        /* Update the data pointers. Y data is on every line.
+         * U and V data on every second line */
+        y = &frame->data[0][i * frame->linesize[0]];
+        u = &frame->data[1][(i >> 1) * frame->linesize[1]];
+        v = &frame->data[2][(i >> 1) * frame->linesize[2]];
+        y_end = y + frame->linesize[0] - 1;
+        u_end = u + frame->linesize[1] - 1;
+        v_end = v + frame->linesize[2] - 1;
+
+        if ((i & 1) && header[17] == SUBSAMPLE_420) {
+            /* We are on an odd line and 420 subsample is used.
+             * On this line only Y values are specified, one per pixel. */
+            for (j = 0; j < linelength - 1; j++) {
+                if (y > y_end) {
+                    av_frame_set_decode_error_flags(frame, FF_DECODE_ERROR_INVALID_BITSTREAM);
+                    av_log(avctx, AV_LOG_WARNING, "Decoded data exceeded linesize!\n");
+                    break;
+                }
+                if ((src[j] & 1) && header[28] == COMPRESSED) {
+                    /* It seems that odd lines are always uncompressed, but
+                     * we do it according to specification anyways. */
+                    skip = src[j] >> 1;
+                    y += skip;
+                } else {
+                    *(y++) = src[j];
+                }
+            }
+        } else if (header[17] == SUBSAMPLE_420) {
+            /* We are on an even line and 420 subsample is used.
+             * On this line each pair of pixels is described by four bytes. */
+            for (j = 0; j < linelength - 4; ) {
+                if (y + 1 > y_end || u > u_end || v > v_end) {
+                    av_frame_set_decode_error_flags(frame, FF_DECODE_ERROR_INVALID_BITSTREAM);
+                    av_log(avctx, AV_LOG_WARNING, "Decoded data exceeded linesize!\n");
+                    break;
+                }
+                if ((src[j] & 1) && header[28] == COMPRESSED) {
+                    // Skip amount of pixels and move forward one byte
+                    skip = src[j] >> 1;
+                    y += skip;
+                    u += skip >> 1;
+                    v += skip >> 1;
+                    j++;
+                } else {
+                    // Set image data as specified and move forward 4 bytes
+                    *(y++) = src[j];
+                    *(u++) = src[j+1];
+                    *(y++) = src[j+2];
+                    *(v++) = src[j+3];
+                    j += 4;
+                }
+            }
+        }
+    }
+
+    *got_frame = 1;
+    if ((ret = av_frame_ref(data, cpia->frame)) < 0)
+        return ret;
+
+    return avpkt->size;
+}
+
+/* Decoder setup: choose the output format, adjust the time base, allocate the frame. */
+static av_cold int cpia_decode_init(AVCodecContext *avctx)
+{
+    CpiaContext *const cpia = avctx->priv_data;
+    const int has_default_timebase = avctx->time_base.num == 1 &&
+                                     avctx->time_base.den == 1000000;
+
+    /* Pictures are always output as AV_PIX_FMT_YUV420P. */
+    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
+
+    /* The default timebase set by the v4l2 demuxer leads to probing which
+     * is buggy, so replace it with 1/60 to skip that. */
+    if (has_default_timebase)
+        avctx->time_base.den = 60;      /* num is already 1 here */
+
+    /* Allocated once here and released in cpia_decode_end(). */
+    cpia->frame = av_frame_alloc();
+    if (cpia->frame == NULL)
+        return AVERROR(ENOMEM);
+    return 0;
+}
+
+/* Decoder teardown: release the frame allocated by cpia_decode_init(). */
+static av_cold int cpia_decode_end(AVCodecContext *avctx)
+{
+    CpiaContext *const cpia = avctx->priv_data;
+
+    av_frame_free(&cpia->frame);
+    return 0;
+}
+
+AVCodec ff_cpia_decoder = {                    /* Decoder descriptor tying the CPiA callbacks together */
+    .name           = "cpia",
+    .long_name      = NULL_IF_CONFIG_SMALL("CPiA video format"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_CPIA,
+    .priv_data_size = sizeof(CpiaContext),     /* the callbacks read avctx->priv_data as a CpiaContext */
+    .init           = cpia_decode_init,        /* allocates CpiaContext.frame */
+    .close          = cpia_decode_end,         /* frees CpiaContext.frame */
+    .decode         = cpia_decode_frame,       /* one packet in, one frame reference out */
+    .capabilities   = CODEC_CAP_DR1,
+};
diff --git a/libavcodec/crystalhd.c b/libavcodec/crystalhd.c
new file mode 100644
index 0000000..45b2d46
--- /dev/null
+++ b/libavcodec/crystalhd.c
@@ -0,0 +1,1226 @@
+/*
+ * - CrystalHD decoder module -
+ *
+ * Copyright(C) 2010,2011 Philip Langdale <ffmpeg.philipl@overt.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * - Principles of Operation -
+ *
+ * The CrystalHD decoder operates at the bitstream level - which is an even
+ * higher level than the decoding hardware you typically see in modern GPUs.
+ * This means it has a very simple interface, in principle. You feed demuxed
+ * packets in one end and get decoded picture (fields/frames) out the other.
+ *
+ * Of course, nothing is ever that simple. Due, at the very least, to b-frame
+ * dependencies in the supported formats, the hardware has a delay between
+ * when a packet goes in, and when a picture comes out. Furthermore, this delay
+ * is not just a function of time, but also one of the dependency on additional
+ * frames being fed into the decoder to satisfy the b-frame dependencies.
+ *
+ * As such, a pipeline will build up that is roughly equivalent to the required
+ * DPB for the file being played. If that was all it took, things would still
+ * be simple - so, of course, it isn't.
+ *
+ * The hardware has a way of indicating that a picture is ready to be copied out,
+ * but this is unreliable - and sometimes the attempt will still fail so, based
+ * on testing, the code will wait until 3 pictures are ready before starting
+ * to copy out - and this has the effect of extending the pipeline.
+ *
+ * Finally, while it is tempting to say that once the decoder starts outputting
+ * frames, the software should never fail to return a frame from a decode(),
+ * this is a hard assertion to make, because the stream may switch between
+ * differently encoded content (number of b-frames, interlacing, etc) which
+ * might require a longer pipeline than before. If that happened, you could
+ * deadlock trying to retrieve a frame that can't be decoded without feeding
+ * in additional packets.
+ *
+ * As such, the code will return in the event that a picture cannot be copied
+ * out, leading to an increase in the length of the pipeline. This in turn,
+ * means we have to be sensitive to the time it takes to decode a picture;
+ * We do not want to give up just because the hardware needed a little more
+ * time to prepare the picture! For this reason, there are delays included
+ * in the decode() path that ensure that, under normal conditions, the hardware
+ * will only fail to return a frame if it really needs additional packets to
+ * complete the decoding.
+ *
+ * Finally, to be explicit, we do not want the pipeline to grow without bound
+ * for two reasons: 1) The hardware can only buffer a finite number of packets,
+ * and 2) The client application may not be able to cope with arbitrarily long
+ * delays in the video path relative to the audio path. For example, MPlayer
+ * can only handle a 20 picture delay (although this is arbitrary, and needs
+ * to be extended to fully support the CrystalHD where the delay could be up
+ * to 32 pictures - consider PAFF H.264 content with 16 b-frames).
+ */
+
+/*****************************************************************************
+ * Includes
+ ****************************************************************************/
+
+#define _XOPEN_SOURCE 600
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <libcrystalhd/bc_dts_types.h>
+#include <libcrystalhd/bc_dts_defs.h>
+#include <libcrystalhd/libcrystalhd_if.h>
+
+#include "avcodec.h"
+#include "h264.h"
+#include "internal.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/opt.h"
+
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+/** Timeout parameter passed to DtsProcOutput() in us */
+#define OUTPUT_PROC_TIMEOUT 50
+/** Step between fake timestamps passed to hardware in units of 100ns */
+#define TIMESTAMP_UNIT 100000
+/** Initial value in us of the wait in decode() */
+#define BASE_WAIT 10000
+/** Increment in us to adjust wait in decode() */
+#define WAIT_UNIT 1000
+
+
+/*****************************************************************************
+ * Module private data
+ ****************************************************************************/
+
+typedef enum {
+    RET_ERROR = -1,      /* a driver call or buffer allocation failed */
+    RET_OK,              /*  0: nothing further to do for this output */
+    RET_COPY_AGAIN,      /*  1: no picture was copied; ask the driver again */
+    RET_SKIP_NEXT_COPY,  /*  2: skip copying on the next decode() call */
+    RET_COPY_NEXT_FIELD, /*  3: fetch the second field immediately */
+} CopyRet;
+
+typedef struct OpaqueList {
+    struct OpaqueList *next;   /* next entry, in push (decode) order */
+    uint64_t fake_timestamp;   /* key assigned by opaque_list_push() */
+    uint64_t reordered_opaque; /* caller value; copy_frame() uses it as pkt_pts */
+    uint8_t pic_type;          /* picture type stored alongside at push time */
+} OpaqueList;
+
+typedef struct {
+    AVClass *av_class;
+    AVCodecContext *avctx;          /* owning codec context, set in init() */
+    AVFrame *pic;                   /* frame that decoded output is copied into */
+    HANDLE dev;                     /* device handle filled in by DtsDeviceOpen() */
+
+    uint8_t *orig_extradata;        /* backup of avctx->extradata, restored in uninit() */
+    uint32_t orig_extradata_size;
+
+    AVBitStreamFilterContext *bsfc; /* h264_mp4toannexb filter, opened for AVC1 input */
+    AVCodecParserContext *parser;   /* parser opened for H.264 input */
+
+    uint8_t is_70012;               /* set when the reported device id is 0 */
+    uint8_t *sps_pps_buf;
+    uint32_t sps_pps_size;
+    uint8_t is_nal;                 /* extradata is present and starts with byte 1 */
+    uint8_t output_ready;
+    uint8_t need_second_field;      /* first field copied, second one still pending */
+    uint8_t skip_next_output;
+    uint64_t decode_wait;           /* starts at BASE_WAIT; reset by flush() */
+
+    uint64_t last_picture;          /* number of the last picture output; -1 when unset */
+
+    OpaqueList *head;               /* oldest entry of the pending-timestamp list */
+    OpaqueList *tail;               /* newest entry of the pending-timestamp list */
+
+    /* Options */
+    uint32_t sWidth;                /* crystalhd_downscale_width; 0 leaves scaling off */
+    uint8_t bframe_bug;             /* packed b-frame work-around is active */
+} CHDContext;
+
+static const AVOption options[] = { /* private options of the decoder */
+    { "crystalhd_downscale_width",
+      "Turn on downscaling to the specified width",
+      offsetof(CHDContext, sWidth), /* value lands in CHDContext.sWidth */
+      AV_OPT_TYPE_INT, {.i64 = 0}, 0, UINT32_MAX, /* default 0, min 0, max UINT32_MAX */
+      AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM, },
+    { NULL, }, /* end-of-table entry */
+};
+
+
+/*****************************************************************************
+ * Helper functions
+ ****************************************************************************/
+
+static inline BC_MEDIA_SUBTYPE id2subtype(CHDContext *priv, enum AVCodecID id)
+{
+    /* Translate a libavcodec codec ID into the matching CrystalHD subtype. */
+    if (id == AV_CODEC_ID_MPEG4)
+        return BC_MSUBTYPE_DIVX;
+    if (id == AV_CODEC_ID_MSMPEG4V3)
+        return BC_MSUBTYPE_DIVX311;
+    if (id == AV_CODEC_ID_MPEG2VIDEO)
+        return BC_MSUBTYPE_MPEG2VIDEO;
+    if (id == AV_CODEC_ID_VC1)
+        return BC_MSUBTYPE_VC1;
+    if (id == AV_CODEC_ID_WMV3)
+        return BC_MSUBTYPE_WMV3;
+    /* H.264 maps to AVC1 when is_nal is set and to plain H264 otherwise. */
+    if (id == AV_CODEC_ID_H264)
+        return priv->is_nal ? BC_MSUBTYPE_AVC1 : BC_MSUBTYPE_H264;
+    /* Every other codec ID has no subtype here. */
+    return BC_MSUBTYPE_INVALID;
+}
+
+static inline void print_frame_info(CHDContext *priv, BC_DTS_PROC_OUT *output)
+{
+    /* Dump the output descriptor and its picture info at verbose level. */
+    AVCodecContext *ctx = priv->avctx;
+    av_log(ctx, AV_LOG_VERBOSE, "\tYBuffSz: %u\n", output->YbuffSz);
+    av_log(ctx, AV_LOG_VERBOSE, "\tYBuffDoneSz: %u\n", output->YBuffDoneSz);
+    av_log(ctx, AV_LOG_VERBOSE, "\tUVBuffDoneSz: %u\n", output->UVBuffDoneSz);
+    av_log(ctx, AV_LOG_VERBOSE,
+           "\tTimestamp: %"PRIu64"\n", output->PicInfo.timeStamp);
+    av_log(ctx, AV_LOG_VERBOSE,
+           "\tPicture Number: %u\n", output->PicInfo.picture_number);
+    av_log(ctx, AV_LOG_VERBOSE,
+           "\tWidth: %u\n", output->PicInfo.width);
+    av_log(ctx, AV_LOG_VERBOSE,
+           "\tHeight: %u\n", output->PicInfo.height);
+    av_log(ctx, AV_LOG_VERBOSE,
+           "\tChroma: 0x%03x\n", output->PicInfo.chroma_format);
+    av_log(ctx, AV_LOG_VERBOSE,
+           "\tPulldown: %u\n", output->PicInfo.pulldown);
+    av_log(ctx, AV_LOG_VERBOSE,
+           "\tFlags: 0x%08x\n", output->PicInfo.flags);
+    av_log(ctx, AV_LOG_VERBOSE,
+           "\tFrame Rate/Res: %u\n", output->PicInfo.frame_rate);
+    av_log(ctx, AV_LOG_VERBOSE,
+           "\tAspect Ratio: %u\n", output->PicInfo.aspect_ratio);
+    av_log(ctx, AV_LOG_VERBOSE,
+           "\tColor Primaries: %u\n", output->PicInfo.colour_primaries);
+    av_log(ctx, AV_LOG_VERBOSE,
+           "\tMetaData: %u\n", output->PicInfo.picture_meta_payload);
+    av_log(ctx, AV_LOG_VERBOSE,
+           "\tSession Number: %u\n", output->PicInfo.sess_num);
+    av_log(ctx, AV_LOG_VERBOSE,
+           "\tycom: %u\n", output->PicInfo.ycom);
+    av_log(ctx, AV_LOG_VERBOSE, "\tCustom Aspect: %u\n",
+           output->PicInfo.custom_aspect_ratio_width_height);
+    av_log(ctx, AV_LOG_VERBOSE,
+           "\tFrames to Drop: %u\n", output->PicInfo.n_drop);
+    av_log(ctx, AV_LOG_VERBOSE,
+           "\tH264 Valid Fields: 0x%08x\n", output->PicInfo.other.h264.valid);
+}
+
+
+/*****************************************************************************
+ * OpaqueList functions
+ ****************************************************************************/
+
+static uint64_t opaque_list_push(CHDContext *priv, uint64_t reordered_opaque,
+                                 uint8_t pic_type)
+{
+    /* Append a node with the next fake timestamp; returns it, or 0 on OOM. */
+    OpaqueList *entry = av_mallocz(sizeof (OpaqueList));
+    if (entry == NULL) {
+        av_log(priv->avctx, AV_LOG_ERROR,
+               "Unable to allocate new node in OpaqueList.\n");
+        return 0;
+    }
+    entry->reordered_opaque = reordered_opaque;
+    entry->pic_type         = pic_type;
+    entry->fake_timestamp   = TIMESTAMP_UNIT;
+    if (priv->head) {
+        entry->fake_timestamp += priv->tail->fake_timestamp;
+        priv->tail->next       = entry;
+    } else {
+        priv->head = entry;
+    }
+    priv->tail = entry;
+    return entry->fake_timestamp;
+}
+
+/*
+ * Unlink and return the node whose fake_timestamp matches, or NULL if none
+ * does. The OpaqueList is built in decode order but drained in presentation
+ * order, so with reordered frames the match need not be the first element.
+ * Popping the last remaining node leaves both head and tail NULL.
+ * Returned node must be freed by caller.
+ */
+static OpaqueList *opaque_list_pop(CHDContext *priv, uint64_t fake_timestamp)
+{
+    OpaqueList *node = priv->head;
+
+    if (!priv->head) {
+        av_log(priv->avctx, AV_LOG_ERROR,
+               "CrystalHD: Attempted to query non-existent timestamps.\n");
+        return NULL;
+    }
+
+    /*
+     * The first element is special-cased because we have to manipulate
+     * the head pointer rather than the previous element in the list.
+     */
+    if (priv->head->fake_timestamp == fake_timestamp) {
+        priv->head = node->next;
+        /* head is NULL here if that was the only node; never dereference it. */
+        if (!priv->head || !priv->head->next)
+            priv->tail = priv->head;
+
+        node->next = NULL;
+        return node;
+    }
+
+    /*
+     * The list is processed at arm's length so that we have the
+     * previous element available to rewrite its next pointer.
+     */
+    while (node->next) {
+        OpaqueList *current = node->next;
+        if (current->fake_timestamp == fake_timestamp) {
+            node->next = current->next;
+
+            if (!node->next)
+               priv->tail = node;
+
+            current->next = NULL;
+            return current;
+        } else {
+            node = current;
+        }
+    }
+
+    av_log(priv->avctx, AV_LOG_VERBOSE,
+           "CrystalHD: Couldn't match fake_timestamp.\n");
+    return NULL;
+}
+
+
+/*****************************************************************************
+ * Video decoder API function definitions
+ ****************************************************************************/
+
+static void flush(AVCodecContext *avctx)
+{
+    CHDContext *ctx = avctx->priv_data;
+
+    /* Reset the pipeline bookkeeping and drop the pending picture. */
+    ctx->decode_wait       = BASE_WAIT;
+    ctx->last_picture      = -1;
+    ctx->skip_next_output  = 0;
+    ctx->need_second_field = 0;
+    ctx->output_ready      = 0;
+    avctx->has_b_frames    = 0;
+    av_frame_unref(ctx->pic);
+
+    /* Flush mode 4 flushes all software and hardware buffers. */
+    DtsFlushInput(ctx->dev, 4);
+}
+
+
+static av_cold int uninit(AVCodecContext *avctx)
+{
+    CHDContext *priv = avctx->priv_data;
+    HANDLE device    = priv->dev;
+    OpaqueList *node, *next;
+
+    /* Stop the decoder, then close the decoder and the device itself. */
+    DtsStopDecoder(device);
+    DtsCloseDecoder(device);
+    DtsDeviceClose(device);
+
+    /*
+     * Hand the backed-up extradata back to the codec context so that
+     * bitstream detection and filtering behave as expected should the
+     * decoder be initialised again.
+     */
+    if (priv->orig_extradata != NULL) {
+        av_free(avctx->extradata);
+        avctx->extradata_size     = priv->orig_extradata_size;
+        avctx->extradata          = priv->orig_extradata;
+        priv->orig_extradata_size = 0;
+        priv->orig_extradata      = NULL;
+    }
+
+    /* Tear down the parser and the bitstream filter, if one was opened. */
+    av_parser_close(priv->parser);
+    if (priv->bsfc != NULL)
+        av_bitstream_filter_close(priv->bsfc);
+
+    /* Release the buffers still owned by the context. */
+    av_free(priv->sps_pps_buf);
+    av_frame_free(&priv->pic);
+
+    /*
+     * Free every node that is still queued in the OpaqueList.
+     */
+    for (node = priv->head; node; node = next) {
+        next = node->next;
+        av_free(node);
+    }
+
+    return 0;
+}
+
+/* Open the CrystalHD device, set it up for avctx's codec and start it. */
+static av_cold int init(AVCodecContext *avctx)
+{
+    CHDContext* priv;
+    BC_STATUS ret;
+    BC_INFO_CRYSTAL version;
+    BC_INPUT_FORMAT format = {
+        .FGTEnable   = FALSE,
+        .Progressive = TRUE,
+        .OptFlags    = 0x80000000 | vdecFrameRate59_94 | 0x40,
+        .width       = avctx->width,
+        .height      = avctx->height,
+    };
+    BC_MEDIA_SUBTYPE subtype;
+    uint32_t mode = DTS_PLAYBACK_MODE |
+                    DTS_LOAD_FILE_PLAY_FW |
+                    DTS_SKIP_TX_CHK_CPB |
+                    DTS_PLAYBACK_DROP_RPT_MODE |
+                    DTS_SINGLE_THREADED_MODE |
+                    DTS_DFLT_RESOLUTION(vdecRESOLUTION_1080p23_976);
+
+    av_log(avctx, AV_LOG_VERBOSE, "CrystalHD Init for %s\n",
+           avctx->codec->name);
+
+    avctx->pix_fmt = AV_PIX_FMT_YUYV422;
+
+    /* Initialize the library */
+    priv               = avctx->priv_data;
+    priv->avctx        = avctx;
+    priv->is_nal       = avctx->extradata_size > 0 && *(avctx->extradata) == 1;
+    priv->last_picture = -1;
+    priv->decode_wait  = BASE_WAIT;
+    priv->pic          = av_frame_alloc();
+    if (!priv->pic) /* copy_frame() dereferences priv->pic unconditionally */
+        return AVERROR(ENOMEM);
+
+    subtype = id2subtype(priv, avctx->codec->id);
+    switch (subtype) {
+    case BC_MSUBTYPE_AVC1:
+        {
+            uint8_t *dummy_p;
+            int dummy_int;
+
+            /* Back up the extradata (zeroed padding) for restoring at close. */
+            priv->orig_extradata = av_mallocz(avctx->extradata_size + FF_INPUT_BUFFER_PADDING_SIZE);
+            if (!priv->orig_extradata) {
+                av_log(avctx, AV_LOG_ERROR,
+                       "Failed to allocate copy of extradata\n");
+                return AVERROR(ENOMEM);
+            }
+            priv->orig_extradata_size = avctx->extradata_size;
+            memcpy(priv->orig_extradata, avctx->extradata, avctx->extradata_size);
+
+            priv->bsfc = av_bitstream_filter_init("h264_mp4toannexb");
+            if (!priv->bsfc) {
+                av_log(avctx, AV_LOG_ERROR,
+                       "Cannot open the h264_mp4toannexb BSF!\n");
+                return AVERROR_BSF_NOT_FOUND;
+            }
+            av_bitstream_filter_filter(priv->bsfc, avctx, NULL, &dummy_p,
+                                       &dummy_int, NULL, 0, 0);
+        }
+        subtype = BC_MSUBTYPE_H264;
+        // Fall-through
+    case BC_MSUBTYPE_H264:
+        format.startCodeSz = 4;
+        // Fall-through
+    case BC_MSUBTYPE_VC1:
+    case BC_MSUBTYPE_WVC1:
+    case BC_MSUBTYPE_WMV3:
+    case BC_MSUBTYPE_WMVA:
+    case BC_MSUBTYPE_MPEG2VIDEO:
+    case BC_MSUBTYPE_DIVX:
+    case BC_MSUBTYPE_DIVX311:
+        format.pMetaData  = avctx->extradata;
+        format.metaDataSz = avctx->extradata_size;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "CrystalHD: Unknown codec name\n");
+        return AVERROR(EINVAL);
+    }
+    format.mSubtype = subtype;
+
+    if (priv->sWidth) {
+        format.bEnableScaling = 1;
+        format.ScalingParams.sWidth = priv->sWidth;
+    }
+
+    /* Get a decoder instance */
+    av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: starting up\n");
+    // Initialize the Link and Decoder devices
+    ret = DtsDeviceOpen(&priv->dev, mode);
+    if (ret != BC_STS_SUCCESS) {
+        av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: DtsDeviceOpen failed\n");
+        goto fail;
+    }
+
+    ret = DtsCrystalHDVersion(priv->dev, &version);
+    if (ret != BC_STS_SUCCESS) {
+        av_log(avctx, AV_LOG_VERBOSE,
+               "CrystalHD: DtsCrystalHDVersion failed\n");
+        goto fail;
+    }
+    priv->is_70012 = version.device == 0;
+
+    if (priv->is_70012 &&
+        (subtype == BC_MSUBTYPE_DIVX || subtype == BC_MSUBTYPE_DIVX311)) {
+        av_log(avctx, AV_LOG_VERBOSE,
+               "CrystalHD: BCM70012 doesn't support MPEG4-ASP/DivX/Xvid\n");
+        goto fail;
+    }
+
+    ret = DtsSetInputFormat(priv->dev, &format);
+    if (ret != BC_STS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "CrystalHD: SetInputFormat failed\n");
+        goto fail;
+    }
+
+    ret = DtsOpenDecoder(priv->dev, BC_STREAM_TYPE_ES);
+    if (ret != BC_STS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "CrystalHD: DtsOpenDecoder failed\n");
+        goto fail;
+    }
+
+    ret = DtsSetColorSpace(priv->dev, OUTPUT_MODE422_YUY2);
+    if (ret != BC_STS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "CrystalHD: DtsSetColorSpace failed\n");
+        goto fail;
+    }
+    ret = DtsStartDecoder(priv->dev);
+    if (ret != BC_STS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "CrystalHD: DtsStartDecoder failed\n");
+        goto fail;
+    }
+    ret = DtsStartCapture(priv->dev);
+    if (ret != BC_STS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "CrystalHD: DtsStartCapture failed\n");
+        goto fail;
+    }
+
+    if (avctx->codec->id == AV_CODEC_ID_H264) {
+        priv->parser = av_parser_init(avctx->codec->id);
+        if (priv->parser) /* a missing parser is only a warning, not an error */
+            priv->parser->flags = PARSER_FLAG_COMPLETE_FRAMES;
+        else
+            av_log(avctx, AV_LOG_WARNING, "Cannot open the h.264 parser! "
+                   "Interlaced h.264 content will not be detected reliably.\n");
+    }
+    av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: Init complete.\n");
+
+    return 0;
+
+ fail:
+    uninit(avctx);
+    return -1;
+}
+
+/* Copy one decoded picture or field from the hardware output into priv->pic. */
+static inline CopyRet copy_frame(AVCodecContext *avctx,
+                                 BC_DTS_PROC_OUT *output,
+                                 void *data, int *got_frame)
+{
+    BC_STATUS ret;
+    BC_DTS_STATUS decoder_status = { 0, };
+    uint8_t trust_interlaced;
+    uint8_t interlaced;
+
+    CHDContext *priv = avctx->priv_data;
+    int64_t pkt_pts  = AV_NOPTS_VALUE;
+    uint8_t pic_type = 0;
+
+    uint8_t bottom_field = (output->PicInfo.flags & VDEC_FLAG_BOTTOMFIELD) ==
+                           VDEC_FLAG_BOTTOMFIELD;
+    uint8_t bottom_first = !!(output->PicInfo.flags & VDEC_FLAG_BOTTOM_FIRST);
+
+    int width    = output->PicInfo.width;
+    int height   = output->PicInfo.height;
+    int bwidth;
+    uint8_t *src = output->Ybuff;
+    int sStride;
+    uint8_t *dst;
+    int dStride;
+
+    if (output->PicInfo.timeStamp != 0) {
+        OpaqueList *node = opaque_list_pop(priv, output->PicInfo.timeStamp);
+        if (node) {
+            pkt_pts = node->reordered_opaque;
+            pic_type = node->pic_type;
+            av_free(node);
+        } else {
+            /*
+             * We will encounter a situation where a timestamp cannot be
+             * popped if a second field is being returned. In this case,
+             * each field has the same timestamp and the first one will
+             * cause it to be popped. To keep subsequent calculations
+             * simple, pic_type should be set to a FIELD value - doesn't
+             * matter which, but I chose BOTTOM.
+             */
+            pic_type = PICT_BOTTOM_FIELD;
+        }
+        av_log(avctx, AV_LOG_VERBOSE, "output \"pts\": %"PRIu64"\n",
+               output->PicInfo.timeStamp);
+        av_log(avctx, AV_LOG_VERBOSE, "output picture type %d\n",
+               pic_type);
+    }
+
+    ret = DtsGetDriverStatus(priv->dev, &decoder_status);
+    if (ret != BC_STS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR,
+               "CrystalHD: GetDriverStatus failed: %u\n", ret);
+       return RET_ERROR;
+    }
+
+    /*
+     * For most content, we can trust the interlaced flag returned
+     * by the hardware, but sometimes we can't. These are the
+     * conditions under which we can trust the flag:
+     *
+     * 1) It's not h.264 content
+     * 2) The UNKNOWN_SRC flag is not set
+     * 3) We know we're expecting a second field
+     * 4) The hardware reports this picture and the next picture
+     *    have the same picture number.
+     *
+     * Note that there can still be interlaced content that will
+     * fail this check, if the hardware hasn't decoded the next
+     * picture or if there is a corruption in the stream. (In either
+     * case a 0 will be returned for the next picture number)
+     */
+    trust_interlaced = avctx->codec->id != AV_CODEC_ID_H264 ||
+                       !(output->PicInfo.flags & VDEC_FLAG_UNKNOWN_SRC) ||
+                       priv->need_second_field ||
+                       (decoder_status.picNumFlags & ~0x40000000) ==
+                       output->PicInfo.picture_number;
+
+    /*
+     * If we got a false negative for trust_interlaced on the first field,
+     * we will realise our mistake here when we see that the picture number is that
+     * of the previous picture. We cannot recover the frame and should discard the
+     * second field to keep the correct number of output frames.
+     */
+    if (output->PicInfo.picture_number == priv->last_picture && !priv->need_second_field) {
+        av_log(avctx, AV_LOG_WARNING,
+               "Incorrectly guessed progressive frame. Discarding second field\n");
+        /* Returning without providing a picture. */
+        return RET_OK;
+    }
+
+    interlaced = (output->PicInfo.flags & VDEC_FLAG_INTERLACED_SRC) &&
+                 trust_interlaced;
+
+    if (!trust_interlaced && (decoder_status.picNumFlags & ~0x40000000) == 0) {
+        av_log(avctx, AV_LOG_VERBOSE,
+               "Next picture number unknown. Assuming progressive frame.\n");
+    }
+
+    av_log(avctx, AV_LOG_VERBOSE, "Interlaced state: %d | trust_interlaced %d\n",
+           interlaced, trust_interlaced);
+
+    if (priv->pic->data[0] && !priv->need_second_field)
+        av_frame_unref(priv->pic);
+
+    priv->need_second_field = interlaced && !priv->need_second_field;
+
+    if (!priv->pic->data[0]) {
+        if (ff_get_buffer(avctx, priv->pic, AV_GET_BUFFER_FLAG_REF) < 0)
+            return RET_ERROR;
+    }
+
+    bwidth = av_image_get_linesize(avctx->pix_fmt, width, 0);
+    if (priv->is_70012) {
+        int pStride;
+
+        if (width <= 720)
+            pStride = 720;
+        else if (width <= 1280)
+            pStride = 1280;
+        else pStride = 1920;
+        sStride = av_image_get_linesize(avctx->pix_fmt, pStride, 0);
+    } else {
+        sStride = bwidth;
+    }
+
+    dStride = priv->pic->linesize[0];
+    dst     = priv->pic->data[0];
+
+    av_log(priv->avctx, AV_LOG_VERBOSE, "CrystalHD: Copying out frame\n");
+
+    if (interlaced) {
+        int dY = 0;
+        int sY = 0;
+
+        height /= 2;
+        if (bottom_field) {
+            av_log(priv->avctx, AV_LOG_VERBOSE, "Interlaced: bottom field\n");
+            dY = 1;
+        } else {
+            av_log(priv->avctx, AV_LOG_VERBOSE, "Interlaced: top field\n");
+            dY = 0;
+        }
+
+        for (sY = 0; sY < height; dY++, sY++) {
+            memcpy(&(dst[dY * dStride]), &(src[sY * sStride]), bwidth);
+            dY++;
+        }
+    } else {
+        av_image_copy_plane(dst, dStride, src, sStride, bwidth, height);
+    }
+
+    priv->pic->interlaced_frame = interlaced;
+    if (interlaced)
+        priv->pic->top_field_first = !bottom_first;
+
+    priv->pic->pkt_pts = pkt_pts;
+
+    if (!priv->need_second_field) {
+        /* Report failure via CopyRet; flag a frame only once the ref succeeds. */
+        if (av_frame_ref(data, priv->pic) < 0)
+            return RET_ERROR;
+        *got_frame = 1;
+    }
+
+    /*
+     * Two types of PAFF content have been observed. One form causes the
+     * hardware to return a field pair and the other individual fields,
+     * even though the input is always individual fields. We must skip
+     * copying on the next decode() call to maintain pipeline length in
+     * the first case.
+     */
+    if (!interlaced && (output->PicInfo.flags & VDEC_FLAG_UNKNOWN_SRC) &&
+        (pic_type == PICT_TOP_FIELD || pic_type == PICT_BOTTOM_FIELD)) {
+        av_log(priv->avctx, AV_LOG_VERBOSE, "Fieldpair from two packets.\n");
+        return RET_SKIP_NEXT_COPY;
+    }
+
+    /*
+     * The logic here is purely based on empirical testing with samples.
+     * If we need a second field, it could come from a second input packet,
+     * or it could come from the same field-pair input packet at the current
+     * field. In the first case, we should return and wait for the next time
+     * round to get the second field, while in the second case, we should
+     * ask the decoder for it immediately.
+     *
+     * Testing has shown that we are dealing with the fieldpair -> two fields
+     * case if the VDEC_FLAG_UNKNOWN_SRC is not set or if the input picture
+     * type was PICT_FRAME (in this second case, the flag might still be set)
+     */
+    return priv->need_second_field &&
+           (!(output->PicInfo.flags & VDEC_FLAG_UNKNOWN_SRC) ||
+            pic_type == PICT_FRAME) ?
+           RET_COPY_NEXT_FIELD : RET_OK;
+}
+
+/* Ask the driver for one output and copy it out if its picture info is valid. */
+static inline CopyRet receive_frame(AVCodecContext *avctx,
+                                    void *data, int *got_frame)
+{
+    BC_STATUS ret;
+    BC_DTS_PROC_OUT output = {
+        .PicInfo.width  = avctx->width,
+        .PicInfo.height = avctx->height,
+    };
+    CHDContext *priv = avctx->priv_data;
+    HANDLE dev       = priv->dev;
+
+    *got_frame = 0;
+
+    // Request decoded data from the driver
+    ret = DtsProcOutputNoCopy(dev, OUTPUT_PROC_TIMEOUT, &output);
+    if (ret == BC_STS_FMT_CHANGE) {
+        av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: Initial format change\n");
+        avctx->width  = output.PicInfo.width;
+        avctx->height = output.PicInfo.height;
+        switch ( output.PicInfo.aspect_ratio ) {
+        case vdecAspectRatioSquare:
+            avctx->sample_aspect_ratio = (AVRational) {  1,  1};
+            break;
+        case vdecAspectRatio12_11:
+            avctx->sample_aspect_ratio = (AVRational) { 12, 11};
+            break;
+        case vdecAspectRatio10_11:
+            avctx->sample_aspect_ratio = (AVRational) { 10, 11};
+            break;
+        case vdecAspectRatio16_11:
+            avctx->sample_aspect_ratio = (AVRational) { 16, 11};
+            break;
+        case vdecAspectRatio40_33:
+            avctx->sample_aspect_ratio = (AVRational) { 40, 33};
+            break;
+        case vdecAspectRatio24_11:
+            avctx->sample_aspect_ratio = (AVRational) { 24, 11};
+            break;
+        case vdecAspectRatio20_11:
+            avctx->sample_aspect_ratio = (AVRational) { 20, 11};
+            break;
+        case vdecAspectRatio32_11:
+            avctx->sample_aspect_ratio = (AVRational) { 32, 11};
+            break;
+        case vdecAspectRatio80_33:
+            avctx->sample_aspect_ratio = (AVRational) { 80, 33};
+            break;
+        case vdecAspectRatio18_11:
+            avctx->sample_aspect_ratio = (AVRational) { 18, 11};
+            break;
+        case vdecAspectRatio15_11:
+            avctx->sample_aspect_ratio = (AVRational) { 15, 11};
+            break;
+        case vdecAspectRatio64_33:
+            avctx->sample_aspect_ratio = (AVRational) { 64, 33};
+            break;
+        case vdecAspectRatio160_99:
+            avctx->sample_aspect_ratio = (AVRational) {160, 99};
+            break;
+        case vdecAspectRatio4_3: /* NOTE(review): 4:3, 16:9 and 221:1 look like display ratios - confirm */
+            avctx->sample_aspect_ratio = (AVRational) {  4,  3};
+            break;
+        case vdecAspectRatio16_9:
+            avctx->sample_aspect_ratio = (AVRational) { 16,  9};
+            break;
+        case vdecAspectRatio221_1:
+            avctx->sample_aspect_ratio = (AVRational) {221,  1};
+            break;
+        } /* any other aspect_ratio value leaves sample_aspect_ratio untouched */
+        return RET_COPY_AGAIN; /* no picture is copied on the format-change path */
+    } else if (ret == BC_STS_SUCCESS) {
+        int copy_ret = -1; /* same value as RET_ERROR; overwritten on both branches below */
+        if (output.PoutFlags & BC_POUT_FLAGS_PIB_VALID) {
+            if (priv->last_picture == -1) { /* -1 is the "unset" value from init()/flush() */
+                /*
+                 * Init to one less, so that the incrementing code doesn't
+                 * need to be special-cased.
+                 */
+                priv->last_picture = output.PicInfo.picture_number - 1;
+            }
+
+            if (avctx->codec->id == AV_CODEC_ID_MPEG4 &&
+                output.PicInfo.timeStamp == 0 && priv->bframe_bug) {
+                av_log(avctx, AV_LOG_VERBOSE,
+                       "CrystalHD: Not returning packed frame twice.\n");
+                priv->last_picture++;
+                DtsReleaseOutputBuffs(dev, NULL, FALSE); /* released here because of the early return */
+                return RET_COPY_AGAIN;
+            }
+
+            print_frame_info(priv, &output);
+
+            if (priv->last_picture + 1 < output.PicInfo.picture_number) {
+                av_log(avctx, AV_LOG_WARNING,
+                       "CrystalHD: Picture Number discontinuity\n");
+                /*
+                 * Have we lost frames? If so, we need to shrink the
+                 * pipeline length appropriately.
+                 *
+                 * XXX: I have no idea what the semantics of this situation
+                 * are so I don't even know if we've lost frames or which
+                 * ones.
+                 *
+                 * In any case, only warn the first time.
+                 */
+               priv->last_picture = output.PicInfo.picture_number - 1;
+            }
+
+            copy_ret = copy_frame(avctx, &output, data, got_frame);
+            if (*got_frame > 0) { /* a complete picture was handed to the caller */
+                avctx->has_b_frames--;
+                priv->last_picture++;
+                av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: Pipeline length: %u\n",
+                       avctx->has_b_frames);
+            }
+        } else {
+            /*
+             * An invalid frame has been consumed.
+             */
+            av_log(avctx, AV_LOG_ERROR, "CrystalHD: ProcOutput succeeded with "
+                                        "invalid PIB\n");
+            avctx->has_b_frames--;
+            copy_ret = RET_OK;
+        }
+        DtsReleaseOutputBuffs(dev, NULL, FALSE); /* runs whether or not the copy succeeded */
+
+        return copy_ret;
+    } else if (ret == BC_STS_BUSY) {
+        return RET_COPY_AGAIN;
+    } else {
+        av_log(avctx, AV_LOG_ERROR, "CrystalHD: ProcOutput failed %d\n", ret);
+        return RET_ERROR;
+    }
+}
+
+
+/*
+ * Feed one packet to the CrystalHD hardware and try to pull a decoded frame.
+ * Returns len, the number of input bytes handed to the hardware (0 when the
+ * transmit buffer was full and nothing was consumed), or a negative value on
+ * failure. *got_frame is only written by receive_frame().
+ */
+static int decode(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt)
+{
+    BC_STATUS ret;
+    BC_DTS_STATUS decoder_status = { 0, };
+    CopyRet rec_ret;
+    CHDContext *priv   = avctx->priv_data;
+    HANDLE dev         = priv->dev;
+    uint8_t *in_data   = avpkt->data;
+    int len            = avpkt->size;
+    int free_data      = 0;
+    uint8_t pic_type   = 0;
+
+    av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: decode_frame\n");
+
+    if (avpkt->size == 7 && !priv->bframe_bug) {
+        /* The use of a drop frame triggers the bug */
+        av_log(avctx, AV_LOG_INFO,
+               "CrystalHD: Enabling work-around for packed b-frame bug\n");
+        priv->bframe_bug = 1;
+    } else if (avpkt->size == 8 && priv->bframe_bug) {
+        /* Delay frames don't trigger the bug */
+        av_log(avctx, AV_LOG_INFO,
+               "CrystalHD: Disabling work-around for packed b-frame bug\n");
+        priv->bframe_bug = 0;
+    }
+
+    if (len) {
+        int32_t tx_free = (int32_t)DtsTxFreeSize(dev);
+
+        if (priv->parser) {
+            int filter_ret = 0;
+
+            if (priv->bsfc) {
+                filter_ret = av_bitstream_filter_filter(priv->bsfc, avctx, NULL,
+                                                        &in_data, &len,
+                                                        avpkt->data, len, 0);
+            }
+            free_data = filter_ret > 0;
+
+            if (filter_ret >= 0) {
+                uint8_t *pout;
+                int psize;
+                int index;
+                H264Context *h = priv->parser->priv_data;
+
+                index = av_parser_parse2(priv->parser, avctx, &pout, &psize,
+                                         in_data, len, avctx->internal->pkt->pts,
+                                         avctx->internal->pkt->dts, 0);
+                if (index < 0) {
+                    av_log(avctx, AV_LOG_WARNING,
+                           "CrystalHD: Failed to parse h.264 packet to "
+                           "detect interlacing.\n");
+                } else if (index != len) {
+                    av_log(avctx, AV_LOG_WARNING,
+                           "CrystalHD: Failed to parse h.264 packet "
+                           "completely. Interlaced frames may be "
+                           "incorrectly detected.\n");
+                } else {
+                    av_log(avctx, AV_LOG_VERBOSE,
+                           "CrystalHD: parser picture type %d\n",
+                           h->picture_structure);
+                    pic_type = h->picture_structure;
+                }
+            } else {
+                av_log(avctx, AV_LOG_WARNING,
+                       "CrystalHD: mp4toannexb filter failed to filter "
+                       "packet. Interlaced frames may be incorrectly "
+                       "detected.\n");
+            }
+        }
+
+        if (len < tx_free - 1024) {
+            /*
+             * Despite being notionally opaque, either libcrystalhd or
+             * the hardware itself will mangle pts values that are too
+             * small or too large. The docs claim it should be in units
+             * of 100ns. Given that we're nominally dealing with a black
+             * box on both sides, any transform we do has no guarantee of
+             * avoiding mangling so we need to build a mapping to values
+             * we know will not be mangled.
+             */
+            uint64_t pts = opaque_list_push(priv, avctx->internal->pkt->pts, pic_type);
+            if (!pts) {
+                if (free_data)
+                    av_freep(&in_data);
+                return AVERROR(ENOMEM);
+            }
+            av_log(priv->avctx, AV_LOG_VERBOSE,
+                   "input \"pts\": %"PRIu64"\n", pts);
+            ret = DtsProcInput(dev, in_data, len, pts, 0);
+            if (free_data)
+                av_freep(&in_data);
+            if (ret == BC_STS_BUSY) {
+                av_log(avctx, AV_LOG_WARNING,
+                       "CrystalHD: ProcInput returned busy\n");
+                usleep(BASE_WAIT);
+                return AVERROR(EBUSY);
+            } else if (ret != BC_STS_SUCCESS) {
+                av_log(avctx, AV_LOG_ERROR,
+                       "CrystalHD: ProcInput failed: %u\n", ret);
+                return -1;
+            }
+            avctx->has_b_frames++;
+        } else {
+            av_log(avctx, AV_LOG_WARNING, "CrystalHD: Input buffer full\n");
+            if (free_data)
+                av_freep(&in_data); // Filtered copy is ours; don't leak it.
+            len = 0; // We didn't consume any bytes.
+        }
+    } else {
+        av_log(avctx, AV_LOG_INFO, "CrystalHD: No more input data\n");
+    }
+
+    if (priv->skip_next_output) {
+        av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: Skipping next output.\n");
+        priv->skip_next_output = 0;
+        avctx->has_b_frames--;
+        return len;
+    }
+
+    ret = DtsGetDriverStatus(dev, &decoder_status);
+    if (ret != BC_STS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "CrystalHD: GetDriverStatus failed\n");
+        return -1;
+    }
+
+    /*
+     * No frames ready. Don't try to extract.
+     *
+     * Empirical testing shows that ReadyListCount can be a damn lie,
+     * and ProcOut still fails when count > 0. The same testing showed
+     * that two more iterations were needed before ProcOutput would
+     * succeed.
+     */
+    if (priv->output_ready < 2) {
+        if (decoder_status.ReadyListCount != 0)
+            priv->output_ready++;
+        usleep(BASE_WAIT);
+        av_log(avctx, AV_LOG_INFO, "CrystalHD: Filling pipeline.\n");
+        return len;
+    } else if (decoder_status.ReadyListCount == 0) {
+        /*
+         * After the pipeline is established, if we encounter a lack of frames
+         * that probably means we're not giving the hardware enough time to
+         * decode them, so start increasing the wait time at the end of a
+         * decode call.
+         */
+        usleep(BASE_WAIT);
+        priv->decode_wait += WAIT_UNIT;
+        av_log(avctx, AV_LOG_INFO, "CrystalHD: No frames ready. Returning\n");
+        return len;
+    }
+
+    do {
+        rec_ret = receive_frame(avctx, data, got_frame);
+        if (rec_ret == RET_OK && *got_frame == 0) {
+            /*
+             * This case is for when the encoded fields are stored
+             * separately and we get a separate avpkt for each one. To keep
+             * the pipeline stable, we should return nothing and wait for
+             * the next time round to grab the second field.
+             * H.264 PAFF is an example of this.
+             */
+            av_log(avctx, AV_LOG_VERBOSE, "Returning after first field.\n");
+            avctx->has_b_frames--;
+        } else if (rec_ret == RET_COPY_NEXT_FIELD) {
+            /*
+             * This case is for when the encoded fields are stored in a
+             * single avpkt but the hardware returns them separately. Unless
+             * we grab the second field before returning, we'll slip another
+             * frame in the pipeline and if that happens a lot, we're sunk.
+             * So we have to get that second field now.
+             * Interlaced mpeg2 and vc1 are examples of this.
+             */
+            av_log(avctx, AV_LOG_VERBOSE, "Trying to get second field.\n");
+            while (1) {
+                usleep(priv->decode_wait);
+                ret = DtsGetDriverStatus(dev, &decoder_status);
+                if (ret == BC_STS_SUCCESS &&
+                    decoder_status.ReadyListCount > 0) {
+                    rec_ret = receive_frame(avctx, data, got_frame);
+                    if ((rec_ret == RET_OK && *got_frame > 0) ||
+                        rec_ret == RET_ERROR)
+                        break;
+                }
+            }
+            av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: Got second field.\n");
+        } else if (rec_ret == RET_SKIP_NEXT_COPY) {
+            /* Two input packets got turned into a field pair. Gawd. */
+            av_log(avctx, AV_LOG_VERBOSE,
+                   "Don't output on next decode call.\n");
+            priv->skip_next_output = 1;
+        }
+        /*
+         * If rec_ret == RET_COPY_AGAIN, that means that either we just handled
+         * a FMT_CHANGE event and need to go around again for the actual frame,
+         * we got a busy status and need to try again, or we're dealing with
+         * packed b-frames, where the hardware strangely returns the packed
+         * p-frame twice. We choose to keep the second copy as it carries the
+         * valid pts.
+         */
+    } while (rec_ret == RET_COPY_AGAIN);
+    usleep(priv->decode_wait);
+    return len;
+}
+
+
+#if CONFIG_H264_CRYSTALHD_DECODER
+static AVClass h264_class = { /* referenced by .priv_class below */
+    "h264_crystalhd",
+    av_default_item_name,
+    options,
+    LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_h264_crystalhd_decoder = {
+    .name           = "h264_crystalhd",
+    .long_name      = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (CrystalHD acceleration)"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_H264,
+    .priv_data_size = sizeof(CHDContext),
+    .init           = init,
+    .close          = uninit,
+    .decode         = decode,
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY,
+    .flush          = flush,
+    .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUYV422, AV_PIX_FMT_NONE},
+    .priv_class     = &h264_class,
+};
+#endif /* CONFIG_H264_CRYSTALHD_DECODER */
+
+#if CONFIG_MPEG2_CRYSTALHD_DECODER
+static AVClass mpeg2_class = { /* referenced by .priv_class below */
+    "mpeg2_crystalhd",
+    av_default_item_name,
+    options,
+    LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_mpeg2_crystalhd_decoder = {
+    .name           = "mpeg2_crystalhd",
+    .long_name      = NULL_IF_CONFIG_SMALL("MPEG-2 Video (CrystalHD acceleration)"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_MPEG2VIDEO,
+    .priv_data_size = sizeof(CHDContext),
+    .init           = init,
+    .close          = uninit,
+    .decode         = decode,
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY,
+    .flush          = flush,
+    .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUYV422, AV_PIX_FMT_NONE},
+    .priv_class     = &mpeg2_class,
+};
+#endif /* CONFIG_MPEG2_CRYSTALHD_DECODER */
+
+#if CONFIG_MPEG4_CRYSTALHD_DECODER
+static AVClass mpeg4_class = { /* referenced by .priv_class below */
+    "mpeg4_crystalhd",
+    av_default_item_name,
+    options,
+    LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_mpeg4_crystalhd_decoder = {
+    .name           = "mpeg4_crystalhd",
+    .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 Part 2 (CrystalHD acceleration)"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_MPEG4,
+    .priv_data_size = sizeof(CHDContext),
+    .init           = init,
+    .close          = uninit,
+    .decode         = decode,
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY,
+    .flush          = flush,
+    .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUYV422, AV_PIX_FMT_NONE},
+    .priv_class     = &mpeg4_class,
+};
+#endif /* CONFIG_MPEG4_CRYSTALHD_DECODER */
+
+#if CONFIG_MSMPEG4_CRYSTALHD_DECODER
+static AVClass msmpeg4_class = { /* referenced by .priv_class below */
+    "msmpeg4_crystalhd",
+    av_default_item_name,
+    options,
+    LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_msmpeg4_crystalhd_decoder = {
+    .name           = "msmpeg4_crystalhd",
+    .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 Part 2 Microsoft variant version 3 (CrystalHD acceleration)"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_MSMPEG4V3,
+    .priv_data_size = sizeof(CHDContext),
+    .init           = init,
+    .close          = uninit,
+    .decode         = decode,
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_EXPERIMENTAL, /* only entry here flagged experimental */
+    .flush          = flush,
+    .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUYV422, AV_PIX_FMT_NONE},
+    .priv_class     = &msmpeg4_class,
+};
+#endif /* CONFIG_MSMPEG4_CRYSTALHD_DECODER */
+
+#if CONFIG_VC1_CRYSTALHD_DECODER
+static AVClass vc1_class = { /* referenced by .priv_class below */
+    "vc1_crystalhd",
+    av_default_item_name,
+    options,
+    LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_vc1_crystalhd_decoder = {
+    .name           = "vc1_crystalhd",
+    .long_name      = NULL_IF_CONFIG_SMALL("SMPTE VC-1 (CrystalHD acceleration)"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_VC1,
+    .priv_data_size = sizeof(CHDContext),
+    .init           = init,
+    .close          = uninit,
+    .decode         = decode,
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY,
+    .flush          = flush,
+    .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUYV422, AV_PIX_FMT_NONE},
+    .priv_class     = &vc1_class,
+};
+#endif /* CONFIG_VC1_CRYSTALHD_DECODER */
+
+#if CONFIG_WMV3_CRYSTALHD_DECODER
+static AVClass wmv3_class = { /* referenced by .priv_class below */
+    "wmv3_crystalhd",
+    av_default_item_name,
+    options,
+    LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_wmv3_crystalhd_decoder = {
+    .name           = "wmv3_crystalhd",
+    .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 9 (CrystalHD acceleration)"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_WMV3,
+    .priv_data_size = sizeof(CHDContext),
+    .init           = init,
+    .close          = uninit,
+    .decode         = decode,
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY,
+    .flush          = flush,
+    .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUYV422, AV_PIX_FMT_NONE},
+    .priv_class     = &wmv3_class,
+};
+#endif /* CONFIG_WMV3_CRYSTALHD_DECODER */
diff --git a/libavcodec/cscd.c b/libavcodec/cscd.c
index 9ae7e33..e875dd7 100644
--- a/libavcodec/cscd.c
+++ b/libavcodec/cscd.c
@@ -2,20 +2,20 @@
  * CamStudio decoder
  * Copyright (c) 2006 Reimar Doeffinger
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include <stdio.h>
@@ -31,14 +31,15 @@
 #include "libavutil/lzo.h"
 
 typedef struct {
+    AVFrame *pic;
     int linelen, height, bpp;
     unsigned int decomp_size;
     unsigned char* decomp_buf;
 } CamStudioContext;
 
-static void copy_frame_default(AVFrame *f, const uint8_t *src, int src_stride,
+static void copy_frame_default(AVFrame *f, const uint8_t *src,
                                int linelen, int height) {
-    int i;
+    int i, src_stride = FFALIGN(linelen, 4);
     uint8_t *dst = f->data[0];
     dst += (height - 1) * f->linesize[0];
     for (i = height; i; i--) {
@@ -48,9 +49,9 @@ static void copy_frame_default(AVFrame *f, const uint8_t *src, int src_stride,
     }
 }
 
-static void add_frame_default(AVFrame *f, const uint8_t *src, int src_stride,
+static void add_frame_default(AVFrame *f, const uint8_t *src,
                               int linelen, int height) {
-    int i, j;
+    int i, j, src_stride = FFALIGN(linelen, 4);
     uint8_t *dst = f->data[0];
     dst += (height - 1) * f->linesize[0];
     for (i = height; i; i--) {
@@ -61,87 +62,11 @@ static void add_frame_default(AVFrame *f, const uint8_t *src, int src_stride,
     }
 }
 
-#if !HAVE_BIGENDIAN
-#define copy_frame_16(f, s, l, h) copy_frame_default(f, s, l, l, h)
-#define copy_frame_32(f, s, l, h) copy_frame_default(f, s, l, l, h)
-#define add_frame_16(f, s, l, h) add_frame_default(f, s, l, l, h)
-#define add_frame_32(f, s, l, h) add_frame_default(f, s, l, l, h)
-#else
-static void copy_frame_16(AVFrame *f, const uint8_t *src,
-                          int linelen, int height) {
-    int i, j;
-    uint8_t *dst = f->data[0];
-    dst += (height - 1) * f->linesize[0];
-    for (i = height; i; i--) {
-        for (j = linelen / 2; j; j--) {
-          dst[0] = src[1];
-          dst[1] = src[0];
-          src += 2;
-          dst += 2;
-        }
-        dst -= f->linesize[0] + linelen;
-    }
-}
-
-static void copy_frame_32(AVFrame *f, const uint8_t *src,
-                          int linelen, int height) {
-    int i, j;
-    uint8_t *dst = f->data[0];
-    dst += (height - 1) * f->linesize[0];
-    for (i = height; i; i--) {
-        for (j = linelen / 4; j; j--) {
-          dst[0] = src[3];
-          dst[1] = src[2];
-          dst[2] = src[1];
-          dst[3] = src[0];
-          src += 4;
-          dst += 4;
-        }
-        dst -= f->linesize[0] + linelen;
-    }
-}
-
-static void add_frame_16(AVFrame *f, const uint8_t *src,
-                         int linelen, int height) {
-    int i, j;
-    uint8_t *dst = f->data[0];
-    dst += (height - 1) * f->linesize[0];
-    for (i = height; i; i--) {
-        for (j = linelen / 2; j; j--) {
-          dst[0] += src[1];
-          dst[1] += src[0];
-          src += 2;
-          dst += 2;
-        }
-        dst -= f->linesize[0] + linelen;
-    }
-}
-
-static void add_frame_32(AVFrame *f, const uint8_t *src,
-                         int linelen, int height) {
-    int i, j;
-    uint8_t *dst = f->data[0];
-    dst += (height - 1) * f->linesize[0];
-    for (i = height; i; i--) {
-        for (j = linelen / 4; j; j--) {
-          dst[0] += src[3];
-          dst[1] += src[2];
-          dst[2] += src[1];
-          dst[3] += src[0];
-          src += 4;
-          dst += 4;
-        }
-        dst -= f->linesize[0] + linelen;
-    }
-}
-#endif
-
 static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                         AVPacket *avpkt) {
     const uint8_t *buf = avpkt->data;
     int buf_size = avpkt->size;
     CamStudioContext *c = avctx->priv_data;
-    AVFrame *picture = data;
     int ret;
 
     if (buf_size < 2) {
@@ -149,10 +74,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         return AVERROR_INVALIDDATA;
     }
 
-    if ((ret = ff_get_buffer(avctx, picture, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, c->pic)) < 0)
         return ret;
-    }
 
     // decompress data
     switch ((buf[0] >> 1) & 7) {
@@ -180,36 +103,21 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
 
     // flip upside down, add difference frame
     if (buf[0] & 1) { // keyframe
-        picture->pict_type = AV_PICTURE_TYPE_I;
-        picture->key_frame = 1;
-        switch (c->bpp) {
-          case 16:
-              copy_frame_16(picture, c->decomp_buf, c->linelen, c->height);
-              break;
-          case 32:
-              copy_frame_32(picture, c->decomp_buf, c->linelen, c->height);
-              break;
-          default:
-              copy_frame_default(picture, c->decomp_buf, FFALIGN(c->linelen, 4),
+        c->pic->pict_type = AV_PICTURE_TYPE_I;
+        c->pic->key_frame = 1;
+              copy_frame_default(c->pic, c->decomp_buf,
                                  c->linelen, c->height);
-        }
     } else {
-        picture->pict_type = AV_PICTURE_TYPE_P;
-        picture->key_frame = 0;
-        switch (c->bpp) {
-          case 16:
-              add_frame_16(picture, c->decomp_buf, c->linelen, c->height);
-              break;
-          case 32:
-              add_frame_32(picture, c->decomp_buf, c->linelen, c->height);
-              break;
-          default:
-              add_frame_default(picture, c->decomp_buf, FFALIGN(c->linelen, 4),
+        c->pic->pict_type = AV_PICTURE_TYPE_P;
+        c->pic->key_frame = 0;
+              add_frame_default(c->pic, c->decomp_buf,
                                 c->linelen, c->height);
-        }
     }
 
     *got_frame = 1;
+    if ((ret = av_frame_ref(data, c->pic)) < 0)
+        return ret;
+
     return buf_size;
 }
 
@@ -217,9 +125,9 @@ static av_cold int decode_init(AVCodecContext *avctx) {
     CamStudioContext *c = avctx->priv_data;
     int stride;
     switch (avctx->bits_per_coded_sample) {
-        case 16: avctx->pix_fmt = AV_PIX_FMT_RGB555; break;
+        case 16: avctx->pix_fmt = AV_PIX_FMT_RGB555LE; break;
         case 24: avctx->pix_fmt = AV_PIX_FMT_BGR24; break;
-        case 32: avctx->pix_fmt = AV_PIX_FMT_RGB32; break;
+        case 32: avctx->pix_fmt = AV_PIX_FMT_BGRA; break;
         default:
             av_log(avctx, AV_LOG_ERROR,
                    "CamStudio codec error: invalid depth %i bpp\n",
@@ -229,21 +137,23 @@ static av_cold int decode_init(AVCodecContext *avctx) {
     c->bpp = avctx->bits_per_coded_sample;
     c->linelen = avctx->width * avctx->bits_per_coded_sample / 8;
     c->height = avctx->height;
-    stride = c->linelen;
-    if (avctx->bits_per_coded_sample == 24)
-        stride = FFALIGN(stride, 4);
+    stride = FFALIGN(c->linelen, 4);
     c->decomp_size = c->height * stride;
     c->decomp_buf = av_malloc(c->decomp_size + AV_LZO_OUTPUT_PADDING);
     if (!c->decomp_buf) {
         av_log(avctx, AV_LOG_ERROR, "Can't allocate decompression buffer.\n");
         return AVERROR(ENOMEM);
     }
+    c->pic = av_frame_alloc();
+    if (!c->pic)
+        return AVERROR(ENOMEM);
     return 0;
 }
 
 static av_cold int decode_end(AVCodecContext *avctx) {
     CamStudioContext *c = avctx->priv_data;
     av_freep(&c->decomp_buf);
+    av_frame_free(&c->pic);
     return 0;
 }
 
diff --git a/libavcodec/cyuv.c b/libavcodec/cyuv.c
index f628ba1..c686123 100644
--- a/libavcodec/cyuv.c
+++ b/libavcodec/cyuv.c
@@ -6,20 +6,20 @@
  *
  * Copyright (C) 2003 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -52,7 +52,6 @@ static av_cold int cyuv_decode_init(AVCodecContext *avctx)
     if (s->width & 0x3)
         return AVERROR_INVALIDDATA;
     s->height = avctx->height;
-    avctx->pix_fmt = AV_PIX_FMT_YUV411P;
 
     return 0;
 }
@@ -82,6 +81,7 @@ static int cyuv_decode_frame(AVCodecContext *avctx,
     int stream_ptr;
     unsigned char cur_byte;
     int pixel_groups;
+    int rawsize = s->height * FFALIGN(s->width,2) * 2;
     int ret;
 
     if (avctx->codec_id == AV_CODEC_ID_AURA) {
@@ -92,7 +92,11 @@ static int cyuv_decode_frame(AVCodecContext *avctx,
      * followed by (height) lines each with 3 bytes to represent groups
      * of 4 pixels. Thus, the total size of the buffer ought to be:
      *    (3 * 16) + height * (width * 3 / 4) */
-    if (buf_size != 48 + s->height * (s->width * 3 / 4)) {
+    if (buf_size == 48 + s->height * (s->width * 3 / 4)) {
+        avctx->pix_fmt = AV_PIX_FMT_YUV411P;
+    } else if(buf_size == rawsize ) {
+        avctx->pix_fmt = AV_PIX_FMT_UYVY422;
+    } else {
         av_log(avctx, AV_LOG_ERROR, "got a buffer with %d bytes when %d were expected\n",
                buf_size, 48 + s->height * (s->width * 3 / 4));
         return AVERROR_INVALIDDATA;
@@ -101,15 +105,22 @@ static int cyuv_decode_frame(AVCodecContext *avctx,
     /* pixel data starts 48 bytes in, after 3x16-byte tables */
     stream_ptr = 48;
 
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     y_plane = frame->data[0];
     u_plane = frame->data[1];
     v_plane = frame->data[2];
 
+    if (buf_size == rawsize) {
+        int linesize = FFALIGN(s->width,2) * 2;
+        y_plane += frame->linesize[0] * s->height;
+        for (stream_ptr = 0; stream_ptr < rawsize; stream_ptr += linesize) {
+            y_plane -= frame->linesize[0];
+            memcpy(y_plane, buf+stream_ptr, linesize);
+        }
+    } else {
+
     /* iterate through each line in the height */
     for (y_ptr = 0, u_ptr = 0, v_ptr = 0;
          y_ptr < (s->height * frame->linesize[0]);
@@ -157,6 +168,7 @@ static int cyuv_decode_frame(AVCodecContext *avctx,
 
         }
     }
+    }
 
     *got_frame = 1;
 
diff --git a/libavcodec/dca.c b/libavcodec/dca.c
index 3fab173..fcdfef2 100644
--- a/libavcodec/dca.c
+++ b/libavcodec/dca.c
@@ -1,20 +1,24 @@
 /*
  * DCA compatible decoder data
+ * Copyright (C) 2004 Gildas Bazin
+ * Copyright (C) 2004 Benjamin Zores
+ * Copyright (C) 2006 Benjamin Larsson
+ * Copyright (C) 2007 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/dca.h b/libavcodec/dca.h
index 0037de2..d60b282 100644
--- a/libavcodec/dca.h
+++ b/libavcodec/dca.h
@@ -5,20 +5,20 @@
  * Copyright (C) 2006 Benjamin Larsson
  * Copyright (C) 2007 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/dca_parser.c b/libavcodec/dca_parser.c
index e233f50..9b73371 100644
--- a/libavcodec/dca_parser.c
+++ b/libavcodec/dca_parser.c
@@ -5,20 +5,20 @@
  * Copyright (C) 2006 Benjamin Larsson
  * Copyright (C) 2007 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -37,7 +37,7 @@ typedef struct DCAParseContext {
 #define IS_MARKER(state, i, buf, buf_size) \
  ((state == DCA_MARKER_14B_LE && (i < buf_size-2) && (buf[i+1] & 0xF0) == 0xF0 && buf[i+2] == 0x07) \
  || (state == DCA_MARKER_14B_BE && (i < buf_size-2) && buf[i+1] == 0x07 && (buf[i+2] & 0xF0) == 0xF0) \
- || state == DCA_MARKER_RAW_LE || state == DCA_MARKER_RAW_BE)
+ || state == DCA_MARKER_RAW_LE || state == DCA_MARKER_RAW_BE || state == DCA_HD_MARKER)
 
 /**
  * Find the end of the current frame in the bitstream.
@@ -58,11 +58,7 @@ static int dca_find_frame_end(DCAParseContext * pc1, const uint8_t * buf,
         for (i = 0; i < buf_size; i++) {
             state = (state << 8) | buf[i];
             if (IS_MARKER(state, i, buf, buf_size)) {
-                if (pc1->lastmarker && state == pc1->lastmarker) {
-                    start_found = 1;
-                    i++;
-                    break;
-                } else if (!pc1->lastmarker) {
+                if (!pc1->lastmarker || state == pc1->lastmarker || pc1->lastmarker == DCA_HD_MARKER) {
                     start_found = 1;
                     pc1->lastmarker = state;
                     i++;
@@ -77,7 +73,7 @@ static int dca_find_frame_end(DCAParseContext * pc1, const uint8_t * buf,
             state = (state << 8) | buf[i];
             if (state == DCA_HD_MARKER && !pc1->hd_pos)
                 pc1->hd_pos = pc1->size;
-            if (state == pc1->lastmarker && IS_MARKER(state, i, buf, buf_size)) {
+            if (IS_MARKER(state, i, buf, buf_size) && (state == pc1->lastmarker || pc1->lastmarker == DCA_HD_MARKER)) {
                 if(pc1->framesize > pc1->size)
                     continue;
                 pc->frame_start_found = 0;
diff --git a/libavcodec/dcadata.h b/libavcodec/dcadata.h
index 28c40cf..72aebde 100644
--- a/libavcodec/dcadata.h
+++ b/libavcodec/dcadata.h
@@ -3,20 +3,20 @@
  * Copyright (C) 2004 Gildas Bazin
  * Copyright (c) 2006 Benjamin Larsson
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c
index e988e94..c671fcd 100644
--- a/libavcodec/dcadec.c
+++ b/libavcodec/dcadec.c
@@ -5,20 +5,20 @@
  * Copyright (C) 2006 Benjamin Larsson
  * Copyright (C) 2007 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -37,7 +37,6 @@
 #include "avcodec.h"
 #include "fft.h"
 #include "get_bits.h"
-#include "put_bits.h"
 #include "dcadata.h"
 #include "dcahuff.h"
 #include "dca.h"
@@ -59,6 +58,8 @@
 #define DCA_SUBFRAMES_MAX     (16)
 #define DCA_BLOCKS_MAX        (16)
 #define DCA_LFE_MAX            (3)
+#define DCA_CHSETS_MAX         (4)
+#define DCA_CHSET_CHANS_MAX    (8)
 
 enum DCAMode {
     DCA_MONO = 0,
@@ -94,6 +95,68 @@ enum DCAExSSSpeakerMask {
     DCA_EXSS_REAR_HIGH_LEFT_RIGHT  = 0x8000,
 };
 
+enum DCAXxchSpeakerMask {              /* XXCH speaker mask: one distinct bit per position */
+    DCA_XXCH_FRONT_CENTER          = 0x0000001, /* bit 0 */
+    DCA_XXCH_FRONT_LEFT            = 0x0000002,
+    DCA_XXCH_FRONT_RIGHT           = 0x0000004,
+    DCA_XXCH_SIDE_REAR_LEFT        = 0x0000008,
+    DCA_XXCH_SIDE_REAR_RIGHT       = 0x0000010,
+    DCA_XXCH_LFE1                  = 0x0000020, /* bit 5; masked out by dca_xxch2index() */
+    DCA_XXCH_REAR_CENTER           = 0x0000040,
+    DCA_XXCH_SURROUND_REAR_LEFT    = 0x0000080,
+    DCA_XXCH_SURROUND_REAR_RIGHT   = 0x0000100,
+    DCA_XXCH_SIDE_SURROUND_LEFT    = 0x0000200,
+    DCA_XXCH_SIDE_SURROUND_RIGHT   = 0x0000400,
+    DCA_XXCH_FRONT_CENTER_LEFT     = 0x0000800,
+    DCA_XXCH_FRONT_CENTER_RIGHT    = 0x0001000,
+    DCA_XXCH_FRONT_HIGH_LEFT       = 0x0002000,
+    DCA_XXCH_FRONT_HIGH_CENTER     = 0x0004000,
+    DCA_XXCH_FRONT_HIGH_RIGHT      = 0x0008000,
+    DCA_XXCH_LFE2                  = 0x0010000,
+    DCA_XXCH_SIDE_FRONT_LEFT       = 0x0020000,
+    DCA_XXCH_SIDE_FRONT_RIGHT      = 0x0040000,
+    DCA_XXCH_OVERHEAD              = 0x0080000,
+    DCA_XXCH_SIDE_HIGH_LEFT        = 0x0100000,
+    DCA_XXCH_SIDE_HIGH_RIGHT       = 0x0200000,
+    DCA_XXCH_REAR_HIGH_CENTER      = 0x0400000,
+    DCA_XXCH_REAR_HIGH_LEFT        = 0x0800000,
+    DCA_XXCH_REAR_HIGH_RIGHT       = 0x1000000,
+    DCA_XXCH_REAR_LOW_CENTER       = 0x2000000,
+    DCA_XXCH_REAR_LOW_LEFT         = 0x4000000,
+    DCA_XXCH_REAR_LOW_RIGHT        = 0x8000000, /* bit 27, the highest one defined */
+};
+
+static const uint32_t map_xxch_to_native[28] = { /* one AV_CH_* flag per XXCH speaker position */
+    AV_CH_FRONT_CENTER,
+    AV_CH_FRONT_LEFT,
+    AV_CH_FRONT_RIGHT,
+    AV_CH_SIDE_LEFT,
+    AV_CH_SIDE_RIGHT,
+    AV_CH_LOW_FREQUENCY,
+    AV_CH_BACK_CENTER,
+    AV_CH_BACK_LEFT,
+    AV_CH_BACK_RIGHT,
+    AV_CH_SIDE_LEFT,           /* side surround left -- duplicates entry 3 */
+    AV_CH_SIDE_RIGHT,          /* side surround right -- duplicates entry 4 */
+    AV_CH_FRONT_LEFT_OF_CENTER,
+    AV_CH_FRONT_RIGHT_OF_CENTER,
+    AV_CH_TOP_FRONT_LEFT,
+    AV_CH_TOP_FRONT_CENTER,
+    AV_CH_TOP_FRONT_RIGHT,
+    AV_CH_LOW_FREQUENCY,        /* lfe2 -- duplicates the lfe1 flag (entry 5) */
+    AV_CH_FRONT_LEFT_OF_CENTER, /* side front left -- duplicates entry 11 */
+    AV_CH_FRONT_RIGHT_OF_CENTER,/* side front right -- duplicates entry 12 */
+    AV_CH_TOP_CENTER,           /* overhead */
+    AV_CH_TOP_FRONT_LEFT,       /* side high left -- duplicates entry 13 */
+    AV_CH_TOP_FRONT_RIGHT,      /* side high right -- duplicates entry 15 */
+    AV_CH_TOP_BACK_CENTER,
+    AV_CH_TOP_BACK_LEFT,
+    AV_CH_TOP_BACK_RIGHT,
+    AV_CH_BACK_CENTER,          /* rear low center -- duplicates entry 6 */
+    AV_CH_BACK_LEFT,            /* rear low left -- duplicates entry 7 */
+    AV_CH_BACK_RIGHT            /* rear low right -- duplicates entry 8 */
+};
+
 enum DCAExtensionMask {
     DCA_EXT_CORE       = 0x001, ///< core in core substream
     DCA_EXT_XXCH       = 0x002, ///< XXCh channels extension in core substream
@@ -285,7 +348,7 @@ static av_always_inline int get_bitalloc(GetBitContext *gb, BitAlloc *ba,
 }
 
 typedef struct {
-    AVClass *class;             ///< class for AVOptions
+    const AVClass *class;       ///< class for AVOptions
     AVCodecContext *avctx;
     /* Frame header */
     int frame_type;             ///< type of the current frame
@@ -385,6 +448,20 @@ typedef struct {
     int xch_base_channel;       ///< index of first (only) channel containing XCH data
     int xch_disable;            ///< whether the XCh extension should be decoded or not
 
+    /* XXCH extension information */
+    int xxch_chset;
+    int xxch_nbits_spk_mask;
+    uint32_t xxch_core_spkmask;
+    uint32_t xxch_spk_masks[4]; /* speaker masks, last element is core mask */
+    int xxch_chset_nch[4];
+    float xxch_dmix_sf[DCA_CHSETS_MAX];
+
+    uint32_t xxch_dmix_embedded;  /* lower layer has mix pre-embedded, per chset */
+    float xxch_dmix_coeff[DCA_PRIM_CHANNELS_MAX][32]; /* worst case sizing */
+
+    int8_t xxch_order_tab[32];
+    int8_t lfe_index;
+
     /* ExSS header parser */
     int static_fields;          ///< static fields present
     int mix_metadata;           ///< mixing metadata present
@@ -401,6 +478,8 @@ typedef struct {
     FmtConvertContext fmt_conv;
 } DCAContext;
 
+static float dca_dmix_code(unsigned code);
+
 static const uint16_t dca_vlc_offs[] = {
         0,   512,   640,   768,  1282,  1794,  2436,  3080,  3770,  4454,  5364,
      5372,  5380,  5388,  5392,  5396,  5412,  5420,  5428,  5460,  5492,  5508,
@@ -471,16 +550,95 @@ static inline void get_array(GetBitContext *gb, int *dst, int len, int bits)
         *dst++ = get_bits(gb, bits);
 }
 
-static int dca_parse_audio_coding_header(DCAContext *s, int base_channel)
+static inline int dca_xxch2index(DCAContext *s, int xxch_ch)
+{
+    int i, base, mask;
+    /* Map a speaker bit (the caller in this file passes 1 << n) to its rank among all mask bits seen so far. */
+    /* Start with the core mask minus LFE1, then xxch_spk_masks[0..]; base sums the bits of every mask passed over. */
+    for (i = -1, base = 0, mask = (s->xxch_core_spkmask & ~DCA_XXCH_LFE1);
+         i <= s->xxch_chset && !(mask & xxch_ch); mask = s->xxch_spk_masks[++i])
+        base += av_popcount(mask);
+    /* NOTE(review): ++i runs after the i <= xxch_chset test, so xxch_spk_masks[xxch_chset + 1] may be read -- confirm bound. */
+    return base + av_popcount(mask & (xxch_ch - 1));
+}
+
+static int dca_parse_audio_coding_header(DCAContext *s, int base_channel,
+                                         int xxch)
 {
     int i, j;
     static const float adj_table[4] = { 1.0, 1.1250, 1.2500, 1.4375 };
     static const int bitlen[11] = { 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3 };
     static const int thr[11]    = { 0, 1, 3, 3, 3, 3, 7, 7, 7, 7, 7 };
+    int hdr_pos = 0, hdr_size = 0;
+    float scale_factor;
+    int this_chans, acc_mask;
+    int embedded_downmix;
+    int nchans, mask[8];
+    int coeff, ichan;
+
+    /* xxch has arbitrary sized audio coding headers */
+    if (xxch) {
+        hdr_pos  = get_bits_count(&s->gb);
+        hdr_size = get_bits(&s->gb, 7) + 1;
+    }
 
-    s->total_channels = get_bits(&s->gb, 3) + 1 + base_channel;
+    nchans = get_bits(&s->gb, 3) + 1;
+    s->total_channels = nchans + base_channel;
     s->prim_channels  = s->total_channels;
 
+    /* obtain speaker layout mask & downmix coefficients for XXCH */
+    if (xxch) {
+        acc_mask = s->xxch_core_spkmask;
+
+        this_chans = get_bits(&s->gb, s->xxch_nbits_spk_mask - 6) << 6;
+        s->xxch_spk_masks[s->xxch_chset] = this_chans;
+        s->xxch_chset_nch[s->xxch_chset] = nchans;
+
+        for (i = 0; i <= s->xxch_chset; i++)
+            acc_mask |= s->xxch_spk_masks[i];
+
+        /* check for downmixing information */
+        if (get_bits1(&s->gb)) {
+            embedded_downmix = get_bits1(&s->gb);
+            coeff            = get_bits(&s->gb, 6);
+
+            if (coeff<1 || coeff>61) {
+                av_log(s->avctx, AV_LOG_ERROR, "6bit coeff %d is out of range\n", coeff);
+                return AVERROR_INVALIDDATA;
+            }
+
+            scale_factor     = -1.0f / dca_dmix_code((coeff<<2)-3);
+
+            s->xxch_dmix_sf[s->xxch_chset] = scale_factor;
+
+            for (i = base_channel; i < s->prim_channels; i++) {
+                mask[i] = get_bits(&s->gb, s->xxch_nbits_spk_mask);
+            }
+
+            for (j = base_channel; j < s->prim_channels; j++) {
+                memset(s->xxch_dmix_coeff[j], 0, sizeof(s->xxch_dmix_coeff[0]));
+                s->xxch_dmix_embedded |= (embedded_downmix << j);
+                for (i = 0; i < s->xxch_nbits_spk_mask; i++) {
+                    if (mask[j] & (1 << i)) {
+                        if ((1 << i) == DCA_XXCH_LFE1) {
+                            av_log(s->avctx, AV_LOG_WARNING,
+                                   "DCA-XXCH: dmix to LFE1 not supported.\n");
+                            continue;
+                        }
+
+                        coeff = get_bits(&s->gb, 7);
+                        ichan = dca_xxch2index(s, 1 << i);
+                        if ((coeff&63)<1 || (coeff&63)>61) {
+                            av_log(s->avctx, AV_LOG_ERROR, "7bit coeff %d is out of range\n", coeff);
+                            return AVERROR_INVALIDDATA;
+                        }
+                        s->xxch_dmix_coeff[j][ichan] = dca_dmix_code((coeff<<2)-3);
+                    }
+                }
+            }
+        }
+    }
+
     if (s->prim_channels > DCA_PRIM_CHANNELS_MAX)
         s->prim_channels = DCA_PRIM_CHANNELS_MAX;
 
@@ -517,9 +675,16 @@ static int dca_parse_audio_coding_header(DCAContext *s, int base_channel)
             if (s->quant_index_huffman[i][j] < thr[j])
                 s->scalefactor_adj[i][j] = adj_table[get_bits(&s->gb, 2)];
 
-    if (s->crc_present) {
-        /* Audio header CRC check */
-        get_bits(&s->gb, 16);
+    if (!xxch) {
+        if (s->crc_present) {
+            /* Audio header CRC check */
+            get_bits(&s->gb, 16);
+        }
+    } else {
+        /* Skip to the end of the header, also ignore CRC if present  */
+        i = get_bits_count(&s->gb);
+        if (hdr_pos + 8 * hdr_size > i)
+            skip_bits_long(&s->gb, hdr_pos + 8 * hdr_size - i);
     }
 
     s->current_subframe    = 0;
@@ -591,6 +756,7 @@ static int dca_parse_frame_header(DCAContext *s)
     s->predictor_history = get_bits(&s->gb, 1);
 
     if (s->lfe > 2) {
         av_log(s->avctx, AV_LOG_ERROR, "Invalid LFE value: %d\n", s->lfe);
+        s->lfe = 0; /* reset only after logging, so the message shows the rejected value */
         return AVERROR_INVALIDDATA;
     }
@@ -652,7 +818,7 @@ static int dca_parse_frame_header(DCAContext *s)
     /* Primary audio coding header */
     s->subframes         = get_bits(&s->gb, 4) + 1;
 
-    return dca_parse_audio_coding_header(s, 0);
+    return dca_parse_audio_coding_header(s, 0, 0);
 }
 
 
@@ -828,6 +994,7 @@ static int dca_subframe_header(DCAContext *s, int base_channel, int block_index)
 
     /* Low frequency effect data */
     if (!base_channel && s->lfe) {
+        int quant7;
         /* LFE samples */
         int lfe_samples = 2 * s->lfe * (4 + block_index);
         int lfe_end_sample = 2 * s->lfe * (4 + block_index + s->subsubframes[s->current_subframe]);
@@ -839,8 +1006,12 @@ static int dca_subframe_header(DCAContext *s, int base_channel, int block_index)
         }
 
         /* Scale factor index */
-        skip_bits(&s->gb, 1);
-        s->lfe_scale_factor = scale_factor_quant7[get_bits(&s->gb, 7)];
+        quant7 = get_bits(&s->gb, 8);
+        if (quant7 > 127) {
+            avpriv_request_sample(s->avctx, "LFEScaleIndex larger than 127");
+            return AVERROR_INVALIDDATA;
+        }
+        s->lfe_scale_factor = scale_factor_quant7[quant7];
 
         /* Quantization step size * scale factor */
         lfe_scale = 0.035 * s->lfe_scale_factor;
@@ -1006,7 +1177,7 @@ static void dca_downmix(float **samples, int srcfmt, int lfe_present,
     switch (srcfmt) {
     case DCA_MONO:
     case DCA_4F2R:
-        av_log(NULL, 0, "Not implemented!\n");
+        av_log(NULL, AV_LOG_ERROR, "Not implemented!\n");
         break;
     case DCA_CHANNEL:
     case DCA_STEREO:
@@ -1200,7 +1371,7 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
                         else if (s->predictor_history)
                             sum += adpcm_vb[s->prediction_vq[k][l]][n - 1] *
                                    s->subband_samples_hist[k][l][m - n + 4];
-                    subband_samples[k][l][m] += sum * 1.0f / 8192;
+                    subband_samples[k][l][m] += sum * (1.0f / 8192);
                 }
             }
         }
@@ -1209,7 +1380,7 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
          * Decode VQ encoded high frequencies
          */
         if (s->subband_activity[k] > s->vq_start_subband[k]) {
-            if (!s->debug_flag & 0x01) {
+            if (!(s->debug_flag & 0x01)) {
                 av_log(s->avctx, AV_LOG_DEBUG,
                        "Stream with high frequencies VQ coding\n");
                 s->debug_flag |= 0x01;
@@ -1260,7 +1431,7 @@ static int dca_filter_channels(DCAContext *s, int block_index)
     if (s->lfe) {
         lfe_interpolation_fir(s, s->lfe, 2 * s->lfe,
                               s->lfe_data + 2 * s->lfe * (block_index + 4),
-                              s->samples_chanptr[dca_lfe_index[s->amode]]);
+                              s->samples_chanptr[s->lfe_index]);
         /* Outputs 20bits pcm samples */
     }
 
@@ -1468,11 +1639,11 @@ static int dca_exss_parse_asset_header(DCAContext *s)
 {
     int header_pos = get_bits_count(&s->gb);
     int header_size;
-    int channels;
+    int channels = 0;
     int embedded_stereo = 0;
     int embedded_6ch    = 0;
     int drc_code_present;
-    int extensions_mask;
+    int av_uninit(extensions_mask);
     int i, j;
 
     if (get_bits_left(&s->gb) < 16)
@@ -1612,26 +1783,246 @@ static int dca_exss_parse_asset_header(DCAContext *s)
     return 0;
 }
 
+static int dca_xbr_parse_frame(DCAContext *s)
+{
+    int scale_table_high[DCA_CHSET_CHANS_MAX][DCA_SUBBANDS][2];
+    int active_bands[DCA_CHSETS_MAX][DCA_CHSET_CHANS_MAX];
+    int abits_high[DCA_CHSET_CHANS_MAX][DCA_SUBBANDS];
+    int anctemp[DCA_CHSET_CHANS_MAX];
+    int chset_fsize[DCA_CHSETS_MAX];
+    int n_xbr_ch[DCA_CHSETS_MAX];
+    int hdr_size, num_chsets, xbr_tmode, hdr_pos;
+    int i, j, k, l, chset, chan_base;
+    /* Parse one XBR frame, adding the scaled codes to s->subband_samples; returns 0 or AVERROR_INVALIDDATA. */
+    av_log(s->avctx, AV_LOG_DEBUG, "DTS-XBR: decoding XBR extension\n");
+
+    /* bit position of the 32-bit sync word, which was consumed before this call */
+    hdr_pos = get_bits_count(&s->gb) - 32;
+
+    hdr_size = get_bits(&s->gb, 6) + 1;   /* in bytes: multiplied by 8 below */
+    num_chsets = get_bits(&s->gb, 2) + 1; /* 1..4 channel sets */
+
+    for(i = 0; i < num_chsets; i++)
+        chset_fsize[i] = get_bits(&s->gb, 14) + 1; /* in bytes: multiplied by 8 below */
+
+    xbr_tmode = get_bits1(&s->gb);
+
+    for(i = 0; i < num_chsets; i++) {
+        n_xbr_ch[i] = get_bits(&s->gb, 3) + 1;
+        k = get_bits(&s->gb, 2) + 5; /* bit width (5..8) of the band counts read next */
+        for(j = 0; j < n_xbr_ch[i]; j++)
+            active_bands[i][j] = get_bits(&s->gb, k) + 1;
+    }
+    /* NOTE(review): active_bands can reach 256 and is not checked against DCA_SUBBANDS -- confirm */
+    /* skip to the end of the header */
+    i = get_bits_count(&s->gb);
+    if(hdr_pos + hdr_size * 8 > i)
+        skip_bits_long(&s->gb, hdr_pos + hdr_size * 8 - i);
+
+    /* loop over the channel data sets */
+    /* only decode as many channels as we've decoded base data for */
+    for(chset = 0, chan_base = 0;
+        chset < num_chsets && chan_base + n_xbr_ch[chset] <= s->prim_channels;
+        chan_base += n_xbr_ch[chset++]) {
+        int start_posn = get_bits_count(&s->gb);
+        int subsubframe = 0;
+        int subframe = 0;
+
+        /* one iteration per subsubframe; subframe/subsubframe track the position */
+        for (k = 0; k < (s->sample_blocks / 8); k++) {
+            /* parse header if we're on first subsubframe of a subframe */
+            if(subsubframe == 0) {
+                /* Parse subframe header */
+                for(i = 0; i < n_xbr_ch[chset]; i++) {
+                    anctemp[i] = get_bits(&s->gb, 2) + 2; /* bit width (2..5) of abits_high entries */
+                }
+
+                for(i = 0; i < n_xbr_ch[chset]; i++) {
+                    get_array(&s->gb, abits_high[i], active_bands[chset][i], anctemp[i]);
+                }
+
+                for(i = 0; i < n_xbr_ch[chset]; i++) {
+                    anctemp[i] = get_bits(&s->gb, 3); /* reused: bit width of scale factor indices */
+                    if(anctemp[i] < 1) {
+                        av_log(s->avctx, AV_LOG_ERROR, "DTS-XBR: SYNC ERROR\n");
+                        return AVERROR_INVALIDDATA;
+                    }
+                }
+
+                /* generate scale factors */
+                for(i = 0; i < n_xbr_ch[chset]; i++) {
+                    const uint32_t *scale_table;
+                    int nbits;
+
+                    if (s->scalefactor_huffman[chan_base+i] == 6) {
+                        scale_table = scale_factor_quant7;
+                    } else {
+                        scale_table = scale_factor_quant6;
+                    }
+                    /* NOTE(review): indices read below go up to (1 << nbits) - 1; table sizes not visible here -- confirm */
+                    nbits = anctemp[i];
+                    /* NOTE(review): transition_mode is indexed [i] here, not [chan_base+i] -- confirm */
+                    for(j = 0; j < active_bands[chset][i]; j++) {
+                        if(abits_high[i][j] > 0) {
+                            scale_table_high[i][j][0] =
+                                scale_table[get_bits(&s->gb, nbits)];
+
+                            if(xbr_tmode && s->transition_mode[i][j]) {
+                                scale_table_high[i][j][1] =
+                                    scale_table[get_bits(&s->gb, nbits)];
+                            }
+                        }
+                    }
+                }
+            }
+
+            /* decode audio array for this block */
+            for(i = 0; i < n_xbr_ch[chset]; i++) {
+                for(j = 0; j < active_bands[chset][i]; j++) {
+                    const int xbr_abits = abits_high[i][j];
+                    const float quant_step_size = lossless_quant_d[xbr_abits];
+                    const int sfi = xbr_tmode && s->transition_mode[i][j] && subsubframe >= s->transition_mode[i][j];
+                    const float rscale = quant_step_size * scale_table_high[i][j][sfi];
+                    float *subband_samples = s->subband_samples[k][chan_base+i][j];
+                    int block[8];
+                    /* bands with abits <= 0 never had scale_table_high set; rscale is unused for them */
+                    if(xbr_abits <= 0)
+                        continue;
+
+                    if(xbr_abits > 7) {
+                        get_array(&s->gb, block, 8, xbr_abits - 3);
+                    } else {
+                        int block_code1, block_code2, size, levels, err;
+
+                        size   = abits_sizes[xbr_abits - 1];
+                        levels = abits_levels[xbr_abits - 1];
+
+                        block_code1 = get_bits(&s->gb, size);
+                        block_code2 = get_bits(&s->gb, size);
+                        err = decode_blockcodes(block_code1, block_code2,
+                                                levels, block);
+                        if (err) {
+                            av_log(s->avctx, AV_LOG_ERROR,
+                                   "ERROR: DTS-XBR: block code look-up failed\n");
+                            return AVERROR_INVALIDDATA;
+                        }
+                    }
+
+                    /* scale & sum into subband */
+                    for(l = 0; l < 8; l++)
+                        subband_samples[l] += (float)block[l] * rscale;
+                }
+            }
+
+            /* 16-bit DSYNC (0xffff): after every subsubframe if aspf is set, else after the last one of a subframe */
+            if(s->aspf || subsubframe == s->subsubframes[subframe] - 1) {
+                if(get_bits(&s->gb, 16) != 0xffff) {
+                    av_log(s->avctx, AV_LOG_ERROR, "DTS-XBR: Didn't get subframe DSYNC\n");
+                    return AVERROR_INVALIDDATA;
+                }
+            }
+
+            /* advance sub-sub-frame index */
+            if(++subsubframe >= s->subsubframes[subframe]) {
+                subsubframe = 0;
+                subframe++;
+            }
+        }
+
+        /* skip to next channel set; NOTE(review): a negative j is logged but still passed to skip_bits_long -- confirm */
+        i = get_bits_count(&s->gb);
+        if(start_posn + chset_fsize[chset] * 8 != i) {
+            j = start_posn + chset_fsize[chset] * 8 - i;
+            if(j < 0 || j >= 8)
+                av_log(s->avctx, AV_LOG_ERROR, "DTS-XBR: end of channel set,"
+                       " skipping further than expected (%d bits)\n", j);
+            skip_bits_long(&s->gb, j);
+        }
+    }
+
+    return 0;
+}
+
+/* Parse the XXCH frame header and decode each channel set it lists; always returns 0. */
+static int dca_xxch_decode_frame(DCAContext *s)
+{
+    int hdr_size, spkmsk_bits, num_chsets, core_spk, hdr_pos;
+    int i, chset, base_channel, chstart, fsize[8];
+
+    /* the 32-bit sync word is assumed to have been consumed already, hence the -32 */
+    hdr_pos     = get_bits_count(&s->gb) - 32;
+    hdr_size    = get_bits(&s->gb, 6) + 1; /* in bytes: multiplied by 8 below */
+  /*chhdr_crc   =*/ skip_bits1(&s->gb);
+    spkmsk_bits = get_bits(&s->gb, 5) + 1; /* 1..32 */
+    num_chsets  = get_bits(&s->gb, 2) + 1; /* 1..4, so fsize[8] is never overrun */
+
+    for (i = 0; i < num_chsets; i++)
+        fsize[i] = get_bits(&s->gb, 14) + 1; /* in bytes: multiplied by 8 below */
+    /* NOTE(review): spkmsk_bits is not validated; the channel set header later reads (xxch_nbits_spk_mask - 6) bits -- confirm */
+    core_spk               = get_bits(&s->gb, spkmsk_bits);
+    s->xxch_core_spkmask   = core_spk;
+    s->xxch_nbits_spk_mask = spkmsk_bits;
+    s->xxch_dmix_embedded  = 0;
+
+    /* skip to the end of the header */
+    i = get_bits_count(&s->gb);
+    if (hdr_pos + hdr_size * 8 > i)
+        skip_bits_long(&s->gb, hdr_pos + hdr_size * 8 - i);
+
+    for (chset = 0; chset < num_chsets; chset++) {
+        chstart       = get_bits_count(&s->gb);
+        base_channel  = s->prim_channels; /* new channels are appended after those already parsed */
+        s->xxch_chset = chset;
+
+        /* XXCH and Core headers differ, see 6.4.2 "XXCH Channel Set Header" vs.
+           5.3.2 "Primary Audio Coding Header", DTS Spec 1.3.1 */
+        dca_parse_audio_coding_header(s, base_channel, 1);
+        /* NOTE(review): the header parser's return value is ignored above -- confirm this is intended */
+        /* decode channel data */
+        for (i = 0; i < (s->sample_blocks / 8); i++) {
+            if (dca_decode_block(s, base_channel, i)) {
+                av_log(s->avctx, AV_LOG_ERROR,
+                       "Error decoding DTS-XXCH extension\n");
+                continue; /* a failed block is logged, then the next block is still attempted */
+            }
+        }
+
+        /* skip to end of this section */
+        i = get_bits_count(&s->gb);
+        if (chstart + fsize[chset] * 8 > i)
+            skip_bits_long(&s->gb, chstart + fsize[chset] * 8 - i);
+    }
+    s->xxch_chset = num_chsets; /* left at the number of channel sets processed */
+
+    return 0;
+}
+
 /**
  * Parse extension substream header (HD)
  */
 static void dca_exss_parse_header(DCAContext *s)
 {
+    int asset_size[8];
     int ss_index;
     int blownup;
     int num_audiop = 1;
     int num_assets = 1;
     int active_ss_mask[8];
     int i, j;
+    int start_posn;
+    int hdrsize;
+    uint32_t mkr;
 
     if (get_bits_left(&s->gb) < 52)
         return;
 
+    start_posn = get_bits_count(&s->gb) - 32;
+
     skip_bits(&s->gb, 8); // user data
     ss_index = get_bits(&s->gb, 2);
 
     blownup = get_bits1(&s->gb);
-    skip_bits(&s->gb,  8 + 4 * blownup); // header_size
+    hdrsize = get_bits(&s->gb,  8 + 4 * blownup) + 1; // header_size
     skip_bits(&s->gb, 16 + 4 * blownup); // hd_size
 
     s->static_fields = get_bits1(&s->gb);
@@ -1683,8 +2074,10 @@ static void dca_exss_parse_header(DCAContext *s)
         }
     }
 
+    av_assert0(num_assets > 0); // silence a warning
+
     for (i = 0; i < num_assets; i++)
-        skip_bits_long(&s->gb, 16 + 4 * blownup);  // asset size
+        asset_size[i] = get_bits_long(&s->gb, 16 + 4 * blownup);
 
     for (i = 0; i < num_assets; i++) {
         if (dca_exss_parse_asset_header(s))
@@ -1693,13 +2086,38 @@ static void dca_exss_parse_header(DCAContext *s)
 
     /* not parsed further, we were only interested in the extensions mask
      * from the asset header */
+
+        j = get_bits_count(&s->gb);
+        if (start_posn + hdrsize * 8 > j)
+            skip_bits_long(&s->gb, start_posn + hdrsize * 8 - j);
+
+        for (i = 0; i < num_assets; i++) {
+            start_posn = get_bits_count(&s->gb);
+            mkr        = get_bits_long(&s->gb, 32);
+
+            /* parse extensions that we know about */
+            if (mkr == 0x655e315e) {
+                dca_xbr_parse_frame(s);
+            } else if (mkr == 0x47004a03) {
+                dca_xxch_decode_frame(s);
+                s->core_ext_mask |= DCA_EXT_XXCH; /* xxx use for chan reordering */
+            } else {
+                av_log(s->avctx, AV_LOG_DEBUG,
+                       "DTS-ExSS: unknown marker = 0x%08x\n", mkr);
+            }
+
+            /* skip to end of block */
+            j = get_bits_count(&s->gb);
+            if (start_posn + asset_size[i] * 8 > j)
+                skip_bits_long(&s->gb, start_posn + asset_size[i] * 8 - j);
+        }
 }
 
 static float dca_dmix_code(unsigned code)
 {
     int sign = (code >> 8) - 1;
     code &= 0xff;
-    return ((dca_dmixtable[code] ^ sign) - sign) * (1.0 / (1U << 15));
+    return ((dca_dmixtable[code] ^ sign) - sign) * (1.0 / (1 << 15));
 }
 
 /**
@@ -1712,15 +2130,25 @@ static int dca_decode_frame(AVCodecContext *avctx, void *data,
     AVFrame *frame     = data;
     const uint8_t *buf = avpkt->data;
     int buf_size = avpkt->size;
-
+    int channel_mask;
+    int channel_layout;
     int lfe_samples;
     int num_core_channels = 0;
     int i, ret;
-    float  **samples_flt;
+    float **samples_flt;
+    float *src_chan;
+    float *dst_chan;
     DCAContext *s = avctx->priv_data;
-    int channels, full_channels;
     int core_ss_end;
-
+    int channels, full_channels;
+    float scale;
+    int achan;
+    int chset;
+    int mask;
+    int lavc;
+    int posn;
+    int j, k;
+    int endch;
 
     s->xch_present = 0;
 
@@ -1751,6 +2179,49 @@ static int dca_decode_frame(AVCodecContext *avctx, void *data,
     /* record number of core channels incase less than max channels are requested */
     num_core_channels = s->prim_channels;
 
+    if (s->prim_channels + !!s->lfe > 2 &&
+        avctx->request_channel_layout == AV_CH_LAYOUT_STEREO) {
+            /* Stereo downmix coefficients
+             *
+             * The decoder can only downmix to 2-channel, so we need to ensure
+             * embedded downmix coefficients are actually targeting 2-channel.
+             */
+            if (s->core_downmix && (s->core_downmix_amode == DCA_STEREO ||
+                                    s->core_downmix_amode == DCA_STEREO_TOTAL)) {
+                for (i = 0; i < num_core_channels + !!s->lfe; i++) {
+                    /* Range checked earlier */
+                    s->downmix_coef[i][0] = dca_dmix_code(s->core_downmix_codes[i][0]);
+                    s->downmix_coef[i][1] = dca_dmix_code(s->core_downmix_codes[i][1]);
+                }
+                s->output = s->core_downmix_amode;
+            } else {
+                int am = s->amode & DCA_CHANNEL_MASK;
+                if (am >= FF_ARRAY_ELEMS(dca_default_coeffs)) {
+                    av_log(s->avctx, AV_LOG_ERROR,
+                           "Invalid channel mode %d\n", am);
+                    return AVERROR_INVALIDDATA;
+                }
+                if (num_core_channels + !!s->lfe >
+                    FF_ARRAY_ELEMS(dca_default_coeffs[0])) {
+                    avpriv_request_sample(s->avctx, "Downmixing %d channels",
+                                          s->prim_channels + !!s->lfe);
+                    return AVERROR_PATCHWELCOME;
+                }
+                for (i = 0; i < num_core_channels + !!s->lfe; i++) {
+                    s->downmix_coef[i][0] = dca_default_coeffs[am][i][0];
+                    s->downmix_coef[i][1] = dca_default_coeffs[am][i][1];
+                }
+            }
+            av_dlog(s->avctx, "Stereo downmix coeffs:\n");
+            for (i = 0; i < num_core_channels + !!s->lfe; i++) {
+                av_dlog(s->avctx, "L, input channel %d = %f\n", i,
+                        s->downmix_coef[i][0]);
+                av_dlog(s->avctx, "R, input channel %d = %f\n", i,
+                        s->downmix_coef[i][1]);
+            }
+            av_dlog(s->avctx, "\n");
+    }
+
     if (s->ext_coding)
         s->core_ext_mask = dca_ext_audio_descr_mask[s->ext_descr];
     else
@@ -1760,7 +2231,7 @@ static int dca_decode_frame(AVCodecContext *avctx, void *data,
 
     /* only scan for extensions if ext_descr was unknown or indicated a
      * supported XCh extension */
-    if (s->core_ext_mask < 0 || s->core_ext_mask & DCA_EXT_XCH) {
+    if (s->core_ext_mask < 0 || s->core_ext_mask & (DCA_EXT_XCH | DCA_EXT_XXCH)) {
 
         /* if ext_descr was unknown, clear s->core_ext_mask so that the
          * extensions scan can fill it up */
@@ -1797,8 +2268,13 @@ static int dca_decode_frame(AVCodecContext *avctx, void *data,
                     continue;
                 }
 
+                if (s->xch_base_channel < 2) {
+                    avpriv_request_sample(avctx, "XCh with fewer than 2 base channels");
+                    continue;
+                }
+
                 /* much like core primary audio coding header */
-                dca_parse_audio_coding_header(s, s->xch_base_channel);
+                dca_parse_audio_coding_header(s, s->xch_base_channel, 0);
 
                 for (i = 0; i < (s->sample_blocks / 8); i++)
                     if ((ret = dca_decode_block(s, s->xch_base_channel, i))) {
@@ -1814,6 +2290,7 @@ static int dca_decode_frame(AVCodecContext *avctx, void *data,
                 /* usually found either in core or HD part in DTS-HD HRA streams,
                  * but not in DTS-ES which contains XCh extensions instead */
                 s->core_ext_mask |= DCA_EXT_XXCH;
+                dca_xxch_decode_frame(s);
                 break;
 
             case 0x1d95f262: {
@@ -1853,107 +2330,154 @@ static int dca_decode_frame(AVCodecContext *avctx, void *data,
 
     full_channels = channels = s->prim_channels + !!s->lfe;
 
-    if (s->amode < 16) {
-        avctx->channel_layout = dca_core_channel_layout[s->amode];
-
-        if (s->prim_channels + !!s->lfe > 2 &&
-            avctx->request_channel_layout == AV_CH_LAYOUT_STEREO) {
-            /*
-             * Neither the core's auxiliary data nor our default tables contain
-             * downmix coefficients for the additional channel coded in the XCh
-             * extension, so when we're doing a Stereo downmix, don't decode it.
-             */
-            s->xch_disable = 1;
-        }
+    /* If we have XXCH then the channel layout is managed differently */
+    /* note that XLL will also have another way to do things */
+    if (!(s->core_ext_mask & DCA_EXT_XXCH)
+        || (s->core_ext_mask & DCA_EXT_XXCH && avctx->request_channels > 0
+            && avctx->request_channels
+            < num_core_channels + !!s->lfe + s->xxch_chset_nch[0]))
+    { /* xxx should also do MA extensions */
+        if (s->amode < 16) {
+            avctx->channel_layout = dca_core_channel_layout[s->amode];
+
+            if (s->prim_channels + !!s->lfe > 2 &&
+                avctx->request_channel_layout == AV_CH_LAYOUT_STEREO) {
+                /*
+                 * Neither the core's auxiliary data nor our default tables contain
+                 * downmix coefficients for the additional channel coded in the XCh
+                 * extension, so when we're doing a Stereo downmix, don't decode it.
+                 */
+                s->xch_disable = 1;
+            }
 
 #if FF_API_REQUEST_CHANNELS
 FF_DISABLE_DEPRECATION_WARNINGS
-        if (s->xch_present && !s->xch_disable &&
-            (!avctx->request_channels ||
-             avctx->request_channels > num_core_channels + !!s->lfe)) {
+            if (s->xch_present && !s->xch_disable &&
+                (!avctx->request_channels ||
+                 avctx->request_channels > num_core_channels + !!s->lfe)) {
 FF_ENABLE_DEPRECATION_WARNINGS
 #else
-        if (s->xch_present && !s->xch_disable) {
+            if (s->xch_present && !s->xch_disable) {
 #endif
-            avctx->channel_layout |= AV_CH_BACK_CENTER;
-            if (s->lfe) {
-                avctx->channel_layout |= AV_CH_LOW_FREQUENCY;
-                s->channel_order_tab = dca_channel_reorder_lfe_xch[s->amode];
+                avctx->channel_layout |= AV_CH_BACK_CENTER;
+                if (s->lfe) {
+                    avctx->channel_layout |= AV_CH_LOW_FREQUENCY;
+                    s->channel_order_tab = dca_channel_reorder_lfe_xch[s->amode];
+                } else {
+                    s->channel_order_tab = dca_channel_reorder_nolfe_xch[s->amode];
+                }
+                if (s->channel_order_tab[s->xch_base_channel] < 0)
+                    return AVERROR_INVALIDDATA;
             } else {
-                s->channel_order_tab = dca_channel_reorder_nolfe_xch[s->amode];
+                channels = num_core_channels + !!s->lfe;
+                s->xch_present = 0; /* disable further xch processing */
+                if (s->lfe) {
+                    avctx->channel_layout |= AV_CH_LOW_FREQUENCY;
+                    s->channel_order_tab = dca_channel_reorder_lfe[s->amode];
+                } else
+                    s->channel_order_tab = dca_channel_reorder_nolfe[s->amode];
+            }
+
+            if (channels > !!s->lfe &&
+                s->channel_order_tab[channels - 1 - !!s->lfe] < 0)
+                return AVERROR_INVALIDDATA;
+
+            if (av_get_channel_layout_nb_channels(avctx->channel_layout) != channels) {
+                av_log(avctx, AV_LOG_ERROR, "Number of channels %d mismatches layout %d\n", channels, av_get_channel_layout_nb_channels(avctx->channel_layout));
+                return AVERROR_INVALIDDATA;
             }
+
+            if (num_core_channels + !!s->lfe > 2 &&
+                avctx->request_channel_layout == AV_CH_LAYOUT_STEREO) {
+                channels = 2;
+                s->output = s->prim_channels == 2 ? s->amode : DCA_STEREO;
+                avctx->channel_layout = AV_CH_LAYOUT_STEREO;
+            }
+            else if (avctx->request_channel_layout & AV_CH_LAYOUT_NATIVE) {
+                static const int8_t dca_channel_order_native[9] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
+                s->channel_order_tab = dca_channel_order_native;
+            }
+            s->lfe_index = dca_lfe_index[s->amode];
         } else {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Non standard configuration %d !\n", s->amode);
+            return AVERROR_INVALIDDATA;
+        }
+
+        s->xxch_dmix_embedded = 0;
+    } else {
+        /* we only get here if an XXCH channel set can be added to the mix */
+        channel_mask = s->xxch_core_spkmask;
+
+        if (avctx->request_channels > 0
+            && avctx->request_channels < s->prim_channels) {
             channels = num_core_channels + !!s->lfe;
-            s->xch_present = 0; /* disable further xch processing */
-            if (s->lfe) {
-                avctx->channel_layout |= AV_CH_LOW_FREQUENCY;
-                s->channel_order_tab = dca_channel_reorder_lfe[s->amode];
-            } else
-                s->channel_order_tab = dca_channel_reorder_nolfe[s->amode];
+            for (i = 0; i < s->xxch_chset && channels + s->xxch_chset_nch[i]
+                                              <= avctx->request_channels; i++) {
+                channels += s->xxch_chset_nch[i];
+                channel_mask |= s->xxch_spk_masks[i];
+            }
+        } else {
+            channels = s->prim_channels + !!s->lfe;
+            for (i = 0; i < s->xxch_chset; i++) {
+                channel_mask |= s->xxch_spk_masks[i];
+            }
         }
 
-        if (channels > !!s->lfe &&
-            s->channel_order_tab[channels - 1 - !!s->lfe] < 0)
-            return AVERROR_INVALIDDATA;
+        /* Given the DTS spec'ed channel mask, generate an avcodec version */
+        channel_layout = 0;
+        for (i = 0; i < s->xxch_nbits_spk_mask; ++i) {
+            if (channel_mask & (1 << i)) {
+                channel_layout |= map_xxch_to_native[i];
+            }
+        }
 
-        if (num_core_channels + !!s->lfe > 2 &&
-            avctx->request_channel_layout == AV_CH_LAYOUT_STEREO) {
-            channels = 2;
-            s->output = s->prim_channels == 2 ? s->amode : DCA_STEREO;
-            avctx->channel_layout = AV_CH_LAYOUT_STEREO;
+        /* make sure that we have managed to get equivalent dts/avcodec channel
+         * masks in some sense -- unfortunately some channels could overlap */
+        if (av_popcount(channel_mask) != av_popcount(channel_layout)) {
+            av_log(avctx, AV_LOG_DEBUG,
+                   "DTS-XXCH: Inconsistent avcodec/dts channel layouts\n");
+            return AVERROR_INVALIDDATA;
+        }
 
-            /* Stereo downmix coefficients
-             *
-             * The decoder can only downmix to 2-channel, so we need to ensure
-             * embedded downmix coefficients are actually targeting 2-channel.
-             */
-            if (s->core_downmix && (s->core_downmix_amode == DCA_STEREO ||
-                                    s->core_downmix_amode == DCA_STEREO_TOTAL)) {
-                for (i = 0; i < num_core_channels + !!s->lfe; i++) {
-                    /* Range checked earlier */
-                    s->downmix_coef[i][0] = dca_dmix_code(s->core_downmix_codes[i][0]);
-                    s->downmix_coef[i][1] = dca_dmix_code(s->core_downmix_codes[i][1]);
-                }
-                s->output = s->core_downmix_amode;
-            } else {
-                int am = s->amode & DCA_CHANNEL_MASK;
-                if (am >= FF_ARRAY_ELEMS(dca_default_coeffs)) {
-                    av_log(s->avctx, AV_LOG_ERROR,
-                           "Invalid channel mode %d\n", am);
-                    return AVERROR_INVALIDDATA;
-                }
-                if (num_core_channels + !!s->lfe >
-                    FF_ARRAY_ELEMS(dca_default_coeffs[0])) {
-                    avpriv_request_sample(s->avctx, "Downmixing %d channels",
-                                          s->prim_channels + !!s->lfe);
-                    return AVERROR_PATCHWELCOME;
-                }
-                for (i = 0; i < num_core_channels + !!s->lfe; i++) {
-                    s->downmix_coef[i][0] = dca_default_coeffs[am][i][0];
-                    s->downmix_coef[i][1] = dca_default_coeffs[am][i][1];
+        avctx->channel_layout = channel_layout;
+
+        if (!(avctx->request_channel_layout & AV_CH_LAYOUT_NATIVE)) {
+            /* Estimate DTS --> avcodec ordering table */
+            for (chset = -1, j = 0; chset < s->xxch_chset; ++chset) {
+                mask = chset >= 0 ? s->xxch_spk_masks[chset]
+                                  : s->xxch_core_spkmask;
+                for (i = 0; i < s->xxch_nbits_spk_mask; i++) {
+                    if (mask & ~(DCA_XXCH_LFE1 | DCA_XXCH_LFE2) & (1 << i)) {
+                        lavc = map_xxch_to_native[i];
+                        posn = av_popcount(channel_layout & (lavc - 1));
+                        s->xxch_order_tab[j++] = posn;
+                    }
                 }
+
             }
-            av_dlog(s->avctx, "Stereo downmix coeffs:\n");
-            for (i = 0; i < num_core_channels + !!s->lfe; i++) {
-                av_dlog(s->avctx, "L, input channel %d = %f\n", i,
-                        s->downmix_coef[i][0]);
-                av_dlog(s->avctx, "R, input channel %d = %f\n", i,
-                        s->downmix_coef[i][1]);
-            }
-            av_dlog(s->avctx, "\n");
+
+            s->lfe_index = av_popcount(channel_layout & (AV_CH_LOW_FREQUENCY-1));
+        } else { /* native ordering */
+            for (i = 0; i < channels; i++)
+                s->xxch_order_tab[i] = i;
+
+            s->lfe_index = channels - 1;
         }
-    } else {
-        av_log(avctx, AV_LOG_ERROR, "Non standard configuration %d !\n", s->amode);
-        return AVERROR_INVALIDDATA;
+
+        s->channel_order_tab = s->xxch_order_tab;
+    }
+
+    if (avctx->channels != channels) {
+        if (avctx->channels)
+            av_log(avctx, AV_LOG_INFO, "Number of channels changed in DCA decoder (%d -> %d)\n", avctx->channels, channels);
+        avctx->channels = channels;
     }
-    avctx->channels = channels;
 
     /* get output buffer */
     frame->nb_samples = 256 * (s->sample_blocks / 8);
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     samples_flt = (float **)frame->extended_data;
 
     /* allocate buffer for extra channels if downmixing */
@@ -1997,6 +2521,53 @@ FF_ENABLE_DEPRECATION_WARNINGS
             s->fdsp.vector_fmac_scalar(lt_chan, back_chan, -M_SQRT1_2, 256);
             s->fdsp.vector_fmac_scalar(rt_chan, back_chan, -M_SQRT1_2, 256);
         }
+
+        /* If stream contains XXCH, we might need to undo an embedded downmix */
+        if (s->xxch_dmix_embedded) {
+            /* Loop over channel sets in turn */
+            ch = num_core_channels;
+            for (chset = 0; chset < s->xxch_chset; chset++) {
+                endch = ch + s->xxch_chset_nch[chset];
+                mask = s->xxch_dmix_embedded;
+
+                /* undo downmix */
+                for (j = ch; j < endch; j++) {
+                    if (mask & (1 << j)) { /* this channel has been mixed-out */
+                        src_chan = s->samples_chanptr[s->channel_order_tab[j]];
+                        for (k = 0; k < endch; k++) {
+                            achan = s->channel_order_tab[k];
+                            scale = s->xxch_dmix_coeff[j][k];
+                            if (scale != 0.0) {
+                                dst_chan = s->samples_chanptr[achan];
+                                s->fdsp.vector_fmac_scalar(dst_chan, src_chan,
+                                                           -scale, 256);
+                            }
+                        }
+                    }
+                }
+
+                /* if a downmix has been embedded then undo the pre-scaling */
+                if ((mask & (1 << ch)) && s->xxch_dmix_sf[chset] != 1.0f) {
+                    scale = s->xxch_dmix_sf[chset];
+
+                    for (j = 0; j < ch; j++) {
+                        src_chan = s->samples_chanptr[s->channel_order_tab[j]];
+                        for (k = 0; k < 256; k++)
+                            src_chan[k] *= scale;
+                    }
+
+                    /* LFE channel is always part of core, scale if it exists */
+                    if (s->lfe) {
+                        src_chan = s->samples_chanptr[s->lfe_index];
+                        for (k = 0; k < 256; k++)
+                            src_chan[k] *= scale;
+                    }
+                }
+
+                ch = endch;
+            }
+
+        }
     }
 
     /* update lfe history */
@@ -2082,6 +2653,7 @@ static const AVClass dca_decoder_class = {
     .item_name  = av_default_item_name,
     .option     = options,
     .version    = LIBAVUTIL_VERSION_INT,
+    .category   = AV_CLASS_CATEGORY_DECODER,
 };
 
 AVCodec ff_dca_decoder = {
diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c
index 18732dc..7d50442 100644
--- a/libavcodec/dcadsp.c
+++ b/libavcodec/dcadsp.c
@@ -2,20 +2,20 @@
  * Copyright (c) 2004 Gildas Bazin
  * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h
index 0fa75a5..abf577b 100644
--- a/libavcodec/dcadsp.h
+++ b/libavcodec/dcadsp.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -22,7 +22,7 @@
 #include "avfft.h"
 #include "synth_filter.h"
 
-#define DCA_SUBBANDS 32
+#define DCA_SUBBANDS 64
 
 typedef struct DCADSPContext {
     void (*lfe_fir[2])(float *out, const float *in, const float *coefs);
diff --git a/libavcodec/dcaenc.c b/libavcodec/dcaenc.c
new file mode 100644
index 0000000..905cdc9
--- /dev/null
+++ b/libavcodec/dcaenc.c
@@ -0,0 +1,975 @@
+/*
+ * DCA encoder
+ * Copyright (C) 2008-2012 Alexander E. Patrakov
+ *               2010 Benjamin Larsson
+ *               2011 Xiang Wang
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/common.h"
+#include "avcodec.h"
+#include "dca.h"
+#include "dcadata.h"
+#include "dcaenc.h"
+#include "internal.h"
+#include "mathops.h"
+#include "put_bits.h"
+
+#define MAX_CHANNELS 6 /* size of the per-channel dimension of the DCAContext arrays */
+#define DCA_MAX_FRAME_SIZE 16384 /* bytes: encode_init() compares frame_bits against this << 3 */
+#define DCA_HEADER_SIZE 13
+#define DCA_LFE_SAMPLES 8 /* outputs produced by one lfe_downsample() call */
+
+#define DCA_SUBBANDS 32 /* NOTE(review): dcadsp.h defines DCA_SUBBANDS as 64 - confirm they cannot clash */
+#define SUBFRAMES 1
+#define SUBSUBFRAMES 2
+#define SUBBAND_SAMPLES (SUBFRAMES * SUBSUBFRAMES * 8) /* first dimension of subband[]; frame is 32 * this */
+#define AUBANDS 25 /* second dimension of auf[]; the band index handed to gammafilter() */
+
+typedef struct DCAContext {
+    PutBitContext pb;
+    int frame_size;        /* (frame_bits + 7) / 8, set in encode_init() */
+    int frame_bits;        /* bits per frame, aligned to 32 in encode_init() */
+    int fullband_channels; /* channels, minus one when lfe_channel is set */
+    int channels;          /* copy of avctx->channels */
+    int lfe_channel;       /* 1 when avctx->channels is 3 or 6, else 0 */
+    int samplerate_index;  /* index into sample_rates[] */
+    int bitrate_index;     /* index into dca_bit_rates[] */
+    int channel_config;    /* code picked from the channel layout in encode_init() */
+    const int32_t *band_interpolation; /* encode_init() points this at band_interpolation[1] */
+    const int32_t *band_spectrum;      /* encode_init() points this at band_spectrum[1] */
+    int lfe_scale_factor;
+    softfloat lfe_quant;
+    int32_t lfe_peak_cb;
+
+    int32_t history[512][MAX_CHANNELS]; /* This is a circular buffer */
+    int32_t subband[SUBBAND_SAMPLES][DCA_SUBBANDS][MAX_CHANNELS]; /* written by subband_transform() */
+    int32_t quantized[SUBBAND_SAMPLES][DCA_SUBBANDS][MAX_CHANNELS];
+    int32_t peak_cb[DCA_SUBBANDS][MAX_CHANNELS]; /* per band and channel, written by find_peaks() */
+    int32_t downsampled_lfe[DCA_LFE_SAMPLES]; /* written by lfe_downsample() */
+    int32_t masking_curve_cb[SUBSUBFRAMES][256]; /* one curve per subsubframe, built by calc_masking() */
+    int abits[DCA_SUBBANDS][MAX_CHANNELS];
+    int scale_factor[DCA_SUBBANDS][MAX_CHANNELS];
+    softfloat quant[DCA_SUBBANDS][MAX_CHANNELS];
+    int32_t eff_masking_curve_cb[256]; /* per-bin minimum of masking_curve_cb[] over subsubframes */
+    int32_t band_masking_cb[32]; /* per-band minimum kept by update_band_masking() */
+    int32_t worst_quantization_noise; /* starts at -2047 (encode_init()) */
+    int32_t worst_noise_ever;         /* starts at -2047 (encode_init()) */
+    int consumed_bits;
+} DCAContext;
+
+static int32_t cos_table[2048];            /* 0x7fffffff * cos(2 * pi * i / 2048); filled in encode_init() */
+static int32_t band_interpolation[2][512]; /* [0]: fir_32bands_perfect, [1]: fir_32bands_nonperfect, scaled by 2^36 */
+static int32_t band_spectrum[2][8];        /* 200 * log10() of a cosine sum over the same two filters */
+static int32_t auf[9][AUBANDS][256];       /* [sample rate index][band][bin]: 10 * (hom() + gammafilter()) */
+static int32_t cb_to_add[256];             /* 100 * log10(1 + 10^(-i / 100)); read by add_cb() */
+static int32_t cb_to_level[2048];          /* 0x7fffffff * 10^(-i / 200); searched by get_cb() */
+static int32_t lfe_fir_64i[512];           /* lfe_fir_64[0..255] * 0x01ffffff, mirrored into the upper half */
+
+/* Outer- and middle-ear transfer function: frequency in Hz -> gain in dB */
+static double hom(double f)
+{
+    const double khz  = f / 1000; /* the formula below works in kHz */
+    const double low  = -3.64 * pow(khz, -0.8);
+    const double peak = 6.8 * exp(-0.6 * (khz - 3.4) * (khz - 3.4));
+    const double dip  = 6.0 * exp(-0.15 * (khz - 8.7) * (khz - 8.7));
+    const double high = 0.0006 * (khz * khz) * (khz * khz);
+    return low + peak - dip - high;
+}
+
+static double gammafilter(int i, double f)
+{
+    /* Response in dB of filter i at frequency f: 20 * log10(1 / (1 + x^2)^2),
+     * where x is the distance from fc[i] measured in units of erb[i] */
+    const double x    = (f - fc[i]) / erb[i];
+    const double base = 1 + x * x;
+    return 20 * log10(1 / (base * base));
+}
+
+/* Check layout, sample rate and bit rate, derive the frame geometry and, on
+ * the first call only, fill the file-level fixed-point lookup tables.
+ * Returns 0 on success or a negative AVERROR code; every failure is logged. */
+static int encode_init(AVCodecContext *avctx)
+{
+    DCAContext *c = avctx->priv_data;
+    uint64_t layout = avctx->channel_layout;
+    int i, min_frame_bits;
+
+    c->fullband_channels = c->channels = avctx->channels;
+    c->lfe_channel = (avctx->channels == 3 || avctx->channels == 6);
+    c->band_interpolation = band_interpolation[1];
+    c->band_spectrum = band_spectrum[1];
+    c->worst_quantization_noise = -2047;
+    c->worst_noise_ever = -2047;
+
+    if (!layout) {
+        av_log(avctx, AV_LOG_WARNING, "No channel layout specified. The encoder "
+               "will guess the layout, but it might be incorrect.\n");
+        layout = av_get_default_channel_layout(avctx->channels);
+    }
+    switch (layout) {
+    case AV_CH_LAYOUT_MONO:         c->channel_config = 0; break;
+    case AV_CH_LAYOUT_STEREO:       c->channel_config = 2; break;
+    case AV_CH_LAYOUT_2_2:          c->channel_config = 8; break;
+    case AV_CH_LAYOUT_5POINT0:      c->channel_config = 9; break;
+    case AV_CH_LAYOUT_5POINT1:      c->channel_config = 9; break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unsupported channel layout!\n");
+        return AVERROR_PATCHWELCOME;
+    }
+
+    if (c->lfe_channel)
+        c->fullband_channels--;
+
+    for (i = 0; i < 9; i++)
+        if (sample_rates[i] == avctx->sample_rate)
+            break;
+    if (i == 9) {
+        av_log(avctx, AV_LOG_ERROR, "Sample rate %i not supported.\n", avctx->sample_rate);
+        return AVERROR(EINVAL);
+    }
+    c->samplerate_index = i;
+
+    if (avctx->bit_rate < 32000 || avctx->bit_rate > 3840000) {
+        av_log(avctx, AV_LOG_ERROR, "Bit rate %i not supported.\n", avctx->bit_rate);
+        return AVERROR(EINVAL);
+    }
+    for (i = 0; dca_bit_rates[i] < avctx->bit_rate; i++) /* assumes the table reaches >= 3840000 - TODO confirm */
+        ;
+    c->bitrate_index = i;
+    avctx->bit_rate = dca_bit_rates[i]; /* round the request up to the table entry */
+    c->frame_bits = FFALIGN((avctx->bit_rate * 512 + avctx->sample_rate - 1) / avctx->sample_rate, 32);
+    min_frame_bits = 132 + (493 + 28 * 32) * c->fullband_channels + c->lfe_channel * 72;
+    if (c->frame_bits < min_frame_bits || c->frame_bits > (DCA_MAX_FRAME_SIZE << 3)) {
+        av_log(avctx, AV_LOG_ERROR, "Unsupported frame size of %i bits.\n", c->frame_bits);
+        return AVERROR(EINVAL);
+    }
+
+    c->frame_size = (c->frame_bits + 7) / 8;
+    avctx->frame_size = 32 * SUBBAND_SAMPLES;
+
+    if (!cos_table[0]) { /* cos_table[0] becomes 0x7fffffff once filled, so later calls skip this */
+        int j, k;
+        for (i = 0; i < 2048; i++) {
+            cos_table[i]   = (int32_t)(0x7fffffff * cos(M_PI * i / 1024));
+            cb_to_level[i] = (int32_t)(0x7fffffff * pow(10, -0.005 * i));
+        }
+
+        /* FIXME: probably incorrect */
+        for (i = 0; i < 256; i++) {
+            lfe_fir_64i[i] = (int32_t)(0x01ffffff * lfe_fir_64[i]);
+            lfe_fir_64i[511 - i] = (int32_t)(0x01ffffff * lfe_fir_64[i]);
+        }
+
+        for (i = 0; i < 512; i++) {
+            band_interpolation[0][i] = (int32_t)(0x1000000000ULL * fir_32bands_perfect[i]);
+            band_interpolation[1][i] = (int32_t)(0x1000000000ULL * fir_32bands_nonperfect[i]);
+        }
+
+        for (i = 0; i < 9; i++)
+            for (j = 0; j < AUBANDS; j++)
+                for (k = 0; k < 256; k++) {
+                    double freq = sample_rates[i] * (k + 0.5) / 512;
+                    auf[i][j][k] = (int32_t)(10 * (hom(freq) + gammafilter(j, freq)));
+                }
+
+        for (i = 0; i < 256; i++) {
+            double add = 1 + pow(10, -0.01 * i);
+            cb_to_add[i] = (int32_t)(100 * log10(add));
+        }
+        for (j = 0; j < 8; j++) {
+            double accum = 0;
+            for (i = 0; i < 512; i++) {
+                double reconst = fir_32bands_perfect[i] * ((i & 64) ? (-1) : 1);
+                accum += reconst * cos(2 * M_PI * (i + 0.5 - 256) * (j + 0.5) / 512);
+            }
+            band_spectrum[0][j] = (int32_t)(200 * log10(accum));
+        }
+        for (j = 0; j < 8; j++) {
+            double accum = 0;
+            for (i = 0; i < 512; i++) {
+                double reconst = fir_32bands_nonperfect[i] * ((i & 64) ? (-1) : 1);
+                accum += reconst * cos(2 * M_PI * (i + 0.5 - 256) * (j + 0.5) / 512);
+            }
+            band_spectrum[1][j] = (int32_t)(200 * log10(accum));
+        }
+    }
+    return 0;
+}
+
+static inline int32_t cos_t(int x)
+{   /* x is a phase in 1/2048ths of a full turn; the mask wraps it into the table */
+    return cos_table[x & (2048 - 1)];
+}
+
+static inline int32_t sin_t(int x)
+{   /* sin(a) == cos(a - quarter turn); a quarter of the 2048 steps is 512 */
+    return cos_t(x - 2048 / 4);
+}
+
+static inline int32_t half32(int32_t a)
+{   /* (a + 1) >> 1, done in 64 bits so that a == INT32_MAX cannot overflow */
+    return (int32_t)(((int64_t)a + 1) >> 1);
+}
+
+static inline int32_t mul32(int32_t a, int32_t b)
+{
+    /* high 32 bits of the 64-bit product, rounded by adding half an output LSB */
+    return (int64_t)((int64_t)a * b + 0x80000000ULL) >> 32;
+}
+
+static void subband_transform(DCAContext *c, const int32_t *input)
+{
+    /* Analysis filterbank. For every fullband channel, slide a 512-tap
+     * window over history + input in steps of 32 samples and turn each
+     * window position into one sample for each of the 32 subbands. */
+    int ch, blk, n, tap, band;
+
+    for (ch = 0; ch < c->fullband_channels; ch++) {
+        /* Work on a private copy: c->history is also needed for PSY */
+        int32_t win[512];
+        int head = 0; /* window start within the circular copy */
+
+        for (n = 0; n < 512; n++)
+            win[n] = c->history[n][ch];
+
+        for (blk = 0; blk < SUBBAND_SAMPLES; blk++) {
+            int32_t fold[64] = { 0 };
+
+            /* All convolutions at once: tap t, counted from the window
+             * start, is accumulated into bucket t mod 64 */
+            for (tap = 0; tap < 512; tap++)
+                fold[tap & 63] += mul32(win[(head + tap) & 511],
+                                        c->band_interpolation[tap]);
+
+            /* Combine mirrored buckets so that the cosine sum below only
+             * needs fold[16..47] */
+            for (n = 16; n < 32; n++)
+                fold[n] -= fold[31 - n];
+            for (n = 32; n < 48; n++)
+                fold[n] += fold[95 - n];
+
+            for (band = 0; band < 32; band++) {
+                int32_t acc = 0;
+
+                for (n = 16; n < 48; n++) {
+                    const int phase = (2 * band + 1) * (2 * (n + 16) + 1);
+                    acc += mul32(fold[n], cos_t(phase << 3)) >> 3;
+                }
+                /* bands 1, 2, 5, 6, ... are stored with the sign flipped */
+                c->subband[blk][band][ch] = ((band + 1) & 2) ? -acc : acc;
+            }
+
+            /* Overwrite the 32 samples at the window start with new input */
+            for (n = 0; n < 32; n++)
+                win[head + n] = input[(blk * 32 + n) * c->channels + ch];
+            head = (head + 32) & 511;
+        }
+    }
+}
+
+static void lfe_downsample(DCAContext *c, const int32_t *input)
+{
+    /* FIXME: make 128x LFE downsampling possible */
+    /* Filter the last channel with the 512-tap lfe_fir_64i[] and keep one
+     * output per 64 input samples (DCA_LFE_SAMPLES outputs per call). */
+    const int lfe_ch = c->channels - 1;
+    int32_t win[512];
+    int head = 0; /* window start within the circular copy */
+    int n, tap, out;
+
+    for (n = 0; n < 512; n++)
+        win[n] = c->history[n][lfe_ch];
+
+    for (out = 0; out < DCA_LFE_SAMPLES; out++) {
+        int32_t acc = 0;
+
+        /* Convolve, beginning with the sample at the window start */
+        for (tap = 0; tap < 512; tap++)
+            acc += mul32(win[(head + tap) & 511], lfe_fir_64i[tap]);
+
+        c->downsampled_lfe[out] = acc;
+
+        /* Overwrite the 64 samples at the window start with new input */
+        for (n = 0; n < 64; n++)
+            win[head + n] = input[(out * 64 + n) * c->channels + lfe_ch];
+
+        head = (head + 64) & 511;
+    }
+}
+
+typedef struct {
+    int32_t re; /* real part */
+    int32_t im; /* imaginary part */
+} cplx32;
+
+static void fft(const int32_t in[2 * 256], cplx32 out[256])
+{
+    cplx32 buf[256], rin[256], rout[256];
+    int i, j, k, l;
+    /* Fixed-point transform: 512 real input samples -> 256 complex outputs. */
+    /* do two transforms in parallel: even samples -> .re, odd samples -> .im */
+    for (i = 0; i < 256; i++) {
+        /* Apply the Hann window */
+        rin[i].re = mul32(in[2 * i], 0x3fffffff - (cos_t(8 * i + 2) >> 1));
+        rin[i].im = mul32(in[2 * i + 1], 0x3fffffff - (cos_t(8 * i + 6) >> 1));
+    }
+    /* pre-rotation */
+    for (i = 0; i < 256; i++) {
+        buf[i].re = mul32(cos_t(4 * i + 2), rin[i].re)
+                  - mul32(sin_t(4 * i + 2), rin[i].im);
+        buf[i].im = mul32(cos_t(4 * i + 2), rin[i].im)
+                  + mul32(sin_t(4 * i + 2), rin[i].re);
+    }
+
+    for (j = 256, l = 1; j != 1; j >>= 1, l <<= 1) { /* 8 stages; block size j halves, l doubles */
+        for (k = 0; k < 256; k += j) {
+            for (i = k; i < k + j / 2; i++) {
+                cplx32 sum, diff;
+                int t = 8 * l * i; /* twiddle phase in cos_t()/sin_t() units */
+
+                sum.re = buf[i].re + buf[i + j / 2].re;
+                sum.im = buf[i].im + buf[i + j / 2].im;
+
+                diff.re = buf[i].re - buf[i + j / 2].re;
+                diff.im = buf[i].im - buf[i + j / 2].im;
+
+                buf[i].re = half32(sum.re); /* sum path is halved explicitly ... */
+                buf[i].im = half32(sum.im);
+
+                buf[i + j / 2].re = mul32(diff.re, cos_t(t)) /* ... mul32() by a 31-bit twiddle halves this path too */
+                                  - mul32(diff.im, sin_t(t));
+                buf[i + j / 2].im = mul32(diff.im, cos_t(t))
+                                  + mul32(diff.re, sin_t(t));
+            }
+        }
+    }
+    /* post-rotation */
+    for (i = 0; i < 256; i++) {
+        int b = ff_reverse[i]; /* ff_reverse[] is defined elsewhere; presumably a bit-reversal table - confirm */
+        rout[i].re = mul32(buf[b].re, cos_t(4 * i))
+                   - mul32(buf[b].im, sin_t(4 * i));
+        rout[i].im = mul32(buf[b].im, cos_t(4 * i))
+                   + mul32(buf[b].re, sin_t(4 * i));
+    }
+    for (i = 0; i < 256; i++) {
+        /* separate the results of the two transforms */
+        cplx32 o1, o2;
+
+        o1.re =  rout[i].re - rout[255 - i].re;
+        o1.im =  rout[i].im + rout[255 - i].im;
+
+        o2.re =  rout[i].im - rout[255 - i].im;
+        o2.im = -rout[i].re - rout[255 - i].re;
+
+        /* combine them into one long transform */
+        out[i].re = mul32( o1.re + o2.re, cos_t(2 * i + 1))
+                  + mul32( o1.im - o2.im, sin_t(2 * i + 1));
+        out[i].im = mul32( o1.im + o2.im, cos_t(2 * i + 1))
+                  + mul32(-o1.re + o2.re, sin_t(2 * i + 1));
+    }
+}
+
+static int32_t get_cb(int32_t in)
+{
+    /* Bitwise binary search: encode_init() fills cb_to_level[] in decreasing
+     * order, so each step keeps the largest index whose level still reaches
+     * |in|. The result is that index, negated. */
+    const int32_t magnitude = in < 0 ? -in : in;
+    int pos = 0, step;
+
+    for (step = 1024; step > 0; step >>= 1)
+        if (cb_to_level[pos + step] >= magnitude)
+            pos += step;
+    return -pos;
+}
+
+static int32_t add_cb(int32_t a, int32_t b)
+{
+    /* Sum of two levels on the cb_to_add[] scale: the larger one plus a
+     * tabulated correction for their distance; none from 256 upwards. */
+    const int32_t louder  = a < b ? b : a;
+    const int32_t quieter = a < b ? a : b;
+    const int32_t gap     = louder - quieter;
+    return gap >= 256 ? louder : louder + cb_to_add[gap];
+}
+
+static void adjust_jnd(int samplerate_index,
+                       const int32_t in[512], int32_t out_cb[256])
+{
+    /* Analyse 512 samples with fft() and merge the resulting 256-bin curve
+     * into out_cb[] via add_cb(); levels use the get_cb()/add_cb() scale. */
+    const int32_t ca_cb = -1114;
+    const int32_t cs_cb = 928;
+    int32_t level[256], spread[256];
+    cplx32 spec[256];
+    int band, bin;
+
+    fft(in, spec);
+
+    for (bin = 0; bin < 256; bin++) {
+        level[bin]  = add_cb(get_cb(spec[bin].re), get_cb(spec[bin].im));
+        spread[bin] = -2047; /* and can only grow */
+    }
+
+    for (band = 0; band < AUBANDS; band++) {
+        const int32_t *weight = auf[samplerate_index][band];
+        int32_t total = ca_cb; /* and can only grow */
+
+        for (bin = 0; bin < 256; bin++)
+            total = add_cb(total, level[bin] + weight[bin]);
+        for (bin = 0; bin < 256; bin++)
+            spread[bin] = add_cb(spread[bin], -total + weight[bin]);
+    }
+    for (bin = 0; bin < 256; bin++)
+        out_cb[bin] = add_cb(out_cb[bin], -spread[bin] - ca_cb - cs_cb);
+}
+
+typedef void (*walk_band_t)(DCAContext *c, int band1, int band2, int f, /* f: index into the 256-entry curves */
+                            int32_t spectrum1, int32_t spectrum2, int channel,
+                            int32_t * arg); /* callback type invoked by walk_band_low()/walk_band_high() */
+
+static void walk_band_low(DCAContext *c, int band, int channel,
+                          walk_band_t walk, int32_t *arg)
+{
+    const int first_bin = 8 * band - 4; /* visits bins first_bin .. first_bin + 7 */
+    int n;
+    if (!band) { /* band 0 has no band below it: only bins 0..3, paired with itself */
+        for (n = 0; n < 4; n++)
+            walk(c, 0, 0, n, 0, -2047, channel, arg);
+        return;
+    }
+    for (n = 0; n < 8; n++)
+        walk(c, band, band - 1, first_bin + n,
+             c->band_spectrum[7 - n], c->band_spectrum[n], channel, arg);
+}
+
+static void walk_band_high(DCAContext *c, int band, int channel,
+                           walk_band_t walk, int32_t *arg)
+{
+    const int first_bin = 8 * band + 4; /* visits bins first_bin .. first_bin + 7 */
+    int n;
+    if (band == 31) { /* band 31 has no band above it: only bins 252..255, paired with itself */
+        for (n = 0; n < 4; n++)
+            walk(c, 31, 31, 252 + n, 0, -2047, channel, arg);
+        return;
+    }
+    for (n = 0; n < 8; n++)
+        walk(c, band, band + 1, first_bin + n,
+             c->band_spectrum[n], c->band_spectrum[7 - n], channel, arg);
+}
+
+static void update_band_masking(DCAContext *c, int band1, int band2,
+                                int f, int32_t spectrum1, int32_t spectrum2,
+                                int channel, int32_t * arg)
+{   /* walk_band_t callback; band2, spectrum2, channel and arg are not used */
+    int32_t *lowest = &c->band_masking_cb[band1]; /* running minimum for band1 */
+    const int32_t candidate = c->eff_masking_curve_cb[f] - spectrum1;
+    if (candidate < *lowest)
+        *lowest = candidate;
+}
+
+static void calc_masking(DCAContext *c, const int32_t *input)
+{
+    int i, k, band, ch, ssf;
+    int32_t data[512];
+    /* Recompute masking_curve_cb, eff_masking_curve_cb and band_masking_cb for this frame. */
+    for (i = 0; i < 256; i++)
+        for (ssf = 0; ssf < SUBSUBFRAMES; ssf++)
+            c->masking_curve_cb[ssf][i] = -2047; /* reset: adjust_jnd() combines its result with the existing value */
+
+    for (ssf = 0; ssf < SUBSUBFRAMES; ssf++)
+        for (ch = 0; ch < c->fullband_channels; ch++) {
+            for (i = 0, k = 128 + 256 * ssf; k < 512; i++, k++)
+                data[i] = c->history[k][ch]; /* head of the 512-sample window: stored history from offset k */
+            for (k -= 512; i < 512; i++, k++)
+                data[i] = input[k * c->channels + ch]; /* rest of the window: interleaved new input */
+            adjust_jnd(c->samplerate_index, data, c->masking_curve_cb[ssf]);
+        }
+    for (i = 0; i < 256; i++) {
+        int32_t m = 2048;
+        /* Effective curve: per-bin minimum over all subsubframes. */
+        for (ssf = 0; ssf < SUBSUBFRAMES; ssf++)
+            if (c->masking_curve_cb[ssf][i] < m)
+                m = c->masking_curve_cb[ssf][i];
+        c->eff_masking_curve_cb[i] = m;
+    }
+
+    for (band = 0; band < 32; band++) {
+        c->band_masking_cb[band] = 2048; /* initial value; update_band_masking() only ever lowers it */
+        walk_band_low(c, band, 0, update_band_masking, NULL);
+        walk_band_high(c, band, 0, update_band_masking, NULL);
+    }
+}
+
+static void find_peaks(DCAContext *c)
+{
+    int band, ch;
+    /* Store get_cb() of the largest absolute sample for each (band, channel) and for the LFE. */
+    for (band = 0; band < 32; band++)
+        for (ch = 0; ch < c->fullband_channels; ch++) {
+            int sample;
+            int32_t m = 0;
+
+            for (sample = 0; sample < SUBBAND_SAMPLES; sample++) {
+                int32_t s = abs(c->subband[sample][band][ch]);
+                if (m < s)
+                    m = s; /* running maximum of |sample| */
+            }
+            c->peak_cb[band][ch] = get_cb(m);
+        }
+
+    if (c->lfe_channel) { /* same peak search over the downsampled LFE samples */
+        int sample;
+        int32_t m = 0;
+
+        for (sample = 0; sample < DCA_LFE_SAMPLES; sample++)
+            if (m < abs(c->downsampled_lfe[sample]))
+                m = abs(c->downsampled_lfe[sample]);
+        c->lfe_peak_cb = get_cb(m);
+    }
+}
+
+static const int snr_fudge = 128; /* noise step used by assign_bits() to bracket its search */
+#define USED_1ABITS 1  /* flag: init_quantization_noise() assigned abits == 1 somewhere */
+#define USED_NABITS 2  /* flag: some abits value computed from snr_cb was assigned */
+#define USED_26ABITS 4 /* flag: abits == 26 was assigned somewhere */
+
+static int init_quantization_noise(DCAContext *c, int noise)
+{
+    int ch, band, ret = 0;
+    /* Choose abits for every (band, channel) at this noise level and total the bits; returns USED_* flags. */
+    c->consumed_bits = 132 + 493 * c->fullband_channels;
+    if (c->lfe_channel)
+        c->consumed_bits += 72;
+    /* NOTE(review): 132 / 493 / 72 look like the fixed, per-channel and LFE bit costs of the put_*() writers - confirm. */
+    /* attempt to guess the bit distribution based on the previous frame */
+    for (ch = 0; ch < c->fullband_channels; ch++) {
+        for (band = 0; band < 32; band++) {
+            int snr_cb = c->peak_cb[band][ch] - c->band_masking_cb[band] - noise;
+
+            if (snr_cb >= 1312) {
+                c->abits[band][ch] = 26;
+                ret |= USED_26ABITS;
+            } else if (snr_cb >= 222) {
+                c->abits[band][ch] = 8 + mul32(snr_cb - 222, 69000000);
+                ret |= USED_NABITS;
+            } else if (snr_cb >= 0) {
+                c->abits[band][ch] = 2 + mul32(snr_cb, 106000000);
+                ret |= USED_NABITS;
+            } else {
+                c->abits[band][ch] = 1; /* negative snr_cb: smallest allocation used here */
+                ret |= USED_1ABITS;
+            }
+        }
+    }
+
+    for (band = 0; band < 32; band++)
+        for (ch = 0; ch < c->fullband_channels; ch++) {
+            c->consumed_bits += bit_consumption[c->abits[band][ch]]; /* sample payload for this (band, channel) */
+        }
+
+    return ret;
+}
+
+static void assign_bits(DCAContext *c)
+{
+    /* Find the bounds where the binary search should work */
+    int low, high, down;
+    int used_abits = 0;
+    /* Search for a noise level whose allocation fits c->frame_bits, starting from the last stored level. */
+    init_quantization_noise(c, c->worst_quantization_noise); /* NOTE(review): return value dropped, used_abits stays 0 for the first loop pass */
+    low = high = c->worst_quantization_noise;
+    if (c->consumed_bits > c->frame_bits) {
+        while (c->consumed_bits > c->frame_bits) { /* over budget: raise the noise in snr_fudge steps */
+            av_assert0(used_abits != USED_1ABITS);
+            low = high;
+            high += snr_fudge;
+            used_abits = init_quantization_noise(c, high);
+        }
+    } else {
+        while (c->consumed_bits <= c->frame_bits) { /* fits: lower the noise until it no longer does */
+            high = low;
+            if (used_abits == USED_26ABITS)
+                goto out; /* The requested bitrate is too high, pad with zeros */
+            low -= snr_fudge;
+            used_abits = init_quantization_noise(c, low);
+        }
+    }
+
+    /* Now do a binary search between low and high to see what fits */
+    for (down = snr_fudge >> 1; down; down >>= 1) {
+        init_quantization_noise(c, high - down);
+        if (c->consumed_bits <= c->frame_bits)
+            high -= down;
+    }
+    init_quantization_noise(c, high); /* leave c->abits / c->consumed_bits matching the chosen level */
+out:
+    c->worst_quantization_noise = high;
+    if (high > c->worst_noise_ever)
+        c->worst_noise_ever = high;
+}
+
+static void shift_history(DCAContext *c, const int32_t *input)
+{
+    const int32_t *src = input; /* walks the interleaved input, one sample per step */
+    int frame, chan;
+    for (frame = 0; frame < 512; frame++)
+        for (chan = 0; chan < c->channels; chan++)
+            c->history[frame][chan] = *src++;
+}
+
+static int32_t quantize_value(int32_t value, softfloat quant)
+{
+    /* Multiply by quant.m via mul32(), add half of 1 << quant.e, then drop quant.e bits. */
+    const int32_t half_lsb = 1 << (quant.e - 1);
+    const int32_t scaled   = mul32(value, quant.m);
+
+    return (scaled + half_lsb) >> quant.e;
+}
+
+static int calc_one_scale(int32_t peak_cb, int abits, softfloat *quant)
+{
+    int32_t peak;
+    int our_nscale, try_remove;
+    softfloat our_quant;
+    /* Pick a scale factor index for peak_cb and abits, store the matching quantizer in *quant, return the index. */
+    av_assert0(peak_cb <= 0);
+    av_assert0(peak_cb >= -2047);
+
+    our_nscale = 127;
+    peak = cb_to_level[-peak_cb];
+    /* Try lowering the index by 64, 32, ..., 1; a step is kept only if both checks below pass. */
+    for (try_remove = 64; try_remove > 0; try_remove >>= 1) {
+        if (scalefactor_inv[our_nscale - try_remove].e + stepsize_inv[abits].e <= 17)
+            continue; /* combined exponent would not be positive after subtracting 17 */
+        our_quant.m = mul32(scalefactor_inv[our_nscale - try_remove].m, stepsize_inv[abits].m);
+        our_quant.e = scalefactor_inv[our_nscale - try_remove].e + stepsize_inv[abits].e - 17;
+        if ((quant_levels[abits] - 1) / 2 < quantize_value(peak, our_quant))
+            continue; /* quantized peak would exceed (levels - 1) / 2 */
+        our_nscale -= try_remove;
+    }
+
+    if (our_nscale >= 125)
+        our_nscale = 124; /* indices 125..127 are never returned */
+
+    quant->m = mul32(scalefactor_inv[our_nscale].m, stepsize_inv[abits].m);
+    quant->e = scalefactor_inv[our_nscale].e + stepsize_inv[abits].e - 17;
+    av_assert0((quant_levels[abits] - 1) / 2 >= quantize_value(peak, *quant));
+
+    return our_nscale;
+}
+
+static void calc_scales(DCAContext *c)
+{
+    int sb, chan;
+    /* One scale factor and quantizer per (subband, channel); the LFE is scaled with abits 11. */
+    for (sb = 0; sb < 32; sb++) {
+        for (chan = 0; chan < c->fullband_channels; chan++) {
+            softfloat *q = &c->quant[sb][chan];
+            c->scale_factor[sb][chan] = calc_one_scale(c->peak_cb[sb][chan], c->abits[sb][chan], q);
+        }
+    }
+    if (c->lfe_channel)
+        c->lfe_scale_factor = calc_one_scale(c->lfe_peak_cb, 11, &c->lfe_quant);
+}
+
+static void quantize_all(DCAContext *c)
+{
+    int n, sb, chan;
+    /* Quantize every subband sample with the quantizer of its (subband, channel). */
+    for (n = 0; n < SUBBAND_SAMPLES; n++)
+        for (sb = 0; sb < 32; sb++)
+            for (chan = 0; chan < c->fullband_channels; chan++)
+                c->quantized[n][sb][chan] = quantize_value(c->subband[n][sb][chan], c->quant[sb][chan]);
+}
+
+static void put_frame_header(DCAContext *c)
+{ /* Append the frame header fields to c->pb in the order below (104 bits in total). */
+    /* SYNC */
+    put_bits(&c->pb, 16, 0x7ffe);
+    put_bits(&c->pb, 16, 0x8001);
+
+    /* Frame type: normal */
+    put_bits(&c->pb, 1, 1);
+
+    /* Deficit sample count: none */
+    put_bits(&c->pb, 5, 31);
+
+    /* CRC is not present */
+    put_bits(&c->pb, 1, 0);
+
+    /* Number of PCM sample blocks */
+    put_bits(&c->pb, 7, SUBBAND_SAMPLES - 1);
+
+    /* Primary frame byte size */
+    put_bits(&c->pb, 14, c->frame_size - 1);
+
+    /* Audio channel arrangement */
+    put_bits(&c->pb, 6, c->channel_config);
+
+    /* Core audio sampling frequency */
+    put_bits(&c->pb, 4, bitstream_sfreq[c->samplerate_index]);
+
+    /* Transmission bit rate */
+    put_bits(&c->pb, 5, c->bitrate_index);
+
+    /* Embedded down mix: disabled */
+    put_bits(&c->pb, 1, 0);
+
+    /* Embedded dynamic range flag: not present */
+    put_bits(&c->pb, 1, 0);
+
+    /* Embedded time stamp flag: not present */
+    put_bits(&c->pb, 1, 0);
+
+    /* Auxiliary data flag: not present */
+    put_bits(&c->pb, 1, 0);
+
+    /* HDCD source: no */
+    put_bits(&c->pb, 1, 0);
+
+    /* Extension audio ID: N/A */
+    put_bits(&c->pb, 3, 0);
+
+    /* Extended audio data: not present */
+    put_bits(&c->pb, 1, 0);
+
+    /* Audio sync word insertion flag: after each sub-frame */
+    put_bits(&c->pb, 1, 0);
+
+    /* Low frequency effects flag: not present or 64x subsampling */
+    put_bits(&c->pb, 2, c->lfe_channel ? 2 : 0);
+
+    /* Predictor history switch flag: on */
+    put_bits(&c->pb, 1, 1);
+
+    /* No CRC */
+    /* Multirate interpolator switch: non-perfect reconstruction */
+    put_bits(&c->pb, 1, 0);
+
+    /* Encoder software revision: 7 */
+    put_bits(&c->pb, 4, 7);
+
+    /* Copy history: 0 */
+    put_bits(&c->pb, 2, 0);
+
+    /* Source PCM resolution: 16 bits, not DTS ES */
+    put_bits(&c->pb, 3, 0);
+
+    /* Front sum/difference coding: no */
+    put_bits(&c->pb, 1, 0);
+
+    /* Surrounds sum/difference coding: no */
+    put_bits(&c->pb, 1, 0);
+
+    /* Dialog normalization: 0 dB */
+    put_bits(&c->pb, 4, 0);
+}
+
+static void put_primary_audio_header(DCAContext *c)
+{
+    static const int bitlen[11] = { 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3 };
+    static const int thr[11]    = { 0, 1, 3, 3, 3, 3, 7, 7, 7, 7, 7 };
+    /* bitlen[i] is a field width and thr[i] the all-ones value of that width; entries 1..10 are written below. */
+    int ch, i;
+    /* Number of subframes */
+    put_bits(&c->pb, 4, SUBFRAMES - 1);
+
+    /* Number of primary audio channels */
+    put_bits(&c->pb, 3, c->fullband_channels - 1);
+
+    /* Subband activity count */
+    for (ch = 0; ch < c->fullband_channels; ch++)
+        put_bits(&c->pb, 5, DCA_SUBBANDS - 2);
+
+    /* High frequency VQ start subband */
+    for (ch = 0; ch < c->fullband_channels; ch++)
+        put_bits(&c->pb, 5, DCA_SUBBANDS - 1);
+
+    /* Joint intensity coding index: 0, 0 */
+    for (ch = 0; ch < c->fullband_channels; ch++)
+        put_bits(&c->pb, 3, 0);
+
+    /* Transient mode codebook: A4, A4 (arbitrary) */
+    for (ch = 0; ch < c->fullband_channels; ch++)
+        put_bits(&c->pb, 2, 0);
+
+    /* Scale factor code book: 7 bit linear, 7-bit sqrt table (for each channel) */
+    for (ch = 0; ch < c->fullband_channels; ch++)
+        put_bits(&c->pb, 3, 6);
+
+    /* Bit allocation quantizer select: linear 5-bit */
+    for (ch = 0; ch < c->fullband_channels; ch++)
+        put_bits(&c->pb, 3, 6);
+
+    /* Quantization index codebook select: dummy data
+       to avoid transmission of scale factor adjustment */
+    for (i = 1; i < 11; i++)
+        for (ch = 0; ch < c->fullband_channels; ch++)
+            put_bits(&c->pb, bitlen[i], thr[i]);
+
+    /* Scale factor adjustment index: not transmitted */
+    /* Audio header CRC check word: not transmitted */
+}
+
+static void put_subframe_samples(DCAContext *c, int ss, int band, int ch)
+{ /* Write the 8 quantized samples of subsubframe ss for one (band, channel). */
+    if (c->abits[band][ch] <= 7) { /* abits <= 7: 4 samples are combined into one code word, two words per call */
+        int sum, i, j;
+        for (i = 0; i < 8; i += 4) {
+            sum = 0;
+            for (j = 3; j >= 0; j--) { /* base-'levels' accumulation, sample i + 3 first */
+                sum *= quant_levels[c->abits[band][ch]];
+                sum += c->quantized[ss * 8 + i + j][band][ch];
+                sum += (quant_levels[c->abits[band][ch]] - 1) / 2; /* bias each sample by (levels - 1) / 2 */
+            }
+            put_bits(&c->pb, bit_consumption[c->abits[band][ch]] / 4, sum);
+        }
+    } else { /* abits > 7: one fixed-width field per sample */
+        int i;
+        for (i = 0; i < 8; i++) {
+            int bits = bit_consumption[c->abits[band][ch]] / 16;
+            int32_t mask = (1 << bits) - 1;
+            put_bits(&c->pb, bits, c->quantized[ss * 8 + i][band][ch] & mask); /* keep only the low 'bits' bits */
+        }
+    }
+}
+
+static void put_subframe(DCAContext *c, int subframe)
+{
+    int i, band, ss, ch;
+    /* Write one subframe: side information, optional LFE data, audio data, DSYNC. 'subframe' is not used. */
+    /* Subsubframes count */
+    put_bits(&c->pb, 2, SUBSUBFRAMES -1);
+
+    /* Partial subsubframe sample count: dummy */
+    put_bits(&c->pb, 3, 0);
+
+    /* Prediction mode: no ADPCM, in each channel and subband */
+    for (ch = 0; ch < c->fullband_channels; ch++)
+        for (band = 0; band < DCA_SUBBANDS; band++)
+            put_bits(&c->pb, 1, 0);
+
+    /* Prediction VQ address: not transmitted */
+    /* Bit allocation index */
+    for (ch = 0; ch < c->fullband_channels; ch++)
+        for (band = 0; band < DCA_SUBBANDS; band++)
+            put_bits(&c->pb, 5, c->abits[band][ch]);
+
+    if (SUBSUBFRAMES > 1) {
+        /* Transition mode: none for each channel and subband */
+        for (ch = 0; ch < c->fullband_channels; ch++)
+            for (band = 0; band < DCA_SUBBANDS; band++)
+                put_bits(&c->pb, 1, 0); /* codebook A4 */
+    }
+
+    /* Scale factors */
+    for (ch = 0; ch < c->fullband_channels; ch++)
+        for (band = 0; band < DCA_SUBBANDS; band++)
+            put_bits(&c->pb, 7, c->scale_factor[band][ch]);
+
+    /* Joint subband scale factor codebook select: not transmitted */
+    /* Scale factors for joint subband coding: not transmitted */
+    /* Stereo down-mix coefficients: not transmitted */
+    /* Dynamic range coefficient: not transmitted */
+    /* Side information CRC check word: not transmitted */
+    /* VQ encoded high frequency subbands: not transmitted */
+
+    /* LFE data: 8 samples and scalefactor */
+    if (c->lfe_channel) {
+        for (i = 0; i < DCA_LFE_SAMPLES; i++)
+            put_bits(&c->pb, 8, quantize_value(c->downsampled_lfe[i], c->lfe_quant) & 0xff); /* low 8 bits of each quantized sample */
+        put_bits(&c->pb, 8, c->lfe_scale_factor);
+    }
+
+    /* Audio data (subsubframes) */
+    for (ss = 0; ss < SUBSUBFRAMES ; ss++)
+        for (ch = 0; ch < c->fullband_channels; ch++)
+            for (band = 0; band < DCA_SUBBANDS; band++)
+                    put_subframe_samples(c, ss, band, ch);
+
+    /* DSYNC */
+    put_bits(&c->pb, 16, 0xffff);
+}
+
+static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
+                        const AVFrame *frame, int *got_packet_ptr)
+{
+    DCAContext *c = avctx->priv_data;
+    const int32_t *samples;
+    int ret, i;
+    /* Encode one frame of interleaved int32_t samples into a packet of c->frame_size bytes; returns 0 or the allocation error. */
+    if ((ret = ff_alloc_packet2(avctx, avpkt, c->frame_size )) < 0)
+        return ret;
+
+    samples = (const int32_t *)frame->data[0];
+
+    subband_transform(c, samples);
+    if (c->lfe_channel)
+        lfe_downsample(c, samples);
+
+    calc_masking(c, samples);
+    find_peaks(c);
+    assign_bits(c);
+    calc_scales(c);
+    quantize_all(c);
+    shift_history(c, samples); /* must come after calc_masking(), which still reads the old history */
+
+    init_put_bits(&c->pb, avpkt->data, avpkt->size);
+    put_frame_header(c);
+    put_primary_audio_header(c);
+    for (i = 0; i < SUBFRAMES; i++)
+        put_subframe(c, i);
+
+    flush_put_bits(&c->pb);
+
+    avpkt->pts      = frame->pts;
+    avpkt->duration = ff_samples_to_time_base(avctx, frame->nb_samples);
+    avpkt->size     = c->frame_size; /* the size requested above and declared in the frame header, not one byte more */
+    *got_packet_ptr = 1;
+    return 0;
+}
+
+static const AVCodecDefault defaults[] = { /* NULL-terminated list, referenced by ff_dca_encoder.defaults */
+    { "b",          "1411200" }, /* presumably the default bitrate option, in bits per second - confirm */
+    { NULL },
+};
+
+AVCodec ff_dca_encoder = { /* codec description tying encode_init()/encode_frame() to AV_CODEC_ID_DTS */
+    .name                  = "dca",
+    .long_name             = NULL_IF_CONFIG_SMALL("DCA (DTS Coherent Acoustics)"),
+    .type                  = AVMEDIA_TYPE_AUDIO,
+    .id                    = AV_CODEC_ID_DTS,
+    .priv_data_size        = sizeof(DCAContext),
+    .init                  = encode_init,
+    .encode2               = encode_frame,
+    .capabilities          = CODEC_CAP_EXPERIMENTAL,
+    .sample_fmts           = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S32, /* encode_frame() reads frame->data[0] as int32_t */
+                                                            AV_SAMPLE_FMT_NONE },
+    .supported_samplerates = sample_rates,
+    .channel_layouts       = (const uint64_t[]) { AV_CH_LAYOUT_MONO,
+                                                  AV_CH_LAYOUT_STEREO,
+                                                  AV_CH_LAYOUT_2_2,
+                                                  AV_CH_LAYOUT_5POINT0,
+                                                  AV_CH_LAYOUT_5POINT1,
+                                                  0 }, /* 0 terminates the list */
+    .defaults              = defaults,
+};
diff --git a/libavcodec/dcaenc.h b/libavcodec/dcaenc.h
new file mode 100644
index 0000000..20f557b
--- /dev/null
+++ b/libavcodec/dcaenc.h
@@ -0,0 +1,113 @@
+/*
+ * DCA encoder tables
+ * Copyright (C) 2008-2012 Alexander E. Patrakov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DCAENC_H
+#define AVCODEC_DCAENC_H
+
+#include <stdint.h>
+
+typedef struct { /* multiplier/shift pair consumed by quantize_value() in dcaenc.c */
+    int32_t m; /* passed to mul32() as the multiplier - presumably the mantissa */
+    int32_t e; /* right-shift count applied after the multiply - presumably the exponent */
+} softfloat;
+
+static const int sample_rates[] = { /* 0-terminated; exported through ff_dca_encoder.supported_samplerates */
+    8000, 16000, 32000, 11025, 22050, 44100, 12000, 24000, 48000, 0,
+};
+
+static const uint8_t bitstream_sfreq[] = { 1, 2, 3, 6, 7, 8, 11, 12, 13 }; /* 4-bit header code, indexed by samplerate_index in put_frame_header() */
+
+/* Auditory filter center frequencies (fc) and bandwidths (erb), in Hz, 25 entries each.
+ * The last two are made up, because there is no scientific data.
+ */
+static const uint16_t fc[] = {
+    50, 150, 250, 350, 450, 570, 700, 840, 1000, 1170, 1370, 1600, 1850, 2150,
+    2500, 2900, 3400, 4000, 4800, 5800, 7000, 8500, 10500, 13500, 17000
+};
+
+static const uint16_t erb[] = {
+    80, 100, 100, 100, 110, 120, 140, 150, 160, 190, 210, 240, 280,
+    320, 380, 450, 550, 700, 900, 1100, 1300, 1800, 2500, 3500, 4500
+};
+
+static const softfloat stepsize_inv[27] = { /* indexed by abits in calc_one_scale(); entry 0 is an all-zero placeholder */
+    {0, 0}, {1342177360, 21}, {2147483647, 21}, {1342177360, 20},
+    {1819901661, 20}, {2147483647, 20}, {1278263843, 19}, {1579032492, 19},
+    {1412817763, 18}, {1220162327, 17}, {1118482133, 16}, {1917391412, 16},
+    {1766017772, 15}, {1525212826, 14}, {1290553940, 13}, {2097179000, 13},
+    {1677683200, 12}, {1497972244, 11}, {1310893147, 10}, {1165354136, 9},
+    {1748031204, 9}, {1542092044, 8}, {1636178017, 7}, {1636178017, 6},
+    {1636178017, 5}, {1636178017, 4}, {1636178017, 3},
+};
+
+static const softfloat scalefactor_inv[128] = { /* indexed by scale factor index in calc_one_scale(); entries 124..127 are identical */
+    {2147483647, 1}, {2147483647, 1}, {2147483647, 2}, {2147483647, 2},
+    {2147483647, 2}, {2147483647, 2}, {1431655765, 2}, {1431655765, 2},
+    {1431655765, 2}, {2147483647, 3}, {2147483647, 3}, {1717986918, 3},
+    {1431655765, 3}, {1227133513, 3}, {1227133513, 3}, {2147483647, 4},
+    {1717986918, 4}, {1561806289, 4}, {1431655765, 4}, {1227133513, 4},
+    {2147483647, 5}, {1908874353, 5}, {1717986918, 5}, {1493901668, 5},
+    {1321528398, 5}, {1145324612, 5}, {2021161080, 6}, {1808407282, 6},
+    {1561806289, 6}, {1374389534, 6}, {1227133513, 6}, {2147483647, 7},
+    {1908874353, 7}, {1676084798, 7}, {1477838209, 7}, {1296593900, 7},
+    {1145324612, 7}, {2021161080, 8}, {1773405851, 8}, {1561806289, 8},
+    {1374389534, 8}, {1216273924, 8}, {2139127680, 9}, {1882725390, 9},
+    {1660893697, 9}, {1462116526, 9}, {1287484341, 9}, {1135859119, 9},
+    {1999112050, 10}, {1762037865, 10}, {1552982525, 10}, {1367551775, 10},
+    {1205604855, 10}, {2124660150, 11}, {1871509153, 11}, {1648443220, 11},
+    {1452459217, 11}, {1279990253, 11}, {1127704233, 11}, {1987368509, 12},
+    {1750814693, 12}, {1542632939, 12}, {1359099663, 12}, {1197398995, 12},
+    {2109880792, 13}, {1858853132, 13}, {1638006149, 13}, {1443165385, 13},
+    {1271479187, 13}, {1120235993, 13}, {1973767086, 14}, {1739045674, 14},
+    {1532153461, 14}, {1349922194, 14}, {1189384493, 14}, {2095804865, 15},
+    {1846464029, 15}, {1626872524, 15}, {1433347133, 15}, {1262853884, 15},
+    {1112619678, 15}, {1960569045, 16}, {1727349015, 16}, {1521881227, 16},
+    {1340842289, 16}, {1181357555, 16}, {2081669156, 17}, {1834047752, 17},
+    {1615889229, 17}, {1423675973, 17}, {1254322457, 17}, {1105123583, 17},
+    {1947330755, 18}, {1715693602, 18}, {1511607799, 18}, {1331801790, 18},
+    {1173384427, 18}, {2067616532, 19}, {1821667648, 19}, {1604980024, 19},
+    {1414066955, 19}, {1245861410, 19}, {1097665748, 19}, {1934193616, 20},
+    {1704119624, 20}, {1501412075, 20}, {1322817107, 20}, {1165466323, 20},
+    {2053666205, 21}, {1809379407, 21}, {1594151671, 21}, {1404526328, 21},
+    {1237455941, 21}, {1090259329, 21}, {1921143210, 22}, {1692621231, 22},
+    {1491281857, 22}, {1313892269, 22}, {1157603482, 22}, {2039810470, 23},
+    {1797172644, 23}, {1583396912, 23}, {1395050052, 23}, {1229107276, 23},
+    {1082903494, 23}, {1082903494, 23}, {1082903494, 23}, {1082903494, 23},
+};
+
+/* manually derived from
+ * Table B.5: Selection of quantization levels and codebooks
+ * FIXME: will become invalid when Huffman codes are introduced.
+ */
+static const int bit_consumption[27] = { /* indexed by abits; put_subframe_samples() writes value/4 bits per 4 samples (abits <= 7) or value/16 per sample */
+    -8, 28, 40, 48, 52, 60, 68, 76, 80, 96,
+    112, 128, 144, 160, 176, 192, 208, 224, 240, 256,
+    272, 288, 304, 320, 336, 352, 368,
+};
+
+/* Table B.5: Selection of quantization levels and codebooks */
+static const int quant_levels[27] = { /* indexed by abits; (value - 1) / 2 is the bound checked in calc_one_scale() */
+    1, 3, 5, 7, 9, 13, 17, 25, 32, 64,
+    128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536,
+    131072, 262144, 524288, 1048576, 2097152, 4194304, 8388608,
+};
+
+#endif /* AVCODEC_DCAENC_H */
diff --git a/libavcodec/dcahuff.h b/libavcodec/dcahuff.h
index 254fc76..cbc8429 100644
--- a/libavcodec/dcahuff.h
+++ b/libavcodec/dcahuff.h
@@ -3,20 +3,20 @@
  * Copyright (C) 2004 Gildas Bazin
  * Copyright (C) 2007 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/dct-test.c b/libavcodec/dct-test.c
index a531c88..bc6b077 100644
--- a/libavcodec/dct-test.c
+++ b/libavcodec/dct-test.c
@@ -2,20 +2,20 @@
  * (c) 2001 Fabrice Bellard
  *     2007 Marc Hoffman <marc.hoffman@analog.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -55,18 +55,32 @@ struct algo {
     int nonspec;
 };
 
-static const struct algo fdct_tab[4] = {
+static const struct algo fdct_tab[] = {
     { "REF-DBL",     ff_ref_fdct,          FF_IDCT_PERM_NONE },
     { "FAAN",        ff_faandct,           FF_IDCT_PERM_NONE },
     { "IJG-AAN-INT", ff_fdct_ifast,        FF_IDCT_PERM_NONE },
     { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, FF_IDCT_PERM_NONE },
 };
 
-static const struct algo idct_tab[4] = {
+static void ff_prores_idct_wrap(int16_t *dst){
+    DECLARE_ALIGNED(16, static int16_t, qmat)[64];
+    int i;
+    /* Adapter so ff_prores_idct() can be listed in idct_tab with a single block argument. */
+    for(i=0; i<64; i++){
+        qmat[i]=4; /* flat matrix: every one of the 64 entries is 4 */
+    }
+    ff_prores_idct(dst, qmat);
+    for(i=0; i<64; i++) {
+         dst[i] -= 512; /* subtract a constant 512 from every output value */
+    }
+}
+
+static const struct algo idct_tab[] = {
     { "FAANI",       ff_faanidct,          FF_IDCT_PERM_NONE },
     { "REF-DBL",     ff_ref_idct,          FF_IDCT_PERM_NONE },
     { "INT",         ff_j_rev_dct,         FF_IDCT_PERM_LIBMPEG2 },
     { "SIMPLE-C",    ff_simple_idct_8,     FF_IDCT_PERM_NONE },
+    { "PR-C",        ff_prores_idct_wrap,  FF_IDCT_PERM_NONE, 0, 1 },
 };
 
 #if ARCH_ARM
@@ -88,7 +102,7 @@ static const struct algo idct_tab_arch[] = { 0 };
 DECLARE_ALIGNED(16, static int16_t, block)[64];
 DECLARE_ALIGNED(8,  static int16_t, block1)[64];
 
-static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng)
+static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng, int vals)
 {
     int i, j;
 
@@ -97,7 +111,7 @@ static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng)
     switch (test) {
     case 0:
         for (i = 0; i < 64; i++)
-            block[i] = (av_lfg_get(prng) % 512) - 256;
+            block[i] = (av_lfg_get(prng) % (2*vals)) -vals;
         if (is_idct) {
             ff_ref_fdct(block);
             for (i = 0; i < 64; i++)
@@ -106,11 +120,13 @@ static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng)
         break;
     case 1:
         j = av_lfg_get(prng) % 10 + 1;
-        for (i = 0; i < j; i++)
-            block[av_lfg_get(prng) % 64] = av_lfg_get(prng) % 512 - 256;
+        for (i = 0; i < j; i++) {
+            int idx = av_lfg_get(prng) % 64;
+            block[idx] = av_lfg_get(prng) % (2*vals) -vals;
+        }
         break;
     case 2:
-        block[ 0] = av_lfg_get(prng) % 4096 - 2048;
+        block[ 0] = av_lfg_get(prng) % (16*vals) - (8*vals);
         block[63] = (block[0] & 1) ^ 1;
         break;
     }
@@ -135,6 +151,10 @@ static void permute(int16_t dst[64], const int16_t src[64],
         for (i = 0; i < 64; i++)
             dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
         break;
+    case FF_IDCT_PERM_TRANSPOSE:
+        for (i = 0; i < 64; i++)
+            dst[(i>>3) | ((i<<3)&0x38)] = src[i];
+        break;
     default:
         for (i = 0; i < 64; i++)
             dst[i] = src[i];
@@ -142,7 +162,7 @@ static void permute(int16_t dst[64], const int16_t src[64],
     }
 }
 
-static int dct_error(const struct algo *dct, int test, int is_idct, int speed)
+static int dct_error(const struct algo *dct, int test, int is_idct, int speed, const int bits)
 {
     void (*ref)(int16_t *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
     int it, i, scale;
@@ -152,6 +172,7 @@ static int dct_error(const struct algo *dct, int test, int is_idct, int speed)
     int maxout = 0;
     int blockSumErrMax = 0, blockSumErr;
     AVLFG prng;
+    const int vals=1<<bits;
     double omse, ome;
     int spec_err;
 
@@ -162,7 +183,7 @@ static int dct_error(const struct algo *dct, int test, int is_idct, int speed)
     for (i = 0; i < 64; i++)
         sysErr[i] = 0;
     for (it = 0; it < NB_ITS; it++) {
-        init_block(block1, test, is_idct, &prng);
+        init_block(block1, test, is_idct, &prng, vals);
         permute(block, block1, dct->perm_type);
 
         dct->func(block);
@@ -176,6 +197,9 @@ static int dct_error(const struct algo *dct, int test, int is_idct, int speed)
         }
 
         ref(block1);
+        if (!strcmp(dct->name, "PR-SSE2"))
+            for (i = 0; i < 64; i++)
+                block1[i] = av_clip(block1[i], 4-512, 1019-512);
 
         blockSumErr = 0;
         for (i = 0; i < 64; i++) {
@@ -208,7 +232,7 @@ static int dct_error(const struct algo *dct, int test, int is_idct, int speed)
 
     spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
 
-    printf("%s %s: ppe=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
+    printf("%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
            is_idct ? "IDCT" : "DCT", dct->name, err_inf,
            omse, ome, (double) sysErrMax / NB_ITS,
            maxout, blockSumErrMax);
@@ -220,20 +244,21 @@ static int dct_error(const struct algo *dct, int test, int is_idct, int speed)
         return 0;
 
     /* speed test */
-    init_block(block, test, is_idct, &prng);
+
+    init_block(block, test, is_idct, &prng, vals);
     permute(block1, block, dct->perm_type);
 
-    ti = av_gettime();
+    ti = av_gettime_relative();
     it1 = 0;
     do {
         for (it = 0; it < NB_ITS_SPEED; it++) {
             memcpy(block, block1, sizeof(block));
             dct->func(block);
         }
+        emms_c();
         it1 += NB_ITS_SPEED;
-        ti1 = av_gettime() - ti;
+        ti1 = av_gettime_relative() - ti;
     } while (ti1 < 1000000);
-    emms_c();
 
     printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
            (double) it1 * 1000.0 / (double) ti1);
@@ -358,13 +383,32 @@ static void idct248_error(const char *name,
             if (v > err_max)
                 err_max = v;
         }
+#if 0
+        printf("ref=\n");
+        for(i=0;i<8;i++) {
+            int j;
+            for(j=0;j<8;j++) {
+                printf(" %3d", img_dest1[i*8+j]);
+            }
+            printf("\n");
+        }
+
+        printf("out=\n");
+        for(i=0;i<8;i++) {
+            int j;
+            for(j=0;j<8;j++) {
+                printf(" %3d", img_dest[i*8+j]);
+            }
+            printf("\n");
+        }
+#endif
     }
     printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
 
     if (!speed)
         return;
 
-    ti = av_gettime();
+    ti = av_gettime_relative();
     it1 = 0;
     do {
         for (it = 0; it < NB_ITS_SPEED; it++) {
@@ -372,10 +416,10 @@ static void idct248_error(const char *name,
                 block[i] = block1[i];
             idct248_put(img_dest, 8, block);
         }
+        emms_c();
         it1 += NB_ITS_SPEED;
-        ti1 = av_gettime() - ti;
+        ti1 = av_gettime_relative() - ti;
     } while (ti1 < 1000000);
-    emms_c();
 
     printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
            (double) it1 * 1000.0 / (double) ti1);
@@ -383,10 +427,11 @@ static void idct248_error(const char *name,
 
 static void help(void)
 {
-    printf("dct-test [-i] [<test-number>]\n"
+    printf("dct-test [-i] [<test-number>] [<bits>]\n"
            "test-number 0 -> test with random matrixes\n"
            "            1 -> test with random sparse matrixes\n"
            "            2 -> do 3. test from mpeg4 std\n"
+           "bits        Number of time domain bits to use, 8 is default\n"
            "-i          test IDCT implementations\n"
            "-4          test IDCT248 implementations\n"
            "-t          speed test\n");
@@ -403,6 +448,7 @@ int main(int argc, char **argv)
     int test = 1;
     int speed = 0;
     int err = 0;
+    int bits=8;
 
     ff_ref_dct_init();
 
@@ -429,8 +475,9 @@ int main(int argc, char **argv)
 
     if (optind < argc)
         test = atoi(argv[optind]);
+    if(optind+1 < argc) bits= atoi(argv[optind+1]);
 
-    printf("Libav DCT/IDCT test\n");
+    printf("ffmpeg DCT/IDCT test\n");
 
     if (test_248_dct) {
         idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
@@ -438,20 +485,20 @@ int main(int argc, char **argv)
         const int cpu_flags = av_get_cpu_flags();
         if (test_idct) {
             for (i = 0; i < FF_ARRAY_ELEMS(idct_tab); i++)
-                err |= dct_error(&idct_tab[i], test, test_idct, speed);
+                err |= dct_error(&idct_tab[i], test, test_idct, speed, bits);
 
             for (i = 0; idct_tab_arch[i].name; i++)
                 if (!(~cpu_flags & idct_tab_arch[i].cpu_flag))
-                    err |= dct_error(&idct_tab_arch[i], test, test_idct, speed);
+                    err |= dct_error(&idct_tab_arch[i], test, test_idct, speed, bits);
         }
 #if CONFIG_FDCTDSP
         else {
             for (i = 0; i < FF_ARRAY_ELEMS(fdct_tab); i++)
-                err |= dct_error(&fdct_tab[i], test, test_idct, speed);
+                err |= dct_error(&fdct_tab[i], test, test_idct, speed, bits);
 
             for (i = 0; fdct_tab_arch[i].name; i++)
                 if (!(~cpu_flags & fdct_tab_arch[i].cpu_flag))
-                    err |= dct_error(&fdct_tab_arch[i], test, test_idct, speed);
+                    err |= dct_error(&fdct_tab_arch[i], test, test_idct, speed, bits);
         }
 #endif /* CONFIG_FDCTDSP */
     }
diff --git a/libavcodec/dct.c b/libavcodec/dct.c
index 4dbbff8..26b4851 100644
--- a/libavcodec/dct.c
+++ b/libavcodec/dct.c
@@ -4,20 +4,20 @@
  * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
  * Copyright (c) 2010 Vitor Sessak
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
@@ -190,7 +190,7 @@ av_cold int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType inverse)
         ff_init_ff_cos_tabs(nbits + 2);
 
         s->costab = ff_cos_tabs[nbits + 2];
-        s->csc2   = av_malloc(n / 2 * sizeof(FFTSample));
+        s->csc2   = av_malloc_array(n / 2, sizeof(FFTSample));
 
         if (ff_rdft_init(&s->rdft, nbits, inverse == DCT_III) < 0) {
             av_free(s->csc2);
diff --git a/libavcodec/dct.h b/libavcodec/dct.h
index 3fd4e27..717f21b 100644
--- a/libavcodec/dct.h
+++ b/libavcodec/dct.h
@@ -4,24 +4,24 @@
  * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
  * Copyright (c) 2010 Vitor Sessak
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-#ifndef AVCODEC_DCT_H
+#if !defined(AVCODEC_DCT_H) && (!defined(FFT_FLOAT) || FFT_FLOAT)
 #define AVCODEC_DCT_H
 
 #include <stdint.h>
@@ -59,5 +59,8 @@ void ff_fdct248_islow_8(int16_t *data);
 void ff_fdct248_islow_10(int16_t *data);
 
 void ff_j_rev_dct(int16_t *data);
+void ff_j_rev_dct4(int16_t *data);
+void ff_j_rev_dct2(int16_t *data);
+void ff_j_rev_dct1(int16_t *data);
 
 #endif /* AVCODEC_DCT_H */
diff --git a/libavcodec/dct32.h b/libavcodec/dct32.h
index 110338d..f4b2471 100644
--- a/libavcodec/dct32.h
+++ b/libavcodec/dct32.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/dct32_fixed.c b/libavcodec/dct32_fixed.c
index 64efe8b..9025d5e 100644
--- a/libavcodec/dct32_fixed.c
+++ b/libavcodec/dct32_fixed.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/dct32_float.c b/libavcodec/dct32_float.c
index ef37ce9..597c9bb 100644
--- a/libavcodec/dct32_float.c
+++ b/libavcodec/dct32_float.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/dct32_template.c b/libavcodec/dct32_template.c
index 272e0db..fb53d53 100644
--- a/libavcodec/dct32_template.c
+++ b/libavcodec/dct32_template.c
@@ -2,20 +2,20 @@
  * Template for the Discrete Cosine Transform for 32 samples
  * Copyright (c) 2001, 2002 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/dctref.c b/libavcodec/dctref.c
index ae3dec5..851014b 100644
--- a/libavcodec/dctref.c
+++ b/libavcodec/dctref.c
@@ -2,20 +2,20 @@
  * reference discrete cosine transform (double precision)
  * Copyright (C) 2009 Dylan Yudaken
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/dctref.h b/libavcodec/dctref.h
index a93b70d..f6fde88 100644
--- a/libavcodec/dctref.h
+++ b/libavcodec/dctref.h
@@ -2,20 +2,20 @@
  * reference discrete cosine transform (double precision)
  * Copyright (C) 2009 Dylan Yudaken
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/dfa.c b/libavcodec/dfa.c
index 6fa4edc..f13291e 100644
--- a/libavcodec/dfa.c
+++ b/libavcodec/dfa.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2011 Konstantin Shishkov
  * based on work by Vladimir "VAG" Gneushev
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,6 +26,7 @@
 #include "bytestream.h"
 #include "internal.h"
 
+#include "libavutil/avassert.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/mem.h"
 
@@ -37,12 +38,13 @@ typedef struct DfaContext {
 static av_cold int dfa_decode_init(AVCodecContext *avctx)
 {
     DfaContext *s = avctx->priv_data;
-    int ret;
 
     avctx->pix_fmt = AV_PIX_FMT_PAL8;
 
-    if ((ret = av_image_check_size(avctx->width, avctx->height, 0, avctx)) < 0)
-        return ret;
+    if (!avctx->width || !avctx->height)
+        return AVERROR_INVALIDDATA;
+
+    av_assert0(av_image_check_size(avctx->width, avctx->height, 0, avctx) >= 0);
 
     s->frame_buf = av_mallocz(avctx->width * avctx->height);
     if (!s->frame_buf)
@@ -70,6 +72,8 @@ static int decode_tsw1(GetByteContext *gb, uint8_t *frame, int width, int height
 
     segments = bytestream2_get_le32(gb);
     offset   = bytestream2_get_le32(gb);
+    if (segments == 0 && offset == frame_end - frame)
+        return 0; // skip frame
     if (frame_end - frame <= offset)
         return AVERROR_INVALIDDATA;
     frame += offset;
@@ -252,6 +256,9 @@ static int decode_wdlt(GetByteContext *gb, uint8_t *frame, int width, int height
             y        += skip_lines;
             segments = bytestream2_get_le16(gb);
         }
+
+        if (frame_end <= frame)
+            return AVERROR_INVALIDDATA;
         if (segments & 0x8000) {
             frame[width - 1] = segments & 0xFF;
             segments = bytestream2_get_le16(gb);
@@ -289,7 +296,7 @@ static int decode_wdlt(GetByteContext *gb, uint8_t *frame, int width, int height
 static int decode_tdlt(GetByteContext *gb, uint8_t *frame, int width, int height)
 {
     const uint8_t *frame_end = frame + width * height;
-    int segments = bytestream2_get_le32(gb);
+    uint32_t segments = bytestream2_get_le32(gb);
     int skip, copy;
 
     while (segments--) {
@@ -338,11 +345,10 @@ static int dfa_decode_frame(AVCodecContext *avctx,
     uint8_t *dst;
     int ret;
     int i, pal_elems;
+    int version = avctx->extradata_size==2 ? AV_RL16(avctx->extradata) : 0;
 
-    if ((ret = ff_get_buffer(avctx, frame, 0))) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     bytestream2_init(&gb, avpkt->data, avpkt->size);
     while (bytestream2_get_bytes_left(&gb) > 0) {
@@ -355,7 +361,7 @@ static int dfa_decode_frame(AVCodecContext *avctx,
             pal_elems = FFMIN(chunk_size / 3, 256);
             for (i = 0; i < pal_elems; i++) {
                 s->pal[i] = bytestream2_get_be24(&gb) << 2;
-                s->pal[i] |= (s->pal[i] >> 6) & 0x333;
+                s->pal[i] |= 0xFFU << 24 | (s->pal[i] >> 6) & 0x30303;
             }
             frame->palette_has_changed = 1;
         } else if (chunk_type <= 9) {
@@ -375,9 +381,17 @@ static int dfa_decode_frame(AVCodecContext *avctx,
     buf = s->frame_buf;
     dst = frame->data[0];
     for (i = 0; i < avctx->height; i++) {
-        memcpy(dst, buf, avctx->width);
+        if(version == 0x100) {
+            int j;
+            for(j = 0; j < avctx->width; j++) {
+                dst[j] = buf[ (i&3)*(avctx->width /4) + (j/4) +
+                             ((j&3)*(avctx->height/4) + (i/4))*avctx->width];
+            }
+        } else {
+            memcpy(dst, buf, avctx->width);
+            buf += avctx->width;
+        }
         dst += frame->linesize[0];
-        buf += avctx->width;
     }
     memcpy(frame->data[1], s->pal, sizeof(s->pal));
 
diff --git a/libavcodec/dirac.c b/libavcodec/dirac.c
index ed0ea9f..55553d6 100644
--- a/libavcodec/dirac.c
+++ b/libavcodec/dirac.c
@@ -1,28 +1,29 @@
 /*
  * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
  * Copyright (C) 2009 David Conrad
+ * Copyright (C) 2011 Jordi Ortiz
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
  * @file
  * Dirac Decoder
- * @author Marco Gerards <marco@gnu.org>
+ * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
  */
 
 #include "libavutil/imgutils.h"
@@ -32,7 +33,7 @@
 #include "internal.h"
 #include "mpeg12data.h"
 
-// defaults for source parameters
+/* defaults for source parameters */
 static const dirac_source_params dirac_source_parameters_defaults[] = {
     { 640,  480,  2, 0, 0, 1,  1, 640,  480,  0, 0, 1, 0 },
     { 176,  120,  2, 0, 0, 9,  2, 176,  120,  0, 0, 1, 1 },
@@ -43,7 +44,6 @@ static const dirac_source_params dirac_source_parameters_defaults[] = {
     { 704,  576,  2, 0, 1, 10, 3, 704,  576,  0, 0, 1, 2 },
     { 720,  480,  1, 1, 0, 4,  2, 704,  480,  8, 0, 3, 1 },
     { 720,  576,  1, 1, 1, 3,  3, 704,  576,  8, 0, 3, 2 },
-
     { 1280, 720,  1, 0, 1, 7,  1, 1280, 720,  0, 0, 3, 3 },
     { 1280, 720,  1, 0, 1, 6,  1, 1280, 720,  0, 0, 3, 3 },
     { 1920, 1080, 1, 1, 1, 4,  1, 1920, 1080, 0, 0, 3, 3 },
@@ -52,7 +52,6 @@ static const dirac_source_params dirac_source_parameters_defaults[] = {
     { 1920, 1080, 1, 0, 1, 6,  1, 1920, 1080, 0, 0, 3, 3 },
     { 2048, 1080, 0, 0, 1, 2,  1, 2048, 1080, 0, 0, 4, 4 },
     { 4096, 2160, 0, 0, 1, 2,  1, 4096, 2160, 0, 0, 4, 4 },
-
     { 3840, 2160, 1, 0, 1, 7,  1, 3840, 2160, 0, 0, 3, 3 },
     { 3840, 2160, 1, 0, 1, 6,  1, 3840, 2160, 0, 0, 3, 3 },
     { 7680, 4320, 1, 0, 1, 7,  1, 3840, 2160, 0, 0, 3, 3 },
@@ -121,6 +120,7 @@ static int parse_source_parameters(AVCodecContext *avctx, GetBitContext *gb,
     AVRational frame_rate = {0,0};
     unsigned luma_depth = 8, luma_offset = 16;
     int idx;
+    int chroma_x_shift, chroma_y_shift;
 
     /* [DIRAC_STD] 10.3.2 Frame size. frame_size(video_params) */
     /* [DIRAC_STD] custom_dimensions_flag */
@@ -135,7 +135,7 @@ static int parse_source_parameters(AVCodecContext *avctx, GetBitContext *gb,
     if (get_bits1(gb))
         /* [DIRAC_STD] CHROMA_FORMAT_INDEX */
         source->chroma_format = svq3_get_ue_golomb(gb);
-    if (source->chroma_format > 2) {
+    if (source->chroma_format > 2U) {
         av_log(avctx, AV_LOG_ERROR, "Unknown chroma format %d\n",
                source->chroma_format);
         return AVERROR_INVALIDDATA;
@@ -146,14 +146,14 @@ static int parse_source_parameters(AVCodecContext *avctx, GetBitContext *gb,
     if (get_bits1(gb))
         /* [DIRAC_STD] SOURCE_SAMPLING */
         source->interlaced = svq3_get_ue_golomb(gb);
-    if (source->interlaced > 1)
+    if (source->interlaced > 1U)
         return AVERROR_INVALIDDATA;
 
     /* [DIRAC_STD] 10.3.5 Frame Rate. frame_rate(video_params) */
     if (get_bits1(gb)) { /* [DIRAC_STD] custom_frame_rate_flag */
         source->frame_rate_index = svq3_get_ue_golomb(gb);
 
-        if (source->frame_rate_index > 10)
+        if (source->frame_rate_index > 10U)
             return AVERROR_INVALIDDATA;
 
         if (!source->frame_rate_index) {
@@ -180,7 +180,7 @@ static int parse_source_parameters(AVCodecContext *avctx, GetBitContext *gb,
          /* [DIRAC_STD] index */
         source->aspect_ratio_index = svq3_get_ue_golomb(gb);
 
-        if (source->aspect_ratio_index > 6)
+        if (source->aspect_ratio_index > 6U)
             return AVERROR_INVALIDDATA;
 
         if (!source->aspect_ratio_index) {
@@ -192,7 +192,7 @@ static int parse_source_parameters(AVCodecContext *avctx, GetBitContext *gb,
      *  aspect ratio values */
     if (source->aspect_ratio_index > 0)
         avctx->sample_aspect_ratio =
-                dirac_preset_aspect_ratios[source->aspect_ratio_index-1];
+            dirac_preset_aspect_ratios[source->aspect_ratio_index-1];
 
     /* [DIRAC_STD] 10.3.7 Clean area. clean_area(video_params) */
     if (get_bits1(gb)) { /* [DIRAC_STD] custom_clean_area_flag */
@@ -213,10 +213,10 @@ static int parse_source_parameters(AVCodecContext *avctx, GetBitContext *gb,
         /* [DIRAC_STD] index */
         source->pixel_range_index = svq3_get_ue_golomb(gb);
 
-        if (source->pixel_range_index > 4)
+        if (source->pixel_range_index > 4U)
             return AVERROR_INVALIDDATA;
 
-        // This assumes either fullrange or MPEG levels only
+        /* This assumes either fullrange or MPEG levels only */
         if (!source->pixel_range_index) {
             luma_offset = svq3_get_ue_golomb(gb);
             luma_depth  = av_log2(svq3_get_ue_golomb(gb))+1;
@@ -234,16 +234,22 @@ static int parse_source_parameters(AVCodecContext *avctx, GetBitContext *gb,
     }
 
     if (luma_depth > 8)
-        av_log(avctx, AV_LOG_WARNING, "Bitdepth greater than 8");
+        av_log(avctx, AV_LOG_WARNING, "Bitdepth greater than 8\n");
 
     avctx->pix_fmt = dirac_pix_fmt[!luma_offset][source->chroma_format];
+    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_x_shift, &chroma_y_shift);
+    if ((source->width % (1<<chroma_x_shift)) || (source->height % (1<<chroma_y_shift))) {
+        av_log(avctx, AV_LOG_ERROR, "Dimensions must be a integer multiply of the chroma subsampling\n");
+        return AVERROR_INVALIDDATA;
+    }
+
 
     /* [DIRAC_STD] 10.3.9 Colour specification. colour_spec(video_params) */
     if (get_bits1(gb)) { /* [DIRAC_STD] custom_colour_spec_flag */
          /* [DIRAC_STD] index */
         idx = source->color_spec_index = svq3_get_ue_golomb(gb);
 
-        if (source->color_spec_index > 4)
+        if (source->color_spec_index > 4U)
             return AVERROR_INVALIDDATA;
 
         avctx->color_primaries = dirac_color_presets[idx].color_primaries;
@@ -254,7 +260,7 @@ static int parse_source_parameters(AVCodecContext *avctx, GetBitContext *gb,
             /* [DIRAC_STD] 10.3.9.1 Colour primaries */
             if (get_bits1(gb)) {
                 idx = svq3_get_ue_golomb(gb);
-                if (idx < 3)
+                if (idx < 3U)
                     avctx->color_primaries = dirac_primaries[idx];
             }
             /* [DIRAC_STD] 10.3.9.2 Colour matrix */
@@ -301,10 +307,10 @@ int avpriv_dirac_parse_sequence_header(AVCodecContext *avctx, GetBitContext *gb,
     else if (version_major > 2)
         av_log(avctx, AV_LOG_WARNING, "Stream may have unhandled features\n");
 
-    if (video_format > 20)
+    if (video_format > 20U)
         return AVERROR_INVALIDDATA;
 
-    // Fill in defaults for the source parameters.
+    /* Fill in defaults for the source parameters. */
     *source = dirac_source_parameters_defaults[video_format];
 
     /* [DIRAC_STD] 10.3 Source Parameters
@@ -322,7 +328,7 @@ int avpriv_dirac_parse_sequence_header(AVCodecContext *avctx, GetBitContext *gb,
      * currently only used to signal field coding */
     picture_coding_mode = svq3_get_ue_golomb(gb);
     if (picture_coding_mode != 0) {
-        av_log(avctx, AV_LOG_ERROR, "Unsupported picture coding mode %d",
+        av_log(avctx, AV_LOG_ERROR, "Unsupported picture coding mode %d\n",
                picture_coding_mode);
         return AVERROR_INVALIDDATA;
     }
diff --git a/libavcodec/dirac.h b/libavcodec/dirac.h
index e5b79b0..b0f955b 100644
--- a/libavcodec/dirac.h
+++ b/libavcodec/dirac.h
@@ -1,21 +1,22 @@
 /*
  * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
  * Copyright (C) 2009 David Conrad
+ * Copyright (C) 2011 Jordi Ortiz
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,6 +27,8 @@
  * @file
  * Interface to Dirac Decoder/Encoder
  * @author Marco Gerards <marco@gnu.org>
+ * @author David Conrad
+ * @author Jordi Ortiz
  */
 
 #include "avcodec.h"
diff --git a/libavcodec/dirac_arith.c b/libavcodec/dirac_arith.c
new file mode 100644
index 0000000..bf91392
--- /dev/null
+++ b/libavcodec/dirac_arith.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
+ * Copyright (C) 2009 David Conrad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Arithmetic decoder for Dirac
+ * @author Marco Gerards <marco@gnu.org>
+ */
+
+#include "dirac_arith.h"
+
+
+const uint16_t ff_dirac_prob[256] = { /* 256-entry table; ff_dirac_init_arith_decoder() derives ff_dirac_prob_branchless[][] from it */
+    0,    2,    5,    8,    11,   15,   20,   24,
+    29,   35,   41,   47,   53,   60,   67,   74,
+    82,   89,   97,   106,  114,  123,  132,  141,
+    150,  160,  170,  180,  190,  201,  211,  222,
+    233,  244,  256,  267,  279,  291,  303,  315,
+    327,  340,  353,  366,  379,  392,  405,  419,
+    433,  447,  461,  475,  489,  504,  518,  533,
+    548,  563,  578,  593,  609,  624,  640,  656,
+    672,  688,  705,  721,  738,  754,  771,  788,
+    805,  822,  840,  857,  875,  892,  910,  928,
+    946,  964,  983,  1001, 1020, 1038, 1057, 1076,
+    1095, 1114, 1133, 1153, 1172, 1192, 1211, 1231,
+    1251, 1271, 1291, 1311, 1332, 1352, 1373, 1393,
+    1414, 1435, 1456, 1477, 1498, 1520, 1541, 1562,
+    1584, 1606, 1628, 1649, 1671, 1694, 1716, 1738,
+    1760, 1783, 1806, 1828, 1851, 1874, 1897, 1920,
+    1935, 1942, 1949, 1955, 1961, 1968, 1974, 1980,
+    1985, 1991, 1996, 2001, 2006, 2011, 2016, 2021,
+    2025, 2029, 2033, 2037, 2040, 2044, 2047, 2050,
+    2053, 2056, 2058, 2061, 2063, 2065, 2066, 2068,
+    2069, 2070, 2071, 2072, 2072, 2072, 2072, 2072,
+    2072, 2071, 2070, 2069, 2068, 2066, 2065, 2063,
+    2060, 2058, 2055, 2052, 2049, 2045, 2042, 2038,
+    2033, 2029, 2024, 2019, 2013, 2008, 2002, 1996,
+    1989, 1982, 1975, 1968, 1960, 1952, 1943, 1934,
+    1925, 1916, 1906, 1896, 1885, 1874, 1863, 1851,
+    1839, 1827, 1814, 1800, 1786, 1772, 1757, 1742,
+    1727, 1710, 1694, 1676, 1659, 1640, 1622, 1602,
+    1582, 1561, 1540, 1518, 1495, 1471, 1447, 1422,
+    1396, 1369, 1341, 1312, 1282, 1251, 1219, 1186,
+    1151, 1114, 1077, 1037, 995,  952,  906,  857,
+    805,  750,  690,  625,  553,  471,  376,  255
+}; /* NOTE(review): presumably the per-probability adaptation steps of the Dirac arithmetic coder - confirm against dirac_arith.h / the Dirac spec */
+
+const uint8_t ff_dirac_next_ctx[DIRAC_CTX_COUNT] = { /* maps a context to the context that follows it; unlisted entries are zero-initialized */
+    [CTX_ZPZN_F1]   = CTX_ZP_F2, /* both ZP *_F1 contexts continue with the shared ZP_F2..ZP_F6 chain */
+    [CTX_ZPNN_F1]   = CTX_ZP_F2,
+    [CTX_ZP_F2]     = CTX_ZP_F3,
+    [CTX_ZP_F3]     = CTX_ZP_F4,
+    [CTX_ZP_F4]     = CTX_ZP_F5,
+    [CTX_ZP_F5]     = CTX_ZP_F6,
+    [CTX_ZP_F6]     = CTX_ZP_F6, /* end of chain: maps to itself */
+    [CTX_NPZN_F1]   = CTX_NP_F2, /* both NP *_F1 contexts continue with the shared NP_F2..NP_F6 chain */
+    [CTX_NPNN_F1]   = CTX_NP_F2,
+    [CTX_NP_F2]     = CTX_NP_F3,
+    [CTX_NP_F3]     = CTX_NP_F4,
+    [CTX_NP_F4]     = CTX_NP_F5,
+    [CTX_NP_F5]     = CTX_NP_F6,
+    [CTX_NP_F6]     = CTX_NP_F6, /* end of chain: maps to itself */
+    [CTX_DELTA_Q_F] = CTX_DELTA_Q_F, /* single-element chain: maps to itself */
+};
+
+int16_t ff_dirac_prob_branchless[256][2]; /* filled by ff_dirac_init_arith_decoder(): [i][0] = ff_dirac_prob[255-i], [i][1] = -ff_dirac_prob[i] */
+
+void ff_dirac_init_arith_decoder(DiracArith *c, GetBitContext *gb, int length)
+{ /* Resets c to decode at most length bytes starting at gb's current position; gb is advanced past those bytes. */
+    int i;
+    align_get_bits(gb); /* presumably advances gb to a byte boundary (see get_bits.h) - the byte offset computed below drops any bit remainder */
+
+    length = FFMIN(length, get_bits_left(gb)/8); /* cap at what gb still holds; NOTE(review): no lower clamp, a negative value passes through - confirm callers */
+
+    c->bytestream     = gb->buffer + get_bits_count(gb)/8; /* byte at gb's current read position */
+    c->bytestream_end = c->bytestream + length;
+    skip_bits_long(gb, length*8); /* gb resumes right after the bytes handed to c */
+
+    c->low = 0;
+    for (i = 0; i < 4; i++) { /* preload four bytes into low, first byte ends up most significant */
+        c->low <<= 8;
+        if (c->bytestream < c->bytestream_end)
+            c->low |= *c->bytestream++;
+        else
+            c->low |= 0xff; /* fewer than four bytes available: pad with 0xff */
+    }
+
+    c->counter = -16; /* NOTE(review): the meaning of -16 is defined by the refill code in dirac_arith.h, not visible here */
+    c->range   = 0xffff;
+
+    for (i = 0; i < 256; i++) { /* NOTE(review): this global table is rewritten with identical values on every call */
+        ff_dirac_prob_branchless[i][0] =  ff_dirac_prob[255-i];
+        ff_dirac_prob_branchless[i][1] = -ff_dirac_prob[i];
+    }
+
+    for (i = 0; i < DIRAC_CTX_COUNT; i++)
+        c->contexts[i] = 0x8000; /* every context starts at the same value; presumably probability 1/2 in 16-bit fixed point - confirm */
+}
diff --git a/libavcodec/dirac_arith.h b/libavcodec/dirac_arith.h
new file mode 100644
index 0000000..089c71a
--- /dev/null
+++ b/libavcodec/dirac_arith.h
@@ -0,0 +1,191 @@
+/*
+ * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
+ * Copyright (C) 2009 David Conrad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Arithmetic decoder for Dirac
+ * @author Marco Gerards <marco@gnu.org>
+ */
+
+#ifndef AVCODEC_DIRAC_ARITH_H
+#define AVCODEC_DIRAC_ARITH_H
+
+#include "libavutil/x86/asm.h"
+#include "bytestream.h"
+#include "get_bits.h"
+
+enum dirac_arith_contexts { /* indices into DiracArith.contexts[] and ff_dirac_next_ctx[] */
+    CTX_ZPZN_F1,
+    CTX_ZPNN_F1,
+    CTX_NPZN_F1,
+    CTX_NPNN_F1,
+    CTX_ZP_F2,
+    CTX_ZP_F3,
+    CTX_ZP_F4,
+    CTX_ZP_F5,
+    CTX_ZP_F6,
+    CTX_NP_F2,
+    CTX_NP_F3,
+    CTX_NP_F4,
+    CTX_NP_F5,
+    CTX_NP_F6,
+    CTX_COEFF_DATA,
+    CTX_SIGN_NEG,
+    CTX_SIGN_ZERO,
+    CTX_SIGN_POS,
+    CTX_ZERO_BLOCK,
+    CTX_DELTA_Q_F,
+    CTX_DELTA_Q_DATA,
+    CTX_DELTA_Q_SIGN,
+
+    DIRAC_CTX_COUNT /* number of contexts; used as the array size, not a context itself */
+};
+
+// Dirac resets the arith decoder between decoding various types of data,
+// so many contexts are never used simultaneously. Thus, we can reduce
+// the number of contexts needed by reusing them.
+#define CTX_SB_F1        CTX_ZP_F5
+#define CTX_SB_DATA      0 /* same slot as CTX_ZPZN_F1 (enum value 0) */
+#define CTX_PMODE_REF1   0 /* same slot as CTX_ZPZN_F1 (enum value 0) */
+#define CTX_PMODE_REF2   1 /* same slot as CTX_ZPNN_F1 (enum value 1) */
+#define CTX_GLOBAL_BLOCK 2 /* same slot as CTX_NPZN_F1 (enum value 2) */
+#define CTX_MV_F1        CTX_ZP_F2
+#define CTX_MV_DATA      0 /* same slot as CTX_ZPZN_F1 (enum value 0) */
+#define CTX_DC_F1        CTX_ZP_F5
+#define CTX_DC_DATA      0 /* same slot as CTX_ZPZN_F1 (enum value 0) */
+
+typedef struct { /* state of one arithmetic decoder instance */
+    unsigned low; /* code value window; its upper 16 bits are compared against the range split in dirac_get_arith_bit() */
+    uint16_t range; /* current interval size; renorm() keeps it above 0x4000 */
+    int16_t  counter; /* bits shifted in since the last refill, biased by -16; refill() acts when >= 0 */
+
+    const uint8_t *bytestream; /* next input byte to consume */
+    const uint8_t *bytestream_end; /* one past the last byte of this coded segment */
+
+    uint16_t contexts[DIRAC_CTX_COUNT]; /* per-context value read as prob_zero; all start at 0x8000 */
+} DiracArith;
+
+extern const uint8_t ff_dirac_next_ctx[DIRAC_CTX_COUNT];
+extern const uint16_t ff_dirac_prob[256];
+extern int16_t ff_dirac_prob_branchless[256][2];
+
+static inline void renorm(DiracArith *c) /* scale range back above 0x4000, shifting low by the same amount */
+{
+#if HAVE_FAST_CLZ
+    int shift = 14 - av_log2_16bit(c->range-1) + ((c->range-1)>>15); /* presumably the shift count the loop below would reach, computed without looping -- TODO confirm av_log2_16bit is floor(log2) */
+
+    c->low    <<= shift;
+    c->range  <<= shift;
+    c->counter += shift; /* record how many bits were consumed from low */
+#else
+    while (c->range <= 0x4000) { /* double until range exceeds 0x4000 */
+        c->low   <<= 1;
+        c->range <<= 1;
+        c->counter++; /* one more bit consumed from low */
+    }
+#endif
+}
+
+static inline void refill(DiracArith *c) /* once counter has reached >= 0, merge 16 fresh input bits into low */
+{
+    int counter = c->counter;
+
+    if (counter >= 0) {
+        int new = bytestream_get_be16(&c->bytestream); /* presumably reads a big-endian 16-bit value and advances c->bytestream by 2; NOTE(review): this read happens before the end check below -- confirm the input buffer is padded */
+
+        // the spec defines overread bits to be 1, and streams rely on this
+        if (c->bytestream > c->bytestream_end) {
+            new |= 0xff; /* the second byte lay past the end */
+            if (c->bytestream > c->bytestream_end+1)
+                new |= 0xff00; /* both bytes lay past the end */
+
+            c->bytestream = c->bytestream_end; /* clamp so the pointer does not keep running past the end */
+        }
+
+        c->low += new << counter; /* place the new bits just below those already in low */
+        counter -= 16;
+    }
+    c->counter = counter;
+}
+
+static inline int dirac_get_arith_bit(DiracArith *c, int ctx) /* decode one bit using context ctx, adapt that context, then renormalize and refill */
+{
+    int prob_zero = c->contexts[ctx];
+    int range_times_prob, bit;
+    unsigned low = c->low;
+    int    range = c->range;
+
+    range_times_prob = (c->range * prob_zero) >> 16; /* size of the sub-interval assigned to a 0 bit */
+
+#if HAVE_FAST_CMOV && HAVE_INLINE_ASM && HAVE_6REGS
+    low   -= range_times_prob << 16; /* speculatively apply the bit == 1 update ... */
+    range -= range_times_prob;
+    bit = 0;
+    __asm__(
+        "cmpl   %5, %4 \n\t"
+        "setae  %b0    \n\t"
+        "cmovb  %3, %2 \n\t"
+        "cmovb  %5, %1 \n\t"
+        : "+q"(bit), "+r"(range), "+r"(low)
+        : "r"(c->low), "r"(c->low>>16),
+          "r"(range_times_prob)
+    ); /* ... and let the asm restore low / pick range_times_prob when the bit is 0; intended to match the #else branch -- TODO confirm */
+#else
+    bit = (low >> 16) >= range_times_prob; /* 1 when the code value lies at or above the 0-bit sub-interval */
+    if (bit) {
+        low   -= range_times_prob << 16;
+        range -= range_times_prob;
+    } else {
+        range  = range_times_prob;
+    }
+#endif
+
+    c->contexts[ctx] += ff_dirac_prob_branchless[prob_zero>>8][bit]; /* table holds a positive delta for bit 0 and a negative one for bit 1 */
+    c->low   = low;
+    c->range = range;
+
+    renorm(c);
+    refill(c);
+    return bit;
+}
+
+static inline int dirac_get_arith_uint(DiracArith *c, int follow_ctx, int data_ctx) /* decode an unsigned value from alternating follow and data bits */
+{
+    int ret = 1; /* starts with a leading 1 that is removed again by the final -1 */
+    while (!dirac_get_arith_bit(c, follow_ctx)) { /* a follow bit of 0 means another data bit comes next */
+        ret <<= 1; /* NOTE(review): no cap on the number of iterations is visible here, so ret could overflow on a long run of 0 follow bits -- confirm */
+        ret += dirac_get_arith_bit(c, data_ctx);
+        follow_ctx = ff_dirac_next_ctx[follow_ctx]; /* advance to the next follow context in the chain */
+    }
+    return ret-1;
+}
+
+static inline int dirac_get_arith_int(DiracArith *c, int follow_ctx, int data_ctx) /* decode a signed value: magnitude first, then a sign bit if nonzero */
+{
+    int ret = dirac_get_arith_uint(c, follow_ctx, data_ctx);
+    if (ret && dirac_get_arith_bit(c, data_ctx+1)) /* sign bit uses the context right after data_ctx; it is not read for zero */
+        ret = -ret;
+    return ret;
+}
+
+void ff_dirac_init_arith_decoder(DiracArith *c, GetBitContext *gb, int length);
+
+#endif /* AVCODEC_DIRAC_ARITH_H */
diff --git a/libavcodec/dirac_dwt.c b/libavcodec/dirac_dwt.c
new file mode 100644
index 0000000..96d8bae
--- /dev/null
+++ b/libavcodec/dirac_dwt.c
@@ -0,0 +1,571 @@
+/*
+ * Copyright (C) 2004-2010 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2008 David Conrad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "dirac_dwt.h"
+#include "libavcodec/x86/dirac_dwt.h"
+
+
+static inline int mirror(int v, int m) /* fold index v into [0, m] by reflecting at 0 and at m */
+{
+    while ((unsigned)v > (unsigned)m) { /* unsigned compare is true for v < 0 as well as v > m */
+        v = -v; /* reflect at 0 */
+        if (v < 0)
+            v += 2 * m; /* v was above m: together with the negation this gives 2*m - v */
+    } /* NOTE(review): with m == 0 and v != 0 this loop does not appear to terminate -- confirm callers guarantee m > 0 */
+    return v;
+}
+
+static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
+                                  int width)
+{
+    int i;
+
+    for (i = 0; i < width; i++)
+        b1[i] -= (b0[i] + b2[i] + 2) >> 2; /* same formula as COMPOSE_53iL0, applied in place to row b1 from its neighbours b0 and b2 */
+}
+
+
+static av_always_inline
+void interleave(IDWTELEM *dst, IDWTELEM *src0, IDWTELEM *src1, int w2, int add, int shift) /* merge two w2-long arrays into dst, applying (x + add) >> shift to each sample */
+{
+    int i;
+    for (i = 0; i < w2; i++) {
+        dst[2*i  ] = (src0[i] + add) >> shift; /* src0 fills the even positions */
+        dst[2*i+1] = (src1[i] + add) >> shift; /* src1 fills the odd positions */
+    }
+}
+
+static void horizontal_compose_dirac53i(IDWTELEM *b, IDWTELEM *temp, int w) /* in-place horizontal 5/3 synthesis of one row of w samples; temp is scratch of at least w elements */
+{
+    const int w2 = w >> 1; /* b is read as two halves: b[0..w2-1] and b[w2..w-1] */
+    int x;
+
+    temp[0] = COMPOSE_53iL0(b[w2], b[0], b[w2]); /* left edge: the same neighbour is used on both sides */
+    for (x = 1; x < w2; x++) {
+        temp[x     ] = COMPOSE_53iL0     (b[x+w2-1], b[x     ], b[x+w2]);
+        temp[x+w2-1] = COMPOSE_DIRAC53iH0(temp[x-1], b[x+w2-1], temp[x]); /* second step uses the two first-step results just produced */
+    }
+    temp[w-1] = COMPOSE_DIRAC53iH0(temp[w2-1], b[w-1], temp[w2-1]); /* right edge: the same neighbour is used on both sides */
+
+    interleave(b, temp, temp+w2, w2, 1, 1); /* write back as even/odd samples with (x + 1) >> 1 */
+}
+
+static void horizontal_compose_dd97i(IDWTELEM *b, IDWTELEM *tmp, int w) /* in-place horizontal DD 9/7 synthesis of one row of w samples */
+{
+    const int w2 = w >> 1; /* b is read as two halves: b[0..w2-1] and b[w2..w-1] */
+    int x;
+
+    tmp[0] = COMPOSE_53iL0(b[w2], b[0], b[w2]); /* left edge: the same neighbour is used on both sides */
+    for (x = 1; x < w2; x++)
+        tmp[x] = COMPOSE_53iL0(b[x+w2-1], b[x], b[x+w2]);
+
+    // extend the edges
+    tmp[-1]   = tmp[0]; /* writes below tmp[0]: tmp needs headroom (ff_spatial_idwt_init2 sets d->temp = temp + 8) */
+    tmp[w2+1] = tmp[w2] = tmp[w2-1]; /* and two elements past the w2 results */
+
+    for (x = 0; x < w2; x++) {
+        b[2*x  ] = (tmp[x] + 1)>>1;
+        b[2*x+1] = (COMPOSE_DD97iH0(tmp[x-1], tmp[x], b[x+w2], tmp[x+1], tmp[x+2]) + 1)>>1; /* b[x+w2] is still unmodified here since only b[0..2x] has been overwritten */
+    }
+}
+
+static void horizontal_compose_dd137i(IDWTELEM *b, IDWTELEM *tmp, int w) /* in-place horizontal DD 13/7 synthesis of one row of w samples */
+{
+    const int w2 = w >> 1; /* b is read as two halves: b[0..w2-1] and b[w2..w-1] */
+    int x;
+
+    tmp[0] = COMPOSE_DD137iL0(b[w2], b[w2], b[0], b[w2  ], b[w2+1]); /* left edge: taps that would fall before b[w2] reuse b[w2] */
+    tmp[1] = COMPOSE_DD137iL0(b[w2], b[w2], b[1], b[w2+1], b[w2+2]);
+    for (x = 2; x < w2-1; x++)
+        tmp[x] = COMPOSE_DD137iL0(b[x+w2-2], b[x+w2-1], b[x], b[x+w2], b[x+w2+1]);
+    tmp[w2-1] = COMPOSE_DD137iL0(b[w-3], b[w-2], b[w2-1], b[w-1], b[w-1]); /* right edge: the tap past b[w-1] reuses b[w-1] */
+
+    // extend the edges
+    tmp[-1]   = tmp[0]; /* writes below tmp[0]: tmp needs headroom (ff_spatial_idwt_init2 sets d->temp = temp + 8) */
+    tmp[w2+1] = tmp[w2] = tmp[w2-1];
+
+    for (x = 0; x < w2; x++) {
+        b[2*x  ] = (tmp[x] + 1)>>1;
+        b[2*x+1] = (COMPOSE_DD97iH0(tmp[x-1], tmp[x], b[x+w2], tmp[x+1], tmp[x+2]) + 1)>>1; /* second step is the same as in horizontal_compose_dd97i */
+    }
+}
+
+static av_always_inline
+void horizontal_compose_haari(IDWTELEM *b, IDWTELEM *temp, int w, int shift) /* in-place horizontal Haar synthesis of one row; shift selects the output scaling */
+{
+    const int w2 = w >> 1; /* b is read as two halves: b[0..w2-1] and b[w2..w-1] */
+    int x;
+
+    for (x = 0; x < w2; x++) {
+        temp[x   ] = COMPOSE_HAARiL0(b[x   ], b[x+w2]);
+        temp[x+w2] = COMPOSE_HAARiH0(b[x+w2], temp[x]); /* uses the value computed on the previous line */
+    }
+
+    interleave(b, temp, temp+w2, w2, shift, shift); /* shift is passed as both the rounding add and the shift count */
+}
+
+static void horizontal_compose_haar0i(IDWTELEM *b, IDWTELEM *temp, int w) /* Haar row synthesis with shift 0; selected for DWT_DIRAC_HAAR0 */
+{
+    horizontal_compose_haari(b, temp, w, 0);
+}
+
+static void horizontal_compose_haar1i(IDWTELEM *b, IDWTELEM *temp, int w) /* Haar row synthesis with shift 1; selected for DWT_DIRAC_HAAR1 */
+{
+    horizontal_compose_haari(b, temp, w, 1);
+}
+
+static void horizontal_compose_fidelityi(IDWTELEM *b, IDWTELEM *tmp, int w) /* in-place horizontal fidelity-filter synthesis of one row of w samples */
+{
+    const int w2 = w >> 1; /* b is read as two halves: b[0..w2-1] and b[w2..w-1] */
+    int i, x;
+    IDWTELEM v[8];
+
+    for (x = 0; x < w2; x++) {
+        for (i = 0; i < 8; i++)
+            v[i] = b[av_clip(x-3+i, 0, w2-1)]; /* eight taps from the first half, indices clamped at the edges */
+        tmp[x] = COMPOSE_FIDELITYiH0(v[0], v[1], v[2], v[3], b[x+w2], v[4], v[5], v[6], v[7]);
+    }
+
+    for (x = 0; x < w2; x++) {
+        for (i = 0; i < 8; i++)
+            v[i] = tmp[av_clip(x-4+i, 0, w2-1)]; /* eight taps from the first-pass results, indices clamped at the edges */
+        tmp[x+w2] = COMPOSE_FIDELITYiL0(v[0], v[1], v[2], v[3], b[x], v[4], v[5], v[6], v[7]);
+    }
+
+    interleave(b, tmp+w2, tmp, w2, 0, 0); /* second-pass results go to even positions, first-pass results to odd; no rounding or shift */
+}
+
+static void horizontal_compose_daub97i(IDWTELEM *b, IDWTELEM *temp, int w) /* in-place horizontal Daubechies 9/7 synthesis of one row, done as two lifting stages */
+{
+    const int w2 = w >> 1; /* b is read as two halves: b[0..w2-1] and b[w2..w-1] */
+    int x, b0, b1, b2;
+
+    temp[0] = COMPOSE_DAUB97iL1(b[w2], b[0], b[w2]); /* left edge: the same neighbour is used on both sides */
+    for (x = 1; x < w2; x++) {
+        temp[x     ] = COMPOSE_DAUB97iL1(b[x+w2-1], b[x     ], b[x+w2]);
+        temp[x+w2-1] = COMPOSE_DAUB97iH1(temp[x-1], b[x+w2-1], temp[x]);
+    }
+    temp[w-1] = COMPOSE_DAUB97iH1(temp[w2-1], b[w-1], temp[w2-1]); /* right edge: the same neighbour is used on both sides */
+
+    // second stage combined with interleave and shift
+    b0 = b2 = COMPOSE_DAUB97iL0(temp[w2], temp[0], temp[w2]);
+    b[0] = (b0 + 1) >> 1;
+    for (x = 1; x < w2; x++) {
+        b2 = COMPOSE_DAUB97iL0(temp[x+w2-1], temp[x     ], temp[x+w2]);
+        b1 = COMPOSE_DAUB97iH0(          b0, temp[x+w2-1], b2        );
+        b[2*x-1] = (b1 + 1) >> 1;
+        b[2*x  ] = (b2 + 1) >> 1;
+        b0 = b2; /* current even result becomes the left neighbour for the next odd sample */
+    }
+    b[w-1] = (COMPOSE_DAUB97iH0(b2, temp[w-1], b2) + 1) >> 1; /* last odd sample: the final even result stands in for both neighbours */
+}
+
+static void vertical_compose_dirac53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width)
+{
+    int i;
+
+    for(i=0; i<width; i++){
+        b1[i] = COMPOSE_DIRAC53iH0(b0[i], b1[i], b2[i]); /* row b1 is updated in place from rows b0 and b2 */
+    }
+}
+
+static void vertical_compose_dd97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
+                                  IDWTELEM *b3, IDWTELEM *b4, int width)
+{
+    int i;
+
+    for(i=0; i<width; i++){
+        b2[i] = COMPOSE_DD97iH0(b0[i], b1[i], b2[i], b3[i], b4[i]); /* row b2 is updated in place from the four surrounding rows */
+    }
+}
+
+static void vertical_compose_dd137iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
+                                      IDWTELEM *b3, IDWTELEM *b4, int width)
+{
+    int i;
+
+    for(i=0; i<width; i++){
+        b2[i] = COMPOSE_DD137iL0(b0[i], b1[i], b2[i], b3[i], b4[i]); /* row b2 is updated in place from the four surrounding rows */
+    }
+}
+
+static void vertical_compose_haar(IDWTELEM *b0, IDWTELEM *b1, int width) /* Haar synthesis across two rows, both updated in place */
+{
+    int i;
+
+    for (i = 0; i < width; i++) {
+        b0[i] = COMPOSE_HAARiL0(b0[i], b1[i]);
+        b1[i] = COMPOSE_HAARiH0(b1[i], b0[i]); /* uses the b0[i] just written, so statement order matters */
+    }
+}
+
+static void vertical_compose_fidelityiH0(IDWTELEM *dst, IDWTELEM *b[8], int width)
+{
+    int i;
+
+    for(i=0; i<width; i++){
+        dst[i] = COMPOSE_FIDELITYiH0(b[0][i], b[1][i], b[2][i], b[3][i], dst[i], b[4][i], b[5][i], b[6][i], b[7][i]); /* dst is the centre tap and is updated in place from the eight rows in b */
+    }
+}
+
+static void vertical_compose_fidelityiL0(IDWTELEM *dst, IDWTELEM *b[8], int width)
+{
+    int i;
+
+    for(i=0; i<width; i++){
+        dst[i] = COMPOSE_FIDELITYiL0(b[0][i], b[1][i], b[2][i], b[3][i], dst[i], b[4][i], b[5][i], b[6][i], b[7][i]); /* dst is the centre tap and is updated in place from the eight rows in b */
+    }
+}
+
+static void vertical_compose_daub97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width)
+{
+    int i;
+
+    for(i=0; i<width; i++){
+        b1[i] = COMPOSE_DAUB97iH0(b0[i], b1[i], b2[i]); /* row b1 is updated in place from rows b0 and b2 */
+    }
+}
+
+static void vertical_compose_daub97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width)
+{
+    int i;
+
+    for(i=0; i<width; i++){
+        b1[i] = COMPOSE_DAUB97iH1(b0[i], b1[i], b2[i]); /* row b1 is updated in place from rows b0 and b2 */
+    }
+}
+
+static void vertical_compose_daub97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width)
+{
+    int i;
+
+    for(i=0; i<width; i++){
+        b1[i] = COMPOSE_DAUB97iL0(b0[i], b1[i], b2[i]); /* row b1 is updated in place from rows b0 and b2 */
+    }
+}
+
+static void vertical_compose_daub97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width)
+{
+    int i;
+
+    for(i=0; i<width; i++){
+        b1[i] = COMPOSE_DAUB97iL1(b0[i], b1[i], b2[i]); /* row b1 is updated in place from rows b0 and b2 */
+    }
+}
+
+
+static void spatial_compose_dd97i_dy(DWTContext *d, int level, int width, int height, int stride) /* advance the DD 9/7 synthesis of one level by two rows */
+{
+    vertical_compose_3tap vertical_compose_l0 = (void*)d->vertical_compose_l0; /* stored as void(*)(void); cast back to the tap count this wavelet uses */
+    vertical_compose_5tap vertical_compose_h0 = (void*)d->vertical_compose_h0;
+    DWTCompose *cs = d->cs + level;
+
+    int i, y = cs->y;
+    IDWTELEM *b[8];
+    for (i = 0; i < 6; i++)
+        b[i] = cs->b[i]; /* six row pointers carried over from the previous call */
+    b[6] = d->buffer + av_clip(y+5, 0, height-2)*stride; /* two new rows, indices clamped to the picture */
+    b[7] = d->buffer + av_clip(y+6, 1, height-1)*stride;
+
+        if(y+5<(unsigned)height) vertical_compose_l0(      b[5], b[6], b[7],       width); /* unsigned compare also rejects negative row indices */
+        if(y+1<(unsigned)height) vertical_compose_h0(b[0], b[2], b[3], b[4], b[6], width);
+
+        if(y-1<(unsigned)height) d->horizontal_compose(b[0], d->temp, width);
+        if(y+0<(unsigned)height) d->horizontal_compose(b[1], d->temp, width);
+
+    for (i = 0; i < 6; i++)
+        cs->b[i] = b[i+2]; /* slide the row window down by two */
+    cs->y += 2;
+}
+
+static void spatial_compose_dirac53i_dy(DWTContext *d, int level, int width, int height, int stride) /* advance the 5/3 synthesis of one level by two rows */
+{
+    vertical_compose_3tap vertical_compose_l0 = (void*)d->vertical_compose_l0; /* stored as void(*)(void); cast back to the 3-tap type */
+    vertical_compose_3tap vertical_compose_h0 = (void*)d->vertical_compose_h0;
+    DWTCompose *cs = d->cs + level;
+
+    int y= cs->y;
+    IDWTELEM *b[4] = { cs->b[0], cs->b[1] }; /* two row pointers carried over from the previous call */
+    b[2] = d->buffer + mirror(y+1, height-1)*stride; /* two new rows, indices reflected at the picture edges */
+    b[3] = d->buffer + mirror(y+2, height-1)*stride;
+
+        if(y+1<(unsigned)height) vertical_compose_l0(b[1], b[2], b[3], width); /* unsigned compare also rejects negative row indices */
+        if(y+0<(unsigned)height) vertical_compose_h0(b[0], b[1], b[2], width);
+
+        if(y-1<(unsigned)height) d->horizontal_compose(b[0], d->temp, width);
+        if(y+0<(unsigned)height) d->horizontal_compose(b[1], d->temp, width);
+
+    cs->b[0] = b[2]; /* slide the row window down by two */
+    cs->b[1] = b[3];
+    cs->y += 2;
+}
+
+
+static void spatial_compose_dd137i_dy(DWTContext *d, int level, int width, int height, int stride) /* advance the DD 13/7 synthesis of one level by two rows */
+{
+    vertical_compose_5tap vertical_compose_l0 = (void*)d->vertical_compose_l0; /* stored as void(*)(void); cast back to the 5-tap type */
+    vertical_compose_5tap vertical_compose_h0 = (void*)d->vertical_compose_h0;
+    DWTCompose *cs = d->cs + level;
+
+    int i, y = cs->y;
+    IDWTELEM *b[10];
+    for (i = 0; i < 8; i++)
+        b[i] = cs->b[i]; /* eight row pointers carried over from the previous call */
+    b[8] = d->buffer + av_clip(y+7, 0, height-2)*stride; /* two new rows, indices clamped to the picture */
+    b[9] = d->buffer + av_clip(y+8, 1, height-1)*stride;
+
+        if(y+5<(unsigned)height) vertical_compose_l0(b[3], b[5], b[6], b[7], b[9], width); /* unsigned compare also rejects negative row indices */
+        if(y+1<(unsigned)height) vertical_compose_h0(b[0], b[2], b[3], b[4], b[6], width);
+
+        if(y-1<(unsigned)height) d->horizontal_compose(b[0], d->temp, width);
+        if(y+0<(unsigned)height) d->horizontal_compose(b[1], d->temp, width);
+
+    for (i = 0; i < 8; i++)
+        cs->b[i] = b[i+2]; /* slide the row window down by two */
+    cs->y += 2;
+}
+
+// haar makes the assumption that height is even (always true for dirac)
+static void spatial_compose_haari_dy(DWTContext *d, int level, int width, int height, int stride) /* synthesize one pair of rows with the Haar filter; height is not used here */
+{
+    vertical_compose_2tap vertical_compose = (void*)d->vertical_compose; /* stored as void(*)(void); cast back to the 2-tap type */
+    int y = d->cs[level].y; /* starts at 1 (set in ff_spatial_idwt_init2), so y-1 is the first row of the pair */
+    IDWTELEM *b0 = d->buffer + (y-1)*stride;
+    IDWTELEM *b1 = d->buffer + (y  )*stride;
+
+    vertical_compose(b0, b1, width);
+    d->horizontal_compose(b0, d->temp, width);
+    d->horizontal_compose(b1, d->temp, width);
+
+    d->cs[level].y += 2;
+}
+
+// Don't do sliced idwt for fidelity; the 9 tap filter makes it a bit annoying
+// Fortunately, this filter isn't used in practice.
+static void spatial_compose_fidelity(DWTContext *d, int level, int width, int height, int stride) /* synthesize a whole level at once with the fidelity filter */
+{
+    vertical_compose_9tap vertical_compose_l0 = (void*)d->vertical_compose_l0; /* stored as void(*)(void); cast back to the 9-tap type */
+    vertical_compose_9tap vertical_compose_h0 = (void*)d->vertical_compose_h0;
+    int i, y;
+    IDWTELEM *b[8];
+
+    for (y = 1; y < height; y += 2) { /* first pass: update every odd row */
+        for (i = 0; i < 8; i++)
+            b[i] = d->buffer + av_clip((y-7 + 2*i), 0, height-2)*stride; /* eight even rows around y, clamped to the picture */
+        vertical_compose_h0(d->buffer + y*stride, b, width);
+    }
+
+    for (y = 0; y < height; y += 2) { /* second pass: update every even row */
+        for (i = 0; i < 8; i++)
+            b[i] = d->buffer + av_clip((y-7 + 2*i), 1, height-1)*stride; /* eight odd rows around y, clamped to the picture */
+        vertical_compose_l0(d->buffer + y*stride, b, width);
+    }
+
+    for (y = 0; y < height; y++) /* third pass: horizontal synthesis of every row */
+        d->horizontal_compose(d->buffer + y*stride, d->temp, width);
+
+    d->cs[level].y = height+1; /* beyond any bound tested in ff_spatial_idwt_slice2(), so this level is not run again */
+}
+
+static void spatial_compose_daub97i_dy(DWTContext *d, int level, int width, int height, int stride) /* advance the Daubechies 9/7 synthesis of one level by two rows */
+{
+    vertical_compose_3tap vertical_compose_l0 = (void*)d->vertical_compose_l0; /* stored as void(*)(void); cast back to the 3-tap type */
+    vertical_compose_3tap vertical_compose_h0 = (void*)d->vertical_compose_h0;
+    vertical_compose_3tap vertical_compose_l1 = (void*)d->vertical_compose_l1;
+    vertical_compose_3tap vertical_compose_h1 = (void*)d->vertical_compose_h1;
+    DWTCompose *cs = d->cs + level;
+
+    int i, y = cs->y;
+    IDWTELEM *b[6];
+    for (i = 0; i < 4; i++)
+        b[i] = cs->b[i]; /* four row pointers carried over from the previous call */
+    b[4] = d->buffer + mirror(y+3, height-1)*stride; /* two new rows, indices reflected at the picture edges */
+    b[5] = d->buffer + mirror(y+4, height-1)*stride;
+
+        if(y+3<(unsigned)height) vertical_compose_l1(b[3], b[4], b[5], width); /* unsigned compare also rejects negative row indices */
+        if(y+2<(unsigned)height) vertical_compose_h1(b[2], b[3], b[4], width);
+        if(y+1<(unsigned)height) vertical_compose_l0(b[1], b[2], b[3], width);
+        if(y+0<(unsigned)height) vertical_compose_h0(b[0], b[1], b[2], width);
+
+        if(y-1<(unsigned)height) d->horizontal_compose(b[0], d->temp, width);
+        if(y+0<(unsigned)height) d->horizontal_compose(b[1], d->temp, width);
+
+    for (i = 0; i < 4; i++)
+        cs->b[i] = b[i+2]; /* slide the row window down by two */
+    cs->y += 2;
+}
+
+
+static void spatial_compose97i_init2(DWTCompose *cs, IDWTELEM *buffer, int height, int stride) /* seed the four-row window used by spatial_compose_daub97i_dy() */
+{
+    cs->b[0] = buffer + mirror(-3-1, height-1)*stride; /* rows y-1 .. y+2 for y = -3, reflected into the picture */
+    cs->b[1] = buffer + mirror(-3  , height-1)*stride;
+    cs->b[2] = buffer + mirror(-3+1, height-1)*stride;
+    cs->b[3] = buffer + mirror(-3+2, height-1)*stride;
+    cs->y = -3; /* starting row index is negative */
+}
+
+static void spatial_compose53i_init2(DWTCompose *cs, IDWTELEM *buffer, int height, int stride) /* seed the two-row window used by spatial_compose_dirac53i_dy() */
+{
+    cs->b[0] = buffer + mirror(-1-1, height-1)*stride; /* rows y-1 and y for y = -1, reflected into the picture */
+    cs->b[1] = buffer + mirror(-1  , height-1)*stride;
+    cs->y = -1; /* starting row index is negative */
+}
+
+static void spatial_compose_dd97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride) /* seed the six-row window used by spatial_compose_dd97i_dy() */
+{
+    cs->b[0] = buffer + av_clip(-5-1, 0, height-2)*stride; /* rows y-1 .. y+4 for y = -5; clamp ranges alternate between [0, height-2] and [1, height-1] */
+    cs->b[1] = buffer + av_clip(-5  , 1, height-1)*stride;
+    cs->b[2] = buffer + av_clip(-5+1, 0, height-2)*stride;
+    cs->b[3] = buffer + av_clip(-5+2, 1, height-1)*stride;
+    cs->b[4] = buffer + av_clip(-5+3, 0, height-2)*stride;
+    cs->b[5] = buffer + av_clip(-5+4, 1, height-1)*stride;
+    cs->y = -5; /* starting row index is negative */
+}
+
+static void spatial_compose_dd137i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride) /* seed the eight-row window used by spatial_compose_dd137i_dy() */
+{
+    cs->b[0] = buffer + av_clip(-5-1, 0, height-2)*stride; /* rows y-1 .. y+6 for y = -5; clamp ranges alternate between [0, height-2] and [1, height-1] */
+    cs->b[1] = buffer + av_clip(-5  , 1, height-1)*stride;
+    cs->b[2] = buffer + av_clip(-5+1, 0, height-2)*stride;
+    cs->b[3] = buffer + av_clip(-5+2, 1, height-1)*stride;
+    cs->b[4] = buffer + av_clip(-5+3, 0, height-2)*stride;
+    cs->b[5] = buffer + av_clip(-5+4, 1, height-1)*stride;
+    cs->b[6] = buffer + av_clip(-5+5, 0, height-2)*stride;
+    cs->b[7] = buffer + av_clip(-5+6, 1, height-1)*stride;
+    cs->y = -5; /* starting row index is negative */
+}
+
+int ff_spatial_idwt_init2(DWTContext *d, IDWTELEM *buffer, int width, int height,
+                          int stride, enum dwt_type type, int decomposition_count,
+                          IDWTELEM *temp) /* prepare d for sliced inverse DWT of buffer; returns 0, or -1 for an unhandled type */
+{
+    int level;
+
+    d->buffer = buffer;
+    d->width = width;
+    d->height = height;
+    d->stride = stride;
+    d->decomposition_count = decomposition_count;
+    d->temp = temp + 8; /* offset into temp; the dd97i/dd137i row filters write tmp[-1], so headroom below d->temp is needed */
+
+    for(level=decomposition_count-1; level>=0; level--){ /* per-level start state; NOTE(review): d->cs has MAX_DECOMPOSITIONS entries and the count is not checked here */
+        int hl = height >> level; /* rows taking part at this level */
+        int stride_l = stride << level; /* distance between those rows in the full buffer */
+
+        switch(type){
+        case DWT_DIRAC_DD9_7:
+            spatial_compose_dd97i_init(d->cs+level, buffer, hl, stride_l);
+            break;
+        case DWT_DIRAC_LEGALL5_3:
+            spatial_compose53i_init2(d->cs+level, buffer, hl, stride_l);
+            break;
+        case DWT_DIRAC_DD13_7:
+            spatial_compose_dd137i_init(d->cs+level, buffer, hl, stride_l);
+            break;
+        case DWT_DIRAC_HAAR0:
+        case DWT_DIRAC_HAAR1:
+            d->cs[level].y = 1; /* spatial_compose_haari_dy() processes rows y-1 and y */
+            break;
+        case DWT_DIRAC_DAUB9_7:
+            spatial_compose97i_init2(d->cs+level, buffer, hl, stride_l);
+            break;
+        default:
+            d->cs[level].y = 0; /* reached by DWT_DIRAC_FIDELITY and by types rejected in the switch below */
+            break;
+        }
+    }
+
+    switch (type) { /* select the filter functions and the row lookahead for this wavelet */
+    case DWT_DIRAC_DD9_7:
+        d->spatial_compose = spatial_compose_dd97i_dy;
+        d->vertical_compose_l0 = (void*)vertical_compose53iL0;
+        d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0;
+        d->horizontal_compose = horizontal_compose_dd97i;
+        d->support = 7;
+        break;
+    case DWT_DIRAC_LEGALL5_3:
+        d->spatial_compose = spatial_compose_dirac53i_dy;
+        d->vertical_compose_l0 = (void*)vertical_compose53iL0;
+        d->vertical_compose_h0 = (void*)vertical_compose_dirac53iH0;
+        d->horizontal_compose = horizontal_compose_dirac53i;
+        d->support = 3;
+        break;
+    case DWT_DIRAC_DD13_7:
+        d->spatial_compose = spatial_compose_dd137i_dy;
+        d->vertical_compose_l0 = (void*)vertical_compose_dd137iL0;
+        d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0; /* shares the H0 step with DD 9/7 */
+        d->horizontal_compose = horizontal_compose_dd137i;
+        d->support = 7;
+        break;
+    case DWT_DIRAC_HAAR0:
+    case DWT_DIRAC_HAAR1:
+        d->spatial_compose = spatial_compose_haari_dy;
+        d->vertical_compose = (void*)vertical_compose_haar;
+        if (type == DWT_DIRAC_HAAR0)
+            d->horizontal_compose = horizontal_compose_haar0i;
+        else
+            d->horizontal_compose = horizontal_compose_haar1i;
+        d->support = 1;
+        break;
+    case DWT_DIRAC_FIDELITY:
+        d->spatial_compose = spatial_compose_fidelity;
+        d->vertical_compose_l0 = (void*)vertical_compose_fidelityiL0;
+        d->vertical_compose_h0 = (void*)vertical_compose_fidelityiH0;
+        d->horizontal_compose = horizontal_compose_fidelityi;
+        d->support = 0; // not really used
+        break;
+    case DWT_DIRAC_DAUB9_7:
+        d->spatial_compose = spatial_compose_daub97i_dy;
+        d->vertical_compose_l0 = (void*)vertical_compose_daub97iL0;
+        d->vertical_compose_h0 = (void*)vertical_compose_daub97iH0;
+        d->vertical_compose_l1 = (void*)vertical_compose_daub97iL1;
+        d->vertical_compose_h1 = (void*)vertical_compose_daub97iH1;
+        d->horizontal_compose = horizontal_compose_daub97i;
+        d->support = 5;
+        break;
+    default:
+        av_log(NULL, AV_LOG_ERROR, "Unknown wavelet type %d\n", type); /* includes the DWT_SNOW_* values, which have no case above */
+        return -1;
+    }
+
+    if (HAVE_MMX) ff_spatial_idwt_init_mmx(d, type); /* presumably replaces some of the pointers set above with x86 versions -- TODO confirm */
+
+    return 0;
+}
+
+void ff_spatial_idwt_slice2(DWTContext *d, int y) /* run every level's synthesis far enough for output rows up to y */
+{
+    int level, support = d->support;
+
+    for (level = d->decomposition_count-1; level >= 0; level--) { /* highest level index first, level 0 last */
+        int wl = d->width  >> level;
+        int hl = d->height >> level;
+        int stride_l = d->stride << level;
+
+        while (d->cs[level].y <= FFMIN((y>>level)+support, hl)) /* each call advances cs[level].y (by 2, or to hl+1 for the fidelity filter) */
+            d->spatial_compose(d, level, wl, hl, stride_l);
+    }
+}
+
diff --git a/libavcodec/dirac_dwt.h b/libavcodec/dirac_dwt.h
new file mode 100644
index 0000000..e5e447b
--- /dev/null
+++ b/libavcodec/dirac_dwt.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2004-2010 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DIRAC_DWT_H
+#define AVCODEC_DIRAC_DWT_H
+
+#include <stdint.h>
+
+typedef int DWTELEM;
+typedef short IDWTELEM; /* sample type used by all inverse-transform buffers and filters */
+
+#define MAX_DWT_SUPPORT 8 /* number of row pointers kept in DWTCompose.b */
+#define MAX_DECOMPOSITIONS 8 /* number of per-level states kept in DWTContext.cs */
+
+typedef struct DWTCompose { /* per-level progress of the sliced inverse transform */
+    IDWTELEM *b[MAX_DWT_SUPPORT]; /* row pointers carried over between successive spatial_compose calls */
+    int y; /* current row position; starts negative for several wavelets and advances as rows are composed */
+} DWTCompose;
+
+struct DWTContext;
+
+// Possible prototypes for vertical_compose functions
+typedef void (*vertical_compose_2tap)(IDWTELEM *b0, IDWTELEM *b1, int width);
+typedef void (*vertical_compose_3tap)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width);
+typedef void (*vertical_compose_5tap)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width);
+typedef void (*vertical_compose_9tap)(IDWTELEM *dst, IDWTELEM *b[8], int width);
+
+typedef struct DWTContext { /* state and function table for one inverse DWT, filled by ff_spatial_idwt_init2() */
+    IDWTELEM *buffer; /* coefficient plane, transformed in place */
+    IDWTELEM *temp; /* row scratch buffer; set to the caller's temp + 8 */
+    int width;
+    int height;
+    int stride; /* distance between rows of buffer, in elements */
+    int decomposition_count; /* number of entries of cs[] in use */
+    int support; /* extra rows each level is run ahead in ff_spatial_idwt_slice2() */
+
+    void (*spatial_compose)(struct DWTContext *cs, int level, int width, int height, int stride); /* advances one level */
+    void (*vertical_compose_l0)(void); /* stored type-erased; cast to the matching vertical_compose_Ntap type before the call */
+    void (*vertical_compose_h0)(void);
+    void (*vertical_compose_l1)(void); /* l1/h1 are only set for DWT_DIRAC_DAUB9_7 */
+    void (*vertical_compose_h1)(void);
+    void (*vertical_compose)(void);     ///< one set of lowpass and highpass combined
+    void (*horizontal_compose)(IDWTELEM *b, IDWTELEM *tmp, int width); /* in-place synthesis of one row */
+
+    DWTCompose cs[MAX_DECOMPOSITIONS]; /* per-level progress, indexed by level */
+} DWTContext;
+
+enum dwt_type { /* wavelet selector passed to ff_spatial_idwt_init2() */
+    DWT_SNOW_DAUB9_7, /* not handled by ff_spatial_idwt_init2(): falls into its "Unknown wavelet type" default */
+    DWT_SNOW_LEGALL5_3, /* not handled by ff_spatial_idwt_init2(): falls into its "Unknown wavelet type" default */
+    DWT_DIRAC_DD9_7,
+    DWT_DIRAC_LEGALL5_3,
+    DWT_DIRAC_DD13_7,
+    DWT_DIRAC_HAAR0,
+    DWT_DIRAC_HAAR1,
+    DWT_DIRAC_FIDELITY,
+    DWT_DIRAC_DAUB9_7,
+    DWT_NUM_TYPES /* count of the values above, not a wavelet */
+};
+
+// Returns 0 on success, -1 if an error occurred, e.g. the dwt_type isn't recognized
+int ff_spatial_idwt_init2(DWTContext *d, IDWTELEM *buffer, int width, int height,
+                          int stride, enum dwt_type type, int decomposition_count,
+                          IDWTELEM *temp);
+
+void ff_spatial_idwt_slice2(DWTContext *d, int y);
+
+// shared stuff for simd optimizations
+#define COMPOSE_53iL0(b0, b1, b2)\
+    (b1 - ((b0 + b2 + 2) >> 2)) /* NOTE(review): in all COMPOSE_* macros the arguments are expanded without parentheses; uses in dirac_dwt.c pass plain operands only */
+
+#define COMPOSE_DIRAC53iH0(b0, b1, b2)\
+    (b1 + ((b0 + b2 + 1) >> 1))
+
+#define COMPOSE_DD97iH0(b0, b1, b2, b3, b4)\
+    (b2 + ((-b0 + 9*b1 + 9*b3 - b4 + 8) >> 4)) /* b2 is the sample being updated; b0, b1, b3, b4 are its neighbours */
+
+#define COMPOSE_DD137iL0(b0, b1, b2, b3, b4)\
+    (b2 - ((-b0 + 9*b1 + 9*b3 - b4 + 16) >> 5)) /* b2 is the sample being updated; b0, b1, b3, b4 are its neighbours */
+
+#define COMPOSE_HAARiL0(b0, b1)\
+    (b0 - ((b1 + 1) >> 1))
+
+#define COMPOSE_HAARiH0(b0, b1)\
+    (b0 + b1)
+
+#define COMPOSE_FIDELITYiL0(b0, b1, b2, b3, b4, b5, b6, b7, b8)\
+    (b4 - ((-8*(b0+b8) + 21*(b1+b7) - 46*(b2+b6) + 161*(b3+b5) + 128) >> 8)) /* b4 is the sample being updated; the other eight are symmetric taps */
+
+#define COMPOSE_FIDELITYiH0(b0, b1, b2, b3, b4, b5, b6, b7, b8)\
+    (b4 + ((-2*(b0+b8) + 10*(b1+b7) - 25*(b2+b6) + 81*(b3+b5) + 128) >> 8)) /* b4 is the sample being updated; the other eight are symmetric taps */
+
+#define COMPOSE_DAUB97iL1(b0, b1, b2)\
+    (b1 - ((1817*(b0 + b2) + 2048) >> 12)) /* for the four DAUB97 macros b1 is the sample being updated from neighbours b0 and b2 */
+
+#define COMPOSE_DAUB97iH1(b0, b1, b2)\
+    (b1 - (( 113*(b0 + b2) + 64) >> 7))
+
+#define COMPOSE_DAUB97iL0(b0, b1, b2)\
+    (b1 + (( 217*(b0 + b2) + 2048) >> 12))
+
+#define COMPOSE_DAUB97iH0(b0, b1, b2)\
+    (b1 + ((6497*(b0 + b2) + 2048) >> 12))
+
+
+#endif /* AVCODEC_DIRAC_DWT_H */
diff --git a/libavcodec/dirac_parser.c b/libavcodec/dirac_parser.c
index 46f5430..a2a22ee 100644
--- a/libavcodec/dirac_parser.c
+++ b/libavcodec/dirac_parser.c
@@ -4,20 +4,20 @@
  * Copyright (c) 2007-2008 Marco Gerards <marco@gnu.org>
  * Copyright (c) 2008 BBC, Anuradha Suraparaju <asuraparaju@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -161,7 +161,9 @@ static int dirac_combine_frame(AVCodecParserContext *s, AVCodecContext *avctx,
          * we can be pretty sure that we have a valid parse unit */
         if (!unpack_parse_unit(&pu1, pc, pc->index - 13)                     ||
             !unpack_parse_unit(&pu, pc, pc->index - 13 - pu1.prev_pu_offset) ||
-            pu.next_pu_offset != pu1.prev_pu_offset) {
+            pu.next_pu_offset != pu1.prev_pu_offset                          ||
+            pc->index < pc->dirac_unit_size + 13LL + pu1.prev_pu_offset
+        ) {
             pc->index -= 9;
             *buf_size = next-9;
             pc->header_bytes_needed = 9;
diff --git a/libavcodec/diracdec.c b/libavcodec/diracdec.c
new file mode 100644
index 0000000..f673397
--- /dev/null
+++ b/libavcodec/diracdec.c
@@ -0,0 +1,2001 @@
+/*
+ * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
+ * Copyright (C) 2009 David Conrad
+ * Copyright (C) 2011 Jordi Ortiz
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Dirac Decoder
+ * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
+ */
+
+#include "avcodec.h"
+#include "get_bits.h"
+#include "bytestream.h"
+#include "internal.h"
+#include "golomb.h"
+#include "dirac_arith.h"
+#include "mpeg12data.h"
+#include "libavcodec/mpegvideo.h"
+#include "mpegvideoencdsp.h"
+#include "dirac_dwt.h"
+#include "dirac.h"
+#include "diracdsp.h"
+#include "videodsp.h"
+
+/**
+ * The spec limits the number of wavelet decompositions to 4 for both
+ * level 1 (VC-2) and 128 (long-gop default).
+ * 5 decompositions is the maximum before >16-bit buffers are needed.
+ * Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
+ * the others to 4 decompositions (or 3 for the fidelity filter).
+ *
+ * We use this instead of MAX_DECOMPOSITIONS to save some memory.
+ */
+#define MAX_DWT_LEVELS 5    /* sizes Plane.band[], DiracContext.codeblock[] and lowdelay.quant[] */
+
+/**
+ * The spec limits this to 3 for frame coding, but in practice it can be as high as 6.
+ */
+#define MAX_REFERENCE_FRAMES 8 /* DiracContext.ref_frames[] is declared with one slot more than this */
+#define MAX_DELAY 5         /* limit for main profile for frame coding (TODO: field coding) */
+#define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1) /* size of the DiracContext.all_frames[] pool */
+#define MAX_QUANT 68        /* max quant for VC-2; quantiser indices are clamped to this before table lookup */
+#define MAX_BLOCKSIZE 32    /* maximum xblen/yblen we support */
+
+/**
+ * DiracBlock->ref bit flags: when a bit is set, the block does MC (motion compensation) from the given ref.
+ */
+#define DIRAC_REF_MASK_REF1   1 /* bit 0: first reference  */
+#define DIRAC_REF_MASK_REF2   2 /* bit 1: second reference */
+#define DIRAC_REF_MASK_GLOBAL 4 /* bit 2: global-motion flag (use site is outside this part of the file) */
+
+/**
+ * Value of Picture.reference when the Picture is not a reference picture but
+ * is being held back for delayed output.
+ */
+#define DELAYED_PIC_REF 4
+
+#define CALC_PADDING(size, depth) /* round size up to a multiple of (1 << depth) */ \
+    ((((size) + (1 << (depth)) - 1) >> (depth)) << (depth))
+
+#define DIVRNDUP(a, b) (((a) + (b) - 1) / (b)) /* a / b rounded up, for non-negative a and positive b */
+
+typedef struct { /* one entry of the DiracContext.all_frames[] pool */
+    AVFrame *avframe;       /* allocated in dirac_decode_init(), freed in dirac_decode_end() */
+    int interpolated[3];    /* 1 if hpel[] is valid */
+    uint8_t *hpel[3][4];    /* presumably [plane][half-pel phase] planes used for MC -- TODO confirm */
+    uint8_t *hpel_base[3][4]; /* entries [j][1..3] are released by free_sequence_buffers() */
+} DiracFrame;
+
+typedef struct { /* per-block motion data; DiracContext.blmotion is an array of these */
+    union {
+        int16_t mv[2][2];   /* presumably [reference][x/y] motion vector -- TODO confirm */
+        int16_t dc[3];      /* presumably one DC value per plane for blocks without a reference -- TODO confirm */
+    } u; /* anonymous unions aren't in C99 :( */
+    uint8_t ref;            /* combination of DIRAC_REF_MASK_* flags */
+} DiracBlock;
+
+typedef struct SubBand { /* one wavelet subband of a Plane; geometry is filled in by init_planes() */
+    int level;              /* decomposition level index into Plane.band[] */
+    int orientation;        /* enum dirac_subband value */
+    int stride;             /* distance between rows of this band inside the shared IDWT buffer, in IDWTELEM units */
+    int width;
+    int height;
+    int quant;              /* quantiser index; clamped to MAX_QUANT in codeblock() */
+    IDWTELEM *ibuf;         /* first coefficient of this band inside Plane.idwt_buf */
+    struct SubBand *parent; /* same orientation at level-1; only assigned for level > 0 */
+
+    /* for low delay */
+    unsigned length;        /* number of bytes at coeff_data (set in decode_component()) */
+    const uint8_t *coeff_data; /* start of this band's coded data inside the packet buffer */
+} SubBand;
+
+typedef struct Plane { /* per-component state: picture size, IDWT buffers, block geometry and subbands */
+    int width;              /* source width, right-shifted by the chroma shift for planes 1 and 2 */
+    int height;
+    ptrdiff_t stride;
+
+    int idwt_width;         /* width rounded up to a multiple of 1 << wavelet_depth */
+    int idwt_height;        /* height rounded up to a multiple of 1 << wavelet_depth */
+    int idwt_stride;        /* idwt_width aligned up to a multiple of 8 */
+    IDWTELEM *idwt_buf;     /* idwt_buf_base advanced past the top padding rows */
+    IDWTELEM *idwt_buf_base; /* allocation owned by this plane, freed in free_sequence_buffers() */
+    IDWTELEM *idwt_tmp;     /* scratch row buffer of (padded width + 16) elements */
+
+    /* block length */
+    uint8_t xblen;
+    uint8_t yblen;
+    /* block separation (block n+1 starts after this many pixels in block n) */
+    uint8_t xbsep;
+    uint8_t ybsep;
+    /* amount of overspill on each edge (half of the overlap between blocks) */
+    uint8_t xoffset;
+    uint8_t yoffset;
+
+    SubBand band[MAX_DWT_LEVELS][4]; /* [level][enum dirac_subband]; level 0 uses all four, higher levels skip LL */
+} Plane;
+
+typedef struct DiracContext { /* private decoder state (avctx->priv_data) */
+    AVCodecContext *avctx;      /* owning codec context, set in dirac_decode_init() */
+    MpegvideoEncDSPContext mpvencdsp;
+    VideoDSPContext vdsp;
+    DiracDSPContext diracdsp;
+    GetBitContext gb;           /* bit reader over the parse unit currently being decoded */
+    dirac_source_params source;
+    int seen_sequence_header;   /* cleared by dirac_decode_flush() */
+    int frame_number;           /* number of the next frame to display; -1 after init/flush */
+    Plane plane[3];             /* plane 0 is used unshifted, planes 1 and 2 apply the chroma shifts */
+    int chroma_x_shift;
+    int chroma_y_shift;
+
+    int zero_res;               /* zero residue flag                         */
+    int is_arith;               /* whether coeffs use arith or golomb coding */
+    int low_delay;              /* use the low delay syntax                  */
+    int globalmc_flag;          /* use global motion compensation            */
+    int num_refs;               /* number of reference pictures              */
+
+    /* wavelet decoding */
+    unsigned wavelet_depth;     /* depth of the IDWT                         */
+    unsigned wavelet_idx;
+
+    /**
+     * schroedinger older than 1.0.8 doesn't store
+     * quant delta if only one codeblock exists in a band
+     */
+    unsigned old_delta_quant;
+    unsigned codeblock_mode;    /* non-zero: each codeblock may carry its own quantiser delta */
+
+    struct {
+        unsigned width;
+        unsigned height;
+    } codeblock[MAX_DWT_LEVELS+1]; /* codeblock counts per band; index = level + (orientation != LL) */
+
+    struct {
+        unsigned num_x;         /* number of horizontal slices               */
+        unsigned num_y;         /* number of vertical slices                 */
+        AVRational bytes;       /* average bytes per slice                   */
+        uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
+    } lowdelay;
+
+    struct {
+        int pan_tilt[2];        /* pan/tilt vector                           */
+        int zrs[2][2];          /* zoom/rotate/shear matrix                  */
+        int perspective[2];     /* perspective vector                        */
+        unsigned zrs_exp;
+        unsigned perspective_exp;
+    } globalmc[2];              /* one parameter set per reference picture   */
+
+    /* motion compensation */
+    uint8_t mv_precision;       /* [DIRAC_STD] 11.2.5 motion vector precision, 0..3 */
+    int16_t weight[2];          /* [DIRAC_STD] REF1_WT and REF2_WT           */
+    unsigned weight_log2denom;  /* [DIRAC_STD] REFS_WT_PRECISION             */
+
+    int blwidth;                /* number of blocks (horizontally)           */
+    int blheight;               /* number of blocks (vertically)             */
+    int sbwidth;                /* number of superblocks (horizontally)      */
+    int sbheight;               /* number of superblocks (vertically)        */
+
+    uint8_t *sbsplit;           /* one byte per superblock, allocated in alloc_sequence_buffers() */
+    DiracBlock *blmotion;       /* 16 DiracBlock entries per superblock      */
+
+    uint8_t *edge_emu_buffer[4];
+    uint8_t *edge_emu_buffer_base;
+
+    uint16_t *mctmp;            /* buffer holding the MC data multiplied by OBMC weights */
+    uint8_t *mcscratch;
+    int buffer_stride;          /* stride the MC buffers were sized for; 0 when they are not allocated */
+
+    DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];
+
+    void (*put_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
+    void (*avg_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
+    void (*add_obmc)(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
+    dirac_weight_func weight_func;
+    dirac_biweight_func biweight_func;
+
+    DiracFrame *current_picture;
+    DiracFrame *ref_pics[2];
+
+    DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1]; /* NULL-terminated list, see remove_frame() */
+    DiracFrame *delay_frames[MAX_DELAY+1];          /* NULL-terminated list, see remove_frame() */
+    DiracFrame all_frames[MAX_FRAMES];              /* backing storage for every DiracFrame */
+} DiracContext;
+
+/**
+ * Dirac Specification ->
+ * Parse code values of the non-picture parse units. 9.6.1 Table 9.1
+ */
+enum dirac_parse_code {
+    pc_seq_header         = 0x00, /* sequence header */
+    pc_eos                = 0x10, /* end of sequence */
+    pc_aux_data           = 0x20, /* auxiliary data  */
+    pc_padding            = 0x30, /* padding data    */
+};
+
+enum dirac_subband { /* second index of Plane.band[][]; bit 0 shifts the band right, values > 1 shift it down (init_planes) */
+    subband_ll = 0, /* only present at level 0 */
+    subband_hl = 1,
+    subband_lh = 2,
+    subband_hh = 3
+};
+
+static const uint8_t default_qmat[][4][4] = { /* 7 preset 4x4 matrices; presumably [wavelet_idx][level][orientation] -- TODO confirm at the use site */
+    { { 5,  3,  3,  0}, { 0,  4,  4,  1}, { 0,  5,  5,  2}, { 0,  6,  6,  3} },
+    { { 4,  2,  2,  0}, { 0,  4,  4,  2}, { 0,  5,  5,  3}, { 0,  7,  7,  5} },
+    { { 5,  3,  3,  0}, { 0,  4,  4,  1}, { 0,  5,  5,  2}, { 0,  6,  6,  3} },
+    { { 8,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0} },
+    { { 8,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0}, { 0,  4,  4,  0} },
+    { { 0,  4,  4,  8}, { 0,  8,  8, 12}, { 0, 13, 13, 17}, { 0, 17, 17, 21} },
+    { { 3,  1,  1,  0}, { 0,  4,  4,  2}, { 0,  6,  6,  5}, { 0,  9,  9,  7} },
+};
+
+static const int qscale_tab[MAX_QUANT+1] = { /* quantiser factor per quant index; used as qfactor in (coeff * qfactor + qoffset + 2) >> 2 */
+    4,     5,     6,     7,     8,    10,    11,    13,
+    16,    19,    23,    27,    32,    38,    45,    54,
+    64,    76,    91,   108,   128,   152,   181,   215,
+    256,   304,   362,   431,   512,   609,   724,   861,
+    1024,  1218,  1448,  1722,  2048,  2435,  2896,  3444,
+    4096,  4871,  5793,  6889,  8192,  9742, 11585, 13777,
+    16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109,
+    65536, 77936 /* NOTE(review): 58 initialisers for MAX_QUANT+1 (69) slots; indices 58..68 are implicitly 0 */
+};
+
+static const int qoffset_intra_tab[MAX_QUANT+1] = { /* dequantisation offset per quant index, used when num_refs == 0 and for low-delay slices */
+    1,     2,     3,     4,     4,     5,     6,     7,
+    8,    10,    12,    14,    16,    19,    23,    27,
+    32,    38,    46,    54,    64,    76,    91,   108,
+    128,   152,   181,   216,   256,   305,   362,   431,
+    512,   609,   724,   861,  1024,  1218,  1448,  1722,
+    2048,  2436,  2897,  3445,  4096,  4871,  5793,  6889,
+    8192,  9742, 11585, 13777, 16384, 19484, 23171, 27555,
+    32768, 38968 /* NOTE(review): 58 initialisers for 69 slots; indices 58..68 are implicitly 0 */
+};
+
+static const int qoffset_inter_tab[MAX_QUANT+1] = { /* dequantisation offset per quant index, used by codeblock() when num_refs != 0 */
+    1,     2,     2,     3,     3,     4,     4,     5,
+    6,     7,     9,    10,    12,    14,    17,    20,
+    24,    29,    34,    41,    48,    57,    68,    81,
+    96,   114,   136,   162,   192,   228,   272,   323,
+    384,   457,   543,   646,   768,   913,  1086,  1292,
+    1536,  1827,  2172,  2583,  3072,  3653,  4344,  5166,
+    6144,  7307,  8689, 10333, 12288, 14613, 17378, 20666,
+    24576, 29226 /* NOTE(review): 58 initialisers for 69 slots; indices 58..68 are implicitly 0 */
+};
+
+/* magic number division by 3 from schroedinger: fixed-point multiply by 21845 (about 65536/3) with a rounding bias */
+static inline int divide3(int x)
+{
+    return ((x+1)*21845 + 10922) >> 16; /* e.g. 2 -> 1, 3 -> 1, 4 -> 1, 5 -> 2; negative x relies on arithmetic >> */
+}
+
+static DiracFrame *remove_frame(DiracFrame *framelist[], int picnum)
+{   /* Unlink the frame whose display_picture_number is picnum from a NULL-terminated list; returns it, or NULL if absent. */
+    DiracFrame *remove_pic = NULL;
+    int i, remove_idx = -1;
+
+    for (i = 0; framelist[i]; i++) /* no break: if several entries match, the last one wins */
+        if (framelist[i]->avframe->display_picture_number == picnum) {
+            remove_pic = framelist[i];
+            remove_idx = i;
+        }
+
+    if (remove_pic)
+        for (i = remove_idx; framelist[i]; i++) /* close the gap; the terminating NULL is copied down as well */
+            framelist[i] = framelist[i+1];
+
+    return remove_pic;
+}
+
+static int add_frame(DiracFrame *framelist[], int maxframes, DiracFrame *frame)
+{   /* Store frame in the first NULL slot among framelist[0..maxframes-1]. */
+    int i;
+    for (i = 0; i < maxframes; i++)
+        if (!framelist[i]) {
+            framelist[i] = frame;
+            return 0;   /* stored */
+        }
+    return -1;          /* no free slot */
+}
+
+static int alloc_sequence_buffers(DiracContext *s)
+{   /* Allocate the per-plane IDWT buffers plus sbsplit/blmotion for the current source size; 0 or AVERROR(ENOMEM). */
+    int sbwidth  = DIVRNDUP(s->source.width,  4);
+    int sbheight = DIVRNDUP(s->source.height, 4);
+    int i, w, h, top_padding;
+
+    /* todo: think more about this / use or set Plane here */
+    for (i = 0; i < 3; i++) {
+        int max_xblen = MAX_BLOCKSIZE >> (i ? s->chroma_x_shift : 0);
+        int max_yblen = MAX_BLOCKSIZE >> (i ? s->chroma_y_shift : 0);
+        w = s->source.width  >> (i ? s->chroma_x_shift : 0);
+        h = s->source.height >> (i ? s->chroma_y_shift : 0);
+
+        /* We allocate the max we support here since the number of decompositions
+         * can change from frame to frame. Stride is aligned to 16 for SIMD, and
+         * 1<<MAX_DWT_LEVELS top padding avoids an if(y>0) in arith decoding.
+         * MAX_BLOCKSIZE padding for MC: blocks can spill up to half of that
+         * on each side. */
+        top_padding = FFMAX(1<<MAX_DWT_LEVELS, max_yblen/2);
+        w = FFALIGN(CALC_PADDING(w, MAX_DWT_LEVELS), 8); /* FIXME: Should this be 16 for SSE??? */
+        h = top_padding + CALC_PADDING(h, MAX_DWT_LEVELS) + max_yblen/2;
+
+        s->plane[i].idwt_buf_base = av_mallocz_array((w+max_xblen), h * sizeof(IDWTELEM));
+        s->plane[i].idwt_tmp      = av_malloc_array((w+16), sizeof(IDWTELEM));
+        s->plane[i].idwt_buf      = s->plane[i].idwt_buf_base + top_padding*w; /* NOTE(review): derived before the NULL check below, and offset by w rather than w+max_xblen */
+        if (!s->plane[i].idwt_buf_base || !s->plane[i].idwt_tmp)
+            return AVERROR(ENOMEM); /* buffers allocated so far are left for free_sequence_buffers() */
+    }
+
+    /* fixme: allocate using real stride here */
+    s->sbsplit  = av_malloc_array(sbwidth, sbheight);
+    s->blmotion = av_malloc_array(sbwidth, sbheight * 16 * sizeof(*s->blmotion)); /* 16 blocks per superblock */
+
+    if (!s->sbsplit || !s->blmotion)
+        return AVERROR(ENOMEM);
+    return 0;
+}
+
+static int alloc_buffers(DiracContext *s, int stride)
+{   /* (Re)allocate the MC scratch buffers for the given stride; a no-op if they are already large enough. */
+    int w = s->source.width;
+    int h = s->source.height;
+
+    av_assert0(stride >= w);
+    stride += 64; /* extra margin added on top of the caller's stride */
+
+    if (s->buffer_stride >= stride)
+        return 0;
+    s->buffer_stride = 0; /* stays 0 if any allocation below fails */
+
+    av_freep(&s->edge_emu_buffer_base);
+    memset(s->edge_emu_buffer, 0, sizeof(s->edge_emu_buffer)); /* drop the now-stale edge_emu_buffer[] pointers */
+    av_freep(&s->mctmp);
+    av_freep(&s->mcscratch);
+
+    s->edge_emu_buffer_base = av_malloc_array(stride, MAX_BLOCKSIZE);
+
+    s->mctmp     = av_malloc_array((stride+MAX_BLOCKSIZE), (h+MAX_BLOCKSIZE) * sizeof(*s->mctmp));
+    s->mcscratch = av_malloc_array(stride, MAX_BLOCKSIZE);
+
+    if (!s->edge_emu_buffer_base || !s->mctmp || !s->mcscratch)
+        return AVERROR(ENOMEM);
+
+    s->buffer_stride = stride; /* remember the (padded) stride the buffers were sized for */
+    return 0;
+}
+
+static void free_sequence_buffers(DiracContext *s)
+{   /* Release everything allocated per sequence; the AVFrame objects themselves are kept for reuse. */
+    int i, j, k;
+
+    for (i = 0; i < MAX_FRAMES; i++) {
+        if (s->all_frames[i].avframe->data[0]) { /* frame currently holds picture data */
+            av_frame_unref(s->all_frames[i].avframe);
+            memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
+        }
+
+        for (j = 0; j < 3; j++)
+            for (k = 1; k < 4; k++) /* index 0 is skipped: only hpel_base[j][1..3] are freed here */
+                av_freep(&s->all_frames[i].hpel_base[j][k]);
+    }
+
+    memset(s->ref_frames, 0, sizeof(s->ref_frames));     /* both lists only point into all_frames[] */
+    memset(s->delay_frames, 0, sizeof(s->delay_frames));
+
+    for (i = 0; i < 3; i++) {
+        av_freep(&s->plane[i].idwt_buf_base);
+        av_freep(&s->plane[i].idwt_tmp);
+    }
+
+    s->buffer_stride = 0; /* makes the next alloc_buffers() call reallocate */
+    av_freep(&s->sbsplit);
+    av_freep(&s->blmotion);
+    av_freep(&s->edge_emu_buffer_base);
+
+    av_freep(&s->mctmp);
+    av_freep(&s->mcscratch);
+}
+
+static av_cold int dirac_decode_init(AVCodecContext *avctx)
+{   /* Set up the DSP contexts and allocate the AVFrame of every all_frames[] entry; 0 or AVERROR(ENOMEM). */
+    DiracContext *s = avctx->priv_data;
+    int i;
+
+    s->avctx = avctx;
+    s->frame_number = -1; /* same value dirac_decode_flush() resets it to */
+
+    ff_diracdsp_init(&s->diracdsp);
+    ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
+    ff_videodsp_init(&s->vdsp, 8);
+
+    for (i = 0; i < MAX_FRAMES; i++) {
+        s->all_frames[i].avframe = av_frame_alloc();
+        if (!s->all_frames[i].avframe) {
+            while (i > 0) /* roll back the frames allocated so far */
+                av_frame_free(&s->all_frames[--i].avframe);
+            return AVERROR(ENOMEM);
+        }
+    }
+
+    return 0;
+}
+
+static void dirac_decode_flush(AVCodecContext *avctx)
+{   /* Drop all per-sequence state so decoding restarts from the next sequence header. */
+    DiracContext *s = avctx->priv_data;
+    free_sequence_buffers(s);
+    s->seen_sequence_header = 0;
+    s->frame_number = -1;
+}
+
+static av_cold int dirac_decode_end(AVCodecContext *avctx)
+{   /* Flush the decoder, then free the AVFrames allocated by dirac_decode_init(). Always returns 0. */
+    DiracContext *s = avctx->priv_data;
+    int i;
+
+    dirac_decode_flush(avctx); /* must run first: it still dereferences every avframe */
+    for (i = 0; i < MAX_FRAMES; i++)
+        av_frame_free(&s->all_frames[i].avframe);
+
+    return 0;
+}
+
+#define SIGN_CTX(x) (CTX_SIGN_ZERO + ((x) > 0) - ((x) < 0)) /* CTX_SIGN_ZERO offset by +1, 0 or -1 for positive, zero or negative x */
+
+static inline void coeff_unpack_arith(DiracArith *c, int qfactor, int qoffset,
+                                      SubBand *b, IDWTELEM *buf, int x, int y)
+{   /* Arithmetic-decode and dequantise the coefficient at (x, y) of band b into *buf (buf already points at it). */
+    int coeff, sign;
+    int sign_pred = 0;
+    int pred_ctx = CTX_ZPZN_F1;
+
+    /* Check if the parent subband has a 0 in the corresponding position */
+    if (b->parent)
+        pred_ctx += !!b->parent->ibuf[b->parent->stride * (y>>1) + (x>>1)] << 1; /* adds 2 when the parent coeff is non-zero */
+
+    if (b->orientation == subband_hl)
+        sign_pred = buf[-b->stride]; /* HL: predict the sign from the coefficient above */
+
+    /* Determine if the pixel has only zeros in its neighbourhood */
+    if (x) {
+        pred_ctx += !(buf[-1] | buf[-b->stride] | buf[-1-b->stride]);
+        if (b->orientation == subband_lh)
+            sign_pred = buf[-1]; /* LH: predict the sign from the coefficient to the left */
+    } else {
+        pred_ctx += !buf[-b->stride]; /* first column: only the row above exists (read unconditionally, even for y == 0) */
+    }
+
+    coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA);
+    if (coeff) {
+        coeff = (coeff * qfactor + qoffset + 2) >> 2; /* inverse quantisation */
+        sign  = dirac_get_arith_bit(c, SIGN_CTX(sign_pred));
+        coeff = (coeff ^ -sign) + sign; /* negate when sign == 1, unchanged when 0 */
+    }
+    *buf = coeff;
+}
+
+static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset)
+{   /* Read one Golomb-coded magnitude (plus a sign bit if non-zero) and return the dequantised coefficient. */
+    int sign, coeff;
+
+    coeff = svq3_get_ue_golomb(gb);
+    if (coeff) {
+        coeff = (coeff * qfactor + qoffset + 2) >> 2; /* inverse quantisation */
+        sign  = get_bits1(gb);
+        coeff = (coeff ^ -sign) + sign; /* negate when sign == 1, unchanged when 0 */
+    }
+    return coeff;
+}
+
+/**
+ * Decode the coeffs in the rectangle [left, right) x [top, bottom) of band b.
+ * [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
+ */
+static inline void codeblock(DiracContext *s, SubBand *b,
+                             GetBitContext *gb, DiracArith *c,
+                             int left, int right, int top, int bottom,
+                             int blockcnt_one, int is_arith)
+{
+    int x, y, zero_block;
+    int qoffset, qfactor;
+    IDWTELEM *buf;
+
+    /* check for any coded coefficients in this codeblock */
+    if (!blockcnt_one) { /* the zero-block flag is only coded when the band has more than one codeblock */
+        if (is_arith)
+            zero_block = dirac_get_arith_bit(c, CTX_ZERO_BLOCK);
+        else
+            zero_block = get_bits1(gb);
+
+        if (zero_block)
+            return; /* nothing coded: the buffer is left untouched */
+    }
+
+    if (s->codeblock_mode && !(s->old_delta_quant && blockcnt_one)) { /* a quantiser delta is present for this codeblock */
+        int quant = b->quant;
+        if (is_arith)
+            quant += dirac_get_arith_int(c, CTX_DELTA_Q_F, CTX_DELTA_Q_DATA);
+        else
+            quant += dirac_get_se_golomb(gb);
+        if (quant < 0) {
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid quant\n");
+            return; /* b->quant keeps its previous value and the codeblock is skipped */
+        }
+        b->quant = quant; /* the delta is cumulative across the codeblocks of a band */
+    }
+
+    b->quant = FFMIN(b->quant, MAX_QUANT); /* upper clamp only; keeps the table index within MAX_QUANT */
+
+    qfactor = qscale_tab[b->quant];
+    /* TODO: context pointer? */
+    if (!s->num_refs)
+        qoffset = qoffset_intra_tab[b->quant];
+    else
+        qoffset = qoffset_inter_tab[b->quant];
+
+    buf = b->ibuf + top * b->stride; /* first row of the codeblock */
+    for (y = top; y < bottom; y++) {
+        for (x = left; x < right; x++) {
+            /* [DIRAC_STD] 13.4.4 Subband coefficients. coeff_unpack() */
+            if (is_arith)
+                coeff_unpack_arith(c, qfactor, qoffset, b, buf+x, x, y);
+            else
+                buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
+        }
+        buf += b->stride;
+    }
+}
+
+/**
+ * Undo the DC prediction of band b in place, in raster order.
+ * Dirac Specification -> 13.3 intra_dc_prediction(band)
+ */
+static inline void intra_dc_prediction(SubBand *b)
+{
+    IDWTELEM *buf = b->ibuf;
+    int x, y;
+
+    for (x = 1; x < b->width; x++) /* first row: predicted from the left neighbour only */
+        buf[x] += buf[x-1];
+    buf += b->stride;
+
+    for (y = 1; y < b->height; y++) {
+        buf[0] += buf[-b->stride]; /* first column: predicted from the value above */
+
+        for (x = 1; x < b->width; x++) { /* elsewhere: divide3() of the left, top and top-left sum */
+            int pred = buf[x - 1] + buf[x - b->stride] + buf[x - b->stride-1];
+            buf[x]  += divide3(pred);
+        }
+        buf += b->stride;
+    }
+}
+
+/**
+ * Decode every codeblock of band b from b->coeff_data (b->length bytes).
+ * Dirac Specification -> 13.4.2 Non-skipped subbands.  subband_coeffs()
+ */
+static av_always_inline void decode_subband_internal(DiracContext *s, SubBand *b, int is_arith)
+{
+    int cb_x, cb_y, left, right, top, bottom;
+    DiracArith c;
+    GetBitContext gb;
+    int cb_width  = s->codeblock[b->level + (b->orientation != subband_ll)].width;  /* codeblocks per row of the band */
+    int cb_height = s->codeblock[b->level + (b->orientation != subband_ll)].height; /* codeblocks per column */
+    int blockcnt_one = (cb_width + cb_height) == 2; /* true when the band is a single 1x1 codeblock */
+
+    if (!b->length)
+        return; /* band carries no data; note the LL DC prediction below is skipped as well */
+
+    init_get_bits8(&gb, b->coeff_data, b->length);
+
+    if (is_arith)
+        ff_dirac_init_arith_decoder(&c, &gb, b->length);
+
+    top = 0;
+    for (cb_y = 0; cb_y < cb_height; cb_y++) {
+        bottom = (b->height * (cb_y+1)) / cb_height; /* integer split of the band into cb_height row groups */
+        left = 0;
+        for (cb_x = 0; cb_x < cb_width; cb_x++) {
+            right = (b->width * (cb_x+1)) / cb_width;
+            codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
+            left = right;
+        }
+        top = bottom;
+    }
+
+    if (b->orientation == subband_ll && s->num_refs == 0) /* intra pictures: LL band is DC-predicted */
+        intra_dc_prediction(b);
+}
+
+static int decode_subband_arith(AVCodecContext *avctx, void *b)
+{   /* avctx->execute() worker: b points directly at the SubBand to decode with arithmetic coding. */
+    DiracContext *s = avctx->priv_data;
+    decode_subband_internal(s, b, 1);
+    return 0;
+}
+
+static int decode_subband_golomb(AVCodecContext *avctx, void *arg)
+{   /* avctx->execute() worker: arg points at a SubBand pointer (one element of the bands[] array). */
+    DiracContext *s = avctx->priv_data;
+    SubBand **b     = arg;
+    decode_subband_internal(s, *b, 0);
+    return 0;
+}
+
+/**
+ * Locate the coded data of every subband of plane comp, then decode them.
+ * [DIRAC_STD] 13.4.1 core_transform_data()
+ */
+static void decode_component(DiracContext *s, int comp)
+{
+    AVCodecContext *avctx = s->avctx;
+    SubBand *bands[3*MAX_DWT_LEVELS+1]; /* 4 bands at level 0 plus 3 per further level */
+    enum dirac_subband orientation;
+    int level, num_bands = 0;
+
+    /* Unpack all subbands at all levels. */
+    for (level = 0; level < s->wavelet_depth; level++) {
+        for (orientation = !!level; orientation < 4; orientation++) { /* LL exists at level 0 only */
+            SubBand *b = &s->plane[comp].band[level][orientation];
+            bands[num_bands++] = b;
+
+            align_get_bits(&s->gb);
+            /* [DIRAC_STD] 13.4.2 subband() */
+            b->length = svq3_get_ue_golomb(&s->gb);
+            if (b->length) {
+                b->quant = svq3_get_ue_golomb(&s->gb);
+                align_get_bits(&s->gb);
+                b->coeff_data = s->gb.buffer + get_bits_count(&s->gb)/8;
+                b->length = FFMIN(b->length, FFMAX(get_bits_left(&s->gb)/8, 0)); /* never trust a length beyond the packet */
+                skip_bits_long(&s->gb, b->length*8);
+            }
+        }
+        /* arithmetic coding has inter-level dependencies, so we can only execute one level at a time */
+        if (s->is_arith)
+            avctx->execute(avctx, decode_subband_arith, &s->plane[comp].band[level][!!level],
+                           NULL, 4-!!level, sizeof(SubBand)); /* jobs walk the SubBand array of this level directly */
+    }
+    /* golomb coding has no inter-level dependencies, so we can execute all subbands in parallel */
+    if (!s->is_arith)
+        avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
+}
+
+/* [DIRAC_STD] 13.5.5.2 Luma slice subband data. luma_slice_band(level,orient,sx,sy) --> if b2 == NULL */
+/* [DIRAC_STD] 13.5.5.3 Chroma slice subband data. chroma_slice_band(level,orient,sx,sy) --> if b2 != NULL (b1/b2 coeffs are interleaved) */
+static void lowdelay_subband(DiracContext *s, GetBitContext *gb, int quant,
+                             int slice_x, int slice_y, int bits_end,
+                             SubBand *b1, SubBand *b2)
+{
+    int left   = b1->width  * slice_x    / s->lowdelay.num_x; /* this slice's rectangle inside the band */
+    int right  = b1->width  *(slice_x+1) / s->lowdelay.num_x;
+    int top    = b1->height * slice_y    / s->lowdelay.num_y;
+    int bottom = b1->height *(slice_y+1) / s->lowdelay.num_y;
+
+    int qfactor = qscale_tab[FFMIN(quant, MAX_QUANT)];
+    int qoffset = qoffset_intra_tab[FFMIN(quant, MAX_QUANT)]; /* the intra offsets are always used here */
+
+    IDWTELEM *buf1 =      b1->ibuf + top * b1->stride;
+    IDWTELEM *buf2 = b2 ? b2->ibuf + top * b2->stride : NULL;
+    int x, y;
+    /* we have to constantly check for overread since the spec explicitly
+       requires this, with the meaning that all remaining coeffs are set to 0 */
+    if (get_bits_count(gb) >= bits_end)
+        return;
+
+    for (y = top; y < bottom; y++) {
+        for (x = left; x < right; x++) {
+            buf1[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
+            if (get_bits_count(gb) >= bits_end) /* the read may already have run past bits_end */
+                return;
+            if (buf2) {
+                buf2[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
+                if (get_bits_count(gb) >= bits_end)
+                    return;
+            }
+        }
+        buf1 += b1->stride;
+        if (buf2)
+            buf2 += b2->stride;
+    }
+}
+
+struct lowdelay_slice { /* one job for decode_lowdelay_slice(), filled in by decode_lowdelay() */
+    GetBitContext gb;   /* reader positioned at the first byte of the slice */
+    int slice_x;        /* horizontal slice index, 0..lowdelay.num_x-1 */
+    int slice_y;        /* vertical slice index, 0..lowdelay.num_y-1 */
+    int bytes;          /* size of this slice in bytes */
+};
+
+
+/**
+ * avctx->execute() worker: decode one struct lowdelay_slice (arg), luma bands
+ * first, then interleaved chroma. Always returns 0. [DIRAC_STD] 13.5.2 Slices. slice(sx,sy)
+ */
+static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
+{
+    DiracContext *s = avctx->priv_data;
+    struct lowdelay_slice *slice = arg;
+    GetBitContext *gb = &slice->gb;
+    enum dirac_subband orientation;
+    int level, quant, chroma_bits, chroma_end;
+
+    int quant_base  = get_bits(gb, 7); /*[DIRAC_STD] qindex */
+    int length_bits = av_log2(8 * slice->bytes)+1; /* width of the luma-length field */
+    int luma_bits   = get_bits_long(gb, length_bits);
+    int luma_end    = get_bits_count(gb) + FFMIN(luma_bits, get_bits_left(gb)); /* clamped to the buffer end */
+
+    /* [DIRAC_STD] 13.5.5.2 luma_slice_band */
+    for (level = 0; level < s->wavelet_depth; level++)
+        for (orientation = !!level; orientation < 4; orientation++) {
+            quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
+            lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
+                             &s->plane[0].band[level][orientation], NULL);
+        }
+
+    /* reposition exactly at luma_end: skips unused luma bits, or steps back over bits read past the end */
+    skip_bits_long(gb, luma_end - get_bits_count(gb));
+
+    chroma_bits = 8*slice->bytes - 7 - length_bits - luma_bits; /* what remains of the slice after header and luma */
+    chroma_end  = get_bits_count(gb) + FFMIN(chroma_bits, get_bits_left(gb));
+    /* [DIRAC_STD] 13.5.5.3 chroma_slice_band */
+    for (level = 0; level < s->wavelet_depth; level++)
+        for (orientation = !!level; orientation < 4; orientation++) {
+            quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
+            lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
+                             &s->plane[1].band[level][orientation],
+                             &s->plane[2].band[level][orientation]);
+        }
+
+    return 0;
+}
+
+/**
+ * Split the remaining picture data into slices, decode them via avctx->execute(),
+ * then undo the DC prediction of each plane's LL band. [DIRAC_STD] 13.5.1 low_delay_transform_data()
+ */
+static void decode_lowdelay(DiracContext *s)
+{
+    AVCodecContext *avctx = s->avctx;
+    int slice_x, slice_y, bytes, bufsize, slice_num = 0;
+    const uint8_t *buf;
+    struct lowdelay_slice *slices;
+
+    slices = av_mallocz_array(s->lowdelay.num_x, s->lowdelay.num_y * sizeof(struct lowdelay_slice));
+    if (!slices) /* out of memory: nothing can be decoded for this picture (no error code to return) */
+        return;
+
+    align_get_bits(&s->gb);
+    /*[DIRAC_STD] 13.5.2 Slices. slice(sx,sy) */
+    buf = s->gb.buffer + get_bits_count(&s->gb)/8;
+    bufsize = get_bits_left(&s->gb); /* in bits */
+
+    for (slice_y = 0; bufsize > 0 && slice_y < s->lowdelay.num_y; slice_y++)
+        for (slice_x = 0; bufsize > 0 && slice_x < s->lowdelay.num_x; slice_x++) {
+            bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
+                - slice_num    * s->lowdelay.bytes.num / s->lowdelay.bytes.den; /* spreads the fractional average size over slices */
+            slices[slice_num].bytes   = bytes;
+            slices[slice_num].slice_x = slice_x;
+            slices[slice_num].slice_y = slice_y;
+            init_get_bits(&slices[slice_num].gb, buf, bufsize);
+            slice_num++;
+
+            buf     += bytes;
+            bufsize -= bytes*8;
+        }
+
+    avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
+                   sizeof(struct lowdelay_slice)); /* [DIRAC_STD] 13.5.2 Slices */
+    intra_dc_prediction(&s->plane[0].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
+    intra_dc_prediction(&s->plane[1].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
+    intra_dc_prediction(&s->plane[2].band[0][0]);  /* [DIRAC_STD] 13.3 intra_dc_prediction() */
+    av_free(slices);
+}
+
+static void init_planes(DiracContext *s)
+{   /* Derive per-plane sizes, subband geometry and block parameters from source size, wavelet_depth and plane[0]'s block sizes. */
+    int i, w, h, level, orientation;
+
+    for (i = 0; i < 3; i++) {
+        Plane *p = &s->plane[i];
+
+        p->width       = s->source.width  >> (i ? s->chroma_x_shift : 0);
+        p->height      = s->source.height >> (i ? s->chroma_y_shift : 0);
+        p->idwt_width  = w = CALC_PADDING(p->width , s->wavelet_depth); /* padded to a multiple of 1 << wavelet_depth */
+        p->idwt_height = h = CALC_PADDING(p->height, s->wavelet_depth);
+        p->idwt_stride = FFALIGN(p->idwt_width, 8);
+
+        for (level = s->wavelet_depth-1; level >= 0; level--) { /* highest level index first; w and h halve on each step */
+            w = w>>1;
+            h = h>>1;
+            for (orientation = !!level; orientation < 4; orientation++) {
+                SubBand *b = &p->band[level][orientation];
+
+                b->ibuf   = p->idwt_buf; /* all bands live interleaved in the plane's single IDWT buffer */
+                b->level  = level;
+                b->stride = p->idwt_stride << (s->wavelet_depth - level);
+                b->width  = w;
+                b->height = h;
+                b->orientation = orientation;
+
+                if (orientation & 1)
+                    b->ibuf += w;            /* HL and HH start w elements to the right */
+                if (orientation > 1)
+                    b->ibuf += b->stride>>1; /* LH and HH start half a band-row further on */
+
+                if (level)
+                    b->parent = &p->band[level-1][orientation]; /* level 0 bands are not assigned a parent here */
+            }
+        }
+
+        if (i > 0) { /* chroma block sizes are the luma ones scaled by the chroma shifts */
+            p->xblen = s->plane[0].xblen >> s->chroma_x_shift;
+            p->yblen = s->plane[0].yblen >> s->chroma_y_shift;
+            p->xbsep = s->plane[0].xbsep >> s->chroma_x_shift;
+            p->ybsep = s->plane[0].ybsep >> s->chroma_y_shift;
+        }
+
+        p->xoffset = (p->xblen - p->xbsep)/2; /* half of the overlap between neighbouring blocks */
+        p->yoffset = (p->yblen - p->ybsep)/2;
+    }
+}
+
+/**
+ * Unpack the motion compensation parameters into s; returns 0, or -1 on an invalid value.
+ * Dirac Specification ->
+ * 11.2 Picture prediction data. picture_prediction()
+ */
+static int dirac_unpack_prediction_parameters(DiracContext *s)
+{
+    static const uint8_t default_blen[] = { 4, 12, 16, 24 }; /* preset block lengths for index 1..4 */
+    static const uint8_t default_bsep[] = { 4,  8, 12, 16 }; /* preset block separations for index 1..4 */
+
+    GetBitContext *gb = &s->gb;
+    unsigned idx, ref;
+
+    align_get_bits(gb);
+    /* [DIRAC_STD] 11.2.2 Block parameters. block_parameters() */
+    /* Luma and Chroma are equal. 11.2.3 */
+    idx = svq3_get_ue_golomb(gb); /* [DIRAC_STD] index */
+
+    if (idx > 4) {
+        av_log(s->avctx, AV_LOG_ERROR, "Block prediction index too high\n");
+        return -1;
+    }
+
+    if (idx == 0) { /* index 0: the four block parameters are coded explicitly */
+        s->plane[0].xblen = svq3_get_ue_golomb(gb); /* NOTE(review): values are truncated to uint8_t on store */
+        s->plane[0].yblen = svq3_get_ue_golomb(gb);
+        s->plane[0].xbsep = svq3_get_ue_golomb(gb);
+        s->plane[0].ybsep = svq3_get_ue_golomb(gb);
+    } else {
+        /*[DIRAC_STD] preset_block_params(index). Table 11.1 */
+        s->plane[0].xblen = default_blen[idx-1];
+        s->plane[0].yblen = default_blen[idx-1];
+        s->plane[0].xbsep = default_bsep[idx-1];
+        s->plane[0].ybsep = default_bsep[idx-1];
+    }
+    /*[DIRAC_STD] 11.2.4 motion_data_dimensions()
+      Calculated in function dirac_unpack_block_motion_data */
+
+    if (!s->plane[0].xbsep || !s->plane[0].ybsep || s->plane[0].xbsep < s->plane[0].xblen/2 || s->plane[0].ybsep < s->plane[0].yblen/2) {
+        av_log(s->avctx, AV_LOG_ERROR, "Block separation too small\n");
+        return -1;
+    }
+    if (s->plane[0].xbsep > s->plane[0].xblen || s->plane[0].ybsep > s->plane[0].yblen) {
+        av_log(s->avctx, AV_LOG_ERROR, "Block separation greater than size\n");
+        return -1;
+    }
+    if (FFMAX(s->plane[0].xblen, s->plane[0].yblen) > MAX_BLOCKSIZE) {
+        av_log(s->avctx, AV_LOG_ERROR, "Unsupported large block size\n");
+        return -1;
+    }
+
+    /*[DIRAC_STD] 11.2.5 Motion vector precision. motion_vector_precision()
+      Read motion vector precision */
+    s->mv_precision = svq3_get_ue_golomb(gb);
+    if (s->mv_precision > 3) {
+        av_log(s->avctx, AV_LOG_ERROR, "MV precision finer than eighth-pel\n");
+        return -1;
+    }
+
+    /*[DIRAC_STD] 11.2.6 Global motion. global_motion()
+      Read the global motion compensation parameters */
+    s->globalmc_flag = get_bits1(gb);
+    if (s->globalmc_flag) {
+        memset(s->globalmc, 0, sizeof(s->globalmc)); /* fields whose presence flag is 0 stay zero */
+        /* [DIRAC_STD] pan_tilt(gparams) */
+        for (ref = 0; ref < s->num_refs; ref++) { /* globalmc[] has 2 entries, so num_refs must not exceed 2 here */
+            if (get_bits1(gb)) {
+                s->globalmc[ref].pan_tilt[0] = dirac_get_se_golomb(gb);
+                s->globalmc[ref].pan_tilt[1] = dirac_get_se_golomb(gb);
+            }
+            /* [DIRAC_STD] zoom_rotate_shear(gparams)
+               zoom/rotation/shear parameters */
+            if (get_bits1(gb)) {
+                s->globalmc[ref].zrs_exp   = svq3_get_ue_golomb(gb);
+                s->globalmc[ref].zrs[0][0] = dirac_get_se_golomb(gb);
+                s->globalmc[ref].zrs[0][1] = dirac_get_se_golomb(gb);
+                s->globalmc[ref].zrs[1][0] = dirac_get_se_golomb(gb);
+                s->globalmc[ref].zrs[1][1] = dirac_get_se_golomb(gb);
+            } else { /* not coded: identity matrix */
+                s->globalmc[ref].zrs[0][0] = 1;
+                s->globalmc[ref].zrs[1][1] = 1;
+            }
+            /* [DIRAC_STD] perspective(gparams) */
+            if (get_bits1(gb)) {
+                s->globalmc[ref].perspective_exp = svq3_get_ue_golomb(gb);
+                s->globalmc[ref].perspective[0]  = dirac_get_se_golomb(gb);
+                s->globalmc[ref].perspective[1]  = dirac_get_se_golomb(gb);
+            }
+        }
+    }
+
+    /*[DIRAC_STD] 11.2.7 Picture prediction mode. prediction_mode()
+      Picture prediction mode, not currently used. */
+    if (svq3_get_ue_golomb(gb)) { /* only mode 0 is accepted */
+        av_log(s->avctx, AV_LOG_ERROR, "Unknown picture prediction mode\n");
+        return -1;
+    }
+
+    /* [DIRAC_STD] 11.2.8 Reference picture weight. reference_picture_weights()
+       just data read, weight calculation will be done later on. */
+    s->weight_log2denom = 1; /* defaults used when no custom weights are coded */
+    s->weight[0]        = 1;
+    s->weight[1]        = 1;
+
+    if (get_bits1(gb)) {
+        s->weight_log2denom = svq3_get_ue_golomb(gb); /* NOTE(review): no range check is applied to this value here */
+        s->weight[0] = dirac_get_se_golomb(gb);
+        if (s->num_refs == 2)
+            s->weight[1] = dirac_get_se_golomb(gb);
+    }
+    return 0;
+}
+
+/**
+ * Dirac Specification ->
+ * 11.3 Wavelet transform data. wavelet_transform()
+ *
+ * Reads the transform parameters from s->gb into the context: the zero
+ * residual flag, the wavelet index and depth, and then either the codeblock
+ * layout (when s->low_delay is 0) or the slice parameters and quantisation
+ * matrix (when s->low_delay is set).
+ *
+ * @return 0 on success (including the early exit when zero_res is set),
+ *         -1 or AVERROR_INVALIDDATA when a parsed value is rejected
+ */
+static int dirac_unpack_idwt_params(DiracContext *s)
+{
+    GetBitContext *gb = &s->gb;
+    int i, level;
+    unsigned tmp;
+
+    /* Read one unsigned Golomb value into tmp; if cond (written in terms of
+       tmp) holds, log errmsg and return -1 from this function, otherwise
+       store tmp in dst. The macro expands to several statements, so it must
+       only be used inside braces. */
+#define CHECKEDREAD(dst, cond, errmsg) \
+    tmp = svq3_get_ue_golomb(gb); \
+    if (cond) { \
+        av_log(s->avctx, AV_LOG_ERROR, errmsg); \
+        return -1; \
+    }\
+    dst = tmp;
+
+    align_get_bits(gb);
+
+    /* the zero residual flag is only present when the picture has references */
+    s->zero_res = s->num_refs ? get_bits1(gb) : 0;
+    if (s->zero_res)
+        return 0;
+
+    /*[DIRAC_STD] 11.3.1 Transform parameters. transform_parameters() */
+    CHECKEDREAD(s->wavelet_idx, tmp > 6, "wavelet_idx is too big\n")
+
+    CHECKEDREAD(s->wavelet_depth, tmp > MAX_DWT_LEVELS || tmp < 1, "invalid number of DWT decompositions\n")
+
+    if (!s->low_delay) {
+        /* Codeblock parameters (core syntax only) */
+        if (get_bits1(gb)) {
+            /* one width/height pair per level, levels 0..wavelet_depth inclusive */
+            for (i = 0; i <= s->wavelet_depth; i++) {
+                CHECKEDREAD(s->codeblock[i].width , tmp < 1, "codeblock width invalid\n")
+                CHECKEDREAD(s->codeblock[i].height, tmp < 1, "codeblock height invalid\n")
+            }
+
+            CHECKEDREAD(s->codeblock_mode, tmp > 1, "unknown codeblock mode\n")
+        } else
+            /* no explicit layout: a single codeblock per level */
+            for (i = 0; i <= s->wavelet_depth; i++)
+                s->codeblock[i].width = s->codeblock[i].height = 1;
+    } else {
+        /* Slice parameters + quantization matrix*/
+        /*[DIRAC_STD] 11.3.4 Slice coding Parameters (low delay syntax only). slice_parameters() */
+        /* NOTE(review): num_x, num_y and bytes.num are stored as read; only
+           bytes.den is validated below */
+        s->lowdelay.num_x     = svq3_get_ue_golomb(gb);
+        s->lowdelay.num_y     = svq3_get_ue_golomb(gb);
+        s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
+        s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);
+
+        if (s->lowdelay.bytes.den <= 0) {
+            av_log(s->avctx,AV_LOG_ERROR,"Invalid lowdelay.bytes.den\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        /* [DIRAC_STD] 11.3.5 Quantisation matrices (low-delay syntax). quant_matrix() */
+        if (get_bits1(gb)) {
+            av_log(s->avctx,AV_LOG_DEBUG,"Low Delay: Has Custom Quantization Matrix!\n");
+            /* custom quantization matrix */
+            /* entry [0][0] is read once, then entries 1..3 of every level */
+            s->lowdelay.quant[0][0] = svq3_get_ue_golomb(gb);
+            for (level = 0; level < s->wavelet_depth; level++) {
+                s->lowdelay.quant[level][1] = svq3_get_ue_golomb(gb);
+                s->lowdelay.quant[level][2] = svq3_get_ue_golomb(gb);
+                s->lowdelay.quant[level][3] = svq3_get_ue_golomb(gb);
+            }
+        } else {
+            /* the default table is only used for depths up to 4 */
+            if (s->wavelet_depth > 4) {
+                av_log(s->avctx,AV_LOG_ERROR,"Mandatory custom low delay matrix missing for depth %d\n", s->wavelet_depth);
+                return AVERROR_INVALIDDATA;
+            }
+            /* default quantization matrix */
+            for (level = 0; level < s->wavelet_depth; level++)
+                for (i = 0; i < 4; i++) {
+                    s->lowdelay.quant[level][i] = default_qmat[s->wavelet_idx][level][i];
+                    /* haar with no shift differs for different depths */
+                    if (s->wavelet_idx == 3)
+                        s->lowdelay.quant[level][i] += 4*(s->wavelet_depth-1 - level);
+                }
+        }
+    }
+    return 0;
+}
+
+/**
+ * Predict the split mode of a superblock from its already decoded neighbours.
+ *
+ * @param sbsplit pointer to the entry being predicted; the entries at -1,
+ *                -stride and -stride-1 are read depending on x and y
+ * @param stride  number of entries per row
+ * @param x       column of the superblock, 0 for the first column
+ * @param y       row of the superblock, 0 for the first row
+ * @return 0 for the first superblock, the left neighbour in the first row,
+ *         the upper neighbour in the first column, otherwise the rounded
+ *         mean of the left, upper and upper-left neighbours
+ */
+static inline int pred_sbsplit(uint8_t *sbsplit, int stride, int x, int y)
+{
+    /* indexed by the sum of three neighbours; equals (sum + 1) / 3 */
+    static const uint8_t avgsplit[7] = { 0, 0, 1, 1, 1, 2, 2 };
+    int sum;
+
+    if (x == 0 && y == 0)
+        return 0;
+    if (y == 0)
+        return sbsplit[-1];
+    if (x == 0)
+        return sbsplit[-stride];
+
+    sum  = sbsplit[-1];
+    sum += sbsplit[-stride];
+    sum += sbsplit[-stride - 1];
+    return avgsplit[sum];
+}
+
+/**
+ * Predict one bit of a block's ref field from its decoded neighbours.
+ *
+ * @param block   block being predicted; block[-1], block[-stride] and
+ *                block[-stride-1] are read depending on x and y
+ * @param stride  number of blocks per row
+ * @param x       block column, 0 for the first column
+ * @param y       block row, 0 for the first row
+ * @param refmask mask applied to every neighbour's ref field and to the result
+ * @return 0 for the first block, the masked left neighbour in the first row,
+ *         the masked upper neighbour in the first column, otherwise half the
+ *         sum of the three masked neighbours, masked again
+ */
+static inline int pred_block_mode(DiracBlock *block, int stride, int x, int y, int refmask)
+{
+    int votes;
+
+    if (x == 0 && y == 0)
+        return 0;
+    if (y == 0)
+        return block[-1].ref & refmask;
+    if (x == 0)
+        return block[-stride].ref & refmask;
+
+    /* sum the three masked neighbours; halving and re-masking picks the majority */
+    votes  = block[-1].ref          & refmask;
+    votes += block[-stride].ref     & refmask;
+    votes += block[-stride - 1].ref & refmask;
+    return (votes >> 1) & refmask;
+}
+
+/**
+ * Predict the three DC values of a block as the mean of those neighbours
+ * (left, above, above-left) whose ref field has neither of its two low bits
+ * set. The result is written to block->u.dc; with no usable neighbour it is
+ * all zero.
+ *
+ * @param block  block to fill; neighbours are read at -1, -stride, -1-stride
+ * @param stride number of blocks per row
+ * @param x      block column, 0 disables the left and above-left neighbours
+ * @param y      block row, 0 disables the upper and above-left neighbours
+ */
+static inline void pred_block_dc(DiracBlock *block, int stride, int x, int y)
+{
+    const DiracBlock *used[3];
+    int comp, k, count = 0;
+
+    memset(block->u.dc, 0, sizeof(block->u.dc));
+
+    /* collect the usable neighbours in the order left, above, above-left */
+    if (x && !(block[-1].ref & 3))
+        used[count++] = &block[-1];
+    if (y && !(block[-stride].ref & 3))
+        used[count++] = &block[-stride];
+    if (x && y && !(block[-1-stride].ref & 3))
+        used[count++] = &block[-1-stride];
+
+    for (k = 0; k < count; k++)
+        for (comp = 0; comp < 3; comp++)
+            block->u.dc[comp] += used[k]->u.dc[comp];
+
+    /* a single neighbour (or none) needs no normalisation */
+    switch (count) {
+    case 2:
+        for (comp = 0; comp < 3; comp++)
+            block->u.dc[comp] = (block->u.dc[comp]+1)>>1;
+        break;
+    case 3:
+        for (comp = 0; comp < 3; comp++)
+            block->u.dc[comp] = divide3(block->u.dc[comp]);
+        break;
+    }
+}
+
+/**
+ * Predict the motion vector of a block for reference ref and store it in
+ * block->u.mv[ref].
+ *
+ * A neighbour (left, above, above-left) takes part when its ref field, masked
+ * with the reference bit and DIRAC_REF_MASK_GLOBAL, equals the reference bit
+ * alone. The prediction is zero for no candidate, a copy for one, the rounded
+ * mean for two and the component-wise median for three.
+ *
+ * @param block  block to fill; neighbours are read at -1, -stride, -stride-1
+ * @param stride number of blocks per row
+ * @param x      block column, 0 disables the left and above-left neighbours
+ * @param y      block row, 0 disables the upper and above-left neighbours
+ * @param ref    reference index
+ */
+static inline void pred_mv(DiracBlock *block, int stride, int x, int y, int ref)
+{
+    int16_t *cand[3];
+    int16_t *out     = block->u.mv[ref];
+    const int want   = ref+1;
+    const int select = want | DIRAC_REF_MASK_GLOBAL; /*  exclude gmc blocks */
+    int count = 0, c;
+
+    if (x && (block[-1].ref & select) == want)
+        cand[count++] = block[-1].u.mv[ref];
+
+    if (y && (block[-stride].ref & select) == want)
+        cand[count++] = block[-stride].u.mv[ref];
+
+    if (x && y && (block[-stride-1].ref & select) == want)
+        cand[count++] = block[-stride-1].u.mv[ref];
+
+    /* horizontal component first, then vertical */
+    for (c = 0; c < 2; c++) {
+        if (count == 0)
+            out[c] = 0;
+        else if (count == 1)
+            out[c] = cand[0][c];
+        else if (count == 2)
+            out[c] = (cand[0][c] + cand[1][c] + 1) >> 1;
+        else
+            out[c] = mid_pred(cand[0][c], cand[1][c], cand[2][c]);
+    }
+}
+
+/**
+ * Derive the global-motion vector of one block for reference ref and store
+ * it in block->u.mv[ref].
+ *
+ * The vector is computed from the pan/tilt, zoom-rotate-shear and perspective
+ * parameters in s->globalmc[ref]. Those parameters are not range-checked
+ * here, so the products can exceed the range of int; all intermediates are
+ * therefore kept in 64 bits to avoid signed int overflow, which is undefined
+ * behaviour. For inputs that did not overflow before, the result is unchanged.
+ *
+ * @param s     decoder context holding the global motion parameters
+ * @param block block whose motion vector is written
+ * @param x     horizontal position supplied by the caller
+ * @param y     vertical position supplied by the caller
+ * @param ref   reference index
+ */
+static void global_mv(DiracContext *s, DiracBlock *block, int x, int y, int ref)
+{
+    int ez      = s->globalmc[ref].zrs_exp;
+    int ep      = s->globalmc[ref].perspective_exp;
+    int (*A)[2] = s->globalmc[ref].zrs;
+    int *b      = s->globalmc[ref].pan_tilt;
+    int *c      = s->globalmc[ref].perspective;
+
+    /* NOTE(review): ez and ep are not range-checked in this function; the
+       shifts below are only defined for ez + ep < 63 -- confirm that the
+       parser bounds them */
+    int64_t m   = ((int64_t)1 << ep) - (c[0] * (int64_t)x + c[1] * (int64_t)y);
+    /* multiply as unsigned so that a wrap-around stays well defined */
+    int64_t mx  = m * (uint64_t)((A[0][0] * (int64_t)x + A[0][1] * (int64_t)y) + ((int64_t)1 << ez) * b[0]);
+    int64_t my  = m * (uint64_t)((A[1][0] * (int64_t)x + A[1][1] * (int64_t)y) + ((int64_t)1 << ez) * b[1]);
+
+    block->u.mv[ref][0] = (mx + ((int64_t)1 << (ez+ep))) >> (ez+ep);
+    block->u.mv[ref][1] = (my + ((int64_t)1 << (ez+ep))) >> (ez+ep);
+}
+
+/**
+ * Decode the prediction mode and the DC values or motion vectors of one block.
+ *
+ * Each mode bit is predicted from the neighbouring blocks and then XORed with
+ * a bit read from the arithmetic decoder arith[0]. The order of the decoder
+ * reads below is part of the bitstream syntax and must not be changed.
+ *
+ * @param s      decoder context (num_refs, globalmc_flag, global motion data)
+ * @param arith  arithmetic decoders: [0] mode bits, [1..3] DC residuals,
+ *               [4 + 2*i] and [5 + 2*i] the two vector components of reference i
+ * @param block  block to fill; its neighbours at lower addresses are read
+ * @param stride number of blocks per row
+ * @param x      block column
+ * @param y      block row
+ */
+static void decode_block_params(DiracContext *s, DiracArith arith[8], DiracBlock *block,
+                                int stride, int x, int y)
+{
+    int i;
+
+    /* first reference bit: prediction XOR decoded bit */
+    block->ref  = pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF1);
+    block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF1);
+
+    /* second reference bit, only coded for two-reference pictures */
+    if (s->num_refs == 2) {
+        block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF2);
+        block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF2) << 1;
+    }
+
+    /* no reference selected: the block carries three DC values instead */
+    if (!block->ref) {
+        pred_block_dc(block, stride, x, y);
+        for (i = 0; i < 3; i++)
+            block->u.dc[i] += dirac_get_arith_int(arith+1+i, CTX_DC_F1, CTX_DC_DATA);
+        return;
+    }
+
+    /* global motion bit, only coded when the picture uses global motion */
+    if (s->globalmc_flag) {
+        block->ref |= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_GLOBAL);
+        block->ref ^= dirac_get_arith_bit(arith, CTX_GLOBAL_BLOCK) << 2;
+    }
+
+    /* one vector per selected reference: derived for global blocks,
+       otherwise prediction plus decoded residual */
+    for (i = 0; i < s->num_refs; i++)
+        if (block->ref & (i+1)) {
+            if (block->ref & DIRAC_REF_MASK_GLOBAL) {
+                global_mv(s, block, x, y, i);
+            } else {
+                pred_mv(block, stride, x, y, i);
+                block->u.mv[i][0] += dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
+                block->u.mv[i][1] += dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
+            }
+        }
+}
+
+/**
+ * Replicates the given block over the size x size square of blocks whose
+ * top-left corner it is, i.e. over the blocks covered by the current
+ * superblock split mode.
+ *
+ * @param block  top-left block of the square, used as the source
+ * @param stride number of blocks per row
+ * @param size   edge length of the square in blocks
+ */
+static void propagate_block_data(DiracBlock *block, int stride, int size)
+{
+    int row, col;
+
+    for (row = 0; row < size; row++) {
+        DiracBlock *line = block + row * stride;
+
+        /* the source itself sits at (0, 0) and is not rewritten */
+        for (col = row ? 0 : 1; col < size; col++)
+            line[col] = *block;
+    }
+}
+
+/**
+ * Dirac Specification ->
+ * 12. Block motion data syntax
+ *
+ * Derives the superblock and block grid dimensions, decodes the split mode
+ * of every superblock into s->sbsplit and then decodes the parameters of
+ * every block into s->blmotion.
+ *
+ * @return 0 on success, -1 if a decoded split residual is larger than 2
+ */
+static int dirac_unpack_block_motion_data(DiracContext *s)
+{
+    GetBitContext *gb = &s->gb;
+    uint8_t *sbsplit = s->sbsplit;
+    int i, x, y, q, p;
+    DiracArith arith[8];
+
+    align_get_bits(gb);
+
+    /* [DIRAC_STD] 11.2.4 and 12.2.1 Number of blocks and superblocks */
+    /* a superblock spans 4x4 blocks; DIVRNDUP presumably rounds the division up */
+    s->sbwidth  = DIVRNDUP(s->source.width,  4*s->plane[0].xbsep);
+    s->sbheight = DIVRNDUP(s->source.height, 4*s->plane[0].ybsep);
+    s->blwidth  = 4 * s->sbwidth;
+    s->blheight = 4 * s->sbheight;
+
+    /* [DIRAC_STD] 12.3.1 Superblock splitting modes. superblock_split_modes()
+       decode superblock split modes */
+    ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));     /* svq3_get_ue_golomb(gb) is the length */
+    for (y = 0; y < s->sbheight; y++) {
+        for (x = 0; x < s->sbwidth; x++) {
+            unsigned int split  = dirac_get_arith_uint(arith, CTX_SB_F1, CTX_SB_DATA);
+            if (split > 2)
+                return -1;
+            /* the coded value is a residual relative to the neighbour prediction, modulo 3 */
+            sbsplit[x] = (split + pred_sbsplit(sbsplit+x, s->sbwidth, x, y)) % 3;
+        }
+        sbsplit += s->sbwidth;
+    }
+
+    /* setup arith decoding */
+    /* arith[0]: block modes; arith[4 + 2*i], arith[5 + 2*i]: the two vector
+       components of reference i; arith[1..3]: the three DC components.
+       Every decoder's length is read from the bitstream just before it is set up. */
+    ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));
+    for (i = 0; i < s->num_refs; i++) {
+        ff_dirac_init_arith_decoder(arith + 4 + 2 * i, gb, svq3_get_ue_golomb(gb));
+        ff_dirac_init_arith_decoder(arith + 5 + 2 * i, gb, svq3_get_ue_golomb(gb));
+    }
+    for (i = 0; i < 3; i++)
+        ff_dirac_init_arith_decoder(arith+1+i, gb, svq3_get_ue_golomb(gb));
+
+    for (y = 0; y < s->sbheight; y++)
+        for (x = 0; x < s->sbwidth; x++) {
+            /* split mode 0, 1, 2 gives 1, 2, 4 coded blocks per side, each
+               covering 4, 2, 1 grid blocks per side */
+            int blkcnt = 1 << s->sbsplit[y * s->sbwidth + x];
+            int step   = 4 >> s->sbsplit[y * s->sbwidth + x];
+
+            for (q = 0; q < blkcnt; q++)
+                for (p = 0; p < blkcnt; p++) {
+                    int bx = 4 * x + p*step;
+                    int by = 4 * y + q*step;
+                    DiracBlock *block = &s->blmotion[by*s->blwidth + bx];
+                    /* decode once, then copy to the step x step blocks it covers */
+                    decode_block_params(s, arith, block, s->blwidth, bx, by);
+                    propagate_block_data(block, s->blwidth, step);
+                }
+        }
+
+    return 0;
+}
+
+/**
+ * One-dimensional overlap weight of sample i in a block of length blen.
+ *
+ * Samples within 2*offset of either end of the block get a ramp value,
+ * all other samples get 8.
+ *
+ * @param i      sample position inside the block
+ * @param blen   block length
+ * @param offset overlap offset of the block
+ * @return the weight of sample i
+ */
+static int weight(int i, int blen, int offset)
+{
+#define ROLLOFF(i) offset == 1 ? ((i) ? 5 : 3) :        \
+    (1 + (6*(i) + offset - 1) / (2*offset - 1))
+
+    const int ramp = 2*offset;
+    const int last = blen-1;
+
+    /* interior of the block: flat weight */
+    if (i >= ramp && i <= last - ramp)
+        return 8;
+    /* leading ramp takes precedence over the trailing one */
+    if (i < ramp)
+        return ROLLOFF(i);
+    return ROLLOFF(last - i);
+}
+
+/**
+ * Fill one row of an OBMC weight table.
+ *
+ * The row consists of an optional flat left half (when left is set), a ramp
+ * computed with weight(), a flat tail up to the block length and zero
+ * padding up to stride.
+ *
+ * @param p           plane supplying xblen and xoffset
+ * @param obmc_weight row to fill, stride entries long
+ * @param stride      number of entries in the row
+ * @param left        non-zero to keep the left half flat
+ * @param right       shift applied to xblen to find where the ramp ends
+ * @param wy          vertical weight every entry is multiplied by
+ */
+static void init_obmc_weight_row(Plane *p, uint8_t *obmc_weight, int stride,
+                                 int left, int right, int wy)
+{
+    const int xblen    = p->xblen;
+    const int flat_end = left ? xblen >> 1 : 0;
+    const int ramp_end = xblen >> right;
+    int pos = 0;
+
+    while (pos < flat_end) {
+        obmc_weight[pos] = wy*8;
+        pos++;
+    }
+    while (pos < ramp_end) {
+        obmc_weight[pos] = wy*weight(pos, xblen, p->xoffset);
+        pos++;
+    }
+    while (pos < xblen) {
+        obmc_weight[pos] = wy*8;
+        pos++;
+    }
+    /* entries beyond the block carry no weight */
+    while (pos < stride) {
+        obmc_weight[pos] = 0;
+        pos++;
+    }
+}
+
+/**
+ * Fill a complete OBMC weight table, one row per block line.
+ *
+ * Rows in the optional flat upper half (when top is set) and rows at or
+ * beyond yblen >> bottom use a vertical weight of 8; the rows in between use
+ * weight().
+ *
+ * @param p           plane supplying yblen and yoffset
+ * @param obmc_weight table to fill, yblen rows of stride entries
+ * @param stride      distance between rows in entries
+ * @param left        forwarded to init_obmc_weight_row()
+ * @param right       forwarded to init_obmc_weight_row()
+ * @param top         non-zero to keep the upper half flat
+ * @param bottom      shift applied to yblen to find where the ramp ends
+ */
+static void init_obmc_weight(Plane *p, uint8_t *obmc_weight, int stride,
+                             int left, int right, int top, int bottom)
+{
+    const int flat_end = top ? p->yblen >> 1 : 0;
+    const int ramp_end = p->yblen >> bottom;
+    int row;
+
+    for (row = 0; row < p->yblen; row++) {
+        int wy = 8;
+
+        if (row >= flat_end && row < ramp_end)
+            wy = weight(row, p->yblen, p->yoffset);
+
+        init_obmc_weight_row(p, obmc_weight, stride, left, right, wy);
+        obmc_weight += stride;
+    }
+}
+
+/**
+ * (Re)build the three OBMC weight tables used for block row by:
+ * table 0 with a flat left edge, table 1 without flat edges and table 2 with
+ * a flat right edge.
+ *
+ * @param s  decoder context owning the tables
+ * @param p  plane the tables are built for
+ * @param by block row about to be processed
+ */
+static void init_obmc_weights(DiracContext *s, Plane *p, int by)
+{
+    const int is_top    = by == 0;
+    const int is_bottom = by == s->blheight-1;
+    int k;
+
+    /* don't bother re-initing for rows 2 to blheight-2, the weights don't change */
+    if (!is_top && !is_bottom && by != 1)
+        return;
+
+    for (k = 0; k < 3; k++)
+        init_obmc_weight(p, s->obmc_weight[k], MAX_BLOCKSIZE, k == 0, k == 2, is_top, is_bottom);
+}
+
+/**
+ * Weight sets selected by mc_subpel() as epel_weights[my&3][mx&3] for
+ * eighth-pel positions. Every set holds four weights that sum to 16;
+ * presumably one weight per source plane of the interpolation.
+ */
+static const uint8_t epel_weights[4][4][4] = {
+    {{ 16,  0,  0,  0 },
+     { 12,  4,  0,  0 },
+     {  8,  8,  0,  0 },
+     {  4, 12,  0,  0 }},
+    {{ 12,  0,  4,  0 },
+     {  9,  3,  3,  1 },
+     {  6,  6,  2,  2 },
+     {  3,  9,  1,  3 }},
+    {{  8,  0,  8,  0 },
+     {  6,  2,  6,  2 },
+     {  4,  4,  4,  4 },
+     {  2,  6,  2,  6 }},
+    {{  4,  0, 12,  0 },
+     {  3,  1,  9,  3 },
+     {  2,  2,  6,  6 },
+     {  1,  3,  3,  9 }}
+};
+
+/**
+ * For block x,y, determine which of the hpel planes to do bilinear
+ * interpolation from and set src[] to the location in each hpel plane
+ * to MC from.
+ *
+ * @param s     decoder context
+ * @param block block whose motion vector for reference ref is used
+ * @param src   output: source pointers, one per plane to interpolate from;
+ *              for epel positions src[4] additionally points to the weights
+ * @param x     horizontal position of the block in the plane
+ * @param y     vertical position of the block in the plane
+ * @param ref   index into s->ref_pics
+ * @param plane plane index; non-zero planes scale the vector by the chroma shifts
+ *
+ * @return the index of the put_dirac_pixels_tab function to use
+ *  0 for 1 plane (fpel,hpel), 1 for 2 planes (qpel), 2 for 4 planes (qpel), and 3 for epel
+ */
+static int mc_subpel(DiracContext *s, DiracBlock *block, const uint8_t *src[5],
+                     int x, int y, int ref, int plane)
+{
+    Plane *p = &s->plane[plane];
+    uint8_t **ref_hpel = s->ref_pics[ref]->hpel[plane];
+    int motion_x = block->u.mv[ref][0];
+    int motion_y = block->u.mv[ref][1];
+    int mx, my, i, epel, nplanes = 0;
+
+    if (plane) {
+        motion_x >>= s->chroma_x_shift;
+        motion_y >>= s->chroma_y_shift;
+    }
+
+    /* split the vector into its fractional (low mv_precision bits) and
+       integer parts */
+    mx         = motion_x & ~(-1U << s->mv_precision);
+    my         = motion_y & ~(-1U << s->mv_precision);
+    motion_x >>= s->mv_precision;
+    motion_y >>= s->mv_precision;
+    /* normalize subpel coordinates to epel */
+    /* TODO: template this function? */
+    mx      <<= 3 - s->mv_precision;
+    my      <<= 3 - s->mv_precision;
+
+    x += motion_x;
+    y += motion_y;
+    /* an odd eighth-pel fraction in either direction needs the epel path */
+    epel = (mx|my)&1;
+
+    /* hpel position */
+    if (!((mx|my)&3)) {
+        /* both fractions are 0 or 4: a single plane suffices, selected by
+           the half-pel bits of my (row of the 2x2 layout) and mx (column) */
+        nplanes = 1;
+        src[0] = ref_hpel[(my>>1)+(mx>>2)] + y*p->stride + x;
+    } else {
+        /* qpel or epel */
+        nplanes = 4;
+        for (i = 0; i < 4; i++)
+            src[i] = ref_hpel[i] + y*p->stride + x;
+
+        /* if we're interpolating in the right/bottom halves, adjust the planes as needed
+           we increment x/y because the edge changes for half of the pixels */
+        if (mx > 4) {
+            src[0] += 1;
+            src[2] += 1;
+            x++;
+        }
+        if (my > 4) {
+            src[0] += p->stride;
+            src[1] += p->stride;
+            y++;
+        }
+
+        /* hpel planes are:
+           [0]: F  [1]: H
+           [2]: V  [3]: C */
+        if (!epel) {
+            /* check if we really only need 2 planes since either mx or my is
+               a hpel position. (epel weights of 0 handle this there) */
+            if (!(mx&3)) {
+                /* mx == 0: average [0] and [2]
+                   mx == 4: average [1] and [3] */
+                src[!mx] = src[2 + !!mx];
+                nplanes = 2;
+            } else if (!(my&3)) {
+                src[0] = src[(my>>1)  ];
+                src[1] = src[(my>>1)+1];
+                nplanes = 2;
+            }
+        } else {
+            /* adjust the ordering if needed so the weights work */
+            if (mx > 4) {
+                FFSWAP(const uint8_t *, src[0], src[1]);
+                FFSWAP(const uint8_t *, src[2], src[3]);
+            }
+            if (my > 4) {
+                FFSWAP(const uint8_t *, src[0], src[2]);
+                FFSWAP(const uint8_t *, src[1], src[3]);
+            }
+            /* fifth slot carries the weight set rather than pixel data */
+            src[4] = epel_weights[my&3][mx&3];
+        }
+    }
+
+    /* fixme: v/h _edge_pos */
+    /* the block reaches outside the padded reference: copy every source
+       plane into its emulation buffer and read from there instead */
+    if (x + p->xblen > p->width +EDGE_WIDTH/2 ||
+        y + p->yblen > p->height+EDGE_WIDTH/2 ||
+        x < 0 || y < 0) {
+        for (i = 0; i < nplanes; i++) {
+            s->vdsp.emulated_edge_mc(s->edge_emu_buffer[i], src[i],
+                                     p->stride, p->stride,
+                                     p->xblen, p->yblen, x, y,
+                                     p->width+EDGE_WIDTH/2, p->height+EDGE_WIDTH/2);
+            src[i] = s->edge_emu_buffer[i];
+        }
+    }
+    /* nplanes 1, 2, 4 map to 0, 1, 2; the epel case (always 4 planes) to 3 */
+    return (nplanes>>1) + epel;
+}
+
+/**
+ * Accumulate a DC-predicted block into the motion compensation buffer:
+ * every sample receives (dc + 128) times its OBMC weight.
+ *
+ * Samples are processed in pairs, so an even number of columns is written
+ * per row.
+ *
+ * @param dst         accumulation buffer, top-left sample of the block
+ * @param dc          DC value of the block
+ * @param stride      distance between rows of dst in samples
+ * @param obmc_weight weight table with MAX_BLOCKSIZE entries per row
+ * @param xblen       block width
+ * @param yblen       block height
+ */
+static void add_dc(uint16_t *dst, int dc, int stride,
+                   uint8_t *obmc_weight, int xblen, int yblen)
+{
+    const int level = dc + 128;
+    int row, col, k;
+
+    for (row = 0; row < yblen; row++) {
+        uint16_t      *out = dst         + row * stride;
+        const uint8_t *w   = obmc_weight + row * MAX_BLOCKSIZE;
+
+        for (col = 0; col < xblen; col += 2)
+            for (k = col; k < col + 2; k++)
+                out[k] += level * w[k];
+    }
+}
+
+/**
+ * Motion compensate one block and accumulate it, weighted by obmc_weight,
+ * into mctmp.
+ *
+ * The two low bits of block->ref select the mode: 0 adds the block's DC
+ * value, 1 or 2 predicts from a single reference and 3 combines both
+ * references.
+ *
+ * @param s           decoder context
+ * @param block       block to compensate
+ * @param mctmp       accumulation buffer, top-left sample of the block
+ * @param obmc_weight OBMC weight table to apply
+ * @param plane       plane index
+ * @param dstx        horizontal position of the block in the plane
+ * @param dsty        vertical position of the block in the plane
+ */
+static void block_mc(DiracContext *s, DiracBlock *block,
+                     uint16_t *mctmp, uint8_t *obmc_weight,
+                     int plane, int dstx, int dsty)
+{
+    Plane *p = &s->plane[plane];
+    const uint8_t *src[5];
+    const int refs = block->ref&3;
+    int idx;
+
+    if (!refs) { /* DC */
+        add_dc(mctmp, block->u.dc[plane], p->stride, obmc_weight, p->xblen, p->yblen);
+        return;
+    }
+
+    if (refs != 3) {
+        /* single reference, optionally weighted */
+        idx = mc_subpel(s, block, src, dstx, dsty, refs-1, plane);
+        s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
+        if (s->weight_func)
+            s->weight_func(s->mcscratch, p->stride, s->weight_log2denom,
+                           s->weight[0] + s->weight[1], p->yblen);
+    } else {
+        /* both references: first into the scratch buffer, second merged in */
+        idx = mc_subpel(s, block, src, dstx, dsty, 0, plane);
+        s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
+        idx = mc_subpel(s, block, src, dstx, dsty, 1, plane);
+        if (!s->biweight_func) {
+            s->avg_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
+        } else {
+            /* fixme: +32 is a quick hack */
+            s->put_pixels_tab[idx](s->mcscratch + 32, src, p->stride, p->yblen);
+            s->biweight_func(s->mcscratch, s->mcscratch+32, p->stride, s->weight_log2denom,
+                             s->weight[0], s->weight[1], p->yblen);
+        }
+    }
+
+    s->add_obmc(mctmp, s->mcscratch, p->stride, obmc_weight, p->yblen);
+}
+
+/**
+ * Motion compensate one row of blocks into mctmp.
+ *
+ * The first block uses weight table 0, the last one table 2 and all blocks
+ * in between table 1. Block n is placed at n * xbsep - xoffset.
+ *
+ * @param s     decoder context
+ * @param block first block of the row
+ * @param mctmp accumulation buffer for the row
+ * @param plane plane index
+ * @param dsty  vertical position of the row in the plane
+ */
+static void mc_row(DiracContext *s, DiracBlock *block, uint16_t *mctmp, int plane, int dsty)
+{
+    Plane *p = &s->plane[plane];
+    /* index of the closing block; a closing block is always processed */
+    const int last = FFMAX(1, s->blwidth-1);
+    int n;
+
+    for (n = 0; n <= last; n++) {
+        uint8_t *table = s->obmc_weight[n == 0 ? 0 : (n == last ? 2 : 1)];
+
+        block_mc(s, block + n, mctmp + n * p->xbsep, table, plane,
+                 n * p->xbsep - p->xoffset, dsty);
+    }
+}
+
+/**
+ * Select the DSP routines matching the block width and the reference
+ * weights currently stored in the context.
+ *
+ * Widths up to 8 use table 0, up to 16 table 1 and anything larger table 2.
+ * The weighting functions are only installed when the weights differ from
+ * the defaults; otherwise they are set to NULL.
+ *
+ * @param s      decoder context to update
+ * @param width  unused
+ * @param height unused
+ * @param xblen  block width
+ * @param yblen  unused
+ */
+static void select_dsp_funcs(DiracContext *s, int width, int height, int xblen, int yblen)
+{
+    const int idx = xblen > 16 ? 2 : (xblen > 8 ? 1 : 0);
+    const int default_weights = s->weight_log2denom <= 1 &&
+                                s->weight[0] == 1 && s->weight[1] == 1;
+
+    memcpy(s->put_pixels_tab, s->diracdsp.put_dirac_pixels_tab[idx], sizeof(s->put_pixels_tab));
+    memcpy(s->avg_pixels_tab, s->diracdsp.avg_dirac_pixels_tab[idx], sizeof(s->avg_pixels_tab));
+    s->add_obmc = s->diracdsp.add_dirac_obmc[idx];
+
+    if (default_weights) {
+        s->weight_func   = NULL;
+        s->biweight_func = NULL;
+        return;
+    }
+    s->weight_func   = s->diracdsp.weight_dirac_pixels_tab[idx];
+    s->biweight_func = s->diracdsp.biweight_dirac_pixels_tab[idx];
+}
+
+/**
+ * Prepare one plane of a reference frame for motion compensation: pad the
+ * full-pel plane and, unless only full-pel vectors are in use, build the
+ * three interpolated half-pel planes.
+ *
+ * The half-pel buffers are allocated on first use and filtered only while
+ * ref->interpolated[plane] is unset.
+ *
+ * @param s      decoder context (DSP functions, mv_precision)
+ * @param ref    reference frame to prepare
+ * @param plane  plane index
+ * @param width  plane width
+ * @param height plane height
+ */
+static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, int width, int height)
+{
+    /* chroma allocates an edge of 8 when subsampled
+       which for 4:2:2 means an h edge of 16 and v edge of 8
+       just use 8 for everything for the moment */
+    int i, edge = EDGE_WIDTH/2;
+
+    /* plane 0 of the hpel set is the decoded frame itself */
+    ref->hpel[plane][0] = ref->avframe->data[plane];
+    s->mpvencdsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
+
+    /* no need for hpel if we only have fpel vectors */
+    if (!s->mv_precision)
+        return;
+
+    for (i = 1; i < 4; i++) {
+        /* NOTE(review): the av_malloc() result is not checked before it is
+           offset below; this function has no way to report the failure */
+        if (!ref->hpel_base[plane][i])
+            ref->hpel_base[plane][i] = av_malloc((height+2*edge) * ref->avframe->linesize[plane] + 32);
+        /* we need to be 16-byte aligned even for chroma */
+        ref->hpel[plane][i] = ref->hpel_base[plane][i] + edge*ref->avframe->linesize[plane] + 16;
+    }
+
+    if (!ref->interpolated[plane]) {
+        s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2],
+                                      ref->hpel[plane][3], ref->hpel[plane][0],
+                                      ref->avframe->linesize[plane], width, height);
+        s->mpvencdsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
+        s->mpvencdsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
+        s->mpvencdsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM);
+    }
+    /* remember that this plane has been filtered */
+    ref->interpolated[plane] = 1;
+}
+
+/**
+ * Dirac Specification ->
+ * 13.0 Transform data syntax. transform_data()
+ *
+ * Decodes the residual of the current picture, runs the inverse wavelet
+ * transform for each of the three components and writes the result to
+ * s->current_picture. Intra pictures are output directly; inter pictures
+ * are combined with the overlapped block motion compensated prediction.
+ *
+ * @return 0 on success, -1 if the inverse transform cannot be initialised
+ */
+static int dirac_decode_frame_internal(DiracContext *s)
+{
+    DWTContext d;
+    int y, i, comp, dsty;
+
+    if (s->low_delay) {
+        /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
+        /* all three coefficient buffers are cleared up front, before
+           decode_lowdelay() runs */
+        for (comp = 0; comp < 3; comp++) {
+            Plane *p = &s->plane[comp];
+            memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
+        }
+        /* NOTE(review): any status returned by decode_lowdelay() is not examined here */
+        if (!s->zero_res)
+            decode_lowdelay(s);
+    }
+
+    for (comp = 0; comp < 3; comp++) {
+        Plane *p       = &s->plane[comp];
+        uint8_t *frame = s->current_picture->avframe->data[comp];
+
+        /* FIXME: small resolutions */
+        /* carve the four edge emulation buffers out of one allocation */
+        for (i = 0; i < 4; i++)
+            s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);
+
+        if (!s->zero_res && !s->low_delay)
+        {
+            memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
+            decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
+        }
+        if (ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
+                                  s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp))
+            return -1;
+
+        if (!s->num_refs) { /* intra */
+            /* transform and output the plane in slices of 16 rows */
+            for (y = 0; y < p->height; y += 16) {
+                ff_spatial_idwt_slice2(&d, y+16); /* decode */
+                s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride,
+                                                    p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
+            }
+        } else { /* inter */
+            /* number of mctmp samples one block row advances by */
+            int rowheight = p->ybsep*p->stride;
+
+            select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
+
+            for (i = 0; i < s->num_refs; i++)
+                interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height);
+
+            memset(s->mctmp, 0, 4*p->yoffset*p->stride);
+
+            /* the first block row starts yoffset rows above the picture */
+            dsty = -p->yoffset;
+            for (y = 0; y < s->blheight; y++) {
+                int h     = 0,
+                    start = FFMAX(dsty, 0);
+                uint16_t *mctmp    = s->mctmp + y*rowheight;
+                DiracBlock *blocks = s->blmotion + y*s->blwidth;
+
+                init_obmc_weights(s, p, y);
+
+                /* h is the number of picture rows finished by this block
+                   row; the last one runs to the bottom of the plane */
+                if (y == s->blheight-1 || start+p->ybsep > p->height)
+                    h = p->height - start;
+                else
+                    h = p->ybsep - (start - dsty);
+                if (h < 0)
+                    break;
+
+                /* clear the part of the buffer the next block row overlaps into */
+                memset(mctmp+2*p->yoffset*p->stride, 0, 2*rowheight);
+                mc_row(s, blocks, mctmp, comp, dsty);
+
+                /* skip rows above the picture and the horizontal overlap margin */
+                mctmp += (start - dsty)*p->stride + p->xoffset;
+                ff_spatial_idwt_slice2(&d, start + h); /* decode */
+                s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
+                                             p->idwt_buf + start*p->idwt_stride, p->idwt_stride, p->width, h);
+
+                dsty += p->ybsep;
+            }
+        }
+    }
+
+
+    return 0;
+}
+
+/**
+ * Allocate a frame buffer that is larger than the picture by an edge on
+ * every side and move the data pointers past the top edge.
+ *
+ * The frame dimensions are enlarged for the allocation and set back to the
+ * picture size on success; on failure they are left enlarged.
+ *
+ * @param avctx codec context supplying the picture size and pixel format
+ * @param f     frame to allocate
+ * @param flags flags forwarded to ff_get_buffer()
+ * @return 0 on success, the negative value returned by ff_get_buffer() otherwise
+ */
+static int get_buffer_with_edge(AVCodecContext *avctx, AVFrame *f, int flags)
+{
+    int h_shift, v_shift;
+    int plane, err;
+
+    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &h_shift, &v_shift);
+
+    f->width  = avctx->width  + 2 * EDGE_WIDTH;
+    f->height = avctx->height + 2 * EDGE_WIDTH + 2;
+    err = ff_get_buffer(avctx, f, flags);
+    if (err < 0)
+        return err;
+
+    for (plane = 0; f->data[plane]; plane++) {
+        /* planes 1 and 2 use the vertically subsampled edge height */
+        const int is_chroma = plane == 1 || plane == 2;
+        const int edge_rows = EDGE_WIDTH >> (is_chroma ? v_shift : 0);
+
+        f->data[plane] += edge_rows * f->linesize[plane] + 32;
+    }
+
+    f->width  = avctx->width;
+    f->height = avctx->height;
+
+    return 0;
+}
+
+/**
+ * Dirac Specification ->
+ * 11.1.1 Picture Header. picture_header()
+ *
+ * Reads the picture number, resolves the reference pictures, updates the
+ * reference list and then parses the prediction parameters, the block
+ * motion data and the wavelet transform parameters of the picture.
+ *
+ * Every entry of s->ref_pics[0..num_refs-1] is valid on success: a missing
+ * reference is replaced by a freshly allocated frame, and the header is
+ * rejected when that replacement cannot be obtained.
+ *
+ * @return 0 on success, a negative value on failure
+ */
+static int dirac_decode_picture_header(DiracContext *s)
+{
+    int retire, picnum;
+    int i, j, refnum, refdist, ret;
+    GetBitContext *gb = &s->gb;
+
+    /* [DIRAC_STD] 11.1.1 Picture Header. picture_header() PICTURE_NUM */
+    picnum = s->current_picture->avframe->display_picture_number = get_bits_long(gb, 32);
+
+
+    av_log(s->avctx,AV_LOG_DEBUG,"PICTURE_NUM: %d\n",picnum);
+
+    /* if this is the first keyframe after a sequence header, start our
+       reordering from here */
+    if (s->frame_number < 0)
+        s->frame_number = picnum;
+
+    s->ref_pics[0] = s->ref_pics[1] = NULL;
+    for (i = 0; i < s->num_refs; i++) {
+        refnum = picnum + dirac_get_se_golomb(gb);
+        refdist = INT_MAX;
+
+        /* find the closest reference to the one we want */
+        /* Jordi: this is needed if the referenced picture hasn't yet arrived */
+        for (j = 0; j < MAX_REFERENCE_FRAMES && refdist; j++)
+            if (s->ref_frames[j]
+                && FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum) < refdist) {
+                s->ref_pics[i] = s->ref_frames[j];
+                refdist = FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum);
+            }
+
+        if (!s->ref_pics[i] || refdist)
+            av_log(s->avctx, AV_LOG_DEBUG, "Reference not found\n");
+
+        /* if there were no references at all, allocate one */
+        if (!s->ref_pics[i])
+            for (j = 0; j < MAX_FRAMES; j++)
+                if (!s->all_frames[j].avframe->data[0]) {
+                    s->ref_pics[i] = &s->all_frames[j];
+                    /* a frame without a buffer must not be used as a reference */
+                    ret = get_buffer_with_edge(s->avctx, s->ref_pics[i]->avframe, AV_GET_BUFFER_FLAG_REF);
+                    if (ret < 0)
+                        return ret;
+                    break;
+                }
+
+        /* every frame slot is in use: motion compensation would dereference
+           a NULL reference, so give up on this picture */
+        if (!s->ref_pics[i]) {
+            av_log(s->avctx, AV_LOG_ERROR, "Reference could not be allocated\n");
+            return -1;
+        }
+    }
+
+    /* retire the reference frames that are not used anymore */
+    if (s->current_picture->avframe->reference) {
+        retire = picnum + dirac_get_se_golomb(gb);
+        if (retire != picnum) {
+            DiracFrame *retire_pic = remove_frame(s->ref_frames, retire);
+
+            if (retire_pic)
+                retire_pic->avframe->reference &= DELAYED_PIC_REF;
+            else
+                av_log(s->avctx, AV_LOG_DEBUG, "Frame to retire not found\n");
+        }
+
+        /* if reference array is full, remove the oldest as per the spec */
+        while (add_frame(s->ref_frames, MAX_REFERENCE_FRAMES, s->current_picture)) {
+            av_log(s->avctx, AV_LOG_ERROR, "Reference frame overflow\n");
+            remove_frame(s->ref_frames, s->ref_frames[0]->avframe->display_picture_number)->avframe->reference &= DELAYED_PIC_REF;
+        }
+    }
+
+    if (s->num_refs) {
+        if (dirac_unpack_prediction_parameters(s))  /* [DIRAC_STD] 11.2 Picture Prediction Data. picture_prediction() */
+            return -1;
+        if (dirac_unpack_block_motion_data(s))      /* [DIRAC_STD] 12. Block motion data syntax                       */
+            return -1;
+    }
+    if (dirac_unpack_idwt_params(s))                /* [DIRAC_STD] 11.3 Wavelet transform data                        */
+        return -1;
+
+    init_planes(s);
+    return 0;
+}
+
+/**
+ * Take the frame with the lowest picture number out of the delay queue and
+ * hand a new reference to it to the caller.
+ *
+ * @param s         decoder context owning the NULL-terminated delay queue
+ * @param picture   frame that receives the reference
+ * @param got_frame set to 1 when a frame was taken from the queue
+ * @return 0 on success or when the queue is empty, the negative value
+ *         returned by av_frame_ref() otherwise
+ */
+static int get_delayed_pic(DiracContext *s, AVFrame *picture, int *got_frame)
+{
+    DiracFrame **queue = s->delay_frames;
+    DiracFrame *oldest = queue[0];
+    int pos, oldest_pos = 0;
+    int ret;
+
+    /* find frame with lowest picture number */
+    pos = 1;
+    while (queue[pos]) {
+        if (queue[pos]->avframe->display_picture_number < oldest->avframe->display_picture_number) {
+            oldest     = queue[pos];
+            oldest_pos = pos;
+        }
+        pos++;
+    }
+
+    /* close the gap, moving the terminating NULL along */
+    pos = oldest_pos;
+    while (queue[pos]) {
+        queue[pos] = queue[pos+1];
+        pos++;
+    }
+
+    if (!oldest)
+        return 0;
+
+    oldest->avframe->reference ^= DELAYED_PIC_REF;
+    *got_frame = 1;
+    ret = av_frame_ref(picture, oldest->avframe);
+    return ret < 0 ? ret : 0;
+}
+
+/**
+ * Dirac Specification ->
+ * 9.6 Parse Info Header Syntax. parse_info()
+ * 4 byte start code + byte parse code + 4 byte size + 4 byte previous size
+ */
+#define DATA_UNIT_HEADER_SIZE 13
+
+/* [DIRAC_STD] dirac_decode_data_unit refers to the while loop defined in 9.3,
+   inside the function parse_sequence() */
+static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int size)
+{
+    DiracContext *s   = avctx->priv_data;
+    DiracFrame *pic   = NULL;
+    int ret, i, parse_code = buf[4];
+    unsigned tmp;
+
+    if (size < DATA_UNIT_HEADER_SIZE)
+        return -1;
+
+    init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
+
+    if (parse_code == pc_seq_header) {
+        if (s->seen_sequence_header)
+            return 0;
+
+        /* [DIRAC_STD] 10. Sequence header */
+        if (avpriv_dirac_parse_sequence_header(avctx, &s->gb, &s->source))
+            return -1;
+
+        avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
+
+        if (alloc_sequence_buffers(s))
+            return -1;
+
+        s->seen_sequence_header = 1;
+    } else if (parse_code == pc_eos) { /* [DIRAC_STD] End of Sequence */
+        free_sequence_buffers(s);
+        s->seen_sequence_header = 0;
+    } else if (parse_code == pc_aux_data) {
+        if (buf[13] == 1) {     /* encoder implementation/version */
+            int ver[3];
+            /* versions older than 1.0.8 don't store quant delta for
+               subbands with only one codeblock */
+            if (sscanf(buf+14, "Schroedinger %d.%d.%d", ver, ver+1, ver+2) == 3)
+                if (ver[0] == 1 && ver[1] == 0 && ver[2] <= 7)
+                    s->old_delta_quant = 1;
+        }
+    } else if (parse_code & 0x8) {  /* picture data unit */
+        if (!s->seen_sequence_header) {
+            av_log(avctx, AV_LOG_DEBUG, "Dropping frame without sequence header\n");
+            return -1;
+        }
+
+        /* find an unused frame */
+        for (i = 0; i < MAX_FRAMES; i++)
+            if (s->all_frames[i].avframe->data[0] == NULL)
+                pic = &s->all_frames[i];
+        if (!pic) {
+            av_log(avctx, AV_LOG_ERROR, "framelist full\n");
+            return -1;
+        }
+
+        av_frame_unref(pic->avframe);
+
+        /* [DIRAC_STD] Defined in 9.6.1 ... */
+        tmp            =  parse_code & 0x03;                   /* [DIRAC_STD] num_refs()      */
+        if (tmp > 2) {
+            av_log(avctx, AV_LOG_ERROR, "num_refs of 3\n");
+            return -1;
+        }
+        s->num_refs    = tmp;
+        s->is_arith    = (parse_code & 0x48) == 0x08;          /* [DIRAC_STD] using_ac()      */
+        s->low_delay   = (parse_code & 0x88) == 0x88;          /* [DIRAC_STD] is_low_delay()  */
+        pic->avframe->reference = (parse_code & 0x0C) == 0x0C;  /* [DIRAC_STD]  is_reference() */
+        pic->avframe->key_frame = s->num_refs == 0;             /* [DIRAC_STD] is_intra()      */
+        pic->avframe->pict_type = s->num_refs + 1;              /* Definition of AVPictureType in avutil.h */
+
+        if ((ret = get_buffer_with_edge(avctx, pic->avframe, (parse_code & 0x0C) == 0x0C ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
+            return ret;
+        s->current_picture = pic;
+        s->plane[0].stride = pic->avframe->linesize[0];
+        s->plane[1].stride = pic->avframe->linesize[1];
+        s->plane[2].stride = pic->avframe->linesize[2];
+
+        if (alloc_buffers(s, FFMAX3(FFABS(s->plane[0].stride), FFABS(s->plane[1].stride), FFABS(s->plane[2].stride))) < 0)
+            return AVERROR(ENOMEM);
+
+        /* [DIRAC_STD] 11.1 Picture parse. picture_parse() */
+        if (dirac_decode_picture_header(s))
+            return -1;
+
+        /* [DIRAC_STD] 13.0 Transform data syntax. transform_data() */
+        if (dirac_decode_frame_internal(s))
+            return -1;
+    }
+    return 0;
+}
+
+static int dirac_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *pkt)
+{
+    DiracContext *s     = avctx->priv_data;
+    AVFrame *picture    = data;
+    uint8_t *buf        = pkt->data;
+    int buf_size        = pkt->size;
+    int i, data_unit_size, buf_idx = 0;
+    int ret;
+    /* Decodes every data unit found in pkt, then decides which frame, if any, is returned in data. */
+    /* release unused frames */
+    for (i = 0; i < MAX_FRAMES; i++)
+        if (s->all_frames[i].avframe->data[0] && !s->all_frames[i].avframe->reference) {
+            av_frame_unref(s->all_frames[i].avframe);
+            memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
+        }
+
+    s->current_picture = NULL;
+    *got_frame = 0;
+
+    /* end of stream, so flush delayed pics */
+    if (buf_size == 0)
+        return get_delayed_pic(s, picture, got_frame);
+
+    for (;;) {
+        /*[DIRAC_STD] Here starts the code from parse_info() defined in 9.6
+          [DIRAC_STD] PARSE_INFO_PREFIX = "BBCD" as defined in ISO/IEC 646
+          BBCD start code search */
+        for (; buf_idx + DATA_UNIT_HEADER_SIZE < buf_size; buf_idx++) {
+            if (buf[buf_idx  ] == 'B' && buf[buf_idx+1] == 'B' &&
+                buf[buf_idx+2] == 'C' && buf[buf_idx+3] == 'D')
+                break;
+        }
+        /* BBCD found or end of data */
+        if (buf_idx + DATA_UNIT_HEADER_SIZE >= buf_size)
+            break;
+        /* the size is compared with the bytes left, since buf_idx + size could overflow an int */
+        data_unit_size = AV_RB32(buf+buf_idx+5);
+        if (data_unit_size <= 0 || data_unit_size > buf_size - buf_idx) {
+            if (data_unit_size)
+                av_log(s->avctx, AV_LOG_ERROR,
+                       "Data unit with size %d is larger than input buffer, discarding\n",
+                       data_unit_size);
+            buf_idx += 4;
+            continue;
+        }
+        /* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3 inside the function parse_sequence() */
+        if (dirac_decode_data_unit(avctx, buf+buf_idx, data_unit_size))
+        {
+            av_log(s->avctx, AV_LOG_ERROR,"Error in dirac_decode_data_unit\n");
+            return -1;
+        }
+        buf_idx += data_unit_size;
+    }
+
+    if (!s->current_picture)
+        return buf_size;
+
+    if (s->current_picture->avframe->display_picture_number > s->frame_number) {
+        DiracFrame *delayed_frame = remove_frame(s->delay_frames, s->frame_number);
+
+        s->current_picture->avframe->reference |= DELAYED_PIC_REF;
+
+        if (add_frame(s->delay_frames, MAX_DELAY, s->current_picture)) {
+            int min_num = s->delay_frames[0]->avframe->display_picture_number;
+            /* Too many delayed frames, so we display the frame with the lowest pts */
+            av_log(avctx, AV_LOG_ERROR, "Delay frame overflow\n");
+
+            for (i = 1; s->delay_frames[i]; i++)
+                if (s->delay_frames[i]->avframe->display_picture_number < min_num)
+                    min_num = s->delay_frames[i]->avframe->display_picture_number;
+
+            delayed_frame = remove_frame(s->delay_frames, min_num);
+            add_frame(s->delay_frames, MAX_DELAY, s->current_picture);
+        }
+
+        if (delayed_frame) {
+            delayed_frame->avframe->reference ^= DELAYED_PIC_REF;
+            if((ret=av_frame_ref(picture, delayed_frame->avframe)) < 0)
+                return ret;
+            *got_frame = 1;
+        }
+    } else if (s->current_picture->avframe->display_picture_number == s->frame_number) {
+        /* The right frame at the right time :-) */
+        if((ret=av_frame_ref(picture, s->current_picture->avframe)) < 0)
+            return ret;
+        *got_frame = 1;
+    }
+
+    if (*got_frame)
+        s->frame_number = picture->display_picture_number + 1;
+
+    return buf_idx;
+}
+
+AVCodec ff_dirac_decoder = { /* codec description wiring up the Dirac decoder entry points */
+    .name           = "dirac",
+    .long_name      = NULL_IF_CONFIG_SMALL("BBC Dirac VC-2"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_DIRAC,
+    .priv_data_size = sizeof(DiracContext), /* private state, reached through avctx->priv_data */
+    .init           = dirac_decode_init,
+    .close          = dirac_decode_end,
+    .decode         = dirac_decode_frame,
+    .capabilities   = CODEC_CAP_DELAY,      /* dirac_decode_frame() hands out held-back frames on an empty packet */
+    .flush          = dirac_decode_flush,
+};
diff --git a/libavcodec/diracdsp.c b/libavcodec/diracdsp.c
new file mode 100644
index 0000000..3b8c39d
--- /dev/null
+++ b/libavcodec/diracdsp.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2009 David Conrad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "diracdsp.h"
+#include "libavcodec/x86/diracdsp_mmx.h"
+
+#define FILTER(src, stride)  /* 8 taps -1 3 -7 21 21 -7 3 -1 at offsets -3..4; sum 32, rounded by +16 >> 5 */ \
+    ((21*((src)[ 0*stride] + (src)[1*stride])                   \
+      -7*((src)[-1*stride] + (src)[2*stride])                   \
+      +3*((src)[-2*stride] + (src)[3*stride])                   \
+      -1*((src)[-3*stride] + (src)[4*stride]) + 16) >> 5)
+
+static void dirac_hpel_filter(uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, const uint8_t *src,
+                              int stride, int width, int height)
+{
+    int x, y;
+    /* per row: dstv filters src along stride, dsth filters src along x, dstc filters dstv along x */
+    for (y = 0; y < height; y++) {
+        for (x = -3; x < width+5; x++) /* columns -3..width+4, wider than the row, feed the dstc pass */
+            dstv[x] = av_clip_uint8(FILTER(src+x, stride));
+        /* has to follow the dstv loop: each output reads dstv[x-3] .. dstv[x+4] */
+        for (x = 0; x < width; x++)
+            dstc[x] = av_clip_uint8(FILTER(dstv+x, 1));
+        /* reads src[x-3] .. src[x+4], so src must be readable beyond both row ends */
+        for (x = 0; x < width; x++)
+            dsth[x] = av_clip_uint8(FILTER(src+x, 1));
+
+        src  += stride;
+        dsth += stride;
+        dstv += stride;
+        dstc += stride;
+    }
+}
+
+#define PIXOP_BILINEAR(PFX, OP, WIDTH) /* defines ff_<PFX>_dirac_pixels<WIDTH>_bilinear_c */ \
+    static void ff_ ## PFX ## _dirac_pixels ## WIDTH ## _bilinear_c(uint8_t *dst, const uint8_t *src[5], int stride, int h) \
+    {                                                                   \
+        int x;                                                          \
+        const uint8_t *s0 = src[0];                                     \
+        const uint8_t *s1 = src[1];                                     \
+        const uint8_t *s2 = src[2];                                     \
+        const uint8_t *s3 = src[3];                                     \
+        const uint8_t *w  = src[4]; /* four weights, one per plane */   \
+        /* presumably the weights add up to 16, given the +8 >> 4 rounding -- TODO confirm */ \
+        while (h--) {                                                   \
+            for (x = 0; x < WIDTH; x++) {                               \
+                OP(dst[x], (s0[x]*w[0] + s1[x]*w[1] + s2[x]*w[2] + s3[x]*w[3] + 8) >> 4); \
+            }                                                           \
+            /* all planes advance by the same stride; w stays fixed */  \
+            dst += stride;                                              \
+            s0 += stride;                                               \
+            s1 += stride;                                               \
+            s2 += stride;                                               \
+            s3 += stride;                                               \
+        }                                                               \
+    }
+
+#define OP_PUT(dst, val) (dst) = (val) /* overwrite the destination */
+#define OP_AVG(dst, val) (dst) = (((dst) + (val) + 1)>>1) /* rounded mean with the old destination */
+
+PIXOP_BILINEAR(put, OP_PUT, 8)
+PIXOP_BILINEAR(put, OP_PUT, 16)
+PIXOP_BILINEAR(put, OP_PUT, 32)
+PIXOP_BILINEAR(avg, OP_AVG, 8)
+PIXOP_BILINEAR(avg, OP_AVG, 16)
+PIXOP_BILINEAR(avg, OP_AVG, 32)
+
+#define op_scale1(x)  block[x] = av_clip_uint8( (block[x]*weight + (1<<(log2_denom-1))) >> log2_denom)
+#define op_scale2(x)  dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + (1<<(log2_denom-1))) >> log2_denom)
+/* Both helpers use names of the expanding function; a log2_denom of 0 would make the rounding term shift by -1. */
+#define DIRAC_WEIGHT(W) /* defines the W-wide weight and biweight functions */ \
+    static void weight_dirac_pixels ## W ## _c(uint8_t *block, int stride, int log2_denom, \
+                                               int weight, int h) {     \
+        int x;                                                          \
+        while (h--) {                                                   \
+            for (x = 0; x < W; x += 2) { /* two pixels per pass, each scaled once */ \
+                op_scale1(x);                                           \
+                op_scale1(x+1);                                         \
+            }                                                           \
+            block += stride;                                            \
+        }                                                               \
+    }                                                                   \
+    static void biweight_dirac_pixels ## W ## _c(uint8_t *dst, const uint8_t *src, int stride, int log2_denom, \
+                                                 int weightd, int weights, int h) { \
+        int x;                                                          \
+        while (h--) {                                                   \
+            for (x = 0; x < W; x += 2) { /* stays inside the W-wide row */ \
+                op_scale2(x);                                           \
+                op_scale2(x+1);                                         \
+            }                                                           \
+            dst += stride;                                              \
+            src += stride;                                              \
+        }                                                               \
+    }
+
+DIRAC_WEIGHT(8)
+DIRAC_WEIGHT(16)
+DIRAC_WEIGHT(32)
+
+#define ADD_OBMC(xblen)                                                 \
+    static void add_obmc ## xblen ## _c(uint16_t *dst, const uint8_t *src, int stride, \
+                                        const uint8_t *obmc_weight, int yblen) \
+    {                                                                   \
+        int col;                                                        \
+        while (yblen--) {                                               \
+            for (col = 0; col < xblen; col++)                           \
+                dst[col] += src[col] * obmc_weight[col];                \
+            /* dst and src share one stride (counted in elements),   */ \
+            /* while the weight table always moves on by 32 entries  */ \
+            dst += stride;                                              \
+            src += stride;                                              \
+            obmc_weight += 32;                                          \
+        }                                                               \
+    }
+
+ADD_OBMC(8)
+ADD_OBMC(16)
+ADD_OBMC(32)
+
+static void put_signed_rect_clamped_c(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height)
+{
+    int row, col, k;
+    /* Columns are handled in groups of four, so a width that is not a
+       multiple of four still converts the whole last group. */
+    for (row = 0; row < height; row++) {
+        /* shift the signed sample by 128 and saturate it to 8 bits */
+        for (col = 0; col < width; col += 4)
+            for (k = 0; k < 4; k++)
+                dst[col + k] = av_clip_uint8(src[col + k] + 128);
+        dst += dst_stride;
+        src += src_stride;
+    }
+}
+
+static void add_rect_clamped_c(uint8_t *dst, const uint16_t *src, int stride,
+                               const int16_t *idwt, int idwt_stride,
+                               int width, int height)
+{
+    int row, col, k;
+    for (row = 0; row < height; row++) {
+        /* columns go in pairs, so an odd width also covers column `width` */
+        for (col = 0; col < width; col += 2)
+            for (k = 0; k < 2; k++)
+                /* (src + 32) >> 6 is a rounding divide by 64; idwt is added before saturating */
+                dst[col + k] = av_clip_uint8(((src[col + k]+32)>>6) + idwt[col + k]);
+        dst  += stride;
+        src  += stride;
+        idwt += idwt_stride;
+    }
+}
+
+#define PIXFUNC(PFX, WIDTH) /* fills row WIDTH>>4 (8 -> 0, 16 -> 1, 32 -> 2) of the PFX table */ \
+    c->PFX ## _dirac_pixels_tab[WIDTH>>4][0] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _c; \
+    c->PFX ## _dirac_pixels_tab[WIDTH>>4][1] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _l2_c; \
+    c->PFX ## _dirac_pixels_tab[WIDTH>>4][2] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _l4_c; \
+    c->PFX ## _dirac_pixels_tab[WIDTH>>4][3] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _bilinear_c
+
+void ff_diracdsp_init(DiracDSPContext *c)
+{
+    c->dirac_hpel_filter = dirac_hpel_filter;
+    c->add_rect_clamped = add_rect_clamped_c;
+    c->put_signed_rect_clamped = put_signed_rect_clamped_c;
+    /* NOTE(review): put_rect_clamped is not assigned in this function -- confirm it is set elsewhere or unused */
+    c->add_dirac_obmc[0] = add_obmc8_c;
+    c->add_dirac_obmc[1] = add_obmc16_c;
+    c->add_dirac_obmc[2] = add_obmc32_c;
+    /* as above, entries 0, 1 and 2 hold the 8-, 16- and 32-wide variants */
+    c->weight_dirac_pixels_tab[0] = weight_dirac_pixels8_c;
+    c->weight_dirac_pixels_tab[1] = weight_dirac_pixels16_c;
+    c->weight_dirac_pixels_tab[2] = weight_dirac_pixels32_c;
+    c->biweight_dirac_pixels_tab[0] = biweight_dirac_pixels8_c;
+    c->biweight_dirac_pixels_tab[1] = biweight_dirac_pixels16_c;
+    c->biweight_dirac_pixels_tab[2] = biweight_dirac_pixels32_c;
+    /* each PIXFUNC line sets the four entries of one put/avg table row */
+    PIXFUNC(put, 8);
+    PIXFUNC(put, 16);
+    PIXFUNC(put, 32);
+    PIXFUNC(avg, 8);
+    PIXFUNC(avg, 16);
+    PIXFUNC(avg, 32);
+    /* the x86-specific init runs last and only in builds with MMX and yasm */
+    if (HAVE_MMX && HAVE_YASM) ff_diracdsp_init_mmx(c);
+}
diff --git a/libavcodec/diracdsp.h b/libavcodec/diracdsp.h
new file mode 100644
index 0000000..613ca5b
--- /dev/null
+++ b/libavcodec/diracdsp.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2010 David Conrad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DIRACDSP_H
+#define AVCODEC_DIRACDSP_H
+
+#include <stdint.h>
+
+typedef void (*dirac_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int h); /* weights block in place */
+typedef void (*dirac_biweight_func)(uint8_t *dst, const uint8_t *src, int stride, int log2_denom, int weightd, int weights, int h); /* blends src into dst */
+
+typedef struct { /* table of DSP function pointers, passed to ff_diracdsp_init() */
+    void (*dirac_hpel_filter)(uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, const uint8_t *src, int stride, int width, int height);
+    /**
+     * dirac_pixels_tab[width][subpel]
+     * width is 2 for 32, 1 for 16, 0 for 8
+     * subpel is 0 for fpel and hpel (only need to copy from the first plane in src)
+     *           1 if an average of the first 2 planes is needed (TODO: worth it?)
+     *           2 for general qpel (avg of 4)
+     *           3 for general epel (biweight of 4 using the weights in src[4])
+     * src[0-3] is each of the hpel planes
+     * src[4] is the 1/8 pel weights if needed
+     */
+    void (*put_dirac_pixels_tab[3][4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
+    void (*avg_dirac_pixels_tab[3][4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
+    /* rectangle helpers; the inline notes state the expected alignment and height */
+    void (*put_signed_rect_clamped)(uint8_t *dst/*align 16*/, int dst_stride, const int16_t *src/*align 16*/, int src_stride, int width, int height/*mod 2*/);
+    void (*put_rect_clamped)(uint8_t *dst/*align 16*/, int dst_stride, const int16_t *src/*align 16*/, int src_stride, int width, int height/*mod 2*/);
+    void (*add_rect_clamped)(uint8_t *dst/*align 16*/, const uint16_t *src/*align 16*/, int stride, const int16_t *idwt/*align 16*/, int idwt_stride, int width, int height/*mod 2*/);
+    void (*add_dirac_obmc[3])(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
+    /* three-entry tables: presumably [0] = 8, [1] = 16, [2] = 32 wide, like the pixel tables -- confirm in ff_diracdsp_init() */
+    dirac_weight_func weight_dirac_pixels_tab[3];
+    dirac_biweight_func biweight_dirac_pixels_tab[3];
+} DiracDSPContext;
+
+#define DECL_DIRAC_PIXOP(PFX, EXT) /* declares the 8-, 16- and 32-wide ff_<PFX>_dirac_pixels<N>_<EXT> */ \
+    void ff_ ## PFX ## _dirac_pixels8_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h); \
+    void ff_ ## PFX ## _dirac_pixels16_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h); \
+    void ff_ ## PFX ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)
+
+DECL_DIRAC_PIXOP(put, c);     /* EXT values c, l2_c and l4_c are the suffixes PIXFUNC() uses for subpel 0, 1 and 2 */
+DECL_DIRAC_PIXOP(avg, c);
+DECL_DIRAC_PIXOP(put, l2_c);
+DECL_DIRAC_PIXOP(avg, l2_c);
+DECL_DIRAC_PIXOP(put, l4_c);
+DECL_DIRAC_PIXOP(avg, l4_c);
+
+void ff_diracdsp_init(DiracDSPContext *c);
+
+#endif /* AVCODEC_DIRACDSP_H */
diff --git a/libavcodec/dnxhd_parser.c b/libavcodec/dnxhd_parser.c
index 0de3561..fffb98f 100644
--- a/libavcodec/dnxhd_parser.c
+++ b/libavcodec/dnxhd_parser.c
@@ -2,20 +2,20 @@
  * DNxHD/VC-3 parser
  * Copyright (c) 2008 Baptiste Coudurier <baptiste.coudurier@free.fr>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,21 +26,32 @@
 
 #include "parser.h"
 
-#define DNXHD_HEADER_PREFIX 0x0000028001
+#define DNXHD_HEADER_PREFIX 0x000002800100
 
-static int dnxhd_find_frame_end(ParseContext *pc,
+typedef struct {
+    ParseContext pc;
+    int interlaced;
+    int cur_field; /* first field is 0, second is 1 */
+} DNXHDParserContext;
+
+static int dnxhd_find_frame_end(DNXHDParserContext *dctx,
                                 const uint8_t *buf, int buf_size)
 {
+    ParseContext *pc = &dctx->pc;
     uint64_t state = pc->state64;
     int pic_found = pc->frame_start_found;
     int i = 0;
+    int interlaced = dctx->interlaced;
+    int cur_field = dctx->cur_field;
 
     if (!pic_found) {
         for (i = 0; i < buf_size; i++) {
             state = (state << 8) | buf[i];
-            if ((state & 0xffffffffffLL) == DNXHD_HEADER_PREFIX) {
+            if ((state & 0xffffffffff00LL) == DNXHD_HEADER_PREFIX) {
                 i++;
                 pic_found = 1;
+                interlaced = (state&2)>>1; /* byte following the 5-byte header prefix */
+                cur_field = state&1;
                 break;
             }
         }
@@ -51,15 +62,25 @@ static int dnxhd_find_frame_end(ParseContext *pc,
             return 0;
         for (; i < buf_size; i++) {
             state = (state << 8) | buf[i];
-            if ((state & 0xffffffffffLL) == DNXHD_HEADER_PREFIX) {
-                pc->frame_start_found = 0;
-                pc->state64 = -1;
-                return i - 4;
+            if ((state & 0xffffffffff00LL) == DNXHD_HEADER_PREFIX) {
+                if (!interlaced || dctx->cur_field) {
+                    pc->frame_start_found = 0;
+                    pc->state64 = -1;
+                    dctx->interlaced = interlaced;
+                    dctx->cur_field = 0;
+                    return i - 5;
+                } else {
+                    /* continue, to get the second field */
+                    dctx->interlaced = interlaced = (state&2)>>1;
+                    dctx->cur_field = cur_field = state&1;
+                }
             }
         }
     }
     pc->frame_start_found = pic_found;
     pc->state64 = state;
+    dctx->interlaced = interlaced;
+    dctx->cur_field = cur_field;
     return END_NOT_FOUND;
 }
 
@@ -68,13 +89,14 @@ static int dnxhd_parse(AVCodecParserContext *s,
                        const uint8_t **poutbuf, int *poutbuf_size,
                        const uint8_t *buf, int buf_size)
 {
-    ParseContext *pc = s->priv_data;
+    DNXHDParserContext *dctx = s->priv_data;
+    ParseContext *pc = &dctx->pc;
     int next;
 
     if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
         next = buf_size;
     } else {
-        next = dnxhd_find_frame_end(pc, buf, buf_size);
+        next = dnxhd_find_frame_end(dctx, buf, buf_size);
         if (ff_combine_frame(pc, next, &buf, &buf_size) < 0) {
             *poutbuf      = NULL;
             *poutbuf_size = 0;
@@ -88,7 +110,7 @@ static int dnxhd_parse(AVCodecParserContext *s,
 
 AVCodecParser ff_dnxhd_parser = {
     .codec_ids      = { AV_CODEC_ID_DNXHD },
-    .priv_data_size = sizeof(ParseContext),
+    .priv_data_size = sizeof(DNXHDParserContext),
     .parser_parse   = dnxhd_parse,
     .parser_close   = ff_parse_close,
 };
diff --git a/libavcodec/dnxhddata.c b/libavcodec/dnxhddata.c
index 0edaeec..7ea9191 100644
--- a/libavcodec/dnxhddata.c
+++ b/libavcodec/dnxhddata.c
@@ -2,20 +2,20 @@
  * VC3/DNxHD data.
  * Copyright (c) 2007 SmartJog S.A., Baptiste Coudurier <baptiste dot coudurier at smartjog dot com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -23,26 +23,28 @@
 #include "dnxhddata.h"
 #include "libavutil/common.h"
 
+/* The quantization tables below are in zigzag order! */
+
 static const uint8_t dnxhd_1235_luma_weight[] = {
-     0, 32, 32, 32, 33, 35, 38, 39,
-    32, 33, 32, 33, 36, 36, 39, 42,
-    32, 32, 33, 36, 35, 37, 41, 43,
-    31, 33, 34, 36, 36, 40, 42, 48,
-    32, 34, 36, 37, 39, 42, 46, 51,
-    36, 37, 37, 39, 41, 46, 51, 55,
-    37, 39, 41, 41, 47, 50, 55, 56,
-    41, 42, 41, 44, 50, 53, 60, 60
+     0, 32, 32, 32, 33, 32, 32, 32,
+    32, 31, 32, 33, 33, 33, 33, 35,
+    36, 36, 34, 34, 36, 37, 37, 36,
+    36, 35, 36, 38, 39, 39, 37, 36,
+    37, 37, 39, 41, 42, 41, 39, 39,
+    40, 41, 42, 43, 42, 42, 41, 41,
+    41, 44, 47, 46, 46, 48, 51, 51,
+    50, 50, 53, 55, 55, 56, 60, 60,
 };
 
 static const uint8_t dnxhd_1235_chroma_weight[] = {
-     0, 32, 33, 34, 39, 41, 54, 59,
-    33, 34, 35, 38, 43, 49, 58, 84,
-    34, 37, 39, 44, 46, 55, 74, 87,
-    40, 42, 47, 48, 58, 70, 87, 86,
-    43, 50, 56, 63, 72, 94, 91, 82,
-    55, 63, 65, 75, 93, 89, 85, 73,
-    61, 67, 82, 81, 83, 90, 79, 73,
-    74, 84, 75, 78, 90, 85, 73, 73
+     0, 32, 33, 34, 34, 33, 34, 35,
+    37, 40, 43, 42, 39, 38, 39, 41,
+    43, 44, 47, 50, 55, 61, 63, 56,
+    48, 46, 49, 54, 59, 58, 55, 58,
+    63, 65, 67, 74, 84, 82, 75, 72,
+    70, 74, 84, 87, 87, 94, 93, 81,
+    75, 78, 83, 89, 91, 86, 82, 85,
+    90, 90, 85, 79, 73, 73, 73, 73,
 };
 
 static const uint8_t dnxhd_1237_luma_weight[] = {
@@ -156,25 +158,25 @@ static const uint8_t dnxhd_1243_chroma_weight[] = {
 };
 
 static const uint8_t dnxhd_1250_luma_weight[] = {
-     0, 32, 35, 35, 36, 36, 41, 43,
-    32, 34, 35, 36, 37, 39, 43, 47,
-    33, 34, 36, 38, 38, 42, 42, 50,
-    34, 36, 38, 38, 41, 40, 47, 54,
-    35, 38, 39, 40, 39, 45, 49, 58,
-    38, 39, 40, 39, 46, 47, 54, 60,
-    38, 39, 41, 46, 46, 48, 57, 62,
-    40, 41, 44, 45, 49, 54, 63, 63
+     0, 32, 32, 33, 34, 35, 35, 35,
+    34, 34, 35, 36, 36, 36, 36, 36,
+    37, 38, 38, 38, 38, 38, 39, 39,
+    38, 38, 39, 41, 43, 43, 42, 41,
+    40, 40, 39, 40, 41, 41, 39, 39,
+    40, 42, 47, 50, 47, 45, 46, 46,
+    44, 45, 46, 47, 49, 54, 58, 54,
+    48, 49, 54, 57, 60, 62, 63, 63,
 };
 
 static const uint8_t dnxhd_1250_chroma_weight[] = {
-     0, 32, 35, 36, 40, 42, 51, 51,
-    35, 36, 39, 39, 43, 51, 52, 55,
-    36, 41, 41, 43, 51, 53, 54, 56,
-    43, 44, 45, 50, 54, 54, 55, 57,
-    45, 48, 50, 51, 55, 58, 59, 58,
-    49, 52, 49, 57, 58, 62, 58, 60,
-    51, 51, 56, 58, 62, 61, 59, 62,
-    52, 52, 60, 61, 59, 59, 63, 63
+     0, 32, 35, 36, 36, 35, 36, 39,
+    41, 43, 45, 44, 41, 39, 40, 42,
+    43, 43, 45, 48, 49, 51, 52, 50,
+    50, 51, 51, 51, 51, 52, 53, 54,
+    51, 49, 51, 52, 52, 56, 57, 55,
+    54, 54, 55, 56, 55, 58, 58, 58,
+    60, 61, 62, 62, 59, 57, 58, 58,
+    61, 59, 59, 59, 60, 62, 63, 63,
 };
 
 static const uint8_t dnxhd_1251_luma_weight[] = {
@@ -296,63 +298,43 @@ static const uint8_t dnxhd_1237_ac_bits[257] = {
 };
 
 static const uint8_t dnxhd_1237_ac_level[257] = {
-     1,  1,  2,  0,  3,  4,  2,  5,  6,  7,  3,  8,  9, 10, 11, 12,
-     4,  5, 13, 14, 15, 16,  6, 17, 18, 19, 20, 21,  7, 22, 23, 24,
-    25, 26, 27,  8,  9, 28, 29, 30, 31, 32, 33, 34, 10, 11, 12, 35,
-    36, 37, 38, 39, 40, 41, 13, 14, 15, 16, 42, 43, 44, 45, 46, 47,
-    48, 49, 50, 51, 52, 17, 18, 19, 20, 21, 53, 54, 55, 56, 57, 58,
-    59, 60, 61, 64,  1, 22, 23, 24, 25, 26, 27, 62, 63,  2,  3,  4,
-     5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
-    21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
-    37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52,
-    53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 28, 29, 30, 31,
-    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
-    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-    64,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
-    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
-    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
-    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-    64,
-};
-
-static const uint8_t dnxhd_1237_ac_run_flag[257] = {
-    0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
-    1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
-    0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0,
-    0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      3,  3,  5,  0,  7,  9,  5, 11, 13, 15,  7, 17, 19, 21, 23, 25,
+      9, 11, 27, 29, 31, 33, 13, 35, 37, 39, 41, 43, 15, 45, 47, 49,
+     51, 53, 55, 17, 19, 57, 59, 61, 63, 65, 67, 69, 21, 23, 25, 71,
+     73, 75, 77, 79, 81, 83, 27, 29, 31, 33, 85, 87, 89, 91, 93, 95,
+     97, 99,101,103,105, 35, 37, 39, 41, 43,107,109,111,113,115,117,
+    119,121,123,129,  3, 45, 47, 49, 51, 53, 55,125,127,  5,  7,  9,
+     11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41,
+     43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73,
+     75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99,101,103,105,
+    107,109,111,113,115,117,119,121,123,125,127,129, 57, 59, 61, 63,
+     65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95,
+     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
+    129,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
+     33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63,
+     65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95,
+     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
+    129,
+};
+
+static const uint8_t dnxhd_1237_ac_flags[257] = {
+    0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0,
+    2, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0,
+    0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0,
+    0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 2, 0, 0, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1,
-};
-
-static const uint8_t dnxhd_1237_ac_index_flag[257] = {
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3,
 };
 
 static const uint16_t dnxhd_1237_run_codes[62] = {
@@ -445,63 +427,43 @@ static const uint8_t dnxhd_1238_ac_bits[257] = {
 };
 
 static const uint8_t dnxhd_1238_ac_level[257] = {
-     1,  1,  2,  3,  0,  4,  5,  2,  6,  7,  8,  3,  9, 10, 11,  4,
-    12, 13, 14, 15, 16,  5, 17, 18, 19, 20, 21, 22,  6,  7, 23, 24,
-    25, 26, 27, 28, 29,  8,  9, 30, 31, 32, 33, 34, 35, 36, 37, 10,
-    11, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 12, 13, 14, 49,
-    50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 15, 16, 17, 18,
-    62, 63, 64,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,
-    14, 15, 16, 19, 20, 21, 22, 23, 24, 17, 18, 19, 20, 21, 22, 23,
-    24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 40, 25,
-    26, 27, 28, 29, 30, 38, 39, 41, 42, 43, 44, 45, 46, 47, 48, 49,
-    50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 31,
-    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
-    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-    64,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
-    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
-    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
-    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-    64,
+      3,  3,  5,  7,  0,  9, 11,  5, 13, 15, 17,  7, 19, 21, 23,  9,
+     25, 27, 29, 31, 33, 11, 35, 37, 39, 41, 43, 45, 13, 15, 47, 49,
+     51, 53, 55, 57, 59, 17, 19, 61, 63, 65, 67, 69, 71, 73, 75, 21,
+     23, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 25, 27, 29, 99,
+    101,103,105,107,109,111,113,115,117,119,121,123, 31, 33, 35, 37,
+    125,127,129,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27,
+     29, 31, 33, 39, 41, 43, 45, 47, 49, 35, 37, 39, 41, 43, 45, 47,
+     49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 81, 51,
+     53, 55, 57, 59, 61, 77, 79, 83, 85, 87, 89, 91, 93, 95, 97, 99,
+    101,103,105,107,109,111,113,115,117,119,121,123,125,127,129, 63,
+     65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95,
+     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
+    129,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
+     33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63,
+     65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95,
+     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
+    129,
 }; /* 0 is EOB */
 
-static const uint8_t dnxhd_1238_ac_run_flag[257] = {
-    0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1,
-    0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
-    0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
-    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
-    1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1,
-};
-
-static const uint8_t dnxhd_1238_ac_index_flag[257] = {
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+static const uint8_t dnxhd_1238_ac_flags[257] = {
+    0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2,
+    0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0,
+    0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2,
+    2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2,
     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
-    0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1,
+    1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
+    2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3,
 };
 
 static const uint16_t dnxhd_1235_1238_1241_run_codes[62] = {
@@ -594,63 +556,43 @@ static const uint8_t dnxhd_1235_1241_ac_bits[257] = {
 };
 
 static const uint8_t dnxhd_1235_1241_ac_level[257] = {
-     1,  1,  2,  3,  0,  4,  5,  2,  6,  7,  8,  3,  9, 10, 11,  4,
-    12, 13, 14, 15, 16,  5, 17, 18, 19, 20, 21,  6,  7, 22, 23, 24,
-    25, 26, 27, 28, 29,  8,  9, 30, 31, 32, 33, 34, 35, 36, 37, 38,
-    10, 11, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 12, 13,
-    14, 15, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,  1,
-    16, 17, 18, 19, 64,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
-    13, 14, 15, 16, 17, 20, 21, 22, 23, 24, 18, 19, 20, 21, 22, 23,
-    24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
-    40, 41, 42, 25, 26, 27, 28, 29, 30, 31, 32, 43, 44, 45, 46, 47,
-    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-    64, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
-    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-    64,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
-    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
-    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
-    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-    64,
-};
-
-static const uint8_t dnxhd_1235_1241_ac_run_flag[257] = {
-    0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1,
-    0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
-    0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
-    1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      3,  3,  5,  7,  0,  9, 11,  5, 13, 15, 17,  7, 19, 21, 23,  9,
+     25, 27, 29, 31, 33, 11, 35, 37, 39, 41, 43, 13, 15, 45, 47, 49,
+     51, 53, 55, 57, 59, 17, 19, 61, 63, 65, 67, 69, 71, 73, 75, 77,
+     21, 23, 79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99,101, 25, 27,
+     29, 31,103,105,107,109,111,113,115,117,119,121,123,125,127,  3,
+     33, 35, 37, 39,129,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25,
+     27, 29, 31, 33, 35, 41, 43, 45, 47, 49, 37, 39, 41, 43, 45, 47,
+     49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79,
+     81, 83, 85, 51, 53, 55, 57, 59, 61, 63, 65, 87, 89, 91, 93, 95,
+     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
+    129, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95,
+     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
+    129,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
+     33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63,
+     65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95,
+     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
+    129,
+};
+
+static const uint8_t dnxhd_1235_1241_ac_flags[257] = {
+    0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2,
+    0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0,
+    0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2,
+    2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
+    2, 2, 2, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1,
-};
-
-static const uint8_t dnxhd_1235_1241_ac_index_flag[257] = {
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
-    0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3,
 };
 
 static const uint8_t dnxhd_1235_1241_run[62] = {
@@ -721,61 +663,42 @@ static const uint8_t dnxhd_1250_ac_bits[257] = {
     16
 };
 static const uint8_t dnxhd_1250_ac_level[257] = {
-     1,  1,  2,  3,  0,  4,  5,  2,  6,  7,  8,  3,  9, 10, 11,  4,
-    12, 13, 14, 15, 16,  5, 17, 18, 19, 20, 21, 22,  6, 23, 24, 25,
-    26, 27, 28, 29,  7,  8, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
-     9, 10, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 11,
-    12, 13, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,  1,  2,
-     3,  4,  5, 14, 15, 16, 17,  6,  7,  8,  9, 10, 11, 12, 13, 14,
-    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 18, 19, 20, 21,
-    27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
-    43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 55, 56, 22, 23, 24,
-    25, 26, 27, 54, 57, 58, 59, 60, 61, 62, 63, 64, 28, 29, 30, 31,
-    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
-    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-    64,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
-    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
-    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
-    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-    64
-};
-static const uint8_t dnxhd_1250_ac_run_flag[257] = {
-    0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1,
-    0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
-    0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
-    1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
-    1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      3,  3,  5,  7,  0,  9, 11,  5, 13, 15, 17,  7, 19, 21, 23,  9,
+     25, 27, 29, 31, 33, 11, 35, 37, 39, 41, 43, 45, 13, 47, 49, 51,
+     53, 55, 57, 59, 15, 17, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79,
+     19, 21, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99,101,103,105, 23,
+     25, 27,107,109,111,113,115,117,119,121,123,125,127,129,  3,  5,
+      7,  9, 11, 29, 31, 33, 35, 13, 15, 17, 19, 21, 23, 25, 27, 29,
+     31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 37, 39, 41, 43,
+     55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85,
+     87, 89, 91, 93, 95, 97, 99,101,103,105,107,111,113, 45, 47, 49,
+     51, 53, 55,109,115,117,119,121,123,125,127,129, 57, 59, 61, 63,
+     65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95,
+     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
+    129,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
+     33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63,
+     65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95,
+     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
+    129
+};
+static const uint8_t dnxhd_1250_ac_flags[257] = {
+    0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2,
+    0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,
+    0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
+    2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
+    1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1
-};
-static const uint8_t dnxhd_1250_ac_index_flag[257] = {
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
-    1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
-    0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
+    2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3,
 };
 static const uint16_t dnxhd_1250_run_codes[62] = {
        0,    4,    5,   12,   26,   27,   28,   58,
@@ -865,63 +788,43 @@ static const uint8_t dnxhd_1251_ac_bits[257] = {
 };
 
 static const uint8_t dnxhd_1251_ac_level[257] = {
-     1,  1,  2,  3,  0,  4,  5,  2,  6,  7,  8,  3,  9, 10, 11,  4,
-    12, 13, 14, 15, 16,  5, 17, 18, 19, 20, 21,  6, 22, 23, 24, 25,
-    26, 27, 28, 29,  7,  8, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
-    40,  9, 10, 11, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52,
-    12, 13, 14, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,  1,
-     2,  3,  4,  5,  6,  7,  8, 15, 16, 17,  9, 10, 11, 12, 13, 14,
-    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 18,
-    19, 20, 21, 22, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
-    42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
-    58, 23, 24, 25, 26, 27, 28, 59, 60, 61, 62, 63, 64, 29, 30, 31,
-    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
-    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-    64,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
-    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
-    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
-    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-    64,
-};
-
-static const uint8_t dnxhd_1251_ac_run_flag[257] = {
-    0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1,
-    0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
-    0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
-    1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      3,  3,  5,  7,  0,  9, 11,  5, 13, 15, 17,  7, 19, 21, 23,  9,
+     25, 27, 29, 31, 33, 11, 35, 37, 39, 41, 43, 13, 45, 47, 49, 51,
+     53, 55, 57, 59, 15, 17, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79,
+     81, 19, 21, 23, 83, 85, 87, 89, 91, 93, 95, 97, 99,101,103,105,
+     25, 27, 29,107,109,111,113,115,117,119,121,123,125,127,129,  3,
+      5,  7,  9, 11, 13, 15, 17, 31, 33, 35, 19, 21, 23, 25, 27, 29,
+     31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 37,
+     39, 41, 43, 45, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79, 81, 83,
+     85, 87, 89, 91, 93, 95, 97, 99,101,103,105,107,109,111,113,115,
+    117, 47, 49, 51, 53, 55, 57,119,121,123,125,127,129, 59, 61, 63,
+     65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95,
+     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
+    129,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
+     33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63,
+     65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95,
+     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
+    129,
+};
+
+static const uint8_t dnxhd_1251_ac_flags[257] = {
+    0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2,
+    0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0,
+    0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
+    1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
+    2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1,
-};
-
-static const uint8_t dnxhd_1251_ac_index_flag[257] = {
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
-    1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
-    0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1,
+    1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3,
 };
 
 static const uint16_t dnxhd_1251_run_codes[62] = {
@@ -1014,143 +917,134 @@ static const uint8_t dnxhd_1252_ac_bits[257] = {
 };
 
 static const uint8_t dnxhd_1252_ac_level[257] = {
-     1,  1,  2,  3,  2,  0,  4,  5,  6,  7,  3,  8,  9, 10, 11, 12,
-    13, 14,  4,  5, 15, 16, 17, 18,  6, 19, 20, 21, 22, 23, 24,  7,
-     8, 25, 26, 27, 28, 29, 30, 31, 32,  9, 10, 33, 34, 35, 36, 37,
-    38, 39, 40, 41, 11, 12, 13, 42, 43, 44, 45, 46, 47, 48, 49, 50,
-    51, 52, 53, 14, 15, 16, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-    64,  1,  2,  3, 17, 18, 19, 20,  4,  5,  6,  7,  8,  9, 10, 11,
-    12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 21, 22, 23, 24, 25, 22,
-    23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
-    39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54,
-    55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 26, 27, 28, 29, 30, 31,
-    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
-    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-    64,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
-    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
-    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
-    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-    64,
-};
-
-static const uint8_t dnxhd_1252_ac_run_flag[257] = {
-    0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
-    0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
-    1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      3,  3,  5,  7,  5,  0,  9, 11, 13, 15,  7, 17, 19, 21, 23, 25,
+     27, 29,  9, 11, 31, 33, 35, 37, 13, 39, 41, 43, 45, 47, 49, 15,
+     17, 51, 53, 55, 57, 59, 61, 63, 65, 19, 21, 67, 69, 71, 73, 75,
+     77, 79, 81, 83, 23, 25, 27, 85, 87, 89, 91, 93, 95, 97, 99,101,
+    103,105,107, 29, 31, 33,109,111,113,115,117,119,121,123,125,127,
+    129,  3,  5,  7, 35, 37, 39, 41,  9, 11, 13, 15, 17, 19, 21, 23,
+     25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 43, 45, 47, 49, 51, 45,
+     47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77,
+     79, 81, 83, 85, 87, 89, 91, 93, 95, 97, 99,101,103,105,107,109,
+    111,113,115,117,119,121,123,125,127,129, 53, 55, 57, 59, 61, 63,
+     65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95,
+     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
+    129,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
+     33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63,
+     65, 67, 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 93, 95,
+     97, 99,101,103,105,107,109,111,113,115,117,119,121,123,125,127,
+    129,
+};
+
+static const uint8_t dnxhd_1252_ac_flags[257] = {
+    0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0,
+    0, 0, 2, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2,
+    2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1,
-};
-
-static const uint8_t dnxhd_1252_ac_index_flag[257] = {
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3,
 };
 
 const CIDEntry ff_dnxhd_cid_table[] = {
-    { 1235, 1920, 1080, 0, 917504, 917504, 6, 10,
+    { 1235, 1920, 1080, 0, 917504, 917504, 6, 10, 4,
       dnxhd_1235_luma_weight, dnxhd_1235_chroma_weight,
       dnxhd_1235_1241_dc_codes, dnxhd_1235_1241_dc_bits,
       dnxhd_1235_1241_ac_codes, dnxhd_1235_1241_ac_bits, dnxhd_1235_1241_ac_level,
-      dnxhd_1235_1241_ac_run_flag, dnxhd_1235_1241_ac_index_flag,
+      dnxhd_1235_1241_ac_flags,
       dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1235_1241_run,
-      { 175, 185, 365, 440 } },
-    { 1237, 1920, 1080, 0, 606208, 606208, 4, 8,
+      { 175, 185, 365, 440 },
+      { { 24000, 1001 }, { 25, 1 }, { 50, 1 }, { 60000, 1001 } } },
+    { 1237, 1920, 1080, 0, 606208, 606208, 4, 8, 3,
       dnxhd_1237_luma_weight, dnxhd_1237_chroma_weight,
       dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
       dnxhd_1237_ac_codes, dnxhd_1237_ac_bits, dnxhd_1237_ac_level,
-      dnxhd_1237_ac_run_flag, dnxhd_1237_ac_index_flag,
+      dnxhd_1237_ac_flags,
       dnxhd_1237_run_codes, dnxhd_1237_run_bits, dnxhd_1237_run,
-      { 115, 120, 145, 240, 290 } },
-    { 1238, 1920, 1080, 0, 917504, 917504, 4, 8,
+      { 115, 120, 145, 240, 290 },
+      { { 24000, 1001 }, { 25, 1 }, { 30000, 1001 }, { 50, 1 }, { 60000, 1001 } } },
+    { 1238, 1920, 1080, 0, 917504, 917504, 4, 8, 4,
       dnxhd_1238_luma_weight, dnxhd_1238_chroma_weight,
       dnxhd_1238_dc_codes, dnxhd_1238_dc_bits,
       dnxhd_1238_ac_codes, dnxhd_1238_ac_bits, dnxhd_1238_ac_level,
-      dnxhd_1238_ac_run_flag, dnxhd_1238_ac_index_flag,
+      dnxhd_1238_ac_flags,
       dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1238_run,
-      { 175, 185, 220, 365, 440 } },
-    { 1241, 1920, 1080, 1, 917504, 458752, 6, 10,
+      { 175, 185, 220, 365, 440 },
+      { { 24000, 1001 }, { 25, 1 }, { 30000, 1001 }, { 50, 1 }, { 60000, 1001 } } },
+    { 1241, 1920, 1080, 1, 917504, 458752, 6, 10, 4,
       dnxhd_1241_luma_weight, dnxhd_1241_chroma_weight,
       dnxhd_1235_1241_dc_codes, dnxhd_1235_1241_dc_bits,
       dnxhd_1235_1241_ac_codes, dnxhd_1235_1241_ac_bits, dnxhd_1235_1241_ac_level,
-      dnxhd_1235_1241_ac_run_flag, dnxhd_1235_1241_ac_index_flag,
+      dnxhd_1235_1241_ac_flags,
       dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1235_1241_run,
-      { 185, 220 } },
-    { 1242, 1920, 1080, 1, 606208, 303104, 4, 8,
+      { 185, 220 },
+      { { 25, 1 }, { 30000, 1001 } } },
+    { 1242, 1920, 1080, 1, 606208, 303104, 4, 8, 3,
       dnxhd_1242_luma_weight, dnxhd_1242_chroma_weight,
       dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
       dnxhd_1237_ac_codes, dnxhd_1237_ac_bits, dnxhd_1237_ac_level,
-      dnxhd_1237_ac_run_flag, dnxhd_1237_ac_index_flag,
+      dnxhd_1237_ac_flags,
       dnxhd_1237_run_codes, dnxhd_1237_run_bits, dnxhd_1237_run,
-      { 120, 145 } },
-    { 1243, 1920, 1080, 1, 917504, 458752, 4, 8,
+      { 120, 145 },
+      { { 25, 1 }, { 30000, 1001 } } },
+    { 1243, 1920, 1080, 1, 917504, 458752, 4, 8, 4,
       dnxhd_1243_luma_weight, dnxhd_1243_chroma_weight,
       dnxhd_1238_dc_codes, dnxhd_1238_dc_bits,
       dnxhd_1238_ac_codes, dnxhd_1238_ac_bits, dnxhd_1238_ac_level,
-      dnxhd_1238_ac_run_flag, dnxhd_1238_ac_index_flag,
+      dnxhd_1238_ac_flags,
       dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1238_run,
-      { 185, 220 } },
-    { 1250, 1280,  720, 0, 458752, 458752, 6, 10,
+      { 185, 220 },
+      { { 25, 1 }, { 30000, 1001 } } },
+    { 1250, 1280,  720, 0, 458752, 458752, 6, 10, 4,
       dnxhd_1250_luma_weight, dnxhd_1250_chroma_weight,
       dnxhd_1250_dc_codes, dnxhd_1250_dc_bits,
       dnxhd_1250_ac_codes, dnxhd_1250_ac_bits, dnxhd_1250_ac_level,
-      dnxhd_1250_ac_run_flag, dnxhd_1250_ac_index_flag,
+      dnxhd_1250_ac_flags,
       dnxhd_1250_run_codes, dnxhd_1250_run_bits, dnxhd_1250_run,
-      { 90, 180, 220 } },
-    { 1251, 1280,  720, 0, 458752, 458752, 4, 8,
+      { 90, 90, 180, 220 },
+      { { 24000, 1001 }, { 25, 1 }, { 50, 1 }, { 60000, 1001 } } },
+    { 1251, 1280,  720, 0, 458752, 458752, 4, 8, 4,
       dnxhd_1251_luma_weight, dnxhd_1251_chroma_weight,
       dnxhd_1251_dc_codes, dnxhd_1251_dc_bits,
       dnxhd_1251_ac_codes, dnxhd_1251_ac_bits, dnxhd_1251_ac_level,
-      dnxhd_1251_ac_run_flag, dnxhd_1251_ac_index_flag,
+      dnxhd_1251_ac_flags,
       dnxhd_1251_run_codes, dnxhd_1251_run_bits, dnxhd_1251_run,
-      { 90, 110, 175, 220 } },
-    { 1252, 1280,  720, 0, 303104, 303104, 4, 8,
+      { 90, 90, 110, 180, 220 },
+      { { 24000, 1001 }, { 25, 1 }, { 30000, 1001 }, { 50, 1 }, { 60000, 1001 } } },
+    { 1252, 1280,  720, 0, 303104, 303104, 4, 8, 5,
       dnxhd_1252_luma_weight, dnxhd_1252_chroma_weight,
       dnxhd_1252_dc_codes, dnxhd_1252_dc_bits,
       dnxhd_1252_ac_codes, dnxhd_1252_ac_bits, dnxhd_1252_ac_level,
-      dnxhd_1252_ac_run_flag, dnxhd_1252_ac_index_flag,
+      dnxhd_1252_ac_flags,
       dnxhd_1251_run_codes, dnxhd_1251_run_bits, dnxhd_1251_run,
-      { 60, 75, 115, 145 } },
-    { 1253, 1920, 1080, 0, 188416, 188416, 4, 8,
+      { 60, 60, 75, 120, 145 },
+      { { 24000, 1001 }, { 25, 1 }, { 30000, 1001 }, { 50, 1 }, { 60000, 1001 } } },
+    { 1253, 1920, 1080, 0, 188416, 188416, 4, 8, 3,
       dnxhd_1237_luma_weight, dnxhd_1237_chroma_weight,
       dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
       dnxhd_1237_ac_codes, dnxhd_1237_ac_bits, dnxhd_1237_ac_level,
-      dnxhd_1237_ac_run_flag, dnxhd_1237_ac_index_flag,
+      dnxhd_1237_ac_flags,
       dnxhd_1237_run_codes, dnxhd_1237_run_bits, dnxhd_1237_run,
-      { 36, 45, 75, 90 } },
-    { 1256, 1920, 1080, 0, 1835008, 1835008, 6, 10,
+      { 36, 36, 45, 75, 90 },
+      { { 24000, 1001 }, { 25, 1 }, { 30000, 1001 }, { 50, 1 }, { 60000, 1001 } } },
+    { 1256, 1920, 1080, 0, 1835008, 1835008, 6, 10, 4,
       dnxhd_1235_luma_weight, dnxhd_1256_chroma_weight,
       dnxhd_1235_1241_dc_codes, dnxhd_1235_1241_dc_bits,
       dnxhd_1235_1241_ac_codes, dnxhd_1235_1241_ac_bits, dnxhd_1235_1241_ac_level,
-      dnxhd_1235_1241_ac_run_flag, dnxhd_1235_1241_ac_index_flag,
+      dnxhd_1235_1241_ac_flags,
       dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1235_1241_run,
-      { 350, 390, 440, 730, 880 } },
+      { 350, 390, 440, 730, 880 },
+      { { 24000, 1001 }, { 25, 1 }, { 30000, 1001 }, { 50, 1 }, { 60000, 1001 } } },
 };
 
 int ff_dnxhd_get_cid_table(int cid)
@@ -1162,6 +1056,14 @@ int ff_dnxhd_get_cid_table(int cid)
     return -1;
 }
 
+int avpriv_dnxhd_get_frame_size(int cid)
+{
+    const int idx = ff_dnxhd_get_cid_table(cid); /* negative when the lookup fails */
+    if (idx >= 0)
+        return ff_dnxhd_cid_table[idx].frame_size;
+    return idx; /* hand the negative lookup result back unchanged */
+}
+
 int ff_dnxhd_find_cid(AVCodecContext *avctx, int bit_depth)
 {
     int i, j;
@@ -1173,7 +1075,7 @@ int ff_dnxhd_find_cid(AVCodecContext *avctx, int bit_depth)
         if (cid->width == avctx->width && cid->height == avctx->height &&
             cid->interlaced == !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT) &&
             cid->bit_depth == bit_depth) {
-            for (j = 0; j < sizeof(cid->bit_rates); j++) {
+            for (j = 0; j < FF_ARRAY_ELEMS(cid->bit_rates); j++) {
                 if (cid->bit_rates[j] == mbs)
                     return cid->cid;
             }
@@ -1181,3 +1083,19 @@ int ff_dnxhd_find_cid(AVCodecContext *avctx, int bit_depth)
     }
     return 0;
 }
+
+void ff_dnxhd_print_profiles(AVCodecContext *avctx, int loglevel)
+{
+    /* One log line per (table row, bitrate) pair; a zero bitrate ends a row. */
+    int t, k;
+    for (t = 0; t < FF_ARRAY_ELEMS(ff_dnxhd_cid_table); t++) {
+        const CIDEntry *e = &ff_dnxhd_cid_table[t];
+        const char *fmt   = e->bit_depth == 10 ? "yuv422p10" : "yuv422p";
+        const char scan   = e->interlaced ? 'i' : 'p';
+
+        for (k = 0; k < FF_ARRAY_ELEMS(e->bit_rates) && e->bit_rates[k]; k++)
+            av_log(avctx, loglevel, "Frame size: %dx%d%c; bitrate: %dMbps; pixel format: %s; framerate: %d/%d\n",
+                   e->width, e->height, scan, e->bit_rates[k], fmt,
+                   e->frame_rates[k].num, e->frame_rates[k].den);
+    }
+}
diff --git a/libavcodec/dnxhddata.h b/libavcodec/dnxhddata.h
index 66b0349..8166ee8 100644
--- a/libavcodec/dnxhddata.h
+++ b/libavcodec/dnxhddata.h
@@ -2,20 +2,20 @@
  * VC3/DNxHD decoder.
  * Copyright (c) 2007 SmartJog S.A., Baptiste Coudurier <baptiste dot coudurier at smartjog dot com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -24,6 +24,7 @@
 
 #include <stdint.h>
 #include "avcodec.h"
+#include "libavutil/internal.h"
 
 typedef struct CIDEntry {
     int cid;
@@ -33,19 +34,24 @@ typedef struct CIDEntry {
     unsigned int coding_unit_size;
     int index_bits;
     int bit_depth;
+    int eob_index;
     const uint8_t *luma_weight, *chroma_weight;
     const uint8_t *dc_codes, *dc_bits;
     const uint16_t *ac_codes;
     const uint8_t *ac_bits, *ac_level;
-    const uint8_t *ac_run_flag, *ac_index_flag;
+    const uint8_t *ac_flags;
     const uint16_t *run_codes;
     const uint8_t *run_bits, *run;
-    int bit_rates[5]; ///< Helpher to choose variants, rounded to nearest 5Mb/s
+    int bit_rates[5]; ///< Helper to choose variants, rounded to nearest 5Mb/s
+    AVRational frame_rates[5];
 } CIDEntry;
 
-extern const CIDEntry ff_dnxhd_cid_table[];
+extern av_export const CIDEntry ff_dnxhd_cid_table[];
 
 int ff_dnxhd_get_cid_table(int cid);
 int ff_dnxhd_find_cid(AVCodecContext *avctx, int bit_depth);
+void ff_dnxhd_print_profiles(AVCodecContext *avctx, int loglevel);
+
+int avpriv_dnxhd_get_frame_size(int cid);
 
 #endif /* AVCODEC_DNXHDDATA_H */
diff --git a/libavcodec/dnxhddec.c b/libavcodec/dnxhddec.c
index ca67990..787c6c5 100644
--- a/libavcodec/dnxhddec.c
+++ b/libavcodec/dnxhddec.c
@@ -5,20 +5,20 @@
  *
  * 10 bit support added by MirriAd Ltd, Joseph Artsimovich <joseph@mirriad.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -30,12 +30,13 @@
 #include "dnxhddata.h"
 #include "idctdsp.h"
 #include "internal.h"
+#include "thread.h"
 
 typedef struct DNXHDContext {
     AVCodecContext *avctx;
     GetBitContext gb;
     BlockDSPContext bdsp;
-    int cid;                            ///< compression id
+    int64_t cid;                        ///< compression id
     unsigned int width, height;
     unsigned int mb_width, mb_height;
     uint32_t mb_scan_index[68];         /* max for 1080p */
@@ -50,6 +51,9 @@ typedef struct DNXHDContext {
     int is_444;
     void (*decode_dct_block)(struct DNXHDContext *ctx, int16_t *block,
                              int n, int qscale);
+    int last_qscale;
+    int luma_scale[64];
+    int chroma_scale[64];
 } DNXHDContext;
 
 #define DNXHD_VLC_BITS 9
@@ -67,10 +71,11 @@ static av_cold int dnxhd_decode_init(AVCodecContext *avctx)
     DNXHDContext *ctx = avctx->priv_data;
 
     ctx->avctx = avctx;
+    ctx->cid = -1;
     return 0;
 }
 
-static int dnxhd_init_vlc(DNXHDContext *ctx, int cid)
+static int dnxhd_init_vlc(DNXHDContext *ctx, uint32_t cid)
 {
     if (cid != ctx->cid) {
         int index;
@@ -79,6 +84,10 @@ static int dnxhd_init_vlc(DNXHDContext *ctx, int cid)
             av_log(ctx->avctx, AV_LOG_ERROR, "unsupported cid %d\n", cid);
             return AVERROR(ENOSYS);
         }
+        if (ff_dnxhd_cid_table[index].bit_depth != ctx->bit_depth) {
+            av_log(ctx->avctx, AV_LOG_ERROR, "bit depth mismatches %d %d\n", ff_dnxhd_cid_table[index].bit_depth, ctx->bit_depth);
+            return AVERROR_INVALIDDATA;
+        }
         ctx->cid_table = &ff_dnxhd_cid_table[index];
 
         ff_free_vlc(&ctx->ac_vlc);
@@ -191,7 +200,7 @@ static int dnxhd_decode_header(DNXHDContext *ctx, AVFrame *frame,
     for (i = 0; i < ctx->mb_height; i++) {
         ctx->mb_scan_index[i] = AV_RB32(buf + 0x170 + (i << 2));
         av_dlog(ctx->avctx, "mb scan index %d\n", ctx->mb_scan_index[i]);
-        if (buf_size < ctx->mb_scan_index[i] + 0x280) {
+        if (buf_size < ctx->mb_scan_index[i] + 0x280LL) {
             av_log(ctx->avctx, AV_LOG_ERROR, "invalid mb scan index\n");
             return AVERROR_INVALIDDATA;
         }
@@ -207,24 +216,32 @@ static av_always_inline void dnxhd_decode_dct_block(DNXHDContext *ctx,
                                                     int level_bias,
                                                     int level_shift)
 {
-    int i, j, index1, index2, len;
+    int i, j, index1, index2, len, flags;
     int level, component, sign;
+    const int *scale;
     const uint8_t *weight_matrix;
+    const uint8_t *ac_level = ctx->cid_table->ac_level;
+    const uint8_t *ac_flags = ctx->cid_table->ac_flags;
+    const int eob_index     = ctx->cid_table->eob_index;
     OPEN_READER(bs, &ctx->gb);
 
     if (!ctx->is_444) {
         if (n & 2) {
             component     = 1 + (n & 1);
+            scale = ctx->chroma_scale;
             weight_matrix = ctx->cid_table->chroma_weight;
         } else {
             component     = 0;
+            scale = ctx->luma_scale;
             weight_matrix = ctx->cid_table->luma_weight;
         }
     } else {
         component = (n >> 1) % 3;
         if (component) {
+            scale = ctx->chroma_scale;
             weight_matrix = ctx->cid_table->chroma_weight;
         } else {
+            scale = ctx->luma_scale;
             weight_matrix = ctx->cid_table->luma_weight;
         }
     }
@@ -240,41 +257,47 @@ static av_always_inline void dnxhd_decode_dct_block(DNXHDContext *ctx,
     }
     block[0] = ctx->last_dc[component];
 
-    for (i = 1; ; i++) {
-        UPDATE_CACHE(bs, &ctx->gb);
-        GET_VLC(index1, bs, &ctx->gb, ctx->ac_vlc.table,
-                DNXHD_VLC_BITS, 2);
-        level = ctx->cid_table->ac_level[index1];
-        if (!level) /* EOB */
-            break;
+    i = 0;
+
+    UPDATE_CACHE(bs, &ctx->gb);
+    GET_VLC(index1, bs, &ctx->gb, ctx->ac_vlc.table,
+            DNXHD_VLC_BITS, 2);
+
+    while (index1 != eob_index) {
+        level = ac_level[index1];
+        flags = ac_flags[index1];
 
         sign = SHOW_SBITS(bs, &ctx->gb, 1);
         SKIP_BITS(bs, &ctx->gb, 1);
 
-        if (ctx->cid_table->ac_index_flag[index1]) {
-            level += SHOW_UBITS(bs, &ctx->gb, index_bits) << 6;
+        if (flags & 1) {
+            level += SHOW_UBITS(bs, &ctx->gb, index_bits) << 7;
             SKIP_BITS(bs, &ctx->gb, index_bits);
         }
 
-        if (ctx->cid_table->ac_run_flag[index1]) {
+        if (flags & 2) {
             UPDATE_CACHE(bs, &ctx->gb);
             GET_VLC(index2, bs, &ctx->gb, ctx->run_vlc.table,
                     DNXHD_VLC_BITS, 2);
             i += ctx->cid_table->run[index2];
         }
 
-        if (i > 63) {
+        if (++i > 63) {
             av_log(ctx->avctx, AV_LOG_ERROR, "ac tex damaged %d, %d\n", n, i);
             break;
         }
 
         j     = ctx->scantable.permutated[i];
-        level = (2 * level + 1) * qscale * weight_matrix[i];
+        level *= scale[i];
         if (level_bias < 32 || weight_matrix[i] != level_bias)
             level += level_bias;
         level >>= level_shift;
 
         block[j] = (level ^ sign) - sign;
+
+        UPDATE_CACHE(bs, &ctx->gb);
+        GET_VLC(index1, bs, &ctx->gb, ctx->ac_vlc.table,
+                DNXHD_VLC_BITS, 2);
     }
 
     CLOSE_READER(bs, &ctx->gb);
@@ -311,6 +334,14 @@ static int dnxhd_decode_macroblock(DNXHDContext *ctx, AVFrame *frame,
     qscale = get_bits(&ctx->gb, 11);
     skip_bits1(&ctx->gb);
 
+    if (qscale != ctx->last_qscale) {
+        for (i = 0; i < 64; i++) {
+            ctx->luma_scale[i]   = qscale * ctx->cid_table->luma_weight[i];
+            ctx->chroma_scale[i] = qscale * ctx->cid_table->chroma_weight[i];
+        }
+        ctx->last_qscale = qscale;
+    }
+
     for (i = 0; i < 8; i++) {
         ctx->bdsp.clear_block(ctx->blocks[i]);
         ctx->decode_dct_block(ctx, ctx->blocks[i], i, qscale);
@@ -398,6 +429,7 @@ static int dnxhd_decode_frame(AVCodecContext *avctx, void *data,
     const uint8_t *buf = avpkt->data;
     int buf_size = avpkt->size;
     DNXHDContext *ctx = avctx->priv_data;
+    ThreadFrame frame = { .f = data };
     AVFrame *picture = data;
     int first_field = 1;
     int ret;
@@ -420,10 +452,8 @@ decode_coding_unit:
         return ret;
 
     if (first_field) {
-        if ((ret = ff_get_buffer(avctx, picture, 0)) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
             return ret;
-        }
         picture->pict_type = AV_PICTURE_TYPE_I;
         picture->key_frame = 1;
     }
@@ -438,7 +468,7 @@ decode_coding_unit:
     }
 
     *got_frame = 1;
-    return buf_size;
+    return avpkt->size;
 }
 
 static av_cold int dnxhd_decode_close(AVCodecContext *avctx)
@@ -460,5 +490,5 @@ AVCodec ff_dnxhd_decoder = {
     .init           = dnxhd_decode_init,
     .close          = dnxhd_decode_close,
     .decode         = dnxhd_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
 };
diff --git a/libavcodec/dnxhdenc.c b/libavcodec/dnxhdenc.c
index e656b6e..3ad6253 100644
--- a/libavcodec/dnxhdenc.c
+++ b/libavcodec/dnxhdenc.c
@@ -6,20 +6,20 @@
  * VC-3 encoder funded by the British Broadcasting Corporation
  * 10 bit support added by MirriAd Ltd, Joseph Artsimovich <joseph@mirriad.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -36,6 +36,7 @@
 #include "pixblockdsp.h"
 #include "dnxhdenc.h"
 
+
 // The largest value that will not lead to overflow for 10bit samples.
 #define DNX10BIT_QMAT_SHIFT 18
 #define RC_VARIANCE 1 // use variance or ssd for fast rc
@@ -48,14 +49,14 @@ static const AVOption options[] = {
     { NULL }
 };
 
-static const AVClass class = {
-    "dnxhd",
-    av_default_item_name,
-    options,
-    LIBAVUTIL_VERSION_INT
+static const AVClass dnxhd_class = {
+    .class_name = "dnxhd",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
 };
 
-static void dnxhd_8bit_get_pixels_8x4_sym(int16_t *restrict block,
+static void dnxhd_8bit_get_pixels_8x4_sym(int16_t *av_restrict block,
                                           const uint8_t *pixels,
                                           ptrdiff_t line_size)
 {
@@ -79,25 +80,33 @@ static void dnxhd_8bit_get_pixels_8x4_sym(int16_t *restrict block,
 }
 
 static av_always_inline
-void dnxhd_10bit_get_pixels_8x4_sym(int16_t *restrict block,
+void dnxhd_10bit_get_pixels_8x4_sym(int16_t *av_restrict block,
                                     const uint8_t *pixels,
                                     ptrdiff_t line_size)
 {
     int i;
-
-    block += 32;
+    const uint16_t* pixels16 = (const uint16_t*)pixels; /* read the source as 16-bit samples */
+    line_size >>= 1; /* stride was for uint8_t pointers; halve it for uint16_t stepping */
 
     for (i = 0; i < 4; i++) {
-        memcpy(block + i * 8, pixels + i * line_size, 8 * sizeof(*block));
-        memcpy(block - (i + 1) * 8, pixels + i * line_size, 8 * sizeof(*block));
+        block[0] = pixels16[0]; block[1] = pixels16[1]; /* copy one row of 8 samples */
+        block[2] = pixels16[2]; block[3] = pixels16[3];
+        block[4] = pixels16[4]; block[5] = pixels16[5];
+        block[6] = pixels16[6]; block[7] = pixels16[7];
+        pixels16 += line_size; /* next source row */
+        block += 8; /* next destination row; ends 32 entries past the start */
     }
+    memcpy(block,      block -  8, sizeof(*block) * 8); /* row 4 = copy of row 3 */
+    memcpy(block +  8, block - 16, sizeof(*block) * 8); /* row 5 = copy of row 2 */
+    memcpy(block + 16, block - 24, sizeof(*block) * 8); /* row 6 = copy of row 1 */
+    memcpy(block + 24, block - 32, sizeof(*block) * 8); /* row 7 = copy of row 0 */
 }
 
 static int dnxhd_10bit_dct_quantize(MpegEncContext *ctx, int16_t *block,
                                     int n, int qscale, int *overflow)
 {
     const uint8_t *scantable= ctx->intra_scantable.scantable;
-    const int *qmat = ctx->q_intra_matrix[qscale];
+    const int *qmat = n<4 ? ctx->q_intra_matrix[qscale] : ctx->q_chroma_intra_matrix[qscale];
     int last_non_zero = 0;
     int i;
 
@@ -146,10 +155,10 @@ static av_cold int dnxhd_init_vlc(DNXHDEncContext *ctx)
                 alevel -= offset << 6;
             }
             for (j = 0; j < 257; j++) {
-                if (ctx->cid_table->ac_level[j] == alevel &&
-                    (!offset || (ctx->cid_table->ac_index_flag[j] && offset)) &&
-                    (!run    || (ctx->cid_table->ac_run_flag  [j] && run))) {
-                    assert(!ctx->vlc_codes[index]);
+                if (ctx->cid_table->ac_level[j] >> 1 == alevel &&
+                    (!offset || (ctx->cid_table->ac_flags[j] & 1) && offset) &&
+                    (!run    || (ctx->cid_table->ac_flags[j] & 2) && run)) {
+                    av_assert1(!ctx->vlc_codes[index]);
                     if (alevel) {
                         ctx->vlc_codes[index] =
                             (ctx->cid_table->ac_codes[j] << 1) | (sign & 1);
@@ -161,7 +170,7 @@ static av_cold int dnxhd_init_vlc(DNXHDEncContext *ctx)
                     break;
                 }
             }
-            assert(!alevel || j < 257);
+            av_assert0(!alevel || j < 257);
             if (offset) {
                 ctx->vlc_codes[index] =
                     (ctx->vlc_codes[index] << ctx->cid_table->index_bits) | offset;
@@ -171,7 +180,7 @@ static av_cold int dnxhd_init_vlc(DNXHDEncContext *ctx)
     }
     for (i = 0; i < 62; i++) {
         int run = ctx->cid_table->run[i];
-        assert(run < 63);
+        av_assert0(run < 63);
         ctx->run_codes[run] = ctx->cid_table->run_codes[i];
         ctx->run_bits[run]  = ctx->cid_table->run_bits[i];
     }
@@ -250,6 +259,11 @@ static av_cold int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias)
         }
     }
 
+    ctx->m.q_chroma_intra_matrix16 = ctx->qmatrix_c16;
+    ctx->m.q_chroma_intra_matrix   = ctx->qmatrix_c;
+    ctx->m.q_intra_matrix16        = ctx->qmatrix_l16;
+    ctx->m.q_intra_matrix          = ctx->qmatrix_l;
+
     return 0;
 fail:
     return AVERROR(ENOMEM);
@@ -257,8 +271,7 @@ fail:
 
 static av_cold int dnxhd_init_rc(DNXHDEncContext *ctx)
 {
-    FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_rc,
-                      8160 * ctx->m.avctx->qmax * sizeof(RCEntry), fail);
+    FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_rc, 8160 * (ctx->m.avctx->qmax + 1) * sizeof(RCEntry), fail);
     if (ctx->m.avctx->mb_decision != FF_MB_DECISION_RD)
         FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_cmp,
                           ctx->m.mb_num * sizeof(RCCMPEntry), fail);
@@ -293,12 +306,14 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx)
     ctx->cid = ff_dnxhd_find_cid(avctx, bit_depth);
     if (!ctx->cid) {
         av_log(avctx, AV_LOG_ERROR,
-               "video parameters incompatible with DNxHD\n");
+               "video parameters incompatible with DNxHD. Valid DNxHD profiles:\n");
+        ff_dnxhd_print_profiles(avctx, AV_LOG_ERROR);
         return AVERROR(EINVAL);
     }
     av_log(avctx, AV_LOG_DEBUG, "cid %d\n", ctx->cid);
 
     index = ff_dnxhd_get_cid_table(ctx->cid);
+    av_assert0(index >= 0);
     ctx->cid_table = &ff_dnxhd_cid_table[index];
 
     ctx->m.avctx    = avctx;
@@ -313,6 +328,8 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx)
     ff_mpegvideoencdsp_init(&ctx->m.mpvencdsp, avctx);
     ff_pixblockdsp_init(&ctx->m.pdsp, avctx);
     ff_dct_common_init(&ctx->m);
+    ff_dct_encode_init(&ctx->m);
+
     if (!ctx->m.dct_quantize)
         ctx->m.dct_quantize = ff_dct_quantize_c;
 
@@ -375,6 +392,11 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx)
         return AVERROR(EINVAL);
     }
 
+    if (avctx->qmax <= 1) {
+        av_log(avctx, AV_LOG_ERROR, "qmax must be at least 2\n");
+        return AVERROR(EINVAL);
+    }
+
     ctx->thread[0] = ctx;
     for (i = 1; i < avctx->thread_count; i++) {
         ctx->thread[i] = av_malloc(sizeof(DNXHDEncContext));
@@ -389,7 +411,7 @@ fail:  // for FF_ALLOCZ_OR_GOTO
 static int dnxhd_write_header(AVCodecContext *avctx, uint8_t *buf)
 {
     DNXHDEncContext *ctx = avctx->priv_data;
-    const uint8_t header_prefix[5] = { 0x00, 0x00, 0x02, 0x80, 0x01 };
+    static const uint8_t header_prefix[5] = { 0x00, 0x00, 0x02, 0x80, 0x01 };
 
     memset(buf, 0, 640);
 
@@ -582,15 +604,8 @@ void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
 static av_always_inline
 int dnxhd_switch_matrix(DNXHDEncContext *ctx, int i)
 {
-    if (i & 2) {
-        ctx->m.q_intra_matrix16 = ctx->qmatrix_c16;
-        ctx->m.q_intra_matrix   = ctx->qmatrix_c;
-        return 1 + (i & 1);
-    } else {
-        ctx->m.q_intra_matrix16 = ctx->qmatrix_l16;
-        ctx->m.q_intra_matrix   = ctx->qmatrix_l;
-        return 0;
-    }
+    static const uint8_t component[8] = { 0, 0, 1, 2, 0, 0, 1, 2 }; /* block -> plane, 0 = luma */
+    return component[i]; /* i must be a block index in 0..7 */
 }
 
 static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg,
@@ -621,7 +636,7 @@ static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg,
             int n = dnxhd_switch_matrix(ctx, i);
 
             memcpy(block, src_block, 64 * sizeof(*block));
-            last_index = ctx->m.dct_quantize(&ctx->m, block, i,
+            last_index = ctx->m.dct_quantize(&ctx->m, block, 4 & (2*i),
                                              qscale, &overflow);
             ac_bits   += dnxhd_calc_ac_bits(ctx, block, last_index);
 
@@ -631,7 +646,7 @@ static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg,
             else
                 nbits = av_log2_16bit(2 * diff);
 
-            assert(nbits < ctx->cid_table->bit_depth + 4);
+            av_assert1(nbits < ctx->cid_table->bit_depth + 4);
             dc_bits += ctx->cid_table->dc_bits[nbits] + nbits;
 
             ctx->m.last_dc[n] = block[0];
@@ -673,7 +688,7 @@ static int dnxhd_encode_thread(AVCodecContext *avctx, void *arg,
         for (i = 0; i < 8; i++) {
             int16_t *block = ctx->blocks[i];
             int overflow, n = dnxhd_switch_matrix(ctx, i);
-            int last_index = ctx->m.dct_quantize(&ctx->m, block, i,
+            int last_index = ctx->m.dct_quantize(&ctx->m, block, 4 & (2*i),
                                                  qscale, &overflow);
             // START_TIMER;
             dnxhd_encode_block(ctx, block, last_index, n);
@@ -932,13 +947,13 @@ static void radix_count(const RCCMPEntry *data, int size,
             buckets[j][get_bucket(v, 0)]++;
             v >>= BUCKET_BITS;
         }
-        assert(!v);
+        av_assert1(!v);
     }
     for (j = 0; j < RADIX_PASSES; j++) {
         int offset = size;
         for (i = NBUCKETS - 1; i >= 0; i--)
             buckets[j][i] = offset -= buckets[j][i];
-        assert(!buckets[j][0]);
+        av_assert1(!buckets[j][0]);
     }
 }
 
@@ -957,7 +972,7 @@ static void radix_sort_pass(RCCMPEntry *dst, const RCCMPEntry *data,
 static void radix_sort(RCCMPEntry *data, int size)
 {
     int buckets[RADIX_PASSES][NBUCKETS];
-    RCCMPEntry *tmp = av_malloc(sizeof(*tmp) * size);
+    RCCMPEntry *tmp = av_malloc_array(size, sizeof(*tmp));
     radix_count(data, size, buckets);
     radix_sort_pass(tmp, data, size, buckets[0], 0);
     radix_sort_pass(data, tmp, size, buckets[1], 1);
@@ -1033,11 +1048,8 @@ static int dnxhd_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
     int offset, i, ret;
     uint8_t *buf;
 
-    if ((ret = ff_alloc_packet(pkt, ctx->cid_table->frame_size)) < 0) {
-        av_log(avctx, AV_LOG_ERROR,
-               "output buffer is too small to compress picture\n");
+    if ((ret = ff_alloc_packet2(avctx, pkt, ctx->cid_table->frame_size)) < 0)
         return ret;
-    }
     buf = pkt->data;
 
     dnxhd_load_picture(ctx, frame);
@@ -1067,12 +1079,12 @@ encode_coding_unit:
     for (i = 0; i < ctx->m.mb_height; i++) {
         AV_WB32(ctx->msip + i * 4, offset);
         offset += ctx->slice_size[i];
-        assert(!(ctx->slice_size[i] & 3));
+        av_assert1(!(ctx->slice_size[i] & 3));
     }
 
     avctx->execute2(avctx, dnxhd_encode_thread, buf, NULL, ctx->m.mb_height);
 
-    assert(640 + offset + 4 <= ctx->cid_table->coding_unit_size);
+    av_assert1(640 + offset + 4 <= ctx->cid_table->coding_unit_size);
     memset(buf + 640 + offset, 0,
            ctx->cid_table->coding_unit_size - 4 - offset - 640);
 
@@ -1123,6 +1135,11 @@ static av_cold int dnxhd_encode_end(AVCodecContext *avctx)
     return 0;
 }
 
+static const AVCodecDefault dnxhd_defaults[] = {
+    { "qmax", "1024" }, /* Maximum quantization scale factor allowed for VC-3 */
+    { NULL },
+};
+
 AVCodec ff_dnxhd_encoder = {
     .name           = "dnxhd",
     .long_name      = NULL_IF_CONFIG_SMALL("VC3/DNxHD"),
@@ -1138,5 +1155,6 @@ AVCodec ff_dnxhd_encoder = {
         AV_PIX_FMT_YUV422P10,
         AV_PIX_FMT_NONE
     },
-    .priv_class     = &class,
+    .priv_class     = &dnxhd_class,
+    .defaults       = dnxhd_defaults,
 };
diff --git a/libavcodec/dnxhdenc.h b/libavcodec/dnxhdenc.h
index c3248a2..7ef0b96 100644
--- a/libavcodec/dnxhdenc.h
+++ b/libavcodec/dnxhdenc.h
@@ -4,20 +4,20 @@
  *
  * VC-3 encoder funded by the British Broadcasting Corporation
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -84,8 +84,6 @@ typedef struct DNXHDEncContext {
     unsigned qscale;
     unsigned lambda;
 
-    unsigned thread_size;
-
     uint16_t *mb_bits;
     uint8_t  *mb_qscale;
 
diff --git a/libavcodec/dpcm.c b/libavcodec/dpcm.c
index 5ab2331..0c0bcca 100644
--- a/libavcodec/dpcm.c
+++ b/libavcodec/dpcm.c
@@ -2,20 +2,20 @@
  * Assorted DPCM codecs
  * Copyright (c) 2003 The ffmpeg Project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -118,7 +118,7 @@ static av_cold int dpcm_decode_init(AVCodecContext *avctx)
     int i;
 
     if (avctx->channels < 1 || avctx->channels > 2) {
-        av_log(avctx, AV_LOG_INFO, "invalid number of channels\n");
+        av_log(avctx, AV_LOG_ERROR, "invalid number of channels\n");
         return AVERROR(EINVAL);
     }
 
@@ -205,13 +205,14 @@ static int dpcm_decode_frame(AVCodecContext *avctx, void *data,
         av_log(avctx, AV_LOG_ERROR, "packet is too small\n");
         return AVERROR(EINVAL);
     }
+    if (out % avctx->channels) {
+        av_log(avctx, AV_LOG_WARNING, "channels have differing number of samples\n");
+    }
 
     /* get output buffer */
-    frame->nb_samples = out / avctx->channels;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    frame->nb_samples = (out + avctx->channels - 1) / avctx->channels;
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     output_samples = (int16_t *)frame->data[0];
     samples_end = output_samples + out;
 
diff --git a/libavcodec/dpx.c b/libavcodec/dpx.c
index c796387..5f05cd8 100644
--- a/libavcodec/dpx.c
+++ b/libavcodec/dpx.c
@@ -2,29 +2,42 @@
  * DPX (.dpx) image decoder
  * Copyright (c) 2009 Jimmy Christensen
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "libavutil/intreadwrite.h"
+#include "libavutil/intfloat.h"
 #include "libavutil/imgutils.h"
 #include "bytestream.h"
 #include "avcodec.h"
 #include "internal.h"
 
+static unsigned int read16(const uint8_t **ptr, int is_big)
+{
+    /* Decode the two bytes at *ptr as a 16-bit value in the requested
+     * byte order (big endian when is_big is non-zero, little endian
+     * otherwise), then move the caller's cursor past them. */
+    const uint8_t *src = *ptr;
+    unsigned int value = is_big ? AV_RB16(src)
+                                : AV_RL16(src);
+    *ptr = src + 2;
+    return value;
+}
+
 static unsigned int read32(const uint8_t **ptr, int is_big)
 {
     unsigned int temp;
@@ -37,12 +50,19 @@ static unsigned int read32(const uint8_t **ptr, int is_big)
     return temp;
 }
 
-static inline unsigned make_16bit(unsigned value)
+static uint16_t read10in32(const uint8_t **ptr, uint32_t * lbuf,
+                                  int * n_datum, int is_big) /* returns the next 10-bit value; *lbuf and *n_datum carry state between calls */
 {
-    // mask away invalid bits
-    value &= 0xFFC0;
-    // correctly expand to 16 bits
-    return value + (value >> 10);
+    if (*n_datum)                        /* values still pending in the current word */
+        (*n_datum)--;
+    else {
+        *lbuf = read32(ptr, is_big);     /* load the next 32-bit word through read32() */
+        *n_datum = 2;                    /* two more values remain after this one */
+    }
+    /* Rotate left by 10 so the next unread 10 bits (from the top) land in bits 9..0. */
+    *lbuf = (*lbuf << 10) | (*lbuf >> 22);
+    /* Only the low 10 bits are returned; *lbuf keeps the rotated word for the next call. */
+    return *lbuf & 0x3FF;
 }
 
 static int decode_frame(AVCodecContext *avctx,
@@ -51,17 +71,18 @@ static int decode_frame(AVCodecContext *avctx,
                         AVPacket *avpkt)
 {
     const uint8_t *buf = avpkt->data;
-    const uint8_t *buf_end = avpkt->data + avpkt->size;
     int buf_size       = avpkt->size;
     AVFrame *const p = data;
-    uint8_t *ptr;
+    uint8_t *ptr[AV_NUM_DATA_POINTERS];
 
     unsigned int offset;
     int magic_num, endian;
-    int x, y, ret;
-    int w, h, stride, bits_per_color, descriptor, elements, target_packet_size, source_packet_size;
+    int x, y, i, ret;
+    int w, h, bits_per_color, descriptor, elements, packing, total_size;
+    int encoding;
 
-    unsigned int rgbBuffer;
+    unsigned int rgbBuffer = 0;
+    int n_datum = 0;
 
     if (avpkt->size <= 1634) {
         av_log(avctx, AV_LOG_ERROR, "Packet too small for DPX header\n");
@@ -92,6 +113,9 @@ static int decode_frame(AVCodecContext *avctx,
     w = read32(&buf, endian);
     h = read32(&buf, endian);
 
+    if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
+        return ret;
+
     // Need to end in 0x320 to read the descriptor
     buf += 20;
     descriptor = buf[0];
@@ -100,108 +124,202 @@ static int decode_frame(AVCodecContext *avctx,
     buf += 3;
     avctx->bits_per_raw_sample =
     bits_per_color = buf[0];
+    buf++;
+    packing = read16(&buf, endian);
+    encoding = read16(&buf, endian);
+
+    if (packing > 1) {
+        avpriv_report_missing_feature(avctx, "Packing %d", packing);
+        return AVERROR_PATCHWELCOME;
+    }
+    if (encoding) {
+        avpriv_report_missing_feature(avctx, "Encoding %d", encoding);
+        return AVERROR_PATCHWELCOME;
+    }
 
-    buf += 825;
+    buf += 820;
     avctx->sample_aspect_ratio.num = read32(&buf, endian);
     avctx->sample_aspect_ratio.den = read32(&buf, endian);
+    if (avctx->sample_aspect_ratio.num > 0 && avctx->sample_aspect_ratio.den > 0)
+        av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
+                   avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den,
+                  0x10000);
+    else
+        avctx->sample_aspect_ratio = (AVRational){ 0, 1 };
+
+    if (offset >= 1724 + 4) {
+        buf = avpkt->data + 1724;
+        i = read32(&buf, endian);
+        if(i) {
+            AVRational q = av_d2q(av_int2float(i), 4096);
+            if (q.num > 0 && q.den > 0)
+                avctx->time_base = av_inv_q(q);
+        }
+    }
 
     switch (descriptor) {
-        case 51: // RGBA
-            elements = 4;
-            break;
-        case 50: // RGB
-            elements = 3;
-            break;
-        default:
-            av_log(avctx, AV_LOG_ERROR, "Unsupported descriptor %d\n", descriptor);
-            return AVERROR_INVALIDDATA;
+    case 6:  // Y
+        elements = 1;
+        break;
+    case 52: // ABGR
+    case 51: // RGBA
+        elements = 4;
+        break;
+    case 50: // RGB
+        elements = 3;
+        break;
+    default:
+        avpriv_report_missing_feature(avctx, "Descriptor %d", descriptor);
+        return AVERROR_PATCHWELCOME;
     }
 
     switch (bits_per_color) {
-        case 8:
-            if (elements == 4) {
-                avctx->pix_fmt = AV_PIX_FMT_RGBA;
-            } else {
-                avctx->pix_fmt = AV_PIX_FMT_RGB24;
-            }
-            source_packet_size = elements;
-            target_packet_size = elements;
-            break;
-        case 10:
-            avctx->pix_fmt = AV_PIX_FMT_RGB48;
-            target_packet_size = 6;
-            source_packet_size = 4;
-            break;
-        case 12:
-        case 16:
-            if (endian) {
-                avctx->pix_fmt = AV_PIX_FMT_RGB48BE;
-            } else {
-                avctx->pix_fmt = AV_PIX_FMT_RGB48LE;
-            }
-            target_packet_size = 6;
-            source_packet_size = elements * 2;
-            break;
-        default:
-            av_log(avctx, AV_LOG_ERROR, "Unsupported color depth : %d\n", bits_per_color);
-            return AVERROR_INVALIDDATA;
+    case 8:
+        total_size = avctx->width * avctx->height * elements;
+        break;
+    case 10:
+        if (!packing) {
+            av_log(avctx, AV_LOG_ERROR, "Packing to 32bit required\n");
+            return -1;
+        }
+        total_size = (avctx->width * elements + 2) / 3 * 4 * avctx->height;
+        break;
+    case 12:
+        if (!packing) {
+            av_log(avctx, AV_LOG_ERROR, "Packing to 16bit required\n");
+            return -1;
+        }
+        total_size = 2 * avctx->width * avctx->height * elements;
+        break;
+    case 16:
+        total_size = 2 * avctx->width * avctx->height * elements;
+        break;
+    case 1:
+    case 32:
+    case 64:
+        avpriv_report_missing_feature(avctx, "Depth %d", bits_per_color);
+        return AVERROR_PATCHWELCOME;
+    default:
+        return AVERROR_INVALIDDATA;
     }
 
-    if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
-        return ret;
+    switch (1000 * descriptor + 10 * bits_per_color + endian) {
+    case 6081:
+    case 6080:
+        avctx->pix_fmt = AV_PIX_FMT_GRAY8;
+        break;
+    case 50081:
+    case 50080:
+        avctx->pix_fmt = AV_PIX_FMT_RGB24;
+        break;
+    case 52081:
+    case 52080:
+        avctx->pix_fmt = AV_PIX_FMT_ABGR;
+        break;
+    case 51081:
+    case 51080:
+        avctx->pix_fmt = AV_PIX_FMT_RGBA;
+        break;
+    case 50100:
+    case 51100:
+    case 50101:
+    case 51101:
+        avctx->pix_fmt = AV_PIX_FMT_GBRP10;
+        break;
+    case 50120:
+    case 51120:
+    case 50121:
+    case 51121:
+        avctx->pix_fmt = AV_PIX_FMT_GBRP12;
+        break;
+    case 6161:
+        avctx->pix_fmt = AV_PIX_FMT_GRAY16BE;
+        break;
+    case 6160:
+        avctx->pix_fmt = AV_PIX_FMT_GRAY16LE;
+        break;
+    case 50161:
+        avctx->pix_fmt = AV_PIX_FMT_RGB48BE;
+        break;
+    case 50160:
+        avctx->pix_fmt = AV_PIX_FMT_RGB48LE;
+        break;
+    case 51161:
+        avctx->pix_fmt = AV_PIX_FMT_RGBA64BE;
+        break;
+    case 51160:
+        avctx->pix_fmt = AV_PIX_FMT_RGBA64LE;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unsupported format\n");
+        return AVERROR_PATCHWELCOME;
+    }
 
     ff_set_sar(avctx, avctx->sample_aspect_ratio);
 
-    if ((ret = ff_get_buffer(avctx, p, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
         return ret;
-    }
 
     // Move pointer to offset from start of file
     buf =  avpkt->data + offset;
 
-    ptr    = p->data[0];
-    stride = p->linesize[0];
+    for (i=0; i<AV_NUM_DATA_POINTERS; i++)
+        ptr[i] = p->data[i];
 
-    if (source_packet_size*avctx->width*avctx->height > buf_end - buf) {
+    if (total_size + (int64_t)offset > avpkt->size) {
         av_log(avctx, AV_LOG_ERROR, "Overread buffer. Invalid header?\n");
         return AVERROR_INVALIDDATA;
     }
     switch (bits_per_color) {
-        case 10:
-            for (x = 0; x < avctx->height; x++) {
-               uint16_t *dst = (uint16_t*)ptr;
-               for (y = 0; y < avctx->width; y++) {
-                   rgbBuffer = read32(&buf, endian);
-                   // Read out the 10-bit colors and convert to 16-bit
-                   *dst++ = make_16bit(rgbBuffer >> 16);
-                   *dst++ = make_16bit(rgbBuffer >>  6);
-                   *dst++ = make_16bit(rgbBuffer <<  4);
-               }
-               ptr += stride;
+    case 10:
+        for (x = 0; x < avctx->height; x++) {
+            uint16_t *dst[3] = {(uint16_t*)ptr[0],
+                                (uint16_t*)ptr[1],
+                                (uint16_t*)ptr[2]};
+            for (y = 0; y < avctx->width; y++) {
+                *dst[2]++ = read10in32(&buf, &rgbBuffer,
+                                       &n_datum, endian);
+                *dst[0]++ = read10in32(&buf, &rgbBuffer,
+                                       &n_datum, endian);
+                *dst[1]++ = read10in32(&buf, &rgbBuffer,
+                                       &n_datum, endian);
+                // For 10 bit, ignore alpha
+                if (elements == 4)
+                    read10in32(&buf, &rgbBuffer,
+                               &n_datum, endian);
             }
-            break;
-        case 8:
-        case 12: // Treat 12-bit as 16-bit
-        case 16:
-            if (source_packet_size == target_packet_size) {
-                for (x = 0; x < avctx->height; x++) {
-                    memcpy(ptr, buf, target_packet_size*avctx->width);
-                    ptr += stride;
-                    buf += source_packet_size*avctx->width;
-                }
-            } else {
-                for (x = 0; x < avctx->height; x++) {
-                    uint8_t *dst = ptr;
-                    for (y = 0; y < avctx->width; y++) {
-                        memcpy(dst, buf, target_packet_size);
-                        dst += target_packet_size;
-                        buf += source_packet_size;
-                    }
-                    ptr += stride;
-                }
+            n_datum = 0;
+            for (i = 0; i < 3; i++)
+                ptr[i] += p->linesize[i];
+        }
+        break;
+    case 12:
+        for (x = 0; x < avctx->height; x++) {
+            uint16_t *dst[3] = {(uint16_t*)ptr[0],
+                                (uint16_t*)ptr[1],
+                                (uint16_t*)ptr[2]};
+            for (y = 0; y < avctx->width; y++) {
+                *dst[2] = read16(&buf, endian) >> 4;
+                dst[2]++;
+                *dst[0] = read16(&buf, endian) >> 4;
+                dst[0]++;
+                *dst[1] = read16(&buf, endian) >> 4;
+                dst[1]++;
+                // For 12 bit, ignore alpha
+                if (elements == 4)
+                    buf += 2;
             }
-            break;
+            for (i = 0; i < 3; i++)
+                ptr[i] += p->linesize[i];
+        }
+        break;
+    case 16:
+        elements *= 2;
+    case 8:
+        av_image_copy_plane(ptr[0], p->linesize[0],
+                            buf, elements * avctx->width,
+                            elements * avctx->width, avctx->height);
+        break;
     }
 
     *got_frame = 1;
@@ -211,7 +329,7 @@ static int decode_frame(AVCodecContext *avctx,
 
 AVCodec ff_dpx_decoder = {
     .name           = "dpx",
-    .long_name      = NULL_IF_CONFIG_SMALL("DPX image"),
+    .long_name      = NULL_IF_CONFIG_SMALL("DPX (Digital Picture Exchange) image"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_DPX,
     .decode         = decode_frame,
diff --git a/libavcodec/dpx_parser.c b/libavcodec/dpx_parser.c
new file mode 100644
index 0000000..dd2a335
--- /dev/null
+++ b/libavcodec/dpx_parser.c
@@ -0,0 +1,114 @@
+/*
+ * DPX parser
+ * Copyright (c) 2013 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * DPX parser
+ */
+
+#include "libavutil/bswap.h"
+#include "parser.h"
+
+typedef struct DPXParseContext {
+    ParseContext pc;          /* state window and frame_start_found flag; handed to ff_combine_frame() */
+    uint32_t index;           /* bytes stepped through since the "SDPX" magic (reset to 0 when it is seen) */
+    uint32_t fsize;           /* 32-bit value captured when index reaches 17; used as the frame size */
+    uint32_t remaining_size;  /* bytes of the open frame that were not present in the previous buffer */
+    int is_be;                /* non-zero when the magic matched in MKBETAG order */
+} DPXParseContext;
+
+static int dpx_parse(AVCodecParserContext *s, AVCodecContext *avctx,
+                     const uint8_t **poutbuf, int *poutbuf_size,
+                     const uint8_t *buf, int buf_size)
+{
+    DPXParseContext *d = s->priv_data;
+    uint32_t state = d->pc.state;
+    int next = END_NOT_FOUND;  /* frame end position within buf, once one is found */
+    int i = 0;
+    /* Frame splitter: find the "SDPX" magic, then locate the end of that frame. */
+    s->pict_type = AV_PICTURE_TYPE_I;
+    /* No output is reported unless ff_combine_frame() below succeeds. */
+    *poutbuf_size = 0;
+    if (buf_size == 0)
+        next = 0;  /* empty input: end position 0 (presumably flushes buffered data at EOF) */
+    /* Stage 1: no frame open yet, so scan for the magic in either byte order. */
+    if (!d->pc.frame_start_found) {
+        for (; i < buf_size; i++) {
+            state = (state << 8) | buf[i];  /* rolling window of the last four bytes */
+            if (state == MKBETAG('S','D','P','X') ||
+                state == MKTAG('S','D','P','X')) {
+                d->pc.frame_start_found = 1;
+                d->is_be = state == MKBETAG('S','D','P','X');
+                d->index = 0;
+                break;  /* i is not advanced: the loop below visits this byte again */
+            }
+        }
+        d->pc.state = state;  /* keep the window for the next call */
+    } else {
+        if (d->remaining_size) {
+            i = FFMIN(d->remaining_size, buf_size);  /* skip bytes that belong to the open frame */
+            d->remaining_size -= i;
+            if (d->remaining_size)
+                goto flush;  /* the frame extends past this buffer as well */
+        }
+    }
+    /* Stage 2: frame open, so read its size field, then watch for the next magic. */
+    for (;d->pc.frame_start_found && i < buf_size; i++) {
+        d->pc.state = (d->pc.state << 8) | buf[i];
+        d->index++;
+        if (d->index == 17) {  /* window now holds frame bytes 16..19 */
+            d->fsize = d->is_be ? d->pc.state : av_bswap32(d->pc.state);  /* presumably the header's file-size field -- confirm */
+            if (d->fsize <= 1664) {  /* size not above 1664: abandon this start code */
+                d->pc.frame_start_found = 0;
+                goto flush;
+            }
+            if (d->fsize > buf_size - i + 19)  /* buf_size - i + 19 = bytes of this frame present in buf */
+                d->remaining_size = d->fsize - buf_size + i - 19;  /* bytes still to come in later calls */
+            else
+                i += d->fsize - 19;  /* i now points just past the frame */
+
+            break;
+        } else if (d->index > 17) {  /* past the size field */
+            if (d->pc.state == MKBETAG('S','D','P','X') ||
+                d->pc.state == MKTAG('S','D','P','X')) {
+                next = i - 3;  /* first byte of the next magic ends the current frame */
+                break;
+            }
+        }
+    }
+
+flush:
+    if (ff_combine_frame(&d->pc, next, &buf, &buf_size) < 0)
+        return buf_size;  /* negative result: all input consumed, *poutbuf_size stays 0 */
+
+    d->pc.frame_start_found = 0;  /* frame handed out: search for a new magic on the next call */
+
+    *poutbuf      = buf;  /* buf and buf_size as updated by ff_combine_frame() */
+    *poutbuf_size = buf_size;
+    return next;
+}
+
+AVCodecParser ff_dpx_parser = {
+    .codec_ids      = { AV_CODEC_ID_DPX },     /* only DPX is listed */
+    .priv_data_size = sizeof(DPXParseContext), /* dpx_parse() reads this state from s->priv_data */
+    .parser_parse   = dpx_parse,
+    .parser_close   = ff_parse_close,
+};
diff --git a/libavcodec/dpxenc.c b/libavcodec/dpxenc.c
index 2232933..0eb1297 100644
--- a/libavcodec/dpxenc.c
+++ b/libavcodec/dpxenc.c
@@ -2,20 +2,20 @@
  * DPX (.dpx) image encoder
  * Copyright (c) 2011 Peter Ross <pross@xvid.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -29,33 +29,41 @@ typedef struct DPXContext {
     int big_endian;
     int bits_per_component;
     int descriptor;
+    int planar;
 } DPXContext;
 
 static av_cold int encode_init(AVCodecContext *avctx)
 {
     DPXContext *s = avctx->priv_data;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-
-    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
-    avctx->coded_frame->key_frame = 1;
-
-    s->big_endian         = 1;
-    s->bits_per_component = 8;
-    s->descriptor         = 50; /* RGB */
+    s->big_endian         = !!(desc->flags & AV_PIX_FMT_FLAG_BE);
+    s->bits_per_component = desc->comp[0].depth_minus1 + 1;
+    s->descriptor         = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) ? 51 : 50;
+    s->planar             = !!(desc->flags & AV_PIX_FMT_FLAG_PLANAR);
 
     switch (avctx->pix_fmt) {
-    case AV_PIX_FMT_RGB24:
+    case AV_PIX_FMT_ABGR:
+        s->descriptor = 52;
         break;
+    case AV_PIX_FMT_GRAY16BE:
+    case AV_PIX_FMT_GRAY16LE:
+    case AV_PIX_FMT_GRAY8:
+        s->descriptor = 6;
+        break;
+    case AV_PIX_FMT_GBRP10BE:
+    case AV_PIX_FMT_GBRP10LE:
+    case AV_PIX_FMT_GBRP12BE:
+    case AV_PIX_FMT_GBRP12LE:
+    case AV_PIX_FMT_RGB24:
+    case AV_PIX_FMT_RGBA64BE:
+    case AV_PIX_FMT_RGBA64LE:
     case AV_PIX_FMT_RGBA:
-        s->descriptor = 51; /* RGBA */
         break;
     case AV_PIX_FMT_RGB48LE:
-        s->big_endian = 0;
     case AV_PIX_FMT_RGB48BE:
-        s->bits_per_component = avctx->bits_per_raw_sample ? avctx->bits_per_raw_sample : 16;
+        if (avctx->bits_per_raw_sample)
+            s->bits_per_component = avctx->bits_per_raw_sample;
         break;
     default:
         av_log(avctx, AV_LOG_INFO, "unsupported pixel format\n");
@@ -77,8 +85,7 @@ do { \
     else               AV_WL32(p, value); \
 } while(0)
 
-static void encode_rgb48_10bit(AVCodecContext *avctx, const AVPicture *pic,
-                               uint8_t *dst)
+static void encode_rgb48_10bit(AVCodecContext *avctx, const AVPicture *pic, uint8_t *dst)
 {
     DPXContext *s = avctx->priv_data;
     const uint8_t *src = pic->data[0];
@@ -87,14 +94,14 @@ static void encode_rgb48_10bit(AVCodecContext *avctx, const AVPicture *pic,
     for (y = 0; y < avctx->height; y++) {
         for (x = 0; x < avctx->width; x++) {
             int value;
-            if ((avctx->pix_fmt & 1)) {
-                value = ((AV_RB16(src + 6*x + 4) & 0xFFC0) >> 4)
-                      | ((AV_RB16(src + 6*x + 2) & 0xFFC0) << 6)
-                      | ((AV_RB16(src + 6*x + 0) & 0xFFC0) << 16);
+            if (s->big_endian) {
+                value = ((AV_RB16(src + 6*x + 4) & 0xFFC0U) >> 4)
+                      | ((AV_RB16(src + 6*x + 2) & 0xFFC0U) << 6)
+                      | ((AV_RB16(src + 6*x + 0) & 0xFFC0U) << 16);
             } else {
-                value = ((AV_RL16(src + 6*x + 4) & 0xFFC0) >> 4)
-                      | ((AV_RL16(src + 6*x + 2) & 0xFFC0) << 6)
-                      | ((AV_RL16(src + 6*x + 0) & 0xFFC0) << 16);
+                value = ((AV_RL16(src + 6*x + 4) & 0xFFC0U) >> 4)
+                      | ((AV_RL16(src + 6*x + 2) & 0xFFC0U) << 6)
+                      | ((AV_RL16(src + 6*x + 0) & 0xFFC0U) << 16);
             }
             write32(dst, value);
             dst += 4;
@@ -103,6 +110,59 @@ static void encode_rgb48_10bit(AVCodecContext *avctx, const AVPicture *pic,
     }
 }
 
+static void encode_gbrp10(AVCodecContext *avctx, const AVPicture *pic, uint8_t *dst)
+{
+    DPXContext *s = avctx->priv_data;  /* s->big_endian selects the byte order of the samples read below */
+    const uint8_t *src[3] = {pic->data[0], pic->data[1], pic->data[2]};
+    int x, y, i;
+    /* Packs one sample from each of the three planes into a single 32-bit word per pixel. */
+    for (y = 0; y < avctx->height; y++) {
+        for (x = 0; x < avctx->width; x++) {
+            int value;
+            if (s->big_endian) {
+                value = (AV_RB16(src[0] + 2*x) << 12)             /* plane 0 starts at bit 12 */
+                      | (AV_RB16(src[1] + 2*x) << 2)              /* plane 1 starts at bit 2; bits 1..0 stay clear */
+                      | ((unsigned)AV_RB16(src[2] + 2*x) << 22);  /* plane 2 starts at bit 22; unsigned so the shift cannot overflow int */
+            } else {
+                value = (AV_RL16(src[0] + 2*x) << 12)             /* same layout, little-endian samples */
+                      | (AV_RL16(src[1] + 2*x) << 2)
+                      | ((unsigned)AV_RL16(src[2] + 2*x) << 22);
+            }
+            write32(dst, value);
+            dst += 4;  /* one 32-bit word written per pixel */
+        }
+        for (i = 0; i < 3; i++)
+            src[i] += pic->linesize[i];  /* src[] are byte pointers: step one row per plane */
+    }
+}
+
+static void encode_gbrp12(AVCodecContext *avctx, const AVPicture *pic, uint16_t *dst)
+{
+    DPXContext *s = avctx->priv_data;  /* s->big_endian selects the byte order of the samples read below */
+    const uint16_t *src[3] = {(uint16_t*)pic->data[0],
+                              (uint16_t*)pic->data[1],
+                              (uint16_t*)pic->data[2]};
+    int x, y, i;
+    for (y = 0; y < avctx->height; y++) {
+        for (x = 0; x < avctx->width; x++) {
+            uint16_t value[3];  /* the three samples of one pixel, in output order */
+            if (s->big_endian) {
+                value[1] = AV_RB16(src[0] + x) << 4;  /* plane 0 is written second; << 4 moves the sample to the top 12 bits */
+                value[2] = AV_RB16(src[1] + x) << 4;  /* plane 1 is written third */
+                value[0] = AV_RB16(src[2] + x) << 4;  /* plane 2 is written first */
+            } else {
+                value[1] = AV_RL16(src[0] + x) << 4;  /* same ordering, little-endian samples */
+                value[2] = AV_RL16(src[1] + x) << 4;
+                value[0] = AV_RL16(src[2] + x) << 4;
+            }
+            for (i = 0; i < 3; i++)
+                write16(dst++, value[i]);  /* three 16-bit words per pixel */
+        }
+        for (i = 0; i < 3; i++)
+            src[i] += pic->linesize[i]/2;  /* src[] are uint16_t pointers, so the row step is halved */
+    }
+}
+
 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                         const AVFrame *frame, int *got_packet)
 {
@@ -115,10 +175,8 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         size = avctx->height * avctx->width * 4;
     else
         size = avpicture_get_size(avctx->pix_fmt, avctx->width, avctx->height);
-    if ((ret = ff_alloc_packet(pkt, size + HEADER_SIZE)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
+    if ((ret = ff_alloc_packet2(avctx, pkt, size + HEADER_SIZE)) < 0)
         return ret;
-    }
     buf = pkt->data;
 
     memset(buf, 0, HEADER_SIZE);
@@ -142,13 +200,15 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     buf[801] = 2; /* linear transfer */
     buf[802] = 2; /* linear colorimetric */
     buf[803] = s->bits_per_component;
-    write16(buf + 804, s->bits_per_component == 10 ? 1 : 0); /* packing method */
+    write16(buf + 804, (s->bits_per_component == 10 || s->bits_per_component == 12) ?
+                       1 : 0); /* packing method */
+    write32(buf + 808, HEADER_SIZE); /* data offset */
 
     /* Image source information header */
     write32(buf + 1628, avctx->sample_aspect_ratio.num);
     write32(buf + 1632, avctx->sample_aspect_ratio.den);
 
-    switch (s->bits_per_component) {
+    switch(s->bits_per_component) {
     case 8:
     case 16:
         size = avpicture_layout((const AVPicture*)frame, avctx->pix_fmt,
@@ -158,7 +218,13 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
             return size;
         break;
     case 10:
-        encode_rgb48_10bit(avctx, (const AVPicture*)frame, buf + HEADER_SIZE);
+        if (s->planar)
+            encode_gbrp10(avctx, (const AVPicture*)frame, buf + HEADER_SIZE);
+        else
+            encode_rgb48_10bit(avctx, (const AVPicture*)frame, buf + HEADER_SIZE);
+        break;
+    case 12:
+        encode_gbrp12(avctx, (const AVPicture*)frame, (uint16_t*)(buf + HEADER_SIZE));
         break;
     default:
         av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth: %d\n", s->bits_per_component);
@@ -175,25 +241,21 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     return 0;
 }
 
-static av_cold int encode_close(AVCodecContext *avctx)
-{
-    av_frame_free(&avctx->coded_frame);
-    return 0;
-}
-
 AVCodec ff_dpx_encoder = {
-    .name = "dpx",
-    .long_name = NULL_IF_CONFIG_SMALL("DPX image"),
-    .type = AVMEDIA_TYPE_VIDEO,
-    .id   = AV_CODEC_ID_DPX,
+    .name           = "dpx",
+    .long_name      = NULL_IF_CONFIG_SMALL("DPX (Digital Picture Exchange) image"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_DPX,
     .priv_data_size = sizeof(DPXContext),
-    .init   = encode_init,
-    .encode2 = encode_frame,
-    .close   = encode_close,
-    .pix_fmts = (const enum AVPixelFormat[]){
-        AV_PIX_FMT_RGB24,
-        AV_PIX_FMT_RGBA,
-        AV_PIX_FMT_RGB48LE,
-        AV_PIX_FMT_RGB48BE,
+    .init           = encode_init,
+    .encode2        = encode_frame,
+    .pix_fmts       = (const enum AVPixelFormat[]){ /* every format listed has a matching case in encode_init() */
+        AV_PIX_FMT_GRAY8,
+        AV_PIX_FMT_RGB24,    AV_PIX_FMT_RGBA, AV_PIX_FMT_ABGR,
+        AV_PIX_FMT_GRAY16LE, AV_PIX_FMT_GRAY16BE,
+        AV_PIX_FMT_RGB48LE,  AV_PIX_FMT_RGB48BE,
+        AV_PIX_FMT_RGBA64LE, AV_PIX_FMT_RGBA64BE,
+        AV_PIX_FMT_GBRP10LE, AV_PIX_FMT_GBRP10BE, /* 10-bit case of encode_frame() uses encode_gbrp10() when s->planar */
+        AV_PIX_FMT_GBRP12LE, AV_PIX_FMT_GBRP12BE, /* 12-bit case of encode_frame() uses encode_gbrp12() */
         AV_PIX_FMT_NONE},
 };
diff --git a/libavcodec/dsd_tablegen.c b/libavcodec/dsd_tablegen.c
new file mode 100644
index 0000000..dbeb9fe
--- /dev/null
+++ b/libavcodec/dsd_tablegen.c
@@ -0,0 +1,38 @@
+/*
+ * Generate a header file for hardcoded DSD tables
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdlib.h>
+#define CONFIG_HARDCODED_TABLES 0
+#include "dsd_tablegen.h"
+#include "tableprint.h"
+#include <inttypes.h>
+
+int main(void)
+{
+    dsd_ctables_tableinit();  /* presumably fills ctables; comes from dsd_tablegen.h with CONFIG_HARDCODED_TABLES set to 0 */
+    /* Everything below prints the generated header as C source. */
+    write_fileheader();
+    /* NOTE(review): declared as double here but printed via write_float_2d_array() -- confirm the helper matches ctables' element type. */
+    printf("static const double ctables[CTABLES][256] = {\n");
+    write_float_2d_array(ctables, CTABLES, 256);
+    printf("};\n");
+
+    return 0;
+}
diff --git a/libavcodec/dsd_tablegen.h b/libavcodec/dsd_tablegen.h
new file mode 100644
index 0000000..6afb416
--- /dev/null
+++ b/libavcodec/dsd_tablegen.h
@@ -0,0 +1,95 @@
+/*
+ * Header file for hardcoded DSD tables
+ * based on BSD licensed dsd2pcm by Sebastian Gesemann
+ * Copyright (c) 2009, 2011 Sebastian Gesemann. All rights reserved.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DSD_TABLEGEN_H
+#define AVCODEC_DSD_TABLEGEN_H
+
+#include <stdint.h>
+#include "libavutil/attributes.h"
+
+#define HTAPS   48                /** number of FIR constants */
+#define CTABLES ((HTAPS + 7) / 8) /** number of "8 MACs" lookup tables */
+
+#if CONFIG_HARDCODED_TABLES
+#define dsd_ctables_tableinit()
+#include "libavcodec/dsd_tables.h"
+#else
+#include "libavutil/common.h"
+
+/*
+ * Properties of this 96-tap lowpass filter when applied on a signal
+ * with sampling rate of 44100*64 Hz:
+ *
+ * () has a delay of 17 microseconds.
+ *
+ * () flat response up to 48 kHz
+ *
+ * () if you downsample afterwards by a factor of 8, the
+ *    spectrum below 70 kHz is practically alias-free.
+ *
+ * () stopband rejection is about 160 dB
+ *
+ * The coefficient tables ("ctables", CTABLES x 256 floats) take only
+ * 6 KiB and should fit into a modern processor's fast cache.
+ */
+
+/** The 2nd half (HTAPS = 48 coeffs) of a 96-tap symmetric lowpass filter;
+ *  dsd_ctables_tableinit() folds these into ctables[], up to 8 taps per table.
+ */
+static const double htaps[HTAPS] = {
+     0.09950731974056658,    0.09562845727714668,    0.08819647126516944,
+     0.07782552527068175,    0.06534876523171299,    0.05172629311427257,
+     0.0379429484910187,     0.02490921351762261,    0.0133774746265897,
+     0.003883043418804416,  -0.003284703416210726,  -0.008080250212687497,
+    -0.01067241812471033,   -0.01139427235000863,   -0.0106813877974587,
+    -0.009007905078766049,  -0.006828859761015335,  -0.004535184322001496,
+    -0.002425035959059578,  -0.0006922187080790708,  0.0005700762133516592,
+     0.001353838005269448,   0.001713709169690937,   0.001742046839472948,
+     0.001545601648013235,   0.001226696225277855,   0.0008704322683580222,
+     0.0005381636200535649,  0.000266446345425276,   7.002968738383528e-05,
+    -5.279407053811266e-05, -0.0001140625650874684, -0.0001304796361231895,
+    -0.0001189970287491285, -9.396247155265073e-05, -6.577634378272832e-05,
+    -4.07492895872535e-05,  -2.17407957554587e-05,  -9.163058931391722e-06,
+    -2.017460145032201e-06,  1.249721855219005e-06,  2.166655190537392e-06,
+     1.930520892991082e-06,  1.319400334374195e-06,  7.410039764949091e-07,
+     3.423230509967409e-07,  1.244182214744588e-07,  3.130441005359396e-08
+};
+
+static float ctables[CTABLES][256];
+
+/* Fill ctables: row CTABLES-1-grp maps a byte value to the signed sum of that group's taps. */
+static av_cold void dsd_ctables_tableinit(void)
+{
+    int grp, byte, bit;
+    for (grp = 0; grp < CTABLES; ++grp) {
+        const int ntaps = FFMIN(HTAPS - grp * 8, 8);
+        for (byte = 0; byte < 256; ++byte) {
+            double sum = 0.0;
+            for (bit = 0; bit < ntaps; ++bit) /* MSB pairs with the group's first tap; set bit => +tap, clear => -tap */
+                sum += (2 * ((byte >> (7 - bit)) & 1) - 1) * htaps[grp * 8 + bit];
+            ctables[CTABLES - 1 - grp][byte] = (float)sum;
+        }
+    }
+}
+#endif /* CONFIG_HARDCODED_TABLES */
+
+#endif /* AVCODEC_DSD_TABLEGEN_H */
diff --git a/libavcodec/dsddec.c b/libavcodec/dsddec.c
new file mode 100644
index 0000000..f1dfd4b
--- /dev/null
+++ b/libavcodec/dsddec.c
@@ -0,0 +1,167 @@
+/*
+ * Direct Stream Digital (DSD) decoder
+ * based on BSD licensed dsd2pcm by Sebastian Gesemann
+ * Copyright (c) 2009, 2011 Sebastian Gesemann. All rights reserved.
+ * Copyright (c) 2014 Peter Ross
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Direct Stream Digital (DSD) decoder
+ */
+
+#include "libavcodec/internal.h"
+#include "libavcodec/mathops.h"
+#include "avcodec.h"
+#include "dsd_tablegen.h"
+
+#define FIFOSIZE 16              /** must be a power of two */
+#define FIFOMASK (FIFOSIZE - 1)  /** bit mask for FIFO offsets */
+
+#if FIFOSIZE * 8 < HTAPS * 2
+#error "FIFOSIZE too small"
+#endif
+
+/**
+ * Per-channel buffer
+ */
+typedef struct {
+    unsigned char buf[FIFOSIZE]; /**< ring of recent input bytes, indexed modulo FIFOSIZE by dsd2pcm_translate() */
+    unsigned pos;                /**< slot of buf[] that receives the next input byte */
+} DSDContext;
+
+static void dsd2pcm_translate(DSDContext* s, size_t samples, int lsbf,
+                              const unsigned char *src, ptrdiff_t src_stride,
+                              float *dst, ptrdiff_t dst_stride)
+{
+    unsigned pos, i;
+    unsigned char* p;
+    double sum;
+    /* Consume `samples` bytes from src (src_stride apart) and write one float per byte to dst (dst_stride apart). */
+    pos = s->pos;
+
+    while (samples-- > 0) {
+        s->buf[pos] = lsbf ? ff_reverse[*src] : *src; /* lsbf input is bit-reversed through ff_reverse[] on entry */
+        src += src_stride;
+        /* The byte stored CTABLES iterations ago now belongs to the older half (the `b` lookups below): bit-reverse it in place. */
+        p = s->buf + ((pos - CTABLES) & FIFOMASK);
+        *p = ff_reverse[*p];
+        /* a: the CTABLES newest bytes, newest first; b: the CTABLES bytes before those, oldest first. */
+        sum = 0.0;
+        for (i = 0; i < CTABLES; i++) {
+            unsigned char a = s->buf[(pos                   - i) & FIFOMASK];
+            unsigned char b = s->buf[(pos - (CTABLES*2 - 1) + i) & FIFOMASK];
+            sum += ctables[i][a] + ctables[i][b];
+        }
+
+        *dst = (float)sum;
+        dst += dst_stride;
+
+        pos = (pos + 1) & FIFOMASK;
+    }
+
+    s->pos = pos; /* remember the write position for the next call */
+}
+
+static av_cold void init_static_data(void)
+{
+    static int tables_ready; /* one-shot guard: plain flag, no locking */
+    if (!tables_ready) {
+        dsd_ctables_tableinit();
+        tables_ready = 1;
+    }
+}
+
+static av_cold int decode_init(AVCodecContext *avctx)
+{
+    DSDContext * s;
+    int i;
+
+    /* decode_frame() divides the packet size by the channel count */
+    if (avctx->channels <= 0)
+        return AVERROR(EINVAL);
+    init_static_data();
+
+    s = av_malloc_array(sizeof(DSDContext), avctx->channels);
+    if (!s)
+        return AVERROR(ENOMEM);
+
+    for (i = 0; i < avctx->channels; i++) {
+        /* 0x69 = 01101001: this pattern "on repeat" makes a low energy
+         * 352.8 kHz tone and a high energy 1.0584 MHz tone which should be
+         * filtered out completely by any playback system --> silence */
+        s[i].pos = 0;
+        memset(s[i].buf, 0x69, sizeof(s[i].buf));
+    }
+
+    avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
+    avctx->priv_data  = s; /* one DSDContext per channel */
+    return 0;
+}
+
+/*
+ * Decode one packet of DSD bytes into planar float samples, one sample per
+ * input byte and channel. Returns nb_samples * channels, or a negative error.
+ */
+static int decode_frame(AVCodecContext *avctx, void *data,
+                        int *got_frame_ptr, AVPacket *avpkt)
+{
+    DSDContext *chan_state = avctx->priv_data;
+    AVFrame *out = data;
+    const enum AVCodecID id = avctx->codec_id;
+    const int lsb_first = id == AV_CODEC_ID_DSD_LSBF || id == AV_CODEC_ID_DSD_LSBF_PLANAR;
+    const int planar    = id == AV_CODEC_ID_DSD_LSBF_PLANAR || id == AV_CODEC_ID_DSD_MSBF_PLANAR;
+    int chan_offset, byte_step, err, ch;
+
+    out->nb_samples = avpkt->size / avctx->channels;
+
+    /* planar: each channel is a contiguous run; otherwise bytes are interleaved */
+    chan_offset = planar ? out->nb_samples : 1;
+    byte_step   = planar ? 1 : avctx->channels;
+
+    err = ff_get_buffer(avctx, out, 0);
+    if (err < 0)
+        return err;
+
+    /* convert each channel with its own filter state */
+    for (ch = 0; ch < avctx->channels; ch++)
+        dsd2pcm_translate(&chan_state[ch], out->nb_samples, lsb_first,
+                          avpkt->data + ch * chan_offset, byte_step,
+                          ((float **)out->extended_data)[ch], 1);
+
+    *got_frame_ptr = 1;
+    return out->nb_samples * avctx->channels;
+}
+
+#define DSD_DECODER(id_, name_, long_name_) \
+AVCodec ff_##name_##_decoder = { \
+    .name         = #name_, \
+    .long_name    = NULL_IF_CONFIG_SMALL(long_name_), \
+    .type         = AVMEDIA_TYPE_AUDIO, \
+    .id           = AV_CODEC_ID_##id_, \
+    .init         = decode_init, \
+    .decode       = decode_frame, \
+    .sample_fmts  = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP, \
+                                                   AV_SAMPLE_FMT_NONE }, \
+};
+/* All four variants share decode_init()/decode_frame(); they differ only in codec id and names. */
+DSD_DECODER(DSD_LSBF, dsd_lsbf, "DSD (Direct Stream Digital), least significant bit first")
+DSD_DECODER(DSD_MSBF, dsd_msbf, "DSD (Direct Stream Digital), most significant bit first")
+DSD_DECODER(DSD_MSBF_PLANAR, dsd_msbf_planar, "DSD (Direct Stream Digital), most significant bit first, planar")
+DSD_DECODER(DSD_LSBF_PLANAR, dsd_lsbf_planar, "DSD (Direct Stream Digital), least significant bit first, planar")
diff --git a/libavcodec/dsicinaudio.c b/libavcodec/dsicinaudio.c
index 969e4ae..b336d2c 100644
--- a/libavcodec/dsicinaudio.c
+++ b/libavcodec/dsicinaudio.c
@@ -2,20 +2,20 @@
  * Delphine Software International CIN audio decoder
  * Copyright (c) 2006 Gregory Montoir (cyx@users.sourceforge.net)
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -98,10 +98,8 @@ static int cinaudio_decode_frame(AVCodecContext *avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = avpkt->size - cin->initial_decode_frame;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     samples = (int16_t *)frame->data[0];
 
     delta = cin->delta;
diff --git a/libavcodec/dsicinvideo.c b/libavcodec/dsicinvideo.c
index b56a581..48fb635 100644
--- a/libavcodec/dsicinvideo.c
+++ b/libavcodec/dsicinvideo.c
@@ -2,20 +2,20 @@
  * Delphine Software International CIN video decoder
  * Copyright (c) 2006 Gregory Montoir (cyx@users.sourceforge.net)
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -42,10 +42,33 @@ typedef struct CinVideoContext {
     uint8_t *bitmap_table[3];
 } CinVideoContext;
 
+static av_cold void destroy_buffers(CinVideoContext *cin)
+{
+    int idx = 0;
+    /* Free every bitmap plane; av_freep() also resets the stored pointer to NULL. */
+    while (idx < 3)
+        av_freep(&cin->bitmap_table[idx++]);
+}
+
+static av_cold int allocate_buffers(CinVideoContext *cin)
+{
+    int idx;
+    /* Allocate the three zero-filled bitmap planes; release everything on failure. */
+    for (idx = 0; idx < 3; ++idx) {
+        uint8_t *plane = av_mallocz(cin->bitmap_size);
+        cin->bitmap_table[idx] = plane;
+        if (plane)
+            continue;
+        av_log(cin->avctx, AV_LOG_ERROR, "Can't allocate bitmap buffers.\n");
+        destroy_buffers(cin);
+        return AVERROR(ENOMEM);
+    }
+    return 0;
+}
+
 static av_cold int cinvideo_decode_init(AVCodecContext *avctx)
 {
     CinVideoContext *cin = avctx->priv_data;
-    unsigned int i;
 
     cin->avctx = avctx;
     avctx->pix_fmt = AV_PIX_FMT_PAL8;
@@ -55,11 +78,8 @@ static av_cold int cinvideo_decode_init(AVCodecContext *avctx)
         return AVERROR(ENOMEM);
 
     cin->bitmap_size = avctx->width * avctx->height;
-    for (i = 0; i < 3; ++i) {
-        cin->bitmap_table[i] = av_mallocz(cin->bitmap_size);
-        if (!cin->bitmap_table[i])
-            av_log(avctx, AV_LOG_ERROR, "Can't allocate bitmap buffers.\n");
-    }
+    if (allocate_buffers(cin))
+        return AVERROR(ENOMEM);
 
     return 0;
 }
@@ -141,27 +161,30 @@ static int cin_decode_lzss(const unsigned char *src, int src_size,
     return 0;
 }
 
-static void cin_decode_rle(const unsigned char *src, int src_size,
+static int cin_decode_rle(const unsigned char *src, int src_size, /* returns 0, or AVERROR_INVALIDDATA on a truncated literal run */
                            unsigned char *dst, int dst_size)
 {
     int len, code;
     unsigned char *dst_end       = dst + dst_size;
     const unsigned char *src_end = src + src_size;
 
-    while (src < src_end && dst < dst_end) {
+    while (src + 1 < src_end && dst < dst_end) { /* need the code byte plus at least one byte after it */
         code = *src++;
         if (code & 0x80) {
-            if (src >= src_end)
-                break;
             len = code - 0x7F;
             memset(dst, *src++, FFMIN(len, dst_end - dst));
         } else {
             len = code + 1;
+            if (len > src_end-src) { /* literal run longer than the remaining input */
+                av_log(NULL, AV_LOG_ERROR, "RLE overread\n");
+                return AVERROR_INVALIDDATA;
+            }
             memcpy(dst, src, FFMIN3(len, dst_end - dst, src_end - src));
             src += len;
         }
         dst += len;
     }
+    return 0; /* input or output exhausted without a bounds violation */
 }
 
 static int cinvideo_decode_frame(AVCodecContext *avctx,
@@ -188,19 +211,17 @@ static int cinvideo_decode_frame(AVCodecContext *avctx,
         if (palette_colors_count > 256)
             return AVERROR_INVALIDDATA;
         for (i = 0; i < palette_colors_count; ++i) {
-            cin->palette[i]    = bytestream_get_le24(&buf);
+            cin->palette[i]    = 0xFFU << 24 | bytestream_get_le24(&buf);
             bitmap_frame_size -= 3;
         }
     } else {
         for (i = 0; i < palette_colors_count; ++i) {
-            cin->palette[buf[0]] = AV_RL24(buf + 1);
+            cin->palette[buf[0]] = 0xFFU << 24 | AV_RL24(buf + 1);
             buf                 += 4;
             bitmap_frame_size   -= 4;
         }
     }
 
-    bitmap_frame_size = FFMIN(cin->bitmap_size, bitmap_frame_size);
-
     /* note: the decoding routines below assumes that
      * surface.width = surface.pitch */
     switch (bitmap_frame_type) {
@@ -215,7 +236,7 @@ static int cinvideo_decode_frame(AVCodecContext *avctx,
                              cin->bitmap_table[CIN_CUR_BMP], cin->bitmap_size);
         break;
     case 35:
-        cin_decode_huffman(buf, bitmap_frame_size,
+        bitmap_frame_size = cin_decode_huffman(buf, bitmap_frame_size,
                            cin->bitmap_table[CIN_INT_BMP], cin->bitmap_size);
         cin_decode_rle(cin->bitmap_table[CIN_INT_BMP], bitmap_frame_size,
                        cin->bitmap_table[CIN_CUR_BMP], cin->bitmap_size);
@@ -251,11 +272,8 @@ static int cinvideo_decode_frame(AVCodecContext *avctx,
         break;
     }
 
-    if ((res = ff_reget_buffer(avctx, cin->frame)) < 0) {
-        av_log(cin->avctx, AV_LOG_ERROR,
-               "delphinecinvideo: reget_buffer() failed to allocate a frame\n");
+    if ((res = ff_reget_buffer(avctx, cin->frame)) < 0)
         return res;
-    }
 
     memcpy(cin->frame->data[1], cin->palette, sizeof(cin->palette));
     cin->frame->palette_has_changed = 1;
@@ -278,12 +296,10 @@ static int cinvideo_decode_frame(AVCodecContext *avctx,
 static av_cold int cinvideo_decode_end(AVCodecContext *avctx)
 {
     CinVideoContext *cin = avctx->priv_data;
-    int i;
 
     av_frame_free(&cin->frame);
 
-    for (i = 0; i < 3; ++i)
-        av_free(cin->bitmap_table[i]);
+    destroy_buffers(cin); /* frees all three bitmap_table entries via av_freep() */
 
     return 0;
 }
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
new file mode 100644
index 0000000..2f4be85
--- /dev/null
+++ b/libavcodec/dsputil.h
@@ -0,0 +1,87 @@
+/*
+ * DSP utils
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * DSP utils.
+ * This is deprecated
+ */
+
+#ifndef AVCODEC_DSPUTIL_H
+#define AVCODEC_DSPUTIL_H
+
+#include "avcodec.h"
+#include "version.h"
+#include "me_cmp.h"
+
+#if FF_API_DSPUTIL
+
+/* minimum alignment rules ;)
+ * If you notice errors in the align stuff, need more alignment for some ASM code
+ * for some CPU or need to use a function with less aligned data then send a mail
+ * to the ffmpeg-devel mailing list, ...
+ *
+ * !warning These alignments might not match reality, (missing attribute((align))
+ * stuff somewhere possible).
+ * I (Michael) did not check them, these are just the alignments which I think
+ * could be reached easily ...
+ *
+ * !future video codecs might need functions with less strict alignment
+ */
+
+struct MpegEncContext;
+
+/**
+ * DSPContext: deprecated compatibility struct, only declared while FF_API_DSPUTIL is set; see avpriv_dsputil_init().
+ */
+typedef struct DSPContext {
+    int (*sum_abs_dctelem)(int16_t *block /* align 16 */);
+
+    me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */
+    me_cmp_func sse[6];
+    me_cmp_func hadamard8_diff[6];
+    me_cmp_func dct_sad[6];
+    me_cmp_func quant_psnr[6];
+    me_cmp_func bit[6];
+    me_cmp_func rd[6];
+    me_cmp_func vsad[6];
+    me_cmp_func vsse[6];
+    me_cmp_func nsse[6];
+    me_cmp_func w53[6];
+    me_cmp_func w97[6];
+    me_cmp_func dct_max[6];
+    me_cmp_func dct264_sad[6];
+
+    me_cmp_func me_pre_cmp[6];
+    me_cmp_func me_cmp[6];
+    me_cmp_func me_sub_cmp[6];
+    me_cmp_func mb_cmp[6];
+    me_cmp_func ildct_cmp[6]; // only width 16 used
+    me_cmp_func frame_skip_cmp[6]; // only width 8 used
+
+    me_cmp_func pix_abs[2][4];
+} DSPContext;
+
+attribute_deprecated void avpriv_dsputil_init(DSPContext* p, AVCodecContext *avctx);
+
+#endif
+#endif /* AVCODEC_DSPUTIL_H */
diff --git a/libavcodec/dsputil_compat.c b/libavcodec/dsputil_compat.c
new file mode 100644
index 0000000..7ac1099
--- /dev/null
+++ b/libavcodec/dsputil_compat.c
@@ -0,0 +1,56 @@
+/*
+ * DSP utils
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "me_cmp.h"
+#include "dsputil.h"
+
+#if FF_API_DSPUTIL
+
+void avpriv_dsputil_init(DSPContext* p, AVCodecContext *avctx)
+{
+    MECmpContext mecc;
+    /* Initialize a scratch MECmpContext, then copy each same-named member into *p. */
+    ff_me_cmp_init(&mecc, avctx);
+#define COPY(name) memcpy(&p->name, &mecc.name, sizeof(p->name)) /* copy size is taken from the DSPContext member */
+    COPY(sum_abs_dctelem);
+    COPY(sad);
+    COPY(sse);
+    COPY(hadamard8_diff);
+    COPY(dct_sad);
+    COPY(quant_psnr);
+    COPY(bit);
+    COPY(rd);
+    COPY(vsad);
+    COPY(vsse);
+    COPY(nsse);
+    COPY(w53);
+    COPY(w97);
+    COPY(dct_max);
+    COPY(dct264_sad);
+    COPY(me_pre_cmp);
+    COPY(me_cmp);
+    COPY(me_sub_cmp);
+    COPY(mb_cmp);
+    COPY(ildct_cmp);
+    COPY(frame_skip_cmp);
+    COPY(pix_abs);
+}
+
+#endif
diff --git a/libavcodec/dump_extradata_bsf.c b/libavcodec/dump_extradata_bsf.c
index 17d9434..2dcbf8f 100644
--- a/libavcodec/dump_extradata_bsf.c
+++ b/libavcodec/dump_extradata_bsf.c
@@ -1,20 +1,20 @@
 /*
  * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -47,7 +47,6 @@ static int dump_extradata(AVBitStreamFilterContext *bsfc, AVCodecContext *avctx,
 }
 
 AVBitStreamFilter ff_dump_extradata_bsf={
-    "dump_extra",
-    0,
-    dump_extradata,
+    .name   = "dump_extra",
+    .filter = dump_extradata, /* members not named here are zero-initialized */
 };
diff --git a/libavcodec/dv.c b/libavcodec/dv.c
index e05c878..25f53a7 100644
--- a/libavcodec/dv.c
+++ b/libavcodec/dv.c
@@ -16,20 +16,20 @@
  * Many thanks to Dan Dennedy <dan@dennedy.org> for providing wealth
  * of DV technical info.
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -270,7 +270,7 @@ av_cold int ff_dvvideo_init(AVCodecContext *avctx)
            to accelerate the parsing of partial codes */
         init_vlc(&dv_vlc, TEX_VLC_BITS, j,
                  new_dv_vlc_len, 1, 1, new_dv_vlc_bits, 2, 2, 0);
-        assert(dv_vlc.table_size == 1184);
+        av_assert1(dv_vlc.table_size == 1184);
 
         for (i = 0; i < dv_vlc.table_size; i++){
             int code = dv_vlc.table[i][0];
@@ -296,3 +296,4 @@ av_cold int ff_dvvideo_init(AVCodecContext *avctx)
 
     return 0;
 }
+
diff --git a/libavcodec/dv.h b/libavcodec/dv.h
index 019c15a..14cfbce 100644
--- a/libavcodec/dv.h
+++ b/libavcodec/dv.h
@@ -2,20 +2,20 @@
  * Constants for DV codec
  * Copyright (c) 2002 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -80,9 +80,6 @@ enum dv_pack_type {
 #define DV_PROFILE_IS_1080i50(p) (((p)->video_stype == 0x14) && ((p)->dsf == 1))
 #define DV_PROFILE_IS_720p50(p)  (((p)->video_stype == 0x18) && ((p)->dsf == 1))
 
-/* minimum number of bytes to read from a DV stream in order to
-   determine the profile */
-#define DV_PROFILE_BYTES (6*80) /* 6 DIF blocks */
 
 /**
  * largest possible DV frame, in bytes (1080i50)
diff --git a/libavcodec/dv_profile.c b/libavcodec/dv_profile.c
index 686632d..58ce79f 100644
--- a/libavcodec/dv_profile.c
+++ b/libavcodec/dv_profile.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -21,6 +21,7 @@
 #include "config.h"
 
 #include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
 #include "libavutil/log.h"
 #include "libavutil/pixdesc.h"
 #include "avcodec.h"
@@ -264,23 +265,31 @@ void ff_dv_print_profiles(void *logctx, int loglevel)
 
 #endif /* CONFIG_DVPROFILE */
 
-const AVDVProfile *av_dv_frame_profile(const AVDVProfile *sys,
+const AVDVProfile* avpriv_dv_frame_profile2(AVCodecContext* codec, const AVDVProfile *sys,
                                        const uint8_t* frame, unsigned buf_size)
 {
 #if CONFIG_DVPROFILE
     int i, dsf, stype;
 
-    if (buf_size < 80 * 5 + 48 + 4)
+    if(buf_size < DV_PROFILE_BYTES)
         return NULL;
 
     dsf = (frame[3] & 0x80) >> 7;
     stype = frame[80 * 5 + 48 + 3] & 0x1f;
 
     /* 576i50 25Mbps 4:1:1 is a special case */
-    if (dsf == 1 && stype == 0 && frame[4] & 0x07 /* the APT field */) {
+    if ((dsf == 1 && stype == 0 && frame[4] & 0x07 /* the APT field */) ||
+        (stype == 31 && codec && codec->codec_tag==AV_RL32("SL25") && codec->coded_width==720 && codec->coded_height==576)) {
         return &dv_profiles[2];
     }
 
+    if(   stype == 0
+       && codec
+       && (codec->codec_tag==AV_RL32("dvsd") || codec->codec_tag==AV_RL32("CDVC"))
+       && codec->coded_width ==720
+       && codec->coded_height==576)
+        return &dv_profiles[1];
+
     for (i = 0; i < FF_ARRAY_ELEMS(dv_profiles); i++)
         if (dsf == dv_profiles[i].dsf && stype == dv_profiles[i].video_stype)
             return &dv_profiles[i];
@@ -288,11 +297,21 @@ const AVDVProfile *av_dv_frame_profile(const AVDVProfile *sys,
     /* check if old sys matches and assumes corrupted input */
     if (sys && buf_size == sys->frame_size)
         return sys;
+
+    /* hack for trac issue #217, dv files created with QuickTime 3 */
+    if ((frame[3] & 0x7f) == 0x3f && frame[80 * 5 + 48 + 3] == 0xff)
+        return &dv_profiles[dsf];
 #endif
 
     return NULL;
 }
 
+const AVDVProfile *av_dv_frame_profile(const AVDVProfile *sys,
+                                       const uint8_t* frame, unsigned buf_size)
+{
+    return avpriv_dv_frame_profile2(NULL, sys, frame, buf_size); /* NULL codec: the codec_tag / coded-size special cases are not considered */
+}
+
 const AVDVProfile *av_dv_codec_profile(int width, int height,
                                        enum AVPixelFormat pix_fmt)
 {
@@ -318,6 +337,9 @@ const AVDVProfile *avpriv_dv_frame_profile(const AVDVProfile *sys,
 
 const AVDVProfile *avpriv_dv_codec_profile(AVCodecContext *codec)
 {
-    return av_dv_codec_profile(codec->width, codec->height, codec->pix_fmt);
+    if (codec->coded_width || codec->coded_height) { /* use the coded size when either dimension is set */
+        return av_dv_codec_profile(codec->coded_width, codec->coded_height, codec->pix_fmt);
+    } else /* neither coded dimension set: fall back to width/height */
+        return av_dv_codec_profile(codec->width, codec->height, codec->pix_fmt);
 }
 #endif
diff --git a/libavcodec/dv_profile.h b/libavcodec/dv_profile.h
index f2f5ce9..a2ef608 100644
--- a/libavcodec/dv_profile.h
+++ b/libavcodec/dv_profile.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,6 +25,10 @@
 #include "libavutil/rational.h"
 #include "avcodec.h"
 
+/* minimum number of bytes to read from a DV stream in order to
+   determine the profile */
+#define DV_PROFILE_BYTES (6*80) /* 6 DIF blocks */
+
 /*
  * AVDVProfile is used to express the differences between various
  * DV flavors. For now it's primarily used for differentiating
@@ -53,6 +57,8 @@ typedef struct AVDVProfile {
     const uint8_t  (*audio_shuffle)[9];     /* PCM shuffling table */
 } AVDVProfile;
 
+const AVDVProfile* avpriv_dv_frame_profile2(AVCodecContext* codec, const AVDVProfile *sys,
+                                            const uint8_t* frame, unsigned buf_size);
 #if LIBAVCODEC_VERSION_MAJOR < 56
 const AVDVProfile *avpriv_dv_frame_profile(const AVDVProfile *sys,
                                            const uint8_t* frame, unsigned buf_size);
diff --git a/libavcodec/dv_profile_internal.h b/libavcodec/dv_profile_internal.h
index f93e7ca..9772041 100644
--- a/libavcodec/dv_profile_internal.h
+++ b/libavcodec/dv_profile_internal.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/dv_tablegen.c b/libavcodec/dv_tablegen.c
index 9b2b954..2579341 100644
--- a/libavcodec/dv_tablegen.c
+++ b/libavcodec/dv_tablegen.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2010 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/dv_tablegen.h b/libavcodec/dv_tablegen.h
index 2f3fd95..c04b802 100644
--- a/libavcodec/dv_tablegen.h
+++ b/libavcodec/dv_tablegen.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2010 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/dvbsub.c b/libavcodec/dvbsub.c
index de0808f..f6b46e6 100644
--- a/libavcodec/dvbsub.c
+++ b/libavcodec/dvbsub.c
@@ -2,20 +2,20 @@
  * DVB subtitle encoding
  * Copyright (c) 2005 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include "avcodec.h"
@@ -23,7 +23,6 @@
 #include "libavutil/colorspace.h"
 
 typedef struct DVBSubtitleContext {
-    int hide_state;
     int object_version;
 } DVBSubtitleContext;
 
@@ -194,6 +193,60 @@ static void dvb_encode_rle4(uint8_t **pq,
     *pq = q;
 }
 
+/* RLE-encode h rows of w 8-bit pixels as DVB 8-bit/pixel code strings.
+ * Consecutive rows are linesize bytes apart; *pq is advanced past the
+ * output. The caller must provide enough room, no bound is checked. */
+static void dvb_encode_rle8(uint8_t **pq,
+                            const uint8_t *bitmap, int linesize,
+                            int w, int h)
+{
+    uint8_t *q;
+    int x, y, len, x1, color;
+
+    q = *pq;
+
+    for (y = 0; y < h; y++) {
+        /* data_type 0x12: 8-bit/pixel code string */
+        *q++ = 0x12;
+        x = 0;
+        while (x < w) {
+            /* measure the run of identical pixels starting at x */
+            x1 = x;
+            color = bitmap[x1++];
+            while (x1 < w && bitmap[x1] == color)
+                x1++;
+            len = x1 - x;
+            if (color == 0x00) {
+                // 00000000 0LLLLLLL          L pixels (1-127) in colour 0 (L > 0)
+                len = FFMIN(len, 127);
+                *q++ = 0x00;
+                *q++ = len;
+            } else if (len == 1) {
+                // 00000001 to 11111111           1 pixel in colour x
+                *q++ = color;
+            } else if (len == 2) {
+                // two single-pixel codes (2 bytes) beat the 3-byte run form
+                *q++ = color;
+                *q++ = color;
+            } else {
+                // 00000000 1LLLLLLL CCCCCCCC L pixels (3-127) in colour C (L > 2)
+                len = FFMIN(len, 127);
+                *q++ = 0x00;
+                *q++ = 0x80+len;
+                *q++ = color;
+            }
+            /* runs longer than 127 were clamped; the rest is picked up next round */
+            x += len;
+        }
+        /* end of line */
+        // 00000000 00000000 end of 8-bit/pixel_code_string
+        *q++ = 0x00;
+        *q++ = 0x00;
+        bitmap += linesize;
+    }
+    *pq = q;
+}
+
 static int encode_dvb_subtitles(DVBSubtitleContext *s,
                                 uint8_t *outbuf, const AVSubtitle *h)
 {
@@ -205,11 +258,9 @@ static int encode_dvb_subtitles(DVBSubtitleContext *s,
 
     page_id = 1;
 
-    if (h->num_rects == 0 || h->rects == NULL)
+    if (h->num_rects && h->rects == NULL)
         return -1;
 
-    *q++ = 0x00; /* subtitle_stream_id */
-
     /* page composition segment */
 
     *q++ = 0x0f; /* sync_byte */
@@ -218,10 +269,7 @@ static int encode_dvb_subtitles(DVBSubtitleContext *s,
     pseg_len = q;
     q += 2; /* segment length */
     *q++ = 30; /* page_timeout (seconds) */
-    if (s->hide_state)
-        page_state = 0; /* normal case */
-    else
-        page_state = 2; /* mode change */
+    page_state = 2; /* mode change */
     /* page_version = 0 + page_state */
     *q++ = (s->object_version << 4) | (page_state << 2) | 3;
 
@@ -234,7 +282,7 @@ static int encode_dvb_subtitles(DVBSubtitleContext *s,
 
     bytestream_put_be16(&pseg_len, q - pseg_len - 2);
 
-    if (!s->hide_state) {
+    if (h->num_rects) {
         for (clut_id = 0; clut_id < h->num_rects; clut_id++) {
 
             /* CLUT segment */
@@ -245,10 +293,15 @@ static int encode_dvb_subtitles(DVBSubtitleContext *s,
             } else if (h->rects[clut_id]->nb_colors <= 16) {
                 /* 4 bpp, standard encoding */
                 bpp_index = 1;
+            } else if (h->rects[clut_id]->nb_colors <= 256) {
+                /* 8 bpp, standard encoding */
+                bpp_index = 2;
             } else {
                 return -1;
             }
 
+
+            /* CLUT segment */
             *q++ = 0x0f; /* sync byte */
             *q++ = 0x12; /* CLUT definition segment */
             bytestream_put_be16(&q, page_id);
@@ -307,32 +360,37 @@ static int encode_dvb_subtitles(DVBSubtitleContext *s,
         *q++ = 0; /* 8 bit fill colors */
         *q++ = 0x03; /* 4 bit and 2 bit fill colors */
 
-        if (!s->hide_state) {
-            bytestream_put_be16(&q, region_id); /* object_id == region_id */
-            *q++ = (0 << 6) | (0 << 4);
-            *q++ = 0;
-            *q++ = 0xf0;
-            *q++ = 0;
-        }
+        bytestream_put_be16(&q, region_id); /* object_id == region_id */
+        *q++ = (0 << 6) | (0 << 4);
+        *q++ = 0;
+        *q++ = 0xf0;
+        *q++ = 0;
 
         bytestream_put_be16(&pseg_len, q - pseg_len - 2);
     }
 
-    if (!s->hide_state) {
+    if (h->num_rects) {
 
         for (object_id = 0; object_id < h->num_rects; object_id++) {
-            /* Object Data segment */
+            void (*dvb_encode_rle)(uint8_t **pq,
+                                    const uint8_t *bitmap, int linesize,
+                                    int w, int h);
 
+            /* select the RLE encoder matching the palette size */
             if (h->rects[object_id]->nb_colors <= 4) {
                 /* 2 bpp, some decoders do not support it correctly */
-                bpp_index = 0;
+                dvb_encode_rle = dvb_encode_rle2;
             } else if (h->rects[object_id]->nb_colors <= 16) {
                 /* 4 bpp, standard encoding */
-                bpp_index = 1;
+                dvb_encode_rle = dvb_encode_rle4;
+            } else if (h->rects[object_id]->nb_colors <= 256) {
+                /* 8 bpp, standard encoding */
+                dvb_encode_rle = dvb_encode_rle8;
             } else {
                 return -1;
             }
 
+            /* Object Data segment */
             *q++ = 0x0f; /* sync byte */
             *q++ = 0x13;
             bytestream_put_be16(&q, page_id);
@@ -345,19 +403,12 @@ static int encode_dvb_subtitles(DVBSubtitleContext *s,
                                                                        non_modifying_color_flag */
             {
                 uint8_t *ptop_field_len, *pbottom_field_len, *top_ptr, *bottom_ptr;
-                void (*dvb_encode_rle)(uint8_t **pq,
-                                        const uint8_t *bitmap, int linesize,
-                                        int w, int h);
+
                 ptop_field_len = q;
                 q += 2;
                 pbottom_field_len = q;
                 q += 2;
 
-                if (bpp_index == 0)
-                    dvb_encode_rle = dvb_encode_rle2;
-                else
-                    dvb_encode_rle = dvb_encode_rle4;
-
                 top_ptr = q;
                 dvb_encode_rle(&q, h->rects[object_id]->pict.data[0], h->rects[object_id]->w * 2,
                                     h->rects[object_id]->w, h->rects[object_id]->h >> 1);
@@ -384,10 +435,7 @@ static int encode_dvb_subtitles(DVBSubtitleContext *s,
 
     bytestream_put_be16(&pseg_len, q - pseg_len - 2);
 
-    *q++ = 0xff; /* end of PES data */
-
     s->object_version = (s->object_version + 1) & 0xf;
-    s->hide_state = !s->hide_state;
     return q - outbuf;
 }
 
diff --git a/libavcodec/dvbsub_parser.c b/libavcodec/dvbsub_parser.c
index 295e03b..d15c891 100644
--- a/libavcodec/dvbsub_parser.c
+++ b/libavcodec/dvbsub_parser.c
@@ -1,21 +1,21 @@
 /*
- * DVB subtitle parser for Libav
+ * DVB subtitle parser for FFmpeg
  * Copyright (c) 2005 Ian Caulfield
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include "avcodec.h"
diff --git a/libavcodec/dvbsubdec.c b/libavcodec/dvbsubdec.c
index 1dcefae..a40da76 100644
--- a/libavcodec/dvbsubdec.c
+++ b/libavcodec/dvbsubdec.c
@@ -2,20 +2,20 @@
  * DVB subtitle decoding
  * Copyright (c) 2005 Ian Caulfield
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -23,6 +23,7 @@
 #include "get_bits.h"
 #include "bytestream.h"
 #include "libavutil/colorspace.h"
+#include "libavutil/opt.h"
 
 #define DVBSUB_PAGE_SEGMENT     0x10
 #define DVBSUB_REGION_SEGMENT   0x11
@@ -152,6 +153,7 @@ static void png_save2(const char *filename, uint32_t *bitmap, int w, int h)
 
 typedef struct DVBSubCLUT {
     int id;
+    int version;
 
     uint32_t clut4[4];
     uint32_t clut16[16];
@@ -178,6 +180,7 @@ typedef struct DVBSubObjectDisplay {
 
 typedef struct DVBSubObject {
     int id;
+    int version;
 
     int type;
 
@@ -197,6 +200,7 @@ typedef struct DVBSubRegionDisplay {
 
 typedef struct DVBSubRegion {
     int id;
+    int version;
 
     int width;
     int height;
@@ -207,6 +211,7 @@ typedef struct DVBSubRegion {
 
     uint8_t *pbuf;
     int buf_size;
+    int dirty;
 
     DVBSubObjectDisplay *display_list;
 
@@ -223,15 +228,19 @@ typedef struct DVBSubDisplayDefinition {
 } DVBSubDisplayDefinition;
 
 typedef struct DVBSubContext {
+    AVClass *class;
     int composition_id;
     int ancillary_id;
 
+    int version;
     int time_out;
+    int compute_edt; /**< if 1, the end display time is computed from the pts;
+                          if 0 (default), it is computed from time_out */
+    int64_t prev_start;
     DVBSubRegion *region_list;
     DVBSubCLUT   *clut_list;
     DVBSubObject *object_list;
 
-    int display_list_size;
     DVBSubRegionDisplay *display_list;
     DVBSubDisplayDefinition *display_definition;
 } DVBSubContext;
@@ -316,21 +325,10 @@ static void delete_region_display_list(DVBSubContext *ctx, DVBSubRegion *region)
 
 }
 
-static void delete_state(DVBSubContext *ctx)
+static void delete_cluts(DVBSubContext *ctx)
 {
-    DVBSubRegion *region;
     DVBSubCLUT *clut;
 
-    while (ctx->region_list) {
-        region = ctx->region_list;
-
-        ctx->region_list = region->next;
-
-        delete_region_display_list(ctx, region);
-        av_free(region->pbuf);
-        av_free(region);
-    }
-
     while (ctx->clut_list) {
         clut = ctx->clut_list;
 
@@ -338,12 +336,35 @@ static void delete_state(DVBSubContext *ctx)
 
         av_free(clut);
     }
+}
 
-    av_freep(&ctx->display_definition);
+/* Free every node of ctx->object_list, leaving the list empty. */
+static void delete_objects(DVBSubContext *ctx)
+{
+    while (ctx->object_list) {
+        DVBSubObject *head = ctx->object_list;
+
+        /* Unlink the node before releasing it. */
+        ctx->object_list = head->next;
 
-    /* Should already be null */
-    if (ctx->object_list)
-        av_log(0, AV_LOG_ERROR, "Memory deallocation error!\n");
+        av_free(head);
+    }
+}
+
+/* Empty ctx->region_list, freeing each region's pixel buffer and node. */
+static void delete_regions(DVBSubContext *ctx)
+{
+    while (ctx->region_list) {
+        DVBSubRegion *head = ctx->region_list;
+
+        /* Unlink the node before it is torn down. */
+        ctx->region_list = head->next;
+
+        /* The helper is handed the region, so it must run before the frees. */
+        delete_region_display_list(ctx, head);
+        av_free(head->pbuf);
+        av_free(head);
+    }
 }
 
 static av_cold int dvbsub_init_decoder(AVCodecContext *avctx)
@@ -351,15 +372,22 @@ static av_cold int dvbsub_init_decoder(AVCodecContext *avctx)
     int i, r, g, b, a = 0;
     DVBSubContext *ctx = avctx->priv_data;
 
-    if (!avctx->extradata || avctx->extradata_size != 4) {
-        av_log(avctx, AV_LOG_WARNING, "Invalid extradata, subtitle streams may be combined!\n");
+    if (!avctx->extradata || (avctx->extradata_size < 4) || ((avctx->extradata_size % 5 != 0) && (avctx->extradata_size != 4))) {
+        av_log(avctx, AV_LOG_WARNING, "Invalid DVB subtitles stream extradata!\n");
         ctx->composition_id = -1;
         ctx->ancillary_id   = -1;
     } else {
+        if (avctx->extradata_size > 5) {
+            av_log(avctx, AV_LOG_WARNING, "Decoding first DVB subtitles sub-stream\n");
+        }
+
         ctx->composition_id = AV_RB16(avctx->extradata);
         ctx->ancillary_id   = AV_RB16(avctx->extradata + 2);
     }
 
+    ctx->version = -1;
+    ctx->prev_start = AV_NOPTS_VALUE;
+
     default_clut.id = -1;
     default_clut.next = NULL;
 
@@ -428,7 +456,13 @@ static av_cold int dvbsub_close_decoder(AVCodecContext *avctx)
     DVBSubContext *ctx = avctx->priv_data;
     DVBSubRegionDisplay *display;
 
-    delete_state(ctx);
+    delete_regions(ctx);
+
+    delete_objects(ctx);
+
+    delete_cluts(ctx);
+
+    av_freep(&ctx->display_definition);
 
     while (ctx->display_list) {
         display = ctx->display_list;
@@ -442,16 +476,18 @@ static av_cold int dvbsub_close_decoder(AVCodecContext *avctx)
 
 static int dvbsub_read_2bit_string(uint8_t *destbuf, int dbuf_len,
                                    const uint8_t **srcbuf, int buf_size,
-                                   int non_mod, uint8_t *map_table)
+                                   int non_mod, uint8_t *map_table, int x_pos)
 {
     GetBitContext gb;
 
     int bits;
     int run_length;
-    int pixels_read = 0;
+    int pixels_read = x_pos;
 
     init_get_bits(&gb, *srcbuf, buf_size << 3);
 
+    destbuf += x_pos;
+
     while (get_bits_count(&gb) < buf_size << 3 && pixels_read < dbuf_len) {
         bits = get_bits(&gb, 2);
 
@@ -512,14 +548,14 @@ static int dvbsub_read_2bit_string(uint8_t *destbuf, int dbuf_len,
                             }
                         }
                     } else if (bits == 1) {
-                        pixels_read += 2;
                         if (map_table)
                             bits = map_table[0];
                         else
                             bits = 0;
-                        if (pixels_read <= dbuf_len) {
-                            *destbuf++ = bits;
+                        run_length = 2;
+                        while (run_length-- > 0 && pixels_read < dbuf_len) {
                             *destbuf++ = bits;
+                            pixels_read++;
                         }
                     } else {
                         (*srcbuf) += (get_bits_count(&gb) + 7) >> 3;
@@ -547,16 +583,18 @@ static int dvbsub_read_2bit_string(uint8_t *destbuf, int dbuf_len,
 
 static int dvbsub_read_4bit_string(uint8_t *destbuf, int dbuf_len,
                                    const uint8_t **srcbuf, int buf_size,
-                                   int non_mod, uint8_t *map_table)
+                                   int non_mod, uint8_t *map_table, int x_pos)
 {
     GetBitContext gb;
 
     int bits;
     int run_length;
-    int pixels_read = 0;
+    int pixels_read = x_pos;
 
     init_get_bits(&gb, *srcbuf, buf_size << 3);
 
+    destbuf += x_pos;
+
     while (get_bits_count(&gb) < buf_size << 3 && pixels_read < dbuf_len) {
         bits = get_bits(&gb, 4);
 
@@ -636,14 +674,14 @@ static int dvbsub_read_4bit_string(uint8_t *destbuf, int dbuf_len,
                             }
                         }
                     } else if (bits == 1) {
-                        pixels_read += 2;
                         if (map_table)
                             bits = map_table[0];
                         else
                             bits = 0;
-                        if (pixels_read <= dbuf_len) {
-                            *destbuf++ = bits;
+                        run_length = 2;
+                        while (run_length-- > 0 && pixels_read < dbuf_len) {
                             *destbuf++ = bits;
+                            pixels_read++;
                         }
                     } else {
                         if (map_table)
@@ -668,12 +706,14 @@ static int dvbsub_read_4bit_string(uint8_t *destbuf, int dbuf_len,
 
 static int dvbsub_read_8bit_string(uint8_t *destbuf, int dbuf_len,
                                     const uint8_t **srcbuf, int buf_size,
-                                    int non_mod, uint8_t *map_table)
+                                    int non_mod, uint8_t *map_table, int x_pos)
 {
     const uint8_t *sbuf_end = (*srcbuf) + buf_size;
     int bits;
     int run_length;
-    int pixels_read = 0;
+    int pixels_read = x_pos;
+
+    destbuf += x_pos;
 
     while (*srcbuf < sbuf_end && pixels_read < dbuf_len) {
         bits = *(*srcbuf)++;
@@ -723,7 +763,128 @@ static int dvbsub_read_8bit_string(uint8_t *destbuf, int dbuf_len,
     return pixels_read;
 }
 
+/**
+ * Export every dirty region on the display list into sub as bitmap rects.
+ *
+ * Nothing is done when sub already carries rects. *got_output is set to 1 once
+ * an end display time has been derived. If an allocation fails, every partly
+ * built rect is freed, sub->num_rects is reset to 0 and *got_output to 0.
+ */
+static void save_subtitle_set(AVCodecContext *avctx, AVSubtitle *sub, int *got_output)
+{
+    DVBSubContext *ctx = avctx->priv_data;
+    DVBSubRegionDisplay *display;
+    DVBSubDisplayDefinition *display_def = ctx->display_definition;
+    DVBSubRegion *region;
+    AVSubtitleRect *rect;
+    DVBSubCLUT *clut;
+    uint32_t *clut_table;
+    int i;
+    int offset_x=0, offset_y=0;
+
+
+    if (display_def) {
+        offset_x = display_def->x;
+        offset_y = display_def->y;
+    }
+
+    /* Not touching AVSubtitles again*/
+    if(sub->num_rects) {
+        avpriv_request_sample(ctx, "Different Version of Segment asked Twice\n");
+        return;
+    }
+    /* Count the regions that received pixel data; only those are exported. */
+    for (display = ctx->display_list; display; display = display->next) {
+        region = get_region(ctx, display->region_id);
+        if (region && region->dirty)
+            sub->num_rects++;
+    }
+
+    if(ctx->compute_edt == 0) {
+        sub->end_display_time = ctx->time_out * 1000;
+        *got_output = 1;
+    } else if (ctx->prev_start != AV_NOPTS_VALUE) {
+        sub->end_display_time = av_rescale_q((sub->pts - ctx->prev_start ), AV_TIME_BASE_Q, (AVRational){ 1, 1000 }) - 1;
+        *got_output = 1;
+    }
+    if (sub->num_rects > 0) {
+
+        sub->rects = av_mallocz_array(sizeof(*sub->rects), sub->num_rects);
+        if (!sub->rects)
+            goto fail;
+        for(i=0; i<sub->num_rects; i++) {
+            sub->rects[i] = av_mallocz(sizeof(*sub->rects[i]));
+            if (!sub->rects[i])
+                goto fail;
+        }
+
+        i = 0;
+
+        for (display = ctx->display_list; display; display = display->next) {
+            region = get_region(ctx, display->region_id);
+
+            if (!region)
+                continue;
+
+            if (!region->dirty)
+                continue;
+
+            rect = sub->rects[i];
+            rect->x = display->x_pos + offset_x;
+            rect->y = display->y_pos + offset_y;
+            rect->w = region->width;
+            rect->h = region->height;
+            rect->nb_colors = (1 << region->depth);
+            rect->type      = SUBTITLE_BITMAP;
+            rect->pict.linesize[0] = region->width;
+
+            clut = get_clut(ctx, region->clut);
+
+            if (!clut)
+                clut = &default_clut;
 
+            switch (region->depth) {
+            case 2:
+                clut_table = clut->clut4;
+                break;
+            case 8:
+                clut_table = clut->clut256;
+                break;
+            case 4:
+            default:
+                clut_table = clut->clut16;
+                break;
+            }
+
+            rect->pict.data[1] = av_mallocz(AVPALETTE_SIZE);
+            if (!rect->pict.data[1])
+                goto fail;
+            memcpy(rect->pict.data[1], clut_table, (1 << region->depth) * sizeof(uint32_t));
+
+            rect->pict.data[0] = av_malloc(region->buf_size);
+            if (!rect->pict.data[0])
+                goto fail;
+            memcpy(rect->pict.data[0], region->pbuf, region->buf_size);
+
+            i++;
+        }
+    }
+    return;
+
+fail:
+    /* Out of memory: release everything built so far and report no output. */
+    av_log(avctx, AV_LOG_ERROR, "Cannot allocate subtitle rects.\n");
+    for (i = 0; sub->rects && i < sub->num_rects; i++) {
+        if (sub->rects[i]) {
+            av_freep(&sub->rects[i]->pict.data[0]);
+            av_freep(&sub->rects[i]->pict.data[1]);
+        }
+        av_freep(&sub->rects[i]);
+    }
+    av_freep(&sub->rects);
+    sub->num_rects = 0;
+    *got_output    = 0;
+}
 
 static void dvbsub_parse_pixel_data_block(AVCodecContext *avctx, DVBSubObjectDisplay *display,
                                           const uint8_t *buf, int buf_size, int top_bottom, int non_mod)
@@ -742,6 +871,7 @@ static void dvbsub_parse_pixel_data_block(AVCodecContext *avctx, DVBSubObjectDis
                          0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff};
     uint8_t *map_table;
 
+#if 0
     av_dlog(avctx, "DVB pixel block size %d, %s field:\n", buf_size,
             top_bottom ? "bottom" : "top");
 
@@ -756,21 +886,22 @@ static void dvbsub_parse_pixel_data_block(AVCodecContext *avctx, DVBSubObjectDis
 
     if (i % 16)
         av_dlog(avctx, "\n");
+#endif
 
     if (region == 0)
         return;
 
     pbuf = region->pbuf;
+    region->dirty = 1;
 
     x_pos = display->x_pos;
     y_pos = display->y_pos;
 
-    if ((y_pos & 1) != top_bottom)
-        y_pos++;
+    y_pos += top_bottom;
 
     while (buf < buf_end) {
-        if (x_pos > region->width || y_pos > region->height) {
-            av_log(avctx, AV_LOG_ERROR, "Invalid object location!\n");
+        if ((*buf!=0xf0 && x_pos >= region->width) || y_pos >= region->height) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid object location! %d-%d %d-%d %02x\n", x_pos, region->width, y_pos, region->height, *buf);
             return;
         }
 
@@ -783,9 +914,9 @@ static void dvbsub_parse_pixel_data_block(AVCodecContext *avctx, DVBSubObjectDis
             else
                 map_table = NULL;
 
-            x_pos += dvbsub_read_2bit_string(pbuf + (y_pos * region->width) + x_pos,
-                                                region->width - x_pos, &buf, buf_end - buf,
-                                                non_mod, map_table);
+            x_pos = dvbsub_read_2bit_string(pbuf + (y_pos * region->width),
+                                            region->width, &buf, buf_end - buf,
+                                            non_mod, map_table, x_pos);
             break;
         case 0x11:
             if (region->depth < 4) {
@@ -798,9 +929,9 @@ static void dvbsub_parse_pixel_data_block(AVCodecContext *avctx, DVBSubObjectDis
             else
                 map_table = NULL;
 
-            x_pos += dvbsub_read_4bit_string(pbuf + (y_pos * region->width) + x_pos,
-                                                region->width - x_pos, &buf, buf_end - buf,
-                                                non_mod, map_table);
+            x_pos = dvbsub_read_4bit_string(pbuf + (y_pos * region->width),
+                                            region->width, &buf, buf_end - buf,
+                                            non_mod, map_table, x_pos);
             break;
         case 0x12:
             if (region->depth < 8) {
@@ -808,9 +939,9 @@ static void dvbsub_parse_pixel_data_block(AVCodecContext *avctx, DVBSubObjectDis
                 return;
             }
 
-            x_pos += dvbsub_read_8bit_string(pbuf + (y_pos * region->width) + x_pos,
-                                                region->width - x_pos, &buf, buf_end - buf,
-                                                non_mod, NULL);
+            x_pos = dvbsub_read_8bit_string(pbuf + (y_pos * region->width),
+                                            region->width, &buf, buf_end - buf,
+                                            non_mod, NULL, x_pos);
             break;
 
         case 0x20:
@@ -845,7 +976,6 @@ static void dvbsub_parse_object_segment(AVCodecContext *avctx,
     DVBSubContext *ctx = avctx->priv_data;
 
     const uint8_t *buf_end = buf + buf_size;
-    const uint8_t *block;
     int object_id;
     DVBSubObject *object;
     DVBSubObjectDisplay *display;
@@ -876,7 +1006,8 @@ static void dvbsub_parse_object_segment(AVCodecContext *avctx,
         }
 
         for (display = object->display_list; display; display = display->object_list_next) {
-            block = buf;
+            const uint8_t *block = buf;
+            int bfl = bottom_field_len;
 
             dvbsub_parse_pixel_data_block(avctx, display, block, top_field_len, 0,
                                             non_modifying_color);
@@ -884,9 +1015,9 @@ static void dvbsub_parse_object_segment(AVCodecContext *avctx,
             if (bottom_field_len > 0)
                 block = buf + top_field_len;
             else
-                bottom_field_len = top_field_len;
+                bfl = top_field_len;
 
-            dvbsub_parse_pixel_data_block(avctx, display, block, bottom_field_len, 1,
+            dvbsub_parse_pixel_data_block(avctx, display, block, bfl, 1,
                                             non_modifying_color);
         }
 
@@ -898,13 +1029,14 @@ static void dvbsub_parse_object_segment(AVCodecContext *avctx,
 
 }
 
-static void dvbsub_parse_clut_segment(AVCodecContext *avctx,
+static int dvbsub_parse_clut_segment(AVCodecContext *avctx,
                                         const uint8_t *buf, int buf_size)
 {
     DVBSubContext *ctx = avctx->priv_data;
 
     const uint8_t *buf_end = buf + buf_size;
     int i, clut_id;
+    int version;
     DVBSubCLUT *clut;
     int entry_id, depth , full_range;
     int y, cr, cb, alpha;
@@ -922,6 +1054,7 @@ static void dvbsub_parse_clut_segment(AVCodecContext *avctx,
         av_dlog(avctx, "\n");
 
     clut_id = *buf++;
+    version = ((*buf)>>4)&15;
     buf += 1;
 
     clut = get_clut(ctx, clut_id);
@@ -932,11 +1065,16 @@ static void dvbsub_parse_clut_segment(AVCodecContext *avctx,
         memcpy(clut, &default_clut, sizeof(DVBSubCLUT));
 
         clut->id = clut_id;
+        clut->version = -1;
 
         clut->next = ctx->clut_list;
         ctx->clut_list = clut;
     }
 
+    if (clut->version != version) {
+
+    clut->version = version;
+
     while (buf + 4 < buf_end) {
         entry_id = *buf++;
 
@@ -944,7 +1082,7 @@ static void dvbsub_parse_clut_segment(AVCodecContext *avctx,
 
         if (depth == 0) {
             av_log(avctx, AV_LOG_ERROR, "Invalid clut depth 0x%x!\n", *buf);
-            return;
+            return 0;
         }
 
         full_range = (*buf++) & 1;
@@ -970,14 +1108,21 @@ static void dvbsub_parse_clut_segment(AVCodecContext *avctx,
         YUV_TO_RGB2_CCIR(r, g, b, y);
 
         av_dlog(avctx, "clut %d := (%d,%d,%d,%d)\n", entry_id, r, g, b, alpha);
+        if (!!(depth & 0x80) + !!(depth & 0x40) + !!(depth & 0x20) > 1) {
+            av_dlog(avctx, "More than one bit level marked: %x\n", depth);
+            if (avctx->strict_std_compliance > FF_COMPLIANCE_NORMAL)
+                return AVERROR_INVALIDDATA;
+        }
 
         if (depth & 0x80)
             clut->clut4[entry_id] = RGBA(r,g,b,255 - alpha);
-        if (depth & 0x40)
+        else if (depth & 0x40)
             clut->clut16[entry_id] = RGBA(r,g,b,255 - alpha);
-        if (depth & 0x20)
+        else if (depth & 0x20)
             clut->clut256[entry_id] = RGBA(r,g,b,255 - alpha);
     }
+    }
+    return 0;
 }
 
 
@@ -988,6 +1133,7 @@ static void dvbsub_parse_region_segment(AVCodecContext *avctx,
 
     const uint8_t *buf_end = buf + buf_size;
     int region_id, object_id;
+    int av_unused version;
     DVBSubRegion *region;
     DVBSubObject *object;
     DVBSubObjectDisplay *display;
@@ -1004,11 +1150,13 @@ static void dvbsub_parse_region_segment(AVCodecContext *avctx,
         region = av_mallocz(sizeof(DVBSubRegion));
 
         region->id = region_id;
+        region->version = -1;
 
         region->next = ctx->region_list;
         ctx->region_list = region;
     }
 
+    version = ((*buf)>>4) & 15;
     fill = ((*buf++) >> 3) & 1;
 
     region->width = AV_RB16(buf);
@@ -1024,6 +1172,7 @@ static void dvbsub_parse_region_segment(AVCodecContext *avctx,
         region->pbuf = av_malloc(region->buf_size);
 
         fill = 1;
+        region->dirty = 0;
     }
 
     region->depth = 1 << (((*buf++) >> 2) & 7);
@@ -1033,9 +1182,10 @@ static void dvbsub_parse_region_segment(AVCodecContext *avctx,
     }
     region->clut = *buf++;
 
-    if (region->depth == 8)
+    if (region->depth == 8) {
         region->bgcolor = *buf++;
-    else {
+        buf += 1;
+    } else {
         buf += 1;
 
         if (region->depth == 4)
@@ -1093,7 +1243,7 @@ static void dvbsub_parse_region_segment(AVCodecContext *avctx,
 }
 
 static void dvbsub_parse_page_segment(AVCodecContext *avctx,
-                                        const uint8_t *buf, int buf_size)
+                                        const uint8_t *buf, int buf_size, AVSubtitle *sub, int *got_output)
 {
     DVBSubContext *ctx = avctx->priv_data;
     DVBSubRegionDisplay *display;
@@ -1102,22 +1252,36 @@ static void dvbsub_parse_page_segment(AVCodecContext *avctx,
     const uint8_t *buf_end = buf + buf_size;
     int region_id;
     int page_state;
+    int timeout;
+    int version;
 
     if (buf_size < 1)
         return;
 
-    ctx->time_out = *buf++;
+    timeout = *buf++;
+    version = ((*buf)>>4) & 15;
     page_state = ((*buf++) >> 2) & 3;
 
+    if (ctx->version == version) {
+        return;
+    }
+
+    ctx->time_out = timeout;
+    ctx->version = version;
+
     av_dlog(avctx, "Page time out %ds, state %d\n", ctx->time_out, page_state);
 
-    if (page_state == 2) {
-        delete_state(ctx);
+    if(ctx->compute_edt == 1)
+        save_subtitle_set(avctx, sub, got_output);
+
+    if (page_state == 1 || page_state == 2) {
+        delete_regions(ctx);
+        delete_objects(ctx);
+        delete_cluts(ctx);
     }
 
     tmp_display_list = ctx->display_list;
     ctx->display_list = NULL;
-    ctx->display_list_size = 0;
 
     while (buf + 5 < buf_end) {
         region_id = *buf++;
@@ -1145,7 +1309,6 @@ static void dvbsub_parse_page_segment(AVCodecContext *avctx,
 
         display->next = ctx->display_list;
         ctx->display_list = display;
-        ctx->display_list_size++;
 
         av_dlog(avctx, "Region %d, (%d,%d)\n", region_id, display->x_pos, display->y_pos);
     }
@@ -1284,101 +1447,33 @@ static void dvbsub_parse_display_definition_segment(AVCodecContext *avctx,
     display_def->y       = 0;
     display_def->width   = bytestream_get_be16(&buf) + 1;
     display_def->height  = bytestream_get_be16(&buf) + 1;
+    if (!avctx->width || !avctx->height) {
+        avctx->width  = display_def->width;
+        avctx->height = display_def->height;
+    }
 
     if (buf_size < 13)
         return;
 
     if (info_byte & 1<<3) { // display_window_flag
         display_def->x = bytestream_get_be16(&buf);
-        display_def->y = bytestream_get_be16(&buf);
         display_def->width  = bytestream_get_be16(&buf) - display_def->x + 1;
+        display_def->y = bytestream_get_be16(&buf);
         display_def->height = bytestream_get_be16(&buf) - display_def->y + 1;
     }
 }
 
-static int dvbsub_display_end_segment(AVCodecContext *avctx, const uint8_t *buf,
-                                        int buf_size, AVSubtitle *sub)
+static void dvbsub_display_end_segment(AVCodecContext *avctx, const uint8_t *buf,
+                                        int buf_size, AVSubtitle *sub,int *got_output) /* got_output is forwarded to save_subtitle_set(); the former int return value is gone */
 {
     DVBSubContext *ctx = avctx->priv_data;
-    DVBSubDisplayDefinition *display_def = ctx->display_definition;
-
-    DVBSubRegion *region;
-    DVBSubRegionDisplay *display;
-    AVSubtitleRect *rect;
-    DVBSubCLUT *clut;
-    uint32_t *clut_table;
-    int i;
-    int offset_x=0, offset_y=0;
-
-    sub->rects = NULL;
-    sub->start_display_time = 0;
-    sub->end_display_time = ctx->time_out * 1000;
-    sub->format = 0;
-
-    if (display_def) {
-        offset_x = display_def->x;
-        offset_y = display_def->y;
-    }
-
-    sub->num_rects = ctx->display_list_size;
-
-    if (sub->num_rects > 0){
-        sub->rects = av_mallocz(sizeof(*sub->rects) * sub->num_rects);
-        for(i=0; i<sub->num_rects; i++)
-            sub->rects[i] = av_mallocz(sizeof(*sub->rects[i]));
-    }
-
-    i = 0;
-
-    for (display = ctx->display_list; display; display = display->next) {
-        region = get_region(ctx, display->region_id);
-        rect = sub->rects[i];
-
-        if (!region)
-            continue;
-
-        rect->x = display->x_pos + offset_x;
-        rect->y = display->y_pos + offset_y;
-        rect->w = region->width;
-        rect->h = region->height;
-        rect->nb_colors = 16;
-        rect->type      = SUBTITLE_BITMAP;
-        rect->pict.linesize[0] = region->width;
-
-        clut = get_clut(ctx, region->clut);
-
-        if (!clut)
-            clut = &default_clut;
-
-        switch (region->depth) {
-        case 2:
-            clut_table = clut->clut4;
-            break;
-        case 8:
-            clut_table = clut->clut256;
-            break;
-        case 4:
-        default:
-            clut_table = clut->clut16;
-            break;
-        }
-
-        rect->pict.data[1] = av_mallocz(AVPALETTE_SIZE);
-        memcpy(rect->pict.data[1], clut_table, (1 << region->depth) * sizeof(uint32_t));
-
-        rect->pict.data[0] = av_malloc(region->buf_size);
-        memcpy(rect->pict.data[0], region->pbuf, region->buf_size);
-
-        i++;
-    }
-
-    sub->num_rects = i;
 
+    if(ctx->compute_edt == 0) /* with compute_edt == 1 save_subtitle_set() is called from dvbsub_parse_page_segment() instead */
+        save_subtitle_set(avctx, sub, got_output);
 #ifdef DEBUG
     save_display_set(ctx);
 #endif
 
-    return 1;
 }
 
 static int dvbsub_decode(AVCodecContext *avctx,
@@ -1394,6 +1489,8 @@ static int dvbsub_decode(AVCodecContext *avctx,
     int page_id;
     int segment_length;
     int i;
+    int ret = 0;
+    int got_segment = 0;
 
     av_dlog(avctx, "DVB sub packet:\n");
 
@@ -1422,30 +1519,42 @@ static int dvbsub_decode(AVCodecContext *avctx,
         segment_length = AV_RB16(p);
         p += 2;
 
+        if (avctx->debug & FF_DEBUG_STARTCODE) {
+            av_log(avctx, AV_LOG_DEBUG, "segment_type:%d page_id:%d segment_length:%d\n", segment_type, page_id, segment_length);
+        }
+
         if (p_end - p < segment_length) {
             av_dlog(avctx, "incomplete or broken packet");
-            return -1;
+            ret = -1;
+            goto end;
         }
 
         if (page_id == ctx->composition_id || page_id == ctx->ancillary_id ||
             ctx->composition_id == -1 || ctx->ancillary_id == -1) {
             switch (segment_type) {
             case DVBSUB_PAGE_SEGMENT:
-                dvbsub_parse_page_segment(avctx, p, segment_length);
+                dvbsub_parse_page_segment(avctx, p, segment_length, sub, data_size);
+                got_segment |= 1;
                 break;
             case DVBSUB_REGION_SEGMENT:
                 dvbsub_parse_region_segment(avctx, p, segment_length);
+                got_segment |= 2;
                 break;
             case DVBSUB_CLUT_SEGMENT:
-                dvbsub_parse_clut_segment(avctx, p, segment_length);
+                ret = dvbsub_parse_clut_segment(avctx, p, segment_length);
+                if (ret < 0) goto end;
+                got_segment |= 4;
                 break;
             case DVBSUB_OBJECT_SEGMENT:
                 dvbsub_parse_object_segment(avctx, p, segment_length);
+                got_segment |= 8;
                 break;
             case DVBSUB_DISPLAYDEFINITION_SEGMENT:
                 dvbsub_parse_display_definition_segment(avctx, p, segment_length);
+                break;
             case DVBSUB_DISPLAY_SEGMENT:
-                *data_size = dvbsub_display_end_segment(avctx, p, segment_length, sub);
+                dvbsub_display_end_segment(avctx, p, segment_length, sub, data_size);
+                got_segment |= 16;
                 break;
             default:
                 av_dlog(avctx, "Subtitling segment type 0x%x, page id %d, length %d\n",
@@ -1456,10 +1565,36 @@ static int dvbsub_decode(AVCodecContext *avctx,
 
         p += segment_length;
     }
+    // Some streams do not send a display segment but if we have all the other
+    // segments then we need no further data.
+    if (got_segment == 15) {
+        av_log(avctx, AV_LOG_DEBUG, "Missing display_end_segment, emulating\n");
+        dvbsub_display_end_segment(avctx, p, 0, sub, data_size);
+    }
+
+end:
+    if(ret < 0) {
+        *data_size = 0;
+        avsubtitle_free(sub);
+        return ret;
+    } else {
+        if(ctx->compute_edt == 1 )
+            FFSWAP(int64_t, ctx->prev_start, sub->pts);
+    }
 
     return p - buf;
 }
 
+static const AVOption options[] = { /* private decoder options, stored in DVBSubContext */
+    {"compute_edt", "compute end of time using pts or timeout", offsetof(DVBSubContext, compute_edt), FF_OPT_TYPE_INT, {.i64 = 0}, 0, 1, 0}, /* 0: save_subtitle_set() runs at the display end segment; 1: it runs when a new page segment is parsed */
+    {NULL}
+};
+static const AVClass dvbsubdec_class = { /* referenced by ff_dvbsub_decoder.priv_class */
+    .class_name = "DVB Sub Decoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
 
 AVCodec ff_dvbsub_decoder = {
     .name           = "dvbsub",
@@ -1470,4 +1605,5 @@ AVCodec ff_dvbsub_decoder = {
     .init           = dvbsub_init_decoder,
     .close          = dvbsub_close_decoder,
     .decode         = dvbsub_decode,
+    .priv_class     = &dvbsubdec_class,
 };
diff --git a/libavcodec/dvd_nav_parser.c b/libavcodec/dvd_nav_parser.c
new file mode 100644
index 0000000..6e2352d
--- /dev/null
+++ b/libavcodec/dvd_nav_parser.c
@@ -0,0 +1,115 @@
+/*
+ * DVD navigation block parser for FFmpeg
+ * Copyright (c) 2013 The FFmpeg Project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "avcodec.h"
+#include "get_bits.h"
+#include "parser.h"
+
+#define PCI_SIZE  980
+#define DSI_SIZE 1018
+
+/* Parser state: keeps a PCI packet buffered until the DSI packet carrying the same lba arrives. */
+typedef struct DVDNavParseContext {
+    uint32_t     lba; /* lba read from the buffered PCI packet; 0xFFFFFFFF while nothing is buffered */
+    uint8_t      buffer[PCI_SIZE+DSI_SIZE]; /* the PCI packet, followed by the DSI packet */
+    int          copied; /* bytes currently held in buffer: 0 or PCI_SIZE */
+} DVDNavParseContext;
+/* Put the parser context into its empty state (no lba seen, nothing buffered); always returns 0. */
+static av_cold int dvd_nav_parse_init(AVCodecParserContext *s)
+{
+    DVDNavParseContext *pc = s->priv_data;
+
+    pc->lba    = 0xFFFFFFFF; /* same value dvd_nav_parse() writes when it resets the state */
+    pc->copied = 0;
+    return 0;
+}
+/* Pair a PCI packet with the DSI packet that follows it and output both as one buffer; always returns buf_size. */
+static int dvd_nav_parse(AVCodecParserContext *s,
+                         AVCodecContext *avctx,
+                         const uint8_t **poutbuf, int *poutbuf_size,
+                         const uint8_t *buf, int buf_size)
+{
+    DVDNavParseContext *pc1 = s->priv_data;
+    int lastPacket          = 0; /* set once the DSI half has been appended to the buffer */
+    int valid               = 0; /* set when this input was accepted as a PCI or DSI packet */
+
+    s->pict_type = AV_PICTURE_TYPE_NONE;
+
+    avctx->time_base.num = 1;
+    avctx->time_base.den = 90000;
+
+    if (buf && buf_size) {
+        switch(buf[0]) { /* the first byte selects the packet kind; other values leave valid at 0 */
+            case 0x00:
+                if (buf_size == PCI_SIZE) {
+                    /* PCI: record its lba and timing, then buffer the whole packet */
+                    uint32_t lba      = AV_RB32(&buf[0x01]);
+                    uint32_t startpts = AV_RB32(&buf[0x0D]);
+                    uint32_t endpts   = AV_RB32(&buf[0x11]);
+
+                    if (endpts > startpts) { /* a packet whose end is not after its start is rejected */
+                        pc1->lba    = lba;
+                        s->pts      = (int64_t)startpts;
+                        s->duration = endpts - startpts;
+
+                        memcpy(pc1->buffer, buf, PCI_SIZE);
+                        pc1->copied = PCI_SIZE;
+                        valid       = 1;
+                    }
+                }
+                break;
+
+            case 0x01:
+                if ((buf_size == DSI_SIZE) && (pc1->copied == PCI_SIZE)) {
+                    /* DSI: only accepted while a PCI packet is buffered */
+                    uint32_t lba = AV_RB32(&buf[0x05]);
+
+                    if (lba == pc1->lba) { /* both halves must carry the same lba */
+                        memcpy(pc1->buffer + pc1->copied, buf, DSI_SIZE);
+                        lastPacket  = 1;
+                        valid       = 1;
+                    }
+                }
+                break;
+        }
+    }
+
+    if (!valid || lastPacket) { /* forget buffered state after rejected input or once a pair is complete */
+        pc1->copied = 0;
+        pc1->lba    = 0xFFFFFFFF;
+    }
+
+    if (lastPacket) {
+        *poutbuf      = pc1->buffer; /* points into the parser context, not at the caller's buf */
+        *poutbuf_size = sizeof(pc1->buffer); /* PCI_SIZE + DSI_SIZE */
+    } else {
+        *poutbuf      = NULL;
+        *poutbuf_size = 0;
+    }
+
+    return buf_size;
+}
+/* Parser registration for AV_CODEC_ID_DVD_NAV, wiring up the init and parse callbacks of this file. */
+AVCodecParser ff_dvd_nav_parser = {
+    .codec_ids      = { AV_CODEC_ID_DVD_NAV },
+    .priv_data_size = sizeof(DVDNavParseContext),
+    .parser_init    = dvd_nav_parse_init,
+    .parser_parse   = dvd_nav_parse,
+};
diff --git a/libavcodec/dvdata.c b/libavcodec/dvdata.c
index ad41a1b..ace01ff 100644
--- a/libavcodec/dvdata.c
+++ b/libavcodec/dvdata.c
@@ -2,20 +2,20 @@
  * Constants for DV codec
  * Copyright (c) 2002 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/dvdata.h b/libavcodec/dvdata.h
index 8e7c0fb..0932d3a 100644
--- a/libavcodec/dvdata.h
+++ b/libavcodec/dvdata.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/dvdec.c b/libavcodec/dvdec.c
index 476f8c9..fbd158f 100644
--- a/libavcodec/dvdec.c
+++ b/libavcodec/dvdec.c
@@ -13,20 +13,20 @@
  * Many thanks to Dan Dennedy <dan@dennedy.org> for providing wealth
  * of DV technical info.
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -35,6 +35,7 @@
  * DV decoder
  */
 
+#include "libavutil/avassert.h"
 #include "libavutil/internal.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/pixdesc.h"
@@ -65,12 +66,19 @@ static av_cold int dvvideo_decode_init(AVCodecContext *avctx)
     IDCTDSPContext idsp;
     int i;
 
+    memset(&idsp,0, sizeof(idsp));
     ff_idctdsp_init(&idsp, avctx);
 
     for (i = 0; i < 64; i++)
        s->dv_zigzag[0][i] = idsp.idct_permutation[ff_zigzag_direct[i]];
 
-    memcpy(s->dv_zigzag[1], ff_dv_zigzag248_direct, sizeof(s->dv_zigzag[1]));
+    if (avctx->lowres){
+        for (i = 0; i < 64; i++){
+            int j = ff_dv_zigzag248_direct[i];
+            s->dv_zigzag[1][i] = idsp.idct_permutation[(j & 7) + (j & 8) * 4 + (j & 48) / 2];
+        }
+    }else
+        memcpy(s->dv_zigzag[1], ff_dv_zigzag248_direct, sizeof(s->dv_zigzag[1]));
 
     s->idct_put[0] = idsp.idct_put;
     s->idct_put[1] = ff_simple_idct248_put;
@@ -167,11 +175,11 @@ static int dv_decode_video_segment(AVCodecContext *avctx, void *arg)
     LOCAL_ALIGNED_16(int16_t, sblock, [5*DV_MAX_BPM], [64]);
     LOCAL_ALIGNED_16(uint8_t, mb_bit_buffer, [  80 + FF_INPUT_BUFFER_PADDING_SIZE]); /* allow some slack */
     LOCAL_ALIGNED_16(uint8_t, vs_bit_buffer, [5*80 + FF_INPUT_BUFFER_PADDING_SIZE]); /* allow some slack */
-    const int log2_blocksize = 3;
+    const int log2_blocksize = 3-s->avctx->lowres;
     int is_field_mode[5];
 
-    assert((((int)mb_bit_buffer) & 7) == 0);
-    assert((((int)vs_bit_buffer) & 7) == 0);
+    av_assert1((((int)mb_bit_buffer) & 7) == 0);
+    av_assert1((((int)vs_bit_buffer) & 7) == 0);
 
     memset(sblock, 0, 5*DV_MAX_BPM*sizeof(*sblock));
 
@@ -258,7 +266,7 @@ static int dv_decode_video_segment(AVCodecContext *avctx, void *arg)
     flush_put_bits(&vs_pb);
     for (mb_index = 0; mb_index < 5; mb_index++) {
         for (j = 0; j < s->sys->bpm; j++) {
-            if (mb->pos < 64) {
+            if (mb->pos < 64 && get_bits_left(&gb) > 0) {
                 av_dlog(avctx, "start %d:%d\n", mb_index, j);
                 dv_decode_ac(&gb, mb, block);
             }
@@ -308,9 +316,9 @@ static int dv_decode_video_segment(AVCodecContext *avctx, void *arg)
                   int x, y;
                   mb->idct_put(pixels, 8, block);
                   for (y = 0; y < (1 << log2_blocksize); y++, c_ptr += s->frame->linesize[j], pixels += 8) {
-                      ptr1   = pixels + (1 << (log2_blocksize - 1));
+                      ptr1   = pixels + ((1 << (log2_blocksize))>>1);
                       c_ptr1 = c_ptr + (s->frame->linesize[j] << log2_blocksize);
-                      for (x = 0; x < (1 << (log2_blocksize - 1)); x++) {
+                      for (x = 0; x < (1 << FFMAX(log2_blocksize - 1, 0)); x++) {
                           c_ptr[x]  = pixels[x];
                           c_ptr1[x] = ptr1[x];
                       }
@@ -343,7 +351,7 @@ static int dvvideo_decode_frame(AVCodecContext *avctx,
     int apt, is16_9, ret;
     const AVDVProfile *sys;
 
-    sys = av_dv_frame_profile(s->sys, buf, buf_size);
+    sys = avpriv_dv_frame_profile2(avctx, s->sys, buf, buf_size);
     if (!sys || buf_size < sys->frame_size) {
         av_log(avctx, AV_LOG_ERROR, "could not find dv frame profile\n");
         return -1; /* NOTE: we only accept several full frames */
@@ -372,17 +380,20 @@ static int dvvideo_decode_frame(AVCodecContext *avctx,
     vsc_pack = buf + 80*5 + 48 + 5;
     if ( *vsc_pack == dv_video_control ) {
         apt = buf[4] & 0x07;
-        is16_9 = (vsc_pack && ((vsc_pack[2] & 0x07) == 0x02 || (!apt && (vsc_pack[2] & 0x07) == 0x07)));
+        is16_9 = (vsc_pack[2] & 0x07) == 0x02 || (!apt && (vsc_pack[2] & 0x07) == 0x07);
         ff_set_sar(avctx, s->sys->sar[is16_9]);
     }
 
-    if (ff_get_buffer(avctx, s->frame, 0) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
-        return -1;
-    }
+    if ((ret = ff_get_buffer(avctx, s->frame, 0)) < 0)
+        return ret;
     s->frame->interlaced_frame = 1;
     s->frame->top_field_first  = 0;
 
+    /* Determine the codec's field order from the packet */
+    if ( *vsc_pack == dv_video_control ) {
+        s->frame->top_field_first = !(vsc_pack[3] & 0x40);
+    }
+
     s->buf = buf;
     avctx->execute(avctx, dv_decode_video_segment, s->work_chunks, NULL,
                    dv_work_pool_size(s->sys), sizeof(DVwork_chunk));
@@ -404,4 +415,5 @@ AVCodec ff_dvvideo_decoder = {
     .init           = dvvideo_decode_init,
     .decode         = dvvideo_decode_frame,
     .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_SLICE_THREADS,
+    .max_lowres     = 3,
 };
diff --git a/libavcodec/dvdsub_parser.c b/libavcodec/dvdsub_parser.c
index 2ad3b33..32a945e 100644
--- a/libavcodec/dvdsub_parser.c
+++ b/libavcodec/dvdsub_parser.c
@@ -2,20 +2,20 @@
  * DVD subtitle decoding
  * Copyright (c) 2005 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -45,8 +45,11 @@ static int dvdsub_parse(AVCodecParserContext *s,
     DVDSubParseContext *pc = s->priv_data;
 
     if (pc->packet_index == 0) {
-        if (buf_size < 2)
-            return 0;
+        if (buf_size < 2 || AV_RB16(buf) && buf_size < 6) {
+            if (buf_size)
+                av_log(avctx, AV_LOG_DEBUG, "Parser input %d too small\n", buf_size);
+            return buf_size;
+        }
         pc->packet_len = AV_RB16(buf);
         if (pc->packet_len == 0) /* HD-DVD subpicture packet */
             pc->packet_len = AV_RB32(buf+2);
diff --git a/libavcodec/dvdsubdec.c b/libavcodec/dvdsubdec.c
index 02f70be..2b363d0 100644
--- a/libavcodec/dvdsubdec.c
+++ b/libavcodec/dvdsubdec.c
@@ -2,20 +2,20 @@
  * DVD subtitle decoding
  * Copyright (c) 2005 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,12 +25,23 @@
 
 #include "libavutil/attributes.h"
 #include "libavutil/colorspace.h"
+#include "libavutil/opt.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/avstring.h"
 
-typedef struct DVDSubContext {
-    uint32_t palette[16];
-    int      has_palette;
+typedef struct DVDSubContext
+{ /* state shared by the DVD subtitle decoding functions in this file */
+  AVClass *class; /* NOTE(review): presumably for AVOptions, since libavutil/opt.h is now included; confirm */
+  uint32_t palette[16]; /* indexed through colormap[] by guess_palette() when has_palette is set */
+  char    *palette_str;
+  int      has_palette; /* non-zero: guess_palette() takes colors from palette[] instead of guessing */
+  uint8_t  colormap[4]; /* formerly a local of decode_dvd_subtitles(); guess_palette() now reads it from the context */
+  uint8_t  alpha[256]; /* formerly a local of decode_dvd_subtitles(); guess_palette() now reads it from the context */
+  uint8_t *buf;
+  int      buf_size;
+#ifdef DEBUG
+  int sub_id;
+#endif
 } DVDSubContext;
 
 static void yuv_a_to_rgba(const uint8_t *ycbcr, const uint8_t *alpha, uint32_t *rgba, int num_values)
@@ -125,17 +136,24 @@ static int decode_rle(uint8_t *bitmap, int linesize, int w, int h,
 
 static void guess_palette(DVDSubContext* ctx,
                           uint32_t *rgba_palette,
-                          uint8_t *colormap,
-                          uint8_t *alpha,
                           uint32_t subtitle_color)
 {
+    static const uint8_t level_map[4][4] = {
+        // this configuration (full range, lowest to highest) in tests
+        // seemed most common, so assume this
+        {0xff},
+        {0x00, 0xff},
+        {0x00, 0x80, 0xff},
+        {0x00, 0x55, 0xaa, 0xff},
+    };
     uint8_t color_used[16] = { 0 };
     int nb_opaque_colors, i, level, j, r, g, b;
+    uint8_t *colormap = ctx->colormap, *alpha = ctx->alpha;
 
-    if (ctx->has_palette) {
-        for (i = 0; i < 4; i++)
+    if(ctx->has_palette) {
+        for(i = 0; i < 4; i++)
             rgba_palette[i] = (ctx->palette[colormap[i]] & 0x00ffffff)
-                              | ((alpha[i] * 17) << 24);
+                              | ((alpha[i] * 17U) << 24);
         return;
     }
 
@@ -153,18 +171,18 @@ static void guess_palette(DVDSubContext* ctx,
     if (nb_opaque_colors == 0)
         return;
 
-    j = nb_opaque_colors;
+    j = 0;
     memset(color_used, 0, 16);
     for(i = 0; i < 4; i++) {
         if (alpha[i] != 0) {
             if (!color_used[colormap[i]])  {
-                level = (0xff * j) / nb_opaque_colors;
+                level = level_map[nb_opaque_colors][j];
                 r = (((subtitle_color >> 16) & 0xff) * level) >> 8;
                 g = (((subtitle_color >> 8) & 0xff) * level) >> 8;
                 b = (((subtitle_color >> 0) & 0xff) * level) >> 8;
                 rgba_palette[i] = b | (g << 8) | (r << 16) | ((alpha[i] * 17) << 24);
                 color_used[colormap[i]] = (i + 1);
-                j--;
+                j++;
             } else {
                 rgba_palette[i] = (rgba_palette[color_used[colormap[i]] - 1] & 0x00ffffff) |
                                     ((alpha[i] * 17) << 24);
@@ -173,6 +191,21 @@ static void guess_palette(DVDSubContext* ctx,
     }
 }
 
+static void reset_rects(AVSubtitle *sub_header)
+{
+    int i;
+    /* Free each rect (pict.data[0], pict.data[1], then the rect), free the rects array and zero num_rects. */
+    if (sub_header->rects != NULL) { /* nothing is touched, not even num_rects, when rects is NULL */
+        for (i = 0; i < sub_header->num_rects; i++) {
+            av_freep(&sub_header->rects[i]->pict.data[0]);
+            av_freep(&sub_header->rects[i]->pict.data[1]);
+            av_freep(&sub_header->rects[i]);
+        }
+        av_freep(&sub_header->rects);
+        sub_header->num_rects = 0;
+    }
+}
+
 #define READ_OFFSET(a) (big_offsets ? AV_RB32(a) : AV_RB16(a))
 
 static int decode_dvd_subtitles(DVDSubContext *ctx, AVSubtitle *sub_header,
@@ -181,14 +214,13 @@ static int decode_dvd_subtitles(DVDSubContext *ctx, AVSubtitle *sub_header,
     int cmd_pos, pos, cmd, x1, y1, x2, y2, offset1, offset2, next_cmd_pos;
     int big_offsets, offset_size, is_8bit = 0;
     const uint8_t *yuv_palette = 0;
-    uint8_t colormap[4] = { 0 }, alpha[256] = { 0 };
+    uint8_t *colormap = ctx->colormap, *alpha = ctx->alpha;
     int date;
     int i;
     int is_menu = 0;
 
     if (buf_size < 10)
         return -1;
-    memset(sub_header, 0, sizeof(*sub_header));
 
     if (AV_RB16(buf) == 0) {   /* HD subpicture with 4-byte offsets */
         big_offsets = 1;
@@ -202,6 +234,9 @@ static int decode_dvd_subtitles(DVDSubContext *ctx, AVSubtitle *sub_header,
 
     cmd_pos = READ_OFFSET(buf + cmd_pos);
 
+    if (cmd_pos < 0 || cmd_pos > buf_size - 2 - offset_size)
+        return AVERROR(EAGAIN);
+
     while (cmd_pos > 0 && cmd_pos < buf_size - 2 - offset_size) {
         date = AV_RB16(buf + cmd_pos);
         next_cmd_pos = READ_OFFSET(buf + cmd_pos + 2);
@@ -310,19 +345,11 @@ static int decode_dvd_subtitles(DVDSubContext *ctx, AVSubtitle *sub_header,
             w = x2 - x1 + 1;
             if (w < 0)
                 w = 0;
-            h = y2 - y1;
+            h = y2 - y1 + 1;
             if (h < 0)
                 h = 0;
             if (w > 0 && h > 0) {
-                if (sub_header->rects != NULL) {
-                    for (i = 0; i < sub_header->num_rects; i++) {
-                        av_freep(&sub_header->rects[i]->pict.data[0]);
-                        av_freep(&sub_header->rects[i]->pict.data[1]);
-                        av_freep(&sub_header->rects[i]);
-                    }
-                    av_freep(&sub_header->rects);
-                    sub_header->num_rects = 0;
-                }
+                reset_rects(sub_header);
 
                 bitmap = av_malloc(w * h);
                 sub_header->rects = av_mallocz(sizeof(*sub_header->rects));
@@ -341,9 +368,8 @@ static int decode_dvd_subtitles(DVDSubContext *ctx, AVSubtitle *sub_header,
                     yuv_a_to_rgba(yuv_palette, alpha, (uint32_t*)sub_header->rects[0]->pict.data[1], 256);
                 } else {
                     sub_header->rects[0]->nb_colors = 4;
-                    guess_palette(ctx,
-                                  (uint32_t*)sub_header->rects[0]->pict.data[1],
-                                  colormap, alpha, 0xffff00);
+                    guess_palette(ctx, (uint32_t*)sub_header->rects[0]->pict.data[1],
+                                  0xffff00);
                 }
                 sub_header->rects[0]->x = x1;
                 sub_header->rects[0]->y = y1;
@@ -351,8 +377,13 @@ static int decode_dvd_subtitles(DVDSubContext *ctx, AVSubtitle *sub_header,
                 sub_header->rects[0]->h = h;
                 sub_header->rects[0]->type = SUBTITLE_BITMAP;
                 sub_header->rects[0]->pict.linesize[0] = w;
+                sub_header->rects[0]->flags = is_menu ? AV_SUBTITLE_FLAG_FORCED : 0;
             }
         }
+        if (next_cmd_pos < cmd_pos) {
+            av_log(NULL, AV_LOG_ERROR, "Invalid command offset\n");
+            break;
+        }
         if (next_cmd_pos == cmd_pos)
             break;
         cmd_pos = next_cmd_pos;
@@ -360,15 +391,7 @@ static int decode_dvd_subtitles(DVDSubContext *ctx, AVSubtitle *sub_header,
     if (sub_header->num_rects > 0)
         return is_menu;
  fail:
-    if (sub_header->rects != NULL) {
-        for (i = 0; i < sub_header->num_rects; i++) {
-            av_freep(&sub_header->rects[i]->pict.data[0]);
-            av_freep(&sub_header->rects[i]->pict.data[1]);
-            av_freep(&sub_header->rects[i]);
-        }
-        av_freep(&sub_header->rects);
-        sub_header->num_rects = 0;
-    }
+    reset_rects(sub_header);
     return -1;
 }
 
@@ -439,16 +462,19 @@ static int find_smallest_bounding_rectangle(AVSubtitle *s)
 }
 
 #ifdef DEBUG
+#define ALPHA_MIX(A,BACK,FORE) (((255-(A)) * (BACK) + (A) * (FORE)) / 255)
 static void ppm_save(const char *filename, uint8_t *bitmap, int w, int h,
                      uint32_t *rgba_palette)
 {
-    int x, y, v;
+    int x, y, alpha;
+    uint32_t v;
+    int back[3] = {0, 255, 0};  /* green background */
     FILE *f;
 
     f = fopen(filename, "w");
     if (!f) {
         perror(filename);
-        exit(1);
+        return;
     }
     fprintf(f, "P6\n"
             "%d %d\n"
@@ -457,15 +483,35 @@ static void ppm_save(const char *filename, uint8_t *bitmap, int w, int h,
     for(y = 0; y < h; y++) {
         for(x = 0; x < w; x++) {
             v = rgba_palette[bitmap[y * w + x]];
-            putc((v >> 16) & 0xff, f);
-            putc((v >> 8) & 0xff, f);
-            putc((v >> 0) & 0xff, f);
+            alpha = v >> 24;
+            putc(ALPHA_MIX(alpha, back[0], (v >> 16) & 0xff), f);
+            putc(ALPHA_MIX(alpha, back[1], (v >> 8) & 0xff), f);
+            putc(ALPHA_MIX(alpha, back[2], (v >> 0) & 0xff), f);
         }
     }
     fclose(f);
 }
 #endif
 
+/* Append buf to the cached partial SPU in ctx->buf; any failure empties the cache. */
+static int append_to_cached_buf(AVCodecContext *avctx, const uint8_t *buf, int buf_size)
+{
+    DVDSubContext *ctx = avctx->priv_data;
+    int ret = AVERROR_INVALIDDATA;          /* result reported for an oversized SPU */
+    if (buf_size <= 0 || (ctx->buf && buf == ctx->buf))
+        return 0;   /* nothing new: empty input, or the caller passed the cache itself */
+    if (ctx->buf_size > 0xffff - buf_size)
+        av_log(avctx, AV_LOG_WARNING, "Attempt to reconstruct too large SPU packets aborted.\n");
+    else if ((ret = av_reallocp(&ctx->buf, ctx->buf_size + buf_size)) >= 0) {
+        memcpy(ctx->buf + ctx->buf_size, buf, buf_size);
+        ctx->buf_size += buf_size;
+        return 0;
+    }
+    av_freep(&ctx->buf);    /* no-op after a failed av_reallocp(), which already freed it */
+    ctx->buf_size = 0;      /* never leave a stale size behind a NULL buffer */
+    return ret;
+}
+
 static int dvdsub_decode(AVCodecContext *avctx,
                          void *data, int *data_size,
                          AVPacket *avpkt)
@@ -476,10 +522,25 @@ static int dvdsub_decode(AVCodecContext *avctx,
     AVSubtitle *sub = data;
     int is_menu;
 
+    if (ctx->buf) {
+        int ret = append_to_cached_buf(avctx, buf, buf_size);
+        if (ret < 0) {
+            *data_size = 0;
+            return ret;
+        }
+        buf = ctx->buf;
+        buf_size = ctx->buf_size;
+    }
+
     is_menu = decode_dvd_subtitles(ctx, sub, buf, buf_size);
+    if (is_menu == AVERROR(EAGAIN)) {
+        *data_size = 0;
+        return append_to_cached_buf(avctx, buf, buf_size);
+    }
 
     if (is_menu < 0) {
     no_subtitle:
+        reset_rects(sub);
         *data_size = 0;
 
         return buf_size;
@@ -488,57 +549,118 @@ static int dvdsub_decode(AVCodecContext *avctx,
         goto no_subtitle;
 
 #if defined(DEBUG)
+    {
+    char ppm_name[32];
+
+    snprintf(ppm_name, sizeof(ppm_name), "/tmp/%05d.ppm", ctx->sub_id++);
     av_dlog(NULL, "start=%d ms end =%d ms\n",
             sub->start_display_time,
             sub->end_display_time);
-    ppm_save("/tmp/a.ppm", sub->rects[0]->pict.data[0],
-             sub->rects[0]->w, sub->rects[0]->h, sub->rects[0]->pict.data[1]);
+    ppm_save(ppm_name, sub->rects[0]->pict.data[0],
+             sub->rects[0]->w, sub->rects[0]->h, (uint32_t*) sub->rects[0]->pict.data[1]);
+    }
 #endif
 
+    av_freep(&ctx->buf);
+    ctx->buf_size = 0;
     *data_size = 1;
     return buf_size;
 }
 
-static av_cold int dvdsub_init(AVCodecContext *avctx)
+static void parse_palette(DVDSubContext *ctx, char *p)
 {
-    DVDSubContext *ctx = avctx->priv_data;
-    char *data, *cur;
+    int i;
+
+    ctx->has_palette = 1;
+    for(i=0;i<16;i++) {
+        ctx->palette[i] = strtoul(p, &p, 16);
+        while(*p == ',' || av_isspace(*p))
+            p++;
+    }
+}
+
+static int dvdsub_parse_extradata(AVCodecContext *avctx)
+{
+    DVDSubContext *ctx = (DVDSubContext*) avctx->priv_data;
+    char *dataorig, *data;
 
     if (!avctx->extradata || !avctx->extradata_size)
-        return 0;
+        return 1;
 
-    data = av_malloc(avctx->extradata_size + 1);
+    dataorig = data = av_malloc(avctx->extradata_size+1);
     if (!data)
         return AVERROR(ENOMEM);
     memcpy(data, avctx->extradata, avctx->extradata_size);
     data[avctx->extradata_size] = '\0';
-    cur = data;
-
-    while (*cur) {
-        if (strncmp("palette:", cur, 8) == 0) {
-            int i;
-            char *p = cur + 8;
-            ctx->has_palette = 1;
-            for (i = 0; i < 16; i++) {
-                ctx->palette[i] = strtoul(p, &p, 16);
-                while (*p == ',' || av_isspace(*p))
-                    p++;
-            }
-        } else if (!strncmp("size:", cur, 5)) {
+
+    for(;;) {
+        int pos = strcspn(data, "\n\r");
+        if (pos==0 && *data==0)
+            break;
+
+        if (strncmp("palette:", data, 8) == 0) {
+            parse_palette(ctx, data + 8);
+        } else if (strncmp("size:", data, 5) == 0) {
             int w, h;
-            if (sscanf(cur + 5, "%dx%d", &w, &h) == 2) {
+            if (sscanf(data + 5, "%dx%d", &w, &h) == 2) {
                int ret = ff_set_dimensions(avctx, w, h);
-               if (ret < 0)
+               if (ret < 0) {
+                   av_free(dataorig);
                    return ret;
+               }
             }
         }
-        cur += strcspn(cur, "\n\r");
-        cur += strspn(cur, "\n\r");
+
+        data += pos;
+        data += strspn(data, "\n\r");
     }
-    av_free(data);
+
+    av_free(dataorig);
+    return 1;
+}
+
+static av_cold int dvdsub_init(AVCodecContext *avctx)
+{
+    DVDSubContext *ctx = avctx->priv_data;
+    int ret;
+    /* Decoder init: parse "palette:"/"size:" lines from extradata, then apply the option. */
+    if ((ret = dvdsub_parse_extradata(avctx)) < 0)
+        return ret;
+    /* The "palette" option is parsed last, so it overwrites an extradata palette. */
+    if (ctx->palette_str)
+        parse_palette(ctx, ctx->palette_str);
+    if (ctx->has_palette) {
+        int i;
+        av_log(avctx, AV_LOG_DEBUG, "palette:");
+        for(i=0;i<16;i++)
+            av_log(avctx, AV_LOG_DEBUG, " 0x%06x", ctx->palette[i]);
+        av_log(avctx, AV_LOG_DEBUG, "\n");
+    }
+    /* Success is reported as 1; a negative value from extradata parsing is passed through. */
+    return 1;
+}
+
+static av_cold int dvdsub_close(AVCodecContext *avctx) /* registered as the codec's .close */
+{
+    DVDSubContext *ctx = avctx->priv_data;
+    av_freep(&ctx->buf);    /* drop any packet data still cached by append_to_cached_buf() */
+    ctx->buf_size = 0;      /* keep the size consistent with the now-NULL buffer */
     return 0;
 }
 
+#define OFFSET(field) offsetof(DVDSubContext, field) /* byte offset of an option's storage */
+#define VD AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_DECODING_PARAM /* NOTE(review): unparenthesized */
+static const AVOption options[] = { /* private options of the decoder */
+    { "palette", "set the global palette", OFFSET(palette_str), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
+    { NULL } /* terminating entry */
+};
+static const AVClass dvdsub_class = { /* referenced by ff_dvdsub_decoder.priv_class */
+    .class_name = "dvdsubdec",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVCodec ff_dvdsub_decoder = {
     .name           = "dvdsub",
     .long_name      = NULL_IF_CONFIG_SMALL("DVD subtitles"),
@@ -547,4 +669,6 @@ AVCodec ff_dvdsub_decoder = {
     .priv_data_size = sizeof(DVDSubContext),
     .init           = dvdsub_init,
     .decode         = dvdsub_decode,
+    .close          = dvdsub_close,
+    .priv_class     = &dvdsub_class,
 };
diff --git a/libavcodec/dvdsubenc.c b/libavcodec/dvdsubenc.c
index db6749e..ced09dd 100644
--- a/libavcodec/dvdsubenc.c
+++ b/libavcodec/dvdsubenc.c
@@ -2,27 +2,35 @@
  * DVD subtitle encoding
  * Copyright (c) 2005 Wolfram Gloger
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include "avcodec.h"
 #include "bytestream.h"
+#include "internal.h"
+#include "libavutil/avassert.h"
+#include "libavutil/bprint.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
 
-#undef NDEBUG
-#include <assert.h>
+typedef struct {
+    AVClass *class;
+    uint32_t global_palette[16];
+    int even_rows_fix;
+} DVDSubtitleContext;
 
 // ncnt is the nibble counter
 #define PUTNIBBLE(val)\
@@ -53,7 +61,7 @@ static void dvd_encode_rle(uint8_t **pq,
                 if (bitmap[x+len] != color)
                     break;
             color = cmap[color];
-            assert(color < 4);
+            av_assert0(color < 4);
             if (len < 0x04) {
                 PUTNIBBLE((len << 2)|color);
             } else if (len < 0x10) {
@@ -86,72 +94,265 @@ static void dvd_encode_rle(uint8_t **pq,
     *pq = q;
 }
 
-static int encode_dvd_subtitles(uint8_t *outbuf, int outbuf_size,
+static int color_distance(uint32_t a, uint32_t b)
+{
+    int r = 0, d, i;
+    int alpha_a = 8, alpha_b = 8;   /* weight used for the top byte (first iteration) */
+    /* Sum of squared weighted differences over the four bytes, top byte first. */
+    for (i = 24; i >= 0; i -= 8) {
+        d = alpha_a * (int)((a >> i) & 0xFF) -
+            alpha_b * (int)((b >> i) & 0xFF);
+        r += d * d;
+        alpha_a = a >> 28;          /* the three lower bytes are weighted by the top */
+        alpha_b = b >> 28;          /* 4 bits of each value's own top byte (0..15)   */
+    }
+    return r;
+}
+
+/**
+ * Add to hits[] the pixel counts of rectangle r: slot 0 is transparent, slots
+ * 1-16 / 17-32 are the nearest global palette entry, semi-transparent / opaque.
+ */
+static void count_colors(AVCodecContext *avctx, unsigned hits[33],
+                         const AVSubtitleRect *r)
+{
+    DVDSubtitleContext *dvdc = avctx->priv_data;
+    unsigned count[256] = { 0 };
+    uint32_t *palette = (uint32_t *)r->pict.data[1];
+    uint32_t color;
+    int x, y, i, j, match, d, best_d, av_uninit(best_j);
+    uint8_t *p = r->pict.data[0];
+    /* First pass: histogram of the palette indices, skipping the row padding. */
+    for (y = 0; y < r->h; y++) {
+        for (x = 0; x < r->w; x++)
+            count[*(p++)]++;
+        p += r->pict.linesize[0] - r->w;
+    }
+    for (i = 0; i < 256; i++) {
+        if (!count[i]) /* avoid useless search */
+            continue;
+        color = palette[i];
+        /* 0: transparent, 1-16: semi-transparent, 17-32: opaque (split on the top byte) */
+        match = color < 0x33000000 ? 0 : color < 0xCC000000 ? 1 : 17;
+        if (match) {
+            best_d = INT_MAX;
+            for (j = 0; j < 16; j++) {
+                d = color_distance(0xFF000000 | color,
+                                   0xFF000000 | dvdc->global_palette[j]);
+                if (d < best_d) {
+                    best_d = d;
+                    best_j = j;
+                }
+            }
+            match += best_j; /* best_j is always set here: the j loop runs 16 times */
+        }
+        hits[match] += count[i];
+    }
+}
+
+static void select_palette(AVCodecContext *avctx, int out_palette[4],
+                           int out_alpha[4], unsigned hits[33])
+{
+    DVDSubtitleContext *dvdc = avctx->priv_data;
+    int i, j, bright, mult;
+    uint32_t color;
+    int selected[4] = { 0 };
+    uint32_t pseudopal[33] = { 0 };
+    uint32_t refcolor[3] = { 0x00000000, 0xFFFFFFFF, 0xFF000000 };
+    /* Picks four hits[] slots and outputs their palette index and alpha; hits[] is modified. */
+    /* Bonus for transparent: if the rectangle fits the text tightly, the
+       background color can be quite rare, but it would be ugly without it */
+    hits[0] *= 16;
+    /* Weight used colors x2..x4: +1 per channel below 0x40 or at/above 0xC0, capped at 2 */
+    for (i = 0; i < 16; i++) {
+        if (!(hits[1 + i] + hits[17 + i]))
+            continue; /* skip unused colors to gain time */
+        color = dvdc->global_palette[i];
+        bright = 0;
+        for (j = 0; j < 3; j++, color >>= 8)
+            bright += (color & 0xFF) < 0x40 || (color & 0xFF) >= 0xC0;
+        mult = 2 + FFMIN(bright, 2);
+        hits[ 1 + i] *= mult;
+        hits[17 + i] *= mult;
+    }
+
+    /* Select four most frequent colors; each winner is zeroed so it is not picked twice */
+    for (i = 0; i < 4; i++) {
+        for (j = 0; j < 33; j++)
+            if (hits[j] > hits[selected[i]])
+                selected[i] = j;
+        hits[selected[i]] = 0;
+    }
+
+    /* Order the colors like in most DVDs:
+       0: background, 1: foreground, 2: outline */
+    for (i = 0; i < 16; i++) {
+        pseudopal[ 1 + i] = 0x80000000 | dvdc->global_palette[i];
+        pseudopal[17 + i] = 0xFF000000 | dvdc->global_palette[i];
+    }
+    for (i = 0; i < 3; i++) { /* move the slot closest to refcolor[i] into position i */
+        int best_d = color_distance(refcolor[i], pseudopal[selected[i]]);
+        for (j = i + 1; j < 4; j++) {
+            int d = color_distance(refcolor[i], pseudopal[selected[j]]);
+            if (d < best_d) {
+                FFSWAP(int, selected[i], selected[j]);
+                best_d = d;
+            }
+        }
+    }
+
+    /* Output: slot 0 -> index 0, alpha 0; slots 1-16 -> alpha 0x80; slots 17-32 -> alpha 0xFF */
+    for (i = 0; i < 4; i++) {
+        out_palette[i] = selected[i] ? (selected[i] - 1) & 0xF : 0;
+        out_alpha  [i] = !selected[i] ? 0 : selected[i] < 17 ? 0x80 : 0xFF;
+    }
+}
+
+static void build_color_map(AVCodecContext *avctx, int cmap[],
+                            const uint32_t palette[],
+                            const int out_palette[], unsigned int const out_alpha[])
+{
+    DVDSubtitleContext *dvdc = avctx->priv_data;
+    int i, j, d, best_d;
+    uint32_t pseudopal[4];
+    /* Fill cmap[0..255]: for each palette[] entry, the nearest of the 4 selected colors. */
+    for (i = 0; i < 4; i++)
+        pseudopal[i] = (out_alpha[i] << 24) | /* unsigned shift: 0xFF << 24 must not be signed */
+                       dvdc->global_palette[out_palette[i]];
+    for (i = 0; i < 256; i++) { /* reads all 256 entries of palette[] */
+        best_d = INT_MAX;
+        for (j = 0; j < 4; j++) {
+            d = color_distance(pseudopal[j], palette[i]);
+            if (d < best_d) { /* strict '<': the lowest j wins ties */
+                cmap[i] = j;
+                best_d = d;
+            }
+        }
+    }
+}
+
+static void copy_rectangle(AVSubtitleRect *dst, AVSubtitleRect *src, int cmap[])
+{
+    int x, y;
+    uint8_t *p, *q;
+    /* Copy src's pixels into dst at src's position relative to dst, remapped through cmap. */
+    p = src->pict.data[0];
+    q = dst->pict.data[0] + (src->x - dst->x) +
+                            (src->y - dst->y) * dst->pict.linesize[0];
+    for (y = 0; y < src->h; y++) {
+        for (x = 0; x < src->w; x++)
+            *(q++) = cmap[*(p++)];
+        p += src->pict.linesize[0] - src->w; /* skip to the start of the next source row */
+        q += dst->pict.linesize[0] - src->w; /* same for the destination row */
+    }
+}
+
+static int encode_dvd_subtitles(AVCodecContext *avctx,
+                                uint8_t *outbuf, int outbuf_size,
                                 const AVSubtitle *h)
 {
+    DVDSubtitleContext *dvdc = avctx->priv_data;
     uint8_t *q, *qq;
-    int object_id;
-    int offset1[20], offset2[20];
-    int i, imax, color, alpha, rects = h->num_rects;
-    unsigned long hmax;
-    unsigned long hist[256];
-    int           cmap[256];
+    int offset1, offset2;
+    int i, rects = h->num_rects, ret;
+    unsigned global_palette_hits[33] = { 0 };
+    int cmap[256];
+    int out_palette[4];
+    int out_alpha[4];
+    AVSubtitleRect vrect;
+    uint8_t *vrect_data = NULL;
+    int x2, y2;
+    int forced = 0;
 
     if (rects == 0 || h->rects == NULL)
-        return -1;
-    if (rects > 20)
-        rects = 20;
-
-    // analyze bitmaps, compress to 4 colors
-    for (i=0; i<256; ++i) {
-        hist[i] = 0;
-        cmap[i] = 0;
-    }
-    for (object_id = 0; object_id < rects; object_id++)
-        for (i=0; i<h->rects[object_id]->w*h->rects[object_id]->h; ++i) {
-            color = h->rects[object_id]->pict.data[0][i];
-            // only count non-transparent pixels
-            alpha = ((uint32_t*)h->rects[object_id]->pict.data[1])[color] >> 24;
-            hist[color] += alpha;
+        return AVERROR(EINVAL);
+    for (i = 0; i < rects; i++)
+        if (h->rects[i]->type != SUBTITLE_BITMAP) {
+            av_log(avctx, AV_LOG_ERROR, "Bitmap subtitle required\n");
+            return AVERROR(EINVAL);
         }
-    for (color=3;; --color) {
-        hmax = 0;
-        imax = 0;
-        for (i=0; i<256; ++i)
-            if (hist[i] > hmax) {
-                imax = i;
-                hmax = hist[i];
-            }
-        if (hmax == 0)
+    /* Mark this subtitle forced if any of the rectangles is forced. */
+    for (i = 0; i < rects; i++)
+        if ((h->rects[i]->flags & AV_SUBTITLE_FLAG_FORCED) != 0) {
+            forced = 1;
             break;
-        if (color == 0)
-            color = 3;
-        av_log(NULL, AV_LOG_DEBUG, "dvd_subtitle hist[%d]=%ld -> col %d\n",
-               imax, hist[imax], color);
-        cmap[imax] = color;
-        hist[imax] = 0;
+        }
+    vrect = *h->rects[0];
+
+    if (rects > 1) {
+        /* DVD subtitles can have only one rectangle: build a virtual
+           rectangle containing all actual rectangles.
+           The data of the rectangles will be copied later, when the palette
+           is decided, because the rectangles may have different palettes. */
+        int xmin = h->rects[0]->x, xmax = xmin + h->rects[0]->w;
+        int ymin = h->rects[0]->y, ymax = ymin + h->rects[0]->h;
+        for (i = 1; i < rects; i++) {
+            xmin = FFMIN(xmin, h->rects[i]->x);
+            ymin = FFMIN(ymin, h->rects[i]->y);
+            xmax = FFMAX(xmax, h->rects[i]->x + h->rects[i]->w);
+            ymax = FFMAX(ymax, h->rects[i]->y + h->rects[i]->h);
+        }
+        vrect.x = xmin;
+        vrect.y = ymin;
+        vrect.w = xmax - xmin;
+        vrect.h = ymax - ymin;
+        if ((ret = av_image_check_size(vrect.w, vrect.h, 0, avctx)) < 0)
+            return ret;
+
+        /* Count pixels outside the virtual rectangle as transparent */
+        global_palette_hits[0] = vrect.w * vrect.h;
+        for (i = 0; i < rects; i++)
+            global_palette_hits[0] -= h->rects[i]->w * h->rects[i]->h;
     }
 
+    for (i = 0; i < rects; i++)
+        count_colors(avctx, global_palette_hits, h->rects[i]);
+    select_palette(avctx, out_palette, out_alpha, global_palette_hits);
+
+    if (rects > 1) {
+        if (!(vrect_data = av_calloc(vrect.w, vrect.h)))
+            return AVERROR(ENOMEM);
+        vrect.pict.data    [0] = vrect_data;
+        vrect.pict.linesize[0] = vrect.w;
+        for (i = 0; i < rects; i++) {
+            build_color_map(avctx, cmap, (uint32_t *)h->rects[i]->pict.data[1],
+                            out_palette, out_alpha);
+            copy_rectangle(&vrect, h->rects[i], cmap);
+        }
+        for (i = 0; i < 4; i++)
+            cmap[i] = i;
+    } else {
+        build_color_map(avctx, cmap, (uint32_t *)h->rects[0]->pict.data[1],
+                        out_palette, out_alpha);
+    }
+
+    av_log(avctx, AV_LOG_DEBUG, "Selected palette:");
+    for (i = 0; i < 4; i++)
+        av_log(avctx, AV_LOG_DEBUG, " 0x%06x@@%02x (0x%x,0x%x)",
+               dvdc->global_palette[out_palette[i]], out_alpha[i],
+               out_palette[i], out_alpha[i] >> 4);
+    av_log(avctx, AV_LOG_DEBUG, "\n");
 
     // encode data block
     q = outbuf + 4;
-    for (object_id = 0; object_id < rects; object_id++) {
-        offset1[object_id] = q - outbuf;
-        // worst case memory requirement: 1 nibble per pixel..
-        if ((q - outbuf) + h->rects[object_id]->w*h->rects[object_id]->h/2
-            + 17*rects + 21 > outbuf_size) {
-            av_log(NULL, AV_LOG_ERROR, "dvd_subtitle too big\n");
-            return -1;
-        }
-        dvd_encode_rle(&q, h->rects[object_id]->pict.data[0],
-                       h->rects[object_id]->w*2,
-                       h->rects[object_id]->w, h->rects[object_id]->h >> 1,
-                       cmap);
-        offset2[object_id] = q - outbuf;
-        dvd_encode_rle(&q, h->rects[object_id]->pict.data[0] + h->rects[object_id]->w,
-                       h->rects[object_id]->w*2,
-                       h->rects[object_id]->w, h->rects[object_id]->h >> 1,
-                       cmap);
+    offset1 = q - outbuf;
+    // worst case memory requirement: 1 nibble per pixel..
+    if ((q - outbuf) + vrect.w * vrect.h / 2 + 17 + 21 > outbuf_size) {
+        av_log(NULL, AV_LOG_ERROR, "dvd_subtitle too big\n");
+        ret = AVERROR_BUFFER_TOO_SMALL;
+        goto fail;
+    }
+    dvd_encode_rle(&q, vrect.pict.data[0], vrect.w * 2,
+                   vrect.w, (vrect.h + 1) >> 1, cmap);
+    offset2 = q - outbuf;
+    dvd_encode_rle(&q, vrect.pict.data[0] + vrect.w, vrect.w * 2,
+                   vrect.w, vrect.h >> 1, cmap);
+
+    if (dvdc->even_rows_fix && (vrect.h & 1)) {
+        // Work-around for some players that want the height to be even.
+        vrect.h++;
+        *q++ = 0x00; // 0x00 0x00 == empty row, i.e. fully transparent
+        *q++ = 0x00;
     }
 
     // set data packet size
@@ -160,35 +361,34 @@ static int encode_dvd_subtitles(uint8_t *outbuf, int outbuf_size,
 
     // send start display command
     bytestream_put_be16(&q, (h->start_display_time*90) >> 10);
-    bytestream_put_be16(&q, (q - outbuf) /*- 2 */ + 8 + 12*rects + 2);
+    bytestream_put_be16(&q, (q - outbuf) /*- 2 */ + 8 + 12 + 2);
     *q++ = 0x03; // palette - 4 nibbles
-    *q++ = 0x03; *q++ = 0x7f;
+    *q++ = (out_palette[3] << 4) | out_palette[2];
+    *q++ = (out_palette[1] << 4) | out_palette[0];
     *q++ = 0x04; // alpha - 4 nibbles
-    *q++ = 0xf0; *q++ = 0x00;
-    //*q++ = 0x0f; *q++ = 0xff;
+    *q++ = (out_alpha[3] & 0xF0) | (out_alpha[2] >> 4);
+    *q++ = (out_alpha[1] & 0xF0) | (out_alpha[0] >> 4);
 
-    // XXX not sure if more than one rect can really be encoded..
     // 12 bytes per rect
-    for (object_id = 0; object_id < rects; object_id++) {
-        int x2 = h->rects[object_id]->x + h->rects[object_id]->w - 1;
-        int y2 = h->rects[object_id]->y + h->rects[object_id]->h - 1;
-
-        *q++ = 0x05;
-        // x1 x2 -> 6 nibbles
-        *q++ = h->rects[object_id]->x >> 4;
-        *q++ = (h->rects[object_id]->x << 4) | ((x2 >> 8) & 0xf);
-        *q++ = x2;
-        // y1 y2 -> 6 nibbles
-        *q++ = h->rects[object_id]->y >> 4;
-        *q++ = (h->rects[object_id]->y << 4) | ((y2 >> 8) & 0xf);
-        *q++ = y2;
-
-        *q++ = 0x06;
-        // offset1, offset2
-        bytestream_put_be16(&q, offset1[object_id]);
-        bytestream_put_be16(&q, offset2[object_id]);
-    }
-    *q++ = 0x01; // start command
+    x2 = vrect.x + vrect.w - 1;
+    y2 = vrect.y + vrect.h - 1;
+
+    *q++ = 0x05;
+    // x1 x2 -> 6 nibbles
+    *q++ = vrect.x >> 4;
+    *q++ = (vrect.x << 4) | ((x2 >> 8) & 0xf);
+    *q++ = x2;
+    // y1 y2 -> 6 nibbles
+    *q++ = vrect.y >> 4;
+    *q++ = (vrect.y << 4) | ((y2 >> 8) & 0xf);
+    *q++ = y2;
+
+    *q++ = 0x06;
+    // offset1, offset2
+    bytestream_put_be16(&q, offset1);
+    bytestream_put_be16(&q, offset2);
+
+    *q++ = forced ? 0x00 : 0x01; // start command
     *q++ = 0xff; // terminating command
 
     // send stop display command last
@@ -200,8 +400,42 @@ static int encode_dvd_subtitles(uint8_t *outbuf, int outbuf_size,
     qq = outbuf;
     bytestream_put_be16(&qq, q - outbuf);
 
-    av_log(NULL, AV_LOG_DEBUG, "subtitle_packet size=%td\n", q - outbuf);
-    return q - outbuf;
+    av_log(NULL, AV_LOG_DEBUG, "subtitle_packet size=%"PTRDIFF_SPECIFIER"\n", q - outbuf);
+    ret = q - outbuf;
+
+fail:
+    av_free(vrect_data);
+    return ret;
+}
+
+static int dvdsub_init(AVCodecContext *avctx)
+{
+    DVDSubtitleContext *dvdc = avctx->priv_data;
+    static const uint32_t default_palette[16] = {
+        0x000000, 0x0000FF, 0x00FF00, 0xFF0000,
+        0xFFFF00, 0xFF00FF, 0x00FFFF, 0xFFFFFF,
+        0x808000, 0x8080FF, 0x800080, 0x80FF80,
+        0x008080, 0xFF8080, 0x555555, 0xAAAAAA,
+    };
+    AVBPrint extradata;
+    int i, ret;
+    /* Encoder init: install the default 16-color palette and describe it in extradata. */
+    av_assert0(sizeof(dvdc->global_palette) == sizeof(default_palette));
+    memcpy(dvdc->global_palette, default_palette, sizeof(dvdc->global_palette));
+    /* Extradata is text: an optional "size: WxH" line, then one "palette:" line. */
+    av_bprint_init(&extradata, 0, 1);
+    if (avctx->width && avctx->height)
+        av_bprintf(&extradata, "size: %dx%d\n", avctx->width, avctx->height);
+    av_bprintf(&extradata, "palette:");
+    for (i = 0; i < 16; i++)
+        av_bprintf(&extradata, " %06"PRIx32"%c",
+                   dvdc->global_palette[i] & 0xFFFFFF, i < 15 ? ',' : '\n');
+    /* Entries are comma-separated 6-digit hex values; the last one ends the line. */
+    ret = avpriv_bprint_to_extradata(avctx, &extradata);
+    if (ret < 0)
+        return ret;
+
+    return 0;
 }
 
 static int dvdsub_encode(AVCodecContext *avctx,
@@ -211,14 +445,31 @@ static int dvdsub_encode(AVCodecContext *avctx,
     //DVDSubtitleContext *s = avctx->priv_data;
     int ret;
 
-    ret = encode_dvd_subtitles(buf, buf_size, sub);
+    ret = encode_dvd_subtitles(avctx, buf, buf_size, sub);
     return ret;
 }
 
+#define OFFSET(x) offsetof(DVDSubtitleContext, x) /* byte offset of an option's storage */
+#define SE AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_ENCODING_PARAM /* NOTE(review): unparenthesized */
+static const AVOption options[] = { /* private options of the encoder */
+    {"even_rows_fix", "Make number of rows even (workaround for some players)", OFFSET(even_rows_fix), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, SE},
+    { NULL }, /* terminating entry */
+};
+/* Referenced by ff_dvdsub_encoder.priv_class. */
+static const AVClass dvdsubenc_class = {
+    .class_name = "VOBSUB subtitle encoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVCodec ff_dvdsub_encoder = {
     .name           = "dvdsub",
     .long_name      = NULL_IF_CONFIG_SMALL("DVD subtitles"),
     .type           = AVMEDIA_TYPE_SUBTITLE,
     .id             = AV_CODEC_ID_DVD_SUBTITLE,
+    .init           = dvdsub_init,
     .encode_sub     = dvdsub_encode,
+    .priv_class     = &dvdsubenc_class,
+    .priv_data_size = sizeof(DVDSubtitleContext),
 };
diff --git a/libavcodec/dvenc.c b/libavcodec/dvenc.c
index 74e29d8..ad98a9c 100644
--- a/libavcodec/dvenc.c
+++ b/libavcodec/dvenc.c
@@ -2,20 +2,20 @@
  * DV encoder
  * Copyright (c) 2003 Roman Shaposhnik
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -53,6 +53,10 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx)
         ff_dv_print_profiles(avctx, AV_LOG_ERROR);
         return AVERROR(EINVAL);
     }
+    if (avctx->height > 576) {
+        av_log(avctx, AV_LOG_ERROR, "DVCPRO HD encoding is not supported.\n");
+        return AVERROR_PATCHWELCOME;
+    }
     ret = ff_dv_init_dynamic_tables(s, s->sys);
     if (ret < 0) {
         av_log(avctx, AV_LOG_ERROR, "Error initializing work tables.\n");
@@ -65,6 +69,9 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx)
 
     dv_vlc_map_tableinit();
 
+    memset(&fdsp,0, sizeof(fdsp));
+    memset(&mecc,0, sizeof(mecc));
+    memset(&pdsp,0, sizeof(pdsp));
     ff_fdctdsp_init(&fdsp, avctx);
     ff_me_cmp_init(&mecc, avctx);
     ff_pixblockdsp_init(&pdsp, avctx);
@@ -240,7 +247,7 @@ static av_always_inline int dv_init_enc_block(EncBlockInfo* bi, uint8_t *data, i
        method suggested in SMPTE 314M Table 22, and an improved
        method. The SMPTE method is very conservative; it assigns class
        3 (i.e. severe quantization) to any block where the largest AC
-       component is greater than 36. Libav's DV encoder tracks AC bit
+       component is greater than 36. FFmpeg's DV encoder tracks AC bit
        consumption precisely, so there is no need to bias most blocks
        towards strongly lossy compression. Instead, we assign class 2
        to most blocks, and use class 3 only when strictly necessary
@@ -248,13 +255,13 @@ static av_always_inline int dv_init_enc_block(EncBlockInfo* bi, uint8_t *data, i
 
 #if 0 /* SMPTE spec method */
     static const int classes[] = {12, 24, 36, 0xffff};
-#else /* improved Libav method */
+#else /* improved FFmpeg method */
     static const int classes[] = {-1, -1, 255, 0xffff};
 #endif
     int max  = classes[0];
     int prev = 0;
 
-    assert((((int)blk) & 15) == 0);
+    av_assert2((((int)blk) & 15) == 0);
 
     bi->area_q[0] = bi->area_q[1] = bi->area_q[2] = bi->area_q[3] = 0;
     bi->partial_bit_count = 0;
@@ -283,7 +290,7 @@ static av_always_inline int dv_init_enc_block(EncBlockInfo* bi, uint8_t *data, i
 
           if (level + 15 > 30U) {
               bi->sign[i] = (level >> 31) & 1;
-              /* weight it and and shift down into range, adding for rounding */
+              /* weight it and shift down into range, adding for rounding */
               /* the extra division by a factor of 2^4 reverses the 8x expansion of the DCT
                  AND the 2x doubling of the weights */
               level = (FFABS(level) * weight[i] + (1 << (dv_weight_bits+3))) >> (dv_weight_bits+4);
@@ -345,7 +352,7 @@ static inline void dv_guess_qnos(EncBlockInfo* blks, int* qnos)
                     b->bit_size[a] = 1; // 4 areas 4 bits for EOB :)
                     b->area_q[a]++;
                     prev = b->prev[a];
-                    assert(b->next[prev] >= mb_area_start[a+1] || b->mb[prev]);
+                    av_assert2(b->next[prev] >= mb_area_start[a+1] || b->mb[prev]);
                     for (k = b->next[prev] ; k < mb_area_start[a+1]; k = b->next[k]) {
                        b->mb[k] >>= 1;
                        if (b->mb[k]) {
@@ -355,11 +362,11 @@ static inline void dv_guess_qnos(EncBlockInfo* blks, int* qnos)
                            if (b->next[k] >= mb_area_start[a+1] && b->next[k]<64){
                                 for (a2 = a + 1; b->next[k] >= mb_area_start[a2+1]; a2++)
                                     b->prev[a2] = prev;
-                                assert(a2 < 4);
-                                assert(b->mb[b->next[k]]);
+                                av_assert2(a2 < 4);
+                                av_assert2(b->mb[b->next[k]]);
                                 b->bit_size[a2] += dv_rl2vlc_size(b->next[k] - prev - 1, b->mb[b->next[k]])
                                                   -dv_rl2vlc_size(b->next[k] -    k - 1, b->mb[b->next[k]]);
-                                assert(b->prev[a2] == k && (a2 + 1 >= 4 || b->prev[a2+1] != k));
+                                av_assert2(b->prev[a2] == k && (a2 + 1 >= 4 || b->prev[a2+1] != k));
                                 b->prev[a2] = prev;
                            }
                            b->next[prev] = b->next[k];
@@ -546,6 +553,7 @@ static inline int dv_write_pack(enum dv_pack_type pack_id, DVVideoContext *c,
      *      compression scheme (if any).
      */
     int apt   = (c->sys->pix_fmt == AV_PIX_FMT_YUV420P ? 0 : 1);
+    int fs    = c->frame->top_field_first ? 0x00 : 0x40;
 
     uint8_t aspect = 0;
     if ((int)(av_q2d(c->avctx->sample_aspect_ratio) * c->avctx->width / c->avctx->height * 10) >= 17) /* 16:9 */
@@ -584,7 +592,7 @@ static inline int dv_write_pack(enum dv_pack_type pack_id, DVVideoContext *c,
           buf[2] = 0xc8 |     /* reserved -- always b11001xxx */
                    aspect;
           buf[3] = (1 << 7) | /* frame/field flag 1 -- frame, 0 -- field */
-                   (1 << 6) | /* first/second field flag 0 -- field 2, 1 -- field 1 */
+                   fs       | /* first/second field flag 0 -- field 2, 1 -- field 1 */
                    (1 << 5) | /* frame change flag 0 -- same picture as before, 1 -- different */
                    (1 << 4) | /* 1 - interlaced, 0 - noninterlaced */
                    0xc;       /* reserved -- always b1100 */
@@ -687,10 +695,8 @@ static int dvvideo_encode_frame(AVCodecContext *c, AVPacket *pkt,
     DVVideoContext *s = c->priv_data;
     int ret;
 
-    if ((ret = ff_alloc_packet(pkt, s->sys->frame_size)) < 0) {
-        av_log(c, AV_LOG_ERROR, "Error getting output packet.\n");
+    if ((ret = ff_alloc_packet2(c, pkt, s->sys->frame_size)) < 0)
         return ret;
-    }
 
     c->pix_fmt                = s->sys->pix_fmt;
     s->frame                  = frame;
diff --git a/libavcodec/dxa.c b/libavcodec/dxa.c
index fa0677d..0f64b5e 100644
--- a/libavcodec/dxa.c
+++ b/libavcodec/dxa.c
@@ -2,20 +2,20 @@
  * Feeble Files/ScummVM DXA decoder
  * Copyright (c) 2007 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -29,6 +29,7 @@
 
 #include "libavutil/common.h"
 #include "libavutil/intreadwrite.h"
+#include "bytestream.h"
 #include "avcodec.h"
 #include "internal.h"
 
@@ -41,6 +42,7 @@ typedef struct DxaDecContext {
     AVFrame *prev;
 
     int dsize;
+#define DECOMP_BUF_PADDING 16
     uint8_t *decomp_buf;
     uint32_t pal[256];
 } DxaDecContext;
@@ -49,13 +51,17 @@ static const int shift1[6] = { 0, 8, 8, 8, 4, 4 };
 static const int shift2[6] = { 0, 0, 8, 4, 0, 4 };
 
 static int decode_13(AVCodecContext *avctx, DxaDecContext *c, uint8_t* dst,
-                     int stride, uint8_t *src, uint8_t *ref)
+                     int stride, uint8_t *src, int srcsize, uint8_t *ref)
 {
     uint8_t *code, *data, *mv, *msk, *tmp, *tmp2;
+    uint8_t *src_end = src + srcsize;
     int i, j, k;
     int type, x, y, d, d2;
     uint32_t mask;
 
+    if (12ULL  + ((avctx->width * avctx->height) >> 4) + AV_RB32(src + 0) + AV_RB32(src + 4) > srcsize)
+        return AVERROR_INVALIDDATA;
+
     code = src  + 12;
     data = code + ((avctx->width * avctx->height) >> 4);
     mv   = data + AV_RB32(src + 0);
@@ -63,6 +69,8 @@ static int decode_13(AVCodecContext *avctx, DxaDecContext *c, uint8_t* dst,
 
     for(j = 0; j < avctx->height; j += 4){
         for(i = 0; i < avctx->width; i += 4){
+            if (data > src_end || mv > src_end || msk > src_end)
+                return AVERROR_INVALIDDATA;
             tmp  = dst + i;
             tmp2 = ref + i;
             type = *code++;
@@ -70,6 +78,11 @@ static int decode_13(AVCodecContext *avctx, DxaDecContext *c, uint8_t* dst,
             case 4: // motion compensation
                 x = (*mv) >> 4;    if(x & 8) x = 8 - x;
                 y = (*mv++) & 0xF; if(y & 8) y = 8 - y;
+                if (i < -x || avctx->width  - i - 4 < x ||
+                    j < -y || avctx->height - j - 4 < y) {
+                    av_log(avctx, AV_LOG_ERROR, "MV %d %d out of bounds\n", x,y);
+                    return AVERROR_INVALIDDATA;
+                }
                 tmp2 += x + y*stride;
             case 0: // skip
             case 5: // skip in method 12
@@ -127,6 +140,11 @@ static int decode_13(AVCodecContext *avctx, DxaDecContext *c, uint8_t* dst,
                     case 0x80: // motion compensation
                         x = (*mv) >> 4;    if(x & 8) x = 8 - x;
                         y = (*mv++) & 0xF; if(y & 8) y = 8 - y;
+                        if (i + 2*(k & 1) < -x || avctx->width  - i - 2*(k & 1) - 2 < x ||
+                            j +   (k & 2) < -y || avctx->height - j -   (k & 2) - 2 < y) {
+                            av_log(avctx, AV_LOG_ERROR, "MV %d %d out of bounds\n", x,y);
+                            return AVERROR_INVALIDDATA;
+                        }
                         tmp2 += x + y*stride;
                     case 0x00: // skip
                         tmp[d + 0         ] = tmp2[0];
@@ -192,35 +210,27 @@ static int decode_13(AVCodecContext *avctx, DxaDecContext *c, uint8_t* dst,
 static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt)
 {
     AVFrame *frame = data;
-    const uint8_t *buf = avpkt->data;
-    int buf_size = avpkt->size;
     DxaDecContext * const c = avctx->priv_data;
     uint8_t *outptr, *srcptr, *tmpptr;
     unsigned long dsize;
     int i, j, compr, ret;
     int stride;
-    int orig_buf_size = buf_size;
     int pc = 0;
+    GetByteContext gb;
 
-    /* make the palette available on the way out */
-    if(buf[0]=='C' && buf[1]=='M' && buf[2]=='A' && buf[3]=='P'){
-        int r, g, b;
+    bytestream2_init(&gb, avpkt->data, avpkt->size);
 
-        buf += 4;
+    /* make the palette available on the way out */
+    if (bytestream2_peek_le32(&gb) == MKTAG('C','M','A','P')) {
+        bytestream2_skip(&gb, 4);
         for(i = 0; i < 256; i++){
-            r = *buf++;
-            g = *buf++;
-            b = *buf++;
-            c->pal[i] = (r << 16) | (g << 8) | b;
+            c->pal[i] = 0xFFU << 24 | bytestream2_get_be24(&gb);
         }
         pc = 1;
-        buf_size -= 768+4;
     }
 
-    if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
         return ret;
-    }
     memcpy(frame->data[1], c->pal, AVPALETTE_SIZE);
     frame->palette_has_changed = pc;
 
@@ -229,16 +239,25 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
     tmpptr = c->prev->data[0];
     stride = frame->linesize[0];
 
-    if(buf[0]=='N' && buf[1]=='U' && buf[2]=='L' && buf[3]=='L')
+    if (bytestream2_get_le32(&gb) == MKTAG('N','U','L','L'))
         compr = -1;
     else
-        compr = buf[4];
+        compr = bytestream2_get_byte(&gb);
 
     dsize = c->dsize;
-    if((compr != 4 && compr != -1) && uncompress(c->decomp_buf, &dsize, buf + 9, buf_size - 9) != Z_OK){
-        av_log(avctx, AV_LOG_ERROR, "Uncompress failed!\n");
-        return AVERROR_UNKNOWN;
+    if (compr != 4 && compr != -1) {
+        bytestream2_skip(&gb, 4);
+        if (uncompress(c->decomp_buf, &dsize, avpkt->data + bytestream2_tell(&gb),
+                       bytestream2_get_bytes_left(&gb)) != Z_OK) {
+            av_log(avctx, AV_LOG_ERROR, "Uncompress failed!\n");
+            return AVERROR_UNKNOWN;
+        }
+        memset(c->decomp_buf + dsize, 0, DECOMP_BUF_PADDING);
     }
+
+    if (avctx->debug & FF_DEBUG_PICT_INFO)
+        av_log(avctx, AV_LOG_DEBUG, "compr:%2d, dsize:%d\n", compr, (int)dsize);
+
     switch(compr){
     case -1:
         frame->key_frame = 0;
@@ -265,14 +284,18 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
     case 5:
         if (!tmpptr) {
             av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
-            return AVERROR_INVALIDDATA;
+            if (!(avctx->flags2 & CODEC_FLAG2_SHOW_ALL))
+                return AVERROR_INVALIDDATA;
         }
         frame->key_frame = 0;
         frame->pict_type = AV_PICTURE_TYPE_P;
         for (j = 0; j < avctx->height; j++) {
-            for (i = 0; i < avctx->width; i++)
-                outptr[i] = srcptr[i] ^ tmpptr[i];
-            tmpptr += stride;
+            if(tmpptr){
+                for(i = 0; i < avctx->width; i++)
+                    outptr[i] = srcptr[i] ^ tmpptr[i];
+                tmpptr += stride;
+            }else
+                memcpy(outptr, srcptr, avctx->width);
             outptr += stride;
             srcptr += avctx->width;
         }
@@ -281,10 +304,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
     case 13:
         frame->key_frame = 0;
         frame->pict_type = AV_PICTURE_TYPE_P;
-        decode_13(avctx, c, frame->data[0], frame->linesize[0], srcptr, c->prev->data[0]);
+        if (!c->prev->data[0]) {
+            av_log(avctx, AV_LOG_ERROR, "Missing reference frame\n");
+            return AVERROR_INVALIDDATA;
+        }
+        decode_13(avctx, c, frame->data[0], frame->linesize[0], srcptr, dsize, c->prev->data[0]);
         break;
     default:
-        av_log(avctx, AV_LOG_ERROR, "Unknown/unsupported compression type %d\n", buf[4]);
+        av_log(avctx, AV_LOG_ERROR, "Unknown/unsupported compression type %d\n", compr);
         return AVERROR_INVALIDDATA;
     }
 
@@ -295,7 +322,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
     *got_frame = 1;
 
     /* always report that the buffer was completely consumed */
-    return orig_buf_size;
+    return avpkt->size;
 }
 
 static av_cold int decode_init(AVCodecContext *avctx)
@@ -309,7 +336,9 @@ static av_cold int decode_init(AVCodecContext *avctx)
     avctx->pix_fmt = AV_PIX_FMT_PAL8;
 
     c->dsize = avctx->width * avctx->height * 2;
-    if((c->decomp_buf = av_malloc(c->dsize)) == NULL) {
+    c->decomp_buf = av_malloc(c->dsize + DECOMP_BUF_PADDING);
+    if (!c->decomp_buf) {
+        av_frame_free(&c->prev);
         av_log(avctx, AV_LOG_ERROR, "Can't allocate decompression buffer.\n");
         return AVERROR(ENOMEM);
     }
diff --git a/libavcodec/dxtory.c b/libavcodec/dxtory.c
index 662cd9f..1a59ae7 100644
--- a/libavcodec/dxtory.c
+++ b/libavcodec/dxtory.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2011 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -39,7 +39,7 @@ static int dxtory_decode_v1_rgb(AVCodecContext *avctx, AVFrame *pic,
     uint8_t *dst;
     int ret;
 
-    if (src_size < avctx->width * avctx->height * bpp) {
+    if (src_size < avctx->width * avctx->height * (int64_t)bpp) {
         av_log(avctx, AV_LOG_ERROR, "packet too small\n");
         return AVERROR_INVALIDDATA;
     }
@@ -65,7 +65,7 @@ static int dxtory_decode_v1_410(AVCodecContext *avctx, AVFrame *pic,
     uint8_t *Y1, *Y2, *Y3, *Y4, *U, *V;
     int ret;
 
-    if (src_size < avctx->width * avctx->height * 18 / 16) {
+    if (src_size < avctx->width * avctx->height * 9LL / 8) { /* 64-bit math: long may be 32-bit */
         av_log(avctx, AV_LOG_ERROR, "packet too small\n");
         return AVERROR_INVALIDDATA;
     }
@@ -108,7 +108,7 @@ static int dxtory_decode_v1_420(AVCodecContext *avctx, AVFrame *pic,
     uint8_t *Y1, *Y2, *U, *V;
     int ret;
 
-    if (src_size < avctx->width * avctx->height * 3 / 2) {
+    if (src_size < avctx->width * avctx->height * 3LL / 2) { /* 64-bit math: long may be 32-bit */
         av_log(avctx, AV_LOG_ERROR, "packet too small\n");
         return AVERROR_INVALIDDATA;
     }
@@ -145,7 +145,7 @@ static int dxtory_decode_v1_444(AVCodecContext *avctx, AVFrame *pic,
     uint8_t *Y, *U, *V;
     int ret;
 
-    if (src_size < avctx->width * avctx->height * 3) {
+    if (src_size < avctx->width * avctx->height * 3LL) { /* 64-bit math: long may be 32-bit */
         av_log(avctx, AV_LOG_ERROR, "packet too small\n");
         return AVERROR_INVALIDDATA;
     }
@@ -171,9 +171,9 @@ static int dxtory_decode_v1_444(AVCodecContext *avctx, AVFrame *pic,
     return 0;
 }
 
-const uint8_t def_lru[8] = { 0x00, 0x20, 0x40, 0x60, 0x80, 0xA0, 0xC0, 0xFF };
-const uint8_t def_lru_555[8] = { 0x00, 0x08, 0x10, 0x18, 0x1F };
-const uint8_t def_lru_565[8] = { 0x00, 0x08, 0x10, 0x20, 0x30, 0x3F };
+static const uint8_t def_lru[8] = { 0x00, 0x20, 0x40, 0x60, 0x80, 0xA0, 0xC0, 0xFF };
+static const uint8_t def_lru_555[8] = { 0x00, 0x08, 0x10, 0x18, 0x1F };
+static const uint8_t def_lru_565[8] = { 0x00, 0x08, 0x10, 0x20, 0x30, 0x3F };
 
 static inline uint8_t decode_sym(GetBitContext *gb, uint8_t lru[8])
 {
diff --git a/libavcodec/dxva2.c b/libavcodec/dxva2.c
index 9ee22c8..c1c7681 100644
--- a/libavcodec/dxva2.c
+++ b/libavcodec/dxva2.c
@@ -3,20 +3,20 @@
  *
  * copyright (c) 2010 Laurent Aimar
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/dxva2.h b/libavcodec/dxva2.h
index 78939be..2639d89 100644
--- a/libavcodec/dxva2.h
+++ b/libavcodec/dxva2.h
@@ -3,20 +3,20 @@
  *
  * copyright (c) 2009 Laurent Aimar
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -29,7 +29,14 @@
  * Public libavcodec DXVA2 header.
  */
 
+#if defined(_WIN32_WINNT) && _WIN32_WINNT < 0x0600
+#undef _WIN32_WINNT
+#endif
+
+#if !defined(_WIN32_WINNT)
 #define _WIN32_WINNT 0x0600
+#endif
+
 #include <stdint.h>
 #include <d3d9.h>
 #include <dxva2api.h>
@@ -46,7 +53,7 @@
 
 /**
  * This structure is used to provides the necessary configurations and data
- * to the DXVA2 Libav HWAccel implementation.
+ * to the DXVA2 FFmpeg HWAccel implementation.
  *
  * The application must make it available as AVCodecContext.hwaccel_context.
  */
@@ -77,7 +84,7 @@ struct dxva_context {
     uint64_t workaround;
 
     /**
-     * Private to the Libav AVHWAccel implementation
+     * Private to the FFmpeg AVHWAccel implementation
      */
     unsigned report_id;
 };
diff --git a/libavcodec/dxva2_h264.c b/libavcodec/dxva2_h264.c
index fa205c4..1174188 100644
--- a/libavcodec/dxva2_h264.c
+++ b/libavcodec/dxva2_h264.c
@@ -3,20 +3,20 @@
  *
  * copyright (c) 2009 Laurent Aimar
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -96,7 +96,7 @@ static void fill_picture_parameters(struct dxva_context *ctx, const H264Context
                                         ((h->sps.mb_aff &&
                                         (h->picture_structure == PICT_FRAME)) <<  1) |
                                         (h->sps.residual_color_transform_flag <<  2) |
-                                        /* sp_for_switch_flag (not implemented by Libav) */
+                                        /* sp_for_switch_flag (not implemented by FFmpeg) */
                                         (0                                    <<  3) |
                                         (h->sps.chroma_format_idc             <<  4) |
                                         ((h->nal_ref_idc != 0)                <<  6) |
@@ -152,8 +152,8 @@ static void fill_picture_parameters(struct dxva_context *ctx, const H264Context
     pp->deblocking_filter_control_present_flag = h->pps.deblocking_filter_parameters_present;
     pp->redundant_pic_cnt_present_flag= h->pps.redundant_pic_cnt_present;
     pp->Reserved8BitsB                = 0;
-    pp->slice_group_change_rate_minus1= 0;  /* XXX not implemented by Libav */
-    //pp->SliceGroupMap[810];               /* XXX not implemented by Libav */
+    pp->slice_group_change_rate_minus1= 0;  /* XXX not implemented by FFmpeg */
+    //pp->SliceGroupMap[810];               /* XXX not implemented by FFmpeg */
 }
 
 static void fill_scaling_lists(struct dxva_context *ctx, const H264Context *h, DXVA_Qmatrix_H264 *qm)
@@ -274,7 +274,7 @@ static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice,
             }
         }
     }
-    slice->slice_qs_delta    = 0; /* XXX not implemented by Libav */
+    slice->slice_qs_delta    = 0; /* XXX not implemented by FFmpeg */
     slice->slice_qp_delta    = h->qscale - h->pps.init_qp;
     slice->redundant_pic_cnt = h->redundant_pic_count;
     if (h->slice_type == AV_PICTURE_TYPE_B)
@@ -307,7 +307,7 @@ static int commit_bitstream_and_slice_buffer(AVCodecContext *avctx,
     /* Create an annex B bitstream buffer with only slice NAL and finalize slice */
     if (FAILED(IDirectXVideoDecoder_GetBuffer(ctx->decoder,
                                                DXVA2_BitStreamDateBufferType,
-                                               &dxva_data, &dxva_size)))
+                                               (void **)&dxva_data, &dxva_size)))
         return -1;
     current = dxva_data;
     end = dxva_data + dxva_size;
diff --git a/libavcodec/dxva2_internal.h b/libavcodec/dxva2_internal.h
index f35a076..427c1ff 100644
--- a/libavcodec/dxva2_internal.h
+++ b/libavcodec/dxva2_internal.h
@@ -3,27 +3,26 @@
  *
  * copyright (c) 2010 Laurent Aimar
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #ifndef AVCODEC_DXVA_INTERNAL_H
 #define AVCODEC_DXVA_INTERNAL_H
 
-#define _WIN32_WINNT 0x0600
 #define COBJMACROS
 
 #include "config.h"
diff --git a/libavcodec/dxva2_mpeg2.c b/libavcodec/dxva2_mpeg2.c
index f6ef5e5..ee832bc 100644
--- a/libavcodec/dxva2_mpeg2.c
+++ b/libavcodec/dxva2_mpeg2.c
@@ -3,20 +3,20 @@
  *
  * copyright (c) 2010 Laurent Aimar
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -111,10 +111,10 @@ static void fill_quantization_matrices(AVCodecContext *avctx,
         qm->bNewQmatrix[i] = 1;
     for (i = 0; i < 64; i++) {
         int n = s->idsp.idct_permutation[ff_zigzag_direct[i]];
-        qm->Qmatrix[0][i] = s->intra_matrix[n];;
-        qm->Qmatrix[1][i] = s->inter_matrix[n];;
-        qm->Qmatrix[2][i] = s->chroma_intra_matrix[n];;
-        qm->Qmatrix[3][i] = s->chroma_inter_matrix[n];;
+        qm->Qmatrix[0][i] = s->intra_matrix[n];
+        qm->Qmatrix[1][i] = s->inter_matrix[n];
+        qm->Qmatrix[2][i] = s->chroma_intra_matrix[n];
+        qm->Qmatrix[3][i] = s->chroma_inter_matrix[n];
     }
 }
 
@@ -141,8 +141,7 @@ static void fill_slice(AVCodecContext *avctx,
     init_get_bits(&gb, &buffer[4], 8 * (size - 4));
 
     slice->wQuantizerScaleCode = get_bits(&gb, 5);
-    while (get_bits1(&gb))
-        skip_bits(&gb, 8);
+    skip_1stop_8data_bits(&gb);
 
     slice->wMBbitOffset        = 4 * 8 + get_bits_count(&gb);
 }
@@ -162,7 +161,7 @@ static int commit_bitstream_and_slice_buffer(AVCodecContext *avctx,
 
     if (FAILED(IDirectXVideoDecoder_GetBuffer(ctx->decoder,
                                               DXVA2_BitStreamDateBufferType,
-                                              &dxva_data, &dxva_size)))
+                                              (void **)&dxva_data, &dxva_size)))
         return -1;
     current = dxva_data;
     end = dxva_data + dxva_size;
diff --git a/libavcodec/dxva2_vc1.c b/libavcodec/dxva2_vc1.c
index 4f4dd17..b2b5ced 100644
--- a/libavcodec/dxva2_vc1.c
+++ b/libavcodec/dxva2_vc1.c
@@ -3,20 +3,20 @@
  *
  * copyright (c) 2010 Laurent Aimar
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -39,6 +39,15 @@ static void fill_picture_parameters(AVCodecContext *avctx,
 {
     const MpegEncContext *s = &v->s;
     const Picture *current_picture = s->current_picture_ptr;
+    int intcomp = 0;
+
+    // determine if intensity compensation is needed
+    if (s->pict_type == AV_PICTURE_TYPE_P) {
+      if ((v->fcm == ILACE_FRAME && v->intcomp) || (v->fcm != ILACE_FRAME && v->mv_mode == MV_PMODE_INTENSITY_COMP)) {
+        if (v->lumscale != 32 || v->lumshift != 0 || (s->picture_structure != PICT_FRAME && (v->lumscale2 != 32 || v->lumshift2 != 0)))
+          intcomp = 1;
+      }
+    }
 
     memset(pp, 0, sizeof(*pp));
     pp->wDecodedPictureIndex    =
@@ -69,13 +78,13 @@ static void fill_picture_parameters(AVCodecContext *avctx,
         pp->bPicStructure      |= 0x01;
     if (s->picture_structure & PICT_BOTTOM_FIELD)
         pp->bPicStructure      |= 0x02;
-    pp->bSecondField            = v->interlace && v->fcm != ILACE_FIELD && !s->first_field;
+    pp->bSecondField            = v->interlace && v->fcm == ILACE_FIELD && v->second_field;
     pp->bPicIntra               = s->pict_type == AV_PICTURE_TYPE_I || v->bi_type;
     pp->bPicBackwardPrediction  = s->pict_type == AV_PICTURE_TYPE_B && !v->bi_type;
     pp->bBidirectionalAveragingMode = (1                                           << 7) |
                                       ((ctx->cfg->ConfigIntraResidUnsigned != 0)   << 6) |
                                       ((ctx->cfg->ConfigResidDiffAccelerator != 0) << 5) |
-                                      ((v->lumscale != 32 || v->lumshift != 0)     << 4) |
+                                      (intcomp                                     << 4) |
                                       ((v->profile == PROFILE_ADVANCED)            << 3);
     pp->bMVprecisionAndChromaRelation = ((v->mv_mode == MV_PMODE_1MV_HPEL_BILIN) << 3) |
                                         (1                                       << 2) |
@@ -123,15 +132,25 @@ static void fill_picture_parameters(AVCodecContext *avctx,
                                   (v->range_mapuv_flag << 3) |
                                   (v->range_mapuv          );
     pp->bPicBinPB               = 0;
-    pp->bMV_RPS                 = 0;
-    pp->bReservedBits           = 0;
+    pp->bMV_RPS                 = (v->fcm == ILACE_FIELD && pp->bPicBackwardPrediction) ? v->refdist + 9 : 0;
+    pp->bReservedBits           = v->pq;
     if (s->picture_structure == PICT_FRAME) {
-        pp->wBitstreamFcodes        = v->lumscale;
-        pp->wBitstreamPCEelements   = v->lumshift;
+        if (intcomp) {
+            pp->wBitstreamFcodes      = v->lumscale;
+            pp->wBitstreamPCEelements = v->lumshift;
+        } else {
+            pp->wBitstreamFcodes      = 32;
+            pp->wBitstreamPCEelements = 0;
+        }
     } else {
         /* Syntax: (top_field_param << 8) | bottom_field_param */
-        pp->wBitstreamFcodes        = (v->lumscale << 8) | v->lumscale;
-        pp->wBitstreamPCEelements   = (v->lumshift << 8) | v->lumshift;
+        if (intcomp) {
+            pp->wBitstreamFcodes      = (v->lumscale << 8) | v->lumscale2;
+            pp->wBitstreamPCEelements = (v->lumshift << 8) | v->lumshift2;
+        } else {
+            pp->wBitstreamFcodes      = (32 << 8) | 32;
+            pp->wBitstreamPCEelements = 0;
+        }
     }
     pp->bBitstreamConcealmentNeed   = 0;
     pp->bBitstreamConcealmentMethod = 0;
@@ -149,8 +168,8 @@ static void fill_slice(AVCodecContext *avctx, DXVA_SliceInfo *slice,
     slice->dwSliceBitsInBuffer = 8 * size;
     slice->dwSliceDataLocation = position;
     slice->bStartCodeBitOffset = 0;
-    slice->bReservedBits       = 0;
-    slice->wMBbitOffset        = get_bits_count(&s->gb);
+    slice->bReservedBits       = (s->pict_type == AV_PICTURE_TYPE_B && !v->bi_type) ? v->bfraction_lut_index + 9 : 0;
+    slice->wMBbitOffset        = v->p_frame_skipped ? 0xffff : get_bits_count(&s->gb) + (avctx->codec_id == AV_CODEC_ID_VC1 ? 32 : 0);
     slice->wNumberMBsInSlice   = s->mb_width * s->mb_height; /* XXX We assume 1 slice */
     slice->wQuantizerScaleCode = v->pq;
     slice->wBadSliceChopping   = 0;
@@ -179,13 +198,16 @@ static int commit_bitstream_and_slice_buffer(AVCodecContext *avctx,
 
     if (FAILED(IDirectXVideoDecoder_GetBuffer(ctx->decoder,
                                               DXVA2_BitStreamDateBufferType,
-                                              &dxva_data, &dxva_size)))
+                                              (void **)&dxva_data, &dxva_size)))
         return -1;
 
     result = data_size <= dxva_size ? 0 : -1;
     if (!result) {
-        if (start_code_size > 0)
+        if (start_code_size > 0) {
             memcpy(dxva_data, start_code, start_code_size);
+            if (v->second_field)
+                dxva_data[3] = 0x0c;
+        }
         memcpy(dxva_data + start_code_size,
                ctx_pic->bitstream + slice->dwSliceDataLocation, slice_size);
         if (padding > 0)
diff --git a/libavcodec/eac3_data.c b/libavcodec/eac3_data.c
index b0416f3..b159e16 100644
--- a/libavcodec/eac3_data.c
+++ b/libavcodec/eac3_data.c
@@ -2,20 +2,20 @@
  * E-AC-3 tables
  * Copyright (c) 2007 Bartlomiej Wolowiec <bartek.wolowiec@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/eac3_data.h b/libavcodec/eac3_data.h
index 4d88ce0..10a67f1 100644
--- a/libavcodec/eac3_data.h
+++ b/libavcodec/eac3_data.h
@@ -2,20 +2,20 @@
  * E-AC-3 tables
  * Copyright (c) 2007 Bartlomiej Wolowiec <bartek.wolowiec@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/eac3dec.c b/libavcodec/eac3dec.c
index b9d079c..8e931fd 100644
--- a/libavcodec/eac3dec.c
+++ b/libavcodec/eac3dec.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2007 Bartlomiej Wolowiec <bartek.wolowiec@gmail.com>
  * Copyright (c) 2008 Justin Ruggles
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/eac3enc.c b/libavcodec/eac3enc.c
index 3aa2d54..e1d61f6 100644
--- a/libavcodec/eac3enc.c
+++ b/libavcodec/eac3enc.c
@@ -2,20 +2,20 @@
  * E-AC-3 encoder
  * Copyright (c) 2011 Justin Ruggles <justin.ruggles@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -34,9 +34,13 @@
 
 #define AC3ENC_TYPE AC3ENC_TYPE_EAC3
 #include "ac3enc_opts_template.c"
-static const AVClass eac3enc_class = { "E-AC-3 Encoder", av_default_item_name,
-                                       ac3_options, LIBAVUTIL_VERSION_INT };
 
+static const AVClass eac3enc_class = {
+    .class_name = "E-AC-3 Encoder",
+    .item_name  = av_default_item_name,
+    .option     = ac3_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
 
 /**
  * LUT for finding a matching frame exponent strategy index from a set of
diff --git a/libavcodec/eac3enc.h b/libavcodec/eac3enc.h
index a92a24c..7d61559 100644
--- a/libavcodec/eac3enc.h
+++ b/libavcodec/eac3enc.h
@@ -2,20 +2,20 @@
  * E-AC-3 encoder
  * Copyright (c) 2011 Justin Ruggles <justin.ruggles@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/eacmv.c b/libavcodec/eacmv.c
index 6adadb1..4a1af8c 100644
--- a/libavcodec/eacmv.c
+++ b/libavcodec/eacmv.c
@@ -2,20 +2,20 @@
  * Electronic Arts CMV Video Decoder
  * Copyright (c) 2007-2008 Peter Ross
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
@@ -44,6 +44,7 @@ typedef struct CmvContext {
 
 static av_cold int cmv_decode_init(AVCodecContext *avctx){
     CmvContext *s = avctx->priv_data;
+
     s->avctx = avctx;
     avctx->pix_fmt = AV_PIX_FMT_PAL8;
 
@@ -160,7 +161,7 @@ static int cmv_process_header(CmvContext *s, const uint8_t *buf, const uint8_t *
 
     buf += 16;
     for (i=pal_start; i<pal_start+pal_count && i<AVPALETTE_COUNT && buf_end - buf >= 3; i++) {
-        s->palette[i] = AV_RB24(buf);
+        s->palette[i] = 0xFFU << 24 | AV_RB24(buf);
         buf += 3;
     }
 
@@ -185,19 +186,20 @@ static int cmv_decode_frame(AVCodecContext *avctx,
         return AVERROR_INVALIDDATA;
 
     if (AV_RL32(buf)==MVIh_TAG||AV_RB32(buf)==MVIh_TAG) {
+        unsigned size = AV_RL32(buf + 4);
         ret = cmv_process_header(s, buf+EA_PREAMBLE_SIZE, buf_end);
         if (ret < 0)
             return ret;
-        return buf_size;
+        if (size > buf_end - buf - EA_PREAMBLE_SIZE)
+            return -1;
+        buf += size;
     }
 
     if (av_image_check_size(s->width, s->height, 0, s->avctx))
         return -1;
 
-    if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
         return ret;
-    }
 
     memcpy(frame->data[1], s->palette, AVPALETTE_SIZE);
 
diff --git a/libavcodec/eaidct.c b/libavcodec/eaidct.c
index 5b2db44..e4840f2 100644
--- a/libavcodec/eaidct.c
+++ b/libavcodec/eaidct.c
@@ -2,20 +2,20 @@
  * Electronic Arts TGQ/TQI/MAD IDCT algorithm
  * Copyright (c) 2007-2008 Peter Ross <pross@xvid.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/eaidct.h b/libavcodec/eaidct.h
index e78de04..6b9ec1c 100644
--- a/libavcodec/eaidct.h
+++ b/libavcodec/eaidct.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/eamad.c b/libavcodec/eamad.c
index ceccfe4..711363c 100644
--- a/libavcodec/eamad.c
+++ b/libavcodec/eamad.c
@@ -2,20 +2,20 @@
  * Electronic Arts Madcow Video Decoder
  * Copyright (c) 2007-2009 Peter Ross
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
@@ -94,15 +94,21 @@ static inline void comp_block(MadContext *t, AVFrame *frame,
                               int j, int mv_x, int mv_y, int add)
 {
     if (j < 4) {
+        unsigned offset = (mb_y*16 + ((j&2)<<2) + mv_y)*t->last_frame->linesize[0] + mb_x*16 + ((j&1)<<3) + mv_x;
+        if (offset >= (t->avctx->height - 7) * t->last_frame->linesize[0] - 7)
+            return;
         comp(frame->data[0] + (mb_y*16 + ((j&2)<<2))*frame->linesize[0] + mb_x*16 + ((j&1)<<3),
              frame->linesize[0],
-             t->last_frame->data[0] + (mb_y*16 + ((j&2)<<2) + mv_y)*t->last_frame->linesize[0] + mb_x*16 + ((j&1)<<3) + mv_x,
+             t->last_frame->data[0] + offset,
              t->last_frame->linesize[0], add);
     } else if (!(t->avctx->flags & CODEC_FLAG_GRAY)) {
         int index = j - 3;
+        unsigned offset = (mb_y * 8 + (mv_y/2))*t->last_frame->linesize[index] + mb_x * 8 + (mv_x/2);
+        if (offset >= (t->avctx->height/2 - 7) * t->last_frame->linesize[index] - 7)
+            return;
         comp(frame->data[index] + (mb_y*8)*frame->linesize[index] + mb_x * 8,
              frame->linesize[index],
-             t->last_frame->data[index] + (mb_y * 8 + (mv_y/2))*t->last_frame->linesize[index] + mb_x * 8 + (mv_x/2),
+             t->last_frame->data[index] + offset,
              t->last_frame->linesize[index], add);
     }
 }
@@ -122,7 +128,7 @@ static inline void idct_put(MadContext *t, AVFrame *frame, int16_t *block,
     }
 }
 
-static inline void decode_block_intra(MadContext *s, int16_t * block)
+static inline int decode_block_intra(MadContext *s, int16_t * block)
 {
     int level, i, j, run;
     RLTable *rl = &ff_rl_mpeg1;
@@ -172,13 +178,14 @@ static inline void decode_block_intra(MadContext *s, int16_t * block)
             }
             if (i > 63) {
                 av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
-                return;
+                return -1;
             }
 
             block[j] = level;
         }
         CLOSE_READER(re, &s->gb);
     }
+    return 0;
 }
 
 static int decode_motion(GetBitContext *gb)
@@ -192,10 +199,10 @@ static int decode_motion(GetBitContext *gb)
     return value;
 }
 
-static void decode_mb(MadContext *s, AVFrame *frame, int inter)
+static int decode_mb(MadContext *s, AVFrame *frame, int inter)
 {
     int mv_map = 0;
-    int mv_x, mv_y;
+    int av_uninit(mv_x), av_uninit(mv_y);
     int j;
 
     if (inter) {
@@ -204,21 +211,22 @@ static void decode_mb(MadContext *s, AVFrame *frame, int inter)
             mv_map = v ? get_bits(&s->gb, 6) : 63;
             mv_x = decode_motion(&s->gb);
             mv_y = decode_motion(&s->gb);
-        } else {
-            mv_map = 0;
         }
     }
 
     for (j=0; j<6; j++) {
         if (mv_map & (1<<j)) {  // mv_x and mv_y are guarded by mv_map
             int add = 2*decode_motion(&s->gb);
-            comp_block(s, frame, s->mb_x, s->mb_y, j, mv_x, mv_y, add);
+            if (s->last_frame->data[0])
+                comp_block(s, frame, s->mb_x, s->mb_y, j, mv_x, mv_y, add);
         } else {
             s->bdsp.clear_block(s->block);
-            decode_block_intra(s, s->block);
+            if(decode_block_intra(s, s->block) < 0)
+                return -1;
             idct_put(s, frame, s->block, s->mb_x, s->mb_y, j);
         }
     }
+    return 0;
 }
 
 static void calc_quant_matrix(MadContext *s, int qscale)
@@ -263,16 +271,21 @@ static int decode_frame(AVCodecContext *avctx,
         return AVERROR_INVALIDDATA;
     }
 
+    if (width < 16 || height < 16) {
+        av_log(avctx, AV_LOG_ERROR, "Dimensions too small\n");
+        return AVERROR_INVALIDDATA;
+    }
+
     if (avctx->width != width || avctx->height != height) {
         av_frame_unref(s->last_frame);
+        if((width * height)/2048*7 > bytestream2_get_bytes_left(&gb))
+            return AVERROR_INVALIDDATA;
         if ((ret = ff_set_dimensions(avctx, width, height)) < 0)
             return ret;
     }
 
-    if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
         return ret;
-    }
 
     if (inter && !s->last_frame->data[0]) {
         av_log(avctx, AV_LOG_WARNING, "Missing reference frame.\n");
@@ -293,11 +306,13 @@ static int decode_frame(AVCodecContext *avctx,
         return AVERROR(ENOMEM);
     s->bbdsp.bswap16_buf(s->bitstream_buf, (const uint16_t *)(buf + bytestream2_tell(&gb)),
                          bytestream2_get_bytes_left(&gb) / 2);
+    memset((uint8_t*)s->bitstream_buf + bytestream2_get_bytes_left(&gb), 0, FF_INPUT_BUFFER_PADDING_SIZE);
     init_get_bits(&s->gb, s->bitstream_buf, 8*(bytestream2_get_bytes_left(&gb)));
 
     for (s->mb_y=0; s->mb_y < (avctx->height+15)/16; s->mb_y++)
         for (s->mb_x=0; s->mb_x < (avctx->width +15)/16; s->mb_x++)
-            decode_mb(s, frame, inter);
+            if(decode_mb(s, frame, inter) < 0)
+                return AVERROR_INVALIDDATA;
 
     *got_frame = 1;
 
diff --git a/libavcodec/eatgq.c b/libavcodec/eatgq.c
index 0e4ba2f..34cb642 100644
--- a/libavcodec/eatgq.c
+++ b/libavcodec/eatgq.c
@@ -2,20 +2,20 @@
  * Electronic Arts TGQ Video Decoder
  * Copyright (c) 2007-2008 Peter Ross <pross@xvid.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
@@ -148,7 +148,7 @@ static void tgq_idct_put_mb_dconly(TgqContext *s, AVFrame *frame,
     }
 }
 
-static void tgq_decode_mb(TgqContext *s, AVFrame *frame, int mb_y, int mb_x)
+static int tgq_decode_mb(TgqContext *s, AVFrame *frame, int mb_y, int mb_x)
 {
     int mode;
     int i;
@@ -157,7 +157,7 @@ static void tgq_decode_mb(TgqContext *s, AVFrame *frame, int mb_y, int mb_x)
     mode = bytestream2_get_byte(&s->gb);
     if (mode > 12) {
         GetBitContext gb;
-        init_get_bits(&gb, s->gb.buffer, FFMIN(s->gb.buffer_end - s->gb.buffer, mode) * 8);
+        init_get_bits8(&gb, s->gb.buffer, FFMIN(bytestream2_get_bytes_left(&s->gb), mode));
         for (i = 0; i < 6; i++)
             tgq_decode_block(s, s->block[i], &gb);
         tgq_idct_put_mb(s, s->block, frame, mb_x, mb_y);
@@ -176,9 +176,11 @@ static void tgq_decode_mb(TgqContext *s, AVFrame *frame, int mb_y, int mb_x)
             }
         } else {
             av_log(s->avctx, AV_LOG_ERROR, "unsupported mb mode %i\n", mode);
+            return -1;
         }
         tgq_idct_put_mb_dconly(s, frame, mb_x, mb_y, dc);
     }
+    return 0;
 }
 
 static void tgq_calculate_qtable(TgqContext *s, int quant)
@@ -201,12 +203,13 @@ static int tgq_decode_frame(AVCodecContext *avctx,
     TgqContext *s      = avctx->priv_data;
     AVFrame *frame     = data;
     int x, y, ret;
-    int big_endian = AV_RL32(&buf[4]) > 0x000FFFFF;
+    int big_endian;
 
     if (buf_size < 16) {
         av_log(avctx, AV_LOG_WARNING, "truncated header\n");
         return AVERROR_INVALIDDATA;
     }
+    big_endian = AV_RL32(&buf[4]) > 0x000FFFFF;
     bytestream2_init(&s->gb, buf + 8, buf_size - 8);
     if (big_endian) {
         s->width  = bytestream2_get_be16u(&s->gb);
@@ -223,16 +226,15 @@ static int tgq_decode_frame(AVCodecContext *avctx,
     tgq_calculate_qtable(s, bytestream2_get_byteu(&s->gb));
     bytestream2_skip(&s->gb, 3);
 
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     frame->key_frame = 1;
     frame->pict_type = AV_PICTURE_TYPE_I;
 
     for (y = 0; y < FFALIGN(avctx->height, 16) >> 4; y++)
         for (x = 0; x < FFALIGN(avctx->width, 16) >> 4; x++)
-            tgq_decode_mb(s, frame, y, x);
+            if (tgq_decode_mb(s, frame, y, x) < 0)
+                return AVERROR_INVALIDDATA;
 
     *got_frame = 1;
 
diff --git a/libavcodec/eatgv.c b/libavcodec/eatgv.c
index 3bc6506..f204a13 100644
--- a/libavcodec/eatgv.c
+++ b/libavcodec/eatgv.c
@@ -2,20 +2,20 @@
  * Electronic Arts TGV Video Decoder
  * Copyright (c) 2007-2008 Peter Ross
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
@@ -81,7 +81,7 @@ static int unpack(const uint8_t *src, const uint8_t *src_end,
     else
         src += 2;
 
-    if (src + 3 > src_end)
+    if (src_end - src < 3)
         return AVERROR_INVALIDDATA;
     size = AV_RB24(src);
     src += 3;
@@ -156,7 +156,7 @@ static int tgv_decode_inter(TgvContext *s, AVFrame *frame,
     int mvbits;
     const uint8_t *blocks_raw;
 
-    if (buf + 12 > buf_end)
+    if(buf_end - buf < 12)
         return AVERROR_INVALIDDATA;
 
     num_mvs           = AV_RL16(&buf[0]);
@@ -173,7 +173,10 @@ static int tgv_decode_inter(TgvContext *s, AVFrame *frame,
 
     /* allocate codebook buffers as necessary */
     if (num_mvs > s->num_mvs) {
-        s->mv_codebook = av_realloc(s->mv_codebook, num_mvs*2*sizeof(int));
+        if (av_reallocp_array(&s->mv_codebook, num_mvs, sizeof(*s->mv_codebook))) {
+            s->num_mvs = 0;
+            return AVERROR(ENOMEM);
+        }
         s->num_mvs = num_mvs;
     }
 
@@ -189,7 +192,7 @@ static int tgv_decode_inter(TgvContext *s, AVFrame *frame,
     /* read motion vectors */
     mvbits = (num_mvs * 2 * 10 + 31) & ~31;
 
-    if (buf + (mvbits >> 3) + 16 * num_blocks_raw + 8 * num_blocks_packed > buf_end)
+    if (buf_end - buf < (mvbits>>3) + 16*num_blocks_raw + 8*num_blocks_packed)
         return AVERROR_INVALIDDATA;
 
     init_get_bits(&gb, buf, mvbits);
@@ -229,8 +232,10 @@ static int tgv_decode_inter(TgvContext *s, AVFrame *frame,
                 int my = y * 4 + s->mv_codebook[vector][1];
 
                 if (mx < 0 || mx + 4 > s->avctx->width ||
-                    my < 0 || my + 4 > s->avctx->height)
+                    my < 0 || my + 4 > s->avctx->height) {
+                    av_log(s->avctx, AV_LOG_ERROR, "MV %d %d out of picture\n", mx, my);
                     continue;
+                }
 
                 src = s->last_frame->data[0] + mx + my * s->last_frame->linesize[0];
                 src_stride = s->last_frame->linesize[0];
@@ -265,12 +270,15 @@ static int tgv_decode_frame(AVCodecContext *avctx,
     AVFrame *frame         = data;
     int chunk_type, ret;
 
+    if (buf_end - buf < EA_PREAMBLE_SIZE)
+        return AVERROR_INVALIDDATA;
+
     chunk_type = AV_RL32(&buf[0]);
     buf       += EA_PREAMBLE_SIZE;
 
     if (chunk_type == kVGT_TAG) {
         int pal_count, i;
-        if (buf + 12 > buf_end) {
+        if(buf_end - buf < 12) {
             av_log(avctx, AV_LOG_WARNING, "truncated header\n");
             return AVERROR_INVALIDDATA;
         }
@@ -286,8 +294,8 @@ static int tgv_decode_frame(AVCodecContext *avctx,
 
         pal_count = AV_RL16(&buf[6]);
         buf += 12;
-        for (i = 0; i < pal_count && i < AVPALETTE_COUNT && buf + 2 < buf_end; i++) {
-            s->palette[i] = AV_RB24(buf);
+        for(i = 0; i < pal_count && i < AVPALETTE_COUNT && buf_end - buf >= 3; i++) {
+            s->palette[i] = 0xFFU << 24 | AV_RB24(buf);
             buf += 3;
         }
     }
@@ -303,7 +311,7 @@ static int tgv_decode_frame(AVCodecContext *avctx,
         frame->pict_type = AV_PICTURE_TYPE_I;
 
         if (!s->frame_buffer &&
-            !(s->frame_buffer = av_malloc(s->width * s->height)))
+            !(s->frame_buffer = av_mallocz(s->width * s->height)))
             return AVERROR(ENOMEM);
 
         if (unpack(buf, buf_end, s->frame_buffer, s->avctx->width, s->avctx->height) < 0) {
diff --git a/libavcodec/eatqi.c b/libavcodec/eatqi.c
index 25882da..864291a 100644
--- a/libavcodec/eatqi.c
+++ b/libavcodec/eatqi.c
@@ -2,20 +2,20 @@
  * Electronic Arts TQI Video Decoder
  * Copyright (c) 2007-2009 Peter Ross <pross@xvid.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
@@ -121,10 +121,8 @@ static int tqi_decode_frame(AVCodecContext *avctx,
     if (ret < 0)
         return ret;
 
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     av_fast_padded_malloc(&t->bitstream_buf, &t->bitstream_buf_size,
                           buf_end - buf);
@@ -139,9 +137,10 @@ static int tqi_decode_frame(AVCodecContext *avctx,
     for (s->mb_x=0; s->mb_x<(avctx->width+15)/16; s->mb_x++)
     {
         if (tqi_decode_mb(s, t->block) < 0)
-            break;
+            goto end;
         tqi_idct_put(t, frame, t->block);
     }
+    end:
 
     *got_frame = 1;
     return buf_size;
diff --git a/libavcodec/elbg.c b/libavcodec/elbg.c
index 0aa8e16..9bbb6d8 100644
--- a/libavcodec/elbg.c
+++ b/libavcodec/elbg.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2007 Vitor Sessak <vitor1001@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,6 +25,7 @@
 
 #include <string.h>
 
+#include "libavutil/avassert.h"
 #include "libavutil/common.h"
 #include "libavutil/lfg.h"
 #include "elbg.h"
@@ -50,7 +51,7 @@ typedef struct{
     int *codebook;
     cell **cells;
     int *utility;
-    int *utility_inc;
+    int64_t *utility_inc;
     int *nearest_cb;
     int *points;
     AVLFG *rand_state;
@@ -107,11 +108,20 @@ static int get_high_utility_cell(elbg_data *elbg)
 {
     int i=0;
     /* Using linear search, do binary if it ever turns to be speed critical */
-    int r = av_lfg_get(elbg->rand_state)%elbg->utility_inc[elbg->numCB-1] + 1;
-    while (elbg->utility_inc[i] < r)
+    uint64_t r;
+
+    if (elbg->utility_inc[elbg->numCB-1] < INT_MAX) {
+        r = av_lfg_get(elbg->rand_state) % (unsigned int)elbg->utility_inc[elbg->numCB-1] + 1;
+    } else {
+        r = av_lfg_get(elbg->rand_state);
+        r = (av_lfg_get(elbg->rand_state) + (r<<32)) % elbg->utility_inc[elbg->numCB-1] + 1;
+    }
+
+    while (elbg->utility_inc[i] < r) {
         i++;
+    }
 
-    assert(elbg->cells[i]);
+    av_assert2(elbg->cells[i]);
 
     return i;
 }
@@ -189,7 +199,7 @@ static void get_new_centroids(elbg_data *elbg, int huc, int *newcentroid_i,
 
 /**
  * Add the points in the low utility cell to its closest cell. Split the high
- * utility cell, putting the separed points in the (now empty) low utility
+ * utility cell, putting the separate points in the (now empty) low utility
  * cell.
  *
  * @param elbg         Internal elbg data
@@ -226,7 +236,8 @@ static void shift_codebook(elbg_data *elbg, int *indexes,
 
 static void evaluate_utility_inc(elbg_data *elbg)
 {
-    int i, inc=0;
+    int i;
+    int64_t inc=0;
 
     for (i=0; i < elbg->numCB; i++) {
         if (elbg->numCB*elbg->utility[i] > elbg->error)
@@ -323,7 +334,7 @@ static void do_shiftings(elbg_data *elbg)
 
 #define BIG_PRIME 433494437LL
 
-void ff_init_elbg(int *points, int dim, int numpoints, int *codebook,
+void avpriv_init_elbg(int *points, int dim, int numpoints, int *codebook,
                   int numCB, int max_steps, int *closest_cb,
                   AVLFG *rand_state)
 {
@@ -332,14 +343,14 @@ void ff_init_elbg(int *points, int dim, int numpoints, int *codebook,
     if (numpoints > 24*numCB) {
         /* ELBG is very costly for a big number of points. So if we have a lot
            of them, get a good initial codebook to save on iterations       */
-        int *temp_points = av_malloc(dim*(numpoints/8)*sizeof(int));
+        int *temp_points = av_malloc_array(dim, (numpoints/8)*sizeof(int));
         for (i=0; i<numpoints/8; i++) {
             k = (i*BIG_PRIME) % numpoints;
             memcpy(temp_points + i*dim, points + k*dim, dim*sizeof(int));
         }
 
-        ff_init_elbg(temp_points, dim, numpoints/8, codebook, numCB, 2*max_steps, closest_cb, rand_state);
-        ff_do_elbg(temp_points, dim, numpoints/8, codebook, numCB, 2*max_steps, closest_cb, rand_state);
+        avpriv_init_elbg(temp_points, dim, numpoints/8, codebook, numCB, 2*max_steps, closest_cb, rand_state);
+        avpriv_do_elbg(temp_points, dim, numpoints/8, codebook, numCB, 2*max_steps, closest_cb, rand_state);
 
         av_free(temp_points);
 
@@ -350,7 +361,7 @@ void ff_init_elbg(int *points, int dim, int numpoints, int *codebook,
 
 }
 
-void ff_do_elbg(int *points, int dim, int numpoints, int *codebook,
+void avpriv_do_elbg(int *points, int dim, int numpoints, int *codebook,
                 int numCB, int max_steps, int *closest_cb,
                 AVLFG *rand_state)
 {
@@ -358,9 +369,9 @@ void ff_do_elbg(int *points, int dim, int numpoints, int *codebook,
     elbg_data elbg_d;
     elbg_data *elbg = &elbg_d;
     int i, j, k, last_error, steps=0;
-    int *dist_cb = av_malloc(numpoints*sizeof(int));
-    int *size_part = av_malloc(numCB*sizeof(int));
-    cell *list_buffer = av_malloc(numpoints*sizeof(cell));
+    int *dist_cb = av_malloc_array(numpoints, sizeof(int));
+    int *size_part = av_malloc_array(numCB, sizeof(int));
+    cell *list_buffer = av_malloc_array(numpoints, sizeof(cell));
     cell *free_cells;
     int best_dist, best_idx = 0;
 
@@ -368,12 +379,12 @@ void ff_do_elbg(int *points, int dim, int numpoints, int *codebook,
     elbg->dim = dim;
     elbg->numCB = numCB;
     elbg->codebook = codebook;
-    elbg->cells = av_malloc(numCB*sizeof(cell *));
-    elbg->utility = av_malloc(numCB*sizeof(int));
+    elbg->cells = av_malloc_array(numCB, sizeof(cell *));
+    elbg->utility = av_malloc_array(numCB, sizeof(int));
     elbg->nearest_cb = closest_cb;
     elbg->points = points;
-    elbg->utility_inc = av_malloc(numCB*sizeof(int));
-    elbg->scratchbuf = av_malloc(5*dim*sizeof(int));
+    elbg->utility_inc = av_malloc_array(numCB, sizeof(*elbg->utility_inc));
+    elbg->scratchbuf = av_malloc_array(5*dim, sizeof(int));
 
     elbg->rand_state = rand_state;
 
diff --git a/libavcodec/elbg.h b/libavcodec/elbg.h
index b8ea489..22fb53f 100644
--- a/libavcodec/elbg.h
+++ b/libavcodec/elbg.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2007 Vitor Sessak <vitor1001@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -37,7 +37,7 @@
  * @param closest_cb Return the closest codebook to each point. Must be allocated.
  * @param rand_state A random number generator state. Should be already initialized by av_lfg_init().
  */
-void ff_do_elbg(int *points, int dim, int numpoints, int *codebook,
+void avpriv_do_elbg(int *points, int dim, int numpoints, int *codebook,
                 int numCB, int num_steps, int *closest_cb,
                 AVLFG *rand_state);
 
@@ -45,10 +45,10 @@ void ff_do_elbg(int *points, int dim, int numpoints, int *codebook,
  * Initialize the **codebook vector for the elbg algorithm. If you have already
  * a codebook and you want to refine it, you shouldn't call this function.
  * If numpoints < 8*numCB this function fills **codebook with random numbers.
- * If not, it calls ff_do_elbg for a (smaller) random sample of the points in
- * **points. Get the same parameters as ff_do_elbg.
+ * If not, it calls avpriv_do_elbg for a (smaller) random sample of the points in
+ * **points. Takes the same parameters as avpriv_do_elbg.
  */
-void ff_init_elbg(int *points, int dim, int numpoints, int *codebook,
+void avpriv_init_elbg(int *points, int dim, int numpoints, int *codebook,
                   int numCB, int num_steps, int *closest_cb,
                   AVLFG *rand_state);
 
diff --git a/libavcodec/error_resilience.c b/libavcodec/error_resilience.c
index b41474a..2ba4a68 100644
--- a/libavcodec/error_resilience.c
+++ b/libavcodec/error_resilience.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -43,7 +43,7 @@
 static void set_mv_strides(ERContext *s, int *mv_step, int *stride)
 {
     if (s->avctx->codec_id == AV_CODEC_ID_H264) {
-        assert(s->quarter_sample);
+        av_assert0(s->quarter_sample);
         *mv_step = 4;
         *stride  = s->mb_width * 4;
     } else {
@@ -136,11 +136,73 @@ static void guess_dc(ERContext *s, int16_t *dc, int w,
                      int h, int stride, int is_luma)
 {
     int b_x, b_y;
+    int16_t  (*col )[4] = av_malloc_array(stride, h*sizeof( int16_t)*4);
+    uint32_t (*dist)[4] = av_malloc_array(stride, h*sizeof(uint32_t)*4);
+
+    if(!col || !dist) {
+        av_log(s->avctx, AV_LOG_ERROR, "guess_dc() is out of memory\n");
+        goto fail;
+    }
+
+    for(b_y=0; b_y<h; b_y++){
+        int color= 1024;
+        int distance= -1;
+        for(b_x=0; b_x<w; b_x++){
+            int mb_index_j= (b_x>>is_luma) + (b_y>>is_luma)*s->mb_stride;
+            int error_j= s->error_status_table[mb_index_j];
+            int intra_j = IS_INTRA(s->cur_pic.mb_type[mb_index_j]);
+            if(intra_j==0 || !(error_j&ER_DC_ERROR)){
+                color= dc[b_x + b_y*stride];
+                distance= b_x;
+            }
+            col [b_x + b_y*stride][1]= color;
+            dist[b_x + b_y*stride][1]= distance >= 0 ? b_x-distance : 9999;
+        }
+        color= 1024;
+        distance= -1;
+        for(b_x=w-1; b_x>=0; b_x--){
+            int mb_index_j= (b_x>>is_luma) + (b_y>>is_luma)*s->mb_stride;
+            int error_j= s->error_status_table[mb_index_j];
+            int intra_j = IS_INTRA(s->cur_pic.mb_type[mb_index_j]);
+            if(intra_j==0 || !(error_j&ER_DC_ERROR)){
+                color= dc[b_x + b_y*stride];
+                distance= b_x;
+            }
+            col [b_x + b_y*stride][0]= color;
+            dist[b_x + b_y*stride][0]= distance >= 0 ? distance-b_x : 9999;
+        }
+    }
+    for(b_x=0; b_x<w; b_x++){
+        int color= 1024;
+        int distance= -1;
+        for(b_y=0; b_y<h; b_y++){
+            int mb_index_j= (b_x>>is_luma) + (b_y>>is_luma)*s->mb_stride;
+            int error_j= s->error_status_table[mb_index_j];
+            int intra_j = IS_INTRA(s->cur_pic.mb_type[mb_index_j]);
+            if(intra_j==0 || !(error_j&ER_DC_ERROR)){
+                color= dc[b_x + b_y*stride];
+                distance= b_y;
+            }
+            col [b_x + b_y*stride][3]= color;
+            dist[b_x + b_y*stride][3]= distance >= 0 ? b_y-distance : 9999;
+        }
+        color= 1024;
+        distance= -1;
+        for(b_y=h-1; b_y>=0; b_y--){
+            int mb_index_j= (b_x>>is_luma) + (b_y>>is_luma)*s->mb_stride;
+            int error_j= s->error_status_table[mb_index_j];
+            int intra_j = IS_INTRA(s->cur_pic.mb_type[mb_index_j]);
+            if(intra_j==0 || !(error_j&ER_DC_ERROR)){
+                color= dc[b_x + b_y*stride];
+                distance= b_y;
+            }
+            col [b_x + b_y*stride][2]= color;
+            dist[b_x + b_y*stride][2]= distance >= 0 ? distance-b_y : 9999;
+        }
+    }
 
     for (b_y = 0; b_y < h; b_y++) {
         for (b_x = 0; b_x < w; b_x++) {
-            int color[4]    = { 1024, 1024, 1024, 1024 };
-            int distance[4] = { 9999, 9999, 9999, 9999 };
             int mb_index, error, j;
             int64_t guess, weight_sum;
             mb_index = (b_x >> is_luma) + (b_y >> is_luma) * s->mb_stride;
@@ -151,66 +213,21 @@ static void guess_dc(ERContext *s, int16_t *dc, int w,
             if (!(error & ER_DC_ERROR))
                 continue; // dc-ok
 
-            /* right block */
-            for (j = b_x + 1; j < w; j++) {
-                int mb_index_j = (j >> is_luma) + (b_y >> is_luma) * s->mb_stride;
-                int error_j    = s->error_status_table[mb_index_j];
-                int intra_j    = IS_INTRA(s->cur_pic.mb_type[mb_index_j]);
-                if (intra_j == 0 || !(error_j & ER_DC_ERROR)) {
-                    color[0]    = dc[j + b_y * stride];
-                    distance[0] = j - b_x;
-                    break;
-                }
-            }
-
-            /* left block */
-            for (j = b_x - 1; j >= 0; j--) {
-                int mb_index_j = (j >> is_luma) + (b_y >> is_luma) * s->mb_stride;
-                int error_j    = s->error_status_table[mb_index_j];
-                int intra_j    = IS_INTRA(s->cur_pic.mb_type[mb_index_j]);
-                if (intra_j == 0 || !(error_j & ER_DC_ERROR)) {
-                    color[1]    = dc[j + b_y * stride];
-                    distance[1] = b_x - j;
-                    break;
-                }
-            }
-
-            /* bottom block */
-            for (j = b_y + 1; j < h; j++) {
-                int mb_index_j = (b_x >> is_luma) + (j >> is_luma) * s->mb_stride;
-                int error_j    = s->error_status_table[mb_index_j];
-                int intra_j    = IS_INTRA(s->cur_pic.mb_type[mb_index_j]);
-
-                if (intra_j == 0 || !(error_j & ER_DC_ERROR)) {
-                    color[2]    = dc[b_x + j * stride];
-                    distance[2] = j - b_y;
-                    break;
-                }
-            }
-
-            /* top block */
-            for (j = b_y - 1; j >= 0; j--) {
-                int mb_index_j = (b_x >> is_luma) + (j >> is_luma) * s->mb_stride;
-                int error_j    = s->error_status_table[mb_index_j];
-                int intra_j    = IS_INTRA(s->cur_pic.mb_type[mb_index_j]);
-                if (intra_j == 0 || !(error_j & ER_DC_ERROR)) {
-                    color[3]    = dc[b_x + j * stride];
-                    distance[3] = b_y - j;
-                    break;
-                }
-            }
-
             weight_sum = 0;
             guess      = 0;
             for (j = 0; j < 4; j++) {
-                int64_t weight  = 256 * 256 * 256 * 16 / distance[j];
-                guess          += weight * (int64_t) color[j];
+                int64_t weight  = 256 * 256 * 256 * 16 / FFMAX(dist[b_x + b_y*stride][j], 1);
+                guess          += weight*(int64_t)col[b_x + b_y*stride][j];
                 weight_sum     += weight;
             }
             guess = (guess + weight_sum / 2) / weight_sum;
             dc[b_x + b_y * stride] = guess;
         }
     }
+
+fail:
+    av_freep(&col);
+    av_freep(&dist);
 }
 
 /**
@@ -380,6 +397,14 @@ static void guess_mv(ERContext *s)
         fixed[mb_xy] = f;
         if (f == MV_FROZEN)
             num_avail++;
+        else if(s->last_pic.f->data[0] && s->last_pic.motion_val[0]){
+            const int mb_y= mb_xy / s->mb_stride;
+            const int mb_x= mb_xy % s->mb_stride;
+            const int mot_index= (mb_x + mb_y*mot_stride) * mot_step;
+            s->cur_pic.motion_val[0][mot_index][0]= s->last_pic.motion_val[0][mot_index][0];
+            s->cur_pic.motion_val[0][mot_index][1]= s->last_pic.motion_val[0][mot_index][1];
+            s->cur_pic.ref_index[0][4*mb_xy]      = s->last_pic.ref_index[0][4*mb_xy];
+        }
     }
 
     if ((!(s->avctx->error_concealment&FF_EC_GUESS_MVS)) ||
@@ -430,8 +455,8 @@ static void guess_mv(ERContext *s)
 
                     if (fixed[mb_xy] == MV_FROZEN)
                         continue;
-                    assert(!IS_INTRA(s->cur_pic.mb_type[mb_xy]));
-                    assert(s->last_pic && s->last_pic.f->data[0]);
+                    av_assert1(!IS_INTRA(s->cur_pic.mb_type[mb_xy]));
+                    av_assert1(s->last_pic.f && s->last_pic.f->data[0]);
 
                     j = 0;
                     if (mb_x > 0             && fixed[mb_xy - 1]         == MV_FROZEN)
@@ -546,7 +571,7 @@ skip_mean_and_median:
                     /* zero MV */
                     pred_count++;
 
-                    if (!fixed[mb_xy]) {
+                    if (!fixed[mb_xy] && 0) {
                         if (s->avctx->codec_id == AV_CODEC_ID_H264) {
                             // FIXME
                         } else {
@@ -661,6 +686,9 @@ static int is_intra_more_likely(ERContext *s)
     if (!s->last_pic.f || !s->last_pic.f->data[0])
         return 1; // no previous frame available -> use spatial prediction
 
+    if (s->avctx->error_concealment & FF_EC_FAVOR_INTER)
+        return 0;
+
     undamaged_count = 0;
     for (i = 0; i < s->mb_num; i++) {
         const int mb_xy = s->mb_index2xy[i];
@@ -675,15 +703,11 @@ static int is_intra_more_likely(ERContext *s)
     if (undamaged_count < 5)
         return 0; // almost all MBs damaged -> use temporal prediction
 
-#if FF_API_XVMC
-FF_DISABLE_DEPRECATION_WARNINGS
     // prevent dsp.sad() check, that requires access to the image
-    if (CONFIG_MPEG_XVMC_DECODER    &&
-        s->avctx->xvmc_acceleration &&
+    if (CONFIG_XVMC    &&
+        s->avctx->hwaccel && s->avctx->hwaccel->decode_mb &&
         s->cur_pic.f->pict_type == AV_PICTURE_TYPE_I)
         return 1;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif /* FF_API_XVMC */
 
     skip_amount     = FFMAX(undamaged_count / 50, 1); // check only up to 50 MBs
     is_intra_likely = 0;
@@ -717,6 +741,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
                 }
                 is_intra_likely += s->mecc->sad[0](NULL, last_mb_ptr, mb_ptr,
                                                    linesize[0], 16);
+                // FIXME need await_progress() here
                 is_intra_likely -= s->mecc->sad[0](NULL, last_mb_ptr,
                                                    last_mb_ptr + linesize[0] * 16,
                                                    linesize[0], 16);
@@ -728,6 +753,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
             }
         }
     }
+//      av_log(NULL, AV_LOG_ERROR, "is_intra_likely: %d type:%d\n", is_intra_likely, s->pict_type);
     return is_intra_likely > 0;
 }
 
@@ -742,6 +768,17 @@ void ff_er_frame_start(ERContext *s)
     s->error_occurred = 0;
 }
 
+static int er_supported(ERContext *s)
+{
+    if(s->avctx->hwaccel && s->avctx->hwaccel->decode_slice           ||
+       s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU          ||
+       !s->cur_pic.f                                                  ||
+       s->cur_pic.field_picture
+    )
+        return 0;
+    return 1;
+}
+
 /**
  * Add a slice.
  * @param endx   x component of the last macroblock, can be -1
@@ -758,7 +795,7 @@ void ff_er_add_slice(ERContext *s, int startx, int starty,
     const int end_xy   = s->mb_index2xy[end_i];
     int mask           = -1;
 
-    if (s->avctx->hwaccel)
+    if (s->avctx->hwaccel && s->avctx->hwaccel->decode_slice)
         return;
 
     if (start_i > end_i || start_xy > end_xy) {
@@ -807,13 +844,15 @@ void ff_er_add_slice(ERContext *s, int startx, int starty,
 
     s->error_status_table[start_xy] |= VP_START;
 
-    if (start_xy > 0 && s->avctx->thread_count <= 1 &&
-        s->avctx->skip_top * s->mb_width < start_i) {
+    if (start_xy > 0 && !(s->avctx->active_thread_type & FF_THREAD_SLICE) &&
+        er_supported(s) && s->avctx->skip_top * s->mb_width < start_i) {
         int prev_status = s->error_status_table[s->mb_index2xy[start_i - 1]];
 
         prev_status &= ~ VP_START;
-        if (prev_status != (ER_MV_END | ER_DC_END | ER_AC_END))
+        if (prev_status != (ER_MV_END | ER_DC_END | ER_AC_END)) {
+            s->error_occurred = 1;
             s->error_count = INT_MAX;
+        }
     }
 }
 
@@ -825,23 +864,71 @@ void ff_er_frame_end(ERContext *s)
     int threshold_part[4] = { 100, 100, 100 };
     int threshold = 50;
     int is_intra_likely;
+    int size = s->b8_stride * 2 * s->mb_height;
 
     /* We do not support ER of field pictures yet,
      * though it should not crash if enabled. */
     if (!s->avctx->error_concealment || s->error_count == 0            ||
-        s->avctx->hwaccel                                              ||
-        !s->cur_pic.f                                                  ||
-        s->cur_pic.field_picture                                       ||
+        s->avctx->lowres                                               ||
+        !er_supported(s)                                               ||
         s->error_count == 3 * s->mb_width *
                           (s->avctx->skip_top + s->avctx->skip_bottom)) {
         return;
-    };
+    }
+    for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
+        int status = s->error_status_table[mb_x + (s->mb_height - 1) * s->mb_stride];
+        if (status != 0x7F)
+            break;
+    }
 
-    if (!s->cur_pic.motion_val[0] || !s->cur_pic.ref_index[0]) {
-        av_log(s->avctx, AV_LOG_ERROR, "MVs not available, ER not possible.\n");
+    if (   mb_x == s->mb_width
+        && s->avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO
+        && (s->avctx->height&16)
+        && s->error_count == 3 * s->mb_width * (s->avctx->skip_top + s->avctx->skip_bottom + 1)
+    ) {
+        av_log(s->avctx, AV_LOG_DEBUG, "ignoring last missing slice\n");
         return;
     }
 
+    if (s->last_pic.f) {
+        if (s->last_pic.f->width  != s->cur_pic.f->width  ||
+            s->last_pic.f->height != s->cur_pic.f->height ||
+            s->last_pic.f->format != s->cur_pic.f->format) {
+            av_log(s->avctx, AV_LOG_WARNING, "Cannot use previous picture in error concealment\n");
+            memset(&s->last_pic, 0, sizeof(s->last_pic));
+        }
+    }
+    if (s->next_pic.f) {
+        if (s->next_pic.f->width  != s->cur_pic.f->width  ||
+            s->next_pic.f->height != s->cur_pic.f->height ||
+            s->next_pic.f->format != s->cur_pic.f->format) {
+            av_log(s->avctx, AV_LOG_WARNING, "Cannot use next picture in error concealment\n");
+            memset(&s->next_pic, 0, sizeof(s->next_pic));
+        }
+    }
+
+    if (!s->cur_pic.motion_val[0] || !s->cur_pic.ref_index[0]) {
+        av_log(s->avctx, AV_LOG_ERROR, "Warning MVs not available\n");
+
+        for (i = 0; i < 2; i++) {
+            s->ref_index_buf[i]  = av_buffer_allocz(s->mb_stride * s->mb_height * 4 * sizeof(uint8_t));
+            s->motion_val_buf[i] = av_buffer_allocz((size + 4) * 2 * sizeof(uint16_t));
+            if (!s->ref_index_buf[i] || !s->motion_val_buf[i])
+                break;
+            s->cur_pic.ref_index[i]  = s->ref_index_buf[i]->data;
+            s->cur_pic.motion_val[i] = (int16_t (*)[2])s->motion_val_buf[i]->data + 4;
+        }
+        if (i < 2) {
+            for (i = 0; i < 2; i++) {
+                av_buffer_unref(&s->ref_index_buf[i]);
+                av_buffer_unref(&s->motion_val_buf[i]);
+                s->cur_pic.ref_index[i]  = NULL;
+                s->cur_pic.motion_val[i] = NULL;
+            }
+            return;
+        }
+    }
+
     if (s->avctx->debug & FF_DEBUG_ER) {
         for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
             for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
@@ -853,6 +940,7 @@ void ff_er_frame_end(ERContext *s)
         }
     }
 
+#if 1
     /* handle overlapping slices */
     for (error_type = 1; error_type <= 3; error_type++) {
         int end_ok = 0;
@@ -873,7 +961,8 @@ void ff_er_frame_end(ERContext *s)
                 end_ok = 0;
         }
     }
-
+#endif
+#if 1
     /* handle slices with partitions of different length */
     if (s->partitioned_frame) {
         int end_ok = 0;
@@ -896,7 +985,7 @@ void ff_er_frame_end(ERContext *s)
                 end_ok = 0;
         }
     }
-
+#endif
     /* handle missing slices */
     if (s->avctx->err_recognition & AV_EF_EXPLODE) {
         int end_ok = 1;
@@ -923,6 +1012,7 @@ void ff_er_frame_end(ERContext *s)
         }
     }
 
+#if 1
     /* backward mark errors */
     distance = 9999999;
     for (error_type = 1; error_type <= 3; error_type++) {
@@ -947,6 +1037,7 @@ void ff_er_frame_end(ERContext *s)
                 distance = 9999999;
         }
     }
+#endif
 
     /* forward mark errors */
     error = 0;
@@ -961,22 +1052,23 @@ void ff_er_frame_end(ERContext *s)
             s->error_status_table[mb_xy] |= error;
         }
     }
-
+#if 1
     /* handle not partitioned case */
     if (!s->partitioned_frame) {
         for (i = 0; i < s->mb_num; i++) {
             const int mb_xy = s->mb_index2xy[i];
-            error = s->error_status_table[mb_xy];
+            int error = s->error_status_table[mb_xy];
             if (error & ER_MB_ERROR)
                 error |= ER_MB_ERROR;
             s->error_status_table[mb_xy] = error;
         }
     }
+#endif
 
     dc_error = ac_error = mv_error = 0;
     for (i = 0; i < s->mb_num; i++) {
         const int mb_xy = s->mb_index2xy[i];
-        error = s->error_status_table[mb_xy];
+        int error = s->error_status_table[mb_xy];
         if (error & ER_DC_ERROR)
             dc_error++;
         if (error & ER_AC_ERROR)
@@ -984,15 +1076,15 @@ void ff_er_frame_end(ERContext *s)
         if (error & ER_MV_ERROR)
             mv_error++;
     }
-    av_log(s->avctx, AV_LOG_INFO, "concealing %d DC, %d AC, %d MV errors\n",
-           dc_error, ac_error, mv_error);
+    av_log(s->avctx, AV_LOG_INFO, "concealing %d DC, %d AC, %d MV errors in %c frame\n",
+           dc_error, ac_error, mv_error, av_get_picture_type_char(s->cur_pic.f->pict_type));
 
     is_intra_likely = is_intra_more_likely(s);
 
     /* set unknown mb-type to most likely */
     for (i = 0; i < s->mb_num; i++) {
         const int mb_xy = s->mb_index2xy[i];
-        error = s->error_status_table[mb_xy];
+        int error = s->error_status_table[mb_xy];
         if (!((error & ER_DC_ERROR) && (error & ER_MV_ERROR)))
             continue;
 
@@ -1020,7 +1112,7 @@ void ff_er_frame_end(ERContext *s)
             const int mv_dir  = dir ? MV_DIR_BACKWARD : MV_DIR_FORWARD;
             int mv_type;
 
-            error = s->error_status_table[mb_xy];
+            int error = s->error_status_table[mb_xy];
 
             if (IS_INTRA(mb_type))
                 continue; // intra
@@ -1057,7 +1149,7 @@ void ff_er_frame_end(ERContext *s)
                 const int mb_type = s->cur_pic.mb_type[mb_xy];
                 int mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
 
-                error = s->error_status_table[mb_xy];
+                int error = s->error_status_table[mb_xy];
 
                 if (IS_INTRA(mb_type))
                     continue;
@@ -1075,6 +1167,7 @@ void ff_er_frame_end(ERContext *s)
                     int time_pp = s->pp_time;
                     int time_pb = s->pb_time;
 
+                    av_assert0(s->avctx->codec_id != AV_CODEC_ID_H264);
                     ff_thread_await_progress(s->next_pic.tf, mb_y, 0);
 
                     s->mv[0][0][0] = s->next_pic.motion_val[0][xy][0] *  time_pb            / time_pp;
@@ -1095,13 +1188,9 @@ void ff_er_frame_end(ERContext *s)
     } else
         guess_mv(s);
 
-#if FF_API_XVMC
-FF_DISABLE_DEPRECATION_WARNINGS
-    /* the filters below are not XvMC compatible, skip them */
-    if (CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration)
+    /* the filters below manipulate the raw image, skip them */
+    if (CONFIG_XVMC && s->avctx->hwaccel && s->avctx->hwaccel->decode_mb)
         goto ec_clean;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif /* FF_API_XVMC */
     /* fill DC for inter blocks */
     for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
         for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
@@ -1111,7 +1200,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
             const int mb_xy   = mb_x + mb_y * s->mb_stride;
             const int mb_type = s->cur_pic.mb_type[mb_xy];
 
-            error = s->error_status_table[mb_xy];
+            // error = s->error_status_table[mb_xy];
 
             if (IS_INTRA(mb_type) && s->partitioned_frame)
                 continue;
@@ -1146,15 +1235,17 @@ FF_ENABLE_DEPRECATION_WARNINGS
             s->dc_val[2][mb_x + mb_y * s->mb_stride] = (dcv + 4) >> 3;
         }
     }
-
+#if 1
     /* guess DC for damaged blocks */
-    guess_dc(s, s->dc_val[0], s->mb_width * 2, s->mb_height * 2, s->b8_stride, 1);
-    guess_dc(s, s->dc_val[1], s->mb_width, s->mb_height, s->mb_stride, 0);
-    guess_dc(s, s->dc_val[2], s->mb_width, s->mb_height, s->mb_stride, 0);
+    guess_dc(s, s->dc_val[0], s->mb_width*2, s->mb_height*2, s->b8_stride, 1);
+    guess_dc(s, s->dc_val[1], s->mb_width  , s->mb_height  , s->mb_stride, 0);
+    guess_dc(s, s->dc_val[2], s->mb_width  , s->mb_height  , s->mb_stride, 0);
+#endif
 
     /* filter luma DC */
     filter181(s->dc_val[0], s->mb_width * 2, s->mb_height * 2, s->b8_stride);
 
+#if 1
     /* render DC only intra */
     for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
         for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
@@ -1162,7 +1253,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
             const int mb_xy   = mb_x + mb_y * s->mb_stride;
             const int mb_type = s->cur_pic.mb_type[mb_xy];
 
-            error = s->error_status_table[mb_xy];
+            int error = s->error_status_table[mb_xy];
 
             if (IS_INTER(mb_type))
                 continue;
@@ -1176,6 +1267,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
             put_dc(s, dest_y, dest_cb, dest_cr, mb_x, mb_y);
         }
     }
+#endif
 
     if (s->avctx->error_concealment & FF_EC_DEBLOCK) {
         /* filter horizontal block boundaries */
@@ -1208,6 +1300,13 @@ ec_clean:
         s->mbintra_table[mb_xy] = 1;
     }
 
+    for (i = 0; i < 2; i++) {
+        av_buffer_unref(&s->ref_index_buf[i]);
+        av_buffer_unref(&s->motion_val_buf[i]);
+        s->cur_pic.ref_index[i]  = NULL;
+        s->cur_pic.motion_val[i] = NULL;
+    }
+
     memset(&s->cur_pic, 0, sizeof(ERPicture));
     memset(&s->last_pic, 0, sizeof(ERPicture));
     memset(&s->next_pic, 0, sizeof(ERPicture));
diff --git a/libavcodec/error_resilience.h b/libavcodec/error_resilience.h
index 7b9ec19..171af08 100644
--- a/libavcodec/error_resilience.h
+++ b/libavcodec/error_resilience.h
@@ -1,19 +1,19 @@
 /*
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -42,7 +42,7 @@ typedef struct ERPicture {
     AVFrame *f;
     ThreadFrame *tf;
 
-    // it's the caller responsability to allocate these buffers
+    // it's the caller's responsibility to allocate these buffers
     int16_t (*motion_val[2])[2];
     int8_t *ref_index[2];
 
@@ -72,6 +72,9 @@ typedef struct ERContext {
     ERPicture last_pic;
     ERPicture next_pic;
 
+    AVBufferRef *ref_index_buf[2];
+    AVBufferRef *motion_val_buf[2];
+
     uint16_t pp_time;
     uint16_t pb_time;
     int quarter_sample;
diff --git a/libavcodec/escape124.c b/libavcodec/escape124.c
index 30f22e0..bed1efb 100644
--- a/libavcodec/escape124.c
+++ b/libavcodec/escape124.c
@@ -2,20 +2,20 @@
  * Escape 124 Video Decoder
  * Copyright (C) 2008 Eli Friedman (eli.friedman@gmail.com)
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -49,10 +49,6 @@ typedef struct Escape124Context {
     CodeBook codebooks[3];
 } Escape124Context;
 
-static int can_safely_read(GetBitContext* gb, int bits) {
-    return get_bits_left(gb) >= bits;
-}
-
 /**
  * Initialize the decoder
  * @param avctx decoder context
@@ -93,7 +89,7 @@ static CodeBook unpack_codebook(GetBitContext* gb, unsigned depth,
     unsigned i, j;
     CodeBook cb = { 0 };
 
-    if (!can_safely_read(gb, size * 34))
+    if (size >= INT_MAX / 34 || get_bits_left(gb) < size * 34)
         return cb;
 
     if (size >= INT_MAX / sizeof(MacroBlock))
@@ -124,7 +120,7 @@ static unsigned decode_skip_count(GetBitContext* gb)
     unsigned value;
     // This function reads a maximum of 23 bits,
     // which is within the padding space
-    if (!can_safely_read(gb, 1))
+    if (get_bits_left(gb) < 1)
         return -1;
     value = get_bits1(gb);
     if (!value)
@@ -203,7 +199,6 @@ static int escape124_decode_frame(AVCodecContext *avctx,
                                   void *data, int *got_frame,
                                   AVPacket *avpkt)
 {
-    const uint8_t *buf = avpkt->data;
     int buf_size = avpkt->size;
     Escape124Context *s = avctx->priv_data;
     AVFrame *frame = data;
@@ -218,13 +213,15 @@ static int escape124_decode_frame(AVCodecContext *avctx,
 
     uint16_t* old_frame_data, *new_frame_data;
     unsigned old_stride, new_stride;
+
     int ret;
 
-    init_get_bits(&gb, buf, buf_size * 8);
+    if ((ret = init_get_bits8(&gb, avpkt->data, avpkt->size)) < 0)
+        return ret;
 
     // This call also guards the potential depth reads for the
     // codebook unpacking.
-    if (!can_safely_read(&gb, 64))
+    if (get_bits_left(&gb) < 64)
         return -1;
 
     frame_flags = get_bits_long(&gb, 32);
@@ -236,7 +233,7 @@ static int escape124_decode_frame(AVCodecContext *avctx,
         if (!s->frame->data[0])
             return AVERROR_INVALIDDATA;
 
-        av_log(NULL, AV_LOG_DEBUG, "Skipping frame\n");
+        av_log(avctx, AV_LOG_DEBUG, "Skipping frame\n");
 
         *got_frame = 1;
         if ((ret = av_frame_ref(frame, s->frame)) < 0)
@@ -273,10 +270,8 @@ static int escape124_decode_frame(AVCodecContext *avctx,
         }
     }
 
-    if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
         return ret;
-    }
 
     new_frame_data = (uint16_t*)frame->data[0];
     new_stride = frame->linesize[0] / 2;
@@ -302,7 +297,7 @@ static int escape124_decode_frame(AVCodecContext *avctx,
             copy_superblock(sb.pixels, 8,
                             old_frame_data, old_stride);
 
-            while (can_safely_read(&gb, 1) && !get_bits1(&gb)) {
+            while (get_bits_left(&gb) >= 1 && !get_bits1(&gb)) {
                 unsigned mask;
                 mb = decode_macroblock(s, &gb, &cb_index, superblock_index);
                 mask = get_bits(&gb, 16);
@@ -314,7 +309,7 @@ static int escape124_decode_frame(AVCodecContext *avctx,
                 }
             }
 
-            if (can_safely_read(&gb, 1) && !get_bits1(&gb)) {
+            if (!get_bits1(&gb)) {
                 unsigned inv_mask = get_bits(&gb, 4);
                 for (i = 0; i < 4; i++) {
                     if (inv_mask & (1 << i)) {
@@ -326,15 +321,13 @@ static int escape124_decode_frame(AVCodecContext *avctx,
 
                 for (i = 0; i < 16; i++) {
                     if (multi_mask & mask_matrix[i]) {
-                        if (!can_safely_read(&gb, 1))
-                            break;
                         mb = decode_macroblock(s, &gb, &cb_index,
                                                superblock_index);
                         insert_mb_into_sb(&sb, mb, i);
                     }
                 }
             } else if (frame_flags & (1 << 16)) {
-                while (can_safely_read(&gb, 1) && !get_bits1(&gb)) {
+                while (get_bits_left(&gb) >= 1 && !get_bits1(&gb)) {
                     mb = decode_macroblock(s, &gb, &cb_index, superblock_index);
                     insert_mb_into_sb(&sb, mb, get_bits(&gb, 4));
                 }
@@ -356,7 +349,7 @@ static int escape124_decode_frame(AVCodecContext *avctx,
         skip--;
     }
 
-    av_log(NULL, AV_LOG_DEBUG,
+    av_log(avctx, AV_LOG_DEBUG,
            "Escape sizes: %i, %i, %i\n",
            frame_size, buf_size, get_bits_count(&gb) / 8);
 
diff --git a/libavcodec/escape130.c b/libavcodec/escape130.c
index bc865a3..466b2b5 100644
--- a/libavcodec/escape130.c
+++ b/libavcodec/escape130.c
@@ -2,20 +2,20 @@
  * Escape 130 video decoder
  * Copyright (C) 2008 Eli Friedman (eli.friedman <at> gmail.com)
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -105,7 +105,7 @@ static const int8_t chroma_adjust[2][8] = {
     { 0, 1, 1,  1,  0, -1, -1, -1 }
 };
 
-const uint8_t chroma_vals[] = {
+static const uint8_t chroma_vals[] = {
      20,  28,  36,  44,  52,  60,  68,  76,
      84,  92, 100, 106, 112, 116, 120, 124,
     128, 132, 136, 140, 144, 150, 156, 164,
@@ -166,6 +166,9 @@ static int decode_skip_count(GetBitContext* gb)
 {
     int value;
 
+    if (get_bits_left(gb) < 1+3)
+        return -1;
+
     value = get_bits1(gb);
     if (value)
         return 0;
@@ -188,7 +191,6 @@ static int decode_skip_count(GetBitContext* gb)
 static int escape130_decode_frame(AVCodecContext *avctx, void *data,
                                   int *got_frame, AVPacket *avpkt)
 {
-    const uint8_t *buf  = avpkt->data;
     int buf_size        = avpkt->size;
     Escape130Context *s = avctx->priv_data;
     AVFrame *pic        = data;
@@ -215,7 +217,9 @@ static int escape130_decode_frame(AVCodecContext *avctx, void *data,
     if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
         return ret;
 
-    init_get_bits(&gb, buf + 16, (buf_size - 16) * 8);
+    if ((ret = init_get_bits8(&gb, avpkt->data, avpkt->size)) < 0)
+        return ret;
+    skip_bits_long(&gb, 16 * 8);
 
     new_y  = s->new_y;
     new_cb = s->new_u;
diff --git a/libavcodec/evrcdata.h b/libavcodec/evrcdata.h
new file mode 100644
index 0000000..ebcb0ac
--- /dev/null
+++ b/libavcodec/evrcdata.h
@@ -0,0 +1,1499 @@
+/*
+ * Enhanced Variable Rate Codec, Service Option 3 decoder
+ * Copyright (c) 2013 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_EVRCDATA_H
+#define AVCODEC_EVRCDATA_H
+
+/**
+ * @file
+ * Data tables for the EVRC decoder
+ * @author Paul B Mahol
+ */
+
+#include "libavutil/common.h"
+
+/**
+ * Rate 1/8 frame energy quantization codebook
+ * (256 rows of three floats, two rows per source line).
+ * TIA/IS-127 table 8-18
+ */
+static const float evrc_energy_quant[][3] = {
+{-0.2464E-01,-0.4005E-02,-0.1107E+00 }, { 0.8734E+00, 0.1004E+01, 0.9930E+00 },
+{ 0.4222E+00, 0.3894E+00, 0.5020E+00 }, { 0.1450E+01, 0.1328E+01, 0.1278E+01 },
+{ 0.1957E+00, 0.2169E+00, 0.2735E+00 }, { 0.1142E+01, 0.1240E+01, 0.1157E+01 },
+{ 0.7881E+00, 0.6778E+00, 0.4185E+00 }, { 0.1504E+01, 0.1468E+01, 0.1534E+01 },
+{ 0.3173E+00, 0.2693E+00,-0.9526E-01 }, { 0.1141E+01, 0.1154E+01, 0.1044E+01 },
+{ 0.5147E+00, 0.5784E+00, 0.8802E+00 }, { 0.1502E+01, 0.1407E+01, 0.1409E+01 },
+{ 0.3163E+00, 0.3592E+00, 0.2830E+00 }, { 0.1217E+01, 0.1213E+01, 0.1216E+01 },
+{ 0.1023E+01, 0.1139E+01,-0.9526E-01 }, { 0.1619E+01, 0.1655E+01, 0.1642E+01 },
+{ 0.1437E+00, 0.1505E+00, 0.6838E-01 }, { 0.9794E+00, 0.1021E+01, 0.1117E+01 },
+{ 0.4701E+00, 0.6426E+00, 0.5519E+00 }, { 0.1366E+01, 0.1397E+01, 0.1406E+01 },
+{ 0.2918E+00, 0.3022E+00, 0.2420E+00 }, { 0.1309E+01, 0.1241E+01, 0.1220E+01 },
+{ 0.7989E+00, 0.7654E+00, 0.7391E+00 }, { 0.1612E+01, 0.1502E+01, 0.1447E+01 },
+{ 0.2594E+00, 0.1948E+00, 0.2555E+00 }, { 0.1091E+01, 0.1150E+01, 0.1272E+01 },
+{ 0.3423E+00, 0.4150E+00, 0.1294E+01 }, { 0.1729E+01, 0.1377E+01, 0.1065E+01 },
+{ 0.4103E+00, 0.3287E+00, 0.3228E+00 }, { 0.1144E+01, 0.1281E+01, 0.1416E+01 },
+{ 0.1047E+01, 0.1117E+01, 0.6188E+00 }, { 0.1914E+01, 0.1777E+01, 0.1516E+01 },
+{-0.2117E-01, 0.2159E+00, 0.2351E+00 }, { 0.1093E+01, 0.1088E+01, 0.1026E+01 },
+{ 0.5567E+00, 0.5092E+00, 0.4654E+00 }, { 0.1510E+01, 0.1449E+01, 0.1201E+01 },
+{ 0.2362E+00, 0.3426E+00, 0.2549E+00 }, { 0.1340E+01, 0.1225E+01, 0.1117E+01 },
+{ 0.1203E+01, 0.3819E+00, 0.2269E+00 }, { 0.1373E+01, 0.1404E+01, 0.1830E+01 },
+{ 0.2570E+00, 0.2668E+00, 0.1636E+00 }, { 0.1219E+01, 0.1098E+01, 0.1122E+01 },
+{ 0.6985E+00, 0.8456E+00, 0.1069E+01 }, { 0.1550E+01, 0.1501E+01, 0.1388E+01 },
+{ 0.2870E+00, 0.3060E+00, 0.3599E+00 }, { 0.1178E+01, 0.1345E+01, 0.1302E+01 },
+{ 0.1270E+01, 0.1215E+01, 0.1812E+00 }, { 0.1725E+01, 0.1777E+01, 0.1693E+01 },
+{ 0.2074E+00, 0.2104E+00, 0.1539E+00 }, { 0.1105E+01, 0.1034E+01, 0.1104E+01 },
+{ 0.6683E+00, 0.6646E+00, 0.6639E+00 }, { 0.1403E+01, 0.1462E+01, 0.1435E+01 },
+{ 0.3389E+00, 0.3754E+00, 0.2150E+00 }, { 0.1288E+01, 0.1325E+01, 0.1257E+01 },
+{ 0.8933E+00, 0.8253E+00, 0.8133E+00 }, { 0.1555E+01, 0.1579E+01, 0.1565E+01 },
+{ 0.3264E+00, 0.2434E+00, 0.2852E+00 }, { 0.1242E+01, 0.1180E+01, 0.1202E+01 },
+{ 0.1314E+00, 0.1698E+00, 0.1646E+01 }, { 0.1797E+01, 0.1597E+01, 0.1241E+01 },
+{ 0.4721E+00, 0.5346E+00, 0.3066E+00 }, { 0.1274E+01, 0.1401E+01, 0.1351E+01 },
+{ 0.1455E+01, 0.1386E+01, 0.6430E+00 }, { 0.1828E+01, 0.1867E+01, 0.1825E+01 },
+{-0.3265E+00,-0.2956E+00,-0.2462E+00 }, { 0.1035E+01, 0.1020E+01, 0.1003E+01 },
+{ 0.3702E+00, 0.4307E+00, 0.7072E+00 }, { 0.1424E+01, 0.1345E+01, 0.1352E+01 },
+{ 0.2267E+00, 0.2680E+00, 0.3037E+00 }, { 0.1235E+01, 0.1249E+01, 0.1146E+01 },
+{ 0.9944E+00, 0.6485E+00, 0.5248E+00 }, { 0.1539E+01, 0.1492E+01, 0.1612E+01 },
+{ 0.3815E+00, 0.3360E+00,-0.9526E-01 }, { 0.1163E+01, 0.1144E+01, 0.1117E+01 },
+{ 0.6734E+00, 0.7656E+00, 0.1014E+01 }, { 0.1568E+01, 0.1438E+01, 0.1455E+01 },
+{ 0.3409E+00, 0.3317E+00, 0.3856E+00 }, { 0.1180E+01, 0.1284E+01, 0.1284E+01 },
+{ 0.1244E+01, 0.1214E+01,-0.9526E-01 }, { 0.1753E+01, 0.1598E+01, 0.1744E+01 },
+{ 0.1548E+00, 0.1388E+00, 0.2020E+00 }, { 0.1027E+01, 0.1133E+01, 0.1093E+01 },
+{ 0.3906E+00, 0.7505E+00, 0.5705E+00 }, { 0.1420E+01, 0.1357E+01, 0.1543E+01 },
+{ 0.3252E+00, 0.3136E+00, 0.2804E+00 }, { 0.1351E+01, 0.1309E+01, 0.1224E+01 },
+{ 0.8781E+00, 0.8095E+00, 0.7109E+00 }, { 0.1614E+01, 0.1580E+01, 0.1433E+01 },
+{ 0.3222E+00, 0.2298E+00, 0.2157E+00 }, { 0.1216E+01, 0.1077E+01, 0.1247E+01 },
+{ 0.1363E+01, 0.1280E+01, 0.1317E+01 }, { 0.1751E+01, 0.1457E+01, 0.1182E+01 },
+{ 0.4428E+00, 0.4082E+00, 0.3181E+00 }, { 0.1157E+01, 0.1227E+01, 0.1604E+01 },
+{ 0.1286E+01, 0.1268E+01, 0.8167E+00 }, { 0.1994E+01, 0.2018E+01, 0.1307E+01 },
+{ 0.2671E-01, 0.2594E+00, 0.3397E+00 }, { 0.1164E+01, 0.1080E+01, 0.9321E+00 },
+{ 0.5998E+00, 0.6076E+00, 0.5081E+00 }, { 0.1442E+01, 0.1442E+01, 0.1375E+01 },
+{ 0.2390E+00, 0.3554E+00, 0.3426E+00 }, { 0.1287E+01, 0.1307E+01, 0.1144E+01 },
+{ 0.1200E+01, 0.7495E+00, 0.3967E+00 }, { 0.1561E+01, 0.1517E+01, 0.1898E+01 },
+{ 0.3598E+00, 0.3463E+00, 0.1200E+00 }, { 0.1298E+01, 0.1125E+01, 0.1062E+01 },
+{ 0.7577E+00, 0.1013E+01, 0.1194E+01 }, { 0.1537E+01, 0.1513E+01, 0.1464E+01 },
+{ 0.4041E+00, 0.4038E+00, 0.3897E+00 }, { 0.1293E+01, 0.1219E+01, 0.1378E+01 },
+{ 0.1250E+01, 0.1391E+01, 0.2451E+00 }, { 0.1558E+01, 0.1764E+01, 0.1728E+01 },
+{ 0.2700E+00, 0.1894E+00, 0.1924E+00 }, { 0.1111E+01, 0.1112E+01, 0.1173E+01 },
+{ 0.7579E+00, 0.8342E+00, 0.4781E+00 }, { 0.1464E+01, 0.1477E+01, 0.1469E+01 },
+{ 0.4001E+00, 0.3104E+00, 0.2217E+00 }, { 0.1346E+01, 0.1421E+01, 0.1312E+01 },
+{ 0.1071E+01, 0.8967E+00, 0.7511E+00 }, { 0.1616E+01, 0.1551E+01, 0.1574E+01 },
+{ 0.3329E+00, 0.2785E+00, 0.3140E+00 }, { 0.1281E+01, 0.1209E+01, 0.1239E+01 },
+{ 0.2805E+00, 0.2687E+00, 0.1646E+01 }, { 0.1814E+01, 0.1514E+01, 0.1510E+01 },
+{ 0.6231E+00, 0.4200E+00, 0.3701E+00 }, { 0.1255E+01, 0.1429E+01, 0.1454E+01 },
+{ 0.1642E+01, 0.1581E+01, 0.7112E+00 }, { 0.1844E+01, 0.1963E+01, 0.1895E+01 },
+{-0.4208E-01,-0.1491E+00,-0.7639E-01 }, { 0.1046E+01, 0.9598E+00, 0.9176E+00 },
+{ 0.4478E+00, 0.4605E+00, 0.5111E+00 }, { 0.1521E+01, 0.1292E+01, 0.1342E+01 },
+{ 0.2220E+00, 0.2549E+00, 0.2510E+00 }, { 0.1186E+01, 0.1254E+01, 0.1171E+01 },
+{ 0.8999E+00, 0.4960E+00, 0.4943E+00 }, { 0.1423E+01, 0.1484E+01, 0.1620E+01 },
+{ 0.2796E+00, 0.2778E+00,-0.2820E+00 }, { 0.1170E+01, 0.1181E+01, 0.1076E+01 },
+{ 0.4068E+00, 0.8541E+00, 0.9352E+00 }, { 0.1584E+01, 0.1416E+01, 0.1387E+01 },
+{ 0.3325E+00, 0.3655E+00, 0.3340E+00 }, { 0.1224E+01, 0.1257E+01, 0.1245E+01 },
+{ 0.1061E+01, 0.1138E+01,-0.9526E-01 }, { 0.1681E+01, 0.1704E+01, 0.1673E+01 },
+{ 0.1932E+00, 0.1489E+00, 0.1258E+00 }, { 0.1023E+01, 0.1088E+01, 0.1145E+01 },
+{ 0.5190E+00, 0.6873E+00, 0.5172E+00 }, { 0.1380E+01, 0.1405E+01, 0.1474E+01 },
+{ 0.3393E+00, 0.3100E+00, 0.2231E+00 }, { 0.1354E+01, 0.1249E+01, 0.1270E+01 },
+{ 0.7363E+00, 0.8508E+00, 0.8247E+00 }, { 0.1612E+01, 0.1537E+01, 0.1509E+01 },
+{ 0.2952E+00, 0.2053E+00, 0.2590E+00 }, { 0.1138E+01, 0.1219E+01, 0.1262E+01 },
+{ 0.1345E+01, 0.1289E+01, 0.1338E+01 }, { 0.1437E+01, 0.1360E+01, 0.1442E+01 },
+{ 0.4826E+00, 0.3298E+00, 0.3842E+00 }, { 0.1219E+01, 0.1311E+01, 0.1413E+01 },
+{ 0.1212E+01, 0.1186E+01, 0.6357E+00 }, { 0.1873E+01, 0.1939E+01, 0.1674E+01 },
+{ 0.1260E+01, 0.1306E+01, 0.1368E+01 }, { 0.1146E+01, 0.1077E+01, 0.1025E+01 },
+{ 0.6029E+00, 0.5039E+00, 0.5781E+00 }, { 0.1514E+01, 0.1420E+01, 0.1324E+01 },
+{ 0.2652E+00, 0.3192E+00, 0.3042E+00 }, { 0.1368E+01, 0.1198E+01, 0.1200E+01 },
+{ 0.1234E+01, 0.4910E+00, 0.3464E-01 }, { 0.1347E+01, 0.1560E+01, 0.1861E+01 },
+{ 0.2766E+00, 0.2887E+00, 0.2029E+00 }, { 0.1257E+01, 0.1105E+01, 0.1145E+01 },
+{ 0.1351E+01, 0.1353E+01, 0.1406E+01 }, { 0.1506E+01, 0.1580E+01, 0.1362E+01 },
+{ 0.2794E+00, 0.3868E+00, 0.4277E+00 }, { 0.1234E+01, 0.1334E+01, 0.1336E+01 },
+{ 0.1280E+01, 0.1252E+01, 0.1805E+00 }, { 0.1387E+01, 0.1396E+01, 0.1434E+01 },
+{ 0.2902E+00, 0.1170E+00, 0.1698E+00 }, { 0.1134E+01, 0.1077E+01, 0.1117E+01 },
+{ 0.6986E+00, 0.7177E+00, 0.7366E+00 }, { 0.1370E+01, 0.1491E+01, 0.1495E+01 },
+{ 0.4031E+00, 0.5144E+00, 0.1751E+00 }, { 0.1333E+01, 0.1377E+01, 0.1257E+01 },
+{ 0.9212E+00, 0.8934E+00, 0.8897E+00 }, { 0.1589E+01, 0.1614E+01, 0.1523E+01 },
+{ 0.3152E+00, 0.2164E+00, 0.3230E+00 }, { 0.1300E+01, 0.1145E+01, 0.1212E+01 },
+{ 0.1269E+01, 0.1245E+01, 0.1497E+01 }, { 0.1763E+01, 0.1716E+01, 0.1311E+01 },
+{ 0.4702E+00, 0.5422E+00, 0.4306E+00 }, { 0.1342E+01, 0.1433E+01, 0.1423E+01 },
+{ 0.1472E+01, 0.1404E+01, 0.8371E+00 }, { 0.1936E+01, 0.1883E+01, 0.1838E+01 },
+{ 0.1266E+01, 0.1295E+01, 0.1302E+01 }, { 0.1074E+01, 0.1002E+01, 0.1023E+01 },
+{ 0.5206E+00, 0.4045E+00, 0.6549E+00 }, { 0.1457E+01, 0.1378E+01, 0.1363E+01 },
+{ 0.2715E+00, 0.2629E+00, 0.2841E+00 }, { 0.1264E+01, 0.1271E+01, 0.1175E+01 },
+{ 0.1337E+01, 0.1305E+01, 0.1306E+01 }, { 0.1555E+01, 0.1571E+01, 0.1657E+01 },
+{ 0.3341E+00, 0.4147E+00,-0.3648E+00 }, { 0.1188E+01, 0.1185E+01, 0.1161E+01 },
+{ 0.6198E+00, 0.7208E+00, 0.1157E+01 }, { 0.1582E+01, 0.1465E+01, 0.1513E+01 },
+{ 0.3839E+00, 0.3651E+00, 0.3814E+00 }, { 0.1214E+01, 0.1256E+01, 0.1292E+01 },
+{ 0.1361E+01, 0.1363E+01, 0.1312E+01 }, { 0.1793E+01, 0.1693E+01, 0.1669E+01 },
+{ 0.1889E+00, 0.1275E+00, 0.2534E+00 }, { 0.1066E+01, 0.1174E+01, 0.1133E+01 },
+{ 0.4999E+00, 0.8207E+00, 0.5813E+00 }, { 0.1478E+01, 0.1416E+01, 0.1497E+01 },
+{ 0.3814E+00, 0.3138E+00, 0.2889E+00 }, { 0.1396E+01, 0.1265E+01, 0.1233E+01 },
+{ 0.9458E+00, 0.9161E+00, 0.5875E+00 }, { 0.1672E+01, 0.1632E+01, 0.1553E+01 },
+{ 0.3505E+00, 0.2525E+00, 0.2364E+00 }, { 0.1211E+01, 0.1138E+01, 0.1235E+01 },
+{ 0.1391E+01, 0.1231E+01, 0.1355E+01 }, { 0.1783E+01, 0.1510E+01, 0.1199E+01 },
+{ 0.4227E+00, 0.4548E+00, 0.3671E+00 }, { 0.1281E+01, 0.1254E+01, 0.1661E+01 },
+{ 0.1338E+01, 0.1379E+01, 0.9531E+00 }, { 0.2148E+01, 0.1965E+01, 0.1584E+01 },
+{ 0.9324E-01, 0.3575E+00, 0.3522E+00 }, { 0.1212E+01, 0.1086E+01, 0.1044E+01 },
+{ 0.6128E+00, 0.6136E+00, 0.6060E+00 }, { 0.1484E+01, 0.1507E+01, 0.1396E+01 },
+{ 0.2820E+00, 0.3848E+00, 0.3156E+00 }, { 0.1368E+01, 0.1287E+01, 0.1128E+01 },
+{ 0.1369E+01, 0.1352E+01, 0.1358E+01 }, { 0.1381E+01, 0.1765E+01, 0.2113E+01 },
+{ 0.1314E+01, 0.1345E+01, 0.1334E+01 }, { 0.1290E+01, 0.1172E+01, 0.1119E+01 },
+{ 0.1304E+01, 0.1377E+01, 0.1427E+01 }, { 0.1490E+01, 0.1540E+01, 0.1536E+01 },
+{ 0.3994E+00, 0.4402E+00, 0.4173E+00 }, { 0.1323E+01, 0.1307E+01, 0.1392E+01 },
+{ 0.1400E+01, 0.1388E+01, 0.1369E+01 }, { 0.1669E+01, 0.1818E+01, 0.1834E+01 },
+{ 0.2742E+00, 0.2235E+00, 0.1986E+00 }, { 0.1137E+01, 0.1139E+01, 0.1201E+01 },
+{ 0.1324E+01, 0.1385E+01, 0.1349E+01 }, { 0.1455E+01, 0.1574E+01, 0.1454E+01 },
+{ 0.5019E+00, 0.3255E+00, 0.2555E+00 }, { 0.1388E+01, 0.1438E+01, 0.1300E+01 },
+{ 0.1394E+01, 0.1349E+01, 0.1411E+01 }, { 0.1639E+01, 0.1580E+01, 0.1681E+01 },
+{ 0.3920E+00, 0.2498E+00, 0.3523E+00 }, { 0.1301E+01, 0.1221E+01, 0.1285E+01 },
+{ 0.1318E+01, 0.1342E+01, 0.1494E+01 }, { 0.1910E+01, 0.1680E+01, 0.1470E+01 },
+{ 0.6082E+00, 0.5270E+00, 0.4173E+00 }, { 0.1255E+01, 0.1477E+01, 0.1503E+01 },
+{ 0.1807E+01, 0.1742E+01, 0.6553E+00 }, { 0.2000E+01, 0.2072E+01, 0.2051E+01 }};
+
+/**
+ * LSP vector quantization tables
+ *
+ * TIA/IS-127 tables 8-1 through 8-9
+ */
+
+static const float evrc_lspq_full_codebook1[64][2] = { /* 64 rows x 2 floats, two rows per source line */
+{1.42016308E-2, 1.93881616E-2}, {2.91667543E-2, 6.51749149E-2},
+{2.06693150E-2, 4.97564934E-2}, {3.94719802E-2, 9.55850929E-2},
+{2.27012448E-2, 3.96625809E-2}, {5.38789518E-2, 6.28347769E-2},
+{2.90525518E-2, 5.73435798E-2}, {4.48280610E-2, 1.15364626E-1},
+{1.94110647E-2, 3.46889682E-2}, {4.37502973E-2, 6.75228462E-2},
+{3.55497338E-2, 4.94086780E-2}, {6.99219853E-2, 8.67279768E-2},
+{2.77880151E-2, 4.65748496E-2}, {5.79111017E-2, 6.74542487E-2},
+{4.74664383E-2, 5.50271496E-2}, {7.88898915E-2, 1.22443043E-1},
+{2.21715886E-2, 3.02628800E-2}, {3.39134485E-2, 7.17703998E-2},
+{3.17989141E-2, 4.98996116E-2}, {6.11555986E-2, 8.73361230E-2},
+{2.67506503E-2, 3.96735854E-2}, {4.44100983E-2, 8.26731324E-2},
+{3.89172547E-2, 5.65788932E-2}, {6.04800619E-2, 1.04536951E-1},
+{2.69156620E-2, 3.57168876E-2}, {4.11117189E-2, 7.33322948E-2},
+{4.12660725E-2, 4.85165231E-2}, {7.18049556E-2, 1.06202349E-1},
+{3.38037871E-2, 4.24300395E-2}, {5.91818243E-2, 7.97467977E-2},
+{4.70107906E-2, 6.28563762E-2}, {9.42011923E-2, 1.30053163E-1},
+{1.94244273E-2, 2.72732340E-2}, {3.70831676E-2, 6.64898157E-2},
+{2.80136354E-2, 5.15984930E-2}, {5.34461029E-2, 9.25904214E-2},
+{2.54959203E-2, 4.32844795E-2}, {5.51860742E-2, 7.36182332E-2},
+{3.39851119E-2, 6.05329126E-2}, {6.18182123E-2, 1.34581268E-1},
+{2.35669166E-2, 3.55242006E-2}, {5.10804243E-2, 6.79562539E-2},
+{3.83464955E-2, 5.23469411E-2}, {7.44275749E-2, 9.66108292E-2},
+{3.18591148E-2, 4.62123118E-2}, {6.18909821E-2, 7.33231753E-2},
+{4.41718437E-2, 5.79240918E-2}, {7.93596208E-2, 1.41177371E-1},
+{2.47412287E-2, 3.23629379E-2}, {3.36563922E-2, 8.04650635E-2},
+{3.37943695E-2, 5.44977151E-2}, {6.53648973E-2, 9.52775925E-2},
+{2.93364152E-2, 4.28411029E-2}, {5.27870469E-2, 8.16159397E-2},
+{4.00724895E-2, 6.18144684E-2}, {6.75848573E-2, 1.17196076E-1},
+{3.03064957E-2, 3.86914052E-2}, {4.83106263E-2, 7.42383003E-2},
+{4.37548272E-2, 5.22842295E-2}, {8.32310021E-2, 1.09881967E-1},
+{3.75600643E-2, 4.53217216E-2}, {6.60113171E-2, 7.97580183E-2},
+{5.03225066E-2, 5.90176322E-2}, {8.77133310E-2, 1.63187444E-1}};
+
+static const float evrc_lspq_full_codebook2[64][2] = { /* 64 rows x 2 floats, two rows per source line */
+{5.21959551E-2, 8.38445649E-2}, {1.05874076E-1, 1.28694162E-1},
+{5.48323877E-2, 1.33842856E-1}, {1.17768474E-1, 1.94037274E-1},
+{5.36086522E-2, 1.11398734E-1}, {1.19989693E-1, 1.47474691E-1},
+{8.00373554E-2, 1.42999724E-1}, {1.64086595E-1, 2.09821835E-1},
+{5.21059223E-2, 9.95229408E-2}, {8.67567956E-2, 1.85966507E-1},
+{7.77341127E-2, 1.31506845E-1}, {1.60545513E-1, 1.81930289E-1},
+{7.42243677E-2, 1.10437103E-1}, {1.18635088E-1, 1.75306752E-1},
+{6.61557764E-2, 1.64441928E-1}, {1.96810856E-1, 2.16682002E-1},
+{6.05317838E-2, 9.45408568E-2}, {1.06271386E-1, 1.48013934E-1},
+{5.87486550E-2, 1.47724584E-1}, {1.34816468E-1, 2.01517954E-1},
+{6.59698322E-2, 1.16447397E-1}, {1.32297173E-1, 1.53267249E-1},
+{9.26660746E-2, 1.46725491E-1}, {1.79285541E-1, 2.19705954E-1},
+{7.06458464E-2, 9.99924466E-2}, {1.06500491E-1, 1.79443434E-1},
+{8.79249722E-2, 1.25287697E-1}, {1.53640196E-1, 1.97852716E-1},
+{8.88430104E-2, 1.12465657E-1}, {1.48286715E-1, 1.67517021E-1},
+{8.16568136E-2, 1.69274017E-1}, {2.07810536E-1, 2.31033549E-1},
+{6.14927970E-2, 8.36263224E-2}, {1.14473253E-1, 1.36779979E-1},
+{6.87129870E-2, 1.38099059E-1}, {1.10511415E-1, 2.15352878E-1},
+{5.55652268E-2, 1.22242786E-1}, {1.20557591E-1, 1.61072448E-1},
+{8.32249671E-2, 1.55475482E-1}, {1.61638483E-1, 2.28268847E-1},
+{6.29152283E-2, 1.06229566E-1}, {8.29186887E-2, 2.06774518E-1},
+{8.84756893E-2, 1.35799959E-1}, {1.69772223E-1, 1.93773940E-1},
+{7.77297840E-2, 1.20287232E-1}, {1.30648017E-1, 1.84331819E-1},
+{6.91939592E-2, 1.84218004E-1}, {2.03904077E-1, 2.49715164E-1},
+{7.07671717E-2, 9.03186128E-2}, {1.08471557E-1, 1.61966518E-1},
+{7.16886371E-2, 1.51093170E-1}, {1.38779536E-1, 2.18801782E-1},
+{6.75907061E-2, 1.26740307E-1}, {1.33412346E-1, 1.68838874E-1},
+{9.61822569E-2, 1.58728704E-1}, {1.86485633E-1, 2.36560926E-1},
+{8.23447108E-2, 1.02126025E-1}, {1.00336641E-1, 1.94918498E-1},
+{9.95981991E-2, 1.36425093E-1}, {1.82448462E-1, 2.03655198E-1},
+{9.78890732E-2, 1.21145472E-1}, {1.45453140E-1, 1.83604524E-1},
+{9.58395451E-2, 1.72194853E-1}, {2.23295853E-1, 2.46418610E-1}};
+
+static const float evrc_lspq_full_codebook3[512][3] = {
+{1.36425778E-1, 1.68651849E-1, 2.04688221E-1},
+{1.85717627E-1, 2.28756160E-1, 2.51958042E-1},
+{1.22760192E-1, 1.85950696E-1, 2.79446691E-1},
+{1.96468458E-1, 2.64484435E-1, 2.89318889E-1},
+{1.25653744E-1, 1.50529265E-1, 2.76144296E-1},
+{1.96301565E-1, 2.41699994E-1, 2.88230687E-1},
+{1.40099391E-1, 2.22365588E-1, 2.74666578E-1},
+{2.59952307E-1, 2.75394946E-1, 3.10975939E-1},
+{1.58452198E-1, 1.88591003E-1, 2.07339197E-1},
+{1.95616230E-1, 2.21379519E-1, 2.87022918E-1},
+{1.69424579E-1, 2.01614648E-1, 2.75669187E-1},
+{2.12393746E-1, 2.64250666E-1, 3.17967504E-1},
+{1.82965085E-1, 1.99547559E-1, 2.29538843E-1},
+{2.15200707E-1, 2.62409419E-1, 2.82432705E-1},
+{1.46404549E-1, 2.36966729E-1, 2.90067106E-1},
+{2.45338634E-1, 3.03358108E-1, 3.42260152E-1},
+{1.37478963E-1, 1.58276558E-1, 2.39217222E-1},
+{2.01999024E-1, 2.20102608E-1, 2.69546896E-1},
+{1.18350029E-1, 2.30206400E-1, 2.83554822E-1},
+{2.25519255E-1, 2.72272140E-1, 3.06072980E-1},
+{1.35661438E-1, 1.91633970E-1, 2.65912026E-1},
+{1.95733085E-1, 2.31926173E-1, 3.14376086E-1},
+{1.67998984E-1, 2.27706313E-1, 2.76947826E-1},
+{2.50170559E-1, 3.01627070E-1, 3.21084231E-1},
+{1.33492306E-1, 2.01223105E-1, 2.33893991E-1},
+{2.06442133E-1, 2.38704175E-1, 2.77560145E-1},
+{1.79048792E-1, 1.95776582E-1, 2.80656606E-1},
+{2.06193641E-1, 2.64055401E-1, 3.33098441E-1},
+{1.75185278E-1, 1.91166341E-1, 2.57540315E-1},
+{2.28398636E-1, 2.45296657E-1, 3.08980793E-1},
+{1.80859819E-1, 2.43579060E-1, 2.96631068E-1},
+{2.76152968E-1, 3.08256060E-1, 3.46822590E-1},
+{1.37115732E-1, 1.80057764E-1, 2.20953465E-1},
+{1.81370094E-1, 2.26770103E-1, 2.70392686E-1},
+{1.25246510E-1, 1.79606944E-1, 3.10376436E-1},
+{1.90708354E-1, 2.87734240E-1, 3.13476235E-1},
+{1.30486086E-1, 1.60435289E-1, 3.00243706E-1},
+{1.97318628E-1, 2.56378502E-1, 2.78474301E-1},
+{1.58597067E-1, 2.37381399E-1, 2.62910336E-1},
+{2.61825919E-1, 2.77717203E-1, 3.31382245E-1},
+{1.64160743E-1, 1.85841531E-1, 2.35615849E-1},
+{2.09486142E-1, 2.21452802E-1, 2.92153865E-1},
+{1.66807845E-1, 2.13641763E-1, 2.70675927E-1},
+{2.29834273E-1, 2.88374633E-1, 3.06238323E-1},
+{1.82154253E-1, 2.00822473E-1, 2.40169376E-1},
+{2.24944726E-1, 2.69813925E-1, 2.91401237E-1},
+{1.63940564E-1, 2.50341147E-1, 2.78307766E-1},
+{2.56727993E-1, 2.95103759E-1, 3.53297085E-1},
+{1.40218839E-1, 1.76687688E-1, 2.46773273E-1},
+{2.15291306E-1, 2.29216009E-1, 2.64283627E-1},
+{1.21002659E-1, 2.18333840E-1, 3.22341293E-1},
+{2.54243195E-1, 2.73986191E-1, 2.96262473E-1},
+{1.60385415E-1, 1.83762908E-1, 2.81598717E-1},
+{1.87832162E-1, 2.37420350E-1, 3.29777509E-1},
+{1.77788362E-1, 2.26703495E-1, 3.02322537E-1},
+{2.75108218E-1, 2.93730587E-1, 3.12373787E-1},
+{1.70116410E-1, 1.85232103E-1, 2.46125028E-1},
+{2.21754774E-1, 2.39912242E-1, 2.86891907E-1},
+{1.95083722E-1, 2.08337873E-1, 2.88349718E-1},
+{2.37536535E-1, 2.75004476E-1, 3.39786023E-1},
+{1.88369319E-1, 2.04371840E-1, 2.57375032E-1},
+{2.47250155E-1, 2.60551840E-1, 3.02137524E-1},
+{1.66944191E-1, 2.46912360E-1, 3.18894416E-1},
+{2.78118610E-1, 3.13011140E-1, 3.65329295E-1},
+{1.45213529E-1, 1.63051456E-1, 2.24912614E-1},
+{2.05692515E-1, 2.20831484E-1, 2.52817810E-1},
+{1.21125661E-1, 1.96374118E-1, 3.00122708E-1},
+{2.15566799E-1, 2.65657336E-1, 2.99202889E-1},
+{1.09134212E-1, 1.78472102E-1, 2.88323194E-1},
+{2.03508541E-1, 2.40347922E-1, 2.96309739E-1},
+{1.53101787E-1, 2.25415319E-1, 2.84843713E-1},
+{2.50233442E-1, 2.77736932E-1, 3.24840695E-1},
+{1.66308925E-1, 1.94173396E-1, 2.11635381E-1},
+{2.01289460E-1, 2.26062179E-1, 2.93246478E-1},
+{1.49518773E-1, 2.14201719E-1, 2.83894747E-1},
+{2.21836135E-1, 2.85231501E-1, 3.20082635E-1},
+{1.89573213E-1, 2.06577629E-1, 2.30332345E-1},
+{2.31247649E-1, 2.46864259E-1, 2.89846569E-1},
+{1.39116928E-1, 2.59189934E-1, 2.98019558E-1},
+{2.44512573E-1, 2.82671362E-1, 3.61258298E-1},
+{1.22530967E-1, 1.68514788E-1, 2.70879298E-1},
+{2.04372838E-1, 2.30398357E-1, 2.71792918E-1},
+{1.42643943E-1, 2.22405583E-1, 2.92057186E-1},
+{2.42643669E-1, 2.77429372E-1, 2.97135502E-1},
+{1.52048603E-1, 1.96921080E-1, 2.61013240E-1},
+{2.17875019E-1, 2.45840371E-1, 3.08138579E-1},
+{1.90109268E-1, 2.31099129E-1, 2.80178159E-1},
+{2.54314184E-1, 2.94079810E-1, 3.39649171E-1},
+{1.56698599E-1, 2.08597451E-1, 2.28010774E-1},
+{2.25088730E-1, 2.50014484E-1, 2.76250154E-1},
+{1.78219035E-1, 1.98228240E-1, 3.04198891E-1},
+{2.08567217E-1, 2.92395383E-1, 3.46786886E-1},
+{1.71052113E-1, 2.03438759E-1, 2.62644321E-1},
+{2.30275467E-1, 2.58817524E-1, 3.11986536E-1},
+{1.85333565E-1, 2.45760202E-1, 3.10553998E-1},
+{2.89413869E-1, 3.11095625E-1, 3.46476167E-1},
+{1.50332406E-1, 1.67538226E-1, 2.40182847E-1},
+{1.79971650E-1, 2.37168610E-1, 2.60899693E-1},
+{1.49866179E-1, 1.97890073E-1, 3.07916552E-1},
+{2.10799649E-1, 2.88180083E-1, 3.29747230E-1},
+{1.31711140E-1, 1.65906459E-1, 3.22898000E-1},
+{2.14832023E-1, 2.52822131E-1, 2.97547072E-1},
+{1.83760419E-1, 2.37523615E-1, 2.74610013E-1},
+{2.55575180E-1, 2.75439233E-1, 3.46021861E-1},
+{1.82662204E-1, 1.99470907E-1, 2.16051653E-1},
+{2.09240332E-1, 2.22406715E-1, 3.02382857E-1},
+{1.84088245E-1, 2.11327791E-1, 2.82538086E-1},
+{2.41171077E-1, 2.97036022E-1, 3.15979272E-1},
+{1.96804658E-1, 2.11815894E-1, 2.41647676E-1},
+{2.42761984E-1, 2.58586556E-1, 2.93204397E-1},
+{1.58905461E-1, 2.65077025E-1, 2.89881319E-1},
+{2.58060575E-1, 3.18903178E-1, 3.47846836E-1},
+{1.48766384E-1, 1.66853935E-1, 2.66827434E-1},
+{2.15942249E-1, 2.29938298E-1, 2.76041597E-1},
+{1.38410494E-1, 2.39283442E-1, 3.27972382E-1},
+{2.43765280E-1, 2.88408488E-1, 3.06048721E-1},
+{1.70157120E-1, 1.89986289E-1, 2.81219155E-1},
+{2.19117031E-1, 2.58005291E-1, 3.26571971E-1},
+{1.92163572E-1, 2.23614186E-1, 2.98683077E-1},
+{2.73545444E-1, 3.12078089E-1, 3.30766588E-1},
+{1.62452087E-1, 2.04930902E-1, 2.53337711E-1},
+{2.23855302E-1, 2.37671077E-1, 3.03202003E-1},
+{1.93955287E-1, 2.12335557E-1, 3.07566851E-1},
+{2.29912683E-1, 2.97581047E-1, 3.37499231E-1},
+{1.89335391E-1, 2.04148144E-1, 2.78609782E-1},
+{2.42303565E-1, 2.73163110E-1, 3.15361649E-1},
+{1.55009672E-1, 2.88095146E-1, 3.35996419E-1},
+{2.73716152E-1, 3.31215471E-1, 3.62539083E-1},
+{1.52389362E-1, 1.72619134E-1, 1.90585673E-1},
+{1.96988270E-1, 2.26309747E-1, 2.46197492E-1},
+{1.20555148E-1, 2.06369758E-1, 2.81199783E-1},
+{1.93709418E-1, 2.71900505E-1, 3.01332921E-1},
+{1.36701152E-1, 1.54093146E-1, 2.82258362E-1},
+{1.97299168E-1, 2.53656298E-1, 2.90315062E-1},
+{1.43463776E-1, 2.43872911E-1, 2.75533706E-1},
+{2.58477271E-1, 2.73279876E-1, 3.21119100E-1},
+{1.54406175E-1, 1.93793535E-1, 2.15884149E-1},
+{2.05979452E-1, 2.24277020E-1, 2.85732359E-1},
+{1.74535319E-1, 2.08482355E-1, 2.79668540E-1},
+{2.18844578E-1, 2.72486299E-1, 3.27095598E-1},
+{1.77609727E-1, 2.12990195E-1, 2.39119649E-1},
+{2.29163751E-1, 2.59165913E-1, 2.83514649E-1},
+{1.57353148E-1, 2.39961296E-1, 3.04263145E-1},
+{2.45613828E-1, 3.16824526E-1, 3.42909366E-1},
+{1.42953232E-1, 1.61905348E-1, 2.53710240E-1},
+{2.10192814E-1, 2.22847700E-1, 2.71103770E-1},
+{1.26843944E-1, 2.16709048E-1, 2.97734648E-1},
+{2.31000140E-1, 2.80109137E-1, 2.99707443E-1},
+{1.52980462E-1, 1.93996876E-1, 2.72895664E-1},
+{2.12860718E-1, 2.41545349E-1, 3.16518754E-1},
+{1.71154693E-1, 2.22469687E-1, 2.93786496E-1},
+{2.51988232E-1, 3.04254979E-1, 3.31269950E-1},
+{1.33188918E-1, 2.07924992E-1, 2.55362093E-1},
+{2.12044910E-1, 2.42189646E-1, 2.88903743E-1},
+{1.84612468E-1, 2.01143622E-1, 2.86360770E-1},
+{2.18286708E-1, 2.76752442E-1, 3.44581515E-1},
+{1.83562174E-1, 1.99478507E-1, 2.62156576E-1},
+{2.33130530E-1, 2.49596909E-1, 3.15842837E-1},
+{1.89898983E-1, 2.46874869E-1, 2.97132462E-1},
+{2.75022447E-1, 3.22490305E-1, 3.46977681E-1},
+{1.42305329E-1, 1.92689180E-1, 2.16155857E-1},
+{1.95676163E-1, 2.22268641E-1, 2.76587397E-1},
+{1.33241490E-1, 1.97791785E-1, 3.22897941E-1},
+{1.84865132E-1, 2.97106177E-1, 3.26105148E-1},
+{1.50203660E-1, 1.76781267E-1, 2.91536182E-1},
+{2.03144446E-1, 2.59616166E-1, 2.99156040E-1},
+{1.65488973E-1, 2.38342047E-1, 2.87493914E-1},
+{2.71071255E-1, 2.89544493E-1, 3.19521040E-1},
+{1.68598369E-1, 1.98825568E-1, 2.30347604E-1},
+{2.13811651E-1, 2.34471768E-1, 2.90959626E-1},
+{1.74605444E-1, 2.17256010E-1, 2.85688072E-1},
+{2.28503481E-1, 2.96190292E-1, 3.16534668E-1},
+{1.87172607E-1, 2.20547438E-1, 2.39688724E-1},
+{2.28884771E-1, 2.63583153E-1, 3.01329464E-1},
+{1.77897051E-1, 2.58131474E-1, 2.81487674E-1},
+{2.59513617E-1, 3.07204396E-1, 3.48793596E-1},
+{1.45224437E-1, 1.78715974E-1, 2.59186983E-1},
+{2.19062313E-1, 2.38223523E-1, 2.60461539E-1},
+{1.43650874E-1, 2.09760785E-1, 3.15830201E-1},
+{2.50127465E-1, 2.79182345E-1, 3.05153579E-1},
+{1.48986444E-1, 2.01226771E-1, 2.82543689E-1},
+{2.08387777E-1, 2.35603899E-1, 3.45363885E-1},
+{1.85830340E-1, 2.21607298E-1, 3.10773641E-1},
+{2.80904710E-1, 2.95469791E-1, 3.25499445E-1},
+{1.72967300E-1, 1.97078109E-1, 2.45801106E-1},
+{2.19495699E-1, 2.44767100E-1, 2.93587774E-1},
+{1.83909580E-1, 2.15004295E-1, 3.00334543E-1},
+{2.45338634E-1, 2.68595248E-1, 3.48330349E-1},
+{1.92957386E-1, 2.06625074E-1, 2.67336398E-1},
+{2.54845560E-1, 2.68642277E-1, 3.03547889E-1},
+{1.76853105E-1, 2.59330958E-1, 3.16200763E-1},
+{2.90929139E-1, 3.15634757E-1, 3.68723541E-1},
+{1.57116994E-1, 1.73552901E-1, 2.28736520E-1},
+{2.12509260E-1, 2.30501205E-1, 2.52217978E-1},
+{1.42521843E-1, 2.01979935E-1, 2.93012232E-1},
+{2.14919671E-1, 2.78065056E-1, 3.14176053E-1},
+{1.35947272E-1, 1.81055903E-1, 2.75475413E-1},
+{1.98416695E-1, 2.41673797E-1, 3.05173427E-1},
+{1.59517333E-1, 2.31580108E-1, 2.95412451E-1},
+{2.58203626E-1, 2.87348121E-1, 3.20351988E-1},
+{1.74840674E-1, 1.92883253E-1, 2.11250007E-1},
+{2.02168509E-1, 2.27025688E-1, 3.04884046E-1},
+{1.69532105E-1, 2.11826235E-1, 2.97355384E-1},
+{2.30033740E-1, 2.91504353E-1, 3.26589435E-1},
+{1.95046112E-1, 2.11709172E-1, 2.27705747E-1},
+{2.37926885E-1, 2.52411634E-1, 2.97752172E-1},
+{1.53762922E-1, 2.46541560E-1, 3.14768940E-1},
+{2.36075714E-1, 3.03568929E-1, 3.70624453E-1},
+{1.38660327E-1, 1.67949975E-1, 2.73515254E-1},
+{2.13806167E-1, 2.27267206E-1, 2.86276251E-1},
+{1.25080630E-1, 2.44098395E-1, 3.02548796E-1},
+{2.35714868E-1, 2.81208843E-1, 3.08903724E-1},
+{1.51691392E-1, 2.10877746E-1, 2.63812989E-1},
+{2.20730439E-1, 2.52777904E-1, 3.16413730E-1},
+{1.84924737E-1, 2.39424765E-1, 2.85120815E-1},
+{2.59548545E-1, 3.09809893E-1, 3.26423734E-1},
+{1.62930742E-1, 2.19900876E-1, 2.36148626E-1},
+{2.34194234E-1, 2.49944329E-1, 2.77549058E-1},
+{1.70870200E-1, 1.98291600E-1, 3.21412593E-1},
+{2.31566861E-1, 2.75015086E-1, 3.69710356E-1},
+{1.80002406E-1, 2.06701040E-1, 2.71204919E-1},
+{2.38075271E-1, 2.54006237E-1, 3.23827595E-1},
+{1.99148253E-1, 2.54273921E-1, 3.07479709E-1},
+{2.87428617E-1, 3.25045079E-1, 3.48634571E-1},
+{1.45285025E-1, 1.91359162E-1, 2.49691397E-1},
+{1.94659308E-1, 2.40821242E-1, 2.77302653E-1},
+{1.53150991E-1, 1.94375664E-1, 3.27550441E-1},
+{2.04085842E-1, 2.98595697E-1, 3.21480066E-1},
+{1.56009689E-1, 1.81012720E-1, 3.00931662E-1},
+{2.10962430E-1, 2.55770296E-1, 3.08086127E-1},
+{1.85444072E-1, 2.49021322E-1, 2.74029821E-1},
+{2.74493456E-1, 2.89441973E-1, 3.38794917E-1},
+{1.76941887E-1, 1.94476932E-1, 2.22077265E-1},
+{2.16377512E-1, 2.30735779E-1, 3.03689271E-1},
+{1.89683452E-1, 2.14660764E-1, 2.88445383E-1},
+{2.40827337E-1, 2.98141748E-1, 3.27378422E-1},
+{2.01787844E-1, 2.19441772E-1, 2.39327446E-1},
+{2.48812512E-1, 2.65865892E-1, 2.93382376E-1},
+{1.82027832E-1, 2.68279046E-1, 2.93991417E-1},
+{2.56498635E-1, 3.19984466E-1, 3.62663239E-1},
+{1.58799276E-1, 1.75433666E-1, 2.67389864E-1},
+{2.24259302E-1, 2.36668259E-1, 2.77639121E-1},
+{1.49203405E-1, 2.26585329E-1, 3.45255584E-1},
+{2.50655770E-1, 2.92264849E-1, 3.13574284E-1},
+{1.58096299E-1, 2.02193201E-1, 2.98711687E-1},
+{2.28820905E-1, 2.48557344E-1, 3.44726473E-1},
+{1.87972054E-1, 2.34109432E-1, 3.04235607E-1},
+{2.85657108E-1, 3.14878136E-1, 3.36931497E-1},
+{1.62680015E-1, 2.17820048E-1, 2.57436782E-1},
+{2.24049792E-1, 2.46739820E-1, 3.00795883E-1},
+{2.01354548E-1, 2.18286663E-1, 3.13036293E-1},
+{2.38028511E-1, 2.98103482E-1, 3.53503793E-1},
+{1.98829994E-1, 2.12877125E-1, 2.72980839E-1},
+{2.50616491E-1, 2.67659992E-1, 3.20611864E-1},
+{1.70901820E-1, 2.69330353E-1, 3.34428221E-1},
+{3.04988861E-1, 3.36196691E-1, 3.65235358E-1},
+{1.47624031E-1, 1.81272805E-1, 2.04707921E-1},
+{1.93751350E-1, 2.20973969E-1, 2.61775166E-1},
+{1.32089809E-1, 1.94851607E-1, 2.83547610E-1},
+{2.07739428E-1, 2.70596832E-1, 2.92264789E-1},
+{1.27733424E-1, 1.66896015E-1, 2.83891350E-1},
+{2.05309406E-1, 2.47807533E-1, 2.83632785E-1},
+{1.54211894E-1, 2.25014091E-1, 2.70082027E-1},
+{2.67574131E-1, 2.84426898E-1, 3.09334785E-1},
+{1.68846920E-1, 1.87004536E-1, 2.02433169E-1},
+{2.02441111E-1, 2.16733068E-1, 2.93079227E-1},
+{1.63621262E-1, 2.15616465E-1, 2.82792896E-1},
+{2.25509301E-1, 2.66283005E-1, 3.17886561E-1},
+{1.89110294E-1, 2.05609441E-1, 2.22113580E-1},
+{2.21240178E-1, 2.60288864E-1, 2.92541057E-1},
+{1.55563369E-1, 2.46850818E-1, 2.89648801E-1},
+{2.48406157E-1, 3.05291861E-1, 3.55316669E-1},
+{1.27122149E-1, 1.58053726E-1, 2.54164368E-1},
+{2.04998836E-1, 2.19476849E-1, 2.78342038E-1},
+{1.33302316E-1, 2.29614019E-1, 2.86947161E-1},
+{2.36777052E-1, 2.67918199E-1, 3.08230907E-1},
+{1.40853569E-1, 2.03414679E-1, 2.73257107E-1},
+{2.07684264E-1, 2.34520018E-1, 3.24583262E-1},
+{1.77181646E-1, 2.29595393E-1, 2.83539146E-1},
+{2.61378348E-1, 3.01160187E-1, 3.21707100E-1},
+{1.48595735E-1, 2.07772017E-1, 2.46946126E-1},
+{2.14334831E-1, 2.48061299E-1, 2.72259146E-1},
+{1.76380262E-1, 1.96897894E-1, 2.92286903E-1},
+{1.98193476E-1, 2.75483340E-1, 3.49037558E-1},
+{1.76153168E-1, 1.93248957E-1, 2.69548506E-1},
+{2.36968622E-1, 2.50065804E-1, 3.06820840E-1},
+{1.76060721E-1, 2.54037619E-1, 3.03566784E-1},
+{2.82952905E-1, 3.01765054E-1, 3.53956312E-1},
+{1.45353720E-1, 1.83678836E-1, 2.34750062E-1},
+{1.93842635E-1, 2.30635554E-1, 2.67817765E-1},
+{1.38958976E-1, 1.86760783E-1, 3.13113242E-1},
+{1.99944481E-1, 2.77624756E-1, 3.25046331E-1},
+{1.42966077E-1, 1.71310842E-1, 3.03013414E-1},
+{2.07741663E-1, 2.58691758E-1, 2.88766950E-1},
+{1.71776935E-1, 2.40246087E-1, 2.73284525E-1},
+{2.71046638E-1, 2.85170943E-1, 3.27401131E-1},
+{1.69854626E-1, 1.87545776E-1, 2.24484712E-1},
+{2.15221986E-1, 2.27339745E-1, 2.95008808E-1},
+{1.75596640E-1, 2.17936546E-1, 2.74879605E-1},
+{2.34665439E-1, 2.89530903E-1, 3.16494375E-1},
+{1.89946994E-1, 2.04953820E-1, 2.46955171E-1},
+{2.37297818E-1, 2.68316716E-1, 2.90684313E-1},
+{1.69963166E-1, 2.53367484E-1, 2.92533010E-1},
+{2.70659864E-1, 2.97146112E-1, 3.56183976E-1},
+{1.52539685E-1, 1.70138955E-1, 2.52703935E-1},
+{2.19119206E-1, 2.35900700E-1, 2.69739121E-1},
+{1.42245665E-1, 2.18184620E-1, 3.28218073E-1},
+{2.61472821E-1, 2.78025657E-1, 3.02375883E-1},
+{1.53526023E-1, 1.90727741E-1, 2.92820841E-1},
+{2.09240988E-1, 2.49808684E-1, 3.24709088E-1},
+{1.75176397E-1, 2.38646746E-1, 3.06392699E-1},
+{2.73218870E-1, 3.03954989E-1, 3.20513874E-1},
+{1.63911596E-1, 1.89611584E-1, 2.56272525E-1},
+{2.26953760E-1, 2.40120232E-1, 2.92728513E-1},
+{1.95565715E-1, 2.11956203E-1, 2.97374696E-1},
+{2.41045550E-1, 2.88497001E-1, 3.36352319E-1},
+{1.94948331E-1, 2.09475279E-1, 2.56309658E-1},
+{2.47884631E-1, 2.63356417E-1, 3.11270863E-1},
+{1.69189706E-1, 2.35864580E-1, 3.36249381E-1},
+{2.86001563E-1, 3.25423747E-1, 3.59607369E-1},
+{1.56258598E-1, 1.76704943E-1, 2.14393437E-1},
+{2.08996847E-1, 2.23968685E-1, 2.60886759E-1},
+{1.35765389E-1, 2.03580052E-1, 3.05503219E-1},
+{2.18961373E-1, 2.79463500E-1, 2.99450845E-1},
+{1.34064749E-1, 1.78332120E-1, 2.90169626E-1},
+{2.13298395E-1, 2.40031511E-1, 3.00345927E-1},
+{1.64373413E-1, 2.26438701E-1, 2.87171155E-1},
+{2.50739604E-1, 2.80812472E-1, 3.35349351E-1},
+{1.63649514E-1, 1.97108001E-1, 2.21165180E-1},
+{2.08139613E-1, 2.30869800E-1, 2.96137065E-1},
+{1.59113124E-1, 2.18189180E-1, 2.95531958E-1},
+{2.39883497E-1, 2.81831235E-1, 3.26045603E-1},
+{1.89394727E-1, 2.08127141E-1, 2.38446414E-1},
+{2.32995704E-1, 2.59603471E-1, 2.93427974E-1},
+{1.60558835E-1, 2.55164832E-1, 3.02872926E-1},
+{2.53509283E-1, 2.96028465E-1, 3.67721587E-1},
+{1.30124375E-1, 1.74838990E-1, 2.60486037E-1},
+{2.10203990E-1, 2.33570784E-1, 2.83061892E-1},
+{1.52365491E-1, 2.25338757E-1, 3.03720981E-1},
+{2.40558609E-1, 2.77192205E-1, 3.05891901E-1},
+{1.63728818E-1, 1.94779396E-1, 2.69253582E-1},
+{2.25709423E-1, 2.40902692E-1, 3.18060607E-1},
+{1.92055091E-1, 2.29857832E-1, 2.89826721E-1},
+{2.62759686E-1, 3.04292172E-1, 3.35680574E-1},
+{1.66071162E-1, 2.06819177E-1, 2.39712462E-1},
+{2.23915562E-1, 2.50106871E-1, 2.85296232E-1},
+{1.88402340E-1, 2.03793734E-1, 3.03041130E-1},
+{2.30698988E-1, 2.87044138E-1, 3.49802762E-1},
+{1.82025358E-1, 2.14073509E-1, 2.63470024E-1},
+{2.37297758E-1, 2.65025407E-1, 3.17815512E-1},
+{1.89278707E-1, 2.58802205E-1, 3.04866165E-1},
+{2.97243059E-1, 3.17153066E-1, 3.56583923E-1},
+{1.58607468E-1, 1.78659767E-1, 2.41919369E-1},
+{1.94887385E-1, 2.41695851E-1, 2.62176663E-1},
+{1.58124432E-1, 2.11753070E-1, 3.11352164E-1},
+{2.16902718E-1, 2.98796803E-1, 3.20994049E-1},
+{1.49272785E-1, 1.74964130E-1, 3.15334409E-1},
+{2.21622273E-1, 2.56179065E-1, 3.03902954E-1},
+{1.75979599E-1, 2.43505448E-1, 2.85801739E-1},
+{2.64590383E-1, 2.85541564E-1, 3.45107764E-1},
+{1.80137083E-1, 2.05279350E-1, 2.22255990E-1},
+{2.10796222E-1, 2.26315439E-1, 3.14426929E-1},
+{1.79151163E-1, 2.09439725E-1, 2.93280870E-1},
+{2.49719024E-1, 2.91257650E-1, 3.27162296E-1},
+{1.98700234E-1, 2.15896755E-1, 2.49960214E-1},
+{2.40726396E-1, 2.64857739E-1, 2.99639553E-1},
+{1.71249732E-1, 2.68166155E-1, 3.03572744E-1},
+{2.69555569E-1, 3.16100627E-1, 3.56570691E-1},
+{1.50564745E-1, 1.84190869E-1, 2.68674821E-1},
+{2.16941193E-1, 2.40813971E-1, 2.78942198E-1},
+{1.35399476E-1, 2.60586530E-1, 3.32604855E-1},
+{2.56150961E-1, 2.87822872E-1, 3.06156367E-1},
+{1.66398838E-1, 1.88721806E-1, 2.93023735E-1},
+{2.29214087E-1, 2.61565417E-1, 3.27494055E-1},
+{1.98266640E-1, 2.32970506E-1, 2.99134284E-1},
+{2.87046254E-1, 3.07103783E-1, 3.27298075E-1},
+{1.75898686E-1, 2.11898595E-1, 2.51332909E-1},
+{2.32067421E-1, 2.44622201E-1, 2.99443692E-1},
+{1.90780059E-1, 2.12090015E-1, 3.25059265E-1},
+{2.31531218E-1, 3.14166099E-1, 3.42735857E-1},
+{1.95099846E-1, 2.09554315E-1, 2.79483467E-1},
+{2.40416065E-1, 2.69604772E-1, 3.28015476E-1},
+{1.71800867E-1, 2.82233089E-1, 3.14749271E-1},
+{2.69243777E-1, 3.38462502E-1, 3.79935652E-1},
+{1.59934625E-1, 1.77966774E-1, 2.00818628E-1},
+{2.01979712E-1, 2.30668545E-1, 2.56773323E-1},
+{1.34024277E-1, 2.10961610E-1, 2.84687728E-1},
+{2.03712896E-1, 2.83053070E-1, 3.03309411E-1},
+{1.44528881E-1, 1.64728075E-1, 2.85079390E-1},
+{2.06285611E-1, 2.48649031E-1, 2.96383053E-1},
+{1.58138171E-1, 2.34317720E-1, 2.79650003E-1},
+{2.64995635E-1, 2.79900700E-1, 3.18619400E-1},
+{1.66537479E-1, 1.84279412E-1, 2.14547485E-1},
+{2.03051880E-1, 2.35110492E-1, 2.88755983E-1},
+{1.68422714E-1, 2.03946173E-1, 2.87478894E-1},
+{2.31727019E-1, 2.74086386E-1, 3.24755162E-1},
+{1.85356215E-1, 2.14113116E-1, 2.29030401E-1},
+{2.42482558E-1, 2.60655493E-1, 2.83030301E-1},
+{1.67562261E-1, 2.42027491E-1, 2.99461991E-1},
+{2.38809898E-1, 3.19003850E-1, 3.58415872E-1},
+{1.37908265E-1, 1.54787809E-1, 2.65611202E-1},
+{2.11019263E-1, 2.24607319E-1, 2.79954702E-1},
+{1.37569889E-1, 2.25128531E-1, 3.09312850E-1},
+{2.29239866E-1, 2.76150972E-1, 3.15241843E-1},
+{1.60487458E-1, 1.95461214E-1, 2.83169478E-1},
+{2.18505666E-1, 2.38197207E-1, 3.30340117E-1},
+{1.81991324E-1, 2.33026952E-1, 2.93276042E-1},
+{2.54552305E-1, 3.14394146E-1, 3.36392254E-1},
+{1.44095764E-1, 2.26640165E-1, 2.50595063E-1},
+{2.15188012E-1, 2.51417249E-1, 2.85043985E-1},
+{1.87674388E-1, 2.04458863E-1, 2.94168979E-1},
+{2.30494842E-1, 2.68452436E-1, 3.52370054E-1},
+{1.85022101E-1, 1.99075252E-1, 2.71930546E-1},
+{2.42569372E-1, 2.55389154E-1, 3.11399311E-1},
+{1.95166096E-1, 2.49102056E-1, 2.98998445E-1},
+{2.83654153E-1, 3.14600259E-1, 3.55619401E-1},
+{1.51490018E-1, 1.97729796E-1, 2.32467473E-1},
+{2.00029895E-1, 2.30101258E-1, 2.81933933E-1},
+{1.38711318E-1, 1.91816628E-1, 3.45780402E-1},
+{1.96580395E-1, 3.04714769E-1, 3.40553433E-1},
+{1.38154253E-1, 1.88543141E-1, 2.99461216E-1},
+{2.05666468E-1, 2.68904895E-1, 3.05537194E-1},
+{1.72447845E-1, 2.33558387E-1, 2.93625206E-1},
+{2.70145416E-1, 2.98654765E-1, 3.28556389E-1},
+{1.75489411E-1, 1.91361547E-1, 2.35585332E-1},
+{2.20548794E-1, 2.34773993E-1, 2.95397669E-1},
+{1.85652360E-1, 2.22349137E-1, 2.79883891E-1},
+{2.29456946E-1, 3.04546326E-1, 3.24684292E-1},
+{1.86900780E-1, 2.15469390E-1, 2.51856804E-1},
+{2.34910533E-1, 2.71217376E-1, 2.99894661E-1},
+{1.85142443E-1, 2.56071001E-1, 2.93291301E-1},
+{2.63883710E-1, 3.07127446E-1, 3.62546653E-1},
+{1.60997644E-1, 1.78937852E-1, 2.55808324E-1},
+{2.25671068E-1, 2.43735075E-1, 2.68624991E-1},
+{1.55076161E-1, 2.30396181E-1, 3.21005553E-1},
+{2.51760483E-1, 2.79653400E-1, 3.14202160E-1},
+{1.56988814E-1, 2.07466930E-1, 2.89933950E-1},
+{2.17479482E-1, 2.59626418E-1, 3.40659052E-1},
+{1.76811531E-1, 2.31087089E-1, 3.17562491E-1},
+{2.82952607E-1, 2.99844354E-1, 3.36822897E-1},
+{1.82060316E-1, 1.98734730E-1, 2.51980305E-1},
+{2.25874200E-1, 2.52469152E-1, 2.93356389E-1},
+{2.00799957E-1, 2.17786849E-1, 3.02210063E-1},
+{2.47423753E-1, 2.86882848E-1, 3.47820610E-1},
+{2.01128140E-1, 2.14746892E-1, 2.62269646E-1},
+{2.53963351E-1, 2.69477993E-1, 3.12133819E-1},
+{1.91034868E-1, 2.55738169E-1, 3.32559615E-1},
+{2.91053712E-1, 3.31458420E-1, 3.68588477E-1},
+{1.57229915E-1, 1.85374141E-1, 2.25361317E-1},
+{2.08051339E-1, 2.38350868E-1, 2.64212936E-1},
+{1.46848336E-1, 2.13000089E-1, 3.00192565E-1},
+{2.18630567E-1, 2.90263802E-1, 3.09045762E-1},
+{1.43699184E-1, 1.87815160E-1, 2.83769876E-1},
+{2.07328036E-1, 2.45088696E-1, 3.08956414E-1},
+{1.64228097E-1, 2.27826655E-1, 3.08907896E-1},
+{2.61919737E-1, 2.91333705E-1, 3.31527978E-1},
+{1.70648888E-1, 2.02157527E-1, 2.17827827E-1},
+{2.07796112E-1, 2.34704822E-1, 3.06783766E-1},
+{1.72118798E-1, 2.14057386E-1, 3.10151786E-1},
+{2.29116157E-1, 2.80949861E-1, 3.33774298E-1},
+{1.96622208E-1, 2.16653049E-1, 2.33279720E-1},
+{2.37789229E-1, 2.58971304E-1, 3.04609209E-1},
+{1.55182019E-1, 2.63032585E-1, 3.18943053E-1},
+{2.49388829E-1, 3.16970855E-1, 3.77762467E-1},
+{1.51363596E-1, 1.75010651E-1, 2.78245836E-1},
+{2.19810233E-1, 2.32360214E-1, 2.85034925E-1},
+{1.42630622E-1, 2.40602851E-1, 3.04125100E-1},
+{2.42764875E-1, 2.83762127E-1, 3.15481216E-1},
+{1.57467470E-1, 2.07524061E-1, 2.75674909E-1},
+{2.28758618E-1, 2.49092206E-1, 3.28139395E-1},
+{1.90872714E-1, 2.38125205E-1, 2.94894546E-1},
+{2.66389251E-1, 3.14321429E-1, 3.38669509E-1},
+{1.70644209E-1, 2.25980043E-1, 2.47372389E-1},
+{2.36442789E-1, 2.53003448E-1, 2.88220435E-1},
+{1.85423777E-1, 2.04888850E-1, 3.14608842E-1},
+{2.17379019E-1, 2.94553548E-1, 3.67831022E-1},
+{1.88563988E-1, 2.15174288E-1, 2.72999734E-1},
+{2.45102122E-1, 2.59770364E-1, 3.21885556E-1},
+{1.98444173E-1, 2.61160702E-1, 3.17097872E-1},
+{2.99013853E-1, 3.28965336E-1, 3.56681198E-1},
+{1.58248767E-1, 1.92205697E-1, 2.46059090E-1},
+{2.02385351E-1, 2.47965842E-1, 2.71749645E-1},
+{1.61710784E-1, 2.13708103E-1, 3.27384740E-1},
+{2.14419708E-1, 3.05552453E-1, 3.33721548E-1},
+{1.61819980E-1, 1.89897299E-1, 3.10501546E-1},
+{2.19436333E-1, 2.65029579E-1, 3.09288830E-1},
+{1.88303933E-1, 2.49633163E-1, 2.85499543E-1},
+{2.69325376E-1, 2.99807042E-1, 3.41722459E-1},
+{1.72406003E-1, 2.10977256E-1, 2.27773219E-1},
+{2.20281526E-1, 2.34015763E-1, 3.12846094E-1},
+{1.83257267E-1, 2.22061962E-1, 2.91052371E-1},
+{2.42531225E-1, 3.09527606E-1, 3.30389649E-1},
+{2.07546696E-1, 2.24662632E-1, 2.44420141E-1},
+{2.45858207E-1, 2.70285994E-1, 3.05132121E-1},
+{1.84840545E-1, 2.72096783E-1, 3.12531084E-1},
+{2.74252594E-1, 3.21252435E-1, 3.74658197E-1},
+{1.66425839E-1, 1.84491634E-1, 2.68278092E-1},
+{2.28423670E-1, 2.43025422E-1, 2.81184882E-1},
+{1.60091296E-1, 2.52953321E-1, 3.35822314E-1},
+{2.62109995E-1, 2.95581907E-1, 3.13354105E-1},
+{1.67702749E-1, 2.01536924E-1, 3.01801592E-1},
+{2.37822965E-1, 2.59894758E-1, 3.38231117E-1},
+{1.97206214E-1, 2.45490909E-1, 3.17895442E-1},
+{2.98455298E-1, 3.19209784E-1, 3.40971738E-1},
+{1.71195343E-1, 2.24327832E-1, 2.62736112E-1},
+{2.30626896E-1, 2.53310233E-1, 3.01206797E-1},
+{2.04814211E-1, 2.21881568E-1, 3.25966567E-1},
+{2.22987518E-1, 3.06339115E-1, 3.50717157E-1},
+{2.00855389E-1, 2.15359926E-1, 2.84143478E-1},
+{2.50951648E-1, 2.66189247E-1, 3.33360583E-1},
+{1.75610259E-1, 2.93791324E-1, 3.40326935E-1},
+{2.91745067E-1, 3.40602487E-1, 3.81397158E-1}};
+
+static const float evrc_lspq_full_codebook4[128][3] = {
+{2.77461529E-1, 3.16972077E-1, 3.95498335E-1},
+{3.36560428E-1, 3.60156953E-1, 3.81473005E-1},
+{3.10509324E-1, 3.31732392E-1, 3.66864383E-1},
+{3.37470949E-1, 3.96795273E-1, 4.12356317E-1},
+{2.79660404E-1, 3.66520107E-1, 3.85313451E-1},
+{3.16038966E-1, 3.85609329E-1, 4.01304781E-1},
+{3.09960425E-1, 3.43410730E-1, 4.24745500E-1},
+{3.54243636E-1, 4.08699274E-1, 4.22167957E-1},
+{2.95587242E-1, 3.33741128E-1, 3.87421668E-1},
+{3.33446383E-1, 3.86974752E-1, 4.01353061E-1},
+{3.23412836E-1, 3.65269661E-1, 3.85193288E-1},
+{3.42731953E-1, 4.03192520E-1, 4.19920385E-1},
+{2.77681828E-1, 3.82494986E-1, 4.04274166E-1},
+{3.18247974E-1, 3.95985305E-1, 4.31353152E-1},
+{3.03711414E-1, 3.80319715E-1, 4.37173545E-1},
+{3.78288805E-1, 4.07077312E-1, 4.22679126E-1},
+{2.38116503E-1, 3.42454314E-1, 4.24624741E-1},
+{3.45615685E-1, 3.68681073E-1, 4.00817335E-1},
+{3.17688107E-1, 3.41902673E-1, 4.05601799E-1},
+{3.66368949E-1, 3.89039934E-1, 4.06154454E-1},
+{2.99398005E-1, 3.52021694E-1, 3.99955690E-1},
+{3.24991941E-1, 3.90028834E-1, 4.19478714E-1},
+{3.23025763E-1, 3.68114293E-1, 4.02087748E-1},
+{3.62326264E-1, 4.16927993E-1, 4.32773650E-1},
+{2.72696435E-1, 3.59205008E-1, 4.26880658E-1},
+{3.46539855E-1, 3.69616628E-1, 4.15621221E-1},
+{3.34109128E-1, 3.55736315E-1, 3.96749556E-1},
+{3.37468982E-1, 4.10392702E-1, 4.25986826E-1},
+{2.99468994E-1, 3.80648255E-1, 4.18284118E-1},
+{3.21378171E-1, 4.11198020E-1, 4.28792536E-1},
+{3.27841163E-1, 3.69345129E-1, 4.34395611E-1},
+{3.80669057E-1, 4.26086366E-1, 4.42754567E-1},
+{2.68943667E-1, 3.42942953E-1, 3.98681462E-1},
+{3.38102877E-1, 3.76338840E-1, 3.92043173E-1},
+{3.23593497E-1, 3.48742068E-1, 3.72551978E-1},
+{3.47550809E-1, 3.92885387E-1, 4.21169937E-1},
+{3.04182827E-1, 3.59816670E-1, 3.81633341E-1},
+{3.14221382E-1, 4.02108550E-1, 4.20085251E-1},
+{3.01306546E-1, 3.62662733E-1, 4.29262817E-1},
+{3.71770263E-1, 3.98696363E-1, 4.31438982E-1},
+{2.74591267E-1, 3.35595489E-1, 4.20079648E-1},
+{3.44540834E-1, 3.90451789E-1, 4.06412065E-1},
+{3.25239837E-1, 3.78344476E-1, 3.94673288E-1},
+{3.56683493E-1, 3.90574157E-1, 4.33851063E-1},
+{2.63501287E-1, 3.95260096E-1, 4.23116386E-1},
+{3.37520659E-1, 3.92563462E-1, 4.43415821E-1},
+{3.14522266E-1, 3.80968630E-1, 4.22676384E-1},
+{3.76235068E-1, 4.17298734E-1, 4.31451261E-1},
+{2.61855006E-1, 3.68646085E-1, 4.04260576E-1},
+{3.55580151E-1, 3.77994478E-1, 3.95868242E-1},
+{3.27742815E-1, 3.53872776E-1, 4.11040604E-1},
+{3.62960637E-1, 3.99466991E-1, 4.14690197E-1},
+{3.09410870E-1, 3.73796046E-1, 3.92672479E-1},
+{3.31016302E-1, 4.00801599E-1, 4.31759298E-1},
+{3.23573053E-1, 3.68619561E-1, 4.17455137E-1},
+{3.49115849E-1, 4.26840067E-1, 4.43913996E-1},
+{2.89738595E-1, 3.63759339E-1, 4.10511792E-1},
+{3.55286479E-1, 3.89331281E-1, 4.13432419E-1},
+{3.36565912E-1, 3.60222459E-1, 4.24179018E-1},
+{3.39932680E-1, 4.09228802E-1, 4.40184891E-1},
+{3.00889730E-1, 4.00081098E-1, 4.17955697E-1},
+{3.17052066E-1, 4.22288120E-1, 4.42229569E-1},
+{3.27336788E-1, 3.84311676E-1, 4.30288613E-1},
+{3.98990929E-1, 4.29498434E-1, 4.43475187E-1},
+{2.49110118E-1, 3.25696886E-1, 4.11728263E-1},
+{3.45929205E-1, 3.68577540E-1, 3.88473272E-1},
+{3.13219666E-1, 3.39229465E-1, 3.87597919E-1},
+{3.51453960E-1, 3.98730278E-1, 4.12656188E-1},
+{2.93487132E-1, 3.75763118E-1, 3.94488096E-1},
+{3.24470758E-1, 3.94202888E-1, 4.08882737E-1},
+{3.12710822E-1, 3.57720256E-1, 4.14061189E-1},
+{3.66507173E-1, 4.08171296E-1, 4.23891425E-1},
+{2.99965680E-1, 3.31993401E-1, 4.07860160E-1},
+{3.34925175E-1, 3.86143029E-1, 4.11538124E-1},
+{3.34788024E-1, 3.66196156E-1, 3.93347144E-1},
+{3.47847939E-1, 4.05926466E-1, 4.30507302E-1},
+{2.85952926E-1, 3.95283282E-1, 4.16119337E-1},
+{3.23867381E-1, 4.06476676E-1, 4.42482829E-1},
+{3.16716671E-1, 3.84451628E-1, 4.39411044E-1},
+{3.86772931E-1, 4.11824584E-1, 4.27831531E-1},
+{2.38072395E-1, 3.62342358E-1, 4.30931687E-1},
+{3.46450031E-1, 3.79082918E-1, 4.06567812E-1},
+{3.16576600E-1, 3.56468618E-1, 3.96218300E-1},
+{3.66539180E-1, 3.89590919E-1, 4.21055555E-1},
+{3.08291376E-1, 3.71324301E-1, 4.07867432E-1},
+{3.36435199E-1, 3.91514421E-1, 4.22977090E-1},
+{3.23035538E-1, 3.80447328E-1, 4.09550190E-1},
+{3.65228057E-1, 4.27910388E-1, 4.43691254E-1},
+{2.72038043E-1, 3.76596808E-1, 4.33685899E-1},
+{3.57665777E-1, 3.77761602E-1, 4.09178972E-1},
+{3.36498559E-1, 3.64215910E-1, 4.09255505E-1},
+{3.48082423E-1, 4.17631805E-1, 4.33284521E-1},
+{3.02754521E-1, 3.95974755E-1, 4.33717251E-1},
+{3.31676304E-1, 4.17587161E-1, 4.36239839E-1},
+{3.33287597E-1, 3.80799115E-1, 4.39620733E-1},
+{3.88112009E-1, 4.36933577E-1, 4.50829268E-1},
+{2.56026626E-1, 3.48015189E-1, 4.22922611E-1},
+{3.45773995E-1, 3.81725788E-1, 3.96794081E-1},
+{3.25623751E-1, 3.50391924E-1, 3.87330651E-1},
+{3.56868088E-1, 3.98574769E-1, 4.23177242E-1},
+{3.01226199E-1, 3.86906981E-1, 4.03335571E-1},
+{3.28178406E-1, 4.02090192E-1, 4.19389248E-1},
+{3.14385355E-1, 3.69043887E-1, 4.34375286E-1},
+{3.72321129E-1, 4.11672413E-1, 4.40518737E-1},
+{2.90479720E-1, 3.48121881E-1, 4.26216483E-1},
+{3.44438791E-1, 3.82666349E-1, 4.17321086E-1},
+{3.34866822E-1, 3.76235664E-1, 4.04475212E-1},
+{3.59025359E-1, 4.04721916E-1, 4.34838414E-1},
+{2.79127955E-1, 4.11106586E-1, 4.35360551E-1},
+{3.48125517E-1, 3.98732066E-1, 4.46927428E-1},
+{3.27018857E-1, 3.90107334E-1, 4.41707492E-1},
+{3.90858352E-1, 4.19813931E-1, 4.35153484E-1},
+{2.55319297E-1, 3.70405972E-1, 4.32188630E-1},
+{3.54651988E-1, 3.88332665E-1, 4.02956128E-1},
+{3.21608186E-1, 3.54489803E-1, 4.28299785E-1},
+{3.75163496E-1, 3.98833990E-1, 4.14177418E-1},
+{3.11953604E-1, 3.91430676E-1, 4.12552476E-1},
+{3.42528820E-1, 3.96365345E-1, 4.32497382E-1},
+{3.33744347E-1, 3.76422405E-1, 4.20536995E-1},
+{3.53529096E-1, 4.29231048E-1, 4.59699273E-1},
+{2.88017929E-1, 3.77999961E-1, 4.34011698E-1},
+{3.55683446E-1, 3.80780041E-1, 4.23145533E-1},
+{3.44358265E-1, 3.72184873E-1, 4.31265354E-1},
+{3.53966117E-1, 4.14166689E-1, 4.42941308E-1},
+{3.04770231E-1, 4.12517488E-1, 4.34183121E-1},
+{3.35913360E-1, 4.24590766E-1, 4.46378469E-1},
+{3.43738198E-1, 3.84766221E-1, 4.35271382E-1},
+{4.10941303E-1, 4.40662980E-1, 4.52113390E-1}};
+
+static const float evrc_lspq_half_codebook1[128][3] = {
+{1.35226343E-2, 1.82081293E-2, 3.93940695E-2},
+{2.29392890E-2, 3.57831158E-2, 1.05352886E-1},
+{2.09106486E-2, 3.04159056E-2, 8.93941075E-2},
+{1.88909005E-2, 3.82722206E-2, 1.37820408E-1},
+{2.05143820E-2, 2.85481159E-2, 7.39762187E-2},
+{4.69510332E-2, 6.84031919E-2, 1.09123811E-1},
+{3.15557197E-2, 5.69139980E-2, 8.57057571E-2},
+{3.81181911E-2, 7.77784660E-2, 1.92532852E-1},
+{2.16297153E-2, 2.92908940E-2, 6.25042021E-2},
+{3.11414022E-2, 5.99079318E-2, 1.02860682E-1},
+{3.02799307E-2, 5.35012372E-2, 7.80925751E-2},
+{6.50846213E-2, 9.06624720E-2, 1.42850950E-1},
+{3.27340364E-2, 5.04027791E-2, 6.26492277E-2},
+{5.27439862E-2, 6.22574277E-2, 1.22198336E-1},
+{3.48840356E-2, 6.42222390E-2, 9.16024595E-2},
+{4.88984436E-2, 1.05058022E-1, 1.68813452E-1},
+{2.35791076E-2, 3.21034677E-2, 5.60899563E-2},
+{2.77252812E-2, 4.87281792E-2, 1.01224191E-1},
+{2.74348017E-2, 4.04965915E-2, 9.34926122E-2},
+{4.38360050E-2, 6.03261292E-2, 1.52400866E-1},
+{2.68994924E-2, 4.52906378E-2, 6.49800375E-2},
+{5.16058952E-2, 6.08312152E-2, 1.08799636E-1},
+{4.20064926E-2, 6.11845106E-2, 8.54474008E-2},
+{7.13502690E-2, 1.01972111E-1, 1.74640998E-1},
+{2.88906675E-2, 4.13964354E-2, 5.25928028E-2},
+{3.16364467E-2, 6.63532093E-2, 1.24950245E-1},
+{4.30289507E-2, 5.14023267E-2, 7.96877742E-2},
+{5.70970774E-2, 1.08444504E-1, 1.44075617E-1},
+{3.38840261E-2, 5.04746847E-2, 7.29765445E-2},
+{6.54265657E-2, 7.90987685E-2, 1.15570590E-1},
+{3.85423526E-2, 7.33125433E-2, 1.02307513E-1},
+{6.57824501E-2, 1.02909811E-1, 2.11874440E-1},
+{1.54727865E-2, 2.04559695E-2, 5.46121262E-2},
+{2.27950197E-2, 3.90954204E-2, 1.19443826E-1},
+{3.06889173E-2, 4.54540215E-2, 8.20418894E-2},
+{2.25957241E-2, 4.79101725E-2, 1.71844408E-1},
+{2.71088015E-2, 4.01739590E-2, 7.01922849E-2},
+{4.95789349E-2, 7.92963281E-2, 1.04862511E-1},
+{3.06095853E-2, 5.64059429E-2, 9.49584097E-2},
+{6.34224564E-2, 9.11655501E-2, 1.84724405E-1},
+{2.43342388E-2, 3.91998328E-2, 6.31406233E-2},
+{3.38011980E-2, 6.60846457E-2, 1.11031540E-1},
+{3.51784080E-2, 5.79397269E-2, 7.20702857E-2},
+{6.49054050E-2, 8.65831897E-2, 1.54648736E-1},
+{2.91934665E-2, 5.16204573E-2, 6.94437325E-2},
+{5.94522804E-2, 7.19829276E-2, 1.27434507E-1},
+{5.31888530E-2, 6.38182089E-2, 9.88218486E-2},
+{8.68290961E-2, 1.41135350E-1, 1.91728458E-1},
+{2.49991138E-2, 3.62556018E-2, 5.03724031E-2},
+{2.82246377E-2, 5.44572286E-2, 1.12663500E-1},
+{3.62618119E-2, 4.59073223E-2, 9.43343639E-2},
+{5.70455343E-2, 7.46300444E-2, 1.59157172E-1},
+{2.72987466E-2, 4.56625856E-2, 7.52529651E-2},
+{5.12860194E-2, 8.51126984E-2, 1.23587973E-1},
+{4.91451994E-2, 5.93483113E-2, 9.22686011E-2},
+{7.06961900E-2, 1.05451979E-1, 1.92602143E-1},
+{2.80733760E-2, 4.18509208E-2, 5.87159805E-2},
+{4.64449003E-2, 7.06698820E-2, 1.26038432E-1},
+{4.18453738E-2, 6.30445331E-2, 7.66169876E-2},
+{8.42416435E-2, 1.13282882E-1, 1.43687114E-1},
+{4.17615622E-2, 5.59472926E-2, 7.09872842E-2},
+{5.55161387E-2, 9.50126722E-2, 1.27727196E-1},
+{5.90935498E-2, 7.36730024E-2, 9.65935886E-2},
+{7.84136653E-2, 1.41432360E-1, 2.17428640E-1},
+{2.10490543E-2, 2.91891042E-2, 4.60035764E-2},
+{3.64863276E-2, 4.62387018E-2, 1.07044168E-1},
+{2.68652122E-2, 3.92937548E-2, 8.41179937E-2},
+{2.72903945E-2, 5.53805046E-2, 1.41586170E-1},
+{2.48476695E-2, 3.63277681E-2, 7.62430876E-2},
+{5.25430813E-2, 7.75778666E-2, 1.14567965E-1},
+{4.07741442E-2, 5.39923795E-2, 9.07640457E-2},
+{5.73043302E-2, 7.65803084E-2, 1.79578975E-1},
+{2.46032421E-2, 3.41408364E-2, 6.78990781E-2},
+{4.08220068E-2, 6.29783794E-2, 9.95191261E-2},
+{3.83025035E-2, 5.52857481E-2, 7.90019333E-2},
+{7.24111274E-2, 1.01903863E-1, 1.46979645E-1},
+{3.73902172E-2, 4.70463894E-2, 6.54684529E-2},
+{5.27397543E-2, 6.72770366E-2, 1.39680430E-1},
+{4.05365378E-2, 7.05081299E-2, 9.25668627E-2},
+{4.43425253E-2, 1.10367171E-1, 1.99636266E-1},
+{2.54920740E-2, 3.47603969E-2, 6.05902039E-2},
+{4.35465500E-2, 5.32369502E-2, 1.08325966E-1},
+{2.79599819E-2, 4.91324775E-2, 8.84284526E-2},
+{4.98051867E-2, 8.81728902E-2, 1.52597323E-1},
+{3.19346264E-2, 4.62169312E-2, 6.85206428E-2},
+{5.80246300E-2, 6.84268698E-2, 1.15085281E-1},
+{4.33904678E-2, 6.90575615E-2, 8.44984353E-2},
+{7.39691556E-2, 1.19240515E-1, 1.77340195E-1},
+{3.18767503E-2, 4.59697433E-2, 5.72372638E-2},
+{4.50873822E-2, 5.66509366E-2, 1.32005826E-1},
+{4.59097028E-2, 5.45580424E-2, 8.61423314E-2},
+{7.44685754E-2, 1.13815404E-1, 1.61570594E-1},
+{3.97509560E-2, 4.95359488E-2, 7.22542256E-2},
+{6.76257759E-2, 8.31029043E-2, 1.27990112E-1},
+{5.76258078E-2, 6.95326403E-2, 1.05012968E-1},
+{6.85313493E-2, 1.21758826E-1, 2.20626548E-1},
+{2.18480472E-2, 2.99130920E-2, 5.16208000E-2},
+{3.64343151E-2, 4.91795056E-2, 1.23277210E-1},
+{3.89611274E-2, 4.76634987E-2, 8.61716568E-2},
+{4.14635167E-2, 6.88006952E-2, 1.69356152E-1},
+{3.35514620E-2, 4.17815186E-2, 7.37159401E-2},
+{5.80224693E-2, 8.70314166E-2, 1.12917498E-1},
+{4.80243117E-2, 5.69486506E-2, 1.00755706E-1},
+{5.98873124E-2, 8.57942328E-2, 2.01388851E-1},
+{2.99309995E-2, 3.94828431E-2, 6.46376088E-2},
+{3.88626605E-2, 8.07443634E-2, 1.15519784E-1},
+{3.49444002E-2, 6.28911033E-2, 8.04982036E-2},
+{6.88817874E-2, 9.92431119E-2, 1.60393253E-1},
+{3.64237651E-2, 5.34016453E-2, 6.70152009E-2},
+{5.83492741E-2, 7.85285756E-2, 1.41746715E-1},
+{4.86469641E-2, 7.26736858E-2, 9.48315859E-2},
+{5.85533604E-2, 1.36289746E-1, 1.98639736E-1},
+{2.60888506E-2, 3.73406820E-2, 5.57853170E-2},
+{4.58504409E-2, 5.60512505E-2, 1.17927872E-1},
+{4.28801328E-2, 5.14739119E-2, 9.75309014E-2},
+{6.37611598E-2, 8.73552933E-2, 1.68334916E-1},
+{3.76709923E-2, 4.58216034E-2, 7.86528140E-2},
+{6.75194561E-2, 8.98697898E-2, 1.19418114E-1},
+{5.46374246E-2, 6.66805878E-2, 8.93813819E-2},
+{7.73086548E-2, 1.21754415E-1, 1.99579224E-1},
+{3.15621309E-2, 4.51702215E-2, 6.25768527E-2},
+{3.78782675E-2, 8.03486481E-2, 1.38961688E-1},
+{5.08303270E-2, 6.18740581E-2, 8.31153840E-2},
+{8.96311402E-2, 1.28753766E-1, 1.64891586E-1},
+{4.73503470E-2, 5.75724356E-2, 7.65264630E-2},
+{7.16898590E-2, 9.89895687E-2, 1.30078360E-1},
+{6.29082546E-2, 7.90778771E-2, 1.05111063E-1},
+{8.80649835E-2, 1.65206164E-1, 2.13214174E-1}};
+
+static const float evrc_lspq_half_codebook2[128][3] = {
+{9.75915268E-2, 1.23701490E-1, 1.69437975E-1},
+{9.49536338E-2, 2.01081768E-1, 2.26855248E-1},
+{9.00496617E-2, 1.49164870E-1, 2.26532787E-1},
+{1.70302704E-1, 1.97222874E-1, 2.49974832E-1},
+{1.08773641E-1, 1.51972428E-1, 1.75123364E-1},
+{1.30278930E-1, 2.13229164E-1, 2.29646355E-1},
+{1.24917991E-1, 1.87347755E-1, 2.04712003E-1},
+{2.00670198E-1, 2.28963569E-1, 2.69420803E-1},
+{8.98375586E-2, 1.25332758E-1, 2.10539430E-1},
+{9.62376669E-2, 2.07185850E-1, 2.54174471E-1},
+{1.05694629E-1, 1.78856418E-1, 2.00121015E-1},
+{1.56048968E-1, 2.19573721E-1, 2.91079402E-1},
+{1.37392268E-1, 1.59993336E-1, 1.94698542E-1},
+{1.07262500E-1, 2.37790957E-1, 2.70740807E-1},
+{1.42976448E-1, 2.01550499E-1, 2.18468934E-1},
+{2.14270487E-1, 2.71881402E-1, 3.01200211E-1},
+{1.10729210E-1, 1.33688226E-1, 1.54877156E-1},
+{1.06667660E-1, 1.76678821E-1, 2.62798905E-1},
+{9.16352943E-2, 1.74592838E-1, 2.19329327E-1},
+{1.84038624E-1, 2.27964059E-1, 2.47762203E-1},
+{1.10572360E-1, 1.58207163E-1, 1.96013063E-1},
+{1.33543387E-1, 2.32269660E-1, 2.51828164E-1},
+{1.55922309E-1, 1.77941337E-1, 2.18096644E-1},
+{1.92260072E-1, 2.49512479E-1, 2.89911509E-1},
+{1.13708906E-1, 1.37872443E-1, 2.02929884E-1},
+{1.02557532E-1, 1.84820071E-1, 2.92164624E-1},
+{1.36595622E-1, 1.58687428E-1, 2.41399556E-1},
+{1.72813818E-1, 2.49303415E-1, 3.00458610E-1},
+{1.36871174E-1, 1.57249823E-1, 2.10913152E-1},
+{1.28974810E-1, 2.45167866E-1, 2.67653584E-1},
+{1.66812256E-1, 1.88998029E-1, 2.31345922E-1},
+{2.32248470E-1, 2.63196051E-1, 3.16754937E-1},
+{9.24560949E-2, 1.19977452E-1, 1.91262275E-1},
+{1.13085262E-1, 2.08461538E-1, 2.29368120E-1},
+{1.00716405E-1, 1.40670076E-1, 2.58062959E-1},
+{1.67010382E-1, 2.18105540E-1, 2.62592494E-1},
+{1.25487238E-1, 1.62686959E-1, 1.84409231E-1},
+{1.52406558E-1, 2.07131729E-1, 2.47582436E-1},
+{1.37441203E-1, 1.80262372E-1, 2.17698842E-1},
+{2.07853511E-1, 2.49209508E-1, 2.69830108E-1},
+{9.35257301E-2, 1.49197355E-1, 2.04652041E-1},
+{1.11997180E-1, 2.25233063E-1, 2.47003049E-1},
+{1.09315015E-1, 1.93811879E-1, 2.13802189E-1},
+{1.75118580E-1, 2.52520263E-1, 2.75082767E-1},
+{1.36918738E-1, 1.77440569E-1, 1.97931141E-1},
+{1.36811242E-1, 2.37426177E-1, 2.84737825E-1},
+{1.60759792E-1, 2.00833157E-1, 2.18084484E-1},
+{2.33710244E-1, 2.66372561E-1, 2.91802049E-1},
+{1.19171090E-1, 1.39703169E-1, 1.87723249E-1},
+{1.31049946E-1, 1.93696663E-1, 2.60426998E-1},
+{1.08267047E-1, 1.65194795E-1, 2.39523023E-1},
+{2.03195021E-1, 2.25942209E-1, 2.49403238E-1},
+{1.23842932E-1, 1.45794615E-1, 2.15635628E-1},
+{1.71226338E-1, 2.38054529E-1, 2.57975638E-1},
+{1.66923836E-1, 1.88604668E-1, 2.11124212E-1},
+{2.10620746E-1, 2.62442708E-1, 2.83127964E-1},
+{1.05748810E-1, 1.36286482E-1, 2.20050186E-1},
+{9.72945765E-2, 2.33471528E-1, 2.96113968E-1},
+{1.34298369E-1, 1.93955436E-1, 2.39148825E-1},
+{1.64229318E-1, 2.70067751E-1, 2.94142485E-1},
+{1.42760262E-1, 1.65033355E-1, 2.24100381E-1},
+{1.46414533E-1, 2.47942328E-1, 3.00708115E-1},
+{1.74778774E-1, 2.19349250E-1, 2.38162965E-1},
+{2.36311123E-1, 2.90669680E-1, 3.28010976E-1},
+{1.14076428E-1, 1.33071408E-1, 1.73181504E-1},
+{1.13575839E-1, 1.90307274E-1, 2.41681188E-1},
+{8.59165266E-2, 1.63920239E-1, 2.37934500E-1},
+{1.92916945E-1, 2.15082392E-1, 2.39128128E-1},
+{1.37291834E-1, 1.59423307E-1, 1.79722220E-1},
+{1.40435383E-1, 2.22092256E-1, 2.40960747E-1},
+{1.40387163E-1, 1.89601168E-1, 2.05635697E-1},
+{2.11695507E-1, 2.36578360E-1, 2.81248927E-1},
+{9.03010592E-2, 1.27157405E-1, 2.33567923E-1},
+{1.10118054E-1, 2.09328398E-1, 2.72836268E-1},
+{1.16710417E-1, 1.77853987E-1, 2.22808748E-1},
+{1.81691542E-1, 2.32265159E-1, 2.74991214E-1},
+{1.46553472E-1, 1.69474706E-1, 1.90245956E-1},
+{1.09213792E-1, 2.63291955E-1, 2.88490772E-1},
+{1.49815127E-1, 2.11342707E-1, 2.28899449E-1},
+{1.97645500E-1, 2.83229947E-1, 3.14882278E-1},
+{1.24495603E-1, 1.46097973E-1, 1.66125208E-1},
+{1.34878591E-1, 1.83030054E-1, 2.89288282E-1},
+{9.33032110E-2, 1.83962211E-1, 2.38543004E-1},
+{1.92844257E-1, 2.39588335E-1, 2.58421540E-1},
+{1.23796798E-1, 1.65556595E-1, 2.08408386E-1},
+{1.51144341E-1, 2.35801116E-1, 2.59280622E-1},
+{1.50657728E-1, 1.90052524E-1, 2.28362590E-1},
+{1.98180959E-1, 2.56794214E-1, 3.08975637E-1},
+{1.28490031E-1, 1.49084017E-1, 1.98376507E-1},
+{9.20595750E-2, 2.12231293E-1, 2.92948842E-1},
+{1.41698137E-1, 1.72356680E-1, 2.58454144E-1},
+{1.96733460E-1, 2.29709730E-1, 2.95780182E-1},
+{1.47062227E-1, 1.68918088E-1, 2.07363635E-1},
+{1.36309877E-1, 2.60373056E-1, 2.82607377E-1},
+{1.81041077E-1, 2.01826140E-1, 2.38867551E-1},
+{2.45326266E-1, 2.80183077E-1, 3.11954319E-1},
+{1.04131766E-1, 1.33040652E-1, 1.89834684E-1},
+{1.23298146E-1, 2.09621087E-1, 2.47813210E-1},
+{1.24040775E-1, 1.59827366E-1, 2.58856058E-1},
+{1.87048867E-1, 2.12488100E-1, 2.59629130E-1},
+{1.24255307E-1, 1.73768952E-1, 1.92850024E-1},
+{1.58917829E-1, 2.25389823E-1, 2.43284762E-1},
+{1.53421149E-1, 1.91807315E-1, 2.09249526E-1},
+{2.27154449E-1, 2.51181155E-1, 2.72600353E-1},
+{1.09922059E-1, 1.57100275E-1, 2.20024973E-1},
+{1.32782355E-1, 2.19485506E-1, 2.67028928E-1},
+{1.26857504E-1, 1.98836312E-1, 2.17928499E-1},
+{1.91415027E-1, 2.52424240E-1, 2.72652745E-1},
+{1.55277625E-1, 1.79573521E-1, 2.00773627E-1},
+{1.17547743E-1, 2.47869864E-1, 3.08279335E-1},
+{1.65706977E-1, 2.10339502E-1, 2.29199320E-1},
+{2.25694910E-1, 2.84438193E-1, 3.12106073E-1},
+{1.29503176E-1, 1.48420051E-1, 1.80180401E-1},
+{1.54752508E-1, 1.97748467E-1, 2.67275035E-1},
+{1.28590241E-1, 1.76178381E-1, 2.39905864E-1},
+{2.14926764E-1, 2.37634435E-1, 2.58794010E-1},
+{1.28322318E-1, 1.59338519E-1, 2.26626605E-1},
+{1.55747548E-1, 2.47740522E-1, 2.73726821E-1},
+{1.75741687E-1, 1.97952345E-1, 2.19115943E-1},
+{2.18626365E-1, 2.45809183E-1, 3.00479650E-1},
+{1.17709018E-1, 1.45512864E-1, 2.38044471E-1},
+{1.18006893E-1, 2.23775521E-1, 2.94175088E-1},
+{1.51349202E-1, 1.88157812E-1, 2.48743281E-1},
+{1.89312205E-1, 2.69580543E-1, 2.93785989E-1},
+{1.49895594E-1, 1.74537256E-1, 2.37430006E-1},
+{1.39775530E-1, 2.71709383E-1, 3.07839513E-1},
+{1.83945730E-1, 2.07717165E-1, 2.26722151E-1},
+{2.54552156E-1, 2.96640933E-1, 3.24801445E-1}};
+
+static const float evrc_lspq_half_codebook3[256][4] = { // 3rd RATE_HALF split: row = 8-bit frame.lsp[2] index, 4 LSP values per row
+{2.36904725E-1, 2.56104350E-1, 3.16955745E-1, 4.07520533E-1},
+{2.97596931E-1, 3.23482454E-1, 3.47667515E-1, 3.74551237E-1},
+{2.73721159E-1, 2.98297524E-1, 3.29923928E-1, 3.83599102E-1},
+{3.07849586E-1, 3.32836270E-1, 3.89340341E-1, 4.05575991E-1},
+{2.33803615E-1, 2.60296524E-1, 3.67351949E-1, 4.04388249E-1},
+{2.97513664E-1, 3.15356553E-1, 3.85135233E-1, 4.02197123E-1},
+{2.85618782E-1, 3.10872793E-1, 3.65022361E-1, 3.84816766E-1},
+{3.35271597E-1, 3.55222225E-1, 3.81921113E-1, 3.98685753E-1},
+{2.00265601E-1, 2.50502288E-1, 3.70398223E-1, 4.32012677E-1},
+{3.07982087E-1, 3.33767712E-1, 3.58199060E-1, 3.78386796E-1},
+{2.60086119E-1, 3.25520277E-1, 3.56873333E-1, 3.84737790E-1},
+{3.01356375E-1, 3.41369390E-1, 4.00296748E-1, 4.17337179E-1},
+{2.67080963E-1, 2.97674358E-1, 3.69702041E-1, 3.89139235E-1},
+{2.72669852E-1, 3.49704087E-1, 3.91925275E-1, 4.06383276E-1},
+{2.52825916E-1, 3.49636555E-1, 3.84550989E-1, 4.05930996E-1},
+{3.42927098E-1, 3.74274015E-1, 4.05468166E-1, 4.20351923E-1},
+{2.52408743E-1, 2.80375838E-1, 3.21436584E-1, 3.88436913E-1},
+{2.96970189E-1, 3.17173600E-1, 3.65342557E-1, 4.02736843E-1},
+{2.81905174E-1, 3.01479161E-1, 3.34335625E-1, 4.07633483E-1},
+{3.26872945E-1, 3.47177684E-1, 3.75017703E-1, 4.05372381E-1},
+{2.36371145E-1, 3.16441059E-1, 3.48707020E-1, 3.82030427E-1},
+{2.87817597E-1, 3.13627005E-1, 4.05129731E-1, 4.23379660E-1},
+{2.77502477E-1, 3.01843822E-1, 3.72250855E-1, 4.19212818E-1},
+{3.28988850E-1, 3.61901104E-1, 4.02015507E-1, 4.19229805E-1},
+{2.24960461E-1, 2.74636388E-1, 3.77016127E-1, 3.94726515E-1},
+{3.01045477E-1, 3.40486169E-1, 3.74888122E-1, 4.02532160E-1},
+{2.59897947E-1, 3.30334961E-1, 3.57493818E-1, 4.08657968E-1},
+{3.00961852E-1, 3.56449068E-1, 4.04779494E-1, 4.22508955E-1},
+{2.20979586E-1, 3.16477656E-1, 4.01744068E-1, 4.20735776E-1},
+{2.79754996E-1, 3.30776095E-1, 4.11152899E-1, 4.32687044E-1},
+{2.64246881E-1, 3.16610634E-1, 3.83876741E-1, 4.36683774E-1},
+{3.44381154E-1, 3.85365665E-1, 4.24949467E-1, 4.41560209E-1},
+{2.19488308E-1, 2.36459881E-1, 3.42465997E-1, 4.24989998E-1},
+{2.91465104E-1, 3.22282016E-1, 3.72852802E-1, 3.91635895E-1},
+{2.74792433E-1, 3.16536307E-1, 3.45392585E-1, 3.74555230E-1},
+{3.10583472E-1, 3.35264921E-1, 3.87527227E-1, 4.23076212E-1},
+{2.23211512E-1, 2.98497617E-1, 3.68426204E-1, 3.90213728E-1},
+{2.89078832E-1, 3.26512754E-1, 3.76308680E-1, 4.09553707E-1},
+{2.63830125E-1, 3.08977246E-1, 3.81453037E-1, 4.04660761E-1},
+{3.47073615E-1, 3.64797831E-1, 3.86763453E-1, 4.04511690E-1},
+{2.18452707E-1, 2.75614083E-1, 3.62711072E-1, 4.18278992E-1},
+{3.15042794E-1, 3.40813220E-1, 3.78627181E-1, 3.96316767E-1},
+{2.79727697E-1, 3.31259727E-1, 3.60061288E-1, 3.81175518E-1},
+{3.18602443E-1, 3.38044286E-1, 4.09010768E-1, 4.30300415E-1},
+{2.64196932E-1, 2.90672481E-1, 3.68595004E-1, 4.31856751E-1},
+{2.72645593E-1, 3.63514841E-1, 3.96518826E-1, 4.20091212E-1},
+{2.26540968E-1, 3.50055099E-1, 3.93851519E-1, 4.12597001E-1},
+{3.53053868E-1, 3.69929552E-1, 4.09656048E-1, 4.26387310E-1},
+{2.60788381E-1, 2.85172462E-1, 3.45943332E-1, 3.97500694E-1},
+{3.01113129E-1, 3.28201890E-1, 3.56068015E-1, 4.10803795E-1},
+{2.88101614E-1, 3.09559643E-1, 3.43756795E-1, 4.24872875E-1},
+{3.10489357E-1, 3.51421893E-1, 3.93717408E-1, 4.15550530E-1},
+{2.22308263E-1, 3.26798201E-1, 3.77981663E-1, 3.98635030E-1},
+{3.02915514E-1, 3.22781920E-1, 3.98558855E-1, 4.25489604E-1},
+{2.77136803E-1, 3.19992602E-1, 3.77490878E-1, 4.29177463E-1},
+{3.38731766E-1, 3.58164370E-1, 4.08386350E-1, 4.25495386E-1},
+{2.18726233E-1, 2.84384966E-1, 3.94053698E-1, 4.16346967E-1},
+{3.01005960E-1, 3.44093680E-1, 3.69013667E-1, 4.15091276E-1},
+{2.80783713E-1, 3.33053648E-1, 3.76726151E-1, 3.97526860E-1},
+{3.14394057E-1, 3.62678826E-1, 4.23668981E-1, 4.41899240E-1},
+{2.66453624E-1, 3.08513761E-1, 3.97407174E-1, 4.17450190E-1},
+{2.94222653E-1, 3.41904402E-1, 4.12726879E-1, 4.34888899E-1},
+{2.87300706E-1, 3.32434595E-1, 3.78856659E-1, 4.38234031E-1},
+{3.57146621E-1, 3.98147047E-1, 4.29875731E-1, 4.44243908E-1},
+{2.29671344E-1, 2.51018614E-1, 3.41046572E-1, 4.04376328E-1},
+{2.94472575E-1, 3.34944606E-1, 3.60409737E-1, 3.83682847E-1},
+{2.88250983E-1, 3.11722696E-1, 3.31680059E-1, 3.65104675E-1},
+{3.24881613E-1, 3.45656693E-1, 3.88306379E-1, 4.05954897E-1},
+{2.50829220E-1, 2.77623534E-1, 3.70799541E-1, 3.90479207E-1},
+{2.93523371E-1, 3.28319192E-1, 3.92112255E-1, 4.09464061E-1},
+{2.83608794E-1, 3.03885639E-1, 3.78504395E-1, 3.97310555E-1},
+{3.34039807E-1, 3.52837384E-1, 3.97272944E-1, 4.14322019E-1},
+{2.21891895E-1, 2.51877457E-1, 3.71723533E-1, 4.31791008E-1},
+{3.13201427E-1, 3.41175437E-1, 3.65503550E-1, 3.88567209E-1},
+{2.71330535E-1, 3.39163721E-1, 3.62616420E-1, 3.95735979E-1},
+{3.07550132E-1, 3.47777665E-1, 4.01049614E-1, 4.32767451E-1},
+{2.59387434E-1, 2.87243843E-1, 3.86817336E-1, 4.06042695E-1},
+{2.85485208E-1, 3.44094992E-1, 4.02050495E-1, 4.19413745E-1},
+{2.65781403E-1, 3.40084374E-1, 3.69407654E-1, 4.27031696E-1},
+{3.53740931E-1, 3.84463251E-1, 4.11747813E-1, 4.26181793E-1},
+{2.43866488E-1, 2.68350184E-1, 3.42201948E-1, 3.98457229E-1},
+{2.93145239E-1, 3.34754169E-1, 3.61702800E-1, 3.98416638E-1},
+{2.91342974E-1, 3.13155174E-1, 3.36525917E-1, 3.87748599E-1},
+{3.05656791E-1, 3.62904549E-1, 3.88153434E-1, 4.05543149E-1},
+{2.17492327E-1, 3.11723530E-1, 3.75984788E-1, 4.28997755E-1},
+{2.91149259E-1, 3.29380929E-1, 4.03900385E-1, 4.22333181E-1},
+{2.90362060E-1, 3.09530973E-1, 3.78994226E-1, 4.13688362E-1},
+{3.29564869E-1, 3.77404690E-1, 4.06584859E-1, 4.24739718E-1},
+{2.46461585E-1, 2.71593273E-1, 3.66338253E-1, 4.30753767E-1},
+{3.14107716E-1, 3.37011874E-1, 3.80409718E-1, 4.11099434E-1},
+{2.76568413E-1, 3.27320695E-1, 3.58844280E-1, 4.28949475E-1},
+{3.17179084E-1, 3.58972430E-1, 4.04765844E-1, 4.40376341E-1},
+{2.42777750E-1, 3.34954798E-1, 3.96943450E-1, 4.13318396E-1},
+{2.88895488E-1, 3.25691164E-1, 4.22859550E-1, 4.43758667E-1},
+{2.77583301E-1, 3.25479031E-1, 3.89144659E-1, 4.41075861E-1},
+{3.59125674E-1, 3.90694141E-1, 4.21009541E-1, 4.35708523E-1},
+{2.20172390E-1, 2.47719273E-1, 3.54381859E-1, 4.25398111E-1},
+{3.06046784E-1, 3.27924728E-1, 3.66992772E-1, 3.93192589E-1},
+{2.70805597E-1, 3.16826642E-1, 3.45648706E-1, 4.11717594E-1},
+{3.23188901E-1, 3.45463097E-1, 3.89778793E-1, 4.21570778E-1},
+{2.46136114E-1, 3.12391996E-1, 3.72188628E-1, 3.95842731E-1},
+{3.03856730E-1, 3.24354768E-1, 3.85747254E-1, 4.14155006E-1},
+{2.81075418E-1, 3.18608463E-1, 3.85646880E-1, 4.02703643E-1},
+{3.53517115E-1, 3.72702539E-1, 3.96264613E-1, 4.13074911E-1},
+{2.09221140E-1, 2.95262218E-1, 3.80314291E-1, 4.31278229E-1},
+{3.25313628E-1, 3.46735477E-1, 3.70724022E-1, 3.91045630E-1},
+{2.86396503E-1, 3.43560040E-1, 3.69713604E-1, 3.89867842E-1},
+{3.27794671E-1, 3.47367823E-1, 4.05465066E-1, 4.24566150E-1},
+{2.53054976E-1, 3.02656293E-1, 3.82165134E-1, 4.29898322E-1},
+{2.94418454E-1, 3.70745420E-1, 3.95443261E-1, 4.19514775E-1},
+{2.62873113E-1, 3.45069230E-1, 4.04140890E-1, 4.21902061E-1},
+{3.65063488E-1, 3.82435143E-1, 4.13424790E-1, 4.31241691E-1},
+{2.48788506E-1, 2.82372773E-1, 3.65772307E-1, 4.10981059E-1},
+{3.07288766E-1, 3.27828944E-1, 3.77664983E-1, 4.36220944E-1},
+{2.98542321E-1, 3.20627332E-1, 3.50569665E-1, 4.27620232E-1},
+{3.16258013E-1, 3.62903833E-1, 3.88225138E-1, 4.25608873E-1},
+{2.39077866E-1, 3.31310451E-1, 3.70317876E-1, 4.15995896E-1},
+{3.03735793E-1, 3.32806051E-1, 4.10232842E-1, 4.27751064E-1},
+{2.96002507E-1, 3.19014788E-1, 3.81062448E-1, 4.26954985E-1},
+{3.32508922E-1, 3.62516999E-1, 4.23315108E-1, 4.40995157E-1},
+{2.35128701E-1, 2.74731100E-1, 4.12070572E-1, 4.35478806E-1},
+{2.98073769E-1, 3.55338752E-1, 3.79087746E-1, 4.15318787E-1},
+{2.83429801E-1, 3.45264912E-1, 3.70376289E-1, 4.09900844E-1},
+{3.23593080E-1, 3.65412831E-1, 4.12813127E-1, 4.31023479E-1},
+{2.76626348E-1, 3.00508440E-1, 4.02236879E-1, 4.26638782E-1},
+{2.94512928E-1, 3.61443222E-1, 4.19635236E-1, 4.36999202E-1},
+{2.90807247E-1, 3.41689348E-1, 3.92779291E-1, 4.43490267E-1},
+{3.59391451E-1, 4.03985143E-1, 4.40843761E-1, 4.53028619E-1},
+{2.23295465E-1, 2.39192486E-1, 3.23768020E-1, 4.21689451E-1},
+{2.94778049E-1, 3.18798721E-1, 3.53217840E-1, 3.91906381E-1},
+{2.59032130E-1, 3.10240507E-1, 3.43569040E-1, 3.95064235E-1},
+{3.16474676E-1, 3.38544369E-1, 3.93329024E-1, 4.12235558E-1},
+{2.40108207E-1, 2.84631193E-1, 3.60280991E-1, 3.79973769E-1},
+{2.96909094E-1, 3.15798342E-1, 3.94964337E-1, 4.15127575E-1},
+{2.85434067E-1, 3.04921508E-1, 3.61974716E-1, 4.05767262E-1},
+{3.37407053E-1, 3.56672168E-1, 3.85155082E-1, 4.11186695E-1},
+{2.24014923E-1, 2.60116160E-1, 3.94772530E-1, 4.19585884E-1},
+{3.00647914E-1, 3.41640651E-1, 3.70223522E-1, 3.89520049E-1},
+{2.65946031E-1, 3.25039148E-1, 3.74339938E-1, 3.92346144E-1},
+{3.16029310E-1, 3.40491295E-1, 4.02355313E-1, 4.20484245E-1},
+{2.69841492E-1, 2.94562399E-1, 3.62341762E-1, 4.06415462E-1},
+{2.78897285E-1, 3.59831035E-1, 3.82025838E-1, 4.10577476E-1},
+{2.60760844E-1, 3.31088543E-1, 3.88826251E-1, 4.05486643E-1},
+{3.43372285E-1, 3.82647038E-1, 4.14716601E-1, 4.31592941E-1},
+{2.47998103E-1, 2.73393154E-1, 3.31160426E-1, 4.18943226E-1},
+{3.03579569E-1, 3.25202465E-1, 3.70984435E-1, 4.14420485E-1},
+{2.76896894E-1, 3.00499499E-1, 3.54178190E-1, 4.28807020E-1},
+{3.23655546E-1, 3.59816968E-1, 3.89525414E-1, 4.09288704E-1},
+{2.38927796E-1, 3.09919238E-1, 3.53915572E-1, 4.16634321E-1},
+{2.81171739E-1, 3.07520270E-1, 4.16264892E-1, 4.38523829E-1},
+{2.88858652E-1, 3.09810817E-1, 3.67845178E-1, 4.36035573E-1},
+{3.38423491E-1, 3.70634377E-1, 4.15449977E-1, 4.31534529E-1},
+{2.41260394E-1, 2.73617864E-1, 3.89554620E-1, 4.12539542E-1},
+{2.98046708E-1, 3.40122104E-1, 3.86183739E-1, 4.13826346E-1},
+{2.82436430E-1, 3.31597507E-1, 3.57941389E-1, 4.12115216E-1},
+{3.03820193E-1, 3.70588601E-1, 4.05774951E-1, 4.31517065E-1},
+{2.39077732E-1, 3.11638474E-1, 4.13935781E-1, 4.35304046E-1},
+{2.67116845E-1, 3.41937900E-1, 4.17409420E-1, 4.39184844E-1},
+{2.67946839E-1, 3.33343923E-1, 3.86481404E-1, 4.37462509E-1},
+{3.40510964E-1, 3.90878022E-1, 4.35485125E-1, 4.49101925E-1},
+{2.10069850E-1, 2.32524484E-1, 3.61781418E-1, 4.31357861E-1},
+{2.94509888E-1, 3.33709776E-1, 3.82278621E-1, 3.98638904E-1},
+{2.80525148E-1, 3.25905204E-1, 3.50647032E-1, 3.92873943E-1},
+{3.19999635E-1, 3.43674660E-1, 3.91070545E-1, 4.37501073E-1},
+{2.20581010E-1, 3.03151906E-1, 3.81765544E-1, 4.04488146E-1},
+{2.86122739E-1, 3.29746544E-1, 3.88102829E-1, 4.24247742E-1},
+{2.69807100E-1, 3.25332284E-1, 3.79154503E-1, 4.15138245E-1},
+{3.34858894E-1, 3.69258404E-1, 3.94743145E-1, 4.11922157E-1},
+{2.07109794E-1, 2.72779524E-1, 3.78566444E-1, 4.34579968E-1},
+{3.06466222E-1, 3.46695721E-1, 3.87138307E-1, 4.03558314E-1},
+{2.70148575E-1, 3.46654534E-1, 3.77696693E-1, 3.96434486E-1},
+{3.18745911E-1, 3.40225697E-1, 4.14991558E-1, 4.41578746E-1},
+{2.58592844E-1, 3.14370096E-1, 3.65083754E-1, 4.21615183E-1},
+{2.82712996E-1, 3.54137123E-1, 4.06745970E-1, 4.29267883E-1},
+{2.52021760E-1, 3.59105110E-1, 3.95102918E-1, 4.18148398E-1},
+{3.54906201E-1, 3.74952912E-1, 4.18965995E-1, 4.36144412E-1},
+{2.64841139E-1, 2.92941809E-1, 3.27751458E-1, 4.08790469E-1},
+{3.07774246E-1, 3.35586190E-1, 3.62209618E-1, 4.25394237E-1},
+{2.88466334E-1, 3.16075742E-1, 3.60989630E-1, 4.19551432E-1},
+{3.17128420E-1, 3.55772197E-1, 4.05808747E-1, 4.23972964E-1},
+{2.47089684E-1, 3.38184595E-1, 3.71859610E-1, 3.95971477E-1},
+{3.07981730E-1, 3.32691789E-1, 4.00534213E-1, 4.38273668E-1},
+{2.79484808E-1, 3.16183507E-1, 3.97237718E-1, 4.34746623E-1},
+{3.44490469E-1, 3.66153181E-1, 4.10959423E-1, 4.41727102E-1},
+{2.35741779E-1, 2.94587255E-1, 3.98072541E-1, 4.16833401E-1},
+{3.14038455E-1, 3.52272034E-1, 3.79138887E-1, 4.10969079E-1},
+{2.83002496E-1, 3.38136256E-1, 3.88641894E-1, 4.06193316E-1},
+{3.23625326E-1, 3.50243390E-1, 4.28089559E-1, 4.46630359E-1},
+{2.61252105E-1, 3.24970961E-1, 4.00214493E-1, 4.25321758E-1},
+{3.05284500E-1, 3.42164159E-1, 4.24475133E-1, 4.43830967E-1},
+{2.87374794E-1, 3.32500637E-1, 3.94308269E-1, 4.42538500E-1},
+{3.74075353E-1, 4.02026355E-1, 4.30933535E-1, 4.44160044E-1},
+{2.34503999E-1, 2.56218612E-1, 3.41238797E-1, 4.23045278E-1},
+{3.05492580E-1, 3.29156995E-1, 3.52709830E-1, 3.92439067E-1},
+{2.81323552E-1, 3.03292334E-1, 3.48925412E-1, 3.93163860E-1},
+{3.21893454E-1, 3.50419939E-1, 3.97317469E-1, 4.14560318E-1},
+{2.39684582E-1, 2.92451501E-1, 3.78937423E-1, 3.96535456E-1},
+{3.07307243E-1, 3.29127908E-1, 3.98455560E-1, 4.16143298E-1},
+{2.85274565E-1, 3.08774531E-1, 3.92916501E-1, 4.14437652E-1},
+{3.44446361E-1, 3.62201869E-1, 3.97619784E-1, 4.17743623E-1},
+{2.32083067E-1, 2.67807961E-1, 3.78075659E-1, 4.34560895E-1},
+{3.04738700E-1, 3.51865292E-1, 3.75973165E-1, 3.95293653E-1},
+{2.61990905E-1, 3.46207321E-1, 3.71296942E-1, 4.12438929E-1},
+{3.11080933E-1, 3.51040900E-1, 4.16082799E-1, 4.34340119E-1},
+{2.74980426E-1, 2.96631455E-1, 3.87520492E-1, 4.09243762E-1},
+{2.90939093E-1, 3.54455590E-1, 3.93426955E-1, 4.08220291E-1},
+{2.71871865E-1, 3.45510781E-1, 3.87125313E-1, 4.22590613E-1},
+{3.63245904E-1, 3.81932199E-1, 4.04114902E-1, 4.18370664E-1},
+{2.45770738E-1, 2.72909343E-1, 3.48317921E-1, 4.25161839E-1},
+{3.14139009E-1, 3.37872326E-1, 3.65195215E-1, 4.04423416E-1},
+{2.94075787E-1, 3.16935539E-1, 3.43047202E-1, 4.06130373E-1},
+{3.14627469E-1, 3.72413397E-1, 4.00660694E-1, 4.17930841E-1},
+{2.34014243E-1, 3.14007223E-1, 3.83003533E-1, 4.34829175E-1},
+{2.93635666E-1, 3.20529997E-1, 4.10837352E-1, 4.36393142E-1},
+{2.89505839E-1, 3.11828852E-1, 3.86311471E-1, 4.38771248E-1},
+{3.26317430E-1, 3.80858183E-1, 4.19721425E-1, 4.38795507E-1},
+{2.50809520E-1, 2.83018053E-1, 3.82247388E-1, 4.34244394E-1},
+{3.18994045E-1, 3.44855130E-1, 3.72690141E-1, 4.23067033E-1},
+{2.88380086E-1, 3.36622238E-1, 3.69742334E-1, 4.25057590E-1},
+{3.06107700E-1, 3.81856918E-1, 4.18206155E-1, 4.32868361E-1},
+{2.33898312E-1, 3.44861805E-1, 4.12176549E-1, 4.29216206E-1},
+{2.85980880E-1, 3.42903793E-1, 4.25112903E-1, 4.44299698E-1},
+{2.79858828E-1, 3.38789344E-1, 3.92085373E-1, 4.40541029E-1},
+{3.64509344E-1, 3.82202744E-1, 4.29830611E-1, 4.45818365E-1},
+{2.34392300E-1, 2.57377386E-1, 3.59567046E-1, 4.30088580E-1},
+{3.05031896E-1, 3.27589393E-1, 3.78305554E-1, 4.01026130E-1},
+{2.77522624E-1, 3.18130314E-1, 3.67794275E-1, 4.01543021E-1},
+{3.33035767E-1, 3.55820954E-1, 3.87548923E-1, 4.24628675E-1},
+{2.45021001E-1, 3.12560678E-1, 3.91147614E-1, 4.08762813E-1},
+{2.97059119E-1, 3.40246916E-1, 3.92919302E-1, 4.28899705E-1},
+{2.77839303E-1, 3.25019777E-1, 3.97436380E-1, 4.15920913E-1},
+{3.49465251E-1, 3.70362461E-1, 3.95482540E-1, 4.31923389E-1},
+{2.31485590E-1, 2.91023374E-1, 3.77909541E-1, 4.32259738E-1},
+{3.19283485E-1, 3.53671074E-1, 3.80982876E-1, 3.97843361E-1},
+{2.89689243E-1, 3.50265682E-1, 3.80729675E-1, 3.97969365E-1},
+{3.28987300E-1, 3.52005422E-1, 4.12557244E-1, 4.37597930E-1},
+{2.76273251E-1, 3.02267194E-1, 3.81723404E-1, 4.34989095E-1},
+{2.79627264E-1, 3.73727322E-1, 4.12374616E-1, 4.30626333E-1},
+{2.53442764E-1, 3.65940034E-1, 4.14937019E-1, 4.32743609E-1},
+{3.76107216E-1, 3.95142019E-1, 4.16787744E-1, 4.33023572E-1},
+{2.62815833E-1, 2.88270533E-1, 3.47397208E-1, 4.24182594E-1},
+{3.01931322E-1, 3.43652546E-1, 3.77031326E-1, 4.34204459E-1},
+{2.97834277E-1, 3.23495388E-1, 3.64492416E-1, 4.33550835E-1},
+{3.31774473E-1, 3.64324927E-1, 3.98243546E-1, 4.35078323E-1},
+{2.49049723E-1, 3.27870786E-1, 3.83587003E-1, 4.35558081E-1},
+{3.04653406E-1, 3.27671230E-1, 4.18484688E-1, 4.41378772E-1},
+{2.96960890E-1, 3.23898911E-1, 3.90463710E-1, 4.39915955E-1},
+{3.43923748E-1, 3.67100477E-1, 4.29523230E-1, 4.45214987E-1},
+{2.59399652E-1, 2.91602671E-1, 4.04372454E-1, 4.31413233E-1},
+{2.97537506E-1, 3.57573807E-1, 3.88991833E-1, 4.30006981E-1},
+{2.84068942E-1, 3.49574566E-1, 3.81042838E-1, 4.29712772E-1},
+{3.25716257E-1, 3.74875903E-1, 4.31959271E-1, 4.47290838E-1},
+{2.65302956E-1, 3.14745963E-1, 4.16703463E-1, 4.37294722E-1},
+{3.00398588E-1, 3.54147255E-1, 4.28538084E-1, 4.60336387E-1},
+{2.98077166E-1, 3.49304914E-1, 4.00429249E-1, 4.48213518E-1},
+{3.75576198E-1, 4.16657329E-1, 4.42136765E-1, 4.52728629E-1}};
+
+static const float evrc_lspq_quant_codebook1[16][5] = { // 1st RATE_QUANT split: row = 4-bit frame.lsp[0] index, 5 LSP values per row
+{0.42091064E-1, 0.69474973E-1, 0.11168948E+0, 0.14571965E+0, 0.20893581E+0},
+{0.54944664E-1, 0.98242261E-1, 0.11007882E+0, 0.15890779E+0, 0.20548241E+0},
+{0.45188572E-1, 0.75199433E-1, 0.11423391E+0, 0.15469728E+0, 0.19746706E+0},
+{0.49474996E-1, 0.79667501E-1, 0.12571351E+0, 0.16944779E+0, 0.20775315E+0},
+{0.41789379E-1, 0.63459560E-1, 0.12068028E+0, 0.15850765E+0, 0.20406815E+0},
+{0.47159236E-1, 0.79129547E-1, 0.12183110E+0, 0.15650047E+0, 0.22309226E+0},
+{0.54539919E-1, 0.80343045E-1, 0.12947764E+0, 0.15186153E+0, 0.20171718E+0},
+{0.55852082E-1, 0.94114847E-1, 0.14016025E+0, 0.17807084E+0, 0.22955489E+0},
+{0.45443531E-1, 0.73541410E-1, 0.11937657E+0, 0.15442030E+0, 0.21010752E+0},
+{0.63178010E-1, 0.95231488E-1, 0.12364983E+0, 0.17672543E+0, 0.21743731E+0},
+{0.52765369E-1, 0.84351443E-1, 0.11589085E+0, 0.15790924E+0, 0.20732352E+0},
+{0.51865745E-1, 0.81328541E-1, 0.13756232E+0, 0.18322878E+0, 0.21640070E+0},
+{0.44419531E-1, 0.68874463E-1, 0.13115251E+0, 0.16263582E+0, 0.21659100E+0},
+{0.49378436E-1, 0.81882551E-1, 0.13067168E+0, 0.16821896E+0, 0.23136081E+0},
+{0.55909779E-1, 0.90783298E-1, 0.13348848E+0, 0.16298474E+0, 0.20961523E+0},
+{0.61378211E-1, 0.98602772E-1, 0.14793332E+0, 0.19283190E+0, 0.23156509E+0}};
+
+static const float evrc_lspq_quant_codebook2[16][5] = { // 2nd RATE_QUANT split: row = 4-bit frame.lsp[1] index, 5 LSP values per row
+{0.26822963, 0.30585295, 0.31110349, 0.36823335, 0.40774474},
+{0.24418014, 0.28970167, 0.32573757, 0.39021483, 0.41345838},
+{0.23341830, 0.30078292, 0.32893899, 0.38557330, 0.41068462},
+{0.25905868, 0.29756859, 0.34196618, 0.38531172, 0.41295227},
+{0.24290450, 0.29223618, 0.32718554, 0.37788135, 0.40332928},
+{0.24674191, 0.29749370, 0.33631226, 0.39426059, 0.42258954},
+{0.21377595, 0.33140418, 0.34067687, 0.38222077, 0.40939021},
+{0.26673481, 0.30791649, 0.34419721, 0.39611506, 0.42387524},
+{0.26121426, 0.30492544, 0.32997236, 0.38486803, 0.42023736},
+{0.24954870, 0.29372856, 0.33382735, 0.37850669, 0.41714057},
+{0.24158891, 0.30173415, 0.34128246, 0.38428575, 0.41619650},
+{0.25818908, 0.31736413, 0.34904337, 0.38769925, 0.41551358},
+{0.24450587, 0.30673453, 0.33579323, 0.37844428, 0.40557048},
+{0.25164026, 0.31225079, 0.33847794, 0.39554194, 0.42396802},
+{0.22787990, 0.31779197, 0.33831909, 0.40044111, 0.41185561},
+{0.27896860, 0.32261974, 0.35658112, 0.40206763, 0.42370448}};
+
+static const float * const evrc_lspq_full_codebooks[] = { // RATE_FULL: one table per frame.lsp[] index, each exposed as a flat float array
+    evrc_lspq_full_codebook1[0], evrc_lspq_full_codebook2[0],
+    evrc_lspq_full_codebook3[0], evrc_lspq_full_codebook4[0],
+};
+
+static const float * const evrc_lspq_half_codebooks[] = { // RATE_HALF: one table per frame.lsp[] index, each exposed as a flat float array
+    evrc_lspq_half_codebook1[0], evrc_lspq_half_codebook2[0],
+    evrc_lspq_half_codebook3[0],
+};
+
+static const float * const evrc_lspq_quant_codebooks[] = { // RATE_QUANT: one table per frame.lsp[] index, each exposed as a flat float array
+    evrc_lspq_quant_codebook1[0], evrc_lspq_quant_codebook2[0],
+};
+
+static const float * const * const evrc_lspq_codebooks[] = { // per-rate codebook sets, indexed by evrc_packet_rate
+    NULL,                      // SILENCE:      no LSP codebooks
+    evrc_lspq_quant_codebooks, // RATE_QUANT
+    NULL,                      // RATE_QUARTER: no LSP codebooks
+    evrc_lspq_half_codebooks,  // RATE_HALF
+    evrc_lspq_full_codebooks,  // RATE_FULL
+};
+
+static const uint8_t evrc_lspq_nb_codebooks[] = { // number of LSP codebooks per rate, indexed by evrc_packet_rate
+    0, // SILENCE
+    FF_ARRAY_ELEMS(evrc_lspq_quant_codebooks), // RATE_QUANT
+    0, // RATE_QUARTER
+    FF_ARRAY_ELEMS(evrc_lspq_half_codebooks), // RATE_HALF
+    FF_ARRAY_ELEMS(evrc_lspq_full_codebooks), // RATE_FULL
+};
+
+static const uint8_t evrc_lspq_full_codebooks_row_sizes[] = { // LSP values per row; same order as evrc_lspq_full_codebooks
+    FF_ARRAY_ELEMS(evrc_lspq_full_codebook1[0]),
+    FF_ARRAY_ELEMS(evrc_lspq_full_codebook2[0]),
+    FF_ARRAY_ELEMS(evrc_lspq_full_codebook3[0]),
+    FF_ARRAY_ELEMS(evrc_lspq_full_codebook4[0]),
+};
+
+static const uint8_t evrc_lspq_half_codebooks_row_sizes[] = { // LSP values per row; same order as evrc_lspq_half_codebooks
+    FF_ARRAY_ELEMS(evrc_lspq_half_codebook1[0]),
+    FF_ARRAY_ELEMS(evrc_lspq_half_codebook2[0]),
+    FF_ARRAY_ELEMS(evrc_lspq_half_codebook3[0]), // 4
+};
+
+static const uint8_t evrc_lspq_quant_codebooks_row_sizes[] = { // LSP values per row; same order as evrc_lspq_quant_codebooks
+    FF_ARRAY_ELEMS(evrc_lspq_quant_codebook1[0]), // 5
+    FF_ARRAY_ELEMS(evrc_lspq_quant_codebook2[0]), // 5
+};
+
+static const uint8_t* const evrc_lspq_codebooks_row_sizes[] = { // per-rate row-size lists, indexed by evrc_packet_rate
+    NULL, // SILENCE
+    evrc_lspq_quant_codebooks_row_sizes, // RATE_QUANT
+    NULL, // RATE_QUARTER
+    evrc_lspq_half_codebooks_row_sizes, // RATE_HALF
+    evrc_lspq_full_codebooks_row_sizes, // RATE_FULL
+};
+
+static const float pitch_gain_vq[] = { 0, 0.3, 0.55, 0.7, 0.8, 0.9, 1, 1.2 }; // 8 levels; presumably indexed by the 3-bit acb_gain -- TODO confirm
+static const float estimation_delay[] = { 55.0, 80.0, 39.0, 71.0, 33.0 }; // section 5.2.3.4 (presumably of TIA/IS-127, as cited by the decoder)
+static const uint8_t subframe_sizes[] = { 53, 53, 54 }; // presumably samples per subframe; 53 + 53 + 54 = 160
+#endif /* AVCODEC_EVRCDATA_H */
diff --git a/libavcodec/evrcdec.c b/libavcodec/evrcdec.c
new file mode 100644
index 0000000..20d0fe7
--- /dev/null
+++ b/libavcodec/evrcdec.c
@@ -0,0 +1,917 @@
+/*
+ * Enhanced Variable Rate Codec, Service Option 3 decoder
+ * Copyright (c) 2013 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Enhanced Variable Rate Codec, Service Option 3 decoder
+ * @author Paul B Mahol
+ */
+
+#include "libavutil/mathematics.h"
+#include "avcodec.h"
+#include "internal.h"
+#include "get_bits.h"
+#include "evrcdata.h"
+#include "acelp_vectors.h"
+#include "lsp.h"
+
+#define MIN_LSP_SEP (0.05 / (2.0 * M_PI)) ///< gap that adjacent LSPs must exceed across a codebook split (see decode_lspf)
+#define MIN_DELAY      20 ///< presumably the smallest valid pitch delay -- TODO confirm
+#define MAX_DELAY     120 ///< presumably the largest valid pitch delay -- TODO confirm
+#define NB_SUBFRAMES    3 ///< subframes per frame
+#define SUBFRAME_SIZE  54 ///< equals the largest entry of subframe_sizes[] (53, 53, 54)
+#define FILTER_ORDER   10 ///< number of LSPs per frame; also sizes the filter state arrays
+#define ACB_SIZE      128 ///< sizes pitch_back[] and the history part of pitch[] / postfilter_residual[]
+
+typedef enum { // values >= 0 index the evrc_lspq_* tables; packet sizes below come from buf_size2bitrate()
+    RATE_ERRS = -1, ///< packet size not recognised
+    SILENCE,        ///< 1-byte packet
+    RATE_QUANT,     ///< 3-byte packet
+    RATE_QUARTER,   ///< 6-byte packet
+    RATE_HALF,      ///< 11-byte packet
+    RATE_FULL,      ///< 23-byte packet
+} evrc_packet_rate;
+
+/**
+ * EVRC-A unpacked data frame, filled by unpack_frame(); fields not coded at the current rate are left untouched
+ */
+typedef struct EVRCAFrame {
+    uint8_t  lpc_flag;        ///< spectral change indicator
+    uint16_t lsp[4];          ///< row indices into the LSP codebooks, one per codebook
+    uint8_t  pitch_delay;     ///< pitch delay for entire frame
+    uint8_t  delay_diff;      ///< delay difference for entire frame
+    uint8_t  acb_gain[3];     ///< adaptive codebook gain (3-bit code, one per subframe)
+    uint16_t fcb_shape[3][4]; ///< fixed codebook shape (per subframe; only [n][0] is read at RATE_HALF)
+    uint8_t  fcb_gain[3];     ///< fixed codebook gain index
+    uint8_t  energy_gain;     ///< frame energy gain index
+    uint8_t  tty;             ///< tty baud rate bit
+} EVRCAFrame;
+
+typedef struct EVRCContext { // decoder private state (avctx->priv_data)
+    GetBitContext    gb;                 ///< bit reader consumed by unpack_frame()
+    evrc_packet_rate bitrate;            ///< rate of the current frame; selects bit layout and LSP codebooks
+    evrc_packet_rate last_valid_bitrate; ///< starts as RATE_QUANT (see evrc_decode_init)
+    EVRCAFrame       frame;              ///< unpacked fields of the current frame
+
+    float            lspf[FILTER_ORDER];      ///< LSPs written by decode_lspf()
+    float            prev_lspf[FILTER_ORDER]; ///< starts as (i + 1) * 0.048
+    float            synthesis[FILTER_ORDER]; ///< zeroed at init
+    float            postfilter_fir[FILTER_ORDER];
+    float            postfilter_iir[FILTER_ORDER];
+    float            postfilter_residual[ACB_SIZE + SUBFRAME_SIZE];
+    float            pitch_delay;
+    float            prev_pitch_delay; ///< starts as 40.0
+    float            avg_acb_gain;  ///< average adaptive codebook gain
+    float            avg_fcb_gain;  ///< average fixed codebook gain
+    float            pitch[ACB_SIZE + FILTER_ORDER + SUBFRAME_SIZE]; ///< first ACB_SIZE entries zeroed at init
+    float            pitch_back[ACB_SIZE]; ///< zeroed at init
+    float            interpolation_coeffs[136]; ///< 8 x 17 table filled by evrc_decode_init()
+    float            energy_vector[NB_SUBFRAMES];
+    float            fade_scale; ///< starts as 1.0
+    float            last;
+
+    uint8_t          prev_energy_gain;
+    uint8_t          prev_error_flag; ///< starts as 0
+    uint8_t          warned_buf_mismatch_bitrate; ///< set once the size/rate-byte mismatch warning has been logged
+} EVRCContext;
+
+/**
+ * Frame unpacking for RATE_FULL, RATE_HALF and RATE_QUANT: fills e->frame by reading e->gb in bitstream order.
+ *
+ * @param e the context; for any other e->bitrate nothing is read and e->frame is left unchanged
+ *
+ * TIA/IS-127 Table 4.21-1
+ */
+static void unpack_frame(EVRCContext *e)
+{
+    EVRCAFrame *frame = &e->frame;
+    GetBitContext *gb = &e->gb;
+
+    switch (e->bitrate) { // no default: the remaining rates carry no fields handled here
+    case RATE_FULL: // 171 bits read: 41 header bits + 3 subframes x 43 bits + 1 tty bit
+        frame->lpc_flag        = get_bits1(gb);
+        frame->lsp[0]          = get_bits(gb,  6);
+        frame->lsp[1]          = get_bits(gb,  6);
+        frame->lsp[2]          = get_bits(gb,  9);
+        frame->lsp[3]          = get_bits(gb,  7);
+        frame->pitch_delay     = get_bits(gb,  7);
+        frame->delay_diff      = get_bits(gb,  5);
+        frame->acb_gain[0]     = get_bits(gb,  3);
+        frame->fcb_shape[0][0] = get_bits(gb,  8);
+        frame->fcb_shape[0][1] = get_bits(gb,  8);
+        frame->fcb_shape[0][2] = get_bits(gb,  8);
+        frame->fcb_shape[0][3] = get_bits(gb, 11);
+        frame->fcb_gain[0]     = get_bits(gb,  5);
+        frame->acb_gain[1]     = get_bits(gb,  3);
+        frame->fcb_shape[1][0] = get_bits(gb,  8);
+        frame->fcb_shape[1][1] = get_bits(gb,  8);
+        frame->fcb_shape[1][2] = get_bits(gb,  8);
+        frame->fcb_shape[1][3] = get_bits(gb, 11);
+        frame->fcb_gain    [1] = get_bits(gb,  5);
+        frame->acb_gain    [2] = get_bits(gb,  3);
+        frame->fcb_shape[2][0] = get_bits(gb,  8);
+        frame->fcb_shape[2][1] = get_bits(gb,  8);
+        frame->fcb_shape[2][2] = get_bits(gb,  8);
+        frame->fcb_shape[2][3] = get_bits(gb, 11);
+        frame->fcb_gain    [2] = get_bits(gb,  5);
+        frame->tty             = get_bits1(gb);
+        break;
+    case RATE_HALF: // 80 bits read: 29 header bits + 3 subframes x 17 bits
+        frame->lsp         [0] = get_bits(gb,  7);
+        frame->lsp         [1] = get_bits(gb,  7);
+        frame->lsp         [2] = get_bits(gb,  8);
+        frame->pitch_delay     = get_bits(gb,  7);
+        frame->acb_gain    [0] = get_bits(gb,  3);
+        frame->fcb_shape[0][0] = get_bits(gb, 10);
+        frame->fcb_gain    [0] = get_bits(gb,  4);
+        frame->acb_gain    [1] = get_bits(gb,  3);
+        frame->fcb_shape[1][0] = get_bits(gb, 10);
+        frame->fcb_gain    [1] = get_bits(gb,  4);
+        frame->acb_gain    [2] = get_bits(gb,  3);
+        frame->fcb_shape[2][0] = get_bits(gb, 10);
+        frame->fcb_gain    [2] = get_bits(gb,  4);
+        break;
+    case RATE_QUANT: // 16 bits read
+        frame->lsp         [0] = get_bits(gb, 4);
+        frame->lsp         [1] = get_bits(gb, 4);
+        frame->energy_gain     = get_bits(gb, 8);
+        break;
+    }
+}
+
+static evrc_packet_rate buf_size2bitrate(const int buf_size)
+{   /* Map a packet size in bytes to its rate; any other size is RATE_ERRS. */
+    if (buf_size == 23)
+        return RATE_FULL;
+    if (buf_size == 11)
+        return RATE_HALF;
+    if (buf_size == 6)
+        return RATE_QUARTER;
+    if (buf_size == 3)
+        return RATE_QUANT;
+    return buf_size == 1 ? SILENCE : RATE_ERRS;
+}
+
+/**
+ * Determine the bitrate from the frame size and/or the first byte of the frame.
+ *
+ * @param avctx the AV codec context
+ * @param buf_size pointer to the buffer length; decremented by one when the leading rate byte is consumed
+ * @param buf pointer to the buffer pointer; advanced past the leading rate byte when it is consumed
+ *
+ * @return the bitrate on success,
+ *         RATE_ERRS  if the bitrate cannot be satisfactorily determined
+ */
+static evrc_packet_rate determine_bitrate(AVCodecContext *avctx,
+                                          int *buf_size,
+                                          const uint8_t **buf)
+{
+    evrc_packet_rate bitrate;
+
+    if ((bitrate = buf_size2bitrate(*buf_size)) >= 0) { // size matches a packet that starts with a rate byte
+        if (bitrate > **buf) { // rate byte claims a lower rate than the size implies: warn once, trust the byte
+            EVRCContext *e = avctx->priv_data;
+            if (!e->warned_buf_mismatch_bitrate) {
+                av_log(avctx, AV_LOG_WARNING,
+                       "Claimed bitrate and buffer size mismatch.\n");
+                e->warned_buf_mismatch_bitrate = 1;
+            }
+            bitrate = **buf;
+        } else if (bitrate < **buf) { // rate byte claims more data than the packet holds
+            av_log(avctx, AV_LOG_ERROR,
+                   "Buffer is too small for the claimed bitrate.\n");
+            return RATE_ERRS;
+        }
+        (*buf)++; // skip the rate byte
+        *buf_size -= 1;
+    } else if ((bitrate = buf_size2bitrate(*buf_size + 1)) >= 0) { // size fits a packet without its rate byte
+        av_log(avctx, AV_LOG_DEBUG,
+               "Bitrate byte is missing, guessing the bitrate from packet size.\n");
+    } else
+        return RATE_ERRS;
+
+    return bitrate;
+}
+
+static void warn_insufficient_frame_quality(AVCodecContext *avctx,
+                                            const char *message)
+{   /* Log a warning that prefixes the message with the current frame number. */
+    const int frame_no = avctx->frame_number;
+    av_log(avctx, AV_LOG_WARNING, "Frame #%d, %s\n", frame_no, message);
+}
+
+/**
+ * Initialize the speech codec according to the specification: mono float output, reset state, build interpolation table.
+ *
+ * TIA/IS-127 5.2
+ */
+static av_cold int evrc_decode_init(AVCodecContext *avctx)
+{
+    EVRCContext *e = avctx->priv_data;
+    int i, n, idx = 0;
+    float denom = 2.0 / (2.0 * 8.0 + 1.0); // 2 / 17
+
+    avctx->channels       = 1;
+    avctx->channel_layout = AV_CH_LAYOUT_MONO;
+    avctx->sample_fmt     = AV_SAMPLE_FMT_FLT;
+
+    for (i = 0; i < FILTER_ORDER; i++) {
+        e->prev_lspf[i] = (i + 1) * 0.048; // evenly spaced starting LSPs
+        e->synthesis[i] = 0.0;
+    }
+
+    for (i = 0; i < ACB_SIZE; i++) // only the first ACB_SIZE entries of pitch[] are cleared here
+        e->pitch[i] = e->pitch_back[i] = 0.0;
+
+    e->last_valid_bitrate = RATE_QUANT;
+    e->prev_pitch_delay   = 40.0;
+    e->fade_scale         = 1.0;
+    e->prev_error_flag    = 0;
+    e->avg_acb_gain = e->avg_fcb_gain = 0.0;
+
+    for (i = 0; i < 8; i++) { // 8 rows of 17 coefficients = 136 = size of interpolation_coeffs[]
+        float tt = ((float)i - 8.0 / 2.0) / 8.0; // runs from -0.5 to 0.375 in steps of 1/8
+
+        for (n = -8; n <= 8; n++, idx++) {
+            float arg1 = M_PI * 0.9 * (tt - n);
+            float arg2 = M_PI * (tt - n);
+
+            e->interpolation_coeffs[idx] = 0.9;
+            if (arg1) // when arg1 == 0 the coefficient stays at 0.9 (avoids 0 / 0)
+                e->interpolation_coeffs[idx] *= (0.54 + 0.46 * cos(arg2 * denom)) *
+                                                 sin(arg1) / arg1;
+        }
+    }
+
+    return 0; // cannot fail
+}
+
+/**
+ * Decode the 10 vector quantized line spectral pair frequencies from the LSP
+ * transmission codes of any bitrate and check for badly received packets.
+ *
+ * @param e the context; reads e->bitrate and e->frame.lsp[], writes e->lspf[]
+ *
+ * @return 0 on success, -1 if the packet is badly received
+ *
+ * TIA/IS-127 5.2.1, 5.7.1
+ */
+static int decode_lspf(EVRCContext *e)
+{
+    const float * const *codebooks = evrc_lspq_codebooks[e->bitrate]; // NOTE(review): RATE_ERRS (-1) would index out of bounds; presumably callers filter it -- confirm
+    int i, j, k = 0;
+
+    for (i = 0; i < evrc_lspq_nb_codebooks[e->bitrate]; i++) { // zero iterations for SILENCE / RATE_QUARTER, so the null table entries are never dereferenced
+        int row_size = evrc_lspq_codebooks_row_sizes[e->bitrate][i];
+        const float *codebook = codebooks[i];
+
+        for (j = 0; j < row_size; j++) // copy row frame.lsp[i] of the flat codebook into the next slots of lspf[]
+            e->lspf[k++] = codebook[e->frame.lsp[i] * row_size + j];
+    }
+
+    // check for monotonic LSPs
+    for (i = 1; i < FILTER_ORDER; i++)
+        if (e->lspf[i] <= e->lspf[i - 1])
+            return -1;
+
+    // check for minimum separation of LSPs at the splits
+    for (i = 0, k = 0; i < evrc_lspq_nb_codebooks[e->bitrate] - 1; i++) {
+        k += evrc_lspq_codebooks_row_sizes[e->bitrate][i]; // k = index of the first LSP taken from codebook i + 1
+        if (e->lspf[k] - e->lspf[k - 1] <= MIN_LSP_SEP)
+            return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Interpolation of LSP parameters: blend prev and lsp for subframe index
+ * (0..2) with weights (1 - f, f); presumably ilsp = (1 - f) * prev + f * lsp.
+ * TIA/IS-127 5.2.3.1, 5.7.3.2
+ */
+static void interpolate_lsp(float *ilsp, const float *lsp,
+                            const float *prev, int index)
+{
+    static const float lsp_interpolation_factors[] = { 0.1667, 0.5, 0.8333 };
+    ff_weighted_vector_sumf(ilsp, prev, lsp,
+                            1.0 - lsp_interpolation_factors[index],
+                            lsp_interpolation_factors[index], FILTER_ORDER);
+}
+
+/*
+ * Reconstruction of the delay contour: dst[0..2] blend prev and current with
+ * the factors at index, index + 1 and index + 2, so index must be 0..2.
+ * TIA/IS-127 5.2.2.3.2
+ */
+static void interpolate_delay(float *dst, float current, float prev, int index)
+{
+    static const float d_interpolation_factors[] = { 0, 0.3313, 0.6625, 1, 1 };
+    dst[0] = (1.0 - d_interpolation_factors[index    ]) * prev
+                  + d_interpolation_factors[index    ]  * current;
+    dst[1] = (1.0 - d_interpolation_factors[index + 1]) * prev
+                  + d_interpolation_factors[index + 1]  * current;
+    dst[2] = (1.0 - d_interpolation_factors[index + 2]) * prev
+                  + d_interpolation_factors[index + 2]  * current;
+}
+
+/*
+ * Convert the quantized, interpolated line spectral frequencies,
+ * to prediction coefficients.
+ * Writes FILTER_ORDER coefficients to ilpc.
+ * TIA/IS-127 5.2.3.2, 4.7.2.2
+ */
+static void decode_predictor_coeffs(const float *ilspf, float *ilpc)
+{
+    double lsp[FILTER_ORDER];
+    float a[FILTER_ORDER / 2 + 1], b[FILTER_ORDER / 2 + 1];
+    float a1[FILTER_ORDER / 2] = { 0 };
+    float a2[FILTER_ORDER / 2] = { 0 };
+    float b1[FILTER_ORDER / 2] = { 0 };
+    float b2[FILTER_ORDER / 2] = { 0 };
+    int i, k;
+
+    ff_acelp_lsf2lspd(lsp, ilspf, FILTER_ORDER);
+    /* Run FILTER_ORDER + 1 steps; a[0] and b[0] are non-zero only for k = 0 and 1. */
+    for (k = 0; k <= FILTER_ORDER; k++) {
+        a[0] = k < 2 ? 0.25 : 0;
+        b[0] = k < 2 ? k < 1 ? 0.25 : -0.25 : 0;
+        /* a1/a2 and b1/b2 hold each stage's input from one and two steps ago. */
+        for (i = 0; i < FILTER_ORDER / 2; i++) {
+            a[i + 1] = a[i] - 2 * lsp[i * 2    ] * a1[i] + a2[i];
+            b[i + 1] = b[i] - 2 * lsp[i * 2 + 1] * b1[i] + b2[i];
+            a2[i] = a1[i];
+            a1[i] = a[i];
+            b2[i] = b1[i];
+            b1[i] = b[i];
+        }
+        /* Step 0 only primes the delay lines; step k >= 1 yields ilpc[k - 1]. */
+        if (k)
+            ilpc[k - 1] = 2.0 * (a[FILTER_ORDER / 2] + b[FILTER_ORDER / 2]);
+    }
+}
+
+static void bl_intrp(EVRCContext *e, float *ex, float delay)
+{
+    float *f;
+    int offset, i, coef_idx;
+    int16_t t;
+    /* Overwrite ex[0] with a 17-tap weighted sum of the samples around ex[-delay]. */
+    offset = lrintf(delay);
+    /* t is the rounding remainder in 1/8-sample steps; it selects the coefficient phase. */
+    t = (offset - delay + 0.5) * 8.0 + 0.5;
+    if (t == 8) {
+        t = 0;
+        offset--;
+    }
+    /* The tap window is ex[-offset - 8] .. ex[-offset + 8]. */
+    f = ex - offset - 8;
+    /* Each phase owns 2 * 8 + 1 consecutive entries of interpolation_coeffs. */
+    coef_idx = t * (2 * 8 + 1);
+
+    ex[0] = 0.0;
+    for (i = 0; i < 2 * 8 + 1; i++)
+        ex[0] += e->interpolation_coeffs[coef_idx + i] * f[i];
+}
+
+/*
+ * Adaptive codebook excitation.
+ * Fills excitation[0 .. length + 9]; only the first length samples are scaled by gain.
+ * TIA/IS-127 5.2.2.3.3, 4.12.5.2
+ */
+static void acb_excitation(EVRCContext *e, float *excitation, float gain,
+                           const float delay[3], int length)
+{
+    float denom, locdelay, dpr, invl;
+    int i;
+
+    invl = 1.0 / ((float) length);
+    dpr = length;
+
+    /* first length samples: the delay ramps linearly from delay[0] towards delay[1] */
+    denom = (delay[1] - delay[0]) * invl;
+    for (i = 0; i < dpr; i++) {
+        locdelay = delay[0] + i * denom;
+        bl_intrp(e, excitation + i, locdelay);
+    }
+
+    denom = (delay[2] - delay[1]) * invl;
+    /* 10 further samples: the delay ramps from delay[1] towards delay[2] */
+    for (i = dpr; i < dpr + 10; i++) {
+        locdelay = delay[1] + (i - dpr) * denom;
+        bl_intrp(e, excitation + i, locdelay);
+    }
+
+    for (i = 0; i < length; i++)
+        excitation[i] *= gain;
+}
+
+static void decode_8_pulses_35bits(const uint16_t *fixed_index, float *cod)
+{
+    int i, pos1, pos2, offset;
+    /* Bits 9-10 of fixed_index[3] give a position offset shared by all pulses. */
+    offset = (fixed_index[3] >> 9) & 3;
+    /* Indices 0-2: two pulses each; positions from the low 7 bits, sign in bit 7. */
+    for (i = 0; i < 3; i++) {
+        pos1 = ((fixed_index[i] & 0x7f) / 11) * 5 + ((i + offset) % 5);
+        pos2 = ((fixed_index[i] & 0x7f) % 11) * 5 + ((i + offset) % 5);
+
+        cod[pos1] = (fixed_index[i] & 0x80) ? -1.0 : 1.0;
+        /* pos2 before pos1: store the opposite sign; otherwise add the same sign. */
+        if (pos2 < pos1)
+            cod[pos2]  = -cod[pos1];
+        else
+            cod[pos2] +=  cod[pos1];
+    }
+    /* Index 3: two single pulses with their own sign bits (bit 8 and bit 7). */
+    pos1 = ((fixed_index[3] & 0x7f) / 11) * 5 + ((3 + offset) % 5);
+    pos2 = ((fixed_index[3] & 0x7f) % 11) * 5 + ((4 + offset) % 5);
+
+    cod[pos1] = (fixed_index[3] & 0x100) ? -1.0 : 1.0;
+    cod[pos2] = (fixed_index[3] & 0x80 ) ? -1.0 : 1.0;
+}
+
+static void decode_3_pulses_10bits(uint16_t fixed_index, float *cod)
+{
+    float sign;
+    int pos;
+    /* Bit 9 is a common sign; three 3-bit fields select the pulse positions. */
+    sign = (fixed_index & 0x200) ? -1.0 : 1.0;
+    /* Pulses are accumulated at 7 * k + 4, 7 * k + 2 and 7 * k with signs +, -, +. */
+    pos = ((fixed_index        & 0x7) * 7) + 4;
+    cod[pos] += sign;
+    pos = (((fixed_index >> 3) & 0x7) * 7) + 2;
+    cod[pos] -= sign;
+    pos = (((fixed_index >> 6) & 0x7) * 7);
+    cod[pos] += sign;
+}
+
+/*
+ * Reconstruction of ACELP fixed codebook excitation for full and half rate.
+ * Pulses are written into excitation, which the caller passes in zeroed.
+ * TIA/IS-127 5.2.3.7
+ */
+static void fcb_excitation(EVRCContext *e, const uint16_t *codebook,
+                           float *excitation, float pitch_gain,
+                           int pitch_lag, int subframe_size)
+{
+    int i;
+    /* RATE_FULL reads four indices (8 pulses); any other rate reads one (3 pulses). */
+    if (e->bitrate == RATE_FULL)
+        decode_8_pulses_35bits(codebook, excitation);
+    else
+        decode_3_pulses_10bits(*codebook, excitation);
+
+    pitch_gain = av_clipf(pitch_gain, 0.2, 0.9);
+    /* Add a copy of the excitation delayed by pitch_lag, scaled by pitch_gain. */
+    for (i = pitch_lag; i < subframe_size; i++)
+        excitation[i] += pitch_gain * excitation[i - pitch_lag];
+}
+
+/**
+ * Synthesis of the decoder output signal.
+ *
+ * param[in]     in              input signal
+ * param[in]     filter_coeffs   LPC coefficients
+ * param[in/out] memory          synthesis filter memory
+ * param         buffer_length   amount of data to process
+ * param[out]    samples         output samples
+ * in and samples may be the same buffer (in[i] is read before samples[i] is written).
+ * TIA/IS-127 5.2.3.15, 5.7.3.4
+ */
+static void synthesis_filter(const float *in, const float *filter_coeffs,
+                             float *memory, int buffer_length, float *samples)
+{
+    int i, j;
+    /* Recursive filter: memory[j] holds the output from j + 1 samples ago. */
+    for (i = 0; i < buffer_length; i++) {
+        samples[i] = in[i];
+        for (j = FILTER_ORDER - 1; j > 0; j--) {
+            samples[i] -= filter_coeffs[j] * memory[j];
+            memory[j]   = memory[j - 1];
+        }
+        samples[i] -= filter_coeffs[0] * memory[0];
+        memory[0]   = samples[i];
+    }
+}
+
+static void bandwidth_expansion(float *coeff, const float *inbuf, float gamma)
+{
+    double fac = gamma;
+    int i;
+    /* coeff[i] = inbuf[i] * gamma^(i + 1); callers may pass coeff == inbuf. */
+    for (i = 0; i < FILTER_ORDER; i++) {
+        coeff[i] = inbuf[i] * fac;
+        fac *= gamma;
+    }
+}
+
+static void residual_filter(float *output, const float *input,
+                            const float *coef, float *memory, int length)
+{
+    float sum;
+    int i, j;
+    /* Non-recursive filter: output[i] = input[i] + sum over j of coef[j] * input[i - 1 - j]. */
+    for (i = 0; i < length; i++) {
+        sum = input[i];
+
+        for (j = FILTER_ORDER - 1; j > 0; j--) {
+            sum      += coef[j] * memory[j];
+            memory[j] = memory[j - 1];
+        }
+        sum += coef[0] * memory[0];
+        memory[0] = input[i]; /* memory keeps past inputs, not past outputs */
+        output[i] = sum;
+    }
+}
+
+/*
+ * Postfilter parameters, indexed by e->bitrate (TIA/IS-127 Table 5.9.1-1).
+ */
+static const struct PfCoeff {
+    float tilt;   /* weight of the previous sample in the tilt compensation */
+    float ltgain; /* scale of the delayed residual in the long term postfilter */
+    float p1;     /* bandwidth expansion factor for the residual filter */
+    float p2;     /* bandwidth expansion factor for the synthesis filter */
+} postfilter_coeffs[5] = {
+    { 0.0 , 0.0 , 0.0 , 0.0  },
+    { 0.0 , 0.0 , 0.57, 0.57 },
+    { 0.0 , 0.0 , 0.0 , 0.0  },
+    { 0.35, 0.50, 0.50, 0.75 },
+    { 0.20, 0.50, 0.57, 0.75 },
+};
+
+/*
+ * Adaptive postfilter.
+ * Filters length samples from in to out; idx is the default long term lag.
+ * TIA/IS-127 5.9
+ */
+static void postfilter(EVRCContext *e, float *in, const float *coeff,
+                       float *out, int idx, const struct PfCoeff *pfc,
+                       int length)
+{
+    float wcoef1[FILTER_ORDER], wcoef2[FILTER_ORDER],
+          scratch[SUBFRAME_SIZE], temp[SUBFRAME_SIZE],
+          mem[SUBFRAME_SIZE];
+    float sum1 = 0.0, sum2 = 0.0, gamma, gain;
+    float tilt = pfc->tilt;
+    int i, n, best;
+    /* Weighted coefficients: wcoef1 for the residual filter, wcoef2 for the synthesis filter. */
+    bandwidth_expansion(wcoef1, coeff, pfc->p1);
+    bandwidth_expansion(wcoef2, coeff, pfc->p2);
+
+    /* Tilt compensation filter, TIA/IS-127 5.9.1 */
+    for (i = 0; i < length - 1; i++)
+        sum2 += in[i] * in[i + 1];
+    if (sum2 < 0.0)
+        tilt = 0.0;
+    /* scratch[i] = in[i] - tilt * previous sample; e->last carries it across calls. */
+    for (i = 0; i < length; i++) {
+        scratch[i] = in[i] - tilt * e->last;
+        e->last = in[i];
+    }
+
+    /* Short term residual filter, TIA/IS-127 5.9.2 */
+    residual_filter(&e->postfilter_residual[ACB_SIZE], scratch, wcoef1, e->postfilter_fir, length);
+    /* NOTE(review): FFMIN/FFMAX below always span [MIN_DELAY, MAX_DELAY], not idx +- 3 -- confirm intent. */
+    /* Long term postfilter: keep the lag with the largest positive correlation */
+    best = idx;
+    for (i = FFMIN(MIN_DELAY, idx - 3); i <= FFMAX(MAX_DELAY, idx + 3); i++) {
+        for (n = ACB_SIZE, sum2 = 0; n < ACB_SIZE + length; n++)
+            sum2 += e->postfilter_residual[n] * e->postfilter_residual[n - i];
+        if (sum2 > sum1) {
+            sum1 = sum2;
+            best = i;
+        }
+    }
+    /* sum1: energy of the residual delayed by best; sum2: its correlation with the current one. */
+    for (i = ACB_SIZE, sum1 = 0; i < ACB_SIZE + length; i++)
+        sum1 += e->postfilter_residual[i - best] * e->postfilter_residual[i - best];
+    for (i = ACB_SIZE, sum2 = 0; i < ACB_SIZE + length; i++)
+        sum2 += e->postfilter_residual[i] * e->postfilter_residual[i - best];
+    /* The residual is passed through unchanged for RATE_QUANT, a zero product, or gamma < 0.5. */
+    if (sum2 * sum1 == 0 || e->bitrate == RATE_QUANT) {
+        memcpy(temp, e->postfilter_residual + ACB_SIZE, length * sizeof(float));
+    } else {
+        gamma = sum2 / sum1;
+        if (gamma < 0.5)
+            memcpy(temp, e->postfilter_residual + ACB_SIZE, length * sizeof(float));
+        else {
+            gamma = FFMIN(gamma, 1.0);
+
+            for (i = 0; i < length; i++) {
+                temp[i] = e->postfilter_residual[ACB_SIZE + i] + gamma *
+                    pfc->ltgain * e->postfilter_residual[ACB_SIZE + i - best];
+            }
+        }
+    }
+    /* Trial synthesis on copies (scratch, mem) so e->postfilter_iir stays untouched. */
+    memcpy(scratch, temp, length * sizeof(float));
+    memcpy(mem, e->postfilter_iir, FILTER_ORDER * sizeof(float));
+    synthesis_filter(scratch, wcoef2, mem, length, scratch);
+
+    /* Gain computation, TIA/IS-127 5.9.4-2 */
+    for (i = 0, sum1 = 0, sum2 = 0; i < length; i++) {
+        sum1 += in[i] * in[i];
+        sum2 += scratch[i] * scratch[i];
+    }
+    gain = sum2 ? sqrt(sum1 / sum2) : 1.0;
+
+    for (i = 0; i < length; i++)
+        temp[i] *= gain;
+
+    /* Short term postfilter */
+    synthesis_filter(temp, wcoef2, e->postfilter_iir, length, out);
+    /* Slide the residual history down by length samples. */
+    memmove(e->postfilter_residual,
+           e->postfilter_residual + length, ACB_SIZE * sizeof(float));
+}
+
+static void frame_erasure(EVRCContext *e, float *samples)
+{
+    float ilspf[FILTER_ORDER], ilpc[FILTER_ORDER], idelay[NB_SUBFRAMES],
+          tmp[SUBFRAME_SIZE + 6], f;
+    int i, j;
+    /* Synthesize a replacement frame into samples from the saved decoder state. */
+    for (i = 0; i < FILTER_ORDER; i++) { /* LSPs come from the previous frame */
+        if (e->bitrate != RATE_QUANT)
+            e->lspf[i] = e->prev_lspf[i] * 0.875 + 0.125 * (i + 1) * 0.048;
+        else
+            e->lspf[i] = e->prev_lspf[i];
+    }
+    /* Decay the average ACB gain on consecutive erasures, then choose the rate to emulate. */
+    if (e->prev_error_flag)
+        e->avg_acb_gain *= 0.75;
+    if (e->bitrate == RATE_FULL)
+        memcpy(e->pitch_back, e->pitch, ACB_SIZE * sizeof(float));
+    if (e->last_valid_bitrate == RATE_QUANT)
+        e->bitrate = RATE_QUANT;
+    else
+        e->bitrate = RATE_FULL;
+    /* From here on e->bitrate is either RATE_QUANT or RATE_FULL. */
+    if (e->bitrate == RATE_FULL || e->bitrate == RATE_HALF) {
+        e->pitch_delay = e->prev_pitch_delay;
+    } else {
+        float sum = 0;
+
+        idelay[0] = idelay[1] = idelay[2] = MIN_DELAY;
+        /* Every subframe gets 10^(mean of the previous frame's energy table row). */
+        for (i = 0; i < NB_SUBFRAMES; i++)
+            sum += evrc_energy_quant[e->prev_energy_gain][i];
+        sum /= (float) NB_SUBFRAMES;
+        sum  = pow(10, sum);
+        for (i = 0; i < NB_SUBFRAMES; i++)
+            e->energy_vector[i] = sum;
+    }
+
+    if (fabs(e->pitch_delay - e->prev_pitch_delay) > 15)
+        e->prev_pitch_delay = e->pitch_delay;
+
+    for (i = 0; i < NB_SUBFRAMES; i++) {
+        int subframe_size = subframe_sizes[i];
+        int pitch_lag;
+
+        interpolate_lsp(ilspf, e->lspf, e->prev_lspf, i);
+        /* Low average ACB gain: use the fixed estimation_delay table instead of the pitch delay. */
+        if (e->bitrate != RATE_QUANT) {
+            if (e->avg_acb_gain < 0.3) {
+                idelay[0] = estimation_delay[i];
+                idelay[1] = estimation_delay[i + 1];
+                idelay[2] = estimation_delay[i + 2];
+            } else {
+                interpolate_delay(idelay, e->pitch_delay, e->prev_pitch_delay, i);
+            }
+        }
+
+        pitch_lag = lrintf((idelay[1] + idelay[0]) / 2.0);
+        decode_predictor_coeffs(ilspf, ilpc);
+        /* Excitation: faded adaptive codebook output, or the flat energy value for RATE_QUANT. */
+        if (e->bitrate != RATE_QUANT) {
+            acb_excitation(e, e->pitch + ACB_SIZE,
+                           e->avg_acb_gain, idelay, subframe_size);
+            for (j = 0; j < subframe_size; j++)
+                e->pitch[ACB_SIZE + j] *= e->fade_scale;
+            e->fade_scale = FFMAX(e->fade_scale - 0.05, 0.0);
+        } else {
+            for (j = 0; j < subframe_size; j++)
+                e->pitch[ACB_SIZE + j] = e->energy_vector[i];
+        }
+
+        memmove(e->pitch, e->pitch + subframe_size, ACB_SIZE * sizeof(float));
+        /* The shift above leaves e->pitch + ACB_SIZE itself unchanged. */
+        if (e->bitrate != RATE_QUANT && e->avg_acb_gain < 0.4) {
+            f = 0.1 * e->avg_fcb_gain;
+            for (j = 0; j < subframe_size; j++)
+                e->pitch[ACB_SIZE + j] += f;
+        } else if (e->bitrate == RATE_QUANT) {
+            for (j = 0; j < subframe_size; j++)
+                e->pitch[ACB_SIZE + j] = e->energy_vector[i];
+        }
+
+        synthesis_filter(e->pitch + ACB_SIZE, ilpc,
+                         e->synthesis, subframe_size, tmp);
+        postfilter(e, tmp, ilpc, samples, pitch_lag,
+                   &postfilter_coeffs[e->bitrate], subframe_size);
+
+        samples += subframe_size;
+    }
+}
+
+static int evrc_decode_frame(AVCodecContext *avctx, void *data,
+                             int *got_frame_ptr, AVPacket *avpkt)
+{
+    const uint8_t *buf = avpkt->data;
+    AVFrame *frame     = data;
+    EVRCContext *e     = avctx->priv_data;
+    int buf_size       = avpkt->size;
+    float ilspf[FILTER_ORDER], ilpc[FILTER_ORDER], idelay[NB_SUBFRAMES];
+    float *samples;
+    int   i, j, ret, error_flag = 0;
+    /* Every packet produces one output frame of 160 samples. */
+    frame->nb_samples = 160;
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+    samples = (float *)frame->data[0];
+    /* Frames that cannot be decoded normally are handed to frame_erasure() via goto. */
+    if ((e->bitrate = determine_bitrate(avctx, &buf_size, &buf)) == RATE_ERRS) {
+        warn_insufficient_frame_quality(avctx, "bitrate cannot be determined.");
+        goto erasure;
+    }
+    if (e->bitrate <= SILENCE || e->bitrate == RATE_QUARTER)
+        goto erasure;
+    if (e->bitrate == RATE_QUANT && e->last_valid_bitrate == RATE_FULL
+                                 && !e->prev_error_flag)
+        goto erasure;
+
+    init_get_bits(&e->gb, buf, 8 * buf_size);
+    memset(&e->frame, 0, sizeof(EVRCAFrame));
+
+    unpack_frame(e);
+    /* An all-zero frame (non RATE_QUANT) or an all-ones RATE_QUANT frame counts as erased. */
+    if (e->bitrate != RATE_QUANT) {
+        uint8_t *p = (uint8_t *) &e->frame;
+        for (i = 0; i < sizeof(EVRCAFrame); i++) {
+            if (p[i])
+                break;
+        }
+        if (i == sizeof(EVRCAFrame))
+            goto erasure;
+    } else if (e->frame.lsp[0] == 0xf &&
+               e->frame.lsp[1] == 0xf &&
+               e->frame.energy_gain == 0xff) {
+        goto erasure;
+    }
+
+    if (decode_lspf(e) < 0)
+        goto erasure;
+
+    if (e->bitrate == RATE_FULL || e->bitrate == RATE_HALF) {
+        /* Pitch delay parameter checking as per TIA/IS-127 5.1.5.1 */
+        if (e->frame.pitch_delay > MAX_DELAY - MIN_DELAY)
+            goto erasure;
+
+        e->pitch_delay = e->frame.pitch_delay + MIN_DELAY;
+
+        /* Delay diff parameter checking as per TIA/IS-127 5.1.5.2 */
+        if (e->frame.delay_diff) {
+            int p = e->pitch_delay - e->frame.delay_diff + 16;
+            if (p < MIN_DELAY || p > MAX_DELAY)
+                goto erasure;
+        }
+
+        /* Delay contour reconstruction as per TIA/IS-127 5.2.2.2 */
+        if (e->frame.delay_diff &&
+            e->bitrate == RATE_FULL && e->prev_error_flag) {
+            float delay;
+            /* Restart from the pitch buffer saved before the erased frame. */
+            memcpy(e->pitch, e->pitch_back, ACB_SIZE * sizeof(float));
+
+            delay = e->prev_pitch_delay;
+            e->prev_pitch_delay = delay - e->frame.delay_diff + 16.0;
+
+            if (fabs(e->pitch_delay - delay) > 15)
+                delay = e->pitch_delay;
+
+            for (i = 0; i < NB_SUBFRAMES; i++) {
+                int subframe_size = subframe_sizes[i];
+
+                interpolate_delay(idelay, delay, e->prev_pitch_delay, i);
+                acb_excitation(e, e->pitch + ACB_SIZE, e->avg_acb_gain, idelay, subframe_size);
+                memmove(e->pitch, e->pitch + subframe_size, ACB_SIZE * sizeof(float));
+            }
+        }
+
+        /* Smoothing of the decoded delay as per TIA/IS-127 5.2.2.5 */
+        if (fabs(e->pitch_delay - e->prev_pitch_delay) > 15)
+            e->prev_pitch_delay = e->pitch_delay;
+
+        e->avg_acb_gain = e->avg_fcb_gain = 0.0;
+    } else {
+        idelay[0] = idelay[1] = idelay[2] = MIN_DELAY;
+
+        /* Decode frame energy vectors as per TIA/IS-127 5.7.2 */
+        for (i = 0; i < NB_SUBFRAMES; i++)
+            e->energy_vector[i] = pow(10, evrc_energy_quant[e->frame.energy_gain][i]);
+        e->prev_energy_gain = e->frame.energy_gain;
+    }
+    /* Per-subframe excitation, synthesis and postfiltering. */
+    for (i = 0; i < NB_SUBFRAMES; i++) {
+        float tmp[SUBFRAME_SIZE + 6] = { 0 };
+        int subframe_size = subframe_sizes[i];
+        int pitch_lag;
+
+        interpolate_lsp(ilspf, e->lspf, e->prev_lspf, i);
+
+        if (e->bitrate != RATE_QUANT)
+            interpolate_delay(idelay, e->pitch_delay, e->prev_pitch_delay, i);
+
+        pitch_lag = lrintf((idelay[1] + idelay[0]) / 2.0);
+        decode_predictor_coeffs(ilspf, ilpc);
+
+        /* Bandwidth expansion as per TIA/IS-127 5.2.3.3 */
+        if (e->frame.lpc_flag && e->prev_error_flag)
+            bandwidth_expansion(ilpc, ilpc, 0.75);
+
+        if (e->bitrate != RATE_QUANT) {
+            float acb_sum, f;
+            /* f: fixed codebook gain; acb_sum: adaptive codebook gain from pitch_gain_vq. */
+            f = exp((e->bitrate == RATE_HALF ? 0.5 : 0.25)
+                         * (e->frame.fcb_gain[i] + 1));
+            acb_sum = pitch_gain_vq[e->frame.acb_gain[i]];
+            e->avg_acb_gain += acb_sum / NB_SUBFRAMES;
+            e->avg_fcb_gain += f / NB_SUBFRAMES;
+
+            acb_excitation(e, e->pitch + ACB_SIZE,
+                           acb_sum, idelay, subframe_size);
+            fcb_excitation(e, e->frame.fcb_shape[i], tmp,
+                           acb_sum, pitch_lag, subframe_size);
+
+            /* Total excitation generation as per TIA/IS-127 5.2.3.9 */
+            for (j = 0; j < subframe_size; j++)
+                e->pitch[ACB_SIZE + j] += f * tmp[j];
+            e->fade_scale = FFMIN(e->fade_scale + 0.2, 1.0);
+        } else {
+            for (j = 0; j < subframe_size; j++)
+                e->pitch[ACB_SIZE + j] = e->energy_vector[i];
+        }
+        /* Shift the history by one subframe; e->pitch + ACB_SIZE itself is unchanged. */
+        memmove(e->pitch, e->pitch + subframe_size, ACB_SIZE * sizeof(float));
+
+        synthesis_filter(e->pitch + ACB_SIZE, ilpc,
+                         e->synthesis, subframe_size, tmp);
+        postfilter(e, tmp, ilpc, samples, pitch_lag,
+                   &postfilter_coeffs[e->bitrate], subframe_size);
+
+        samples += subframe_size;
+    }
+    /* error_flag is still 0 here, so this block is only entered through goto erasure. */
+    if (error_flag) {
+erasure:
+        error_flag = 1;
+        av_log(avctx, AV_LOG_WARNING, "frame erasure\n");
+        frame_erasure(e, samples);
+    }
+    /* Save the state needed to decode or conceal the next frame. */
+    memcpy(e->prev_lspf, e->lspf, sizeof(e->prev_lspf));
+    e->prev_error_flag    = error_flag;
+    e->last_valid_bitrate = e->bitrate;
+
+    if (e->bitrate != RATE_QUANT)
+        e->prev_pitch_delay = e->pitch_delay;
+    /* Scale all 160 output samples by 1 / 32768. */
+    samples = (float *)frame->data[0];
+    for (i = 0; i < 160; i++)
+        samples[i] /= 32768;
+
+    *got_frame_ptr   = 1;
+
+    return avpkt->size;
+}
+
+AVCodec ff_evrc_decoder = {
+    .name           = "evrc",
+    .long_name      = NULL_IF_CONFIG_SMALL("EVRC (Enhanced Variable Rate Codec)"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_EVRC,
+    .init           = evrc_decode_init,  /* sets output format and initial decoder state */
+    .decode         = evrc_decode_frame, /* one packet in, 160 float samples out */
+    .capabilities   = CODEC_CAP_DR1,
+    .priv_data_size = sizeof(EVRCContext), /* presumably sizes avctx->priv_data -- confirm */
+};
diff --git a/libavcodec/exif.c b/libavcodec/exif.c
new file mode 100644
index 0000000..fa30f05
--- /dev/null
+++ b/libavcodec/exif.c
@@ -0,0 +1,142 @@
+/*
+ * EXIF metadata parser
+ * Copyright (c) 2013 Thilo Borgmann <thilo.borgmann _at_ mail.de>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * EXIF metadata parser
+ * @author Thilo Borgmann <thilo.borgmann _at_ mail.de>
+ */
+
+#include "exif.h"
+
+/* Look up the name for a tag id in tag_list; returns NULL if the id is not listed. */
+static const char *exif_get_tag_name(uint16_t id)
+{
+    int i;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(tag_list); i++) {
+        if (tag_list[i].id == id)
+            return tag_list[i].name;
+    }
+
+    return NULL;
+}
+
+/* Add count values of the given TIFF type to metadata under name; unknown types are skipped. */
+static int exif_add_metadata(AVCodecContext *avctx, int count, int type,
+                             const char *name, const char *sep,
+                             GetByteContext *gb, int le,
+                             AVDictionary **metadata)
+{
+    switch(type) {
+    case 0:
+        av_log(avctx, AV_LOG_WARNING,
+               "Invalid TIFF tag type 0 found for %s with size %d\n",
+               name, count);
+        return 0;
+    case TIFF_DOUBLE   : return ff_tadd_doubles_metadata(count, name, sep, gb, le, metadata);
+    case TIFF_SSHORT   : return ff_tadd_shorts_metadata(count, name, sep, gb, le, 1, metadata);
+    case TIFF_SHORT    : return ff_tadd_shorts_metadata(count, name, sep, gb, le, 0, metadata);
+    case TIFF_SBYTE    : return ff_tadd_bytes_metadata(count, name, sep, gb, le, 1, metadata);
+    case TIFF_BYTE     :
+    case TIFF_UNDEFINED: return ff_tadd_bytes_metadata(count, name, sep, gb, le, 0, metadata);
+    case TIFF_STRING   : return ff_tadd_string_metadata(count, name, gb, le, metadata);
+    case TIFF_SRATIONAL:
+    case TIFF_RATIONAL : return ff_tadd_rational_metadata(count, name, sep, gb, le, metadata);
+    case TIFF_SLONG    :
+    case TIFF_LONG     : return ff_tadd_long_metadata(count, name, sep, gb, le, metadata);
+    default:
+        avpriv_request_sample(avctx, "TIFF tag type (%d)", type); /* type is an int */
+        return 0;
+    }
+}
+
+/* Decode one IFD entry: recurse into a sub-IFD, or store the tag's values in metadata. */
+static int exif_decode_tag(AVCodecContext *avctx, GetByteContext *gbytes, int le,
+                           int depth, AVDictionary **metadata)
+{
+    int ret, cur_pos;
+    unsigned id, count;
+    enum TiffTypes type;
+
+    if (depth > EXIF_MAX_IFD_RECURSION) {
+        return 0;
+    }
+
+    ff_tread_tag(gbytes, le, &id, &type, &count, &cur_pos);
+
+    if (!bytestream2_tell(gbytes)) {
+        bytestream2_seek(gbytes, cur_pos, SEEK_SET);
+        return 0;
+    }
+
+    // read count values and add them to the metadata:
+    // store metadata or proceed with next IFD
+    ret = ff_tis_ifd(id);
+    if (ret) {
+        ret = avpriv_exif_decode_ifd(avctx, gbytes, le, depth + 1, metadata);
+    } else {
+        const char *name = exif_get_tag_name(id);
+        char *use_name   = (char*) name;
+
+        if (!use_name) { /* unknown tag: use its id in hex as the name */
+            use_name = av_malloc(7); /* "0x" + 4 hex digits + NUL */
+            if (!use_name) {
+                return AVERROR(ENOMEM);
+            }
+            snprintf(use_name, 7, "0x%04X", id);
+        }
+
+        ret = exif_add_metadata(avctx, count, type, use_name, NULL,
+                                gbytes, le, metadata);
+
+        if (!name) { /* only the generated name is heap-allocated */
+            av_freep(&use_name);
+        }
+    }
+
+    bytestream2_seek(gbytes, cur_pos, SEEK_SET);
+
+    return ret;
+}
+
+/* Read the entry count of one IFD, decode each entry, then read the value that follows them. */
+int avpriv_exif_decode_ifd(AVCodecContext *avctx, GetByteContext *gbytes, int le,
+                           int depth, AVDictionary **metadata)
+{
+    int i, ret;
+    int entries;
+
+    entries = ff_tget_short(gbytes, le);
+
+    if (bytestream2_get_bytes_left(gbytes) < entries * 12) { /* 12 bytes per entry */
+        return AVERROR_INVALIDDATA;
+    }
+
+    for (i = 0; i < entries; i++) {
+        if ((ret = exif_decode_tag(avctx, gbytes, le, depth, metadata)) < 0) {
+            return ret;
+        }
+    }
+
+    // return next IFD offset or 0x00000000 or a value < 0 for failure
+    return ff_tget_long(gbytes, le);
+}
diff --git a/libavcodec/exif.h b/libavcodec/exif.h
new file mode 100644
index 0000000..2f509ba
--- /dev/null
+++ b/libavcodec/exif.h
@@ -0,0 +1,170 @@
+/*
+ * EXIF metadata parser
+ * Copyright (c) 2013 Thilo Borgmann <thilo.borgmann _at_ mail.de>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * EXIF metadata parser
+ * @author Thilo Borgmann <thilo.borgmann _at_ mail.de>
+ */
+
+#ifndef AVCODEC_EXIF_H
+#define AVCODEC_EXIF_H
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "tiff.h"
+
+#define EXIF_MAX_IFD_RECURSION 2
+#define EXIF_TAG_NAME_LENGTH   32
+
+struct exif_tag {
+    char      name[EXIF_TAG_NAME_LENGTH]; ///< tag name, NUL-terminated
+    uint16_t  id;                         ///< numeric tag id
+};
+
+static const struct exif_tag tag_list[] = { // JEITA CP-3451 EXIF specification:
+    {"GPSVersionID",               0x00}, // <- Table 12 GPS Attribute Information
+    {"GPSLatitudeRef",             0x01},
+    {"GPSLatitude",                0x02},
+    {"GPSLongitudeRef",            0x03},
+    {"GPSLongitude",               0x04},
+    {"GPSAltitudeRef",             0x05},
+    {"GPSAltitude",                0x06},
+    {"GPSTimeStamp",               0x07},
+    {"GPSSatellites",              0x08},
+    {"GPSStatus",                  0x09},
+    {"GPSMeasureMode",             0x0A},
+    {"GPSDOP",                     0x0B},
+    {"GPSSpeedRef",                0x0C},
+    {"GPSSpeed",                   0x0D},
+    {"GPSTrackRef",                0x0E},
+    {"GPSTrack",                   0x0F},
+    {"GPSImgDirectionRef",         0x10},
+    {"GPSImgDirection",            0x11},
+    {"GPSMapDatum",                0x12},
+    {"GPSDestLatitudeRef",         0x13},
+    {"GPSDestLatitude",            0x14},
+    {"GPSDestLongitudeRef",        0x15},
+    {"GPSDestLongitude",           0x16},
+    {"GPSDestBearingRef",          0x17},
+    {"GPSDestBearing",             0x18},
+    {"GPSDestDistanceRef",         0x19},
+    {"GPSDestDistance",            0x1A},
+    {"GPSProcessingMethod",        0x1B},
+    {"GPSAreaInformation",         0x1C},
+    {"GPSDateStamp",               0x1D},
+    {"GPSDifferential",            0x1E},
+    {"ImageWidth",                 0x100}, // <- Table 3 TIFF Rev. 6.0 Attribute Information Used in Exif
+    {"ImageLength",                0x101},
+    {"BitsPerSample",              0x102},
+    {"Compression",                0x103},
+    {"PhotometricInterpretation",  0x106},
+    {"Orientation",                0x112},
+    {"SamplesPerPixel",            0x115},
+    {"PlanarConfiguration",        0x11C},
+    {"YCbCrSubSampling",           0x212},
+    {"YCbCrPositioning",           0x213},
+    {"XResolution",                0x11A},
+    {"YResolution",                0x11B},
+    {"ResolutionUnit",             0x128},
+    {"StripOffsets",               0x111},
+    {"RowsPerStrip",               0x116},
+    {"StripByteCounts",            0x117},
+    {"JPEGInterchangeFormat",      0x201},
+    {"JPEGInterchangeFormatLength",0x202},
+    {"TransferFunction",           0x12D},
+    {"WhitePoint",                 0x13E},
+    {"PrimaryChromaticities",      0x13F},
+    {"YCbCrCoefficients",          0x211},
+    {"ReferenceBlackWhite",        0x214},
+    {"DateTime",                   0x132},
+    {"ImageDescription",           0x10E},
+    {"Make",                       0x10F},
+    {"Model",                      0x110},
+    {"Software",                   0x131},
+    {"Artist",                     0x13B},
+    {"Copyright",                  0x8298},
+    {"ExifVersion",                0x9000}, // <- Table 4 Exif IFD Attribute Information (1)
+    {"FlashpixVersion",            0xA000},
+    {"ColorSpace",                 0xA001},
+    {"ComponentsConfiguration",    0x9101},
+    {"CompressedBitsPerPixel",     0x9102},
+    {"PixelXDimension",            0xA002},
+    {"PixelYDimension",            0xA003},
+    {"MakerNote",                  0x927C},
+    {"UserComment",                0x9286},
+    {"RelatedSoundFile",           0xA004},
+    {"DateTimeOriginal",           0x9003},
+    {"DateTimeDigitized",          0x9004},
+    {"SubSecTime",                 0x9290},
+    {"SubSecTimeOriginal",         0x9291},
+    {"SubSecTimeDigitized",        0x9292},
+    {"ImageUniqueID",              0xA420},
+    {"ExposureTime",               0x829A}, // <- Table 5 Exif IFD Attribute Information (2)
+    {"FNumber",                    0x829D},
+    {"ExposureProgram",            0x8822},
+    {"SpectralSensitivity",        0x8824},
+    {"ISOSpeedRatings",            0x8827},
+    {"OECF",                       0x8828},
+    {"ShutterSpeedValue",          0x9201},
+    {"ApertureValue",              0x9202},
+    {"BrightnessValue",            0x9203},
+    {"ExposureBiasValue",          0x9204},
+    {"MaxApertureValue",           0x9205},
+    {"SubjectDistance",            0x9206},
+    {"MeteringMode",               0x9207},
+    {"LightSource",                0x9208},
+    {"Flash",                      0x9209},
+    {"FocalLength",                0x920A},
+    {"SubjectArea",                0x9214},
+    {"FlashEnergy",                0xA20B},
+    {"SpatialFrequencyResponse",   0xA20C},
+    {"FocalPlaneXResolution",      0xA20E},
+    {"FocalPlaneYResolution",      0xA20F},
+    {"FocalPlaneResolutionUnit",   0xA210},
+    {"SubjectLocation",            0xA214},
+    {"ExposureIndex",              0xA215},
+    {"SensingMethod",              0xA217},
+    {"FileSource",                 0xA300},
+    {"SceneType",                  0xA301},
+    {"CFAPattern",                 0xA302},
+    {"CustomRendered",             0xA401},
+    {"ExposureMode",               0xA402},
+    {"WhiteBalance",               0xA403},
+    {"DigitalZoomRatio",           0xA404},
+    {"FocalLengthIn35mmFilm",      0xA405},
+    {"SceneCaptureType",           0xA406},
+    {"GainControl",                0xA407},
+    {"Contrast",                   0xA408},
+    {"Saturation",                 0xA409},
+    {"Sharpness",                  0xA40A},
+    {"DeviceSettingDescription",   0xA40B},
+    {"SubjectDistanceRange",       0xA40C}
+//    {"InteroperabilityIndex",      0x1}, // <- Table 13 Interoperability IFD Attribute Information
+//    {"",                           0x0}
+};
+
+/** Recursively decodes all IFDs and
+ *  adds the tags they contain to the metadata dictionary. */
+int avpriv_exif_decode_ifd(AVCodecContext *avctx, GetByteContext *gbytes, int le,
+                           int depth, AVDictionary **metadata);
+
+#endif /* AVCODEC_EXIF_H */
diff --git a/libavcodec/exr.c b/libavcodec/exr.c
index 37a31ce..62e8521 100644
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c
@@ -2,20 +2,20 @@
  * OpenEXR (.exr) image decoder
  * Copyright (c) 2009 Jimmy Christensen
  *
- * This file is part of Libav
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,13 +27,17 @@
  * For more information on the OpenEXR format, visit:
  *  http://openexr.com/
  *
- * exr_flt2uint() and exr_halflt2uint() is credited to  Reimar Döffinger
+ * exr_flt2uint() and exr_halflt2uint() are credited to Reimar Döffinger.
+ * exr_half2float() is credited to Aaftab Munshi, Dan Ginsburg, Dave Shreiner.
+ *
  */
 
 #include <zlib.h>
+#include <float.h>
 
 #include "libavutil/imgutils.h"
 #include "libavutil/opt.h"
+#include "libavutil/intfloat.h"
 
 #include "avcodec.h"
 #include "bytestream.h"
@@ -106,8 +110,75 @@ typedef struct EXRContext {
     EXRThreadData *thread_data;
 
     const char *layer;
+
+    float gamma;
+
+    uint16_t gamma_table[65536];
+
 } EXRContext;
 
+/* exponent -15 stored with the single precision bias of 127: (127 - 15) << 23 */
+#define HALF_FLOAT_MIN_BIASED_EXP_AS_SINGLE_FP_EXP 0x38000000
+/* single precision exponent field (127 + 16) << 23; values at or above it are
+ * converted to Inf or NaN when stored as a half-float */
+#define HALF_FLOAT_MAX_BIASED_EXP_AS_SINGLE_FP_EXP 0x47800000
+
+/* 255 is the max biased exponent of a single precision float, held in bits 23-30 */
+#define FLOAT_MAX_BIASED_EXP (0xFF << 23)
+/* 31 is the max biased exponent of a half-float, held in bits 10-14 */
+#define HALF_FLOAT_MAX_BIASED_EXP (0x1F << 10)
+
+/*
+ * Convert a half float, given as its uint16_t bit pattern, into a full float.
+ * Inf, NaN, zero and denormal inputs are handled; the sign bit is preserved.
+ *
+ * @param hf half float as uint16_t
+ * @return the single precision value, as a union holding its bit pattern
+ */
+static union av_intfloat32 exr_half2float(uint16_t hf)
+{
+    unsigned int    sign = (unsigned int)(hf >> 15);
+    unsigned int    mantissa = (unsigned int)(hf & ((1 << 10) - 1));
+    unsigned int    exp = (unsigned int)(hf & HALF_FLOAT_MAX_BIASED_EXP);
+    union av_intfloat32   f;
+
+    if (exp == HALF_FLOAT_MAX_BIASED_EXP) {
+        // we have a half-float NaN or Inf
+        // half-float NaNs will be converted to a single precision NaN
+        // half-float Infs will be converted to a single precision Inf
+        exp = FLOAT_MAX_BIASED_EXP;
+        if (mantissa)
+            mantissa = (1 << 23) - 1;    // set all bits to indicate a NaN
+    } else if (exp == 0x0) {
+        // convert half-float zero/denorm to single precision value
+        if (mantissa) {
+            mantissa <<= 1;
+            exp = HALF_FLOAT_MIN_BIASED_EXP_AS_SINGLE_FP_EXP;
+            // normalize: keep shifting until the leading 1 reaches bit 10
+            while (!(mantissa & (1 << 10))) {
+                // for every leading 0, decrement single precision exponent by 1
+                // and shift half-float mantissa value to the left
+                mantissa <<= 1;
+                exp -= (1 << 23);
+            }
+            // drop the now-implicit leading 1, keeping the 10 fraction bits
+            mantissa &= ((1 << 10) - 1);
+            // shift left to generate single-precision mantissa of 23-bits
+            mantissa <<= 13;
+        }
+    } else {
+        // shift left to generate single-precision mantissa of 23-bits
+        mantissa <<= 13;
+        // generate single precision biased exponent value
+        exp = (exp << 13) + HALF_FLOAT_MIN_BIASED_EXP_AS_SINGLE_FP_EXP;
+    }
+
+    f.i = (sign << 31) | exp | mantissa;
+
+    return f;
+}
+
+
 /**
  * Convert from 32-bit float as uint32_t to uint16_t.
  *
@@ -652,8 +723,8 @@ static int piz_uncompress(EXRContext *s, const uint8_t *src, int ssize,
     if (!td->lut)
         td->lut = av_malloc(1 << 17);
     if (!td->bitmap || !td->lut) {
-        av_free(td->bitmap);
-        av_free(td->lut);
+        av_freep(&td->bitmap);
+        av_freep(&td->lut);
         return AVERROR(ENOMEM);
     }
 
@@ -772,6 +843,7 @@ static int decode_block(AVCodecContext *avctx, void *tdata,
     int bxmin = s->xmin * 2 * s->desc->nb_components;
     int i, x, buf_size = s->buf_size;
     int ret;
+    float one_gamma = 1.0f / s->gamma;
 
     line_offset = AV_RL64(s->gb.buffer + jobnr * 8);
     // Check if the buffer has the required bytes needed from the offset
@@ -851,18 +923,30 @@ static int decode_block(AVCodecContext *avctx, void *tdata,
         if (s->pixel_type == EXR_FLOAT) {
             // 32-bit
             for (x = 0; x < xdelta; x++) {
-                *ptr_x++ = exr_flt2uint(bytestream_get_le32(&r));
-                *ptr_x++ = exr_flt2uint(bytestream_get_le32(&g));
-                *ptr_x++ = exr_flt2uint(bytestream_get_le32(&b));
+                union av_intfloat32 t;
+                t.i = bytestream_get_le32(&r);
+                if ( t.f > 0.0f )  /* avoid negative values */
+                    t.f = powf(t.f, one_gamma);
+                *ptr_x++ = exr_flt2uint(t.i);
+
+                t.i = bytestream_get_le32(&g);
+                if ( t.f > 0.0f )
+                    t.f = powf(t.f, one_gamma);
+                *ptr_x++ = exr_flt2uint(t.i);
+
+                t.i = bytestream_get_le32(&b);
+                if ( t.f > 0.0f )
+                    t.f = powf(t.f, one_gamma);
+                *ptr_x++ = exr_flt2uint(t.i);
                 if (channel_buffer[3])
                     *ptr_x++ = exr_flt2uint(bytestream_get_le32(&a));
             }
         } else {
             // 16-bit
             for (x = 0; x < xdelta; x++) {
-                *ptr_x++ = exr_halflt2uint(bytestream_get_le16(&r));
-                *ptr_x++ = exr_halflt2uint(bytestream_get_le16(&g));
-                *ptr_x++ = exr_halflt2uint(bytestream_get_le16(&b));
+                *ptr_x++ = s->gamma_table[bytestream_get_le16(&r)];
+                *ptr_x++ = s->gamma_table[bytestream_get_le16(&g)];
+                *ptr_x++ = s->gamma_table[bytestream_get_le16(&b)];
                 if (channel_buffer[3])
                     *ptr_x++ = exr_halflt2uint(bytestream_get_le16(&a));
             }
@@ -1261,7 +1345,10 @@ static int decode_frame(AVCodecContext *avctx, void *data,
 
 static av_cold int decode_init(AVCodecContext *avctx)
 {
+    uint32_t i;
+    union av_intfloat32 t;
     EXRContext *s = avctx->priv_data;
+    float one_gamma = 1.0f / s->gamma;
 
     s->avctx              = avctx;
     s->xmin               = ~0;
@@ -1280,6 +1367,23 @@ static av_cold int decode_init(AVCodecContext *avctx)
     s->w                  = 0;
     s->h                  = 0;
 
+    if ( one_gamma > 0.9999f && one_gamma < 1.0001f ) {
+        for ( i = 0; i < 65536; ++i ) {
+            s->gamma_table[i] = exr_halflt2uint(i);
+        }
+    } else {
+        for ( i = 0; i < 65536; ++i ) {
+            t = exr_half2float(i);
+            /* If negative value we reuse half value */
+            if ( t.f <= 0.0f ) {
+                s->gamma_table[i] = exr_halflt2uint(i);
+            } else {
+                t.f = powf(t.f, one_gamma);
+                s->gamma_table[i] = exr_flt2uint(t.i);
+            }
+        }
+    }
+
     // allocate thread data, used for non EXR_RAW compreesion types
     s->thread_data = av_mallocz_array(avctx->thread_count, sizeof(EXRThreadData));
     if (!s->thread_data)
@@ -1322,6 +1426,8 @@ static av_cold int decode_end(AVCodecContext *avctx)
 static const AVOption options[] = {
     { "layer", "Set the decoding layer", OFFSET(layer),
         AV_OPT_TYPE_STRING, { .str = "" }, 0, 0, VD },
+    { "gamma", "Set the float gamma value when decoding (experimental/unsupported)", OFFSET(gamma),
+        AV_OPT_TYPE_FLOAT, { .dbl = 1.0f }, 0.001, FLT_MAX, VD },
     { NULL },
 };
 
diff --git a/libavcodec/faandct.h b/libavcodec/faandct.h
index 59d5ff3..c5ef96d 100644
--- a/libavcodec/faandct.h
+++ b/libavcodec/faandct.h
@@ -2,20 +2,20 @@
  * Floating point AAN DCT
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/faanidct.c b/libavcodec/faanidct.c
index 5cacfdd..5f34fa5 100644
--- a/libavcodec/faanidct.c
+++ b/libavcodec/faanidct.c
@@ -2,20 +2,20 @@
  * Floating point AAN IDCT
  * Copyright (c) 2008 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include "faanidct.h"
diff --git a/libavcodec/faanidct.h b/libavcodec/faanidct.h
index 0c01520..4cd2c78 100644
--- a/libavcodec/faanidct.h
+++ b/libavcodec/faanidct.h
@@ -2,20 +2,20 @@
  * Floating point AAN IDCT
  * Copyright (c) 2008 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/faxcompr.c b/libavcodec/faxcompr.c
index 4cbda3f..155f78d 100644
--- a/libavcodec/faxcompr.c
+++ b/libavcodec/faxcompr.c
@@ -2,20 +2,20 @@
  * CCITT Fax Group 3 and 4 decompression
  * Copyright (c) 2008 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -165,8 +165,6 @@ static int decode_group3_2d_line(AVCodecContext *avctx, GetBitContext *gb,
     int run_off       = *ref++;
     unsigned int offs = 0, run = 0;
 
-    runend--; // for the last written 0
-
     while (offs < width) {
         int cmode = get_vlc2(gb, ccitt_group3_2d_vlc.table, 9, 1);
         if (cmode == -1) {
@@ -174,10 +172,12 @@ static int decode_group3_2d_line(AVCodecContext *avctx, GetBitContext *gb,
             return AVERROR_INVALIDDATA;
         }
         if (!cmode) { //pass mode
-            run_off += *ref++;
+            if (run_off < width)
+                run_off += *ref++;
             run      = run_off - offs;
             offs     = run_off;
-            run_off += *ref++;
+            if (run_off < width)
+                run_off += *ref++;
             if (offs > width) {
                 av_log(avctx, AV_LOG_ERROR, "Run went out of bounds\n");
                 return AVERROR_INVALIDDATA;
@@ -230,13 +230,19 @@ static int decode_group3_2d_line(AVCodecContext *avctx, GetBitContext *gb,
             mode      = !mode;
         }
         //sync line pointers
-        while (run_off <= offs) {
+        while (offs < width && run_off <= offs) {
             run_off += *ref++;
             run_off += *ref++;
         }
     }
     *runs++ = saved_run;
-    *runs++ = 0;
+    if (saved_run) {
+        if (runs >= runend) {
+            av_log(avctx, AV_LOG_ERROR, "Run overrun\n");
+            return -1;
+        }
+        *runs++ = 0;
+    }
     return 0;
 }
 
@@ -279,9 +285,10 @@ int ff_ccitt_unpack(AVCodecContext *avctx, const uint8_t *src, int srcsize,
     int *runs, *ref = NULL, *runend;
     int ret;
     int runsize = avctx->width + 2;
+    int has_eol;
 
-    runs = av_malloc(runsize * sizeof(runs[0]));
-    ref  = av_malloc(runsize * sizeof(ref[0]));
+    runs = av_malloc_array(runsize, sizeof(runs[0]));
+    ref  = av_malloc_array(runsize, sizeof(ref[0]));
     if (!runs || !ref) {
         ret = AVERROR(ENOMEM);
         goto fail;
@@ -290,6 +297,8 @@ int ff_ccitt_unpack(AVCodecContext *avctx, const uint8_t *src, int srcsize,
     ref[1] = 0;
     ref[2] = 0;
     init_get_bits(&gb, src, srcsize * 8);
+    has_eol = show_bits(&gb, 12) == 1 || show_bits(&gb, 16) == 1;
+
     for (j = 0; j < height; j++) {
         runend = runs + runsize;
         if (compr == TIFF_G4) {
@@ -300,6 +309,7 @@ int ff_ccitt_unpack(AVCodecContext *avctx, const uint8_t *src, int srcsize,
         } else {
             int g3d1 = (compr == TIFF_G3) && !(opts & 1);
             if (compr != TIFF_CCITT_RLE &&
+                has_eol &&
                 find_group3_syncmarker(&gb, srcsize * 8) < 0)
                 break;
             if (compr == TIFF_CCITT_RLE || g3d1 || get_bits1(&gb))
diff --git a/libavcodec/faxcompr.h b/libavcodec/faxcompr.h
index 8157f1f..53d1168 100644
--- a/libavcodec/faxcompr.h
+++ b/libavcodec/faxcompr.h
@@ -2,20 +2,20 @@
  * CCITT Fax Group 3 and 4 decompression
  * Copyright (c) 2008 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/fdctdsp.c b/libavcodec/fdctdsp.c
index b245198..f75eed5 100644
--- a/libavcodec/fdctdsp.c
+++ b/libavcodec/fdctdsp.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/fdctdsp.h b/libavcodec/fdctdsp.h
index 944dc6d..3e1f683 100644
--- a/libavcodec/fdctdsp.h
+++ b/libavcodec/fdctdsp.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/fft-fixed-test.c b/libavcodec/fft-fixed-test.c
index d6ea987..330211e 100644
--- a/libavcodec/fft-fixed-test.c
+++ b/libavcodec/fft-fixed-test.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/fft-fixed32-test.c b/libavcodec/fft-fixed32-test.c
new file mode 100644
index 0000000..4bd11ce
--- /dev/null
+++ b/libavcodec/fft-fixed32-test.c
@@ -0,0 +1,21 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define FFT_FLOAT 0      /* not the float build */
+#define FFT_FIXED_32 1   /* take the FFT_FIXED_32 branches (32-bit fixed-point samples) */
+#include "fft-test.c"    /* the test source itself, compiled with the two settings above */
diff --git a/libavcodec/fft-internal.h b/libavcodec/fft-internal.h
index a449ec0..0a8f7d0 100644
--- a/libavcodec/fft-internal.h
+++ b/libavcodec/fft-internal.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -36,12 +36,29 @@
 
 #else
 
+#define SCALE_FLOAT(a, bits) lrint((a) * (double)(1LL << (bits))) /* a * 2^bits, rounded by lrint(); 1LL keeps the shift defined for bits == 31 */
+
+#if FFT_FIXED_32
+/* (dre + i*dim) = (are + i*aim) * (bre + i*bim); each 64-bit sum is rounded (+ 2^30) and shifted right by 31 */
+#define CMUL(dre, dim, are, aim, bre, bim) do {             \
+        int64_t accu;                                     \
+        (accu)  = (int64_t)(bre) * (are);                 \
+        (accu) -= (int64_t)(bim) * (aim);                 \
+        (dre)   = (int)(((accu) + 0x40000000) >> 31);       \
+        (accu)  = (int64_t)(bre) * (aim);                 \
+        (accu) += (int64_t)(bim) * (are);                 \
+        (dim)   = (int)(((accu) + 0x40000000) >> 31);       \
+    } while (0)
+/* 32-bit variant: passes bits = 31 to SCALE_FLOAT and clips the result to +/-2147483647 */
+#define FIX15(a) av_clip(SCALE_FLOAT(a, 31), -2147483647, 2147483647)
+
+#else /* FFT_FIXED_32 */
+
 #include "fft.h"
 #include "mathops.h"
 
 void ff_mdct_calcw_c(FFTContext *s, FFTDouble *output, const FFTSample *input);
 
-#define SCALE_FLOAT(a, bits) lrint((a) * (double)(1 << (bits)))
 #define FIX15(a) av_clip(SCALE_FLOAT(a, 15), -32767, 32767)
 
 #define sqrthalf ((int16_t)((1<<15)*M_SQRT1_2))
@@ -62,6 +79,8 @@ void ff_mdct_calcw_c(FFTContext *s, FFTDouble *output, const FFTSample *input);
 #define CMULL(dre, dim, are, aim, bre, bim)     \
     CMULS(dre, dim, are, aim, bre, bim, 0)
 
+#endif /* FFT_FIXED_32 */
+
 #endif /* FFT_FLOAT */
 
 #define ff_imdct_calc_c FFT_NAME(ff_imdct_calc_c)
diff --git a/libavcodec/fft-test.c b/libavcodec/fft-test.c
index f62cf0d..ba26f81 100644
--- a/libavcodec/fft-test.c
+++ b/libavcodec/fft-test.c
@@ -1,20 +1,20 @@
 /*
  * (c) 2002 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -59,6 +59,10 @@
 #define RANGE 1.0
 #define REF_SCALE(x, bits)  (x)
 #define FMT "%10.6f"
+#elif FFT_FIXED_32
+#define RANGE 8388608
+#define REF_SCALE(x, bits) (x)
+#define FMT "%6d"
 #else
 #define RANGE 16384
 #define REF_SCALE(x, bits) ((x) / (1 << (bits)))
@@ -73,7 +77,7 @@ static int fft_ref_init(int nbits, int inverse)
 {
     int i, n = 1 << nbits;
 
-    exptab = av_malloc((n / 2) * sizeof(*exptab));
+    exptab = av_malloc_array((n / 2), sizeof(*exptab));
     if (!exptab)
         return AVERROR(ENOMEM);
 
@@ -150,7 +154,7 @@ static void mdct_ref(FFTSample *output, FFTSample *input, int nbits)
 
 #if FFT_FLOAT
 #if CONFIG_DCT
-static void idct_ref(float *output, float *input, int nbits)
+static void idct_ref(FFTSample *output, FFTSample *input, int nbits)
 {
     int i, k, n = 1 << nbits;
 
@@ -165,7 +169,7 @@ static void idct_ref(float *output, float *input, int nbits)
     }
 }
 
-static void dct_ref(float *output, float *input, int nbits)
+static void dct_ref(FFTSample *output, FFTSample *input, int nbits)
 {
     int i, k, n = 1 << nbits;
 
@@ -203,7 +207,7 @@ static int check_diff(FFTSample *tab1, FFTSample *tab2, int n, double scale)
         if (e > max)
             max = e;
     }
-    av_log(NULL, AV_LOG_INFO, "max:%f e:%g\n", max, sqrt(error) / n);
+    av_log(NULL, AV_LOG_INFO, "max:%f e:%g\n", max, sqrt(error / n));
     return err;
 }
 
@@ -281,20 +285,22 @@ int main(int argc, char **argv)
             break;
         case 'c':
         {
-            int cpuflags = av_parse_cpu_flags(optarg);
-            if (cpuflags < 0)
+            int cpuflags = av_get_cpu_flags();
+
+            if (av_parse_cpu_caps(&cpuflags, optarg) < 0)
                 return 1;
-            av_set_cpu_flags_mask(cpuflags);
+
+            av_force_cpu_flags(cpuflags);
             break;
         }
         }
     }
 
     fft_size = 1 << fft_nbits;
-    tab      = av_malloc(fft_size * sizeof(FFTComplex));
-    tab1     = av_malloc(fft_size * sizeof(FFTComplex));
-    tab_ref  = av_malloc(fft_size * sizeof(FFTComplex));
-    tab2     = av_malloc(fft_size * sizeof(FFTSample));
+    tab      = av_malloc_array(fft_size, sizeof(FFTComplex));
+    tab1     = av_malloc_array(fft_size, sizeof(FFTComplex));
+    tab_ref  = av_malloc_array(fft_size, sizeof(FFTComplex));
+    tab2     = av_malloc_array(fft_size, sizeof(FFTSample));
 
     if (!(tab && tab1 && tab_ref && tab2))
         goto cleanup;
@@ -316,22 +322,22 @@ int main(int argc, char **argv)
         else
             av_log(NULL, AV_LOG_INFO, "FFT");
         ff_fft_init(&s, fft_nbits, do_inverse);
-        if (err = fft_ref_init(fft_nbits, do_inverse) < 0)
+        if ((err = fft_ref_init(fft_nbits, do_inverse)) < 0)
             goto cleanup;
         break;
 #if FFT_FLOAT
-#if CONFIG_RDFT
+#    if CONFIG_RDFT
     case TRANSFORM_RDFT:
         if (do_inverse)
             av_log(NULL, AV_LOG_INFO, "IDFT_C2R");
         else
             av_log(NULL, AV_LOG_INFO, "DFT_R2C");
         ff_rdft_init(&r, fft_nbits, do_inverse ? IDFT_C2R : DFT_R2C);
-        if (err = fft_ref_init(fft_nbits, do_inverse) < 0)
+        if ((err = fft_ref_init(fft_nbits, do_inverse)) < 0)
             goto cleanup;
         break;
-#endif /* CONFIG_RDFT */
-#if CONFIG_DCT
+#    endif /* CONFIG_RDFT */
+#    if CONFIG_DCT
     case TRANSFORM_DCT:
         if (do_inverse)
             av_log(NULL, AV_LOG_INFO, "DCT_III");
@@ -339,7 +345,7 @@ int main(int argc, char **argv)
             av_log(NULL, AV_LOG_INFO, "DCT_II");
         ff_dct_init(&d, fft_nbits, do_inverse ? DCT_III : DCT_II);
         break;
-#endif /* CONFIG_DCT */
+#    endif /* CONFIG_DCT */
 #endif /* FFT_FLOAT */
     default:
         av_log(NULL, AV_LOG_ERROR, "Requested transform not supported\n");
@@ -439,7 +445,7 @@ int main(int argc, char **argv)
         /* we measure during about 1 seconds */
         nb_its = 1;
         for (;;) {
-            time_start = av_gettime();
+            time_start = av_gettime_relative();
             for (it = 0; it < nb_its; it++) {
                 switch (transform) {
                 case TRANSFORM_MDCT:
@@ -464,7 +470,7 @@ int main(int argc, char **argv)
 #endif /* FFT_FLOAT */
                 }
             }
-            duration = av_gettime() - time_start;
+            duration = av_gettime_relative() - time_start;
             if (duration >= 1000000)
                 break;
             nb_its *= 2;
@@ -486,16 +492,16 @@ int main(int argc, char **argv)
         ff_fft_end(&s);
         break;
 #if FFT_FLOAT
-#if CONFIG_RDFT
+#    if CONFIG_RDFT
     case TRANSFORM_RDFT:
         ff_rdft_end(&r);
         break;
-#endif /* CONFIG_RDFT */
-#if CONFIG_DCT
+#    endif /* CONFIG_RDFT */
+#    if CONFIG_DCT
     case TRANSFORM_DCT:
         ff_dct_end(&d);
         break;
-#endif /* CONFIG_DCT */
+#    endif /* CONFIG_DCT */
 #endif /* FFT_FLOAT */
     }
 
diff --git a/libavcodec/fft.h b/libavcodec/fft.h
index 7daae24..64f0f63 100644
--- a/libavcodec/fft.h
+++ b/libavcodec/fft.h
@@ -2,20 +2,20 @@
  * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,6 +26,10 @@
 #define FFT_FLOAT 1
 #endif
 
+#ifndef FFT_FIXED_32
+#define FFT_FIXED_32 0 /* default when not defined before this header is included */
+#endif
+
 #include <stdint.h>
 #include "config.h"
 #include "libavutil/mem.h"
@@ -40,15 +44,26 @@ typedef float FFTDouble;
 
 #else
 
+#if FFT_FIXED_32
+
+#define Q31(x) (int)((x)*2147483648.0 + 0.5)
+#define FFT_NAME(x) x ## _fixed_32
+
+typedef int32_t FFTSample;
+
+#else /* FFT_FIXED_32 */
+
 #define FFT_NAME(x) x ## _fixed
 
 typedef int16_t FFTSample;
-typedef int     FFTDouble;
+
+#endif /* FFT_FIXED_32 */
 
 typedef struct FFTComplex {
-    int16_t re, im;
+    FFTSample re, im;
 } FFTComplex;
 
+typedef int    FFTDouble;
 typedef struct FFTContext FFTContext;
 
 #endif /* FFT_FLOAT */
@@ -142,6 +157,7 @@ int ff_fft_init(FFTContext *s, int nbits, int inverse);
 void ff_fft_init_aarch64(FFTContext *s);
 void ff_fft_init_x86(FFTContext *s);
 void ff_fft_init_arm(FFTContext *s);
+void ff_fft_init_mips(FFTContext *s);
 void ff_fft_init_ppc(FFTContext *s);
 
 void ff_fft_fixed_init_arm(FFTContext *s);
diff --git a/libavcodec/fft_fixed.c b/libavcodec/fft_fixed.c
index bad4821..3d3bd2f 100644
--- a/libavcodec/fft_fixed.c
+++ b/libavcodec/fft_fixed.c
@@ -1,20 +1,21 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #define FFT_FLOAT 0
+#define FFT_FIXED_32 0
 #include "fft_template.c"
diff --git a/libavcodec/fft_fixed_32.c b/libavcodec/fft_fixed_32.c
new file mode 100644
index 0000000..fbdbf84
--- /dev/null
+++ b/libavcodec/fft_fixed_32.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Authors:  Stanislav Ocovaj (socovaj@mips.com)
+ *           Goran Cordasic   (goran@mips.com)
+ *           Djordje Pesut    (djordje@mips.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define FFT_FLOAT 0
+#define FFT_FIXED_32 1
+#include "fft_template.c"
diff --git a/libavcodec/fft_float.c b/libavcodec/fft_float.c
index ed4cffa..73cc98d 100644
--- a/libavcodec/fft_float.c
+++ b/libavcodec/fft_float.c
@@ -1,20 +1,21 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #define FFT_FLOAT 1
+#define FFT_FIXED_32 0
 #include "fft_template.c"
diff --git a/libavcodec/fft_init_table.c b/libavcodec/fft_init_table.c
new file mode 100644
index 0000000..7511dbe
--- /dev/null
+++ b/libavcodec/fft_init_table.c
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Authors:  Stanislav Ocovaj (socovaj@mips.com)
+ *           Goran Cordasic   (goran@mips.com)
+ *           Djordje Pesut    (djordje@mips.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * definitions and initialization of the lookup tables for the FFT
+ */
+#include "libavcodec/fft_table.h"
+
+int32_t w_tab_sr[MAX_FFT_SIZE/(4*16)] = {
+    2147483647, 2147481121, 2147473542, 2147460908, 2147443222, 2147420483, 2147392690, 2147359845,
+    2147321946, 2147278995, 2147230991, 2147177934, 2147119825, 2147056664, 2146988450, 2146915184,
+    2146836866, 2146753497, 2146665076, 2146571603, 2146473080, 2146369505, 2146260881, 2146147205,
+    2146028480, 2145904705, 2145775880, 2145642006, 2145503083, 2145359112, 2145210092, 2145056025,
+    2144896910, 2144732748, 2144563539, 2144389283, 2144209982, 2144025635, 2143836244, 2143641807,
+    2143442326, 2143237802, 2143028234, 2142813624, 2142593971, 2142369276, 2142139541, 2141904764,
+    2141664948, 2141420092, 2141170197, 2140915264, 2140655293, 2140390284, 2140120240, 2139845159,
+    2139565043, 2139279892, 2138989708, 2138694490, 2138394240, 2138088958, 2137778644, 2137463301,
+    2137142927, 2136817525, 2136487095, 2136151637, 2135811153, 2135465642, 2135115107, 2134759548,
+    2134398966, 2134033361, 2133662734, 2133287087, 2132906420, 2132520734, 2132130030, 2131734309,
+    2131333572, 2130927819, 2130517052, 2130101272, 2129680480, 2129254676, 2128823862, 2128388038,
+    2127947206, 2127501367, 2127050522, 2126594672, 2126133817, 2125667960, 2125197100, 2124721240,
+    2124240380, 2123754522, 2123263666, 2122767814, 2122266967, 2121761126, 2121250292, 2120734467,
+    2120213651, 2119687847, 2119157054, 2118621275, 2118080511, 2117534762, 2116984031, 2116428319,
+    2115867626, 2115301954, 2114731305, 2114155680, 2113575080, 2112989506, 2112398960, 2111803444,
+    2111202959, 2110597505, 2109987085, 2109371700, 2108751352, 2108126041, 2107495770, 2106860540,
+    2106220352, 2105575208, 2104925109, 2104270057, 2103610054, 2102945101, 2102275199, 2101600350,
+    2100920556, 2100235819, 2099546139, 2098851519, 2098151960, 2097447464, 2096738032, 2096023667,
+    2095304370, 2094580142, 2093850985, 2093116901, 2092377892, 2091633960, 2090885105, 2090131331,
+    2089372638, 2088609029, 2087840505, 2087067068, 2086288720, 2085505463, 2084717298, 2083924228,
+    2083126254, 2082323379, 2081515603, 2080702930, 2079885360, 2079062896, 2078235540, 2077403294,
+    2076566160, 2075724139, 2074877233, 2074025446, 2073168777, 2072307231, 2071440808, 2070569511,
+    2069693342, 2068812302, 2067926394, 2067035621, 2066139983, 2065239484, 2064334124, 2063423908,
+    2062508835, 2061588910, 2060664133, 2059734508, 2058800036, 2057860719, 2056916560, 2055967560,
+    2055013723, 2054055050, 2053091544, 2052123207, 2051150040, 2050172048, 2049189231, 2048201592,
+    2047209133, 2046211857, 2045209767, 2044202863, 2043191150, 2042174628, 2041153301, 2040127172,
+    2039096241, 2038060512, 2037019988, 2035974670, 2034924562, 2033869665, 2032809982, 2031745516,
+    2030676269, 2029602243, 2028523442, 2027439867, 2026351522, 2025258408, 2024160529, 2023057887,
+    2021950484, 2020838323, 2019721407, 2018599739, 2017473321, 2016342155, 2015206245, 2014065592,
+    2012920201, 2011770073, 2010615210, 2009455617, 2008291295, 2007122248, 2005948478, 2004769987,
+    2003586779, 2002398857, 2001206222, 2000008879, 1998806829, 1997600076, 1996388622, 1995172471,
+    1993951625, 1992726087, 1991495860, 1990260946, 1989021350, 1987777073, 1986528118, 1985274489,
+    1984016189, 1982753220, 1981485585, 1980213288, 1978936331, 1977654717, 1976368450, 1975077532,
+    1973781967, 1972481757, 1971176906, 1969867417, 1968553292, 1967234535, 1965911148, 1964583136,
+    1963250501, 1961913246, 1960571375, 1959224890, 1957873796, 1956518093, 1955157788, 1953792881,
+    1952423377, 1951049279, 1949670589, 1948287312, 1946899451, 1945507008, 1944109987, 1942708392,
+    1941302225, 1939891490, 1938476190, 1937056329, 1935631910, 1934202936, 1932769411, 1931331338,
+    1929888720, 1928441561, 1926989864, 1925533633, 1924072871, 1922607581, 1921137767, 1919663432,
+    1918184581, 1916701216, 1915213340, 1913720958, 1912224073, 1910722688, 1909216806, 1907706433,
+    1906191570, 1904672222, 1903148392, 1901620084, 1900087301, 1898550047, 1897008325, 1895462140,
+    1893911494, 1892356392, 1890796837, 1889232832, 1887664383, 1886091491, 1884514161, 1882932397,
+    1881346202, 1879755580, 1878160535, 1876561070, 1874957189, 1873348897, 1871736196, 1870119091,
+    1868497586, 1866871683, 1865241388, 1863606704, 1861967634, 1860324183, 1858676355, 1857024153,
+    1855367581, 1853706643, 1852041343, 1850371686, 1848697674, 1847019312, 1845336604, 1843649553,
+    1841958164, 1840262441, 1838562388, 1836858008, 1835149306, 1833436286, 1831718951, 1829997307,
+    1828271356, 1826541103, 1824806552, 1823067707, 1821324572, 1819577151, 1817825449, 1816069469,
+    1814309216, 1812544694, 1810775906, 1809002858, 1807225553, 1805443995, 1803658189, 1801868139,
+    1800073849, 1798275323, 1796472565, 1794665580, 1792854372, 1791038946, 1789219305, 1787395453,
+    1785567396, 1783735137, 1781898681, 1780058032, 1778213194, 1776364172, 1774510970, 1772653593,
+    1770792044, 1768926328, 1767056450, 1765182414, 1763304224, 1761421885, 1759535401, 1757644777,
+    1755750017, 1753851126, 1751948107, 1750040966, 1748129707, 1746214334, 1744294853, 1742371267,
+    1740443581, 1738511799, 1736575927, 1734635968, 1732691928, 1730743810, 1728791620, 1726835361,
+    1724875040, 1722910659, 1720942225, 1718969740, 1716993211, 1715012642, 1713028037, 1711039401,
+    1709046739, 1707050055, 1705049355, 1703044642, 1701035922, 1699023199, 1697006479, 1694985765,
+    1692961062, 1690932376, 1688899711, 1686863072, 1684822463, 1682777890, 1680729357, 1678676870,
+    1676620432, 1674560049, 1672495725, 1670427466, 1668355276, 1666279161, 1664199124, 1662115172,
+    1660027308, 1657935539, 1655839867, 1653740300, 1651636841, 1649529496, 1647418269, 1645303166,
+    1643184191, 1641061349, 1638934646, 1636804087, 1634669676, 1632531418, 1630389319, 1628243383,
+    1626093616, 1623940023, 1621782608, 1619621377, 1617456335, 1615287487, 1613114838, 1610938393,
+    1608758157, 1606574136, 1604386335, 1602194758, 1599999411, 1597800299, 1595597428, 1593390801,
+    1591180426, 1588966306, 1586748447, 1584526854, 1582301533, 1580072489, 1577839726, 1575603251,
+    1573363068, 1571119183, 1568871601, 1566620327, 1564365367, 1562106725, 1559844408, 1557578421,
+    1555308768, 1553035455, 1550758488, 1548477872, 1546193612, 1543905714, 1541614183, 1539319024,
+    1537020244, 1534717846, 1532411837, 1530102222, 1527789007, 1525472197, 1523151797, 1520827813,
+    1518500250, 1516169114, 1513834411, 1511496145, 1509154322, 1506808949, 1504460029, 1502107570,
+    1499751576, 1497392053, 1495029006, 1492662441, 1490292364, 1487918781, 1485541696, 1483161115,
+    1480777044, 1478389489, 1475998456, 1473603949, 1471205974, 1468804538, 1466399645, 1463991302,
+    1461579514, 1459164286, 1456745625, 1454323536, 1451898025, 1449469098, 1447036760, 1444601017,
+    1442161874, 1439719338, 1437273414, 1434824109, 1432371426, 1429915374, 1427455956, 1424993180,
+    1422527051, 1420057574, 1417584755, 1415108601, 1412629117, 1410146309, 1407660183, 1405170745,
+    1402678000, 1400181954, 1397682613, 1395179984, 1392674072, 1390164882, 1387652422, 1385136696,
+    1382617710, 1380095472, 1377569986, 1375041258, 1372509294, 1369974101, 1367435685, 1364894050,
+    1362349204, 1359801152, 1357249901, 1354695455, 1352137822, 1349577007, 1347013017, 1344445857,
+    1341875533, 1339302052, 1336725419, 1334145641, 1331562723, 1328976672, 1326387494, 1323795195,
+    1321199781, 1318601257, 1315999631, 1313394909, 1310787095, 1308176198, 1305562222, 1302945174,
+    1300325060, 1297701886, 1295075659, 1292446384, 1289814068, 1287178717, 1284540337, 1281898935,
+    1279254516, 1276607086, 1273956653, 1271303222, 1268646800, 1265987392, 1263325005, 1260659646,
+    1257991320, 1255320034, 1252645794, 1249968606, 1247288478, 1244605414, 1241919421, 1239230506,
+    1236538675, 1233843935, 1231146291, 1228445750, 1225742318, 1223036002, 1220326809, 1217614743,
+    1214899813, 1212182024, 1209461382, 1206737894, 1204011567, 1201282407, 1198550419, 1195815612,
+    1193077991, 1190337562, 1187594332, 1184848308, 1182099496, 1179347902, 1176593533, 1173836395,
+    1171076495, 1168313840, 1165548435, 1162780288, 1160009405, 1157235792, 1154459456, 1151680403,
+    1148898640, 1146114174, 1143327011, 1140537158, 1137744621, 1134949406, 1132151521, 1129350972,
+    1126547765, 1123741908, 1120933406, 1118122267, 1115308496, 1112492101, 1109673089, 1106851465,
+    1104027237, 1101200410, 1098370993, 1095538991, 1092704411, 1089867259, 1087027544, 1084185270,
+    1081340445, 1078493076, 1075643169, 1072790730, 1069935768, 1067078288, 1064218296, 1061355801,
+    1058490808, 1055623324, 1052753357, 1049880912, 1047005996, 1044128617, 1041248781, 1038366495,
+    1035481766, 1032594600, 1029705004, 1026812985, 1023918550, 1021021705, 1018122458, 1015220816,
+    1012316784, 1009410370, 1006501581, 1003590424, 1000676905,  997761031,  994842810,  991922248,
+     988999351,  986074127,  983146583,  980216726,  977284562,  974350098,  971413342,  968474300,
+     965532978,  962589385,  959643527,  956695411,  953745043,  950792431,  947837582,  944880503,
+     941921200,  938959681,  935995952,  933030021,  930061894,  927091579,  924119082,  921144411,
+     918167572,  915188572,  912207419,  909224120,  906238681,  903251110,  900261413,  897269597,
+     894275671,  891279640,  888281512,  885281293,  882278992,  879274614,  876268167,  873259659,
+     870249095,  867236484,  864221832,  861205147,  858186435,  855165703,  852142959,  849118210,
+     846091463,  843062726,  840032004,  836999305,  833964638,  830928007,  827889422,  824848888,
+     821806413,  818762005,  815715670,  812667415,  809617249,  806565177,  803511207,  800455346,
+     797397602,  794337982,  791276492,  788213141,  785147934,  782080880,  779011986,  775941259,
+     772868706,  769794334,  766718151,  763640164,  760560380,  757478806,  754395449,  751310318,
+     748223418,  745134758,  742044345,  738952186,  735858287,  732762657,  729665303,  726566232,
+     723465451,  720362968,  717258790,  714152924,  711045377,  707936158,  704825272,  701712728,
+     698598533,  695482694,  692365218,  689246113,  686125387,  683003045,  679879097,  676753549,
+     673626408,  670497682,  667367379,  664235505,  661102068,  657967075,  654830535,  651692453,
+     648552838,  645411696,  642269036,  639124865,  635979190,  632832018,  629683357,  626533215,
+     623381598,  620228514,  617073971,  613917975,  610760536,  607601658,  604441352,  601279623,
+     598116479,  594951927,  591785976,  588618632,  585449903,  582279796,  579108320,  575935480,
+     572761285,  569585743,  566408860,  563230645,  560051104,  556870245,  553688076,  550504604,
+     547319836,  544133781,  540946445,  537757837,  534567963,  531376831,  528184449,  524990824,
+     521795963,  518599875,  515402566,  512204045,  509004318,  505803394,  502601279,  499397982,
+     496193509,  492987869,  489781069,  486573117,  483364019,  480153784,  476942419,  473729932,
+     470516330,  467301622,  464085813,  460868912,  457650927,  454431865,  451211734,  447990541,
+     444768294,  441545000,  438320667,  435095303,  431868915,  428641511,  425413098,  422183684,
+     418953276,  415721883,  412489512,  409256170,  406021865,  402786604,  399550396,  396313247,
+     393075166,  389836160,  386596237,  383355404,  380113669,  376871039,  373627523,  370383128,
+     367137861,  363891730,  360644742,  357396906,  354148230,  350898719,  347648383,  344397230,
+     341145265,  337892498,  334638936,  331384586,  328129457,  324873555,  321616889,  318359466,
+     315101295,  311842381,  308582734,  305322361,  302061269,  298799466,  295536961,  292273760,
+     289009871,  285745302,  282480061,  279214155,  275947592,  272680379,  269412525,  266144038,
+     262874923,  259605191,  256334847,  253063900,  249792358,  246520228,  243247518,  239974235,
+     236700388,  233425984,  230151030,  226875535,  223599506,  220322951,  217045878,  213768293,
+     210490206,  207211624,  203932553,  200653003,  197372981,  194092495,  190811551,  187530159,
+     184248325,  180966058,  177683365,  174400254,  171116733,  167832808,  164548489,  161263783,
+     157978697,  154693240,  151407418,  148121241,  144834714,  141547847,  138260647,  134973122,
+     131685278,  128397125,  125108670,  121819921,  118530885,  115241570,  111951983,  108662134,
+     105372028,  102081675,   98791081,   95500255,   92209205,   88917937,   85626460,   82334782,
+      79042909,   75750851,   72458615,   69166208,   65873638,   62580914,   59288042,   55995030,
+      52701887,   49408620,   46115236,   42821744,   39528151,   36234466,   32940695,   29646846,
+      26352928,   23058947,   19764913,   16470832,   13176712,    9882561,    6588387,    3294197
+};
+
+uint16_t fft_offsets_lut[0x2aab];
+
+void ff_fft_lut_init(uint16_t *table, int off, int size, int *index)
+{ /* recursively appends block offsets to table; *index is the next slot to write and is advanced by the call */
+    if (size < 16) { /* leaf: store the offset in units of 4 elements and move to the next slot */
+        table[*index] = off >> 2;
+        (*index)++;
+    }
+    else { /* split [off, off+size) into one half-size part followed by two quarter-size parts */
+        ff_fft_lut_init(table, off, size>>1, index);
+        ff_fft_lut_init(table, off+(size>>1), size>>2, index);
+        ff_fft_lut_init(table, off+3*(size>>2), size>>2, index);
+    }
+}
diff --git a/libavcodec/fft_table.h b/libavcodec/fft_table.h
new file mode 100644
index 0000000..7f3142d
--- /dev/null
+++ b/libavcodec/fft_table.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Authors:  Stanislav Ocovaj (socovaj@mips.com)
+ *           Goran Cordasic   (goran@mips.com)
+ *           Djordje Pesut    (djordje@mips.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * definitions and tables for FFT
+ */
+#ifndef AVCODEC_FFT_TABLE_H
+#define AVCODEC_FFT_TABLE_H
+
+#include "libavcodec/fft.h"
+
+#define MAX_LOG2_NFFT 16 //!< log2 of the maximum allowed FFT size
+#define MAX_FFT_SIZE (1 << MAX_LOG2_NFFT) //!< maximum allowed FFT size
+
+extern int32_t w_tab_sr[]; //!< MAX_FFT_SIZE/(4*16) entries, defined in fft_init_table.c
+extern uint16_t fft_offsets_lut[]; //!< 0x2aab entries, filled at run time by ff_fft_lut_init()
+void ff_fft_lut_init(uint16_t *table, int off, int size, int *index); //!< recursively fills table, writing from table[*index] onwards and advancing *index
+
+
+#endif /* AVCODEC_FFT_TABLE_H */
diff --git a/libavcodec/fft_template.c b/libavcodec/fft_template.c
index 808f317..b8d6417 100644
--- a/libavcodec/fft_template.c
+++ b/libavcodec/fft_template.c
@@ -4,20 +4,20 @@
  * Copyright (c) 2002 Fabrice Bellard
  * Partly based on libdjbfft by D. J. Bernstein
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -32,6 +32,10 @@
 #include "fft.h"
 #include "fft-internal.h"
 
+#if FFT_FIXED_32
+#include "fft_table.h"
+#else /* FFT_FIXED_32 */
+
 /* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */
 #if !CONFIG_HARDCODED_TABLES
 COSTABLE(16);
@@ -65,6 +69,8 @@ COSTABLE_CONST FFTSample * const FFT_NAME(ff_cos_tabs)[] = {
     FFT_NAME(ff_cos_65536),
 };
 
+#endif /* FFT_FIXED_32 */
+
 static void fft_permute_c(FFTContext *s, FFTComplex *z);
 static void fft_calc_c(FFTContext *s, FFTComplex *z);
 
@@ -81,7 +87,7 @@ static int split_radix_permutation(int i, int n, int inverse)
 
 av_cold void ff_init_ff_cos_tabs(int index)
 {
-#if !CONFIG_HARDCODED_TABLES
+#if (!CONFIG_HARDCODED_TABLES) && (!FFT_FIXED_32)
     int i;
     int m = 1<<index;
     double freq = 2*M_PI/m;
@@ -157,26 +163,34 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
     s->mdct_calc   = ff_mdct_calc_c;
 #endif
 
+#if FFT_FIXED_32
+    {
+        int n=0;
+        ff_fft_lut_init(fft_offsets_lut, 0, 1 << 16, &n);
+    }
+#else /* FFT_FIXED_32 */
 #if FFT_FLOAT
     if (ARCH_AARCH64) ff_fft_init_aarch64(s);
     if (ARCH_ARM)     ff_fft_init_arm(s);
     if (ARCH_PPC)     ff_fft_init_ppc(s);
     if (ARCH_X86)     ff_fft_init_x86(s);
     if (CONFIG_MDCT)  s->mdct_calcw = s->mdct_calc;
+    if (HAVE_MIPSFPU) ff_fft_init_mips(s);
 #else
     if (CONFIG_MDCT)  s->mdct_calcw = ff_mdct_calcw_c;
     if (ARCH_ARM)     ff_fft_fixed_init_arm(s);
 #endif
-
     for(j=4; j<=nbits; j++) {
         ff_init_ff_cos_tabs(j);
     }
+#endif /* FFT_FIXED_32 */
+
 
     if (s->fft_permutation == FF_FFT_PERM_AVX) {
         fft_perm_avx(s);
     } else {
         for(i=0; i<n; i++) {
-            int j = i;
+            j = i;
             if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
                 j = (j&~3) | ((j>>1)&1) | ((j<<1)&2);
             s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = j;
@@ -206,6 +220,169 @@ av_cold void ff_fft_end(FFTContext *s)
     av_freep(&s->tmp_buf);
 }
 
+#if FFT_FIXED_32
+
+static void fft_calc_c(FFTContext *s, FFTComplex *z) { /* in-place transform of z, 1 << s->nbits elements, FFT_FIXED_32 build: products are accumulated in 64 bits */
+
+    int nbits, i, n, num_transforms, offset, step;
+    int n4, n2, n34;
+    FFTSample tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+    FFTComplex *tmpz;
+    FFTSample w_re, w_im;
+    FFTSample *w_re_ptr, *w_im_ptr;
+    const int fft_size = (1 << s->nbits);
+    int64_t accu;
+
+    num_transforms = (0x2aab >> (16 - s->nbits)) | 1; /* count of 4-element groups for this size; 0x2aab is the entry count of fft_offsets_lut */
+
+    for (n=0; n<num_transforms; n++){ /* pass 1: combine groups of 4 elements, additions and subtractions only */
+        offset = fft_offsets_lut[n] << 2; /* the LUT stores offsets divided by 4 */
+        tmpz = z + offset;
+
+        tmp1 = tmpz[0].re + tmpz[1].re;
+        tmp5 = tmpz[2].re + tmpz[3].re;
+        tmp2 = tmpz[0].im + tmpz[1].im;
+        tmp6 = tmpz[2].im + tmpz[3].im;
+        tmp3 = tmpz[0].re - tmpz[1].re;
+        tmp8 = tmpz[2].im - tmpz[3].im;
+        tmp4 = tmpz[0].im - tmpz[1].im;
+        tmp7 = tmpz[2].re - tmpz[3].re;
+
+        tmpz[0].re = tmp1 + tmp5;
+        tmpz[2].re = tmp1 - tmp5;
+        tmpz[0].im = tmp2 + tmp6;
+        tmpz[2].im = tmp2 - tmp6;
+        tmpz[1].re = tmp3 + tmp8;
+        tmpz[3].re = tmp3 - tmp8;
+        tmpz[1].im = tmp4 - tmp7;
+        tmpz[3].im = tmp4 + tmp7;
+    }
+
+    if (fft_size < 8) /* sizes below 8 are finished after the 4-element pass */
+        return;
+
+    num_transforms = (num_transforms >> 1) | 1; /* count of 8-element groups */
+
+    for (n=0; n<num_transforms; n++){ /* pass 2: combine groups of 8 elements */
+        offset = fft_offsets_lut[n] << 3; /* same LUT entries, rescaled to 8-element groups */
+        tmpz = z + offset;
+
+        tmp1 = tmpz[4].re + tmpz[5].re;
+        tmp3 = tmpz[6].re + tmpz[7].re;
+        tmp2 = tmpz[4].im + tmpz[5].im;
+        tmp4 = tmpz[6].im + tmpz[7].im;
+        tmp5 = tmp1 + tmp3;
+        tmp7 = tmp1 - tmp3;
+        tmp6 = tmp2 + tmp4;
+        tmp8 = tmp2 - tmp4;
+
+        tmp1 = tmpz[4].re - tmpz[5].re;
+        tmp2 = tmpz[4].im - tmpz[5].im;
+        tmp3 = tmpz[6].re - tmpz[7].re;
+        tmp4 = tmpz[6].im - tmpz[7].im;
+
+        tmpz[4].re = tmpz[0].re - tmp5; /* each pair below reads the old value twice, so the difference must be stored before the sum overwrites it */
+        tmpz[0].re = tmpz[0].re + tmp5;
+        tmpz[4].im = tmpz[0].im - tmp6;
+        tmpz[0].im = tmpz[0].im + tmp6;
+        tmpz[6].re = tmpz[2].re - tmp8;
+        tmpz[2].re = tmpz[2].re + tmp8;
+        tmpz[6].im = tmpz[2].im + tmp7;
+        tmpz[2].im = tmpz[2].im - tmp7;
+
+        accu = (int64_t)Q31(M_SQRT1_2)*(tmp1 + tmp2); /* scale by sqrt(1/2) expressed in Q31; the product is 64 bits wide */
+        tmp5 = (int32_t)((accu + 0x40000000) >> 31); /* add 2^30 (half of the 2^31 scale) to round, then drop the 31 fractional bits */
+        accu = (int64_t)Q31(M_SQRT1_2)*(tmp3 - tmp4); /* NOTE(review): as with the other sums in this function, (tmp3 - tmp4) is evaluated in 32 bits before widening; confirm input scaling rules out overflow */
+        tmp7 = (int32_t)((accu + 0x40000000) >> 31);
+        accu = (int64_t)Q31(M_SQRT1_2)*(tmp2 - tmp1);
+        tmp6 = (int32_t)((accu + 0x40000000) >> 31);
+        accu = (int64_t)Q31(M_SQRT1_2)*(tmp3 + tmp4);
+        tmp8 = (int32_t)((accu + 0x40000000) >> 31);
+        tmp1 = tmp5 + tmp7;
+        tmp3 = tmp5 - tmp7;
+        tmp2 = tmp6 + tmp8;
+        tmp4 = tmp6 - tmp8;
+
+        tmpz[5].re = tmpz[1].re - tmp1;
+        tmpz[1].re = tmpz[1].re + tmp1;
+        tmpz[5].im = tmpz[1].im - tmp2;
+        tmpz[1].im = tmpz[1].im + tmp2;
+        tmpz[7].re = tmpz[3].re - tmp4;
+        tmpz[3].re = tmpz[3].re + tmp4;
+        tmpz[7].im = tmpz[3].im + tmp3;
+        tmpz[3].im = tmpz[3].im - tmp3;
+    }
+
+    step = 1 << ((MAX_LOG2_NFFT-4) - 4); /* stride through w_tab_sr for the first pass below; halved after every pass */
+    n4 = 4; /* quarter of the block length of the current pass (block length is 1 << nbits) */
+
+    for (nbits=4; nbits<=s->nbits; nbits++){ /* remaining passes: blocks of 1 << nbits elements, up to the full transform length */
+        n2  = 2*n4;
+        n34 = 3*n4;
+        num_transforms = (num_transforms >> 1) | 1; /* block count is roughly halved on every pass, never below 1 */
+
+        for (n=0; n<num_transforms; n++){
+            offset = fft_offsets_lut[n] << nbits; /* same LUT entries, rescaled to the current block length */
+            tmpz = z + offset;
+
+            tmp5 = tmpz[ n2].re + tmpz[n34].re; /* element 0 of each quarter is combined without any multiplication */
+            tmp1 = tmpz[ n2].re - tmpz[n34].re;
+            tmp6 = tmpz[ n2].im + tmpz[n34].im;
+            tmp2 = tmpz[ n2].im - tmpz[n34].im;
+
+            tmpz[ n2].re = tmpz[ 0].re - tmp5;
+            tmpz[  0].re = tmpz[ 0].re + tmp5;
+            tmpz[ n2].im = tmpz[ 0].im - tmp6;
+            tmpz[  0].im = tmpz[ 0].im + tmp6;
+            tmpz[n34].re = tmpz[n4].re - tmp2;
+            tmpz[ n4].re = tmpz[n4].re + tmp2;
+            tmpz[n34].im = tmpz[n4].im + tmp1;
+            tmpz[ n4].im = tmpz[n4].im - tmp1;
+
+            w_re_ptr = w_tab_sr + step; /* read forward through the table, one step per element */
+            w_im_ptr = w_tab_sr + MAX_FFT_SIZE/(4*16) - step; /* read backward, starting one step before the end of the table's MAX_FFT_SIZE/(4*16) entries */
+
+            for (i=1; i<n4; i++){ /* elements 1 .. n4-1 of each quarter need the table factors */
+                w_re = w_re_ptr[0];
+                w_im = w_im_ptr[0];
+                accu  = (int64_t)w_re*tmpz[ n2+i].re; /* tmp1 + j*tmp2 = tmpz[n2+i] * (w_re - j*w_im), rounded back from Q31 */
+                accu += (int64_t)w_im*tmpz[ n2+i].im;
+                tmp1 = (int32_t)((accu + 0x40000000) >> 31);
+                accu  = (int64_t)w_re*tmpz[ n2+i].im;
+                accu -= (int64_t)w_im*tmpz[ n2+i].re;
+                tmp2 = (int32_t)((accu + 0x40000000) >> 31);
+                accu  = (int64_t)w_re*tmpz[n34+i].re; /* tmp3 + j*tmp4 = tmpz[n34+i] * (w_re + j*w_im), rounded back from Q31 */
+                accu -= (int64_t)w_im*tmpz[n34+i].im;
+                tmp3 = (int32_t)((accu + 0x40000000) >> 31);
+                accu  = (int64_t)w_re*tmpz[n34+i].im;
+                accu += (int64_t)w_im*tmpz[n34+i].re;
+                tmp4 = (int32_t)((accu + 0x40000000) >> 31);
+
+                tmp5 = tmp1 + tmp3;
+                tmp1 = tmp1 - tmp3;
+                tmp6 = tmp2 + tmp4;
+                tmp2 = tmp2 - tmp4;
+
+                tmpz[ n2+i].re = tmpz[   i].re - tmp5; /* same combination pattern as the element-0 case above */
+                tmpz[    i].re = tmpz[   i].re + tmp5;
+                tmpz[ n2+i].im = tmpz[   i].im - tmp6;
+                tmpz[    i].im = tmpz[   i].im + tmp6;
+                tmpz[n34+i].re = tmpz[n4+i].re - tmp2;
+                tmpz[ n4+i].re = tmpz[n4+i].re + tmp2;
+                tmpz[n34+i].im = tmpz[n4+i].im + tmp1;
+                tmpz[ n4+i].im = tmpz[n4+i].im - tmp1;
+
+                w_re_ptr += step;
+                w_im_ptr -= step;
+            }
+        }
+        step >>= 1; /* the next pass has twice as many elements per quarter, so the table is sampled twice as densely */
+        n4   <<= 1;
+    }
+}
+
+#else /* FFT_FIXED_32 */
+
 #define BUTTERFLIES(a0,a1,a2,a3) {\
     BF(t3, t5, t5, t1);\
     BF(a2.re, a0.re, a0.re, t5);\
@@ -351,3 +528,4 @@ static void fft_calc_c(FFTContext *s, FFTComplex *z)
 {
     fft_dispatch[s->nbits-2](z);
 }
+#endif /* FFT_FIXED_32 */
diff --git a/libavcodec/ffv1.c b/libavcodec/ffv1.c
index d1a6a83..ab58a60 100644
--- a/libavcodec/ffv1.c
+++ b/libavcodec/ffv1.c
@@ -1,22 +1,22 @@
 /*
  * FFV1 codec for libavcodec
  *
- * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2003-2013 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,119 +27,32 @@
 
 #include "libavutil/attributes.h"
 #include "libavutil/avassert.h"
+#include "libavutil/crc.h"
+#include "libavutil/opt.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/timer.h"
 #include "avcodec.h"
-#include "get_bits.h"
-#include "put_bits.h"
+#include "internal.h"
 #include "rangecoder.h"
 #include "golomb.h"
 #include "mathops.h"
 #include "ffv1.h"
 
-const int8_t ffv1_quant5_10bit[256] = {
-     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,
-     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
-     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
-     1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1,
-    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-    -1, -1, -1, -1, -1, -1, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0,
-};
-
-const int8_t ffv1_quant5[256] = {
-     0,  1,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -1,
-};
-
-const int8_t ffv1_quant9_10bit[256] = {
-     0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  1,  1,  1,  2,  2,  2,
-     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,
-     3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
-     3,  3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,
-     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-    -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
-    -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
-    -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
-    -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
-    -4, -4, -4, -4, -4, -4, -4, -4, -4, -3, -3, -3, -3, -3, -3, -3,
-    -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
-    -3, -3, -3, -3, -3, -3, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-    -2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -1, -1, -0, -0, -0, -0,
-};
-
-const int8_t ffv1_quant11[256] = {
-     0,  1,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,
-     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-     4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-    -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
-    -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
-    -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
-    -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
-    -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
-    -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -4, -4,
-    -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
-    -4, -4, -4, -4, -4, -3, -3, -3, -3, -3, -3, -3, -2, -2, -2, -1,
-};
-
-const uint8_t ffv1_ver2_state[256] = {
-      0,  10,  10,  10,  10,  16,  16,  16,  28,  16,  16,  29,  42,  49,  20,  49,
-     59,  25,  26,  26,  27,  31,  33,  33,  33,  34,  34,  37,  67,  38,  39,  39,
-     40,  40,  41,  79,  43,  44,  45,  45,  48,  48,  64,  50,  51,  52,  88,  52,
-     53,  74,  55,  57,  58,  58,  74,  60,  101, 61,  62,  84,  66,  66,  68,  69,
-     87,  82,  71,  97,  73,  73,  82,  75,  111, 77,  94,  78,  87,  81,  83,  97,
-     85,  83,  94,  86,  99,  89,  90,  99,  111, 92,  93,  134, 95,  98,  105, 98,
-    105, 110, 102, 108, 102, 118, 103, 106, 106, 113, 109, 112, 114, 112, 116, 125,
-    115, 116, 117, 117, 126, 119, 125, 121, 121, 123, 145, 124, 126, 131, 127, 129,
-    165, 130, 132, 138, 133, 135, 145, 136, 137, 139, 146, 141, 143, 142, 144, 148,
-    147, 155, 151, 149, 151, 150, 152, 157, 153, 154, 156, 168, 158, 162, 161, 160,
-    172, 163, 169, 164, 166, 184, 167, 170, 177, 174, 171, 173, 182, 176, 180, 178,
-    175, 189, 179, 181, 186, 183, 192, 185, 200, 187, 191, 188, 190, 197, 193, 196,
-    197, 194, 195, 196, 198, 202, 199, 201, 210, 203, 207, 204, 205, 206, 208, 214,
-    209, 211, 221, 212, 213, 215, 224, 216, 217, 218, 219, 220, 222, 228, 223, 225,
-    226, 224, 227, 229, 240, 230, 231, 232, 233, 234, 235, 236, 238, 239, 237, 242,
-    241, 243, 242, 244, 245, 246, 247, 248, 249, 250, 251, 252, 252, 253, 254, 255,
-};
-
-
 av_cold int ffv1_common_init(AVCodecContext *avctx)
 {
     FFV1Context *s = avctx->priv_data;
 
+    if (!avctx->width || !avctx->height)
+        return AVERROR_INVALIDDATA;
+
     s->avctx = avctx;
     s->flags = avctx->flags;
 
-    if (!avctx->width || !avctx->height)
-        return AVERROR_INVALIDDATA;
+    s->picture.f = av_frame_alloc();
+    s->last_picture.f = av_frame_alloc();
+    if (!s->picture.f || !s->last_picture.f)
+        return AVERROR(ENOMEM);
 
     s->width  = avctx->width;
     s->height = avctx->height;
@@ -151,7 +64,7 @@ av_cold int ffv1_common_init(AVCodecContext *avctx)
     return 0;
 }
 
-int ffv1_init_slice_state(FFV1Context *f, FFV1Context *fs)
+av_cold int ffv1_init_slice_state(FFV1Context *f, FFV1Context *fs)
 {
     int j;
 
@@ -162,13 +75,13 @@ int ffv1_init_slice_state(FFV1Context *f, FFV1Context *fs)
 
         if (fs->ac) {
             if (!p->state)
-                p->state = av_malloc(CONTEXT_SIZE * p->context_count *
+                p->state = av_malloc_array(p->context_count, CONTEXT_SIZE *
                                      sizeof(uint8_t));
             if (!p->state)
                 return AVERROR(ENOMEM);
         } else {
             if (!p->vlc_state)
-                p->vlc_state = av_malloc(p->context_count * sizeof(VlcState));
+                p->vlc_state = av_malloc_array(p->context_count, sizeof(VlcState));
             if (!p->vlc_state)
                 return AVERROR(ENOMEM);
         }
@@ -177,7 +90,7 @@ int ffv1_init_slice_state(FFV1Context *f, FFV1Context *fs)
     if (fs->ac > 1) {
         //FIXME only redo if state_transition changed
         for (j = 1; j < 256; j++) {
-            fs->c.one_state[j]        = f->state_transition[j];
+            fs->c. one_state[      j] = f->state_transition[j];
             fs->c.zero_state[256 - j] = 256 - fs->c.one_state[j];
         }
     }
@@ -185,15 +98,23 @@ int ffv1_init_slice_state(FFV1Context *f, FFV1Context *fs)
     return 0;
 }
 
+/** Run ffv1_init_slice_state() on each of the f->slice_count slice contexts of f.
+ *  @return 0 on success, otherwise the negative error code of the first slice that failed */
+av_cold int ffv1_init_slices_state(FFV1Context *f)
+{
+    int i, ret;
+    for (i = 0; i < f->slice_count; i++)
+        if ((ret = ffv1_init_slice_state(f, f->slice_context[i])) < 0)
+            return ret; /* hand the callee's code to the caller instead of assuming ENOMEM */
+    return 0;
+}
+
 av_cold int ffv1_init_slice_contexts(FFV1Context *f)
 {
     int i;
 
     f->slice_count = f->num_h_slices * f->num_v_slices;
-    if (f->slice_count <= 0) {
-        av_log(f->avctx, AV_LOG_ERROR, "Invalid number of slices\n");
-        return AVERROR(EINVAL);
-    }
+    av_assert0(f->slice_count > 0);
 
     for (i = 0; i < f->slice_count; i++) {
         FFV1Context *fs = av_mallocz(sizeof(*fs));
@@ -203,6 +124,10 @@ av_cold int ffv1_init_slice_contexts(FFV1Context *f)
         int sxe         = f->avctx->width  * (sx + 1) / f->num_h_slices;
         int sys         = f->avctx->height *  sy      / f->num_v_slices;
         int sye         = f->avctx->height * (sy + 1) / f->num_v_slices;
+
+        if (!fs)
+            return AVERROR(ENOMEM);
+
         f->slice_context[i] = fs;
         memcpy(fs, f, sizeof(*fs));
         memset(fs->rc_stat2, 0, sizeof(fs->rc_stat2));
@@ -212,7 +137,7 @@ av_cold int ffv1_init_slice_contexts(FFV1Context *f)
         fs->slice_x      = sxs;
         fs->slice_y      = sys;
 
-        fs->sample_buffer = av_malloc(3 * MAX_PLANES * (fs->width + 6) *
+        fs->sample_buffer = av_malloc_array((fs->width + 6), 3 * MAX_PLANES *
                                       sizeof(*fs->sample_buffer));
         if (!fs->sample_buffer)
             return AVERROR(ENOMEM);
@@ -225,7 +150,7 @@ int ffv1_allocate_initial_states(FFV1Context *f)
     int i;
 
     for (i = 0; i < f->quant_table_count; i++) {
-        f->initial_states[i] = av_malloc(f->context_count[i] *
+        f->initial_states[i] = av_malloc_array(f->context_count[i],
                                          sizeof(*f->initial_states[i]));
         if (!f->initial_states[i])
             return AVERROR(ENOMEM);
@@ -262,11 +187,20 @@ void ffv1_clear_slice_state(FFV1Context *f, FFV1Context *fs)
     }
 }
 
+
 av_cold int ffv1_close(AVCodecContext *avctx)
 {
     FFV1Context *s = avctx->priv_data;
     int i, j;
 
+    if (s->picture.f)
+        ff_thread_release_buffer(avctx, &s->picture);
+    av_frame_free(&s->picture.f);
+
+    if (s->last_picture.f)
+        ff_thread_release_buffer(avctx, &s->last_picture);
+    av_frame_free(&s->last_picture.f);
+
     for (j = 0; j < s->slice_count; j++) {
         FFV1Context *fs = s->slice_context[j];
         for (i = 0; i < s->plane_count; i++) {
diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
index 5c3bdc1..5081397 100644
--- a/libavcodec/ffv1.h
+++ b/libavcodec/ffv1.h
@@ -3,32 +3,49 @@
  *
  * Copyright (c) 2003-2012 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #ifndef AVCODEC_FFV1_H
 #define AVCODEC_FFV1_H
 
-#include <stdint.h>
+/**
+ * @file
+ * FF Video Codec 1 (a lossless codec)
+ */
 
+#include "libavutil/avassert.h"
+#include "libavutil/crc.h"
+#include "libavutil/opt.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/timer.h"
 #include "avcodec.h"
 #include "get_bits.h"
+#include "internal.h"
+#include "mathops.h"
 #include "put_bits.h"
 #include "rangecoder.h"
+#include "thread.h"
+
+#ifdef __INTEL_COMPILER
+#undef av_flatten
+#define av_flatten
+#endif
 
 #define MAX_PLANES 4
 #define CONTEXT_SIZE 32
@@ -36,14 +53,6 @@
 #define MAX_QUANT_TABLES 8
 #define MAX_CONTEXT_INPUTS 5
 
-extern const uint8_t ff_log2_run[41];
-
-extern const int8_t ffv1_quant5_10bit[256];
-extern const int8_t ffv1_quant5[256];
-extern const int8_t ffv1_quant9_10bit[256];
-extern const int8_t ffv1_quant11[256];
-extern const uint8_t ffv1_ver2_state[256];
-
 typedef struct VlcState {
     int16_t drift;
     uint16_t error_sum;
@@ -71,20 +80,20 @@ typedef struct FFV1Context {
     uint64_t rc_stat[256][2];
     uint64_t (*rc_stat2[MAX_QUANT_TABLES])[32][2];
     int version;
-    int minor_version;
+    int micro_version;
     int width, height;
     int chroma_planes;
     int chroma_h_shift, chroma_v_shift;
     int transparency;
     int flags;
     int picture_number;
-    AVFrame *frame;
-    AVFrame *last_picture;
+    ThreadFrame picture, last_picture;
+    struct FFV1Context *fsrc;
 
     AVFrame *cur;
     int plane_count;
-    int ac;     // 1 = range coder <-> 0 = golomb rice
-    int ac_byte_count;      // number of bytes used for AC coding
+    int ac;                              ///< 1=range coder <-> 0=golomb rice
+    int ac_byte_count;                   ///< number of bytes used for AC coding
     PlaneContext plane[MAX_PLANES];
     int16_t quant_table[MAX_CONTEXT_INPUTS][256];
     int16_t quant_tables[MAX_QUANT_TABLES][MAX_CONTEXT_INPUTS][256];
@@ -96,6 +105,7 @@ typedef struct FFV1Context {
     int16_t *sample_buffer;
 
     int ec;
+    int intra;
     int slice_damaged;
     int key_frame_ok;
 
@@ -113,8 +123,20 @@ typedef struct FFV1Context {
     int slice_height;
     int slice_x;
     int slice_y;
+    int slice_reset_contexts;
+    int slice_coding_mode;
+    int slice_rct_by_coef;
+    int slice_rct_ry_coef;
 } FFV1Context;
 
+int ffv1_common_init(AVCodecContext *avctx);
+int ffv1_init_slice_state(FFV1Context *f, FFV1Context *fs);
+int ffv1_init_slices_state(FFV1Context *f);
+int ffv1_init_slice_contexts(FFV1Context *f);
+int ffv1_allocate_initial_states(FFV1Context *f);
+void ffv1_clear_slice_state(FFV1Context *f, FFV1Context *fs);
+int ffv1_close(AVCodecContext *avctx);
+
 static av_always_inline int fold(int diff, int bits)
 {
     if (bits == 8)
@@ -193,11 +215,4 @@ static inline void update_vlc_state(VlcState *const state, const int v)
     state->count = count;
 }
 
-int ffv1_common_init(AVCodecContext *avctx);
-int ffv1_init_slice_state(FFV1Context *f, FFV1Context *fs);
-int ffv1_init_slice_contexts(FFV1Context *f);
-int ffv1_allocate_initial_states(FFV1Context *f);
-void ffv1_clear_slice_state(FFV1Context *f, FFV1Context *fs);
-int ffv1_close(AVCodecContext *avctx);
-
 #endif /* AVCODEC_FFV1_H */
diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c
index 703491e..b10e212 100644
--- a/libavcodec/ffv1dec.c
+++ b/libavcodec/ffv1dec.c
@@ -1,22 +1,22 @@
 /*
  * FFV1 decoder
  *
- * Copyright (c) 2003-2012 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2003-2013 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,15 +26,14 @@
  */
 
 #include "libavutil/avassert.h"
-#include "libavutil/pixdesc.h"
 #include "libavutil/crc.h"
 #include "libavutil/opt.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/pixdesc.h"
 #include "libavutil/timer.h"
 #include "avcodec.h"
 #include "internal.h"
 #include "get_bits.h"
-#include "put_bits.h"
 #include "rangecoder.h"
 #include "golomb.h"
 #include "mathops.h"
@@ -77,8 +76,6 @@ static inline int get_vlc_symbol(GetBitContext *gb, VlcState *const state,
         i += i;
     }
 
-    assert(k <= 8);
-
     v = get_sr_golomb(gb, k, 12, bits);
     av_dlog(NULL, "v:%d bias:%d error:%d drift:%d count:%d k:%d",
             v, state->bias, state->error_sum, state->drift, state->count, k);
@@ -108,6 +105,19 @@ static av_always_inline void decode_line(FFV1Context *s, int w,
     int run_mode  = 0;
     int run_index = s->run_index;
 
+    if (s->slice_coding_mode == 1) {
+        int i;
+        for (x = 0; x < w; x++) {
+            int v = 0;
+            for (i=0; i<bits; i++) {
+                uint8_t state = 128;
+                v += v + get_rac(c, &state);
+            }
+            sample[1][x] = v;
+        }
+        return;
+    }
+
     for (x = 0; x < w; x++) {
         int diff, context, sign;
 
@@ -195,29 +205,27 @@ static void decode_plane(FFV1Context *s, uint8_t *src,
             for (x = 0; x < w; x++)
                 src[x + stride * y] = sample[1][x];
         } else {
-            decode_line(s, w, sample, plane_index,
-                        s->avctx->bits_per_raw_sample);
+            decode_line(s, w, sample, plane_index, s->avctx->bits_per_raw_sample);
             if (s->packed_at_lsb) {
-                for (x = 0; x < w; x++)
-                    ((uint16_t *)(src + stride * y))[x] = sample[1][x];
+                for (x = 0; x < w; x++) {
+                    ((uint16_t*)(src + stride*y))[x] = sample[1][x];
+                }
             } else {
-                for (x = 0; x < w; x++)
-                    ((uint16_t *)(src + stride * y))[x] = sample[1][x] << (16 - s->avctx->bits_per_raw_sample);
+                for (x = 0; x < w; x++) {
+                    ((uint16_t*)(src + stride*y))[x] = sample[1][x] << (16 - s->avctx->bits_per_raw_sample);
+                }
             }
         }
 // STOP_TIMER("decode-line") }
     }
 }
 
-static void decode_rgb_frame(FFV1Context *s, uint8_t *src[3], int w, int h,
-                             int stride[3])
+static void decode_rgb_frame(FFV1Context *s, uint8_t *src[3], int w, int h, int stride[3])
 {
     int x, y, p;
     int16_t *sample[4][2];
-    int lbd  = s->avctx->bits_per_raw_sample <= 8;
-    int bits = s->avctx->bits_per_raw_sample > 0
-               ? s->avctx->bits_per_raw_sample
-               : 8;
+    int lbd    = s->avctx->bits_per_raw_sample <= 8;
+    int bits   = s->avctx->bits_per_raw_sample > 0 ? s->avctx->bits_per_raw_sample : 8;
     int offset = 1 << bits;
 
     for (x = 0; x < 4; x++) {
@@ -231,17 +239,17 @@ static void decode_rgb_frame(FFV1Context *s, uint8_t *src[3], int w, int h,
 
     for (y = 0; y < h; y++) {
         for (p = 0; p < 3 + s->transparency; p++) {
-            int16_t *temp = sample[p][0]; //FIXME try a normal buffer
+            int16_t *temp = sample[p][0]; // FIXME: try a normal buffer
 
             sample[p][0] = sample[p][1];
             sample[p][1] = temp;
 
-            sample[p][1][-1] = sample[p][0][0];
-            sample[p][0][w]  = sample[p][0][w - 1];
-            if (lbd)
-                decode_line(s, w, sample[p], (p + 1) / 2, 9);
+            sample[p][1][-1]= sample[p][0][0  ];
+            sample[p][0][ w]= sample[p][0][w-1];
+            if (lbd && s->slice_coding_mode == 0)
+                decode_line(s, w, sample[p], (p + 1)/2, 9);
             else
-                decode_line(s, w, sample[p], (p + 1) / 2, bits + 1);
+                decode_line(s, w, sample[p], (p + 1)/2, bits + (s->slice_coding_mode != 1));
         }
         for (x = 0; x < w; x++) {
             int g = sample[0][1][x];
@@ -249,19 +257,20 @@ static void decode_rgb_frame(FFV1Context *s, uint8_t *src[3], int w, int h,
             int r = sample[2][1][x];
             int a = sample[3][1][x];
 
-            b -= offset;
-            r -= offset;
-            g -= (b + r) >> 2;
-            b += g;
-            r += g;
+            if (s->slice_coding_mode != 1) {
+                b -= offset;
+                r -= offset;
+                g -= (b * s->slice_rct_by_coef + r * s->slice_rct_ry_coef) >> 2;
+                b += g;
+                r += g;
+            }
 
             if (lbd)
-                *((uint32_t *)(src[0] + x * 4 + stride[0] * y)) = b +
-                    (g << 8) + (r << 16) + (a << 24);
+                *((uint32_t*)(src[0] + x*4 + stride[0]*y)) = b + (g<<8) + (r<<16) + (a<<24);
             else {
-                *((uint16_t *)(src[0] + x * 2 + stride[0] * y)) = b;
-                *((uint16_t *)(src[1] + x * 2 + stride[1] * y)) = g;
-                *((uint16_t *)(src[2] + x * 2 + stride[2] * y)) = r;
+                *((uint16_t*)(src[0] + x*2 + stride[0]*y)) = b;
+                *((uint16_t*)(src[1] + x*2 + stride[1]*y)) = g;
+                *((uint16_t*)(src[2] + x*2 + stride[2]*y)) = r;
             }
         }
     }
@@ -274,35 +283,29 @@ static int decode_slice_header(FFV1Context *f, FFV1Context *fs)
     unsigned ps, i, context_count;
     memset(state, 128, sizeof(state));
 
-    if (fs->ac > 1) {
-        for (i = 1; i < 256; i++) {
-            fs->c.one_state[i]        = f->state_transition[i];
-            fs->c.zero_state[256 - i] = 256 - fs->c.one_state[i];
-        }
-    }
+    av_assert0(f->version > 2);
 
-    fs->slice_x      = get_symbol(c, state, 0) * f->width;
-    fs->slice_y      = get_symbol(c, state, 0) * f->height;
-    fs->slice_width  = (get_symbol(c, state, 0) + 1) * f->width + fs->slice_x;
+    fs->slice_x      =  get_symbol(c, state, 0)      * f->width ;
+    fs->slice_y      =  get_symbol(c, state, 0)      * f->height;
+    fs->slice_width  = (get_symbol(c, state, 0) + 1) * f->width  + fs->slice_x;
     fs->slice_height = (get_symbol(c, state, 0) + 1) * f->height + fs->slice_y;
 
-    fs->slice_x     /= f->num_h_slices;
-    fs->slice_y     /= f->num_v_slices;
-    fs->slice_width  = fs->slice_width / f->num_h_slices - fs->slice_x;
-    fs->slice_height = fs->slice_height / f->num_v_slices - fs->slice_y;
-    if ((unsigned)fs->slice_width  > f->width ||
-        (unsigned)fs->slice_height > f->height)
-        return AVERROR_INVALIDDATA;
-    if ((unsigned)fs->slice_x + (uint64_t)fs->slice_width  > f->width ||
-        (unsigned)fs->slice_y + (uint64_t)fs->slice_height > f->height)
-        return AVERROR_INVALIDDATA;
+    fs->slice_x /= f->num_h_slices;
+    fs->slice_y /= f->num_v_slices;
+    fs->slice_width  = fs->slice_width /f->num_h_slices - fs->slice_x;
+    fs->slice_height = fs->slice_height/f->num_v_slices - fs->slice_y;
+    if ((unsigned)fs->slice_width > f->width || (unsigned)fs->slice_height > f->height)
+        return -1;
+    if (    (unsigned)fs->slice_x + (uint64_t)fs->slice_width  > f->width
+         || (unsigned)fs->slice_y + (uint64_t)fs->slice_height > f->height)
+        return -1;
 
     for (i = 0; i < f->plane_count; i++) {
-        PlaneContext *const p = &fs->plane[i];
-        int idx               = get_symbol(c, state, 0);
+        PlaneContext * const p = &fs->plane[i];
+        int idx = get_symbol(c, state, 0);
         if (idx > (unsigned)f->quant_table_count) {
             av_log(f->avctx, AV_LOG_ERROR, "quant_table_index out of range\n");
-            return AVERROR_INVALIDDATA;
+            return -1;
         }
         p->quant_table_index = idx;
         memcpy(p->quant_table, f->quant_tables[idx], sizeof(p->quant_table));
@@ -336,20 +339,72 @@ static int decode_slice_header(FFV1Context *f, FFV1Context *fs)
         f->cur->sample_aspect_ratio = (AVRational){ 0, 1 };
     }
 
+    if (fs->version > 3) {
+        fs->slice_reset_contexts = get_rac(c, state);
+        fs->slice_coding_mode = get_symbol(c, state, 0);
+        if (fs->slice_coding_mode != 1) {
+            fs->slice_rct_by_coef = get_symbol(c, state, 0);
+            fs->slice_rct_ry_coef = get_symbol(c, state, 0);
+            if ((uint64_t)fs->slice_rct_by_coef + (uint64_t)fs->slice_rct_ry_coef > 4) {
+                av_log(f->avctx, AV_LOG_ERROR, "slice_rct_y_coef out of range\n");
+                return AVERROR_INVALIDDATA;
+            }
+        }
+    }
+
     return 0;
 }
 
 static int decode_slice(AVCodecContext *c, void *arg)
 {
-    FFV1Context *fs = *(void **)arg;
-    FFV1Context *f  = fs->avctx->priv_data;
+    FFV1Context *fs   = *(void **)arg;
+    FFV1Context *f    = fs->avctx->priv_data;
     int width, height, x, y, ret;
-    const int ps = (av_pix_fmt_desc_get(c->pix_fmt)->flags & AV_PIX_FMT_FLAG_PLANAR)
-                   ? (c->bits_per_raw_sample > 8) + 1
-                   : 4;
-    AVFrame *const p = f->cur;
+    const int ps      = av_pix_fmt_desc_get(c->pix_fmt)->comp[0].step_minus1 + 1;
+    AVFrame * const p = f->cur;
+    int i, si;
+
+    for( si=0; fs != f->slice_context[si]; si ++)
+        ;
+
+    if(f->fsrc && !p->key_frame)
+        ff_thread_await_progress(&f->last_picture, si, 0);
+
+    if(f->fsrc && !p->key_frame) {
+        FFV1Context *fssrc = f->fsrc->slice_context[si];
+        FFV1Context *fsdst = f->slice_context[si];
+        av_assert1(fsdst->plane_count == fssrc->plane_count);
+        av_assert1(fsdst == fs);
+
+        if (!p->key_frame)
+            fsdst->slice_damaged |= fssrc->slice_damaged;
+
+        for (i = 0; i < f->plane_count; i++) {
+            PlaneContext *psrc = &fssrc->plane[i];
+            PlaneContext *pdst = &fsdst->plane[i];
+
+            av_free(pdst->state);
+            av_free(pdst->vlc_state);
+            memcpy(pdst, psrc, sizeof(*pdst));
+            pdst->state = NULL;
+            pdst->vlc_state = NULL;
+
+            if (fssrc->ac) {
+                pdst->state = av_malloc_array(CONTEXT_SIZE,  psrc->context_count);
+                memcpy(pdst->state, psrc->state, CONTEXT_SIZE * psrc->context_count);
+            } else {
+                pdst->vlc_state = av_malloc_array(sizeof(*pdst->vlc_state), psrc->context_count);
+                memcpy(pdst->vlc_state, psrc->vlc_state, sizeof(*pdst->vlc_state) * psrc->context_count);
+            }
+        }
+    }
+
+    fs->slice_rct_by_coef = 1;
+    fs->slice_rct_ry_coef = 1;
 
     if (f->version > 2) {
+        if (ffv1_init_slice_state(f, fs) < 0)
+            return AVERROR(ENOMEM);
         if (decode_slice_header(f, fs) < 0) {
             fs->slice_damaged = 1;
             return AVERROR_INVALIDDATA;
@@ -357,44 +412,37 @@ static int decode_slice(AVCodecContext *c, void *arg)
     }
     if ((ret = ffv1_init_slice_state(f, fs)) < 0)
         return ret;
-    if (f->cur->key_frame)
+    if (f->cur->key_frame || fs->slice_reset_contexts)
         ffv1_clear_slice_state(f, fs);
+
     width  = fs->slice_width;
     height = fs->slice_height;
     x      = fs->slice_x;
     y      = fs->slice_y;
 
     if (!fs->ac) {
-        if (f->version == 3 && f->minor_version > 1 || f->version > 3)
+        if (f->version == 3 && f->micro_version > 1 || f->version > 3)
             get_rac(&fs->c, (uint8_t[]) { 129 });
         fs->ac_byte_count = f->version > 2 || (!x && !y) ? fs->c.bytestream - fs->c.bytestream_start - 1 : 0;
-        init_get_bits(&fs->gb, fs->c.bytestream_start + fs->ac_byte_count,
-                      (fs->c.bytestream_end - fs->c.bytestream_start -
-                       fs->ac_byte_count) * 8);
+        init_get_bits(&fs->gb,
+                      fs->c.bytestream_start + fs->ac_byte_count,
+                      (fs->c.bytestream_end - fs->c.bytestream_start - fs->ac_byte_count) * 8);
     }
 
     av_assert1(width && height);
     if (f->colorspace == 0) {
-        const int chroma_width  = -((-width) >> f->chroma_h_shift);
-        const int chroma_height = -((-height) >> f->chroma_v_shift);
+        const int chroma_width  = FF_CEIL_RSHIFT(width,  f->chroma_h_shift);
+        const int chroma_height = FF_CEIL_RSHIFT(height, f->chroma_v_shift);
         const int cx            = x >> f->chroma_h_shift;
         const int cy            = y >> f->chroma_v_shift;
-        decode_plane(fs, p->data[0] + ps * x + y * p->linesize[0], width,
-                     height, p->linesize[0],
-                     0);
+        decode_plane(fs, p->data[0] + ps*x + y*p->linesize[0], width, height, p->linesize[0], 0);
 
         if (f->chroma_planes) {
-            decode_plane(fs, p->data[1] + ps * cx + cy * p->linesize[1],
-                         chroma_width, chroma_height, p->linesize[1],
-                         1);
-            decode_plane(fs, p->data[2] + ps * cx + cy * p->linesize[2],
-                         chroma_width, chroma_height, p->linesize[2],
-                         1);
+            decode_plane(fs, p->data[1] + ps*cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[1], 1);
+            decode_plane(fs, p->data[2] + ps*cx+cy*p->linesize[2], chroma_width, chroma_height, p->linesize[2], 1);
         }
         if (fs->transparency)
-            decode_plane(fs, p->data[3] + ps * x + y * p->linesize[3], width,
-                         height, p->linesize[3],
-                         2);
+            decode_plane(fs, p->data[3] + ps*x + y*p->linesize[3], width, height, p->linesize[3], 2);
     } else {
         uint8_t *planes[3] = { p->data[0] + ps * x + y * p->linesize[0],
                                p->data[1] + ps * x + y * p->linesize[1],
@@ -404,16 +452,17 @@ static int decode_slice(AVCodecContext *c, void *arg)
     if (fs->ac && f->version > 2) {
         int v;
         get_rac(&fs->c, (uint8_t[]) { 129 });
-        v = fs->c.bytestream_end - fs->c.bytestream - 2 - 5 * f->ec;
+        v = fs->c.bytestream_end - fs->c.bytestream - 2 - 5*f->ec;
         if (v) {
-            av_log(f->avctx, AV_LOG_ERROR, "bytestream end mismatching by %d\n",
-                   v);
+            av_log(f->avctx, AV_LOG_ERROR, "bytestream end mismatching by %d\n", v);
             fs->slice_damaged = 1;
         }
     }
 
     emms_c();
 
+    ff_thread_report_progress(&f->picture, si, 0);
+
     return 0;
 }
 
@@ -429,7 +478,7 @@ static int read_quant_table(RangeCoder *c, int16_t *quant_table, int scale)
         unsigned len = get_symbol(c, state, 0) + 1;
 
         if (len > 128 - i)
-            return -1;
+            return AVERROR_INVALIDDATA;
 
         while (len--) {
             quant_table[i] = scale * v;
@@ -453,7 +502,7 @@ static int read_quant_tables(RangeCoder *c,
     for (i = 0; i < 5; i++) {
         context_count *= read_quant_table(c, quant_table[i], context_count);
         if (context_count > 32768U) {
-            return -1;
+            return AVERROR_INVALIDDATA;
         }
     }
     return (context_count + 1) / 2;
@@ -473,12 +522,15 @@ static int read_extra_header(FFV1Context *f)
     ff_build_rac_states(c, 0.05 * (1LL << 32), 256 - 8);
 
     f->version = get_symbol(c, state, 0);
+    if (f->version < 2) {
+        av_log(f->avctx, AV_LOG_ERROR, "Invalid version in global header\n");
+        return AVERROR_INVALIDDATA;
+    }
     if (f->version > 2) {
         c->bytestream_end -= 4;
-        f->minor_version   = get_symbol(c, state, 0);
+        f->micro_version = get_symbol(c, state, 0);
     }
     f->ac = f->avctx->coder_type = get_symbol(c, state, 0);
-
     if (f->ac > 1) {
         for (i = 1; i < 256; i++)
             f->state_transition[i] = get_symbol(c, state, 1) + c->one_state[i];
@@ -490,19 +542,21 @@ static int read_extra_header(FFV1Context *f)
     f->chroma_h_shift             = get_symbol(c, state, 0);
     f->chroma_v_shift             = get_symbol(c, state, 0);
     f->transparency               = get_rac(c, state);
-    f->plane_count                = 2 + f->transparency;
+    f->plane_count                = 1 + (f->chroma_planes || f->version<4) + f->transparency;
     f->num_h_slices               = 1 + get_symbol(c, state, 0);
     f->num_v_slices               = 1 + get_symbol(c, state, 0);
 
-    if (f->num_h_slices > (unsigned)f->width ||
-        f->num_v_slices > (unsigned)f->height) {
-        av_log(f->avctx, AV_LOG_ERROR, "too many slices\n");
+    if (f->num_h_slices > (unsigned)f->width  || !f->num_h_slices ||
+        f->num_v_slices > (unsigned)f->height || !f->num_v_slices
+       ) {
+        av_log(f->avctx, AV_LOG_ERROR, "slice count invalid\n");
         return AVERROR_INVALIDDATA;
     }
 
     f->quant_table_count = get_symbol(c, state, 0);
     if (f->quant_table_count > (unsigned)MAX_QUANT_TABLES)
         return AVERROR_INVALIDDATA;
+
     for (i = 0; i < f->quant_table_count; i++) {
         f->context_count[i] = read_quant_tables(c, f->quant_tables[i]);
         if (f->context_count[i] < 0) {
@@ -525,6 +579,8 @@ static int read_extra_header(FFV1Context *f)
 
     if (f->version > 2) {
         f->ec = get_symbol(c, state, 0);
+        if (f->micro_version > 2)
+            f->intra = get_symbol(c, state, 0);
     }
 
     if (f->version > 2) {
@@ -537,138 +593,135 @@ static int read_extra_header(FFV1Context *f)
         }
     }
 
+    if (f->avctx->debug & FF_DEBUG_PICT_INFO)
+        av_log(f->avctx, AV_LOG_DEBUG,
+               "global: ver:%d.%d, coder:%d, colorspace: %d bpr:%d chroma:%d(%d:%d), alpha:%d slices:%dx%d qtabs:%d ec:%d intra:%d\n",
+               f->version, f->micro_version,
+               f->ac,
+               f->colorspace,
+               f->avctx->bits_per_raw_sample,
+               f->chroma_planes, f->chroma_h_shift, f->chroma_v_shift,
+               f->transparency,
+               f->num_h_slices, f->num_v_slices,
+               f->quant_table_count,
+               f->ec,
+               f->intra
+              );
     return 0;
 }
 
-
 static int read_header(FFV1Context *f)
 {
     uint8_t state[CONTEXT_SIZE];
-    int i, j, context_count = -1;
+    int i, j, context_count = -1; //-1 to avoid warning
     RangeCoder *const c = &f->slice_context[0]->c;
 
     memset(state, 128, sizeof(state));
 
     if (f->version < 2) {
-        unsigned v = get_symbol(c, state, 0);
-        if (v > 1) {
-            av_log(f->avctx, AV_LOG_ERROR,
-                   "invalid version %d in version 1 header\n", v);
+        int chroma_planes, chroma_h_shift, chroma_v_shift, transparency, colorspace, bits_per_raw_sample;
+        unsigned v= get_symbol(c, state, 0);
+        if (v >= 2) {
+            av_log(f->avctx, AV_LOG_ERROR, "invalid version %d in ver01 header\n", v);
             return AVERROR_INVALIDDATA;
         }
         f->version = v;
-
-        f->ac = f->avctx->coder_type = get_symbol(c, state, 0);
-
+        f->ac      = f->avctx->coder_type = get_symbol(c, state, 0);
         if (f->ac > 1) {
             for (i = 1; i < 256; i++)
-                f->state_transition[i] =
-                    get_symbol(c, state, 1) + c->one_state[i];
+                f->state_transition[i] = get_symbol(c, state, 1) + c->one_state[i];
         }
 
-        f->colorspace = get_symbol(c, state, 0); //YUV cs type
+        colorspace     = get_symbol(c, state, 0); //YUV cs type
+        bits_per_raw_sample = f->version > 0 ? get_symbol(c, state, 0) : f->avctx->bits_per_raw_sample;
+        chroma_planes  = get_rac(c, state);
+        chroma_h_shift = get_symbol(c, state, 0);
+        chroma_v_shift = get_symbol(c, state, 0);
+        transparency   = get_rac(c, state);
+
+        if (f->plane_count) {
+            if (   colorspace    != f->colorspace
+                || bits_per_raw_sample != f->avctx->bits_per_raw_sample
+                || chroma_planes != f->chroma_planes
+                || chroma_h_shift!= f->chroma_h_shift
+                || chroma_v_shift!= f->chroma_v_shift
+                || transparency  != f->transparency) {
+                av_log(f->avctx, AV_LOG_ERROR, "Invalid change of global parameters\n");
+                return AVERROR_INVALIDDATA;
+            }
+        }
 
-        if (f->version > 0)
-            f->avctx->bits_per_raw_sample = get_symbol(c, state, 0);
+        f->colorspace     = colorspace;
+        f->avctx->bits_per_raw_sample = bits_per_raw_sample;
+        f->chroma_planes  = chroma_planes;
+        f->chroma_h_shift = chroma_h_shift;
+        f->chroma_v_shift = chroma_v_shift;
+        f->transparency   = transparency;
 
-        f->chroma_planes  = get_rac(c, state);
-        f->chroma_h_shift = get_symbol(c, state, 0);
-        f->chroma_v_shift = get_symbol(c, state, 0);
-        f->transparency   = get_rac(c, state);
         f->plane_count    = 2 + f->transparency;
     }
 
     if (f->colorspace == 0) {
+        if (f->avctx->skip_alpha) f->transparency = 0;
         if (!f->transparency && !f->chroma_planes) {
             if (f->avctx->bits_per_raw_sample <= 8)
                 f->avctx->pix_fmt = AV_PIX_FMT_GRAY8;
             else
                 f->avctx->pix_fmt = AV_PIX_FMT_GRAY16;
-        } else if (f->avctx->bits_per_raw_sample <= 8 && !f->transparency) {
-            switch (16 * f->chroma_h_shift + f->chroma_v_shift) {
-            case 0x00:
-                f->avctx->pix_fmt = AV_PIX_FMT_YUV444P;
-                break;
-            case 0x01:
-                f->avctx->pix_fmt = AV_PIX_FMT_YUV440P;
-                break;
-            case 0x10:
-                f->avctx->pix_fmt = AV_PIX_FMT_YUV422P;
-                break;
-            case 0x11:
-                f->avctx->pix_fmt = AV_PIX_FMT_YUV420P;
-                break;
-            case 0x20:
-                f->avctx->pix_fmt = AV_PIX_FMT_YUV411P;
-                break;
-            case 0x22:
-                f->avctx->pix_fmt = AV_PIX_FMT_YUV410P;
-                break;
-            default:
-                av_log(f->avctx, AV_LOG_ERROR, "format not supported\n");
-                return AVERROR(ENOSYS);
+        } else if (f->avctx->bits_per_raw_sample<=8 && !f->transparency) {
+            switch(16 * f->chroma_h_shift + f->chroma_v_shift) {
+            case 0x00: f->avctx->pix_fmt = AV_PIX_FMT_YUV444P; break;
+            case 0x01: f->avctx->pix_fmt = AV_PIX_FMT_YUV440P; break;
+            case 0x10: f->avctx->pix_fmt = AV_PIX_FMT_YUV422P; break;
+            case 0x11: f->avctx->pix_fmt = AV_PIX_FMT_YUV420P; break;
+            case 0x20: f->avctx->pix_fmt = AV_PIX_FMT_YUV411P; break;
+            case 0x22: f->avctx->pix_fmt = AV_PIX_FMT_YUV410P; break;
             }
         } else if (f->avctx->bits_per_raw_sample <= 8 && f->transparency) {
-            switch (16 * f->chroma_h_shift + f->chroma_v_shift) {
-            case 0x00:
-                f->avctx->pix_fmt = AV_PIX_FMT_YUVA444P;
-                break;
-            case 0x10:
-                f->avctx->pix_fmt = AV_PIX_FMT_YUVA422P;
-                break;
-            case 0x11:
-                f->avctx->pix_fmt = AV_PIX_FMT_YUVA420P;
-                break;
-            default:
-                av_log(f->avctx, AV_LOG_ERROR, "format not supported\n");
-                return AVERROR(ENOSYS);
+            switch(16*f->chroma_h_shift + f->chroma_v_shift) {
+            case 0x00: f->avctx->pix_fmt = AV_PIX_FMT_YUVA444P; break;
+            case 0x10: f->avctx->pix_fmt = AV_PIX_FMT_YUVA422P; break;
+            case 0x11: f->avctx->pix_fmt = AV_PIX_FMT_YUVA420P; break;
             }
-        } else if (f->avctx->bits_per_raw_sample == 9) {
+        } else if (f->avctx->bits_per_raw_sample == 9 && !f->transparency) {
             f->packed_at_lsb = 1;
-            switch (16 * f->chroma_h_shift + f->chroma_v_shift) {
-            case 0x00:
-                f->avctx->pix_fmt = AV_PIX_FMT_YUV444P9;
-                break;
-            case 0x10:
-                f->avctx->pix_fmt = AV_PIX_FMT_YUV422P9;
-                break;
-            case 0x11:
-                f->avctx->pix_fmt = AV_PIX_FMT_YUV420P9;
-                break;
-            default:
-                av_log(f->avctx, AV_LOG_ERROR, "format not supported\n");
-                return AVERROR(ENOSYS);
+            switch(16 * f->chroma_h_shift + f->chroma_v_shift) {
+            case 0x00: f->avctx->pix_fmt = AV_PIX_FMT_YUV444P9; break;
+            case 0x10: f->avctx->pix_fmt = AV_PIX_FMT_YUV422P9; break;
+            case 0x11: f->avctx->pix_fmt = AV_PIX_FMT_YUV420P9; break;
             }
-        } else if (f->avctx->bits_per_raw_sample == 10) {
+        } else if (f->avctx->bits_per_raw_sample == 9 && f->transparency) {
             f->packed_at_lsb = 1;
-            switch (16 * f->chroma_h_shift + f->chroma_v_shift) {
-            case 0x00:
-                f->avctx->pix_fmt = AV_PIX_FMT_YUV444P10;
-                break;
-            case 0x10:
-                f->avctx->pix_fmt = AV_PIX_FMT_YUV422P10;
-                break;
-            case 0x11:
-                f->avctx->pix_fmt = AV_PIX_FMT_YUV420P10;
-                break;
-            default:
-                av_log(f->avctx, AV_LOG_ERROR, "format not supported\n");
-                return AVERROR(ENOSYS);
+            switch(16 * f->chroma_h_shift + f->chroma_v_shift) {
+            case 0x00: f->avctx->pix_fmt = AV_PIX_FMT_YUVA444P9; break;
+            case 0x10: f->avctx->pix_fmt = AV_PIX_FMT_YUVA422P9; break;
+            case 0x11: f->avctx->pix_fmt = AV_PIX_FMT_YUVA420P9; break;
             }
-        } else {
-            switch (16 * f->chroma_h_shift + f->chroma_v_shift) {
-            case 0x00:
-                f->avctx->pix_fmt = AV_PIX_FMT_YUV444P16;
-                break;
-            case 0x10:
-                f->avctx->pix_fmt = AV_PIX_FMT_YUV422P16;
-                break;
-            case 0x11:
-                f->avctx->pix_fmt = AV_PIX_FMT_YUV420P16;
-                break;
-            default:
-                av_log(f->avctx, AV_LOG_ERROR, "format not supported\n");
-                return AVERROR(ENOSYS);
+        } else if (f->avctx->bits_per_raw_sample == 10 && !f->transparency) {
+            f->packed_at_lsb = 1;
+            switch(16 * f->chroma_h_shift + f->chroma_v_shift) {
+            case 0x00: f->avctx->pix_fmt = AV_PIX_FMT_YUV444P10; break;
+            case 0x10: f->avctx->pix_fmt = AV_PIX_FMT_YUV422P10; break;
+            case 0x11: f->avctx->pix_fmt = AV_PIX_FMT_YUV420P10; break;
+            }
+        } else if (f->avctx->bits_per_raw_sample == 10 && f->transparency) {
+            f->packed_at_lsb = 1;
+            switch(16 * f->chroma_h_shift + f->chroma_v_shift) {
+            case 0x00: f->avctx->pix_fmt = AV_PIX_FMT_YUVA444P10; break;
+            case 0x10: f->avctx->pix_fmt = AV_PIX_FMT_YUVA422P10; break;
+            case 0x11: f->avctx->pix_fmt = AV_PIX_FMT_YUVA420P10; break;
+            }
+        } else if (f->avctx->bits_per_raw_sample == 16 && !f->transparency){
+            switch(16 * f->chroma_h_shift + f->chroma_v_shift) {
+            case 0x00: f->avctx->pix_fmt = AV_PIX_FMT_YUV444P16; break;
+            case 0x10: f->avctx->pix_fmt = AV_PIX_FMT_YUV422P16; break;
+            case 0x11: f->avctx->pix_fmt = AV_PIX_FMT_YUV420P16; break;
+            }
+        } else if (f->avctx->bits_per_raw_sample == 16 && f->transparency){
+            switch(16 * f->chroma_h_shift + f->chroma_v_shift) {
+            case 0x00: f->avctx->pix_fmt = AV_PIX_FMT_YUVA444P16; break;
+            case 0x10: f->avctx->pix_fmt = AV_PIX_FMT_YUVA422P16; break;
+            case 0x11: f->avctx->pix_fmt = AV_PIX_FMT_YUVA420P16; break;
             }
         }
     } else if (f->colorspace == 1) {
@@ -677,27 +730,25 @@ static int read_header(FFV1Context *f)
                    "chroma subsampling not supported in this colorspace\n");
             return AVERROR(ENOSYS);
         }
-        switch (f->avctx->bits_per_raw_sample) {
-        case 0:
-        case 8:
-            f->avctx->pix_fmt = AV_PIX_FMT_RGB32;
-            break;
-        case 9:
+        if (     f->avctx->bits_per_raw_sample ==  9)
             f->avctx->pix_fmt = AV_PIX_FMT_GBRP9;
-            break;
-        case 10:
+        else if (f->avctx->bits_per_raw_sample == 10)
             f->avctx->pix_fmt = AV_PIX_FMT_GBRP10;
-            break;
-        default:
-            av_log(f->avctx, AV_LOG_ERROR,
-                   "bit depth %d not supported\n",
-                   f->avctx->bits_per_raw_sample);
-            return AVERROR(ENOSYS);
-        }
+        else if (f->avctx->bits_per_raw_sample == 12)
+            f->avctx->pix_fmt = AV_PIX_FMT_GBRP12;
+        else if (f->avctx->bits_per_raw_sample == 14)
+            f->avctx->pix_fmt = AV_PIX_FMT_GBRP14;
+        else
+        if (f->transparency) f->avctx->pix_fmt = AV_PIX_FMT_RGB32;
+        else                 f->avctx->pix_fmt = AV_PIX_FMT_0RGB32;
     } else {
         av_log(f->avctx, AV_LOG_ERROR, "colorspace not supported\n");
         return AVERROR(ENOSYS);
     }
+    if (f->avctx->pix_fmt == AV_PIX_FMT_NONE) {
+        av_log(f->avctx, AV_LOG_ERROR, "format not supported\n");
+        return AVERROR(ENOSYS);
+    }
 
     av_dlog(f->avctx, "%d %d %d\n",
             f->chroma_h_shift, f->chroma_v_shift, f->avctx->pix_fmt);
@@ -714,16 +765,15 @@ static int read_header(FFV1Context *f)
         for (f->slice_count = 0;
              f->slice_count < MAX_SLICES && 3 < p - c->bytestream_start;
              f->slice_count++) {
-            int trailer = 3 + 5 * !!f->ec;
-            int size    = AV_RB24(p - trailer);
+            int trailer = 3 + 5*!!f->ec;
+            int size = AV_RB24(p-trailer);
             if (size + trailer > p - c->bytestream_start)
                 break;
             p -= size + trailer;
         }
     }
     if (f->slice_count > (unsigned)MAX_SLICES || f->slice_count <= 0) {
-        av_log(f->avctx, AV_LOG_ERROR, "slice count %d is invalid\n",
-               f->slice_count);
+        av_log(f->avctx, AV_LOG_ERROR, "slice count %d is invalid\n", f->slice_count);
         return AVERROR_INVALIDDATA;
     }
 
@@ -735,23 +785,20 @@ static int read_header(FFV1Context *f)
         fs->slice_damaged = 0;
 
         if (f->version == 2) {
-            fs->slice_x     = get_symbol(c, state, 0) * f->width;
-            fs->slice_y     = get_symbol(c, state, 0) * f->height;
-            fs->slice_width =
-                (get_symbol(c, state, 0) + 1) * f->width + fs->slice_x;
-            fs->slice_height =
-                (get_symbol(c, state, 0) + 1) * f->height + fs->slice_y;
-
-            fs->slice_x      /= f->num_h_slices;
-            fs->slice_y      /= f->num_v_slices;
+            fs->slice_x      =  get_symbol(c, state, 0)      * f->width ;
+            fs->slice_y      =  get_symbol(c, state, 0)      * f->height;
+            fs->slice_width  = (get_symbol(c, state, 0) + 1) * f->width  + fs->slice_x;
+            fs->slice_height = (get_symbol(c, state, 0) + 1) * f->height + fs->slice_y;
+
+            fs->slice_x     /= f->num_h_slices;
+            fs->slice_y     /= f->num_v_slices;
             fs->slice_width  = fs->slice_width  / f->num_h_slices - fs->slice_x;
             fs->slice_height = fs->slice_height / f->num_v_slices - fs->slice_y;
-            if ((unsigned)fs->slice_width > f->width ||
+            if ((unsigned)fs->slice_width  > f->width ||
                 (unsigned)fs->slice_height > f->height)
                 return AVERROR_INVALIDDATA;
-            if ((unsigned)fs->slice_x + (uint64_t)fs->slice_width > f->width
-                || (unsigned)fs->slice_y + (uint64_t)fs->slice_height >
-                f->height)
+            if (   (unsigned)fs->slice_x + (uint64_t)fs->slice_width  > f->width
+                || (unsigned)fs->slice_y + (uint64_t)fs->slice_height > f->height)
                 return AVERROR_INVALIDDATA;
         }
 
@@ -786,16 +833,13 @@ static int read_header(FFV1Context *f)
     return 0;
 }
 
-static av_cold int ffv1_decode_init(AVCodecContext *avctx)
+static av_cold int decode_init(AVCodecContext *avctx)
 {
     FFV1Context *f = avctx->priv_data;
     int ret;
 
-    ffv1_common_init(avctx);
-
-    f->last_picture = av_frame_alloc();
-    if (!f->last_picture)
-        return AVERROR(ENOMEM);
+    if ((ret = ffv1_common_init(avctx)) < 0)
+        return ret;
 
     if (avctx->extradata && (ret = read_extra_header(f)) < 0)
         return ret;
@@ -803,11 +847,12 @@ static av_cold int ffv1_decode_init(AVCodecContext *avctx)
     if ((ret = ffv1_init_slice_contexts(f)) < 0)
         return ret;
 
+    avctx->internal->allocate_progress = 1;
+
     return 0;
 }
 
-static int ffv1_decode_frame(AVCodecContext *avctx, void *data,
-                             int *got_frame, AVPacket *avpkt)
+static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt)
 {
     const uint8_t *buf  = avpkt->data;
     int buf_size        = avpkt->size;
@@ -816,10 +861,22 @@ static int ffv1_decode_frame(AVCodecContext *avctx, void *data,
     int i, ret;
     uint8_t keystate = 128;
     const uint8_t *buf_p;
-    AVFrame *const p    = data;
+    AVFrame *p;
 
-    f->cur = p;
+    if (f->last_picture.f)
+        ff_thread_release_buffer(avctx, &f->last_picture);
+    FFSWAP(ThreadFrame, f->picture, f->last_picture);
 
+    f->cur = p = f->picture.f;
+
+    if (f->version < 3 && avctx->field_order > AV_FIELD_PROGRESSIVE) {
+        /* we have interlaced material flagged in container */
+        p->interlaced_frame = 1;
+        if (avctx->field_order == AV_FIELD_TT || avctx->field_order == AV_FIELD_TB)
+            p->top_field_first = 1;
+    }
+
+    f->avctx = avctx;
     ff_init_range_decoder(c, buf, buf_size);
     ff_build_rac_states(c, 0.05 * (1LL << 32), 256 - 8);
 
@@ -839,27 +896,23 @@ static int ffv1_decode_frame(AVCodecContext *avctx, void *data,
         p->key_frame = 0;
     }
 
-    if ((ret = ff_get_buffer(avctx, p, AV_GET_BUFFER_FLAG_REF)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_thread_get_buffer(avctx, &f->picture, AV_GET_BUFFER_FLAG_REF)) < 0)
         return ret;
-    }
 
     if (avctx->debug & FF_DEBUG_PICT_INFO)
-        av_log(avctx, AV_LOG_DEBUG,
-               "ver:%d keyframe:%d coder:%d ec:%d slices:%d bps:%d\n",
-               f->version, p->key_frame, f->ac, f->ec, f->slice_count,
-               f->avctx->bits_per_raw_sample);
+        av_log(avctx, AV_LOG_DEBUG, "ver:%d keyframe:%d coder:%d ec:%d slices:%d bps:%d\n",
+               f->version, p->key_frame, f->ac, f->ec, f->slice_count, f->avctx->bits_per_raw_sample);
+
+    ff_thread_finish_setup(avctx);
 
     buf_p = buf + buf_size;
     for (i = f->slice_count - 1; i >= 0; i--) {
         FFV1Context *fs = f->slice_context[i];
-        int trailer     = 3 + 5 * !!f->ec;
+        int trailer = 3 + 5*!!f->ec;
         int v;
 
-        if (i || f->version > 2)
-            v = AV_RB24(buf_p - trailer) + trailer;
-        else
-            v = buf_p - c->bytestream_start;
+        if (i || f->version > 2) v = AV_RB24(buf_p-trailer) + trailer;
+        else                     v = buf_p - c->bytestream_start;
         if (buf_p - c->bytestream_start < v) {
             av_log(avctx, AV_LOG_ERROR, "Slice pointer chain broken\n");
             return AVERROR_INVALIDDATA;
@@ -869,7 +922,15 @@ static int ffv1_decode_frame(AVCodecContext *avctx, void *data,
         if (f->ec) {
             unsigned crc = av_crc(av_crc_get_table(AV_CRC_32_IEEE), 0, buf_p, v);
             if (crc) {
-                av_log(f->avctx, AV_LOG_ERROR, "CRC mismatch %X!\n", crc);
+                int64_t ts = avpkt->pts != AV_NOPTS_VALUE ? avpkt->pts : avpkt->dts; /* prefer pts, fall back to dts */
+                av_log(f->avctx, AV_LOG_ERROR, "CRC mismatch %X!", crc); /* no newline yet: one of the calls below finishes the line */
+                if (ts != AV_NOPTS_VALUE && avctx->pkt_timebase.num) {
+                    av_log(f->avctx, AV_LOG_ERROR, " at %f seconds\n", ts*av_q2d(avctx->pkt_timebase));
+                } else if (ts != AV_NOPTS_VALUE) {
+                    av_log(f->avctx, AV_LOG_ERROR, " at %"PRId64"\n", ts);
+                } else {
+                    av_log(f->avctx, AV_LOG_ERROR, "\n");
+                }
                 fs->slice_damaged = 1;
             }
         }
@@ -879,54 +940,142 @@ static int ffv1_decode_frame(AVCodecContext *avctx, void *data,
         } else
             fs->c.bytestream_end = (uint8_t *)(buf_p + v);
 
+        fs->avctx = avctx;
         fs->cur = p;
     }
 
-    avctx->execute(avctx, decode_slice, &f->slice_context[0], NULL,
+    avctx->execute(avctx,
+                   decode_slice,
+                   &f->slice_context[0],
+                   NULL,
                    f->slice_count,
-                   sizeof(void *));
+                   sizeof(void*));
 
     for (i = f->slice_count - 1; i >= 0; i--) {
         FFV1Context *fs = f->slice_context[i];
         int j;
-        if (fs->slice_damaged && f->last_picture->data[0]) {
+        if (fs->slice_damaged && f->last_picture.f->data[0]) {
             const uint8_t *src[4];
             uint8_t *dst[4];
+            ff_thread_await_progress(&f->last_picture, INT_MAX, 0);
             for (j = 0; j < 4; j++) {
                 int sh = (j == 1 || j == 2) ? f->chroma_h_shift : 0;
                 int sv = (j == 1 || j == 2) ? f->chroma_v_shift : 0;
                 dst[j] = p->data[j] + p->linesize[j] *
                          (fs->slice_y >> sv) + (fs->slice_x >> sh);
-                src[j] = f->last_picture->data[j] +
-                         f->last_picture->linesize[j] *
+                src[j] = f->last_picture.f->data[j] + f->last_picture.f->linesize[j] *
                          (fs->slice_y >> sv) + (fs->slice_x >> sh);
             }
             av_image_copy(dst, p->linesize, (const uint8_t **)src,
-                          f->last_picture->linesize,
-                          avctx->pix_fmt, fs->slice_width,
+                          f->last_picture.f->linesize,
+                          avctx->pix_fmt,
+                          fs->slice_width,
                           fs->slice_height);
         }
     }
+    ff_thread_report_progress(&f->picture, INT_MAX, 0);
 
     f->picture_number++;
 
-    av_frame_unref(f->last_picture);
-    if ((ret = av_frame_ref(f->last_picture, p)) < 0)
-        return ret;
+    if (f->last_picture.f)
+        ff_thread_release_buffer(avctx, &f->last_picture);
     f->cur = NULL;
+    if ((ret = av_frame_ref(data, f->picture.f)) < 0)
+        return ret;
 
     *got_frame = 1;
 
     return buf_size;
 }
 
-static av_cold int ffv1_decode_close(AVCodecContext *avctx)
+static int init_thread_copy(AVCodecContext *avctx)
 {
-    FFV1Context *s = avctx->priv_data;;
+    FFV1Context *f = avctx->priv_data;
+    int i, ret;
+    /* Reset the frame/buffer pointers, then give this context its own state tables, frames and slice contexts. */
+    f->picture.f      = NULL;
+    f->last_picture.f = NULL;
+    f->sample_buffer  = NULL;
+    f->slice_count = 0;
+    /* NOTE(review): the av_memdup() results below are stored without a NULL check. */
+    for (i = 0; i < f->quant_table_count; i++) {
+        av_assert0(f->version > 1);
+        f->initial_states[i] = av_memdup(f->initial_states[i],
+                                         f->context_count[i] * sizeof(*f->initial_states[i]));
+    }
 
-    av_frame_free(&s->last_picture);
+    f->picture.f      = av_frame_alloc();
+    f->last_picture.f = av_frame_alloc();
+    if (!f->picture.f || !f->last_picture.f)
+        return AVERROR(ENOMEM); /* both frames are dereferenced later; fail here instead */
+    if ((ret = ffv1_init_slice_contexts(f)) < 0)
+        return ret;
+    return 0;
+}
+
+static void copy_fields(FFV1Context *fsdst, FFV1Context *fssrc, FFV1Context *fsrc)
+{
+    /* Settings taken from the source frame context (fsrc). */
+    fsdst->version             = fsrc->version;
+    fsdst->micro_version       = fsrc->micro_version;
+    fsdst->ac                  = fsrc->ac;
+    fsdst->colorspace          = fsrc->colorspace;
+    fsdst->bits_per_raw_sample = fsrc->bits_per_raw_sample;
+    fsdst->packed_at_lsb       = fsrc->packed_at_lsb;
+    fsdst->chroma_planes       = fsrc->chroma_planes;
+    fsdst->chroma_h_shift      = fsrc->chroma_h_shift;
+    fsdst->chroma_v_shift      = fsrc->chroma_v_shift;
+    fsdst->transparency        = fsrc->transparency;
+    fsdst->plane_count         = fsrc->plane_count;
+    fsdst->slice_count         = fsrc->slice_count;
+    fsdst->ec                  = fsrc->ec;
+    fsdst->intra               = fsrc->intra;
+    fsdst->key_frame_ok        = fsrc->key_frame_ok;
+    /* State taken from the matching source slice context (fssrc). */
+    fsdst->slice_damaged       = fssrc->slice_damaged;
+    if (fsrc->version >= 3)
+        return; /* slice geometry is only carried over for versions below 3 */
+    fsdst->slice_x             = fssrc->slice_x;
+    fsdst->slice_y             = fssrc->slice_y;
+    fsdst->slice_width         = fssrc->slice_width;
+    fsdst->slice_height        = fssrc->slice_height;
+}
+
+static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
+{
+    FFV1Context *fsrc = src->priv_data;
+    FFV1Context *fdst = dst->priv_data;
+    int i, ret;
+    /* Bring dst's decoder context in line with src's; returns 0, or the negative value from ff_thread_ref_frame(). */
+    if (dst == src)
+        return 0;
+    /* Overwrite *fdst with *fsrc wholesale, then put back the members dst held before the copy. */
+    {
+        FFV1Context bak = *fdst;
+        memcpy(fdst, fsrc, sizeof(*fdst));
+        memcpy(fdst->initial_states, bak.initial_states, sizeof(fdst->initial_states));
+        memcpy(fdst->slice_context,  bak.slice_context , sizeof(fdst->slice_context));
+        fdst->picture      = bak.picture;
+        fdst->last_picture = bak.last_picture;
+        for (i = 0; i<fdst->num_h_slices * fdst->num_v_slices; i++) { /* refresh dst's slice contexts field by field */
+            FFV1Context *fssrc = fsrc->slice_context[i];
+            FFV1Context *fsdst = fdst->slice_context[i];
+            copy_fields(fsdst, fssrc, fsrc);
+        }
+        av_assert0(!fdst->plane[0].state); /* the copied-in context must carry no plane state ... */
+        av_assert0(!fdst->sample_buffer);  /* ... and no sample buffer */
+    }
+
+    av_assert1(fdst->slice_count == fsrc->slice_count);
+
+    /* Drop dst's current picture and, if src has picture data, take a new reference to src's. */
+    ff_thread_release_buffer(dst, &fdst->picture);
+    if (fsrc->picture.f->data[0]) {
+        if ((ret = ff_thread_ref_frame(&fdst->picture, &fsrc->picture)) < 0)
+            return ret;
+    }
 
-    ffv1_close(avctx);
+    fdst->fsrc = fsrc;
 
     return 0;
 }
@@ -937,9 +1086,11 @@ AVCodec ff_ffv1_decoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_FFV1,
     .priv_data_size = sizeof(FFV1Context),
-    .init           = ffv1_decode_init,
-    .close          = ffv1_decode_close,
-    .decode         = ffv1_decode_frame,
+    .init           = decode_init,
+    .close          = ffv1_close,
+    .decode         = decode_frame,
+    .init_thread_copy = ONLY_IF_THREADS_ENABLED(init_thread_copy),
+    .update_thread_context = ONLY_IF_THREADS_ENABLED(update_thread_context),
     .capabilities   = CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/ |
-                      CODEC_CAP_SLICE_THREADS,
+                      CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
 };
diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
index 179453d..b63ed42 100644
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c
@@ -1,22 +1,22 @@
 /*
- * FFV1 encoder for libavcodec
+ * FFV1 encoder
  *
- * Copyright (c) 2003-2012 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2003-2013 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,19 +27,114 @@
 
 #include "libavutil/attributes.h"
 #include "libavutil/avassert.h"
-#include "libavutil/pixdesc.h"
 #include "libavutil/crc.h"
 #include "libavutil/opt.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/timer.h"
 #include "avcodec.h"
 #include "internal.h"
-#include "get_bits.h"
 #include "put_bits.h"
 #include "rangecoder.h"
 #include "golomb.h"
 #include "mathops.h"
 #include "ffv1.h"
 
+static const int8_t quant5_10bit[256] = { /* 256-entry map onto the five levels -2..2; encode_init() scales it by 11*11, 5*11*11 and 5*5*11*11 into quant_tables[1][2..4] when bits_per_raw_sample > 8 */
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+     1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+    -1, -1, -1, -1, -1, -1, -0, -0, -0, -0, -0, -0, -0, -0, -0, -0,
+}; /* entries 0..127 are >= 0 and 128..255 are <= 0 ("-0" is plain 0); presumably indexed by a difference wrapped to 8 bits -- confirm against the quant table consumer */
+
+static const int8_t quant5[256] = { /* 256-entry map onto the five levels -2..2; encode_init() scales it by 11*11, 5*11*11 and 5*5*11*11 into quant_tables[1][2..4] when bits_per_raw_sample <= 8 */
+     0,  1,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+    -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -1,
+}; /* entries 0..127 are >= 0 and 128..255 are < 0; presumably indexed by a difference wrapped to 8 bits -- confirm against the quant table consumer */
+
+static const int8_t quant9_10bit[256] = { /* 256-entry map onto the nine levels -4..4; encode_init() scales it by 1, 11 and 11*11 into quant_tables[0][0..2] and by 1 and 11 into quant_tables[1][0..1] when bits_per_raw_sample > 8 */
+     0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  1,  1,  1,  2,  2,  2,
+     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,
+     3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+     3,  3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,
+     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+    -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
+    -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
+    -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
+    -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
+    -4, -4, -4, -4, -4, -4, -4, -4, -4, -3, -3, -3, -3, -3, -3, -3,
+    -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
+    -3, -3, -3, -3, -3, -3, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+    -2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -1, -1, -0, -0, -0, -0,
+}; /* entries 0..127 are >= 0 and 128..255 are <= 0 ("-0" is plain 0); presumably indexed by a difference wrapped to 8 bits -- confirm against the quant table consumer */
+
+static const int8_t quant11[256] = { /* 256-entry map onto the eleven levels -5..5; encode_init() scales it by 1, 11 and 11*11 into quant_tables[0][0..2] and by 1 and 11 into quant_tables[1][0..1] when bits_per_raw_sample <= 8 */
+     0,  1,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,
+     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+     4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+    -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
+    -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
+    -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
+    -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
+    -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,
+    -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -4, -4,
+    -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
+    -4, -4, -4, -4, -4, -3, -3, -3, -3, -3, -3, -3, -2, -2, -2, -1,
+}; /* entries 0..127 are >= 0 and 128..255 are < 0; presumably indexed by a difference wrapped to 8 bits -- confirm against the quant table consumer */
+
+static const uint8_t ver2_state[256] = { /* state table that encode_init() copies into s->state_transition[1..255] when s->ac > 1; entry 0 is never copied by that loop */
+      0,  10,  10,  10,  10,  16,  16,  16, 28,   16,  16,  29,  42,  49,  20,  49,
+     59,  25,  26,  26,  27,  31,  33,  33, 33,   34,  34,  37,  67,  38,  39,  39,
+     40,  40,  41,  79,  43,  44,  45,  45, 48,   48,  64,  50,  51,  52,  88,  52,
+     53,  74,  55,  57,  58,  58,  74,  60, 101,  61,  62,  84,  66,  66,  68,  69,
+     87,  82,  71,  97,  73,  73,  82,  75, 111,  77,  94,  78,  87,  81,  83,  97,
+     85,  83,  94,  86,  99,  89,  90,  99, 111,  92,  93,  134, 95,  98,  105, 98,
+    105, 110, 102, 108, 102, 118, 103, 106, 106, 113, 109, 112, 114, 112, 116, 125,
+    115, 116, 117, 117, 126, 119, 125, 121, 121, 123, 145, 124, 126, 131, 127, 129,
+    165, 130, 132, 138, 133, 135, 145, 136, 137, 139, 146, 141, 143, 142, 144, 148,
+    147, 155, 151, 149, 151, 150, 152, 157, 153, 154, 156, 168, 158, 162, 161, 160,
+    172, 163, 169, 164, 166, 184, 167, 170, 177, 174, 171, 173, 182, 176, 180, 178,
+    175, 189, 179, 181, 186, 183, 192, 185, 200, 187, 191, 188, 190, 197, 193, 196,
+    197, 194, 195, 196, 198, 202, 199, 201, 210, 203, 207, 204, 205, 206, 208, 214,
+    209, 211, 221, 212, 213, 215, 224, 216, 217, 218, 219, 220, 222, 228, 223, 225,
+    226, 224, 227, 229, 240, 230, 231, 232, 233, 234, 235, 236, 238, 239, 237, 242,
+    241, 243, 242, 244, 245, 246, 247, 248, 249, 250, 251, 252, 252, 253, 254, 255,
+}; /* NOTE(review): presumably the range coder's per-state transition map (name suggests the "version 2" default) -- confirm against ffv1.h / rangecoder users */
+
 static void find_best_state(uint8_t best_state[256][256],
                             const uint8_t one_state[256])
 {
@@ -64,8 +159,8 @@ static void find_best_state(uint8_t best_state[256][256],
                 double newocc[256] = { 0 };
                 for (m = 1; m < 256; m++)
                     if (occ[m]) {
-                        len -= occ[m] *     (p  * l2tab[m] +
-                                        (1 - p) * l2tab[256 - m]);
+                        len -=occ[m]*(     p *l2tab[    m]
+                                      + (1-p)*l2tab[256-m]);
                     }
                 if (len < best_len[k]) {
                     best_len[k]      = len;
@@ -73,7 +168,7 @@ static void find_best_state(uint8_t best_state[256][256],
                 }
                 for (m = 0; m < 256; m++)
                     if (occ[m]) {
-                        newocc[one_state[m]]             += occ[m] * p;
+                        newocc[      one_state[      m]] += occ[m] * p;
                         newocc[256 - one_state[256 - m]] += occ[m] * (1 - p);
                     }
                 memcpy(occ, newocc, sizeof(occ));
@@ -136,6 +231,7 @@ static av_noinline void put_symbol(RangeCoder *c, uint8_t *state,
     put_symbol_inline(c, state, v, is_signed, NULL, NULL);
 }
 
+
 static inline void put_vlc_symbol(PutBitContext *pb, VlcState *const state,
                                   int v, int bits)
 {
@@ -149,7 +245,7 @@ static inline void put_vlc_symbol(PutBitContext *pb, VlcState *const state,
         i += i;
     }
 
-    assert(k <= 13);
+    av_assert2(k <= 13);
 
 #if 0 // JPEG LS
     if (k == 0 && 2 * state->drift <= -state->count)
@@ -179,7 +275,7 @@ static av_always_inline int encode_line(FFV1Context *s, int w,
     int run_mode  = 0;
 
     if (s->ac) {
-        if (c->bytestream_end - c->bytestream < w * 20) {
+        if (c->bytestream_end - c->bytestream < w * 35) {
             av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
             return AVERROR_INVALIDDATA;
         }
@@ -190,6 +286,18 @@ static av_always_inline int encode_line(FFV1Context *s, int w,
         }
     }
 
+    if (s->slice_coding_mode == 1) {
+        for (x = 0; x < w; x++) {
+            int i;
+            int v = sample[0][x];
+            for (i = bits-1; i>=0; i--) {
+                uint8_t state = 128;
+                put_rac(c, &state, (v>>i) & 1);
+            }
+        }
+        return 0;
+    }
+
     for (x = 0; x < w; x++) {
         int diff, context;
 
@@ -257,10 +365,10 @@ static av_always_inline int encode_line(FFV1Context *s, int w,
     return 0;
 }
 
-static void encode_plane(FFV1Context *s, uint8_t *src, int w, int h,
+static int encode_plane(FFV1Context *s, uint8_t *src, int w, int h,
                          int stride, int plane_index)
 {
-    int x, y, i;
+    int x, y, i, ret;
     const int ring_size = s->avctx->context_model ? 3 : 2;
     int16_t *sample[3];
     s->run_index = 0;
@@ -271,38 +379,39 @@ static void encode_plane(FFV1Context *s, uint8_t *src, int w, int h,
         for (i = 0; i < ring_size; i++)
             sample[i] = s->sample_buffer + (w + 6) * ((h + i - y) % ring_size) + 3;
 
-        sample[0][-1] = sample[1][0];
-        sample[1][w]  = sample[1][w - 1];
+        sample[0][-1]= sample[1][0  ];
+        sample[1][ w]= sample[1][w-1];
 // { START_TIMER
         if (s->bits_per_raw_sample <= 8) {
             for (x = 0; x < w; x++)
                 sample[0][x] = src[x + stride * y];
-            encode_line(s, w, sample, plane_index, 8);
+            if((ret = encode_line(s, w, sample, plane_index, 8)) < 0)
+                return ret;
         } else {
             if (s->packed_at_lsb) {
-                for (x = 0; x < w; x++)
-                    sample[0][x] = ((uint16_t *)(src + stride * y))[x];
+                for (x = 0; x < w; x++) {
+                    sample[0][x] = ((uint16_t*)(src + stride*y))[x];
+                }
             } else {
-                for (x = 0; x < w; x++)
-                    sample[0][x] =
-                        ((uint16_t *)(src + stride * y))[x] >> (16 - s->bits_per_raw_sample);
+                for (x = 0; x < w; x++) {
+                    sample[0][x] = ((uint16_t*)(src + stride*y))[x] >> (16 - s->bits_per_raw_sample);
+                }
             }
-            encode_line(s, w, sample, plane_index, s->bits_per_raw_sample);
+            if((ret = encode_line(s, w, sample, plane_index, s->bits_per_raw_sample)) < 0)
+                return ret;
         }
 // STOP_TIMER("encode line") }
     }
+    return 0;
 }
 
-static void encode_rgb_frame(FFV1Context *s, uint8_t *src[3], int w, int h,
-                             int stride[3])
+static int encode_rgb_frame(FFV1Context *s, uint8_t *src[3], int w, int h, const int stride[3])
 {
     int x, y, p, i;
     const int ring_size = s->avctx->context_model ? 3 : 2;
-    int16_t *sample[MAX_PLANES][3];
-    int lbd  = s->avctx->bits_per_raw_sample <= 8;
-    int bits = s->avctx->bits_per_raw_sample > 0
-               ? s->avctx->bits_per_raw_sample
-               : 8;
+    int16_t *sample[4][3];
+    int lbd    = s->bits_per_raw_sample <= 8;
+    int bits   = s->bits_per_raw_sample > 0 ? s->bits_per_raw_sample : 8;
     int offset = 1 << bits;
 
     s->run_index = 0;
@@ -313,29 +422,29 @@ static void encode_rgb_frame(FFV1Context *s, uint8_t *src[3], int w, int h,
     for (y = 0; y < h; y++) {
         for (i = 0; i < ring_size; i++)
             for (p = 0; p < MAX_PLANES; p++)
-                sample[p][i] = s->sample_buffer + p * ring_size *
-                               (w + 6) +
-                               ((h + i - y) % ring_size) * (w + 6) + 3;
+                sample[p][i]= s->sample_buffer + p*ring_size*(w+6) + ((h+i-y)%ring_size)*(w+6) + 3;
 
         for (x = 0; x < w; x++) {
             int b, g, r, av_uninit(a);
             if (lbd) {
-                unsigned v = *((uint32_t *)(src[0] + x * 4 + stride[0] * y));
-                b = v & 0xFF;
-                g = (v >> 8) & 0xFF;
+                unsigned v = *((uint32_t*)(src[0] + x*4 + stride[0]*y));
+                b =  v        & 0xFF;
+                g = (v >>  8) & 0xFF;
                 r = (v >> 16) & 0xFF;
-                a = v >> 24;
+                a =  v >> 24;
             } else {
-                b = *((uint16_t *)(src[0] + x * 2 + stride[0] * y));
-                g = *((uint16_t *)(src[1] + x * 2 + stride[1] * y));
-                r = *((uint16_t *)(src[2] + x * 2 + stride[2] * y));
+                b = *((uint16_t*)(src[0] + x*2 + stride[0]*y));
+                g = *((uint16_t*)(src[1] + x*2 + stride[1]*y));
+                r = *((uint16_t*)(src[2] + x*2 + stride[2]*y));
             }
 
-            b -= g;
-            r -= g;
-            g += (b + r) >> 2;
-            b += offset;
-            r += offset;
+            if (s->slice_coding_mode != 1) {
+                b -= g;
+                r -= g;
+                g += (b * s->slice_rct_by_coef + r * s->slice_rct_ry_coef) >> 2;
+                b += offset;
+                r += offset;
+            }
 
             sample[0][0][x] = g;
             sample[1][0][x] = b;
@@ -343,17 +452,20 @@ static void encode_rgb_frame(FFV1Context *s, uint8_t *src[3], int w, int h,
             sample[3][0][x] = a;
         }
         for (p = 0; p < 3 + s->transparency; p++) {
-            sample[p][0][-1] = sample[p][1][0];
-            sample[p][1][w]  = sample[p][1][w - 1];
-            if (lbd)
-                encode_line(s, w, sample[p], (p + 1) / 2, 9);
+            int ret;
+            sample[p][0][-1] = sample[p][1][0  ];
+            sample[p][1][ w] = sample[p][1][w-1];
+            if (lbd && s->slice_coding_mode == 0)
+                ret = encode_line(s, w, sample[p], (p + 1) / 2, 9);
             else
-                encode_line(s, w, sample[p], (p + 1) / 2, bits + 1);
+                ret = encode_line(s, w, sample[p], (p + 1) / 2, bits + (s->slice_coding_mode != 1));
+            if (ret < 0)
+                return ret;
         }
     }
+    return 0;
 }
 
-
 static void write_quant_table(RangeCoder *c, int16_t *quant_table)
 {
     int last = 0;
@@ -393,7 +505,7 @@ static void write_header(FFV1Context *f)
                 put_symbol(c, state,
                            f->state_transition[i] - c->one_state[i], 1);
         }
-        put_symbol(c, state, f->colorspace, 0); // YUV cs type
+        put_symbol(c, state, f->colorspace, 0); //YUV cs type
         if (f->version > 0)
             put_symbol(c, state, f->bits_per_raw_sample, 0);
         put_rac(c, state, f->chroma_planes);
@@ -437,15 +549,19 @@ static int write_extradata(FFV1Context *f)
 
     f->avctx->extradata_size = 10000 + 4 +
                                     (11 * 11 * 5 * 5 * 5 + 11 * 11 * 11) * 32;
-    f->avctx->extradata = av_malloc(f->avctx->extradata_size);
+    f->avctx->extradata = av_malloc(f->avctx->extradata_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    if (!f->avctx->extradata)
+        return AVERROR(ENOMEM);
     ff_init_range_encoder(c, f->avctx->extradata, f->avctx->extradata_size);
     ff_build_rac_states(c, 0.05 * (1LL << 32), 256 - 8);
 
     put_symbol(c, state, f->version, 0);
     if (f->version > 2) {
-        if (f->version == 3)
-            f->minor_version = 2;
-        put_symbol(c, state, f->minor_version, 0);
+        if (f->version == 3) {
+            f->micro_version = 4;
+        } else if (f->version == 4)
+            f->micro_version = 2;
+        put_symbol(c, state, f->micro_version, 0);
     }
 
     put_symbol(c, state, f->ac, 0);
@@ -485,12 +601,11 @@ static int write_extradata(FFV1Context *f)
 
     if (f->version > 2) {
         put_symbol(c, state, f->ec, 0);
+        put_symbol(c, state, f->intra = (f->avctx->gop_size < 2), 0);
     }
 
     f->avctx->extradata_size = ff_rac_terminate(c);
-
-    v = av_crc(av_crc_get_table(AV_CRC_32_IEEE), 0,
-               f->avctx->extradata, f->avctx->extradata_size);
+    v = av_crc(av_crc_get_table(AV_CRC_32_IEEE), 0, f->avctx->extradata, f->avctx->extradata_size);
     AV_WL32(f->avctx->extradata + f->avctx->extradata_size, v);
     f->avctx->extradata_size += 4;
 
@@ -515,7 +630,7 @@ static int sort_stt(FFV1Context *s, uint8_t stt[256])
 
                 double size0 = COST2(i,  i) + COST2(i2, i2);
                 double sizeX = COST2(i, i2) + COST2(i2, i);
-                if (sizeX < size0 && i != 128 && i2 != 128) {
+                if (size0 - sizeX > size0*(1e-14) && i != 128 && i2 != 128) {
                     int j;
                     FFSWAP(int, stt[i], stt[i2]);
                     FFSWAP(int, s->rc_stat[i][0], s->rc_stat[i2][0]);
@@ -545,60 +660,57 @@ static int sort_stt(FFV1Context *s, uint8_t stt[256])
     return print;
 }
 
-static av_cold int init_slices_state(FFV1Context *f)
-{
-    int i, ret;
-    for (i = 0; i < f->slice_count; i++) {
-        FFV1Context *fs = f->slice_context[i];
-        if ((ret = ffv1_init_slice_state(f, fs)) < 0)
-            return AVERROR(ENOMEM);
-    }
-    return 0;
-}
-
-static av_cold int ffv1_encode_init(AVCodecContext *avctx)
+static av_cold int encode_init(AVCodecContext *avctx)
 {
     FFV1Context *s = avctx->priv_data;
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
     int i, j, k, m, ret;
 
-    ffv1_common_init(avctx);
+    if ((ret = ffv1_common_init(avctx)) < 0)
+        return ret;
 
     s->version = 0;
 
-    if ((avctx->flags & (CODEC_FLAG_PASS1 | CODEC_FLAG_PASS2)) ||
-        avctx->slices > 1)
+    if ((avctx->flags & (CODEC_FLAG_PASS1|CODEC_FLAG_PASS2)) || avctx->slices>1)
         s->version = FFMAX(s->version, 2);
 
-    if (avctx->level == 3) {
+    // Unspecified level & slices, we choose version 1.2+ to ensure multithreaded decodability
+    if (avctx->slices == 0 && avctx->level < 0 && avctx->width * avctx->height > 720*576)
+        s->version = FFMAX(s->version, 2);
+
+    if (avctx->level <= 0 && s->version == 2) {
         s->version = 3;
     }
+    if (avctx->level >= 0 && avctx->level <= 4)
+        s->version = FFMAX(s->version, avctx->level);
 
     if (s->ec < 0) {
         s->ec = (s->version >= 3);
     }
 
-    if (s->version >= 2 &&
-        avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
-        av_log(avctx, AV_LOG_ERROR,
-               "Version %d requested, please set -strict experimental in "
-               "order to enable it\n",
-               s->version);
-        return AVERROR(ENOSYS);
+    if ((s->version == 2 || s->version>3) && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) { /* versions 2 and 4+ are only accepted when strict_std_compliance is at or below FF_COMPLIANCE_EXPERIMENTAL */
+        av_log(avctx, AV_LOG_ERROR, "Version %d requested, please set -strict experimental in order to enable it\n", s->version); /* report the version actually selected; the guard also fires for versions above 3, not just 2 */
+        return AVERROR_INVALIDDATA;
     }
 
     s->ac = avctx->coder_type > 0 ? 2 : 0;
 
     s->plane_count = 3;
-    switch (avctx->pix_fmt) {
+    switch(avctx->pix_fmt) {
     case AV_PIX_FMT_YUV444P9:
     case AV_PIX_FMT_YUV422P9:
     case AV_PIX_FMT_YUV420P9:
+    case AV_PIX_FMT_YUVA444P9:
+    case AV_PIX_FMT_YUVA422P9:
+    case AV_PIX_FMT_YUVA420P9:
         if (!avctx->bits_per_raw_sample)
             s->bits_per_raw_sample = 9;
     case AV_PIX_FMT_YUV444P10:
     case AV_PIX_FMT_YUV420P10:
     case AV_PIX_FMT_YUV422P10:
+    case AV_PIX_FMT_YUVA444P10:
+    case AV_PIX_FMT_YUVA422P10:
+    case AV_PIX_FMT_YUVA420P10:
         s->packed_at_lsb = 1;
         if (!avctx->bits_per_raw_sample && !s->bits_per_raw_sample)
             s->bits_per_raw_sample = 10;
@@ -606,6 +718,9 @@ static av_cold int ffv1_encode_init(AVCodecContext *avctx)
     case AV_PIX_FMT_YUV444P16:
     case AV_PIX_FMT_YUV422P16:
     case AV_PIX_FMT_YUV420P16:
+    case AV_PIX_FMT_YUVA444P16:
+    case AV_PIX_FMT_YUVA422P16:
+    case AV_PIX_FMT_YUVA420P16:
         if (!avctx->bits_per_raw_sample && !s->bits_per_raw_sample) {
             s->bits_per_raw_sample = 16;
         } else if (!s->bits_per_raw_sample) {
@@ -616,15 +731,12 @@ static av_cold int ffv1_encode_init(AVCodecContext *avctx)
             return AVERROR_INVALIDDATA;
         }
         if (!s->ac && avctx->coder_type == -1) {
-            av_log(avctx, AV_LOG_INFO,
-                   "bits_per_raw_sample > 8, forcing coder 1\n");
+            av_log(avctx, AV_LOG_INFO, "bits_per_raw_sample > 8, forcing coder 1\n");
             s->ac = 2;
         }
         if (!s->ac) {
-            av_log(
-                avctx, AV_LOG_ERROR,
-                "bits_per_raw_sample of more than 8 needs -coder 1 currently\n");
-            return AVERROR_INVALIDDATA;
+            av_log(avctx, AV_LOG_ERROR, "bits_per_raw_sample of more than 8 needs -coder 1 currently\n");
+            return AVERROR(ENOSYS);
         }
         s->version = FFMAX(s->version, 1);
     case AV_PIX_FMT_GRAY8:
@@ -634,19 +746,21 @@ static av_cold int ffv1_encode_init(AVCodecContext *avctx)
     case AV_PIX_FMT_YUV420P:
     case AV_PIX_FMT_YUV411P:
     case AV_PIX_FMT_YUV410P:
-        s->chroma_planes = desc->nb_components < 3 ? 0 : 1;
-        s->colorspace    = 0;
-        break;
     case AV_PIX_FMT_YUVA444P:
     case AV_PIX_FMT_YUVA422P:
     case AV_PIX_FMT_YUVA420P:
-        s->chroma_planes = 1;
-        s->colorspace    = 0;
-        s->transparency  = 1;
+        s->chroma_planes = desc->nb_components < 3 ? 0 : 1;
+        s->colorspace = 0;
+        s->transparency = desc->nb_components == 4;
         break;
     case AV_PIX_FMT_RGB32:
-        s->colorspace   = 1;
+        s->colorspace = 1;
         s->transparency = 1;
+        s->chroma_planes = 1;
+        break;
+    case AV_PIX_FMT_0RGB32:
+        s->colorspace = 1;
+        s->chroma_planes = 1;
         break;
     case AV_PIX_FMT_GBRP9:
         if (!avctx->bits_per_raw_sample)
@@ -654,55 +768,58 @@ static av_cold int ffv1_encode_init(AVCodecContext *avctx)
     case AV_PIX_FMT_GBRP10:
         if (!avctx->bits_per_raw_sample && !s->bits_per_raw_sample)
             s->bits_per_raw_sample = 10;
-    case AV_PIX_FMT_GBRP16:
+    case AV_PIX_FMT_GBRP12:
         if (!avctx->bits_per_raw_sample && !s->bits_per_raw_sample)
-            s->bits_per_raw_sample = 16;
+            s->bits_per_raw_sample = 12;
+    case AV_PIX_FMT_GBRP14:
+        if (!avctx->bits_per_raw_sample && !s->bits_per_raw_sample)
+            s->bits_per_raw_sample = 14;
         else if (!s->bits_per_raw_sample)
             s->bits_per_raw_sample = avctx->bits_per_raw_sample;
-        s->colorspace    = 1;
+        s->colorspace = 1;
         s->chroma_planes = 1;
-        s->version       = FFMAX(s->version, 1);
+        s->version = FFMAX(s->version, 1);
+        if (!s->ac) {
+            av_log(avctx, AV_LOG_ERROR, "bits_per_raw_sample of more than 8 needs -coder 1 currently\n");
+            return AVERROR(ENOSYS);
+        }
         break;
     default:
         av_log(avctx, AV_LOG_ERROR, "format not supported\n");
-        return AVERROR_INVALIDDATA;
+        return AVERROR(ENOSYS);
     }
     if (s->transparency) {
-        av_log(
-            avctx, AV_LOG_WARNING,
-            "Storing alpha plane, this will require a recent FFV1 decoder to playback!\n");
+        av_log(avctx, AV_LOG_WARNING, "Storing alpha plane, this will require a recent FFV1 decoder to playback!\n");
     }
     if (avctx->context_model > 1U) {
-        av_log(avctx, AV_LOG_ERROR,
-               "Invalid context model %d, valid values are 0 and 1\n",
-               avctx->context_model);
+        av_log(avctx, AV_LOG_ERROR, "Invalid context model %d, valid values are 0 and 1\n", avctx->context_model);
         return AVERROR(EINVAL);
     }
 
     if (s->ac > 1)
         for (i = 1; i < 256; i++)
-            s->state_transition[i] = ffv1_ver2_state[i];
+            s->state_transition[i] = ver2_state[i];
 
     for (i = 0; i < 256; i++) {
         s->quant_table_count = 2;
         if (s->bits_per_raw_sample <= 8) {
-            s->quant_tables[0][0][i] = ffv1_quant11[i];
-            s->quant_tables[0][1][i] = ffv1_quant11[i] * 11;
-            s->quant_tables[0][2][i] = ffv1_quant11[i] * 11 * 11;
-            s->quant_tables[1][0][i] = ffv1_quant11[i];
-            s->quant_tables[1][1][i] = ffv1_quant11[i] * 11;
-            s->quant_tables[1][2][i] = ffv1_quant5[i]  * 11 * 11;
-            s->quant_tables[1][3][i] = ffv1_quant5[i]  *  5 * 11 * 11;
-            s->quant_tables[1][4][i] = ffv1_quant5[i]  *  5 *  5 * 11 * 11;
+            s->quant_tables[0][0][i]=           quant11[i];
+            s->quant_tables[0][1][i]=        11*quant11[i];
+            s->quant_tables[0][2][i]=     11*11*quant11[i];
+            s->quant_tables[1][0][i]=           quant11[i];
+            s->quant_tables[1][1][i]=        11*quant11[i];
+            s->quant_tables[1][2][i]=     11*11*quant5 [i];
+            s->quant_tables[1][3][i]=   5*11*11*quant5 [i];
+            s->quant_tables[1][4][i]= 5*5*11*11*quant5 [i];
         } else {
-            s->quant_tables[0][0][i] = ffv1_quant9_10bit[i];
-            s->quant_tables[0][1][i] = ffv1_quant9_10bit[i] * 11;
-            s->quant_tables[0][2][i] = ffv1_quant9_10bit[i] * 11 * 11;
-            s->quant_tables[1][0][i] = ffv1_quant9_10bit[i];
-            s->quant_tables[1][1][i] = ffv1_quant9_10bit[i] * 11;
-            s->quant_tables[1][2][i] = ffv1_quant5_10bit[i] * 11 * 11;
-            s->quant_tables[1][3][i] = ffv1_quant5_10bit[i] *  5 * 11 * 11;
-            s->quant_tables[1][4][i] = ffv1_quant5_10bit[i] *  5 *  5 * 11 * 11;
+            s->quant_tables[0][0][i]=           quant9_10bit[i];
+            s->quant_tables[0][1][i]=        11*quant9_10bit[i];
+            s->quant_tables[0][2][i]=     11*11*quant9_10bit[i];
+            s->quant_tables[1][0][i]=           quant9_10bit[i];
+            s->quant_tables[1][1][i]=        11*quant9_10bit[i];
+            s->quant_tables[1][2][i]=     11*11*quant5_10bit[i];
+            s->quant_tables[1][3][i]=   5*11*11*quant5_10bit[i];
+            s->quant_tables[1][4][i]= 5*5*11*11*quant5_10bit[i];
         }
     }
     s->context_count[0] = (11 * 11 * 11        + 1) / 2;
@@ -729,10 +846,10 @@ static av_cold int ffv1_encode_init(AVCodecContext *avctx)
 
     if (!s->transparency)
         s->plane_count = 2;
+    if (!s->chroma_planes && s->version > 3)
+        s->plane_count--;
 
-    av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift,
-                                     &s->chroma_v_shift);
-
+    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
     s->picture_number = 0;
 
     if (avctx->flags & (CODEC_FLAG_PASS1 | CODEC_FLAG_PASS2)) {
@@ -751,7 +868,7 @@ static av_cold int ffv1_encode_init(AVCodecContext *avctx)
 
         av_assert0(s->version >= 2);
 
-        for (;; ) {
+        for (;;) {
             for (j = 0; j < 256; j++)
                 for (i = 0; i < 2; i++) {
                     s->rc_stat[j][i] = strtol(p, &next, 0);
@@ -792,45 +909,60 @@ static av_cold int ffv1_encode_init(AVCodecContext *avctx)
         find_best_state(best_state, s->state_transition);
 
         for (i = 0; i < s->quant_table_count; i++) {
-            for (j = 0; j < s->context_count[i]; j++)
-                for (k = 0; k < 32; k++) {
+            for (k = 0; k < 32; k++) {
+                double a=0, b=0;
+                int jp = 0;
+                for (j = 0; j < s->context_count[i]; j++) {
                     double p = 128;
-                    if (s->rc_stat2[i][j][k][0] + s->rc_stat2[i][j][k][1]) {
-                        p = 256.0 * s->rc_stat2[i][j][k][1] /
-                            (s->rc_stat2[i][j][k][0] + s->rc_stat2[i][j][k][1]);
+                    if (s->rc_stat2[i][j][k][0] + s->rc_stat2[i][j][k][1] > 200 && j || a+b > 200) {
+                        if (a+b)
+                            p = 256.0 * b / (a + b);
+                        s->initial_states[i][jp][k] =
+                            best_state[av_clip(round(p), 1, 255)][av_clip((a + b) / gob_count, 0, 255)];
+                        for(jp++; jp<j; jp++)
+                            s->initial_states[i][jp][k] = s->initial_states[i][jp-1][k];
+                        a=b=0;
+                    }
+                    a += s->rc_stat2[i][j][k][0];
+                    b += s->rc_stat2[i][j][k][1];
+                    if (a+b) {
+                        p = 256.0 * b / (a + b);
                     }
                     s->initial_states[i][j][k] =
-                        best_state[av_clip(round(p), 1, 255)][av_clip((s->rc_stat2[i][j][k][0] +
-                                                                       s->rc_stat2[i][j][k][1]) /
-                                                                      gob_count, 0, 255)];
+                        best_state[av_clip(round(p), 1, 255)][av_clip((a + b) / gob_count, 0, 255)];
                 }
+            }
         }
     }
 
     if (s->version > 1) {
-        for (s->num_v_slices = 2; s->num_v_slices < 9; s->num_v_slices++)
-            for (s->num_h_slices = s->num_v_slices;
-                 s->num_h_slices < 2 * s->num_v_slices; s->num_h_slices++)
-                if (avctx->slices == s->num_h_slices * s->num_v_slices &&
-                    avctx->slices <= 64 || !avctx->slices)
+        s->num_v_slices = (avctx->width > 352 || avctx->height > 288 || !avctx->slices) ? 2 : 1;
+        for (; s->num_v_slices < 9; s->num_v_slices++) {
+            for (s->num_h_slices = s->num_v_slices; s->num_h_slices < 2*s->num_v_slices; s->num_h_slices++) {
+                if (avctx->slices == s->num_h_slices * s->num_v_slices && avctx->slices <= 64 || !avctx->slices)
                     goto slices_ok;
+            }
+        }
         av_log(avctx, AV_LOG_ERROR,
                "Unsupported number %d of slices requested, please specify a "
                "supported number with -slices (ex:4,6,9,12,16, ...)\n",
                avctx->slices);
         return AVERROR(ENOSYS);
 slices_ok:
-        write_extradata(s);
+        if ((ret = write_extradata(s)) < 0)
+            return ret;
     }
 
     if ((ret = ffv1_init_slice_contexts(s)) < 0)
         return ret;
-    if ((ret = init_slices_state(s)) < 0)
+    if ((ret = ffv1_init_slices_state(s)) < 0)
         return ret;
 
 #define STATS_OUT_SIZE 1024 * 1024 * 6
     if (avctx->flags & CODEC_FLAG_PASS1) {
         avctx->stats_out = av_mallocz(STATS_OUT_SIZE);
+        if (!avctx->stats_out)
+            return AVERROR(ENOMEM);
         for (i = 0; i < s->quant_table_count; i++)
             for (j = 0; j < s->slice_count; j++) {
                 FFV1Context *sf = s->slice_context[j];
@@ -852,23 +984,112 @@ static void encode_slice_header(FFV1Context *f, FFV1Context *fs)
     int j;
     memset(state, 128, sizeof(state));
 
-    put_symbol(c, state, (fs->slice_x + 1) * f->num_h_slices / f->width, 0);
-    put_symbol(c, state, (fs->slice_y + 1) * f->num_v_slices / f->height, 0);
-    put_symbol(c, state, (fs->slice_width + 1) * f->num_h_slices / f->width - 1,
-               0);
-    put_symbol(c, state,
-               (fs->slice_height + 1) * f->num_v_slices / f->height - 1,
-               0);
-    for (j = 0; j < f->plane_count; j++) {
+    put_symbol(c, state, (fs->slice_x     +1)*f->num_h_slices / f->width   , 0);
+    put_symbol(c, state, (fs->slice_y     +1)*f->num_v_slices / f->height  , 0);
+    put_symbol(c, state, (fs->slice_width +1)*f->num_h_slices / f->width -1, 0);
+    put_symbol(c, state, (fs->slice_height+1)*f->num_v_slices / f->height-1, 0);
+    for (j=0; j<f->plane_count; j++) {
         put_symbol(c, state, f->plane[j].quant_table_index, 0);
         av_assert0(f->plane[j].quant_table_index == f->avctx->context_model);
     }
-    if (!f->avctx->coded_frame->interlaced_frame)
+    if (!f->picture.f->interlaced_frame)
         put_symbol(c, state, 3, 0);
     else
-        put_symbol(c, state, 1 + !f->avctx->coded_frame->top_field_first, 0);
-    put_symbol(c, state, f->avctx->coded_frame->sample_aspect_ratio.num, 0);
-    put_symbol(c, state, f->avctx->coded_frame->sample_aspect_ratio.den, 0);
+        put_symbol(c, state, 1 + !f->picture.f->top_field_first, 0);
+    put_symbol(c, state, f->picture.f->sample_aspect_ratio.num, 0);
+    put_symbol(c, state, f->picture.f->sample_aspect_ratio.den, 0);
+    if (f->version > 3) {
+        put_rac(c, state, fs->slice_coding_mode == 1);
+        if (fs->slice_coding_mode == 1)
+            ffv1_clear_slice_state(f, fs);
+        put_symbol(c, state, fs->slice_coding_mode, 0);
+        if (fs->slice_coding_mode != 1) {
+            put_symbol(c, state, fs->slice_rct_by_coef, 0);
+            put_symbol(c, state, fs->slice_rct_ry_coef, 0);
+        }
+    }
+}
+
+/* Pick the RCT luma weights for one slice: for each of the NB_Y_COEFF candidate
+ * pairs, sum the absolute value of the weighted residual that remains after
+ * horizontal and then vertical differencing of the samples, and store the
+ * cheapest pair in fs->slice_rct_ry_coef / fs->slice_rct_by_coef. */
+static void choose_rct_params(FFV1Context *fs, uint8_t *src[3], const int stride[3], int w, int h)
+{
+#define NB_Y_COEFF 15
+    static const int rct_y_coeff[NB_Y_COEFF][2] = {
+        {0, 0}, //      4G
+        {1, 1}, //  R + 2G + B
+        {2, 2}, // 2R      + 2B
+        {0, 2}, //      2G + 2B
+        {2, 0}, // 2R + 2G
+        {4, 0}, // 4R
+        {0, 4}, //           4B
+
+        {0, 3}, //      1G + 3B
+        {3, 0}, // 3R + 1G
+        {3, 1}, // 3R      +  B
+        {1, 3}, //  R      + 3B
+        {1, 2}, //  R +  G + 2B
+        {2, 1}, // 2R +  G +  B
+        {0, 1}, //      3G +  B
+        {1, 0}, //  R + 3G
+    };
+
+    int64_t stat[NB_Y_COEFF] = {0}; /* 64 bits: one entry sums up to w*h residuals */
+    int x, y, i, p, best;
+    int16_t *sample[3];
+    int lbd = fs->bits_per_raw_sample <= 8;
+
+    for (y = 0; y < h; y++) {
+        int lastr=0, lastg=0, lastb=0;
+        for (p = 0; p < 3; p++)
+            sample[p] = fs->sample_buffer + p*w;
+        for (x = 0; x < w; x++) {
+            int b, g, r, ab, ag, ar;
+            if (lbd) {
+                unsigned v = *((uint32_t*)(src[0] + x*4 + stride[0]*y));
+                b =  v        & 0xFF;
+                g = (v >>  8) & 0xFF;
+                r = (v >> 16) & 0xFF;
+            } else {
+                b = *((uint16_t*)(src[0] + x*2 + stride[0]*y));
+                g = *((uint16_t*)(src[1] + x*2 + stride[1]*y));
+                r = *((uint16_t*)(src[2] + x*2 + stride[2]*y));
+            }
+
+            ar = r - lastr;
+            ag = g - lastg;
+            ab = b - lastb;
+            if (x && y) { /* sample[] still holds the previous row's differences */
+                int bg = ag - sample[0][x];
+                int bb = ab - sample[1][x];
+                int br = ar - sample[2][x];
+
+                br -= bg;
+                bb -= bg;
+
+                for (i = 0; i<NB_Y_COEFF; i++)
+                    stat[i] += FFABS(bg + ((br*rct_y_coeff[i][0] + bb*rct_y_coeff[i][1])>>2));
+            }
+            sample[0][x] = ag;
+            sample[1][x] = ab;
+            sample[2][x] = ar;
+
+            lastr = r;
+            lastg = g;
+            lastb = b;
+        }
+    }
+
+    best = 0;
+    for (i=1; i<NB_Y_COEFF; i++) {
+        if (stat[i] < stat[best])
+            best = i;
+    }
+
+    fs->slice_rct_by_coef = rct_y_coeff[best][1];
+    fs->slice_rct_ry_coef = rct_y_coeff[best][0];
 }
 
 static int encode_slice(AVCodecContext *c, void *arg)
@@ -879,11 +1100,23 @@ static int encode_slice(AVCodecContext *c, void *arg)
     int height       = fs->slice_height;
     int x            = fs->slice_x;
     int y            = fs->slice_y;
-    const AVFrame *const p = f->frame;
-    const int ps     = (av_pix_fmt_desc_get(c->pix_fmt)->flags & AV_PIX_FMT_FLAG_PLANAR)
-                       ? (f->bits_per_raw_sample > 8) + 1
-                       : 4;
+    const AVFrame *const p = f->picture.f;
+    const int ps     = av_pix_fmt_desc_get(c->pix_fmt)->comp[0].step_minus1 + 1;
+    int ret;
+    RangeCoder c_bak = fs->c;
+    uint8_t *planes[3] = {p->data[0] + ps*x + y*p->linesize[0],
+                          p->data[1] + ps*x + y*p->linesize[1],
+                          p->data[2] + ps*x + y*p->linesize[2]};
+
+    fs->slice_coding_mode = 0;
+    if (f->version > 3) {
+        choose_rct_params(fs, planes, p->linesize, width, height);
+    } else {
+        fs->slice_rct_by_coef = 1;
+        fs->slice_rct_ry_coef = 1;
+    }
 
+retry:
     if (c->coded_frame->key_frame)
         ffv1_clear_slice_state(f, fs);
     if (f->version > 2) {
@@ -892,71 +1125,126 @@ static int encode_slice(AVCodecContext *c, void *arg)
     if (!fs->ac) {
         if (f->version > 2)
             put_rac(&fs->c, (uint8_t[]) { 129 }, 0);
-        fs->ac_byte_count = f->version > 2 || (!x && !y) ? ff_rac_terminate( &fs->c) : 0;
-        init_put_bits(&fs->pb, fs->c.bytestream_start + fs->ac_byte_count,
+        fs->ac_byte_count = f->version > 2 || (!x && !y) ? ff_rac_terminate(&fs->c) : 0;
+        init_put_bits(&fs->pb,
+                      fs->c.bytestream_start + fs->ac_byte_count,
                       fs->c.bytestream_end - fs->c.bytestream_start - fs->ac_byte_count);
     }
 
     if (f->colorspace == 0) {
-        const int chroma_width  = -((-width) >> f->chroma_h_shift);
-        const int chroma_height = -((-height) >> f->chroma_v_shift);
+        const int chroma_width  = FF_CEIL_RSHIFT(width,  f->chroma_h_shift);
+        const int chroma_height = FF_CEIL_RSHIFT(height, f->chroma_v_shift);
         const int cx            = x >> f->chroma_h_shift;
         const int cy            = y >> f->chroma_v_shift;
 
-        encode_plane(fs, p->data[0] + ps * x + y * p->linesize[0],
-                     width, height, p->linesize[0], 0);
+        ret = encode_plane(fs, p->data[0] + ps*x + y*p->linesize[0], width, height, p->linesize[0], 0);
 
         if (f->chroma_planes) {
-            encode_plane(fs, p->data[1] + ps * cx + cy * p->linesize[1],
-                         chroma_width, chroma_height, p->linesize[1], 1);
-            encode_plane(fs, p->data[2] + ps * cx + cy * p->linesize[2],
-                         chroma_width, chroma_height, p->linesize[2], 1);
+            ret |= encode_plane(fs, p->data[1] + ps*cx+cy*p->linesize[1], chroma_width, chroma_height, p->linesize[1], 1);
+            ret |= encode_plane(fs, p->data[2] + ps*cx+cy*p->linesize[2], chroma_width, chroma_height, p->linesize[2], 1);
         }
         if (fs->transparency)
-            encode_plane(fs, p->data[3] + ps * x + y * p->linesize[3], width,
-                         height, p->linesize[3], 2);
+            ret |= encode_plane(fs, p->data[3] + ps*x + y*p->linesize[3], width, height, p->linesize[3], 2);
     } else {
-        uint8_t *planes[3] = { p->data[0] + ps * x + y * p->linesize[0],
-                               p->data[1] + ps * x + y * p->linesize[1],
-                               p->data[2] + ps * x + y * p->linesize[2] };
-        encode_rgb_frame(fs, planes, width, height, p->linesize);
+        ret = encode_rgb_frame(fs, planes, width, height, p->linesize);
     }
     emms_c();
 
+    if (ret < 0) {
+        av_assert0(fs->slice_coding_mode == 0);
+        if (fs->version < 4 || !fs->ac) {
+            av_log(c, AV_LOG_ERROR, "Buffer too small\n");
+            return ret;
+        }
+        av_log(c, AV_LOG_DEBUG, "Coding slice as PCM\n");
+        fs->slice_coding_mode = 1;
+        fs->c = c_bak;
+        goto retry;
+    }
+
     return 0;
 }
 
-static int ffv1_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
+static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                         const AVFrame *pict, int *got_packet)
 {
     FFV1Context *f      = avctx->priv_data;
     RangeCoder *const c = &f->slice_context[0]->c;
-    AVFrame *const p    = avctx->coded_frame;
+    AVFrame *const p    = f->picture.f;
     int used_count      = 0;
     uint8_t keystate    = 128;
     uint8_t *buf_p;
     int i, ret;
+    int64_t maxsize =   FF_MIN_BUFFER_SIZE
+                      + avctx->width*avctx->height*35LL*4;
+
+    if(!pict) {
+        if (avctx->flags & CODEC_FLAG_PASS1) {
+            int j, k, m;
+            char *p   = avctx->stats_out;
+            char *end = p + STATS_OUT_SIZE;
+
+            memset(f->rc_stat, 0, sizeof(f->rc_stat));
+            for (i = 0; i < f->quant_table_count; i++)
+                memset(f->rc_stat2[i], 0, f->context_count[i] * sizeof(*f->rc_stat2[i]));
+
+            for (j = 0; j < f->slice_count; j++) {
+                FFV1Context *fs = f->slice_context[j];
+                for (i = 0; i < 256; i++) {
+                    f->rc_stat[i][0] += fs->rc_stat[i][0];
+                    f->rc_stat[i][1] += fs->rc_stat[i][1];
+                }
+                for (i = 0; i < f->quant_table_count; i++) {
+                    for (k = 0; k < f->context_count[i]; k++)
+                        for (m = 0; m < 32; m++) {
+                            f->rc_stat2[i][k][m][0] += fs->rc_stat2[i][k][m][0];
+                            f->rc_stat2[i][k][m][1] += fs->rc_stat2[i][k][m][1];
+                        }
+                }
+            }
 
-    f->frame = pict;
+            for (j = 0; j < 256; j++) {
+                snprintf(p, end - p, "%" PRIu64 " %" PRIu64 " ",
+                        f->rc_stat[j][0], f->rc_stat[j][1]);
+                p += strlen(p);
+            }
+            snprintf(p, end - p, "\n");
 
-    if ((ret = ff_alloc_packet(pkt, avctx->width * avctx->height *
-                             ((8 * 2 + 1 + 1) * 4) / 8 +
-                             FF_MIN_BUFFER_SIZE)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
-        return ret;
+            for (i = 0; i < f->quant_table_count; i++) {
+                for (j = 0; j < f->context_count[i]; j++)
+                    for (m = 0; m < 32; m++) {
+                        snprintf(p, end - p, "%" PRIu64 " %" PRIu64 " ",
+                                f->rc_stat2[i][j][m][0], f->rc_stat2[i][j][m][1]);
+                        p += strlen(p);
+                    }
+            }
+            snprintf(p, end - p, "%d\n", f->gob_count);
+        }
+        return 0;
     }
 
+    if (f->version > 3)
+        maxsize = FF_MIN_BUFFER_SIZE + avctx->width*avctx->height*3LL*4;
+
+    if ((ret = ff_alloc_packet2(avctx, pkt, maxsize)) < 0)
+        return ret;
+
     ff_init_range_encoder(c, pkt->data, pkt->size);
     ff_build_rac_states(c, 0.05 * (1LL << 32), 256 - 8);
 
+    av_frame_unref(p);
+    if ((ret = av_frame_ref(p, pict)) < 0)
+        return ret;
+    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+
     if (avctx->gop_size == 0 || f->picture_number % avctx->gop_size == 0) {
         put_rac(c, &keystate, 1);
-        p->key_frame = 1;
+        avctx->coded_frame->key_frame = 1;
         f->gob_count++;
         write_header(f);
     } else {
         put_rac(c, &keystate, 0);
-        p->key_frame = 0;
+        avctx->coded_frame->key_frame = 0;
     }
 
     if (f->ac > 1) {
@@ -969,9 +1257,8 @@ static int ffv1_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
     for (i = 1; i < f->slice_count; i++) {
         FFV1Context *fs = f->slice_context[i];
-        uint8_t *start  = pkt->data +
-                          (pkt->size - used_count) * (int64_t)i / f->slice_count;
-        int len = pkt->size / f->slice_count;
+        uint8_t *start  = pkt->data + (pkt->size - used_count) * (int64_t)i / f->slice_count;
+        int len         = pkt->size / f->slice_count;
         ff_init_range_encoder(&fs->c, start, len);
     }
     avctx->execute(avctx, encode_slice, &f->slice_context[0], NULL,
@@ -1007,58 +1294,20 @@ static int ffv1_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         buf_p += bytes;
     }
 
-    if ((avctx->flags & CODEC_FLAG_PASS1) && (f->picture_number & 31) == 0) {
-        int j, k, m;
-        char *p   = avctx->stats_out;
-        char *end = p + STATS_OUT_SIZE;
-
-        memset(f->rc_stat, 0, sizeof(f->rc_stat));
-        for (i = 0; i < f->quant_table_count; i++)
-            memset(f->rc_stat2[i], 0, f->context_count[i] * sizeof(*f->rc_stat2[i]));
-
-        for (j = 0; j < f->slice_count; j++) {
-            FFV1Context *fs = f->slice_context[j];
-            for (i = 0; i < 256; i++) {
-                f->rc_stat[i][0] += fs->rc_stat[i][0];
-                f->rc_stat[i][1] += fs->rc_stat[i][1];
-            }
-            for (i = 0; i < f->quant_table_count; i++) {
-                for (k = 0; k < f->context_count[i]; k++)
-                    for (m = 0; m < 32; m++) {
-                        f->rc_stat2[i][k][m][0] += fs->rc_stat2[i][k][m][0];
-                        f->rc_stat2[i][k][m][1] += fs->rc_stat2[i][k][m][1];
-                    }
-            }
-        }
-
-        for (j = 0; j < 256; j++) {
-            snprintf(p, end - p, "%" PRIu64 " %" PRIu64 " ",
-                     f->rc_stat[j][0], f->rc_stat[j][1]);
-            p += strlen(p);
-        }
-        snprintf(p, end - p, "\n");
-
-        for (i = 0; i < f->quant_table_count; i++) {
-            for (j = 0; j < f->context_count[i]; j++)
-                for (m = 0; m < 32; m++) {
-                    snprintf(p, end - p, "%" PRIu64 " %" PRIu64 " ",
-                             f->rc_stat2[i][j][m][0], f->rc_stat2[i][j][m][1]);
-                    p += strlen(p);
-                }
-        }
-        snprintf(p, end - p, "%d\n", f->gob_count);
-    } else if (avctx->flags & CODEC_FLAG_PASS1)
+    if (avctx->flags & CODEC_FLAG_PASS1)
         avctx->stats_out[0] = '\0';
 
     f->picture_number++;
     pkt->size   = buf_p - pkt->data;
-    pkt->flags |= AV_PKT_FLAG_KEY * p->key_frame;
+    pkt->pts    =
+    pkt->dts    = pict->pts;
+    pkt->flags |= AV_PKT_FLAG_KEY * avctx->coded_frame->key_frame;
     *got_packet = 1;
 
     return 0;
 }
 
-static av_cold int ffv1_encode_close(AVCodecContext *avctx)
+static av_cold int encode_close(AVCodecContext *avctx)
 {
     av_frame_free(&avctx->coded_frame);
     ffv1_close(avctx);
@@ -1068,12 +1317,11 @@ static av_cold int ffv1_encode_close(AVCodecContext *avctx)
 #define OFFSET(x) offsetof(FFV1Context, x)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
-    { "slicecrc", "Protect slices with CRCs", OFFSET(ec), AV_OPT_TYPE_INT,
-             { .i64 = -1 }, -1, 1, VE },
+    { "slicecrc", "Protect slices with CRCs", OFFSET(ec), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE },
     { NULL }
 };
 
-static const AVClass class = {
+static const AVClass ffv1_class = {
     .class_name = "ffv1 encoder",
     .item_name  = av_default_item_name,
     .option     = options,
@@ -1091,23 +1339,24 @@ AVCodec ff_ffv1_encoder = {
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_FFV1,
     .priv_data_size = sizeof(FFV1Context),
-    .init           = ffv1_encode_init,
-    .encode2        = ffv1_encode_frame,
-    .close          = ffv1_encode_close,
-    .capabilities   = CODEC_CAP_SLICE_THREADS,
+    .init           = encode_init,
+    .encode2        = encode_frame,
+    .close          = encode_close,
+    .capabilities   = CODEC_CAP_SLICE_THREADS | CODEC_CAP_DELAY,
     .pix_fmts       = (const enum AVPixelFormat[]) {
-        AV_PIX_FMT_YUV420P,   AV_PIX_FMT_YUV422P,   AV_PIX_FMT_YUV444P,
-        AV_PIX_FMT_YUV411P,   AV_PIX_FMT_YUV410P,
-        AV_PIX_FMT_YUV444P9,  AV_PIX_FMT_YUV422P9,  AV_PIX_FMT_YUV420P9,
-        AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
-        AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
-        AV_PIX_FMT_RGB32,
-        AV_PIX_FMT_GBRP9,     AV_PIX_FMT_GBRP10,
-        AV_PIX_FMT_YUVA420P,  AV_PIX_FMT_YUVA422P,  AV_PIX_FMT_YUVA444P,
-        AV_PIX_FMT_GRAY16,    AV_PIX_FMT_GRAY8,
+        AV_PIX_FMT_YUV420P,   AV_PIX_FMT_YUVA420P,  AV_PIX_FMT_YUVA422P,  AV_PIX_FMT_YUV444P,
+        AV_PIX_FMT_YUVA444P,  AV_PIX_FMT_YUV440P,   AV_PIX_FMT_YUV422P,   AV_PIX_FMT_YUV411P,
+        AV_PIX_FMT_YUV410P,   AV_PIX_FMT_0RGB32,    AV_PIX_FMT_RGB32,     AV_PIX_FMT_YUV420P16,
+        AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16, AV_PIX_FMT_YUV444P9,  AV_PIX_FMT_YUV422P9,
+        AV_PIX_FMT_YUV420P9,  AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
+        AV_PIX_FMT_YUVA444P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA420P16,
+        AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA420P10,
+        AV_PIX_FMT_YUVA444P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA420P9,
+        AV_PIX_FMT_GRAY16,    AV_PIX_FMT_GRAY8,     AV_PIX_FMT_GBRP9,     AV_PIX_FMT_GBRP10,
+        AV_PIX_FMT_GBRP12,    AV_PIX_FMT_GBRP14,
         AV_PIX_FMT_NONE
 
     },
     .defaults       = ffv1_defaults,
-    .priv_class     = &class,
+    .priv_class     = &ffv1_class,
 };
diff --git a/libavcodec/ffwavesynth.c b/libavcodec/ffwavesynth.c
new file mode 100644
index 0000000..4a5031a
--- /dev/null
+++ b/libavcodec/ffwavesynth.c
@@ -0,0 +1,481 @@
+/*
+ * Wavesynth pseudo-codec
+ * Copyright (c) 2011 Nicolas George
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "libavutil/log.h"
+#include "avcodec.h"
+#include "internal.h"
+
+
+#define SIN_BITS 14
+#define WS_MAX_CHANNELS 32
+#define INF_TS 0x7FFFFFFFFFFFFFFF
+
+#define PINK_UNIT 128
+
+/*
+   Format of the extradata and packets
+
+   THIS INFORMATION IS NOT PART OF THE PUBLIC API OR ABI.
+   IT CAN CHANGE WITHOUT NOTIFICATION.
+
+   All numbers are in little endian.
+
+   The codec extradata define a set of intervals with uniform content.
+   Overlapping intervals are added together.
+
+   extradata:
+       uint32      number of intervals
+       ...         intervals
+
+   interval:
+       int64       start timestamp; time_base must be 1/sample_rate;
+                   start timestamps must be in ascending order
+       int64       end timestamp
+       uint32      type
+       uint32      channels mask
+       ...         additional information, depends on type
+
+   sine interval (type fourcc "SINE"):
+       int32       start frequency, in 1/(1<<16) Hz
+       int32       end frequency
+       int32       start amplitude, 1<<16 is the full amplitude
+       int32       end amplitude
+       uint32      start phase, 0 is sin(0), 0x20000000 is sin(pi/2), etc.;
+                   n | (1<<31) means to match the phase of previous channel #n
+
+   pink noise interval (type fourcc "NOIS"):
+       int32       start amplitude
+       int32       end amplitude
+
+   The input packets encode the time and duration of the requested segment.
+
+   packet:
+       int64       start timestamp
+       int32       duration
+
+*/
+
+enum ws_interval_type {
+    WS_SINE  = MKTAG('S','I','N','E'), /* sine interval, fourcc "SINE" */
+    WS_NOISE = MKTAG('N','O','I','S'), /* pink noise interval, fourcc "NOIS" */
+};
+
+struct ws_interval {
+    int64_t ts_start, ts_end;    /* interval is active while ts_start <= ts < ts_end */
+    uint64_t phi0, dphi0, ddphi; /* phase and phase step at ts_start; per-sample change of the step */
+    uint64_t amp0, damp;         /* amplitude at ts_start and its per-sample increment */
+    uint64_t phi, dphi, amp;     /* running values, advanced by wavesynth_synth_sample() */
+    uint32_t channels;           /* mask of the channels the interval is added to */
+    enum ws_interval_type type;
+    int next;                    /* index of the next active interval, -1 ends the list */
+};
+
+struct wavesynth_context {
+    int64_t cur_ts;               /* timestamp expected for the next packet; others trigger a seek */
+    int64_t next_ts;              /* start of the first interval not yet entered, INF_TS if none */
+    int32_t *sin;                 /* sine table, 1 << SIN_BITS entries scaled by 32767 */
+    struct ws_interval *inter;    /* nb_inter intervals parsed from the extradata */
+    uint32_t dither_state;        /* generator state for the dither noise */
+    uint32_t pink_state;          /* generator state for the pink noise */
+    int32_t pink_pool[PINK_UNIT]; /* pink noise samples produced by pink_fill() */
+    unsigned pink_need, pink_pos; /* number of noise intervals; read position in pink_pool */
+    int nb_inter;
+    int cur_inter;                /* head of the list of active intervals, -1 if empty */
+    int next_inter;               /* index of the first interval not yet entered */
+};
+
+#define LCG_A 1284865837
+#define LCG_C 4150755663
+#define LCG_AI 849225893 /* A*AI = 1 [mod 1<<32] */
+
+/* Advance the generator state *s by one step and return the new state. */
+static uint32_t lcg_next(uint32_t *s)
+{
+    return *s = LCG_C + LCG_A * *s; /* stored as uint32_t, i.e. modulo 1 << 32 */
+}
+
+/* Move the generator state *s by dt steps (dt may be negative) with a number
+ * of multiplications logarithmic in the step count.
+ * LCG_C is odd and LCG_A - 1 is a multiple of 4, so the generator has the
+ * full period 1 << 32: a move by dt is the same as a forward move by
+ * dt modulo 1 << 32. Hence no inverse coefficients are needed (the inverse
+ * step would be AI * t - AI * C), and dt is never negated, which would
+ * overflow for INT64_MIN. */
+static void lcg_seek(uint32_t *s, int64_t dt)
+{
+    uint32_t a = LCG_A, c = LCG_C, t = *s;
+    uint32_t steps = (uint32_t)dt; /* well-defined reduction modulo 1 << 32 */
+
+    while (steps) {
+        if (steps & 1)
+            t = a * t + c;
+        c *= a + 1; /* coefficients for a double step */
+        a *= a;
+        steps >>= 1;
+    }
+    *s = t;
+}
+
+/* Emulate pink noise by summing white noise at the sampling frequency,
+ * white noise at half the sampling frequency (each value taken twice),
+ * etc., with a total of 8 octaves.
+ * This is known as the Voss-McCartney algorithm. */
+
+/* Refill pink_pool with PINK_UNIT fresh samples and rewind the read position. */
+static void pink_fill(struct wavesynth_context *ws)
+{
+    int32_t octave[7] = { 0 }, sum = 0;
+    int pos, k;
+
+    ws->pink_pos = 0;
+    if (!ws->pink_need)
+        return;
+    for (pos = 0; pos < PINK_UNIT; pos++) {
+        /* octave k is redrawn whenever pos is a multiple of 1 << (k + 1) */
+        for (k = 0; k < 7 && !((pos >> k) & 1); k++) {
+            sum -= octave[k];
+            octave[k] = (int32_t)lcg_next(&ws->pink_state) >> 3;
+            sum += octave[k];
+        }
+        ws->pink_pool[pos] = sum + ((int32_t)lcg_next(&ws->pink_state) >> 3);
+    }
+    lcg_next(&ws->pink_state); /* pad the draw count to 256, two per pool entry */
+}
+
+/**
+ * @return  (1<<64) * a / b, without overflow, if a < b (b must not be 0)
+ */
+static uint64_t frac64(uint64_t a, uint64_t b)
+{
+    uint64_t r = 0;
+    int i;
+
+    if (b < (uint64_t)1 << 32) { /* b small, use two 32-bits steps */
+        a <<= 32; /* a < b < 1<<32, so the shift loses no bits */
+        return ((a / b) << 32) | ((a % b) << 32) / b;
+    }
+    if (b < (uint64_t)1 << 48) { /* b medium, use four 16-bits steps */
+        for (i = 0; i < 4; i++) {
+            a <<= 16; /* a < b < 1<<48 on entry to every step */
+            r = (r << 16) | (a / b);
+            a %= b;
+        }
+        return r;
+    }
+    for (i = 63; i >= 0; i--) { /* b large: long division, one quotient bit per pass */
+        if (a >= (uint64_t)1 << 63 || a << 1 >= b) { /* 2*a >= b, tested without overflow */
+            r |= (uint64_t)1 << i;
+            a = (a << 1) - b; /* wraps modulo 1<<64 to the exact 2*a - b */
+        } else {
+            a <<= 1;
+        }
+    }
+    return r;
+}
+
+/* Phase of interval in at timestamp ts, following its quadratic phase law. */
+static uint64_t phi_at(struct ws_interval *in, int64_t ts)
+{
+    const uint64_t n = ts - in->ts_start; /* samples elapsed since the start */
+    const uint64_t tri = n & 1 ? n * ((n - 1) >> 1) : (n >> 1) * (n - 1); /* n*(n-1)/2 */
+    return tri * in->ddphi + n * in->dphi0 + in->phi0;
+}
+
+/* Reposition at ts: rebuild the active interval list and move both noise generators. */
+static void wavesynth_seek(struct wavesynth_context *ws, int64_t ts)
+{
+    int *last = &ws->cur_inter, i;
+
+    for (i = 0; i < ws->nb_inter; i++) {
+        struct ws_interval *in = &ws->inter[i];
+        if (ts < in->ts_start)
+            break;
+        if (ts >= in->ts_end)
+            continue;
+        *last = i;
+        last = &in->next;
+        in->phi  = phi_at(in, ts);
+        in->dphi = in->dphi0 + ((uint64_t)ts - in->ts_start) * in->ddphi;
+        in->amp  = in->amp0  + ((uint64_t)ts - in->ts_start) * in->damp;
+    }
+    ws->next_inter = i;
+    ws->next_ts = i < ws->nb_inter ? ws->inter[i].ts_start : INF_TS;
+    *last = -1;
+    /* step counts are taken modulo 1 << 32 (the generator period): no signed overflow */
+    lcg_seek(&ws->dither_state, (uint32_t)ts - (uint32_t)ws->cur_ts);
+    if (ws->pink_need) {
+        uint64_t pink_ts_cur  = (ws->cur_ts + (uint64_t)PINK_UNIT - 1) & ~(PINK_UNIT - 1);
+        uint64_t pink_ts_next = ts & ~(PINK_UNIT - 1);
+        int pos = ts & (PINK_UNIT - 1);
+        lcg_seek(&ws->pink_state, (uint32_t)(pink_ts_next - pink_ts_cur) * 2); /* 2 draws per sample */
+        if (pos) {
+            pink_fill(ws);
+            ws->pink_pos = pos;
+        } else {
+            ws->pink_pos = PINK_UNIT;
+        }
+    }
+    ws->cur_ts = ts;
+}
+
+/* Parse the codec extradata (layout described in the format comment above)
+ * into ws->inter. Returns 0 on success or a negative AVERROR code; on failure
+ * ws->inter may be left allocated and has to be released by the caller. */
+static int wavesynth_parse_extradata(AVCodecContext *avc)
+{
+    struct wavesynth_context *ws = avc->priv_data;
+    uint8_t *edata = avc->extradata, *edata_end;
+    int32_t f1, f2, a1, a2;
+    uint32_t phi;
+    int64_t dt, cur_ts = -0x8000000000000000;
+    int i;
+
+    if (avc->extradata_size < 4)
+        return AVERROR(EINVAL);
+    edata_end = edata + avc->extradata_size;
+    ws->nb_inter = AV_RL32(edata);
+    edata += 4;
+    if (ws->nb_inter < 0)
+        return AVERROR(EINVAL);
+    ws->inter = av_calloc(ws->nb_inter, sizeof(*ws->inter));
+    if (!ws->inter)
+        return AVERROR(ENOMEM);
+    for (i = 0; i < ws->nb_inter; i++) {
+        struct ws_interval *in = &ws->inter[i];
+        if (edata_end - edata < 24)
+            return AVERROR(EINVAL);
+        in->ts_start = AV_RL64(edata +  0);
+        in->ts_end   = AV_RL64(edata +  8);
+        in->type     = AV_RL32(edata + 16);
+        in->channels = AV_RL32(edata + 20);
+        edata += 24;
+        if (in->ts_start < cur_ts || in->ts_end <= in->ts_start ||
+            (uint64_t)in->ts_end - in->ts_start > INT64_MAX) /* span must fit in dt */
+            return AVERROR(EINVAL);
+        cur_ts = in->ts_start;
+        dt = in->ts_end - in->ts_start;
+        switch (in->type) {
+            case WS_SINE:
+                if (edata_end - edata < 20 || avc->sample_rate <= 0) /* rate divides below */
+                    return AVERROR(EINVAL);
+                f1  = AV_RL32(edata +  0);
+                f2  = AV_RL32(edata +  4);
+                a1  = AV_RL32(edata +  8);
+                a2  = AV_RL32(edata + 12);
+                phi = AV_RL32(edata + 16);
+                edata += 20;
+                in->dphi0 = frac64(f1, (int64_t)avc->sample_rate << 16);
+                in->ddphi = (int64_t)(frac64(f2, (int64_t)avc->sample_rate << 16) - in->dphi0) / dt;
+                if (phi & 0x80000000) {
+                    phi &= ~0x80000000;
+                    if (phi >= i)
+                        return AVERROR(EINVAL);
+                    in->phi0 = phi_at(&ws->inter[phi], in->ts_start);
+                } else {
+                    in->phi0 = (uint64_t)phi << 33;
+                }
+                break;
+            case WS_NOISE:
+                if (edata_end - edata < 8)
+                    return AVERROR(EINVAL);
+                a1  = AV_RL32(edata +  0);
+                a2  = AV_RL32(edata +  4);
+                edata += 8;
+                break;
+            default:
+                return AVERROR(EINVAL);
+        }
+        in->amp0 = (uint64_t)a1 << 32; /* unsigned: a1 may be negative */
+        in->damp = (int64_t)(((uint64_t)a2 << 32) - ((uint64_t)a1 << 32)) / dt;
+    }
+    if (edata != edata_end)
+        return AVERROR(EINVAL);
+    return 0;
+}
+
+/* Decoder init: parse the intervals from the extradata, build the sine table
+ * and place the synthesizer at timestamp 0. Returns 0 or a negative AVERROR. */
+static av_cold int wavesynth_init(AVCodecContext *avc)
+{
+    struct wavesynth_context *ws = avc->priv_data;
+    int i, r;
+
+    if (avc->channels > WS_MAX_CHANNELS) {
+        av_log(avc, AV_LOG_ERROR,
+               "This implementation is limited to %d channels.\n", WS_MAX_CHANNELS);
+        return AVERROR(EINVAL);
+    }
+    if ((r = wavesynth_parse_extradata(avc)) < 0) {
+        av_log(avc, AV_LOG_ERROR, "Invalid intervals definitions.\n");
+        goto fail;
+    }
+    ws->sin = av_malloc(sizeof(*ws->sin) << SIN_BITS);
+    if (!ws->sin) {
+        r = AVERROR(ENOMEM);
+        goto fail;
+    }
+    for (i = 0; i < 1 << SIN_BITS; i++)
+        ws->sin[i] = floor(32767 * sin(2 * M_PI * i / (1 << SIN_BITS)));
+    ws->dither_state = MKTAG('D','I','T','H');
+    for (i = 0; i < ws->nb_inter; i++)
+        ws->pink_need += ws->inter[i].type == WS_NOISE;
+    ws->pink_state = MKTAG('P','I','N','K');
+    ws->pink_pos = PINK_UNIT;
+    wavesynth_seek(ws, 0);
+    avc->sample_fmt = AV_SAMPLE_FMT_S16;
+    return 0;
+
+fail:
+    av_freep(&ws->inter); /* also clears the pointer, so a later close cannot free it again */
+    av_freep(&ws->sin);
+    return r;
+}
+
+/* Add one sample of every active interval, plus dither, to the accumulators
+ * in channels[]; intervals that have ended are unlinked from the active list. */
+static void wavesynth_synth_sample(struct wavesynth_context *ws, int64_t ts,
+                                   int32_t *channels)
+{
+    int32_t amp, val, *cv;
+    struct ws_interval *in;
+    int i = ws->cur_inter, *last = &ws->cur_inter, pink;
+    uint32_t c, all_ch = 0;
+
+    if (ws->pink_pos == PINK_UNIT)
+        pink_fill(ws);
+    pink = ws->pink_pool[ws->pink_pos++] >> 16;
+    while (i >= 0) {
+        in = &ws->inter[i];
+        i = in->next;
+        if (ts >= in->ts_end) {
+            *last = i; /* unlink the finished interval */
+            continue;
+        }
+        last = &in->next;
+        amp = in->amp >> 32;
+        in->amp  += in->damp;
+        switch (in->type) {
+            case WS_SINE:
+                val = amp * (unsigned)ws->sin[in->phi >> (64 - SIN_BITS)]; /* unsigned: wraps, no UB */
+                in->phi  += in->dphi;
+                in->dphi += in->ddphi;
+                break;
+            case WS_NOISE:
+                val = amp * (unsigned)pink;
+                break;
+            default:
+                val = 0;
+        }
+        all_ch |= in->channels;
+        for (c = in->channels, cv = channels; c; c >>= 1, cv++)
+            if (c & 1)
+                *cv += (unsigned)val; /* accumulate with wrap-around instead of signed overflow */
+    }
+    val = (int32_t)lcg_next(&ws->dither_state) >> 16;
+    for (c = all_ch, cv = channels; c; c >>= 1, cv++)
+        if (c & 1)
+            *cv += (unsigned)val;
+}
+
+/* Append to the active list every pending interval that has started by ts,
+ * with its running state reset to the interval's initial values. */
+static void wavesynth_enter_intervals(struct wavesynth_context *ws, int64_t ts)
+{
+    int *tail = &ws->cur_inter, idx;
+
+    while (*tail >= 0) /* walk to the link that terminates the active list */
+        tail = &ws->inter[*tail].next;
+    for (idx = ws->next_inter; idx < ws->nb_inter; idx++) {
+        struct ws_interval *cand = &ws->inter[idx];
+        if (ts < cand->ts_start)
+            break;
+        if (ts < cand->ts_end) {
+            *tail = idx;
+            tail = &cand->next;
+            cand->phi  = cand->phi0;
+            cand->dphi = cand->dphi0;
+            cand->amp  = cand->amp0;
+        }
+    }
+    ws->next_inter = idx;
+    ws->next_ts = idx < ws->nb_inter ? ws->inter[idx].ts_start : INF_TS;
+    *tail = -1;
+}
+
+/* Synthesize the segment requested by the packet (int64 start timestamp and
+ * int32 duration, little endian) as interleaved signed 16-bit samples. */
+static int wavesynth_decode(AVCodecContext *avc, void *rframe, int *rgot_frame,
+                            AVPacket *packet)
+{
+    struct wavesynth_context *ws = avc->priv_data;
+    AVFrame *frame = rframe;
+    int64_t ts;
+    int duration, s, c, r;
+    int16_t *pcm;
+    int32_t channels[WS_MAX_CHANNELS];
+
+    *rgot_frame = 0;
+    if (packet->size != 12)
+        return AVERROR_INVALIDDATA;
+    ts = AV_RL64(packet->data);
+    if (ts != ws->cur_ts)
+        wavesynth_seek(ws, ts);
+    duration = AV_RL32(packet->data + 8);
+    if (duration <= 0)
+        return AVERROR(EINVAL);
+    frame->nb_samples = duration;
+    if ((r = ff_get_buffer(avc, frame, 0)) < 0)
+        return r;
+    pcm = (int16_t *)frame->data[0];
+    for (s = 0; s < duration; s++, ts++) {
+        memset(channels, 0, sizeof(channels)); /* all 32: a channel mask may select any entry */
+        if (ts >= ws->next_ts)
+            wavesynth_enter_intervals(ws, ts);
+        wavesynth_synth_sample(ws, ts, channels);
+        for (c = 0; c < avc->channels; c++)
+            *(pcm++) = channels[c] >> 16;
+    }
+    ws->cur_ts += duration;
+    *rgot_frame = 1;
+    return packet->size;
+}
+
+/* Release the sine table and the interval array owned by the context. */
+static av_cold int wavesynth_close(AVCodecContext *avc)
+{
+    struct wavesynth_context *ws = avc->priv_data;
+    av_freep(&ws->sin);   /* av_freep() also resets the pointer to NULL */
+    av_freep(&ws->inter);
+    return 0;
+}
+
+AVCodec ff_ffwavesynth_decoder = {
+    .name           = "wavesynth",
+    .long_name      = NULL_IF_CONFIG_SMALL("Wave synthesis pseudo-codec"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_FFWAVESYNTH,
+    .priv_data_size = sizeof(struct wavesynth_context),
+    .init           = wavesynth_init,   /* parses the extradata, builds the sine table */
+    .close          = wavesynth_close,
+    .decode         = wavesynth_decode, /* one 12-byte request packet in, one frame out */
+    .capabilities   = CODEC_CAP_DR1,    /* wavesynth_decode() gets its frames from ff_get_buffer() */
+};
diff --git a/libavcodec/fic.c b/libavcodec/fic.c
index 8512ef3..d08d240 100644
--- a/libavcodec/fic.c
+++ b/libavcodec/fic.c
@@ -4,20 +4,20 @@
  * Copyright (c) 2014 Konstantin Shishkov
  * Copyright (c) 2014 Derek Buitenhuis
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -79,7 +79,7 @@ static const uint8_t fic_header[7] = { 0, 0, 1, 'F', 'I', 'C', 'V' };
 
 #define FIC_HEADER_SIZE 27
 
-static av_always_inline void fic_idct(int16_t *blk, int step, int shift)
+static av_always_inline void fic_idct(int16_t *blk, int step, int shift, int rnd)
 {
     const int t0 =  27246 * blk[3 * step] + 18405 * blk[5 * step];
     const int t1 =  27246 * blk[5 * step] - 18405 * blk[3 * step];
@@ -91,8 +91,8 @@ static av_always_inline void fic_idct(int16_t *blk, int step, int shift)
     const int t7 = t3 - t1;
     const int t8 =  17734 * blk[2 * step] - 42813 * blk[6 * step];
     const int t9 =  17734 * blk[6 * step] + 42814 * blk[2 * step];
-    const int tA = (blk[0 * step] - blk[4 * step] << 15) + (1 << shift - 1);
-    const int tB = (blk[0 * step] + blk[4 * step] << 15) + (1 << shift - 1);
+    const int tA = (blk[0 * step] - blk[4 * step] << 15) + rnd;
+    const int tB = (blk[0 * step] + blk[4 * step] << 15) + rnd;
     blk[0 * step] = (  t4       + t9 + tB) >> shift;
     blk[1 * step] = (  t6 + t7  + t8 + tA) >> shift;
     blk[2 * step] = (  t6 - t7  - t8 + tA) >> shift;
@@ -109,14 +109,15 @@ static void fic_idct_put(uint8_t *dst, int stride, int16_t *block)
     int16_t *ptr;
 
     ptr = block;
-    for (i = 0; i < 8; i++) {
-        fic_idct(ptr, 8, 13);
+    fic_idct(ptr++, 8, 13, (1 << 12) + (1 << 17));
+    for (i = 1; i < 8; i++) {
+        fic_idct(ptr, 8, 13, 1 << 12);
         ptr++;
     }
 
     ptr = block;
     for (i = 0; i < 8; i++) {
-        fic_idct(ptr, 1, 20);
+        fic_idct(ptr, 1, 20, 0);
         ptr += 8;
     }
 
diff --git a/libavcodec/flac.c b/libavcodec/flac.c
index cd1a6ab..aeb276b 100644
--- a/libavcodec/flac.c
+++ b/libavcodec/flac.c
@@ -2,20 +2,20 @@
  * FLAC common code
  * Copyright (c) 2009 Justin Ruggles
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -230,8 +230,7 @@ void avpriv_flac_parse_streaminfo(AVCodecContext *avctx, struct FLACStreaminfo *
         av_get_channel_layout_nb_channels(avctx->channel_layout) != avctx->channels)
         ff_flac_set_channel_layout(avctx);
 
-    s->samples  = get_bits_long(&gb, 32) << 4;
-    s->samples |= get_bits(&gb, 4);
+    s->samples = get_bits64(&gb, 36);
 
     skip_bits_long(&gb, 64); /* md5 sum */
     skip_bits_long(&gb, 64); /* md5 sum */
diff --git a/libavcodec/flac.h b/libavcodec/flac.h
index fbd34a1..b4f28cf 100644
--- a/libavcodec/flac.h
+++ b/libavcodec/flac.h
@@ -2,20 +2,20 @@
  * FLAC (Free Lossless Audio Codec) decoder/demuxer common functions
  * Copyright (c) 2008 Justin Ruggles
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/flac_parser.c b/libavcodec/flac_parser.c
index bf2c118..a031dbf 100644
--- a/libavcodec/flac_parser.c
+++ b/libavcodec/flac_parser.c
@@ -2,20 +2,20 @@
  * FLAC parser
  * Copyright (c) 2010 Michael Chinen
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -87,6 +87,8 @@ typedef struct FLACParseContext {
     int end_padded;                /**< specifies if fifo_buf's end is padded */
     uint8_t *wrap_buf;             /**< general fifo read buffer when wrapped */
     int wrap_buf_allocated_size;   /**< actual allocated size of the buffer   */
+    FLACFrameInfo last_fi;         /**< last decoded frame header info        */
+    int last_fi_valid;             /**< set if last_fi is valid               */
 } FLACParseContext;
 
 static int frame_header_is_valid(AVCodecContext *avctx, const uint8_t *buf,
@@ -267,13 +269,12 @@ static int find_new_headers(FLACParseContext *fpc, int search_start)
     return size;
 }
 
-static int check_header_mismatch(FLACParseContext  *fpc,
-                                 FLACHeaderMarker  *header,
-                                 FLACHeaderMarker  *child,
-                                 int                log_level_offset)
+static int check_header_fi_mismatch(FLACParseContext  *fpc,
+                                    FLACFrameInfo     *header_fi,
+                                    FLACFrameInfo     *child_fi,
+                                    int                log_level_offset)
 {
-    FLACFrameInfo  *header_fi = &header->fi, *child_fi = &child->fi;
-    int deduction = 0, deduction_expected = 0, i;
+    int deduction = 0;
     if (child_fi->samplerate != header_fi->samplerate) {
         deduction += FLAC_HEADER_CHANGED_PENALTY;
         av_log(fpc->avctx, AV_LOG_WARNING + log_level_offset,
@@ -288,13 +289,25 @@ static int check_header_mismatch(FLACParseContext  *fpc,
         /* Changing blocking strategy not allowed per the spec */
         deduction += FLAC_HEADER_BASE_SCORE;
         av_log(fpc->avctx, AV_LOG_WARNING + log_level_offset,
-                   "blocking strategy change detected in adjacent frames\n");
+               "blocking strategy change detected in adjacent frames\n");
     }
     if (child_fi->channels != header_fi->channels) {
         deduction += FLAC_HEADER_CHANGED_PENALTY;
         av_log(fpc->avctx, AV_LOG_WARNING + log_level_offset,
-                   "number of channels change detected in adjacent frames\n");
+               "number of channels change detected in adjacent frames\n");
     }
+    return deduction;
+}
+
+static int check_header_mismatch(FLACParseContext  *fpc,
+                                 FLACHeaderMarker  *header,
+                                 FLACHeaderMarker  *child,
+                                 int                log_level_offset)
+{
+    FLACFrameInfo  *header_fi = &header->fi, *child_fi = &child->fi;
+    int deduction, deduction_expected = 0, i;
+    deduction = check_header_fi_mismatch(fpc, header_fi, child_fi,
+                                         log_level_offset);
     /* Check sample and frame numbers. */
     if ((child_fi->frame_or_sample_num - header_fi->frame_or_sample_num
          != header_fi->blocksize) &&
@@ -399,11 +412,18 @@ static int score_header(FLACParseContext *fpc, FLACHeaderMarker *header)
     FLACHeaderMarker *child;
     int dist = 0;
     int child_score;
-
+    int base_score = FLAC_HEADER_BASE_SCORE;
     if (header->max_score != FLAC_HEADER_NOT_SCORED_YET)
         return header->max_score;
 
-    header->max_score = FLAC_HEADER_BASE_SCORE;
+    /* Modify the base score with changes from the last output header */
+    if (fpc->last_fi_valid) {
+        /* Silence the log since this will be repeated if selected */
+        base_score -= check_header_fi_mismatch(fpc, &fpc->last_fi, &header->fi,
+                                               AV_LOG_DEBUG);
+    }
+
+    header->max_score = base_score;
 
     /* Check and compute the children's scores. */
     child = header->next;
@@ -419,7 +439,7 @@ static int score_header(FLACParseContext *fpc, FLACHeaderMarker *header)
         if (FLAC_HEADER_BASE_SCORE + child_score > header->max_score) {
             /* Keep the child because the frame scoring is dynamic. */
             header->best_child = child;
-            header->max_score  = FLAC_HEADER_BASE_SCORE + child_score;
+            header->max_score  = base_score + child_score;
         }
         child = child->next;
     }
@@ -430,7 +450,7 @@ static int score_header(FLACParseContext *fpc, FLACHeaderMarker *header)
 static void score_sequences(FLACParseContext *fpc)
 {
     FLACHeaderMarker *curr;
-    int best_score = FLAC_HEADER_NOT_SCORED_YET;
+    int best_score = 0;//FLAC_HEADER_NOT_SCORED_YET;
     /* First pass to clear all old scores. */
     for (curr = fpc->headers; curr; curr = curr->next)
         curr->max_score = FLAC_HEADER_NOT_SCORED_YET;
@@ -469,7 +489,18 @@ static int get_best_header(FLACParseContext* fpc, const uint8_t **poutbuf,
                                         &fpc->wrap_buf,
                                         &fpc->wrap_buf_allocated_size);
 
+
+    if (fpc->pc->flags & PARSER_FLAG_USE_CODEC_TS){
+        if (header->fi.is_var_size)
+          fpc->pc->pts = header->fi.frame_or_sample_num;
+        else if (header->best_child)
+          fpc->pc->pts = header->fi.frame_or_sample_num * header->fi.blocksize;
+    }
+
     fpc->best_header_valid = 0;
+    fpc->last_fi_valid = 1;
+    fpc->last_fi = header->fi;
+
     /* Return the negative overread index so the client can compute pos.
        This should be the amount overread to the beginning of the child */
     if (child)
@@ -489,8 +520,16 @@ static int flac_parse(AVCodecParserContext *s, AVCodecContext *avctx,
 
     if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
         FLACFrameInfo fi;
-        if (frame_header_is_valid(avctx, buf, &fi))
+        if (frame_header_is_valid(avctx, buf, &fi)) {
             s->duration = fi.blocksize;
+            if (!avctx->sample_rate)
+                avctx->sample_rate = fi.samplerate;
+            if (fpc->pc->flags & PARSER_FLAG_USE_CODEC_TS){
+                fpc->pc->pts = fi.frame_or_sample_num;
+                if (!fi.is_var_size)
+                  fpc->pc->pts *= fi.blocksize;
+            }
+        }
         *poutbuf      = buf;
         *poutbuf_size = buf_size;
         return buf_size;
@@ -546,14 +585,18 @@ static int flac_parse(AVCodecParserContext *s, AVCodecContext *avctx,
         av_freep(&fpc->best_header);
     }
 
-    /* Find and score new headers. */
-    while ((buf && read_end < buf + buf_size &&
+    /* Find and score new headers.                                     */
+    /* buf_size is zero when padding, so check for this since we do    */
+    /* not want to try to read more input once we have found the end.  */
+    /* Note that as (non-modified) parameters, buf can be non-NULL,    */
+    /* while buf_size is 0.                                            */
+    while ((buf && buf_size && read_end < buf + buf_size &&
             fpc->nb_headers_buffered < FLAC_MIN_HEADERS)
-           || (!buf && !fpc->end_padded)) {
+           || ((!buf || !buf_size) && !fpc->end_padded)) {
         int start_offset;
 
         /* Pad the end once if EOF, to check the final region for headers. */
-        if (!buf) {
+        if (!buf || !buf_size) {
             fpc->end_padded      = 1;
             buf_size = MAX_FRAME_HEADER_SIZE;
             read_end = read_start + MAX_FRAME_HEADER_SIZE;
@@ -566,15 +609,15 @@ static int flac_parse(AVCodecParserContext *s, AVCodecContext *avctx,
         }
 
         /* Fill the buffer. */
-        if (av_fifo_realloc2(fpc->fifo_buf,
-                             (read_end - read_start) + av_fifo_size(fpc->fifo_buf)) < 0) {
+        if (   av_fifo_space(fpc->fifo_buf) < read_end - read_start
+            && av_fifo_realloc2(fpc->fifo_buf, (read_end - read_start) + 2*av_fifo_size(fpc->fifo_buf)) < 0) {
             av_log(avctx, AV_LOG_ERROR,
-                   "couldn't reallocate buffer of size %td\n",
+                   "couldn't reallocate buffer of size %"PTRDIFF_SPECIFIER"\n",
                    (read_end - read_start) + av_fifo_size(fpc->fifo_buf));
             goto handle_error;
         }
 
-        if (buf) {
+        if (buf && buf_size) {
             av_fifo_generic_write(fpc->fifo_buf, (void*) read_start,
                                   read_end - read_start, NULL);
         } else {
@@ -611,10 +654,11 @@ static int flac_parse(AVCodecParserContext *s, AVCodecContext *avctx,
 
         /* restore the state pre-padding */
         if (fpc->end_padded) {
+            int warp = fpc->fifo_buf->wptr - fpc->fifo_buf->buffer < MAX_FRAME_HEADER_SIZE;
             /* HACK: drain the tail of the fifo */
             fpc->fifo_buf->wptr -= MAX_FRAME_HEADER_SIZE;
             fpc->fifo_buf->wndx -= MAX_FRAME_HEADER_SIZE;
-            if (fpc->fifo_buf->wptr < 0) {
+            if (warp) {
                 fpc->fifo_buf->wptr += fpc->fifo_buf->end -
                     fpc->fifo_buf->buffer;
             }
@@ -623,10 +667,12 @@ static int flac_parse(AVCodecParserContext *s, AVCodecContext *avctx,
         }
     }
 
-    curr = fpc->headers;
-    for (curr = fpc->headers; curr; curr = curr->next)
-        if (!fpc->best_header || curr->max_score > fpc->best_header->max_score)
+    for (curr = fpc->headers; curr; curr = curr->next) {
+        if (curr->max_score > 0 &&
+            (!fpc->best_header || curr->max_score > fpc->best_header->max_score)) {
             fpc->best_header = curr;
+        }
+    }
 
     if (fpc->best_header) {
         fpc->best_header_valid = 1;
@@ -660,7 +706,9 @@ static av_cold int flac_parse_init(AVCodecParserContext *c)
     fpc->pc = c;
     /* There will generally be FLAC_MIN_HEADERS buffered in the fifo before
        it drains.  This is allocated early to avoid slow reallocation. */
-    fpc->fifo_buf = av_fifo_alloc(FLAC_AVG_FRAME_SIZE * (FLAC_MIN_HEADERS + 3));
+    fpc->fifo_buf = av_fifo_alloc_array(FLAC_MIN_HEADERS + 3, FLAC_AVG_FRAME_SIZE);
+    if (!fpc->fifo_buf)
+        return AVERROR(ENOMEM);
     return 0;
 }
 
@@ -675,7 +723,7 @@ static void flac_parse_close(AVCodecParserContext *c)
         av_free(curr);
         curr = temp;
     }
-    av_fifo_free(fpc->fifo_buf);
+    av_fifo_freep(&fpc->fifo_buf);
     av_free(fpc->wrap_buf);
 }
 
diff --git a/libavcodec/flacdata.c b/libavcodec/flacdata.c
index 820c3aa..1954f32 100644
--- a/libavcodec/flacdata.c
+++ b/libavcodec/flacdata.c
@@ -2,20 +2,20 @@
  * FLAC data
  * Copyright (c) 2003 Alex Beregszaszi
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,7 +27,7 @@ const int ff_flac_sample_rate_table[16] =
   8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000,
   0, 0, 0, 0 };
 
-const int16_t ff_flac_blocksize_table[16] = {
+const int32_t ff_flac_blocksize_table[16] = {
      0,    192, 576<<0, 576<<1, 576<<2, 576<<3,      0,      0,
 256<<0, 256<<1, 256<<2, 256<<3, 256<<4, 256<<5, 256<<6, 256<<7
 };
diff --git a/libavcodec/flacdata.h b/libavcodec/flacdata.h
index f566377..e2c1e5d 100644
--- a/libavcodec/flacdata.h
+++ b/libavcodec/flacdata.h
@@ -2,20 +2,20 @@
  * FLAC data header
  * Copyright (c) 2003 Alex Beregszaszi
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,6 +26,6 @@
 
 extern const int ff_flac_sample_rate_table[16];
 
-extern const int16_t ff_flac_blocksize_table[16];
+extern const int32_t ff_flac_blocksize_table[16];
 
 #endif /* AVCODEC_FLACDATA_H */
diff --git a/libavcodec/flacdec.c b/libavcodec/flacdec.c
index 9ca55cc..b8d45b8 100644
--- a/libavcodec/flacdec.c
+++ b/libavcodec/flacdec.c
@@ -2,20 +2,20 @@
  * FLAC (Free Lossless Audio Codec) decoder
  * Copyright (c) 2003 Alex Beregszaszi
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -33,6 +33,7 @@
 
 #include <limits.h>
 
+#include "libavutil/avassert.h"
 #include "libavutil/channel_layout.h"
 #include "libavutil/crc.h"
 #include "avcodec.h"
@@ -43,6 +44,9 @@
 #include "flac.h"
 #include "flacdata.h"
 #include "flacdsp.h"
+#include "thread.h"
+#include "unary.h"
+
 
 typedef struct FLACContext {
     FLACSTREAMINFO
@@ -127,6 +131,8 @@ static int allocate_buffers(FLACContext *s)
 {
     int buf_size;
 
+    av_assert0(s->max_blocksize);
+
     buf_size = av_samples_get_buffer_size(NULL, s->channels, s->max_blocksize,
                                           AV_SAMPLE_FMT_S32P, 0);
     if (buf_size < 0)
@@ -215,6 +221,12 @@ static int decode_residuals(FLACContext *s, int32_t *decoded, int pred_order)
     rice_order = get_bits(&s->gb, 4);
 
     samples= s->blocksize >> rice_order;
+    if (samples << rice_order != s->blocksize) {
+        av_log(s->avctx, AV_LOG_ERROR, "invalid rice order: %i blocksize %i\n",
+               rice_order, s->blocksize);
+        return AVERROR_INVALIDDATA;
+    }
+
     if (pred_order > samples) {
         av_log(s->avctx, AV_LOG_ERROR, "invalid predictor order: %i > %i\n",
                pred_order, samples);
@@ -247,7 +259,8 @@ static int decode_subframe_fixed(FLACContext *s, int32_t *decoded,
                                  int pred_order, int bps)
 {
     const int blocksize = s->blocksize;
-    int a, b, c, d, i, ret;
+    int av_uninit(a), av_uninit(b), av_uninit(c), av_uninit(d), i;
+    int ret;
 
     /* warm up samples */
     for (i = 0; i < pred_order; i++) {
@@ -352,7 +365,6 @@ static inline int decode_subframe(FLACContext *s, int channel)
 
     if (get_bits1(&s->gb)) {
         int left = get_bits_left(&s->gb);
-        wasted = 1;
         if ( left < 0 ||
             (left < bps && !show_bits_long(&s->gb, left)) ||
                            !show_bits_long(&s->gb, bps)) {
@@ -361,8 +373,7 @@ static inline int decode_subframe(FLACContext *s, int channel)
                    bps, left);
             return AVERROR_INVALIDDATA;
         }
-        while (!get_bits1(&s->gb))
-            wasted++;
+        wasted = 1 + get_unary(&s->gb, 1, get_bits_left(&s->gb));
         bps -= wasted;
     }
     if (bps > 32) {
@@ -485,6 +496,7 @@ static int flac_decode_frame(AVCodecContext *avctx, void *data,
                              int *got_frame_ptr, AVPacket *avpkt)
 {
     AVFrame *frame     = data;
+    ThreadFrame tframe = { .f = data };
     const uint8_t *buf = avpkt->data;
     int buf_size = avpkt->size;
     FLACContext *s = avctx->priv_data;
@@ -499,6 +511,16 @@ static int flac_decode_frame(AVCodecContext *avctx, void *data,
                                        FLAC_MAX_CHANNELS, 32);
     }
 
+    if (buf_size > 5 && !memcmp(buf, "\177FLAC", 5)) {
+        av_log(s->avctx, AV_LOG_DEBUG, "skipping flac header packet 1\n");
+        return buf_size;
+    }
+
+    if (buf_size > 0 && (*buf & 0x7F) == FLAC_METADATA_TYPE_VORBIS_COMMENT) {
+        av_log(s->avctx, AV_LOG_DEBUG, "skipping vorbis comment\n");
+        return buf_size;
+    }
+
     /* check that there is at least the smallest decodable amount of data.
        this amount corresponds to the smallest valid FLAC frame possible.
        FF F8 69 02 00 00 9A 00 00 34 46 */
@@ -515,19 +537,26 @@ static int flac_decode_frame(AVCodecContext *avctx, void *data,
     }
 
     /* decode frame */
-    init_get_bits(&s->gb, buf, buf_size*8);
+    if ((ret = init_get_bits8(&s->gb, buf, buf_size)) < 0)
+        return ret;
     if ((ret = decode_frame(s)) < 0) {
         av_log(s->avctx, AV_LOG_ERROR, "decode_frame() failed\n");
         return ret;
     }
-    bytes_read = (get_bits_count(&s->gb)+7)/8;
+    bytes_read = get_bits_count(&s->gb)/8;
+
+    if ((s->avctx->err_recognition & (AV_EF_CRCCHECK|AV_EF_COMPLIANT)) &&
+        av_crc(av_crc_get_table(AV_CRC_16_ANSI),
+               0, buf, bytes_read)) {
+        av_log(s->avctx, AV_LOG_ERROR, "CRC error at PTS %"PRId64"\n", avpkt->pts);
+        if (s->avctx->err_recognition & AV_EF_EXPLODE)
+            return AVERROR_INVALIDDATA;
+    }
 
     /* get output buffer */
     frame->nb_samples = s->blocksize;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_thread_get_buffer(avctx, &tframe, 0)) < 0)
         return ret;
-    }
 
     s->dsp.decorrelate[s->ch_mode](frame->data, s->decoded, s->channels,
                                    s->blocksize, s->sample_shift);
@@ -546,6 +575,17 @@ static int flac_decode_frame(AVCodecContext *avctx, void *data,
     return bytes_read;
 }
 
+static int init_thread_copy(AVCodecContext *avctx) /* set up the FLACContext of a frame-thread copy */
+{
+    FLACContext *s = avctx->priv_data;
+    s->decoded_buffer = NULL;      /* NOTE(review): presumably drops a pointer inherited from the original context - confirm */
+    s->decoded_buffer_size = 0;    /* keep the recorded size consistent with the cleared pointer */
+    s->avctx = avctx;              /* point the private context back at this AVCodecContext */
+    if (s->max_blocksize)          /* allocate_buffers() asserts max_blocksize != 0, so only call it then */
+        return allocate_buffers(s);
+    return 0;                      /* nothing to allocate yet */
+}
+
 static av_cold int flac_decode_close(AVCodecContext *avctx)
 {
     FLACContext *s = avctx->priv_data;
@@ -564,10 +604,11 @@ AVCodec ff_flac_decoder = {
     .init           = flac_decode_init,
     .close          = flac_decode_close,
     .decode         = flac_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .init_thread_copy = ONLY_IF_THREADS_ENABLED(init_thread_copy),
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16,
                                                       AV_SAMPLE_FMT_S16P,
                                                       AV_SAMPLE_FMT_S32,
                                                       AV_SAMPLE_FMT_S32P,
-                                                      -1 },
+                                                      AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/flacdsp.c b/libavcodec/flacdsp.c
index b916869..b15bc74 100644
--- a/libavcodec/flacdsp.c
+++ b/libavcodec/flacdsp.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2012 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -128,4 +128,6 @@ av_cold void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt,
 
     if (ARCH_ARM)
         ff_flacdsp_init_arm(c, fmt, bps);
+    if (ARCH_X86)
+        ff_flacdsp_init_x86(c, fmt, bps);
 }
diff --git a/libavcodec/flacdsp.h b/libavcodec/flacdsp.h
index 33184b5..272cf2a 100644
--- a/libavcodec/flacdsp.h
+++ b/libavcodec/flacdsp.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -33,5 +33,6 @@ typedef struct FLACDSPContext {
 
 void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int bps);
 void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int bps);
+void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int bps);
 
 #endif /* AVCODEC_FLACDSP_H */
diff --git a/libavcodec/flacdsp_lpc_template.c b/libavcodec/flacdsp_lpc_template.c
index 269e64b..acdac04 100644
--- a/libavcodec/flacdsp_lpc_template.c
+++ b/libavcodec/flacdsp_lpc_template.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -139,3 +139,21 @@ static void FUNC(flac_lpc_encode_c)(int32_t *res, const int32_t *smp, int len,
     }
 #endif
 }
+
+/* Comment for clarity/de-obfuscation.
+ *
+ * for (int i = order; i < len; i++) {
+ *     int32_t p = 0;
+ *     for (int j = 0; j < order; j++) {
+ *         int c = coefs[j];
+ *         int s = smp[(i-1)-j];
+ *         p    += c*s;
+ *     }
+ *     res[i] = smp[i] - (p >> shift);
+ * }
+ *
+ * The CONFIG_SMALL code above simplifies to this, in the case of SAMPLE_SIZE
+ * not being equal to 32 (at the present time that means for 16-bit audio). The
+ * code above does 2 samples per iteration.  Commit bfdd5bc (made all the way
+ * back in 2007) says that way is faster.
+ */
diff --git a/libavcodec/flacdsp_template.c b/libavcodec/flacdsp_template.c
index 0affe22..62c0a15 100644
--- a/libavcodec/flacdsp_template.c
+++ b/libavcodec/flacdsp_template.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2012 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c
index 1160da2..3a3b2ae 100644
--- a/libavcodec/flacenc.c
+++ b/libavcodec/flacenc.c
@@ -2,30 +2,31 @@
  * FLAC audio encoder
  * Copyright (c) 2006  Justin Ruggles <justin.ruggles@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/avassert.h"
 #include "libavutil/crc.h"
 #include "libavutil/intmath.h"
 #include "libavutil/md5.h"
 #include "libavutil/opt.h"
 #include "avcodec.h"
 #include "bswapdsp.h"
-#include "get_bits.h"
+#include "put_bits.h"
 #include "golomb.h"
 #include "internal.h"
 #include "lpc.h"
@@ -156,7 +157,7 @@ static int select_blocksize(int samplerate, int block_time_ms)
     int target;
     int blocksize;
 
-    assert(samplerate > 0);
+    av_assert0(samplerate > 0);
     blocksize = ff_flac_blocksize_table[1];
     target    = (samplerate * block_time_ms) / 1000;
     for (i = 0; i < 16; i++) {
@@ -250,8 +251,11 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
         break;
     }
 
-    if (channels < 1 || channels > FLAC_MAX_CHANNELS)
-        return -1;
+    if (channels < 1 || channels > FLAC_MAX_CHANNELS) {
+        av_log(avctx, AV_LOG_ERROR, "%d channels not supported (max %d)\n",
+               channels, FLAC_MAX_CHANNELS);
+        return AVERROR(EINVAL);
+    }
     s->channels = channels;
 
     /* find samplerate in table */
@@ -277,7 +281,8 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
             s->sr_code[0] = 13;
             s->sr_code[1] = freq;
         } else {
-            return -1;
+            av_log(avctx, AV_LOG_ERROR, "%d Hz not supported\n", freq);
+            return AVERROR(EINVAL);
         }
         s->samplerate = freq;
     }
@@ -292,7 +297,7 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
     if (level > 12) {
         av_log(avctx, AV_LOG_ERROR, "invalid compression level: %d\n",
                s->options.compression_level);
-        return -1;
+        return AVERROR(EINVAL);
     }
 
     s->options.block_time_ms = ((int[]){ 27, 27, 27,105,105,105,105,105,105,105,105,105,105})[level];
@@ -331,13 +336,13 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
             if (avctx->min_prediction_order > MAX_FIXED_ORDER) {
                 av_log(avctx, AV_LOG_ERROR, "invalid min prediction order: %d\n",
                        avctx->min_prediction_order);
-                return -1;
+                return AVERROR(EINVAL);
             }
         } else if (avctx->min_prediction_order < MIN_LPC_ORDER ||
                    avctx->min_prediction_order > MAX_LPC_ORDER) {
             av_log(avctx, AV_LOG_ERROR, "invalid min prediction order: %d\n",
                    avctx->min_prediction_order);
-            return -1;
+            return AVERROR(EINVAL);
         }
         s->options.min_prediction_order = avctx->min_prediction_order;
     }
@@ -348,20 +353,20 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
             if (avctx->max_prediction_order > MAX_FIXED_ORDER) {
                 av_log(avctx, AV_LOG_ERROR, "invalid max prediction order: %d\n",
                        avctx->max_prediction_order);
-                return -1;
+                return AVERROR(EINVAL);
             }
         } else if (avctx->max_prediction_order < MIN_LPC_ORDER ||
                    avctx->max_prediction_order > MAX_LPC_ORDER) {
             av_log(avctx, AV_LOG_ERROR, "invalid max prediction order: %d\n",
                    avctx->max_prediction_order);
-            return -1;
+            return AVERROR(EINVAL);
         }
         s->options.max_prediction_order = avctx->max_prediction_order;
     }
     if (s->options.max_prediction_order < s->options.min_prediction_order) {
         av_log(avctx, AV_LOG_ERROR, "invalid prediction orders: min=%d max=%d\n",
                s->options.min_prediction_order, s->options.max_prediction_order);
-        return -1;
+        return AVERROR(EINVAL);
     }
 
     if (avctx->frame_size > 0) {
@@ -369,7 +374,7 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
                 avctx->frame_size > FLAC_MAX_BLOCKSIZE) {
             av_log(avctx, AV_LOG_ERROR, "invalid block size: %d\n",
                    avctx->frame_size);
-            return -1;
+            return AVERROR(EINVAL);
         }
     } else {
         s->avctx->frame_size = select_blocksize(s->samplerate, s->options.block_time_ms);
@@ -397,6 +402,28 @@ static av_cold int flac_encode_init(AVCodecContext *avctx)
     s->frame_count   = 0;
     s->min_framesize = s->max_framesize;
 
+    if (channels == 3 &&
+            avctx->channel_layout != (AV_CH_LAYOUT_STEREO|AV_CH_FRONT_CENTER) ||
+        channels == 4 &&
+            avctx->channel_layout != AV_CH_LAYOUT_2_2 &&
+            avctx->channel_layout != AV_CH_LAYOUT_QUAD ||
+        channels == 5 &&
+            avctx->channel_layout != AV_CH_LAYOUT_5POINT0 &&
+            avctx->channel_layout != AV_CH_LAYOUT_5POINT0_BACK ||
+        channels == 6 &&
+            avctx->channel_layout != AV_CH_LAYOUT_5POINT1 &&
+            avctx->channel_layout != AV_CH_LAYOUT_5POINT1_BACK) {
+        if (avctx->channel_layout) {
+            av_log(avctx, AV_LOG_ERROR, "Channel layout not supported by Flac, "
+                                             "output stream will have incorrect "
+                                             "channel layout.\n");
+        } else {
+            av_log(avctx, AV_LOG_WARNING, "No channel layout specified. The encoder "
+                                               "will use Flac channel layout for "
+                                               "%d channels.\n", channels);
+        }
+    }
+
     ret = ff_lpc_init(&s->lpc_ctx, avctx->frame_size,
                       s->options.max_prediction_order, FF_LPC_TYPE_LEVINSON);
 
@@ -619,13 +646,13 @@ static uint64_t calc_rice_params(RiceContext *rc, int pmin, int pmax,
     uint32_t *udata;
     uint64_t sums[MAX_PARTITION_ORDER+1][MAX_PARTITIONS];
 
-    assert(pmin >= 0 && pmin <= MAX_PARTITION_ORDER);
-    assert(pmax >= 0 && pmax <= MAX_PARTITION_ORDER);
-    assert(pmin <= pmax);
+    av_assert1(pmin >= 0 && pmin <= MAX_PARTITION_ORDER);
+    av_assert1(pmax >= 0 && pmax <= MAX_PARTITION_ORDER);
+    av_assert1(pmin <= pmax);
 
     tmp_rc.coding_mode = rc->coding_mode;
 
-    udata = av_malloc(n * sizeof(uint32_t));
+    udata = av_malloc_array(n,  sizeof(uint32_t));
     for (i = 0; i < n; i++)
         udata[i] = (2*data[i]) ^ (data[i]>>31);
 
@@ -1260,10 +1287,8 @@ static int flac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
         }
     }
 
-    if ((ret = ff_alloc_packet(avpkt, frame_bytes))) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+    if ((ret = ff_alloc_packet2(avctx, avpkt, frame_bytes)) < 0)
         return ret;
-    }
 
     out_bytes = write_frame(s, avpkt);
 
@@ -1310,7 +1335,7 @@ static const AVOption options[] = {
 { "fixed",    NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_LPC_TYPE_FIXED },    INT_MIN, INT_MAX, FLAGS, "lpc_type" },
 { "levinson", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_LPC_TYPE_LEVINSON }, INT_MIN, INT_MAX, FLAGS, "lpc_type" },
 { "cholesky", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_LPC_TYPE_CHOLESKY }, INT_MIN, INT_MAX, FLAGS, "lpc_type" },
-{ "lpc_passes", "Number of passes to use for Cholesky factorization during LPC analysis", offsetof(FlacEncodeContext, options.lpc_passes),  AV_OPT_TYPE_INT, {.i64 = 1 }, 1, INT_MAX, FLAGS },
+{ "lpc_passes", "Number of passes to use for Cholesky factorization during LPC analysis", offsetof(FlacEncodeContext, options.lpc_passes),  AV_OPT_TYPE_INT, {.i64 = 2 }, 1, INT_MAX, FLAGS },
 { "min_partition_order",  NULL, offsetof(FlacEncodeContext, options.min_partition_order),  AV_OPT_TYPE_INT, {.i64 = -1 },      -1, MAX_PARTITION_ORDER, FLAGS },
 { "max_partition_order",  NULL, offsetof(FlacEncodeContext, options.max_partition_order),  AV_OPT_TYPE_INT, {.i64 = -1 },      -1, MAX_PARTITION_ORDER, FLAGS },
 { "prediction_order_method", "Search method for selecting prediction order", offsetof(FlacEncodeContext, options.prediction_order_method), AV_OPT_TYPE_INT, {.i64 = -1 }, -1, ORDER_METHOD_LOG, FLAGS, "predm" },
@@ -1345,7 +1370,7 @@ AVCodec ff_flac_encoder = {
     .init           = flac_encode_init,
     .encode2        = flac_encode_frame,
     .close          = flac_encode_close,
-    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
+    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY | CODEC_CAP_LOSSLESS,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_S32,
                                                      AV_SAMPLE_FMT_NONE },
diff --git a/libavcodec/flashsv.c b/libavcodec/flashsv.c
index de7979c..8791a2d 100644
--- a/libavcodec/flashsv.c
+++ b/libavcodec/flashsv.c
@@ -3,20 +3,20 @@
  * Copyright (C) 2004 Alex Beregszaszi
  * Copyright (C) 2006 Benjamin Larsson
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -69,7 +69,7 @@ typedef struct FlashSVContext {
     int             diff_start, diff_height;
 } FlashSVContext;
 
-static int decode_hybrid(const uint8_t *sptr, uint8_t *dptr, int dx, int dy,
+static int decode_hybrid(const uint8_t *sptr, const uint8_t *sptr_end, uint8_t *dptr, int dx, int dy,
                          int h, int w, int stride, const uint32_t *pal)
 {
     int x, y;
@@ -78,6 +78,8 @@ static int decode_hybrid(const uint8_t *sptr, uint8_t *dptr, int dx, int dy,
     for (y = dx + h; y > dx; y--) {
         uint8_t *dst = dptr + (y * stride) + dy * 3;
         for (x = 0; x < w; x++) {
+            if (sptr >= sptr_end)
+                return AVERROR_INVALIDDATA;
             if (*sptr & 0x80) {
                 /* 15-bit color */
                 unsigned c = AV_RB16(sptr) & ~0x8000;
@@ -107,7 +109,7 @@ static av_cold int flashsv_decode_end(AVCodecContext *avctx)
     av_frame_free(&s->frame);
 
     /* free the tmpblock */
-    av_free(s->tmpblock);
+    av_freep(&s->tmpblock);
 
     return 0;
 }
@@ -142,6 +144,9 @@ static int flashsv2_prime(FlashSVContext *s, uint8_t *src, int size)
     z_stream zs;
     int zret; // Zlib return code
 
+    if (!src)
+        return AVERROR_INVALIDDATA;
+
     zs.zalloc = NULL;
     zs.zfree  = NULL;
     zs.opaque = NULL;
@@ -152,7 +157,8 @@ static int flashsv2_prime(FlashSVContext *s, uint8_t *src, int size)
     s->zstream.avail_out = s->block_size * 3;
     inflate(&s->zstream, Z_SYNC_FLUSH);
 
-    deflateInit(&zs, 0);
+    if (deflateInit(&zs, 0) != Z_OK)
+        return -1;
     zs.next_in   = s->tmpblock;
     zs.avail_in  = s->block_size * 3 - s->zstream.avail_out;
     zs.next_out  = s->deflate_block;
@@ -228,10 +234,15 @@ static int flashsv_decode_block(AVCodecContext *avctx, AVPacket *avpkt,
         }
     } else {
         /* hybrid 15-bit/palette mode */
-        decode_hybrid(s->tmpblock, s->frame->data[0],
+        ret = decode_hybrid(s->tmpblock, s->zstream.next_out,
+                      s->frame->data[0],
                       s->image_height - (y_pos + 1 + s->diff_height),
                       x_pos, s->diff_height, width,
                       s->frame->linesize[0], s->pal);
+        if (ret < 0) {
+            av_log(avctx, AV_LOG_ERROR, "decode_hybrid failed\n");
+            return ret;
+        }
     }
     skip_bits_long(gb, 8 * block_size); /* skip the consumed bits */
     return 0;
@@ -260,6 +271,8 @@ static int flashsv_decode_frame(AVCodecContext *avctx, void *data,
     FlashSVContext *s = avctx->priv_data;
     int h_blocks, v_blocks, h_part, v_part, i, j, ret;
     GetBitContext gb;
+    int last_blockwidth = s->block_width;
+    int last_blockheight= s->block_height;
 
     /* no supplementary picture */
     if (buf_size == 0)
@@ -267,7 +280,8 @@ static int flashsv_decode_frame(AVCodecContext *avctx, void *data,
     if (buf_size < 4)
         return -1;
 
-    init_get_bits(&gb, avpkt->data, buf_size * 8);
+    if ((ret = init_get_bits8(&gb, avpkt->data, buf_size)) < 0)
+        return ret;
 
     /* start to parse the bitstream */
     s->block_width  = 16 * (get_bits(&gb, 4) + 1);
@@ -275,6 +289,10 @@ static int flashsv_decode_frame(AVCodecContext *avctx, void *data,
     s->block_height = 16 * (get_bits(&gb, 4) + 1);
     s->image_height = get_bits(&gb, 12);
 
+    if (   last_blockwidth != s->block_width
+        || last_blockheight!= s->block_height)
+        av_freep(&s->blocks);
+
     if (s->ver == 2) {
         skip_bits(&gb, 6);
         if (get_bits1(&gb)) {
@@ -322,8 +340,8 @@ static int flashsv_decode_frame(AVCodecContext *avctx, void *data,
 
     /* initialize the image size once */
     if (avctx->width == 0 && avctx->height == 0) {
-        avctx->width  = s->image_width;
-        avctx->height = s->image_height;
+        if ((ret = ff_set_dimensions(avctx, s->image_width, s->image_height)) < 0)
+            return ret;
     }
 
     /* check for changes of image width and image height */
@@ -342,19 +360,17 @@ static int flashsv_decode_frame(AVCodecContext *avctx, void *data,
         if ((err = av_reallocp(&s->keyframedata, avpkt->size)) < 0)
             return err;
         memcpy(s->keyframedata, avpkt->data, avpkt->size);
-        if ((err = av_reallocp(&s->blocks, (v_blocks + !!v_part) *
-                               (h_blocks + !!h_part) * sizeof(s->blocks[0]))) < 0)
-            return err;
     }
+    if(s->ver == 2 && !s->blocks)
+        s->blocks = av_mallocz((v_blocks + !!v_part) * (h_blocks + !!h_part) *
+                               sizeof(s->blocks[0]));
 
     av_dlog(avctx, "image: %dx%d block: %dx%d num: %dx%d part: %dx%d\n",
             s->image_width, s->image_height, s->block_width, s->block_height,
             h_blocks, v_blocks, h_part, v_part);
 
-    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0)
         return ret;
-    }
 
     /* loop over all block columns */
     for (j = 0; j < v_blocks + (v_part ? 1 : 0); j++) {
diff --git a/libavcodec/flashsv2enc.c b/libavcodec/flashsv2enc.c
new file mode 100644
index 0000000..436daa4
--- /dev/null
+++ b/libavcodec/flashsv2enc.c
@@ -0,0 +1,920 @@
+/*
+ * Flash Screen Video Version 2 encoder
+ * Copyright (C) 2009 Joshua Warner
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Flash Screen Video Version 2 encoder
+ * @author Joshua Warner
+ */
+
+/* Differences from version 1 stream:
+ * NOTE: Currently, the only player that supports version 2 streams is Adobe Flash Player itself.
+ * * Supports sending only a range of scanlines in a block,
+ *   indicating a difference from the corresponding block in the last keyframe.
+ * * Supports initializing the zlib dictionary with data from the corresponding
+ *   block in the last keyframe, to improve compression.
+ * * Supports a hybrid 15-bit rgb / 7-bit palette color space.
+ */
+
+/* TODO:
+ * Don't keep Block structures for both current frame and keyframe.
+ * Make better heuristics for deciding stream parameters (optimum_* functions).  Currently these return constants.
+ * Figure out how to encode palette information in the stream, choose an optimum palette at each keyframe.
+ * Figure out how the zlibPrimeCompressCurrent flag works, implement support.
+ * Find other sample files (that weren't generated here), develop a decoder.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <zlib.h>
+
+#include "libavutil/imgutils.h"
+#include "avcodec.h"
+#include "internal.h"
+#include "put_bits.h"
+#include "bytestream.h"
+
+#define HAS_IFRAME_IMAGE 0x02
+#define HAS_PALLET_INFO 0x01
+
+#define COLORSPACE_BGR 0x00
+#define COLORSPACE_15_7 0x10
+#define HAS_DIFF_BLOCKS 0x04
+#define ZLIB_PRIME_COMPRESS_CURRENT 0x02
+#define ZLIB_PRIME_COMPRESS_PREVIOUS 0x01
+
+// Disables experimental "smart" parameter-choosing code, as well as the statistics that it depends on.
+// At the moment, the "smart" code is a great example of how the parameters *shouldn't* be chosen.
+#define FLASHSV2_DUMB
+
+typedef struct Block {
+    uint8_t *enc;
+    uint8_t *sl_begin, *sl_end;
+    int enc_size;
+    uint8_t *data;
+    unsigned long data_size;
+
+    uint8_t start, len;
+    uint8_t dirty;
+    uint8_t col, row, width, height;
+    uint8_t flags;
+} Block;
+
+typedef struct Palette {
+    unsigned colors[128];
+    uint8_t index[1 << 15];
+} Palette;
+
+typedef struct FlashSV2Context {
+    AVCodecContext *avctx;
+    uint8_t *current_frame;
+    uint8_t *key_frame;
+    uint8_t *encbuffer;
+    uint8_t *keybuffer;
+    uint8_t *databuffer;
+
+    uint8_t *blockbuffer;
+    int blockbuffer_size;
+
+    Block *frame_blocks;
+    Block *key_blocks;
+    int frame_size;
+    int blocks_size;
+
+    int use15_7, dist, comp;
+
+    int rows, cols;
+
+    int last_key_frame;
+
+    int image_width, image_height;
+    int block_width, block_height;
+    uint8_t flags;
+    uint8_t use_custom_palette;
+    uint8_t palette_type;       ///< 0=>default, 1=>custom - changed when palette regenerated.
+    Palette palette;
+#ifndef FLASHSV2_DUMB
+    double tot_blocks;          ///< blocks encoded since last keyframe
+    double diff_blocks;         ///< blocks that were different since last keyframe
+    double tot_lines;           ///< total scanlines in image since last keyframe
+    double diff_lines;          ///< scanlines that were different since last keyframe
+    double raw_size;            ///< size of raw frames since last keyframe
+    double comp_size;           ///< size of compressed data since last keyframe
+    double uncomp_size;         ///< size of uncompressed data since last keyframe
+
+    double total_bits;          ///< total bits written to stream so far
+#endif
+} FlashSV2Context;
+
+static av_cold void cleanup(FlashSV2Context * s)
+{
+    av_freep(&s->encbuffer);
+    av_freep(&s->keybuffer);
+    av_freep(&s->databuffer);
+    av_freep(&s->blockbuffer);
+    av_freep(&s->current_frame);
+    av_freep(&s->key_frame);
+
+    av_freep(&s->frame_blocks);
+    av_freep(&s->key_blocks);
+}
+
+static void init_blocks(FlashSV2Context * s, Block * blocks,
+                        uint8_t * encbuf, uint8_t * databuf)
+{
+    int row, col;
+    Block *b;
+    for (col = 0; col < s->cols; col++) {
+        for (row = 0; row < s->rows; row++) {
+            b = blocks + (col + row * s->cols);
+            b->width = (col < s->cols - 1) ?
+                s->block_width :
+                s->image_width - col * s->block_width;
+
+            b->height = (row < s->rows - 1) ?
+                s->block_height :
+                s->image_height - row * s->block_height;
+
+            b->row   = row;
+            b->col   = col;
+            b->enc   = encbuf;
+            b->data  = databuf;
+            encbuf  += b->width * b->height * 3;
+            databuf += !databuf ? 0 : b->width * b->height * 6;
+        }
+    }
+}
+
+static void reset_stats(FlashSV2Context * s)
+{
+#ifndef FLASHSV2_DUMB
+    s->diff_blocks = 0.1;
+    s->tot_blocks = 1;
+    s->diff_lines = 0.1;
+    s->tot_lines = 1;
+    s->raw_size = s->comp_size = s->uncomp_size = 10;
+#endif
+}
+
+/* Validate the encoder settings and allocate the working buffers.
+ * Returns 0 on success and -1 on a bad parameter or a failed allocation. */
+static av_cold int flashsv2_encode_init(AVCodecContext * avctx)
+{
+    FlashSV2Context *s = avctx->priv_data;
+
+    s->avctx = avctx;
+
+    s->comp = avctx->compression_level;
+    if (s->comp == -1)
+        s->comp = 9;
+    if (s->comp < 0 || s->comp > 9) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Compression level should be 0-9, not %d\n", s->comp);
+        return -1;
+    }
+
+    if ((avctx->width > 4095) || (avctx->height > 4095)) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Input dimensions too large, input must be max 4095x4095 !\n");
+        return -1;
+    }
+    if ((avctx->width < 16) || (avctx->height < 16)) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Input dimensions too small, input must be at least 16x16 !\n");
+        return -1;
+    }
+
+    if (av_image_check_size(avctx->width, avctx->height, 0, avctx) < 0)
+        return -1;
+
+    s->last_key_frame = 0;
+
+    s->image_width  = avctx->width;
+    s->image_height = avctx->height;
+
+    s->block_width  = (s->image_width /  12) & ~15;
+    s->block_height = (s->image_height / 12) & ~15;
+
+    if(!s->block_width)
+        s->block_width = 1;
+    if(!s->block_height)
+        s->block_height = 1;
+
+    s->rows = (s->image_height + s->block_height - 1) / s->block_height;
+    s->cols = (s->image_width +  s->block_width -  1) / s->block_width;
+
+    s->frame_size  = s->image_width * s->image_height * 3;
+    s->blocks_size = s->rows * s->cols * sizeof(Block);
+
+    s->encbuffer     = av_mallocz(s->frame_size);
+    s->keybuffer     = av_mallocz(s->frame_size);
+    s->databuffer    = av_mallocz(s->frame_size * 6);
+    s->current_frame = av_mallocz(s->frame_size);
+    s->key_frame     = av_mallocz(s->frame_size);
+    s->frame_blocks  = av_mallocz(s->blocks_size);
+    s->key_blocks    = av_mallocz(s->blocks_size);
+
+    s->blockbuffer      = NULL;
+    s->blockbuffer_size = 0;
+    /* init_blocks() writes into both Block arrays, so fail on OOM before it. */
+    if (!s->encbuffer || !s->keybuffer || !s->databuffer
+        || !s->current_frame || !s->key_frame || !s->key_blocks
+        || !s->frame_blocks) {
+        av_log(avctx, AV_LOG_ERROR, "Memory allocation failed.\n");
+        cleanup(s);
+        return -1;
+    }
+
+    init_blocks(s, s->frame_blocks, s->encbuffer, s->databuffer);
+    init_blocks(s, s->key_blocks,   s->keybuffer, 0);
+    reset_stats(s);
+#ifndef FLASHSV2_DUMB
+    s->total_bits = 1;
+#endif
+
+    s->use_custom_palette =  0;
+    s->palette_type       = -1;        // so that the palette will be generated in reconfigure_at_keyframe
+
+    return 0;
+}
+
+static int new_key_frame(FlashSV2Context * s)
+{
+    int i;
+    memcpy(s->key_blocks, s->frame_blocks, s->blocks_size);
+    memcpy(s->key_frame, s->current_frame, s->frame_size);
+
+    for (i = 0; i < s->rows * s->cols; i++) {
+        s->key_blocks[i].enc += (s->keybuffer - s->encbuffer);
+        s->key_blocks[i].sl_begin = 0;
+        s->key_blocks[i].sl_end   = 0;
+        s->key_blocks[i].data     = 0;
+    }
+    memcpy(s->keybuffer, s->encbuffer, s->frame_size);
+
+    return 0;
+}
+
+static int write_palette(FlashSV2Context * s, uint8_t * buf, int buf_size)
+{
+    //this isn't implemented yet!  Default palette only!
+    return -1;
+}
+
+static int write_header(FlashSV2Context * s, uint8_t * buf, int buf_size)
+{
+    PutBitContext pb;
+    int buf_pos, len;
+
+    if (buf_size < 5)
+        return -1;
+
+    init_put_bits(&pb, buf, buf_size * 8);
+
+    put_bits(&pb, 4, (s->block_width  >> 4) - 1);
+    put_bits(&pb, 12, s->image_width);
+    put_bits(&pb, 4, (s->block_height >> 4) - 1);
+    put_bits(&pb, 12, s->image_height);
+
+    flush_put_bits(&pb);
+    buf_pos = 4;
+
+    buf[buf_pos++] = s->flags;
+
+    if (s->flags & HAS_PALLET_INFO) {
+        len = write_palette(s, buf + buf_pos, buf_size - buf_pos);
+        if (len < 0)
+            return -1;
+        buf_pos += len;
+    }
+
+    return buf_pos;
+}
+
+static int write_block(Block * b, uint8_t * buf, int buf_size)
+{
+    int buf_pos = 0;
+    unsigned block_size = b->data_size;
+
+    if (b->flags & HAS_DIFF_BLOCKS)
+        block_size += 2;
+    if (b->flags & ZLIB_PRIME_COMPRESS_CURRENT)
+        block_size += 2;
+    if (block_size > 0)
+        block_size += 1;
+    if (buf_size < block_size + 2)
+        return -1;
+
+    buf[buf_pos++] = block_size >> 8;
+    buf[buf_pos++] = block_size;
+
+    if (block_size == 0)
+        return buf_pos;
+
+    buf[buf_pos++] = b->flags;
+
+    if (b->flags & HAS_DIFF_BLOCKS) {
+        buf[buf_pos++] = (b->start);
+        buf[buf_pos++] = (b->len);
+    }
+
+    if (b->flags & ZLIB_PRIME_COMPRESS_CURRENT) {
+        //This feature of the format is poorly understood, and as of now, unused.
+        buf[buf_pos++] = (b->col);
+        buf[buf_pos++] = (b->row);
+    }
+
+    memcpy(buf + buf_pos, b->data, b->data_size);
+
+    buf_pos += b->data_size;
+
+    return buf_pos;
+}
+
+static int encode_zlib(Block * b, uint8_t * buf, unsigned long *buf_size, int comp)
+{
+    int res = compress2(buf, buf_size, b->sl_begin, b->sl_end - b->sl_begin, comp);
+    return res == Z_OK ? 0 : -1;
+}
+
+/* Deflate b's sl_begin..sl_end span into buf after first running prime->enc
+ * through the compressor; *buf_size is capacity on entry, bytes used on exit. */
+static int encode_zlibprime(Block * b, Block * prime, uint8_t * buf,
+                            int *buf_size, int comp)
+{
+    z_stream s = { .zalloc = NULL, .zfree = NULL, .opaque = NULL };
+    int res;
+    if (deflateInit(&s, comp) < 0)
+        return -1;
+
+    s.next_in  = prime->enc;
+    s.avail_in = prime->enc_size;
+    while (s.avail_in > 0) {
+        s.next_out  = buf;
+        s.avail_out = *buf_size;
+        res = deflate(&s, Z_SYNC_FLUSH);
+        if (res < 0) {
+            deflateEnd(&s); /* release the stream state instead of leaking it */
+            return -1;
+        }
+    }
+
+    s.next_in   = b->sl_begin;
+    s.avail_in  = b->sl_end - b->sl_begin;
+    s.next_out  = buf;
+    s.avail_out = *buf_size;
+    res = deflate(&s, Z_FINISH);
+    deflateEnd(&s);
+    *buf_size -= s.avail_out;
+    if (res != Z_STREAM_END)
+        return -1;
+    return 0;
+}
+
+static int encode_bgr(Block * b, const uint8_t * src, int stride)
+{
+    int i;
+    uint8_t *ptr = b->enc;
+    for (i = 0; i < b->start; i++)
+        memcpy(ptr + i * b->width * 3, src + i * stride, b->width * 3);
+    b->sl_begin = ptr + i * b->width * 3;
+    for (; i < b->start + b->len; i++)
+        memcpy(ptr + i * b->width * 3, src + i * stride, b->width * 3);
+    b->sl_end = ptr + i * b->width * 3;
+    for (; i < b->height; i++)
+        memcpy(ptr + i * b->width * 3, src + i * stride, b->width * 3);
+    b->enc_size = ptr + i * b->width * 3 - b->enc;
+    return b->enc_size;
+}
+
+static inline unsigned pixel_color15(const uint8_t * src)
+{
+    return (src[0] >> 3) | ((src[1] & 0xf8) << 2) | ((src[2] & 0xf8) << 7);
+}
+
+static inline unsigned int chroma_diff(unsigned int c1, unsigned int c2)
+{
+    unsigned int t1 = (c1 & 0x000000ff) + ((c1 & 0x0000ff00) >> 8) + ((c1 & 0x00ff0000) >> 16);
+    unsigned int t2 = (c2 & 0x000000ff) + ((c2 & 0x0000ff00) >> 8) + ((c2 & 0x00ff0000) >> 16);
+
+    return abs(t1 - t2) + abs((c1 & 0x000000ff) - (c2 & 0x000000ff)) +
+        abs(((c1 & 0x0000ff00) >> 8) - ((c2 & 0x0000ff00) >> 8)) +
+        abs(((c1 & 0x00ff0000) >> 16) - ((c2 & 0x00ff0000) >> 16));
+}
+
+static inline int pixel_color7_fast(Palette * palette, unsigned c15)
+{
+    return palette->index[c15];
+}
+
+static int pixel_color7_slow(Palette * palette, unsigned color)
+{
+    int i, min = 0x7fffffff;
+    int minc = -1;
+    for (i = 0; i < 128; i++) {
+        int c1 = palette->colors[i];
+        int diff = chroma_diff(c1, color);
+        if (diff < min) {
+            min = diff;
+            minc = i;
+        }
+    }
+    return minc;
+}
+
+static inline unsigned pixel_bgr(const uint8_t * src)
+{
+    return (src[0]) | (src[1] << 8) | (src[2] << 16);
+}
+
+static int write_pixel_15_7(Palette * palette, uint8_t * dest, const uint8_t * src,
+                            int dist)
+{
+    unsigned c15 = pixel_color15(src);
+    unsigned color = pixel_bgr(src);
+    int d15 = chroma_diff(color, color & 0x00f8f8f8);
+    int c7 = pixel_color7_fast(palette, c15);
+    int d7 = chroma_diff(color, palette->colors[c7]);
+    if (dist + d15 >= d7) {
+        dest[0] = c7;
+        return 1;
+    } else {
+        dest[0] = 0x80 | (c15 >> 8);
+        dest[1] = c15 & 0xff;
+        return 2;
+    }
+}
+
+static int update_palette_index(Palette * palette)
+{
+    int r, g, b;
+    unsigned int bgr, c15, index;
+    for (r = 4; r < 256; r += 8) {
+        for (g = 4; g < 256; g += 8) {
+            for (b = 4; b < 256; b += 8) {
+                bgr = b | (g << 8) | (r << 16);
+                c15 = (b >> 3) | ((g & 0xf8) << 2) | ((r & 0xf8) << 7);
+                index = pixel_color7_slow(palette, bgr);
+
+                palette->index[c15] = index;
+            }
+        }
+    }
+    return 0;
+}
+
+static const unsigned int default_screen_video_v2_palette[128] = {
+    0x00000000, 0x00333333, 0x00666666, 0x00999999, 0x00CCCCCC, 0x00FFFFFF,
+    0x00330000, 0x00660000, 0x00990000, 0x00CC0000, 0x00FF0000, 0x00003300,
+    0x00006600, 0x00009900, 0x0000CC00, 0x0000FF00, 0x00000033, 0x00000066,
+    0x00000099, 0x000000CC, 0x000000FF, 0x00333300, 0x00666600, 0x00999900,
+    0x00CCCC00, 0x00FFFF00, 0x00003333, 0x00006666, 0x00009999, 0x0000CCCC,
+    0x0000FFFF, 0x00330033, 0x00660066, 0x00990099, 0x00CC00CC, 0x00FF00FF,
+    0x00FFFF33, 0x00FFFF66, 0x00FFFF99, 0x00FFFFCC, 0x00FF33FF, 0x00FF66FF,
+    0x00FF99FF, 0x00FFCCFF, 0x0033FFFF, 0x0066FFFF, 0x0099FFFF, 0x00CCFFFF,
+    0x00CCCC33, 0x00CCCC66, 0x00CCCC99, 0x00CCCCFF, 0x00CC33CC, 0x00CC66CC,
+    0x00CC99CC, 0x00CCFFCC, 0x0033CCCC, 0x0066CCCC, 0x0099CCCC, 0x00FFCCCC,
+    0x00999933, 0x00999966, 0x009999CC, 0x009999FF, 0x00993399, 0x00996699,
+    0x0099CC99, 0x0099FF99, 0x00339999, 0x00669999, 0x00CC9999, 0x00FF9999,
+    0x00666633, 0x00666699, 0x006666CC, 0x006666FF, 0x00663366, 0x00669966,
+    0x0066CC66, 0x0066FF66, 0x00336666, 0x00996666, 0x00CC6666, 0x00FF6666,
+    0x00333366, 0x00333399, 0x003333CC, 0x003333FF, 0x00336633, 0x00339933,
+    0x0033CC33, 0x0033FF33, 0x00663333, 0x00993333, 0x00CC3333, 0x00FF3333,
+    0x00003366, 0x00336600, 0x00660033, 0x00006633, 0x00330066, 0x00663300,
+    0x00336699, 0x00669933, 0x00993366, 0x00339966, 0x00663399, 0x00996633,
+    0x006699CC, 0x0099CC66, 0x00CC6699, 0x0066CC99, 0x009966CC, 0x00CC9966,
+    0x0099CCFF, 0x00CCFF99, 0x00FF99CC, 0x0099FFCC, 0x00CC99FF, 0x00FFCC99,
+    0x00111111, 0x00222222, 0x00444444, 0x00555555, 0x00AAAAAA, 0x00BBBBBB,
+    0x00DDDDDD, 0x00EEEEEE
+};
+
+static int generate_default_palette(Palette * palette)
+{
+    memcpy(palette->colors, default_screen_video_v2_palette,
+           sizeof(default_screen_video_v2_palette));
+
+    return update_palette_index(palette);
+}
+
+static int generate_optimum_palette(Palette * palette, const uint8_t * image,
+                                   int width, int height, int stride)
+{
+    //this isn't implemented yet!  Default palette only!
+    return -1;
+}
+
+static inline int encode_15_7_sl(Palette * palette, uint8_t * dest,
+                                 const uint8_t * src, int width, int dist)
+{
+    int len = 0, x;
+    for (x = 0; x < width; x++) {
+        len += write_pixel_15_7(palette, dest + len, src + 3 * x, dist);
+    }
+    return len;
+}
+
+static int encode_15_7(Palette * palette, Block * b, const uint8_t * src,
+                       int stride, int dist)
+{
+    int i;
+    uint8_t *ptr = b->enc;
+    for (i = 0; i < b->start; i++)
+        ptr += encode_15_7_sl(palette, ptr, src + i * stride, b->width, dist);
+    b->sl_begin = ptr;
+    for (; i < b->start + b->len; i++)
+        ptr += encode_15_7_sl(palette, ptr, src + i * stride, b->width, dist);
+    b->sl_end = ptr;
+    for (; i < b->height; i++)
+        ptr += encode_15_7_sl(palette, ptr, src + i * stride, b->width, dist);
+    b->enc_size = ptr - b->enc;
+    return b->enc_size;
+}
+
+static int encode_block(FlashSV2Context *s, Palette * palette, Block * b,
+                        Block * prev, const uint8_t * src, int stride, int comp,
+                        int dist, int keyframe)
+{
+    unsigned buf_size = b->width * b->height * 6;
+    uint8_t *buf = s->blockbuffer;
+    int res;
+
+    if (b->flags & COLORSPACE_15_7) {
+        encode_15_7(palette, b, src, stride, dist);
+    } else {
+        encode_bgr(b, src, stride);
+    }
+
+    if (b->len > 0) {
+        b->data_size = buf_size;
+        res = encode_zlib(b, b->data, &b->data_size, comp);
+        if (res)
+            return res;
+
+        if (!keyframe) {
+            res = encode_zlibprime(b, prev, buf, &buf_size, comp);
+            if (res)
+                return res;
+
+            if (buf_size < b->data_size) {
+                b->data_size = buf_size;
+                memcpy(b->data, buf, buf_size);
+                b->flags |= ZLIB_PRIME_COMPRESS_PREVIOUS;
+            }
+        }
+    } else {
+        b->data_size = 0;
+    }
+    return 0;
+}
+
+static int compare_sl(FlashSV2Context * s, Block * b, const uint8_t * src,
+                      uint8_t * frame, uint8_t * key, int y, int keyframe)
+{
+    if (memcmp(src, frame, b->width * 3) != 0) {
+        b->dirty = 1;
+        memcpy(frame, src, b->width * 3);
+#ifndef FLASHSV2_DUMB
+        s->diff_lines++;
+#endif
+    }
+    if (memcmp(src, key, b->width * 3) != 0) {
+        if (b->len == 0)
+            b->start = y;
+        b->len = y + 1 - b->start;
+    }
+    return 0;
+}
+
+static int mark_all_blocks(FlashSV2Context * s, const uint8_t * src, int stride,
+                           int keyframe)
+{
+    int sl, rsl, col, pos, possl;
+    Block *b;
+    for (sl = s->image_height - 1; sl >= 0; sl--) {
+        for (col = 0; col < s->cols; col++) {
+            rsl = s->image_height - sl - 1;
+            b = s->frame_blocks + col + rsl / s->block_height * s->cols;
+            possl = stride * sl + col * s->block_width * 3;
+            pos = s->image_width * rsl * 3 + col * s->block_width * 3;
+            compare_sl(s, b, src + possl, s->current_frame + pos,
+                       s->key_frame + pos, rsl % s->block_height, keyframe);
+        }
+    }
+#ifndef FLASHSV2_DUMB
+    s->tot_lines += s->image_height * s->cols;
+#endif
+    return 0;
+}
+
+static int encode_all_blocks(FlashSV2Context * s, int keyframe)
+{
+    int row, col, res;
+    uint8_t *data;
+    Block *b, *prev;
+    for (row = 0; row < s->rows; row++) {
+        for (col = 0; col < s->cols; col++) {
+            b = s->frame_blocks + (row * s->cols + col);
+            prev = s->key_blocks + (row * s->cols + col);
+            b->flags = s->use15_7 ? COLORSPACE_15_7 : 0;
+            if (keyframe) {
+                b->start = 0;
+                b->len = b->height;
+            } else if (!b->dirty) {
+                b->start = 0;
+                b->len = 0;
+                b->data_size = 0;
+                continue;
+            } else if (b->start != 0 || b->len != b->height) {
+                b->flags |= HAS_DIFF_BLOCKS;
+            }
+            data = s->current_frame + s->image_width * 3 * s->block_height * row + s->block_width * col * 3;
+            res = encode_block(s, &s->palette, b, prev, data, s->image_width * 3, s->comp, s->dist, keyframe);
+#ifndef FLASHSV2_DUMB
+            if (b->dirty)
+                s->diff_blocks++;
+            s->comp_size += b->data_size;
+            s->uncomp_size += b->enc_size;
+#endif
+            if (res)
+                return res;
+        }
+    }
+#ifndef FLASHSV2_DUMB
+    s->raw_size += s->image_width * s->image_height * 3;
+    s->tot_blocks += s->rows * s->cols;
+#endif
+    return 0;
+}
+
+static int write_all_blocks(FlashSV2Context * s, uint8_t * buf,
+                            int buf_size)
+{
+    int row, col, buf_pos = 0, len;
+    Block *b;
+    for (row = 0; row < s->rows; row++) {
+        for (col = 0; col < s->cols; col++) {
+            b = s->frame_blocks + row * s->cols + col;
+            len = write_block(b, buf + buf_pos, buf_size - buf_pos);
+            b->start = b->len = b->dirty = 0;
+            if (len < 0)
+                return len;
+            buf_pos += len;
+        }
+    }
+    return buf_pos;
+}
+
+static int write_bitstream(FlashSV2Context * s, const uint8_t * src, int stride,
+                           uint8_t * buf, int buf_size, int keyframe)
+{
+    int buf_pos, res;
+
+    res = mark_all_blocks(s, src, stride, keyframe);
+    if (res)
+        return res;
+    res = encode_all_blocks(s, keyframe);
+    if (res)
+        return res;
+
+    res = write_header(s, buf, buf_size);
+    if (res < 0) {
+        return res;
+    } else {
+        buf_pos = res;
+    }
+    res = write_all_blocks(s, buf + buf_pos, buf_size - buf_pos);
+    if (res < 0)
+        return res;
+    buf_pos += res;
+#ifndef FLASHSV2_DUMB
+    s->total_bits += ((double) buf_pos) * 8.0;
+#endif
+
+    return buf_pos;
+}
+
+static void recommend_keyframe(FlashSV2Context * s, int *keyframe)
+{
+#ifndef FLASHSV2_DUMB
+    double block_ratio, line_ratio, enc_ratio, comp_ratio, data_ratio;
+    if (s->avctx->gop_size > 0) {
+        block_ratio = s->diff_blocks / s->tot_blocks;
+        line_ratio = s->diff_lines / s->tot_lines;
+        enc_ratio = s->uncomp_size / s->raw_size;
+        comp_ratio = s->comp_size / s->uncomp_size;
+        data_ratio = s->comp_size / s->raw_size;
+
+        if ((block_ratio >= 0.5 && line_ratio / block_ratio <= 0.5) || line_ratio >= 0.95) {
+            *keyframe = 1;
+            return;
+        }
+    }
+#else
+    return;
+#endif
+}
+
+static const double block_size_fraction = 1.0 / 300;
+static int optimum_block_width(FlashSV2Context * s)
+{
+#ifndef FLASHSV2_DUMB
+    double save = (1-pow(s->diff_lines/s->diff_blocks/s->block_height, 0.5)) * s->comp_size/s->tot_blocks;
+    double width = block_size_fraction * sqrt(0.5 * save * s->rows * s->cols) * s->image_width;
+    int pwidth = ((int) width);
+    return FFCLIP(pwidth & ~15, 256, 16);
+#else
+    return 64;
+#endif
+}
+
+static int optimum_block_height(FlashSV2Context * s)
+{
+#ifndef FLASHSV2_DUMB
+    double save = (1-pow(s->diff_lines/s->diff_blocks/s->block_height, 0.5)) * s->comp_size/s->tot_blocks;
+    double height = block_size_fraction * sqrt(0.5 * save * s->rows * s->cols) * s->image_height;
+    int pheight = ((int) height);
+    return FFCLIP(pheight & ~15, 256, 16);
+#else
+    return 64;
+#endif
+}
+
+static const double use15_7_threshold = 8192;
+
+static int optimum_use15_7(FlashSV2Context * s)
+{
+#ifndef FLASHSV2_DUMB
+    double ideal = ((double)(s->avctx->bit_rate * s->avctx->time_base.den * s->avctx->ticks_per_frame)) /
+        ((double) s->avctx->time_base.num) * s->avctx->frame_number;
+    if (ideal + use15_7_threshold < s->total_bits) {
+        return 1;
+    } else {
+        return 0;
+    }
+#else
+    return s->avctx->global_quality == 0;
+#endif
+}
+
+static const double color15_7_factor = 100;
+
+static int optimum_dist(FlashSV2Context * s)
+{
+#ifndef FLASHSV2_DUMB
+    double ideal =
+        s->avctx->bit_rate * s->avctx->time_base.den *
+        s->avctx->ticks_per_frame;
+    int dist = pow((s->total_bits / ideal) * color15_7_factor, 3);
+    av_log(s->avctx, AV_LOG_DEBUG, "dist: %d\n", dist);
+    return dist;
+#else
+    return 15;
+#endif
+}
+
+
+static int reconfigure_at_keyframe(FlashSV2Context * s, const uint8_t * image,
+                                   int stride)
+{
+    int update_palette = 0;
+    int res;
+    int block_width  = optimum_block_width (s);
+    int block_height = optimum_block_height(s);
+
+    s->rows = (s->image_height + block_height - 1) / block_height;
+    s->cols = (s->image_width  + block_width  - 1) / block_width;
+
+    if (block_width != s->block_width || block_height != s->block_height) {
+        s->block_width  = block_width;
+        s->block_height = block_height;
+        if (s->rows * s->cols > s->blocks_size / sizeof(Block)) {
+            s->frame_blocks = av_realloc(s->frame_blocks, s->rows * s->cols * sizeof(Block));
+            s->key_blocks = av_realloc(s->key_blocks, s->cols * s->rows * sizeof(Block));
+            if (!s->frame_blocks || !s->key_blocks) {
+                av_log(s->avctx, AV_LOG_ERROR, "Memory allocation failed.\n");
+                return -1;
+            }
+            s->blocks_size = s->rows * s->cols * sizeof(Block);
+        }
+        init_blocks(s, s->frame_blocks, s->encbuffer, s->databuffer);
+        init_blocks(s, s->key_blocks, s->keybuffer, 0);
+
+        av_fast_malloc(&s->blockbuffer, &s->blockbuffer_size, block_width * block_height * 6);
+        if (!s->blockbuffer) {
+            av_log(s->avctx, AV_LOG_ERROR, "Could not allocate block buffer.\n");
+            return AVERROR(ENOMEM);
+        }
+    }
+
+    s->use15_7 = optimum_use15_7(s);
+    if (s->use15_7) {
+        if ((s->use_custom_palette && s->palette_type != 1) || update_palette) {
+            res = generate_optimum_palette(&s->palette, image, s->image_width, s->image_height, stride);
+            if (res)
+                return res;
+            s->palette_type = 1;
+            av_log(s->avctx, AV_LOG_DEBUG, "Generated optimum palette\n");
+        } else if (!s->use_custom_palette && s->palette_type != 0) {
+            res = generate_default_palette(&s->palette);
+            if (res)
+                return res;
+            s->palette_type = 0;
+            av_log(s->avctx, AV_LOG_DEBUG, "Generated default palette\n");
+        }
+    }
+
+
+    reset_stats(s);
+
+    return 0;
+}
+
+static int flashsv2_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
+                                 const AVFrame *p, int *got_packet)
+{
+    FlashSV2Context *const s = avctx->priv_data;
+    int res;
+    int keyframe = 0;
+
+    if ((res = ff_alloc_packet2(avctx, pkt, s->frame_size + FF_MIN_BUFFER_SIZE)) < 0)
+        return res;
+
+    /* First frame needs to be a keyframe */
+    if (avctx->frame_number == 0)
+        keyframe = 1;
+
+    /* Check the placement of keyframes */
+    if (avctx->gop_size > 0) {
+        if (avctx->frame_number >= s->last_key_frame + avctx->gop_size)
+            keyframe = 1;
+    }
+
+    if (!keyframe
+        && avctx->frame_number > s->last_key_frame + avctx->keyint_min) {
+        recommend_keyframe(s, &keyframe);
+        if (keyframe)
+            av_log(avctx, AV_LOG_DEBUG, "Recommending key frame at frame %d\n", avctx->frame_number);
+    }
+
+    if (keyframe) {
+        res = reconfigure_at_keyframe(s, p->data[0], p->linesize[0]);
+        if (res)
+            return res;
+    }
+
+    if (s->use15_7)
+        s->dist = optimum_dist(s);
+
+    res = write_bitstream(s, p->data[0], p->linesize[0], pkt->data, pkt->size, keyframe);
+
+    if (keyframe) {
+        new_key_frame(s);
+        s->last_key_frame = avctx->frame_number;
+        pkt->flags |= AV_PKT_FLAG_KEY;
+        av_log(avctx, AV_LOG_DEBUG, "Inserting key frame at frame %d\n", avctx->frame_number);
+    }
+
+    pkt->size = res;
+    *got_packet = 1;
+
+    return 0;
+}
+
+static av_cold int flashsv2_encode_end(AVCodecContext * avctx)
+{
+    FlashSV2Context *s = avctx->priv_data;
+
+    cleanup(s);
+
+    return 0;
+}
+
+AVCodec ff_flashsv2_encoder = {
+    .name           = "flashsv2",
+    .long_name      = NULL_IF_CONFIG_SMALL("Flash Screen Video Version 2"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_FLASHSV2,
+    .priv_data_size = sizeof(FlashSV2Context),
+    .init           = flashsv2_encode_init,
+    .encode2        = flashsv2_encode_frame,
+    .close          = flashsv2_encode_end,
+    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_BGR24, AV_PIX_FMT_NONE },
+};
diff --git a/libavcodec/flashsvenc.c b/libavcodec/flashsvenc.c
index 71c81bd..7ad15f1 100644
--- a/libavcodec/flashsvenc.c
+++ b/libavcodec/flashsvenc.c
@@ -3,20 +3,20 @@
  * Copyright (C) 2004 Alex Beregszaszi
  * Copyright (C) 2006 Benjamin Larsson
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -246,12 +246,8 @@ static int flashsv_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         I_frame = 1;
     }
 
-    if ((res = ff_alloc_packet(pkt, s->image_width * s->image_height * 3)) < 0) {
-        //Conservative upper bound check for compressed data
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet of size %d.\n",
-               s->image_width * s->image_height * 3);
+    if ((res = ff_alloc_packet2(avctx, pkt, s->image_width * s->image_height * 3)) < 0)
         return res;
-    }
 
     pkt->size = encode_bitstream(s, p, pkt->data, pkt->size, opt_w * 16, opt_h * 16,
                                  pfptr, &I_frame);
diff --git a/libavcodec/flicvideo.c b/libavcodec/flicvideo.c
index 68f45b4..a2d59e8 100644
--- a/libavcodec/flicvideo.c
+++ b/libavcodec/flicvideo.c
@@ -2,20 +2,20 @@
  * FLI/FLC Animation Video Decoder
  * Copyright (C) 2003, 2004 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -64,7 +64,7 @@
 
 #define CHECK_PIXEL_PTR(n) \
     if (pixel_ptr + n > pixel_limit) { \
-        av_log (s->avctx, AV_LOG_INFO, "Problem: pixel_ptr >= pixel_limit (%d >= %d)\n", \
+        av_log (s->avctx, AV_LOG_ERROR, "Invalid pixel_ptr = %d > pixel_limit = %d\n", \
         pixel_ptr + n, pixel_limit); \
         return AVERROR_INVALIDDATA; \
     } \
@@ -84,22 +84,40 @@ static av_cold int flic_decode_init(AVCodecContext *avctx)
     unsigned char *fli_header = (unsigned char *)avctx->extradata;
     int depth;
 
-    if (avctx->extradata_size != 12 &&
-        avctx->extradata_size != 128) {
-        av_log(avctx, AV_LOG_ERROR, "Expected extradata of 12 or 128 bytes\n");
+    if (avctx->extradata_size != 0 &&
+        avctx->extradata_size != 12 &&
+        avctx->extradata_size != 128 &&
+        avctx->extradata_size != 256 &&
+        avctx->extradata_size != 904 &&
+        avctx->extradata_size != 1024) {
+        av_log(avctx, AV_LOG_ERROR, "Unexpected extradata size %d\n", avctx->extradata_size);
         return AVERROR_INVALIDDATA;
     }
 
     s->avctx = avctx;
 
-    s->fli_type = AV_RL16(&fli_header[4]); /* Might be overridden if a Magic Carpet FLC */
-
-    depth = 0;
     if (s->avctx->extradata_size == 12) {
         /* special case for magic carpet FLIs */
         s->fli_type = FLC_MAGIC_CARPET_SYNTHETIC_TYPE_CODE;
         depth = 8;
+    } else if (avctx->extradata_size == 1024) {
+        uint8_t *ptr = avctx->extradata;
+        int i;
+
+        for (i = 0; i < 256; i++) {
+            s->palette[i] = AV_RL32(ptr);
+            ptr += 4;
+        }
+        depth = 8;
+        /* FLI in MOV, see e.g. FFmpeg trac issue #626 */
+    } else if (avctx->extradata_size == 0 ||
+               avctx->extradata_size == 256 ||
+        /* see FFmpeg ticket #1234 */
+               avctx->extradata_size == 904) {
+        s->fli_type = FLI_TYPE_CODE;
+        depth = 8;
     } else {
+        s->fli_type = AV_RL16(&fli_header[4]);
         depth = AV_RL16(&fli_header[12]);
     }
 
@@ -116,7 +134,7 @@ static av_cold int flic_decode_init(AVCodecContext *avctx)
         case 15 : avctx->pix_fmt = AV_PIX_FMT_RGB555; break;
         case 16 : avctx->pix_fmt = AV_PIX_FMT_RGB565; break;
         case 24 : avctx->pix_fmt = AV_PIX_FMT_BGR24; /* Supposedly BGR, but havent any files to test with */
-                  av_log(avctx, AV_LOG_ERROR, "24Bpp FLC/FLX is unsupported due to no test files.\n");
+                  avpriv_request_sample(avctx, "24Bpp FLC/FLX");
                   return AVERROR_PATCHWELCOME;
         default :
                   av_log(avctx, AV_LOG_ERROR, "Unknown FLC/FLX depth of %d Bpp is unsupported.\n",depth);
@@ -139,7 +157,6 @@ static int flic_decode_frame_8BPP(AVCodecContext *avctx,
     FlicDecodeContext *s = avctx->priv_data;
 
     GetByteContext g2;
-    int stream_ptr_after_color_chunk;
     int pixel_ptr;
     int palette_ptr;
     unsigned char palette_idx1;
@@ -171,14 +188,16 @@ static int flic_decode_frame_8BPP(AVCodecContext *avctx,
 
     bytestream2_init(&g2, buf, buf_size);
 
-    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0)
         return ret;
-    }
 
     pixels = s->frame->data[0];
     pixel_limit = s->avctx->height * s->frame->linesize[0];
+    if (buf_size < 16 || buf_size > INT_MAX - (3 * 256 + FF_INPUT_BUFFER_PADDING_SIZE))
+        return AVERROR_INVALIDDATA;
     frame_size = bytestream2_get_le32(&g2);
+    if (frame_size > buf_size)
+        frame_size = buf_size;
     bytestream2_skip(&g2, 2); /* skip the magic number */
     num_chunks = bytestream2_get_le16(&g2);
     bytestream2_skip(&g2, 8);  /* skip padding */
@@ -186,15 +205,22 @@ static int flic_decode_frame_8BPP(AVCodecContext *avctx,
     frame_size -= 16;
 
     /* iterate through the chunks */
-    while ((frame_size > 0) && (num_chunks > 0)) {
+    while ((frame_size >= 6) && (num_chunks > 0) &&
+            bytestream2_get_bytes_left(&g2) >= 4) {
+        int stream_ptr_after_chunk;
         chunk_size = bytestream2_get_le32(&g2);
+        if (chunk_size > frame_size) {
+            av_log(avctx, AV_LOG_WARNING,
+                   "Invalid chunk_size = %u > frame_size = %u\n", chunk_size, frame_size);
+            chunk_size = frame_size;
+        }
+        stream_ptr_after_chunk = bytestream2_tell(&g2) - 4 + chunk_size;
+
         chunk_type = bytestream2_get_le16(&g2);
 
         switch (chunk_type) {
         case FLI_256_COLOR:
         case FLI_COLOR:
-            stream_ptr_after_color_chunk = bytestream2_tell(&g2) + chunk_size - 6;
-
             /* check special case: If this file is from the Magic Carpet
              * game and uses 6-bit colors even though it reports 256-color
              * chunks in a 0xAF12-type file (fli_type is set to 0xAF13 during
@@ -217,6 +243,9 @@ static int flic_decode_frame_8BPP(AVCodecContext *avctx,
                 if (color_changes == 0)
                     color_changes = 256;
 
+                if (bytestream2_tell(&g2) + color_changes * 3 > stream_ptr_after_chunk)
+                    break;
+
                 for (j = 0; j < color_changes; j++) {
                     unsigned int entry;
 
@@ -227,26 +256,22 @@ static int flic_decode_frame_8BPP(AVCodecContext *avctx,
                     r = bytestream2_get_byte(&g2) << color_shift;
                     g = bytestream2_get_byte(&g2) << color_shift;
                     b = bytestream2_get_byte(&g2) << color_shift;
-                    entry = (r << 16) | (g << 8) | b;
+                    entry = 0xFFU << 24 | r << 16 | g << 8 | b;
+                    if (color_shift == 2)
+                        entry |= entry >> 6 & 0x30303;
                     if (s->palette[palette_ptr] != entry)
                         s->new_palette = 1;
                     s->palette[palette_ptr++] = entry;
                 }
             }
-
-            /* color chunks sometimes have weird 16-bit alignment issues;
-             * therefore, take the hardline approach and skip
-             * to the value calculated w.r.t. the size specified by the color
-             * chunk header */
-            if (stream_ptr_after_color_chunk - bytestream2_tell(&g2) > 0)
-                bytestream2_skip(&g2, stream_ptr_after_color_chunk - bytestream2_tell(&g2));
-
             break;
 
         case FLI_DELTA:
             y_ptr = 0;
             compressed_lines = bytestream2_get_le16(&g2);
             while (compressed_lines > 0) {
+                if (bytestream2_tell(&g2) + 2 > stream_ptr_after_chunk)
+                    break;
                 line_packets = bytestream2_get_le16(&g2);
                 if ((line_packets & 0xC000) == 0xC000) {
                     // line skip opcode
@@ -265,6 +290,8 @@ static int flic_decode_frame_8BPP(AVCodecContext *avctx,
                     CHECK_PIXEL_PTR(0);
                     pixel_countdown = s->avctx->width;
                     for (i = 0; i < line_packets; i++) {
+                        if (bytestream2_tell(&g2) + 2 > stream_ptr_after_chunk)
+                            break;
                         /* account for the skip bytes */
                         pixel_skip = bytestream2_get_byte(&g2);
                         pixel_ptr += pixel_skip;
@@ -281,6 +308,8 @@ static int flic_decode_frame_8BPP(AVCodecContext *avctx,
                             }
                         } else {
                             CHECK_PIXEL_PTR(byte_run * 2);
+                            if (bytestream2_tell(&g2) + byte_run * 2 > stream_ptr_after_chunk)
+                                break;
                             for (j = 0; j < byte_run * 2; j++, pixel_countdown--) {
                                 pixels[pixel_ptr++] = bytestream2_get_byte(&g2);
                             }
@@ -303,16 +332,22 @@ static int flic_decode_frame_8BPP(AVCodecContext *avctx,
                 pixel_ptr = y_ptr;
                 CHECK_PIXEL_PTR(0);
                 pixel_countdown = s->avctx->width;
+                if (bytestream2_tell(&g2) + 1 > stream_ptr_after_chunk)
+                    break;
                 line_packets = bytestream2_get_byte(&g2);
                 if (line_packets > 0) {
                     for (i = 0; i < line_packets; i++) {
                         /* account for the skip bytes */
+                        if (bytestream2_tell(&g2) + 1 > stream_ptr_after_chunk)
+                            break;
                         pixel_skip = bytestream2_get_byte(&g2);
                         pixel_ptr += pixel_skip;
                         pixel_countdown -= pixel_skip;
                         byte_run = sign_extend(bytestream2_get_byte(&g2),8);
                         if (byte_run > 0) {
                             CHECK_PIXEL_PTR(byte_run);
+                            if (bytestream2_tell(&g2) + byte_run > stream_ptr_after_chunk)
+                                break;
                             for (j = 0; j < byte_run; j++, pixel_countdown--) {
                                 pixels[pixel_ptr++] = bytestream2_get_byte(&g2);
                             }
@@ -349,6 +384,8 @@ static int flic_decode_frame_8BPP(AVCodecContext *avctx,
                  bytestream2_skip(&g2, 1);
                 pixel_countdown = s->avctx->width;
                 while (pixel_countdown > 0) {
+                    if (bytestream2_tell(&g2) + 1 > stream_ptr_after_chunk)
+                        break;
                     byte_run = sign_extend(bytestream2_get_byte(&g2), 8);
                     if (!byte_run) {
                         av_log(avctx, AV_LOG_ERROR, "Invalid byte run value.\n");
@@ -368,6 +405,8 @@ static int flic_decode_frame_8BPP(AVCodecContext *avctx,
                     } else {  /* copy bytes if byte_run < 0 */
                         byte_run = -byte_run;
                         CHECK_PIXEL_PTR(byte_run);
+                        if (bytestream2_tell(&g2) + byte_run > stream_ptr_after_chunk)
+                            break;
                         for (j = 0; j < byte_run; j++) {
                             pixels[pixel_ptr++] = bytestream2_get_byte(&g2);
                             pixel_countdown--;
@@ -384,9 +423,9 @@ static int flic_decode_frame_8BPP(AVCodecContext *avctx,
 
         case FLI_COPY:
             /* copy the chunk (uncompressed frame) */
-            if (chunk_size - 6 > s->avctx->width * s->avctx->height) {
+            if (chunk_size - 6 != s->avctx->width * s->avctx->height) {
                 av_log(avctx, AV_LOG_ERROR, "In chunk FLI_COPY : source data (%d bytes) " \
-                       "bigger than image, skipping chunk\n", chunk_size - 6);
+                       "has incorrect size, skipping chunk\n", chunk_size - 6);
                 bytestream2_skip(&g2, chunk_size - 6);
             } else {
                 for (y_ptr = 0; y_ptr < s->frame->linesize[0] * s->avctx->height;
@@ -399,7 +438,6 @@ static int flic_decode_frame_8BPP(AVCodecContext *avctx,
 
         case FLI_MINI:
             /* some sort of a thumbnail? disregard this chunk... */
-            bytestream2_skip(&g2, chunk_size - 6);
             break;
 
         default:
@@ -407,14 +445,16 @@ static int flic_decode_frame_8BPP(AVCodecContext *avctx,
             break;
         }
 
+        if (stream_ptr_after_chunk - bytestream2_tell(&g2) > 0)
+            bytestream2_skip(&g2, stream_ptr_after_chunk - bytestream2_tell(&g2));
+
         frame_size -= chunk_size;
         num_chunks--;
     }
 
     /* by the end of the chunk, the stream ptr should equal the frame
-     * size (minus 1, possibly); if it doesn't, issue a warning */
-    if ((bytestream2_get_bytes_left(&g2) != 0) &&
-        (bytestream2_get_bytes_left(&g2) != 1))
+     * size (minus 1 or 2, possibly); if it doesn't, issue a warning */
+    if (bytestream2_get_bytes_left(&g2) > 2)
         av_log(avctx, AV_LOG_ERROR, "Processed FLI chunk where chunk size = %d " \
                "and final chunk ptr = %d\n", buf_size,
                buf_size - bytestream2_get_bytes_left(&g2));
@@ -467,10 +507,8 @@ static int flic_decode_frame_15_16BPP(AVCodecContext *avctx,
 
     bytestream2_init(&g2, buf, buf_size);
 
-    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0)
         return ret;
-    }
 
     pixels = s->frame->data[0];
     pixel_limit = s->avctx->height * s->frame->linesize[0];
@@ -479,14 +517,26 @@ static int flic_decode_frame_15_16BPP(AVCodecContext *avctx,
     bytestream2_skip(&g2, 2);  /* skip the magic number */
     num_chunks = bytestream2_get_le16(&g2);
     bytestream2_skip(&g2, 8);  /* skip padding */
+    if (frame_size > buf_size)
+        frame_size = buf_size;
 
     frame_size -= 16;
 
     /* iterate through the chunks */
-    while ((frame_size > 0) && (num_chunks > 0)) {
+    while ((frame_size > 0) && (num_chunks > 0) &&
+            bytestream2_get_bytes_left(&g2) >= 4) {
+        int stream_ptr_after_chunk;
         chunk_size = bytestream2_get_le32(&g2);
+        if (chunk_size > frame_size) {
+            av_log(avctx, AV_LOG_WARNING,
+                   "Invalid chunk_size = %u > frame_size = %u\n", chunk_size, frame_size);
+            chunk_size = frame_size;
+        }
+        stream_ptr_after_chunk = bytestream2_tell(&g2) - 4 + chunk_size;
+
         chunk_type = bytestream2_get_le16(&g2);
 
+
         switch (chunk_type) {
         case FLI_256_COLOR:
         case FLI_COLOR:
@@ -504,6 +554,8 @@ static int flic_decode_frame_15_16BPP(AVCodecContext *avctx,
             y_ptr = 0;
             compressed_lines = bytestream2_get_le16(&g2);
             while (compressed_lines > 0) {
+                if (bytestream2_tell(&g2) + 2 > stream_ptr_after_chunk)
+                    break;
                 line_packets = bytestream2_get_le16(&g2);
                 if (line_packets < 0) {
                     line_packets = -line_packets;
@@ -515,6 +567,8 @@ static int flic_decode_frame_15_16BPP(AVCodecContext *avctx,
                     pixel_countdown = s->avctx->width;
                     for (i = 0; i < line_packets; i++) {
                         /* account for the skip bytes */
+                        if (bytestream2_tell(&g2) + 2 > stream_ptr_after_chunk)
+                            break;
                         pixel_skip = bytestream2_get_byte(&g2);
                         pixel_ptr += (pixel_skip*2); /* Pixel is 2 bytes wide */
                         pixel_countdown -= pixel_skip;
@@ -528,6 +582,8 @@ static int flic_decode_frame_15_16BPP(AVCodecContext *avctx,
                                 pixel_ptr += 2;
                             }
                         } else {
+                            if (bytestream2_tell(&g2) + 2*byte_run > stream_ptr_after_chunk)
+                                break;
                             CHECK_PIXEL_PTR(2 * byte_run);
                             for (j = 0; j < byte_run; j++, pixel_countdown--) {
                                 *((signed short*)(&pixels[pixel_ptr])) = bytestream2_get_le16(&g2);
@@ -562,6 +618,8 @@ static int flic_decode_frame_15_16BPP(AVCodecContext *avctx,
                 pixel_countdown = (s->avctx->width * 2);
 
                 while (pixel_countdown > 0) {
+                    if (bytestream2_tell(&g2) + 1 > stream_ptr_after_chunk)
+                        break;
                     byte_run = sign_extend(bytestream2_get_byte(&g2), 8);
                     if (byte_run > 0) {
                         palette_idx1 = bytestream2_get_byte(&g2);
@@ -575,6 +633,8 @@ static int flic_decode_frame_15_16BPP(AVCodecContext *avctx,
                         }
                     } else {  /* copy bytes if byte_run < 0 */
                         byte_run = -byte_run;
+                        if (bytestream2_tell(&g2) + byte_run > stream_ptr_after_chunk)
+                            break;
                         CHECK_PIXEL_PTR(byte_run);
                         for (j = 0; j < byte_run; j++) {
                             palette_idx1 = bytestream2_get_byte(&g2);
@@ -614,6 +674,8 @@ static int flic_decode_frame_15_16BPP(AVCodecContext *avctx,
                 pixel_countdown = s->avctx->width; /* Width is in pixels, not bytes */
 
                 while (pixel_countdown > 0) {
+                    if (bytestream2_tell(&g2) + 1 > stream_ptr_after_chunk)
+                        break;
                     byte_run = sign_extend(bytestream2_get_byte(&g2), 8);
                     if (byte_run > 0) {
                         pixel    = bytestream2_get_le16(&g2);
@@ -628,6 +690,8 @@ static int flic_decode_frame_15_16BPP(AVCodecContext *avctx,
                         }
                     } else {  /* copy pixels if byte_run < 0 */
                         byte_run = -byte_run;
+                        if (bytestream2_tell(&g2) + 2 * byte_run > stream_ptr_after_chunk)
+                            break;
                         CHECK_PIXEL_PTR(2 * byte_run);
                         for (j = 0; j < byte_run; j++) {
                             *((signed short*)(&pixels[pixel_ptr])) = bytestream2_get_le16(&g2);
diff --git a/libavcodec/flv.h b/libavcodec/flv.h
index 3d9a2d5..16bc88b 100644
--- a/libavcodec/flv.h
+++ b/libavcodec/flv.h
@@ -1,19 +1,19 @@
 /*
  * FLV specific private header.
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/flvdec.c b/libavcodec/flvdec.c
index 3405058..3b048f6 100644
--- a/libavcodec/flvdec.c
+++ b/libavcodec/flvdec.c
@@ -1,19 +1,19 @@
 /*
  * FLV decoding.
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -41,12 +41,12 @@ int ff_flv_decode_picture_header(MpegEncContext *s)
     /* picture header */
     if (get_bits_long(&s->gb, 17) != 1) {
         av_log(s->avctx, AV_LOG_ERROR, "Bad picture start code\n");
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
     format = get_bits(&s->gb, 5);
     if (format != 0 && format != 1) {
         av_log(s->avctx, AV_LOG_ERROR, "Bad picture format\n");
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
     s->h263_flv       = format + 1;
     s->picture_number = get_bits(&s->gb, 8); /* picture timestamp */
@@ -85,7 +85,7 @@ int ff_flv_decode_picture_header(MpegEncContext *s)
         break;
     }
     if (av_image_check_size(width, height, 0, s->avctx))
-        return -1;
+        return AVERROR(EINVAL);
     s->width  = width;
     s->height = height;
 
@@ -103,10 +103,14 @@ int ff_flv_decode_picture_header(MpegEncContext *s)
     s->h263_long_vectors = 0;
 
     /* PEI */
-    while (get_bits1(&s->gb) != 0)
-        skip_bits(&s->gb, 8);
+    if (skip_1stop_8data_bits(&s->gb) < 0)
+        return AVERROR_INVALIDDATA;
+
     s->f_code = 1;
 
+    if (s->ehc_mode)
+        s->avctx->sample_aspect_ratio= (AVRational){1,2};
+
     if (s->avctx->debug & FF_DEBUG_PICT_INFO) {
         av_log(s->avctx, AV_LOG_DEBUG, "%c esc_type:%d, qp:%d num:%d\n",
                s->droppable ? 'D' : av_get_picture_type_char(s->pict_type),
@@ -128,6 +132,7 @@ AVCodec ff_flv_decoder = {
     .close          = ff_h263_decode_end,
     .decode         = ff_h263_decode_frame,
     .capabilities   = CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
+    .max_lowres     = 3,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_YUV420P,
         AV_PIX_FMT_NONE
diff --git a/libavcodec/flvenc.c b/libavcodec/flvenc.c
index fbdb23d..9421955 100644
--- a/libavcodec/flvenc.c
+++ b/libavcodec/flvenc.c
@@ -1,19 +1,19 @@
 /*
  * FLV Encoding specific code.
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/fmtconvert.c b/libavcodec/fmtconvert.c
index 63f62c3..fb4302c 100644
--- a/libavcodec/fmtconvert.c
+++ b/libavcodec/fmtconvert.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2000, 2001 Fabrice Bellard
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -98,4 +98,36 @@ av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx)
     if (ARCH_ARM) ff_fmt_convert_init_arm(c, avctx);
     if (ARCH_PPC) ff_fmt_convert_init_ppc(c, avctx);
     if (ARCH_X86) ff_fmt_convert_init_x86(c, avctx);
+    if (HAVE_MIPSFPU) ff_fmt_convert_init_mips(c);
+}
+
+/* ffdshow custom code: interleave planar float channels into dst, scaling each sample by 1/32768 */
+void float_interleave(float *dst, const float **src, long len, int channels)
+{
+    long i, j; int c; /* indices are long like len: int indices would overflow on large buffers */
+    if(channels==2){
+        for(i=0; i<len; i++){
+            dst[2*i]   = src[0][i] / 32768.0f;
+            dst[2*i+1] = src[1][i] / 32768.0f;
+        }
+    }else{
+        for(c=0; c<channels; c++)
+            for(i=0, j=c; i<len; i++, j+=channels)
+                dst[j] = src[c][i] / 32768.0f;
+    }
+}
+
+void float_interleave_noscale(float *dst, const float **src, long len, int channels)
+{
+    long i, j; int c; /* interleaves planar channels unscaled; indices are long like len to avoid int overflow */
+    if(channels==2){
+        for(i=0; i<len; i++){
+            dst[2*i]   = src[0][i];
+            dst[2*i+1] = src[1][i];
+        }
+    }else{
+        for(c=0; c<channels; c++)
+            for(i=0, j=c; i<len; i++, j+=channels)
+                dst[j] = src[c][i];
+    }
 }
diff --git a/libavcodec/fmtconvert.h b/libavcodec/fmtconvert.h
index bd833ef..30abcc3 100644
--- a/libavcodec/fmtconvert.h
+++ b/libavcodec/fmtconvert.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2000, 2001 Fabrice Bellard
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -109,5 +109,10 @@ void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx);
 void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx);
 void ff_fmt_convert_init_ppc(FmtConvertContext *c, AVCodecContext *avctx);
 void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx);
+void ff_fmt_convert_init_mips(FmtConvertContext *c);
+
+/* ffdshow custom code */
+void float_interleave(float *dst, const float **src, long len, int channels);
+void float_interleave_noscale(float *dst, const float **src, long len, int channels);
 
 #endif /* AVCODEC_FMTCONVERT_H */
diff --git a/libavcodec/frame_thread_encoder.c b/libavcodec/frame_thread_encoder.c
new file mode 100644
index 0000000..9e17698
--- /dev/null
+++ b/libavcodec/frame_thread_encoder.c
@@ -0,0 +1,295 @@
+/*
+ * Copyright (c) 2012 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "frame_thread_encoder.h"
+
+#include "libavutil/fifo.h"
+#include "libavutil/avassert.h"
+#include "libavutil/imgutils.h"
+#include "avcodec.h"
+#include "internal.h"
+#include "thread.h"
+
+#if HAVE_PTHREADS
+#include <pthread.h>
+#elif HAVE_W32THREADS
+#include "compat/w32pthreads.h"
+#elif HAVE_OS2THREADS
+#include "compat/os2threads.h"
+#endif
+
+#define MAX_THREADS 64
+#define BUFFER_SIZE (2*MAX_THREADS)
+
+typedef struct{
+    void *indata;         /* AVFrame to encode; the worker releases it after encoding */
+    void *outdata;        /* AVPacket stored by the worker (its data is NULL when no packet was produced) */
+    int64_t return_code;  /* return value of avcodec_encode_video2() for this task */
+    unsigned index;       /* slot in ThreadContext.finished_tasks that receives the result */
+} Task;
+
+typedef struct{
+    AVCodecContext *parent_avctx;   /* context the workers were created for; used for ff_get_buffer() */
+    pthread_mutex_t buffer_mutex;   /* held around ff_get_buffer(), av_frame_unref() and avcodec_close() */
+
+    AVFifoBuffer *task_fifo;        /* queue of pending Task entries: written by the caller, read by workers */
+    pthread_mutex_t task_fifo_mutex;
+    pthread_cond_t task_fifo_cond;  /* signalled when a task is queued, broadcast when exit is requested */
+
+    Task finished_tasks[BUFFER_SIZE]; /* results, indexed by Task.index */
+    pthread_mutex_t finished_task_mutex;
+    pthread_cond_t finished_task_cond; /* signalled by a worker after it has stored a result */
+
+    unsigned task_index;            /* slot the next queued task will use (wraps at BUFFER_SIZE) */
+    unsigned finished_task_index;   /* slot of the next result handed back to the caller */
+
+    pthread_t worker[MAX_THREADS];
+    int exit;                       /* set to 1 to make the workers leave their loop */
+} ThreadContext;
+
+static void * attribute_align_arg worker(void *v){
+    AVCodecContext *avctx = v;
+    ThreadContext *c = avctx->internal->frame_thread_encoder;
+    AVPacket *pkt = NULL;
+    /* Thread body: take tasks from the fifo, encode them and store the results until c->exit is set. */
+    while(!c->exit){
+        int got_packet, ret;
+        AVFrame *frame;
+        Task task;
+
+        if(!pkt) pkt= av_mallocz(sizeof(*pkt));
+        if(!pkt) continue; /* NOTE(review): on allocation failure this retries in a tight loop */
+        av_init_packet(pkt);
+
+        pthread_mutex_lock(&c->task_fifo_mutex);
+        while (av_fifo_size(c->task_fifo) <= 0 || c->exit) { /* sleep until a task is queued or exit is requested */
+            if(c->exit){
+                pthread_mutex_unlock(&c->task_fifo_mutex);
+                goto end;
+            }
+            pthread_cond_wait(&c->task_fifo_cond, &c->task_fifo_mutex);
+        }
+        av_fifo_generic_read(c->task_fifo, &task, sizeof(task), NULL);
+        pthread_mutex_unlock(&c->task_fifo_mutex);
+        frame = task.indata;
+
+        ret = avcodec_encode_video2(avctx, pkt, frame, &got_packet);
+        pthread_mutex_lock(&c->buffer_mutex);
+        av_frame_unref(frame);
+        pthread_mutex_unlock(&c->buffer_mutex);
+        av_frame_free(&frame); /* the queued frame is released here, by the worker */
+        if(got_packet) {
+            av_dup_packet(pkt);
+        } else {
+            pkt->data = NULL;
+            pkt->size = 0;
+        }
+        pthread_mutex_lock(&c->finished_task_mutex);
+        c->finished_tasks[task.index].outdata = pkt; pkt = NULL; /* packet handed over; a new one is allocated next iteration */
+        c->finished_tasks[task.index].return_code = ret;
+        pthread_cond_signal(&c->finished_task_cond);
+        pthread_mutex_unlock(&c->finished_task_mutex);
+    }
+end:
+    av_free(pkt);
+    pthread_mutex_lock(&c->buffer_mutex);
+    avcodec_close(avctx);
+    pthread_mutex_unlock(&c->buffer_mutex);
+    av_freep(&avctx); /* frees the per-thread context that was passed in as v */
+    return NULL;
+}
+
+/* Set up frame-threaded encoding: start thread_count workers, each with its own opened copy of
+ * avctx. Returns 0 on success or when frame threading is not used, a negative value on error. */
+int ff_frame_thread_encoder_init(AVCodecContext *avctx, AVDictionary *options){
+    int i=0;
+    ThreadContext *c;
+
+    if(   !(avctx->thread_type & FF_THREAD_FRAME)
+       || !(avctx->codec->capabilities & CODEC_CAP_INTRA_ONLY))
+        return 0;
+
+    if(   !avctx->thread_count
+       && avctx->codec_id == AV_CODEC_ID_MJPEG
+       && !(avctx->flags & CODEC_FLAG_QSCALE)) {
+        av_log(avctx, AV_LOG_DEBUG,
+               "Forcing thread count to 1 for MJPEG encoding, use -thread_type slice "
+               "or a constant quantizer if you want to use multiple cpu cores\n");
+        avctx->thread_count = 1;
+    }
+    if(   avctx->thread_count > 1
+       && avctx->codec_id == AV_CODEC_ID_MJPEG
+       && !(avctx->flags & CODEC_FLAG_QSCALE))
+        av_log(avctx, AV_LOG_WARNING,
+               "MJPEG CBR encoding works badly with frame multi-threading, consider "
+               "using -threads 1, -thread_type slice or a constant quantizer.\n");
+
+    if (avctx->codec_id == AV_CODEC_ID_HUFFYUV ||
+        avctx->codec_id == AV_CODEC_ID_FFVHUFF) {
+        // huffyuv does not support these with multiple frame threads currently
+        if (avctx->context_model > 0 || (avctx->flags & CODEC_FLAG_PASS1)) {
+            av_log(avctx, AV_LOG_WARNING,
+               "Forcing thread count to 1 for huffyuv encoding with first pass or context 1\n");
+            avctx->thread_count = 1;
+        }
+    }
+
+    if(!avctx->thread_count) {
+        avctx->thread_count = av_cpu_count();
+        avctx->thread_count = FFMIN(avctx->thread_count, MAX_THREADS);
+    }
+    if(avctx->thread_count <= 1)
+        return 0;
+    if(avctx->thread_count > MAX_THREADS)
+        return AVERROR(EINVAL);
+
+    av_assert0(!avctx->internal->frame_thread_encoder);
+    c = avctx->internal->frame_thread_encoder = av_mallocz(sizeof(ThreadContext));
+    if(!c)
+        return AVERROR(ENOMEM);
+    c->parent_avctx = avctx;
+
+    /* The fail path locks and destroys these objects, so set them up before the first goto fail. */
+    pthread_mutex_init(&c->task_fifo_mutex, NULL);
+    pthread_mutex_init(&c->finished_task_mutex, NULL);
+    pthread_mutex_init(&c->buffer_mutex, NULL);
+    pthread_cond_init(&c->task_fifo_cond, NULL);
+    pthread_cond_init(&c->finished_task_cond, NULL);
+
+    c->task_fifo = av_fifo_alloc_array(BUFFER_SIZE, sizeof(Task));
+    if(!c->task_fifo)
+        goto fail;
+
+    for(i=0; i<avctx->thread_count ; i++){
+        AVDictionary *tmp = NULL;
+        void *tmpv;
+        AVCodecContext *thread_avctx = avcodec_alloc_context3(avctx->codec);
+        if(!thread_avctx)
+            goto fail;
+        tmpv = thread_avctx->priv_data;
+        *thread_avctx = *avctx;
+        thread_avctx->priv_data = tmpv;
+        thread_avctx->internal = NULL;
+        memcpy(thread_avctx->priv_data, avctx->priv_data, avctx->codec->priv_data_size);
+        thread_avctx->thread_count = 1;
+        thread_avctx->active_thread_type &= ~FF_THREAD_FRAME;
+        av_dict_copy(&tmp, options, 0);
+        av_dict_set(&tmp, "threads", "1", 0);
+        if(avcodec_open2(thread_avctx, avctx->codec, &tmp) < 0) {
+            av_dict_free(&tmp);
+            av_freep(&thread_avctx->priv_data); /* no worker owns this context yet: free it here */
+            av_freep(&thread_avctx);
+            goto fail;
+        }
+        av_dict_free(&tmp);
+        av_assert0(!thread_avctx->internal->frame_thread_encoder);
+        thread_avctx->internal->frame_thread_encoder = c;
+        if(pthread_create(&c->worker[i], NULL, worker, thread_avctx)) {
+            avcodec_close(thread_avctx); /* mirrors what the worker does with its context on exit */
+            av_freep(&thread_avctx);
+            goto fail;
+        }
+    }
+    avctx->active_thread_type = FF_THREAD_FRAME;
+    return 0;
+fail:
+    avctx->thread_count = i;
+    av_log(avctx, AV_LOG_ERROR, "ff_frame_thread_encoder_init failed\n");
+    ff_frame_thread_encoder_free(avctx);
+    return -1;
+}
+
+void ff_frame_thread_encoder_free(AVCodecContext *avctx){
+    ThreadContext *tc = avctx->internal->frame_thread_encoder;
+    int n = 0;
+    /* raise the exit flag under the fifo lock and wake every waiting worker */
+    pthread_mutex_lock(&tc->task_fifo_mutex);
+    tc->exit = 1;
+    pthread_cond_broadcast(&tc->task_fifo_cond);
+    pthread_mutex_unlock(&tc->task_fifo_mutex);
+
+    while (n < avctx->thread_count)
+        pthread_join(tc->worker[n++], NULL);
+
+    /* all workers have been joined: destroy the sync objects, the queue and the context */
+    pthread_mutex_destroy(&tc->task_fifo_mutex);
+    pthread_mutex_destroy(&tc->finished_task_mutex);
+    pthread_mutex_destroy(&tc->buffer_mutex);
+    pthread_cond_destroy(&tc->task_fifo_cond);
+    pthread_cond_destroy(&tc->finished_task_cond);
+    av_fifo_freep(&tc->task_fifo);
+    av_freep(&avctx->internal->frame_thread_encoder);
+}
+
+/* Queue frame (if not NULL) for a worker, then hand back the oldest finished packet in *pkt once
+ * one is due; *got_packet_ptr is set to 1 when pkt carries data. Returns 0 or the encoder's code. */
+int ff_thread_video_encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFrame *frame, int *got_packet_ptr){
+    ThreadContext *c = avctx->internal->frame_thread_encoder;
+    Task task;
+    int ret;
+
+    av_assert1(!*got_packet_ptr);
+    if(frame){
+        if(!(avctx->flags & CODEC_FLAG_INPUT_PRESERVED)){
+            AVFrame *new = av_frame_alloc();
+            if(!new)
+                return AVERROR(ENOMEM);
+            pthread_mutex_lock(&c->buffer_mutex);
+            ret = ff_get_buffer(c->parent_avctx, new, 0);
+            pthread_mutex_unlock(&c->buffer_mutex);
+            if(ret<0){
+                av_frame_free(&new); /* the frame allocated above must not leak on failure */
+                return ret;
+            }
+            new->pts = frame->pts;
+            new->quality = frame->quality;
+            new->pict_type = frame->pict_type;
+            av_image_copy(new->data, new->linesize, (const uint8_t **)frame->data, frame->linesize,
+                          avctx->pix_fmt, avctx->width, avctx->height);
+            frame = new;
+        }
+
+        task.index = c->task_index;
+        task.indata = (void*)frame;
+        pthread_mutex_lock(&c->task_fifo_mutex);
+        av_fifo_generic_write(c->task_fifo, &task, sizeof(task), NULL);
+        pthread_cond_signal(&c->task_fifo_cond);
+        pthread_mutex_unlock(&c->task_fifo_mutex);
+        c->task_index = (c->task_index+1) % BUFFER_SIZE;
+        /* oldest task not finished and at most thread_count tasks outstanding: return without a packet */
+        if(!c->finished_tasks[c->finished_task_index].outdata && (c->task_index - c->finished_task_index) % BUFFER_SIZE <= avctx->thread_count)
+            return 0;
+    }
+
+    if(c->task_index == c->finished_task_index) /* no task is outstanding */
+        return 0;
+
+    pthread_mutex_lock(&c->finished_task_mutex);
+    while (!c->finished_tasks[c->finished_task_index].outdata)
+        pthread_cond_wait(&c->finished_task_cond, &c->finished_task_mutex);
+    task = c->finished_tasks[c->finished_task_index];
+    *pkt = *(AVPacket*)(task.outdata);
+    if(pkt->data)
+        *got_packet_ptr = 1;
+    av_freep(&c->finished_tasks[c->finished_task_index].outdata);
+    c->finished_task_index = (c->finished_task_index+1) % BUFFER_SIZE;
+    pthread_mutex_unlock(&c->finished_task_mutex);
+    return task.return_code;
+}
diff --git a/libavcodec/frame_thread_encoder.h b/libavcodec/frame_thread_encoder.h
new file mode 100644
index 0000000..1da0ce1
--- /dev/null
+++ b/libavcodec/frame_thread_encoder.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2012 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_FRAME_THREAD_ENCODER_H
+#define AVCODEC_FRAME_THREAD_ENCODER_H
+
+#include "avcodec.h"
+
+/**
+ * Start the worker threads used for frame-threaded encoding.
+ * @return 0 on success or when frame threading is not used, <0 on error
+ */
+int ff_frame_thread_encoder_init(AVCodecContext *avctx, AVDictionary *options);
+
+/** Ask all worker threads to exit, join them and free the threading context. */
+void ff_frame_thread_encoder_free(AVCodecContext *avctx);
+
+/**
+ * Queue frame for encoding and fetch the oldest finished packet, if one is due.
+ * *got_packet_ptr is set to 1 when pkt holds data.
+ */
+int ff_thread_video_encode_frame(AVCodecContext *avctx, AVPacket *pkt, const AVFrame *frame, int *got_packet_ptr);
+
+#endif /* AVCODEC_FRAME_THREAD_ENCODER_H */
diff --git a/libavcodec/fraps.c b/libavcodec/fraps.c
index 4b4b02c..c49866e 100644
--- a/libavcodec/fraps.c
+++ b/libavcodec/fraps.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2005 Roine Gustafsson
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -37,8 +37,10 @@
 #include "bytestream.h"
 #include "bswapdsp.h"
 #include "internal.h"
+#include "thread.h"
 
 #define FPS_TAG MKTAG('F', 'P', 'S', 'x')
+#define VLC_BITS 11
 
 /**
  * local variable storage
@@ -46,7 +48,6 @@
 typedef struct FrapsContext {
     AVCodecContext *avctx;
     BswapDSPContext bdsp;
-    AVFrame *frame;
     uint8_t *tmpbuf;
     int tmpbuf_size;
 } FrapsContext;
@@ -61,15 +62,9 @@ static av_cold int decode_init(AVCodecContext *avctx)
 {
     FrapsContext * const s = avctx->priv_data;
 
-    avctx->pix_fmt     = AV_PIX_FMT_NONE; /* set in decode_frame */
-
     s->avctx  = avctx;
     s->tmpbuf = NULL;
 
-    s->frame = av_frame_alloc();
-    if (!s->frame)
-        return AVERROR(ENOMEM);
-
     ff_bswapdsp_init(&s->bdsp);
 
     return 0;
@@ -100,7 +95,8 @@ static int fraps2_decode_plane(FrapsContext *s, uint8_t *dst, int stride, int w,
     for (i = 0; i < 256; i++)
         nodes[i].count = bytestream_get_le32(&src);
     size -= 1024;
-    if ((ret = ff_huff_build_tree(s->avctx, &vlc, 256, nodes, huff_cmp,
+    if ((ret = ff_huff_build_tree(s->avctx, &vlc, 256, VLC_BITS,
+                                  nodes, huff_cmp,
                                   FF_HUFFMAN_FLAG_ZERO_COUNT)) < 0)
         return ret;
     /* we have built Huffman table and are ready to decode plane */
@@ -112,7 +108,7 @@ static int fraps2_decode_plane(FrapsContext *s, uint8_t *dst, int stride, int w,
     init_get_bits(&gb, s->tmpbuf, size * 8);
     for (j = 0; j < h; j++) {
         for (i = 0; i < w*step; i += step) {
-            dst[i] = get_vlc2(&gb, vlc.table, 9, 3);
+            dst[i] = get_vlc2(&gb, vlc.table, VLC_BITS, 3);
             /* lines are stored as deltas between previous lines
              * and we need to add 0x80 to the first lines of chroma planes
              */
@@ -138,17 +134,17 @@ static int decode_frame(AVCodecContext *avctx,
     FrapsContext * const s = avctx->priv_data;
     const uint8_t *buf     = avpkt->data;
     int buf_size           = avpkt->size;
-    AVFrame *frame         = data;
-    AVFrame * const f      = s->frame;
+    ThreadFrame frame = { .f = data };
+    AVFrame * const f = data;
     uint32_t header;
     unsigned int version,header_size;
     unsigned int x, y;
     const uint32_t *buf32;
     uint32_t *luma1,*luma2,*cb,*cr;
     uint32_t offs[4];
-    int i, j, ret, is_chroma, planes;
-    enum AVPixelFormat pix_fmt;
-    int prev_pic_bit, expected_size;
+    int i, j, ret, is_chroma;
+    const int planes = 3;
+    uint8_t *out;
 
     if (buf_size < 4) {
         av_log(avctx, AV_LOG_ERROR, "Packet is too short\n");
@@ -158,7 +154,6 @@ static int decode_frame(AVCodecContext *avctx,
     header      = AV_RL32(buf);
     version     = header & 0xff;
     header_size = (header & (1<<30))? 8 : 4; /* bit 30 means pad to 8 bytes */
-    prev_pic_bit = header & (1U << 31); /* bit 31 means same as previous pic */
 
     if (version > 5) {
         av_log(avctx, AV_LOG_ERROR,
@@ -167,89 +162,92 @@ static int decode_frame(AVCodecContext *avctx,
         return AVERROR_PATCHWELCOME;
     }
 
-    buf += 4;
-    if (header_size == 8)
-        buf += 4;
+    buf += header_size;
 
-    pix_fmt = version & 1 ? AV_PIX_FMT_BGR24 : AV_PIX_FMT_YUVJ420P;
-    if (avctx->pix_fmt != pix_fmt && f->data[0]) {
-        av_frame_unref(f);
+    if (version < 2) {
+        unsigned needed_size = avctx->width * avctx->height * 3;
+        if (version == 0) needed_size /= 2;
+        needed_size += header_size;
+        /* bit 31 means same as previous pic */
+        if (header & (1U<<31)) {
+            *got_frame = 0;
+            return buf_size;
+        }
+        if (buf_size != needed_size) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Invalid frame length %d (should be %d)\n",
+                   buf_size, needed_size);
+            return AVERROR_INVALIDDATA;
+        }
+    } else {
+        /* skip frame */
+        if (buf_size == 8) {
+            *got_frame = 0;
+            return buf_size;
+        }
+        if (AV_RL32(buf) != FPS_TAG || buf_size < planes*1024 + 24) {
+            av_log(avctx, AV_LOG_ERROR, "Fraps: error in data stream\n");
+            return AVERROR_INVALIDDATA;
+        }
+        for (i = 0; i < planes; i++) {
+            offs[i] = AV_RL32(buf + 4 + i * 4);
+            if (offs[i] >= buf_size - header_size || (i && offs[i] <= offs[i - 1] + 1024)) {
+                av_log(avctx, AV_LOG_ERROR, "Fraps: plane %i offset is out of bounds\n", i);
+                return AVERROR_INVALIDDATA;
+            }
+        }
+        offs[planes] = buf_size - header_size;
+        for (i = 0; i < planes; i++) {
+            av_fast_padded_malloc(&s->tmpbuf, &s->tmpbuf_size, offs[i + 1] - offs[i] - 1024);
+            if (!s->tmpbuf)
+                return AVERROR(ENOMEM);
+        }
     }
-    avctx->pix_fmt = pix_fmt;
+
+    f->pict_type = AV_PICTURE_TYPE_I;
+    f->key_frame = 1;
+
+    avctx->pix_fmt = version & 1 ? AV_PIX_FMT_BGR24 : AV_PIX_FMT_YUVJ420P;
     avctx->color_range = version & 1 ? AVCOL_RANGE_UNSPECIFIED
                                      : AVCOL_RANGE_JPEG;
+    avctx->colorspace = version & 1 ? AVCOL_SPC_UNSPECIFIED : AVCOL_SPC_BT709;
 
-    expected_size = header_size;
+    if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
+        return ret;
 
     switch (version) {
     case 0:
     default:
         /* Fraps v0 is a reordered YUV420 */
-        if (!prev_pic_bit)
-            expected_size += avctx->width * avctx->height * 3 / 2;
-        if (buf_size != expected_size) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "Invalid frame length %d (should be %d)\n",
-                   buf_size, expected_size);
-            return AVERROR_INVALIDDATA;
-        }
-
         if (((avctx->width % 8) != 0) || ((avctx->height % 2) != 0)) {
             av_log(avctx, AV_LOG_ERROR, "Invalid frame size %dx%d\n",
                    avctx->width, avctx->height);
             return AVERROR_INVALIDDATA;
         }
 
-        if ((ret = ff_reget_buffer(avctx, f)) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
-            return ret;
-        }
-        f->pict_type = prev_pic_bit ? AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_I;
-        f->key_frame = f->pict_type == AV_PICTURE_TYPE_I;
-
-        if (f->pict_type == AV_PICTURE_TYPE_I) {
-            buf32 = (const uint32_t*)buf;
-            for (y = 0; y < avctx->height / 2; y++) {
-                luma1 = (uint32_t*)&f->data[0][ y * 2      * f->linesize[0]];
-                luma2 = (uint32_t*)&f->data[0][(y * 2 + 1) * f->linesize[0]];
-                cr    = (uint32_t*)&f->data[1][ y          * f->linesize[1]];
-                cb    = (uint32_t*)&f->data[2][ y          * f->linesize[2]];
-                for (x = 0; x < avctx->width; x += 8) {
-                    *(luma1++) = *(buf32++);
-                    *(luma1++) = *(buf32++);
-                    *(luma2++) = *(buf32++);
-                    *(luma2++) = *(buf32++);
-                    *(cr++) = *(buf32++);
-                    *(cb++) = *(buf32++);
-                }
+        buf32 = (const uint32_t*)buf;
+        for (y = 0; y < avctx->height / 2; y++) {
+            luma1 = (uint32_t*)&f->data[0][  y * 2      * f->linesize[0] ];
+            luma2 = (uint32_t*)&f->data[0][ (y * 2 + 1) * f->linesize[0] ];
+            cr    = (uint32_t*)&f->data[1][  y          * f->linesize[1] ];
+            cb    = (uint32_t*)&f->data[2][  y          * f->linesize[2] ];
+            for (x = 0; x < avctx->width; x += 8) {
+                *luma1++ = *buf32++;
+                *luma1++ = *buf32++;
+                *luma2++ = *buf32++;
+                *luma2++ = *buf32++;
+                *cr++    = *buf32++;
+                *cb++    = *buf32++;
             }
         }
         break;
 
     case 1:
         /* Fraps v1 is an upside-down BGR24 */
-        if (!prev_pic_bit)
-            expected_size += avctx->width * avctx->height * 3;
-        if (buf_size != expected_size) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "Invalid frame length %d (should be %d)\n",
-                   buf_size, expected_size);
-            return AVERROR_INVALIDDATA;
-        }
-
-        if ((ret = ff_reget_buffer(avctx, f)) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
-            return ret;
-        }
-        f->pict_type = prev_pic_bit ? AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_I;
-        f->key_frame = f->pict_type == AV_PICTURE_TYPE_I;
-
-        if (f->pict_type == AV_PICTURE_TYPE_I) {
             for (y = 0; y<avctx->height; y++)
                 memcpy(&f->data[0][(avctx->height - y - 1) * f->linesize[0]],
                        &buf[y * avctx->width * 3],
                        3 * avctx->width);
-        }
         break;
 
     case 2:
@@ -258,37 +256,8 @@ static int decode_frame(AVCodecContext *avctx,
          * Fraps v2 is Huffman-coded YUV420 planes
          * Fraps v4 is virtually the same
          */
-        planes = 3;
-        if ((ret = ff_reget_buffer(avctx, f)) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
-            return ret;
-        }
-        /* skip frame */
-        if (buf_size == 8) {
-            f->pict_type = AV_PICTURE_TYPE_P;
-            f->key_frame = 0;
-            break;
-        }
-        f->pict_type = AV_PICTURE_TYPE_I;
-        f->key_frame = 1;
-        if ((AV_RL32(buf) != FPS_TAG) || (buf_size < (planes * 1024 + 24))) {
-            av_log(avctx, AV_LOG_ERROR, "Fraps: error in data stream\n");
-            return AVERROR_INVALIDDATA;
-        }
-        for (i = 0; i < planes; i++) {
-            offs[i] = AV_RL32(buf + 4 + i * 4);
-            if (offs[i] >= buf_size || (i && offs[i] <= offs[i - 1] + 1024)) {
-                av_log(avctx, AV_LOG_ERROR, "Fraps: plane %i offset is out of bounds\n", i);
-                return AVERROR_INVALIDDATA;
-            }
-        }
-        offs[planes] = buf_size;
         for (i = 0; i < planes; i++) {
             is_chroma = !!i;
-            av_fast_padded_malloc(&s->tmpbuf, &s->tmpbuf_size,
-                                  offs[i + 1] - offs[i] - 1024);
-            if (!s->tmpbuf)
-                return AVERROR(ENOMEM);
             if ((ret = fraps2_decode_plane(s, f->data[i], f->linesize[i],
                                            avctx->width  >> is_chroma,
                                            avctx->height >> is_chroma,
@@ -302,36 +271,7 @@ static int decode_frame(AVCodecContext *avctx,
     case 3:
     case 5:
         /* Virtually the same as version 4, but is for RGB24 */
-        planes = 3;
-        if ((ret = ff_reget_buffer(avctx, f)) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
-            return ret;
-        }
-        /* skip frame */
-        if (buf_size == 8) {
-            f->pict_type = AV_PICTURE_TYPE_P;
-            f->key_frame = 0;
-            break;
-        }
-        f->pict_type = AV_PICTURE_TYPE_I;
-        f->key_frame = 1;
-        if ((AV_RL32(buf) != FPS_TAG)||(buf_size < (planes*1024 + 24))) {
-            av_log(avctx, AV_LOG_ERROR, "Fraps: error in data stream\n");
-            return AVERROR_INVALIDDATA;
-        }
-        for (i = 0; i < planes; i++) {
-            offs[i] = AV_RL32(buf + 4 + i * 4);
-            if (offs[i] >= buf_size || (i && offs[i] <= offs[i - 1] + 1024)) {
-                av_log(avctx, AV_LOG_ERROR, "Fraps: plane %i offset is out of bounds\n", i);
-                return AVERROR_INVALIDDATA;
-            }
-        }
-        offs[planes] = buf_size;
         for (i = 0; i < planes; i++) {
-            av_fast_padded_malloc(&s->tmpbuf, &s->tmpbuf_size,
-                                  offs[i + 1] - offs[i] - 1024);
-            if (!s->tmpbuf)
-                return AVERROR(ENOMEM);
             if ((ret = fraps2_decode_plane(s, f->data[0] + i + (f->linesize[0] * (avctx->height - 1)),
                                            -f->linesize[0], avctx->width, avctx->height,
                                            buf + offs[i], offs[i + 1] - offs[i], 0, 3)) < 0) {
@@ -339,18 +279,20 @@ static int decode_frame(AVCodecContext *avctx,
                 return ret;
             }
         }
+        out = f->data[0];
         // convert pseudo-YUV into real RGB
         for (j = 0; j < avctx->height; j++) {
-            for (i = 0; i < avctx->width; i++) {
-                f->data[0][0 + i*3 + j*f->linesize[0]] += f->data[0][1 + i*3 + j*f->linesize[0]];
-                f->data[0][2 + i*3 + j*f->linesize[0]] += f->data[0][1 + i*3 + j*f->linesize[0]];
+            uint8_t *line_end = out + 3*avctx->width;
+            while (out < line_end) {
+                out[0]  += out[1];
+                out[2]  += out[1];
+                out += 3;
             }
+            out += f->linesize[0] - 3*avctx->width;
         }
         break;
     }
 
-    if ((ret = av_frame_ref(frame, f)) < 0)
-        return ret;
     *got_frame = 1;
 
     return buf_size;
@@ -366,8 +308,6 @@ static av_cold int decode_end(AVCodecContext *avctx)
 {
     FrapsContext *s = (FrapsContext*)avctx->priv_data;
 
-    av_frame_free(&s->frame);
-
     av_freep(&s->tmpbuf);
     return 0;
 }
@@ -382,5 +322,5 @@ AVCodec ff_fraps_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
 };
diff --git a/libavcodec/frwu.c b/libavcodec/frwu.c
index 568b94f..c778dbd 100644
--- a/libavcodec/frwu.c
+++ b/libavcodec/frwu.c
@@ -3,26 +3,32 @@
  *
  * Copyright (c) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "avcodec.h"
 #include "bytestream.h"
 #include "internal.h"
+#include "libavutil/opt.h"
+
+typedef struct {
+    AVClass *av_class;
+    int change_field_order;
+} FRWUContext;
 
 static av_cold int decode_init(AVCodecContext *avctx)
 {
@@ -38,6 +44,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
 static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                         AVPacket *avpkt)
 {
+    FRWUContext *s = avctx->priv_data;
     int field, ret;
     AVFrame *pic = data;
     const uint8_t *buf = avpkt->data;
@@ -52,15 +59,11 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         return AVERROR_INVALIDDATA;
     }
 
-    if ((ret = ff_get_buffer(avctx, pic, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
         return ret;
-    }
 
     pic->pict_type = AV_PICTURE_TYPE_I;
     pic->key_frame = 1;
-    pic->interlaced_frame = 1;
-    pic->top_field_first = 1;
 
     for (field = 0; field < 2; field++) {
         int i;
@@ -79,9 +82,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             av_log(avctx, AV_LOG_ERROR, "Packet is too small, need %i, have %i\n", field_size, (int)(buf_end - buf));
             return AVERROR_INVALIDDATA;
         }
-        if (field)
+        if (field ^ s->change_field_order) {
             dst += pic->linesize[0];
+        } else if (s->change_field_order) {
+            dst += 2 * pic->linesize[0];
+        }
         for (i = 0; i < field_h; i++) {
+            if (s->change_field_order && field && i == field_h - 1)
+                dst = pic->data[0];
             memcpy(dst, buf, avctx->width * 2);
             buf += avctx->width * 2;
             dst += pic->linesize[0] << 1;
@@ -94,12 +102,27 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     return avpkt->size;
 }
 
+static const AVOption frwu_options[] = {
+    {"change_field_order", "Change field order", offsetof(FRWUContext, change_field_order), FF_OPT_TYPE_INT,
+     {.i64 = 0}, 0, 1, AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM},
+    {NULL}
+};
+
+static const AVClass frwu_class = {
+    .class_name = "frwu Decoder",
+    .item_name  = av_default_item_name,
+    .option     = frwu_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVCodec ff_frwu_decoder = {
     .name           = "frwu",
     .long_name      = NULL_IF_CONFIG_SMALL("Forward Uncompressed"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_FRWU,
+    .priv_data_size = sizeof(FRWUContext),
     .init           = decode_init,
     .decode         = decode_frame,
     .capabilities   = CODEC_CAP_DR1,
+    .priv_class     = &frwu_class,
 };
diff --git a/libavcodec/g2meet.c b/libavcodec/g2meet.c
index c405f38..1004e19 100644
--- a/libavcodec/g2meet.c
+++ b/libavcodec/g2meet.c
@@ -2,20 +2,20 @@
  * Go2Webinar decoder
  * Copyright (c) 2012 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -384,6 +384,8 @@ static int kempf_decode_tile(G2MContext *c, int tile_x, int tile_y,
         src += 3;
     }
     npal = *src++ + 1;
+    if (src_end - src < npal * 3)
+        return AVERROR_INVALIDDATA;
     memcpy(pal, src, npal * 3);
     src += npal * 3;
     if (sub_type != 2) {
@@ -400,7 +402,7 @@ static int kempf_decode_tile(G2MContext *c, int tile_x, int tile_y,
     zsize = (src[0] << 8) | src[1];
     src  += 2;
 
-    if (src_end - src < zsize)
+    if (src_end - src < zsize + (sub_type != 2))
         return AVERROR_INVALIDDATA;
 
     ret = uncompress(c->kempf_buf, &dlen, src, zsize);
@@ -422,6 +424,8 @@ static int kempf_decode_tile(G2MContext *c, int tile_x, int tile_y,
     for (i = 0; i < (FFALIGN(height, 16) >> 4); i++) {
         for (j = 0; j < (FFALIGN(width, 16) >> 4); j++) {
             if (!bits) {
+                if (src >= src_end)
+                    return AVERROR_INVALIDDATA;
                 bitbuf = *src++;
                 bits   = 8;
             }
@@ -455,8 +459,8 @@ static int g2m_init_buffers(G2MContext *c)
     int aligned_height;
 
     if (!c->framebuf || c->old_width < c->width || c->old_height < c->height) {
-        c->framebuf_stride = FFALIGN(c->width * 3, 16);
-        aligned_height     = FFALIGN(c->height,    16);
+        c->framebuf_stride = FFALIGN(c->width + 15, 16) * 3;
+        aligned_height     = c->height + 15;
         av_free(c->framebuf);
         c->framebuf = av_mallocz(c->framebuf_stride * aligned_height);
         if (!c->framebuf)
@@ -465,7 +469,7 @@ static int g2m_init_buffers(G2MContext *c)
     if (!c->synth_tile || !c->jpeg_tile ||
         c->old_tile_w < c->tile_width ||
         c->old_tile_h < c->tile_height) {
-        c->tile_stride = FFALIGN(c->tile_width * 3, 16);
+        c->tile_stride = FFALIGN(c->tile_width, 16) * 3;
         aligned_height = FFALIGN(c->tile_height,    16);
         av_free(c->synth_tile);
         av_free(c->jpeg_tile);
@@ -501,7 +505,7 @@ static int g2m_load_cursor(AVCodecContext *avctx, G2MContext *c,
     cursor_hot_y = bytestream2_get_byte(gb);
     cursor_fmt   = bytestream2_get_byte(gb);
 
-    cursor_stride = FFALIGN(cursor_w, 32) * 4;
+    cursor_stride = FFALIGN(cursor_w, cursor_fmt==1 ? 32 : 1) * 4;
 
     if (cursor_w < 1 || cursor_w > 256 ||
         cursor_h < 1 || cursor_h > 256) {
@@ -551,7 +555,6 @@ static int g2m_load_cursor(AVCodecContext *avctx, G2MContext *c,
                     bits <<= 1;
                 }
             }
-            dst += c->cursor_stride - c->cursor_w * 4;
         }
 
         dst = c->cursor;
@@ -583,7 +586,6 @@ static int g2m_load_cursor(AVCodecContext *avctx, G2MContext *c,
                     bits <<= 1;
                 }
             }
-            dst += c->cursor_stride - c->cursor_w * 4;
         }
         break;
     case 32: // full colour
@@ -597,7 +599,6 @@ static int g2m_load_cursor(AVCodecContext *avctx, G2MContext *c,
                 *dst++ = val >> 16;
                 *dst++ = val >> 24;
             }
-            dst += c->cursor_stride - c->cursor_w * 4;
         }
         break;
     default:
@@ -703,6 +704,7 @@ static int g2m_decode_frame(AVCodecContext *avctx, void *data,
         }
         switch (chunk_type) {
         case DISPLAY_INFO:
+            got_header =
             c->got_header = 0;
             if (chunk_size < 21) {
                 av_log(avctx, AV_LOG_ERROR, "Invalid display info size %"PRIu32"\n",
@@ -719,14 +721,18 @@ static int g2m_decode_frame(AVCodecContext *avctx, void *data,
                 ret = AVERROR_INVALIDDATA;
                 goto header_fail;
             }
-            if (c->width != avctx->width || c->height != avctx->height)
-                ff_set_dimensions(avctx, c->width, c->height);
+            if (c->width != avctx->width || c->height != avctx->height) {
+                ret = ff_set_dimensions(avctx, c->width, c->height);
+                if (ret < 0)
+                    goto header_fail;
+            }
             c->compression = bytestream2_get_be32(&bc);
             if (c->compression != 2 && c->compression != 3) {
                 av_log(avctx, AV_LOG_ERROR,
                        "Unknown compression method %d\n",
                        c->compression);
-                return AVERROR_PATCHWELCOME;
+                ret = AVERROR_PATCHWELCOME;
+                goto header_fail;
             }
             c->tile_width  = bytestream2_get_be32(&bc);
             c->tile_height = bytestream2_get_be32(&bc);
@@ -746,7 +752,8 @@ static int g2m_decode_frame(AVCodecContext *avctx, void *data,
                     (chunk_size - 21) < 16) {
                     av_log(avctx, AV_LOG_ERROR,
                            "Display info: missing bitmasks!\n");
-                    return AVERROR_INVALIDDATA;
+                    ret = AVERROR_INVALIDDATA;
+                    goto header_fail;
                 }
                 r_mask = bytestream2_get_be32(&bc);
                 g_mask = bytestream2_get_be32(&bc);
@@ -755,11 +762,13 @@ static int g2m_decode_frame(AVCodecContext *avctx, void *data,
                     av_log(avctx, AV_LOG_ERROR,
                            "Invalid or unsupported bitmasks: R=%"PRIX32", G=%"PRIX32", B=%"PRIX32"\n",
                            r_mask, g_mask, b_mask);
-                    return AVERROR_PATCHWELCOME;
+                    ret = AVERROR_PATCHWELCOME;
+                    goto header_fail;
                 }
             } else {
                 avpriv_request_sample(avctx, "bpp=%d", c->bpp);
-                return AVERROR_PATCHWELCOME;
+                ret = AVERROR_PATCHWELCOME;
+                goto header_fail;
             }
             if (g2m_init_buffers(c)) {
                 ret = AVERROR(ENOMEM);
@@ -835,11 +844,9 @@ static int g2m_decode_frame(AVCodecContext *avctx, void *data,
     if (got_header)
         c->got_header = 1;
 
-    if (c->width && c->height) {
-        if ((ret = ff_get_buffer(avctx, pic, 0)) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if (c->width && c->height && c->framebuf) {
+        if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
             return ret;
-        }
 
         pic->key_frame = got_header;
         pic->pict_type = got_header ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
diff --git a/libavcodec/g722.c b/libavcodec/g722.c
index a911bc7..2c04c40 100644
--- a/libavcodec/g722.c
+++ b/libavcodec/g722.c
@@ -7,20 +7,20 @@
  * Copyright (c) 2009 Kenan Gillet
  * Copyright (c) 2010 Martin Storsjo
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/g722.h b/libavcodec/g722.h
index 71d03fc..3f89827 100644
--- a/libavcodec/g722.h
+++ b/libavcodec/g722.h
@@ -5,20 +5,20 @@
  * Copyright (c) 2009 Kenan Gillet
  * Copyright (c) 2010 Martin Storsjo
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/g722dec.c b/libavcodec/g722dec.c
index 26f288b..470fbbf 100644
--- a/libavcodec/g722dec.c
+++ b/libavcodec/g722dec.c
@@ -5,20 +5,20 @@
  * Copyright (c) 2009 Kenan Gillet
  * Copyright (c) 2010 Martin Storsjo
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -94,10 +94,8 @@ static int g722_decode_frame(AVCodecContext *avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = avpkt->size * 2;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     out_buf = (int16_t *)frame->data[0];
 
     init_get_bits(&gb, avpkt->data, avpkt->size * 8);
diff --git a/libavcodec/g722enc.c b/libavcodec/g722enc.c
index e7b67da..c4d6c7b 100644
--- a/libavcodec/g722enc.c
+++ b/libavcodec/g722enc.c
@@ -5,20 +5,20 @@
  * Copyright (c) 2009 Kenan Gillet
  * Copyright (c) 2010 Martin Storsjo
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,6 +27,7 @@
  * G.722 ADPCM audio encoder
  */
 
+#include "libavutil/avassert.h"
 #include "avcodec.h"
 #include "internal.h"
 #include "g722.h"
@@ -236,7 +237,7 @@ static void g722_encode_trellis(G722Context *c, int trellis,
                     continue;\
                 if (heap_pos[index] < frontier) {\
                     pos = heap_pos[index]++;\
-                    assert(pathn[index] < FREEZE_INTERVAL * frontier);\
+                    av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\
                     node = nodes_next[index][pos] = next[index]++;\
                     node->path = pathn[index]++;\
                 } else {\
@@ -356,10 +357,8 @@ static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     int nb_samples, out_size, ret;
 
     out_size = (frame->nb_samples + 1) / 2;
-    if ((ret = ff_alloc_packet(avpkt, out_size))) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+    if ((ret = ff_alloc_packet2(avctx, avpkt, out_size)) < 0)
         return ret;
-    }
 
     nb_samples = frame->nb_samples - (frame->nb_samples & 1);
 
diff --git a/libavcodec/g723_1.c b/libavcodec/g723_1.c
index bbdb404..66afd6a 100644
--- a/libavcodec/g723_1.c
+++ b/libavcodec/g723_1.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2006 Benjamin Larsson
  * Copyright (c) 2010 Mohamed Naufal Basheer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -33,47 +33,12 @@
 #include "get_bits.h"
 #include "acelp_vectors.h"
 #include "celp_filters.h"
+#include "celp_math.h"
 #include "g723_1_data.h"
 #include "internal.h"
 
 #define CNG_RANDOM_SEED 12345
 
-/**
- * G723.1 frame types
- */
-enum FrameType {
-    ACTIVE_FRAME,        ///< Active speech
-    SID_FRAME,           ///< Silence Insertion Descriptor frame
-    UNTRANSMITTED_FRAME
-};
-
-enum Rate {
-    RATE_6300,
-    RATE_5300
-};
-
-/**
- * G723.1 unpacked data subframe
- */
-typedef struct {
-    int ad_cb_lag;     ///< adaptive codebook lag
-    int ad_cb_gain;
-    int dirac_train;
-    int pulse_sign;
-    int grid_index;
-    int amp_index;
-    int pulse_pos;
-} G723_1_Subframe;
-
-/**
- * Pitch postfilter parameters
- */
-typedef struct {
-    int     index;    ///< postfilter backward/forward lag
-    int16_t opt_gain; ///< optimal gain
-    int16_t sc_gain;  ///< scaling gain
-} PPFParam;
-
 typedef struct g723_1_context {
     AVClass *class;
 
@@ -100,10 +65,21 @@ typedef struct g723_1_context {
     int sid_gain;
     int cur_gain;
     int reflection_coef;
-    int pf_gain;
+    int pf_gain;                 ///< formant postfilter
+                                 ///< gain scaling unit memory
     int postfilter;
 
     int16_t audio[FRAME_LEN + LPC_ORDER + PITCH_MAX + 4];
+    int16_t prev_data[HALF_FRAME_LEN];
+    int16_t prev_weight_sig[PITCH_MAX];
+
+
+    int16_t hpf_fir_mem;                   ///< highpass filter fir
+    int     hpf_iir_mem;                   ///< and iir memories
+    int16_t perf_fir_mem[LPC_ORDER];       ///< perceptual filter fir
+    int16_t perf_iir_mem[LPC_ORDER];       ///< and iir memories
+
+    int16_t harmonic_mem[PITCH_MAX];
 } G723_1_Context;
 
 static av_cold int g723_1_decode_init(AVCodecContext *avctx)
@@ -113,7 +89,6 @@ static av_cold int g723_1_decode_init(AVCodecContext *avctx)
     avctx->channel_layout = AV_CH_LAYOUT_MONO;
     avctx->sample_fmt     = AV_SAMPLE_FMT_S16;
     avctx->channels       = 1;
-    avctx->sample_rate    = 8000;
     p->pf_gain            = 1 << 12;
 
     memcpy(p->prev_lsp, dc_lsp, LPC_ORDER * sizeof(*p->prev_lsp));
@@ -197,13 +172,13 @@ static int unpack_bitstream(G723_1_Context *p, const uint8_t *buf,
         }
     }
 
-    p->subframe[0].grid_index = get_bits(&gb, 1);
-    p->subframe[1].grid_index = get_bits(&gb, 1);
-    p->subframe[2].grid_index = get_bits(&gb, 1);
-    p->subframe[3].grid_index = get_bits(&gb, 1);
+    p->subframe[0].grid_index = get_bits1(&gb);
+    p->subframe[1].grid_index = get_bits1(&gb);
+    p->subframe[2].grid_index = get_bits1(&gb);
+    p->subframe[3].grid_index = get_bits1(&gb);
 
     if (p->cur_rate == RATE_6300) {
-        skip_bits(&gb, 1);  /* skip reserved bit */
+        skip_bits1(&gb);  /* skip reserved bit */
 
         /* Compute pulse_pos index using the 13-bit combined position index */
         temp = get_bits(&gb, 13);
@@ -247,32 +222,27 @@ static int unpack_bitstream(G723_1_Context *p, const uint8_t *buf,
 /**
  * Bitexact implementation of sqrt(val/2).
  */
-static int16_t square_root(int val)
+static int16_t square_root(unsigned val)
 {
-    int16_t res = 0;
-    int16_t exp = 0x4000;
-    int i;
+    av_assert2(!(val & 0x80000000));
 
-    for (i = 0; i < 14; i ++) {
-        int res_exp = res + exp;
-        if (val >= res_exp * res_exp << 1)
-            res += exp;
-        exp >>= 1;
-    }
-    return res;
+    return (ff_sqrt(val << 1) >> 1) & (~1);
 }
 
 /**
  * Calculate the number of left-shifts required for normalizing the input.
  *
  * @param num   input number
- * @param width width of the input, 16 bits(0) / 32 bits(1)
+ * @param width width of the input, 15 or 31 bits
  */
 static int normalize_bits(int num, int width)
 {
     return width - av_log2(num) - 1;
 }
 
+#define normalize_bits_int16(num) normalize_bits(num, 15)
+#define normalize_bits_int32(num) normalize_bits(num, 31)
+
 /**
  * Scale vector contents based on the largest of their absolutes.
  */
@@ -281,12 +251,11 @@ static int scale_vector(int16_t *dst, const int16_t *vector, int length)
     int bits, max = 0;
     int i;
 
-
     for (i = 0; i < length; i++)
         max |= FFABS(vector[i]);
 
-    max   = FFMIN(max, 0x7FFF);
-    bits  = normalize_bits(max, 15);
+    bits= 14 - av_log2_16bit(max);
+    bits= FFMAX(bits, 0);
 
     for (i = 0; i < length; i++)
         dst[i] = vector[i] << bits >> 3;
@@ -371,7 +340,7 @@ static void inverse_quant(int16_t *cur_lsp, int16_t *prev_lsp,
  * @param b 16 bit multiplier
  */
 #define MULL2(a, b) \
-        ((((a) >> 16) * (b) << 1) + (((a) & 0xffff) * (b) >> 15))
+        MULL(a,b,15)
 
 /**
  * Convert LSP frequencies to LPC coefficients.
@@ -386,7 +355,7 @@ static void lsp2lpc(int16_t *lpc)
 
     /* Calculate negative cosine */
     for (j = 0; j < LPC_ORDER; j++) {
-        int index     = lpc[j] >> 7;
+        int index     = (lpc[j] >> 7) & 0x1FF;
         int offset    = lpc[j] & 0x7f;
         int temp1     = cos_tab[index] << 16;
         int temp2     = (cos_tab[index + 1] - cos_tab[index]) *
@@ -567,13 +536,8 @@ static void get_residual(int16_t *residual, int16_t *prev_excitation, int lag)
 
 static int dot_product(const int16_t *a, const int16_t *b, int length)
 {
-    int i, sum = 0;
-
-    for (i = 0; i < length; i++) {
-        int prod = a[i] * b[i];
-        sum = av_sat_dadd32(sum, prod);
-    }
-    return sum;
+    int sum = ff_dot_product(a,b,length);
+    return av_sat_add32(sum, sum);
 }
 
 /**
@@ -593,16 +557,16 @@ static void gen_acb_excitation(int16_t *vector, int16_t *prev_excitation,
     get_residual(residual, prev_excitation, lag);
 
     /* Select quantization table */
-    if (cur_rate == RATE_6300 && pitch_lag < SUBFRAME_LEN - 2)
+    if (cur_rate == RATE_6300 && pitch_lag < SUBFRAME_LEN - 2) {
         cb_ptr = adaptive_cb_gain85;
-    else
+    } else
         cb_ptr = adaptive_cb_gain170;
 
     /* Calculate adaptive vector */
     cb_ptr += subfrm->ad_cb_gain * 20;
     for (i = 0; i < SUBFRAME_LEN; i++) {
-        sum = dot_product(residual + i, cb_ptr, PITCH_ORDER);
-        vector[i] = av_sat_dadd32(1 << 15, sum) >> 16;
+        sum = ff_dot_product(residual + i, cb_ptr, PITCH_ORDER);
+        vector[i] = av_sat_dadd32(1 << 15, av_sat_add32(sum, sum)) >> 16;
     }
 }
 
@@ -810,9 +774,9 @@ static int comp_interp_index(G723_1_Context *p, int pitch_lag,
 
     temp = best_eng * *exc_eng >> 3;
 
-    if (temp < ccr * ccr)
+    if (temp < ccr * ccr) {
         return index;
-    else
+    } else
         return 0;
 }
 
@@ -852,21 +816,24 @@ static void residual_interp(int16_t *buf, int16_t *out, int lag,
  * @param iir_coef IIR coefficients
  * @param src      source vector
  * @param dest     destination vector
+ * @param width    width of the output, 16 bits(0) / 32 bits(1)
  */
-static inline void iir_filter(int16_t *fir_coef, int16_t *iir_coef,
-                              int16_t *src, int *dest)
-{
-    int m, n;
-
-    for (m = 0; m < SUBFRAME_LEN; m++) {
-        int64_t filter = 0;
-        for (n = 1; n <= LPC_ORDER; n++) {
-            filter -= fir_coef[n - 1] * src[m - n] -
-                      iir_coef[n - 1] * (dest[m - n] >> 16);
-        }
-
-        dest[m] = av_clipl_int32((src[m] << 16) + (filter << 3) + (1 << 15));
-    }
+#define iir_filter(fir_coef, iir_coef, src, dest, width)\
+{\
+    int m, n;\
+    int res_shift = 16 & ~-(width);\
+    int in_shift  = 16 - res_shift;\
+\
+    for (m = 0; m < SUBFRAME_LEN; m++) {\
+        int64_t filter = 0;\
+        for (n = 1; n <= LPC_ORDER; n++) {\
+            filter -= (fir_coef)[n - 1] * (src)[m - n] -\
+                      (iir_coef)[n - 1] * ((dest)[m - n] >> in_shift);\
+        }\
+\
+        (dest)[m] = av_clipl_int32(((src)[m] << 16) + (filter << 3) +\
+                                   (1 << 15)) >> res_shift;\
+    }\
 }
 
 /**
@@ -937,13 +904,12 @@ static void formant_postfilter(G723_1_Context *p, int16_t *lpc,
                                  (1 << 14)) >> 15;
         }
         iir_filter(filter_coef[0], filter_coef[1], buf + i,
-                   filter_signal + i);
+                   filter_signal + i, 1);
         lpc += LPC_ORDER;
     }
 
-    memcpy(p->fir_mem, buf + FRAME_LEN, LPC_ORDER * sizeof(*p->fir_mem));
-    memcpy(p->iir_mem, filter_signal + FRAME_LEN,
-           LPC_ORDER * sizeof(*p->iir_mem));
+    memcpy(p->fir_mem, buf + FRAME_LEN, LPC_ORDER * sizeof(int16_t));
+    memcpy(p->iir_mem, filter_signal + FRAME_LEN, LPC_ORDER * sizeof(int));
 
     buf += LPC_ORDER;
     signal_ptr = filter_signal + LPC_ORDER;
@@ -1218,10 +1184,8 @@ static int g723_1_decode_frame(AVCodecContext *avctx, void *data,
     }
 
     frame->nb_samples = FRAME_LEN;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
-         return ret;
-    }
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
 
     out = (int16_t *)frame->data[0];
 
@@ -1376,3 +1340,1145 @@ AVCodec ff_g723_1_decoder = {
     .capabilities   = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
     .priv_class     = &g723_1dec_class,
 };
+
+#if CONFIG_G723_1_ENCODER
+#define BITSTREAM_WRITER_LE
+#include "put_bits.h"
+
+static av_cold int g723_1_encode_init(AVCodecContext *avctx)
+{ /* Check the stream parameters and set up p; 0 on success, AVERROR on failure. */
+    G723_1_Context *p = avctx->priv_data;
+
+    if (avctx->sample_rate != 8000) {
+        av_log(avctx, AV_LOG_ERROR, "Only 8000Hz sample rate supported\n");
+        return AVERROR(EINVAL); /* invalid parameter, as for the checks below */
+    }
+
+    if (avctx->channels != 1) {
+        av_log(avctx, AV_LOG_ERROR, "Only mono supported\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (avctx->bit_rate == 6300) {
+        p->cur_rate = RATE_6300;
+    } else if (avctx->bit_rate == 5300) {
+        av_log(avctx, AV_LOG_ERROR, "Bitrate not supported yet, use 6.3k\n");
+        return AVERROR_PATCHWELCOME; /* recognised rate, but not implemented */
+    } else {
+        av_log(avctx, AV_LOG_ERROR,
+               "Bitrate not supported, use 6.3k\n");
+        return AVERROR(EINVAL);
+    }
+    avctx->frame_size = 240;
+    memcpy(p->prev_lsp, dc_lsp, LPC_ORDER * sizeof(int16_t)); /* LSP history starts at dc_lsp */
+
+    return 0;
+}
+
+/**
+ * Remove DC component from the input signal (filters buf in place).
+ *
+ * @param buf FRAME_LEN input samples, overwritten with the filtered output
+ * @param fir zero memory: the previous input sample, updated on return
+ * @param iir pole memory: the previous unrounded output, updated on return
+ */
+static void highpass_filter(int16_t *buf, int16_t *fir, int *iir)
+{
+    int i;
+    for (i = 0; i < FRAME_LEN; i++) {
+        *iir   = (buf[i] << 15) + ((-*fir) << 15) + MULL2(*iir, 0x7f00);
+        *fir   = buf[i];
+        buf[i] = av_clipl_int32((int64_t)*iir + (1 << 15)) >> 16;
+    }
+}
+
+/**
+ * Estimate the autocorrelation of the Hamming-windowed input vector.
+ *
+ * @param buf      input buffer (LPC_FRAME samples are handed to scale_vector())
+ * @param autocorr output: LPC_ORDER + 1 coefficients, lag 0 first
+ */
+static void comp_autocorr(int16_t *buf, int16_t *autocorr)
+{
+    int i, scale, temp;
+    int16_t vector[LPC_FRAME];
+
+    scale_vector(vector, buf, LPC_FRAME);
+
+    /* Apply the Hamming window */
+    for (i = 0; i < LPC_FRAME; i++)
+        vector[i] = (vector[i] * hamming_window[i] + (1 << 14)) >> 15;
+
+    /* Compute the first autocorrelation coefficient */
+    temp = ff_dot_product(vector, vector, LPC_FRAME);
+
+    /* Apply a white noise correlation factor of (1025/1024) */
+    temp += temp >> 10;
+
+    /* Normalize; the same shift is reused for every lag below */
+    scale = normalize_bits_int32(temp);
+    autocorr[0] = av_clipl_int32((int64_t)(temp << scale) +
+                                 (1 << 15)) >> 16;
+
+    /* Compute the remaining coefficients; all zero if the lag 0 term is zero */
+    if (!autocorr[0]) {
+        memset(autocorr + 1, 0, LPC_ORDER * sizeof(int16_t));
+    } else {
+        for (i = 1; i <= LPC_ORDER; i++) {
+           temp = ff_dot_product(vector, vector + i, LPC_FRAME - i);
+           temp = MULL2((temp << scale), binomial_window[i - 1]);
+           autocorr[i] = av_clipl_int32((int64_t)temp + (1 << 15)) >> 16;
+        }
+    }
+}
+
+/**
+ * Use Levinson-Durbin recursion to compute LPC coefficients from
+ * autocorrelation values.
+ *
+ * @param lpc      output: LPC_ORDER coefficients, zeroed before the recursion
+ * @param autocorr autocorrelation coefficients; the caller passes lag 1 onwards
+ * @param error    initial prediction error; the caller passes the lag 0 term
+ */
+static void levinson_durbin(int16_t *lpc, int16_t *autocorr, int16_t error)
+{
+    int16_t vector[LPC_ORDER];
+    int16_t partial_corr;
+    int i, j, temp;
+
+    memset(lpc, 0, LPC_ORDER * sizeof(int16_t));
+
+    for (i = 0; i < LPC_ORDER; i++) {
+        /* Compute the partial correlation coefficient */
+        temp = 0;
+        for (j = 0; j < i; j++)
+            temp -= lpc[j] * autocorr[i - j - 1];
+        temp = ((autocorr[i] << 13) + temp) << 3;
+
+        if (FFABS(temp) >= (error << 16))
+            break; /* lpc[i] and above keep the zero written by the memset */
+
+        partial_corr = temp / (error << 1);
+
+        lpc[i] = av_clipl_int32((int64_t)(partial_corr << 14) +
+                                (1 << 15)) >> 16;
+
+        /* Update the prediction error */
+        temp  = MULL2(temp, partial_corr);
+        error = av_clipl_int32((int64_t)(error << 16) - temp +
+                                (1 << 15)) >> 16;
+
+        memcpy(vector, lpc, i * sizeof(int16_t)); /* snapshot of lpc[0..i-1] */
+        for (j = 0; j < i; j++) {
+            temp = partial_corr * vector[i - j - 1] << 1;
+            lpc[j] = av_clipl_int32((int64_t)(lpc[j] << 16) - temp +
+                                    (1 << 15)) >> 16;
+        }
+    }
+}
+
+/**
+ * Calculate LPC coefficients for each subframe of the current frame.
+ *
+ * @param buf input samples; subframe j is analysed starting at
+ *            buf + j * SUBFRAME_LEN
+ * @param lpc output: SUBFRAMES consecutive sets of LPC_ORDER coefficients
+ */
+static void comp_lpc_coeff(int16_t *buf, int16_t *lpc)
+{
+    int16_t autocorr[(LPC_ORDER + 1) * SUBFRAMES];
+    int16_t *autocorr_ptr = autocorr;
+    int16_t *lpc_ptr      = lpc;
+    int i, j;
+
+    for (i = 0, j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++) {
+        comp_autocorr(buf + i, autocorr_ptr);
+        levinson_durbin(lpc_ptr, autocorr_ptr + 1, autocorr_ptr[0]);
+
+        lpc_ptr += LPC_ORDER;
+        autocorr_ptr += LPC_ORDER + 1;
+    }
+}
+
+static void lpc2lsp(int16_t *lpc, int16_t *prev_lsp, int16_t *lsp)
+{ /* Turn lpc[] into LPC_ORDER LSP values in lsp[]; see the fallback at the end. */
+    int f[LPC_ORDER + 2]; ///< coefficients of the sum and difference
+                          ///< polynomials (F1, F2) ordered as
+                          ///< f1[0], f2[0], ...., f1[5], f2[5]
+
+    int max, shift, cur_val, prev_val, count, p;
+    int i, j;
+    int64_t temp;
+
+    /* Apply bandwidth expansion on the LPC coefficients (lsp[] is scratch here) */
+    for (i = 0; i < LPC_ORDER; i++)
+        lsp[i] = (lpc[i] * bandwidth_expand[i] + (1 << 14)) >> 15;
+
+    /* Initialize f1[0] and f2[0] to 1 in Q25 */
+    f[0] = f[1] = 1 << 25;
+
+    /* Compute the remaining coefficients */
+    for (i = 0; i < LPC_ORDER / 2; i++) {
+        /* f1 */
+        f[2 * i + 2] = -f[2 * i] - ((lsp[i] + lsp[LPC_ORDER - 1 - i]) << 12);
+        /* f2 */
+        f[2 * i + 3] = f[2 * i + 1] - ((lsp[i] - lsp[LPC_ORDER - 1 - i]) << 12);
+    }
+
+    /* Divide f1[5] and f2[5] by 2 for use in polynomial evaluation */
+    f[LPC_ORDER] >>= 1;
+    f[LPC_ORDER + 1] >>= 1;
+
+    /* Normalize and shorten */
+    max = FFABS(f[0]);
+    for (i = 1; i < LPC_ORDER + 2; i++)
+        max = FFMAX(max, FFABS(f[i]));
+
+    shift = normalize_bits_int32(max);
+
+    for (i = 0; i < LPC_ORDER + 2; i++)
+        f[i] = av_clipl_int32((int64_t)(f[i] << shift) + (1 << 15)) >> 16;
+
+    /**
+     * Evaluate F1 and F2 at uniform intervals of pi/256 along the
+     * unit circle and check for zero crossings.
+     */
+    p    = 0;
+    temp = 0;
+    for (i = 0; i <= LPC_ORDER / 2; i++)
+        temp += f[2 * i] * cos_tab[0];
+    prev_val = av_clipl_int32(temp << 1);
+    count    = 0;
+    for ( i = 1; i < COS_TBL_SIZE / 2; i++) {
+        /* Evaluate */
+        temp = 0;
+        for (j = 0; j <= LPC_ORDER / 2; j++)
+            temp += f[LPC_ORDER - 2 * j + p] * cos_tab[i * j % COS_TBL_SIZE];
+        cur_val = av_clipl_int32(temp << 1);
+
+        /* Check for sign change, indicating a zero crossing */
+        if ((cur_val ^ prev_val) < 0) {
+            int abs_cur  = FFABS(cur_val);
+            int abs_prev = FFABS(prev_val);
+            int sum      = abs_cur + abs_prev;
+
+            shift        = normalize_bits_int32(sum);
+            sum          <<= shift;
+            abs_prev     = abs_prev << shift >> 8;
+            lsp[count++] = ((i - 1) << 7) + (abs_prev >> 1) / (sum >> 16);
+
+            if (count == LPC_ORDER)
+                break;
+
+            /* Switch between sum and difference polynomials */
+            p ^= 1;
+
+            /* Evaluate the newly selected polynomial at the same point */
+            temp = 0;
+            for (j = 0; j <= LPC_ORDER / 2; j++){
+                temp += f[LPC_ORDER - 2 * j + p] *
+                        cos_tab[i * j % COS_TBL_SIZE];
+            }
+            cur_val = av_clipl_int32(temp<<1);
+        }
+        prev_val = cur_val;
+    }
+
+    if (count != LPC_ORDER) /* fewer crossings than needed: reuse prev_lsp */
+        memcpy(lsp, prev_lsp, LPC_ORDER * sizeof(int16_t));
+}
+
+/**
+ * Quantize the current LSP subvector.
+ * Expects weight, lsp and lsp_index to be visible where the macro expands.
+ * @param num    band number, selects lsp_band##num and lsp_index[num]
+ * @param offset offset of the current subvector in an LPC_ORDER vector
+ * @param size   size of the current subvector (temp[] holds at most 4 entries)
+ */
+#define get_index(num, offset, size) \
+{\
+    int error, max = -1;\
+    int16_t temp[4];\
+    int i, j;\
+    for (i = 0; i < LSP_CB_SIZE; i++) {\
+        for (j = 0; j < size; j++){\
+            temp[j] = (weight[j + (offset)] * lsp_band##num[i][j] +\
+                      (1 << 14)) >> 15;\
+        }\
+        error =  dot_product(lsp + (offset), temp, size) << 1;\
+        error -= dot_product(lsp_band##num[i], temp, size);\
+        if (error > max) {\
+            max = error;\
+            lsp_index[num] = i;\
+        }\
+    }\
+}
+
+/**
+ * Vector quantize the LSP frequencies into lsp_index[0..2], one per band.
+ *
+ * @param lsp      the current lsp vector, overwritten with the VQ target
+ * @param prev_lsp the previous lsp vector
+ */
+static void lsp_quantize(uint8_t *lsp_index, int16_t *lsp, int16_t *prev_lsp)
+{
+    int16_t weight[LPC_ORDER];
+    int16_t min, max;
+    int shift, i;
+
+    /* Calculate the VQ weighting vector (the two end weights lack the 0x20 guard) */
+    weight[0] = (1 << 20) / (lsp[1] - lsp[0]);
+    weight[LPC_ORDER - 1] = (1 << 20) /
+                            (lsp[LPC_ORDER - 1] - lsp[LPC_ORDER - 2]);
+
+    for (i = 1; i < LPC_ORDER - 1; i++) {
+        min  = FFMIN(lsp[i] - lsp[i - 1], lsp[i + 1] - lsp[i]);
+        if (min > 0x20)
+            weight[i] = (1 << 20) / min;
+        else
+            weight[i] = INT16_MAX;
+    }
+
+    /* Normalize */
+    max = 0;
+    for (i = 0; i < LPC_ORDER; i++)
+        max = FFMAX(weight[i], max);
+
+    shift = normalize_bits_int16(max);
+    for (i = 0; i < LPC_ORDER; i++) {
+        weight[i] <<= shift;
+    }
+
+    /* Compute the VQ target vector */
+    for (i = 0; i < LPC_ORDER; i++) {
+        lsp[i] -= dc_lsp[i] +
+                  (((prev_lsp[i] - dc_lsp[i]) * 12288 + (1 << 14)) >> 15);
+    }
+
+    get_index(0, 0, 3); /* band 0: lsp[0..2] */
+    get_index(1, 3, 3); /* band 1: lsp[3..5] */
+    get_index(2, 6, 4); /* band 2: lsp[6..9] */
+}
+
+/**
+ * Apply the formant perceptual weighting filter to buf, subframe by subframe.
+ *
+ * @param flt_coef output: 2 * LPC_ORDER weighted coefficients per subframe
+ * @param unq_lpc  unquantized lpc vector
+ */
+static void perceptual_filter(G723_1_Context *p, int16_t *flt_coef,
+                              int16_t *unq_lpc, int16_t *buf)
+{
+    int16_t vector[FRAME_LEN + LPC_ORDER];
+    int i, j, k, l = 0;
+
+    memcpy(buf, p->iir_mem, sizeof(int16_t) * LPC_ORDER);
+    memcpy(vector, p->fir_mem, sizeof(int16_t) * LPC_ORDER);
+    memcpy(vector + LPC_ORDER, buf + LPC_ORDER, sizeof(int16_t) * FRAME_LEN);
+
+    for (i = LPC_ORDER, j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++) {
+        for (k = 0; k < LPC_ORDER; k++) {
+            flt_coef[k + 2 * l] = (unq_lpc[k + l] * percept_flt_tbl[0][k] +
+                                  (1 << 14)) >> 15;
+            flt_coef[k + 2 * l + LPC_ORDER] = (unq_lpc[k + l] *
+                                             percept_flt_tbl[1][k] +
+                                             (1 << 14)) >> 15;
+        }
+        iir_filter(flt_coef + 2 * l, flt_coef + 2 * l + LPC_ORDER, vector + i,
+                   buf + i, 0); /* width 0: 16-bit output */
+        l += LPC_ORDER;
+    }
+    memcpy(p->iir_mem, buf + FRAME_LEN, sizeof(int16_t) * LPC_ORDER);
+    memcpy(p->fir_mem, vector + FRAME_LEN, sizeof(int16_t) * LPC_ORDER);
+}
+
+/**
+ * Estimate the open loop pitch period.
+ * @param buf   perceptually weighted speech
+ * @param start estimation is carried out from this position
+ * @return the selected lag, between PITCH_MIN and PITCH_MAX - 3 inclusive
+ */
+static int estimate_pitch(int16_t *buf, int start)
+{
+    int max_exp = 32;
+    int max_ccr = 0x4000;
+    int max_eng = 0x7fff;
+    int index   = PITCH_MIN;
+    int offset  = start - PITCH_MIN + 1;
+
+    int ccr, eng, orig_eng, ccr_eng, exp;
+    int diff, temp;
+
+    int i;
+
+    orig_eng = ff_dot_product(buf + offset, buf + offset, HALF_FRAME_LEN);
+
+    for (i = PITCH_MIN; i <= PITCH_MAX - 3; i++) {
+        offset--;
+
+        /* Update energy and compute correlation */
+        orig_eng += buf[offset] * buf[offset] -
+                    buf[offset + HALF_FRAME_LEN] * buf[offset + HALF_FRAME_LEN];
+        ccr      =  ff_dot_product(buf + start, buf + offset, HALF_FRAME_LEN);
+        if (ccr <= 0)
+            continue;
+
+        /* Split into mantissa and exponent to maintain precision */
+        exp  =   normalize_bits_int32(ccr);
+        ccr  =   av_clipl_int32((int64_t)(ccr << exp) + (1 << 15)) >> 16;
+        exp  <<= 1;
+        ccr  *=  ccr;
+        temp =   normalize_bits_int32(ccr);
+        ccr  =   ccr << temp >> 16;
+        exp  +=  temp;
+
+        temp =   normalize_bits_int32(orig_eng);
+        eng  =   av_clipl_int32((int64_t)(orig_eng << temp) + (1 << 15)) >> 16;
+        exp  -=  temp;
+
+        if (ccr >= eng) {
+            exp--;
+            ccr >>= 1;
+        }
+        if (exp > max_exp)
+            continue;
+
+        if (exp + 1 < max_exp)
+            goto update; /* exponent lower by more than one: skip the mantissa test */
+
+        /* Equalize exponents before comparison */
+        if (exp + 1 == max_exp)
+            temp = max_ccr >> 1;
+        else
+            temp = max_ccr;
+        ccr_eng = ccr * max_eng;
+        diff    = ccr_eng - eng * temp;
+        if (diff > 0 && (i - index < PITCH_MIN || diff > ccr_eng >> 2)) {
+update:
+            index   = i;
+            max_exp = exp;
+            max_ccr = ccr;
+            max_eng = eng;
+        }
+    }
+    return index;
+}
+
+/**
+ * Compute harmonic noise filter parameters.
+ * Lags pitch_lag - 3 .. pitch_lag + 3 are examined, reading from buf - lag.
+ * @param buf       perceptually weighted speech
+ * @param pitch_lag open loop pitch period
+ * @param hf        output: hf->index (selected lag) and hf->gain
+ */
+static void comp_harmonic_coeff(int16_t *buf, int16_t pitch_lag, HFParam *hf)
+{
+    int ccr, eng, max_ccr, max_eng;
+    int exp, max, diff;
+    int energy[15];
+    int i, j;
+
+    for (i = 0, j = pitch_lag - 3; j <= pitch_lag + 3; i++, j++) {
+        /* Compute residual energy */
+        energy[i << 1] = ff_dot_product(buf - j, buf - j, SUBFRAME_LEN);
+        /* Compute correlation */
+        energy[(i << 1) + 1] = ff_dot_product(buf, buf - j, SUBFRAME_LEN);
+    }
+
+    /* Compute target energy */
+    energy[14] = ff_dot_product(buf, buf, SUBFRAME_LEN);
+
+    /* Normalize */
+    max = 0;
+    for (i = 0; i < 15; i++)
+        max = FFMAX(max, FFABS(energy[i]));
+
+    exp = normalize_bits_int32(max);
+    for (i = 0; i < 15; i++) {
+        energy[i] = av_clipl_int32((int64_t)(energy[i] << exp) +
+                                   (1 << 15)) >> 16;
+    }
+
+    hf->index = -1;
+    hf->gain  =  0;
+    max_ccr   =  1;
+    max_eng   =  0x7fff;
+
+    for (i = 0; i <= 6; i++) {
+        eng = energy[i << 1];
+        ccr = energy[(i << 1) + 1];
+
+        if (ccr <= 0)
+            continue;
+
+        ccr  = (ccr * ccr + (1 << 14)) >> 15;
+        diff = ccr * max_eng - eng * max_ccr;
+        if (diff > 0) {
+            max_ccr   = ccr;
+            max_eng   = eng;
+            hf->index = i;
+        }
+    }
+
+    if (hf->index == -1) { /* no candidate was selected: gain stays 0 */
+        hf->index = pitch_lag;
+        return;
+    }
+
+    eng = energy[14] * max_eng;
+    eng = (eng >> 2) + (eng >> 3);
+    ccr = energy[(hf->index << 1) + 1] * energy[(hf->index << 1) + 1];
+    if (eng < ccr) {
+        eng = energy[(hf->index << 1) + 1];
+
+        if (eng >= max_eng)
+            hf->gain = 0x2800;
+        else
+            hf->gain = ((eng << 15) / max_eng * 0x2800 + (1 << 14)) >> 15;
+    }
+    hf->index += pitch_lag - 3; /* candidate number -> lag */
+}
+
+/**
+ * Apply the harmonic noise shaping filter to one subframe of src into dest.
+ *
+ * @param hf filter parameters; src[i - hf->index] is read, so src needs history
+ */
+static void harmonic_filter(HFParam *hf, const int16_t *src, int16_t *dest)
+{
+    int i;
+
+    for (i = 0; i < SUBFRAME_LEN; i++) {
+        int64_t temp = hf->gain * src[i - hf->index] << 1;
+        dest[i] = av_clipl_int32((src[i] << 16) - temp + (1 << 15)) >> 16;
+    }
+}
+
+static void harmonic_noise_sub(HFParam *hf, const int16_t *src, int16_t *dest)
+{ /* Updates one subframe of dest in place from src and the hf lag/gain. */
+    int i;
+    for (i = 0; i < SUBFRAME_LEN; i++) {
+        int64_t temp = hf->gain * src[i - hf->index] << 1; /* same term as harmonic_filter() */
+        dest[i] = av_clipl_int32(((dest[i] - src[i]) << 16) + temp +
+                                 (1 << 15)) >> 16;
+
+    }
+}
+
+/**
+ * Combined synthesis and formant perceptual weighting filter.
+ * dest[-LPC_ORDER .. -1] is overwritten with perf_iir before filtering.
+ * @param qnt_lpc  quantized lpc coefficients
+ * @param perf_lpc perceptual filter coefficients
+ * @param perf_fir perceptual filter fir memory, updated on return
+ * @param perf_iir perceptual filter iir memory, updated on return
+ * @param scale    the filter output will be scaled by 2^scale
+ */
+static void synth_percept_filter(int16_t *qnt_lpc, int16_t *perf_lpc,
+                                 int16_t *perf_fir, int16_t *perf_iir,
+                                 const int16_t *src, int16_t *dest, int scale)
+{
+    int i, j;
+    int16_t buf_16[SUBFRAME_LEN + LPC_ORDER];
+    int64_t buf[SUBFRAME_LEN];
+
+    int16_t *bptr_16 = buf_16 + LPC_ORDER;
+
+    memcpy(buf_16, perf_fir, sizeof(int16_t) * LPC_ORDER);
+    memcpy(dest - LPC_ORDER, perf_iir, sizeof(int16_t) * LPC_ORDER);
+
+    for (i = 0; i < SUBFRAME_LEN; i++) {
+        int64_t temp = 0;
+        for (j = 1; j <= LPC_ORDER; j++)
+            temp -= qnt_lpc[j - 1] * bptr_16[i - j];
+
+        buf[i]     = (src[i] << 15) + (temp << 3);
+        bptr_16[i] = av_clipl_int32(buf[i] + (1 << 15)) >> 16;
+    }
+
+    for (i = 0; i < SUBFRAME_LEN; i++) {
+        int64_t fir = 0, iir = 0;
+        for (j = 1; j <= LPC_ORDER; j++) {
+            fir -= perf_lpc[j - 1] * bptr_16[i - j];
+            iir += perf_lpc[j + LPC_ORDER - 1] * dest[i - j];
+        }
+        dest[i] = av_clipl_int32(((buf[i] + (fir << 3)) << scale) + (iir << 3) +
+                                 (1 << 15)) >> 16;
+    }
+    memcpy(perf_fir, buf_16 + SUBFRAME_LEN, sizeof(int16_t) * LPC_ORDER);
+    memcpy(perf_iir, dest + SUBFRAME_LEN - LPC_ORDER,
+           sizeof(int16_t) * LPC_ORDER);
+}
+
+/**
+ * Compute the adaptive codebook contribution.
+ * Stores the chosen lag and gain in p->pitch_lag[] and p->subframe[index].
+ * @param buf   input signal
+ * @param index the current subframe index
+ */
+static void acb_search(G723_1_Context *p, int16_t *residual,
+                       int16_t *impulse_resp, const int16_t *buf,
+                       int index)
+{
+
+    int16_t flt_buf[PITCH_ORDER][SUBFRAME_LEN];
+
+    const int16_t *cb_tbl = adaptive_cb_gain85;
+
+    int ccr_buf[PITCH_ORDER * SUBFRAMES << 2]; /* read back below as 20 entries per lag */
+
+    int pitch_lag = p->pitch_lag[index >> 1];
+    int acb_lag   = 1;
+    int acb_gain  = 0;
+    int odd_frame = index & 1;
+    int iter      = 3 + odd_frame; /* number of candidate lags tried */
+    int count     = 0;
+    int tbl_size  = 85;
+
+    int i, j, k, l, max;
+    int64_t temp;
+
+    if (!odd_frame) {
+        if (pitch_lag == PITCH_MIN)
+            pitch_lag++;
+        else
+            pitch_lag = FFMIN(pitch_lag, PITCH_MAX - 5);
+    }
+
+    for (i = 0; i < iter; i++) {
+        get_residual(residual, p->prev_excitation, pitch_lag + i - 1);
+
+        for (j = 0; j < SUBFRAME_LEN; j++) {
+            temp = 0;
+            for (k = 0; k <= j; k++)
+                temp += residual[PITCH_ORDER - 1 + k] * impulse_resp[j - k];
+            flt_buf[PITCH_ORDER - 1][j] = av_clipl_int32((temp << 1) +
+                                                         (1 << 15)) >> 16;
+        }
+
+        for (j = PITCH_ORDER - 2; j >= 0; j--) {
+            flt_buf[j][0] = ((residual[j] << 13) + (1 << 14)) >> 15;
+            for (k = 1; k < SUBFRAME_LEN; k++) {
+                temp = (flt_buf[j + 1][k - 1] << 15) +
+                       residual[j] * impulse_resp[k];
+                flt_buf[j][k] = av_clipl_int32((temp << 1) + (1 << 15)) >> 16;
+            }
+        }
+
+        /* Compute crosscorrelation with the signal */
+        for (j = 0; j < PITCH_ORDER; j++) {
+            temp = ff_dot_product(buf, flt_buf[j], SUBFRAME_LEN);
+            ccr_buf[count++] = av_clipl_int32(temp << 1);
+        }
+
+        /* Compute energies */
+        for (j = 0; j < PITCH_ORDER; j++) {
+            ccr_buf[count++] = dot_product(flt_buf[j], flt_buf[j],
+                                           SUBFRAME_LEN);
+        }
+
+        for (j = 1; j < PITCH_ORDER; j++) { /* cross terms between filtered vectors */
+            for (k = 0; k < j; k++) {
+                temp = ff_dot_product(flt_buf[j], flt_buf[k], SUBFRAME_LEN);
+                ccr_buf[count++] = av_clipl_int32(temp<<2);
+            }
+        }
+    }
+
+    /* Normalize and shorten */
+    max = 0;
+    for (i = 0; i < 20 * iter; i++)
+        max = FFMAX(max, FFABS(ccr_buf[i]));
+
+    temp = normalize_bits_int32(max);
+
+    for (i = 0; i < 20 * iter; i++){
+        ccr_buf[i] = av_clipl_int32((int64_t)(ccr_buf[i] << temp) +
+                                    (1 << 15)) >> 16;
+    }
+
+    max = 0;
+    for (i = 0; i < iter; i++) {
+        /* Select quantization table (once switched to 170 it is kept) */
+        if (!odd_frame && pitch_lag + i - 1 >= SUBFRAME_LEN - 2 ||
+            odd_frame && pitch_lag >= SUBFRAME_LEN - 2) {
+            cb_tbl = adaptive_cb_gain170;
+            tbl_size = 170;
+        }
+
+        for (j = 0, k = 0; j < tbl_size; j++, k += 20) {
+            temp = 0;
+            for (l = 0; l < 20; l++)
+                temp += ccr_buf[20 * i + l] * cb_tbl[k + l];
+            temp =  av_clipl_int32(temp);
+
+            if (temp > max) {
+                max      = temp;
+                acb_gain = j;
+                acb_lag  = i;
+            }
+        }
+    }
+
+    if (!odd_frame) { /* even subframe: fold the offset into pitch_lag itself */
+        pitch_lag += acb_lag - 1;
+        acb_lag   =  1;
+    }
+
+    p->pitch_lag[index >> 1]      = pitch_lag;
+    p->subframe[index].ad_cb_lag  = acb_lag;
+    p->subframe[index].ad_cb_gain = acb_gain;
+}
+
+/**
+ * Subtract the adaptive codebook contribution from the input
+ * to obtain the residual.
+ *
+ * @param buf target vector, SUBFRAME_LEN samples, updated in place
+ */
+static void sub_acb_contrib(const int16_t *residual, const int16_t *impulse_resp,
+                            int16_t *buf)
+{
+    int i, j;
+    /* Subtract from buf[i] the sum over j <= i of residual[j] * impulse_resp[i - j] */
+    for (i = 0; i < SUBFRAME_LEN; i++) {
+        int64_t temp = buf[i] << 14;
+        for (j = 0; j <= i; j++)
+            temp -= residual[j] * impulse_resp[i - j];
+
+        buf[i] = av_clipl_int32((temp << 2) + (1 << 15)) >> 16;
+    }
+}
+
+/**
+ * Quantize the residual signal using the fixed codebook (MP-MLQ).
+ * optim is only overwritten by a candidate whose error beats optim->min_err.
+ * @param optim optimized fixed codebook parameters (in/out)
+ * @param buf   excitation vector
+ */
+static void get_fcb_param(FCBParam *optim, int16_t *impulse_resp,
+                          int16_t *buf, int pulse_cnt, int pitch_lag)
+{
+    FCBParam param;
+    int16_t impulse_r[SUBFRAME_LEN];
+    int16_t temp_corr[SUBFRAME_LEN];
+    int16_t impulse_corr[SUBFRAME_LEN];
+
+    int ccr1[SUBFRAME_LEN];
+    int ccr2[SUBFRAME_LEN];
+    int amp, err, max, max_amp_index, min, scale, i, j, k, l;
+
+    int64_t temp;
+
+    /* Update impulse response */
+    memcpy(impulse_r, impulse_resp, sizeof(int16_t) * SUBFRAME_LEN);
+    param.dirac_train = 0;
+    if (pitch_lag < SUBFRAME_LEN - 2) {
+        param.dirac_train = 1;
+        gen_dirac_train(impulse_r, pitch_lag);
+    }
+
+    for (i = 0; i < SUBFRAME_LEN; i++)
+        temp_corr[i] = impulse_r[i] >> 1;
+
+    /* Compute impulse response autocorrelation */
+    temp = dot_product(temp_corr, temp_corr, SUBFRAME_LEN);
+
+    scale = normalize_bits_int32(temp);
+    impulse_corr[0] = av_clipl_int32((temp << scale) + (1 << 15)) >> 16;
+
+    for (i = 1; i < SUBFRAME_LEN; i++) {
+        temp = dot_product(temp_corr + i, temp_corr, SUBFRAME_LEN - i);
+        impulse_corr[i] = av_clipl_int32((temp << scale) + (1 << 15)) >> 16;
+    }
+
+    /* Compute crosscorrelation of impulse response with residual signal */
+    scale -= 4;
+    for (i = 0; i < SUBFRAME_LEN; i++){
+        temp = dot_product(buf + i, impulse_r, SUBFRAME_LEN - i);
+        if (scale < 0)
+            ccr1[i] = temp >> -scale;
+        else
+            ccr1[i] = av_clipl_int32(temp << scale);
+    }
+
+    /* Search loop */
+    for (i = 0; i < GRID_SIZE; i++) {
+        /* Maximize the crosscorrelation */
+        max = 0;
+        for (j = i; j < SUBFRAME_LEN; j += GRID_SIZE) {
+            temp = FFABS(ccr1[j]);
+            if (temp >= max) {
+                max = temp;
+                param.pulse_pos[0] = j;
+            }
+        }
+
+        /* Quantize the gain (max crosscorrelation/impulse_corr[0]) */
+        amp = max;
+        min = 1 << 30;
+        max_amp_index = GAIN_LEVELS - 2;
+        for (j = max_amp_index; j >= 2; j--) {
+            temp = av_clipl_int32((int64_t)fixed_cb_gain[j] *
+                                  impulse_corr[0] << 1);
+            temp = FFABS(temp - amp);
+            if (temp < min) {
+                min = temp;
+                max_amp_index = j;
+            }
+        }
+
+        max_amp_index--;
+        /* Select additional gain values */
+        for (j = 1; j < 5; j++) {
+            for (k = i; k < SUBFRAME_LEN; k += GRID_SIZE) {
+                temp_corr[k] = 0;
+                ccr2[k]      = ccr1[k];
+            }
+            param.amp_index = max_amp_index + j - 2;
+            amp = fixed_cb_gain[param.amp_index];
+
+            param.pulse_sign[0] = (ccr2[param.pulse_pos[0]] < 0) ? -amp : amp;
+            temp_corr[param.pulse_pos[0]] = 1; /* marks the position as taken */
+
+            for (k = 1; k < pulse_cnt; k++) {
+                max = -(1 << 30);
+                for (l = i; l < SUBFRAME_LEN; l += GRID_SIZE) {
+                    if (temp_corr[l])
+                        continue;
+                    temp = impulse_corr[FFABS(l - param.pulse_pos[k - 1])];
+                    temp = av_clipl_int32((int64_t)temp *
+                                          param.pulse_sign[k - 1] << 1);
+                    ccr2[l] -= temp;
+                    temp = FFABS(ccr2[l]);
+                    if (temp > max) {
+                        max = temp;
+                        param.pulse_pos[k] = l;
+                    }
+                }
+
+                param.pulse_sign[k] = (ccr2[param.pulse_pos[k]] < 0) ?
+                                      -amp : amp;
+                temp_corr[param.pulse_pos[k]] = 1;
+            }
+
+            /* Create the error vector */
+            memset(temp_corr, 0, sizeof(int16_t) * SUBFRAME_LEN);
+
+            for (k = 0; k < pulse_cnt; k++)
+                temp_corr[param.pulse_pos[k]] = param.pulse_sign[k];
+
+            for (k = SUBFRAME_LEN - 1; k >= 0; k--) {
+                temp = 0;
+                for (l = 0; l <= k; l++) {
+                    int prod = av_clipl_int32((int64_t)temp_corr[l] *
+                                              impulse_r[k - l] << 1);
+                    temp     = av_clipl_int32(temp + prod);
+                }
+                temp_corr[k] = temp << 2 >> 16;
+            }
+
+            /* Compute square of error */
+            err = 0;
+            for (k = 0; k < SUBFRAME_LEN; k++) {
+                int64_t prod;
+                prod = av_clipl_int32((int64_t)buf[k] * temp_corr[k] << 1);
+                err  = av_clipl_int32(err - prod);
+                prod = av_clipl_int32((int64_t)temp_corr[k] * temp_corr[k]);
+                err  = av_clipl_int32(err + prod);
+            }
+
+            /* Minimize */
+            if (err < optim->min_err) {
+                optim->min_err     = err;
+                optim->grid_index  = i;
+                optim->amp_index   = param.amp_index;
+                optim->dirac_train = param.dirac_train;
+
+                for (k = 0; k < pulse_cnt; k++) {
+                    optim->pulse_sign[k] = param.pulse_sign[k];
+                    optim->pulse_pos[k]  = param.pulse_pos[k];
+                }
+            }
+        }
+    }
+}
+
+/**
+ * Encode the pulse positions, signs and gain index of the current subframe.
+ *
+ * @param optim optimized fixed CB parameters
+ * @param buf   excitation vector; nonzero entries on optim's grid are pulses
+ */
+static void pack_fcb_param(G723_1_Subframe *subfrm, FCBParam *optim,
+                           int16_t *buf, int pulse_cnt)
+{
+    int i, j;
+
+    j = PULSE_MAX - pulse_cnt;
+
+    subfrm->pulse_sign = 0;
+    subfrm->pulse_pos  = 0;
+
+    for (i = 0; i < SUBFRAME_LEN >> 1; i++) {
+        int val = buf[optim->grid_index + (i << 1)];
+        if (!val) {
+            subfrm->pulse_pos += combinatorial_table[j][i];
+        } else {
+            subfrm->pulse_sign <<= 1;
+            if (val < 0) subfrm->pulse_sign++;
+            j++;
+
+            if (j == PULSE_MAX) break; /* every pulse has been accounted for */
+        }
+    }
+    subfrm->amp_index   = optim->amp_index;
+    subfrm->grid_index  = optim->grid_index;
+    subfrm->dirac_train = optim->dirac_train;
+}
+
+/**
+ * Compute the fixed codebook excitation and store its parameters in p.
+ *
+ * @param buf          target vector, replaced by the reconstructed excitation
+ * @param impulse_resp impulse response of the combined filter
+ */
+static void fcb_search(G723_1_Context *p, int16_t *impulse_resp,
+                       int16_t *buf, int index)
+{
+    FCBParam optim;
+    int pulse_cnt = pulses[index];
+    int i;
+
+    optim.min_err = 1 << 30;
+    get_fcb_param(&optim, impulse_resp, buf, pulse_cnt, SUBFRAME_LEN);
+
+    if (p->pitch_lag[index >> 1] < SUBFRAME_LEN - 2) { /* second pass, real lag */
+        get_fcb_param(&optim, impulse_resp, buf, pulse_cnt,
+                      p->pitch_lag[index >> 1]);
+    }
+
+    /* Reconstruct the excitation */
+    memset(buf, 0, sizeof(int16_t) * SUBFRAME_LEN);
+    for (i = 0; i < pulse_cnt; i++)
+        buf[optim.pulse_pos[i]] = optim.pulse_sign[i];
+
+    pack_fcb_param(&p->subframe[index], &optim, buf, pulse_cnt);
+
+    if (optim.dirac_train)
+        gen_dirac_train(buf, p->pitch_lag[index >> 1]);
+}
+
+/**
+ * Pack the frame parameters into output bitstream.
+ * @param frame output buffer
+ * @param size  size of the buffer
+ * @return frame_size[info_bits]; only RATE_6300 gets past the assert below
+ */
+static int pack_bitstream(G723_1_Context *p, unsigned char *frame, int size)
+{
+    PutBitContext pb;
+    int info_bits, i, temp;
+
+    init_put_bits(&pb, frame, size);
+
+    if (p->cur_rate == RATE_6300) {
+        info_bits = 0;
+        put_bits(&pb, 2, info_bits);
+    }else
+        av_assert0(0); /* no other rate is written by this function */
+
+    put_bits(&pb, 8, p->lsp_index[2]);
+    put_bits(&pb, 8, p->lsp_index[1]);
+    put_bits(&pb, 8, p->lsp_index[0]);
+
+    put_bits(&pb, 7, p->pitch_lag[0] - PITCH_MIN);
+    put_bits(&pb, 2, p->subframe[1].ad_cb_lag);
+    put_bits(&pb, 7, p->pitch_lag[1] - PITCH_MIN);
+    put_bits(&pb, 2, p->subframe[3].ad_cb_lag);
+
+    /* Write 12 bit combined gain */
+    for (i = 0; i < SUBFRAMES; i++) {
+        temp = p->subframe[i].ad_cb_gain * GAIN_LEVELS +
+               p->subframe[i].amp_index;
+        if (p->cur_rate ==  RATE_6300)
+            temp += p->subframe[i].dirac_train << 11;
+        put_bits(&pb, 12, temp);
+    }
+
+    put_bits(&pb, 1, p->subframe[0].grid_index);
+    put_bits(&pb, 1, p->subframe[1].grid_index);
+    put_bits(&pb, 1, p->subframe[2].grid_index);
+    put_bits(&pb, 1, p->subframe[3].grid_index);
+
+    if (p->cur_rate == RATE_6300) {
+        skip_put_bits(&pb, 1); /* reserved bit */
+
+        /* Write 13 bit combined position index (high bits of each pulse_pos) */
+        temp = (p->subframe[0].pulse_pos >> 16) * 810 +
+               (p->subframe[1].pulse_pos >> 14) *  90 +
+               (p->subframe[2].pulse_pos >> 16) *   9 +
+               (p->subframe[3].pulse_pos >> 14);
+        put_bits(&pb, 13, temp);
+
+        put_bits(&pb, 16, p->subframe[0].pulse_pos & 0xffff);
+        put_bits(&pb, 14, p->subframe[1].pulse_pos & 0x3fff);
+        put_bits(&pb, 16, p->subframe[2].pulse_pos & 0xffff);
+        put_bits(&pb, 14, p->subframe[3].pulse_pos & 0x3fff);
+
+        put_bits(&pb, 6, p->subframe[0].pulse_sign);
+        put_bits(&pb, 5, p->subframe[1].pulse_sign);
+        put_bits(&pb, 6, p->subframe[2].pulse_sign);
+        put_bits(&pb, 5, p->subframe[3].pulse_sign);
+    }
+
+    flush_put_bits(&pb);
+    return frame_size[info_bits];
+}
+
+/**
+ * Encode FRAME_LEN 16-bit samples into one RATE_6300 packet; the samples
+ * are processed in place on a heap copy, leaving the const frame untouched.
+ * @param avctx          codec context, priv_data is the G723_1_Context
+ * @param avpkt          output packet, requested here with 24 bytes
+ * @param frame          input audio, samples are read from data[0]
+ * @param got_packet_ptr set to 1 once the output packet is allocated
+ * @return 0 on success, a negative AVERROR code if an allocation fails
+ */
+static int g723_1_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
+                            const AVFrame *frame, int *got_packet_ptr)
+{
+    G723_1_Context *p = avctx->priv_data;
+    int16_t unq_lpc[LPC_ORDER * SUBFRAMES];
+    int16_t qnt_lpc[LPC_ORDER * SUBFRAMES];
+    int16_t cur_lsp[LPC_ORDER];
+    int16_t weighted_lpc[LPC_ORDER * SUBFRAMES << 1];
+    int16_t vector[FRAME_LEN + PITCH_MAX];
+    HFParam hf[4];
+    int i, j, offset, ret;
+    int16_t *in_orig = av_memdup(frame->data[0], frame->nb_samples * sizeof(int16_t));
+    int16_t *in = in_orig;
+
+    if (!in)
+        return AVERROR(ENOMEM);
+
+    highpass_filter(in, &p->hpf_fir_mem, &p->hpf_iir_mem);
+
+    memcpy(vector, p->prev_data, HALF_FRAME_LEN * sizeof(int16_t));
+    memcpy(vector + HALF_FRAME_LEN, in, FRAME_LEN * sizeof(int16_t));
+
+    comp_lpc_coeff(vector, unq_lpc);
+    lpc2lsp(&unq_lpc[LPC_ORDER * 3], p->prev_lsp, cur_lsp);
+    lsp_quantize(p->lsp_index, cur_lsp, p->prev_lsp);
+
+    /* Update memory */
+    memcpy(vector + LPC_ORDER, p->prev_data + SUBFRAME_LEN,
+           sizeof(int16_t) * SUBFRAME_LEN);
+    memcpy(vector + LPC_ORDER + SUBFRAME_LEN, in,
+           sizeof(int16_t) * (HALF_FRAME_LEN + SUBFRAME_LEN));
+    memcpy(p->prev_data, in + HALF_FRAME_LEN,
+           sizeof(int16_t) * HALF_FRAME_LEN);
+    memcpy(in, vector + LPC_ORDER, sizeof(int16_t) * FRAME_LEN);
+
+    perceptual_filter(p, weighted_lpc, unq_lpc, vector);
+
+    memcpy(in, vector + LPC_ORDER, sizeof(int16_t) * FRAME_LEN);
+    memcpy(vector, p->prev_weight_sig, sizeof(int16_t) * PITCH_MAX);
+    memcpy(vector + PITCH_MAX, in, sizeof(int16_t) * FRAME_LEN);
+    scale_vector(vector, vector, FRAME_LEN + PITCH_MAX);
+
+    p->pitch_lag[0] = estimate_pitch(vector, PITCH_MAX);
+    p->pitch_lag[1] = estimate_pitch(vector, PITCH_MAX + HALF_FRAME_LEN);
+
+    for (i = PITCH_MAX, j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++)
+        comp_harmonic_coeff(vector + i, p->pitch_lag[j >> 1], hf + j);
+
+    memcpy(vector, p->prev_weight_sig, sizeof(int16_t) * PITCH_MAX);
+    memcpy(vector + PITCH_MAX, in, sizeof(int16_t) * FRAME_LEN);
+    memcpy(p->prev_weight_sig, vector + FRAME_LEN, sizeof(int16_t) * PITCH_MAX);
+
+    for (i = 0, j = 0; j < SUBFRAMES; i += SUBFRAME_LEN, j++)
+        harmonic_filter(hf + j, vector + PITCH_MAX + i, in + i);
+
+    inverse_quant(cur_lsp, p->prev_lsp, p->lsp_index, 0);
+    lsp_interpolate(qnt_lpc, cur_lsp, p->prev_lsp);
+    memcpy(p->prev_lsp, cur_lsp, sizeof(int16_t) * LPC_ORDER);
+
+    for (i = 0, offset = 0; i < SUBFRAMES; i++, offset += LPC_ORDER) {
+        int16_t impulse_resp[SUBFRAME_LEN];
+        int16_t residual[SUBFRAME_LEN + PITCH_ORDER - 1];
+        int16_t flt_in[SUBFRAME_LEN];
+        int16_t zero[LPC_ORDER], fir[LPC_ORDER], iir[LPC_ORDER];
+
+        /* Compute the combined impulse response of the synthesis filter,
+         * formant perceptual weighting and harmonic noise shaping filters */
+        memset(zero, 0, sizeof(int16_t) * LPC_ORDER);
+        memset(vector, 0, sizeof(int16_t) * PITCH_MAX);
+        memset(flt_in, 0, sizeof(int16_t) * SUBFRAME_LEN);
+
+        flt_in[0] = 1 << 13; /* Unit impulse */
+        synth_percept_filter(qnt_lpc + offset, weighted_lpc + (offset << 1),
+                             zero, zero, flt_in, vector + PITCH_MAX, 1);
+        harmonic_filter(hf + i, vector + PITCH_MAX, impulse_resp);
+
+        /* Compute the combined zero input response */
+        flt_in[0] = 0;
+        memcpy(fir, p->perf_fir_mem, sizeof(int16_t) * LPC_ORDER);
+        memcpy(iir, p->perf_iir_mem, sizeof(int16_t) * LPC_ORDER);
+
+        synth_percept_filter(qnt_lpc + offset, weighted_lpc + (offset << 1),
+                             fir, iir, flt_in, vector + PITCH_MAX, 0);
+        memcpy(vector, p->harmonic_mem, sizeof(int16_t) * PITCH_MAX);
+        harmonic_noise_sub(hf + i, vector + PITCH_MAX, in);
+
+        acb_search(p, residual, impulse_resp, in, i);
+        gen_acb_excitation(residual, p->prev_excitation,p->pitch_lag[i >> 1],
+                           &p->subframe[i], p->cur_rate);
+        sub_acb_contrib(residual, impulse_resp, in);
+        fcb_search(p, impulse_resp, in, i);
+
+        /* Reconstruct the excitation */
+        gen_acb_excitation(impulse_resp, p->prev_excitation, p->pitch_lag[i >> 1],
+                           &p->subframe[i], RATE_6300);
+
+        memmove(p->prev_excitation, p->prev_excitation + SUBFRAME_LEN,
+               sizeof(int16_t) * (PITCH_MAX - SUBFRAME_LEN));
+        /* Double by multiplication: left-shifting a negative value is UB */
+        for (j = 0; j < SUBFRAME_LEN; j++)
+            in[j] = av_clip_int16(in[j] * 2 + impulse_resp[j]);
+        memcpy(p->prev_excitation + PITCH_MAX - SUBFRAME_LEN, in,
+               sizeof(int16_t) * SUBFRAME_LEN);
+
+        /* Update filter memories */
+        synth_percept_filter(qnt_lpc + offset, weighted_lpc + (offset << 1),
+                             p->perf_fir_mem, p->perf_iir_mem,
+                             in, vector + PITCH_MAX, 0);
+        memmove(p->harmonic_mem, p->harmonic_mem + SUBFRAME_LEN,
+                sizeof(int16_t) * (PITCH_MAX - SUBFRAME_LEN));
+        memcpy(p->harmonic_mem + PITCH_MAX - SUBFRAME_LEN, vector + PITCH_MAX,
+               sizeof(int16_t) * SUBFRAME_LEN);
+        in += SUBFRAME_LEN;
+    }
+
+    av_freep(&in_orig); in = NULL;
+
+    if ((ret = ff_alloc_packet2(avctx, avpkt, 24)) < 0)
+        return ret;
+
+    *got_packet_ptr = 1;
+    avpkt->size = pack_bitstream(p, avpkt->data, avpkt->size);
+    return 0;
+}
+
+AVCodec ff_g723_1_encoder = { /* G.723.1 encoder registration */
+    .name           = "g723_1",
+    .long_name      = NULL_IF_CONFIG_SMALL("G.723.1"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_G723_1,
+    .priv_data_size = sizeof(G723_1_Context),
+    .init           = g723_1_encode_init,
+    .encode2        = g723_1_encode_frame, /* requests a 24-byte packet */
+    .sample_fmts    = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,
+                                                    AV_SAMPLE_FMT_NONE},
+};
+#endif
diff --git a/libavcodec/g723_1_data.h b/libavcodec/g723_1_data.h
index 04f8a06..38a6c59 100644
--- a/libavcodec/g723_1_data.h
+++ b/libavcodec/g723_1_data.h
@@ -1,28 +1,28 @@
 /*
- * G.723.1 compatible decoder data tables.
+ * G723.1 compatible decoder data tables.
  * Copyright (c) 2006 Benjamin Larsson
  * Copyright (c) 2010 Mohamed Naufal Basheer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
  * @file
- * G.723.1 compatible decoder data tables
+ * G723.1 compatible decoder data tables
  */
 
 #ifndef AVCODEC_G723_1_DATA_H
@@ -33,6 +33,8 @@
 #define SUBFRAMES       4
 #define SUBFRAME_LEN    60
 #define FRAME_LEN       (SUBFRAME_LEN << 2)
+#define HALF_FRAME_LEN  (FRAME_LEN / 2)
+#define LPC_FRAME       (HALF_FRAME_LEN + SUBFRAME_LEN)
 #define LPC_ORDER       10
 #define LSP_BANDS       3
 #define LSP_CB_SIZE     256
@@ -44,19 +46,89 @@
 #define GAIN_LEVELS     24
 #define COS_TBL_SIZE    512
 
+/**
+ * G723.1 frame types
+ */
+typedef enum FrameType {
+    ACTIVE_FRAME,        ///< Active speech
+    SID_FRAME,           ///< Silence Insertion Descriptor frame
+    UNTRANSMITTED_FRAME  ///< Frame that was not transmitted
+} FrameType;
+
 static const uint8_t frame_size[4] = { 24, 20, 4, 1 };
 
-/* Postfilter gain weighting factors scaled by 2^15 */
-static const int16_t ppf_gain_weight[2] = { 0x1800, 0x2000 };
+typedef enum Rate {
+    RATE_6300, ///< 6.3 kbit/s
+    RATE_5300  ///< 5.3 kbit/s
+} Rate;
+
+/**
+ * G723.1 unpacked data subframe
+ */
+typedef struct G723_1_Subframe {
+    int ad_cb_lag;     ///< adaptive codebook lag
+    int ad_cb_gain;    ///< adaptive codebook gain index, packed with amp_index
+    int dirac_train;   ///< added at bit 11 of the 12 bit combined gain
+    int pulse_sign;    ///< pulse signs, packed as 6 or 5 bits
+    int grid_index;    ///< packed as a single bit
+    int amp_index;     ///< low part of the combined gain (ad_cb_gain * GAIN_LEVELS + amp_index)
+    int pulse_pos;     ///< combined pulse position index
+} G723_1_Subframe;
+
+/**
+ * Pitch postfilter parameters
+ */
+typedef struct {
+    int     index;    ///< postfilter backward/forward lag
+    int16_t opt_gain; ///< optimal gain
+    int16_t sc_gain;  ///< scaling gain
+} PPFParam;
+
+/**
+ * Harmonic filter parameters (the encoder keeps one set per subframe)
+ */
+typedef struct {
+    int index;
+    int gain;
+} HFParam;
+
+/**
+ * Optimized fixed codebook excitation parameters
+ */
+typedef struct {
+    int min_err;
+    int amp_index;
+    int grid_index;
+    int dirac_train;           ///< non-zero: fcb_search() generates a dirac train
+    int pulse_pos[PULSE_MAX];  ///< index of each pulse in the subframe buffer
+    int pulse_sign[PULSE_MAX]; ///< value stored at the matching pulse_pos[]
+} FCBParam;
+
+/**
+ * Postfilter gain weighting factors scaled by 2^15
+ */
+static const int16_t ppf_gain_weight[2] = {0x1800, 0x2000};
 
-/* LSP DC component */
+/**
+ * LSP DC component
+ */
 static const int16_t dc_lsp[LPC_ORDER] = {
-    0x0c3b, 0x1271, 0x1e0a, 0x2a36, 0x3630,
-    0x406f, 0x4d28, 0x56f4, 0x638c, 0x6c46
+    0x0c3b,
+    0x1271,
+    0x1e0a,
+    0x2a36,
+    0x3630,
+    0x406f,
+    0x4d28,
+    0x56f4,
+    0x638c,
+    0x6c46
 };
 
-/* Cosine table scaled by 2^14 */
-static const int16_t cos_tab[COS_TBL_SIZE] = {
+/**
+ * Cosine table scaled by 2^14
+ */
+static const int16_t cos_tab[COS_TBL_SIZE+1] = {
     16384,  16383,  16379,  16373,  16364,  16353,  16340,  16324,
     16305,  16284,  16261,  16235,  16207,  16176,  16143,  16107,
     16069,  16029,  15986,  15941,  15893,  15843,  15791,  15736,
@@ -121,9 +193,12 @@ static const int16_t cos_tab[COS_TBL_SIZE] = {
     15679,  15736,  15791,  15843,  15893,  15941,  15986,  16029,
     16069,  16107,  16143,  16176,  16207,  16235,  16261,  16284,
     16305,  16324,  16340,  16353,  16364,  16373,  16379,  16383,
+    16384
 };
 
-/* LSP VQ tables */
+/**
+ *  LSP VQ tables
+ */
 static const int16_t lsp_band0[LSP_CB_SIZE][3] = {
     {    0,      0,      0}, { -270,  -1372,  -1032}, { -541,  -1650,  -1382},
     { -723,  -2011,  -2213}, { -941,  -1122,  -1942}, { -780,  -1145,  -2454},
@@ -433,12 +508,12 @@ static const int16_t lsp_band2[LSP_CB_SIZE][4] = {
     { 3633,   2336,   2408,   1453}, { 2923,   3517,   2567,   1318},
 };
 
-/*
+/**
  * Used for the coding/decoding of the pulses positions
  * for the MP-MLQ codebook
  */
 static const int32_t combinatorial_table[PULSE_MAX][SUBFRAME_LEN/GRID_SIZE] = {
-    {118755, 98280, 80730, 65780L, 53130,
+    {118755, 98280, 80730,  65780, 53130,
       42504, 33649, 26334,  20349, 15504,
       11628,  8568,  6188,   4368,  3003,
        2002,  1287,   792,    462,   252,
@@ -527,10 +602,14 @@ static const int16_t pitch_contrib[340] = {
     -2, 25144,  0, 17998
 };
 
-/* Number of non-zero pulses in the MP-MLQ excitation */
+/**
+ * Number of non-zero pulses in the MP-MLQ excitation
+ */
 static const int8_t pulses[4] = {6, 5, 6, 5};
 
-/* Size of the MP-MLQ fixed excitation codebooks */
+/**
+ * Size of the MP-MLQ fixed excitation codebooks
+ */
 static const int32_t max_pos[4] = {593775, 142506, 593775, 142506};
 
 static const int16_t fixed_cb_gain[GAIN_LEVELS] = {
@@ -1183,12 +1262,62 @@ static const int16_t adaptive_cb_gain170[170 * 20] = {
     -4534,  -2487,  -3932,  -4166,  -2113,  -3341,  -3540,  -3070
 };
 
-/* 0.65^i (Zero part) and 0.75^i (Pole part) scaled by 2^15 */
+/**
+ * 0.65^i (Zero part) and 0.75^i (Pole part) scaled by 2^15
+ */
 static const int16_t postfilter_tbl[2][LPC_ORDER] = {
     /* Zero */
-    { 21299, 13844,  8999,  5849, 3802, 2471, 1606, 1044,  679,  441 },
+    {21299, 13844,  8999,  5849, 3802, 2471, 1606, 1044,  679,  441},
     /* Pole */
-    { 24576, 18432, 13824, 10368, 7776, 5832, 4374, 3281, 2460, 1845 }
+    {24576, 18432, 13824, 10368, 7776, 5832, 4374, 3281, 2460, 1845}
+};
+
+/**
+ * Hamming window coefficients scaled by 2^15
+ */
+static const int16_t hamming_window[LPC_FRAME] = {
+     2621,  2631,  2659,  2705,  2770,  2853,  2955,  3074,  3212,  3367,
+     3541,  3731,  3939,  4164,  4405,  4663,  4937,  5226,  5531,  5851,
+     6186,  6534,  6897,  7273,  7661,  8062,  8475,  8899,  9334,  9780,
+    10235, 10699, 11172, 11653, 12141, 12636, 13138, 13645, 14157, 14673,
+    15193, 15716, 16242, 16769, 17298, 17827, 18356, 18884, 19411, 19935,
+    20457, 20975, 21489, 21999, 22503, 23002, 23494, 23978, 24455, 24924,
+    25384, 25834, 26274, 26704, 27122, 27529, 27924, 28306, 28675, 29031,
+    29373, 29700, 30012, 30310, 30592, 30857, 31107, 31340, 31557, 31756,
+    31938, 32102, 32249, 32377, 32488, 32580, 32654, 32710, 32747, 32766,
+    32766, 32747, 32710, 32654, 32580, 32488, 32377, 32249, 32102, 31938,
+    31756, 31557, 31340, 31107, 30857, 30592, 30310, 30012, 29700, 29373,
+    29031, 28675, 28306, 27924, 27529, 27122, 26704, 26274, 25834, 25384,
+    24924, 24455, 23978, 23494, 23002, 22503, 21999, 21489, 20975, 20457,
+    19935, 19411, 18884, 18356, 17827, 17298, 16769, 16242, 15716, 15193,
+    14673, 14157, 13645, 13138, 12636, 12141, 11653, 11172, 10699, 10235,
+     9780, 9334,   8899,  8475,  8062,  7661,  7273,  6897,  6534,  6186,
+     5851, 5531,   5226,  4937,  4663,  4405,  4164,  3939,  3731,  3541,
+     3367, 3212,   3074,  2955,  2853,  2770,  2705,  2659,  2631,  2621
+};
+
+/**
+ * Binomial window coefficients scaled by 2^15
+ */
+static const int16_t binomial_window[LPC_ORDER] = {
+    32749, 32695, 32604, 32477, 32315, 32118, 31887, 31622, 31324, 30995
+};
+
+/**
+ * 0.994^i scaled by 2^15
+ */
+static const int16_t bandwidth_expand[LPC_ORDER] = {
+    32571, 32376, 32182, 31989, 31797, 31606, 31416, 31228, 31040, 30854
+};
+
+/**
+ * 0.9^i (Zero part) and 0.5^i (Pole part) scaled by 2^15
+ */
+static const int16_t percept_flt_tbl[2][LPC_ORDER] = {
+    /* Zero part */
+    {29491, 26542, 23888, 21499, 19349, 17414, 15673, 14106, 12695, 11425},
+    /* Pole part */
+    {16384,  8192,  4096,  2048,  1024,   512,   256,   128,    64,    32}
 };
 
 static const int cng_adaptive_cb_lag[4] = { 1, 0, 1, 3 };
diff --git a/libavcodec/g726.c b/libavcodec/g726.c
index 62aeb79..b0331d8 100644
--- a/libavcodec/g726.c
+++ b/libavcodec/g726.c
@@ -5,20 +5,20 @@
  * This is a very straightforward rendition of the G.726
  * Section 4 "Computational Details".
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include <limits.h>
@@ -96,6 +96,7 @@ typedef struct G726Context {
     int sez;            /**< estimated second order prediction */
     int y;              /**< quantizer scaling factor for the next iteration */
     int code_size;
+    int little_endian;  /**< little-endian bitstream as used in aiff and Sun AU */
 } G726Context;
 
 static const int quant_tbl16[] =                  /**< 16kbit/s 2bits per sample */
@@ -347,10 +348,8 @@ static int g726_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     int i, ret, out_size;
 
     out_size = (frame->nb_samples * c->code_size + 7) / 8;
-    if ((ret = ff_alloc_packet(avpkt, out_size))) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+    if ((ret = ff_alloc_packet2(avctx, avpkt, out_size)) < 0)
         return ret;
-    }
     init_put_bits(&pb, avpkt->data, avpkt->size);
 
     for (i = 0; i < frame->nb_samples; i++)
@@ -370,7 +369,7 @@ static const AVOption options[] = {
     { NULL },
 };
 
-static const AVClass class = {
+static const AVClass g726_class = {
     .class_name = "g726",
     .item_name  = av_default_item_name,
     .option     = options,
@@ -393,19 +392,25 @@ AVCodec ff_adpcm_g726_encoder = {
     .capabilities   = CODEC_CAP_SMALL_LAST_FRAME,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_NONE },
-    .priv_class     = &class,
+    .priv_class     = &g726_class,
     .defaults       = defaults,
 };
 #endif
 
-#if CONFIG_ADPCM_G726_DECODER
+#if CONFIG_ADPCM_G726_DECODER || CONFIG_ADPCM_G726LE_DECODER
 static av_cold int g726_decode_init(AVCodecContext *avctx)
 {
     G726Context* c = avctx->priv_data;
 
+    if(avctx->channels > 1){
+        avpriv_request_sample(avctx, "Decoding more than one channel");
+        return AVERROR_PATCHWELCOME;
+    }
     avctx->channels       = 1;
     avctx->channel_layout = AV_CH_LAYOUT_MONO;
 
+    c->little_endian = !strcmp(avctx->codec->name, "g726le");
+
     c->code_size = avctx->bits_per_coded_sample;
     if (c->code_size < 2 || c->code_size > 5) {
         av_log(avctx, AV_LOG_ERROR, "Invalid number of bits %d\n", c->code_size);
@@ -433,16 +438,16 @@ static int g726_decode_frame(AVCodecContext *avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = out_samples;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     samples = (int16_t *)frame->data[0];
 
     init_get_bits(&gb, buf, buf_size * 8);
 
     while (out_samples--)
-        *samples++ = g726_decode(c, get_bits(&gb, c->code_size));
+        *samples++ = g726_decode(c, c->little_endian ?
+                                    get_bits_le(&gb, c->code_size) :
+                                    get_bits(&gb, c->code_size));
 
     if (get_bits_left(&gb) > 0)
         av_log(avctx, AV_LOG_ERROR, "Frame invalidly split, missing parser?\n");
@@ -457,7 +462,9 @@ static void g726_decode_flush(AVCodecContext *avctx)
     G726Context *c = avctx->priv_data;
     g726_reset(c);
 }
+#endif
 
+#if CONFIG_ADPCM_G726_DECODER
 AVCodec ff_adpcm_g726_decoder = {
     .name           = "g726",
     .long_name      = NULL_IF_CONFIG_SMALL("G.726 ADPCM"),
@@ -470,3 +477,17 @@ AVCodec ff_adpcm_g726_decoder = {
     .capabilities   = CODEC_CAP_DR1,
 };
 #endif
+
+#if CONFIG_ADPCM_G726LE_DECODER
+AVCodec ff_adpcm_g726le_decoder = {
+    .name           = "g726le", /* g726_decode_init() matches this string */
+    .long_name      = NULL_IF_CONFIG_SMALL("G.726 ADPCM little-endian"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_ADPCM_G726LE,
+    .priv_data_size = sizeof(G726Context),
+    .init           = g726_decode_init,
+    .decode         = g726_decode_frame,
+    .flush          = g726_decode_flush,
+    .capabilities   = CODEC_CAP_DR1,
+};
+#endif
diff --git a/libavcodec/x86/audiodsp.h b/libavcodec/g729.h
index 321056b..6168313 100644
--- a/libavcodec/x86/audiodsp.h
+++ b/libavcodec/g729.h
@@ -1,25 +1,29 @@
 /*
- * This file is part of Libav.
+ * G.729, G729 Annex D decoders
+ * Copyright (c) 2008 Vladimir Voroshilov
  *
- * Libav is free software; you can redistribute it and/or
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
+#ifndef AVCODEC_G729_H
+#define AVCODEC_G729_H
 
-#ifndef AVCODEC_X86_AUDIODSP_H
-#define AVCODEC_X86_AUDIODSP_H
-
-void ff_vector_clipf_sse(float *dst, const float *src,
-                         float min, float max, int len);
+/**
+ * subframe size
+ */
+#define SUBFRAME_SIZE 40
 
-#endif /* AVCODEC_X86_AUDIODSP_H */
+#endif // AVCODEC_G729_H
diff --git a/libavcodec/g729data.h b/libavcodec/g729data.h
new file mode 100644
index 0000000..365ca47
--- /dev/null
+++ b/libavcodec/g729data.h
@@ -0,0 +1,382 @@
+/*
+ * data for G.729, G729 Annex D decoders
+ * Copyright (c) 2007 Vladimir Voroshilov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_G729DATA_H
+#define AVCODEC_G729DATA_H
+
+#include <stdint.h>
+
+#define MA_NP                4  ///< Moving Average (MA) prediction order
+
+#define VQ_1ST_BITS          7  ///< first stage vector of quantizer (size in bits)
+#define VQ_2ND_BITS          5  ///< second stage vector of quantizer (size in bits)
+
+#define GC_1ST_IDX_BITS_8K   3  ///< gain codebook (first stage) index, 8k mode (size in bits)
+#define GC_2ND_IDX_BITS_8K   4  ///< gain codebook (second stage) index, 8k mode (size in bits)
+
+#define GC_1ST_IDX_BITS_6K4  3  ///< gain codebook (first stage) index, 6.4k mode (size in bits)
+#define GC_2ND_IDX_BITS_6K4  3  ///< gain codebook (second stage) index, 6.4k mode (size in bits)
+
+/**
+ * first stage LSP codebook
+ * (10-dimensional, with 128 entries; 3.2.4 of G.729)
+ */
+static const int16_t cb_lsp_1st[1<<VQ_1ST_BITS][10] = { /* (2.13) */
+  { 1486,  2168,  3751,  9074, 12134, 13944, 17983, 19173, 21190, 21820},
+  { 1730,  2640,  3450,  4870,  6126,  7876, 15644, 17817, 20294, 21902},
+  { 1568,  2256,  3088,  4874, 11063, 13393, 18307, 19293, 21109, 21741},
+  { 1733,  2512,  3357,  4708,  6977, 10296, 17024, 17956, 19145, 20350},
+  { 1744,  2436,  3308,  8731, 10432, 12007, 15614, 16639, 21359, 21913},
+  { 1786,  2369,  3372,  4521,  6795, 12963, 17674, 18988, 20855, 21640},
+  { 1631,  2433,  3361,  6328, 10709, 12013, 13277, 13904, 19441, 21088},
+  { 1489,  2364,  3291,  6250,  9227, 10403, 13843, 15278, 17721, 21451},
+  { 1869,  2533,  3475,  4365,  9152, 14513, 15908, 17022, 20611, 21411},
+  { 2070,  3025,  4333,  5854,  7805,  9231, 10597, 16047, 20109, 21834},
+  { 1910,  2673,  3419,  4261, 11168, 15111, 16577, 17591, 19310, 20265},
+  { 1141,  1815,  2624,  4623,  6495,  9588, 13968, 16428, 19351, 21286},
+  { 2192,  3171,  4707,  5808, 10904, 12500, 14162, 15664, 21124, 21789},
+  { 1286,  1907,  2548,  3453,  9574, 11964, 15978, 17344, 19691, 22495},
+  { 1921,  2720,  4604,  6684, 11503, 12992, 14350, 15262, 16997, 20791},
+  { 2052,  2759,  3897,  5246,  6638, 10267, 15834, 16814, 18149, 21675},
+  { 1798,  2497,  5617, 11449, 13189, 14711, 17050, 18195, 20307, 21182},
+  { 1009,  1647,  2889,  5709,  9541, 12354, 15231, 18494, 20966, 22033},
+  { 3016,  3794,  5406,  7469, 12488, 13984, 15328, 16334, 19952, 20791},
+  { 2203,  3040,  3796,  5442, 11987, 13512, 14931, 16370, 17856, 18803},
+  { 2912,  4292,  7988,  9572, 11562, 13244, 14556, 16529, 20004, 21073},
+  { 2861,  3607,  5923,  7034,  9234, 12054, 13729, 18056, 20262, 20974},
+  { 3069,  4311,  5967,  7367, 11482, 12699, 14309, 16233, 18333, 19172},
+  { 2434,  3661,  4866,  5798, 10383, 11722, 13049, 15668, 18862, 19831},
+  { 2020,  2605,  3860,  9241, 13275, 14644, 16010, 17099, 19268, 20251},
+  { 1877,  2809,  3590,  4707, 11056, 12441, 15622, 17168, 18761, 19907},
+  { 2107,  2873,  3673,  5799, 13579, 14687, 15938, 17077, 18890, 19831},
+  { 1612,  2284,  2944,  3572,  8219, 13959, 15924, 17239, 18592, 20117},
+  { 2420,  3156,  6542, 10215, 12061, 13534, 15305, 16452, 18717, 19880},
+  { 1667,  2612,  3534,  5237, 10513, 11696, 12940, 16798, 18058, 19378},
+  { 2388,  3017,  4839,  9333, 11413, 12730, 15024, 16248, 17449, 18677},
+  { 1875,  2786,  4231,  6320,  8694, 10149, 11785, 17013, 18608, 19960},
+  {  679,  1411,  4654,  8006, 11446, 13249, 15763, 18127, 20361, 21567},
+  { 1838,  2596,  3578,  4608,  5650, 11274, 14355, 15886, 20579, 21754},
+  { 1303,  1955,  2395,  3322, 12023, 13764, 15883, 18077, 20180, 21232},
+  { 1438,  2102,  2663,  3462,  8328, 10362, 13763, 17248, 19732, 22344},
+  {  860,  1904,  6098,  7775,  9815, 12007, 14821, 16709, 19787, 21132},
+  { 1673,  2723,  3704,  6125,  7668,  9447, 13683, 14443, 20538, 21731},
+  { 1246,  1849,  2902,  4508,  7221, 12710, 14835, 16314, 19335, 22720},
+  { 1525,  2260,  3862,  5659,  7342, 11748, 13370, 14442, 18044, 21334},
+  { 1196,  1846,  3104,  7063, 10972, 12905, 14814, 17037, 19922, 22636},
+  { 2147,  3106,  4475,  6511,  8227,  9765, 10984, 12161, 18971, 21300},
+  { 1585,  2405,  2994,  4036, 11481, 13177, 14519, 15431, 19967, 21275},
+  { 1778,  2688,  3614,  4680,  9465, 11064, 12473, 16320, 19742, 20800},
+  { 1862,  2586,  3492,  6719, 11708, 13012, 14364, 16128, 19610, 20425},
+  { 1395,  2156,  2669,  3386, 10607, 12125, 13614, 16705, 18976, 21367},
+  { 1444,  2117,  3286,  6233,  9423, 12981, 14998, 15853, 17188, 21857},
+  { 2004,  2895,  3783,  4897,  6168,  7297, 12609, 16445, 19297, 21465},
+  { 1495,  2863,  6360,  8100, 11399, 14271, 15902, 17711, 20479, 22061},
+  { 2484,  3114,  5718,  7097,  8400, 12616, 14073, 14847, 20535, 21396},
+  { 2424,  3277,  5296,  6284, 11290, 12903, 16022, 17508, 19333, 20283},
+  { 2565,  3778,  5360,  6989,  8782, 10428, 14390, 15742, 17770, 21734},
+  { 2727,  3384,  6613,  9254, 10542, 12236, 14651, 15687, 20074, 21102},
+  { 1916,  2953,  6274,  8088,  9710, 10925, 12392, 16434, 20010, 21183},
+  { 3384,  4366,  5349,  7667, 11180, 12605, 13921, 15324, 19901, 20754},
+  { 3075,  4283,  5951,  7619,  9604, 11010, 12384, 14006, 20658, 21497},
+  { 1751,  2455,  5147,  9966, 11621, 13176, 14739, 16470, 20788, 21756},
+  { 1442,  2188,  3330,  6813,  8929, 12135, 14476, 15306, 19635, 20544},
+  { 2294,  2895,  4070,  8035, 12233, 13416, 14762, 17367, 18952, 19688},
+  { 1937,  2659,  4602,  6697,  9071, 12863, 14197, 15230, 16047, 18877},
+  { 2071,  2663,  4216,  9445, 10887, 12292, 13949, 14909, 19236, 20341},
+  { 1740,  2491,  3488,  8138,  9656, 11153, 13206, 14688, 20896, 21907},
+  { 2199,  2881,  4675,  8527, 10051, 11408, 14435, 15463, 17190, 20597},
+  { 1943,  2988,  4177,  6039,  7478,  8536, 14181, 15551, 17622, 21579},
+  { 1825,  3175,  7062,  9818, 12824, 15450, 18330, 19856, 21830, 22412},
+  { 2464,  3046,  4822,  5977,  7696, 15398, 16730, 17646, 20588, 21320},
+  { 2550,  3393,  5305,  6920, 10235, 14083, 18143, 19195, 20681, 21336},
+  { 3003,  3799,  5321,  6437,  7919, 11643, 15810, 16846, 18119, 18980},
+  { 3455,  4157,  6838,  8199,  9877, 12314, 15905, 16826, 19949, 20892},
+  { 3052,  3769,  4891,  5810,  6977, 10126, 14788, 15990, 19773, 20904},
+  { 3671,  4356,  5827,  6997,  8460, 12084, 14154, 14939, 19247, 20423},
+  { 2716,  3684,  5246,  6686,  8463, 10001, 12394, 14131, 16150, 19776},
+  { 1945,  2638,  4130,  7995, 14338, 15576, 17057, 18206, 20225, 20997},
+  { 2304,  2928,  4122,  4824,  5640, 13139, 15825, 16938, 20108, 21054},
+  { 1800,  2516,  3350,  5219, 13406, 15948, 17618, 18540, 20531, 21252},
+  { 1436,  2224,  2753,  4546,  9657, 11245, 15177, 16317, 17489, 19135},
+  { 2319,  2899,  4980,  6936,  8404, 13489, 15554, 16281, 20270, 20911},
+  { 2187,  2919,  4610,  5875,  7390, 12556, 14033, 16794, 20998, 21769},
+  { 2235,  2923,  5121,  6259,  8099, 13589, 15340, 16340, 17927, 20159},
+  { 1765,  2638,  3751,  5730,  7883, 10108, 13633, 15419, 16808, 18574},
+  { 3460,  5741,  9596, 11742, 14413, 16080, 18173, 19090, 20845, 21601},
+  { 3735,  4426,  6199,  7363,  9250, 14489, 16035, 17026, 19873, 20876},
+  { 3521,  4778,  6887,  8680, 12717, 14322, 15950, 18050, 20166, 21145},
+  { 2141,  2968,  6865,  8051, 10010, 13159, 14813, 15861, 17528, 18655},
+  { 4148,  6128,  9028, 10871, 12686, 14005, 15976, 17208, 19587, 20595},
+  { 4403,  5367,  6634,  8371, 10163, 11599, 14963, 16331, 17982, 18768},
+  { 4091,  5386,  6852,  8770, 11563, 13290, 15728, 16930, 19056, 20102},
+  { 2746,  3625,  5299,  7504, 10262, 11432, 13172, 15490, 16875, 17514},
+  { 2248,  3556,  8539, 10590, 12665, 14696, 16515, 17824, 20268, 21247},
+  { 1279,  1960,  3920,  7793, 10153, 14753, 16646, 18139, 20679, 21466},
+  { 2440,  3475,  6737,  8654, 12190, 14588, 17119, 17925, 19110, 19979},
+  { 1879,  2514,  4497,  7572, 10017, 14948, 16141, 16897, 18397, 19376},
+  { 2804,  3688,  7490, 10086, 11218, 12711, 16307, 17470, 20077, 21126},
+  { 2023,  2682,  3873,  8268, 10255, 11645, 15187, 17102, 18965, 19788},
+  { 2823,  3605,  5815,  8595, 10085, 11469, 16568, 17462, 18754, 19876},
+  { 2851,  3681,  5280,  7648,  9173, 10338, 14961, 16148, 17559, 18474},
+  { 1348,  2645,  5826,  8785, 10620, 12831, 16255, 18319, 21133, 22586},
+  { 2141,  3036,  4293,  6082,  7593, 10629, 17158, 18033, 21466, 22084},
+  { 1608,  2375,  3384,  6878,  9970, 11227, 16928, 17650, 20185, 21120},
+  { 2774,  3616,  5014,  6557,  7788,  8959, 17068, 18302, 19537, 20542},
+  { 1934,  4813,  6204,  7212,  8979, 11665, 15989, 17811, 20426, 21703},
+  { 2288,  3507,  5037,  6841,  8278,  9638, 15066, 16481, 21653, 22214},
+  { 2951,  3771,  4878,  7578,  9016, 10298, 14490, 15242, 20223, 20990},
+  { 3256,  4791,  6601,  7521,  8644,  9707, 13398, 16078, 19102, 20249},
+  { 1827,  2614,  3486,  6039, 12149, 13823, 16191, 17282, 21423, 22041},
+  { 1000,  1704,  3002,  6335,  8471, 10500, 14878, 16979, 20026, 22427},
+  { 1646,  2286,  3109,  7245, 11493, 12791, 16824, 17667, 18981, 20222},
+  { 1708,  2501,  3315,  6737,  8729,  9924, 16089, 17097, 18374, 19917},
+  { 2623,  3510,  4478,  5645,  9862, 11115, 15219, 18067, 19583, 20382},
+  { 2518,  3434,  4728,  6388,  8082,  9285, 13162, 18383, 19819, 20552},
+  { 1726,  2383,  4090,  6303,  7805, 12845, 14612, 17608, 19269, 20181},
+  { 2860,  3735,  4838,  6044,  7254,  8402, 14031, 16381, 18037, 19410},
+  { 4247,  5993,  7952,  9792, 12342, 14653, 17527, 18774, 20831, 21699},
+  { 3502,  4051,  5680,  6805,  8146, 11945, 16649, 17444, 20390, 21564},
+  { 3151,  4893,  5899,  7198, 11418, 13073, 15124, 17673, 20520, 21861},
+  { 3960,  4848,  5926,  7259,  8811, 10529, 15661, 16560, 18196, 20183},
+  { 4499,  6604,  8036,  9251, 10804, 12627, 15880, 17512, 20020, 21046},
+  { 4251,  5541,  6654,  8318,  9900, 11686, 15100, 17093, 20572, 21687},
+  { 3769,  5327,  7865,  9360, 10684, 11818, 13660, 15366, 18733, 19882},
+  { 3083,  3969,  6248,  8121,  9798, 10994, 12393, 13686, 17888, 19105},
+  { 2731,  4670,  7063,  9201, 11346, 13735, 16875, 18797, 20787, 22360},
+  { 1187,  2227,  4737,  7214,  9622, 12633, 15404, 17968, 20262, 23533},
+  { 1911,  2477,  3915, 10098, 11616, 12955, 16223, 17138, 19270, 20729},
+  { 1764,  2519,  3887,  6944,  9150, 12590, 16258, 16984, 17924, 18435},
+  { 1400,  3674,  7131,  8718, 10688, 12508, 15708, 17711, 19720, 21068},
+  { 2322,  3073,  4287,  8108,  9407, 10628, 15862, 16693, 19714, 21474},
+  { 2630,  3339,  4758,  8360, 10274, 11333, 12880, 17374, 19221, 19936},
+  { 1721,  2577,  5553,  7195,  8651, 10686, 15069, 16953, 18703, 19929}
+};
+
+/**
+ * second stage LSP codebook, high and low parts
+ * (both 5-dimensional, with 32 entries; 3.2.4 of G.729)
+ */
+static const int16_t cb_lsp_2nd[1<<VQ_2ND_BITS][10] = { /* (2.13) */
+  { -435,  -815,  -742,  1033,  -518,   582, -1201,   829,    86,   385},
+  { -833,  -891,   463,    -8, -1251,  1450,    72,  -231,   864,   661},
+  {-1021,   231,  -306,   321,  -220,  -163,  -526,  -754, -1633,   267},
+  {   57,  -198,  -339,   -33, -1468,   573,   796,  -169,  -631,   816},
+  {  171,  -350,   294,  1660,   453,   519,   291,   159,  -640, -1296},
+  { -701,  -842,   -58,   950,   892,  1549,   715,   527,  -714,  -193},
+  {  584,    31,  -289,   356,  -333,  -457,   612,  -283, -1381,  -741},
+  { -109,  -808,   231,    77,   -87,  -344,  1341,  1087,  -654,  -569},
+  { -859,  1236,   550,   854,   714,  -543, -1752,  -195,   -98,  -276},
+  { -877,  -954, -1248,  -299,   212,  -235,  -728,   949,  1517,   895},
+  {  -77,   344,  -620,   763,   413,   502,  -362,  -960,  -483,  1386},
+  { -314,  -307,  -256, -1260,  -429,   450,  -466,  -108,  1010,  2223},
+  {  711,   693,   521,   650,  1305,   -28,  -378,   744, -1005,   240},
+  { -112,  -271,  -500,   946,  1733,   271,   -15,   909,  -259,  1688},
+  {  575,   -10,  -468,  -199,  1101, -1011,   581,   -53,  -747,   878},
+  {  145,  -285, -1280,  -398,    36,  -498, -1377,    18,  -444,  1483},
+  {-1133,  -835,  1350,  1284,   -95,  1015,  -222,   443,   372,  -354},
+  {-1459, -1237,   416,  -213,   466,   669,   659,  1640,   932,   534},
+  {  -15,    66,   468,  1019,  -748,  1385,  -182,  -907,  -721,  -262},
+  { -338,   148,  1445,    75,  -760,   569,  1247,   337,   416,  -121},
+  {  389,   239,  1568,   981,   113,   369, -1003,  -507,  -587,  -904},
+  { -312,   -98,   949,    31,  1104,    72,  -141,  1465,    63,  -785},
+  { 1127,   584,   835,   277, -1159,   208,   301,  -882,   117,  -404},
+  {  539,  -114,   856,  -493,   223,  -912,   623,   -76,   276,  -440},
+  { 2197,  2337,  1268,   670,   304,  -267,  -525,   140,   882,  -139},
+  {-1596,   550,   801,  -456,   -56,  -697,   865,  1060,   413,   446},
+  { 1154,   593,   -77,  1237,   -31,   581, -1037,  -895,   669,   297},
+  {  397,   558,   203,  -797,  -919,     3,   692,  -292,  1050,   782},
+  {  334,  1475,   632,   -80,    48, -1061,  -484,   362,  -597,  -852},
+  { -545,  -330,  -429,  -680,  1133, -1182,  -744,  1340,   262,    63},
+  { 1320,   827,  -398,  -576,   341,  -774,  -483, -1247,   -70,    98},
+  { -163,   674,   -11,  -886,   531, -1125,  -265,  -242,   724,   934}
+};
+
+/**
+ * gain codebook (first stage), 8k mode (3.9.2 of G.729)
+ */
+static const int16_t cb_gain_1st_8k[1<<GC_1ST_IDX_BITS_8K][2] = { /*(0.14) (2.13) */
+  { 3242 ,  9949 },
+  { 1551 ,  2425 },
+  { 2678 , 27162 },
+  { 1921 ,  9291 },
+  { 1831 ,  5022 },
+  {    1 ,  1516 },
+  {  356 , 14756 },
+  {   57 ,  5404 },
+};
+
+/**
+ * gain codebook (second stage), 8k mode (3.9.2 of G.729)
+ */
+static const int16_t cb_gain_2nd_8k[1<<GC_2ND_IDX_BITS_8K][2] = { /*(1.14) (1.13) */
+  {  5142 ,   592 },
+  { 17299 ,  1861 },
+  {  6160 ,  2395 },
+  { 16112 ,  3392 },
+  {   826 ,  2005 },
+  { 18973 ,  5935 },
+  {  1994 ,     0 },
+  { 15434 ,   237 },
+  { 10573 ,  2966 },
+  { 15132 ,  4914 },
+  { 11569 ,  1196 },
+  { 14194 ,  1630 },
+  {  8091 ,  4861 },
+  { 15161 , 14276 },
+  {  9120 ,   525 },
+  { 13260 ,  3256 },
+};
+
+/**
+ * gain codebook (first stage), 6.4k mode (D.3.9.2 of G.729)
+ */
+static const int16_t cb_gain_1st_6k4[1<<GC_1ST_IDX_BITS_6K4][2] =
+{ /*(0.14) (1.14)*/
+ { 5849,     0 },
+ { 3171,  9280 },
+ { 3617,  6747 },
+ { 4987, 22294 },
+ { 2929,  1078 },
+ { 6068,  6093 },
+ { 9425,  2731 },
+ { 3915, 12872 },
+};
+
+/**
+ * gain codebook (second stage), 6.4k mode (D.3.9.2 of G.729)
+ */
+static const int16_t cb_gain_2nd_6k4[1<<GC_2ND_IDX_BITS_6K4][2] =
+{ /*(1.14) (1.14)*/
+ {    0,  4175 },
+ {10828, 27602 },
+ {16423, 15724 },
+ { 4478,  7324 },
+ { 3988,     0 },
+ {10291, 11385 },
+ {11956, 10735 },
+ { 7876,  7821 },
+};
+
+/**
+ * 4th order Moving Average (MA) Predictor codebook (3.2.4 of G.729)
+ *
+ * float cb_ma_predictor_float[2][MA_NP][10] = {
+ *   {
+ *     {0.2570, 0.2780, 0.2800, 0.2736, 0.2757, 0.2764, 0.2675, 0.2678, 0.2779, 0.2647},
+ *     {0.2142, 0.2194, 0.2331, 0.2230, 0.2272, 0.2252, 0.2148, 0.2123, 0.2115, 0.2096},
+ *     {0.1670, 0.1523, 0.1567, 0.1580, 0.1601, 0.1569, 0.1589, 0.1555, 0.1474, 0.1571},
+ *     {0.1238, 0.0925, 0.0798, 0.0923, 0.0890, 0.0828, 0.1010, 0.0988, 0.0872, 0.1060},
+ *   },
+ *   {
+ *     {0.2360, 0.2405, 0.2499, 0.2495, 0.2517, 0.2591, 0.2636, 0.2625, 0.2551, 0.2310},
+ *     {0.1285, 0.0925, 0.0779, 0.1060, 0.1183, 0.1176, 0.1277, 0.1268, 0.1193, 0.1211},
+ *     {0.0981, 0.0589, 0.0401, 0.0654, 0.0761, 0.0728, 0.0841, 0.0826, 0.0776, 0.0891},
+ *     {0.0923, 0.0486, 0.0287, 0.0498, 0.0526, 0.0482, 0.0621, 0.0636, 0.0584, 0.0794},
+ *   },
+ * };
+ *                                    15
+ * cb_ma_predictor[j][k][i] = floor( 2 * cb_ma_predictor_float[j][k][i] )
+ *
+ * j=0..1, i=0..9, k=0..MA_NP-1
+ */
+static const int16_t cb_ma_predictor[2][MA_NP][10] = { /* (0.15) */
+  {
+    { 8421,  9109,  9175,  8965,  9034,  9057,  8765,  8775,  9106,  8673},
+    { 7018,  7189,  7638,  7307,  7444,  7379,  7038,  6956,  6930,  6868},
+    { 5472,  4990,  5134,  5177,  5246,  5141,  5206,  5095,  4830,  5147},
+    { 4056,  3031,  2614,  3024,  2916,  2713,  3309,  3237,  2857,  3473}
+  },
+  {
+    { 7733,  7880,  8188,  8175,  8247,  8490,  8637,  8601,  8359,  7569},
+    { 4210,  3031,  2552,  3473,  3876,  3853,  4184,  4154,  3909,  3968},
+    { 3214,  1930,  1313,  2143,  2493,  2385,  2755,  2706,  2542,  2919},
+    { 3024,  1592,   940,  1631,  1723,  1579,  2034,  2084,  1913,  2601}
+  }
+};
+
+/**
+ *                                     15         3
+ * cb_ma_predictor_sum[j][i] = floor( 2 * (1.0 - sum ( cb_ma_predictor_float[j][k][i] ) ) )
+ *                                               k=0
+ * j=0..1, i=0..9
+ */
+static const int16_t cb_ma_predictor_sum[2][10] = { /* (0.15) */
+  { 7798,  8447,  8205,  8293,  8126,  8477,  8447,  8703,  9043,  8604},
+  {14585, 18333, 19772, 17344, 16426, 16459, 15155, 15220, 16043, 15708}
+};
+
+/**
+ *                                                           12
+ *                                                          2
+ * cb_ma_predictor_sum_inv[j][i] = floor(---------------------------------------------)
+ *                                               3
+ *                                        1.0 - sum ( cb_ma_predictor_float[j][k][i] )
+ *                                              k=0
+ * j=0..1, i=0..9
+ */
+static const int16_t cb_ma_predictor_sum_inv[2][10] = { /* (3.12) */
+  {17210, 15888, 16357, 16183, 16516, 15833, 15888, 15421, 14840, 15597},
+  { 9202,  7320,  6788,  7738,  8170,  8154,  8856,  8818,  8366,  8544}
+};
+
+/**
+ * MA prediction coefficients (3.9.1 of G.729, near Equation 69)
+ */
+static const uint16_t ma_prediction_coeff[4] = { /* (0.13) */
+  5571, 4751, 2785, 1556
+};
+
+/**
+ * initial LSP coefficients, belonging to the virtual frame preceding the
+ * first frame of the stream
+ */
+static const int16_t lsp_init[10]= { /* (0.15) */
+   30000, 26000, 21000, 15000, 8000, 0, -8000,-15000,-21000,-26000
+};
+
+/**
+ * additional "phase" post-processing filter impulse response (D.6.2 of G.729)
+ *
+ * Table contains three impulse responses, corresponding to
+ * different amounts of spreading.
+ */
+static const int16_t phase_filter[3][40] =
+{
+  { // maximum spreading (for noise-like segments)
+    14690, 11518,  1268, -2762, -5672,  7514,  -36, -2808, -3041,  4823,
+     2952, -8425,  3785,  1455,  2179, -8638, 8051, -2104, -1455,   777,
+     1108, -2386,  2254,  -364,  -675, -2104, 6046, -5682,  1072,  3123,
+    -5059,  5312, -2330, -3729,  6924, -3890,  675, -1776,    29, 10145,
+  },
+  { // medium spreading
+    30274,  3831, -4037,  2972, -1049, -1003,  2477, -3044,  2815, -2232,
+     1753, -1612,  1714, -1776,  1543, -1009,   429,  -170,   472, -1265,
+     2176, -2707,  2523, -1622,   344,   826, -1530,  1724, -1658,  1701,
+    -2064,  2644, -3061,  2897, -1979,   557,   780, -1370,   842,   655,
+  },
+  { // no spreading (for voiced speech)
+    32767, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  }
+};
+#endif /* AVCODEC_G729DATA_H */
diff --git a/libavcodec/g729dec.c b/libavcodec/g729dec.c
new file mode 100644
index 0000000..6eb057f
--- /dev/null
+++ b/libavcodec/g729dec.c
@@ -0,0 +1,726 @@
+/*
+ * G.729, G729 Annex D decoders
+ * Copyright (c) 2008 Vladimir Voroshilov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <inttypes.h>
+#include <string.h>
+
+#include "avcodec.h"
+#include "libavutil/avutil.h"
+#include "get_bits.h"
+#include "audiodsp.h"
+#include "internal.h"
+
+
+#include "g729.h"
+#include "lsp.h"
+#include "celp_math.h"
+#include "celp_filters.h"
+#include "acelp_filters.h"
+#include "acelp_pitch_delay.h"
+#include "acelp_vectors.h"
+#include "g729data.h"
+#include "g729postfilter.h"
+
+/**
+ * minimum quantized LSF value (3.2.4)
+ * 0.005 in Q13
+ */
+#define LSFQ_MIN                   40
+
+/**
+ * maximum quantized LSF value (3.2.4)
+ * 3.135 in Q13
+ */
+#define LSFQ_MAX                   25681
+
+/**
+ * minimum LSF distance (3.2.4)
+ * 0.0391 in Q13
+ */
+#define LSFQ_DIFF_MIN              321
+
+/// interpolation filter length
+#define INTERPOL_LEN              11
+
+/**
+ * minimum gain pitch value (3.8, Equation 47)
+ * 0.2 in (1.14)
+ */
+#define SHARP_MIN                  3277
+
+/**
+ * maximum gain pitch value (3.8, Equation 47)
+ * (EE) This does not comply with the specification.
+ * Specification says about 0.8, which should be
+ * 13107 in (1.14), but reference C code uses
+ * 13017 (equals to 0.7945) instead of it.
+ */
+#define SHARP_MAX                  13017
+
+/**
+ * MR_ENERGY (mean removed energy) = mean_energy + 10 * log10(2^26  * subframe_size) in (7.13)
+ */
+#define MR_ENERGY 1018156
+
+#define DECISION_NOISE        0
+#define DECISION_INTERMEDIATE 1
+#define DECISION_VOICE        2
+
+typedef enum {
+    FORMAT_G729_8K = 0,   ///< first format id (value 0)
+    FORMAT_G729D_6K4,     ///< second format id (value 1)
+    FORMAT_COUNT,         ///< number of format ids defined above
+} G729Formats;
+
+typedef struct {
+    uint8_t ac_index_bits[2];   ///< adaptive codebook index sizes in bits, two entries (presumably one per subframe; confirm in decode_frame)
+    uint8_t parity_bit;         ///< parity bit for pitch delay
+    uint8_t gc_1st_index_bits;  ///< gain codebook (first stage) index (size in bits)
+    uint8_t gc_2nd_index_bits;  ///< gain codebook (second stage) index (size in bits)
+    uint8_t fc_signs_bits;      ///< number of pulses in fixed-codebook vector
+    uint8_t fc_indexes_bits;    ///< size (in bits) of fixed-codebook index entry
+} G729FormatDescription;
+
+typedef struct {
+    AudioDSPContext adsp;       ///< DSP function table; scalarproduct_int16 is overridden in decoder_init()
+
+    /// past excitation signal buffer
+    int16_t exc_base[2*SUBFRAME_SIZE+PITCH_DELAY_MAX+INTERPOL_LEN];
+
+    int16_t* exc;               ///< points into exc_base at offset PITCH_DELAY_MAX+INTERPOL_LEN (set in decoder_init())
+    int pitch_delay_int_prev;   ///< integer part of previous subframe's pitch delay (4.1.3)
+
+    /// (2.13) LSP quantizer outputs
+    int16_t  past_quantizer_output_buf[MA_NP + 1][10];
+    int16_t* past_quantizer_outputs[MA_NP + 1]; ///< row pointers into past_quantizer_output_buf (set in decoder_init())
+
+    int16_t lsfq[10];           ///< (2.13) quantized LSF coefficients from previous frame
+    int16_t lsp_buf[2][10];     ///< (0.15) LSP coefficients (previous and current frames) (3.2.5)
+    int16_t *lsp[2];            ///< pointers to lsp_buf
+
+    int16_t quant_energy[4];    ///< (5.10) past quantized energy
+
+    /// previous speech data for LP synthesis filter
+    int16_t syn_filter_data[10];
+
+
+    /// residual signal buffer (used in long-term postfilter)
+    int16_t residual[SUBFRAME_SIZE + RES_PREV_DATA_SIZE];
+
+    /// previous speech data for residual calculation filter
+    int16_t res_filter_data[SUBFRAME_SIZE+10];
+
+    /// previous speech data for short-term postfilter
+    int16_t pos_filter_data[SUBFRAME_SIZE+10];
+
+    /// (1.14) pitch gain of current and five previous subframes
+    int16_t past_gain_pitch[6];
+
+    /// (14.1) gain code from current and previous subframe
+    int16_t past_gain_code[2];
+
+    /// voice decision on previous subframe (0-noise, 1-intermediate, 2-voice; see DECISION_* macros), G.729D
+    int16_t voice_decision;
+
+    int16_t onset;              ///< detected onset level (0-2)
+    int16_t was_periodic;       ///< whether previous frame was declared as periodic or not (4.4)
+    int16_t ht_prev_data;       ///< previous data for 4.2.3, equation 86
+    int gain_coeff;             ///< (1.14) gain coefficient (4.2.4)
+    uint16_t rand_value;        ///< random number generator value (4.4.4)
+    int ma_predictor_prev;      ///< switched MA predictor of LSP quantizer from last good frame
+
+    /// (14.14) high-pass filter data (past input)
+    int hpf_f[2];
+
+    /// high-pass filter data (past output)
+    int16_t hpf_z[2];
+}  G729Context;
+
+static const G729FormatDescription format_g729_8k = { // field sizes for the 8k mode
+    .ac_index_bits     = {8,5},
+    .parity_bit        = 1,
+    .gc_1st_index_bits = GC_1ST_IDX_BITS_8K,
+    .gc_2nd_index_bits = GC_2ND_IDX_BITS_8K,
+    .fc_signs_bits     = 4,
+    .fc_indexes_bits   = 13,
+};
+
+static const G729FormatDescription format_g729d_6k4 = { // field sizes for the 6.4k mode
+    .ac_index_bits     = {8,4},
+    .parity_bit        = 0, // no pitch delay parity bit in this format
+    .gc_1st_index_bits = GC_1ST_IDX_BITS_6K4,
+    .gc_2nd_index_bits = GC_2ND_IDX_BITS_6K4,
+    .fc_signs_bits     = 2,
+    .fc_indexes_bits   = 9,
+};
+
+/**
+ * @brief pseudo random number generator: maps the previous state to the next one (result truncated to 16 bits)
+ */
+static inline uint16_t g729_prng(uint16_t value)
+{
+    return 31821 * value + 13849; // computed in int, narrowed by the uint16_t return type
+}
+
+/**
+ * Get parity bit of bit 2..7 (1 when an odd number of those bits is set)
+ */
+static inline int get_parity(uint8_t value)
+{
+   return (0x6996966996696996ULL >> (value >> 2)) & 1; // 64-entry parity table packed into one constant, indexed by value >> 2
+}
+
+/**
+ * Decodes LSF (Line Spectral Frequencies) from L0-L3 (3.2.4).
+ * @param[out] lsfq (2.13) quantized LSF coefficients
+ * @param[in,out] past_quantizer_outputs (2.13) quantizer outputs; rows 0..MA_NP-1 are read, row MA_NP is written
+ * @param ma_predictor switched MA predictor of LSP quantizer (first index into cb_ma_predictor / cb_ma_predictor_sum)
+ * @param vq_1st first stage vector of quantizer (row of cb_lsp_1st)
+ * @param vq_2nd_low second stage lower vector of LSP quantizer (row of cb_lsp_2nd used for elements 0..4)
+ * @param vq_2nd_high second stage higher vector of LSP quantizer (row of cb_lsp_2nd used for elements 5..9)
+ */
+static void lsf_decode(int16_t* lsfq, int16_t* past_quantizer_outputs[MA_NP + 1],
+                       int16_t ma_predictor,
+                       int16_t vq_1st, int16_t vq_2nd_low, int16_t vq_2nd_high)
+{
+    int i,j;
+    static const uint8_t min_distance[2]={10, 5}; //(2.13)
+    int16_t* quantizer_output = past_quantizer_outputs[MA_NP]; // destination row for this call's quantizer output
+
+    for (i = 0; i < 5; i++) {
+        quantizer_output[i]     = cb_lsp_1st[vq_1st][i    ] + cb_lsp_2nd[vq_2nd_low ][i    ];
+        quantizer_output[i + 5] = cb_lsp_1st[vq_1st][i + 5] + cb_lsp_2nd[vq_2nd_high][i + 5];
+    }
+
+    for (j = 0; j < 2; j++) { // two passes, one per min_distance entry
+        for (i = 1; i < 10; i++) {
+            int diff = (quantizer_output[i - 1] - quantizer_output[i] + min_distance[j]) >> 1;
+            if (diff > 0) { // neighbours closer than min_distance[j]: push them apart symmetrically
+                quantizer_output[i - 1] -= diff;
+                quantizer_output[i    ] += diff;
+            }
+        }
+    }
+
+    for (i = 0; i < 10; i++) {
+        int sum = quantizer_output[i] * cb_ma_predictor_sum[ma_predictor][i];
+        for (j = 0; j < MA_NP; j++)
+            sum += past_quantizer_outputs[j][i] * cb_ma_predictor[ma_predictor][j][i];
+
+        lsfq[i] = sum >> 15; // predictor tables are (0.15), so drop 15 fractional bits
+    }
+
+    ff_acelp_reorder_lsf(lsfq, LSFQ_DIFF_MIN, LSFQ_MIN, LSFQ_MAX, 10);
+}
+
+/**
+ * Restores past LSP quantizer output using LSF from previous frame
+ * @param[in] lsfq (2.13) quantized LSF coefficients (only read here)
+ * @param[in,out] past_quantizer_outputs (2.13) quantizer outputs from previous frames;
+ *                rows 0..MA_NP-1 are read, row MA_NP receives the restored output
+ * @param ma_predictor_prev MA predictor from previous frame
+ */
+static void lsf_restore_from_previous(int16_t* lsfq,
+                                      int16_t* past_quantizer_outputs[MA_NP + 1],
+                                      int ma_predictor_prev)
+{
+    int16_t* quantizer_output = past_quantizer_outputs[MA_NP];
+    int i,k;
+
+    for (i = 0; i < 10; i++) {
+        int tmp = lsfq[i] << 15; // scale up to match the (0.15) predictor products subtracted below
+
+        for (k = 0; k < MA_NP; k++)
+            tmp -= past_quantizer_outputs[k][i] * cb_ma_predictor[ma_predictor_prev][k][i];
+
+        quantizer_output[i] = ((tmp >> 15) * cb_ma_predictor_sum_inv[ma_predictor_prev][i]) >> 12; // inverse table is (3.12)
+    }
+}
+
+/** Constructs new excitation signal and applies phase filter to it
+ * @param[out] out constructed excitation signal
+ * @param in original excitation signal
+ * @param fc_cur (2.13) original fixed-codebook vector
+ * @param dstate row of phase_filter to convolve with (the table has rows 0..2)
+ * @param gain_code (14.1) gain code
+ * @param subframe_size length of the subframe
+ */
+static void g729d_get_new_exc(
+        int16_t* out,
+        const int16_t* in,
+        const int16_t* fc_cur,
+        int dstate,
+        int gain_code,
+        int subframe_size)
+{
+    int i;
+    int16_t fc_new[SUBFRAME_SIZE]; // indexed up to subframe_size below, so subframe_size must be <= SUBFRAME_SIZE
+
+    ff_celp_convolve_circ(fc_new, fc_cur, phase_filter[dstate], subframe_size);
+
+    for(i=0; i<subframe_size; i++)
+    {
+        out[i]  = in[i];
+        out[i] -= (gain_code * fc_cur[i] + 0x2000) >> 14; // remove the original codebook contribution (rounded)
+        out[i] += (gain_code * fc_new[i] + 0x2000) >> 14; // add the phase-filtered contribution (rounded)
+    }
+}
+
+/**
+ * Makes decision about onset in current subframe
+ * @param past_onset decision result of previous subframe
+ * @param past_gain_code gain code of current and previous subframe (entries 0 and 1 are read)
+ *
+ * @return 2 if (past_gain_code[0] >> 1) exceeds past_gain_code[1], otherwise past_onset - 1 clamped at 0
+ */
+static int g729d_onset_decision(int past_onset, const int16_t* past_gain_code)
+{
+    if((past_gain_code[0] >> 1) > past_gain_code[1])
+        return 2;
+    else
+        return FFMAX(past_onset-1, 0);
+}
+
+/**
+ * Makes decision about voice presence in current subframe
+ * @param onset onset level (only tested for zero / non-zero here)
+ * @param prev_voice_decision voice decision result from previous subframe
+ * @param past_gain_pitch pitch gain of current and previous subframes (entries 0..5 are read)
+ *
+ * @return voice decision result for current subframe (a DECISION_* value for a valid prev_voice_decision)
+ */
+static int16_t g729d_voice_decision(int onset, int prev_voice_decision, const int16_t* past_gain_pitch)
+{
+    int i, low_gain_pitch_cnt, voice_decision;
+
+    if(past_gain_pitch[0] >= 14745)      // 0.9
+        voice_decision = DECISION_VOICE;
+    else if (past_gain_pitch[0] <= 9830) // 0.6
+        voice_decision = DECISION_NOISE;
+    else
+        voice_decision = DECISION_INTERMEDIATE;
+
+    for(i=0, low_gain_pitch_cnt=0; i<6; i++) // count entries strictly below the 0.6 threshold
+        if(past_gain_pitch[i] < 9830)
+            low_gain_pitch_cnt++;
+
+    if(low_gain_pitch_cnt > 2 && !onset) // at least three low gains and no onset: force noise
+        voice_decision = DECISION_NOISE;
+
+    if(!onset && voice_decision > prev_voice_decision + 1) // without onset, step down when more than one level above the previous decision
+        voice_decision--;
+
+    if(onset && voice_decision < DECISION_VOICE) // onset raises the decision by one level
+        voice_decision++;
+
+    return voice_decision;
+}
+
+static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
+{
+    int res = 0; // running sum of v1[i] * v2[i] over `order` elements
+
+    while (order--)
+        res += *v1++ * *v2++;
+
+    return res;
+}
+
+static av_cold int decoder_init(AVCodecContext * avctx)
+{ // rejects non-mono input with AVERROR(EINVAL), otherwise seeds the decoder state and returns 0
+    G729Context* ctx = avctx->priv_data;
+    int i,k;
+
+    if (avctx->channels != 1) {
+        av_log(avctx, AV_LOG_ERROR, "Only mono sound is supported (requested channels: %d).\n", avctx->channels);
+        return AVERROR(EINVAL);
+    }
+    avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+
+    /* Both 8kbit/s and 6.4kbit/s modes use two subframes per frame. */
+    avctx->frame_size = SUBFRAME_SIZE << 1;
+
+    ctx->gain_coeff = 16384; // 1.0 in (1.14)
+
+    for (k = 0; k < MA_NP + 1; k++) {
+        ctx->past_quantizer_outputs[k] = ctx->past_quantizer_output_buf[k];
+        for (i = 1; i < 11; i++) // fills all 10 entries of row k
+            ctx->past_quantizer_outputs[k][i - 1] = (18717 * i) >> 3; // evenly spaced, step 18717/8 (roughly i * pi / 11 in (2.13))
+    }
+
+    ctx->lsp[0] = ctx->lsp_buf[0];
+    ctx->lsp[1] = ctx->lsp_buf[1];
+    memcpy(ctx->lsp[0], lsp_init, 10 * sizeof(int16_t)); // only lsp[0] is seeded here
+
+    ctx->exc = &ctx->exc_base[PITCH_DELAY_MAX+INTERPOL_LEN];
+
+    ctx->pitch_delay_int_prev = PITCH_DELAY_MIN;
+
+    /* random seed initialization */
+    ctx->rand_value = 21845;
+
+    /* quantized prediction error */
+    for(i=0; i<4; i++)
+        ctx->quant_energy[i] = -14336; // -14 in (5.10)
+
+    ff_audiodsp_init(&ctx->adsp);
+    ctx->adsp.scalarproduct_int16 = scalarproduct_int16_c; // overrides whatever ff_audiodsp_init() installed
+
+    return 0;
+}
+
+/**
+ * Decode one G.729 (10-byte) or G.729D (8-byte) packet into two subframes of
+ * signed 16-bit samples. A packet whose bytes are all zero is handled as a
+ * frame erasure.
+ *
+ * @param avctx         codec context; priv_data holds the G729Context
+ * @param data          AVFrame that receives SUBFRAME_SIZE<<1 samples
+ * @param got_frame_ptr set to 1 once the frame has been produced
+ * @param avpkt         input packet
+ *
+ * @return avpkt->size on success, the ff_get_buffer() error or
+ *         AVERROR_INVALIDDATA (unknown packet size) on failure
+ */
+static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame_ptr,
+                        AVPacket *avpkt)
+{
+    const uint8_t *buf = avpkt->data;
+    int buf_size       = avpkt->size;
+    int16_t *out_frame;
+    GetBitContext gb;
+    const G729FormatDescription *format;
+    int frame_erasure = 0;    ///< frame erasure detected during decoding
+    int bad_pitch = 0;        ///< parity check failed
+    int i;
+    int16_t *tmp;
+    G729Formats packet_type;
+    G729Context *ctx = avctx->priv_data;
+    int16_t lp[2][11];           // (3.12)
+    uint8_t ma_predictor;     ///< switched MA predictor of LSP quantizer
+    uint8_t quantizer_1st;    ///< first stage vector of quantizer
+    uint8_t quantizer_2nd_lo; ///< second stage lower vector of quantizer (size in bits)
+    uint8_t quantizer_2nd_hi; ///< second stage higher vector of quantizer (size in bits)
+
+    int pitch_delay_int[2];      // pitch delay, integer part
+    int pitch_delay_3x;          // pitch delay, multiplied by 3
+    int16_t fc[SUBFRAME_SIZE];   // fixed-codebook vector
+    int16_t synth[SUBFRAME_SIZE+10]; // synthesis filter output, preceded by 10 samples of filter memory
+    int j, ret;
+    int gain_before, gain_after;
+    int is_periodic = 0;         // whether one of the subframes is declared as periodic or not
+    AVFrame *frame = data;
+
+    frame->nb_samples = SUBFRAME_SIZE<<1;
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+    out_frame = (int16_t*) frame->data[0];
+
+    if (buf_size == 10) {
+        packet_type = FORMAT_G729_8K;
+        format = &format_g729_8k;
+        //Reset voice decision
+        ctx->onset = 0;
+        ctx->voice_decision = DECISION_VOICE;
+        av_log(avctx, AV_LOG_DEBUG, "Packet type: %s\n", "G.729 @ 8kbit/s");
+    } else if (buf_size == 8) {
+        packet_type = FORMAT_G729D_6K4;
+        format = &format_g729d_6k4;
+        av_log(avctx, AV_LOG_DEBUG, "Packet type: %s\n", "G.729D @ 6.4kbit/s");
+    } else {
+        av_log(avctx, AV_LOG_ERROR, "Packet size %d is unknown.\n", buf_size);
+        return AVERROR_INVALIDDATA;
+    }
+
+    for (i=0; i < buf_size; i++)
+        frame_erasure |= buf[i];
+    frame_erasure = !frame_erasure;
+
+    init_get_bits(&gb, buf, 8*buf_size);
+
+    ma_predictor     = get_bits(&gb, 1);
+    quantizer_1st    = get_bits(&gb, VQ_1ST_BITS);
+    quantizer_2nd_lo = get_bits(&gb, VQ_2ND_BITS);
+    quantizer_2nd_hi = get_bits(&gb, VQ_2ND_BITS);
+
+    if(frame_erasure)
+        lsf_restore_from_previous(ctx->lsfq, ctx->past_quantizer_outputs,
+                                  ctx->ma_predictor_prev);
+    else {
+        lsf_decode(ctx->lsfq, ctx->past_quantizer_outputs,
+                   ma_predictor,
+                   quantizer_1st, quantizer_2nd_lo, quantizer_2nd_hi);
+        ctx->ma_predictor_prev = ma_predictor;
+    }
+
+    tmp = ctx->past_quantizer_outputs[MA_NP];
+    memmove(ctx->past_quantizer_outputs + 1, ctx->past_quantizer_outputs,
+            MA_NP * sizeof(int16_t*));
+    ctx->past_quantizer_outputs[0] = tmp;
+
+    ff_acelp_lsf2lsp(ctx->lsp[1], ctx->lsfq, 10);
+
+    ff_acelp_lp_decode(&lp[0][0], &lp[1][0], ctx->lsp[1], ctx->lsp[0], 10);
+
+    FFSWAP(int16_t*, ctx->lsp[1], ctx->lsp[0]);
+
+    for (i = 0; i < 2; i++) {
+        int gain_corr_factor;
+
+        uint8_t ac_index;      ///< adaptive codebook index
+        uint8_t pulses_signs;  ///< fixed-codebook vector pulse signs
+        int fc_indexes;        ///< fixed-codebook indexes
+        uint8_t gc_1st_index;  ///< gain codebook (first stage) index
+        uint8_t gc_2nd_index;  ///< gain codebook (second stage) index
+
+        ac_index      = get_bits(&gb, format->ac_index_bits[i]);
+        if(!i && format->parity_bit)
+            bad_pitch = get_parity(ac_index) == get_bits1(&gb);
+        fc_indexes    = get_bits(&gb, format->fc_indexes_bits);
+        pulses_signs  = get_bits(&gb, format->fc_signs_bits);
+        gc_1st_index  = get_bits(&gb, format->gc_1st_index_bits);
+        gc_2nd_index  = get_bits(&gb, format->gc_2nd_index_bits);
+
+        if (frame_erasure)
+            pitch_delay_3x   = 3 * ctx->pitch_delay_int_prev;
+        else if(!i) {
+            if (bad_pitch)
+                pitch_delay_3x   = 3 * ctx->pitch_delay_int_prev;
+            else
+                pitch_delay_3x = ff_acelp_decode_8bit_to_1st_delay3(ac_index);
+        } else {
+            int pitch_delay_min = av_clip(ctx->pitch_delay_int_prev - 5,
+                                          PITCH_DELAY_MIN, PITCH_DELAY_MAX - 9);
+
+            if(packet_type == FORMAT_G729D_6K4)
+                pitch_delay_3x = ff_acelp_decode_4bit_to_2nd_delay3(ac_index, pitch_delay_min);
+            else
+                pitch_delay_3x = ff_acelp_decode_5_6_bit_to_2nd_delay3(ac_index, pitch_delay_min);
+        }
+
+        /* Round pitch delay to nearest (used everywhere except ff_acelp_interpolate). */
+        pitch_delay_int[i]  = (pitch_delay_3x + 1) / 3;
+        if (pitch_delay_int[i] > PITCH_DELAY_MAX) {
+            av_log(avctx, AV_LOG_WARNING, "pitch_delay_int %d is too large\n", pitch_delay_int[i]);
+            pitch_delay_int[i] = PITCH_DELAY_MAX;
+        }
+
+        if (frame_erasure) {
+            ctx->rand_value = g729_prng(ctx->rand_value);
+            fc_indexes   = ctx->rand_value & ((1 << format->fc_indexes_bits) - 1);
+
+            ctx->rand_value = g729_prng(ctx->rand_value);
+            pulses_signs = ctx->rand_value;
+        }
+
+        memset(fc, 0, sizeof(int16_t) * SUBFRAME_SIZE);
+        switch (packet_type) {
+            case FORMAT_G729_8K:
+                ff_acelp_fc_pulse_per_track(fc, ff_fc_4pulses_8bits_tracks_13,
+                                            ff_fc_4pulses_8bits_track_4,
+                                            fc_indexes, pulses_signs, 3, 3);
+                break;
+            case FORMAT_G729D_6K4:
+                ff_acelp_fc_pulse_per_track(fc, ff_fc_2pulses_9bits_track1_gray,
+                                            ff_fc_2pulses_9bits_track2_gray,
+                                            fc_indexes, pulses_signs, 1, 4);
+                break;
+        }
+
+        /*
+          This filter enhances harmonic components of the fixed-codebook vector to
+          improve the quality of the reconstructed speech.
+
+                     / fc_v[i],                                    i < pitch_delay
+          fc_v[i] = <
+                     \ fc_v[i] + gain_pitch * fc_v[i-pitch_delay], i >= pitch_delay
+        */
+        /* A delay of SUBFRAME_SIZE or more leaves fc untouched; never form fc + delay past the array. */
+        if (SUBFRAME_SIZE > pitch_delay_int[i])
+            ff_acelp_weighted_vector_sum(fc + pitch_delay_int[i],
+                                         fc + pitch_delay_int[i],
+                                         fc, 1 << 14,
+                                         av_clip(ctx->past_gain_pitch[0], SHARP_MIN, SHARP_MAX),
+                                         0, 14,
+                                         SUBFRAME_SIZE - pitch_delay_int[i]);
+
+        memmove(ctx->past_gain_pitch+1, ctx->past_gain_pitch, 5 * sizeof(int16_t));
+        ctx->past_gain_code[1] = ctx->past_gain_code[0];
+
+        if (frame_erasure) {
+            ctx->past_gain_pitch[0] = (29491 * ctx->past_gain_pitch[0]) >> 15; // 0.90 (0.15)
+            ctx->past_gain_code[0]  = ( 2007 * ctx->past_gain_code[0] ) >> 11; // 0.98 (0.11)
+
+            gain_corr_factor = 0;
+        } else {
+            if (packet_type == FORMAT_G729D_6K4) {
+                ctx->past_gain_pitch[0]  = cb_gain_1st_6k4[gc_1st_index][0] +
+                                           cb_gain_2nd_6k4[gc_2nd_index][0];
+                gain_corr_factor = cb_gain_1st_6k4[gc_1st_index][1] +
+                                   cb_gain_2nd_6k4[gc_2nd_index][1];
+
+                /* Without check below overflow can occur in ff_acelp_update_past_gain.
+                   It is not issue for G.729, because gain_corr_factor in it's case is always
+                   greater than 1024, while in G.729D it can be even zero. */
+                gain_corr_factor = FFMAX(gain_corr_factor, 1024);
+#ifndef G729_BITEXACT
+                gain_corr_factor >>= 1;
+#endif
+            } else {
+                ctx->past_gain_pitch[0]  = cb_gain_1st_8k[gc_1st_index][0] +
+                                           cb_gain_2nd_8k[gc_2nd_index][0];
+                gain_corr_factor = cb_gain_1st_8k[gc_1st_index][1] +
+                                   cb_gain_2nd_8k[gc_2nd_index][1];
+            }
+
+            /* Decode the fixed-codebook gain. */
+            ctx->past_gain_code[0] = ff_acelp_decode_gain_code(&ctx->adsp, gain_corr_factor,
+                                                               fc, MR_ENERGY,
+                                                               ctx->quant_energy,
+                                                               ma_prediction_coeff,
+                                                               SUBFRAME_SIZE, 4);
+#ifdef G729_BITEXACT
+            /*
+              This correction required to get bit-exact result with
+              reference code, because gain_corr_factor in G.729D is
+              two times larger than in original G.729.
+
+              If bit-exact result is not issue then gain_corr_factor
+              can be simpler divided by 2 before call to g729_get_gain_code
+              instead of using correction below.
+            */
+            if (packet_type == FORMAT_G729D_6K4) {
+                gain_corr_factor >>= 1;
+                ctx->past_gain_code[0] >>= 1;
+            }
+#endif
+        }
+        ff_acelp_update_past_gain(ctx->quant_energy, gain_corr_factor, 2, frame_erasure);
+
+        /* Routine requires rounding to lowest. */
+        ff_acelp_interpolate(ctx->exc + i * SUBFRAME_SIZE,
+                             ctx->exc + i * SUBFRAME_SIZE - pitch_delay_3x / 3,
+                             ff_acelp_interp_filter, 6,
+                             (pitch_delay_3x % 3) << 1,
+                             10, SUBFRAME_SIZE);
+
+        ff_acelp_weighted_vector_sum(ctx->exc + i * SUBFRAME_SIZE,
+                                     ctx->exc + i * SUBFRAME_SIZE, fc,
+                                     (!ctx->was_periodic && frame_erasure) ? 0 : ctx->past_gain_pitch[0],
+                                     ( ctx->was_periodic && frame_erasure) ? 0 : ctx->past_gain_code[0],
+                                     1 << 13, 14, SUBFRAME_SIZE);
+
+        memcpy(synth, ctx->syn_filter_data, 10 * sizeof(int16_t));
+
+        if (ff_celp_lp_synthesis_filter(
+            synth+10,
+            &lp[i][1],
+            ctx->exc  + i * SUBFRAME_SIZE,
+            SUBFRAME_SIZE,
+            10,
+            1,
+            0,
+            0x800))
+            /* Overflow occurred, downscale excitation signal... */
+            for (j = 0; j < 2 * SUBFRAME_SIZE + PITCH_DELAY_MAX + INTERPOL_LEN; j++)
+                ctx->exc_base[j] >>= 2;
+
+        /* ... and make synthesis again. */
+        if (packet_type == FORMAT_G729D_6K4) {
+            int16_t exc_new[SUBFRAME_SIZE];
+
+            ctx->onset = g729d_onset_decision(ctx->onset, ctx->past_gain_code);
+            ctx->voice_decision = g729d_voice_decision(ctx->onset, ctx->voice_decision, ctx->past_gain_pitch);
+
+            g729d_get_new_exc(exc_new, ctx->exc  + i * SUBFRAME_SIZE, fc, ctx->voice_decision, ctx->past_gain_code[0], SUBFRAME_SIZE);
+
+            ff_celp_lp_synthesis_filter(synth+10, &lp[i][1], exc_new,
+                                        SUBFRAME_SIZE, 10, 0, 0, 0x800);
+        } else {
+            ff_celp_lp_synthesis_filter(synth+10, &lp[i][1], ctx->exc  + i * SUBFRAME_SIZE,
+                                        SUBFRAME_SIZE, 10, 0, 0, 0x800);
+        }
+        /* Save data (without postfilter) for use in next subframe. */
+        memcpy(ctx->syn_filter_data, synth+SUBFRAME_SIZE, 10 * sizeof(int16_t));
+
+        /* Calculate gain of unfiltered signal for use in AGC. */
+        gain_before = 0;
+        for (j = 0; j < SUBFRAME_SIZE; j++)
+            gain_before += FFABS(synth[j+10]);
+
+        /* Call postfilter and also update voicing decision for use in next frame. */
+        ff_g729_postfilter(
+                &ctx->adsp,
+                &ctx->ht_prev_data,
+                &is_periodic,
+                &lp[i][0],
+                pitch_delay_int[0],
+                ctx->residual,
+                ctx->res_filter_data,
+                ctx->pos_filter_data,
+                synth+10,
+                SUBFRAME_SIZE);
+
+        /* Calculate gain of filtered signal for use in AGC. */
+        gain_after = 0;
+        for(j=0; j<SUBFRAME_SIZE; j++)
+            gain_after += FFABS(synth[j+10]);
+
+        ctx->gain_coeff = ff_g729_adaptive_gain_control(
+                gain_before,
+                gain_after,
+                synth+10,
+                SUBFRAME_SIZE,
+                ctx->gain_coeff);
+
+        if (frame_erasure)
+            ctx->pitch_delay_int_prev = FFMIN(ctx->pitch_delay_int_prev + 1, PITCH_DELAY_MAX);
+        else
+            ctx->pitch_delay_int_prev = pitch_delay_int[i];
+
+        memcpy(synth+8, ctx->hpf_z, 2*sizeof(int16_t));
+        ff_acelp_high_pass_filter(
+                out_frame + i*SUBFRAME_SIZE,
+                ctx->hpf_f,
+                synth+10,
+                SUBFRAME_SIZE);
+        memcpy(ctx->hpf_z, synth+8+SUBFRAME_SIZE, 2*sizeof(int16_t));
+    }
+
+    ctx->was_periodic = is_periodic;
+
+    /* Save signal for use in next frame. */
+    memmove(ctx->exc_base, ctx->exc_base + 2 * SUBFRAME_SIZE, (PITCH_DELAY_MAX+INTERPOL_LEN)*sizeof(int16_t));
+
+    *got_frame_ptr = 1;
+    return buf_size;
+}
+
+AVCodec ff_g729_decoder = { /* codec description that ties the callbacks of this file together */
+    .name           = "g729",
+    .long_name      = NULL_IF_CONFIG_SMALL("G.729"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_G729,
+    .priv_data_size = sizeof(G729Context), /* size of the state reached through avctx->priv_data */
+    .init           = decoder_init,
+    .decode         = decode_frame,
+    .capabilities   = CODEC_CAP_DR1, /* NOTE(review): presumably because decode_frame() takes its frame from ff_get_buffer() - confirm */
+};
diff --git a/libavcodec/g729postfilter.c b/libavcodec/g729postfilter.c
new file mode 100644
index 0000000..9a775c4
--- /dev/null
+++ b/libavcodec/g729postfilter.c
@@ -0,0 +1,610 @@
+/*
+ * G.729, G729 Annex D postfilter
+ * Copyright (c) 2008 Vladimir Voroshilov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <inttypes.h>
+#include <limits.h>
+
+#include "avcodec.h"
+#include "g729.h"
+#include "acelp_pitch_delay.h"
+#include "g729postfilter.h"
+#include "celp_math.h"
+#include "acelp_filters.h"
+#include "acelp_vectors.h"
+#include "celp_filters.h"
+
+#define FRAC_BITS 15
+#include "mathops.h"
+
+/**
+ * Short interpolation filter (of length 33, according to spec) for computing a signal
+ * with non-integer delay; 16 coefficients are listed here (presumably one half of it - TODO confirm).
+ */
+static const int16_t ff_g729_interp_filt_short[(ANALYZED_FRAC_DELAYS+1)*SHORT_INT_FILT_LEN] = {
+      0, 31650, 28469, 23705, 18050, 12266,  7041,  2873,
+      0, -1597, -2147, -1992, -1492,  -933,  -484,  -188,
+};
+
+/**
+ * Long interpolation filter (of length 129, according to spec) for computing a signal
+ * with non-integer delay; 64 coefficients are listed here (presumably one half of it - TODO confirm).
+ */
+static const int16_t ff_g729_interp_filt_long[(ANALYZED_FRAC_DELAYS+1)*LONG_INT_FILT_LEN] = {
+   0, 31915, 29436, 25569, 20676, 15206,  9639,  4439,
+   0, -3390, -5579, -6549, -6414, -5392, -3773, -1874,
+   0,  1595,  2727,  3303,  3319,  2850,  2030,  1023,
+   0,  -887, -1527, -1860, -1876, -1614, -1150,  -579,
+   0,   501,   859,  1041,  1044,   892,   631,   315,
+   0,  -266,  -453,  -543,  -538,  -455,  -317,  -156,
+   0,   130,   218,   258,   253,   212,   147,    72,
+   0,   -59,  -101,  -122,  -123,  -106,   -77,   -40,
+};
+
+/**
+ * formant_pp_factor_num_pow[i] = FORMANT_PP_FACTOR_NUM^(i+1) in (0.15) fixed point (18022 is about 0.55)
+ */
+static const int16_t formant_pp_factor_num_pow[10]= {
+  /* (0.15) */
+  18022, 9912, 5451, 2998, 1649, 907, 499, 274, 151, 83
+};
+
+/**
+ * formant_pp_factor_den_pow[i] = FORMANT_PP_FACTOR_DEN^(i+1) in (0.15) fixed point (22938 is about 0.7)
+ */
+static const int16_t formant_pp_factor_den_pow[10] = {
+  /* (0.15) */
+  22938, 16057, 11240, 7868, 5508, 3856, 2699, 1889, 1322, 925
+};
+
+/**
+ * \brief Residual signal calculation (4.2.1 of G.729)
+ * \param out [out] input filtered through A(z/FORMANT_PP_FACTOR_NUM)
+ * \param filter_coeffs (3.12) the 10 coefficients of A(z/FORMANT_PP_FACTOR_NUM) applied to past samples
+ * \param in speech samples to filter
+ * \param subframe_size number of samples to produce
+ *
+ * \note The 10 samples preceding in[0] are read as filter history.
+ * \remark out and in may be the same buffer.
+ */
+static void residual_filter(int16_t* out, const int16_t* filter_coeffs, const int16_t* in,
+                            int subframe_size)
+{
+    int pos = subframe_size;
+    /* Walk backwards so that out may alias in: in[pos-1..pos-10] are still unmodified. */
+    while (pos-- > 0) {
+        const int16_t *past = in + pos; /* past[-1] is the sample just before in[pos] */
+        int tap, acc = 0x800;           /* 0x800 is one half in (3.12), rounding the >> 12 below */
+        for (tap = 1; tap <= 10; tap++)
+            acc += filter_coeffs[tap - 1] * past[-tap];
+        out[pos] = in[pos] + (acc >> 12);
+    }
+}
+
+/**
+ * \brief long-term postfilter (4.2.1)
+ * \param adsp initialized DSP context
+ * \param pitch_delay_int integer part of the pitch delay in the first subframe
+ * \param residual filtering input data: RES_PREV_DATA_SIZE history samples, then subframe_size new ones
+ * \param residual_filt [out] speech signal with applied A(z/FORMANT_PP_FACTOR_NUM) filter
+ * \param subframe_size size of subframe
+ *
+ * \return 0 if long-term prediction gain is less than 3dB, 1 -  otherwise
+ */
+static int16_t long_term_filter(AudioDSPContext *adsp, int pitch_delay_int,
+                                const int16_t* residual, int16_t *residual_filt,
+                                int subframe_size)
+{
+    int i, k, tmp, tmp2;
+    int sum;
+    int L_temp0;
+    int L_temp1;
+    int64_t L64_temp0;
+    int64_t L64_temp1;
+    int16_t shift;
+    int corr_int_num, corr_int_den;
+
+    int ener;
+    int16_t sh_ener;
+
+    int16_t gain_num,gain_den; //selected signal's gain numerator and denominator
+    int16_t sh_gain_num, sh_gain_den;
+    int gain_num_square;
+
+    int16_t gain_long_num,gain_long_den; //filtered through long interpolation filter signal's gain numerator and denominator
+    int16_t sh_gain_long_num, sh_gain_long_den;
+
+    int16_t best_delay_int, best_delay_frac;
+
+    int16_t delayed_signal_offset;
+    int lt_filt_factor_a, lt_filt_factor_b;
+
+    int16_t * selected_signal;
+    const int16_t * selected_signal_const; //Necessary to avoid compiler warning
+
+    int16_t sig_scaled[SUBFRAME_SIZE + RES_PREV_DATA_SIZE];
+    int16_t delayed_signal[ANALYZED_FRAC_DELAYS][SUBFRAME_SIZE+1];
+    int corr_den[ANALYZED_FRAC_DELAYS][2];
+
+    tmp = 0;
+    for(i=0; i<subframe_size + RES_PREV_DATA_SIZE; i++)
+        tmp |= FFABS(residual[i]);
+
+    if(!tmp)
+        shift = 3;
+    else
+        shift = av_log2(tmp) - 11;
+
+    if (shift > 0)
+        for (i = 0; i < subframe_size + RES_PREV_DATA_SIZE; i++)
+            sig_scaled[i] = residual[i] >> shift;
+    else
+        for (i = 0; i < subframe_size + RES_PREV_DATA_SIZE; i++)
+            sig_scaled[i] = residual[i] * (1 << -shift); // multiply: << on a negative sample is undefined
+
+    /* Start of best delay searching code */
+    gain_num = 0;
+
+    ener = adsp->scalarproduct_int16(sig_scaled + RES_PREV_DATA_SIZE,
+                                    sig_scaled + RES_PREV_DATA_SIZE,
+                                    subframe_size);
+    if (ener) {
+        sh_ener = FFMAX(av_log2(ener) - 14, 0);
+        ener >>= sh_ener;
+        /* Search for best pitch delay.
+
+                       sum{ r(n) * r(k,n) }^2
+           R'(k)^2 := -------------------------
+                       sum{ r(k,n) * r(k,n) }
+
+
+           R(T)    :=  sum{ r(n) * r(n-T) }
+
+
+           where
+           r(n-T) is integer delayed signal with delay T
+           r(k,n) is non-integer delayed signal with integer delay best_delay
+           and fractional delay k */
+
+        /* Find integer delay best_delay which maximizes correlation R(T).
+
+           This also equals the numerator of R'(0),
+           since the fine search (second step) is done with 1/8
+           precision around best_delay. */
+        corr_int_num = 0;
+        best_delay_int = pitch_delay_int - 1;
+        for (i = pitch_delay_int - 1; i <= pitch_delay_int + 1; i++) {
+            sum = adsp->scalarproduct_int16(sig_scaled + RES_PREV_DATA_SIZE,
+                                           sig_scaled + RES_PREV_DATA_SIZE - i,
+                                           subframe_size);
+            if (sum > corr_int_num) {
+                corr_int_num = sum;
+                best_delay_int = i;
+            }
+        }
+        if (corr_int_num) {
+            /* Compute denominator of pseudo-normalized correlation R'(0). */
+            corr_int_den = adsp->scalarproduct_int16(sig_scaled - best_delay_int + RES_PREV_DATA_SIZE,
+                                                    sig_scaled - best_delay_int + RES_PREV_DATA_SIZE,
+                                                    subframe_size);
+
+            /* Compute signals with non-integer delay k (with 1/8 precision),
+               where k is in [0;6] range.
+               Entire delay is equal to best_delay+(k+1)/8
+               This is achieved by applying an interpolation filter of
+               length 33 to source signal. */
+            for (k = 0; k < ANALYZED_FRAC_DELAYS; k++) {
+                ff_acelp_interpolate(&delayed_signal[k][0],
+                                     &sig_scaled[RES_PREV_DATA_SIZE - best_delay_int],
+                                     ff_g729_interp_filt_short,
+                                     ANALYZED_FRAC_DELAYS+1,
+                                     8 - k - 1,
+                                     SHORT_INT_FILT_LEN,
+                                     subframe_size + 1);
+            }
+
+            /* Compute denominator of pseudo-normalized correlation R'(k).
+
+                 corr_den[k][0] is square root of R'(k) denominator, for int(T) == int(T0)
+                 corr_den[k][1] is square root of R'(k) denominator, for int(T) == int(T0)+1
+
+              Also compute maximum value of above denominators over all k. */
+            tmp = corr_int_den;
+            for (k = 0; k < ANALYZED_FRAC_DELAYS; k++) {
+                sum = adsp->scalarproduct_int16(&delayed_signal[k][1],
+                                               &delayed_signal[k][1],
+                                               subframe_size - 1);
+                corr_den[k][0] = sum + delayed_signal[k][0            ] * delayed_signal[k][0            ];
+                corr_den[k][1] = sum + delayed_signal[k][subframe_size] * delayed_signal[k][subframe_size];
+
+                tmp = FFMAX3(tmp, corr_den[k][0], corr_den[k][1]);
+            }
+
+            sh_gain_den = av_log2(tmp) - 14;
+            if (sh_gain_den >= 0) {
+
+                sh_gain_num =  FFMAX(sh_gain_den, sh_ener);
+                /* Loop through all k and find delay that maximizes
+                   R'(k) correlation.
+                   Search is done in [int(T0)-1; int(T0)+1] range
+                   with 1/8 precision. */
+                delayed_signal_offset = 1;
+                best_delay_frac = 0;
+                gain_den = corr_int_den >> sh_gain_den;
+                gain_num = corr_int_num >> sh_gain_num;
+                gain_num_square = gain_num * gain_num;
+                for (k = 0; k < ANALYZED_FRAC_DELAYS; k++) {
+                    for (i = 0; i < 2; i++) {
+                        int16_t gain_num_short, gain_den_short;
+                        int gain_num_short_square;
+                        /* Compute numerator of pseudo-normalized
+                           correlation R'(k). */
+                        sum = adsp->scalarproduct_int16(&delayed_signal[k][i],
+                                                       sig_scaled + RES_PREV_DATA_SIZE,
+                                                       subframe_size);
+                        gain_num_short = FFMAX(sum >> sh_gain_num, 0);
+
+                        /*
+                                      gain_num_short_square                gain_num_square
+                           R'(T)^2 = -----------------------, max R'(T)^2= --------------
+                                           den                                 gain_den
+                        */
+                        gain_num_short_square = gain_num_short * gain_num_short;
+                        gain_den_short = corr_den[k][i] >> sh_gain_den;
+
+                        tmp = MULL(gain_num_short_square, gain_den, FRAC_BITS);
+                        tmp2 = MULL(gain_num_square, gain_den_short, FRAC_BITS);
+
+                        // R'(T)^2 > max R'(T)^2
+                        if (tmp > tmp2) {
+                            gain_num = gain_num_short;
+                            gain_den = gain_den_short;
+                            gain_num_square = gain_num_short_square;
+                            delayed_signal_offset = i;
+                            best_delay_frac = k + 1;
+                        }
+                    }
+                }
+
+                /*
+                       R'(T)^2
+                  2 * --------- < 1
+                        R(0)
+                */
+                L64_temp0 =  (int64_t)gain_num_square  << ((sh_gain_num << 1) + 1);
+                L64_temp1 = ((int64_t)gain_den * ener) << (sh_gain_den + sh_ener);
+                if (L64_temp0 < L64_temp1)
+                    gain_num = 0;
+            } // if(sh_gain_den >= 0)
+        } // if(corr_int_num)
+    } // if(ener)
+    /* End of best delay searching code  */
+
+    if (!gain_num) {
+        memcpy(residual_filt, residual + RES_PREV_DATA_SIZE, subframe_size * sizeof(int16_t));
+
+        /* Long-term prediction gain is less than 3dB. Long-term postfilter is disabled. */
+        return 0;
+    }
+    if (best_delay_frac) {
+        /* Recompute delayed signal with an interpolation filter of length 129. */
+        ff_acelp_interpolate(residual_filt,
+                             &sig_scaled[RES_PREV_DATA_SIZE - best_delay_int + delayed_signal_offset],
+                             ff_g729_interp_filt_long,
+                             ANALYZED_FRAC_DELAYS + 1,
+                             8 - best_delay_frac,
+                             LONG_INT_FILT_LEN,
+                             subframe_size + 1);
+        /* Compute R'(k) correlation's numerator. */
+        sum = adsp->scalarproduct_int16(residual_filt,
+                                       sig_scaled + RES_PREV_DATA_SIZE,
+                                       subframe_size);
+
+        if (sum < 0) {
+            gain_long_num = 0;
+            sh_gain_long_num = 0;
+        } else {
+            tmp = FFMAX(av_log2(sum) - 14, 0);
+            sum >>= tmp;
+            gain_long_num = sum;
+            sh_gain_long_num = tmp;
+        }
+
+        /* Compute R'(k) correlation's denominator. */
+        sum = adsp->scalarproduct_int16(residual_filt, residual_filt, subframe_size);
+
+        tmp = FFMAX(av_log2(sum) - 14, 0);
+        sum >>= tmp;
+        gain_long_den = sum;
+        sh_gain_long_den = tmp;
+
+        /* Select between original and delayed signal.
+           Delayed signal will be selected if it increases R'(k)
+           correlation. */
+        L_temp0 = gain_num * gain_num;
+        L_temp0 = MULL(L_temp0, gain_long_den, FRAC_BITS);
+
+        L_temp1 = gain_long_num * gain_long_num;
+        L_temp1 = MULL(L_temp1, gain_den, FRAC_BITS);
+
+        tmp = ((sh_gain_long_num - sh_gain_num) << 1) - (sh_gain_long_den - sh_gain_den);
+        if (tmp > 0)
+            L_temp0 >>= tmp;
+        else
+            L_temp1 >>= -tmp;
+
+        /* Check if longer filter increases the values of R'(k). */
+        if (L_temp1 > L_temp0) {
+            /* Select long filter. */
+            selected_signal = residual_filt;
+            gain_num = gain_long_num;
+            gain_den = gain_long_den;
+            sh_gain_num = sh_gain_long_num;
+            sh_gain_den = sh_gain_long_den;
+        } else
+            /* Select short filter. */
+            selected_signal = &delayed_signal[best_delay_frac-1][delayed_signal_offset];
+
+        /* Rescale selected signal to original value. */
+        if (shift > 0)
+            for (i = 0; i < subframe_size; i++)
+                selected_signal[i] *= 1 << shift; // multiply: << on a negative sample is undefined
+        else
+            for (i = 0; i < subframe_size; i++)
+                selected_signal[i] >>= -shift;
+
+        /* necessary to avoid compiler warning */
+        selected_signal_const = selected_signal;
+    } // if(best_delay_frac)
+    else
+        selected_signal_const = residual + RES_PREV_DATA_SIZE - (best_delay_int + 1 - delayed_signal_offset);
+#ifdef G729_BITEXACT
+    tmp = sh_gain_num - sh_gain_den;
+    if (tmp > 0)
+        gain_den >>= tmp;
+    else
+        gain_num >>= -tmp;
+
+    if (gain_num > gain_den)
+        lt_filt_factor_a = MIN_LT_FILT_FACTOR_A;
+    else {
+        gain_num >>= 2;
+        gain_den >>= 1;
+        lt_filt_factor_a = (gain_den << 15) / (gain_den + gain_num);
+    }
+#else
+    L64_temp0 = (((int64_t)gain_num) << sh_gain_num) >> 1;
+    L64_temp1 = ((int64_t)gain_den) << sh_gain_den;
+    lt_filt_factor_a = FFMAX((L64_temp1 << 15) / (L64_temp1 + L64_temp0), MIN_LT_FILT_FACTOR_A);
+#endif
+
+    /* Filter through selected filter. */
+    lt_filt_factor_b = 32767 - lt_filt_factor_a + 1;
+
+    ff_acelp_weighted_vector_sum(residual_filt, residual + RES_PREV_DATA_SIZE,
+                                 selected_signal_const,
+                                 lt_filt_factor_a, lt_filt_factor_b,
+                                 1<<14, 15, subframe_size);
+
+    // Long-term prediction gain is larger than 3dB.
+    return 1;
+}
+
+/**
+ * \brief Calculate reflection coefficient for tilt compensation filter (4.2.3).
+ * \param adsp initialized DSP context
+ * \param lp_gn (3.12) coefficients of A(z/FORMANT_PP_FACTOR_NUM) filter
+ * \param lp_gd (3.12) coefficients of A(z/FORMANT_PP_FACTOR_DEN) filter
+ * \param speech speech to update
+ * \param subframe_size size of subframe
+ *
+ * \return (3.12) reflection coefficient; 0 when |rh1| > rh0 or rh0 is zero
+ *
+ * \remark The routine also calculates the gain term for the short-term
+ *         filter (gf) and multiplies the speech data by 1/gf.
+ *
+ * \note All members of lp_gn, except 10-19 must be equal to zero.
+ */
+static int16_t get_tilt_comp(AudioDSPContext *adsp, int16_t *lp_gn,
+                             const int16_t *lp_gd, int16_t* speech,
+                             int subframe_size)
+{
+    int rh1,rh0; // (3.12)
+    int temp;
+    int i;
+    int gain_term;
+
+    lp_gn[10] = 4096; //1.0 in (3.12)
+
+    /* Apply 1/A(z/FORMANT_PP_FACTOR_DEN) filter to hf. */
+    ff_celp_lp_synthesis_filter(lp_gn + 11, lp_gd + 1, lp_gn + 11, 22, 10, 0, 0, 0x800);
+    /* Now lp_gn (starting with 10) contains impulse response
+       of A(z/FORMANT_PP_FACTOR_NUM)/A(z/FORMANT_PP_FACTOR_DEN) filter. */
+
+    rh0 = adsp->scalarproduct_int16(lp_gn + 10, lp_gn + 10, 20);
+    rh1 = adsp->scalarproduct_int16(lp_gn + 10, lp_gn + 11, 20);
+
+    /* downscale to avoid overflow */
+    temp = av_log2(rh0) - 14;
+    if (temp > 0) {
+        rh0 >>= temp;
+        rh1 >>= temp;
+    }
+
+    if (FFABS(rh1) > rh0 || !rh0)
+        return 0;
+
+    gain_term = 0;
+    for (i = 0; i < 20; i++)
+        gain_term += FFABS(lp_gn[i + 10]);
+    gain_term >>= 2; // (3.12) -> (5.10)
+
+    if (gain_term > 0x400) { // 1.0 in (5.10)
+        temp = 0x2000000 / gain_term; // 1.0/gain_term in (0.15)
+        for (i = 0; i < subframe_size; i++)
+            speech[i] = (speech[i] * temp + 0x4000) >> 15;
+    }
+    /* Multiply instead of shifting: rh1 may be negative and << on a negative value is undefined. */
+    return -(rh1 * (1 << 15)) / rh0;
+}
+
+/**
+ * \brief Apply tilt compensation filter (4.2.3).
+ * \param out [out] filtered signal, subframe_size samples are written
+ * \param res_pst [in] residual signal (partially filtered)
+ * \param refl_coeff reflection coefficient, as produced by get_tilt_comp()
+ * \param subframe_size size of subframe
+ * \param ht_prev_data previous data for 4.2.3, equation 86
+ * \return last sample of res_pst, i.e. the new value for ht_prev_data
+*/
+static int16_t apply_tilt_comp(int16_t* out, int16_t* res_pst, int refl_coeff,
+                               int subframe_size, int16_t ht_prev_data)
+{
+    int tmp, tmp2;
+    int i;
+    int gt, ga;
+    int fact, sh_fact;
+
+    if (refl_coeff > 0) {
+        gt = (refl_coeff * G729_TILT_FACTOR_PLUS + 0x4000) >> 15;
+        fact = 0x4000; // 0.5 in (0.15)
+        sh_fact = 15;
+    } else {
+        gt = (refl_coeff * G729_TILT_FACTOR_MINUS + 0x4000) >> 15;
+        fact = 0x800; // 0.5 in (3.12)
+        sh_fact = 12;
+    }
+    ga = (fact << 15) / av_clip_int16(32768 - FFABS(gt));
+    gt >>= 1;
+
+    /* Apply tilt compensation filter to signal. */
+    tmp = res_pst[subframe_size - 1]; // read before the loop; returned as next ht_prev_data
+    /* Scale by multiplying, not with "<<": res_pst[] and gt can be negative. */
+    for (i = subframe_size - 1; i >= 1; i--) {
+        tmp2 = res_pst[i] * (1 << 15) + gt * res_pst[i-1] * 2;
+        tmp2 = (tmp2 + 0x4000) >> 15;
+
+        tmp2 = (tmp2 * ga * 2 + fact) >> sh_fact;
+        out[i] = tmp2;
+    }
+    tmp2 = res_pst[0] * (1 << 15) + gt * ht_prev_data * 2; // first sample uses saved history
+    tmp2 = (tmp2 + 0x4000) >> 15;
+    tmp2 = (tmp2 * ga * 2 + fact) >> sh_fact;
+    out[0] = tmp2;
+
+    return tmp;
+}
+
+void ff_g729_postfilter(AudioDSPContext *adsp, int16_t* ht_prev_data, int* voicing,
+                     const int16_t *lp_filter_coeffs, int pitch_delay_int,
+                     int16_t* residual, int16_t* res_filter_data,
+                     int16_t* pos_filter_data, int16_t *speech, int subframe_size)
+{
+    int16_t residual_filt_buf[SUBFRAME_SIZE+11]; // always accessed from offset 10 onwards
+    int16_t lp_gn[33]; // (3.12); [11..20] set below, [10..32] rewritten by get_tilt_comp()
+    int16_t lp_gd[11]; // (3.12); only [1..10] are written and used
+    int tilt_comp_coeff;
+    int i;
+
+    /* Zero-filling is necessary for tilt-compensation filter. */
+    memset(lp_gn, 0, 33 * sizeof(int16_t));
+
+    /* Calculate A(z/FORMANT_PP_FACTOR_NUM) filter coefficients. */
+    for (i = 0; i < 10; i++)
+        lp_gn[i + 11] = (lp_filter_coeffs[i + 1] * formant_pp_factor_num_pow[i] + 0x4000) >> 15;
+
+    /* Calculate A(z/FORMANT_PP_FACTOR_DEN) filter coefficients. */
+    for (i = 0; i < 10; i++)
+        lp_gd[i + 1] = (lp_filter_coeffs[i + 1] * formant_pp_factor_den_pow[i] + 0x4000) >> 15;
+
+    /* residual signal calculation (one-half of short-term postfilter);
+       speech[-10..-1] is overwritten with the 10 samples kept in res_filter_data */
+    memcpy(speech - 10, res_filter_data, 10 * sizeof(int16_t));
+    residual_filter(residual + RES_PREV_DATA_SIZE, lp_gn + 11, speech, subframe_size);
+    /* Save the last 10 speech samples to use them in the next subframe. */
+    memcpy(res_filter_data, speech + subframe_size - 10, 10 * sizeof(int16_t));
+
+    /* long-term filter. If long-term prediction gain is larger than 3dB (returned value is
+       nonzero) then declare current subframe as periodic. */
+    *voicing = FFMAX(*voicing, long_term_filter(adsp, pitch_delay_int,
+                                                residual, residual_filt_buf + 10,
+                                                subframe_size));
+
+    /* shift residual for using in next subframe */
+    memmove(residual, residual + subframe_size, RES_PREV_DATA_SIZE * sizeof(int16_t));
+
+    /* short-term filter tilt compensation (may also rescale residual_filt_buf in place) */
+    tilt_comp_coeff = get_tilt_comp(adsp, lp_gn, lp_gd, residual_filt_buf + 10, subframe_size);
+
+    /* Apply second half of short-term postfilter: 1/A(z/FORMANT_PP_FACTOR_DEN) */
+    ff_celp_lp_synthesis_filter(pos_filter_data + 10, lp_gd + 1,
+                                residual_filt_buf + 10,
+                                subframe_size, 10, 0, 0, 0x800);
+    memcpy(pos_filter_data, pos_filter_data + subframe_size, 10 * sizeof(int16_t)); // last 10 outputs -> front
+    /* Tilt compensation writes the final samples back into speech. */
+    *ht_prev_data = apply_tilt_comp(speech, pos_filter_data + 10, tilt_comp_coeff,
+                                    subframe_size, *ht_prev_data);
+}
+
+/**
+ * \brief Adaptive gain control (4.2.4)
+ * \param gain_before speech gain measured before the postfilters were applied
+ * \param gain_after  speech gain measured after the postfilters were applied
+ * \param speech [in/out] samples that are rescaled in place
+ * \param subframe_size number of samples in speech
+ * \param gain_prev (3.12) gain coefficient left by the previous call
+ *
+ * \return (3.12) gain coefficient after the last sample; 0 if only gain_after is zero
+ */
+int16_t ff_g729_adaptive_gain_control(int gain_before, int gain_after, int16_t *speech,
+                                   int subframe_size, int16_t gain_prev)
+{
+    int target = 0; // (3.12); remains 0 when gain_before is 0
+    int shift_before, shift_after;
+    int idx;
+
+    if (gain_before && !gain_after)
+        return 0;
+
+    if (gain_before) {
+        /* shift counts derived from the position of each gain's top set bit */
+        shift_before = 14 - av_log2(gain_before);
+        shift_after  = 14 - av_log2(gain_after);
+
+        gain_before = bidir_sal(gain_before, shift_before);
+        gain_after  = bidir_sal(gain_after,  shift_after);
+
+        if (gain_before >= gain_after) {
+            target = ((gain_before - gain_after) << 14) / gain_after + 0x4000;
+            target = bidir_sal(target, shift_after - shift_before);
+        } else {
+            target = (gain_before << 15) / gain_after;
+            target = bidir_sal(target, shift_after - shift_before - 1);
+        }
+        target = (target * G729_AGC_FAC1 + 0x4000) >> 15; // target * (1-0.9875)
+    }
+
+    for (idx = 0; idx < subframe_size; idx++) {
+        // gain_prev = target + 0.9875 * gain_prev
+        const int decayed = (G729_AGC_FACTOR * gain_prev + 0x4000) >> 15;
+
+        gain_prev   = av_clip_int16(target + decayed);
+        speech[idx] = av_clip_int16((speech[idx] * gain_prev + 0x2000) >> 14);
+    }
+    return gain_prev;
+}
diff --git a/libavcodec/g729postfilter.h b/libavcodec/g729postfilter.h
new file mode 100644
index 0000000..89e3e40
--- /dev/null
+++ b/libavcodec/g729postfilter.h
@@ -0,0 +1,116 @@
+/*
+ * G.729, G729 Annex D postfilter
+ * Copyright (c) 2008 Vladimir Voroshilov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVCODEC_G729POSTFILTER_H
+#define AVCODEC_G729POSTFILTER_H
+
+#include <stdint.h>
+#include "audiodsp.h"
+
+/**
+ * tilt compensation factor (G.729, k1>0)
+ * 0.2 in Q15
+ */
+#define G729_TILT_FACTOR_PLUS       6554
+
+/**
+ * tilt compensation factor (G.729, k1<0)
+ * 0.9 in Q15
+ */
+#define G729_TILT_FACTOR_MINUS     29491
+
+/* 4.2.2 */
+#define FORMANT_PP_FACTOR_NUM  18022             //0.55 in Q15
+#define FORMANT_PP_FACTOR_DEN  22938             //0.70 in Q15
+
+/**
+ * gain adjustment factor (G.729, 4.2.4)
+ * 0.9875 in Q15
+ */
+#define G729_AGC_FACTOR            32358
+#define G729_AGC_FAC1 (32768-G729_AGC_FACTOR)
+
+/**
+ * 1.0 / (1.0 + 0.5) in Q15
+ * where 0.5 is the minimum value of
+ * weight factor, controlling amount of long-term postfiltering
+ */
+#define MIN_LT_FILT_FACTOR_A       21845
+
+/**
+ * Short interpolation filter length
+ */
+#define SHORT_INT_FILT_LEN         2
+
+/**
+ * Long interpolation filter length
+ */
+#define LONG_INT_FILT_LEN          8
+
+/**
+ * Number of analyzed fractional pitch delays in second stage of long-term
+ * postfilter
+ */
+#define ANALYZED_FRAC_DELAYS       7
+
+/**
+ * Amount of past residual signal data stored in buffer (PITCH_DELAY_MAX is not defined here)
+ */
+#define RES_PREV_DATA_SIZE (PITCH_DELAY_MAX + LONG_INT_FILT_LEN + 1)
+
+/**
+ * \brief Signal postfiltering (4.2)
+ * \param adsp initialized DSP context
+ * \param ht_prev_data [in/out] (Q12) pointer to variable receiving tilt
+ *                     compensation filter data from previous subframe
+ * \param voicing [in/out] (Q0) pointer to variable receiving voicing decision
+ * \param lp_filter_coeffs (Q12) LP filter coefficients
+ * \param pitch_delay_int integer part of the pitch delay
+ * \param residual [in/out] (Q0) residual signal buffer (used in long-term postfilter)
+ * \param res_filter_data [in/out] (Q0) speech data of previous subframe
+ * \param pos_filter_data [in/out] (Q0) previous speech data for short-term postfilter
+ * \param speech [in/out] (Q0) signal buffer
+ * \param subframe_size size of subframe
+ *
+ * Filtering has the following stages:
+ *   Long-term postfilter (4.2.1)
+ *   Short-term postfilter (4.2.2)
+ *   Tilt-compensation (4.2.3)
+ */
+void ff_g729_postfilter(AudioDSPContext *adsp, int16_t* ht_prev_data, int* voicing,
+                     const int16_t *lp_filter_coeffs, int pitch_delay_int,
+                     int16_t* residual, int16_t* res_filter_data,
+                     int16_t* pos_filter_data, int16_t *speech,
+                     int subframe_size);
+
+/**
+ * \brief Adaptive gain control (4.2.4)
+ * \param gain_before (Q0) gain of speech before applying postfilters
+ * \param gain_after  (Q0) gain of speech after applying postfilters
+ * \param speech [in/out] (Q0) signal buffer
+ * \param subframe_size length of subframe
+ * \param gain_prev (Q12) previous value of gain coefficient
+ *
+ * \return (Q12) last value of gain coefficient
+ */
+int16_t ff_g729_adaptive_gain_control(int gain_before, int gain_after, int16_t *speech,
+                                   int subframe_size, int16_t gain_prev);
+
+#endif // AVCODEC_G729POSTFILTER_H
diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index 5a0089a..fd32535 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -1,20 +1,20 @@
 /*
  * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -31,6 +31,7 @@
 #include "libavutil/common.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/log.h"
+#include "libavutil/avassert.h"
 #include "mathops.h"
 
 /*
@@ -54,9 +55,7 @@ typedef struct GetBitContext {
     const uint8_t *buffer, *buffer_end;
     int index;
     int size_in_bits;
-#if !UNCHECKED_BITSTREAM_READER
     int size_in_bits_plus8;
-#endif
 } GetBitContext;
 
 #define VLC_TYPE int16_t
@@ -126,7 +125,7 @@ typedef struct RL_VLC_ELEM {
 #if UNCHECKED_BITSTREAM_READER
 #define OPEN_READER(name, gb)                   \
     unsigned int name ## _index = (gb)->index;  \
-    unsigned int av_unused name ## _cache = 0
+    unsigned int av_unused name ## _cache
 
 #define HAVE_BITS_REMAINING(name, gb) 1
 #else
@@ -140,27 +139,34 @@ typedef struct RL_VLC_ELEM {
 
 #define CLOSE_READER(name, gb) (gb)->index = name ## _index
 
+# ifdef LONG_BITSTREAM_READER
+
+# define UPDATE_CACHE_LE(name, gb) name ## _cache = \
+      AV_RL64((gb)->buffer + (name ## _index >> 3)) >> (name ## _index & 7)
+
+# define UPDATE_CACHE_BE(name, gb) name ## _cache = \
+      AV_RB64((gb)->buffer + (name ## _index >> 3)) >> (32 - (name ## _index & 7))
+
+#else
+
+# define UPDATE_CACHE_LE(name, gb) name ## _cache = \
+      AV_RL32((gb)->buffer + (name ## _index >> 3)) >> (name ## _index & 7)
+
+# define UPDATE_CACHE_BE(name, gb) name ## _cache = \
+      AV_RB32((gb)->buffer + (name ## _index >> 3)) << (name ## _index & 7)
+
+#endif
+
+
 #ifdef BITSTREAM_READER_LE
 
-# ifdef LONG_BITSTREAM_READER
-#   define UPDATE_CACHE(name, gb) name ## _cache = \
-        AV_RL64((gb)->buffer + (name ## _index >> 3)) >> (name ## _index & 7)
-# else
-#   define UPDATE_CACHE(name, gb) name ## _cache = \
-        AV_RL32((gb)->buffer + (name ## _index >> 3)) >> (name ## _index & 7)
-# endif
+# define UPDATE_CACHE(name, gb) UPDATE_CACHE_LE(name, gb)
 
 # define SKIP_CACHE(name, gb, num) name ## _cache >>= (num)
 
 #else
 
-# ifdef LONG_BITSTREAM_READER
-#   define UPDATE_CACHE(name, gb) name ## _cache = \
-        AV_RB64((gb)->buffer + (name ## _index >> 3)) >> (32 - (name ## _index & 7))
-# else
-#   define UPDATE_CACHE(name, gb) name ## _cache = \
-        AV_RB32((gb)->buffer + (name ## _index >> 3)) << (name ## _index & 7)
-# endif
+# define UPDATE_CACHE(name, gb) UPDATE_CACHE_BE(name, gb)
 
 # define SKIP_CACHE(name, gb, num) name ## _cache <<= (num)
 
@@ -181,12 +187,18 @@ typedef struct RL_VLC_ELEM {
 
 #define LAST_SKIP_BITS(name, gb, num) SKIP_COUNTER(name, gb, num)
 
+#define SHOW_UBITS_LE(name, gb, num) zero_extend(name ## _cache, num)
+#define SHOW_SBITS_LE(name, gb, num) sign_extend(name ## _cache, num)
+
+#define SHOW_UBITS_BE(name, gb, num) NEG_USR32(name ## _cache, num)
+#define SHOW_SBITS_BE(name, gb, num) NEG_SSR32(name ## _cache, num)
+
 #ifdef BITSTREAM_READER_LE
-#   define SHOW_UBITS(name, gb, num) zero_extend(name ## _cache, num)
-#   define SHOW_SBITS(name, gb, num) sign_extend(name ## _cache, num)
+#   define SHOW_UBITS(name, gb, num) SHOW_UBITS_LE(name, gb, num)
+#   define SHOW_SBITS(name, gb, num) SHOW_SBITS_LE(name, gb, num)
 #else
-#   define SHOW_UBITS(name, gb, num) NEG_USR32(name ## _cache, num)
-#   define SHOW_SBITS(name, gb, num) NEG_SSR32(name ## _cache, num)
+#   define SHOW_UBITS(name, gb, num) SHOW_UBITS_BE(name, gb, num)
+#   define SHOW_SBITS(name, gb, num) SHOW_SBITS_BE(name, gb, num)
 #endif
 
 #define GET_CACHE(name, gb) ((uint32_t) name ## _cache)
@@ -206,7 +218,7 @@ static inline void skip_bits_long(GetBitContext *s, int n)
 }
 
 /**
- * read mpeg1 dc style vlc (sign bit + mantisse with no MSB).
+ * read mpeg1 dc style vlc (sign bit + mantissa with no MSB).
  * if MSB not set it is negative
  * @param n length in bits
  */
@@ -215,6 +227,7 @@ static inline int get_xbits(GetBitContext *s, int n)
     register int sign;
     register int32_t cache;
     OPEN_READER(re, s);
+    av_assert2(n>0 && n<=25);
     UPDATE_CACHE(re, s);
     cache = GET_CACHE(re, s);
     sign  = ~cache >> 31;
@@ -227,6 +240,7 @@ static inline int get_sbits(GetBitContext *s, int n)
 {
     register int tmp;
     OPEN_READER(re, s);
+    av_assert2(n>0 && n<=25);
     UPDATE_CACHE(re, s);
     tmp = SHOW_SBITS(re, s, n);
     LAST_SKIP_BITS(re, s, n);
@@ -241,6 +255,7 @@ static inline unsigned int get_bits(GetBitContext *s, int n)
 {
     register int tmp;
     OPEN_READER(re, s);
+    av_assert2(n>0 && n<=25);
     UPDATE_CACHE(re, s);
     tmp = SHOW_UBITS(re, s, n);
     LAST_SKIP_BITS(re, s, n);
@@ -248,6 +263,18 @@ static inline unsigned int get_bits(GetBitContext *s, int n)
     return tmp;
 }
 
+static inline unsigned int get_bits_le(GetBitContext *s, int n)
+{
+    int value;
+    OPEN_READER(lsb, s);
+    av_assert2(n > 0 && n <= 25);      /* n is limited to 1-25 bits */
+    UPDATE_CACHE_LE(lsb, s);           /* _LE macros are used unconditionally here */
+    value = SHOW_UBITS_LE(lsb, s, n);
+    LAST_SKIP_BITS(lsb, s, n);
+    CLOSE_READER(lsb, s);
+    return value;
+}
+
 /**
  * Show 1-25 bits.
  */
@@ -255,6 +282,7 @@ static inline unsigned int show_bits(GetBitContext *s, int n)
 {
     register int tmp;
     OPEN_READER(re, s);
+    av_assert2(n>0 && n<=25);
     UPDATE_CACHE(re, s);
     tmp = SHOW_UBITS(re, s, n);
     return tmp;
@@ -263,7 +291,6 @@ static inline unsigned int show_bits(GetBitContext *s, int n)
 static inline void skip_bits(GetBitContext *s, int n)
 {
     OPEN_READER(re, s);
-    UPDATE_CACHE(re, s);
     LAST_SKIP_BITS(re, s, n);
     CLOSE_READER(re, s);
 }
@@ -303,20 +330,22 @@ static inline void skip_bits1(GetBitContext *s)
  */
 static inline unsigned int get_bits_long(GetBitContext *s, int n)
 {
-    if (n <= MIN_CACHE_BITS) {
+    if (!n) {
+        return 0;
+    } else if (n <= MIN_CACHE_BITS) {
         return get_bits(s, n);
     } else {
 #ifdef BITSTREAM_READER_LE
-        int ret = get_bits(s, 16);
+        unsigned ret = get_bits(s, 16);
         return ret | (get_bits(s, n - 16) << 16);
 #else
-        int ret = get_bits(s, 16) << (n - 16);
+        unsigned ret = get_bits(s, 16) << (n - 16);
         return ret | get_bits(s, n - 16);
 #endif
     }
 }
 
-/*
+/**
  * Read 0-64 bits.
  */
 static inline uint64_t get_bits64(GetBitContext *s, int n)
@@ -378,8 +407,8 @@ static inline int init_get_bits(GetBitContext *s, const uint8_t *buffer,
     int buffer_size;
     int ret = 0;
 
-    if (bit_size > INT_MAX - 7 || bit_size < 0 || !buffer) {
-        buffer_size = bit_size = 0;
+    if (bit_size >= INT_MAX - 7 || bit_size < 0 || !buffer) {
+        bit_size    = 0;
         buffer      = NULL;
         ret         = AVERROR_INVALIDDATA;
     }
@@ -388,9 +417,7 @@ static inline int init_get_bits(GetBitContext *s, const uint8_t *buffer,
 
     s->buffer             = buffer;
     s->size_in_bits       = bit_size;
-#if !UNCHECKED_BITSTREAM_READER
     s->size_in_bits_plus8 = bit_size + 8;
-#endif
     s->buffer_end         = buffer + buffer_size;
     s->index              = 0;
 
@@ -408,8 +435,8 @@ static inline int init_get_bits(GetBitContext *s, const uint8_t *buffer,
 static inline int init_get_bits8(GetBitContext *s, const uint8_t *buffer,
                                  int byte_size)
 {
-    if (byte_size > INT_MAX / 8)
-        return AVERROR_INVALIDDATA;
+    if (byte_size > INT_MAX / 8 || byte_size < 0)
+        byte_size = -1;
     return init_get_bits(s, buffer, byte_size * 8);
 }
 
@@ -485,7 +512,7 @@ void ff_free_vlc(VLC *vlc);
         SKIP_BITS(name, gb, n);                                 \
     } while (0)
 
-#define GET_RL_VLC(level, run, name, gb, table, bits,           \
+#define GET_RL_VLC_INTERNAL(level, run, name, gb, table, bits,  \
                    max_depth, need_update)                      \
     do {                                                        \
         int n, nb_bits;                                         \
@@ -557,6 +584,20 @@ static inline int get_bits_left(GetBitContext *gb)
     return gb->size_in_bits - get_bits_count(gb);
 }
 
+static inline int skip_1stop_8data_bits(GetBitContext *gb)
+{
+    /* Skip groups of (1 flag bit + 8 data bits) until a 0 flag bit is read. */
+    for (;;) {
+        if (get_bits_left(gb) <= 0)
+            return AVERROR_INVALIDDATA;
+        if (!get_bits1(gb))
+            break;
+        skip_bits(gb, 8);
+    }
+
+    return 0;
+}
+
 //#define TRACE
 
 #ifdef TRACE
@@ -600,6 +641,25 @@ static inline int get_vlc_trace(GetBitContext *s, VLC_TYPE (*table)[2],
     return r;
 }
 
+#define GET_RL_VLC(level, run, name, gb, table, bits,           \
+                   max_depth, need_update)                      \
+    do {                                                        \
+        int show  = SHOW_UBITS(name, gb, 24);                   \
+        int len;                                                \
+        int pos = name ## _index;                               \
+        /* show/pos are captured before decoding for the log */ \
+        GET_RL_VLC_INTERNAL(level, run, name, gb, table, bits,max_depth, need_update); \
+                                                                \
+        len = name ## _index - pos + 1;                         \
+        show = show >> (24 - len);                              \
+                                                                \
+        print_bin(show, len);                                   \
+                                                                \
+        av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d/%-3d rlv @%5d in %s %s:%d\n",\
+               show, len, (run) - 1, (level), pos, __FILE__, __PRETTY_FUNCTION__, __LINE__);\
+    } while (0)
+
+
 static inline int get_xbits_trace(GetBitContext *s, int n, const char *file,
                                   const char *func, int line)
 {
@@ -624,6 +684,7 @@ static inline int get_xbits_trace(GetBitContext *s, int n, const char *file,
 
 #else //TRACE
 #define tprintf(p, ...) { }
+#define GET_RL_VLC GET_RL_VLC_INTERNAL
 #endif
 
 #endif /* AVCODEC_GET_BITS_H */
diff --git a/libavcodec/gif.c b/libavcodec/gif.c
index c6c37b6..27d054e 100644
--- a/libavcodec/gif.c
+++ b/libavcodec/gif.c
@@ -1,113 +1,195 @@
 /*
- * GIF encoder.
  * Copyright (c) 2000 Fabrice Bellard
  * Copyright (c) 2002 Francois Revol
  * Copyright (c) 2006 Baptiste Coudurier
  *
  * first version by Francois Revol <revol@free.fr>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-/*
- * Features and limitations:
- * - currently no compression is performed,
- *   in fact the size of the data is 9/8 the size of the image in 8bpp
- * - uses only a global standard palette
- * - tested with IE 5.0, Opera for BeOS, NetPositive (BeOS), and Mozilla (BeOS).
- *
- * Reference documents:
- * http://www.goice.co.jp/member/mo/formats/gif.html
- * http://astronomy.swin.edu.au/pbourke/dataformats/gif/
- * http://www.dcs.ed.ac.uk/home/mxr/gfx/2d/GIF89a.txt
- *
- * this url claims to have an LZW algorithm not covered by Unisys patent:
- * http://www.msg.net/utility/whirlgif/gifencod.html
- * could help reduce the size of the files _a lot_...
- * some sites mentions an RLE type compression also.
+/**
+ * @file
+ * GIF encoder
+ * @see http://www.w3.org/Graphics/GIF/spec-gif89a.txt
  */
 
+#define BITSTREAM_WRITER_LE
+#include "libavutil/opt.h"
+#include "libavutil/imgutils.h"
 #include "avcodec.h"
 #include "bytestream.h"
 #include "internal.h"
 #include "lzw.h"
-
-/* The GIF format uses reversed order for bitstreams... */
-/* at least they don't use PDP_ENDIAN :) */
-#define BITSTREAM_WRITER_LE
+#include "gif.h"
 
 #include "put_bits.h"
 
 typedef struct {
+    const AVClass *class;
     LZWState *lzw;
     uint8_t *buf;
+    AVFrame *last_frame;
+    int flags;
+    uint32_t palette[AVPALETTE_COUNT];  ///< local reference palette for !pal8
+    uint8_t *tmpl;                      ///< temporary line buffer
 } GIFContext;
 
-/* GIF header */
-static int gif_image_write_header(AVCodecContext *avctx,
-                                  uint8_t **bytestream, uint32_t *palette)
+enum {
+    GF_OFFSETTING = 1<<0,
+    GF_TRANSDIFF  = 1<<1,
+};
+
+static int pick_palette_entry(const uint8_t *buf, int linesize, int w, int h)
 {
-    int i;
-    unsigned int v;
-
-    bytestream_put_buffer(bytestream, "GIF", 3);
-    bytestream_put_buffer(bytestream, "89a", 3);
-    bytestream_put_le16(bytestream, avctx->width);
-    bytestream_put_le16(bytestream, avctx->height);
-
-    bytestream_put_byte(bytestream, 0xf7); /* flags: global clut, 256 entries */
-    bytestream_put_byte(bytestream, 0x1f); /* background color index */
-    bytestream_put_byte(bytestream, 0); /* aspect ratio */
-
-    /* the global palette */
-    for(i=0;i<256;i++) {
-        v = palette[i];
-        bytestream_put_be24(bytestream, v);
-    }
+    int histogram[AVPALETTE_COUNT] = {0};
+    int x, y, i;
 
-    return 0;
+    for (y = 0; y < h; y++) {
+        for (x = 0; x < w; x++)
+            histogram[buf[x]]++;
+        buf += linesize;
+    }
+    for (i = 0; i < FF_ARRAY_ELEMS(histogram); i++)
+        if (!histogram[i])
+            return i;
+    return -1;
 }
 
 static int gif_image_write_image(AVCodecContext *avctx,
                                  uint8_t **bytestream, uint8_t *end,
-                                 const uint8_t *buf, int linesize)
+                                 const uint32_t *palette,
+                                 const uint8_t *buf, const int linesize,
+                                 AVPacket *pkt)
 {
     GIFContext *s = avctx->priv_data;
-    int len = 0, height;
+    int len = 0, height = avctx->height, width = avctx->width, x, y;
+    int x_start = 0, y_start = 0, trans = -1;
     const uint8_t *ptr;
+
+    /* Crop image to the bounding box of what changed since last_frame */
+    // TODO support with palette change
+    if ((s->flags & GF_OFFSETTING) && s->last_frame && !palette) {
+        const uint8_t *ref = s->last_frame->data[0];
+        const int ref_linesize = s->last_frame->linesize[0];
+        int x_end = avctx->width  - 1,
+            y_end = avctx->height - 1;
+
+        /* skip common lines */
+        while (y_start < y_end) {
+            if (memcmp(ref + y_start*ref_linesize, buf + y_start*linesize, width))
+                break;
+            y_start++;
+        }
+        while (y_end > y_start) {
+            if (memcmp(ref + y_end*ref_linesize, buf + y_end*linesize, width))
+                break;
+            y_end--;
+        }
+        height = y_end + 1 - y_start;
+
+        /* skip common columns; y_end is inclusive, so row y_end must be compared too */
+        while (x_start < x_end) {
+            int same_column = 1;
+            for (y = y_start; y <= y_end; y++) {
+                if (ref[y*ref_linesize + x_start] != buf[y*linesize + x_start]) {
+                    same_column = 0;
+                    break;
+                }
+            }
+            if (!same_column)
+                break;
+            x_start++;
+        }
+        while (x_end > x_start) {
+            int same_column = 1;
+            for (y = y_start; y <= y_end; y++) {
+                if (ref[y*ref_linesize + x_end] != buf[y*linesize + x_end]) {
+                    same_column = 0;
+                    break;
+                }
+            }
+            if (!same_column)
+                break;
+            x_end--;
+        }
+        width = x_end + 1 - x_start;
+
+        av_log(avctx, AV_LOG_DEBUG,"%dx%d image at pos (%d;%d) [area:%dx%d]\n",
+               width, height, x_start, y_start, avctx->width, avctx->height);
+    }
+
     /* image block */
+    bytestream_put_byte(bytestream, GIF_IMAGE_SEPARATOR);
+    bytestream_put_le16(bytestream, x_start);
+    bytestream_put_le16(bytestream, y_start);
+    bytestream_put_le16(bytestream, width);
+    bytestream_put_le16(bytestream, height);
 
-    bytestream_put_byte(bytestream, 0x2c);
-    bytestream_put_le16(bytestream, 0);
-    bytestream_put_le16(bytestream, 0);
-    bytestream_put_le16(bytestream, avctx->width);
-    bytestream_put_le16(bytestream, avctx->height);
-    bytestream_put_byte(bytestream, 0x00); /* flags */
-    /* no local clut */
+    if (!palette) {
+        bytestream_put_byte(bytestream, 0x00); /* flags */
+    } else {
+        unsigned i;
+        bytestream_put_byte(bytestream, 1<<7 | 0x7); /* flags */
+        for (i = 0; i < AVPALETTE_COUNT; i++) {
+            const uint32_t v = palette[i];
+            bytestream_put_be24(bytestream, v);
+        }
+    }
+
+    /* TODO: support with palette change (pal8) */
+    if ((s->flags & GF_TRANSDIFF) && s->last_frame && !palette) {
+        trans = pick_palette_entry(buf + y_start*linesize + x_start,
+                                   linesize, width, height);
+        if (trans < 0) { // TODO, patch welcome
+            av_log(avctx, AV_LOG_DEBUG, "No available color, can not use transparency\n");
+        } else {
+            uint8_t *pal_exdata = av_packet_new_side_data(pkt, AV_PKT_DATA_PALETTE, AVPALETTE_SIZE);
+            if (!pal_exdata)
+                return AVERROR(ENOMEM);
+            memcpy(pal_exdata, s->palette, AVPALETTE_SIZE);
+            pal_exdata[trans*4 + 3*!HAVE_BIGENDIAN] = 0x00;
+        }
+    }
 
     bytestream_put_byte(bytestream, 0x08);
 
-    ff_lzw_encode_init(s->lzw, s->buf, avctx->width*avctx->height,
+    ff_lzw_encode_init(s->lzw, s->buf, 2 * width * height,
                        12, FF_LZW_GIF, put_bits);
 
-    ptr = buf;
-    for (height = avctx->height; height--;) {
-        len += ff_lzw_encode(s->lzw, ptr, avctx->width);
-        ptr += linesize;
+    ptr = buf + y_start*linesize + x_start;
+    if (trans >= 0) {
+        const int ref_linesize = s->last_frame->linesize[0];
+        const uint8_t *ref = s->last_frame->data[0] + y_start*ref_linesize + x_start;
+
+        for (y = 0; y < height; y++) {
+            memcpy(s->tmpl, ptr, width);
+            for (x = 0; x < width; x++)
+                if (ref[x] == ptr[x])
+                    s->tmpl[x] = trans;
+            len += ff_lzw_encode(s->lzw, s->tmpl, width);
+            ptr += linesize;
+            ref += ref_linesize;
+        }
+    } else {
+        for (y = 0; y < height; y++) {
+            len += ff_lzw_encode(s->lzw, ptr, width);
+            ptr += linesize;
+        }
     }
     len += ff_lzw_encode_flush(s->lzw, flush_put_bits);
 
@@ -122,7 +204,6 @@ static int gif_image_write_image(AVCodecContext *avctx,
         len -= size;
     }
     bytestream_put_byte(bytestream, 0x00); /* end of image block */
-    bytestream_put_byte(bytestream, 0x3b);
     return 0;
 }
 
@@ -130,6 +211,11 @@ static av_cold int gif_encode_init(AVCodecContext *avctx)
 {
     GIFContext *s = avctx->priv_data;
 
+    if (avctx->width > 65535 || avctx->height > 65535) {
+        av_log(avctx, AV_LOG_ERROR, "GIF does not support resolutions above 65535x65535\n");
+        return AVERROR(EINVAL);
+    }
+
     avctx->coded_frame = av_frame_alloc();
     if (!avctx->coded_frame)
         return AVERROR(ENOMEM);
@@ -138,30 +224,49 @@ static av_cold int gif_encode_init(AVCodecContext *avctx)
     avctx->coded_frame->key_frame = 1;
 
     s->lzw = av_mallocz(ff_lzw_encode_state_size);
-    if (!s->lzw)
-        return AVERROR(ENOMEM);
     s->buf = av_malloc(avctx->width*avctx->height*2);
-    if (!s->buf)
-         return AVERROR(ENOMEM);
+    s->tmpl = av_malloc(avctx->width);
+    if (!s->tmpl || !s->buf || !s->lzw)
+        return AVERROR(ENOMEM);
+
+    if (avpriv_set_systematic_pal2(s->palette, avctx->pix_fmt) < 0)
+        av_assert0(avctx->pix_fmt == AV_PIX_FMT_PAL8);
+
     return 0;
 }
 
-/* better than nothing gif encoder */
 static int gif_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                             const AVFrame *pict, int *got_packet)
 {
+    GIFContext *s = avctx->priv_data;
     uint8_t *outbuf_ptr, *end;
+    const uint32_t *palette = NULL;
     int ret;
 
-    if ((ret = ff_alloc_packet(pkt, avctx->width*avctx->height*7/5 + FF_MIN_BUFFER_SIZE)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
+    if ((ret = ff_alloc_packet2(avctx, pkt, avctx->width*avctx->height*7/5 + FF_MIN_BUFFER_SIZE)) < 0)
         return ret;
-    }
     outbuf_ptr = pkt->data;
     end        = pkt->data + pkt->size;
 
-    gif_image_write_header(avctx, &outbuf_ptr, (uint32_t *)pict->data[1]);
-    gif_image_write_image(avctx, &outbuf_ptr, end, pict->data[0], pict->linesize[0]);
+    if (avctx->pix_fmt == AV_PIX_FMT_PAL8) {
+        uint8_t *pal_exdata = av_packet_new_side_data(pkt, AV_PKT_DATA_PALETTE, AVPALETTE_SIZE);
+        if (!pal_exdata)
+            return AVERROR(ENOMEM);
+        memcpy(pal_exdata, pict->data[1], AVPALETTE_SIZE);
+        palette = (uint32_t*)pict->data[1];
+    }
+    if ((ret = gif_image_write_image(avctx, &outbuf_ptr, end, palette,
+                                     pict->data[0], pict->linesize[0], pkt)) < 0)
+        return ret; /* do not emit a packet when the image writer failed (e.g. ENOMEM) */
+    if (!s->last_frame) {
+        s->last_frame = av_frame_alloc();
+        if (!s->last_frame)
+            return AVERROR(ENOMEM);
+    }
+    av_frame_unref(s->last_frame);
+    ret = av_frame_ref(s->last_frame, (AVFrame*)pict);
+    if (ret < 0)
+        return ret;
 
     pkt->size   = outbuf_ptr - pkt->data;
     pkt->flags |= AV_PKT_FLAG_KEY;
@@ -178,9 +283,27 @@ static int gif_encode_close(AVCodecContext *avctx)
 
     av_freep(&s->lzw);
     av_freep(&s->buf);
+    av_frame_free(&s->last_frame);
+    av_freep(&s->tmpl);
     return 0;
 }
 
+#define OFFSET(x) offsetof(GIFContext, x)
+#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM) /* parenthesized so it expands safely inside any expression */
+static const AVOption gif_options[] = { /* private encoder options; the two constants below are the bits of "gifflags" */
+    { "gifflags", "set GIF flags", OFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64 = GF_OFFSETTING|GF_TRANSDIFF}, 0, INT_MAX, FLAGS, "flags" },
+        { "offsetting", "enable picture offsetting", 0, AV_OPT_TYPE_CONST, {.i64=GF_OFFSETTING}, INT_MIN, INT_MAX, FLAGS, "flags" },
+        { "transdiff", "enable transparency detection between frames", 0, AV_OPT_TYPE_CONST, {.i64=GF_TRANSDIFF}, INT_MIN, INT_MAX, FLAGS, "flags" },
+    { NULL }
+};
+
+static const AVClass gif_class = { /* referenced by ff_gif_encoder.priv_class; carries gif_options */
+    .class_name = "GIF encoder",
+    .item_name  = av_default_item_name,
+    .option     = gif_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVCodec ff_gif_encoder = {
     .name           = "gif",
     .long_name      = NULL_IF_CONFIG_SMALL("GIF (Graphics Interchange Format)"),
@@ -194,4 +317,5 @@ AVCodec ff_gif_encoder = {
         AV_PIX_FMT_RGB8, AV_PIX_FMT_BGR8, AV_PIX_FMT_RGB4_BYTE, AV_PIX_FMT_BGR4_BYTE,
         AV_PIX_FMT_GRAY8, AV_PIX_FMT_PAL8, AV_PIX_FMT_NONE
     },
+    .priv_class     = &gif_class,
 };
diff --git a/libavcodec/gif.h b/libavcodec/gif.h
new file mode 100644
index 0000000..b4cf665
--- /dev/null
+++ b/libavcodec/gif.h
@@ -0,0 +1,49 @@
+/*
+ * GIF format definitions
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2006 Baptiste Coudurier
+ * Copyright (c) 2012 Vitaliy E Sugrobov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * GIF format definitions.
+ */
+
+#ifndef AVCODEC_GIFDEFS_H
+#define AVCODEC_GIFDEFS_H
+
+#include <stdint.h>
+
+static const uint8_t gif87a_sig[6] = "GIF87a";
+static const uint8_t gif89a_sig[6] = "GIF89a";
+
+#define GCE_DISPOSAL_NONE       0  /* no disposal specified */
+#define GCE_DISPOSAL_INPLACE    1  /* leave the frame in place */
+#define GCE_DISPOSAL_BACKGROUND 2  /* restore the frame area to the background color */
+#define GCE_DISPOSAL_RESTORE    3  /* restore the frame area to its previous contents */
+
+#define GIF_TRAILER                 0x3b  /* ';' marks the end of the GIF data stream */
+#define GIF_EXTENSION_INTRODUCER    0x21  /* '!' starts an extension block */
+#define GIF_IMAGE_SEPARATOR         0x2c  /* ',' starts an image descriptor */
+#define GIF_GCE_EXT_LABEL           0xf9  /* label of the Graphic Control Extension */
+#define GIF_APP_EXT_LABEL           0xff  /* label of an Application Extension */
+#define NETSCAPE_EXT_STR            "NETSCAPE2.0"  /* presumably the application id of the looping extension -- not used in this header */
+
+#endif /* AVCODEC_GIFDEFS_H */
diff --git a/libavcodec/gifdec.c b/libavcodec/gifdec.c
index cdb7f23..78c8900 100644
--- a/libavcodec/gifdec.c
+++ b/libavcodec/gifdec.c
@@ -2,122 +2,255 @@
  * GIF decoder
  * Copyright (c) 2003 Fabrice Bellard
  * Copyright (c) 2006 Baptiste Coudurier
+ * Copyright (c) 2012 Vitaliy E Sugrobov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
 #include "avcodec.h"
 #include "bytestream.h"
 #include "internal.h"
 #include "lzw.h"
+#include "gif.h"
 
-#define GCE_DISPOSAL_NONE       0
-#define GCE_DISPOSAL_INPLACE    1
-#define GCE_DISPOSAL_BACKGROUND 2
-#define GCE_DISPOSAL_RESTORE    3
+/* This value is intentionally set to the "transparent white" color.
+ * It is much better to have a white background instead of a black one
+ * when a GIF image is converted to a format which does not support transparency.
+ */
+#define GIF_TRANSPARENT_COLOR    0x00ffffff
 
 typedef struct GifState {
+    const AVClass *class;
+    AVFrame *frame;
     int screen_width;
     int screen_height;
+    int has_global_palette;
     int bits_per_pixel;
+    uint32_t bg_color;
     int background_color_index;
     int transparent_color_index;
     int color_resolution;
-    uint32_t *image_palette;
+    /* intermediate buffer for storing color indices
+     * obtained from lzw-encoded data stream */
+    uint8_t *idx_line;
+    int idx_line_size;
 
     /* after the frame is displayed, the disposal method is used */
+    int gce_prev_disposal;
     int gce_disposal;
-    /* delay during which the frame is shown */
-    int gce_delay;
+    /* rectangle describing area that must be disposed */
+    int gce_l, gce_t, gce_w, gce_h;
+    /* depending on disposal method we store either part of the image
+     * drawn on the canvas or background color that
+     * should be used upon disposal */
+    uint32_t * stored_img;
+    int stored_img_size;
+    int stored_bg_color;
 
-    /* LZW compatible decoder */
     GetByteContext gb;
     LZWState *lzw;
 
     /* aux buffers */
-    uint8_t global_palette[256 * 3];
-    uint8_t local_palette[256 * 3];
+    uint32_t global_palette[256];
+    uint32_t local_palette[256];
 
-  AVCodecContext* avctx;
+    AVCodecContext *avctx;
+    int keyframe;
+    int keyframe_ok;
+    int trans_color;    /**< color value that is used instead of transparent color */
 } GifState;
 
-static const uint8_t gif87a_sig[6] = "GIF87a";
-static const uint8_t gif89a_sig[6] = "GIF89a";
+/* Read nb 24-bit big-endian RGB entries from s->gb into pal with alpha forced to 0xff; callers check that nb * 3 bytes are left. */
+static void gif_read_palette(GifState *s, uint32_t *pal, int nb)
+{
+    int remaining;
+    for (remaining = nb; remaining > 0; remaining--)
+        *pal++ = (0xffu << 24) | bytestream2_get_be24u(&s->gb);
+}
+
+/* Set every 32-bit word of plane 0 (linesize[0] / 4 words per row, height rows) to color. */
+static void gif_fill(AVFrame *picture, uint32_t color)
+{
+    uint32_t *dst        = (uint32_t *)picture->data[0];
+    uint32_t *const stop = dst + (picture->linesize[0] / sizeof(uint32_t)) * picture->height;
+    while (dst < stop)
+        *dst++ = color;
+}
+
+/* Fill the w x h rectangle of 32-bit pixels whose top-left corner is (l, t) with color; no bounds checking is done here. */
+static void gif_fill_rect(AVFrame *picture, uint32_t color, int l, int t, int w, int h)
+{
+    const int linesize = picture->linesize[0] / sizeof(uint32_t);
+    const uint32_t *py = (uint32_t *)picture->data[0] + t * linesize;
+    const uint32_t *pr, *pb = py + h * linesize;
+    uint32_t *px;
+
+    for (; py < pb; py += linesize) {
+        px = (uint32_t *)py + l;
+        pr = px + w; /* one past the last pixel written in this row */
+        for (; px < pr; px++)
+            *px = color;
+    }
+}
+
+/* Copy the w x h rectangle at (l, t) from src to the same position in dst; linesize is counted in 32-bit pixels for both. */
+static void gif_copy_img_rect(const uint32_t *src, uint32_t *dst,
+                              int linesize, int l, int t, int w, int h)
+{
+    const int y_start = t * linesize;
+    const uint32_t *src_px,
+                   *src_py = src + y_start,
+                   *dst_py = dst + y_start;
+    const uint32_t *src_pb = src_py + h * linesize;
+    uint32_t *dst_px;
+
+    for (; src_py < src_pb; src_py += linesize, dst_py += linesize) {
+        src_px = src_py + l;
+        dst_px = (uint32_t *)dst_py + l;
+        memcpy(dst_px, src_px, w * sizeof(uint32_t)); /* one row of the rectangle */
+    }
+}
 
 static int gif_read_image(GifState *s, AVFrame *frame)
 {
     int left, top, width, height, bits_per_pixel, code_size, flags;
-    int is_interleaved, has_local_palette, y, pass, y1, linesize, n, i;
-    uint8_t *ptr, *spal, *palette, *ptr1;
-
-    left   = bytestream2_get_le16(&s->gb);
-    top    = bytestream2_get_le16(&s->gb);
-    width  = bytestream2_get_le16(&s->gb);
-    height = bytestream2_get_le16(&s->gb);
-    flags  = bytestream2_get_byte(&s->gb);
+    int is_interleaved, has_local_palette, y, pass, y1, linesize, pal_size;
+    uint32_t *ptr, *pal, *px, *pr, *ptr1;
+    int ret;
+    uint8_t *idx;
+
+    /* At least 9 bytes of Image Descriptor. */
+    if (bytestream2_get_bytes_left(&s->gb) < 9)
+        return AVERROR_INVALIDDATA;
+
+    left   = bytestream2_get_le16u(&s->gb);
+    top    = bytestream2_get_le16u(&s->gb);
+    width  = bytestream2_get_le16u(&s->gb);
+    height = bytestream2_get_le16u(&s->gb);
+    flags  = bytestream2_get_byteu(&s->gb);
     is_interleaved = flags & 0x40;
     has_local_palette = flags & 0x80;
     bits_per_pixel = (flags & 0x07) + 1;
 
-    av_dlog(s->avctx, "gif: image x=%d y=%d w=%d h=%d\n", left, top, width, height);
+    av_dlog(s->avctx, "image x=%d y=%d w=%d h=%d\n", left, top, width, height);
 
     if (has_local_palette) {
-        bytestream2_get_buffer(&s->gb, s->local_palette, 3 * (1 << bits_per_pixel));
-        palette = s->local_palette;
+        pal_size = 1 << bits_per_pixel;
+
+        if (bytestream2_get_bytes_left(&s->gb) < pal_size * 3)
+            return AVERROR_INVALIDDATA;
+
+        gif_read_palette(s, s->local_palette, pal_size);
+        pal = s->local_palette;
     } else {
-        palette = s->global_palette;
-        bits_per_pixel = s->bits_per_pixel;
+        if (!s->has_global_palette) {
+            av_log(s->avctx, AV_LOG_ERROR, "picture doesn't have either global or local palette.\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        pal = s->global_palette;
+    }
+
+    if (s->keyframe) {
+        if (s->transparent_color_index == -1 && s->has_global_palette) {
+            /* transparency wasn't set before the first frame, fill with background color */
+            gif_fill(frame, s->bg_color);
+        } else {
+            /* Otherwise fill with the transparent color.
+             * This is necessary because by default the picture is filled with 0x80808080. */
+            gif_fill(frame, s->trans_color);
+        }
     }
 
     /* verify that all the image is inside the screen dimensions */
     if (left + width > s->screen_width ||
-        top + height > s->screen_height ||
-        !width || !height) {
+        top + height > s->screen_height) {
+        av_log(s->avctx, AV_LOG_ERROR, "image is outside the screen dimensions.\n");
+        return AVERROR_INVALIDDATA;
+    }
+    if (width <= 0 || height <= 0) {
         av_log(s->avctx, AV_LOG_ERROR, "Invalid image dimensions.\n");
         return AVERROR_INVALIDDATA;
     }
 
-    /* build the palette */
-    n = (1 << bits_per_pixel);
-    spal = palette;
-    for(i = 0; i < n; i++) {
-        s->image_palette[i] = (0xffu << 24) | AV_RB24(spal);
-        spal += 3;
+    /* process disposal method */
+    if (s->gce_prev_disposal == GCE_DISPOSAL_BACKGROUND) {
+        gif_fill_rect(frame, s->stored_bg_color, s->gce_l, s->gce_t, s->gce_w, s->gce_h);
+    } else if (s->gce_prev_disposal == GCE_DISPOSAL_RESTORE) {
+        gif_copy_img_rect(s->stored_img, (uint32_t *)frame->data[0],
+            frame->linesize[0] / sizeof(uint32_t), s->gce_l, s->gce_t, s->gce_w, s->gce_h);
     }
-    for(; i < 256; i++)
-        s->image_palette[i] = (0xffu << 24);
-    /* handle transparency */
-    if (s->transparent_color_index >= 0)
-        s->image_palette[s->transparent_color_index] = 0;
+
+    s->gce_prev_disposal = s->gce_disposal;
+
+    if (s->gce_disposal != GCE_DISPOSAL_NONE) {
+        s->gce_l = left;  s->gce_t = top;
+        s->gce_w = width; s->gce_h = height;
+
+        if (s->gce_disposal == GCE_DISPOSAL_BACKGROUND) {
+            if (s->transparent_color_index >= 0)
+                s->stored_bg_color = s->trans_color;
+            else
+                s->stored_bg_color = s->bg_color;
+        } else if (s->gce_disposal == GCE_DISPOSAL_RESTORE) {
+            av_fast_malloc(&s->stored_img, &s->stored_img_size, frame->linesize[0] * frame->height);
+            if (!s->stored_img)
+                return AVERROR(ENOMEM);
+
+            gif_copy_img_rect((uint32_t *)frame->data[0], s->stored_img,
+                frame->linesize[0] / sizeof(uint32_t), left, top, width, height);
+        }
+    }
+
+    /* Expect at least 2 bytes: 1 for lzw code size and 1 for block size. */
+    if (bytestream2_get_bytes_left(&s->gb) < 2)
+        return AVERROR_INVALIDDATA;
 
     /* now get the image data */
-    code_size = bytestream2_get_byte(&s->gb);
-    ff_lzw_decode_init(s->lzw, code_size, s->gb.buffer,
-                       bytestream2_get_bytes_left(&s->gb), FF_LZW_GIF);
+    code_size = bytestream2_get_byteu(&s->gb);
+    if ((ret = ff_lzw_decode_init(s->lzw, code_size, s->gb.buffer,
+                                  bytestream2_get_bytes_left(&s->gb), FF_LZW_GIF)) < 0) {
+        av_log(s->avctx, AV_LOG_ERROR, "LZW init failed\n");
+        return ret;
+    }
 
     /* read all the image */
-    linesize = frame->linesize[0];
-    ptr1 = frame->data[0] + top * linesize + left;
+    linesize = frame->linesize[0] / sizeof(uint32_t);
+    ptr1 = (uint32_t *)frame->data[0] + top * linesize + left;
     ptr = ptr1;
     pass = 0;
     y1 = 0;
     for (y = 0; y < height; y++) {
-        ff_lzw_decode(s->lzw, ptr, width);
+        int count = ff_lzw_decode(s->lzw, s->idx_line, width);
+        if (count != width) {
+            if (count)
+                av_log(s->avctx, AV_LOG_ERROR, "LZW decode failed\n");
+            goto decode_tail;
+        }
+
+        pr = ptr + width;
+
+        for (px = ptr, idx = s->idx_line; px < pr; px++, idx++) {
+            if (*idx != s->transparent_color_index)
+                *px = pal[*idx];
+        }
+
         if (is_interleaved) {
             switch(pass) {
             default:
@@ -149,53 +282,76 @@ static int gif_read_image(GifState *s, AVFrame *frame)
             ptr += linesize;
         }
     }
+
+ decode_tail:
     /* read the garbage data until end marker is found */
     ff_lzw_decode_tail(s->lzw);
 
-    bytestream2_skip(&s->gb, ff_lzw_size_read(s->lzw));
+    /* Graphic Control Extension's scope is single frame.
+     * Remove its influence. */
+    s->transparent_color_index = -1;
+    s->gce_disposal = GCE_DISPOSAL_NONE;
+
     return 0;
 }
 
 static int gif_read_extension(GifState *s)
 {
-    int ext_code, ext_len, i, gce_flags, gce_transparent_index;
+    int ext_code, ext_len, gce_flags, gce_transparent_index;
+
+    /* There must be at least 2 bytes:
+     * 1 for extension label and 1 for extension length. */
+    if (bytestream2_get_bytes_left(&s->gb) < 2)
+        return AVERROR_INVALIDDATA;
 
-    /* extension */
-    ext_code = bytestream2_get_byte(&s->gb);
-    ext_len  = bytestream2_get_byte(&s->gb);
+    ext_code = bytestream2_get_byteu(&s->gb);
+    ext_len  = bytestream2_get_byteu(&s->gb);
 
-    av_dlog(s->avctx, "gif: ext_code=0x%x len=%d\n", ext_code, ext_len);
+    av_dlog(s->avctx, "ext_code=0x%x len=%d\n", ext_code, ext_len);
 
     switch(ext_code) {
-    case 0xf9:
+    case GIF_GCE_EXT_LABEL:
         if (ext_len != 4)
             goto discard_ext;
-        s->transparent_color_index = -1;
-        gce_flags    = bytestream2_get_byte(&s->gb);
-        s->gce_delay = bytestream2_get_le16(&s->gb);
-        gce_transparent_index = bytestream2_get_byte(&s->gb);
+
+        /* We need at least 5 bytes more: 4 is for extension body
+         * and 1 for next block size. */
+        if (bytestream2_get_bytes_left(&s->gb) < 5)
+            return AVERROR_INVALIDDATA;
+
+        gce_flags    = bytestream2_get_byteu(&s->gb);
+        bytestream2_skipu(&s->gb, 2);    // delay during which the frame is shown
+        gce_transparent_index = bytestream2_get_byteu(&s->gb);
         if (gce_flags & 0x01)
             s->transparent_color_index = gce_transparent_index;
         else
             s->transparent_color_index = -1;
         s->gce_disposal = (gce_flags >> 2) & 0x7;
 
-        av_dlog(s->avctx, "gif: gce_flags=%x delay=%d tcolor=%d disposal=%d\n",
-               gce_flags, s->gce_delay,
+        av_dlog(s->avctx, "gce_flags=%x tcolor=%d disposal=%d\n",
+               gce_flags,
                s->transparent_color_index, s->gce_disposal);
 
-        ext_len = bytestream2_get_byte(&s->gb);
+        if (s->gce_disposal > 3) { /* only disposal methods 0..3 are defined in gif.h */
+            av_dlog(s->avctx, "invalid value in gce_disposal (%d). Using default value of 0.\n", s->gce_disposal);
+            s->gce_disposal = GCE_DISPOSAL_NONE;
+        }
+
+        ext_len = bytestream2_get_byteu(&s->gb);
         break;
     }
 
     /* NOTE: many extension blocks can come after */
  discard_ext:
-    while (ext_len != 0) {
-        for (i = 0; i < ext_len; i++)
-            bytestream2_get_byte(&s->gb);
-        ext_len = bytestream2_get_byte(&s->gb);
+    while (ext_len) {
+        /* There must be at least ext_len bytes and 1 for next block size byte. */
+        if (bytestream2_get_bytes_left(&s->gb) < ext_len + 1)
+            return AVERROR_INVALIDDATA;
 
-        av_dlog(s->avctx, "gif: ext_len1=%d\n", ext_len);
+        bytestream2_skipu(&s->gb, ext_len);
+        ext_len = bytestream2_get_byteu(&s->gb);
+
+        av_dlog(s->avctx, "ext_len1=%d\n", ext_len);
     }
     return 0;
 }
@@ -204,44 +360,48 @@ static int gif_read_header1(GifState *s)
 {
     uint8_t sig[6];
     int v, n;
-    int has_global_palette;
+    int background_color_index;
 
     if (bytestream2_get_bytes_left(&s->gb) < 13)
         return AVERROR_INVALIDDATA;
 
     /* read gif signature */
-    bytestream2_get_buffer(&s->gb, sig, 6);
-    if (memcmp(sig, gif87a_sig, 6) != 0 &&
-        memcmp(sig, gif89a_sig, 6) != 0)
+    bytestream2_get_bufferu(&s->gb, sig, 6);
+    if (memcmp(sig, gif87a_sig, 6) &&
+        memcmp(sig, gif89a_sig, 6))
         return AVERROR_INVALIDDATA;
 
     /* read screen header */
     s->transparent_color_index = -1;
-    s->screen_width  = bytestream2_get_le16(&s->gb);
-    s->screen_height = bytestream2_get_le16(&s->gb);
-    if(   (unsigned)s->screen_width  > 32767
-       || (unsigned)s->screen_height > 32767){
-        av_log(NULL, AV_LOG_ERROR, "picture size too large\n");
-        return AVERROR_INVALIDDATA;
-    }
+    s->screen_width  = bytestream2_get_le16u(&s->gb);
+    s->screen_height = bytestream2_get_le16u(&s->gb);
 
-    v = bytestream2_get_byte(&s->gb);
+    v = bytestream2_get_byteu(&s->gb);
     s->color_resolution = ((v & 0x70) >> 4) + 1;
-    has_global_palette = (v & 0x80);
+    s->has_global_palette = (v & 0x80);
     s->bits_per_pixel = (v & 0x07) + 1;
-    s->background_color_index = bytestream2_get_byte(&s->gb);
-    bytestream2_get_byte(&s->gb);                /* ignored */
+    background_color_index = bytestream2_get_byteu(&s->gb);
+    n = bytestream2_get_byteu(&s->gb);
+    if (n) {
+        s->avctx->sample_aspect_ratio.num = n + 15;
+        s->avctx->sample_aspect_ratio.den = 64;
+    }
 
-    av_dlog(s->avctx, "gif: screen_w=%d screen_h=%d bpp=%d global_palette=%d\n",
+    av_dlog(s->avctx, "screen_w=%d screen_h=%d bpp=%d global_palette=%d\n",
            s->screen_width, s->screen_height, s->bits_per_pixel,
-           has_global_palette);
+           s->has_global_palette);
 
-    if (has_global_palette) {
+    if (s->has_global_palette) {
+        s->background_color_index = background_color_index;
         n = 1 << s->bits_per_pixel;
         if (bytestream2_get_bytes_left(&s->gb) < n * 3)
             return AVERROR_INVALIDDATA;
-        bytestream2_get_buffer(&s->gb, s->global_palette, n * 3);
-    }
+
+        gif_read_palette(s, s->global_palette, n);
+        s->bg_color = s->global_palette[s->background_color_index];
+    } else
+        s->background_color_index = -1;
+
     return 0;
 }
 
@@ -251,23 +411,24 @@ static int gif_parse_next_image(GifState *s, AVFrame *frame)
         int code = bytestream2_get_byte(&s->gb);
         int ret;
 
-        av_dlog(s->avctx, "gif: code=%02x '%c'\n", code, code);
+        av_log(s->avctx, AV_LOG_DEBUG, "code=%02x '%c'\n", code, code);
 
         switch (code) {
-        case ',':
+        case GIF_IMAGE_SEPARATOR:
             return gif_read_image(s, frame);
-        case '!':
+        case GIF_EXTENSION_INTRODUCER:
             if ((ret = gif_read_extension(s)) < 0)
                 return ret;
             break;
-        case ';':
+        case GIF_TRAILER:
             /* end of image */
+            return AVERROR_EOF;
         default:
-            /* error or erroneous EOF */
+            /* erroneous block label */
             return AVERROR_INVALIDDATA;
         }
     }
-    return AVERROR_INVALIDDATA;
+    return AVERROR_EOF;
 }
 
 static av_cold int gif_decode_init(AVCodecContext *avctx)
@@ -276,38 +437,74 @@ static av_cold int gif_decode_init(AVCodecContext *avctx)
 
     s->avctx = avctx;
 
+    avctx->pix_fmt = AV_PIX_FMT_RGB32;
+    s->frame = av_frame_alloc();
+    if (!s->frame)
+        return AVERROR(ENOMEM);
     ff_lzw_decode_open(&s->lzw);
     return 0;
 }
 
-static int gif_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
-                            AVPacket *avpkt)
+static int gif_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPacket *avpkt)
 {
-    const uint8_t *buf = avpkt->data;
-    int buf_size = avpkt->size;
     GifState *s = avctx->priv_data;
-    AVFrame *picture = data;
     int ret;
 
-    bytestream2_init(&s->gb, buf, buf_size);
-    if ((ret = gif_read_header1(s)) < 0)
-        return ret;
+    bytestream2_init(&s->gb, avpkt->data, avpkt->size);
 
-    avctx->pix_fmt = AV_PIX_FMT_PAL8;
+    s->frame->pts     = avpkt->pts;
+    s->frame->pkt_pts = avpkt->pts;
+    s->frame->pkt_dts = avpkt->dts;
+    av_frame_set_pkt_duration(s->frame, avpkt->duration);
 
-    if ((ret = ff_set_dimensions(avctx, s->screen_width, s->screen_height)) < 0)
-        return ret;
+    if (avpkt->size >= 6) {
+        s->keyframe = memcmp(avpkt->data, gif87a_sig, 6) == 0 ||
+                      memcmp(avpkt->data, gif89a_sig, 6) == 0;
+    } else {
+        s->keyframe = 0;
+    }
 
-    if ((ret = ff_get_buffer(avctx, picture, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
-        return ret;
+    if (s->keyframe) {
+        s->keyframe_ok = 0;
+        s->gce_prev_disposal = GCE_DISPOSAL_NONE;
+        if ((ret = gif_read_header1(s)) < 0)
+            return ret;
+
+        if ((ret = ff_set_dimensions(avctx, s->screen_width, s->screen_height)) < 0)
+            return ret;
+
+        av_frame_unref(s->frame);
+        if ((ret = ff_get_buffer(avctx, s->frame, 0)) < 0)
+            return ret;
+
+        av_fast_malloc(&s->idx_line, &s->idx_line_size, s->screen_width);
+        if (!s->idx_line)
+            return AVERROR(ENOMEM);
+
+        s->frame->pict_type = AV_PICTURE_TYPE_I;
+        s->frame->key_frame = 1;
+        s->keyframe_ok = 1;
+    } else {
+        if (!s->keyframe_ok) {
+            av_log(avctx, AV_LOG_ERROR, "cannot decode frame without keyframe\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        if ((ret = ff_reget_buffer(avctx, s->frame)) < 0)
+            return ret;
+
+        s->frame->pict_type = AV_PICTURE_TYPE_P;
+        s->frame->key_frame = 0;
     }
-    s->image_palette = (uint32_t *)picture->data[1];
-    ret = gif_parse_next_image(s, picture);
+
+    ret = gif_parse_next_image(s, s->frame);
     if (ret < 0)
         return ret;
 
+    if ((ret = av_frame_ref(data, s->frame)) < 0)
+        return ret;
     *got_frame = 1;
+
     return bytestream2_tell(&s->gb);
 }
 
@@ -316,9 +513,29 @@ static av_cold int gif_decode_close(AVCodecContext *avctx)
     GifState *s = avctx->priv_data;
 
     ff_lzw_decode_close(&s->lzw);
+    av_frame_free(&s->frame);
+    av_freep(&s->idx_line);
+    av_freep(&s->stored_img);
+
     return 0;
 }
 
+static const AVOption options[] = { /* private decoder options */
+    { "trans_color", "color value (ARGB) that is used instead of transparent color",
+      offsetof(GifState, trans_color), AV_OPT_TYPE_INT,
+      {.i64 = GIF_TRANSPARENT_COLOR}, 0, 0xffffffff, /* NOTE(review): trans_color is a plain int but max is 0xffffffff -- confirm values above INT_MAX are stored as intended */
+      AV_OPT_FLAG_DECODING_PARAM|AV_OPT_FLAG_VIDEO_PARAM },
+    { NULL },
+};
+
+static const AVClass decoder_class = { /* referenced by ff_gif_decoder.priv_class; carries the options table above */
+    .class_name = "gif decoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+    .category   = AV_CLASS_CATEGORY_DECODER,
+};
+
 AVCodec ff_gif_decoder = {
     .name           = "gif",
     .long_name      = NULL_IF_CONFIG_SMALL("GIF (Graphics Interchange Format)"),
@@ -329,4 +546,5 @@ AVCodec ff_gif_decoder = {
     .close          = gif_decode_close,
     .decode         = gif_decode_frame,
     .capabilities   = CODEC_CAP_DR1,
+    .priv_class     = &decoder_class,
 };
diff --git a/libavcodec/golomb-test.c b/libavcodec/golomb-test.c
index e740a20..2dfe917 100644
--- a/libavcodec/golomb-test.c
+++ b/libavcodec/golomb-test.c
@@ -1,18 +1,20 @@
 /*
- * This file is part of Libav.
+ * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * Libav is free software; you can redistribute it and/or
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -56,7 +58,7 @@ int main(void)
         }
     }
 
-#define EXTEND(i) (i << 3 | i & 7)
+#define EXTEND(i) ((i) << 3 | (i) & 7)
     init_put_bits(&pb, temp, SIZE);
     for (i = 0; i < COUNT; i++)
         set_ue_golomb(&pb, EXTEND(i));
diff --git a/libavcodec/golomb.c b/libavcodec/golomb.c
index 550c41e..937ac22 100644
--- a/libavcodec/golomb.c
+++ b/libavcodec/golomb.c
@@ -2,20 +2,20 @@
  * exp golomb vlc stuff
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/golomb.h b/libavcodec/golomb.h
index 1754706..1c4e210 100644
--- a/libavcodec/golomb.h
+++ b/libavcodec/golomb.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  * Copyright (c) 2004 Alex Beregszaszi
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -66,10 +66,14 @@ static inline int get_ue_golomb(GetBitContext *gb)
         return ff_ue_golomb_vlc_code[buf];
     } else {
         int log = 2 * av_log2(buf) - 31;
-        buf >>= log;
-        buf--;
         LAST_SKIP_BITS(re, gb, 32 - log);
         CLOSE_READER(re, gb);
+        if (CONFIG_FTRAPV && log < 0) {
+            av_log(0, AV_LOG_ERROR, "Invalid UE golomb code\n");
+            return AVERROR_INVALIDDATA;
+        }
+        buf >>= log;
+        buf--;
 
         return buf;
     }
@@ -138,7 +142,7 @@ static inline unsigned svq3_get_ue_golomb(GetBitContext *gb)
             ret = (ret << 4) | ff_interleaved_dirac_golomb_vlc_code[buf];
             UPDATE_CACHE(re, gb);
             buf = GET_CACHE(re, gb);
-        } while (HAVE_BITS_REMAINING(re, gb));
+        } while (ret<0x8000000U && HAVE_BITS_REMAINING(re, gb));
 
         CLOSE_READER(re, gb);
         return ret - 1;
@@ -150,7 +154,7 @@ static inline unsigned svq3_get_ue_golomb(GetBitContext *gb)
  */
 static inline int get_te0_golomb(GetBitContext *gb, int range)
 {
-    assert(range >= 1);
+    av_assert2(range >= 1);
 
     if (range == 1)
         return 0;
@@ -165,7 +169,7 @@ static inline int get_te0_golomb(GetBitContext *gb, int range)
  */
 static inline int get_te_golomb(GetBitContext *gb, int range)
 {
-    assert(range >= 1);
+    av_assert2(range >= 1);
 
     if (range == 2)
         return get_bits1(gb) ^ 1;
@@ -191,7 +195,11 @@ static inline int get_se_golomb(GetBitContext *gb)
 
         return ff_se_golomb_vlc_code[buf];
     } else {
-        int log = 2 * av_log2(buf) - 31;
+        int log = av_log2(buf);
+        LAST_SKIP_BITS(re, gb, 31 - log);
+        UPDATE_CACHE(re, gb);
+        buf = GET_CACHE(re, gb);
+
         buf >>= log;
 
         LAST_SKIP_BITS(re, gb, 32 - log);
@@ -328,7 +336,9 @@ static inline int get_ur_golomb_jpegls(GetBitContext *gb, int k, int limit,
         return buf;
     } else {
         int i;
-        for (i = 0; i < limit && SHOW_UBITS(re, gb, 1) == 0 && HAVE_BITS_REMAINING(re, gb); i++) {
+        for (i = 0; i < limit && SHOW_UBITS(re, gb, 1) == 0; i++) {
+            if (gb->size_in_bits <= re_index)
+                return -1;
             LAST_SKIP_BITS(re, gb, 1);
             UPDATE_CACHE(re, gb);
         }
@@ -467,7 +477,7 @@ static inline int get_te(GetBitContext *s, int r, char *file, const char *func,
  */
 static inline void set_ue_golomb(PutBitContext *pb, int i)
 {
-    assert(i >= 0);
+    av_assert2(i >= 0);
 
 #if 0
     if (i = 0) {
@@ -488,8 +498,8 @@ static inline void set_ue_golomb(PutBitContext *pb, int i)
  */
 static inline void set_te_golomb(PutBitContext *pb, int i, int range)
 {
-    assert(range >= 1);
-    assert(i <= range);
+    av_assert2(range >= 1);
+    av_assert2(i <= range);
 
     if (range == 2)
         put_bits(pb, 1, i ^ 1);
@@ -526,7 +536,7 @@ static inline void set_ur_golomb(PutBitContext *pb, int i, int k, int limit,
 {
     int e;
 
-    assert(i >= 0);
+    av_assert2(i >= 0);
 
     e = i >> k;
     if (e < limit)
@@ -543,7 +553,7 @@ static inline void set_ur_golomb_jpegls(PutBitContext *pb, int i, int k,
 {
     int e;
 
-    assert(i >= 0);
+    av_assert2(i >= 0);
 
     e = (i >> k) + 1;
     if (e < limit) {
diff --git a/libavcodec/gsm.h b/libavcodec/gsm.h
index 238cb73..53d65c4 100644
--- a/libavcodec/gsm.h
+++ b/libavcodec/gsm.h
@@ -1,20 +1,20 @@
 /*
  * GSM common header
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/gsm_parser.c b/libavcodec/gsm_parser.c
index c0befc7..9a3b94e 100644
--- a/libavcodec/gsm_parser.c
+++ b/libavcodec/gsm_parser.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2012  Justin Ruggles
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -55,7 +55,10 @@ static int gsm_parse(AVCodecParserContext *s1, AVCodecContext *avctx,
             s->duration   = GSM_FRAME_SIZE * 2;
             break;
         default:
-            return AVERROR(EINVAL);
+            *poutbuf      = buf;
+            *poutbuf_size = buf_size;
+            av_log(avctx, AV_LOG_ERROR, "Invalid codec_id\n");
+            return buf_size;
         }
     }
 
diff --git a/libavcodec/gsmdec.c b/libavcodec/gsmdec.c
index b763ce8..c4cde92 100644
--- a/libavcodec/gsmdec.c
+++ b/libavcodec/gsmdec.c
@@ -2,20 +2,20 @@
  * gsm 06.10 decoder
  * Copyright (c) 2010 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -79,10 +79,8 @@ static int gsm_decode_frame(AVCodecContext *avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = avctx->frame_size;
-    if ((res = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((res = ff_get_buffer(avctx, frame, 0)) < 0)
         return res;
-    }
     samples = (int16_t *)frame->data[0];
 
     switch (avctx->codec_id) {
@@ -112,6 +110,7 @@ static void gsm_flush(AVCodecContext *avctx)
     memset(s, 0, sizeof(*s));
 }
 
+#if CONFIG_GSM_DECODER
 AVCodec ff_gsm_decoder = {
     .name           = "gsm",
     .long_name      = NULL_IF_CONFIG_SMALL("GSM"),
@@ -123,7 +122,8 @@ AVCodec ff_gsm_decoder = {
     .flush          = gsm_flush,
     .capabilities   = CODEC_CAP_DR1,
 };
-
+#endif
+#if CONFIG_GSM_MS_DECODER
 AVCodec ff_gsm_ms_decoder = {
     .name           = "gsm_ms",
     .long_name      = NULL_IF_CONFIG_SMALL("GSM Microsoft variant"),
@@ -135,3 +135,4 @@ AVCodec ff_gsm_ms_decoder = {
     .flush          = gsm_flush,
     .capabilities   = CODEC_CAP_DR1,
 };
+#endif
diff --git a/libavcodec/gsmdec_data.c b/libavcodec/gsmdec_data.c
index c9b3183..d90c69b 100644
--- a/libavcodec/gsmdec_data.c
+++ b/libavcodec/gsmdec_data.c
@@ -2,20 +2,20 @@
  * gsm 06.10 decoder data
  * Copyright (c) 2010 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/gsmdec_data.h b/libavcodec/gsmdec_data.h
index f5581d5..b57194b 100644
--- a/libavcodec/gsmdec_data.h
+++ b/libavcodec/gsmdec_data.h
@@ -2,20 +2,20 @@
  * gsm 06.10 decoder data
  * Copyright (c) 2010 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/gsmdec_template.c b/libavcodec/gsmdec_template.c
index 0b54dc5..0c60813 100644
--- a/libavcodec/gsmdec_template.c
+++ b/libavcodec/gsmdec_template.c
@@ -2,20 +2,20 @@
  * gsm 06.10 decoder
  * Copyright (c) 2010 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/h261.c b/libavcodec/h261.c
index b9783f1..9836905 100644
--- a/libavcodec/h261.c
+++ b/libavcodec/h261.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  * Copyright (c) 2004 Maarten Daniels
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/h261.h b/libavcodec/h261.h
index ad7e28b..5586462 100644
--- a/libavcodec/h261.h
+++ b/libavcodec/h261.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  * Copyright (c) 2004 Maarten Daniels
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -38,7 +38,6 @@ typedef struct H261Context {
     MpegEncContext s;
 
     int current_mba;
-    int previous_mba;
     int mba_diff;
     int mtype;
     int current_mv_x;
diff --git a/libavcodec/h261_parser.c b/libavcodec/h261_parser.c
index 2469424..9c31557 100644
--- a/libavcodec/h261_parser.c
+++ b/libavcodec/h261_parser.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  * Copyright (c) 2004 Maarten Daniels
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -71,11 +71,15 @@ static int h261_parse(AVCodecParserContext *s,
     ParseContext *pc = s->priv_data;
     int next;
 
-    next = h261_find_frame_end(pc, avctx, buf, buf_size);
-    if (ff_combine_frame(pc, next, &buf, &buf_size) < 0) {
-        *poutbuf      = NULL;
-        *poutbuf_size = 0;
-        return buf_size;
+    if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
+        next = buf_size;
+    } else {
+        next = h261_find_frame_end(pc, avctx, buf, buf_size);
+        if (ff_combine_frame(pc, next, &buf, &buf_size) < 0) {
+            *poutbuf      = NULL;
+            *poutbuf_size = 0;
+            return buf_size;
+        }
     }
     *poutbuf      = buf;
     *poutbuf_size = buf_size;
diff --git a/libavcodec/h261data.c b/libavcodec/h261data.c
index a81ccdf..a9891ed 100644
--- a/libavcodec/h261data.c
+++ b/libavcodec/h261data.c
@@ -2,20 +2,20 @@
  * copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  * copyright (c) 2004 Maarten Daniels
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/h261dec.c b/libavcodec/h261dec.c
index d83fb31..7e63f29 100644
--- a/libavcodec/h261dec.c
+++ b/libavcodec/h261dec.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  * Copyright (c) 2004 Maarten Daniels
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,6 +25,7 @@
  * H.261 decoder.
  */
 
+#include "libavutil/avassert.h"
 #include "avcodec.h"
 #include "mpeg_er.h"
 #include "mpegutils.h"
@@ -127,12 +128,12 @@ static int h261_decode_gob_header(H261Context *h)
     }
 
     /* GEI */
-    while (get_bits1(&s->gb) != 0)
-        skip_bits(&s->gb, 8);
+    if (skip_1stop_8data_bits(&s->gb) < 0)
+        return AVERROR_INVALIDDATA;
 
     if (s->qscale == 0) {
         av_log(s->avctx, AV_LOG_ERROR, "qscale has forbidden 0 value\n");
-        if (s->avctx->err_recognition & AV_EF_BITSTREAM)
+        if (s->avctx->err_recognition & (AV_EF_BITSTREAM | AV_EF_COMPLIANT))
             return -1;
     }
 
@@ -379,11 +380,12 @@ static int h261_decode_mb(H261Context *h)
 
     // Read mtype
     h->mtype = get_vlc2(&s->gb, h261_mtype_vlc.table, H261_MTYPE_VLC_BITS, 2);
-    if (h->mtype < 0 || h->mtype >= FF_ARRAY_ELEMS(ff_h261_mtype_map)) {
+    if (h->mtype < 0) {
         av_log(s->avctx, AV_LOG_ERROR, "Invalid mtype index %d\n",
                h->mtype);
         return SLICE_ERROR;
     }
+    av_assert0(h->mtype < FF_ARRAY_ELEMS(ff_h261_mtype_map));
     h->mtype = ff_h261_mtype_map[h->mtype];
 
     // Read mquant
@@ -431,6 +433,13 @@ static int h261_decode_mb(H261Context *h)
     s->mv[0][0][0]                 = h->current_mv_x * 2; // gets divided by 2 in motion compensation
     s->mv[0][0][1]                 = h->current_mv_y * 2;
 
+    if (s->current_picture.motion_val[0]) {
+        int b_stride = 2*s->mb_width + 1;
+        int b_xy     = 2 * s->mb_x + (2 * s->mb_y) * b_stride;
+        s->current_picture.motion_val[0][b_xy][0] = s->mv[0][0][0];
+        s->current_picture.motion_val[0][b_xy][1] = s->mv[0][0][1];
+    }
+
 intra:
     /* decode each block */
     if (s->mb_intra || HAS_CBP(h->mtype)) {
@@ -506,8 +515,8 @@ static int h261_decode_picture_header(H261Context *h)
     skip_bits1(&s->gb); /* Reserved */
 
     /* PEI */
-    while (get_bits1(&s->gb) != 0)
-        skip_bits(&s->gb, 8);
+    if (skip_1stop_8data_bits(&s->gb) < 0)
+        return AVERROR_INVALIDDATA;
 
     /* H.261 has no I-frames, but if we pass AV_PICTURE_TYPE_I for the first
      * frame, the codec crashes if it does not contain all I-blocks
@@ -633,12 +642,12 @@ retry:
     }
     ff_MPV_frame_end(s);
 
-    assert(s->current_picture.f->pict_type == s->current_picture_ptr->f->pict_type);
-    assert(s->current_picture.f->pict_type == s->pict_type);
+    av_assert0(s->current_picture.f->pict_type == s->current_picture_ptr->f->pict_type);
+    av_assert0(s->current_picture.f->pict_type == s->pict_type);
 
     if ((ret = av_frame_ref(pict, s->current_picture_ptr->f)) < 0)
         return ret;
-    ff_print_debug_info(s, s->current_picture_ptr);
+    ff_print_debug_info(s, s->current_picture_ptr, pict);
 
     *got_frame = 1;
 
@@ -664,4 +673,5 @@ AVCodec ff_h261_decoder = {
     .close          = h261_decode_end,
     .decode         = h261_decode_frame,
     .capabilities   = CODEC_CAP_DR1,
+    .max_lowres     = 3,
 };
diff --git a/libavcodec/h261enc.c b/libavcodec/h261enc.c
index 5f0baba..47696e5 100644
--- a/libavcodec/h261enc.c
+++ b/libavcodec/h261enc.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  * Copyright (c) 2004 Maarten Daniels
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,12 +26,16 @@
  */
 
 #include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
 #include "avcodec.h"
 #include "mpegutils.h"
 #include "mpegvideo.h"
 #include "h263.h"
 #include "h261.h"
 
+static uint8_t uni_h261_rl_len [64*64*2*2]; /* 2*128*64 entries; filled by init_uni_h261_rl_tab(), addressed via UNI_ENC_INDEX() */
+#define UNI_ENC_INDEX(last,run,level) ((last)*128*64 + (run)*128 + (level)) /* init_uni_h261_rl_tab() passes level biased by +64 */
+
 int ff_h261_get_picture_format(int width, int height)
 {
     // QCIF
@@ -77,7 +81,7 @@ void ff_h261_encode_picture_header(MpegEncContext *s, int picture_number)
         h->gob_number = -1;
     else
         h->gob_number = 0;
-    h->current_mba = 0;
+    s->mb_skip_run = 0;
 }
 
 /**
@@ -95,18 +99,21 @@ static void h261_encode_gob_header(MpegEncContext *s, int mb_line)
     put_bits(&s->pb, 4, h->gob_number); /* GN */
     put_bits(&s->pb, 5, s->qscale);     /* GQUANT */
     put_bits(&s->pb, 1, 0);             /* no GEI */
-    h->current_mba  = 0;
-    h->previous_mba = 0;
-    h->current_mv_x = 0;
-    h->current_mv_y = 0;
+    s->mb_skip_run = 0;
+    s->last_mv[0][0][0] = 0;
+    s->last_mv[0][0][1] = 0;
 }
 
 void ff_h261_reorder_mb_index(MpegEncContext *s)
 {
     int index = s->mb_x + s->mb_y * s->mb_width;
 
-    if (index % 33 == 0)
-        h261_encode_gob_header(s, 0);
+    if (index % 11 == 0) {
+        if (index % 33 == 0)
+            h261_encode_gob_header(s, 0);
+        s->last_mv[0][0][0] = 0;
+        s->last_mv[0][0][1] = 0;
+    }
 
     /* for CIF the GOB's are fragmented in the middle of a scanline
      * that's why we need to adjust the x and y index of the macroblocks */
@@ -213,8 +220,8 @@ static void h261_encode_block(H261Context *h, int16_t *block, int n)
             put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
             if (code == rl->n) {
                 put_bits(&s->pb, 6, run);
-                assert(slevel != 0);
-                assert(level <= 127);
+                av_assert1(slevel != 0);
+                av_assert1(level <= 127);
                 put_sbits(&s->pb, 8, slevel);
             } else {
                 put_bits(&s->pb, 1, sign);
@@ -234,7 +241,6 @@ void ff_h261_encode_mb(MpegEncContext *s, int16_t block[6][64],
     cbp = 63; // avoid warning
     mvd = 0;
 
-    h->current_mba++;
     h->mtype = 0;
 
     if (!s->mb_intra) {
@@ -247,16 +253,18 @@ void ff_h261_encode_mb(MpegEncContext *s, int16_t block[6][64],
         if ((cbp | mvd | s->dquant) == 0) {
             /* skip macroblock */
             s->skip_count++;
-            h->current_mv_x = 0;
-            h->current_mv_y = 0;
+            s->mb_skip_run++;
+            s->last_mv[0][0][0] = 0;
+            s->last_mv[0][0][1] = 0;
             return;
         }
     }
 
     /* MB is not skipped, encode MBA */
     put_bits(&s->pb,
-             ff_h261_mba_bits[(h->current_mba - h->previous_mba) - 1],
-             ff_h261_mba_code[(h->current_mba - h->previous_mba) - 1]);
+             ff_h261_mba_bits[s->mb_skip_run],
+             ff_h261_mba_code[s->mb_skip_run]);
+    s->mb_skip_run = 0;
 
     /* calculate MTYPE */
     if (!s->mb_intra) {
@@ -268,7 +276,7 @@ void ff_h261_encode_mb(MpegEncContext *s, int16_t block[6][64],
             h->mtype += 3;
         if (cbp || s->dquant)
             h->mtype++;
-        assert(h->mtype > 1);
+        av_assert1(h->mtype > 1);
     }
 
     if (s->dquant)
@@ -286,18 +294,16 @@ void ff_h261_encode_mb(MpegEncContext *s, int16_t block[6][64],
     }
 
     if (IS_16X16(h->mtype)) {
-        mv_diff_x       = (motion_x >> 1) - h->current_mv_x;
-        mv_diff_y       = (motion_y >> 1) - h->current_mv_y;
-        h->current_mv_x = (motion_x >> 1);
-        h->current_mv_y = (motion_y >> 1);
+        mv_diff_x       = (motion_x >> 1) - s->last_mv[0][0][0];
+        mv_diff_y       = (motion_y >> 1) - s->last_mv[0][0][1];
+        s->last_mv[0][0][0] = (motion_x >> 1);
+        s->last_mv[0][0][1] = (motion_y >> 1);
         h261_encode_motion(h, mv_diff_x);
         h261_encode_motion(h, mv_diff_y);
     }
 
-    h->previous_mba = h->current_mba;
-
     if (HAS_CBP(h->mtype)) {
-        assert(cbp > 0);
+        av_assert1(cbp > 0);
         put_bits(&s->pb,
                  ff_h261_cbp_tab[cbp - 1][1],
                  ff_h261_cbp_tab[cbp - 1][0]);
@@ -306,10 +312,49 @@ void ff_h261_encode_mb(MpegEncContext *s, int16_t block[6][64],
         /* encode each block */
         h261_encode_block(h, block[i], i);
 
-    if ((h->current_mba == 11) || (h->current_mba == 22) ||
-        (h->current_mba == 33) || (!IS_16X16(h->mtype))) {
-        h->current_mv_x = 0;
-        h->current_mv_y = 0;
+    if (!IS_16X16(h->mtype)) {
+        s->last_mv[0][0][0] = 0;
+        s->last_mv[0][0][1] = 0;
+    }
+}
+
+static av_cold void init_uni_h261_rl_tab(RLTable *rl, uint32_t *bits_tab,
+                                         uint8_t *len_tab)
+{
+    int slevel, run, last;
+    /* Store one length per (last, run, level) in len_tab; bits_tab is never accessed here. */
+    av_assert0(MAX_LEVEL >= 64); /* the loops below reach |level| == 64 */
+    av_assert0(MAX_RUN   >= 63); /* ... and run == 63 */
+
+    for(slevel=-64; slevel<64; slevel++){
+        if(slevel==0) continue; /* slots for level 0 are never written by this function */
+        for(run=0; run<64; run++){
+            for(last=0; last<=1; last++){
+                const int index= UNI_ENC_INDEX(last, run, slevel+64); /* +64 keeps the level index non-negative */
+                int level= slevel < 0 ? -slevel : slevel; /* magnitude of slevel */
+                int len, code;
+
+                len_tab[index]= 100; /* starting value; the two candidates below can only lower it */
+
+                /* ESC0: candidate from the regular table entry for (run, level), looked up with last=0 */
+                code= get_rl_index(rl, 0, run, level);
+                len=  rl->table_vlc[code][1] + 1; /* NOTE(review): +1 is presumably the sign bit - confirm */
+                if(last)
+                    len += 2; /* NOTE(review): presumably the cost of terminating the block - confirm */
+
+                if(code!=rl->n && len < len_tab[index]){ /* candidate ignored when the lookup returned rl->n */
+                    len_tab [index]= len;
+                }
+                /* ESC: candidate from entry rl->n; NOTE(review): omits the 6+8 bits h261_encode_block() writes after it - confirm */
+                len = rl->table_vlc[rl->n][1];
+                if(last)
+                    len += 2; /* same adjustment as in the ESC0 case */
+
+                if(len < len_tab[index]){
+                    len_tab [index]= len;
+                }
+            }
+        }
     }
 }
 
@@ -321,6 +366,12 @@ av_cold void ff_h261_encode_init(MpegEncContext *s)
     s->max_qcoeff       = 127;
     s->y_dc_scale_table =
     s->c_dc_scale_table = ff_mpeg1_dc_scale_table;
+    s->ac_esc_length    = 6+6+8;
+
+    init_uni_h261_rl_tab(&ff_h261_rl_tcoeff, NULL, uni_h261_rl_len);
+
+    s->intra_ac_vlc_length      = s->inter_ac_vlc_length      = uni_h261_rl_len;
+    s->intra_ac_vlc_last_length = s->inter_ac_vlc_last_length = uni_h261_rl_len + 128*64;
 }
 
 FF_MPV_GENERIC_CLASS(h261)
diff --git a/libavcodec/h263.c b/libavcodec/h263.c
index 9019548..b08e32e 100644
--- a/libavcodec/h263.c
+++ b/libavcodec/h263.c
@@ -5,20 +5,20 @@
  * Copyright (c) 2001 Juan J. Sierralta P
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/h263.h b/libavcodec/h263.h
index dbbe7ce..54ce2a5 100644
--- a/libavcodec/h263.h
+++ b/libavcodec/h263.h
@@ -1,20 +1,20 @@
 /*
  * H263 internal header
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #ifndef AVCODEC_H263_H
@@ -67,8 +67,8 @@ extern RLTable ff_rl_intra_aic;
 
 extern const uint16_t ff_h263_format[8][2];
 extern const uint8_t ff_modified_quant_tab[2][32];
-extern uint16_t ff_mba_max[6];
-extern uint8_t ff_mba_length[7];
+extern const uint16_t ff_mba_max[6];
+extern const uint8_t ff_mba_length[7];
 
 extern uint8_t ff_h263_static_rl_table_store[2][2][2*MAX_RUN + MAX_LEVEL + 3];
 
@@ -120,7 +120,6 @@ int av_const h263_get_picture_format(int width, int height);
 
 void ff_clean_h263_qscales(MpegEncContext *s);
 int ff_h263_resync(MpegEncContext *s);
-const uint8_t *ff_h263_find_resync_marker(const uint8_t *p, const uint8_t *end);
 int ff_h263_get_gob_height(MpegEncContext *s);
 void ff_h263_encode_motion(MpegEncContext * s, int val, int f_code);
 
diff --git a/libavcodec/h263_parser.c b/libavcodec/h263_parser.c
index 71e047a..2e7d493 100644
--- a/libavcodec/h263_parser.c
+++ b/libavcodec/h263_parser.c
@@ -2,20 +2,20 @@
  * H.263 parser
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -70,12 +70,16 @@ static int h263_parse(AVCodecParserContext *s,
     ParseContext *pc = s->priv_data;
     int next;
 
-    next= ff_h263_find_frame_end(pc, buf, buf_size);
+    if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
+        next = buf_size;
+    } else {
+        next= ff_h263_find_frame_end(pc, buf, buf_size);
 
-    if (ff_combine_frame(pc, next, &buf, &buf_size) < 0) {
-        *poutbuf = NULL;
-        *poutbuf_size = 0;
-        return buf_size;
+        if (ff_combine_frame(pc, next, &buf, &buf_size) < 0) {
+            *poutbuf = NULL;
+            *poutbuf_size = 0;
+            return buf_size;
+        }
     }
 
     *poutbuf = buf;
diff --git a/libavcodec/h263_parser.h b/libavcodec/h263_parser.h
index 5bd715f..565a222 100644
--- a/libavcodec/h263_parser.h
+++ b/libavcodec/h263_parser.h
@@ -2,20 +2,20 @@
  * H.263 parser
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/h263data.h b/libavcodec/h263data.h
index c966aab..1cd965f 100644
--- a/libavcodec/h263data.h
+++ b/libavcodec/h263data.h
@@ -4,20 +4,20 @@
  * copyright (c) 2001 Juan J. Sierralta P
  * copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -264,11 +264,11 @@ const uint8_t ff_h263_chroma_qscale_table[32]={
     0, 1, 2, 3, 4, 5, 6, 6, 7, 8, 9, 9,10,10,11,11,12,12,12,13,13,13,14,14,14,14,14,15,15,15,15,15
 };
 
-uint16_t ff_mba_max[6]={
+const uint16_t ff_mba_max[6]={
      47,  98, 395,1583,6335,9215
 };
 
-uint8_t ff_mba_length[7]={
+const uint8_t ff_mba_length[7]={
       6,   7,   9,  11,  13,  14,  14
 };
 
diff --git a/libavcodec/h263dec.c b/libavcodec/h263dec.c
index cdd5544..31ec642 100644
--- a/libavcodec/h263dec.c
+++ b/libavcodec/h263dec.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2001 Fabrice Bellard
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,6 +25,8 @@
  * H.263 decoder.
  */
 
+#define UNCHECKED_BITSTREAM_READER 1
+
 #include "libavutil/cpu.h"
 #include "avcodec.h"
 #include "error_resilience.h"
@@ -38,6 +40,7 @@
 #include "mpegvideo.h"
 #include "msmpeg4.h"
 #include "qpeldsp.h"
+#include "vdpau_internal.h"
 #include "thread.h"
 
 av_cold int ff_h263_decode_init(AVCodecContext *avctx)
@@ -65,6 +68,7 @@ av_cold int ff_h263_decode_init(AVCodecContext *avctx)
     /* select sub codec */
     switch (avctx->codec->id) {
     case AV_CODEC_ID_H263:
+    case AV_CODEC_ID_H263P:
         s->unrestricted_mv = 0;
         avctx->chroma_sample_location = AVCHROMA_LOC_CENTER;
         break;
@@ -111,8 +115,12 @@ av_cold int ff_h263_decode_init(AVCodecContext *avctx)
     }
     s->codec_id    = avctx->codec->id;
 
+    if (avctx->stream_codec_tag == AV_RL32("l263") && avctx->extradata_size == 56 && avctx->extradata[0] == 1)
+        s->ehc_mode = 1;
+
     /* for h263, we allocate the images after having read the header */
     if (avctx->codec->id != AV_CODEC_ID_H263 &&
+        avctx->codec->id != AV_CODEC_ID_H263P &&
         avctx->codec->id != AV_CODEC_ID_MPEG4)
         if ((ret = ff_MPV_common_init(s)) < 0)
             return ret;
@@ -165,7 +173,7 @@ static int decode_slice(MpegEncContext *s)
 {
     const int part_mask = s->partitioned_frame
                           ? (ER_AC_END | ER_AC_ERROR) : 0x7F;
-    const int mb_size = 16;
+    const int mb_size   = 16 >> s->avctx->lowres;
     int ret;
 
     s->last_resync_gb   = s->gb;
@@ -177,10 +185,10 @@ static int decode_slice(MpegEncContext *s)
 
     if (s->avctx->hwaccel) {
         const uint8_t *start = s->gb.buffer + get_bits_count(&s->gb) / 8;
-        const uint8_t *end   = ff_h263_find_resync_marker(start + 1,
-                                                          s->gb.buffer_end);
-        skip_bits_long(&s->gb, 8 * (end - start));
-        return s->avctx->hwaccel->decode_slice(s->avctx, start, end - start);
+        ret = s->avctx->hwaccel->decode_slice(s->avctx, start, s->gb.buffer_end - start);
+        // ensure we exit decode loop
+        s->mb_y = s->mb_height;
+        return ret;
     }
 
     if (s->partitioned_frame) {
@@ -229,6 +237,8 @@ static int decode_slice(MpegEncContext *s)
             s->mv_type = MV_TYPE_16X16;
             av_dlog(s, "%d %d %06X\n",
                     ret, get_bits_count(&s->gb), show_bits(&s->gb, 24));
+
+            tprintf(NULL, "Decoding MB at %dx%d\n", s->mb_x, s->mb_y);
             ret = s->decode_mb(s, s->block);
 
             if (s->pict_type != AV_PICTURE_TYPE_B)
@@ -265,6 +275,8 @@ static int decode_slice(MpegEncContext *s)
                 ff_er_add_slice(&s->er, s->resync_mb_x, s->resync_mb_y,
                                 s->mb_x, s->mb_y, ER_MB_ERROR & part_mask);
 
+                if (s->err_recognition & AV_EF_IGNORE_ERR)
+                    continue;
                 return AVERROR_INVALIDDATA;
             }
 
@@ -279,7 +291,7 @@ static int decode_slice(MpegEncContext *s)
         s->mb_x = 0;
     }
 
-    assert(s->mb_x == 0 && s->mb_y == s->mb_height);
+    av_assert1(s->mb_x == 0 && s->mb_y == s->mb_height);
 
     if (s->codec_id == AV_CODEC_ID_MPEG4         &&
         (s->workaround_bugs & FF_BUG_AUTODETECT) &&
@@ -292,7 +304,7 @@ static int decode_slice(MpegEncContext *s)
     if (s->codec_id == AV_CODEC_ID_MPEG4         &&
         (s->workaround_bugs & FF_BUG_AUTODETECT) &&
         get_bits_left(&s->gb) >= 0               &&
-        get_bits_left(&s->gb) < 48               &&
+        get_bits_left(&s->gb) < 137              &&
         !s->data_partitioning) {
         const int bits_count = get_bits_count(&s->gb);
         const int bits_left  = s->gb.size_in_bits - bits_count;
@@ -313,6 +325,17 @@ static int decode_slice(MpegEncContext *s)
         }
     }
 
+    if (s->codec_id == AV_CODEC_ID_H263          &&
+        (s->workaround_bugs & FF_BUG_AUTODETECT) &&
+        get_bits_left(&s->gb) >= 8               &&
+        get_bits_left(&s->gb) < 300              &&
+        s->pict_type == AV_PICTURE_TYPE_I        &&
+        show_bits(&s->gb, 8) == 0                &&
+        !s->data_partitioning) {
+
+        s->padding_bug_score += 32;
+    }
+
     if (s->workaround_bugs & FF_BUG_AUTODETECT) {
         if (s->padding_bug_score > -2 && !s->data_partitioning)
             s->workaround_bugs |= FF_BUG_NO_PADDING;
@@ -332,7 +355,7 @@ static int decode_slice(MpegEncContext *s)
         /* buggy padding but the frame should still end approximately at
          * the bitstream end */
         if ((s->workaround_bugs & FF_BUG_NO_PADDING) &&
-            (s->err_recognition & AV_EF_BUFFER))
+            (s->err_recognition & (AV_EF_BUFFER|AV_EF_AGGRESSIVE)))
             max_extra += 48;
         else if ((s->workaround_bugs & FF_BUG_NO_PADDING))
             max_extra += 256 * 256 * 256 * 64;
@@ -367,6 +390,7 @@ int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     int buf_size       = avpkt->size;
     MpegEncContext *s  = avctx->priv_data;
     int ret;
+    int slice_ret = 0;
     AVFrame *pict = data;
 
     s->flags  = avctx->flags;
@@ -393,6 +417,8 @@ int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             next = ff_mpeg4_find_frame_end(&s->parse_context, buf, buf_size);
         } else if (CONFIG_H263_DECODER && s->codec_id == AV_CODEC_ID_H263) {
             next = ff_h263_find_frame_end(&s->parse_context, buf, buf_size);
+        } else if (CONFIG_H263P_DECODER && s->codec_id == AV_CODEC_ID_H263P) {
+            next = ff_h263_find_frame_end(&s->parse_context, buf, buf_size);
         } else {
             av_log(s->avctx, AV_LOG_ERROR,
                    "this codec does not support truncated bitstreams\n");
@@ -404,13 +430,27 @@ int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             return buf_size;
     }
 
+retry:
+    if (s->divx_packed && s->bitstream_buffer_size) {
+        int i;
+        for(i=0; i < buf_size-3; i++) {
+            if (buf[i]==0 && buf[i+1]==0 && buf[i+2]==1) {
+                if (buf[i+3]==0xB0) {
+                    av_log(s->avctx, AV_LOG_WARNING, "Discarding excessive bitstream in packed xvid\n");
+                    s->bitstream_buffer_size = 0;
+                }
+                break;
+            }
+        }
+    }
+
     if (s->bitstream_buffer_size && (s->divx_packed || buf_size < 20)) // divx 5.01+/xvid frame reorder
         ret = init_get_bits8(&s->gb, s->bitstream_buffer,
                              s->bitstream_buffer_size);
     else
         ret = init_get_bits8(&s->gb, buf, buf_size);
-    s->bitstream_buffer_size = 0;
 
+    s->bitstream_buffer_size = 0;
     if (ret < 0)
         return ret;
 
@@ -437,11 +477,8 @@ int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         if (s->avctx->extradata_size && s->picture_number == 0) {
             GetBitContext gb;
 
-            ret = init_get_bits8(&gb, s->avctx->extradata,
-                                 s->avctx->extradata_size);
-            if (ret < 0)
-                return ret;
-            ff_mpeg4_decode_picture_header(avctx->priv_data, &gb);
+            if (init_get_bits8(&gb, s->avctx->extradata, s->avctx->extradata_size) >= 0 )
+                ff_mpeg4_decode_picture_header(avctx->priv_data, &gb);
         }
         ret = ff_mpeg4_decode_picture_header(avctx->priv_data, &s->gb);
     } else if (CONFIG_H263I_DECODER && s->codec_id == AV_CODEC_ID_H263I) {
@@ -452,6 +489,14 @@ int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         ret = ff_h263_decode_picture_header(s);
     }
 
+    if (ret < 0 || ret == FRAME_SKIPPED) {
+        if (   s->width  != avctx->coded_width
+            || s->height != avctx->coded_height) {
+                av_log(s->avctx, AV_LOG_WARNING, "Reverting picture dimensions change due to header decoding failure\n");
+                s->width = avctx->coded_width;
+                s->height= avctx->coded_height;
+        }
+    }
     if (ret == FRAME_SKIPPED)
         return get_consumed_bytes(s, buf_size);
 
@@ -463,25 +508,9 @@ int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
 
     avctx->has_b_frames = !s->low_delay;
 
-#define SET_QPEL_FUNC(postfix1, postfix2)                           \
-    s->qdsp.put_        ## postfix1 = ff_put_        ## postfix2;   \
-    s->qdsp.put_no_rnd_ ## postfix1 = ff_put_no_rnd_ ## postfix2;   \
-    s->qdsp.avg_        ## postfix1 = ff_avg_        ## postfix2;
-
-    if (s->workaround_bugs & FF_BUG_STD_QPEL) {
-        SET_QPEL_FUNC(qpel_pixels_tab[0][5], qpel16_mc11_old_c)
-        SET_QPEL_FUNC(qpel_pixels_tab[0][7], qpel16_mc31_old_c)
-        SET_QPEL_FUNC(qpel_pixels_tab[0][9], qpel16_mc12_old_c)
-        SET_QPEL_FUNC(qpel_pixels_tab[0][11], qpel16_mc32_old_c)
-        SET_QPEL_FUNC(qpel_pixels_tab[0][13], qpel16_mc13_old_c)
-        SET_QPEL_FUNC(qpel_pixels_tab[0][15], qpel16_mc33_old_c)
-
-        SET_QPEL_FUNC(qpel_pixels_tab[1][5], qpel8_mc11_old_c)
-        SET_QPEL_FUNC(qpel_pixels_tab[1][7], qpel8_mc31_old_c)
-        SET_QPEL_FUNC(qpel_pixels_tab[1][9], qpel8_mc12_old_c)
-        SET_QPEL_FUNC(qpel_pixels_tab[1][11], qpel8_mc32_old_c)
-        SET_QPEL_FUNC(qpel_pixels_tab[1][13], qpel8_mc13_old_c)
-        SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_old_c)
+    if (CONFIG_MPEG4_DECODER && avctx->codec_id == AV_CODEC_ID_MPEG4) {
+        if (ff_mpeg4_workaround_bugs(avctx) == 1)
+            goto retry;
     }
 
     /* After H263 & mpeg4 header decode we have the height, width,
@@ -545,6 +574,11 @@ int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     if (!s->divx_packed && !avctx->hwaccel)
         ff_thread_finish_setup(avctx);
 
+    if (CONFIG_MPEG4_VDPAU_DECODER && (s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)) {
+        ff_vdpau_mpeg4_decode_picture(avctx->priv_data, s->gb.buffer, s->gb.buffer_end - s->gb.buffer);
+        goto frame_end;
+    }
+
     if (avctx->hwaccel) {
         ret = avctx->hwaccel->start_frame(avctx, s->gb.buffer,
                                           s->gb.buffer_end - s->gb.buffer);
@@ -562,14 +596,14 @@ int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         if (ret < 0)
             return ret;
         if (ret == 1)
-            goto intrax8_decoded;
+            goto frame_end;
     }
 
     /* decode each macroblock */
     s->mb_x = 0;
     s->mb_y = 0;
 
-    ret = decode_slice(s);
+    slice_ret = decode_slice(s);
     while (s->mb_y < s->mb_height) {
         if (s->msmpeg4_version) {
             if (s->slice_height == 0 || s->mb_x != 0 ||
@@ -587,7 +621,7 @@ int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             ff_mpeg4_clean_buffers(s);
 
         if (decode_slice(s) < 0)
-            ret = AVERROR_INVALIDDATA;
+            slice_ret = AVERROR_INVALIDDATA;
     }
 
     if (s->msmpeg4_version && s->msmpeg4_version < 4 &&
@@ -596,12 +630,8 @@ int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             ff_msmpeg4_decode_ext_header(s, buf_size) < 0)
             s->er.error_status_table[s->mb_num - 1] = ER_MB_ERROR;
 
-    assert(s->bitstream_buffer_size == 0);
-
-    if (CONFIG_MPEG4_DECODER && avctx->codec_id == AV_CODEC_ID_MPEG4)
-        ff_mpeg4_frame_end(avctx, buf, buf_size);
-
-intrax8_decoded:
+    av_assert1(s->bitstream_buffer_size == 0);
+frame_end:
     ff_er_frame_end(&s->er);
 
     if (avctx->hwaccel) {
@@ -612,26 +642,46 @@ intrax8_decoded:
 
     ff_MPV_frame_end(s);
 
+    if (CONFIG_MPEG4_DECODER && avctx->codec_id == AV_CODEC_ID_MPEG4)
+        ff_mpeg4_frame_end(avctx, buf, buf_size);
+
     if (!s->divx_packed && avctx->hwaccel)
         ff_thread_finish_setup(avctx);
 
-    assert(s->current_picture.f->pict_type ==
-           s->current_picture_ptr->f->pict_type);
-    assert(s->current_picture.f->pict_type == s->pict_type);
+    av_assert1(s->current_picture.f->pict_type == s->current_picture_ptr->f->pict_type);
+    av_assert1(s->current_picture.f->pict_type == s->pict_type);
     if (s->pict_type == AV_PICTURE_TYPE_B || s->low_delay) {
         if ((ret = av_frame_ref(pict, s->current_picture_ptr->f)) < 0)
             return ret;
-        ff_print_debug_info(s, s->current_picture_ptr);
+        ff_print_debug_info(s, s->current_picture_ptr, pict);
+        ff_mpv_export_qp_table(s, pict, s->current_picture_ptr, FF_QSCALE_TYPE_MPEG1);
     } else if (s->last_picture_ptr != NULL) {
         if ((ret = av_frame_ref(pict, s->last_picture_ptr->f)) < 0)
             return ret;
-        ff_print_debug_info(s, s->last_picture_ptr);
+        ff_print_debug_info(s, s->last_picture_ptr, pict);
+        ff_mpv_export_qp_table(s, pict, s->last_picture_ptr, FF_QSCALE_TYPE_MPEG1);
     }
 
-    if (s->last_picture_ptr || s->low_delay)
+    if (s->last_picture_ptr || s->low_delay) {
+        if (   pict->format == AV_PIX_FMT_YUV420P
+            && (s->codec_tag == AV_RL32("GEOV") || s->codec_tag == AV_RL32("GEOX"))) {
+            int x, y, p;
+            av_frame_make_writable(pict);
+            for (p=0; p<3; p++) {
+                int w = FF_CEIL_RSHIFT(pict-> width, !!p);
+                int h = FF_CEIL_RSHIFT(pict->height, !!p);
+                int linesize = pict->linesize[p];
+                for (y=0; y<(h>>1); y++)
+                    for (x=0; x<w; x++)
+                        FFSWAP(int,
+                               pict->data[p][x + y*linesize],
+                               pict->data[p][x + (h-1-y)*linesize]);
+            }
+        }
         *got_frame = 1;
+    }
 
-    if (ret && (avctx->err_recognition & AV_EF_EXPLODE))
-        return ret;
+    if (slice_ret < 0 && (avctx->err_recognition & AV_EF_EXPLODE))
+        return slice_ret; /* propagate the slice decoding error; ret holds an unrelated, non-negative value here */
     else
         return get_consumed_bytes(s, buf_size);
@@ -660,5 +710,22 @@ AVCodec ff_h263_decoder = {
     .capabilities   = CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 |
                       CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
     .flush          = ff_mpeg_flush,
+    .max_lowres     = 3,
+    .pix_fmts       = ff_h263_hwaccel_pixfmt_list_420,
+};
+
+AVCodec ff_h263p_decoder = {
+    .name           = "h263p",
+    .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996, H.263+ / H.263-1998 / H.263 version 2"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_H263P,
+    .priv_data_size = sizeof(MpegEncContext),
+    .init           = ff_h263_decode_init,
+    .close          = ff_h263_decode_end,
+    .decode         = ff_h263_decode_frame,
+    .capabilities   = CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 |
+                      CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
+    .flush          = ff_mpeg_flush,
+    .max_lowres     = 3,
     .pix_fmts       = ff_h263_hwaccel_pixfmt_list_420,
 };
diff --git a/libavcodec/h263dsp.c b/libavcodec/h263dsp.c
index 70ecdb9..a70ff24 100644
--- a/libavcodec/h263dsp.c
+++ b/libavcodec/h263dsp.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/h263dsp.h b/libavcodec/h263dsp.h
index 40f041c..d2cc2ff 100644
--- a/libavcodec/h263dsp.h
+++ b/libavcodec/h263dsp.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index ba30e5d..8fa35c7 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... decoder
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,9 +25,12 @@
  * @author Michael Niedermayer <michaelni@gmx.at>
  */
 
+#define UNCHECKED_BITSTREAM_READER 1
+
 #include "libavutil/avassert.h"
 #include "libavutil/display.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
 #include "libavutil/stereo3d.h"
 #include "libavutil/timer.h"
 #include "internal.h"
@@ -46,11 +49,18 @@
 #include "rectangle.h"
 #include "svq3.h"
 #include "thread.h"
+#include "vdpau_internal.h"
 
 #include <assert.h>
 
 const uint16_t ff_h264_mb_sizes[4] = { 256, 384, 512, 768 };
 
+int avpriv_h264_has_num_reorder_frames(AVCodecContext *avctx)
+{
+    H264Context *h = avctx->priv_data;
+    return h ? h->sps.num_reorder_frames : 0;
+}
+
 static void h264_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type,
                               int (*mv)[2][4][2],
                               int mb_x, int mb_y, int mb_intra, int mb_skipped)
@@ -61,19 +71,28 @@ static void h264_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type,
     h->mb_y  = mb_y;
     h->mb_xy = mb_x + mb_y * h->mb_stride;
     memset(h->non_zero_count_cache, 0, sizeof(h->non_zero_count_cache));
-    assert(ref >= 0);
+    av_assert1(ref >= 0);
     /* FIXME: It is possible albeit uncommon that slice references
      * differ between slices. We take the easy approach and ignore
      * it for now. If this turns out to have any relevance in
      * practice then correct remapping should be added. */
     if (ref >= h->ref_count[0])
         ref = 0;
+    if (!h->ref_list[0][ref].f.data[0]) {
+        av_log(h->avctx, AV_LOG_DEBUG, "Reference not available for error concealing\n");
+        ref = 0;
+    }
+    if ((h->ref_list[0][ref].reference&3) != 3) {
+        av_log(h->avctx, AV_LOG_DEBUG, "Reference invalid\n");
+        return;
+    }
     fill_rectangle(&h->cur_pic.ref_index[0][4 * h->mb_xy],
                    2, 2, 2, ref, 1);
     fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
     fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8,
                    pack16to32((*mv)[0][0][0], (*mv)[0][0][1]), 4);
-    assert(!FRAME_MBAFF(h));
+    h->mb_mbaff =
+    h->mb_field_decoding_flag = 0;
     ff_h264_hl_decode_mb(h);
 }
 
@@ -274,21 +293,26 @@ const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src,
     }
 #endif
 
-    if (i >= length - 1) { // no escaped 0
-        *dst_length = length;
-        *consumed   = length + 1; // +1 for the header
-        return src;
-    }
-
     // use second escape buffer for inter data
     bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0;
-    av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx],
-                   length + FF_INPUT_BUFFER_PADDING_SIZE);
+
+    av_fast_padded_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+MAX_MBPAIR_SIZE);
     dst = h->rbsp_buffer[bufidx];
 
     if (dst == NULL)
         return NULL;
 
+    if(i>=length-1){ //no escaped 0
+        *dst_length= length;
+        *consumed= length+1; //+1 for the header
+        if(h->avctx->flags2 & CODEC_FLAG2_FAST){
+            return src;
+        }else{
+            memcpy(dst, src, length);
+            return dst;
+        }
+    }
+
     memcpy(dst, src, i);
     si = di = i;
     while (si + 2 < length) {
@@ -404,7 +428,7 @@ void ff_h264_free_tables(H264Context *h, int free_rbsp)
 int ff_h264_alloc_tables(H264Context *h)
 {
     const int big_mb_num = h->mb_stride * (h->mb_height + 1);
-    const int row_mb_num = h->mb_stride * 2 * h->avctx->thread_count;
+    const int row_mb_num = 2*h->mb_stride*FFMAX(h->avctx->thread_count, 1);
     int x, y, i;
 
     FF_ALLOCZ_OR_GOTO(h->avctx, h->intra4x4_pred_mode,
@@ -541,20 +565,23 @@ fail:
 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
                             int parse_extradata);
 
-int ff_h264_decode_extradata(H264Context *h)
+int ff_h264_decode_extradata(H264Context *h, const uint8_t *buf, int size)
 {
     AVCodecContext *avctx = h->avctx;
     int ret;
 
-    if (avctx->extradata[0] == 1) {
+    if (!buf || size <= 0)
+        return -1;
+
+    if (buf[0] == 1) {
         int i, cnt, nalsize;
-        unsigned char *p = avctx->extradata;
+        const unsigned char *p = buf;
 
         h->is_avc = 1;
 
-        if (avctx->extradata_size < 7) {
+        if (size < 7) {
             av_log(avctx, AV_LOG_ERROR,
-                   "avcC %d too short\n", avctx->extradata_size);
+                   "avcC %d too short\n", size);
             return AVERROR_INVALIDDATA;
         }
         /* sps and pps in the avcC always have length coded with 2 bytes,
@@ -565,7 +592,7 @@ int ff_h264_decode_extradata(H264Context *h)
         p  += 6;
         for (i = 0; i < cnt; i++) {
             nalsize = AV_RB16(p) + 2;
-            if (p - avctx->extradata + nalsize > avctx->extradata_size)
+            if(nalsize > size - (p-buf))
                 return AVERROR_INVALIDDATA;
             ret = decode_nal_units(h, p, nalsize, 1);
             if (ret < 0) {
@@ -579,7 +606,7 @@ int ff_h264_decode_extradata(H264Context *h)
         cnt = *(p++); // Number of pps
         for (i = 0; i < cnt; i++) {
             nalsize = AV_RB16(p) + 2;
-            if (p - avctx->extradata + nalsize > avctx->extradata_size)
+            if(nalsize > size - (p-buf))
                 return AVERROR_INVALIDDATA;
             ret = decode_nal_units(h, p, nalsize, 1);
             if (ret < 0) {
@@ -590,14 +617,14 @@ int ff_h264_decode_extradata(H264Context *h)
             p += nalsize;
         }
         // Store right nal length size that will be used to parse all other nals
-        h->nal_length_size = (avctx->extradata[4] & 0x03) + 1;
+        h->nal_length_size = (buf[4] & 0x03) + 1;
     } else {
         h->is_avc = 0;
-        ret = decode_nal_units(h, avctx->extradata, avctx->extradata_size, 1);
+        ret = decode_nal_units(h, buf, size, 1);
         if (ret < 0)
             return ret;
     }
-    return 0;
+    return size;
 }
 
 av_cold int ff_h264_decode_init(AVCodecContext *avctx)
@@ -611,12 +638,17 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx)
     h->bit_depth_luma    = 8;
     h->chroma_format_idc = 1;
 
+    h->avctx->bits_per_raw_sample = 8;
+    h->cur_chroma_format_idc = 1;
+
     ff_h264dsp_init(&h->h264dsp, 8, 1);
+    av_assert0(h->sps.bit_depth_chroma == 0);
     ff_h264chroma_init(&h->h264chroma, h->sps.bit_depth_chroma);
     ff_h264qpel_init(&h->h264qpel, 8);
     ff_h264_pred_init(&h->hpc, h->avctx->codec_id, 8, 1);
 
     h->dequant_coeff_pps = -1;
+    h->current_sps_id = -1;
 
     /* needed so that IDCT permutation is known early */
     if (CONFIG_ERROR_RESILIENCE)
@@ -650,20 +682,26 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx)
     for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
         h->last_pocs[i] = INT_MIN;
     h->prev_poc_msb = 1 << 16;
+    h->prev_frame_num = -1;
     h->x264_build   = -1;
+    h->sei_fpa.frame_packing_arrangement_cancel_flag = -1;
     ff_h264_reset_sei(h);
-    h->recovery_frame = -1;
-    h->frame_recovered = 0;
     if (avctx->codec_id == AV_CODEC_ID_H264) {
-        if (avctx->ticks_per_frame == 1)
-            h->avctx->time_base.den *= 2;
+        if (avctx->ticks_per_frame == 1) {
+            if(h->avctx->time_base.den < INT_MAX/2) {
+                h->avctx->time_base.den *= 2;
+            } else
+                h->avctx->time_base.num /= 2;
+        }
         avctx->ticks_per_frame = 2;
     }
 
     if (avctx->extradata_size > 0 && avctx->extradata) {
-       ret = ff_h264_decode_extradata(h);
-       if (ret < 0)
-           return ret;
+        ret = ff_h264_decode_extradata(h, avctx->extradata, avctx->extradata_size);
+        if (ret < 0) {
+            ff_h264_free_context(h);
+            return ret;
+        }
     }
 
     if (h->sps.bitstream_restriction_flag &&
@@ -674,6 +712,8 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx)
 
     avctx->internal->allocate_progress = 1;
 
+    ff_h264_flush_change(h);
+
     return 0;
 }
 
@@ -686,6 +726,10 @@ static int decode_init_thread_copy(AVCodecContext *avctx)
     memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
     memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
 
+    h->rbsp_buffer[0] = NULL;
+    h->rbsp_buffer[1] = NULL;
+    h->rbsp_buffer_size[0] = 0;
+    h->rbsp_buffer_size[1] = 0;
     h->context_initialized = 0;
 
     return 0;
@@ -704,7 +748,6 @@ static void decode_postinit(H264Context *h, int setup_finished)
     H264Picture *out = h->cur_pic_ptr;
     H264Picture *cur = h->cur_pic_ptr;
     int i, pics, out_of_order, out_idx;
-    int invalid = 0, cnt = 0;
 
     h->cur_pic_ptr->f.pict_type = h->pict_type;
 
@@ -840,6 +883,9 @@ static void decode_postinit(H264Context *h, int setup_finished)
                                h->sei_vflip, h->sei_hflip);
     }
 
+    cur->mmco_reset = h->mmco_reset;
+    h->mmco_reset = 0;
+
     // FIXME do something with unavailable reference frames
 
     /* Sort B-frames into display order */
@@ -856,107 +902,71 @@ static void decode_postinit(H264Context *h, int setup_finished)
         h->low_delay           = 0;
     }
 
+    for (i = 0; 1; i++) {
+        if(i == MAX_DELAYED_PIC_COUNT || cur->poc < h->last_pocs[i]){
+            if(i)
+                h->last_pocs[i-1] = cur->poc;
+            break;
+        } else if(i) {
+            h->last_pocs[i-1]= h->last_pocs[i];
+        }
+    }
+    out_of_order = MAX_DELAYED_PIC_COUNT - i;
+    if(   cur->f.pict_type == AV_PICTURE_TYPE_B
+       || (h->last_pocs[MAX_DELAYED_PIC_COUNT-2] > INT_MIN && h->last_pocs[MAX_DELAYED_PIC_COUNT-1] - h->last_pocs[MAX_DELAYED_PIC_COUNT-2] > 2))
+        out_of_order = FFMAX(out_of_order, 1);
+    if (out_of_order == MAX_DELAYED_PIC_COUNT) {
+        av_log(h->avctx, AV_LOG_VERBOSE, "Invalid POC %d<%d\n", cur->poc, h->last_pocs[0]);
+        for (i = 1; i < MAX_DELAYED_PIC_COUNT; i++)
+            h->last_pocs[i] = INT_MIN;
+        h->last_pocs[0] = cur->poc;
+        cur->mmco_reset = 1;
+    } else if(h->avctx->has_b_frames < out_of_order && !h->sps.bitstream_restriction_flag){
+        av_log(h->avctx, AV_LOG_VERBOSE, "Increasing reorder buffer to %d\n", out_of_order);
+        h->avctx->has_b_frames = out_of_order;
+        h->low_delay = 0;
+    }
+
     pics = 0;
     while (h->delayed_pic[pics])
         pics++;
 
-    assert(pics <= MAX_DELAYED_PIC_COUNT);
+    av_assert0(pics <= MAX_DELAYED_PIC_COUNT);
 
     h->delayed_pic[pics++] = cur;
     if (cur->reference == 0)
         cur->reference = DELAYED_PIC_REF;
 
-    /* Frame reordering. This code takes pictures from coding order and sorts
-     * them by their incremental POC value into display order. It supports POC
-     * gaps, MMCO reset codes and random resets.
-     * A "display group" can start either with a IDR frame (f.key_frame = 1),
-     * and/or can be closed down with a MMCO reset code. In sequences where
-     * there is no delay, we can't detect that (since the frame was already
-     * output to the user), so we also set h->mmco_reset to detect the MMCO
-     * reset code.
-     * FIXME: if we detect insufficient delays (as per h->avctx->has_b_frames),
-     * we increase the delay between input and output. All frames affected by
-     * the lag (e.g. those that should have been output before another frame
-     * that we already returned to the user) will be dropped. This is a bug
-     * that we will fix later. */
-    for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) {
-        cnt     += out->poc < h->last_pocs[i];
-        invalid += out->poc == INT_MIN;
-    }
-    if (!h->mmco_reset && !cur->f.key_frame &&
-        cnt + invalid == MAX_DELAYED_PIC_COUNT && cnt > 0) {
-        h->mmco_reset = 2;
-        if (pics > 1)
-            h->delayed_pic[pics - 2]->mmco_reset = 2;
-    }
-    if (h->mmco_reset || cur->f.key_frame) {
-        for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
-            h->last_pocs[i] = INT_MIN;
-        cnt     = 0;
-        invalid = MAX_DELAYED_PIC_COUNT;
-    }
     out     = h->delayed_pic[0];
     out_idx = 0;
-    for (i = 1; i < MAX_DELAYED_PIC_COUNT &&
-                h->delayed_pic[i] &&
-                !h->delayed_pic[i - 1]->mmco_reset &&
-                !h->delayed_pic[i]->f.key_frame;
+    for (i = 1; h->delayed_pic[i] &&
+                !h->delayed_pic[i]->f.key_frame &&
+                !h->delayed_pic[i]->mmco_reset;
          i++)
         if (h->delayed_pic[i]->poc < out->poc) {
             out     = h->delayed_pic[i];
             out_idx = i;
         }
     if (h->avctx->has_b_frames == 0 &&
-        (h->delayed_pic[0]->f.key_frame || h->mmco_reset))
+        (h->delayed_pic[0]->f.key_frame || h->delayed_pic[0]->mmco_reset))
         h->next_outputed_poc = INT_MIN;
-    out_of_order = !out->f.key_frame && !h->mmco_reset &&
-                   (out->poc < h->next_outputed_poc);
+    out_of_order = out->poc < h->next_outputed_poc;
 
-    if (h->sps.bitstream_restriction_flag &&
-        h->avctx->has_b_frames >= h->sps.num_reorder_frames) {
-    } else if (out_of_order && pics - 1 == h->avctx->has_b_frames &&
-               h->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT) {
-        if (invalid + cnt < MAX_DELAYED_PIC_COUNT) {
-            h->avctx->has_b_frames = FFMAX(h->avctx->has_b_frames, cnt);
-        }
-        h->low_delay = 0;
-    } else if (h->low_delay &&
-               ((h->next_outputed_poc != INT_MIN &&
-                 out->poc > h->next_outputed_poc + 2) ||
-                cur->f.pict_type == AV_PICTURE_TYPE_B)) {
-        h->low_delay = 0;
-        h->avctx->has_b_frames++;
-    }
-
-    if (pics > h->avctx->has_b_frames) {
+    if (out_of_order || pics > h->avctx->has_b_frames) {
         out->reference &= ~DELAYED_PIC_REF;
         // for frame threading, the owner must be the second field's thread or
         // else the first thread can release the picture and reuse it unsafely
         for (i = out_idx; h->delayed_pic[i]; i++)
             h->delayed_pic[i] = h->delayed_pic[i + 1];
     }
-    memmove(h->last_pocs, &h->last_pocs[1],
-            sizeof(*h->last_pocs) * (MAX_DELAYED_PIC_COUNT - 1));
-    h->last_pocs[MAX_DELAYED_PIC_COUNT - 1] = cur->poc;
     if (!out_of_order && pics > h->avctx->has_b_frames) {
         h->next_output_pic = out;
-        if (out->mmco_reset) {
-            if (out_idx > 0) {
-                h->next_outputed_poc                    = out->poc;
-                h->delayed_pic[out_idx - 1]->mmco_reset = out->mmco_reset;
-            } else {
-                h->next_outputed_poc = INT_MIN;
-            }
-        } else {
-            if (out_idx == 0 && pics > 1 && h->delayed_pic[0]->f.key_frame) {
-                h->next_outputed_poc = INT_MIN;
-            } else {
-                h->next_outputed_poc = out->poc;
-            }
-        }
-        h->mmco_reset = 0;
+        if (out_idx == 0 && h->delayed_pic[0] && (h->delayed_pic[0]->f.key_frame || h->delayed_pic[0]->mmco_reset)) {
+            h->next_outputed_poc = INT_MIN;
+        } else
+            h->next_outputed_poc = out->poc;
     } else {
-        av_log(h->avctx, AV_LOG_DEBUG, "no picture\n");
+        av_log(h->avctx, AV_LOG_DEBUG, "no picture %s\n", out_of_order ? "ooo" : "");
     }
 
     if (h->next_output_pic) {
@@ -1039,24 +1049,33 @@ int ff_pred_weight_table(H264Context *h)
  */
 static void idr(H264Context *h)
 {
+    int i;
     ff_h264_remove_all_refs(h);
     h->prev_frame_num        =
-    h->prev_frame_num_offset =
-    h->prev_poc_msb          =
+    h->prev_frame_num_offset = 0; /* ends the assignment chain: prev_frame_num and prev_frame_num_offset are both zeroed */
+    h->prev_poc_msb          = 1<<16; /* set separately now; same value ff_h264_decode_init() assigns */
     h->prev_poc_lsb          = 0;
+    for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) /* clear the POC history that decode_postinit() compares against */
+        h->last_pocs[i] = INT_MIN;
 }
 
 /* forget old pics after a seek */
 void ff_h264_flush_change(H264Context *h)
 {
-    int i;
-    for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
-        h->last_pocs[i] = INT_MIN;
+    int i, j;
+
     h->outputed_poc          = h->next_outputed_poc = INT_MIN;
     h->prev_interlaced_frame = 1;
     idr(h);
-    if (h->cur_pic_ptr)
+
+    h->prev_frame_num = -1;
+    if (h->cur_pic_ptr) {
         h->cur_pic_ptr->reference = 0;
+        for (j=i=0; h->delayed_pic[i]; i++)
+            if (h->delayed_pic[i] != h->cur_pic_ptr)
+                h->delayed_pic[j++] = h->delayed_pic[i];
+        h->delayed_pic[j] = NULL;
+    }
     h->first_field = 0;
     memset(h->ref_list[0], 0, sizeof(h->ref_list[0]));
     memset(h->ref_list[1], 0, sizeof(h->ref_list[1]));
@@ -1065,6 +1084,9 @@ void ff_h264_flush_change(H264Context *h)
     ff_h264_reset_sei(h);
     h->recovery_frame = -1;
     h->frame_recovered = 0;
+    h->list_count = 0;
+    h->current_slice = 0;
+    h->mmco_reset = 1;
 }
 
 /* forget old pics after a seek */
@@ -1073,7 +1095,7 @@ static void flush_dpb(AVCodecContext *avctx)
     H264Context *h = avctx->priv_data;
     int i;
 
-    for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) {
+    for (i = 0; i <= MAX_DELAYED_PIC_COUNT; i++) {
         if (h->delayed_pic[i])
             h->delayed_pic[i]->reference = 0;
         h->delayed_pic[i] = NULL;
@@ -1222,7 +1244,15 @@ int ff_h264_set_parameter_from_sps(H264Context *h)
 
     if (h->avctx->bits_per_raw_sample != h->sps.bit_depth_luma ||
         h->cur_chroma_format_idc      != h->sps.chroma_format_idc) {
-        if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
+        if (h->avctx->codec &&
+            h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU &&
+            (h->sps.bit_depth_luma != 8 || h->sps.chroma_format_idc > 1)) {
+            av_log(h->avctx, AV_LOG_ERROR,
+                   "VDPAU decoding does not support video colorspace.\n");
+            return AVERROR_INVALIDDATA;
+        }
+        if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 14 &&
+            h->sps.bit_depth_luma != 11 && h->sps.bit_depth_luma != 13) {
             h->avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
             h->cur_chroma_format_idc      = h->sps.chroma_format_idc;
             h->pixel_shift                = h->sps.bit_depth_luma > 8;
@@ -1233,6 +1263,7 @@ int ff_h264_set_parameter_from_sps(H264Context *h)
             ff_h264qpel_init(&h->h264qpel, h->sps.bit_depth_luma);
             ff_h264_pred_init(&h->hpc, h->avctx->codec_id, h->sps.bit_depth_luma,
                               h->sps.chroma_format_idc);
+
             if (CONFIG_ERROR_RESILIENCE)
                 ff_me_cmp_init(&h->mecc, h->avctx);
             ff_videodsp_init(&h->vdsp, h->sps.bit_depth_luma);
@@ -1248,26 +1279,34 @@ int ff_h264_set_parameter_from_sps(H264Context *h)
 int ff_set_ref_count(H264Context *h)
 {
     int ref_count[2], list_count;
-    int num_ref_idx_active_override_flag, max_refs;
+    int num_ref_idx_active_override_flag;
 
     // set defaults, might be overridden a few lines later
     ref_count[0] = h->pps.ref_count[0];
     ref_count[1] = h->pps.ref_count[1];
 
     if (h->slice_type_nos != AV_PICTURE_TYPE_I) {
+        unsigned max[2];
+        max[0] = max[1] = h->picture_structure == PICT_FRAME ? 15 : 31;
+
         if (h->slice_type_nos == AV_PICTURE_TYPE_B)
             h->direct_spatial_mv_pred = get_bits1(&h->gb);
         num_ref_idx_active_override_flag = get_bits1(&h->gb);
 
         if (num_ref_idx_active_override_flag) {
             ref_count[0] = get_ue_golomb(&h->gb) + 1;
-            if (ref_count[0] < 1)
-                return AVERROR_INVALIDDATA;
             if (h->slice_type_nos == AV_PICTURE_TYPE_B) {
                 ref_count[1] = get_ue_golomb(&h->gb) + 1;
-                if (ref_count[1] < 1)
-                    return AVERROR_INVALIDDATA;
-            }
+            } else
+                // full range is spec-ok in this case, even for frames
+                ref_count[1] = 1;
+        }
+
+        if (ref_count[0]-1 > max[0] || ref_count[1]-1 > max[1]){
+            av_log(h->avctx, AV_LOG_ERROR, "reference overflow %u > %u or %u > %u\n", ref_count[0]-1, max[0], ref_count[1]-1, max[1]);
+            h->ref_count[0] = h->ref_count[1] = 0;
+            h->list_count   = 0;
+            return AVERROR_INVALIDDATA;
         }
 
         if (h->slice_type_nos == AV_PICTURE_TYPE_B)
@@ -1279,14 +1318,6 @@ int ff_set_ref_count(H264Context *h)
         ref_count[0] = ref_count[1] = 0;
     }
 
-    max_refs = h->picture_structure == PICT_FRAME ? 16 : 32;
-
-    if (ref_count[0] > max_refs || ref_count[1] > max_refs) {
-        av_log(h->avctx, AV_LOG_ERROR, "reference overflow\n");
-        h->ref_count[0] = h->ref_count[1] = 0;
-        return AVERROR_INVALIDDATA;
-    }
-
     if (list_count != h->list_count ||
         ref_count[0] != h->ref_count[0] ||
         ref_count[1] != h->ref_count[1]) {
@@ -1299,6 +1330,8 @@ int ff_set_ref_count(H264Context *h)
     return 0;
 }
 
+static const uint8_t start_code[] = { 0x00, 0x00, 0x01 }; /* 3-byte prefix passed to ff_vdpau_add_data_chunk() ahead of each slice */
+
 static int find_start_code(const uint8_t *buf, int buf_size,
                            int buf_index, int next_avc)
 {
@@ -1310,10 +1343,12 @@ static int find_start_code(const uint8_t *buf, int buf_size,
             buf[buf_index + 2] == 1)
             break;
 
-    if (buf_index + 3 >= buf_size)
+    buf_index += 3;
+
+    if (buf_index >= buf_size)
         return buf_size;
 
-    return buf_index + 3;
+    return buf_index;
 }
 
 static int get_avc_nalsize(H264Context *h, const uint8_t *buf,
@@ -1359,6 +1394,7 @@ static int get_last_needed_nal(H264Context *h, const uint8_t *buf, int buf_size)
     int nal_index   = 0;
     int buf_index   = 0;
     int nals_needed = 0;
+    int first_slice = 0;
 
     while(1) {
         int nalsize = 0;
@@ -1374,6 +1410,8 @@ static int get_last_needed_nal(H264Context *h, const uint8_t *buf, int buf_size)
             buf_index = find_start_code(buf, buf_size, buf_index, next_avc);
             if (buf_index >= buf_size)
                 break;
+            if (buf_index >= next_avc)
+                continue;
         }
 
         ptr = ff_h264_decode_nal(h, buf + buf_index, &dst_length, &consumed,
@@ -1401,8 +1439,12 @@ static int get_last_needed_nal(H264Context *h, const uint8_t *buf, int buf_size)
         case NAL_IDR_SLICE:
         case NAL_SLICE:
             init_get_bits(&h->gb, ptr, bit_length);
-            if (!get_ue_golomb(&h->gb))
+            if (!get_ue_golomb(&h->gb) ||
+                !first_slice ||
+                first_slice != h->nal_unit_type)
                 nals_needed = nal_index;
+            if (!first_slice)
+                first_slice = h->nal_unit_type;
         }
     }
 
@@ -1419,8 +1461,13 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
     int next_avc;
     int nals_needed = 0; ///< number of NALs that need decoding before the next frame thread starts
     int nal_index;
+    int idr_cleared=0;
     int ret = 0;
 
+    h->nal_unit_type= 0;
+
+    if(!h->slice_context_count)
+         h->slice_context_count= 1;
     h->max_contexts = h->slice_context_count;
     if (!(avctx->flags2 & CODEC_FLAG2_CHUNKS)) {
         h->current_slice = 0;
@@ -1429,6 +1476,13 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
         ff_h264_reset_sei(h);
     }
 
+    if (h->nal_length_size == 4) {
+        if (buf_size > 8 && AV_RB32(buf) == 1 && AV_RB32(buf+5) > (unsigned)buf_size) {
+            h->is_avc = 0;
+        }else if(buf_size > 3 && AV_RB32(buf) > 1 && AV_RB32(buf) <= (unsigned)buf_size)
+            h->is_avc = 1;
+    }
+
     if (avctx->active_thread_type & FF_THREAD_FRAME)
         nals_needed = get_last_needed_nal(h, buf, buf_size);
 
@@ -1454,6 +1508,8 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
                 buf_index = find_start_code(buf, buf_size, buf_index, next_avc);
                 if (buf_index >= buf_size)
                     break;
+                if (buf_index >= next_avc)
+                    continue;
             }
 
             hx = h->thread_context[context_count];
@@ -1470,8 +1526,8 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
 
             if (h->avctx->debug & FF_DEBUG_STARTCODE)
                 av_log(h->avctx, AV_LOG_DEBUG,
-                       "NAL %d at %d/%d length %d\n",
-                       hx->nal_unit_type, buf_index, buf_size, dst_length);
+                       "NAL %d/%d at %d/%d length %d\n",
+                       hx->nal_unit_type, hx->nal_ref_idc, buf_index, buf_size, dst_length);
 
             if (h->is_avc && (nalsize != consumed) && nalsize)
                 av_log(h->avctx, AV_LOG_DEBUG,
@@ -1487,21 +1543,30 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
                 continue;
 
 again:
-            /* Ignore every NAL unit type except PPS and SPS during extradata
+            if (   !(avctx->active_thread_type & FF_THREAD_FRAME)
+                || nals_needed >= nal_index)
+                h->au_pps_id = -1;
+            /* Ignore per frame NAL unit type during extradata
              * parsing. Decoding slices is not possible in codec init
              * with frame-mt */
-            if (parse_extradata && HAVE_THREADS &&
-                (h->avctx->active_thread_type & FF_THREAD_FRAME) &&
-                (hx->nal_unit_type != NAL_PPS &&
-                 hx->nal_unit_type != NAL_SPS)) {
-                if (hx->nal_unit_type < NAL_AUD ||
-                    hx->nal_unit_type > NAL_AUXILIARY_SLICE)
-                    av_log(avctx, AV_LOG_INFO,
-                           "Ignoring NAL unit %d during extradata parsing\n",
+            if (parse_extradata) {
+                switch (hx->nal_unit_type) {
+                case NAL_IDR_SLICE:
+                case NAL_SLICE:
+                case NAL_DPA:
+                case NAL_DPB:
+                case NAL_DPC:
+                    av_log(h->avctx, AV_LOG_WARNING,
+                           "Ignoring NAL %d in global header/extradata\n",
                            hx->nal_unit_type);
-                hx->nal_unit_type = NAL_FF_IGNORE;
+                    // fall through to next case
+                case NAL_AUXILIARY_SLICE:
+                    hx->nal_unit_type = NAL_FF_IGNORE;
+                }
             }
+
             err = 0;
+
             switch (hx->nal_unit_type) {
             case NAL_IDR_SLICE:
                 if (h->nal_unit_type != NAL_IDR_SLICE) {
@@ -1510,7 +1575,10 @@ again:
                     ret = -1;
                     goto end;
                 }
-                idr(h); // FIXME ensure we don't lose some frames if there is reordering
+                if(!idr_cleared)
+                    idr(h); // FIXME ensure we don't lose some frames if there is reordering
+                idr_cleared = 1;
+                h->has_recovery_point = 1;
             case NAL_SLICE:
                 init_get_bits(&hx->gb, ptr, bit_length);
                 hx->intra_gb_ptr      =
@@ -1520,14 +1588,22 @@ again:
                 if ((err = ff_h264_decode_slice_header(hx, h)))
                     break;
 
-                if (h->sei_recovery_frame_cnt >= 0 && h->recovery_frame < 0) {
-                    h->recovery_frame = (h->frame_num + h->sei_recovery_frame_cnt) &
-                                        ((1 << h->sps.log2_max_frame_num) - 1);
+                if (h->sei_recovery_frame_cnt >= 0) {
+                    if (h->frame_num != h->sei_recovery_frame_cnt || hx->slice_type_nos != AV_PICTURE_TYPE_I)
+                        h->valid_recovery_point = 1;
+
+                    if (   h->recovery_frame < 0
+                        || ((h->recovery_frame - h->frame_num) & ((1 << h->sps.log2_max_frame_num)-1)) > h->sei_recovery_frame_cnt) {
+                        h->recovery_frame = (h->frame_num + h->sei_recovery_frame_cnt) &
+                                            ((1 << h->sps.log2_max_frame_num) - 1);
+
+                        if (!h->valid_recovery_point)
+                            h->recovery_frame = h->frame_num;
+                    }
                 }
 
                 h->cur_pic_ptr->f.key_frame |=
-                    (hx->nal_unit_type == NAL_IDR_SLICE) ||
-                    (h->sei_recovery_frame_cnt >= 0);
+                    (hx->nal_unit_type == NAL_IDR_SLICE);
 
                 if (hx->nal_unit_type == NAL_IDR_SLICE ||
                     h->recovery_frame == h->frame_num) {
@@ -1538,7 +1614,13 @@ again:
                 // "recovered".
                 if (hx->nal_unit_type == NAL_IDR_SLICE)
                     h->frame_recovered |= FRAME_RECOVERED_IDR;
+                h->frame_recovered |= 3*!!(avctx->flags2 & CODEC_FLAG2_SHOW_ALL);
+                h->frame_recovered |= 3*!!(avctx->flags & CODEC_FLAG_OUTPUT_CORRUPT);
+#if 1
+                h->cur_pic_ptr->recovered |= h->frame_recovered;
+#else
                 h->cur_pic_ptr->recovered |= !!(h->frame_recovered & FRAME_RECOVERED_IDR);
+#endif
 
                 if (h->current_slice == 1) {
                     if (!(avctx->flags2 & CODEC_FLAG2_CHUNKS))
@@ -1547,22 +1629,26 @@ again:
                     if (h->avctx->hwaccel &&
                         (ret = h->avctx->hwaccel->start_frame(h->avctx, NULL, 0)) < 0)
                         return ret;
+                    if (CONFIG_H264_VDPAU_DECODER &&
+                        h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
+                        ff_vdpau_h264_picture_start(h);
                 }
 
-                if (hx->redundant_pic_count == 0 &&
-                    (avctx->skip_frame < AVDISCARD_NONREF ||
-                     hx->nal_ref_idc) &&
-                    (avctx->skip_frame < AVDISCARD_BIDIR  ||
-                     hx->slice_type_nos != AV_PICTURE_TYPE_B) &&
-                    (avctx->skip_frame < AVDISCARD_NONKEY ||
-                     hx->slice_type_nos == AV_PICTURE_TYPE_I) &&
-                    avctx->skip_frame < AVDISCARD_ALL) {
+                if (hx->redundant_pic_count == 0) {
                     if (avctx->hwaccel) {
                         ret = avctx->hwaccel->decode_slice(avctx,
                                                            &buf[buf_index - consumed],
                                                            consumed);
                         if (ret < 0)
                             return ret;
+                    } else if (CONFIG_H264_VDPAU_DECODER &&
+                               h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU) {
+                        ff_vdpau_add_data_chunk(h->cur_pic_ptr->f.data[0],
+                                                start_code,
+                                                sizeof(start_code));
+                        ff_vdpau_add_data_chunk(h->cur_pic_ptr->f.data[0],
+                                                &buf[buf_index - consumed],
+                                                consumed);
                     } else
                         context_count++;
                 }
@@ -1579,7 +1665,7 @@ again:
                 hx->intra_gb_ptr =
                 hx->inter_gb_ptr = NULL;
 
-                if ((err = ff_h264_decode_slice_header(hx, h)) < 0) {
+                if ((err = ff_h264_decode_slice_header(hx, h))) {
                     /* make sure data_partitioning is cleared if it was set
                      * before, so we don't try decoding a slice without a valid
                      * slice header later */
@@ -1597,6 +1683,9 @@ again:
                 init_get_bits(&hx->inter_gb, ptr, bit_length);
                 hx->inter_gb_ptr = &hx->inter_gb;
 
+                av_log(h->avctx, AV_LOG_ERROR, "Partitioned H.264 support is incomplete\n");
+                break;
+
                 if (hx->redundant_pic_count == 0 &&
                     hx->intra_gb_ptr &&
                     hx->data_partitioning &&
@@ -1604,7 +1693,7 @@ again:
                     (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc) &&
                     (avctx->skip_frame < AVDISCARD_BIDIR  ||
                      hx->slice_type_nos != AV_PICTURE_TYPE_B) &&
-                    (avctx->skip_frame < AVDISCARD_NONKEY ||
+                    (avctx->skip_frame < AVDISCARD_NONINTRA ||
                      hx->slice_type_nos == AV_PICTURE_TYPE_I) &&
                     avctx->skip_frame < AVDISCARD_ALL)
                     context_count++;
@@ -1617,19 +1706,18 @@ again:
                 break;
             case NAL_SPS:
                 init_get_bits(&h->gb, ptr, bit_length);
-                ret = ff_h264_decode_seq_parameter_set(h);
-                if (ret < 0 && h->is_avc && (nalsize != consumed) && nalsize) {
+                if (ff_h264_decode_seq_parameter_set(h) < 0 && (h->is_avc ? nalsize : 1)) {
                     av_log(h->avctx, AV_LOG_DEBUG,
                            "SPS decoding failure, trying again with the complete NAL\n");
-                    init_get_bits(&h->gb, buf + buf_index + 1 - consumed,
-                                  8 * (nalsize - 1));
+                    if (h->is_avc)
+                        av_assert0(next_avc - buf_index + consumed == nalsize);
+                    if ((next_avc - buf_index + consumed - 1) >= INT_MAX/8)
+                        break;
+                    init_get_bits(&h->gb, &buf[buf_index + 1 - consumed],
+                                  8*(next_avc - buf_index + consumed - 1));
                     ff_h264_decode_seq_parameter_set(h);
                 }
 
-                ret = ff_h264_set_parameter_from_sps(h);
-                if (ret < 0)
-                    goto end;
-
                 break;
             case NAL_PPS:
                 init_get_bits(&h->gb, ptr, bit_length);
@@ -1658,10 +1746,11 @@ again:
                 context_count = 0;
             }
 
-            if (err < 0) {
-                av_log(h->avctx, AV_LOG_ERROR, "decode_slice_header error\n");
+            if (err < 0 || err == SLICE_SKIPED) {
+                if (err < 0)
+                    av_log(h->avctx, AV_LOG_ERROR, "decode_slice_header error\n");
                 h->ref_count[0] = h->ref_count[1] = h->list_count = 0;
-            } else if (err == 1) {
+            } else if (err == SLICE_SINGLETHREAD) {
                 /* Slice could not be decoded in parallel mode, copy down
                  * NAL unit stuff to context 0 and restart. Note that
                  * rbsp_buffer is not transferred, but since we no longer
@@ -1679,6 +1768,7 @@ again:
             goto end;
     }
 
+    ret = 0;
 end:
     /* clean up */
     if (h->cur_pic_ptr && !h->droppable) {
@@ -1702,26 +1792,93 @@ static int get_consumed_bytes(int pos, int buf_size)
     return pos;
 }
 
-static int output_frame(H264Context *h, AVFrame *dst, AVFrame *src)
+/**
+ * Make dst a new reference to srcp's frame and apply per-picture output
+ * adjustments: set the "stereo_mode" metadata entry, mark dst as a key
+ * frame when srcp->sei_recovery_frame_cnt is 0 and, if srcp->crop is set,
+ * advance every plane pointer past the left/top crop offset.
+ *
+ * @return 0 on success, the negative av_frame_ref() result on failure
+ */
+static int output_frame(H264Context *h, AVFrame *dst, H264Picture *srcp)
 {
+    AVFrame *src = &srcp->f;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(src->format);
     int i;
     int ret = av_frame_ref(dst, src);
     if (ret < 0)
         return ret;
 
-    if (!h->sps.crop)
+    av_dict_set(&dst->metadata, "stereo_mode", ff_h264_sei_stereo_mode(h), 0);
+
+    if (srcp->sei_recovery_frame_cnt == 0)
+        dst->key_frame = 1;
+    if (!srcp->crop)
         return 0;
 
-    for (i = 0; i < 3; i++) {
-        int hshift = (i > 0) ? h->chroma_x_shift : 0;
-        int vshift = (i > 0) ? h->chroma_y_shift : 0;
-        int off    = ((h->sps.crop_left >> hshift) << h->pixel_shift) +
-                     (h->sps.crop_top >> vshift) * dst->linesize[i];
+    for (i = 0; i < desc->nb_components; i++) {
+        int hshift = (i > 0) ? desc->log2_chroma_w : 0;
+        int vshift = (i > 0) ? desc->log2_chroma_h : 0;
+        int off    = ((srcp->crop_left >> hshift) << h->pixel_shift) +
+                      (srcp->crop_top  >> vshift) * dst->linesize[i];
         dst->data[i] += off;
     }
     return 0;
 }
 
+/**
+ * Check whether buf holds a complete configuration record of the layout
+ * ff_h264_decode_extradata() parses: 6 header bytes (the low 5 bits of
+ * buf[5] give the SPS count), that many NALs with a 2-byte length prefix
+ * whose first payload byte is 0x67, a PPS count byte that must be non-zero,
+ * and that many length-prefixed NALs whose first payload byte is 0x68.
+ *
+ * Every read is bounds-checked against buf_size, so a truncated record is
+ * rejected instead of being read past its end.
+ *
+ * @return 1 if the record is well-formed, 0 otherwise
+ */
+static int is_extra(const uint8_t *buf, int buf_size)
+{
+    const uint8_t *p, *end;
+    int cnt;
+
+    /* the 6 header bytes plus the PPS count byte are the bare minimum */
+    if (buf_size < 7)
+        return 0;
+    end = buf + buf_size;
+
+    cnt = buf[5] & 0x1f;
+    p   = buf + 6;
+    while (cnt--) {
+        int nalsize;
+        /* the length prefix and the NAL header byte must be readable */
+        if (end - p < 3)
+            return 0;
+        nalsize = AV_RB16(p) + 2;
+        if (nalsize > end - p || p[2] != 0x67)
+            return 0;
+        p += nalsize;
+    }
+
+    /* the PPS count byte itself must lie inside the buffer */
+    if (p >= end)
+        return 0;
+    cnt = *(p++);
+    if (!cnt)
+        return 0;
+    while (cnt--) {
+        int nalsize;
+        if (end - p < 3)
+            return 0;
+        nalsize = AV_RB16(p) + 2;
+        if (nalsize > end - p || p[2] != 0x68)
+            return 0;
+        p += nalsize;
+    }
+    return 1;
+}
+
 static int h264_decode_frame(AVCodecContext *avctx, void *data,
                              int *got_frame, AVPacket *avpkt)
 {
@@ -1730,6 +1848,8 @@ static int h264_decode_frame(AVCodecContext *avctx, void *data,
     H264Context *h     = avctx->priv_data;
     AVFrame *pict      = data;
     int buf_index      = 0;
+    H264Picture *out;
+    int i, out_idx;
     int ret;
 
     h->flags = avctx->flags;
@@ -1738,12 +1858,11 @@ static int h264_decode_frame(AVCodecContext *avctx, void *data,
     h->data_partitioning = 0;
 
     /* end of stream, output what is still in the buffers */
-out:
     if (buf_size == 0) {
-        H264Picture *out;
-        int i, out_idx;
+ out:
 
         h->cur_pic_ptr = NULL;
+        h->first_field = 0;
 
         // FIXME factorize this with the output code below
         out     = h->delayed_pic[0];
@@ -1762,7 +1881,8 @@ out:
             h->delayed_pic[i] = h->delayed_pic[i + 1];
 
         if (out) {
-            ret = output_frame(h, pict, &out->f);
+            out->reference &= ~DELAYED_PIC_REF;
+            ret = output_frame(h, pict, out);
             if (ret < 0)
                 return ret;
             *got_frame = 1;
@@ -1770,19 +1890,30 @@ out:
 
         return buf_index;
     }
+    if (h->is_avc && av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA, NULL)) {
+        int side_size;
+        uint8_t *side = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size);
+        if (is_extra(side, side_size))
+            ff_h264_decode_extradata(h, side, side_size);
+    }
+    if(h->is_avc && buf_size >= 9 && buf[0]==1 && buf[2]==0 && (buf[4]&0xFC)==0xFC && (buf[5]&0x1F) && buf[8]==0x67){
+        if (is_extra(buf, buf_size))
+            return ff_h264_decode_extradata(h, buf, buf_size);
+    }
 
     buf_index = decode_nal_units(h, buf, buf_size, 0);
     if (buf_index < 0)
         return AVERROR_INVALIDDATA;
 
     if (!h->cur_pic_ptr && h->nal_unit_type == NAL_END_SEQUENCE) {
-        buf_size = 0;
+        av_assert0(buf_index <= buf_size);
         goto out;
     }
 
     if (!(avctx->flags2 & CODEC_FLAG2_CHUNKS) && !h->cur_pic_ptr) {
-        if (avctx->skip_frame >= AVDISCARD_NONREF)
-            return 0;
+        if (avctx->skip_frame >= AVDISCARD_NONREF ||
+            buf_size >= 4 && !memcmp("Q264", buf, 4))
+            return buf_size;
         av_log(avctx, AV_LOG_ERROR, "no frame!\n");
         return AVERROR_INVALIDDATA;
     }
@@ -1794,16 +1925,25 @@ out:
 
         ff_h264_field_end(h, 0);
 
+        /* Wait for second field. */
         *got_frame = 0;
-        if (h->next_output_pic && ((avctx->flags & CODEC_FLAG_OUTPUT_CORRUPT) ||
+        if (h->next_output_pic && (
                                    h->next_output_pic->recovered)) {
             if (!h->next_output_pic->recovered)
                 h->next_output_pic->f.flags |= AV_FRAME_FLAG_CORRUPT;
 
-            ret = output_frame(h, pict, &h->next_output_pic->f);
+            ret = output_frame(h, pict, h->next_output_pic);
             if (ret < 0)
                 return ret;
             *got_frame = 1;
+            if (CONFIG_MPEGVIDEO) {
+                ff_print_debug_info2(h->avctx, pict, h->er.mbskip_table,
+                                    h->next_output_pic->mb_type,
+                                    h->next_output_pic->qscale_table,
+                                    h->next_output_pic->motion_val,
+                                    &h->low_delay,
+                                    h->mb_width, h->mb_height, h->mb_stride, 1);
+            }
         }
     }
 
@@ -1829,6 +1969,7 @@ static av_cold int h264_decode_end(AVCodecContext *avctx)
 {
     H264Context *h = avctx->priv_data;
 
+    ff_h264_remove_all_refs(h);
     ff_h264_free_context(h);
 
     ff_h264_unref_picture(h, &h->cur_pic);
@@ -1853,6 +1994,26 @@ static const AVProfile profiles[] = {
     { FF_PROFILE_UNKNOWN },
 };
 
+static const AVOption h264_options[] = { /* decoder-private options; each entry stores an int into H264Context */
+    {"is_avc", "is avc", offsetof(H264Context, is_avc), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, 0}, /* default 0, accepted range 0..1 */
+    {"nal_length_size", "nal_length_size", offsetof(H264Context, nal_length_size), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 4, 0}, /* default 0, accepted range 0..4 */
+    {NULL} /* end-of-table marker */
+};
+
+static const AVClass h264_class = { /* AVClass installed as .priv_class of ff_h264_decoder */
+    .class_name = "H264 Decoder",
+    .item_name  = av_default_item_name,
+    .option     = h264_options, /* decoder-private option table */
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+static const AVClass h264_vdpau_class = { /* AVClass installed as .priv_class of ff_h264_vdpau_decoder */
+    .class_name = "H264 VDPAU Decoder",
+    .item_name  = av_default_item_name,
+    .option     = h264_options, /* same option table as h264_class */
+    .version    = LIBAVUTIL_VERSION_INT, /* NOTE(review): this object looks unused when CONFIG_H264_VDPAU_DECODER is 0 - confirm */
+};
+
 AVCodec ff_h264_decoder = {
     .name                  = "h264",
     .long_name             = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
@@ -1869,4 +2030,24 @@ AVCodec ff_h264_decoder = {
     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
     .update_thread_context = ONLY_IF_THREADS_ENABLED(ff_h264_update_thread_context),
     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
+    .priv_class            = &h264_class,
 };
+
+#if CONFIG_H264_VDPAU_DECODER
+AVCodec ff_h264_vdpau_decoder = { /* "h264_vdpau" codec entry, compiled only when CONFIG_H264_VDPAU_DECODER is non-zero */
+    .name           = "h264_vdpau",
+    .long_name      = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_H264,
+    .priv_data_size = sizeof(H264Context),
+    .init           = ff_h264_decode_init,
+    .close          = h264_decode_end,   /* this file's close callback */
+    .decode         = h264_decode_frame, /* this file's per-packet decode callback */
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
+    .flush          = flush_dpb,
+    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_VDPAU_H264, /* the only pixel format listed */
+                                                     AV_PIX_FMT_NONE},      /* list terminator */
+    .profiles       = NULL_IF_CONFIG_SMALL(profiles),
+    .priv_class     = &h264_vdpau_class,
+};
+#endif /* CONFIG_H264_VDPAU_DECODER */
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index cd4bf87..5ec4f0c 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -43,8 +43,8 @@
 #include "rectangle.h"
 #include "videodsp.h"
 
-#define H264_MAX_PICTURE_COUNT 32
-#define H264_MAX_THREADS       16
+#define H264_MAX_PICTURE_COUNT 36
+#define H264_MAX_THREADS       32
 
 #define MAX_SPS_COUNT          32
 #define MAX_PPS_COUNT         256
@@ -53,6 +53,8 @@
 
 #define MAX_DELAYED_PIC_COUNT  16
 
+#define MAX_MBPAIR_SIZE (256*1024) // a tighter bound could be calculated if someone cares about a few bytes
+
 /* Compiling in interlaced support reduces the speed
  * of progressive decoding by about 2%. */
 #define ALLOW_INTERLACE
@@ -66,10 +68,10 @@
 #define MAX_SLICES 16
 
 #ifdef ALLOW_INTERLACE
-#define MB_MBAFF(h)    h->mb_mbaff
-#define MB_FIELD(h)    h->mb_field_decoding_flag
-#define FRAME_MBAFF(h) h->mb_aff_frame
-#define FIELD_PICTURE(h) (h->picture_structure != PICT_FRAME)
+#define MB_MBAFF(h)    (h)->mb_mbaff
+#define MB_FIELD(h)    (h)->mb_field_decoding_flag
+#define FRAME_MBAFF(h) (h)->mb_aff_frame
+#define FIELD_PICTURE(h) ((h)->picture_structure != PICT_FRAME)
 #define LEFT_MBS 2
 #define LTOP     0
 #define LBOT     1
@@ -89,11 +91,12 @@
 #define FIELD_OR_MBAFF_PICTURE(h) (FRAME_MBAFF(h) || FIELD_PICTURE(h))
 
 #ifndef CABAC
-#define CABAC(h) h->pps.cabac
+#define CABAC(h) (h)->pps.cabac
 #endif
 
-#define CHROMA422(h) (h->sps.chroma_format_idc == 2)
-#define CHROMA444(h) (h->sps.chroma_format_idc == 3)
+#define CHROMA(h)    ((h)->sps.chroma_format_idc)
+#define CHROMA422(h) ((h)->sps.chroma_format_idc == 2)
+#define CHROMA444(h) ((h)->sps.chroma_format_idc == 3)
 
 #define EXTENDED_SAR       255
 
@@ -102,7 +105,7 @@
 #define IS_REF0(a)         ((a) & MB_TYPE_REF0)
 #define IS_8x8DCT(a)       ((a) & MB_TYPE_8x8DCT)
 
-#define QP_MAX_NUM (51 + 2 * 6)           // The maximum supported qp
+#define QP_MAX_NUM (51 + 6*6)           // The maximum supported qp
 
 /* NAL unit types */
 enum {
@@ -129,6 +132,7 @@ enum {
 typedef enum {
     SEI_TYPE_BUFFERING_PERIOD       = 0,   ///< buffering period (H.264, D.1.1)
     SEI_TYPE_PIC_TIMING             = 1,   ///< picture timing
+    SEI_TYPE_USER_DATA_ITU_T_T35    = 4,   ///< user data registered by ITU-T Recommendation T.35
     SEI_TYPE_USER_DATA_UNREGISTERED = 5,   ///< unregistered user data
     SEI_TYPE_RECOVERY_POINT         = 6,   ///< recovery point (frame # to decoder sync)
     SEI_TYPE_FRAME_PACKING          = 45,  ///< frame packing arrangement
@@ -151,6 +155,19 @@ typedef enum {
 } SEI_PicStructType;
 
 /**
+ * frame_packing_arrangement types (the values held in FPA.frame_packing_arrangement_type)
+ */
+typedef enum {
+    SEI_FPA_TYPE_CHECKERBOARD        = 0,
+    SEI_FPA_TYPE_INTERLEAVE_COLUMN   = 1,
+    SEI_FPA_TYPE_INTERLEAVE_ROW      = 2,
+    SEI_FPA_TYPE_SIDE_BY_SIDE        = 3,
+    SEI_FPA_TYPE_TOP_BOTTOM          = 4,
+    SEI_FPA_TYPE_INTERLEAVE_TEMPORAL = 5,
+    SEI_FPA_TYPE_2D                  = 6, ///< presumably "no frame packing" - TODO confirm against the H.264 SEI semantics
+} SEI_FpaType;
+
+/**
  * Sequence parameter set
  */
 typedef struct SPS {
@@ -234,11 +251,23 @@ typedef struct PPS {
     int transform_8x8_mode;         ///< transform_8x8_mode_flag
     uint8_t scaling_matrix4[6][16];
     uint8_t scaling_matrix8[6][64];
-    uint8_t chroma_qp_table[2][64]; ///< pre-scaled (with chroma_qp_index_offset) version of qp_table
+    uint8_t chroma_qp_table[2][QP_MAX_NUM+1];  ///< pre-scaled (with chroma_qp_index_offset) version of qp_table
     int chroma_qp_diff;
 } PPS;
 
 /**
+ * Frame packing arrangement (FPA) SEI fields; kept in H264Context as sei_fpa
+ */
+typedef struct FPA {
+    int         frame_packing_arrangement_id;
+    int         frame_packing_arrangement_cancel_flag; ///< is previous arrangement canceled, -1 if never received
+    SEI_FpaType frame_packing_arrangement_type;        ///< one of the SEI_FpaType values
+    int         frame_packing_arrangement_repetition_period;
+    int         content_interpretation_type;
+    int         quincunx_sampling_flag;
+} FPA;
+
+/**
  * Memory management control operation opcode.
  */
 typedef enum MMCOOpcode {
@@ -262,6 +291,7 @@ typedef struct MMCO {
 
 typedef struct H264Picture {
     struct AVFrame f;
+    uint8_t avframe_padding[1024]; // hack to allow linking to an avutil with a larger AVFrame
     ThreadFrame tf;
 
     AVBufferRef *qscale_table_buf;
@@ -287,7 +317,7 @@ typedef struct H264Picture {
     int pic_id;             /**< pic_num (short -> no wrap version of pic_num,
                                  pic_num & max_pic_num; long -> long_pic_num) */
     int long_ref;           ///< 1->long term reference 0->short term reference
-    int ref_poc[2][2][32];  ///< POCs of the frames used as reference (FIXME need per slice)
+    int ref_poc[2][2][32];  ///< POCs of the frames/fields used as reference (FIXME need per slice)
     int ref_count[2][2];    ///< number of entries in ref_poc         (FIXME need per slice)
     int mbaff;              ///< 1 -> MBAFF frame 0-> not MBAFF
     int field_picture;      ///< whether or not picture was encoded in separate fields
@@ -295,6 +325,12 @@ typedef struct H264Picture {
     int needs_realloc;      ///< picture needs to be reallocated (eg due to a frame size change)
     int reference;
     int recovered;          ///< picture at IDR or recovery point + recovery count
+    int invalid_gap;
+    int sei_recovery_frame_cnt;
+
+    int crop;
+    int crop_left;
+    int crop_top;
 } H264Picture;
 
 /**
@@ -398,9 +434,12 @@ typedef struct H264Context {
     ptrdiff_t mb_linesize;  ///< may be equal to s->linesize or s->linesize * 2, for mbaff
     ptrdiff_t mb_uvlinesize;
 
+    unsigned current_sps_id; ///< id of the current SPS
     SPS sps; ///< current sps
     PPS pps; ///< current pps
 
+    int au_pps_id; ///< pps_id of current access unit
+
     uint32_t dequant4_buffer[6][QP_MAX_NUM + 1][16]; // FIXME should these be moved down?
     uint32_t dequant8_buffer[6][QP_MAX_NUM + 1][64];
     uint32_t(*dequant4_coeff[6])[16];
@@ -457,7 +496,7 @@ typedef struct H264Context {
     GetBitContext *inter_gb_ptr;
 
     const uint8_t *intra_pcm_ptr;
-    DECLARE_ALIGNED(16, int16_t, mb)[16 * 48 * 2]; ///< as a dct coeffecient is int32_t in high depth, we need to reserve twice the space.
+    DECLARE_ALIGNED(16, int16_t, mb)[16 * 48 * 2]; ///< as a dct coefficient is int32_t in high depth, we need to reserve twice the space.
     DECLARE_ALIGNED(16, int16_t, mb_luma_dc)[3][16 * 2];
     int16_t mb_padding[256 * 2];        ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb
 
@@ -486,12 +525,12 @@ typedef struct H264Context {
     uint8_t field_scan[16];
     uint8_t field_scan8x8[64];
     uint8_t field_scan8x8_cavlc[64];
-    const uint8_t *zigzag_scan_q0;
-    const uint8_t *zigzag_scan8x8_q0;
-    const uint8_t *zigzag_scan8x8_cavlc_q0;
-    const uint8_t *field_scan_q0;
-    const uint8_t *field_scan8x8_q0;
-    const uint8_t *field_scan8x8_cavlc_q0;
+    uint8_t zigzag_scan_q0[16];
+    uint8_t zigzag_scan8x8_q0[64];
+    uint8_t zigzag_scan8x8_cavlc_q0[64];
+    uint8_t field_scan_q0[16];
+    uint8_t field_scan8x8_q0[64];
+    uint8_t field_scan8x8_cavlc_q0[64];
 
     int x264_build;
 
@@ -588,7 +627,7 @@ typedef struct H264Context {
     struct H264Context *thread_context[H264_MAX_THREADS];
 
     /**
-     * current slice number, used to initalize slice_num of each thread/context
+     * current slice number, used to initialize slice_num of each thread/context
      */
     int current_slice;
 
@@ -611,6 +650,7 @@ typedef struct H264Context {
     enum AVPictureType pict_type;
 
     int last_slice_type;
+    unsigned int last_ref_count[2];
     /** @} */
 
     /**
@@ -668,6 +708,13 @@ typedef struct H264Context {
     int sei_recovery_frame_cnt;
 
     /**
+     * Whether the SEI recovery points look valid.
+     */
+    int valid_recovery_point;
+
+    FPA sei_fpa;
+
+    /**
      * recovery_frame is the frame_num at which the next frame should
      * be fully constructed.
      *
@@ -688,6 +735,8 @@ typedef struct H264Context {
 
     int frame_recovered;    ///< Initial frame has been completely recovered
 
+    int has_recovery_point;
+
     int luma_weight_flag[2];    ///< 7.4.3.2 luma_weight_lX_flag
     int chroma_weight_flag[2];  ///< 7.4.3.2 chroma_weight_lX_flag
 
@@ -697,6 +746,12 @@ typedef struct H264Context {
 
     int cur_chroma_format_idc;
     uint8_t *bipred_scratchpad;
+
+    int16_t slice_row[MAX_SLICES]; ///< to detect when MAX_SLICES is too low
+
+    uint8_t parse_history[6];
+    int parse_history_count;
+    int parse_last_mb;
     uint8_t *edge_emu_buffer;
     int16_t *dc_val_base;
 
@@ -710,7 +765,7 @@ typedef struct H264Context {
     qpel_mc_func (*qpel_avg)[16];
 } H264Context;
 
-extern const uint8_t ff_h264_chroma_qp[3][QP_MAX_NUM + 1]; ///< One chroma qp table for each supported bit depth (8, 9, 10).
+extern const uint8_t ff_h264_chroma_qp[7][QP_MAX_NUM + 1]; ///< One chroma qp table for each possible bit depth (8-14).
 extern const uint16_t ff_h264_mb_sizes[4];
 
 /**
@@ -793,7 +848,7 @@ int ff_h264_check_intra4x4_pred_mode(H264Context *h);
 int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma);
 
 void ff_h264_hl_decode_mb(H264Context *h);
-int ff_h264_decode_extradata(H264Context *h);
+int ff_h264_decode_extradata(H264Context *h, const uint8_t *buf, int size);
 int ff_h264_decode_init(AVCodecContext *avctx);
 void ff_h264_decode_init_vlc(void);
 
@@ -831,6 +886,12 @@ void ff_h264_filter_mb(H264Context *h, int mb_x, int mb_y,
  */
 void ff_h264_reset_sei(H264Context *h);
 
+/**
+ * Get stereo_mode string from the h264 frame_packing_arrangement
+ * @param h H.264 context.
+ */
+const char* ff_h264_sei_stereo_mode(H264Context *h);
+
 /*
  * o-o o-o
  *  / / /
@@ -1045,6 +1106,9 @@ int ff_pred_weight_table(H264Context *h);
 int ff_set_ref_count(H264Context *h);
 
 int ff_h264_decode_slice_header(H264Context *h, H264Context *h0);
+#define SLICE_SINGLETHREAD 1
+#define SLICE_SKIPED 2
+
 int ff_h264_execute_decode_slices(H264Context *h, unsigned context_count);
 int ff_h264_update_thread_context(AVCodecContext *dst,
                                   const AVCodecContext *src);
@@ -1053,4 +1117,6 @@ void ff_h264_flush_change(H264Context *h);
 
 void ff_h264_free_tables(H264Context *h, int free_rbsp);
 
+void ff_h264_set_erpic(ERPicture *dst, H264Picture *src);
+
 #endif /* AVCODEC_H264_H */
diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index f1ab97a..5b1a827 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... cabac decoding
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,8 +26,10 @@
  */
 
 #define CABAC(h) 1
+#define UNCHECKED_BITSTREAM_READER 1
 
 #include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
 #include "libavutil/timer.h"
 #include "config.h"
 #include "cabac.h"
@@ -44,8 +46,6 @@
 #include "x86/h264_i386.h"
 #endif
 
-#include <assert.h>
-
 /* Cabac pre state table */
 
 static const int8_t cabac_context_init_I[1024][2] =
@@ -1621,7 +1621,9 @@ decode_cabac_residual_internal(H264Context *h, int16_t *block,
     cc.range     = h->cabac.range;
     cc.low       = h->cabac.low;
     cc.bytestream= h->cabac.bytestream;
+#if !UNCHECKED_BITSTREAM_READER || ARCH_AARCH64
     cc.bytestream_end = h->cabac.bytestream_end;
+#endif
 #else
 #define CC &h->cabac
 #endif
@@ -1670,7 +1672,7 @@ decode_cabac_residual_internal(H264Context *h, int16_t *block,
         }
 #endif
     }
-    assert(coeff_count > 0);
+    av_assert2(coeff_count > 0);
 
     if( is_dc ) {
         if( cat == 3 )
@@ -1682,7 +1684,7 @@ decode_cabac_residual_internal(H264Context *h, int16_t *block,
         if( max_coeff == 64 )
             fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
         else {
-            assert( cat == 1 || cat ==  2 || cat ==  4 || cat == 7 || cat == 8 || cat == 11 || cat == 12 );
+            av_assert2( cat == 1 || cat ==  2 || cat ==  4 || cat == 7 || cat == 8 || cat == 11 || cat == 12 );
             h->non_zero_count_cache[scan8[n]] = coeff_count;
         }
     }
@@ -1711,7 +1713,7 @@ decode_cabac_residual_internal(H264Context *h, int16_t *block,
 \
             if( coeff_abs >= 15 ) { \
                 int j = 0; \
-                while( get_cabac_bypass( CC ) ) { \
+                while(get_cabac_bypass( CC ) && j<30) { \
                     j++; \
                 } \
 \
@@ -1885,6 +1887,7 @@ int ff_h264_decode_mb_cabac(H264Context *h) {
     int dct8x8_allowed= h->pps.transform_8x8_mode;
     int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
     const int pixel_shift = h->pixel_shift;
+    unsigned local_ref_count[2];
 
     mb_xy = h->mb_xy = h->mb_x + h->mb_y*h->mb_stride;
 
@@ -1927,7 +1930,7 @@ int ff_h264_decode_mb_cabac(H264Context *h) {
 
     if( h->slice_type_nos == AV_PICTURE_TYPE_B ) {
         int ctx = 0;
-        assert(h->slice_type_nos == AV_PICTURE_TYPE_B);
+        av_assert2(h->slice_type_nos == AV_PICTURE_TYPE_B);
 
         if( !IS_DIRECT( h->left_type[LTOP]-1 ) )
             ctx++;
@@ -1980,7 +1983,7 @@ int ff_h264_decode_mb_cabac(H264Context *h) {
         mb_type= decode_cabac_intra_mb_type(h, 3, 1);
         if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
             mb_type--;
-        assert(h->slice_type_nos == AV_PICTURE_TYPE_I);
+        av_assert2(h->slice_type_nos == AV_PICTURE_TYPE_I);
 decode_intra_mb:
         partition_count = 0;
         cbp= i_mb_type_info[mb_type].cbp;
@@ -2026,6 +2029,9 @@ decode_intra_mb:
         return 0;
     }
 
+    local_ref_count[0] = h->ref_count[0] << MB_MBAFF(h);
+    local_ref_count[1] = h->ref_count[1] << MB_MBAFF(h);
+
     fill_decode_caches(h, mb_type);
 
     if( IS_INTRA( mb_type ) ) {
@@ -2094,11 +2100,10 @@ decode_intra_mb:
                 for( i = 0; i < 4; i++ ) {
                     if(IS_DIRECT(h->sub_mb_type[i])) continue;
                     if(IS_DIR(h->sub_mb_type[i], 0, list)){
-                        int rc = h->ref_count[list] << MB_MBAFF(h);
-                        if (rc > 1) {
+                        if (local_ref_count[list] > 1) {
                             ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
-                            if (ref[list][i] >= (unsigned) rc) {
-                                av_log(h->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], rc);
+                            if (ref[list][i] >= (unsigned)local_ref_count[list]) {
+                                av_log(h->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], local_ref_count[list]);
                                 return -1;
                             }
                         }else
@@ -2180,16 +2185,16 @@ decode_intra_mb:
         if(IS_16X16(mb_type)){
             for(list=0; list<h->list_count; list++){
                 if(IS_DIR(mb_type, 0, list)){
-                    int ref, rc = h->ref_count[list] << MB_MBAFF(h);
-                    if (rc > 1) {
+                    int ref;
+                    if (local_ref_count[list] > 1) {
                         ref= decode_cabac_mb_ref(h, list, 0);
-                        if (ref >= (unsigned) rc) {
-                            av_log(h->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, rc);
+                        if (ref >= (unsigned)local_ref_count[list]) {
+                            av_log(h->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, local_ref_count[list]);
                             return -1;
                         }
                     }else
                         ref=0;
-                        fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
+                    fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
                 }
             }
             for(list=0; list<h->list_count; list++){
@@ -2208,11 +2213,11 @@ decode_intra_mb:
             for(list=0; list<h->list_count; list++){
                     for(i=0; i<2; i++){
                         if(IS_DIR(mb_type, i, list)){
-                            int ref, rc = h->ref_count[list] << MB_MBAFF(h);
-                            if (rc > 1) {
+                            int ref;
+                            if (local_ref_count[list] > 1) {
                                 ref= decode_cabac_mb_ref( h, list, 8*i );
-                                if (ref >= (unsigned) rc) {
-                                    av_log(h->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, rc);
+                                if (ref >= (unsigned)local_ref_count[list]) {
+                                    av_log(h->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, local_ref_count[list]);
                                     return -1;
                                 }
                             }else
@@ -2239,15 +2244,15 @@ decode_intra_mb:
                 }
             }
         }else{
-            assert(IS_8X16(mb_type));
+            av_assert2(IS_8X16(mb_type));
             for(list=0; list<h->list_count; list++){
                     for(i=0; i<2; i++){
                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
-                            int ref, rc = h->ref_count[list] << MB_MBAFF(h);
-                            if (rc > 1) {
+                            int ref;
+                            if (local_ref_count[list] > 1) {
                                 ref= decode_cabac_mb_ref( h, list, 4*i );
-                                if (ref >= (unsigned) rc) {
-                                    av_log(h->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, rc);
+                                if (ref >= (unsigned)local_ref_count[list]) {
+                                    av_log(h->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, local_ref_count[list]);
                                     return -1;
                                 }
                             }else
@@ -2285,6 +2290,11 @@ decode_intra_mb:
         cbp  = decode_cabac_mb_cbp_luma( h );
         if(decode_chroma)
             cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
+    } else {
+        if (!decode_chroma && cbp>15) {
+            av_log(h->avctx, AV_LOG_ERROR, "gray chroma\n");
+            return AVERROR_INVALIDDATA;
+        }
     }
 
     h->cbp_table[mb_xy] = h->cbp = cbp;
diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c
index 0ab0355..ca587a4 100644
--- a/libavcodec/h264_cavlc.c
+++ b/libavcodec/h264_cavlc.c
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,6 +26,7 @@
  */
 
 #define CABAC(h) 0
+#define UNCHECKED_BITSTREAM_READER 1
 
 #include "internal.h"
 #include "avcodec.h"
@@ -34,8 +35,8 @@
 #include "h264_mvpred.h"
 #include "golomb.h"
 #include "mpegutils.h"
+#include "libavutil/avassert.h"
 
-#include <assert.h>
 
 static const uint8_t golomb_to_inter_cbp_gray[16]={
  0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
@@ -359,7 +360,7 @@ av_cold void ff_h264_decode_init_vlc(void){
          * the packed static coeff_token_vlc table sizes
          * were initialized correctly.
          */
-        assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
+        av_assert0(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
 
         for(i=0; i<3; i++){
             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
@@ -478,7 +479,7 @@ static int decode_residual(H264Context *h, GetBitContext *gb, int16_t *block, in
 
     trailing_ones= coeff_token&3;
     tprintf(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
-    assert(total_coeff<=16);
+    av_assert2(total_coeff<=16);
 
     i = show_bits(gb, 3);
     skip_bits(gb, trailing_ones);
@@ -510,7 +511,7 @@ static int decode_residual(H264Context *h, GetBitContext *gb, int16_t *block, in
                 else
                     level_code= prefix + get_bits(gb, 4); //part
             }else{
-                level_code= 30 + get_bits(gb, prefix-3); //part
+                level_code= 30;
                 if(prefix>=16){
                     if(prefix > 25+3){
                         av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
@@ -518,6 +519,7 @@ static int decode_residual(H264Context *h, GetBitContext *gb, int16_t *block, in
                     }
                     level_code += (1<<(prefix-3))-4096;
                 }
+                level_code += get_bits(gb, prefix-3); //part
             }
 
             if(trailing_ones < 3) level_code += 2;
@@ -547,9 +549,15 @@ static int decode_residual(H264Context *h, GetBitContext *gb, int16_t *block, in
                 if(prefix<15){
                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
                 }else{
-                    level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
-                    if(prefix>=16)
+                    level_code = 15<<suffix_length;
+                    if (prefix>=16) {
+                        if(prefix > 25+3){
+                            av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
+                            return AVERROR_INVALIDDATA;
+                        }
                         level_code += (1<<(prefix-3))-4096;
+                    }
+                    level_code += get_bits(gb, prefix-3);
                 }
                 mask= -(level_code&1);
                 level_code= (((2+level_code)>>1) ^ mask) - mask;
@@ -564,13 +572,13 @@ static int decode_residual(H264Context *h, GetBitContext *gb, int16_t *block, in
     else{
         if (max_coeff <= 8) {
             if (max_coeff == 4)
-                zeros_left = get_vlc2(gb, chroma_dc_total_zeros_vlc[total_coeff - 1].table,
+                zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table,
                                       CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
             else
-                zeros_left = get_vlc2(gb, chroma422_dc_total_zeros_vlc[total_coeff - 1].table,
+                zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table,
                                       CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
         } else {
-            zeros_left= get_vlc2(gb, total_zeros_vlc[total_coeff - 1].table, TOTAL_ZEROS_VLC_BITS, 1);
+            zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
         }
     }
 
@@ -580,7 +588,7 @@ static int decode_residual(H264Context *h, GetBitContext *gb, int16_t *block, in
         ((type*)block)[*scantable] = level[0]; \
         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
             if(zeros_left < 7) \
-                run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
+                run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
             else \
                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
             zeros_left -= run_before; \
@@ -595,7 +603,7 @@ static int decode_residual(H264Context *h, GetBitContext *gb, int16_t *block, in
         ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
             if(zeros_left < 7) \
-                run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
+                run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
             else \
                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
             zeros_left -= run_before; \
@@ -608,18 +616,17 @@ static int decode_residual(H264Context *h, GetBitContext *gb, int16_t *block, in
         } \
     }
 
-    if (zeros_left < 0) {
-        av_log(h->avctx, AV_LOG_ERROR,
-               "negative number of zero coeffs at %d %d\n", h->mb_x, h->mb_y);
-        return AVERROR_INVALIDDATA;
-    }
-
     if (h->pixel_shift) {
         STORE_BLOCK(int32_t)
     } else {
         STORE_BLOCK(int16_t)
     }
 
+    if(zeros_left<0){
+        av_log(h->avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", h->mb_x, h->mb_y);
+        return -1;
+    }
+
     return 0;
 }
 
@@ -635,7 +642,7 @@ static av_always_inline int decode_luma_residual(H264Context *h, GetBitContext *
             return -1; //FIXME continue if partitioned and other return -1 too
         }
 
-        assert((cbp&15) == 0 || (cbp&15) == 15);
+        av_assert2((cbp&15) == 0 || (cbp&15) == 15);
 
         if(cbp&15){
             for(i8x8=0; i8x8<4; i8x8++){
@@ -696,6 +703,7 @@ int ff_h264_decode_mb_cavlc(H264Context *h){
     int dct8x8_allowed= h->pps.transform_8x8_mode;
     int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
     const int pixel_shift = h->pixel_shift;
+    unsigned local_ref_count[2];
 
     mb_xy = h->mb_xy = h->mb_x + h->mb_y*h->mb_stride;
 
@@ -704,7 +712,7 @@ int ff_h264_decode_mb_cavlc(H264Context *h){
                 down the code */
     if(h->slice_type_nos != AV_PICTURE_TYPE_I){
         if(h->mb_skip_run==-1)
-            h->mb_skip_run= get_ue_golomb(&h->gb);
+            h->mb_skip_run= get_ue_golomb_long(&h->gb);
 
         if (h->mb_skip_run--) {
             if(FRAME_MBAFF(h) && (h->mb_y&1) == 0){
@@ -740,7 +748,7 @@ int ff_h264_decode_mb_cavlc(H264Context *h){
             goto decode_intra_mb;
         }
     }else{
-       assert(h->slice_type_nos == AV_PICTURE_TYPE_I);
+       av_assert2(h->slice_type_nos == AV_PICTURE_TYPE_I);
         if(h->slice_type == AV_PICTURE_TYPE_SI && mb_type)
             mb_type--;
 decode_intra_mb:
@@ -780,6 +788,9 @@ decode_intra_mb:
         return 0;
     }
 
+    local_ref_count[0] = h->ref_count[0] << MB_MBAFF(h);
+    local_ref_count[1] = h->ref_count[1] << MB_MBAFF(h);
+
     fill_decode_neighbors(h, mb_type);
     fill_decode_caches(h, mb_type);
 
@@ -846,7 +857,7 @@ decode_intra_mb:
                 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
             }
         }else{
-            assert(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
+            av_assert2(h->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
             for(i=0; i<4; i++){
                 h->sub_mb_type[i]= get_ue_golomb_31(&h->gb);
                 if(h->sub_mb_type[i] >=4){
@@ -859,7 +870,7 @@ decode_intra_mb:
         }
 
         for(list=0; list<h->list_count; list++){
-            int ref_count = IS_REF0(mb_type) ? 1 : h->ref_count[list] << MB_MBAFF(h);
+            int ref_count = IS_REF0(mb_type) ? 1 : local_ref_count[list];
             for(i=0; i<4; i++){
                 if(IS_DIRECT(h->sub_mb_type[i])) continue;
                 if(IS_DIR(h->sub_mb_type[i], 0, list)){
@@ -939,14 +950,13 @@ decode_intra_mb:
             for(list=0; list<h->list_count; list++){
                     unsigned int val;
                     if(IS_DIR(mb_type, 0, list)){
-                        int rc = h->ref_count[list] << MB_MBAFF(h);
-                        if (rc == 1) {
+                        if(local_ref_count[list]==1){
                             val= 0;
-                        } else if (rc == 2) {
+                        } else if(local_ref_count[list]==2){
                             val= get_bits1(&h->gb)^1;
                         }else{
                             val= get_ue_golomb_31(&h->gb);
-                            if (val >= rc) {
+                            if (val >= local_ref_count[list]){
                                 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
                                 return -1;
                             }
@@ -970,14 +980,13 @@ decode_intra_mb:
                     for(i=0; i<2; i++){
                         unsigned int val;
                         if(IS_DIR(mb_type, i, list)){
-                            int rc = h->ref_count[list] << MB_MBAFF(h);
-                            if (rc == 1) {
+                            if(local_ref_count[list] == 1) {
                                 val= 0;
-                            } else if (rc == 2) {
+                            } else if(local_ref_count[list] == 2) {
                                 val= get_bits1(&h->gb)^1;
                             }else{
                                 val= get_ue_golomb_31(&h->gb);
-                                if (val >= rc) {
+                                if (val >= local_ref_count[list]){
                                     av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
                                     return -1;
                                 }
@@ -1003,19 +1012,18 @@ decode_intra_mb:
                 }
             }
         }else{
-            assert(IS_8X16(mb_type));
+            av_assert2(IS_8X16(mb_type));
             for(list=0; list<h->list_count; list++){
                     for(i=0; i<2; i++){
                         unsigned int val;
                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
-                            int rc = h->ref_count[list] << MB_MBAFF(h);
-                            if (rc == 1) {
+                            if(local_ref_count[list]==1){
                                 val= 0;
-                            } else if (rc == 2) {
+                            } else if(local_ref_count[list]==2){
                                 val= get_bits1(&h->gb)^1;
                             }else{
                                 val= get_ue_golomb_31(&h->gb);
-                                if (val >= rc) {
+                                if (val >= local_ref_count[list]){
                                     av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
                                     return -1;
                                 }
@@ -1064,6 +1072,11 @@ decode_intra_mb:
             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
             else                     cbp= golomb_to_inter_cbp_gray[cbp];
         }
+    } else {
+        if (!decode_chroma && cbp>15) {
+            av_log(h->avctx, AV_LOG_ERROR, "gray chroma\n");
+            return AVERROR_INVALIDDATA;
+        }
     }
 
     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
@@ -1116,12 +1129,15 @@ decode_intra_mb:
             if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
                 return -1;
             }
-        } else if (CHROMA422(h)) {
+        } else {
+            const int num_c8x8 = h->sps.chroma_format_idc;
+
             if(cbp&0x30){
                 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
                     if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
-                                        CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma422_dc_scan,
-                                        NULL, 8) < 0) {
+                                        CHROMA_DC_BLOCK_INDEX+chroma_idx,
+                                        CHROMA422(h) ? chroma422_dc_scan : chroma_dc_scan,
+                                        NULL, 4*num_c8x8) < 0) {
                         return -1;
                     }
             }
@@ -1130,7 +1146,7 @@ decode_intra_mb:
                 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
                     const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
                     int16_t *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
-                    for (i8x8 = 0; i8x8 < 2; i8x8++) {
+                    for (i8x8 = 0; i8x8<num_c8x8; i8x8++) {
                         for (i4x4 = 0; i4x4 < 4; i4x4++) {
                             const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
                             if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0)
@@ -1143,28 +1159,6 @@ decode_intra_mb:
                 fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
                 fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
             }
-        } else /* yuv420 */ {
-            if(cbp&0x30){
-                for(chroma_idx=0; chroma_idx<2; chroma_idx++)
-                    if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
-                        return -1;
-                    }
-            }
-
-            if(cbp&0x20){
-                for(chroma_idx=0; chroma_idx<2; chroma_idx++){
-                    const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
-                    for(i4x4=0; i4x4<4; i4x4++){
-                        const int index= 16 + 16*chroma_idx + i4x4;
-                        if( decode_residual(h, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
-                            return -1;
-                        }
-                    }
-                }
-            }else{
-                fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
-                fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
-            }
         }
     }else{
         fill_rectangle(&h->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);
diff --git a/libavcodec/h264_direct.c b/libavcodec/h264_direct.c
index f983898..3289fe4 100644
--- a/libavcodec/h264_direct.c
+++ b/libavcodec/h264_direct.c
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... direct mb/block decoding
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -229,6 +229,7 @@ static void pred_spatial_direct_motion(H264Context *const h, int *mb_type)
                 else
                     mv[list] = AV_RN32A(C);
             }
+            av_assert2(ref[list] < (h->ref_count[list] << !!FRAME_MBAFF(h)));
         } else {
             int mask = ~(MB_TYPE_L0 << (2 * list));
             mv[list]  = 0;
@@ -314,8 +315,8 @@ single_col:
 
     await_reference_mb_row(h, &h->ref_list[1][0], mb_y);
 
-    l1mv0  = &h->ref_list[1][0].motion_val[0][h->mb2b_xy[mb_xy]];
-    l1mv1  = &h->ref_list[1][0].motion_val[1][h->mb2b_xy[mb_xy]];
+    l1mv0  = (void*)&h->ref_list[1][0].motion_val[0][h->mb2b_xy[mb_xy]];
+    l1mv1  = (void*)&h->ref_list[1][0].motion_val[1][h->mb2b_xy[mb_xy]];
     l1ref0 = &h->ref_list[1][0].ref_index[0][4 * mb_xy];
     l1ref1 = &h->ref_list[1][0].ref_index[1][4 * mb_xy];
     if (!b8_stride) {
@@ -537,8 +538,8 @@ single_col:
 
     await_reference_mb_row(h, &h->ref_list[1][0], mb_y);
 
-    l1mv0  = &h->ref_list[1][0].motion_val[0][h->mb2b_xy[mb_xy]];
-    l1mv1  = &h->ref_list[1][0].motion_val[1][h->mb2b_xy[mb_xy]];
+    l1mv0  = (void*)&h->ref_list[1][0].motion_val[0][h->mb2b_xy[mb_xy]];
+    l1mv1  = (void*)&h->ref_list[1][0].motion_val[1][h->mb2b_xy[mb_xy]];
     l1ref0 = &h->ref_list[1][0].ref_index[0][4 * mb_xy];
     l1ref1 = &h->ref_list[1][0].ref_index[1][4 * mb_xy];
     if (!b8_stride) {
diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c
index 717c225..8c052c3 100644
--- a/libavcodec/h264_loopfilter.c
+++ b/libavcodec/h264_loopfilter.c
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... loop filter
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -34,8 +34,6 @@
 #include "mpegutils.h"
 #include "rectangle.h"
 
-#include <assert.h>
-
 /* Deblocking filter (p153) */
 static const uint8_t alpha_table[52*3] = {
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
@@ -243,7 +241,7 @@ static av_always_inline void h264_filter_mb_fast_internal(H264Context *h,
                                                           unsigned int uvlinesize,
                                                           int pixel_shift)
 {
-    int chroma = !(CONFIG_GRAY && (h->flags&CODEC_FLAG_GRAY));
+    int chroma = CHROMA(h) && !(CONFIG_GRAY && (h->flags&CODEC_FLAG_GRAY));
     int chroma444 = CHROMA444(h);
     int chroma422 = CHROMA422(h);
 
@@ -416,7 +414,7 @@ static av_always_inline void h264_filter_mb_fast_internal(H264Context *h,
 }
 
 void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
-    assert(!FRAME_MBAFF(h));
+    av_assert2(!FRAME_MBAFF(h));
     if(!h->h264dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) {
         ff_h264_filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
         return;
@@ -580,7 +578,9 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
             // value in IPCM macroblocks.
             if(bS[0]+bS[1]+bS[2]+bS[3]){
                 qp = (h->cur_pic.qscale_table[mb_xy] + h->cur_pic.qscale_table[mbm_xy] + 1) >> 1;
+                //tprintf(h->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp[0], h->cur_pic.qscale_table[mbn_xy]);
                 tprintf(h->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
+                //{ int i; for (i = 0; i < 4; i++) tprintf(h->avctx, " bS[%d]:%d", i, bS[i]); tprintf(h->avctx, "\n"); }
                 chroma_qp_avg[0] = (h->chroma_qp[0] + get_chroma_qp(h, 0, h->cur_pic.qscale_table[mbm_xy]) + 1) >> 1;
                 chroma_qp_avg[1] = (h->chroma_qp[1] + get_chroma_qp(h, 1, h->cur_pic.qscale_table[mbm_xy]) + 1) >> 1;
                 if( dir == 0 ) {
@@ -663,7 +663,9 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u
         // Do not use s->qscale as luma quantizer because it has not the same
         // value in IPCM macroblocks.
         qp = h->cur_pic.qscale_table[mb_xy];
+        //tprintf(h->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp[0], h->cur_pic.qscale_table[mbn_xy]);
         tprintf(h->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
+        //{ int i; for (i = 0; i < 4; i++) tprintf(h->avctx, " bS[%d]:%d", i, bS[i]); tprintf(h->avctx, "\n"); }
         if( dir == 0 ) {
             filter_mb_edgev( &img_y[4*edge << h->pixel_shift], linesize, bS, qp, a, b, h, 0 );
             if (chroma) {
@@ -705,7 +707,7 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
     const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
     int first_vertical_edge_done = 0;
     av_unused int dir;
-    int chroma = !(CONFIG_GRAY && (h->flags&CODEC_FLAG_GRAY));
+    int chroma = CHROMA(h) && !(CONFIG_GRAY && (h->flags&CODEC_FLAG_GRAY));
     int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
     int a = 52 + h->slice_alpha_c0_offset - qp_bd_offset;
     int b = 52 + h->slice_beta_offset - qp_bd_offset;
diff --git a/libavcodec/h264_mb.c b/libavcodec/h264_mb.c
index 61d68ab..191c01a 100644
--- a/libavcodec/h264_mb.c
+++ b/libavcodec/h264_mb.c
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... decoder
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -40,13 +40,13 @@ static inline int get_lowest_part_list_y(H264Context *h, H264Picture *pic, int n
                                          int height, int y_offset, int list)
 {
     int raw_my             = h->mv_cache[list][scan8[n]][1];
-    int filter_height_up   = (raw_my & 3) ? 2 : 0;
     int filter_height_down = (raw_my & 3) ? 3 : 0;
     int full_my            = (raw_my >> 2) + y_offset;
-    int top                = full_my - filter_height_up;
     int bottom             = full_my + filter_height_down + height;
 
-    return FFMAX(abs(top), bottom);
+    av_assert2(height >= 0);
+
+    return FFMAX(0, bottom);
 }
 
 static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n,
@@ -118,7 +118,7 @@ static void await_references(H264Context *h)
     } else {
         int i;
 
-        assert(IS_8X8(mb_type));
+        av_assert2(IS_8X8(mb_type));
 
         for (i = 0; i < 4; i++) {
             const int sub_mb_type = h->sub_mb_type[i];
@@ -150,7 +150,7 @@ static void await_references(H264Context *h)
                                   nrefs);
             } else {
                 int j;
-                assert(IS_SUB_4X4(sub_mb_type));
+                av_assert2(IS_SUB_4X4(sub_mb_type));
                 for (j = 0; j < 4; j++) {
                     int sub_y_offset = y_offset + 2 * (j & 2);
                     get_lowest_part_y(h, refs, n + j, 4, sub_y_offset,
@@ -487,9 +487,7 @@ static av_always_inline void prefetch_motion(H264Context *h, int list,
             h->vdsp.prefetch(src[1] + off, h->linesize, 4);
             h->vdsp.prefetch(src[2] + off, h->linesize, 4);
         } else {
-            off = ((mx >> 1) << pixel_shift) +
-                  ((my >> 1) + (h->mb_x & 7)) * h->uvlinesize +
-                  (64 << pixel_shift);
+            off= (((mx>>1)+64)<<pixel_shift) + ((my>>1) + (h->mb_x&7))*h->uvlinesize;
             h->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
         }
     }
@@ -555,10 +553,8 @@ static av_always_inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
             XCHG(h->top_borders[top_idx][h->mb_x + 1],
                  src_y + (17 << pixel_shift), 1);
         }
-    }
-    if (simple || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
-        if (chroma444) {
-            if (deblock_top) {
+        if (simple || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
+            if (chroma444) {
                 if (deblock_topleft) {
                     XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                     XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
@@ -571,9 +567,7 @@ static av_always_inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
                     XCHG(h->top_borders[top_idx][h->mb_x + 1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
                     XCHG(h->top_borders[top_idx][h->mb_x + 1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
                 }
-            }
-        } else {
-            if (deblock_top) {
+            } else {
                 if (deblock_topleft) {
                     XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                     XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
@@ -630,7 +624,12 @@ static av_always_inline void hl_decode_mb_predict_luma(H264Context *h,
                 uint8_t *const ptr = dest_y + block_offset[i];
                 const int dir      = h->intra4x4_pred_mode_cache[scan8[i]];
                 if (transform_bypass && h->sps.profile_idc == 244 && dir <= 1) {
-                    h->hpc.pred8x8l_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
+                    if (h->x264_build != -1) {
+                        h->hpc.pred8x8l_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift), linesize);
+                    } else
+                        h->hpc.pred8x8l_filter_add[dir](ptr, h->mb + (i * 16 + p * 256 << pixel_shift),
+                                                        (h-> topleft_samples_available << i) & 0x8000,
+                                                        (h->topright_samples_available << i) & 0x4000, linesize);
                 } else {
                     const int nnz = h->non_zero_count_cache[scan8[i + p * 16]];
                     h->hpc.pred8x8l[dir](ptr, (h->topleft_samples_available << i) & 0x8000,
@@ -663,7 +662,7 @@ static av_always_inline void hl_decode_mb_predict_luma(H264Context *h,
                     uint64_t tr_high;
                     if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) {
                         const int topright_avail = (h->topright_samples_available << i) & 0x8000;
-                        assert(h->mb_y || linesize <= block_offset[i]);
+                        av_assert2(h->mb_y || linesize <= block_offset[i]);
                         if (!topright_avail) {
                             if (pixel_shift) {
                                 tr_high  = ((uint16_t *)ptr)[3 - linesize / 2] * 0x0001000100010001ULL;
diff --git a/libavcodec/h264_mb_template.c b/libavcodec/h264_mb_template.c
index 5b2917f..7c9d72b 100644
--- a/libavcodec/h264_mb_template.c
+++ b/libavcodec/h264_mb_template.c
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... decoder
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -100,8 +100,8 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h)
     }
 
     if (!SIMPLE && IS_INTRA_PCM(mb_type)) {
+        const int bit_depth = h->sps.bit_depth_luma;
         if (PIXEL_SHIFT) {
-            const int bit_depth = h->sps.bit_depth_luma;
             int j;
             GetBitContext gb;
             init_get_bits(&gb, h->intra_pcm_ptr,
@@ -116,13 +116,10 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h)
                 if (!h->sps.chroma_format_idc) {
                     for (i = 0; i < block_h; i++) {
                         uint16_t *tmp_cb = (uint16_t *)(dest_cb + i * uvlinesize);
-                        for (j = 0; j < 8; j++)
-                            tmp_cb[j] = 1 << (bit_depth - 1);
-                    }
-                    for (i = 0; i < block_h; i++) {
                         uint16_t *tmp_cr = (uint16_t *)(dest_cr + i * uvlinesize);
-                        for (j = 0; j < 8; j++)
-                            tmp_cr[j] = 1 << (bit_depth - 1);
+                        for (j = 0; j < 8; j++) {
+                            tmp_cb[j] = tmp_cr[j] = 1 << (bit_depth - 1);
+                        }
                     }
                 } else {
                     for (i = 0; i < block_h; i++) {
@@ -142,9 +139,9 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h)
                 memcpy(dest_y + i * linesize, h->intra_pcm_ptr + i * 16, 16);
             if (SIMPLE || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) {
                 if (!h->sps.chroma_format_idc) {
-                    for (i = 0; i < block_h; i++) {
-                        memset(dest_cb + i * uvlinesize, 128, 8);
-                        memset(dest_cr + i * uvlinesize, 128, 8);
+                    for (i = 0; i < 8; i++) {
+                        memset(dest_cb + i * uvlinesize, 1 << (bit_depth - 1), 8);
+                        memset(dest_cr + i * uvlinesize, 1 << (bit_depth - 1), 8);
                     }
                 } else {
                     const uint8_t *src_cb = h->intra_pcm_ptr + 256;
diff --git a/libavcodec/h264_mc_template.c b/libavcodec/h264_mc_template.c
index c085cc3..0e4e477 100644
--- a/libavcodec/h264_mc_template.c
+++ b/libavcodec/h264_mc_template.c
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... decoder
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -72,7 +72,7 @@ static void MCFUNC(hl_motion)(H264Context *h, uint8_t *dest_y,
     const int mb_xy   = h->mb_xy;
     const int mb_type = h->cur_pic.mb_type[mb_xy];
 
-    assert(IS_INTER(mb_type));
+    av_assert2(IS_INTER(mb_type));
 
     if (HAVE_THREADS && (h->avctx->active_thread_type & FF_THREAD_FRAME))
         await_references(h);
@@ -104,7 +104,7 @@ static void MCFUNC(hl_motion)(H264Context *h, uint8_t *dest_y,
     } else {
         int i;
 
-        assert(IS_8X8(mb_type));
+        av_assert2(IS_8X8(mb_type));
 
         for (i = 0; i < 4; i++) {
             const int sub_mb_type = h->sub_mb_type[i];
@@ -142,7 +142,7 @@ static void MCFUNC(hl_motion)(H264Context *h, uint8_t *dest_y,
                         IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
             } else {
                 int j;
-                assert(IS_SUB_4X4(sub_mb_type));
+                av_assert2(IS_SUB_4X4(sub_mb_type));
                 for (j = 0; j < 4; j++) {
                     int sub_x_offset = x_offset + 2 * (j & 1);
                     int sub_y_offset = y_offset + (j & 2);
diff --git a/libavcodec/h264_mp4toannexb_bsf.c b/libavcodec/h264_mp4toannexb_bsf.c
index 3b212e5..739ff95 100644
--- a/libavcodec/h264_mp4toannexb_bsf.c
+++ b/libavcodec/h264_mp4toannexb_bsf.c
@@ -2,20 +2,20 @@
  * H.264 MP4 to Annex B byte stream format filter
  * Copyright (c) 2007 Benoit Fouet <benoit.fouet@free.fr>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,7 +27,8 @@
 
 typedef struct H264BSFContext {
     uint8_t  length_size;
-    uint8_t  first_idr;
+    uint8_t  new_idr;
+    uint8_t  idr_sps_pps_seen;
     int      extradata_parsed;
 } H264BSFContext;
 
@@ -69,17 +70,10 @@ static int h264_extradata_to_annexb(AVCodecContext *avctx, const int padding)
     static const uint8_t nalu_header[4] = { 0, 0, 0, 1 };
     int length_size = (*extradata++ & 0x3) + 1; // retrieve length coded size
 
-    if (length_size == 3)
-        return AVERROR(EINVAL);
-
     /* retrieve sps and pps unit(s) */
     unit_nb = *extradata++ & 0x1f; /* number of sps unit(s) */
     if (!unit_nb) {
-        unit_nb = *extradata++; /* number of pps unit(s) */
-        sps_done++;
-
-        if (unit_nb)
-            pps_seen = 1;
+        goto pps;
     } else {
         sps_seen = 1;
     }
@@ -89,9 +83,15 @@ static int h264_extradata_to_annexb(AVCodecContext *avctx, const int padding)
 
         unit_size   = AV_RB16(extradata);
         total_size += unit_size + 4;
-        if (total_size > INT_MAX - padding ||
-            extradata + 2 + unit_size > avctx->extradata +
-            avctx->extradata_size) {
+        if (total_size > INT_MAX - padding) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Too big extradata size, corrupted stream or invalid MP4/AVCC bitstream\n");
+            av_free(out);
+            return AVERROR(EINVAL);
+        }
+        if (extradata + 2 + unit_size > avctx->extradata + avctx->extradata_size) {
+            av_log(avctx, AV_LOG_ERROR, "Packet header is not contained in global extradata, "
+                   "corrupted stream or invalid MP4/AVCC bitstream\n");
             av_free(out);
             return AVERROR(EINVAL);
         }
@@ -100,7 +100,7 @@ static int h264_extradata_to_annexb(AVCodecContext *avctx, const int padding)
         memcpy(out + total_size - unit_size - 4, nalu_header, 4);
         memcpy(out + total_size - unit_size, extradata + 2, unit_size);
         extradata += 2 + unit_size;
-
+pps:
         if (!unit_nb && !sps_done++) {
             unit_nb = *extradata++; /* number of pps unit(s) */
             if (unit_nb)
@@ -135,6 +135,7 @@ static int h264_mp4toannexb_filter(AVBitStreamFilterContext *bsfc,
                                    int keyframe)
 {
     H264BSFContext *ctx = bsfc->priv_data;
+    int i;
     uint8_t unit_type;
     int32_t nal_size;
     uint32_t cumul_size    = 0;
@@ -154,22 +155,20 @@ static int h264_mp4toannexb_filter(AVBitStreamFilterContext *bsfc,
         if (ret < 0)
             return ret;
         ctx->length_size      = ret;
-        ctx->first_idr        = 1;
+        ctx->new_idr          = 1;
+        ctx->idr_sps_pps_seen = 0;
         ctx->extradata_parsed = 1;
     }
 
     *poutbuf_size = 0;
     *poutbuf      = NULL;
     do {
+        ret= AVERROR(EINVAL);
         if (buf + ctx->length_size > buf_end)
             goto fail;
 
-        if (ctx->length_size == 1) {
-            nal_size = buf[0];
-        } else if (ctx->length_size == 2) {
-            nal_size = AV_RB16(buf);
-        } else
-            nal_size = AV_RB32(buf);
+        for (nal_size = 0, i = 0; i<ctx->length_size; i++)
+            nal_size = (nal_size << 8) | buf[i];
 
         buf      += ctx->length_size;
         unit_type = *buf & 0x1f;
@@ -177,19 +176,30 @@ static int h264_mp4toannexb_filter(AVBitStreamFilterContext *bsfc,
         if (buf + nal_size > buf_end || nal_size < 0)
             goto fail;
 
-        /* prepend only to the first type 5 NAL unit of an IDR picture */
-        if (ctx->first_idr && unit_type == 5) {
-            if (alloc_and_copy(poutbuf, poutbuf_size,
+        if (unit_type == 7 || unit_type == 8)
+            ctx->idr_sps_pps_seen = 1;
+
+        /* If this is a new IDR picture following an IDR picture, reset the idr flag.
+         * Just check that first_mb_in_slice is 0, as this is the simplest solution.
+         * idr_pic_id could be checked instead, but that would complicate the parsing. */
+        if (!ctx->new_idr && unit_type == 5 && (buf[1] & 0x80))
+            ctx->new_idr = 1;
+
+        /* prepend only to the first type 5 NAL unit of an IDR picture, if no sps/pps are already present */
+        if (ctx->new_idr && unit_type == 5 && !ctx->idr_sps_pps_seen) {
+            if ((ret=alloc_and_copy(poutbuf, poutbuf_size,
                                avctx->extradata, avctx->extradata_size,
-                               buf, nal_size) < 0)
+                               buf, nal_size)) < 0)
                 goto fail;
-            ctx->first_idr = 0;
+            ctx->new_idr = 0;
         } else {
-            if (alloc_and_copy(poutbuf, poutbuf_size,
-                               NULL, 0, buf, nal_size) < 0)
+            if ((ret=alloc_and_copy(poutbuf, poutbuf_size,
+                               NULL, 0, buf, nal_size)) < 0)
                 goto fail;
-            if (!ctx->first_idr && unit_type == 1)
-                ctx->first_idr = 1;
+            if (!ctx->new_idr && unit_type == 1) {
+                ctx->new_idr = 1;
+                ctx->idr_sps_pps_seen = 0;
+            }
         }
 
         buf        += nal_size;
@@ -201,11 +211,11 @@ static int h264_mp4toannexb_filter(AVBitStreamFilterContext *bsfc,
 fail:
     av_freep(poutbuf);
     *poutbuf_size = 0;
-    return AVERROR(EINVAL);
+    return ret;
 }
 
 AVBitStreamFilter ff_h264_mp4toannexb_bsf = {
-    "h264_mp4toannexb",
-    sizeof(H264BSFContext),
-    h264_mp4toannexb_filter,
+    .name           = "h264_mp4toannexb",
+    .priv_data_size = sizeof(H264BSFContext),
+    .filter         = h264_mp4toannexb_filter,
 };
diff --git a/libavcodec/h264_mvpred.h b/libavcodec/h264_mvpred.h
index f7c3887..5f1e9a1 100644
--- a/libavcodec/h264_mvpred.h
+++ b/libavcodec/h264_mvpred.h
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... motion vector predicion
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -32,8 +32,8 @@
 #include "avcodec.h"
 #include "h264.h"
 #include "mpegutils.h"
+#include "libavutil/avassert.h"
 
-#include <assert.h>
 
 static av_always_inline int fetch_diagonal_mv(H264Context *h, const int16_t **C,
                                               int i, int list, int part_width)
@@ -103,7 +103,7 @@ static av_always_inline void pred_motion(H264Context *const h, int n,
     const int16_t *C;
     int diagonal_ref, match_count;
 
-    assert(part_width == 1 || part_width == 2 || part_width == 4);
+    av_assert2(part_width == 1 || part_width == 2 || part_width == 4);
 
 /* mv_cache
  * B . . A T T T T
@@ -482,7 +482,7 @@ static void fill_decode_caches(H264Context *h, int mb_type)
                 } else {
                     int left_typei = h->cur_pic.mb_type[left_xy[LTOP] + h->mb_stride];
 
-                    assert(left_xy[LTOP] == left_xy[LBOT]);
+                    av_assert2(left_xy[LTOP] == left_xy[LBOT]);
                     if (!((left_typei & type_mask) && (left_type[LTOP] & type_mask))) {
                         h->topleft_samples_available &= 0xDF5F;
                         h->left_samples_available    &= 0x5F5F;
@@ -607,7 +607,7 @@ static void fill_decode_caches(H264Context *h, int mb_type)
             int16_t(*mv)[2]       = h->cur_pic.motion_val[list];
             if (!USES_LIST(mb_type, list))
                 continue;
-            assert(!(IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred));
+            av_assert2(!(IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred));
 
             if (USES_LIST(top_type, list)) {
                 const int b_xy = h->mb2b_xy[top_xy] + 3 * b_stride;
@@ -664,7 +664,7 @@ static void fill_decode_caches(H264Context *h, int mb_type)
                 ref_cache[4 - 1 * 8] = topright_type ? LIST_NOT_USED
                                                      : PART_NOT_AVAILABLE;
             }
-            if (ref_cache[4 - 1 * 8] < 0) {
+            if(ref_cache[2 - 1*8] < 0 || ref_cache[4 - 1 * 8] < 0) {
                 if (USES_LIST(topleft_type, list)) {
                     const int b_xy  = h->mb2b_xy[topleft_xy] + 3 + b_stride +
                                       (h->topleft_partition & 2 * b_stride);
diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c
index 8ced0b8..1d4c2cf 100644
--- a/libavcodec/h264_parser.c
+++ b/libavcodec/h264_parser.c
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... parser
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,6 +25,8 @@
  * @author Michael Niedermayer <michaelni@gmx.at>
  */
 
+#define UNCHECKED_BITSTREAM_READER 1
+
 #include "libavutil/attributes.h"
 #include "parser.h"
 #include "h264data.h"
@@ -32,24 +34,40 @@
 #include "internal.h"
 #include "mpegutils.h"
 
-#include <assert.h>
-
 
 static int h264_find_frame_end(H264Context *h, const uint8_t *buf,
                                int buf_size)
 {
-    int i;
+    int i, j;
     uint32_t state;
     ParseContext *pc = &h->parse_context;
+    int next_avc= h->is_avc ? 0 : buf_size;
+
 //    mb_addr= pc->mb_addr - 1;
     state = pc->state;
     if (state > 13)
         state = 7;
 
+    if (h->is_avc && !h->nal_length_size)
+        av_log(h->avctx, AV_LOG_ERROR, "AVC-parser: nal length size invalid\n");
+
     for (i = 0; i < buf_size; i++) {
+        if (i >= next_avc) {
+            int nalsize = 0;
+            i = next_avc;
+            for (j = 0; j < h->nal_length_size; j++)
+                nalsize = (nalsize << 8) | buf[i++];
+            if (nalsize <= 0 || nalsize > buf_size - i) {
+                av_log(h->avctx, AV_LOG_ERROR, "AVC-parser: nal size %d remaining %d\n", nalsize, buf_size - i);
+                return buf_size;
+            }
+            next_avc = i + nalsize;
+            state    = 5;
+        }
+
         if (state == 7) {
-            i += h->h264dsp.startcode_find_candidate(buf + i, buf_size - i);
-            if (i < buf_size)
+            i += h->h264dsp.startcode_find_candidate(buf + i, next_avc - i);
+            if (i < next_avc)
                 state = 2;
         } else if (state <= 2) {
             if (buf[i] == 1)
@@ -68,26 +86,40 @@ static int h264_find_frame_end(H264Context *h, const uint8_t *buf,
                 }
             } else if (nalu_type == NAL_SLICE || nalu_type == NAL_DPA ||
                        nalu_type == NAL_IDR_SLICE) {
+                state += 8;
+                continue;
+            }
+            state = 7;
+        } else {
+            h->parse_history[h->parse_history_count++]= buf[i];
+            if (h->parse_history_count>5) {
+                unsigned int mb, last_mb= h->parse_last_mb;
+                GetBitContext gb;
+
+                init_get_bits(&gb, h->parse_history, 8*h->parse_history_count);
+                h->parse_history_count=0;
+                mb= get_ue_golomb_long(&gb);
+                h->parse_last_mb= mb;
                 if (pc->frame_start_found) {
-                    state += 8;
-                    continue;
+                    if (mb <= last_mb)
+                        goto found;
                 } else
                     pc->frame_start_found = 1;
+                state = 7;
             }
-            state = 7;
-        } else {
-            if (buf[i] & 0x80)
-                goto found;
-            state = 7;
         }
     }
     pc->state = state;
+    if (h->is_avc)
+        return next_avc;
     return END_NOT_FOUND;
 
 found:
     pc->state             = 7;
     pc->frame_start_found = 0;
-    return i - (state & 5);
+    if (h->is_avc)
+        return next_avc;
+    return i - (state & 5) - 5 * (state > 7);
 }
 
 static int scan_mmco_reset(AVCodecParserContext *s)
@@ -178,6 +210,7 @@ static inline int parse_nal_units(AVCodecParserContext *s,
     unsigned int slice_type;
     int state = -1, got_reset = 0;
     const uint8_t *ptr;
+    int q264 = buf_size >=4 && !memcmp("Q264", buf, 4);
     int field_poc[2];
 
     /* set some sane default values */
@@ -187,17 +220,31 @@ static inline int parse_nal_units(AVCodecParserContext *s,
 
     h->avctx = avctx;
     ff_h264_reset_sei(h);
+    h->sei_fpa.frame_packing_arrangement_cancel_flag = -1;
 
     if (!buf_size)
         return 0;
 
     for (;;) {
-        int src_length, dst_length, consumed;
+        int src_length, dst_length, consumed, nalsize = 0;
+        if (h->is_avc) {
+            int i;
+            if (h->nal_length_size >= buf_end - buf) break;
+            nalsize = 0;
+            for (i = 0; i < h->nal_length_size; i++)
+                nalsize = (nalsize << 8) | *buf++;
+            if (nalsize <= 0 || nalsize > buf_end - buf) {
+                av_log(h->avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
+                break;
+            }
+            src_length = nalsize;
+        } else {
         buf = avpriv_find_start_code(buf, buf_end, &state);
         if (buf >= buf_end)
             break;
         --buf;
         src_length = buf_end - buf;
+        }
         switch (state & 0x1f) {
         case NAL_SLICE:
         case NAL_IDR_SLICE:
@@ -238,7 +285,7 @@ static inline int parse_nal_units(AVCodecParserContext *s,
             h->prev_poc_lsb          = 0;
         /* fall through */
         case NAL_SLICE:
-            get_ue_golomb(&h->gb);  // skip first_mb_in_slice
+            get_ue_golomb_long(&h->gb);  // skip first_mb_in_slice
             slice_type   = get_ue_golomb_31(&h->gb);
             s->pict_type = golomb_to_pict_type[slice_type % 5];
             if (h->sei_recovery_frame_cnt >= 0) {
@@ -265,6 +312,9 @@ static inline int parse_nal_units(AVCodecParserContext *s,
             h->sps       = *h->sps_buffers[h->pps.sps_id];
             h->frame_num = get_bits(&h->gb, h->sps.log2_max_frame_num);
 
+            if(h->sps.ref_frame_count <= 1 && h->pps.ref_count[0] <= 1 && s->pict_type == AV_PICTURE_TYPE_I)
+                s->key_frame = 1;
+
             avctx->profile = ff_h264_get_profile(&h->sps);
             avctx->level   = h->sps.level_idc;
 
@@ -389,10 +439,12 @@ static inline int parse_nal_units(AVCodecParserContext *s,
 
             return 0; /* no need to evaluate the rest */
         }
-        buf += consumed;
+        buf += h->is_avc ? nalsize : consumed;
     }
+    if (q264)
+        return 0;
     /* didn't find a picture! */
-    av_log(h->avctx, AV_LOG_ERROR, "missing picture in access unit\n");
+    av_log(h->avctx, AV_LOG_ERROR, "missing picture in access unit with size %d\n", buf_size);
     return -1;
 }
 
@@ -409,14 +461,13 @@ static int h264_parse(AVCodecParserContext *s,
         h->got_first = 1;
         if (avctx->extradata_size) {
             h->avctx = avctx;
-            // must be done like in the decoder.
-            // otherwise opening the parser, creating extradata,
-            // and then closing and opening again
+            // must be done like in decoder, otherwise opening the parser,
+            // letting it create extradata and then closing and opening again
             // will cause has_b_frames to be always set.
-            // NB: estimate_timings_from_pts behaves exactly like this.
+            // Note that estimate_timings_from_pts does exactly this.
             if (!avctx->has_b_frames)
                 h->low_delay = 1;
-            ff_h264_decode_extradata(h);
+            ff_h264_decode_extradata(h, avctx->extradata, avctx->extradata_size);
         }
     }
 
@@ -432,7 +483,7 @@ static int h264_parse(AVCodecParserContext *s,
         }
 
         if (next < 0 && next != END_NOT_FOUND) {
-            assert(pc->last_index + next >= 0);
+            av_assert1(pc->last_index + next >= 0);
             h264_find_frame_end(h, &pc->buffer[pc->last_index + next], -next); // update state
         }
     }
@@ -468,7 +519,7 @@ static int h264_split(AVCodecContext *avctx,
     for (i = 0; i <= buf_size; i++) {
         if ((state & 0xFFFFFF1F) == 0x107)
             has_sps = 1;
-        /*  if((state&0xFFFFFF1F) == 0x101 ||
+        /*  if ((state&0xFFFFFF1F) == 0x101 ||
          *     (state&0xFFFFFF1F) == 0x102 ||
          *     (state&0xFFFFFF1F) == 0x105) {
          *  }
diff --git a/libavcodec/h264_picture.c b/libavcodec/h264_picture.c
index 1d7461b..b6bdae4 100644
--- a/libavcodec/h264_picture.c
+++ b/libavcodec/h264_picture.c
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... decoder
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -42,6 +42,7 @@
 #include "mpegutils.h"
 #include "rectangle.h"
 #include "thread.h"
+#include "vdpau_internal.h"
 
 void ff_h264_unref_picture(H264Context *h, H264Picture *pic)
 {
@@ -115,7 +116,12 @@ int ff_h264_ref_picture(H264Context *h, H264Picture *dst, H264Picture *src)
     dst->field_picture = src->field_picture;
     dst->needs_realloc = src->needs_realloc;
     dst->reference     = src->reference;
+    dst->crop          = src->crop;
+    dst->crop_left     = src->crop_left;
+    dst->crop_top      = src->crop_top;
     dst->recovered     = src->recovered;
+    dst->invalid_gap   = src->invalid_gap;
+    dst->sei_recovery_frame_cnt = src->sei_recovery_frame_cnt;
 
     return 0;
 fail:
@@ -123,11 +129,13 @@ fail:
     return ret;
 }
 
-#if CONFIG_ERROR_RESILIENCE
-static void h264_set_erpic(ERPicture *dst, H264Picture *src)
+void ff_h264_set_erpic(ERPicture *dst, H264Picture *src)
 {
+#if CONFIG_ERROR_RESILIENCE
     int i;
 
+    memset(dst, 0, sizeof(*dst));
+
     if (!src)
         return;
 
@@ -141,8 +149,8 @@ static void h264_set_erpic(ERPicture *dst, H264Picture *src)
 
     dst->mb_type = src->mb_type;
     dst->field_picture = src->field_picture;
-}
 #endif /* CONFIG_ERROR_RESILIENCE */
+}
 
 int ff_h264_field_end(H264Context *h, int in_setup)
 {
@@ -150,9 +158,9 @@ int ff_h264_field_end(H264Context *h, int in_setup)
     int err = 0;
     h->mb_y = 0;
 
-    if (!in_setup && !h->droppable)
-        ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX,
-                                  h->picture_structure == PICT_BOTTOM_FIELD);
+    if (CONFIG_H264_VDPAU_DECODER &&
+        h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
+        ff_vdpau_h264_set_reference_frames(h);
 
     if (in_setup || !(avctx->active_thread_type & FF_THREAD_FRAME)) {
         if (!h->droppable) {
@@ -171,6 +179,10 @@ int ff_h264_field_end(H264Context *h, int in_setup)
                    "hardware accelerator failed to decode picture\n");
     }
 
+    if (CONFIG_H264_VDPAU_DECODER &&
+        h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
+        ff_vdpau_h264_picture_complete(h);
+
 #if CONFIG_ERROR_RESILIENCE
     /*
      * FIXME: Error handling code does not seem to support interlaced
@@ -184,16 +196,15 @@ int ff_h264_field_end(H264Context *h, int in_setup)
      * past end by one (callers fault) and resync_mb_y != 0
      * causes problems for the first MB line, too.
      */
-    if (!FIELD_PICTURE(h)) {
-        h264_set_erpic(&h->er.cur_pic, h->cur_pic_ptr);
-        h264_set_erpic(&h->er.last_pic,
-                       h->ref_count[0] ? &h->ref_list[0][0] : NULL);
-        h264_set_erpic(&h->er.next_pic,
-                       h->ref_count[1] ? &h->ref_list[1][0] : NULL);
+    if (!FIELD_PICTURE(h) && h->current_slice && !h->sps.new) {
+        ff_h264_set_erpic(&h->er.cur_pic, h->cur_pic_ptr);
         ff_er_frame_end(&h->er);
     }
 #endif /* CONFIG_ERROR_RESILIENCE */
 
+    if (!in_setup && !h->droppable)
+        ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX,
+                                  h->picture_structure == PICT_BOTTOM_FIELD);
     emms_c();
 
     h->current_slice = 0;
diff --git a/libavcodec/h264_ps.c b/libavcodec/h264_ps.c
index 183cc44..9707332 100644
--- a/libavcodec/h264_ps.c
+++ b/libavcodec/h264_ps.c
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... parameter set decoding
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -70,13 +70,35 @@ static const AVRational pixel_aspect[17] = {
     QP(37, d), QP(37, d), QP(37, d), QP(38, d), QP(38, d), QP(38, d),   \
     QP(39, d), QP(39, d), QP(39, d), QP(39, d)
 
-const uint8_t ff_h264_chroma_qp[3][QP_MAX_NUM + 1] = {
+const uint8_t ff_h264_chroma_qp[7][QP_MAX_NUM + 1] = {
     { CHROMA_QP_TABLE_END(8) },
     { 0, 1, 2, 3, 4, 5,
       CHROMA_QP_TABLE_END(9) },
-    { 0, 1, 2, 3, 4, 5,
+    { 0, 1, 2, 3,  4,  5,
       6, 7, 8, 9, 10, 11,
       CHROMA_QP_TABLE_END(10) },
+    { 0,  1, 2, 3,  4,  5,
+      6,  7, 8, 9, 10, 11,
+      12,13,14,15, 16, 17,
+      CHROMA_QP_TABLE_END(11) },
+    { 0,  1, 2, 3,  4,  5,
+      6,  7, 8, 9, 10, 11,
+      12,13,14,15, 16, 17,
+      18,19,20,21, 22, 23,
+      CHROMA_QP_TABLE_END(12) },
+    { 0,  1, 2, 3,  4,  5,
+      6,  7, 8, 9, 10, 11,
+      12,13,14,15, 16, 17,
+      18,19,20,21, 22, 23,
+      24,25,26,27, 28, 29,
+      CHROMA_QP_TABLE_END(13) },
+    { 0,  1, 2, 3,  4,  5,
+      6,  7, 8, 9, 10, 11,
+      12,13,14,15, 16, 17,
+      18,19,20,21, 22, 23,
+      24,25,26,27, 28, 29,
+      30,31,32,33, 34, 35,
+      CHROMA_QP_TABLE_END(14) },
 };
 
 static const uint8_t default_scaling4[2][16] = {
@@ -182,6 +204,11 @@ static inline int decode_vui_parameters(H264Context *h, SPS *sps)
         get_ue_golomb(&h->gb);  /* chroma_sample_location_type_bottom_field */
     }
 
+    if (show_bits1(&h->gb) && get_bits_left(&h->gb) < 10) {
+        av_log(h->avctx, AV_LOG_WARNING, "Truncated VUI\n");
+        return 0;
+    }
+
     sps->timing_info_present_flag = get_bits1(&h->gb);
     if (sps->timing_info_present_flag) {
         sps->num_units_in_tick = get_bits_long(&h->gb, 32);
@@ -207,7 +234,8 @@ static inline int decode_vui_parameters(H264Context *h, SPS *sps)
         sps->vcl_hrd_parameters_present_flag)
         get_bits1(&h->gb);     /* low_delay_hrd_flag */
     sps->pic_struct_present_flag = get_bits1(&h->gb);
-
+    if (!get_bits_left(&h->gb))
+        return 0;
     sps->bitstream_restriction_flag = get_bits1(&h->gb);
     if (sps->bitstream_restriction_flag) {
         get_bits1(&h->gb);     /* motion_vectors_over_pic_boundaries_flag */
@@ -232,6 +260,7 @@ static inline int decode_vui_parameters(H264Context *h, SPS *sps)
             return AVERROR_INVALIDDATA;
         }
     }
+
     if (get_bits_left(&h->gb) < 0) {
         av_log(h->avctx, AV_LOG_ERROR,
                "Overread VUI by %d bits\n", -get_bits_left(&h->gb));
@@ -283,13 +312,11 @@ static void decode_scaling_matrices(H264Context *h, SPS *sps,
         decode_scaling_list(h, scaling_matrix4[5], 16, default_scaling4[1], scaling_matrix4[4]); // Inter, Cb
         if (is_sps || pps->transform_8x8_mode) {
             decode_scaling_list(h, scaling_matrix8[0], 64, default_scaling8[0], fallback[2]); // Intra, Y
-            if (sps->chroma_format_idc == 3) {
-                decode_scaling_list(h, scaling_matrix8[1], 64, default_scaling8[0], scaling_matrix8[0]); // Intra, Cr
-                decode_scaling_list(h, scaling_matrix8[2], 64, default_scaling8[0], scaling_matrix8[1]); // Intra, Cb
-            }
             decode_scaling_list(h, scaling_matrix8[3], 64, default_scaling8[1], fallback[3]); // Inter, Y
             if (sps->chroma_format_idc == 3) {
+                decode_scaling_list(h, scaling_matrix8[1], 64, default_scaling8[0], scaling_matrix8[0]); // Intra, Cr
                 decode_scaling_list(h, scaling_matrix8[4], 64, default_scaling8[1], scaling_matrix8[3]); // Inter, Cr
+                decode_scaling_list(h, scaling_matrix8[2], 64, default_scaling8[0], scaling_matrix8[1]); // Intra, Cb
                 decode_scaling_list(h, scaling_matrix8[5], 64, default_scaling8[1], scaling_matrix8[4]); // Inter, Cb
             }
         }
@@ -327,10 +354,12 @@ int ff_h264_decode_seq_parameter_set(H264Context *h)
     sps->profile_idc          = profile_idc;
     sps->constraint_set_flags = constraint_set_flags;
     sps->level_idc            = level_idc;
+    sps->full_range           = -1;
 
     memset(sps->scaling_matrix4, 16, sizeof(sps->scaling_matrix4));
     memset(sps->scaling_matrix8, 16, sizeof(sps->scaling_matrix8));
     sps->scaling_matrix_present = 0;
+    sps->colorspace = 2; //AVCOL_SPC_UNSPECIFIED
 
     if (sps->profile_idc == 100 ||  // High profile
         sps->profile_idc == 110 ||  // High10 profile
@@ -344,12 +373,16 @@ int ff_h264_decode_seq_parameter_set(H264Context *h)
         sps->profile_idc == 138 ||  // Multiview Depth High profile (MVCD)
         sps->profile_idc == 144) {  // old High444 profile
         sps->chroma_format_idc = get_ue_golomb_31(&h->gb);
-        if (sps->chroma_format_idc > 3) {
+        if (sps->chroma_format_idc > 3U) {
             avpriv_request_sample(h->avctx, "chroma_format_idc %u",
                                   sps->chroma_format_idc);
             goto fail;
         } else if (sps->chroma_format_idc == 3) {
             sps->residual_color_transform_flag = get_bits1(&h->gb);
+            if (sps->residual_color_transform_flag) {
+                av_log(h->avctx, AV_LOG_ERROR, "separate color planes are not supported\n");
+                goto fail;
+            }
         }
         sps->bit_depth_luma   = get_ue_golomb(&h->gb) + 8;
         sps->bit_depth_chroma = get_ue_golomb(&h->gb) + 8;
@@ -358,6 +391,11 @@ int ff_h264_decode_seq_parameter_set(H264Context *h)
                                   "Different chroma and luma bit depth");
             goto fail;
         }
+        if (sps->bit_depth_luma > 14U || sps->bit_depth_chroma > 14U) {
+            av_log(h->avctx, AV_LOG_ERROR, "illegal bit depth value (%d, %d)\n",
+                   sps->bit_depth_luma, sps->bit_depth_chroma);
+            goto fail;
+        }
         sps->transform_bypass = get_bits1(&h->gb);
         decode_scaling_matrices(h, sps, NULL, 1,
                                 sps->scaling_matrix4, sps->scaling_matrix8);
@@ -380,7 +418,12 @@ int ff_h264_decode_seq_parameter_set(H264Context *h)
     sps->poc_type = get_ue_golomb_31(&h->gb);
 
     if (sps->poc_type == 0) { // FIXME #define
-        sps->log2_max_poc_lsb = get_ue_golomb(&h->gb) + 4;
+        unsigned t = get_ue_golomb(&h->gb);
+        if (t>12) {
+            av_log(h->avctx, AV_LOG_ERROR, "log2_max_poc_lsb (%d) is out of range\n", t);
+            goto fail;
+        }
+        sps->log2_max_poc_lsb = t + 4;
     } else if (sps->poc_type == 1) { // FIXME #define
         sps->delta_pic_order_always_zero_flag = get_bits1(&h->gb);
         sps->offset_for_non_ref_pic           = get_se_golomb(&h->gb);
@@ -402,8 +445,10 @@ int ff_h264_decode_seq_parameter_set(H264Context *h)
     }
 
     sps->ref_frame_count = get_ue_golomb_31(&h->gb);
+    if (h->avctx->codec_tag == MKTAG('S', 'M', 'V', '2'))
+        sps->ref_frame_count = FFMAX(2, sps->ref_frame_count);
     if (sps->ref_frame_count > H264_MAX_PICTURE_COUNT - 2 ||
-        sps->ref_frame_count >= 32U) {
+        sps->ref_frame_count > 16U) {
         av_log(h->avctx, AV_LOG_ERROR,
                "too many reference frames %d\n", sps->ref_frame_count);
         goto fail;
@@ -426,11 +471,6 @@ int ff_h264_decode_seq_parameter_set(H264Context *h)
         sps->mb_aff = 0;
 
     sps->direct_8x8_inference_flag = get_bits1(&h->gb);
-    if (!sps->frame_mbs_only_flag && !sps->direct_8x8_inference_flag) {
-        av_log(h->avctx, AV_LOG_ERROR,
-               "This stream was generated by a broken encoder, invalid 8x8 inference\n");
-        goto fail;
-    }
 
 #ifndef ALLOW_INTERLACE
     if (sps->mb_aff)
@@ -443,6 +483,8 @@ int ff_h264_decode_seq_parameter_set(H264Context *h)
         int crop_right  = get_ue_golomb(&h->gb);
         int crop_top    = get_ue_golomb(&h->gb);
         int crop_bottom = get_ue_golomb(&h->gb);
+        int width  = 16 * sps->mb_width;
+        int height = 16 * sps->mb_height * (2 - sps->frame_mbs_only_flag);
 
         if (h->avctx->flags2 & CODEC_FLAG2_IGNORE_CROP) {
             av_log(h->avctx, AV_LOG_DEBUG, "discarding sps cropping, original "
@@ -469,6 +511,17 @@ int ff_h264_decode_seq_parameter_set(H264Context *h)
                        crop_left);
             }
 
+            if (crop_left  > (unsigned)INT_MAX / 4 / step_x ||
+                crop_right > (unsigned)INT_MAX / 4 / step_x ||
+                crop_top   > (unsigned)INT_MAX / 4 / step_y ||
+                crop_bottom> (unsigned)INT_MAX / 4 / step_y ||
+                (crop_left + crop_right ) * step_x >= width ||
+                (crop_top  + crop_bottom) * step_y >= height
+            ) {
+                av_log(h->avctx, AV_LOG_ERROR, "crop values invalid %d %d %d %d / %d %d\n", crop_left, crop_right, crop_top, crop_bottom, width, height);
+                goto fail;
+            }
+
             sps->crop_left   = crop_left   * step_x;
             sps->crop_right  = crop_right  * step_x;
             sps->crop_top    = crop_top    * step_y;
@@ -485,7 +538,7 @@ int ff_h264_decode_seq_parameter_set(H264Context *h)
     sps->vui_parameters_present_flag = get_bits1(&h->gb);
     if (sps->vui_parameters_present_flag) {
         int ret = decode_vui_parameters(h, sps);
-        if (ret < 0 && h->avctx->err_recognition & AV_EF_EXPLODE)
+        if (ret < 0)
             goto fail;
     }
 
@@ -495,7 +548,7 @@ int ff_h264_decode_seq_parameter_set(H264Context *h)
     if (h->avctx->debug & FF_DEBUG_PICT_INFO) {
         static const char csp[4][5] = { "Gray", "420", "422", "444" };
         av_log(h->avctx, AV_LOG_DEBUG,
-               "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%u/%u/%u/%u %s %s %"PRId32"/%"PRId32"\n",
+               "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%u/%u/%u/%u %s %s %"PRId32"/%"PRId32" b%d reo:%d\n",
                sps_id, sps->profile_idc, sps->level_idc,
                sps->poc_type,
                sps->ref_frame_count,
@@ -507,13 +560,15 @@ int ff_h264_decode_seq_parameter_set(H264Context *h)
                sps->vui_parameters_present_flag ? "VUI" : "",
                csp[sps->chroma_format_idc],
                sps->timing_info_present_flag ? sps->num_units_in_tick : 0,
-               sps->timing_info_present_flag ? sps->time_scale : 0);
+               sps->timing_info_present_flag ? sps->time_scale : 0,
+               sps->bit_depth_luma,
+               sps->bitstream_restriction_flag ? sps->num_reorder_frames : -1
+               );
     }
     sps->new = 1;
 
     av_free(h->sps_buffers[sps_id]);
     h->sps_buffers[sps_id] = sps;
-    h->sps                 = *sps;
 
     return 0;
 
@@ -531,21 +586,32 @@ static void build_qp_table(PPS *pps, int t, int index, const int depth)
             ff_h264_chroma_qp[depth - 8][av_clip(i + index, 0, max_qp)];
 }
 
+static int more_rbsp_data_in_pps(H264Context *h, PPS *pps)
+{   /* Returns 1 unless the SPS referenced by pps matches the profile test below, in which case it logs and returns 0. */
+    const SPS *sps = h->sps_buffers[pps->sps_id]; /* no NULL check here; presumably the caller validated pps->sps_id - confirm */
+    int profile_idc = sps->profile_idc;
+    /* profile_idc 66, 77 or 88 combined with any of the low three constraint_set_flags bits set => report no extra data. */
+    if ((profile_idc == 66 || profile_idc == 77 ||
+         profile_idc == 88) && (sps->constraint_set_flags & 7)) {
+        av_log(h->avctx, AV_LOG_VERBOSE,
+               "Current profile doesn't provide more RBSP data in PPS, skipping\n");
+        return 0; /* the visible caller then skips transform_8x8_mode and the PPS scaling matrices */
+    }
+    /* Any other profile/flag combination: the remaining PPS bits may be parsed. */
+    return 1;
+}
+
 int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length)
 {
     unsigned int pps_id = get_ue_golomb(&h->gb);
     PPS *pps;
-    const int qp_bd_offset = 6 * (h->sps.bit_depth_luma - 8);
+    SPS *sps;
+    int qp_bd_offset;
     int bits_left;
 
     if (pps_id >= MAX_PPS_COUNT) {
         av_log(h->avctx, AV_LOG_ERROR, "pps_id %u out of range\n", pps_id);
         return AVERROR_INVALIDDATA;
-    } else if (h->sps.bit_depth_luma > 10) {
-        av_log(h->avctx, AV_LOG_ERROR,
-               "Unimplemented luma bit depth=%d (max=10)\n",
-               h->sps.bit_depth_luma);
-        return AVERROR_PATCHWELCOME;
     }
 
     pps = av_mallocz(sizeof(PPS));
@@ -557,6 +623,19 @@ int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length)
         av_log(h->avctx, AV_LOG_ERROR, "sps_id %u out of range\n", pps->sps_id);
         goto fail;
     }
+    sps = h->sps_buffers[pps->sps_id];
+    qp_bd_offset = 6 * (sps->bit_depth_luma - 8);
+    if (sps->bit_depth_luma > 14) {
+        av_log(h->avctx, AV_LOG_ERROR,
+               "Invalid luma bit depth=%d\n",
+               sps->bit_depth_luma);
+        goto fail;
+    } else if (sps->bit_depth_luma == 11 || sps->bit_depth_luma == 13) {
+        av_log(h->avctx, AV_LOG_ERROR,
+               "Unimplemented luma bit depth=%d\n",
+               sps->bit_depth_luma);
+        goto fail;
+    }
 
     pps->cabac             = get_bits1(&h->gb);
     pps->pic_order_present = get_bits1(&h->gb);
@@ -621,8 +700,7 @@ int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length)
            sizeof(pps->scaling_matrix8));
 
     bits_left = bit_length - get_bits_count(&h->gb);
-    if (bits_left && (bits_left > 8 ||
-                      show_bits(&h->gb, bits_left) != 1 << (bits_left - 1))) {
+    if (bits_left > 0 && more_rbsp_data_in_pps(h, pps)) {
         pps->transform_8x8_mode = get_bits1(&h->gb);
         decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0,
                                 pps->scaling_matrix4, pps->scaling_matrix8);
@@ -632,10 +710,8 @@ int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length)
         pps->chroma_qp_index_offset[1] = pps->chroma_qp_index_offset[0];
     }
 
-    build_qp_table(pps, 0, pps->chroma_qp_index_offset[0],
-                   h->sps.bit_depth_luma);
-    build_qp_table(pps, 1, pps->chroma_qp_index_offset[1],
-                   h->sps.bit_depth_luma);
+    build_qp_table(pps, 0, pps->chroma_qp_index_offset[0], sps->bit_depth_luma);
+    build_qp_table(pps, 1, pps->chroma_qp_index_offset[1], sps->bit_depth_luma);
     if (pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
         pps->chroma_qp_diff = 1;
 
diff --git a/libavcodec/h264_refs.c b/libavcodec/h264_refs.c
index ffe1fcc..de1f60a 100644
--- a/libavcodec/h264_refs.c
+++ b/libavcodec/h264_refs.c
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... reference picture handling
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,6 +27,7 @@
 
 #include <inttypes.h>
 
+#include "libavutil/avassert.h"
 #include "internal.h"
 #include "avcodec.h"
 #include "h264.h"
@@ -76,16 +77,18 @@ static int build_def_list(H264Picture *def, int def_len,
     int  i[2] = { 0 };
     int index = 0;
 
-    while ((i[0] < len || i[1] < len) && index < def_len) {
+    while (i[0] < len || i[1] < len) {
         while (i[0] < len && !(in[i[0]] && (in[i[0]]->reference & sel)))
             i[0]++;
         while (i[1] < len && !(in[i[1]] && (in[i[1]]->reference & (sel ^ 3))))
             i[1]++;
-        if (i[0] < len && index < def_len) {
+        if (i[0] < len) {
+            av_assert0(index < def_len);
             in[i[0]]->pic_id = is_long ? i[0] : in[i[0]]->frame_num;
             split_field_copy(&def[index++], in[i[0]++], sel, 1);
         }
-        if (i[1] < len && index < def_len) {
+        if (i[1] < len) {
+            av_assert0(index < def_len);
             in[i[1]]->pic_id = is_long ? i[1] : in[i[1]]->frame_num;
             split_field_copy(&def[index++], in[i[1]++], sel ^ 3, 0);
         }
@@ -133,13 +136,14 @@ int ff_h264_fill_default_ref_list(H264Context *h)
         for (list = 0; list < 2; list++) {
             len  = add_sorted(sorted,       h->short_ref, h->short_ref_count, cur_poc, 1 ^ list);
             len += add_sorted(sorted + len, h->short_ref, h->short_ref_count, cur_poc, 0 ^ list);
-            assert(len <= 32);
+            av_assert0(len <= 32);
 
             len  = build_def_list(h->default_ref_list[list], FF_ARRAY_ELEMS(h->default_ref_list[0]),
                                   sorted, len, 0, h->picture_structure);
             len += build_def_list(h->default_ref_list[list] + len,
                                   FF_ARRAY_ELEMS(h->default_ref_list[0]) - len,
                                   h->long_ref, 16, 1, h->picture_structure);
+            av_assert0(len <= 32);
 
             if (len < h->ref_count[list])
                 memset(&h->default_ref_list[list][len], 0, sizeof(H264Picture) * (h->ref_count[list] - len));
@@ -163,6 +167,7 @@ int ff_h264_fill_default_ref_list(H264Context *h)
         len += build_def_list(h->default_ref_list[0] + len,
                               FF_ARRAY_ELEMS(h->default_ref_list[0]) - len,
                               h-> long_ref, 16, 1, h->picture_structure);
+        av_assert0(len <= 32);
 
         if (len < h->ref_count[0])
             memset(&h->default_ref_list[0][len], 0, sizeof(H264Picture) * (h->ref_count[0] - len));
@@ -324,13 +329,19 @@ int ff_h264_decode_ref_pic_list_reordering(H264Context *h)
     }
     for (list = 0; list < h->list_count; list++) {
         for (index = 0; index < h->ref_count[list]; index++) {
-            if (!h->ref_list[list][index].f.buf[0]) {
-                av_log(h->avctx, AV_LOG_ERROR, "Missing reference picture\n");
-                if (h->default_ref_list[list][0].f.buf[0])
+            if (   !h->ref_list[list][index].f.buf[0]
+                || (!FIELD_PICTURE(h) && (h->ref_list[list][index].reference&3) != 3)) {
+                int i;
+                av_log(h->avctx, AV_LOG_ERROR, "Missing reference picture, default is %d\n", h->default_ref_list[list][0].poc);
+                for (i = 0; i < FF_ARRAY_ELEMS(h->last_pocs); i++)
+                    h->last_pocs[i] = INT_MIN;
+                if (h->default_ref_list[list][0].f.buf[0]
+                    && !(!FIELD_PICTURE(h) && (h->default_ref_list[list][0].reference&3) != 3))
                     COPY_PICTURE(&h->ref_list[list][index], &h->default_ref_list[list][0]);
                 else
                     return -1;
             }
+            av_assert0(av_buffer_get_ref_count(h->ref_list[list][index].f.buf[0]) > 0);
         }
     }
 
@@ -340,7 +351,7 @@ int ff_h264_decode_ref_pic_list_reordering(H264Context *h)
 void ff_h264_fill_mbaff_ref_list(H264Context *h)
 {
     int list, i, j;
-    for (list = 0; list < 2; list++) { //FIXME try list_count
+    for (list = 0; list < h->list_count; list++) {
         for (i = 0; i < h->ref_count[list]; i++) {
             H264Picture *frame = &h->ref_list[list][i];
             H264Picture *field = &h->ref_list[list][16 + 2 * i];
@@ -487,6 +498,9 @@ void ff_h264_remove_all_refs(H264Context *h)
         h->short_ref[i] = NULL;
     }
     h->short_ref_count = 0;
+
+    memset(h->default_ref_list, 0, sizeof(h->default_ref_list));
+    memset(h->ref_list, 0, sizeof(h->ref_list));
 }
 
 /**
@@ -528,8 +542,11 @@ static int check_opcodes(MMCO *mmco1, MMCO *mmco2, int n_mmcos)
     int i;
 
     for (i = 0; i < n_mmcos; i++) {
-        if (mmco1[i].opcode != mmco2[i].opcode)
+        if (mmco1[i].opcode != mmco2[i].opcode) {
+            av_log(NULL, AV_LOG_ERROR, "MMCO opcode [%d, %d] at %d mismatches between slices\n",
+                   mmco1[i].opcode, mmco2[i].opcode, i);
             return -1;
+        }
     }
 
     return 0;
@@ -540,10 +557,8 @@ int ff_generate_sliding_window_mmcos(H264Context *h, int first_slice)
     MMCO mmco_temp[MAX_MMCO_COUNT], *mmco = first_slice ? h->mmco : mmco_temp;
     int mmco_index = 0, i = 0;
 
-    assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
-
     if (h->short_ref_count &&
-        h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
+        h->long_ref_count + h->short_ref_count >= h->sps.ref_frame_count &&
         !(FIELD_PICTURE(h) && !h->first_field && h->cur_pic_ptr->reference)) {
         mmco[0].opcode        = MMCO_SHORT2UNUSED;
         mmco[0].short_pic_num = h->short_ref[h->short_ref_count - 1]->frame_num;
@@ -562,8 +577,8 @@ int ff_generate_sliding_window_mmcos(H264Context *h, int first_slice)
                (mmco_index != h->mmco_index ||
                 (i = check_opcodes(h->mmco, mmco_temp, mmco_index)))) {
         av_log(h->avctx, AV_LOG_ERROR,
-               "Inconsistent MMCO state between slices [%d, %d, %d]\n",
-               mmco_index, h->mmco_index, i);
+               "Inconsistent MMCO state between slices [%d, %d]\n",
+               mmco_index, h->mmco_index);
         return AVERROR_INVALIDDATA;
     }
     return 0;
@@ -572,6 +587,7 @@ int ff_generate_sliding_window_mmcos(H264Context *h, int first_slice)
 int ff_h264_execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count)
 {
     int i, av_uninit(j);
+    int pps_count;
     int current_ref_assigned = 0, err = 0;
     H264Picture *av_uninit(pic);
 
@@ -592,7 +608,7 @@ int ff_h264_execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count)
                 if (mmco[i].opcode != MMCO_SHORT2LONG ||
                     !h->long_ref[mmco[i].long_arg]    ||
                     h->long_ref[mmco[i].long_arg]->frame_num != frame_num) {
-                    av_log(h->avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
+                    av_log(h->avctx, h->short_ref_count ? AV_LOG_ERROR : AV_LOG_DEBUG, "mmco: unref short failure\n");
                     err = AVERROR_INVALIDDATA;
                 }
                 continue;
@@ -633,10 +649,21 @@ int ff_h264_execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count)
                      * Report the problem and keep the pair where it is,
                      * and mark this field valid.
                      */
-            if (h->short_ref[0] == h->cur_pic_ptr)
+            if (h->short_ref[0] == h->cur_pic_ptr) {
+                av_log(h->avctx, AV_LOG_ERROR, "mmco: cannot assign current picture to short and long at the same time\n");
                 remove_short_at_index(h, 0);
+            }
 
             if (h->long_ref[mmco[i].long_arg] != h->cur_pic_ptr) {
+                if (h->cur_pic_ptr->long_ref) {
+                    for(j=0; j<16; j++) {
+                        if(h->long_ref[j] == h->cur_pic_ptr) {
+                            remove_long(h, j, 0);
+                            av_log(h->avctx, AV_LOG_ERROR, "mmco: cannot assign current picture to 2 long term references\n");
+                        }
+                    }
+                }
+                av_assert0(!h->cur_pic_ptr->long_ref);
                 remove_long(h, mmco[i].long_arg, 0);
 
                 h->long_ref[mmco[i].long_arg]           = h->cur_pic_ptr;
@@ -664,6 +691,8 @@ int ff_h264_execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count)
             h->frame_num  = h->cur_pic_ptr->frame_num = 0;
             h->mmco_reset = 1;
             h->cur_pic_ptr->mmco_reset = 1;
+            for (j = 0; j < MAX_DELAYED_PIC_COUNT; j++)
+                h->last_pocs[j] = INT_MIN;
             break;
         default: assert(0);
         }
@@ -702,8 +731,7 @@ int ff_h264_execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count)
         }
     }
 
-    if (h->long_ref_count + h->short_ref_count -
-        (h->short_ref[0] == h->cur_pic_ptr) > h->sps.ref_frame_count) {
+    if (h->long_ref_count + h->short_ref_count > FFMAX(h->sps.ref_frame_count, 1)) {
 
         /* We have too many reference frames, probably due to corrupted
          * stream. Need to discard one frame. Prevents overrun of the
@@ -728,8 +756,32 @@ int ff_h264_execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count)
         }
     }
 
+    for (i = 0; i<h->short_ref_count; i++) {
+        pic = h->short_ref[i];
+        if (pic->invalid_gap) {
+            int d = (h->cur_pic_ptr->frame_num - pic->frame_num) & ((1 << h->sps.log2_max_frame_num)-1);
+            if (d > h->sps.ref_frame_count)
+                remove_short(h, pic->frame_num, 0);
+        }
+    }
+
     print_short_term(h);
     print_long_term(h);
+
+    pps_count = 0;
+    for (i = 0; i < FF_ARRAY_ELEMS(h->pps_buffers); i++)
+        pps_count += !!h->pps_buffers[i];
+
+    if (   err >= 0
+        && h->long_ref_count==0
+        && (h->short_ref_count<=2 || h->pps.ref_count[0] <= 1 && h->pps.ref_count[1] <= 1 && pps_count == 1)
+        && h->pps.ref_count[0]<=2 + (h->picture_structure != PICT_FRAME) + (2*!h->has_recovery_point)
+        && h->cur_pic_ptr->f.pict_type == AV_PICTURE_TYPE_I){
+        h->cur_pic_ptr->recovered |= 1;
+        if(!h->avctx->has_b_frames)
+            h->frame_recovered |= FRAME_RECOVERED_SEI;
+    }
+
     return (h->avctx->err_recognition & AV_EF_EXPLODE) ? err : 0;
 }
 
@@ -737,7 +789,7 @@ int ff_h264_decode_ref_pic_marking(H264Context *h, GetBitContext *gb,
                                    int first_slice)
 {
     int i, ret;
-    MMCO mmco_temp[MAX_MMCO_COUNT], *mmco = first_slice ? h->mmco : mmco_temp;
+    MMCO mmco_temp[MAX_MMCO_COUNT], *mmco = mmco_temp;
     int mmco_index = 0;
 
     if (h->nal_unit_type == NAL_IDR_SLICE) { // FIXME fields
@@ -803,6 +855,7 @@ int ff_h264_decode_ref_pic_marking(H264Context *h, GetBitContext *gb,
     }
 
     if (first_slice && mmco_index != -1) {
+        memcpy(h->mmco, mmco_temp, sizeof(h->mmco));
         h->mmco_index = mmco_index;
     } else if (!first_slice && mmco_index >= 0 &&
                (mmco_index != h->mmco_index ||
diff --git a/libavcodec/h264_sei.c b/libavcodec/h264_sei.c
index 52ff2ff..aa889b8 100644
--- a/libavcodec/h264_sei.c
+++ b/libavcodec/h264_sei.c
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... sei decoding
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -46,14 +46,20 @@ void ff_h264_reset_sei(H264Context *h)
 
 static int decode_picture_timing(H264Context *h)
 {
-    if (h->sps.nal_hrd_parameters_present_flag ||
-        h->sps.vcl_hrd_parameters_present_flag) {
-        h->sei_cpb_removal_delay = get_bits(&h->gb,
-                                            h->sps.cpb_removal_delay_length);
-        h->sei_dpb_output_delay  = get_bits(&h->gb,
-                                            h->sps.dpb_output_delay_length);
+    SPS *sps = &h->sps;
+    int i;
+
+    for (i = 0; i<MAX_SPS_COUNT; i++)
+        if (!sps->log2_max_frame_num && h->sps_buffers[i])
+            sps = h->sps_buffers[i];
+
+    if (sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag) {
+        h->sei_cpb_removal_delay = get_bits_long(&h->gb,
+                                                 sps->cpb_removal_delay_length);
+        h->sei_dpb_output_delay  = get_bits_long(&h->gb,
+                                                 sps->dpb_output_delay_length);
     }
-    if (h->sps.pic_struct_present_flag) {
+    if (sps->pic_struct_present_flag) {
         unsigned int i, num_clock_ts;
 
         h->sei_pic_struct = get_bits(&h->gb, 4);
@@ -89,9 +95,9 @@ static int decode_picture_timing(H264Context *h)
                         }
                     }
                 }
-                if (h->sps.time_offset_length > 0)
+                if (sps->time_offset_length > 0)
                     skip_bits(&h->gb,
-                              h->sps.time_offset_length); /* time_offset */
+                              sps->time_offset_length); /* time_offset */
             }
         }
 
@@ -102,6 +108,43 @@ static int decode_picture_timing(H264Context *h)
     return 0;
 }
 
+static int decode_user_data_itu_t_t35(H264Context *h, int size) // returns 0, or -1 when size is too small for the fields read
+{
+    uint32_t user_identifier;
+    int dtg_active_format;
+    /* the fixed header read below is 8 + 16 + 32 bits, i.e. 7 bytes */
+    if (size < 7)
+        return -1;
+    size -= 7; // size now counts only the bytes after that header
+
+    skip_bits(&h->gb, 8);   // country_code
+    skip_bits(&h->gb, 16);  // provider_code
+    user_identifier = get_bits_long(&h->gb, 32); // 32-bit tag selecting how the rest is parsed
+
+    switch (user_identifier) {
+        case 0x44544731:    // "DTG1" - AFD_data
+            if (size < 1) // the flag byte read next must be present
+                return -1;
+            skip_bits(&h->gb, 1);
+            if (get_bits(&h->gb, 1)) { // flag bit: when set, one more byte holding the format value follows
+                skip_bits(&h->gb, 6);
+                if (size < 2) // second payload byte required for the 4 + 4 bits read below
+                    return -1;
+                skip_bits(&h->gb, 4);
+                dtg_active_format = get_bits(&h->gb, 4);
+                h->avctx->dtg_active_format = dtg_active_format; // low 4 bits of that byte are published on the codec context
+            } else {
+                skip_bits(&h->gb, 6); // flag clear: only the rest of the first byte is consumed
+            }
+            break;
+        default:
+            skip_bits(&h->gb, size * 8); // unrecognised identifier: discard the remaining payload bytes
+            break;
+    }
+
+    return 0;
+}
+
 static int decode_unregistered_user_data(H264Context *h, int size)
 {
     uint8_t user_data[16 + 256];
@@ -117,6 +160,8 @@ static int decode_unregistered_user_data(H264Context *h, int size)
     e = sscanf(user_data + 16, "x264 - core %d", &build);
     if (e == 1 && build > 0)
         h->x264_build = build;
+    if (e == 1 && build == 1 && !strncmp(user_data+16, "x264 - core 0000", 16))
+        h->x264_build = 67;
 
     if (h->avctx->debug & FF_DEBUG_BUGS)
         av_log(h->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data + 16);
@@ -136,6 +181,11 @@ static int decode_recovery_point(H264Context *h)
      * 2b changing_slice_group_idc */
     skip_bits(&h->gb, 4);
 
+    if (h->avctx->debug & FF_DEBUG_PICT_INFO)
+        av_log(h->avctx, AV_LOG_DEBUG, "sei_recovery_frame_cnt: %d\n", h->sei_recovery_frame_cnt);
+
+    h->has_recovery_point = 1;
+
     return 0;
 }
 
@@ -157,7 +207,7 @@ static int decode_buffering_period(H264Context *h)
     if (sps->nal_hrd_parameters_present_flag) {
         for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
             h->initial_cpb_removal_delay[sched_sel_idx] =
-                get_bits(&h->gb, sps->initial_cpb_removal_delay_length);
+                get_bits_long(&h->gb, sps->initial_cpb_removal_delay_length);
             // initial_cpb_removal_delay_offset
             skip_bits(&h->gb, sps->initial_cpb_removal_delay_length);
         }
@@ -165,7 +215,7 @@ static int decode_buffering_period(H264Context *h)
     if (sps->vcl_hrd_parameters_present_flag) {
         for (sched_sel_idx = 0; sched_sel_idx < sps->cpb_cnt; sched_sel_idx++) {
             h->initial_cpb_removal_delay[sched_sel_idx] =
-                get_bits(&h->gb, sps->initial_cpb_removal_delay_length);
+                get_bits_long(&h->gb, sps->initial_cpb_removal_delay_length);
             // initial_cpb_removal_delay_offset
             skip_bits(&h->gb, sps->initial_cpb_removal_delay_length);
         }
@@ -177,12 +227,16 @@ static int decode_buffering_period(H264Context *h)
 
 static int decode_frame_packing_arrangement(H264Context *h)
 {
-    get_ue_golomb(&h->gb);              // frame_packing_arrangement_id
-    h->sei_frame_packing_present = !get_bits1(&h->gb);
+    h->sei_fpa.frame_packing_arrangement_id          = get_ue_golomb(&h->gb);
+    h->sei_fpa.frame_packing_arrangement_cancel_flag = get_bits1(&h->gb);
+    h->sei_frame_packing_present = !h->sei_fpa.frame_packing_arrangement_cancel_flag;
 
     if (h->sei_frame_packing_present) {
+        h->sei_fpa.frame_packing_arrangement_type =
         h->frame_packing_arrangement_type = get_bits(&h->gb, 7);
+        h->sei_fpa.quincunx_sampling_flag         =
         h->quincunx_subsampling           = get_bits1(&h->gb);
+        h->sei_fpa.content_interpretation_type    =
         h->content_interpretation_type    = get_bits(&h->gb, 6);
 
         // the following skips: spatial_flipping_flag, frame0_flipped_flag,
@@ -193,10 +247,19 @@ static int decode_frame_packing_arrangement(H264Context *h)
         if (!h->quincunx_subsampling && h->frame_packing_arrangement_type != 5)
             skip_bits(&h->gb, 16);      // frame[01]_grid_position_[xy]
         skip_bits(&h->gb, 8);           // frame_packing_arrangement_reserved_byte
-        get_ue_golomb(&h->gb);          // frame_packing_arrangement_repetition_period
+        h->sei_fpa.frame_packing_arrangement_repetition_period = get_ue_golomb(&h->gb) /* frame_packing_arrangement_repetition_period */;
     }
     skip_bits1(&h->gb);                 // frame_packing_arrangement_extension_flag
 
+    if (h->avctx->debug & FF_DEBUG_PICT_INFO)
+        av_log(h->avctx, AV_LOG_DEBUG, "SEI FPA %d %d %d %d %d %d\n",
+                                       h->sei_fpa.frame_packing_arrangement_id,
+                                       h->sei_fpa.frame_packing_arrangement_cancel_flag,
+                                       h->sei_fpa.frame_packing_arrangement_type,
+                                       h->sei_fpa.quincunx_sampling_flag,
+                                       h->sei_fpa.content_interpretation_type,
+                                       h->sei_fpa.frame_packing_arrangement_repetition_period);
+
     return 0;
 }
 
@@ -219,29 +282,32 @@ static int decode_display_orientation(H264Context *h)
 int ff_h264_decode_sei(H264Context *h)
 {
     while (get_bits_left(&h->gb) > 16) {
-        int size = 0;
         int type = 0;
+        unsigned size = 0;
+        unsigned next;
         int ret  = 0;
-        int last = 0;
 
-        while (get_bits_left(&h->gb) >= 8 &&
-               (last = get_bits(&h->gb, 8)) == 255) {
-            type += 255;
-        }
-        type += last;
+        do {
+            if (get_bits_left(&h->gb) < 8)
+                return AVERROR_INVALIDDATA;
+            type += show_bits(&h->gb, 8);
+        } while (get_bits(&h->gb, 8) == 255);
 
-        last = 0;
-        while (get_bits_left(&h->gb) >= 8 &&
-               (last = get_bits(&h->gb, 8)) == 255) {
-            size += 255;
-        }
-        size += last;
+        do {
+            if (get_bits_left(&h->gb) < 8)
+                return AVERROR_INVALIDDATA;
+            size += show_bits(&h->gb, 8);
+        } while (get_bits(&h->gb, 8) == 255);
+
+        if (h->avctx->debug&FF_DEBUG_STARTCODE)
+            av_log(h->avctx, AV_LOG_DEBUG, "SEI %d len:%d\n", type, size);
 
         if (size > get_bits_left(&h->gb) / 8) {
-            av_log(h->avctx, AV_LOG_ERROR, "SEI type %d truncated at %d\n",
-                   type, get_bits_left(&h->gb));
+            av_log(h->avctx, AV_LOG_ERROR, "SEI type %d size %d truncated at %d\n",
+                   type, 8*size, get_bits_left(&h->gb));
             return AVERROR_INVALIDDATA;
         }
+        next = get_bits_count(&h->gb) + 8 * size;
 
         switch (type) {
         case SEI_TYPE_PIC_TIMING: // Picture timing SEI
@@ -249,6 +315,10 @@ int ff_h264_decode_sei(H264Context *h)
             if (ret < 0)
                 return ret;
             break;
+        case SEI_TYPE_USER_DATA_ITU_T_T35:
+            if (decode_user_data_itu_t_t35(h, size) < 0)
+                return -1;
+            break;
         case SEI_TYPE_USER_DATA_UNREGISTERED:
             ret = decode_unregistered_user_data(h, size);
             if (ret < 0)
@@ -276,8 +346,8 @@ int ff_h264_decode_sei(H264Context *h)
             break;
         default:
             av_log(h->avctx, AV_LOG_DEBUG, "unknown SEI type %d\n", type);
-            skip_bits(&h->gb, 8 * size);
         }
+        skip_bits_long(&h->gb, next - get_bits_count(&h->gb));
 
         // FIXME check bits here
         align_get_bits(&h->gb);
@@ -285,3 +355,48 @@ int ff_h264_decode_sei(H264Context *h)
 
     return 0;
 }
+
+const char* ff_h264_sei_stereo_mode(H264Context *h) // maps the stored h->sei_fpa fields to a stereo layout name, or NULL
+{
+    if (h->sei_fpa.frame_packing_arrangement_cancel_flag == 0) { // arrangement in effect: name chosen by its type
+        switch (h->sei_fpa.frame_packing_arrangement_type) {
+            case SEI_FPA_TYPE_CHECKERBOARD:
+                if (h->sei_fpa.content_interpretation_type == 2) // in every case below, 2 selects the right-first / bottom-first name
+                    return "checkerboard_rl";
+                else
+                    return "checkerboard_lr";
+            case SEI_FPA_TYPE_INTERLEAVE_COLUMN:
+                if (h->sei_fpa.content_interpretation_type == 2)
+                    return "col_interleaved_rl";
+                else
+                    return "col_interleaved_lr";
+            case SEI_FPA_TYPE_INTERLEAVE_ROW:
+                if (h->sei_fpa.content_interpretation_type == 2)
+                    return "row_interleaved_rl";
+                else
+                    return "row_interleaved_lr";
+            case SEI_FPA_TYPE_SIDE_BY_SIDE:
+                if (h->sei_fpa.content_interpretation_type == 2)
+                    return "right_left";
+                else
+                    return "left_right";
+            case SEI_FPA_TYPE_TOP_BOTTOM:
+                if (h->sei_fpa.content_interpretation_type == 2)
+                    return "bottom_top";
+                else
+                    return "top_bottom";
+            case SEI_FPA_TYPE_INTERLEAVE_TEMPORAL:
+                if (h->sei_fpa.content_interpretation_type == 2)
+                    return "block_rl";
+                else
+                    return "block_lr";
+            case SEI_FPA_TYPE_2D:
+            default: // the 2D type and every type not listed above are reported as mono
+                return "mono";
+        }
+    } else if (h->sei_fpa.frame_packing_arrangement_cancel_flag == 1) { // cancelled arrangement is reported as mono
+        return "mono";
+    } else { // any other flag value yields no name
+        return NULL;
+    }
+}
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index ce62fbf..f1fbbdc 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... decoder
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -47,23 +47,27 @@
 static const uint8_t rem6[QP_MAX_NUM + 1] = {
     0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
     3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
-    0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
+    0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
+    3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
+    0, 1, 2, 3,
 };
 
 static const uint8_t div6[QP_MAX_NUM + 1] = {
     0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3,  3,  3,
     3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6,  6,  6,
-    7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
+    7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10,
+   10,10,10,11,11,11,11,11,11,12,12,12,12,12,12,13,13,13, 13, 13, 13,
+   14,14,14,14,
 };
 
-static const uint8_t field_scan[16] = {
+static const uint8_t field_scan[16+1] = {
     0 + 0 * 4, 0 + 1 * 4, 1 + 0 * 4, 0 + 2 * 4,
     0 + 3 * 4, 1 + 1 * 4, 1 + 2 * 4, 1 + 3 * 4,
     2 + 0 * 4, 2 + 1 * 4, 2 + 2 * 4, 2 + 3 * 4,
     3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4, 3 + 3 * 4,
 };
 
-static const uint8_t field_scan8x8[64] = {
+static const uint8_t field_scan8x8[64+1] = {
     0 + 0 * 8, 0 + 1 * 8, 0 + 2 * 8, 1 + 0 * 8,
     1 + 1 * 8, 0 + 3 * 8, 0 + 4 * 8, 1 + 2 * 8,
     2 + 0 * 8, 1 + 3 * 8, 0 + 5 * 8, 0 + 6 * 8,
@@ -82,7 +86,7 @@ static const uint8_t field_scan8x8[64] = {
     7 + 4 * 8, 7 + 5 * 8, 7 + 6 * 8, 7 + 7 * 8,
 };
 
-static const uint8_t field_scan8x8_cavlc[64] = {
+static const uint8_t field_scan8x8_cavlc[64+1] = {
     0 + 0 * 8, 1 + 1 * 8, 2 + 0 * 8, 0 + 7 * 8,
     2 + 2 * 8, 2 + 3 * 8, 2 + 4 * 8, 3 + 3 * 8,
     3 + 4 * 8, 4 + 3 * 8, 4 + 4 * 8, 5 + 3 * 8,
@@ -102,7 +106,7 @@ static const uint8_t field_scan8x8_cavlc[64] = {
 };
 
 // zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[(i/4) + 16*(i%4)]
-static const uint8_t zigzag_scan8x8_cavlc[64] = {
+static const uint8_t zigzag_scan8x8_cavlc[64+1] = {
     0 + 0 * 8, 1 + 1 * 8, 1 + 2 * 8, 2 + 2 * 8,
     4 + 1 * 8, 0 + 5 * 8, 3 + 3 * 8, 7 + 0 * 8,
     3 + 4 * 8, 1 + 7 * 8, 5 + 3 * 8, 6 + 3 * 8,
@@ -255,6 +259,9 @@ static int alloc_picture(H264Context *h, H264Picture *pic)
 
     h->linesize   = pic->f.linesize[0];
     h->uvlinesize = pic->f.linesize[1];
+    pic->crop     = h->sps.crop;
+    pic->crop_top = h->sps.crop_top;
+    pic->crop_left= h->sps.crop_left;
 
     if (h->avctx->hwaccel) {
         const AVHWAccel *hwaccel = h->avctx->hwaccel;
@@ -266,6 +273,18 @@ static int alloc_picture(H264Context *h, H264Picture *pic)
             pic->hwaccel_picture_private = pic->hwaccel_priv_buf->data;
         }
     }
+    if (!h->avctx->hwaccel && CONFIG_GRAY && h->flags & CODEC_FLAG_GRAY && pic->f.data[2]) {
+        int h_chroma_shift, v_chroma_shift;
+        av_pix_fmt_get_chroma_sub_sample(pic->f.format,
+                                         &h_chroma_shift, &v_chroma_shift);
+
+        for(i=0; i<FF_CEIL_RSHIFT(h->avctx->height, v_chroma_shift); i++) {
+            memset(pic->f.data[1] + pic->f.linesize[1]*i,
+                   0x80, FF_CEIL_RSHIFT(h->avctx->width, h_chroma_shift));
+            memset(pic->f.data[2] + pic->f.linesize[2]*i,
+                   0x80, FF_CEIL_RSHIFT(h->avctx->width, h_chroma_shift));
+        }
+    }
 
     if (!h->qscale_table_pool) {
         ret = init_table_pools(h);
@@ -383,6 +402,8 @@ void h264_init_dequant_tables(H264Context *h)
 {
     int i, x;
     init_dequant4_coeff_table(h);
+    memset(h->dequant8_coeff, 0, sizeof(h->dequant8_coeff));
+
     if (h->pps.transform_8x8_mode)
         init_dequant8_coeff_table(h);
     if (h->sps.transform_bypass) {
@@ -424,9 +445,9 @@ static void clone_tables(H264Context *dst, H264Context *src, int i)
 #define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b) + (size))))
 #undef REBASE_PICTURE
 #define REBASE_PICTURE(pic, new_ctx, old_ctx)             \
-    ((pic && pic >= old_ctx->DPB &&                       \
-      pic < old_ctx->DPB + H264_MAX_PICTURE_COUNT) ?          \
-     &new_ctx->DPB[pic - old_ctx->DPB] : NULL)
+    (((pic) && (pic) >= (old_ctx)->DPB &&                       \
+      (pic) < (old_ctx)->DPB + H264_MAX_PICTURE_COUNT) ?          \
+     &(new_ctx)->DPB[(pic) - (old_ctx)->DPB] : NULL)
 
 static void copy_picture_range(H264Picture **to, H264Picture **from, int count,
                                H264Context *new_base,
@@ -464,8 +485,8 @@ static int copy_parameter_set(void **to, void **from, int count, int size)
 }
 
 #define copy_fields(to, from, start_field, end_field)                   \
-    memcpy(&to->start_field, &from->start_field,                        \
-           (char *)&to->end_field - (char *)&to->start_field)
+    memcpy(&(to)->start_field, &(from)->start_field,                        \
+           (char *)&(to)->end_field - (char *)&(to)->start_field)
 
 static int h264_slice_header_init(H264Context *h, int reinit);
 
@@ -477,7 +498,7 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
     int context_reinitialized = 0;
     int i, ret;
 
-    if (dst == src || !h1->context_initialized)
+    if (dst == src)
         return 0;
 
     if (inited &&
@@ -503,6 +524,17 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
         h->mb_num    = h1->mb_num;
         h->mb_stride = h1->mb_stride;
         h->b_stride  = h1->b_stride;
+        // SPS/PPS
+        if ((ret = copy_parameter_set((void **)h->sps_buffers,
+                                      (void **)h1->sps_buffers,
+                                      MAX_SPS_COUNT, sizeof(SPS))) < 0)
+            return ret;
+        h->sps = h1->sps;
+        if ((ret = copy_parameter_set((void **)h->pps_buffers,
+                                      (void **)h1->pps_buffers,
+                                      MAX_PPS_COUNT, sizeof(PPS))) < 0)
+            return ret;
+        h->pps = h1->pps;
 
         if ((err = h264_slice_header_init(h, 1)) < 0) {
             av_log(h->avctx, AV_LOG_ERROR, "h264_slice_header_init() failed");
@@ -510,14 +542,19 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
         }
         context_reinitialized = 1;
 
-        /* update linesize on resize. The decoder doesn't
-         * necessarily call h264_frame_start in the new thread */
-        h->linesize   = h1->linesize;
-        h->uvlinesize = h1->uvlinesize;
-
-        /* copy block_offset since frame_start may not be called */
-        memcpy(h->block_offset, h1->block_offset, sizeof(h->block_offset));
+#if 0
+        h264_set_parameter_from_sps(h);
+        //Note we set context_reinitialized which will cause h264_set_parameter_from_sps to be reexecuted
+        h->cur_chroma_format_idc = h1->cur_chroma_format_idc;
+#endif
     }
+    /* update linesize on resize for h264. The h264 decoder doesn't
+     * necessarily call ff_MPV_frame_start in the new thread */
+    h->linesize   = h1->linesize;
+    h->uvlinesize = h1->uvlinesize;
+
+    /* copy block_offset since frame_start may not be called */
+    memcpy(h->block_offset, h1->block_offset, sizeof(h->block_offset));
 
     if (!inited) {
         for (i = 0; i < MAX_SPS_COUNT; i++)
@@ -526,18 +563,21 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
         for (i = 0; i < MAX_PPS_COUNT; i++)
             av_freep(h->pps_buffers + i);
 
-        memcpy(h, h1, sizeof(*h1));
+        av_freep(&h->rbsp_buffer[0]);
+        av_freep(&h->rbsp_buffer[1]);
+        memcpy(h, h1, offsetof(H264Context, intra_pcm_ptr));
+        memcpy(&h->cabac, &h1->cabac,
+               sizeof(H264Context) - offsetof(H264Context, cabac));
+        av_assert0((void*)&h->cabac == &h->mb_padding + 1);
+
         memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
         memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
+
         memset(&h->er, 0, sizeof(h->er));
         memset(&h->mb, 0, sizeof(h->mb));
         memset(&h->mb_luma_dc, 0, sizeof(h->mb_luma_dc));
         memset(&h->mb_padding, 0, sizeof(h->mb_padding));
-        h->context_initialized = 0;
-
         memset(&h->cur_pic, 0, sizeof(h->cur_pic));
-        av_frame_unref(&h->cur_pic.f);
-        h->cur_pic.tf.f = &h->cur_pic.f;
 
         h->avctx             = dst;
         h->DPB               = NULL;
@@ -545,6 +585,17 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
         h->mb_type_pool      = NULL;
         h->ref_index_pool    = NULL;
         h->motion_val_pool   = NULL;
+        for (i = 0; i < 2; i++) {
+            h->rbsp_buffer[i] = NULL;
+            h->rbsp_buffer_size[i] = 0;
+        }
+
+        if (h1->context_initialized) {
+        h->context_initialized = 0;
+
+        memset(&h->cur_pic, 0, sizeof(h->cur_pic));
+        av_frame_unref(&h->cur_pic.f);
+        h->cur_pic.tf.f = &h->cur_pic.f;
 
         ret = ff_h264_alloc_tables(h);
         if (ret < 0) {
@@ -556,17 +607,13 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
             av_log(dst, AV_LOG_ERROR, "context_init() failed.\n");
             return ret;
         }
-
-        for (i = 0; i < 2; i++) {
-            h->rbsp_buffer[i]      = NULL;
-            h->rbsp_buffer_size[i] = 0;
         }
+
         h->bipred_scratchpad = NULL;
         h->edge_emu_buffer   = NULL;
 
         h->thread_context[0] = h;
-
-        h->context_initialized = 1;
+        h->context_initialized = h1->context_initialized;
     }
 
     h->avctx->coded_height  = h1->avctx->coded_height;
@@ -580,28 +627,22 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
     h->droppable            = h1->droppable;
     h->low_delay            = h1->low_delay;
 
-    for (i = 0; i < H264_MAX_PICTURE_COUNT; i++) {
+    for (i = 0; h->DPB && i < H264_MAX_PICTURE_COUNT; i++) {
         ff_h264_unref_picture(h, &h->DPB[i]);
-        if (h1->DPB[i].f.buf[0] &&
+        if (h1->DPB && h1->DPB[i].f.buf[0] &&
             (ret = ff_h264_ref_picture(h, &h->DPB[i], &h1->DPB[i])) < 0)
             return ret;
     }
 
     h->cur_pic_ptr = REBASE_PICTURE(h1->cur_pic_ptr, h, h1);
     ff_h264_unref_picture(h, &h->cur_pic);
-    if ((ret = ff_h264_ref_picture(h, &h->cur_pic, &h1->cur_pic)) < 0)
+    if (h1->cur_pic.f.buf[0] && (ret = ff_h264_ref_picture(h, &h->cur_pic, &h1->cur_pic)) < 0)
         return ret;
 
     h->workaround_bugs = h1->workaround_bugs;
     h->low_delay       = h1->low_delay;
     h->droppable       = h1->droppable;
 
-    /* frame_start may not be called for the next thread (if it's decoding
-     * a bottom field) so this has to be allocated here */
-    err = alloc_scratch_buffers(h, h1->linesize);
-    if (err < 0)
-        return err;
-
     // extradata/NAL handling
     h->is_avc = h1->is_avc;
 
@@ -642,7 +683,7 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
     copy_picture_range(h->delayed_pic, h1->delayed_pic,
                        MAX_DELAYED_PIC_COUNT + 2, h, h1);
 
-    h->last_slice_type = h1->last_slice_type;
+    h->frame_recovered       = h1->frame_recovered;
 
     if (context_reinitialized)
         ff_h264_set_parameter_from_sps(h);
@@ -660,7 +701,6 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
     h->outputed_poc          = h->next_outputed_poc;
 
     h->recovery_frame        = h1->recovery_frame;
-    h->frame_recovered       = h1->frame_recovered;
 
     return err;
 }
@@ -670,6 +710,17 @@ static int h264_frame_start(H264Context *h)
     H264Picture *pic;
     int i, ret;
     const int pixel_shift = h->pixel_shift;
+    int c[4] = {
+        1<<(h->sps.bit_depth_luma-1),
+        1<<(h->sps.bit_depth_chroma-1),
+        1<<(h->sps.bit_depth_chroma-1),
+        -1
+    };
+
+    if (!ff_thread_can_start_frame(h->avctx)) {
+        av_log(h->avctx, AV_LOG_ERROR, "Attempt to start a frame outside SETUP state\n");
+        return -1;
+    }
 
     release_unused_pictures(h, 1);
     h->cur_pic_ptr = NULL;
@@ -684,6 +735,7 @@ static int h264_frame_start(H264Context *h)
     pic->reference              = h->droppable ? 0 : h->picture_structure;
     pic->f.coded_picture_number = h->coded_picture_number++;
     pic->field_picture          = h->picture_structure != PICT_FRAME;
+
     /*
      * Zero key_frame here; IDR markings per slice in frame or fields are ORed
      * in later.
@@ -692,17 +744,29 @@ static int h264_frame_start(H264Context *h)
     pic->f.key_frame = 0;
     pic->mmco_reset  = 0;
     pic->recovered   = 0;
+    pic->invalid_gap = 0;
+    pic->sei_recovery_frame_cnt = h->sei_recovery_frame_cnt;
 
     if ((ret = alloc_picture(h, pic)) < 0)
         return ret;
+    if(!h->frame_recovered && !h->avctx->hwaccel &&
+       !(h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU))
+        avpriv_color_frame(&pic->f, c);
 
     h->cur_pic_ptr = pic;
     ff_h264_unref_picture(h, &h->cur_pic);
+    if (CONFIG_ERROR_RESILIENCE) {
+        ff_h264_set_erpic(&h->er.cur_pic, NULL);
+    }
+
     if ((ret = ff_h264_ref_picture(h, &h->cur_pic, h->cur_pic_ptr)) < 0)
         return ret;
 
-    if (CONFIG_ERROR_RESILIENCE)
+    if (CONFIG_ERROR_RESILIENCE) {
         ff_er_frame_start(&h->er);
+        ff_h264_set_erpic(&h->er.last_pic, NULL);
+        ff_h264_set_erpic(&h->er.next_pic, NULL);
+    }
 
     assert(h->linesize && h->uvlinesize);
 
@@ -717,20 +781,6 @@ static int h264_frame_start(H264Context *h)
         h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7) << pixel_shift) + 8 * h->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
     }
 
-    /* can't be in alloc_tables because linesize isn't known there.
-     * FIXME: redo bipred weight to not require extra buffer? */
-    for (i = 0; i < h->slice_context_count; i++)
-        if (h->thread_context[i]) {
-            ret = alloc_scratch_buffers(h->thread_context[i], h->linesize);
-            if (ret < 0)
-                return ret;
-        }
-
-    /* Some macroblocks can be accessed before they're available in case
-     * of lost slices, MBAFF or threading. */
-    memset(h->slice_table, -1,
-           (h->mb_height * h->mb_stride - 1) * sizeof(*h->slice_table));
-
     /* We mark the current picture as non-reference after allocating it, so
      * that if we break out due to an error it can be released automatically
      * in the next ff_MPV_frame_start().
@@ -915,13 +965,13 @@ static void init_scan_tables(H264Context *h)
 {
     int i;
     for (i = 0; i < 16; i++) {
-#define TRANSPOSE(x) (x >> 2) | ((x << 2) & 0xF)
+#define TRANSPOSE(x) ((x) >> 2) | (((x) << 2) & 0xF)
         h->zigzag_scan[i] = TRANSPOSE(zigzag_scan[i]);
         h->field_scan[i]  = TRANSPOSE(field_scan[i]);
 #undef TRANSPOSE
     }
     for (i = 0; i < 64; i++) {
-#define TRANSPOSE(x) (x >> 3) | ((x & 7) << 3)
+#define TRANSPOSE(x) ((x) >> 3) | (((x) & 7) << 3)
         h->zigzag_scan8x8[i]       = TRANSPOSE(ff_zigzag_direct[i]);
         h->zigzag_scan8x8_cavlc[i] = TRANSPOSE(zigzag_scan8x8_cavlc[i]);
         h->field_scan8x8[i]        = TRANSPOSE(field_scan8x8[i]);
@@ -929,19 +979,19 @@ static void init_scan_tables(H264Context *h)
 #undef TRANSPOSE
     }
     if (h->sps.transform_bypass) { // FIXME same ugly
-        h->zigzag_scan_q0          = zigzag_scan;
-        h->zigzag_scan8x8_q0       = ff_zigzag_direct;
-        h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
-        h->field_scan_q0           = field_scan;
-        h->field_scan8x8_q0        = field_scan8x8;
-        h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
+        memcpy(h->zigzag_scan_q0          , zigzag_scan             , sizeof(h->zigzag_scan_q0         ));
+        memcpy(h->zigzag_scan8x8_q0       , ff_zigzag_direct        , sizeof(h->zigzag_scan8x8_q0      ));
+        memcpy(h->zigzag_scan8x8_cavlc_q0 , zigzag_scan8x8_cavlc    , sizeof(h->zigzag_scan8x8_cavlc_q0));
+        memcpy(h->field_scan_q0           , field_scan              , sizeof(h->field_scan_q0          ));
+        memcpy(h->field_scan8x8_q0        , field_scan8x8           , sizeof(h->field_scan8x8_q0       ));
+        memcpy(h->field_scan8x8_cavlc_q0  , field_scan8x8_cavlc     , sizeof(h->field_scan8x8_cavlc_q0 ));
     } else {
-        h->zigzag_scan_q0          = h->zigzag_scan;
-        h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
-        h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
-        h->field_scan_q0           = h->field_scan;
-        h->field_scan8x8_q0        = h->field_scan8x8;
-        h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
+        memcpy(h->zigzag_scan_q0          , h->zigzag_scan          , sizeof(h->zigzag_scan_q0         ));
+        memcpy(h->zigzag_scan8x8_q0       , h->zigzag_scan8x8       , sizeof(h->zigzag_scan8x8_q0      ));
+        memcpy(h->zigzag_scan8x8_cavlc_q0 , h->zigzag_scan8x8_cavlc , sizeof(h->zigzag_scan8x8_cavlc_q0));
+        memcpy(h->field_scan_q0           , h->field_scan           , sizeof(h->field_scan_q0          ));
+        memcpy(h->field_scan8x8_q0        , h->field_scan8x8        , sizeof(h->field_scan8x8_q0       ));
+        memcpy(h->field_scan8x8_cavlc_q0  , h->field_scan8x8_cavlc  , sizeof(h->field_scan8x8_cavlc_q0 ));
     }
 }
 
@@ -973,7 +1023,7 @@ static int clone_slice(H264Context *dst, H264Context *src)
     return 0;
 }
 
-static enum AVPixelFormat get_pixel_format(H264Context *h)
+static enum AVPixelFormat get_pixel_format(H264Context *h, int force_callback)
 {
     switch (h->sps.bit_depth_luma) {
     case 9:
@@ -998,22 +1048,53 @@ static enum AVPixelFormat get_pixel_format(H264Context *h)
         else
             return AV_PIX_FMT_YUV420P10;
         break;
-    case 8:
+    case 12:
         if (CHROMA444(h)) {
             if (h->avctx->colorspace == AVCOL_SPC_RGB) {
-                return AV_PIX_FMT_GBRP;
+                return AV_PIX_FMT_GBRP12;
             } else
-                return h->avctx->color_range == AVCOL_RANGE_JPEG ? AV_PIX_FMT_YUVJ444P
-                                                                 : AV_PIX_FMT_YUV444P;
+                return AV_PIX_FMT_YUV444P12;
+        } else if (CHROMA422(h))
+            return AV_PIX_FMT_YUV422P12;
+        else
+            return AV_PIX_FMT_YUV420P12;
+        break;
+    case 14:
+        if (CHROMA444(h)) {
+            if (h->avctx->colorspace == AVCOL_SPC_RGB) {
+                return AV_PIX_FMT_GBRP14;
+            } else
+                return AV_PIX_FMT_YUV444P14;
+        } else if (CHROMA422(h))
+            return AV_PIX_FMT_YUV422P14;
+        else
+            return AV_PIX_FMT_YUV420P14;
+        break;
+    case 8:
+        if (CHROMA444(h)) {
+            if (h->avctx->colorspace == AVCOL_SPC_RGB) {
+                av_log(h->avctx, AV_LOG_DEBUG, "Detected GBR colorspace.\n");
+                return AV_PIX_FMT_GBR24P;
+            } else if (h->avctx->colorspace == AVCOL_SPC_YCGCO) {
+                av_log(h->avctx, AV_LOG_WARNING, "Detected unsupported YCgCo colorspace.\n");
+            }
+            return h->avctx->color_range == AVCOL_RANGE_JPEG ? AV_PIX_FMT_YUVJ444P
+                                                                : AV_PIX_FMT_YUV444P;
         } else if (CHROMA422(h)) {
             return h->avctx->color_range == AVCOL_RANGE_JPEG ? AV_PIX_FMT_YUVJ422P
                                                              : AV_PIX_FMT_YUV422P;
         } else {
-            return ff_get_format(h->avctx, h->avctx->codec->pix_fmts ?
-                                 h->avctx->codec->pix_fmts :
-                                 h->avctx->color_range == AVCOL_RANGE_JPEG ?
-                                 h264_hwaccel_pixfmt_list_jpeg_420 :
-                                 h264_hwaccel_pixfmt_list_420);
+            int i;
+            const enum AVPixelFormat * fmt = h->avctx->codec->pix_fmts ?
+                                        h->avctx->codec->pix_fmts :
+                                        h->avctx->color_range == AVCOL_RANGE_JPEG ?
+                                        h264_hwaccel_pixfmt_list_jpeg_420 :
+                                        h264_hwaccel_pixfmt_list_420;
+
+            for (i=0; fmt[i] != AV_PIX_FMT_NONE; i++)
+                if (fmt[i] == h->avctx->pix_fmt && !force_callback)
+                    return fmt[i];
+            return ff_thread_get_format(h->avctx, fmt);
         }
         break;
     default:
@@ -1028,6 +1109,8 @@ static int init_dimensions(H264Context *h)
 {
     int width  = h->width  - (h->sps.crop_right + h->sps.crop_left);
     int height = h->height - (h->sps.crop_top   + h->sps.crop_bottom);
+    av_assert0(h->sps.crop_right + h->sps.crop_left < (unsigned)h->width);
+    av_assert0(h->sps.crop_top + h->sps.crop_bottom < (unsigned)h->height);
 
     /* handle container cropping */
     if (!h->sps.crop &&
@@ -1118,7 +1201,9 @@ static int h264_slice_header_init(H264Context *h, int reinit)
             if (!c)
                 return AVERROR(ENOMEM);
             c->avctx             = h->avctx;
-            c->mecc              = h->mecc;
+            if (CONFIG_ERROR_RESILIENCE) {
+                c->mecc              = h->mecc;
+            }
             c->vdsp              = h->vdsp;
             c->h264dsp           = h->h264dsp;
             c->h264qpel          = h->h264qpel;
@@ -1126,6 +1211,7 @@ static int h264_slice_header_init(H264Context *h, int reinit)
             c->sps               = h->sps;
             c->pps               = h->pps;
             c->pixel_shift       = h->pixel_shift;
+            c->cur_chroma_format_idc = h->cur_chroma_format_idc;
             c->width             = h->width;
             c->height            = h->height;
             c->linesize          = h->linesize;
@@ -1161,6 +1247,17 @@ static int h264_slice_header_init(H264Context *h, int reinit)
     return 0;
 }
 
+static enum AVPixelFormat non_j_pixfmt(enum AVPixelFormat a)
+{ /* Map a YUVJ* pixel format to its YUV* counterpart so two formats can be compared ignoring the "J" variant. */
+    switch (a) {
+    case AV_PIX_FMT_YUVJ420P: return AV_PIX_FMT_YUV420P;
+    case AV_PIX_FMT_YUVJ422P: return AV_PIX_FMT_YUV422P;
+    case AV_PIX_FMT_YUVJ444P: return AV_PIX_FMT_YUV444P;
+    default: /* any other format is returned unchanged */
+        return a;
+    }
+}
+
 /**
  * Decode a slice header.
  * This will (re)intialize the decoder and call h264_frame_start() as needed.
@@ -1177,15 +1274,15 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
     unsigned int pps_id;
     int ret;
     unsigned int slice_type, tmp, i, j;
-    int default_ref_list_done = 0;
     int last_pic_structure, last_pic_droppable;
+    int must_reinit;
     int needs_reinit = 0;
     int field_pic_flag, bottom_field_flag;
 
     h->qpel_put = h->h264qpel.put_h264_qpel_pixels_tab;
     h->qpel_avg = h->h264qpel.avg_h264_qpel_pixels_tab;
 
-    first_mb_in_slice = get_ue_golomb(&h->gb);
+    first_mb_in_slice = get_ue_golomb_long(&h->gb);
 
     if (first_mb_in_slice == 0) { // FIXME better field boundary detection
         if (h0->current_slice && h->cur_pic_ptr && FIELD_PICTURE(h)) {
@@ -1216,10 +1313,6 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
         h->slice_type_fixed = 0;
 
     slice_type = golomb_to_pict_type[slice_type];
-    if (slice_type == AV_PICTURE_TYPE_I ||
-        (h0->current_slice != 0 && slice_type == h0->last_slice_type)) {
-        default_ref_list_done = 1;
-    }
     h->slice_type     = slice_type;
     h->slice_type_nos = slice_type & 3;
 
@@ -1229,6 +1322,15 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
         return AVERROR_INVALIDDATA;
     }
 
+    if (
+        (h->avctx->skip_frame >= AVDISCARD_NONREF && !h->nal_ref_idc) ||
+        (h->avctx->skip_frame >= AVDISCARD_BIDIR  && h->slice_type_nos == AV_PICTURE_TYPE_B) ||
+        (h->avctx->skip_frame >= AVDISCARD_NONINTRA && h->slice_type_nos != AV_PICTURE_TYPE_I) ||
+        (h->avctx->skip_frame >= AVDISCARD_NONKEY && h->nal_unit_type != NAL_IDR_SLICE) ||
+         h->avctx->skip_frame >= AVDISCARD_ALL) {
+         return SLICE_SKIPED;
+     }
+
     // to make a few old functions happy, it's wrong though
     h->pict_type = h->slice_type;
 
@@ -1243,6 +1345,12 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
                pps_id);
         return AVERROR_INVALIDDATA;
     }
+    if (h0->au_pps_id >= 0 && pps_id != h0->au_pps_id) {
+        av_log(h->avctx, AV_LOG_ERROR,
+               "PPS change from %d to %d forbidden\n",
+               h0->au_pps_id, pps_id);
+        return AVERROR_INVALIDDATA;
+    }
     h->pps = *h0->pps_buffers[pps_id];
 
     if (!h0->sps_buffers[h->pps.sps_id]) {
@@ -1253,11 +1361,18 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
     }
 
     if (h->pps.sps_id != h->sps.sps_id ||
+        h->pps.sps_id != h->current_sps_id ||
         h0->sps_buffers[h->pps.sps_id]->new) {
-        h0->sps_buffers[h->pps.sps_id]->new = 0;
 
         h->sps = *h0->sps_buffers[h->pps.sps_id];
 
+        if (h->mb_width  != h->sps.mb_width ||
+            h->mb_height != h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag) ||
+            h->avctx->bits_per_raw_sample != h->sps.bit_depth_luma ||
+            h->cur_chroma_format_idc != h->sps.chroma_format_idc
+        )
+            needs_reinit = 1;
+
         if (h->bit_depth_luma    != h->sps.bit_depth_luma ||
             h->chroma_format_idc != h->sps.chroma_format_idc) {
             h->bit_depth_luma    = h->sps.bit_depth_luma;
@@ -1272,9 +1387,17 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
     h->avctx->level   = h->sps.level_idc;
     h->avctx->refs    = h->sps.ref_frame_count;
 
-    if (h->mb_width  != h->sps.mb_width ||
-        h->mb_height != h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag))
-        needs_reinit = 1;
+    must_reinit = (h->context_initialized &&
+                    (   16*h->sps.mb_width != h->avctx->coded_width
+                     || 16*h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag) != h->avctx->coded_height
+                     || h->avctx->bits_per_raw_sample != h->sps.bit_depth_luma
+                     || h->cur_chroma_format_idc != h->sps.chroma_format_idc
+                     || av_cmp_q(h->sps.sar, h->avctx->sample_aspect_ratio)
+                     || h->mb_width  != h->sps.mb_width
+                     || h->mb_height != h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag)
+                    ));
+    if (non_j_pixfmt(h0->avctx->pix_fmt) != non_j_pixfmt(get_pixel_format(h0, 0)))
+        must_reinit = 1;
 
     h->mb_width  = h->sps.mb_width;
     h->mb_height = h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
@@ -1293,8 +1416,8 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
         return ret;
 
     if (h->sps.video_signal_type_present_flag) {
-        h->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG
-                                                  : AVCOL_RANGE_MPEG;
+        h->avctx->color_range = h->sps.full_range>0 ? AVCOL_RANGE_JPEG
+                                                    : AVCOL_RANGE_MPEG;
         if (h->sps.colour_description_present_flag) {
             if (h->avctx->colorspace != h->sps.colorspace)
                 needs_reinit = 1;
@@ -1304,7 +1427,8 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
         }
     }
 
-    if (h->context_initialized && needs_reinit) {
+    if (h->context_initialized &&
+        (must_reinit || needs_reinit)) {
         if (h != h0) {
             av_log(h->avctx, AV_LOG_ERROR,
                    "changing width %d -> %d / height %d -> %d on "
@@ -1317,12 +1441,12 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
 
         ff_h264_flush_change(h);
 
-        if ((ret = get_pixel_format(h)) < 0)
+        if ((ret = get_pixel_format(h, 1)) < 0)
             return ret;
         h->avctx->pix_fmt = ret;
 
         av_log(h->avctx, AV_LOG_INFO, "Reinit context to %dx%d, "
-               "pix_fmt: %d\n", h->width, h->height, h->avctx->pix_fmt);
+               "pix_fmt: %s\n", h->width, h->height, av_get_pix_fmt_name(h->avctx->pix_fmt));
 
         if ((ret = h264_slice_header_init(h, 1)) < 0) {
             av_log(h->avctx, AV_LOG_ERROR,
@@ -1337,7 +1461,7 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
             return AVERROR_PATCHWELCOME;
         }
 
-        if ((ret = get_pixel_format(h)) < 0)
+        if ((ret = get_pixel_format(h, 1)) < 0)
             return ret;
         h->avctx->pix_fmt = ret;
 
@@ -1363,6 +1487,10 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
     if (h->sps.frame_mbs_only_flag) {
         h->picture_structure = PICT_FRAME;
     } else {
+        if (!h->sps.direct_8x8_inference_flag && slice_type == AV_PICTURE_TYPE_B) {
+            av_log(h->avctx, AV_LOG_ERROR, "This stream was generated by a broken encoder, invalid 8x8 inference\n");
+            return -1;
+        }
         field_pic_flag = get_bits1(&h->gb);
         if (field_pic_flag) {
             bottom_field_flag = get_bits1(&h->gb);
@@ -1412,17 +1540,23 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
          * Here, we're using that to see if we should mark previously
          * decode frames as "finished".
          * We have to do that before the "dummy" in-between frame allocation,
-         * since that can modify s->current_picture_ptr. */
+         * since that can modify h->cur_pic_ptr. */
         if (h0->first_field) {
             assert(h0->cur_pic_ptr);
             assert(h0->cur_pic_ptr->f.buf[0]);
             assert(h0->cur_pic_ptr->reference != DELAYED_PIC_REF);
 
+            /* Mark old field/frame as completed */
+            if (h0->cur_pic_ptr->tf.owner == h0->avctx) {
+                ff_thread_report_progress(&h0->cur_pic_ptr->tf, INT_MAX,
+                                          last_pic_structure == PICT_BOTTOM_FIELD);
+            }
+
             /* figure out if we have a complementary field pair */
             if (!FIELD_PICTURE(h) || h->picture_structure == last_pic_structure) {
                 /* Previous field is unmatched. Don't display it, but let it
                  * remain for reference if marked as such. */
-                if (!last_pic_droppable && last_pic_structure != PICT_FRAME) {
+                if (last_pic_structure != PICT_FRAME) {
                     ff_thread_report_progress(&h0->cur_pic_ptr->tf, INT_MAX,
                                               last_pic_structure == PICT_TOP_FIELD);
                 }
@@ -1432,7 +1566,7 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
                      * different frame_nums. Consider this field first in
                      * pair. Throw away previous field except for reference
                      * purposes. */
-                    if (!last_pic_droppable && last_pic_structure != PICT_FRAME) {
+                    if (last_pic_structure != PICT_FRAME) {
                         ff_thread_report_progress(&h0->cur_pic_ptr->tf, INT_MAX,
                                                   last_pic_structure == PICT_TOP_FIELD);
                     }
@@ -1459,11 +1593,14 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
             }
         }
 
-        while (h->frame_num != h->prev_frame_num &&
+        while (h->frame_num != h->prev_frame_num && !h0->first_field &&
                h->frame_num != (h->prev_frame_num + 1) % (1 << h->sps.log2_max_frame_num)) {
             H264Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL;
             av_log(h->avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n",
                    h->frame_num, h->prev_frame_num);
+            if (!h->sps.gaps_in_frame_num_allowed_flag)
+                for(i=0; i<FF_ARRAY_ELEMS(h->last_pocs); i++)
+                    h->last_pocs[i] = INT_MIN;
             ret = h264_frame_start(h);
             if (ret < 0) {
                 h0->first_field = 0;
@@ -1473,6 +1610,7 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
             h->prev_frame_num++;
             h->prev_frame_num        %= 1 << h->sps.log2_max_frame_num;
             h->cur_pic_ptr->frame_num = h->prev_frame_num;
+            h->cur_pic_ptr->invalid_gap = !h->sps.gaps_in_frame_num_allowed_flag;
             ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, 0);
             ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, 1);
             ret = ff_generate_sliding_window_mmcos(h, 1);
@@ -1519,6 +1657,8 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
                 h0->first_field = FIELD_PICTURE(h);
             } else {
                 if (h0->cur_pic_ptr->frame_num != h->frame_num) {
+                    ff_thread_report_progress(&h0->cur_pic_ptr->tf, INT_MAX,
+                                              h0->picture_structure==PICT_BOTTOM_FIELD);
                     /* This and the previous field had different frame_nums.
                      * Consider this field first in pair. Throw away previous
                      * one except for reference purposes. */
@@ -1542,13 +1682,32 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
         } else {
             release_unused_pictures(h, 0);
         }
+        /* Some macroblocks can be accessed before they're available in case
+        * of lost slices, MBAFF or threading. */
+        if (FIELD_PICTURE(h)) {
+            for(i = (h->picture_structure == PICT_BOTTOM_FIELD); i<h->mb_height; i++)
+                memset(h->slice_table + i*h->mb_stride, -1, (h->mb_stride - (i+1==h->mb_height)) * sizeof(*h->slice_table));
+        } else {
+            memset(h->slice_table, -1,
+                (h->mb_height * h->mb_stride - 1) * sizeof(*h->slice_table));
+        }
+        h0->last_slice_type = -1;
     }
     if (h != h0 && (ret = clone_slice(h, h0)) < 0)
         return ret;
 
+    /* can't be in alloc_tables because linesize isn't known there.
+     * FIXME: redo bipred weight to not require extra buffer? */
+    for (i = 0; i < h->slice_context_count; i++)
+        if (h->thread_context[i]) {
+            ret = alloc_scratch_buffers(h->thread_context[i], h->linesize);
+            if (ret < 0)
+                return ret;
+        }
+
     h->cur_pic_ptr->frame_num = h->frame_num; // FIXME frame_num cleanup
 
-    assert(h->mb_num == h->mb_width * h->mb_height);
+    av_assert1(h->mb_num == h->mb_width * h->mb_height);
     if (first_mb_in_slice << FIELD_OR_MBAFF_PICTURE(h) >= h->mb_num ||
         first_mb_in_slice >= h->mb_num) {
         av_log(h->avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
@@ -1559,7 +1718,7 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
                                FIELD_OR_MBAFF_PICTURE(h);
     if (h->picture_structure == PICT_BOTTOM_FIELD)
         h->resync_mb_y = h->mb_y = h->mb_y + 1;
-    assert(h->mb_y < h->mb_height);
+    av_assert1(h->mb_y < h->mb_height);
 
     if (h->picture_structure == PICT_FRAME) {
         h->curr_pic_num = h->frame_num;
@@ -1594,11 +1753,14 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
     ret = ff_set_ref_count(h);
     if (ret < 0)
         return ret;
-    else if (ret == 1)
-        default_ref_list_done = 0;
 
-    if (!default_ref_list_done)
+    if (slice_type != AV_PICTURE_TYPE_I &&
+        (h0->current_slice == 0 ||
+         slice_type != h0->last_slice_type ||
+         memcmp(h0->last_ref_count, h0->ref_count, sizeof(h0->ref_count)))) {
+
         ff_h264_fill_default_ref_list(h);
+    }
 
     if (h->slice_type_nos != AV_PICTURE_TYPE_I) {
        ret = ff_h264_decode_ref_pic_list_reordering(h);
@@ -1705,6 +1867,8 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
 
     if (h->avctx->skip_loop_filter >= AVDISCARD_ALL ||
         (h->avctx->skip_loop_filter >= AVDISCARD_NONKEY &&
+         h->nal_unit_type != NAL_IDR_SLICE) ||
+        (h->avctx->skip_loop_filter >= AVDISCARD_NONINTRA &&
          h->slice_type_nos != AV_PICTURE_TYPE_I) ||
         (h->avctx->skip_loop_filter >= AVDISCARD_BIDIR  &&
          h->slice_type_nos == AV_PICTURE_TYPE_B) ||
@@ -1721,13 +1885,16 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
             h0->max_contexts = 1;
             if (!h0->single_decode_warning) {
                 av_log(h->avctx, AV_LOG_INFO,
-                       "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
+                       "Cannot parallelize slice decoding with deblocking filter type 1, decoding such frames in sequential order\n"
+                       "To parallelize slice decoding you need video encoded with disable_deblocking_filter_idc set to 2 (deblock only edges that do not cross slices).\n"
+                       "Setting the flags2 libavcodec option to +fast (-flags2 +fast) will disable deblocking across slices and enable parallel slice decoding "
+                       "but will generate non-standard-compliant output.\n");
                 h0->single_decode_warning = 1;
             }
             if (h != h0) {
                 av_log(h->avctx, AV_LOG_ERROR,
                        "Deblocking switched inside frame.\n");
-                return 1;
+                return SLICE_SINGLETHREAD;
             }
         }
     }
@@ -1739,10 +1906,16 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
                    6 * (h->sps.bit_depth_luma - 8);
 
     h0->last_slice_type = slice_type;
+    memcpy(h0->last_ref_count, h0->ref_count, sizeof(h0->last_ref_count));
     h->slice_num        = ++h0->current_slice;
-    if (h->slice_num >= MAX_SLICES) {
-        av_log(h->avctx, AV_LOG_ERROR,
-               "Too many slices, increase MAX_SLICES and recompile\n");
+
+    if (h->slice_num)
+        h0->slice_row[(h->slice_num-1)&(MAX_SLICES-1)]= h->resync_mb_y;
+    if (   h0->slice_row[h->slice_num&(MAX_SLICES-1)] + 3 >= h->resync_mb_y
+        && h0->slice_row[h->slice_num&(MAX_SLICES-1)] <= h->resync_mb_y
+        && h->slice_num >= MAX_SLICES) {
+        // With ASO this check needs to be updated, depending on how we decide to assign slice numbers in that case.
+        av_log(h->avctx, AV_LOG_WARNING, "Possibly too many slices (%d >= %d), increase MAX_SLICES and recompile if there are artifacts\n", h->slice_num, MAX_SLICES);
     }
 
     for (j = 0; j < 2; j++) {
@@ -1778,6 +1951,15 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
                              (h->ref_list[j][i].reference & 3);
     }
 
+    if (h->ref_count[0]) ff_h264_set_erpic(&h->er.last_pic, &h->ref_list[0][0]);
+    if (h->ref_count[1]) ff_h264_set_erpic(&h->er.next_pic, &h->ref_list[1][0]);
+
+    h->er.ref_count = h->ref_count[0];
+    h0->au_pps_id = pps_id;
+    h->sps.new =
+    h0->sps_buffers[h->pps.sps_id]->new = 0;
+    h->current_sps_id = h->pps.sps_id;
+
     if (h->avctx->debug & FF_DEBUG_PICT_INFO) {
         av_log(h->avctx, AV_LOG_DEBUG,
                "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
@@ -1834,7 +2016,7 @@ static av_always_inline void fill_filter_caches_inter(H264Context *h,
         if (USES_LIST(top_type, list)) {
             const int b_xy  = h->mb2b_xy[top_xy] + 3 * b_stride;
             const int b8_xy = 4 * top_xy + 2;
-            int (*ref2frm)[64] = h->ref2frm[h->slice_table[top_xy] & (MAX_SLICES - 1)][0] + (MB_MBAFF(h) ? 20 : 2);
+            int (*ref2frm)[64] = (void*)(h->ref2frm[h->slice_table[top_xy] & (MAX_SLICES - 1)][0] + (MB_MBAFF(h) ? 20 : 2));
             AV_COPY128(mv_dst - 1 * 8, h->cur_pic.motion_val[list][b_xy + 0]);
             ref_cache[0 - 1 * 8] =
             ref_cache[1 - 1 * 8] = ref2frm[list][h->cur_pic.ref_index[list][b8_xy + 0]];
@@ -1849,7 +2031,7 @@ static av_always_inline void fill_filter_caches_inter(H264Context *h,
             if (USES_LIST(left_type[LTOP], list)) {
                 const int b_xy  = h->mb2b_xy[left_xy[LTOP]] + 3;
                 const int b8_xy = 4 * left_xy[LTOP] + 1;
-                int (*ref2frm)[64] = h->ref2frm[h->slice_table[left_xy[LTOP]] & (MAX_SLICES - 1)][0] + (MB_MBAFF(h) ? 20 : 2);
+                int (*ref2frm)[64] =(void*)( h->ref2frm[h->slice_table[left_xy[LTOP]] & (MAX_SLICES - 1)][0] + (MB_MBAFF(h) ? 20 : 2));
                 AV_COPY32(mv_dst - 1 +  0, h->cur_pic.motion_val[list][b_xy + b_stride * 0]);
                 AV_COPY32(mv_dst - 1 +  8, h->cur_pic.motion_val[list][b_xy + b_stride * 1]);
                 AV_COPY32(mv_dst - 1 + 16, h->cur_pic.motion_val[list][b_xy + b_stride * 2]);
@@ -1882,7 +2064,7 @@ static av_always_inline void fill_filter_caches_inter(H264Context *h,
 
     {
         int8_t *ref = &h->cur_pic.ref_index[list][4 * mb_xy];
-        int (*ref2frm)[64] = h->ref2frm[h->slice_num & (MAX_SLICES - 1)][0] + (MB_MBAFF(h) ? 20 : 2);
+        int (*ref2frm)[64] = (void*)(h->ref2frm[h->slice_num & (MAX_SLICES - 1)][0] + (MB_MBAFF(h) ? 20 : 2));
         uint32_t ref01 = (pack16to32(ref2frm[list][ref[0]], ref2frm[list][ref[1]]) & 0x00FF00FF) * 0x0101;
         uint32_t ref23 = (pack16to32(ref2frm[list][ref[2]], ref2frm[list][ref[3]]) & 0x00FF00FF) * 0x0101;
         AV_WN32A(&ref_cache[0 * 8], ref01);
@@ -2155,7 +2337,7 @@ static void decode_finish_row(H264Context *h)
 
     ff_h264_draw_horiz_band(h, top, height);
 
-    if (h->droppable)
+    if (h->droppable || h->er.error_occurred)
         return;
 
     ff_thread_report_progress(&h->cur_pic_ptr->tf, top + height - 1,
@@ -2165,12 +2347,11 @@ static void decode_finish_row(H264Context *h)
 static void er_add_slice(H264Context *h, int startx, int starty,
                          int endx, int endy, int status)
 {
-#if CONFIG_ERROR_RESILIENCE
-    ERContext *er = &h->er;
+    if (CONFIG_ERROR_RESILIENCE) { /* build-time configuration constant; when it is 0 this wrapper does nothing */
+        ERContext *er = &h->er; /* error-resilience context embedded in the decoder context */
 
-    er->ref_count = h->ref_count[0];
-    ff_er_add_slice(er, startx, starty, endx, endy, status);
-#endif
+        ff_er_add_slice(er, startx, starty, endx, endy, status); /* arguments are forwarded unchanged; er->ref_count is not set here, h->er.ref_count is assigned in the slice header decoding code */
+    }
 }
 
 static int decode_slice(struct AVCodecContext *avctx, void *arg)
@@ -2180,10 +2361,22 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg)
 
     h->mb_skip_run = -1;
 
+    av_assert0(h->block_offset[15] == (4 * ((scan8[15] - scan8[0]) & 7) << h->pixel_shift) + 4 * h->linesize * ((scan8[15] - scan8[0]) >> 3));
+
     h->is_complex = FRAME_MBAFF(h) || h->picture_structure != PICT_FRAME ||
                     avctx->codec_id != AV_CODEC_ID_H264 ||
                     (CONFIG_GRAY && (h->flags & CODEC_FLAG_GRAY));
 
+    if (!(h->avctx->active_thread_type & FF_THREAD_SLICE) && h->picture_structure == PICT_FRAME && h->er.error_status_table) {
+        const int start_i  = av_clip(h->resync_mb_x + h->resync_mb_y * h->mb_width, 0, h->mb_num - 1);
+        if (start_i) {
+            int prev_status = h->er.error_status_table[h->er.mb_index2xy[start_i - 1]];
+            prev_status &= ~ VP_START;
+            if (prev_status != (ER_MV_END | ER_DC_END | ER_AC_END))
+                h->er.error_occurred = 1;
+        }
+    }
+
     if (h->pps.cabac) {
         /* realign */
         align_get_bits(&h->gb);
@@ -2224,9 +2417,11 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg)
                     loop_filter(h, lf_x_start, h->mb_x + 1);
                 return 0;
             }
-            if (ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
+            if (h->cabac.bytestream > h->cabac.bytestream_end + 2 )
+                av_log(h->avctx, AV_LOG_DEBUG, "bytestream overread %"PTRDIFF_SPECIFIER"\n", h->cabac.bytestream_end - h->cabac.bytestream);
+            if (ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 4) {
                 av_log(h->avctx, AV_LOG_ERROR,
-                       "error while decoding MB %d %d, bytestream %td\n",
+                       "error while decoding MB %d %d, bytestream %"PTRDIFF_SPECIFIER"\n",
                        h->mb_x, h->mb_y,
                        h->cabac.bytestream_end - h->cabac.bytestream);
                 er_add_slice(h, h->resync_mb_x, h->resync_mb_y, h->mb_x,
@@ -2295,14 +2490,15 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg)
                     tprintf(h->avctx, "slice end %d %d\n",
                             get_bits_count(&h->gb), h->gb.size_in_bits);
 
-                    if (get_bits_left(&h->gb) == 0) {
+                    if (   get_bits_left(&h->gb) == 0
+                        || get_bits_left(&h->gb) > 0 && !(h->avctx->err_recognition & AV_EF_AGGRESSIVE)) {
                         er_add_slice(h, h->resync_mb_x, h->resync_mb_y,
                                      h->mb_x - 1, h->mb_y, ER_MB_END);
 
                         return 0;
                     } else {
                         er_add_slice(h, h->resync_mb_x, h->resync_mb_y,
-                                     h->mb_x - 1, h->mb_y, ER_MB_END);
+                                     h->mb_x, h->mb_y, ER_MB_END);
 
                         return AVERROR_INVALIDDATA;
                     }
@@ -2343,20 +2539,21 @@ int ff_h264_execute_decode_slices(H264Context *h, unsigned context_count)
     H264Context *hx;
     int i;
 
-    if (h->mb_y >= h->mb_height) {
-        av_log(h->avctx, AV_LOG_ERROR,
-               "Input contains more MB rows than the frame height.\n");
-        return AVERROR_INVALIDDATA;
-    }
+    av_assert0(h->mb_y < h->mb_height);
 
-    if (h->avctx->hwaccel)
+    if (h->avctx->hwaccel ||
+        h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
         return 0;
     if (context_count == 1) {
         return decode_slice(avctx, &h);
     } else {
+        av_assert0(context_count > 0);
         for (i = 1; i < context_count; i++) {
             hx                 = h->thread_context[i];
-            hx->er.error_count = 0;
+            if (CONFIG_ERROR_RESILIENCE) {
+                hx->er.error_count = 0;
+            }
+            hx->x264_build     = h->x264_build;
         }
 
         avctx->execute(avctx, decode_slice, h->thread_context,
@@ -2368,8 +2565,10 @@ int ff_h264_execute_decode_slices(H264Context *h, unsigned context_count)
         h->mb_y              = hx->mb_y;
         h->droppable         = hx->droppable;
         h->picture_structure = hx->picture_structure;
-        for (i = 1; i < context_count; i++)
-            h->er.error_count += h->thread_context[i]->er.error_count;
+        if (CONFIG_ERROR_RESILIENCE) {
+            for (i = 1; i < context_count; i++)
+                h->er.error_count += h->thread_context[i]->er.error_count;
+        }
     }
 
     return 0;
diff --git a/libavcodec/h264addpx_template.c b/libavcodec/h264addpx_template.c
index cdbfc67..046b6c2 100644
--- a/libavcodec/h264addpx_template.c
+++ b/libavcodec/h264addpx_template.c
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
  * Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/h264chroma.c b/libavcodec/h264chroma.c
index d5146de..5b3e13b 100644
--- a/libavcodec/h264chroma.c
+++ b/libavcodec/h264chroma.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -32,9 +32,11 @@
     c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_ ## depth ## _c; \
     c->put_h264_chroma_pixels_tab[1] = put_h264_chroma_mc4_ ## depth ## _c; \
     c->put_h264_chroma_pixels_tab[2] = put_h264_chroma_mc2_ ## depth ## _c; \
+    c->put_h264_chroma_pixels_tab[3] = put_h264_chroma_mc1_ ## depth ## _c; \
     c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_ ## depth ## _c; \
     c->avg_h264_chroma_pixels_tab[1] = avg_h264_chroma_mc4_ ## depth ## _c; \
     c->avg_h264_chroma_pixels_tab[2] = avg_h264_chroma_mc2_ ## depth ## _c; \
+    c->avg_h264_chroma_pixels_tab[3] = avg_h264_chroma_mc1_ ## depth ## _c; \
 
 av_cold void ff_h264chroma_init(H264ChromaContext *c, int bit_depth)
 {
diff --git a/libavcodec/h264chroma.h b/libavcodec/h264chroma.h
index 93064fe..d4b8a0e 100644
--- a/libavcodec/h264chroma.h
+++ b/libavcodec/h264chroma.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -24,8 +24,8 @@
 typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
 
 typedef struct H264ChromaContext {
-    h264_chroma_mc_func put_h264_chroma_pixels_tab[3];
-    h264_chroma_mc_func avg_h264_chroma_pixels_tab[3];
+    h264_chroma_mc_func put_h264_chroma_pixels_tab[4]; /**< "put" routines; the C init code in h264chroma.c fills [0], [1], [2], [3] with the mc8, mc4, mc2 and mc1 variants */
+    h264_chroma_mc_func avg_h264_chroma_pixels_tab[4]; /**< "avg" routines; the C init code in h264chroma.c fills [0], [1], [2], [3] with the mc8, mc4, mc2 and mc1 variants */
 } H264ChromaContext;
 
 void ff_h264chroma_init(H264ChromaContext *c, int bit_depth);
diff --git a/libavcodec/h264chroma_template.c b/libavcodec/h264chroma_template.c
index 028ed13..072b5e0 100644
--- a/libavcodec/h264chroma_template.c
+++ b/libavcodec/h264chroma_template.c
@@ -2,28 +2,62 @@
  * Copyright (c) 2000, 2001 Fabrice Bellard
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include <assert.h>
+#include "libavutil/avassert.h"
 
 #include "bit_depth_template.c"
 
 #define H264_CHROMA_MC(OPNAME, OP)\
+static void FUNCC(OPNAME ## h264_chroma_mc1)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){ /* one-sample-wide variant: produces dst[0] for each of h rows */\
+    pixel *dst = (pixel*)_dst;\
+    pixel *src = (pixel*)_src;\
+    const int A=(8-x)*(8-y); /* weight of src[0] */\
+    const int B=(  x)*(8-y); /* weight of src[1] (right neighbour) */\
+    const int C=(8-x)*(  y); /* weight of src[stride] (neighbour one row down) */\
+    const int D=(  x)*(  y); /* weight of src[stride+1] (diagonal neighbour); A+B+C+D == 64 */\
+    int i;\
+    stride >>= sizeof(pixel)-1; /* same result as stride / sizeof(pixel) when sizeof(pixel) is 1 or 2; presumably stride arrives in bytes - confirm against callers */\
+    /* OP is the store operation passed to H264_CHROMA_MC; its definition is not visible in this hunk */\
+    av_assert2(x<8 && y<8 && x>=0 && y>=0);\
+/* the three branches below pick the cheapest formula for the given x, y */\
+    if(D){ /* x and y both non-zero: all four neighbours contribute */\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
+    } else if (B + C) { /* within the asserted range: exactly one of x, y is non-zero */\
+        const int E= B+C; /* one of B, C is 0 here, so E is the single remaining neighbour weight */\
+        const int step= C ? stride : 1; /* C != 0: use the sample one row down, otherwise the sample to the right */\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + E*src[step+0]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
+    } else { /* x == 0 and y == 0: A == 64 and no neighbouring sample is read */\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
+    }\
+}\
 static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
     pixel *dst = (pixel*)_dst;\
     pixel *src = (pixel*)_src;\
@@ -32,9 +66,9 @@ static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t *
     const int C=(8-x)*(  y);\
     const int D=(  x)*(  y);\
     int i;\
-    stride /= sizeof(pixel);\
+    stride >>= sizeof(pixel)-1;\
     \
-    assert(x<8 && y<8 && x>=0 && y>=0);\
+    av_assert2(x<8 && y<8 && x>=0 && y>=0);\
 \
     if(D){\
         for(i=0; i<h; i++){\
@@ -70,9 +104,9 @@ static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst/*align 8*/, uint8_t *
     const int C=(8-x)*(  y);\
     const int D=(  x)*(  y);\
     int i;\
-    stride /= sizeof(pixel);\
+    stride >>= sizeof(pixel)-1;\
     \
-    assert(x<8 && y<8 && x>=0 && y>=0);\
+    av_assert2(x<8 && y<8 && x>=0 && y>=0);\
 \
     if(D){\
         for(i=0; i<h; i++){\
@@ -114,9 +148,9 @@ static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *_dst/*align 8*/, uint8_t *
     const int C=(8-x)*(  y);\
     const int D=(  x)*(  y);\
     int i;\
-    stride /= sizeof(pixel);\
+    stride >>= sizeof(pixel)-1;\
     \
-    assert(x<8 && y<8 && x>=0 && y>=0);\
+    av_assert2(x<8 && y<8 && x>=0 && y>=0);\
 \
     if(D){\
         for(i=0; i<h; i++){\
diff --git a/libavcodec/h264data.h b/libavcodec/h264data.h
index 6b40b39..38f3258 100644
--- a/libavcodec/h264data.h
+++ b/libavcodec/h264data.h
@@ -2,20 +2,20 @@
  * H26L/H264/AVC/JVT/14496-10/... encoder/decoder
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -51,7 +51,7 @@ static const uint8_t golomb_to_inter_cbp[48] = {
     17, 18, 20, 24, 19, 21, 26, 28, 23, 27, 29, 30, 22, 25, 38, 41
 };
 
-static const uint8_t zigzag_scan[16] = {
+static const uint8_t zigzag_scan[16+1] = {
     0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4,
     1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
     1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4,
diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c
index c8edbd0..ddcd6e6 100644
--- a/libavcodec/h264dsp.c
+++ b/libavcodec/h264dsp.c
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
  * Copyright (c) 2003-2010 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -28,6 +28,8 @@
 #include <stdint.h>
 
 #include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
+
 #include "avcodec.h"
 #include "h264dsp.h"
 #include "h264idct.h"
@@ -46,6 +48,14 @@
 #include "h264dsp_template.c"
 #undef BIT_DEPTH
 
+#define BIT_DEPTH 12
+#include "h264dsp_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 14
+#include "h264dsp_template.c"
+#undef BIT_DEPTH
+
 #define BIT_DEPTH 8
 #include "h264addpx_template.c"
 #undef BIT_DEPTH
@@ -130,7 +140,14 @@ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
     case 10:
         H264_DSP(10);
         break;
+    case 12:
+        H264_DSP(12);
+        break;
+    case 14:
+        H264_DSP(14);
+        break;
     default:
+        av_assert0(bit_depth<=8);
         H264_DSP(8);
         break;
     }
diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
index c4be235..85ea40b 100644
--- a/libavcodec/h264dsp.h
+++ b/libavcodec/h264dsp.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2003-2010 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/h264dsp_template.c b/libavcodec/h264dsp_template.c
index 3d99cfc..4d5faf0 100644
--- a/libavcodec/h264dsp_template.c
+++ b/libavcodec/h264dsp_template.c
@@ -1,21 +1,21 @@
 /*
  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
- * Copyright (c) 2003-2010 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -35,7 +35,7 @@ static void FUNCC(weight_h264_pixels ## W)(uint8_t *_block, int stride, int heig
 { \
     int y; \
     pixel *block = (pixel*)_block; \
-    stride /= sizeof(pixel); \
+    stride >>= sizeof(pixel)-1; \
     offset <<= (log2_denom + (BIT_DEPTH-8)); \
     if(log2_denom) offset += 1<<(log2_denom-1); \
     for (y = 0; y < height; y++, block += stride) { \
@@ -66,7 +66,7 @@ static void FUNCC(biweight_h264_pixels ## W)(uint8_t *_dst, uint8_t *_src, int s
     int y; \
     pixel *dst = (pixel*)_dst; \
     pixel *src = (pixel*)_src; \
-    stride /= sizeof(pixel); \
+    stride >>= sizeof(pixel)-1; \
     offset <<= (BIT_DEPTH-8); \
     offset = ((offset + 1) | 1) << log2_denom; \
     for (y = 0; y < height; y++, dst += stride, src += stride) { \
@@ -101,12 +101,12 @@ H264_WEIGHT(2)
 #undef op_scale2
 #undef H264_WEIGHT
 
-static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta, int8_t *tc0)
+static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma)(uint8_t *p_pix, int xstride, int ystride, int inner_iters, int alpha, int beta, int8_t *tc0)
 {
-    pixel *pix = (pixel*)_pix;
+    pixel *pix = (pixel*)p_pix;
     int i, d;
-    xstride /= sizeof(pixel);
-    ystride /= sizeof(pixel);
+    xstride >>= sizeof(pixel)-1;
+    ystride >>= sizeof(pixel)-1;
     alpha <<= BIT_DEPTH - 8;
     beta  <<= BIT_DEPTH - 8;
     for( i = 0; i < 4; i++ ) {
@@ -162,12 +162,12 @@ static void FUNCC(h264_h_loop_filter_luma_mbaff)(uint8_t *pix, int stride, int a
     FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0);
 }
 
-static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma_intra)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta)
+static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma_intra)(uint8_t *p_pix, int xstride, int ystride, int inner_iters, int alpha, int beta)
 {
-    pixel *pix = (pixel*)_pix;
+    pixel *pix = (pixel*)p_pix;
     int d;
-    xstride /= sizeof(pixel);
-    ystride /= sizeof(pixel);
+    xstride >>= sizeof(pixel)-1;
+    ystride >>= sizeof(pixel)-1;
     alpha <<= BIT_DEPTH - 8;
     beta  <<= BIT_DEPTH - 8;
     for( d = 0; d < 4 * inner_iters; d++ ) {
@@ -228,14 +228,14 @@ static void FUNCC(h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix, int stride,
     FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta);
 }
 
-static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta, int8_t *tc0)
+static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma)(uint8_t *p_pix, int xstride, int ystride, int inner_iters, int alpha, int beta, int8_t *tc0)
 {
-    pixel *pix = (pixel*)_pix;
+    pixel *pix = (pixel*)p_pix;
     int i, d;
-    xstride /= sizeof(pixel);
-    ystride /= sizeof(pixel);
     alpha <<= BIT_DEPTH - 8;
     beta  <<= BIT_DEPTH - 8;
+    xstride >>= sizeof(pixel)-1;
+    ystride >>= sizeof(pixel)-1;
     for( i = 0; i < 4; i++ ) {
         const int tc = ((tc0[i] - 1) << (BIT_DEPTH - 8)) + 1;
         if( tc <= 0 ) {
@@ -282,12 +282,12 @@ static void FUNCC(h264_h_loop_filter_chroma422_mbaff)(uint8_t *pix, int stride,
     FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0);
 }
 
-static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma_intra)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta)
+static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma_intra)(uint8_t *p_pix, int xstride, int ystride, int inner_iters, int alpha, int beta)
 {
-    pixel *pix = (pixel*)_pix;
+    pixel *pix = (pixel*)p_pix;
     int d;
-    xstride /= sizeof(pixel);
-    ystride /= sizeof(pixel);
+    xstride >>= sizeof(pixel)-1;
+    ystride >>= sizeof(pixel)-1;
     alpha <<= BIT_DEPTH - 8;
     beta  <<= BIT_DEPTH - 8;
     for( d = 0; d < 4 * inner_iters; d++ ) {
diff --git a/libavcodec/h264idct.c b/libavcodec/h264idct.c
index ea08d03..6a771af 100644
--- a/libavcodec/h264idct.c
+++ b/libavcodec/h264idct.c
@@ -2,20 +2,20 @@
  * H.264 IDCT
  * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -38,3 +38,11 @@
 #define BIT_DEPTH 10
 #include "h264idct_template.c"
 #undef BIT_DEPTH
+
+#define BIT_DEPTH 12
+#include "h264idct_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 14
+#include "h264idct_template.c"
+#undef BIT_DEPTH
diff --git a/libavcodec/h264idct.h b/libavcodec/h264idct.h
index 816a825..17e0051 100644
--- a/libavcodec/h264idct.h
+++ b/libavcodec/h264idct.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -38,5 +38,7 @@ void ff_h264_chroma_dc_dequant_idct_ ## depth ## _c(int16_t *block, int qmul);
 H264_IDCT( 8)
 H264_IDCT( 9)
 H264_IDCT(10)
+H264_IDCT(12)
+H264_IDCT(14)
 
 #endif /* AVCODEC_H264IDCT_H */
diff --git a/libavcodec/h264idct_template.c b/libavcodec/h264idct_template.c
index 83c2a95..abf888e 100644
--- a/libavcodec/h264idct_template.c
+++ b/libavcodec/h264idct_template.c
@@ -2,20 +2,20 @@
  * H.264 IDCT
  * Copyright (c) 2004-2011 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -35,7 +35,7 @@ void FUNCC(ff_h264_idct_add)(uint8_t *_dst, int16_t *_block, int stride)
     int i;
     pixel *dst = (pixel*)_dst;
     dctcoef *block = (dctcoef*)_block;
-    stride /= sizeof(pixel);
+    stride >>= sizeof(pixel)-1;
 
     block[0] += 1 << 5;
 
@@ -70,7 +70,7 @@ void FUNCC(ff_h264_idct8_add)(uint8_t *_dst, int16_t *_block, int stride){
     int i;
     pixel *dst = (pixel*)_dst;
     dctcoef *block = (dctcoef*)_block;
-    stride /= sizeof(pixel);
+    stride >>= sizeof(pixel)-1;
 
     block[0] += 32;
 
diff --git a/libavcodec/h264pred.c b/libavcodec/h264pred.c
index b782a25..044fc90 100644
--- a/libavcodec/h264pred.c
+++ b/libavcodec/h264pred.c
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,6 +26,7 @@
  */
 
 #include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
 #include "libavutil/intreadwrite.h"
 #include "avcodec.h"
 #include "h264pred.h"
@@ -42,6 +43,14 @@
 #include "h264pred_template.c"
 #undef BIT_DEPTH
 
+#define BIT_DEPTH 12
+#include "h264pred_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 14
+#include "h264pred_template.c"
+#undef BIT_DEPTH
+
 static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright,
                                    ptrdiff_t stride)
 {
@@ -401,7 +410,7 @@ static void pred8x8_tm_vp8_c(uint8_t *src, ptrdiff_t stride)
  */
 av_cold void ff_h264_pred_init(H264PredContext *h, int codec_id,
                                const int bit_depth,
-                               const int chroma_format_idc)
+                               int chroma_format_idc)
 {
 #undef FUNC
 #undef FUNCC
@@ -552,6 +561,8 @@ av_cold void ff_h264_pred_init(H264PredContext *h, int codec_id,
     h->pred4x4_add  [ HOR_PRED   ]= FUNCC(pred4x4_horizontal_add          , depth);\
     h->pred8x8l_add [VERT_PRED   ]= FUNCC(pred8x8l_vertical_add           , depth);\
     h->pred8x8l_add [ HOR_PRED   ]= FUNCC(pred8x8l_horizontal_add         , depth);\
+    h->pred8x8l_filter_add [VERT_PRED   ]= FUNCC(pred8x8l_vertical_filter_add           , depth);\
+    h->pred8x8l_filter_add [ HOR_PRED   ]= FUNCC(pred8x8l_horizontal_filter_add         , depth);\
     if (chroma_format_idc <= 1) {\
     h->pred8x8_add  [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add            , depth);\
     h->pred8x8_add  [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add          , depth);\
@@ -569,7 +580,14 @@ av_cold void ff_h264_pred_init(H264PredContext *h, int codec_id,
         case 10:
             H264_PRED(10)
             break;
+        case 12:
+            H264_PRED(12)
+            break;
+        case 14:
+            H264_PRED(14)
+            break;
         default:
+            av_assert0(bit_depth<=8);
             H264_PRED(8)
             break;
     }
diff --git a/libavcodec/h264pred.h b/libavcodec/h264pred.h
index 5c4ef17..6708292 100644
--- a/libavcodec/h264pred.h
+++ b/libavcodec/h264pred.h
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -101,6 +101,8 @@ typedef struct H264PredContext {
                           int16_t *block /*align 16*/, ptrdiff_t stride);
     void(*pred8x8l_add[2])(uint8_t *pix /*align  8*/,
                            int16_t *block /*align 16*/, ptrdiff_t stride);
+    void(*pred8x8l_filter_add[2])(uint8_t *pix /*align  8*/,
+                           int16_t *block /*align 16*/, int topleft, int topright, ptrdiff_t stride);
     void(*pred8x8_add[3])(uint8_t *pix /*align  8*/,
                           const int *block_offset,
                           int16_t *block /*align 16*/, ptrdiff_t stride);
diff --git a/libavcodec/h264pred_template.c b/libavcodec/h264pred_template.c
index 48baec8..f684433 100644
--- a/libavcodec/h264pred_template.c
+++ b/libavcodec/h264pred_template.c
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
  * Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -35,7 +35,7 @@ static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright,
                                     ptrdiff_t _stride)
 {
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
     const pixel4 a= AV_RN4PA(src-stride);
 
     AV_WN4PA(src+0*stride, a);
@@ -48,7 +48,7 @@ static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright,
                                       ptrdiff_t _stride)
 {
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
     AV_WN4PA(src+0*stride, PIXEL_SPLAT_X4(src[-1+0*stride]));
     AV_WN4PA(src+1*stride, PIXEL_SPLAT_X4(src[-1+1*stride]));
     AV_WN4PA(src+2*stride, PIXEL_SPLAT_X4(src[-1+2*stride]));
@@ -59,7 +59,7 @@ static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright,
                               ptrdiff_t _stride)
 {
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
     const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                    + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
     const pixel4 a = PIXEL_SPLAT_X4(dc);
@@ -74,7 +74,7 @@ static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright,
                                    ptrdiff_t _stride)
 {
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
     const int dc= (  src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
     const pixel4 a = PIXEL_SPLAT_X4(dc);
 
@@ -88,7 +88,7 @@ static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright,
                                   ptrdiff_t _stride)
 {
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
     const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
     const pixel4 a = PIXEL_SPLAT_X4(dc);
 
@@ -102,7 +102,7 @@ static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright,
                                   ptrdiff_t _stride)
 {
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
     const pixel4 a = PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1));
 
     AV_WN4PA(src+0*stride, a);
@@ -115,7 +115,7 @@ static void FUNCC(pred4x4_127_dc)(uint8_t *_src, const uint8_t *topright,
                                   ptrdiff_t _stride)
 {
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
     const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))-1);
 
     AV_WN4PA(src+0*stride, a);
@@ -128,7 +128,7 @@ static void FUNCC(pred4x4_129_dc)(uint8_t *_src, const uint8_t *topright,
                                   ptrdiff_t _stride)
 {
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
     const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))+1);
 
     AV_WN4PA(src+0*stride, a);
@@ -166,7 +166,7 @@ static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright,
                                       ptrdiff_t _stride)
 {
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
     const int lt= src[-1-1*stride];
     LOAD_TOP_EDGE
     LOAD_LEFT_EDGE
@@ -194,7 +194,7 @@ static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright,
 {
     pixel *src = (pixel*)_src;
     const pixel *topright = (const pixel*)_topright;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
     LOAD_TOP_EDGE
     LOAD_TOP_RIGHT_EDGE
 //    LOAD_LEFT_EDGE
@@ -222,7 +222,7 @@ static void FUNCC(pred4x4_vertical_right)(uint8_t *_src,
                                           ptrdiff_t _stride)
 {
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
     const int lt= src[-1-1*stride];
     LOAD_TOP_EDGE
     LOAD_LEFT_EDGE
@@ -251,7 +251,7 @@ static void FUNCC(pred4x4_vertical_left)(uint8_t *_src,
 {
     pixel *src = (pixel*)_src;
     const pixel *topright = (const pixel*)_topright;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
     LOAD_TOP_EDGE
     LOAD_TOP_RIGHT_EDGE
 
@@ -277,7 +277,7 @@ static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright,
                                          ptrdiff_t _stride)
 {
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
     LOAD_LEFT_EDGE
 
     src[0+0*stride]=(l0 + l1 + 1)>>1;
@@ -303,7 +303,7 @@ static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src,
                                            ptrdiff_t _stride)
 {
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
     const int lt= src[-1-1*stride];
     LOAD_TOP_EDGE
     LOAD_LEFT_EDGE
@@ -330,7 +330,7 @@ static void FUNCC(pred16x16_vertical)(uint8_t *_src, ptrdiff_t _stride)
 {
     int i;
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
     const pixel4 a = AV_RN4PA(((pixel4*)(src-stride))+0);
     const pixel4 b = AV_RN4PA(((pixel4*)(src-stride))+1);
     const pixel4 c = AV_RN4PA(((pixel4*)(src-stride))+2);
@@ -348,7 +348,7 @@ static void FUNCC(pred16x16_horizontal)(uint8_t *_src, ptrdiff_t stride)
 {
     int i;
     pixel *src = (pixel*)_src;
-    stride /= sizeof(pixel);
+    stride >>= sizeof(pixel)-1;
 
     for(i=0; i<16; i++){
         const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
@@ -374,7 +374,7 @@ static void FUNCC(pred16x16_dc)(uint8_t *_src, ptrdiff_t stride)
     int i, dc=0;
     pixel *src = (pixel*)_src;
     pixel4 dcsplat;
-    stride /= sizeof(pixel);
+    stride >>= sizeof(pixel)-1;
 
     for(i=0;i<16; i++){
         dc+= src[-1+i*stride];
@@ -393,7 +393,7 @@ static void FUNCC(pred16x16_left_dc)(uint8_t *_src, ptrdiff_t stride)
     int i, dc=0;
     pixel *src = (pixel*)_src;
     pixel4 dcsplat;
-    stride /= sizeof(pixel);
+    stride >>= sizeof(pixel)-1;
 
     for(i=0;i<16; i++){
         dc+= src[-1+i*stride];
@@ -408,7 +408,7 @@ static void FUNCC(pred16x16_top_dc)(uint8_t *_src, ptrdiff_t stride)
     int i, dc=0;
     pixel *src = (pixel*)_src;
     pixel4 dcsplat;
-    stride /= sizeof(pixel);
+    stride >>= sizeof(pixel)-1;
 
     for(i=0;i<16; i++){
         dc+= src[i-stride];
@@ -423,7 +423,7 @@ static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
 {\
     int i;\
     pixel *src = (pixel*)_src;\
-    stride /= sizeof(pixel);\
+    stride >>= sizeof(pixel)-1;\
     PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\
 }
 
@@ -440,7 +440,7 @@ static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src,
   int a;
   INIT_CLIP
   pixel *src = (pixel*)_src;
-  int stride = _stride/sizeof(pixel);
+  int stride = _stride>>(sizeof(pixel)-1);
   const pixel * const src0 = src +7-stride;
   const pixel *       src1 = src +8*stride-1;
   const pixel *       src2 = src1-2*stride;    // == src+6*stride-1;
@@ -489,7 +489,7 @@ static void FUNCC(pred8x8_vertical)(uint8_t *_src, ptrdiff_t _stride)
 {
     int i;
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
     const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);
     const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);
 
@@ -517,7 +517,7 @@ static void FUNCC(pred8x8_horizontal)(uint8_t *_src, ptrdiff_t stride)
 {
     int i;
     pixel *src = (pixel*)_src;
-    stride /= sizeof(pixel);
+    stride >>= sizeof(pixel)-1;
 
     for(i=0; i<8; i++){
         const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
@@ -544,7 +544,7 @@ static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
     int i;\
     const pixel4 a = PIXEL_SPLAT_X4(v);\
     pixel *src = (pixel*)_src;\
-    stride /= sizeof(pixel);\
+    stride >>= sizeof(pixel)-1;\
     for(i=0; i<8; i++){\
         AV_WN4PA(((pixel4*)(src+i*stride))+0, a);\
         AV_WN4PA(((pixel4*)(src+i*stride))+1, a);\
@@ -567,7 +567,7 @@ static void FUNCC(pred8x8_left_dc)(uint8_t *_src, ptrdiff_t stride)
     int dc0, dc2;
     pixel4 dc0splat, dc2splat;
     pixel *src = (pixel*)_src;
-    stride /= sizeof(pixel);
+    stride >>= sizeof(pixel)-1;
 
     dc0=dc2=0;
     for(i=0;i<4; i++){
@@ -599,7 +599,7 @@ static void FUNCC(pred8x8_top_dc)(uint8_t *_src, ptrdiff_t stride)
     int dc0, dc1;
     pixel4 dc0splat, dc1splat;
     pixel *src = (pixel*)_src;
-    stride /= sizeof(pixel);
+    stride >>= sizeof(pixel)-1;
 
     dc0=dc1=0;
     for(i=0;i<4; i++){
@@ -647,7 +647,7 @@ static void FUNCC(pred8x8_dc)(uint8_t *_src, ptrdiff_t stride)
     int dc0, dc1, dc2;
     pixel4 dc0splat, dc1splat, dc2splat, dc3splat;
     pixel *src = (pixel*)_src;
-    stride /= sizeof(pixel);
+    stride >>= sizeof(pixel)-1;
 
     dc0=dc1=dc2=0;
     for(i=0;i<4; i++){
@@ -713,6 +713,7 @@ static void FUNCC(pred8x16_dc)(uint8_t *_src, ptrdiff_t stride)
     }
 }
 
+//the following 4 functions should not be optimized!
 static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride)
 {
     FUNCC(pred8x8_top_dc)(src, stride);
@@ -771,7 +772,7 @@ static void FUNCC(pred8x8_plane)(uint8_t *_src, ptrdiff_t _stride)
   int a;
   INIT_CLIP
   pixel *src = (pixel*)_src;
-  int stride = _stride/sizeof(pixel);
+  int stride = _stride>>(sizeof(pixel)-1);
   const pixel * const src0 = src +3-stride;
   const pixel *       src1 = src +4*stride-1;
   const pixel *       src2 = src1-2*stride;    // == src+2*stride-1;
@@ -885,7 +886,7 @@ static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft,
                                    int has_topright, ptrdiff_t _stride)
 {
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
 
     PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)));
 }
@@ -893,7 +894,7 @@ static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft,
                                     int has_topright, ptrdiff_t _stride)
 {
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
 
     PREDICT_8x8_LOAD_LEFT;
     const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3);
@@ -903,7 +904,7 @@ static void FUNCC(pred8x8l_top_dc)(uint8_t *_src, int has_topleft,
                                    int has_topright, ptrdiff_t _stride)
 {
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
 
     PREDICT_8x8_LOAD_TOP;
     const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3);
@@ -913,7 +914,7 @@ static void FUNCC(pred8x8l_dc)(uint8_t *_src, int has_topleft,
                                int has_topright, ptrdiff_t _stride)
 {
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
 
     PREDICT_8x8_LOAD_LEFT;
     PREDICT_8x8_LOAD_TOP;
@@ -925,7 +926,7 @@ static void FUNCC(pred8x8l_horizontal)(uint8_t *_src, int has_topleft,
                                        int has_topright, ptrdiff_t _stride)
 {
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
     pixel4 a;
 
     PREDICT_8x8_LOAD_LEFT;
@@ -940,7 +941,7 @@ static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft,
 {
     int y;
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
     pixel4 a, b;
 
     PREDICT_8x8_LOAD_TOP;
@@ -963,7 +964,7 @@ static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft,
                                       int has_topright, ptrdiff_t _stride)
 {
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
     PREDICT_8x8_LOAD_TOP;
     PREDICT_8x8_LOAD_TOPRIGHT;
     SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
@@ -986,7 +987,7 @@ static void FUNCC(pred8x8l_down_right)(uint8_t *_src, int has_topleft,
                                        int has_topright, ptrdiff_t _stride)
 {
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
     PREDICT_8x8_LOAD_TOP;
     PREDICT_8x8_LOAD_LEFT;
     PREDICT_8x8_LOAD_TOPLEFT;
@@ -1010,7 +1011,7 @@ static void FUNCC(pred8x8l_vertical_right)(uint8_t *_src, int has_topleft,
                                            int has_topright, ptrdiff_t _stride)
 {
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
     PREDICT_8x8_LOAD_TOP;
     PREDICT_8x8_LOAD_LEFT;
     PREDICT_8x8_LOAD_TOPLEFT;
@@ -1041,7 +1042,7 @@ static void FUNCC(pred8x8l_horizontal_down)(uint8_t *_src, int has_topleft,
                                             int has_topright, ptrdiff_t _stride)
 {
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
     PREDICT_8x8_LOAD_TOP;
     PREDICT_8x8_LOAD_LEFT;
     PREDICT_8x8_LOAD_TOPLEFT;
@@ -1072,7 +1073,7 @@ static void FUNCC(pred8x8l_vertical_left)(uint8_t *_src, int has_topleft,
                                           int has_topright, ptrdiff_t _stride)
 {
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
     PREDICT_8x8_LOAD_TOP;
     PREDICT_8x8_LOAD_TOPRIGHT;
     SRC(0,0)= (t0 + t1 + 1) >> 1;
@@ -1102,7 +1103,7 @@ static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft,
                                           int has_topright, ptrdiff_t _stride)
 {
     pixel *src = (pixel*)_src;
-    int stride = _stride/sizeof(pixel);
+    int stride = _stride>>(sizeof(pixel)-1);
     PREDICT_8x8_LOAD_LEFT;
     SRC(0,0)= (l0 + l1 + 1) >> 1;
     SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
@@ -1123,6 +1124,79 @@ static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft,
     SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
     SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
 }
+
+static void FUNCC(pred8x8l_vertical_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft,
+                                     int has_topright, ptrdiff_t _stride)
+{ /* Seeds each of 8 columns with t0..t7 and writes running sums of _block down that column. */
+    int i;
+    pixel *src = (pixel*)_src;
+    const dctcoef *block = (const dctcoef*)_block;
+    pixel pix[8];
+    int stride = _stride>>(sizeof(pixel)-1); /* bytes -> pixels, assuming pixel is 1 or 2 bytes wide (TODO confirm) */
+    PREDICT_8x8_LOAD_TOP; /* macro defined outside this hunk; evidently declares t0..t7, presumably using src/stride/has_top* (TODO confirm) */
+    /* Copy the eight t values into an array so the column loop can index them. */
+    pix[0] = t0;
+    pix[1] = t1;
+    pix[2] = t2;
+    pix[3] = t3;
+    pix[4] = t4;
+    pix[5] = t5;
+    pix[6] = t6;
+    pix[7] = t7;
+    /* One iteration per column: src and block each advance by one element. */
+    for(i=0; i<8; i++){
+        pixel v = pix[i]; /* running sum; stored back into a pixel-typed variable each step, no clip call here */
+        src[0*stride]= v += block[0];
+        src[1*stride]= v += block[8]; /* successive rows of one column are 8 entries apart in block */
+        src[2*stride]= v += block[16];
+        src[3*stride]= v += block[24];
+        src[4*stride]= v += block[32];
+        src[5*stride]= v += block[40];
+        src[6*stride]= v += block[48];
+        src[7*stride]= v +  block[56]; /* last row: plain +, v is not read again */
+        src++;
+        block++;
+    }
+    /* Zero all 64 coefficients of the caller's buffer once they have been applied. */
+    memset(_block, 0, sizeof(dctcoef) * 64);
+}
+
+static void FUNCC(pred8x8l_horizontal_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft,
+                               int has_topright, ptrdiff_t _stride)
+{ /* Seeds each of 8 rows with l0..l7 and writes running sums of _block along that row. */
+    int i;
+    pixel *src = (pixel*)_src;
+    const dctcoef *block = (const dctcoef*)_block;
+    pixel pix[8];
+    int stride = _stride>>(sizeof(pixel)-1); /* bytes -> pixels, assuming pixel is 1 or 2 bytes wide (TODO confirm) */
+    PREDICT_8x8_LOAD_LEFT; /* macro defined outside this hunk; evidently declares l0..l7, presumably using src/stride/has_topleft (TODO confirm) */
+    /* Copy the eight l values into an array so the row loop can index them. */
+    pix[0] = l0;
+    pix[1] = l1;
+    pix[2] = l2;
+    pix[3] = l3;
+    pix[4] = l4;
+    pix[5] = l5;
+    pix[6] = l6;
+    pix[7] = l7;
+    /* One iteration per row: src advances by stride, block by 8 coefficients. */
+    for(i=0; i<8; i++){
+        pixel v = pix[i]; /* running sum; stored back into a pixel-typed variable each step, no clip call here */
+        src[0]= v += block[0];
+        src[1]= v += block[1];
+        src[2]= v += block[2];
+        src[3]= v += block[3];
+        src[4]= v += block[4];
+        src[5]= v += block[5];
+        src[6]= v += block[6];
+        src[7]= v +  block[7]; /* last column: plain +, v is not read again */
+        src+= stride;
+        block+= 8;
+    }
+    /* Zero all 64 coefficients of the caller's buffer once they have been applied. */
+    memset(_block, 0, sizeof(dctcoef) * 64);
+}
+
 #undef PREDICT_8x8_LOAD_LEFT
 #undef PREDICT_8x8_LOAD_TOP
 #undef PREDICT_8x8_LOAD_TOPLEFT
@@ -1139,7 +1213,7 @@ static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, int16_t *_block,
     int i;
     pixel *pix = (pixel*)_pix;
     const dctcoef *block = (const dctcoef*)_block;
-    stride /= sizeof(pixel);
+    stride >>= sizeof(pixel)-1;
     pix -= stride;
     for(i=0; i<4; i++){
         pixel v = pix[0];
@@ -1160,7 +1234,7 @@ static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, int16_t *_block,
     int i;
     pixel *pix = (pixel*)_pix;
     const dctcoef *block = (const dctcoef*)_block;
-    stride /= sizeof(pixel);
+    stride >>= sizeof(pixel)-1;
     for(i=0; i<4; i++){
         pixel v = pix[-1];
         pix[0]= v += block[0];
@@ -1180,7 +1254,7 @@ static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, int16_t *_block,
     int i;
     pixel *pix = (pixel*)_pix;
     const dctcoef *block = (const dctcoef*)_block;
-    stride /= sizeof(pixel);
+    stride >>= sizeof(pixel)-1;
     pix -= stride;
     for(i=0; i<8; i++){
         pixel v = pix[0];
@@ -1205,7 +1279,7 @@ static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, int16_t *_block,
     int i;
     pixel *pix = (pixel*)_pix;
     const dctcoef *block = (const dctcoef*)_block;
-    stride /= sizeof(pixel);
+    stride >>= sizeof(pixel)-1;
     for(i=0; i<8; i++){
         pixel v = pix[-1];
         pix[0]= v += block[0];
diff --git a/libavcodec/h264qpel.c b/libavcodec/h264qpel.c
index ec46da2..5f1bfa3 100644
--- a/libavcodec/h264qpel.c
+++ b/libavcodec/h264qpel.c
@@ -2,26 +2,27 @@
  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
  * Copyright (c) 2003-2010 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "libavutil/attributes.h"
 #include "h264qpel.h"
 
+#define pixeltmp int16_t
 #define BIT_DEPTH 8
 #include "h264qpel_template.c"
 #undef BIT_DEPTH
@@ -33,6 +34,17 @@
 #define BIT_DEPTH 10
 #include "h264qpel_template.c"
 #undef BIT_DEPTH
+#undef pixeltmp
+
+#define pixeltmp int32_t /* 12- and 14-bit instantiations use int32_t; the 8/9/10-bit ones above use int16_t */
+#define BIT_DEPTH 12
+#include "h264qpel_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 14
+#include "h264qpel_template.c"
+#undef BIT_DEPTH
+
 
 av_cold void ff_h264qpel_init(H264QpelContext *c, int bit_depth)
 {
@@ -76,6 +88,12 @@ av_cold void ff_h264qpel_init(H264QpelContext *c, int bit_depth)
     case 10:
         SET_QPEL(10);
         break;
+    case 12:
+        SET_QPEL(12);
+        break;
+    case 14:
+        SET_QPEL(14);
+        break;
     }
 
     if (ARCH_AARCH64)
diff --git a/libavcodec/h264qpel.h b/libavcodec/h264qpel.h
index 97ce195..d71130d 100644
--- a/libavcodec/h264qpel.h
+++ b/libavcodec/h264qpel.h
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
  * Copyright (c) 2003-2010 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/h264qpel_template.c b/libavcodec/h264qpel_template.c
index 7dd901c..fee4f11 100644
--- a/libavcodec/h264qpel_template.c
+++ b/libavcodec/h264qpel_template.c
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
  * Copyright (c) 2003-2010 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -75,14 +75,14 @@ static inline void FUNC(copy_block16)(uint8_t *dst, const uint8_t *src, int dstS
 }
 
 #define H264_LOWPASS(OPNAME, OP, OP2) \
-static av_unused void FUNC(OPNAME ## h264_qpel2_h_lowpass)(uint8_t *_dst, const uint8_t *_src, int dstStride, int srcStride){\
+static av_unused void FUNC(OPNAME ## h264_qpel2_h_lowpass)(uint8_t *p_dst, const uint8_t *p_src, int dstStride, int srcStride){\
     const int h=2;\
     INIT_CLIP\
     int i;\
-    pixel *dst = (pixel*)_dst;\
-    const pixel *src = (const pixel*)_src;\
-    dstStride /= sizeof(pixel);\
-    srcStride /= sizeof(pixel);\
+    pixel *dst = (pixel*)p_dst;\
+    const pixel *src = (const pixel*)p_src;\
+    dstStride >>= sizeof(pixel)-1;\
+    srcStride >>= sizeof(pixel)-1;\
     for(i=0; i<h; i++)\
     {\
         OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
@@ -98,8 +98,8 @@ static av_unused void FUNC(OPNAME ## h264_qpel2_v_lowpass)(uint8_t *_dst, const
     int i;\
     pixel *dst = (pixel*)_dst;\
     const pixel *src = (const pixel*)_src;\
-    dstStride /= sizeof(pixel);\
-    srcStride /= sizeof(pixel);\
+    dstStride >>= sizeof(pixel)-1;\
+    srcStride >>= sizeof(pixel)-1;\
     for(i=0; i<w; i++)\
     {\
         const int srcB= src[-2*srcStride];\
@@ -116,16 +116,16 @@ static av_unused void FUNC(OPNAME ## h264_qpel2_v_lowpass)(uint8_t *_dst, const
     }\
 }\
 \
-static av_unused void FUNC(OPNAME ## h264_qpel2_hv_lowpass)(uint8_t *_dst, int16_t *tmp, const uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
+static av_unused void FUNC(OPNAME ## h264_qpel2_hv_lowpass)(uint8_t *_dst, pixeltmp *tmp, const uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
     const int h=2;\
     const int w=2;\
-    const int pad = (BIT_DEPTH > 9) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
+    const int pad = (BIT_DEPTH == 10) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
     INIT_CLIP\
     int i;\
     pixel *dst = (pixel*)_dst;\
     const pixel *src = (const pixel*)_src;\
-    dstStride /= sizeof(pixel);\
-    srcStride /= sizeof(pixel);\
+    dstStride >>= sizeof(pixel)-1;\
+    srcStride >>= sizeof(pixel)-1;\
     src -= 2*srcStride;\
     for(i=0; i<h+5; i++)\
     {\
@@ -156,8 +156,8 @@ static void FUNC(OPNAME ## h264_qpel4_h_lowpass)(uint8_t *_dst, const uint8_t *_
     int i;\
     pixel *dst = (pixel*)_dst;\
     const pixel *src = (const pixel*)_src;\
-    dstStride /= sizeof(pixel);\
-    srcStride /= sizeof(pixel);\
+    dstStride >>= sizeof(pixel)-1;\
+    srcStride >>= sizeof(pixel)-1;\
     for(i=0; i<h; i++)\
     {\
         OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
@@ -175,8 +175,8 @@ static void FUNC(OPNAME ## h264_qpel4_v_lowpass)(uint8_t *_dst, const uint8_t *_
     int i;\
     pixel *dst = (pixel*)_dst;\
     const pixel *src = (const pixel*)_src;\
-    dstStride /= sizeof(pixel);\
-    srcStride /= sizeof(pixel);\
+    dstStride >>= sizeof(pixel)-1;\
+    srcStride >>= sizeof(pixel)-1;\
     for(i=0; i<w; i++)\
     {\
         const int srcB= src[-2*srcStride];\
@@ -197,16 +197,16 @@ static void FUNC(OPNAME ## h264_qpel4_v_lowpass)(uint8_t *_dst, const uint8_t *_
     }\
 }\
 \
-static void FUNC(OPNAME ## h264_qpel4_hv_lowpass)(uint8_t *_dst, int16_t *tmp, const uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
+static void FUNC(OPNAME ## h264_qpel4_hv_lowpass)(uint8_t *_dst, pixeltmp *tmp, const uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
     const int h=4;\
     const int w=4;\
-    const int pad = (BIT_DEPTH > 9) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
+    const int pad = (BIT_DEPTH == 10) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
     INIT_CLIP\
     int i;\
     pixel *dst = (pixel*)_dst;\
     const pixel *src = (const pixel*)_src;\
-    dstStride /= sizeof(pixel);\
-    srcStride /= sizeof(pixel);\
+    dstStride >>= sizeof(pixel)-1;\
+    srcStride >>= sizeof(pixel)-1;\
     src -= 2*srcStride;\
     for(i=0; i<h+5; i++)\
     {\
@@ -244,8 +244,8 @@ static void FUNC(OPNAME ## h264_qpel8_h_lowpass)(uint8_t *_dst, const uint8_t *_
     int i;\
     pixel *dst = (pixel*)_dst;\
     const pixel *src = (const pixel*)_src;\
-    dstStride /= sizeof(pixel);\
-    srcStride /= sizeof(pixel);\
+    dstStride >>= sizeof(pixel)-1;\
+    srcStride >>= sizeof(pixel)-1;\
     for(i=0; i<h; i++)\
     {\
         OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
@@ -267,8 +267,8 @@ static void FUNC(OPNAME ## h264_qpel8_v_lowpass)(uint8_t *_dst, const uint8_t *_
     int i;\
     pixel *dst = (pixel*)_dst;\
     const pixel *src = (const pixel*)_src;\
-    dstStride /= sizeof(pixel);\
-    srcStride /= sizeof(pixel);\
+    dstStride >>= sizeof(pixel)-1;\
+    srcStride >>= sizeof(pixel)-1;\
     for(i=0; i<w; i++)\
     {\
         const int srcB= src[-2*srcStride];\
@@ -297,16 +297,16 @@ static void FUNC(OPNAME ## h264_qpel8_v_lowpass)(uint8_t *_dst, const uint8_t *_
     }\
 }\
 \
-static void FUNC(OPNAME ## h264_qpel8_hv_lowpass)(uint8_t *_dst, int16_t *tmp, const uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
+static void FUNC(OPNAME ## h264_qpel8_hv_lowpass)(uint8_t *_dst, pixeltmp *tmp, const uint8_t *_src, int dstStride, int tmpStride, int srcStride){\
     const int h=8;\
     const int w=8;\
-    const int pad = (BIT_DEPTH > 9) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
+    const int pad = (BIT_DEPTH == 10) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\
     INIT_CLIP\
     int i;\
     pixel *dst = (pixel*)_dst;\
     const pixel *src = (const pixel*)_src;\
-    dstStride /= sizeof(pixel);\
-    srcStride /= sizeof(pixel);\
+    dstStride >>= sizeof(pixel)-1;\
+    srcStride >>= sizeof(pixel)-1;\
     src -= 2*srcStride;\
     for(i=0; i<h+5; i++)\
     {\
@@ -368,7 +368,7 @@ static void FUNC(OPNAME ## h264_qpel16_h_lowpass)(uint8_t *dst, const uint8_t *s
     FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\
 }\
 \
-static void FUNC(OPNAME ## h264_qpel16_hv_lowpass)(uint8_t *dst, int16_t *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+static void FUNC(OPNAME ## h264_qpel16_hv_lowpass)(uint8_t *dst, pixeltmp *tmp, const uint8_t *src, int dstStride, int tmpStride, int srcStride){\
     FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst                , tmp  , src                , dstStride, tmpStride, srcStride);\
     FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\
     src += 8*srcStride;\
@@ -480,13 +480,13 @@ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc33)(uint8_t *dst, const uint
 \
 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc22)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
 {\
-    int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
+    pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
     FUNC(OPNAME ## h264_qpel ## SIZE ## _hv_lowpass)(dst, tmp, src, stride, SIZE*sizeof(pixel), stride);\
 }\
 \
 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc21)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
 {\
-    int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
+    pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
     uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
     uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
     FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\
@@ -496,7 +496,7 @@ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc21)(uint8_t *dst, const uint
 \
 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc23)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
 {\
-    int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
+    pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
     uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\
     uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
     FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\
@@ -508,7 +508,7 @@ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc12)(uint8_t *dst, const uint
 {\
     uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
     uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
-    int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
+    pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
     uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
     uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
     FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel),  stride, SIZE + 5);\
@@ -521,7 +521,7 @@ static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc32)(uint8_t *dst, const uint
 {\
     uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\
     uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\
-    int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
+    pixeltmp tmp[SIZE*(SIZE+5)*sizeof(pixel)];\
     uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\
     uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\
     FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel),  stride, SIZE + 5);\
diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c
index 20d2878..389f3ec 100644
--- a/libavcodec/hevc.c
+++ b/libavcodec/hevc.c
@@ -1,28 +1,29 @@
 /*
- * HEVC video decoder
+ * HEVC video Decoder
  *
  * Copyright (C) 2012 - 2013 Guillaume Martres
  * Copyright (C) 2012 - 2013 Mickael Raulet
  * Copyright (C) 2012 - 2013 Gildas Cocherel
  * Copyright (C) 2012 - 2013 Wassim Hamidouche
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/atomic.h"
 #include "libavutil/attributes.h"
 #include "libavutil/common.h"
 #include "libavutil/display.h"
@@ -38,119 +39,7 @@
 #include "golomb.h"
 #include "hevc.h"
 
-const uint8_t ff_hevc_qpel_extra_before[4] = { 0, 3, 3, 2 };
-const uint8_t ff_hevc_qpel_extra_after[4]  = { 0, 3, 4, 4 };
-const uint8_t ff_hevc_qpel_extra[4]        = { 0, 6, 7, 6 };
-
-static const uint8_t scan_1x1[1] = { 0 };
-
-static const uint8_t horiz_scan2x2_x[4] = { 0, 1, 0, 1 };
-
-static const uint8_t horiz_scan2x2_y[4] = { 0, 0, 1, 1 };
-
-static const uint8_t horiz_scan4x4_x[16] = {
-    0, 1, 2, 3,
-    0, 1, 2, 3,
-    0, 1, 2, 3,
-    0, 1, 2, 3,
-};
-
-static const uint8_t horiz_scan4x4_y[16] = {
-    0, 0, 0, 0,
-    1, 1, 1, 1,
-    2, 2, 2, 2,
-    3, 3, 3, 3,
-};
-
-static const uint8_t horiz_scan8x8_inv[8][8] = {
-    {  0,  1,  2,  3, 16, 17, 18, 19, },
-    {  4,  5,  6,  7, 20, 21, 22, 23, },
-    {  8,  9, 10, 11, 24, 25, 26, 27, },
-    { 12, 13, 14, 15, 28, 29, 30, 31, },
-    { 32, 33, 34, 35, 48, 49, 50, 51, },
-    { 36, 37, 38, 39, 52, 53, 54, 55, },
-    { 40, 41, 42, 43, 56, 57, 58, 59, },
-    { 44, 45, 46, 47, 60, 61, 62, 63, },
-};
-
-static const uint8_t diag_scan2x2_x[4] = { 0, 0, 1, 1 };
-
-static const uint8_t diag_scan2x2_y[4] = { 0, 1, 0, 1 };
-
-static const uint8_t diag_scan2x2_inv[2][2] = {
-    { 0, 2, },
-    { 1, 3, },
-};
-
-const uint8_t ff_hevc_diag_scan4x4_x[16] = {
-    0, 0, 1, 0,
-    1, 2, 0, 1,
-    2, 3, 1, 2,
-    3, 2, 3, 3,
-};
-
-const uint8_t ff_hevc_diag_scan4x4_y[16] = {
-    0, 1, 0, 2,
-    1, 0, 3, 2,
-    1, 0, 3, 2,
-    1, 3, 2, 3,
-};
-
-static const uint8_t diag_scan4x4_inv[4][4] = {
-    { 0,  2,  5,  9, },
-    { 1,  4,  8, 12, },
-    { 3,  7, 11, 14, },
-    { 6, 10, 13, 15, },
-};
-
-const uint8_t ff_hevc_diag_scan8x8_x[64] = {
-    0, 0, 1, 0,
-    1, 2, 0, 1,
-    2, 3, 0, 1,
-    2, 3, 4, 0,
-    1, 2, 3, 4,
-    5, 0, 1, 2,
-    3, 4, 5, 6,
-    0, 1, 2, 3,
-    4, 5, 6, 7,
-    1, 2, 3, 4,
-    5, 6, 7, 2,
-    3, 4, 5, 6,
-    7, 3, 4, 5,
-    6, 7, 4, 5,
-    6, 7, 5, 6,
-    7, 6, 7, 7,
-};
-
-const uint8_t ff_hevc_diag_scan8x8_y[64] = {
-    0, 1, 0, 2,
-    1, 0, 3, 2,
-    1, 0, 4, 3,
-    2, 1, 0, 5,
-    4, 3, 2, 1,
-    0, 6, 5, 4,
-    3, 2, 1, 0,
-    7, 6, 5, 4,
-    3, 2, 1, 0,
-    7, 6, 5, 4,
-    3, 2, 1, 7,
-    6, 5, 4, 3,
-    2, 7, 6, 5,
-    4, 3, 7, 6,
-    5, 4, 7, 6,
-    5, 7, 6, 7,
-};
-
-static const uint8_t diag_scan8x8_inv[8][8] = {
-    {  0,  2,  5,  9, 14, 20, 27, 35, },
-    {  1,  4,  8, 13, 19, 26, 34, 42, },
-    {  3,  7, 12, 18, 25, 33, 41, 48, },
-    {  6, 11, 17, 24, 32, 40, 47, 53, },
-    { 10, 16, 23, 31, 39, 46, 52, 57, },
-    { 15, 22, 30, 38, 45, 51, 56, 60, },
-    { 21, 29, 37, 44, 50, 55, 59, 62, },
-    { 28, 36, 43, 49, 54, 58, 61, 63, },
-};
+const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
 
 /**
  * NOTE: Each function hls_foo correspond to the function foo in the
@@ -181,6 +70,10 @@ static void pic_arrays_free(HEVCContext *s)
     av_freep(&s->horizontal_bs);
     av_freep(&s->vertical_bs);
 
+    av_freep(&s->sh.entry_point_offset);
+    av_freep(&s->sh.size);
+    av_freep(&s->sh.offset);
+
     av_buffer_pool_uninit(&s->tab_mvf_pool);
     av_buffer_pool_uninit(&s->rpl_tab_pool);
 }
@@ -196,40 +89,40 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps)
     int ctb_count        = sps->ctb_width * sps->ctb_height;
     int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
 
-    s->bs_width  = width  >> 3;
-    s->bs_height = height >> 3;
+    s->bs_width  = (width  >> 2) + 1;
+    s->bs_height = (height >> 2) + 1;
 
     s->sao           = av_mallocz_array(ctb_count, sizeof(*s->sao));
     s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
     if (!s->sao || !s->deblock)
         goto fail;
 
-    s->skip_flag    = av_malloc(pic_size_in_ctb);
-    s->tab_ct_depth = av_malloc(sps->min_cb_height * sps->min_cb_width);
+    s->skip_flag    = av_malloc(sps->min_cb_height * sps->min_cb_width);
+    s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
     if (!s->skip_flag || !s->tab_ct_depth)
         goto fail;
 
-    s->cbf_luma = av_malloc(sps->min_tb_width * sps->min_tb_height);
+    s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
     s->tab_ipm  = av_mallocz(min_pu_size);
-    s->is_pcm   = av_malloc(min_pu_size);
+    s->is_pcm   = av_malloc((sps->min_pu_width + 1) * (sps->min_pu_height + 1));
     if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
         goto fail;
 
     s->filter_slice_edges = av_malloc(ctb_count);
-    s->tab_slice_address  = av_malloc(pic_size_in_ctb *
+    s->tab_slice_address  = av_malloc_array(pic_size_in_ctb,
                                       sizeof(*s->tab_slice_address));
-    s->qp_y_tab           = av_malloc(pic_size_in_ctb *
+    s->qp_y_tab           = av_malloc_array(pic_size_in_ctb,
                                       sizeof(*s->qp_y_tab));
     if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
         goto fail;
 
-    s->horizontal_bs = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
-    s->vertical_bs   = av_mallocz(2 * s->bs_width * (s->bs_height + 1));
+    s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
+    s->vertical_bs   = av_mallocz_array(s->bs_width, s->bs_height);
     if (!s->horizontal_bs || !s->vertical_bs)
         goto fail;
 
     s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
-                                          av_buffer_alloc);
+                                          av_buffer_allocz);
     s->rpl_tab_pool = av_buffer_pool_init(ctb_count * sizeof(RefPicListTab),
                                           av_buffer_allocz);
     if (!s->tab_mvf_pool || !s->rpl_tab_pool)
@@ -254,7 +147,7 @@ static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
     s->sh.luma_log2_weight_denom = get_ue_golomb_long(gb);
     if (s->sps->chroma_format_idc != 0) {
         int delta = get_se_golomb(gb);
-        s->sh.chroma_log2_weight_denom = av_clip_c(s->sh.luma_log2_weight_denom + delta, 0, 7);
+        s->sh.chroma_log2_weight_denom = av_clip(s->sh.luma_log2_weight_denom + delta, 0, 7);
     }
 
     for (i = 0; i < s->sh.nb_refs[L0]; i++) {
@@ -264,7 +157,7 @@ static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
             s->sh.luma_offset_l0[i] = 0;
         }
     }
-    if (s->sps->chroma_format_idc != 0) { // FIXME: invert "if" and "for"
+    if (s->sps->chroma_format_idc != 0) {
         for (i = 0; i < s->sh.nb_refs[L0]; i++)
             chroma_weight_l0_flag[i] = get_bits1(gb);
     } else {
@@ -282,7 +175,7 @@ static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
                 int delta_chroma_weight_l0 = get_se_golomb(gb);
                 int delta_chroma_offset_l0 = get_se_golomb(gb);
                 s->sh.chroma_weight_l0[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l0;
-                s->sh.chroma_offset_l0[i][j] = av_clip_c((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
+                s->sh.chroma_offset_l0[i][j] = av_clip((delta_chroma_offset_l0 - ((128 * s->sh.chroma_weight_l0[i][j])
                                                                                     >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
             }
         } else {
@@ -318,7 +211,7 @@ static void pred_weight_table(HEVCContext *s, GetBitContext *gb)
                     int delta_chroma_weight_l1 = get_se_golomb(gb);
                     int delta_chroma_offset_l1 = get_se_golomb(gb);
                     s->sh.chroma_weight_l1[i][j] = (1 << s->sh.chroma_log2_weight_denom) + delta_chroma_weight_l1;
-                    s->sh.chroma_offset_l1[i][j] = av_clip_c((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
+                    s->sh.chroma_offset_l1[i][j] = av_clip((delta_chroma_offset_l1 - ((128 * s->sh.chroma_weight_l1[i][j])
                                                                                         >> s->sh.chroma_log2_weight_denom) + 128), -128, 127);
                 }
             } else {
@@ -347,7 +240,7 @@ static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
         nb_sps = get_ue_golomb_long(gb);
     nb_sh = get_ue_golomb_long(gb);
 
-    if (nb_sh + nb_sps > FF_ARRAY_ELEMS(rps->poc))
+    if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))
         return AVERROR_INVALIDDATA;
 
     rps->nb_refs = nb_sh + nb_sps;
@@ -383,6 +276,24 @@ static int decode_lt_rps(HEVCContext *s, LongTermRPS *rps, GetBitContext *gb)
     return 0;
 }
 
+static int get_buffer_sao(HEVCContext *s, AVFrame *frame, const HEVCSPS *sps)
+{ /* Get a buffer for frame that is 2 wider/taller than avctx; returns 0, or the negative ff_get_buffer() result. */
+    int ret, i;
+
+    frame->width  = s->avctx->width  + 2; /* temporarily request 2 extra columns */
+    frame->height = s->avctx->height + 2; /* temporarily request 2 extra rows */
+    if ((ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
+        return ret; /* NOTE(review): frame->width/height still hold the enlarged values on this path */
+    for (i = 0; frame->data[i]; i++) { /* walk planes until the first NULL data pointer */
+        int offset = frame->linesize[i] + (1 << sps->pixel_shift); /* one line plus (1 << pixel_shift) bytes; presumably one sample -- confirm */
+        frame->data[i] += offset; /* presumably leaves a 1-sample margin on every side -- confirm */
+    }
+    frame->width  = s->avctx->width; /* restore the dimensions taken from avctx */
+    frame->height = s->avctx->height;
+
+    return 0;
+}
+
 static int set_sps(HEVCContext *s, const HEVCSPS *sps)
 {
     int ret;
@@ -424,10 +335,10 @@ static int set_sps(HEVCContext *s, const HEVCSPS *sps)
 
     if (sps->sao_enabled) {
         av_frame_unref(s->tmp_frame);
-        ret = ff_get_buffer(s->avctx, s->tmp_frame, AV_GET_BUFFER_FLAG_REF);
+        ret = get_buffer_sao(s, s->tmp_frame, sps); /* can fail: the existing ret check below is kept */
         if (ret < 0)
             goto fail;
-        s->frame = s->tmp_frame;
+        s->sao_frame = s->tmp_frame;
     }
 
     s->sps = sps;
@@ -455,9 +364,9 @@ fail:
 
 static int hls_slice_header(HEVCContext *s)
 {
-    GetBitContext *gb = &s->HEVClc.gb;
+    GetBitContext *gb = &s->HEVClc->gb;
     SliceHeader *sh   = &s->sh;
-    int i, ret;
+    int i, j, ret;
 
     // Coded parameters
     sh->first_slice_in_pic_flag = get_bits1(gb);
@@ -467,6 +376,7 @@ static int hls_slice_header(HEVCContext *s)
         if (IS_IDR(s))
             ff_hevc_clear_refs(s);
     }
+    sh->no_output_of_prior_pics_flag = 0;
     if (IS_IRAP(s))
         sh->no_output_of_prior_pics_flag = get_bits1(gb);
 
@@ -481,10 +391,18 @@ static int hls_slice_header(HEVCContext *s)
         return AVERROR_INVALIDDATA;
     }
     s->pps = (HEVCPPS*)s->pps_list[sh->pps_id]->data;
+    if (s->nal_unit_type == NAL_CRA_NUT && s->last_eos == 1)
+        sh->no_output_of_prior_pics_flag = 1;
 
     if (s->sps != (HEVCSPS*)s->sps_list[s->pps->sps_id]->data) {
+        const HEVCSPS* last_sps = s->sps;
         s->sps = (HEVCSPS*)s->sps_list[s->pps->sps_id]->data;
-
+        if (last_sps && IS_IRAP(s) && s->nal_unit_type != NAL_CRA_NUT) {
+            if (s->sps->width !=  last_sps->width || s->sps->height != last_sps->height ||
+                s->sps->temporal_layer[s->sps->max_sub_layers - 1].max_dec_pic_buffering !=
+                last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
+                sh->no_output_of_prior_pics_flag = 0;
+        }
         ff_hevc_clear_refs(s);
         ret = set_sps(s, s->sps);
         if (ret < 0)
@@ -713,6 +631,11 @@ static int hls_slice_header(HEVCContext *s)
             sh->slice_cr_qp_offset = 0;
         }
 
+        if (s->pps->chroma_qp_offset_list_enabled_flag)
+            sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
+        else
+            sh->cu_chroma_qp_offset_enabled_flag = 0;
+
         if (s->pps->deblocking_filter_control_present_flag) {
             int deblocking_filter_override_flag = 0;
 
@@ -754,20 +677,52 @@ static int hls_slice_header(HEVCContext *s)
         sh->num_entry_point_offsets = get_ue_golomb_long(gb);
         if (sh->num_entry_point_offsets > 0) {
             int offset_len = get_ue_golomb_long(gb) + 1;
-
-            for (i = 0; i < sh->num_entry_point_offsets; i++)
-                skip_bits(gb, offset_len);
-        }
+            int segments = offset_len >> 4;
+            int rest = (offset_len & 15);
+            av_freep(&sh->entry_point_offset);
+            av_freep(&sh->offset);
+            av_freep(&sh->size);
+            sh->entry_point_offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
+            sh->offset = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
+            sh->size = av_malloc_array(sh->num_entry_point_offsets, sizeof(int));
+            if (!sh->entry_point_offset || !sh->offset || !sh->size) {
+                sh->num_entry_point_offsets = 0;
+                av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
+                return AVERROR(ENOMEM);
+            }
+            for (i = 0; i < sh->num_entry_point_offsets; i++) {
+                unsigned val = 0; // unsigned: left-shifting a signed int past INT_MAX is undefined behaviour
+                for (j = 0; j < segments; j++) { // offset_len / 16 full 16-bit pieces
+                    val <<= 16;
+                    val += get_bits(gb, 16);
+                }
+                if (rest) { // remaining offset_len % 16 bits
+                    val <<= rest;
+                    val += get_bits(gb, rest);
+                }
+                sh->entry_point_offset[i] = val + 1; // +1 to get the size
+            }
+            if (s->threads_number > 1 && (s->pps->num_tile_rows > 1 || s->pps->num_tile_columns > 1)) {
+                s->enable_parallel_tiles = 0; // TODO: you can enable tiles in parallel here
+                s->threads_number = 1;
+            } else
+                s->enable_parallel_tiles = 0;
+        } else
+            s->enable_parallel_tiles = 0;
     }
 
     if (s->pps->slice_header_extension_present_flag) {
         unsigned int length = get_ue_golomb_long(gb);
+        if (length*8LL > get_bits_left(gb)) {
+            av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
+            return AVERROR_INVALIDDATA;
+        }
         for (i = 0; i < length; i++)
             skip_bits(gb, 8);  // slice_header_extension_data_byte
     }
 
     // Inferred parameters
-    sh->slice_qp = 26 + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
+    sh->slice_qp = 26U + s->pps->pic_init_qp_minus26 + sh->slice_qp_delta;
     if (sh->slice_qp > 51 ||
         sh->slice_qp < -s->sps->qp_bd_offset) {
         av_log(s->avctx, AV_LOG_ERROR,
@@ -785,13 +740,14 @@ static int hls_slice_header(HEVCContext *s)
         return AVERROR_INVALIDDATA;
     }
 
-    s->HEVClc.first_qp_group = !s->sh.dependent_slice_segment_flag;
+    s->HEVClc->first_qp_group = !s->sh.dependent_slice_segment_flag;
 
     if (!s->pps->cu_qp_delta_enabled_flag)
-        s->HEVClc.qp_y = FFUMOD(s->sh.slice_qp + 52 + 2 * s->sps->qp_bd_offset,
-                                52 + s->sps->qp_bd_offset) - s->sps->qp_bd_offset;
+        s->HEVClc->qp_y = s->sh.slice_qp;
 
     s->slice_initialized = 1;
+    s->HEVClc->tu.cu_qp_offset_cb = 0;
+    s->HEVClc->tu.cu_qp_offset_cr = 0;
 
     return 0;
 }
@@ -812,10 +768,9 @@ do {                                                    \
 
 static void hls_sao_param(HEVCContext *s, int rx, int ry)
 {
-    HEVCLocalContext *lc    = &s->HEVClc;
+    HEVCLocalContext *lc    = s->HEVClc;
     int sao_merge_left_flag = 0;
     int sao_merge_up_flag   = 0;
-    int shift               = s->sps->bit_depth - FFMIN(s->sps->bit_depth, 10);
     SAOParams *sao          = &CTB(s->sao, rx, ry);
     int c_idx, i;
 
@@ -832,6 +787,9 @@ static void hls_sao_param(HEVCContext *s, int rx, int ry)
     }
 
     for (c_idx = 0; c_idx < 3; c_idx++) {
+        int log2_sao_offset_scale = c_idx == 0 ? s->pps->log2_sao_offset_scale_luma :
+                                                 s->pps->log2_sao_offset_scale_chroma;
+
         if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
             sao->type_idx[c_idx] = SAO_NOT_APPLIED;
             continue;
@@ -867,13 +825,14 @@ static void hls_sao_param(HEVCContext *s, int rx, int ry)
         // Inferred parameters
         sao->offset_val[c_idx][0] = 0;
         for (i = 0; i < 4; i++) {
-            sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i] << shift;
+            sao->offset_val[c_idx][i + 1] = sao->offset_abs[c_idx][i];
             if (sao->type_idx[c_idx] == SAO_EDGE) {
                 if (i > 1)
                     sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
             } else if (sao->offset_sign[c_idx][i]) {
                 sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
             }
+            sao->offset_val[c_idx][i + 1] <<= log2_sao_offset_scale;
         }
     }
 }
@@ -881,385 +840,46 @@ static void hls_sao_param(HEVCContext *s, int rx, int ry)
 #undef SET_SAO
 #undef CTB
 
-static void hls_residual_coding(HEVCContext *s, int x0, int y0,
-                                int log2_trafo_size, enum ScanType scan_idx,
-                                int c_idx)
-{
-#define GET_COORD(offset, n)                                    \
-    do {                                                        \
-        x_c = (scan_x_cg[offset >> 4] << 2) + scan_x_off[n];    \
-        y_c = (scan_y_cg[offset >> 4] << 2) + scan_y_off[n];    \
-    } while (0)
-    HEVCLocalContext *lc    = &s->HEVClc;
-    int transform_skip_flag = 0;
-
-    int last_significant_coeff_x, last_significant_coeff_y;
-    int last_scan_pos;
-    int n_end;
-    int num_coeff    = 0;
-    int greater1_ctx = 1;
-
-    int num_last_subset;
-    int x_cg_last_sig, y_cg_last_sig;
-
-    const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
-
-    ptrdiff_t stride = s->frame->linesize[c_idx];
-    int hshift       = s->sps->hshift[c_idx];
-    int vshift       = s->sps->vshift[c_idx];
-    uint8_t *dst     = &s->frame->data[c_idx][(y0 >> vshift) * stride +
-                                              ((x0 >> hshift) << s->sps->pixel_shift)];
-    DECLARE_ALIGNED(16, int16_t, coeffs[MAX_TB_SIZE * MAX_TB_SIZE]) = { 0 };
-    DECLARE_ALIGNED(8, uint8_t, significant_coeff_group_flag[8][8]) = { { 0 } };
-
-    int trafo_size = 1 << log2_trafo_size;
-    int i, qp, shift, add, scale, scale_m;
-    const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
-    const uint8_t *scale_matrix;
-    uint8_t dc_scale;
-
-    // Derive QP for dequant
-    if (!lc->cu.cu_transquant_bypass_flag) {
-        static const int qp_c[] = {
-            29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
-        };
-
-        static const uint8_t rem6[51 + 2 * 6 + 1] = {
-            0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
-            3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
-            0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
-        };
-
-        static const uint8_t div6[51 + 2 * 6 + 1] = {
-            0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,  3,  3,  3,
-            3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6,  6,  6,  6,
-            7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
-        };
-        int qp_y = lc->qp_y;
-
-        if (c_idx == 0) {
-            qp = qp_y + s->sps->qp_bd_offset;
-        } else {
-            int qp_i, offset;
-
-            if (c_idx == 1)
-                offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset;
-            else
-                offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset;
-
-            qp_i = av_clip_c(qp_y + offset, -s->sps->qp_bd_offset, 57);
-            if (qp_i < 30)
-                qp = qp_i;
-            else if (qp_i > 43)
-                qp = qp_i - 6;
-            else
-                qp = qp_c[qp_i - 30];
-
-            qp += s->sps->qp_bd_offset;
-        }
-
-        shift    = s->sps->bit_depth + log2_trafo_size - 5;
-        add      = 1 << (shift - 1);
-        scale    = level_scale[rem6[qp]] << (div6[qp]);
-        scale_m  = 16; // default when no custom scaling lists.
-        dc_scale = 16;
-
-        if (s->sps->scaling_list_enable_flag) {
-            const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
-                                    &s->pps->scaling_list : &s->sps->scaling_list;
-            int matrix_id = lc->cu.pred_mode != MODE_INTRA;
-
-            if (log2_trafo_size != 5)
-                matrix_id = 3 * matrix_id + c_idx;
-
-            scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
-            if (log2_trafo_size >= 4)
-                dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
-        }
-    }
-
-    if (s->pps->transform_skip_enabled_flag &&
-        !lc->cu.cu_transquant_bypass_flag   &&
-        log2_trafo_size == 2) {
-        transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
-    }
-
-    last_significant_coeff_x =
-        ff_hevc_last_significant_coeff_x_prefix_decode(s, c_idx, log2_trafo_size);
-    last_significant_coeff_y =
-        ff_hevc_last_significant_coeff_y_prefix_decode(s, c_idx, log2_trafo_size);
-
-    if (last_significant_coeff_x > 3) {
-        int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
-        last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
-                                   (2 + (last_significant_coeff_x & 1)) +
-                                   suffix;
-    }
-
-    if (last_significant_coeff_y > 3) {
-        int suffix = ff_hevc_last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
-        last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
-                                   (2 + (last_significant_coeff_y & 1)) +
-                                   suffix;
-    }
-
-    if (scan_idx == SCAN_VERT)
-        FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
+static int hls_cross_component_pred(HEVCContext *s, int idx) {
+    HEVCLocalContext *lc    = s->HEVClc;
+    int log2_res_scale_abs_plus1 = ff_hevc_log2_res_scale_abs(s, idx);
 
-    x_cg_last_sig = last_significant_coeff_x >> 2;
-    y_cg_last_sig = last_significant_coeff_y >> 2;
-
-    switch (scan_idx) {
-    case SCAN_DIAG: {
-        int last_x_c = last_significant_coeff_x & 3;
-        int last_y_c = last_significant_coeff_y & 3;
-
-        scan_x_off = ff_hevc_diag_scan4x4_x;
-        scan_y_off = ff_hevc_diag_scan4x4_y;
-        num_coeff  = diag_scan4x4_inv[last_y_c][last_x_c];
-        if (trafo_size == 4) {
-            scan_x_cg = scan_1x1;
-            scan_y_cg = scan_1x1;
-        } else if (trafo_size == 8) {
-            num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
-            scan_x_cg  = diag_scan2x2_x;
-            scan_y_cg  = diag_scan2x2_y;
-        } else if (trafo_size == 16) {
-            num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
-            scan_x_cg  = ff_hevc_diag_scan4x4_x;
-            scan_y_cg  = ff_hevc_diag_scan4x4_y;
-        } else { // trafo_size == 32
-            num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
-            scan_x_cg  = ff_hevc_diag_scan8x8_x;
-            scan_y_cg  = ff_hevc_diag_scan8x8_y;
-        }
-        break;
-    }
-    case SCAN_HORIZ:
-        scan_x_cg  = horiz_scan2x2_x;
-        scan_y_cg  = horiz_scan2x2_y;
-        scan_x_off = horiz_scan4x4_x;
-        scan_y_off = horiz_scan4x4_y;
-        num_coeff  = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
-        break;
-    default: //SCAN_VERT
-        scan_x_cg  = horiz_scan2x2_y;
-        scan_y_cg  = horiz_scan2x2_x;
-        scan_x_off = horiz_scan4x4_y;
-        scan_y_off = horiz_scan4x4_x;
-        num_coeff  = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
-        break;
+    if (log2_res_scale_abs_plus1 !=  0) {
+        int res_scale_sign_flag = ff_hevc_res_scale_sign_flag(s, idx);
+        lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
+                               (1 - 2 * res_scale_sign_flag);
+    } else {
+        lc->tu.res_scale_val = 0;
     }
-    num_coeff++;
-    num_last_subset = (num_coeff - 1) >> 4;
-
-    for (i = num_last_subset; i >= 0; i--) {
-        int n, m;
-        int x_cg, y_cg, x_c, y_c;
-        int implicit_non_zero_coeff = 0;
-        int64_t trans_coeff_level;
-        int prev_sig = 0;
-        int offset   = i << 4;
-
-        uint8_t significant_coeff_flag_idx[16];
-        uint8_t nb_significant_coeff_flag = 0;
-
-        x_cg = scan_x_cg[i];
-        y_cg = scan_y_cg[i];
-
-        if (i < num_last_subset && i > 0) {
-            int ctx_cg = 0;
-            if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
-                ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
-            if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
-                ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
-
-            significant_coeff_group_flag[x_cg][y_cg] =
-                ff_hevc_significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
-            implicit_non_zero_coeff = 1;
-        } else {
-            significant_coeff_group_flag[x_cg][y_cg] =
-                ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
-                 (x_cg == 0 && y_cg == 0));
-        }
-
-        last_scan_pos = num_coeff - offset - 1;
-
-        if (i == num_last_subset) {
-            n_end                         = last_scan_pos - 1;
-            significant_coeff_flag_idx[0] = last_scan_pos;
-            nb_significant_coeff_flag     = 1;
-        } else {
-            n_end = 15;
-        }
-
-        if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
-            prev_sig = significant_coeff_group_flag[x_cg + 1][y_cg];
-        if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
-            prev_sig += significant_coeff_group_flag[x_cg][y_cg + 1] << 1;
-
-        for (n = n_end; n >= 0; n--) {
-            GET_COORD(offset, n);
-
-            if (significant_coeff_group_flag[x_cg][y_cg] &&
-                (n > 0 || implicit_non_zero_coeff == 0)) {
-                if (ff_hevc_significant_coeff_flag_decode(s, c_idx, x_c, y_c,
-                                                          log2_trafo_size,
-                                                          scan_idx,
-                                                          prev_sig) == 1) {
-                    significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
-                    nb_significant_coeff_flag++;
-                    implicit_non_zero_coeff = 0;
-                }
-            } else {
-                int last_cg = (x_c == (x_cg << 2) && y_c == (y_cg << 2));
-                if (last_cg && implicit_non_zero_coeff && significant_coeff_group_flag[x_cg][y_cg]) {
-                    significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
-                    nb_significant_coeff_flag++;
-                }
-            }
-        }
 
-        n_end = nb_significant_coeff_flag;
-
-        if (n_end) {
-            int first_nz_pos_in_cg = 16;
-            int last_nz_pos_in_cg = -1;
-            int c_rice_param = 0;
-            int first_greater1_coeff_idx = -1;
-            uint8_t coeff_abs_level_greater1_flag[16] = { 0 };
-            uint16_t coeff_sign_flag;
-            int sum_abs = 0;
-            int sign_hidden = 0;
-
-            // initialize first elem of coeff_bas_level_greater1_flag
-            int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
-
-            if (!(i == num_last_subset) && greater1_ctx == 0)
-                ctx_set++;
-            greater1_ctx      = 1;
-            last_nz_pos_in_cg = significant_coeff_flag_idx[0];
-
-            for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
-                int n_idx = significant_coeff_flag_idx[m];
-                int inc   = (ctx_set << 2) + greater1_ctx;
-                coeff_abs_level_greater1_flag[n_idx] =
-                    ff_hevc_coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
-                if (coeff_abs_level_greater1_flag[n_idx]) {
-                    greater1_ctx = 0;
-                } else if (greater1_ctx > 0 && greater1_ctx < 3) {
-                    greater1_ctx++;
-                }
-
-                if (coeff_abs_level_greater1_flag[n_idx] &&
-                    first_greater1_coeff_idx == -1)
-                    first_greater1_coeff_idx = n_idx;
-            }
-            first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
-            sign_hidden        = last_nz_pos_in_cg - first_nz_pos_in_cg >= 4 &&
-                                 !lc->cu.cu_transquant_bypass_flag;
-
-            if (first_greater1_coeff_idx != -1) {
-                coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += ff_hevc_coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
-            }
-            if (!s->pps->sign_data_hiding_flag || !sign_hidden) {
-                coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
-            } else {
-                coeff_sign_flag = ff_hevc_coeff_sign_flag(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
-            }
-
-            for (m = 0; m < n_end; m++) {
-                n = significant_coeff_flag_idx[m];
-                GET_COORD(offset, n);
-                trans_coeff_level = 1 + coeff_abs_level_greater1_flag[n];
-                if (trans_coeff_level == ((m < 8) ?
-                                          ((n == first_greater1_coeff_idx) ? 3 : 2) : 1)) {
-                    int last_coeff_abs_level_remaining = ff_hevc_coeff_abs_level_remaining(s, trans_coeff_level, c_rice_param);
-
-                    trans_coeff_level += last_coeff_abs_level_remaining;
-                    if ((trans_coeff_level) > (3 * (1 << c_rice_param)))
-                        c_rice_param = FFMIN(c_rice_param + 1, 4);
-                }
-                if (s->pps->sign_data_hiding_flag && sign_hidden) {
-                    sum_abs += trans_coeff_level;
-                    if (n == first_nz_pos_in_cg && ((sum_abs & 1) == 1))
-                        trans_coeff_level = -trans_coeff_level;
-                }
-                if (coeff_sign_flag >> 15)
-                    trans_coeff_level = -trans_coeff_level;
-                coeff_sign_flag <<= 1;
-                if (!lc->cu.cu_transquant_bypass_flag) {
-                    if (s->sps->scaling_list_enable_flag) {
-                        if (y_c || x_c || log2_trafo_size < 4) {
-                            int pos;
-                            switch (log2_trafo_size) {
-                            case 3:  pos = (y_c        << 3) +  x_c;       break;
-                            case 4:  pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
-                            case 5:  pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
-                            default: pos = (y_c        << 2) +  x_c;
-                            }
-                            scale_m = scale_matrix[pos];
-                        } else {
-                            scale_m = dc_scale;
-                        }
-                    }
-                    trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
-                    if(trans_coeff_level < 0) {
-                        if((~trans_coeff_level) & 0xFffffffffff8000)
-                            trans_coeff_level = -32768;
-                    } else {
-                        if (trans_coeff_level & 0xffffffffffff8000)
-                            trans_coeff_level = 32767;
-                    }
-                }
-                coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
-            }
-        }
-    }
 
-    if (lc->cu.cu_transquant_bypass_flag) {
-        s->hevcdsp.transquant_bypass[log2_trafo_size - 2](dst, coeffs, stride);
-    } else {
-        if (transform_skip_flag)
-            s->hevcdsp.transform_skip(dst, coeffs, stride);
-        else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 &&
-                 log2_trafo_size == 2)
-            s->hevcdsp.transform_4x4_luma_add(dst, coeffs, stride);
-        else
-            s->hevcdsp.transform_add[log2_trafo_size - 2](dst, coeffs, stride);
-    }
+    return 0;
 }
 
 static int hls_transform_unit(HEVCContext *s, int x0, int y0,
                               int xBase, int yBase, int cb_xBase, int cb_yBase,
                               int log2_cb_size, int log2_trafo_size,
                               int trafo_depth, int blk_idx,
-                              int cbf_luma, int cbf_cb, int cbf_cr)
+                              int cbf_luma, int *cbf_cb, int *cbf_cr)
 {
-    HEVCLocalContext *lc = &s->HEVClc;
+    HEVCLocalContext *lc = s->HEVClc;
+    const int log2_trafo_size_c = log2_trafo_size - s->sps->hshift[1];
+    int i;
 
     if (lc->cu.pred_mode == MODE_INTRA) {
         int trafo_size = 1 << log2_trafo_size;
         ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
 
         s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0);
-        if (log2_trafo_size > 2) {
-            trafo_size = trafo_size << (s->sps->hshift[1] - 1);
-            ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size);
-            s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 1);
-            s->hpc.intra_pred[log2_trafo_size - 3](s, x0, y0, 2);
-        } else if (blk_idx == 3) {
-            trafo_size = trafo_size << s->sps->hshift[1];
-            ff_hevc_set_neighbour_available(s, xBase, yBase,
-                                            trafo_size, trafo_size);
-            s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
-            s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
-        }
     }
 
-    if (cbf_luma || cbf_cb || cbf_cr) {
+    if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
+        (s->sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
         int scan_idx   = SCAN_DIAG;
         int scan_idx_c = SCAN_DIAG;
+        int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
+                         (s->sps->chroma_format_idc == 2 &&
+                         (cbf_cb[1] || cbf_cr[1]));
 
         if (s->pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
             lc->tu.cu_qp_delta = ff_hevc_cu_qp_delta_abs(s);
@@ -1279,41 +899,167 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0,
                 return AVERROR_INVALIDDATA;
             }
 
-            ff_hevc_set_qPy(s, x0, y0, cb_xBase, cb_yBase, log2_cb_size);
+            ff_hevc_set_qPy(s, cb_xBase, cb_yBase, log2_cb_size);
+        }
+
+        if (s->sh.cu_chroma_qp_offset_enabled_flag && cbf_chroma &&
+            !lc->cu.cu_transquant_bypass_flag  &&  !lc->tu.is_cu_chroma_qp_offset_coded) {
+            int cu_chroma_qp_offset_flag = ff_hevc_cu_chroma_qp_offset_flag(s);
+            if (cu_chroma_qp_offset_flag) {
+                int cu_chroma_qp_offset_idx  = 0;
+                if (s->pps->chroma_qp_offset_list_len_minus1 > 0) {
+                    cu_chroma_qp_offset_idx = ff_hevc_cu_chroma_qp_offset_idx(s);
+                    av_log(s->avctx, AV_LOG_ERROR,
+                        "cu_chroma_qp_offset_idx not yet tested.\n");
+                }
+                lc->tu.cu_qp_offset_cb = s->pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
+                lc->tu.cu_qp_offset_cr = s->pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
+            } else {
+                lc->tu.cu_qp_offset_cb = 0;
+                lc->tu.cu_qp_offset_cr = 0;
+            }
+            lc->tu.is_cu_chroma_qp_offset_coded = 1;
         }
 
         if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
-            if (lc->tu.cur_intra_pred_mode >= 6 &&
-                lc->tu.cur_intra_pred_mode <= 14) {
+            if (lc->tu.intra_pred_mode >= 6 &&
+                lc->tu.intra_pred_mode <= 14) {
                 scan_idx = SCAN_VERT;
-            } else if (lc->tu.cur_intra_pred_mode >= 22 &&
-                       lc->tu.cur_intra_pred_mode <= 30) {
+            } else if (lc->tu.intra_pred_mode >= 22 &&
+                       lc->tu.intra_pred_mode <= 30) {
                 scan_idx = SCAN_HORIZ;
             }
 
-            if (lc->pu.intra_pred_mode_c >=  6 &&
-                lc->pu.intra_pred_mode_c <= 14) {
+            if (lc->tu.intra_pred_mode_c >=  6 &&
+                lc->tu.intra_pred_mode_c <= 14) {
                 scan_idx_c = SCAN_VERT;
-            } else if (lc->pu.intra_pred_mode_c >= 22 &&
-                       lc->pu.intra_pred_mode_c <= 30) {
+            } else if (lc->tu.intra_pred_mode_c >= 22 &&
+                       lc->tu.intra_pred_mode_c <= 30) {
                 scan_idx_c = SCAN_HORIZ;
             }
         }
 
+        lc->tu.cross_pf = 0;
+
         if (cbf_luma)
-            hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
-        if (log2_trafo_size > 2) {
-            if (cbf_cb)
-                hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 1);
-            if (cbf_cr)
-                hls_residual_coding(s, x0, y0, log2_trafo_size - 1, scan_idx_c, 2);
+            ff_hevc_hls_residual_coding(s, x0, y0, log2_trafo_size, scan_idx, 0);
+        if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
+            int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
+            int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
+            lc->tu.cross_pf  = (s->pps->cross_component_prediction_enabled_flag && cbf_luma &&
+                                (lc->cu.pred_mode == MODE_INTER ||
+                                 (lc->tu.chroma_mode_c ==  4)));
+
+            if (lc->tu.cross_pf) {
+                hls_cross_component_pred(s, 0);
+            }
+            for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
+                if (lc->cu.pred_mode == MODE_INTRA) {
+                    ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
+                    s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1);
+                }
+                if (cbf_cb[i])
+                    ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
+                                                log2_trafo_size_c, scan_idx_c, 1);
+                else
+                    if (lc->tu.cross_pf) { // no residual parsed for this Cb block: scale the luma coefficients
+                        ptrdiff_t stride = s->frame->linesize[1];
+                        int hshift = s->sps->hshift[1];
+                        int vshift = s->sps->vshift[1];
+                        int16_t *coeffs_y = lc->tu.coeffs[0];
+                        int16_t *coeffs =   lc->tu.coeffs[1];
+                        int size = 1 << log2_trafo_size_c;
+                        int k; // private index: 'i' is the counter of the enclosing sub-block loop
+                        uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
+                                                              ((x0 >> hshift) << s->sps->pixel_shift)];
+                        for (k = 0; k < (size * size); k++) {
+                            coeffs[k] = ((lc->tu.res_scale_val * coeffs_y[k]) >> 3);
+                        }
+                        s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride); // size index matches the size*size buffer
+                    }
+            }
+
+            if (lc->tu.cross_pf) {
+                hls_cross_component_pred(s, 1);
+            }
+            for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
+                if (lc->cu.pred_mode == MODE_INTRA) {
+                    ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
+                    s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2);
+                }
+                if (cbf_cr[i])
+                    ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c),
+                                                log2_trafo_size_c, scan_idx_c, 2);
+                else
+                    if (lc->tu.cross_pf) { // no residual parsed for this Cr block: scale the luma coefficients
+                        ptrdiff_t stride = s->frame->linesize[2];
+                        int hshift = s->sps->hshift[2];
+                        int vshift = s->sps->vshift[2];
+                        int16_t *coeffs_y = lc->tu.coeffs[0];
+                        int16_t *coeffs =   lc->tu.coeffs[1];
+                        int size = 1 << log2_trafo_size_c;
+                        int k; // private index: 'i' is the counter of the enclosing sub-block loop
+                        uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
+                                                          ((x0 >> hshift) << s->sps->pixel_shift)];
+                        for (k = 0; k < (size * size); k++) {
+                            coeffs[k] = ((lc->tu.res_scale_val * coeffs_y[k]) >> 3);
+                        }
+                        s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride); // size index matches the size*size buffer
+                    }
+            }
+        } else if (blk_idx == 3) {
+            int trafo_size_h = 1 << (log2_trafo_size + 1);
+            int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
+            for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
+                if (lc->cu.pred_mode == MODE_INTRA) {
+                    ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
+                                                    trafo_size_h, trafo_size_v);
+                    s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1);
+                }
+                if (cbf_cb[i])
+                    ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
+                                                log2_trafo_size, scan_idx_c, 1);
+            }
+            for (i = 0; i < (s->sps->chroma_format_idc == 2 ? 2 : 1); i++) {
+                if (lc->cu.pred_mode == MODE_INTRA) {
+                    ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size),
+                                                trafo_size_h, trafo_size_v);
+                    s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2);
+                }
+                if (cbf_cr[i])
+                    ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size),
+                                                log2_trafo_size, scan_idx_c, 2);
+            }
+        }
+    } else if (lc->cu.pred_mode == MODE_INTRA) {
+        if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
+            int trafo_size_h = 1 << (log2_trafo_size_c + s->sps->hshift[1]);
+            int trafo_size_v = 1 << (log2_trafo_size_c + s->sps->vshift[1]);
+            ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v);
+            s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 1);
+            s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2);
+            if (s->sps->chroma_format_idc == 2) {
+                ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c),
+                                                trafo_size_h, trafo_size_v);
+                s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1);
+                s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2);
+            }
         } else if (blk_idx == 3) {
-            if (cbf_cb)
-                hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 1);
-            if (cbf_cr)
-                hls_residual_coding(s, xBase, yBase, log2_trafo_size, scan_idx_c, 2);
+            int trafo_size_h = 1 << (log2_trafo_size + 1);
+            int trafo_size_v = 1 << (log2_trafo_size + s->sps->vshift[1]);
+            ff_hevc_set_neighbour_available(s, xBase, yBase,
+                                            trafo_size_h, trafo_size_v);
+            s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1);
+            s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2);
+            if (s->sps->chroma_format_idc == 2) {
+                ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)),
+                                                trafo_size_h, trafo_size_v);
+                s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1);
+                s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2);
+            }
         }
     }
+
     return 0;
 }
 
@@ -1336,17 +1082,34 @@ static int hls_transform_tree(HEVCContext *s, int x0, int y0,
                               int xBase, int yBase, int cb_xBase, int cb_yBase,
                               int log2_cb_size, int log2_trafo_size,
                               int trafo_depth, int blk_idx,
-                              int cbf_cb, int cbf_cr)
+                              const int *base_cbf_cb, const int *base_cbf_cr)
 {
-    HEVCLocalContext *lc = &s->HEVClc;
+    HEVCLocalContext *lc = s->HEVClc;
     uint8_t split_transform_flag;
+    int cbf_cb[2];
+    int cbf_cr[2];
     int ret;
 
+    cbf_cb[0] = base_cbf_cb[0];
+    cbf_cb[1] = base_cbf_cb[1];
+    cbf_cr[0] = base_cbf_cr[0];
+    cbf_cr[1] = base_cbf_cr[1];
+
     if (lc->cu.intra_split_flag) {
-        if (trafo_depth == 1)
-            lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[blk_idx];
+        if (trafo_depth == 1) {
+            lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[blk_idx];
+            if (s->sps->chroma_format_idc == 3) {
+                lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
+                lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[blk_idx];
+            } else {
+                lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
+                lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
+            }
+        }
     } else {
-        lc->tu.cur_intra_pred_mode = lc->pu.intra_pred_mode[0];
+        lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[0];
+        lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
+        lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
     }
 
     if (log2_trafo_size <= s->sps->log2_max_trafo_size &&
@@ -1365,14 +1128,27 @@ static int hls_transform_tree(HEVCContext *s, int x0, int y0,
                                inter_split;
     }
 
-    if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cb))
-        cbf_cb = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
-    else if (log2_trafo_size > 2 || trafo_depth == 0)
-        cbf_cb = 0;
-    if (log2_trafo_size > 2 && (trafo_depth == 0 || cbf_cr))
-        cbf_cr = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
-    else if (log2_trafo_size > 2 || trafo_depth == 0)
-        cbf_cr = 0;
+    if (log2_trafo_size > 2 || s->sps->chroma_format_idc == 3) {
+        if (trafo_depth == 0 || cbf_cb[0]) {
+            cbf_cb[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
+            if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
+                cbf_cb[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
+            }
+        } else if (trafo_depth == 0) {
+            cbf_cb[0] =
+            cbf_cb[1] = 0;
+        }
+
+        if (trafo_depth == 0 || cbf_cr[0]) {
+            cbf_cr[0] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
+            if (s->sps->chroma_format_idc == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
+                cbf_cr[1] = ff_hevc_cbf_cb_cr_decode(s, trafo_depth);
+            }
+        } else if (trafo_depth == 0) {
+            cbf_cr[0] =
+            cbf_cr[1] = 0;
+        }
+    }
 
     if (split_transform_flag) {
         const int trafo_size_split = 1 << (log2_trafo_size - 1);
@@ -1401,8 +1177,10 @@ do {
         int cbf_luma         = 1;
 
         if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
-            cbf_cb || cbf_cr)
+            cbf_cb[0] || cbf_cr[0] ||
+            (s->sps->chroma_format_idc == 2 && (cbf_cb[1] || cbf_cr[1]))) {
             cbf_luma = ff_hevc_cbf_luma_decode(s, trafo_depth);
+        }
 
         ret = hls_transform_unit(s, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
                                  log2_cb_size, log2_trafo_size, trafo_depth,
@@ -1420,9 +1198,7 @@ do {
                 }
         }
         if (!s->sh.disable_deblocking_filter_flag) {
-            ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size,
-                                                  lc->slice_or_tiles_up_boundary,
-                                                  lc->slice_or_tiles_left_boundary);
+            ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_trafo_size);
             if (s->pps->transquant_bypass_enable_flag &&
                 lc->cu.cu_transquant_bypass_flag)
                 set_deblocking_bypass(s, x0, y0, log2_trafo_size);
@@ -1433,8 +1209,7 @@ do {
 
 static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
 {
-    //TODO: non-4:2:0 support
-    HEVCLocalContext *lc = &s->HEVClc;
+    HEVCLocalContext *lc = s->HEVClc;
     GetBitContext gb;
     int cb_size   = 1 << log2_cb_size;
     int stride0   = s->frame->linesize[0];
@@ -1444,50 +1219,34 @@ static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size)
     int   stride2 = s->frame->linesize[2];
     uint8_t *dst2 = &s->frame->data[2][(y0 >> s->sps->vshift[2]) * stride2 + ((x0 >> s->sps->hshift[2]) << s->sps->pixel_shift)];
 
-    int length         = cb_size * cb_size * s->sps->pcm.bit_depth + ((cb_size * cb_size) >> 1) * s->sps->pcm.bit_depth_chroma;
+    int length         = cb_size * cb_size * s->sps->pcm.bit_depth +
+                         (((cb_size >> s->sps->hshift[1]) * (cb_size >> s->sps->vshift[1])) +
+                          ((cb_size >> s->sps->hshift[2]) * (cb_size >> s->sps->vshift[2]))) *
+                          s->sps->pcm.bit_depth_chroma;
     const uint8_t *pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
     int ret;
 
-    ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
-                                          lc->slice_or_tiles_up_boundary,
-                                          lc->slice_or_tiles_left_boundary);
+    if (!s->sh.disable_deblocking_filter_flag)
+        ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
 
     ret = init_get_bits(&gb, pcm, length);
     if (ret < 0)
         return ret;
 
-    s->hevcdsp.put_pcm(dst0, stride0, cb_size,     &gb, s->sps->pcm.bit_depth);
-    s->hevcdsp.put_pcm(dst1, stride1, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
-    s->hevcdsp.put_pcm(dst2, stride2, cb_size / 2, &gb, s->sps->pcm.bit_depth_chroma);
+    s->hevcdsp.put_pcm(dst0, stride0, cb_size, cb_size,     &gb, s->sps->pcm.bit_depth);
+    s->hevcdsp.put_pcm(dst1, stride1,
+                       cb_size >> s->sps->hshift[1],
+                       cb_size >> s->sps->vshift[1],
+                       &gb, s->sps->pcm.bit_depth_chroma);
+    s->hevcdsp.put_pcm(dst2, stride2,
+                       cb_size >> s->sps->hshift[2],
+                       cb_size >> s->sps->vshift[2],
+                       &gb, s->sps->pcm.bit_depth_chroma);
     return 0;
 }
 
-static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
-{
-    HEVCLocalContext *lc = &s->HEVClc;
-    int x = ff_hevc_abs_mvd_greater0_flag_decode(s);
-    int y = ff_hevc_abs_mvd_greater0_flag_decode(s);
-
-    if (x)
-        x += ff_hevc_abs_mvd_greater1_flag_decode(s);
-    if (y)
-        y += ff_hevc_abs_mvd_greater1_flag_decode(s);
-
-    switch (x) {
-    case 2: lc->pu.mvd.x = ff_hevc_mvd_decode(s);           break;
-    case 1: lc->pu.mvd.x = ff_hevc_mvd_sign_flag_decode(s); break;
-    case 0: lc->pu.mvd.x = 0;                               break;
-    }
-
-    switch (y) {
-    case 2: lc->pu.mvd.y = ff_hevc_mvd_decode(s);           break;
-    case 1: lc->pu.mvd.y = ff_hevc_mvd_sign_flag_decode(s); break;
-    case 0: lc->pu.mvd.y = 0;                               break;
-    }
-}
-
 /**
- * 8.5.3.2.2.1 Luma sample interpolation process
+ * 8.5.3.2.2.1 Luma sample unidirectional interpolation process
  *
  * @param s HEVC decoding context
  * @param dst target buffer for block data at block position
@@ -1498,49 +1257,148 @@ static void hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
  * @param y_off vertical position of block from origin (0, 0)
  * @param block_w width of block
  * @param block_h height of block
+ * @param luma_weight weighting factor applied to the luma prediction
+ * @param luma_offset additive offset applied to the luma prediction value
  */
-static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
-                    AVFrame *ref, const Mv *mv, int x_off, int y_off,
-                    int block_w, int block_h)
+
+static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
+                        AVFrame *ref, const Mv *mv, int x_off, int y_off,
+                        int block_w, int block_h, int luma_weight, int luma_offset)
 {
-    HEVCLocalContext *lc = &s->HEVClc;
+    HEVCLocalContext *lc = s->HEVClc;
     uint8_t *src         = ref->data[0];
     ptrdiff_t srcstride  = ref->linesize[0];
     int pic_width        = s->sps->width;
     int pic_height       = s->sps->height;
-
-    int mx         = mv->x & 3;
-    int my         = mv->y & 3;
-    int extra_left = ff_hevc_qpel_extra_before[mx];
-    int extra_top  = ff_hevc_qpel_extra_before[my];
+    int mx               = mv->x & 3; /* low two bits: fractional part of the horizontal MV */
+    int my               = mv->y & 3; /* low two bits: fractional part of the vertical MV */
+    int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
+                           (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
+    int idx              = ff_hevc_pel_weight[block_w]; /* DSP table index looked up by block width */
 
     x_off += mv->x >> 2;
     y_off += mv->y >> 2;
-    src   += y_off * srcstride + (x_off << s->sps->pixel_shift);
+    src   += y_off * srcstride + (int)((unsigned)x_off << s->sps->pixel_shift); /* unsigned cast: x_off may be negative, and shifting a negative int is undefined */
 
-    if (x_off < extra_left || y_off < extra_top ||
-        x_off >= pic_width - block_w - ff_hevc_qpel_extra_after[mx] ||
-        y_off >= pic_height - block_h - ff_hevc_qpel_extra_after[my]) {
+    if (x_off < QPEL_EXTRA_BEFORE || y_off < QPEL_EXTRA_AFTER ||
+        x_off >= pic_width - block_w - QPEL_EXTRA_AFTER ||
+        y_off >= pic_height - block_h - QPEL_EXTRA_AFTER) {
         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
-        int offset = extra_top * srcstride + (extra_left << s->sps->pixel_shift);
-        int buf_offset = extra_top *
-                         edge_emu_stride + (extra_left << s->sps->pixel_shift);
+        int offset     = QPEL_EXTRA_BEFORE * srcstride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
+        int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
 
         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src - offset,
                                  edge_emu_stride, srcstride,
-                                 block_w + ff_hevc_qpel_extra[mx],
-                                 block_h + ff_hevc_qpel_extra[my],
-                                 x_off - extra_left, y_off - extra_top,
+                                 block_w + QPEL_EXTRA,
+                                 block_h + QPEL_EXTRA,
+                                 x_off - QPEL_EXTRA_BEFORE, y_off - QPEL_EXTRA_BEFORE,
                                  pic_width, pic_height);
         src = lc->edge_emu_buffer + buf_offset;
         srcstride = edge_emu_stride;
     }
-    s->hevcdsp.put_hevc_qpel[my][mx](dst, dststride, src, srcstride, block_w,
-                                     block_h, lc->mc_buffer);
+    /* Unweighted or explicitly weighted output, selected by the slice type and the PPS weighting flags. */
+    if (!weight_flag)
+        s->hevcdsp.put_hevc_qpel_uni[idx][!!my][!!mx](dst, dststride, src, srcstride,
+                                                      block_h, mx, my, block_w);
+    else
+        s->hevcdsp.put_hevc_qpel_uni_w[idx][!!my][!!mx](dst, dststride, src, srcstride,
+                                                        block_h, s->sh.luma_log2_weight_denom,
+                                                        luma_weight, luma_offset, mx, my, block_w);
 }
 
 /**
- * 8.5.3.2.2.2 Chroma sample interpolation process
+ * 8.5.3.2.2.1 Luma sample bidirectional interpolation process
+ *
+ * @param s HEVC decoding context
+ * @param dst target buffer for block data at block position
+ * @param dststride stride of the dst buffer
+ * @param ref0 reference picture0 buffer at origin (0, 0)
+ * @param mv0 motion vector0 (relative to block position) to get pixel data from
+ * @param x_off horizontal position of block from origin (0, 0)
+ * @param y_off vertical position of block from origin (0, 0)
+ * @param block_w width of block
+ * @param block_h height of block
+ * @param ref1 reference picture1 buffer at origin (0, 0)
+ * @param mv1 motion vector1 (relative to block position) to get pixel data from
+ * @param current_mv current motion vector structure
+ */
+ static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
+                       AVFrame *ref0, const Mv *mv0, int x_off, int y_off,
+                       int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv)
+{
+    HEVCLocalContext *lc = s->HEVClc;
+    DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
+    ptrdiff_t src0stride  = ref0->linesize[0];
+    ptrdiff_t src1stride  = ref1->linesize[0];
+    int pic_width        = s->sps->width;
+    int pic_height       = s->sps->height;
+    int mx0              = mv0->x & 3;
+    int my0              = mv0->y & 3;
+    int mx1              = mv1->x & 3;
+    int my1              = mv1->y & 3;
+    int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
+                           (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
+    int x_off0           = x_off + (mv0->x >> 2);
+    int y_off0           = y_off + (mv0->y >> 2);
+    int x_off1           = x_off + (mv1->x >> 2);
+    int y_off1           = y_off + (mv1->y >> 2);
+    int idx              = ff_hevc_pel_weight[block_w];
+    /* Source pointers at the integer-displaced position in each reference; the unsigned cast keeps the shift defined for negative offsets. */
+    uint8_t *src0  = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
+    uint8_t *src1  = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
+    /* Reference 0: go through edge_emu_buffer when the block plus its QPEL margin crosses the picture bounds. */
+    if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER ||
+        x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
+        y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
+        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
+        int offset     = QPEL_EXTRA_BEFORE * src0stride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
+        int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
+
+        s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset,
+                                 edge_emu_stride, src0stride,
+                                 block_w + QPEL_EXTRA,
+                                 block_h + QPEL_EXTRA,
+                                 x_off0 - QPEL_EXTRA_BEFORE, y_off0 - QPEL_EXTRA_BEFORE,
+                                 pic_width, pic_height);
+        src0 = lc->edge_emu_buffer + buf_offset;
+        src0stride = edge_emu_stride;
+    }
+    /* Reference 1: same bounds check, but using edge_emu_buffer2 because src0 may still point into edge_emu_buffer. */
+    if (x_off1 < QPEL_EXTRA_BEFORE || y_off1 < QPEL_EXTRA_AFTER ||
+        x_off1 >= pic_width - block_w - QPEL_EXTRA_AFTER ||
+        y_off1 >= pic_height - block_h - QPEL_EXTRA_AFTER) {
+        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
+        int offset     = QPEL_EXTRA_BEFORE * src1stride       + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
+        int buf_offset = QPEL_EXTRA_BEFORE * edge_emu_stride + (QPEL_EXTRA_BEFORE << s->sps->pixel_shift);
+
+        s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src1 - offset,
+                                 edge_emu_stride, src1stride,
+                                 block_w + QPEL_EXTRA,
+                                 block_h + QPEL_EXTRA,
+                                 x_off1 - QPEL_EXTRA_BEFORE, y_off1 - QPEL_EXTRA_BEFORE,
+                                 pic_width, pic_height);
+        src1 = lc->edge_emu_buffer2 + buf_offset;
+        src1stride = edge_emu_stride;
+    }
+    /* Reference 0 is predicted into tmp; tmp is then passed with reference 1 to the bi (or weighted bi) function, which writes dst. */
+    s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](tmp, MAX_PB_SIZE, src0, src0stride,
+                                                block_h, mx0, my0, block_w);
+    if (!weight_flag)
+        s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, tmp, MAX_PB_SIZE,
+                                                       block_h, mx1, my1, block_w);
+    else
+        s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, tmp, MAX_PB_SIZE,
+                                                         block_h, s->sh.luma_log2_weight_denom,
+                                                         s->sh.luma_weight_l0[current_mv->ref_idx[0]],
+                                                         s->sh.luma_weight_l1[current_mv->ref_idx[1]],
+                                                         s->sh.luma_offset_l0[current_mv->ref_idx[0]],
+                                                         s->sh.luma_offset_l1[current_mv->ref_idx[1]],
+                                                         mx1, my1, block_w);
+
+}
+
+/**
+ * 8.5.3.2.2.2 Chroma sample uniprediction interpolation process
  *
  * @param s HEVC decoding context
  * @param dst1 target buffer for block data at block position (U plane)
@@ -1552,87 +1410,184 @@ static void luma_mc(HEVCContext *s, int16_t *dst, ptrdiff_t dststride,
  * @param y_off vertical position of block from origin (0, 0)
  * @param block_w width of block
  * @param block_h height of block
+ * @param chroma_weight weighting factor applied to the chroma prediction
+ * @param chroma_offset additive offset applied to the chroma prediction value
  */
-static void chroma_mc(HEVCContext *s, int16_t *dst1, int16_t *dst2,
-                      ptrdiff_t dststride, AVFrame *ref, const Mv *mv,
-                      int x_off, int y_off, int block_w, int block_h)
+
+static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0,
+                          ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist,
+                          int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset)
 {
-    HEVCLocalContext *lc = &s->HEVClc;
-    uint8_t *src1        = ref->data[1];
-    uint8_t *src2        = ref->data[2];
-    ptrdiff_t src1stride = ref->linesize[1];
-    ptrdiff_t src2stride = ref->linesize[2];
-    int pic_width        = s->sps->width >> 1;
-    int pic_height       = s->sps->height >> 1;
-
-    int mx = mv->x & 7;
-    int my = mv->y & 7;
-
-    x_off += mv->x >> 3;
-    y_off += mv->y >> 3;
-    src1  += y_off * src1stride + (x_off << s->sps->pixel_shift);
-    src2  += y_off * src2stride + (x_off << s->sps->pixel_shift);
+    HEVCLocalContext *lc = s->HEVClc;
+    int pic_width        = s->sps->width >> s->sps->hshift[1];
+    int pic_height       = s->sps->height >> s->sps->vshift[1];
+    const Mv *mv         = &current_mv->mv[reflist]; /* motion vector of the selected reference list */
+    int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
+                           (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
+    int idx              = ff_hevc_pel_weight[block_w];
+    int hshift           = s->sps->hshift[1];
+    int vshift           = s->sps->vshift[1];
+    intptr_t mx          = mv->x & ((1 << (2 + hshift)) - 1); /* low (2 + hshift) bits: fractional part */
+    intptr_t my          = mv->y & ((1 << (2 + vshift)) - 1); /* low (2 + vshift) bits: fractional part */
+    intptr_t _mx         = mx << (1 - hshift); /* rescaled so the DSP call gets the same range for either shift value */
+    intptr_t _my         = my << (1 - vshift);
+    /* Apply the integer part of the motion vector; the unsigned cast keeps the left shift defined should x_off be negative. */
+    x_off += mv->x >> (2 + hshift);
+    y_off += mv->y >> (2 + vshift);
+    src0  += y_off * srcstride + (int)((unsigned)x_off << s->sps->pixel_shift);
 
     if (x_off < EPEL_EXTRA_BEFORE || y_off < EPEL_EXTRA_AFTER ||
         x_off >= pic_width - block_w - EPEL_EXTRA_AFTER ||
         y_off >= pic_height - block_h - EPEL_EXTRA_AFTER) {
         const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
+        int offset0 = EPEL_EXTRA_BEFORE * (srcstride + (1 << s->sps->pixel_shift));
+        int buf_offset0 = EPEL_EXTRA_BEFORE *
+                          (edge_emu_stride + (1 << s->sps->pixel_shift));
+        s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src0 - offset0,
+                                 edge_emu_stride, srcstride,
+                                 block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
+                                 x_off - EPEL_EXTRA_BEFORE,
+                                 y_off - EPEL_EXTRA_BEFORE,
+                                 pic_width, pic_height);
+        /* Continue from the emulation buffer, skipping its EPEL_EXTRA_BEFORE rows and columns of margin. */
+        src0 = lc->edge_emu_buffer + buf_offset0;
+        srcstride = edge_emu_stride;
+    }
+    if (!weight_flag)
+        s->hevcdsp.put_hevc_epel_uni[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
+                                                  block_h, _mx, _my, block_w);
+    else
+        s->hevcdsp.put_hevc_epel_uni_w[idx][!!my][!!mx](dst0, dststride, src0, srcstride,
+                                                        block_h, s->sh.chroma_log2_weight_denom,
+                                                        chroma_weight, chroma_offset, _mx, _my, block_w);
+}
+
+/**
+ * 8.5.3.2.2.2 Chroma sample bidirectional interpolation process
+ *
+ * @param s HEVC decoding context
+ * @param dst0 target buffer for block data at block position
+ * @param dststride stride of the dst0 buffer
+ * @param ref0 reference picture0 buffer at origin (0, 0)
+ * @param ref1 reference picture1 buffer at origin (0, 0)
+ * @param x_off horizontal position of block from origin (0, 0)
+ * @param y_off vertical position of block from origin (0, 0)
+ * @param block_w width of block
+ * @param block_h height of block
+ * @param current_mv current motion vector structure; its mv[0] and mv[1] are
+ *                   the motion vectors (relative to block position) used to
+ *                   get pixel data from ref0 and ref1 respectively
+ * @param cidx chroma component (cb, cr); plane cidx + 1 of each frame is used
+ */
+static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1,
+                         int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
+{
+    DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
+    int tmpstride = MAX_PB_SIZE;
+    HEVCLocalContext *lc = s->HEVClc;
+    uint8_t *src1        = ref0->data[cidx+1];
+    uint8_t *src2        = ref1->data[cidx+1];
+    ptrdiff_t src1stride = ref0->linesize[cidx+1];
+    ptrdiff_t src2stride = ref1->linesize[cidx+1];
+    int weight_flag      = (s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
+                           (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag);
+    int pic_width        = s->sps->width >> s->sps->hshift[1];
+    int pic_height       = s->sps->height >> s->sps->vshift[1];
+    Mv *mv0              = &current_mv->mv[0];
+    Mv *mv1              = &current_mv->mv[1];
+    int hshift = s->sps->hshift[1];
+    int vshift = s->sps->vshift[1];
+    /* Fractional parts (low 2 + shift bits) of both motion vectors, then the same values rescaled by (1 - shift) for the DSP calls. */
+    intptr_t mx0 = mv0->x & ((1 << (2 + hshift)) - 1);
+    intptr_t my0 = mv0->y & ((1 << (2 + vshift)) - 1);
+    intptr_t mx1 = mv1->x & ((1 << (2 + hshift)) - 1);
+    intptr_t my1 = mv1->y & ((1 << (2 + vshift)) - 1);
+    intptr_t _mx0 = mx0 << (1 - hshift);
+    intptr_t _my0 = my0 << (1 - vshift);
+    intptr_t _mx1 = mx1 << (1 - hshift);
+    intptr_t _my1 = my1 << (1 - vshift);
+    /* Integer parts of the motion vectors give the block position inside each reference plane. */
+    int x_off0 = x_off + (mv0->x >> (2 + hshift));
+    int y_off0 = y_off + (mv0->y >> (2 + vshift));
+    int x_off1 = x_off + (mv1->x >> (2 + hshift));
+    int y_off1 = y_off + (mv1->y >> (2 + vshift));
+    int idx = ff_hevc_pel_weight[block_w];
+    src1  += y_off0 * src1stride + (int)((unsigned)x_off0 << s->sps->pixel_shift);
+    src2  += y_off1 * src2stride + (int)((unsigned)x_off1 << s->sps->pixel_shift);
+    /* Reference 0: go through edge_emu_buffer when the block plus its EPEL margin crosses the plane bounds. */
+    if (x_off0 < EPEL_EXTRA_BEFORE || y_off0 < EPEL_EXTRA_AFTER ||
+        x_off0 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
+        y_off0 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
+        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
         int offset1 = EPEL_EXTRA_BEFORE * (src1stride + (1 << s->sps->pixel_shift));
         int buf_offset1 = EPEL_EXTRA_BEFORE *
                           (edge_emu_stride + (1 << s->sps->pixel_shift));
-        int offset2 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
-        int buf_offset2 = EPEL_EXTRA_BEFORE *
-                          (edge_emu_stride + (1 << s->sps->pixel_shift));
 
         s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src1 - offset1,
                                  edge_emu_stride, src1stride,
                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
-                                 x_off - EPEL_EXTRA_BEFORE,
-                                 y_off - EPEL_EXTRA_BEFORE,
+                                 x_off0 - EPEL_EXTRA_BEFORE,
+                                 y_off0 - EPEL_EXTRA_BEFORE,
                                  pic_width, pic_height);
 
         src1 = lc->edge_emu_buffer + buf_offset1;
         src1stride = edge_emu_stride;
-        s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
-                                             block_w, block_h, mx, my, lc->mc_buffer);
+    }
+    /* Reference 1: same bounds check, but using edge_emu_buffer2 because src1 may still point into edge_emu_buffer. */
+    if (x_off1 < EPEL_EXTRA_BEFORE || y_off1 < EPEL_EXTRA_AFTER ||
+        x_off1 >= pic_width - block_w - EPEL_EXTRA_AFTER ||
+        y_off1 >= pic_height - block_h - EPEL_EXTRA_AFTER) {
+        const int edge_emu_stride = EDGE_EMU_BUFFER_STRIDE << s->sps->pixel_shift;
+        int offset1 = EPEL_EXTRA_BEFORE * (src2stride + (1 << s->sps->pixel_shift));
+        int buf_offset1 = EPEL_EXTRA_BEFORE *
+                          (edge_emu_stride + (1 << s->sps->pixel_shift));
 
-        s->vdsp.emulated_edge_mc(lc->edge_emu_buffer, src2 - offset2,
+        s->vdsp.emulated_edge_mc(lc->edge_emu_buffer2, src2 - offset1,
                                  edge_emu_stride, src2stride,
                                  block_w + EPEL_EXTRA, block_h + EPEL_EXTRA,
-                                 x_off - EPEL_EXTRA_BEFORE,
-                                 y_off - EPEL_EXTRA_BEFORE,
+                                 x_off1 - EPEL_EXTRA_BEFORE,
+                                 y_off1 - EPEL_EXTRA_BEFORE,
                                  pic_width, pic_height);
-        src2 = lc->edge_emu_buffer + buf_offset2;
-        src2stride = edge_emu_stride;
 
-        s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
-                                             block_w, block_h, mx, my,
-                                             lc->mc_buffer);
-    } else {
-        s->hevcdsp.put_hevc_epel[!!my][!!mx](dst1, dststride, src1, src1stride,
-                                             block_w, block_h, mx, my,
-                                             lc->mc_buffer);
-        s->hevcdsp.put_hevc_epel[!!my][!!mx](dst2, dststride, src2, src2stride,
-                                             block_w, block_h, mx, my,
-                                             lc->mc_buffer);
+        src2 = lc->edge_emu_buffer2 + buf_offset1;
+        src2stride = edge_emu_stride;
     }
+    /* Reference 0 is predicted into tmp; tmp and reference 1 then go to the bi function. NOTE(review): the destination stride comes from s->frame->linesize, not the dststride parameter. */
+    s->hevcdsp.put_hevc_epel[idx][!!my0][!!mx0](tmp, tmpstride, src1, src1stride,
+                                                block_h, _mx0, _my0, block_w);
+    if (!weight_flag)
+        s->hevcdsp.put_hevc_epel_bi[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
+                                                       src2, src2stride, tmp, tmpstride,
+                                                       block_h, _mx1, _my1, block_w);
+    else
+        s->hevcdsp.put_hevc_epel_bi_w[idx][!!my1][!!mx1](dst0, s->frame->linesize[cidx+1],
+                                                         src2, src2stride, tmp, tmpstride,
+                                                         block_h,
+                                                         s->sh.chroma_log2_weight_denom,
+                                                         s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
+                                                         s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
+                                                         s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
+                                                         s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
+                                                         _mx1, _my1, block_w);
 }
 
 static void hevc_await_progress(HEVCContext *s, HEVCFrame *ref,
                                 const Mv *mv, int y0, int height)
 {
     int y = (mv->y >> 2) + y0 + height + 9;
-    ff_thread_await_progress(&ref->tf, y, 0);
+    /* Wait on the reference frame's progress up to row y only when frame threading is in use. */
+    if (s->threads_type == FF_THREAD_FRAME )
+        ff_thread_await_progress(&ref->tf, y, 0);
 }
 
 static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
                                 int nPbW, int nPbH,
-                                int log2_cb_size, int partIdx)
+                                int log2_cb_size, int partIdx, int idx)
 {
 #define POS(c_idx, x, y)                                                              \
     &s->frame->data[c_idx][((y) >> s->sps->vshift[c_idx]) * s->frame->linesize[c_idx] + \
                            (((x) >> s->sps->hshift[c_idx]) << s->sps->pixel_shift)]
-    HEVCLocalContext *lc = &s->HEVClc;
+    HEVCLocalContext *lc = s->HEVClc;
     int merge_idx = 0;
     struct MvField current_mv = {{{ 0 }}};
 
@@ -1641,9 +1596,6 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
     MvField *tab_mvf = s->ref->tab_mvf;
     RefPicList  *refPicList = s->ref->refPicList;
     HEVCFrame *ref0, *ref1;
-
-    int tmpstride = MAX_PB_SIZE;
-
     uint8_t *dst0 = POS(0, x0, y0);
     uint8_t *dst1 = POS(1, x0, y0);
     uint8_t *dst2 = POS(2, x0, y0);
@@ -1670,8 +1622,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
         x_pu = x0 >> s->sps->log2_min_pu_size;
         y_pu = y0 >> s->sps->log2_min_pu_size;
 
-        for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
-            for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
+        for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
+            for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
                 tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
     } else { /* MODE_INTER */
         lc->pu.merge_flag = ff_hevc_merge_flag_decode(s);
@@ -1686,12 +1638,13 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
             x_pu = x0 >> s->sps->log2_min_pu_size;
             y_pu = y0 >> s->sps->log2_min_pu_size;
 
-            for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
-                for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
+            for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
+                for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
         } else {
             enum InterPredIdc inter_pred_idc = PRED_L0;
             ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
+            current_mv.pred_flag = 0;
             if (s->sh.slice_type == B_SLICE)
                 inter_pred_idc = ff_hevc_inter_pred_idc_decode(s, nPbW, nPbH);
 
@@ -1700,8 +1653,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
                     ref_idx[0] = ff_hevc_ref_idx_lx_decode(s, s->sh.nb_refs[L0]);
                     current_mv.ref_idx[0] = ref_idx[0];
                 }
-                current_mv.pred_flag[0] = 1;
-                hls_mvd_coding(s, x0, y0, 0);
+                current_mv.pred_flag = PF_L0;
+                ff_hevc_hls_mvd_coding(s, x0, y0, 0);
                 mvp_flag[0] = ff_hevc_mvp_lx_flag_decode(s);
                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
                                          partIdx, merge_idx, &current_mv,
@@ -1717,13 +1670,12 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
                 }
 
                 if (s->sh.mvd_l1_zero_flag == 1 && inter_pred_idc == PRED_BI) {
-                    lc->pu.mvd.x = 0;
-                    lc->pu.mvd.y = 0;
+                    AV_ZERO32(&lc->pu.mvd);
                 } else {
-                    hls_mvd_coding(s, x0, y0, 1);
+                    ff_hevc_hls_mvd_coding(s, x0, y0, 1);
                 }
 
-                current_mv.pred_flag[1] = 1;
+                current_mv.pred_flag += PF_L1;
                 mvp_flag[1] = ff_hevc_mvp_lx_flag_decode(s);
                 ff_hevc_luma_mv_mvp_mode(s, x0, y0, nPbW, nPbH, log2_cb_size,
                                          partIdx, merge_idx, &current_mv,
@@ -1735,154 +1687,75 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
             x_pu = x0 >> s->sps->log2_min_pu_size;
             y_pu = y0 >> s->sps->log2_min_pu_size;
 
-            for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
-                for(j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
+            for (j = 0; j < nPbH >> s->sps->log2_min_pu_size; j++)
+                for (i = 0; i < nPbW >> s->sps->log2_min_pu_size; i++)
                     tab_mvf[(y_pu + j) * min_pu_width + x_pu + i] = current_mv;
         }
     }
 
-    if (current_mv.pred_flag[0]) {
+    if (current_mv.pred_flag & PF_L0) {
         ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
         if (!ref0)
             return;
         hevc_await_progress(s, ref0, &current_mv.mv[0], y0, nPbH);
     }
-    if (current_mv.pred_flag[1]) {
+    if (current_mv.pred_flag & PF_L1) {
         ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
         if (!ref1)
             return;
         hevc_await_progress(s, ref1, &current_mv.mv[1], y0, nPbH);
     }
 
-    if (current_mv.pred_flag[0] && !current_mv.pred_flag[1]) {
-        DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
-        DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
-
-        luma_mc(s, tmp, tmpstride, ref0->frame,
-                &current_mv.mv[0], x0, y0, nPbW, nPbH);
-
-        if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
-            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
-            s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
-                                     s->sh.luma_weight_l0[current_mv.ref_idx[0]],
-                                     s->sh.luma_offset_l0[current_mv.ref_idx[0]],
-                                     dst0, s->frame->linesize[0], tmp,
-                                     tmpstride, nPbW, nPbH);
-        } else {
-            s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
-        }
-        chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
-                  &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
-
-        if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
-            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
-            s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
-                                     s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
-                                     s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
-                                     dst1, s->frame->linesize[1], tmp, tmpstride,
-                                     nPbW / 2, nPbH / 2);
-            s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
-                                     s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
-                                     s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
-                                     dst2, s->frame->linesize[2], tmp2, tmpstride,
-                                     nPbW / 2, nPbH / 2);
-        } else {
-            s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
-            s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
-        }
-    } else if (!current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
-        DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
-        DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
-
-        if (!ref1)
-            return;
-
-        luma_mc(s, tmp, tmpstride, ref1->frame,
-                &current_mv.mv[1], x0, y0, nPbW, nPbH);
-
-        if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
-            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
-            s->hevcdsp.weighted_pred(s->sh.luma_log2_weight_denom,
-                                      s->sh.luma_weight_l1[current_mv.ref_idx[1]],
-                                      s->sh.luma_offset_l1[current_mv.ref_idx[1]],
-                                      dst0, s->frame->linesize[0], tmp, tmpstride,
-                                      nPbW, nPbH);
-        } else {
-            s->hevcdsp.put_unweighted_pred(dst0, s->frame->linesize[0], tmp, tmpstride, nPbW, nPbH);
-        }
-
-        chroma_mc(s, tmp, tmp2, tmpstride, ref1->frame,
-                  &current_mv.mv[1], x0/2, y0/2, nPbW/2, nPbH/2);
-
-        if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
-            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
-            s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
-                                     s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
-                                     s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
-                                     dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
-            s->hevcdsp.weighted_pred(s->sh.chroma_log2_weight_denom,
-                                     s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
-                                     s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
-                                     dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
-        } else {
-            s->hevcdsp.put_unweighted_pred(dst1, s->frame->linesize[1], tmp, tmpstride, nPbW/2, nPbH/2);
-            s->hevcdsp.put_unweighted_pred(dst2, s->frame->linesize[2], tmp2, tmpstride, nPbW/2, nPbH/2);
-        }
-    } else if (current_mv.pred_flag[0] && current_mv.pred_flag[1]) {
-        DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
-        DECLARE_ALIGNED(16, int16_t, tmp2[MAX_PB_SIZE * MAX_PB_SIZE]);
-        DECLARE_ALIGNED(16, int16_t, tmp3[MAX_PB_SIZE * MAX_PB_SIZE]);
-        DECLARE_ALIGNED(16, int16_t, tmp4[MAX_PB_SIZE * MAX_PB_SIZE]);
-        HEVCFrame *ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
-        HEVCFrame *ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
-
-        if (!ref0 || !ref1)
-            return;
-
-        luma_mc(s, tmp, tmpstride, ref0->frame,
-                &current_mv.mv[0], x0, y0, nPbW, nPbH);
-        luma_mc(s, tmp2, tmpstride, ref1->frame,
-                &current_mv.mv[1], x0, y0, nPbW, nPbH);
-
-        if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
-            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
-            s->hevcdsp.weighted_pred_avg(s->sh.luma_log2_weight_denom,
-                                         s->sh.luma_weight_l0[current_mv.ref_idx[0]],
-                                         s->sh.luma_weight_l1[current_mv.ref_idx[1]],
-                                         s->sh.luma_offset_l0[current_mv.ref_idx[0]],
-                                         s->sh.luma_offset_l1[current_mv.ref_idx[1]],
-                                         dst0, s->frame->linesize[0],
-                                         tmp, tmp2, tmpstride, nPbW, nPbH);
-        } else {
-            s->hevcdsp.put_weighted_pred_avg(dst0, s->frame->linesize[0],
-                                             tmp, tmp2, tmpstride, nPbW, nPbH);
-        }
-
-        chroma_mc(s, tmp, tmp2, tmpstride, ref0->frame,
-                  &current_mv.mv[0], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
-        chroma_mc(s, tmp3, tmp4, tmpstride, ref1->frame,
-                  &current_mv.mv[1], x0 / 2, y0 / 2, nPbW / 2, nPbH / 2);
-
-        if ((s->sh.slice_type == P_SLICE && s->pps->weighted_pred_flag) ||
-            (s->sh.slice_type == B_SLICE && s->pps->weighted_bipred_flag)) {
-            s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
-                                         s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0],
-                                         s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0],
-                                         s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0],
-                                         s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0],
-                                         dst1, s->frame->linesize[1], tmp, tmp3,
-                                         tmpstride, nPbW / 2, nPbH / 2);
-            s->hevcdsp.weighted_pred_avg(s->sh.chroma_log2_weight_denom,
-                                         s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1],
-                                         s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1],
-                                         s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1],
-                                         s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1],
-                                         dst2, s->frame->linesize[2], tmp2, tmp4,
-                                         tmpstride, nPbW / 2, nPbH / 2);
-        } else {
-            s->hevcdsp.put_weighted_pred_avg(dst1, s->frame->linesize[1], tmp, tmp3, tmpstride, nPbW/2, nPbH/2);
-            s->hevcdsp.put_weighted_pred_avg(dst2, s->frame->linesize[2], tmp2, tmp4, tmpstride, nPbW/2, nPbH/2);
-        }
+    if (current_mv.pred_flag == PF_L0) {
+        int x0_c = x0 >> s->sps->hshift[1];
+        int y0_c = y0 >> s->sps->vshift[1];
+        int nPbW_c = nPbW >> s->sps->hshift[1];
+        int nPbH_c = nPbH >> s->sps->vshift[1];
+
+        luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame,
+                    &current_mv.mv[0], x0, y0, nPbW, nPbH,
+                    s->sh.luma_weight_l0[current_mv.ref_idx[0]],
+                    s->sh.luma_offset_l0[current_mv.ref_idx[0]]);
+
+        chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1],
+                      0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
+                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]);
+        chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2],
+                      0, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
+                      s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]);
+    } else if (current_mv.pred_flag == PF_L1) {
+        int x0_c = x0 >> s->sps->hshift[1];
+        int y0_c = y0 >> s->sps->vshift[1];
+        int nPbW_c = nPbW >> s->sps->hshift[1];
+        int nPbH_c = nPbH >> s->sps->vshift[1];
+
+        luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame,
+                    &current_mv.mv[1], x0, y0, nPbW, nPbH,
+                    s->sh.luma_weight_l1[current_mv.ref_idx[1]],
+                    s->sh.luma_offset_l1[current_mv.ref_idx[1]]);
+
+        chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1],
+                      1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
+                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]);
+
+        chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2],
+                      1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv,
+                      s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]);
+    } else if (current_mv.pred_flag == PF_BI) {
+        int x0_c = x0 >> s->sps->hshift[1];
+        int y0_c = y0 >> s->sps->vshift[1];
+        int nPbW_c = nPbW >> s->sps->hshift[1];
+        int nPbH_c = nPbH >> s->sps->vshift[1];
+
+        luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame,
+                   &current_mv.mv[0], x0, y0, nPbW, nPbH,
+                   ref1->frame, &current_mv.mv[1], &current_mv);
+
+        chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame,
+                     x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0);
+
+        chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame,
+                     x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 1);
     }
 }
 
@@ -1892,7 +1765,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0,
 static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
                                 int prev_intra_luma_pred_flag)
 {
-    HEVCLocalContext *lc = &s->HEVClc;
+    HEVCLocalContext *lc = s->HEVClc;
     int x_pu             = x0 >> s->sps->log2_min_pu_size;
     int y_pu             = y0 >> s->sps->log2_min_pu_size;
     int min_pu_width     = s->sps->min_pu_width;
@@ -1962,15 +1835,7 @@ static int luma_intra_pred_mode(HEVCContext *s, int x0, int y0, int pu_size,
                intra_pred_mode, size_in_pus);
 
         for (j = 0; j < size_in_pus; j++) {
-            tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].is_intra     = 1;
-            tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[0] = 0;
-            tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag[1] = 0;
-            tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[0]   = 0;
-            tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].ref_idx[1]   = 0;
-            tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].x      = 0;
-            tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[0].y      = 0;
-            tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].x      = 0;
-            tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].mv[1].y      = 0;
+            tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
         }
     }
 
@@ -1990,10 +1855,14 @@ static av_always_inline void set_ct_depth(HEVCContext *s, int x0, int y0,
                ct_depth, length);
 }
 
+static const uint8_t tab_mode_idx[] = {
+     0,  1,  2,  2,  2,  2,  3,  5,  7,  8, 10, 12, 13, 15, 17, 18, 19, 20,
+    21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31};
+
 static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
                                   int log2_cb_size)
 {
-    HEVCLocalContext *lc = &s->HEVClc;
+    HEVCLocalContext *lc = s->HEVClc;
     static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
     uint8_t prev_intra_luma_pred_flag[4];
     int split   = lc->cu.part_mode == PART_NxN;
@@ -2019,14 +1888,42 @@ static void intra_prediction_unit(HEVCContext *s, int x0, int y0,
         }
     }
 
-    chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
-    if (chroma_mode != 4) {
-        if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
-            lc->pu.intra_pred_mode_c = 34;
-        else
-            lc->pu.intra_pred_mode_c = intra_chroma_table[chroma_mode];
-    } else {
-        lc->pu.intra_pred_mode_c = lc->pu.intra_pred_mode[0];
+    if (s->sps->chroma_format_idc == 3) {
+        for (i = 0; i < side; i++) {
+            for (j = 0; j < side; j++) {
+                lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
+                if (chroma_mode != 4) {
+                    if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
+                        lc->pu.intra_pred_mode_c[2 * i + j] = 34;
+                    else
+                        lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
+                } else {
+                    lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
+                }
+            }
+        }
+    } else if (s->sps->chroma_format_idc == 2) {
+        int mode_idx;
+        lc->pu.chroma_mode_c[0] = chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
+        if (chroma_mode != 4) {
+            if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
+                mode_idx = 34;
+            else
+                mode_idx = intra_chroma_table[chroma_mode];
+        } else {
+            mode_idx = lc->pu.intra_pred_mode[0];
+        }
+        lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
+    } else if (s->sps->chroma_format_idc != 0) {
+        chroma_mode = ff_hevc_intra_chroma_pred_mode_decode(s);
+        if (chroma_mode != 4) {
+            if (lc->pu.intra_pred_mode[0] == intra_chroma_table[chroma_mode])
+                lc->pu.intra_pred_mode_c[0] = 34;
+            else
+                lc->pu.intra_pred_mode_c[0] = intra_chroma_table[chroma_mode];
+        } else {
+            lc->pu.intra_pred_mode_c[0] = lc->pu.intra_pred_mode[0];
+        }
     }
 }
 
@@ -2034,7 +1931,7 @@ static void intra_prediction_unit_default_value(HEVCContext *s,
                                                 int x0, int y0,
                                                 int log2_cb_size)
 {
-    HEVCLocalContext *lc = &s->HEVClc;
+    HEVCLocalContext *lc = s->HEVClc;
     int pb_size          = 1 << log2_cb_size;
     int size_in_pus      = pb_size >> s->sps->log2_min_pu_size;
     int min_pu_width     = s->sps->min_pu_width;
@@ -2045,22 +1942,25 @@ static void intra_prediction_unit_default_value(HEVCContext *s,
 
     if (size_in_pus == 0)
         size_in_pus = 1;
-    for (j = 0; j < size_in_pus; j++) {
+    for (j = 0; j < size_in_pus; j++)
         memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
-        for (k = 0; k < size_in_pus; k++)
-            tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].is_intra = lc->cu.pred_mode == MODE_INTRA;
-    }
+    if (lc->cu.pred_mode == MODE_INTRA)
+        for (j = 0; j < size_in_pus; j++)
+            for (k = 0; k < size_in_pus; k++)
+                tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
 }
 
 static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
 {
     int cb_size          = 1 << log2_cb_size;
-    HEVCLocalContext *lc = &s->HEVClc;
+    HEVCLocalContext *lc = s->HEVClc;
     int log2_min_cb_size = s->sps->log2_min_cb_size;
     int length           = cb_size >> log2_min_cb_size;
     int min_cb_width     = s->sps->min_cb_width;
     int x_cb             = x0 >> log2_min_cb_size;
     int y_cb             = y0 >> log2_min_cb_size;
+    int idx              = log2_cb_size - 2;
+    int qp_block_mask    = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
     int x, y, ret;
 
     lc->cu.x                = x0;
@@ -2084,23 +1984,26 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
     if (s->sh.slice_type != I_SLICE) {
         uint8_t skip_flag = ff_hevc_skip_flag_decode(s, x0, y0, x_cb, y_cb);
 
-        lc->cu.pred_mode = MODE_SKIP;
         x = y_cb * min_cb_width + x_cb;
         for (y = 0; y < length; y++) {
             memset(&s->skip_flag[x], skip_flag, length);
             x += min_cb_width;
         }
         lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
+    } else {
+        x = y_cb * min_cb_width + x_cb;
+        for (y = 0; y < length; y++) {
+            memset(&s->skip_flag[x], 0, length);
+            x += min_cb_width;
+        }
     }
 
     if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
-        hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
+        hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
         intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
 
         if (!s->sh.disable_deblocking_filter_flag)
-            ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
-                                                  lc->slice_or_tiles_up_boundary,
-                                                  lc->slice_or_tiles_left_boundary);
+            ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
     } else {
         if (s->sh.slice_type != I_SLICE)
             lc->cu.pred_mode = ff_hevc_pred_mode_decode(s);
@@ -2132,37 +2035,37 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
             intra_prediction_unit_default_value(s, x0, y0, log2_cb_size);
             switch (lc->cu.part_mode) {
             case PART_2Nx2N:
-                hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0);
+                hls_prediction_unit(s, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
                 break;
             case PART_2NxN:
-                hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0);
-                hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1);
+                hls_prediction_unit(s, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0, idx);
+                hls_prediction_unit(s, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
                 break;
             case PART_Nx2N:
-                hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0);
-                hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1);
+                hls_prediction_unit(s, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
+                hls_prediction_unit(s, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
                 break;
             case PART_2NxnU:
-                hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0);
-                hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1);
+                hls_prediction_unit(s, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0, idx);
+                hls_prediction_unit(s, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
                 break;
             case PART_2NxnD:
-                hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0);
-                hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1);
+                hls_prediction_unit(s, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
+                hls_prediction_unit(s, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
                 break;
             case PART_nLx2N:
-                hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0);
-                hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1);
+                hls_prediction_unit(s, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0, idx - 2);
+                hls_prediction_unit(s, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
                 break;
             case PART_nRx2N:
-                hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0);
-                hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1);
+                hls_prediction_unit(s, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
+                hls_prediction_unit(s, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
                 break;
             case PART_NxN:
-                hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0);
-                hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1);
-                hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2);
-                hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3);
+                hls_prediction_unit(s, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
+                hls_prediction_unit(s, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
+                hls_prediction_unit(s, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
+                hls_prediction_unit(s, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
                 break;
             }
         }
@@ -2173,25 +2076,24 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
                 lc->cu.rqt_root_cbf = ff_hevc_no_residual_syntax_flag_decode(s);
             }
             if (lc->cu.rqt_root_cbf) {
+                const static int cbf[2] = { 0 };
                 lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
                                          s->sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
                                          s->sps->max_transform_hierarchy_depth_inter;
                 ret = hls_transform_tree(s, x0, y0, x0, y0, x0, y0,
                                          log2_cb_size,
-                                         log2_cb_size, 0, 0, 0, 0);
+                                         log2_cb_size, 0, 0, cbf, cbf);
                 if (ret < 0)
                     return ret;
             } else {
                 if (!s->sh.disable_deblocking_filter_flag)
-                    ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size,
-                                                          lc->slice_or_tiles_up_boundary,
-                                                          lc->slice_or_tiles_left_boundary);
+                    ff_hevc_deblocking_boundary_strengths(s, x0, y0, log2_cb_size);
             }
         }
     }
 
     if (s->pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
-        ff_hevc_set_qPy(s, x0, y0, x0, y0, log2_cb_size);
+        ff_hevc_set_qPy(s, x0, y0, log2_cb_size);
 
     x = y_cb * min_cb_width + x_cb;
     for (y = 0; y < length; y++) {
@@ -2199,6 +2101,11 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
         x += min_cb_width;
     }
 
+    if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
+       ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
+        lc->qPy_pred = lc->qp_y;
+    }
+
     set_ct_depth(s, x0, y0, log2_cb_size, lc->ct.depth);
 
     return 0;
@@ -2207,8 +2114,10 @@ static int hls_coding_unit(HEVCContext *s, int x0, int y0, int log2_cb_size)
 static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
                                int log2_cb_size, int cb_depth)
 {
-    HEVCLocalContext *lc = &s->HEVClc;
+    HEVCLocalContext *lc = s->HEVClc;
     const int cb_size    = 1 << log2_cb_size;
+    int ret;
+    int qp_block_mask = (1<<(s->sps->log2_ctb_size - s->pps->diff_cu_qp_delta_depth)) - 1;
     int split_cu;
 
     lc->ct.depth = cb_depth;
@@ -2225,31 +2134,63 @@ static int hls_coding_quadtree(HEVCContext *s, int x0, int y0,
         lc->tu.cu_qp_delta          = 0;
     }
 
+    if (s->sh.cu_chroma_qp_offset_enabled_flag &&
+        log2_cb_size >= s->sps->log2_ctb_size - s->pps->diff_cu_chroma_qp_offset_depth) {
+        lc->tu.is_cu_chroma_qp_offset_coded = 0;
+    }
+
     if (split_cu) {
         const int cb_size_split = cb_size >> 1;
         const int x1 = x0 + cb_size_split;
         const int y1 = y0 + cb_size_split;
 
-        log2_cb_size--;
-        cb_depth++;
+        int more_data = 0;
 
-#define SUBDIVIDE(x, y)                                                \
-do {                                                                   \
-    if (x < s->sps->width && y < s->sps->height) {                     \
-        int ret = hls_coding_quadtree(s, x, y, log2_cb_size, cb_depth);\
-        if (ret < 0)                                                   \
-            return ret;                                                \
-    }                                                                  \
-} while (0)
+        more_data = hls_coding_quadtree(s, x0, y0, log2_cb_size - 1, cb_depth + 1);
+        if (more_data < 0)
+            return more_data;
+
+        if (more_data && x1 < s->sps->width) {
+            more_data = hls_coding_quadtree(s, x1, y0, log2_cb_size - 1, cb_depth + 1);
+            if (more_data < 0)
+                return more_data;
+        }
+        if (more_data && y1 < s->sps->height) {
+            more_data = hls_coding_quadtree(s, x0, y1, log2_cb_size - 1, cb_depth + 1);
+            if (more_data < 0)
+                return more_data;
+        }
+        if (more_data && x1 < s->sps->width &&
+            y1 < s->sps->height) {
+            more_data = hls_coding_quadtree(s, x1, y1, log2_cb_size - 1, cb_depth + 1);
+            if (more_data < 0)
+                return more_data;
+        }
 
-        SUBDIVIDE(x0, y0);
-        SUBDIVIDE(x1, y0);
-        SUBDIVIDE(x0, y1);
-        SUBDIVIDE(x1, y1);
+        if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
+            ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
+            lc->qPy_pred = lc->qp_y;
+
+        if (more_data)
+            return ((x1 + cb_size_split) < s->sps->width ||
+                    (y1 + cb_size_split) < s->sps->height);
+        else
+            return 0;
     } else {
-        int ret = hls_coding_unit(s, x0, y0, log2_cb_size);
+        ret = hls_coding_unit(s, x0, y0, log2_cb_size);
         if (ret < 0)
             return ret;
+        if ((!((x0 + cb_size) %
+               (1 << (s->sps->log2_ctb_size))) ||
+             (x0 + cb_size >= s->sps->width)) &&
+            (!((y0 + cb_size) %
+               (1 << (s->sps->log2_ctb_size))) ||
+             (y0 + cb_size >= s->sps->height))) {
+            int end_of_slice_flag = ff_hevc_end_of_slice_flag_decode(s);
+            return !end_of_slice_flag;
+        } else {
+            return 1;
+        }
     }
 
     return 0;
@@ -2258,14 +2199,11 @@ do {                                                                   \
 static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
                                  int ctb_addr_ts)
 {
-    HEVCLocalContext *lc  = &s->HEVClc;
+    HEVCLocalContext *lc  = s->HEVClc;
     int ctb_size          = 1 << s->sps->log2_ctb_size;
     int ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
     int ctb_addr_in_slice = ctb_addr_rs - s->sh.slice_addr;
 
-    int tile_left_boundary, tile_up_boundary;
-    int slice_left_boundary, slice_up_boundary;
-
     s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
 
     if (s->pps->entropy_coding_sync_enabled_flag) {
@@ -2275,7 +2213,6 @@ static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
     } else if (s->pps->tiles_enabled_flag) {
         if (ctb_addr_ts && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
             int idxX = s->pps->col_idxX[x_ctb >> s->sps->log2_ctb_size];
-            lc->start_of_tiles_x = x_ctb;
             lc->end_of_tiles_x   = x_ctb + (s->pps->column_width[idxX] << s->sps->log2_ctb_size);
             lc->first_qp_group   = 1;
         }
@@ -2285,37 +2222,49 @@ static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb,
 
     lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, s->sps->height);
 
+    lc->boundary_flags = 0;
     if (s->pps->tiles_enabled_flag) {
-        tile_left_boundary  = x_ctb > 0 &&
-                              s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - 1]];
-        slice_left_boundary = x_ctb > 0 &&
-                              s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - 1];
-        tile_up_boundary  = y_ctb > 0 &&
-                            s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
-        slice_up_boundary = y_ctb > 0 &&
-                            s->tab_slice_address[ctb_addr_rs] == s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width];
+        if (x_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1]])
+            lc->boundary_flags |= BOUNDARY_LEFT_TILE;
+        if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
+            lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
+        if (y_ctb > 0 && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]])
+            lc->boundary_flags |= BOUNDARY_UPPER_TILE;
+        if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - s->sps->ctb_width])
+            lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
     } else {
-        tile_left_boundary  =
-        tile_up_boundary    = 1;
-        slice_left_boundary = ctb_addr_in_slice > 0;
-        slice_up_boundary   = ctb_addr_in_slice >= s->sps->ctb_width;
-    }
-    lc->slice_or_tiles_left_boundary = (!slice_left_boundary) + (!tile_left_boundary << 1);
-    lc->slice_or_tiles_up_boundary   = (!slice_up_boundary + (!tile_up_boundary << 1));
-    lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0) && tile_left_boundary);
-    lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && tile_up_boundary);
-    lc->ctb_up_right_flag = ((y_ctb > 0)  && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
-    lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
+        if (ctb_addr_in_slice <= 0) /* nothing of this slice precedes the CTB in raster order */
+            lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
+        if (ctb_addr_in_slice < s->sps->ctb_width) /* less than one CTB row into the slice */
+            lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
+    }
+    lc->ctb_left_flag = ((x_ctb > 0) && (ctb_addr_in_slice > 0)                  && !(lc->boundary_flags & BOUNDARY_LEFT_TILE));
+    lc->ctb_up_flag   = ((y_ctb > 0) && (ctb_addr_in_slice >= s->sps->ctb_width) && !(lc->boundary_flags & BOUNDARY_UPPER_TILE));
+    lc->ctb_up_right_flag = ((y_ctb > 0)                 && (ctb_addr_in_slice+1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1 - s->sps->ctb_width]]));
+    lc->ctb_up_left_flag  = ((x_ctb > 0) && (y_ctb > 0)  && (ctb_addr_in_slice-1 >= s->sps->ctb_width) && (s->pps->tile_id[ctb_addr_ts] == s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->sps->ctb_width]]));
 }
 
-static int hls_slice_data(HEVCContext *s)
+static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
 {
+    HEVCContext *s  = avctxt->priv_data;
     int ctb_size    = 1 << s->sps->log2_ctb_size;
     int more_data   = 1;
     int x_ctb       = 0;
     int y_ctb       = 0;
     int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
-    int ret;
+
+    if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) {
+        av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (s->sh.dependent_slice_segment_flag) {
+        int prev_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
+        if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {
+            av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
+            return AVERROR_INVALIDDATA;
+        }
+    }
 
     while (more_data && ctb_addr_ts < s->sps->ctb_size) {
         int ctb_addr_rs = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
@@ -2332,10 +2281,12 @@ static int hls_slice_data(HEVCContext *s)
         s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
         s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
 
-        ret = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
-        if (ret < 0)
-            return ret;
-        more_data = !ff_hevc_end_of_slice_flag_decode(s);
+        more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
+        if (more_data < 0) {
+            s->tab_slice_address[ctb_addr_rs] = -1;
+            return more_data;
+        }
+
 
         ctb_addr_ts++;
         ff_hevc_save_states(s, ctb_addr_ts);
@@ -2344,18 +2295,181 @@ static int hls_slice_data(HEVCContext *s)
 
     if (x_ctb + ctb_size >= s->sps->width &&
         y_ctb + ctb_size >= s->sps->height)
-        ff_hevc_hls_filter(s, x_ctb, y_ctb);
+        ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
 
     return ctb_addr_ts;
 }
 
+/* Run hls_decode_entry as a single job through avctx->execute() and hand
+ * back the value that job stored in its result slot. */
+static int hls_slice_data(HEVCContext *s)
+{
+    int job_arg[2] = { 0, 1 };
+    int job_ret[2];
+
+    s->avctx->execute(s->avctx, hls_decode_entry, job_arg, job_ret,
+                      1, sizeof(int));
+    return job_ret[0];
+}
+/**
+ * execute2() job that decodes one CTB row of a WPP slice; the row index is
+ * input_ctb_row[job] and self_id selects the per-thread context in sList.
+ * Every error exit raises wpp_err and reports progress so that the thread
+ * waiting on this row is released instead of blocking forever.
+ * @return ctb_addr_ts after the bottom-right CTB, 0 otherwise, <0 on error
+ */
+static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int job, int self_id)
+{
+    HEVCContext *s1  = avctxt->priv_data, *s;
+    HEVCLocalContext *lc;
+    int ctb_size    = 1<< s1->sps->log2_ctb_size;
+    int more_data   = 1;
+    int *ctb_row_p    = input_ctb_row;
+    int ctb_row = ctb_row_p[job];
+    int ctb_addr_rs = s1->sh.slice_ctb_addr_rs + ctb_row * ((s1->sps->width + ctb_size - 1) >> s1->sps->log2_ctb_size);
+    int ctb_addr_ts = s1->pps->ctb_addr_rs_to_ts[ctb_addr_rs];
+    int thread = ctb_row % s1->threads_number;
+    int ret;
+
+    s = s1->sList[self_id];
+    lc = s->HEVClc;
+    if(ctb_row) { /* rows after the first read from their own substream */
+        ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]);
+        if (ret < 0)
+            goto error;
+        ff_init_cabac_decoder(&lc->cc, s->data + s->sh.offset[(ctb_row)-1], s->sh.size[ctb_row - 1]);
+    }
+
+    while(more_data && ctb_addr_ts < s->sps->ctb_size) {
+        int x_ctb = (ctb_addr_rs % s->sps->ctb_width) << s->sps->log2_ctb_size;
+        int y_ctb = (ctb_addr_rs / s->sps->ctb_width) << s->sps->log2_ctb_size;
+
+        hls_decode_neighbour(s, x_ctb, y_ctb, ctb_addr_ts);
+        ff_thread_await_progress2(s->avctx, ctb_row, thread, SHIFT_CTB_WPP);
+        if (avpriv_atomic_int_get(&s1->wpp_err)){ /* another row failed: give up quietly */
+            ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
+            return 0;
+        }
+        ff_hevc_cabac_init(s, ctb_addr_ts);
+        hls_sao_param(s, x_ctb >> s->sps->log2_ctb_size, y_ctb >> s->sps->log2_ctb_size);
+        more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->sps->log2_ctb_size, 0);
+        if (more_data < 0) {
+            ret = more_data;
+            goto error;
+        }
+        ctb_addr_ts++;
+        ff_hevc_save_states(s, ctb_addr_ts);
+        ff_thread_report_progress2(s->avctx, ctb_row, thread, 1);
+        ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size);
+
+        if (!more_data && (x_ctb+ctb_size) < s->sps->width && ctb_row != s->sh.num_entry_point_offsets) {
+            avpriv_atomic_int_set(&s1->wpp_err,  1);
+            ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
+            return 0;
+        }
+        if ((x_ctb+ctb_size) >= s->sps->width && (y_ctb+ctb_size) >= s->sps->height ) {
+            ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size);
+            ff_thread_report_progress2(s->avctx, ctb_row , thread, SHIFT_CTB_WPP);
+            return ctb_addr_ts;
+        }
+        ctb_addr_rs       = s->pps->ctb_addr_ts_to_rs[ctb_addr_ts];
+        x_ctb+=ctb_size;
+        if(x_ctb >= s->sps->width)
+            break;
+    }
+    ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
+    return 0;
+error:
+    s->tab_slice_address[ctb_addr_rs] = -1;
+    avpriv_atomic_int_set(&s1->wpp_err,  1);
+    ff_thread_report_progress2(s->avctx, ctb_row ,thread, SHIFT_CTB_WPP);
+    return ret;
+}
+
+/**
+ * WPP slice decoding: compute offset/size of every entry-point substream
+ * (correcting for skipped_bytes_pos), then dispatch hls_decode_entry_wpp jobs.
+ * @return the sum of the per-row job results, or AVERROR(ENOMEM)
+ */
+static int hls_slice_data_wpp(HEVCContext *s, const uint8_t *nal, int length)
+{
+    HEVCLocalContext *lc = s->HEVClc;
+    int *ret = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
+    int *arg = av_malloc_array(s->sh.num_entry_point_offsets + 1, sizeof(int));
+    int offset;
+    int startheader, cmpt = 0;
+    int i, j, res = 0;
+
+    if (!ret || !arg) { /* both arrays are indexed below */
+        av_free(ret);
+        av_free(arg);
+        return AVERROR(ENOMEM);
+    }
+
+    if (!s->sList[1]) {
+        ff_alloc_entries(s->avctx, s->sh.num_entry_point_offsets + 1);
+        for (i = 1; i < s->threads_number; i++) {
+            s->sList[i] = av_malloc(sizeof(HEVCContext)); /* NOTE(review): result still unchecked */
+            memcpy(s->sList[i], s, sizeof(HEVCContext));
+            s->HEVClcList[i] = av_mallocz(sizeof(HEVCLocalContext)); /* NOTE(review): result still unchecked */
+            s->sList[i]->HEVClc = s->HEVClcList[i];
+        }
+    }
+
+    offset = (lc->gb.index >> 3);
+    for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < s->skipped_bytes; j++) {
+        if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
+            startheader--;
+            cmpt++;
+        }
+    }
+    for (i = 1; i < s->sh.num_entry_point_offsets; i++) {
+        offset += (s->sh.entry_point_offset[i - 1] - cmpt);
+        for (j = 0, cmpt = 0, startheader = offset
+             + s->sh.entry_point_offset[i]; j < s->skipped_bytes; j++) {
+            if (s->skipped_bytes_pos[j] >= offset && s->skipped_bytes_pos[j] < startheader) {
+                startheader--;
+                cmpt++;
+            }
+        }
+        s->sh.size[i - 1] = s->sh.entry_point_offset[i] - cmpt;
+        s->sh.offset[i - 1] = offset;
+    }
+    if (s->sh.num_entry_point_offsets != 0) { /* last substream runs to the end of the NAL */
+        offset += s->sh.entry_point_offset[s->sh.num_entry_point_offsets - 1] - cmpt;
+        s->sh.size[s->sh.num_entry_point_offsets - 1] = length - offset;
+        s->sh.offset[s->sh.num_entry_point_offsets - 1] = offset;
+    }
+    s->data = nal;
+
+    for (i = 1; i < s->threads_number; i++) { /* refresh every per-thread copy from s */
+        s->sList[i]->HEVClc->first_qp_group = 1;
+        s->sList[i]->HEVClc->qp_y = s->sList[0]->HEVClc->qp_y;
+        memcpy(s->sList[i], s, sizeof(HEVCContext));
+        s->sList[i]->HEVClc = s->HEVClcList[i];
+    }
+    avpriv_atomic_int_set(&s->wpp_err, 0);
+    ff_reset_entries(s->avctx);
+    for (i = 0; i <= s->sh.num_entry_point_offsets; i++) {
+        arg[i] = i;
+        ret[i] = 0;
+    }
+    if (s->pps->entropy_coding_sync_enabled_flag)
+        s->avctx->execute2(s->avctx, (void *) hls_decode_entry_wpp, arg, ret, s->sh.num_entry_point_offsets + 1);
+    for (i = 0; i <= s->sh.num_entry_point_offsets; i++)
+        res += ret[i];
+    av_free(ret);
+    av_free(arg);
+    return res;
+}
+
 /**
  * @return AVERROR_INVALIDDATA if the packet is not a valid NAL unit,
  * 0 if the unit should be skipped, 1 otherwise
  */
 static int hls_nal_unit(HEVCContext *s)
 {
-    GetBitContext *gb = &s->HEVClc.gb;
+    GetBitContext *gb = &s->HEVClc->gb;
     int nuh_layer_id;
 
     if (get_bits1(gb) != 0)
@@ -2375,33 +2489,6 @@ static int hls_nal_unit(HEVCContext *s)
     return nuh_layer_id == 0;
 }
 
-static void restore_tqb_pixels(HEVCContext *s)
-{
-    int min_pu_size = 1 << s->sps->log2_min_pu_size;
-    int x, y, c_idx;
-
-    for (c_idx = 0; c_idx < 3; c_idx++) {
-        ptrdiff_t stride = s->frame->linesize[c_idx];
-        int hshift       = s->sps->hshift[c_idx];
-        int vshift       = s->sps->vshift[c_idx];
-        for (y = 0; y < s->sps->min_pu_height; y++) {
-            for (x = 0; x < s->sps->min_pu_width; x++) {
-                if (s->is_pcm[y * s->sps->min_pu_width + x]) {
-                    int n;
-                    int len      = min_pu_size >> hshift;
-                    uint8_t *src = &s->frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
-                    uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)];
-                    for (n = 0; n < (min_pu_size >> vshift); n++) {
-                        memcpy(dst, src, len);
-                        src += stride;
-                        dst += stride;
-                    }
-                }
-            }
-        }
-    }
-}
-
 static int set_side_data(HEVCContext *s)
 {
     AVFrame *out = s->ref->frame;
@@ -2453,23 +2540,24 @@ static int set_side_data(HEVCContext *s)
 
 static int hevc_frame_start(HEVCContext *s)
 {
-    HEVCLocalContext *lc = &s->HEVClc;
+    HEVCLocalContext *lc = s->HEVClc;
+    int pic_size_in_ctb  = ((s->sps->width  >> s->sps->log2_min_cb_size) + 1) *
+                           ((s->sps->height >> s->sps->log2_min_cb_size) + 1);
     int ret;
 
-    memset(s->horizontal_bs, 0, 2 * s->bs_width * (s->bs_height + 1));
-    memset(s->vertical_bs,   0, 2 * s->bs_width * (s->bs_height + 1));
+    memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
+    memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
     memset(s->cbf_luma,      0, s->sps->min_tb_width * s->sps->min_tb_height);
-    memset(s->is_pcm,        0, s->sps->min_pu_width * s->sps->min_pu_height);
+    memset(s->is_pcm,        0, (s->sps->min_pu_width + 1) * (s->sps->min_pu_height + 1));
+    memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
 
-    lc->start_of_tiles_x = 0;
     s->is_decoded        = 0;
     s->first_nal_type    = s->nal_unit_type;
 
     if (s->pps->tiles_enabled_flag)
         lc->end_of_tiles_x = s->pps->column_width[0] << s->sps->log2_ctb_size;
 
-    ret = ff_hevc_set_new_ref(s, s->sps->sao_enabled ? &s->sao_frame : &s->frame,
-                              s->poc);
+    ret = ff_hevc_set_new_ref(s, &s->frame, s->poc);
     if (ret < 0)
         goto fail;
 
@@ -2485,6 +2573,11 @@ static int hevc_frame_start(HEVCContext *s)
     if (ret < 0)
         goto fail;
 
+    s->frame->pict_type = 3 - s->sh.slice_type;
+
+    if (!IS_IRAP(s))
+        ff_hevc_bump_frame(s);
+
     av_frame_unref(s->output_frame);
     ret = ff_hevc_output_frame(s, s->output_frame, 0);
     if (ret < 0)
@@ -2495,7 +2588,7 @@ static int hevc_frame_start(HEVCContext *s)
     return 0;
 
 fail:
-    if (s->ref)
+    if (s->ref && s->threads_type == FF_THREAD_FRAME)
         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
     s->ref = NULL;
     return ret;
@@ -2503,7 +2596,7 @@ fail:
 
 static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
 {
-    HEVCLocalContext *lc = &s->HEVClc;
+    HEVCLocalContext *lc = s->HEVClc;
     GetBitContext *gb    = &lc->gb;
     int ctb_addr_ts, ret;
 
@@ -2605,13 +2698,12 @@ static int decode_nal_unit(HEVCContext *s, const uint8_t *nal, int length)
             }
         }
 
-        ctb_addr_ts = hls_slice_data(s);
+        if (s->threads_number > 1 && s->sh.num_entry_point_offsets > 0)
+            ctb_addr_ts = hls_slice_data_wpp(s, nal, length);
+        else
+            ctb_addr_ts = hls_slice_data(s);
         if (ctb_addr_ts >= (s->sps->ctb_width * s->sps->ctb_height)) {
             s->is_decoded = 1;
-            if ((s->pps->transquant_bypass_enable_flag ||
-                 (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) &&
-                s->sps->sao_enabled)
-                restore_tqb_pixels(s);
         }
 
         if (ctb_addr_ts < 0) {
@@ -2641,12 +2733,13 @@ fail:
 
 /* FIXME: This is adapted from ff_h264_decode_nal, avoiding duplication
  * between these functions would be nice. */
-static int extract_rbsp(const uint8_t *src, int length,
-                        HEVCNAL *nal)
+int ff_hevc_extract_rbsp(HEVCContext *s, const uint8_t *src, int length,
+                         HEVCNAL *nal)
 {
     int i, si, di;
     uint8_t *dst;
 
+    s->skipped_bytes = 0;
 #define STARTCODE_TEST                                                  \
         if (i + 2 < length && src[i + 1] == 0 && src[i + 2] <= 3) {     \
             if (src[i + 2] != 3) {                                      \
@@ -2718,6 +2811,17 @@ static int extract_rbsp(const uint8_t *src, int length,
                 dst[di++] = 0;
                 si       += 3;
 
+                s->skipped_bytes++;
+                if (s->skipped_bytes_pos_size < s->skipped_bytes) {
+                    s->skipped_bytes_pos_size *= 2;
+                    av_reallocp_array(&s->skipped_bytes_pos,
+                            s->skipped_bytes_pos_size,
+                            sizeof(*s->skipped_bytes_pos));
+                    if (!s->skipped_bytes_pos)
+                        return AVERROR(ENOMEM);
+                }
+                if (s->skipped_bytes_pos)
+                    s->skipped_bytes_pos[s->skipped_bytes-1] = di - 1;
                 continue;
             } else // next start code
                 goto nsc;
@@ -2741,6 +2845,7 @@ static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
     int i, consumed, ret = 0;
 
     s->ref = NULL;
+    s->last_eos = s->eos;
     s->eos = 0;
 
     /* split the input packet into NAL units, so we know the upper bound on the
@@ -2763,21 +2868,24 @@ static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
                 goto fail;
             }
         } else {
-            if (buf[2] == 0) {
-                length--;
-                buf++;
-                continue;
-            }
-            if (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
-                ret = AVERROR_INVALIDDATA;
-                goto fail;
+            /* search start code */
+            while (buf[0] != 0 || buf[1] != 0 || buf[2] != 1) {
+                ++buf;
+                --length;
+                if (length < 4) {
+                    av_log(s->avctx, AV_LOG_ERROR, "No start code is found.\n");
+                    ret = AVERROR_INVALIDDATA;
+                    goto fail;
+                }
             }
 
             buf           += 3;
             length        -= 3;
-            extract_length = length;
         }
 
+        if (!s->is_nalff)
+            extract_length = length;
+
         if (s->nals_allocated < s->nb_nals + 1) {
             int new_size = s->nals_allocated + 1;
             HEVCNAL *tmp = av_realloc_array(s->nals, new_size, sizeof(*tmp));
@@ -2788,17 +2896,30 @@ static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
             s->nals = tmp;
             memset(s->nals + s->nals_allocated, 0,
                    (new_size - s->nals_allocated) * sizeof(*tmp));
+            av_reallocp_array(&s->skipped_bytes_nal, new_size, sizeof(*s->skipped_bytes_nal));
+            av_reallocp_array(&s->skipped_bytes_pos_size_nal, new_size, sizeof(*s->skipped_bytes_pos_size_nal));
+            av_reallocp_array(&s->skipped_bytes_pos_nal, new_size, sizeof(*s->skipped_bytes_pos_nal));
+            s->skipped_bytes_pos_size_nal[s->nals_allocated] = 1024; // initial buffer size
+            s->skipped_bytes_pos_nal[s->nals_allocated] = av_malloc_array(s->skipped_bytes_pos_size_nal[s->nals_allocated], sizeof(*s->skipped_bytes_pos));
             s->nals_allocated = new_size;
         }
-        nal = &s->nals[s->nb_nals++];
+        s->skipped_bytes_pos_size = s->skipped_bytes_pos_size_nal[s->nb_nals];
+        s->skipped_bytes_pos = s->skipped_bytes_pos_nal[s->nb_nals];
+        nal = &s->nals[s->nb_nals];
+
+        consumed = ff_hevc_extract_rbsp(s, buf, extract_length, nal);
+
+        s->skipped_bytes_nal[s->nb_nals] = s->skipped_bytes;
+        s->skipped_bytes_pos_size_nal[s->nb_nals] = s->skipped_bytes_pos_size;
+        s->skipped_bytes_pos_nal[s->nb_nals++] = s->skipped_bytes_pos;
+
 
-        consumed = extract_rbsp(buf, extract_length, nal);
         if (consumed < 0) {
             ret = consumed;
             goto fail;
         }
 
-        ret = init_get_bits8(&s->HEVClc.gb, nal->data, nal->size);
+        ret = init_get_bits8(&s->HEVClc->gb, nal->data, nal->size);
         if (ret < 0)
             goto fail;
         hls_nal_unit(s);
@@ -2813,7 +2934,11 @@ static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
 
     /* parse the NAL units */
     for (i = 0; i < s->nb_nals; i++) {
-        int ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
+        int ret;
+        s->skipped_bytes = s->skipped_bytes_nal[i];
+        s->skipped_bytes_pos = s->skipped_bytes_pos_nal[i];
+
+        ret = decode_nal_unit(s, s->nals[i].data, s->nals[i].size);
         if (ret < 0) {
             av_log(s->avctx, AV_LOG_WARNING,
                    "Error parsing NAL unit #%d.\n", i);
@@ -2822,7 +2947,7 @@ static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
     }
 
 fail:
-    if (s->ref)
+    if (s->ref && s->threads_type == FF_THREAD_FRAME)
         ff_thread_report_progress(&s->ref->tf, INT_MAX, 0);
 
     return ret;
@@ -2947,7 +3072,9 @@ static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
 
 static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
 {
-    int ret = ff_thread_ref_frame(&dst->tf, &src->tf);
+    int ret;
+
+    ret = ff_thread_ref_frame(&dst->tf, &src->tf);
     if (ret < 0)
         return ret;
 
@@ -2986,6 +3113,15 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx)
 
     av_freep(&s->md5_ctx);
 
+    for(i=0; i < s->nals_allocated; i++) {
+        av_freep(&s->skipped_bytes_pos_nal[i]);
+    }
+    av_freep(&s->skipped_bytes_pos_size_nal);
+    av_freep(&s->skipped_bytes_nal);
+    av_freep(&s->skipped_bytes_pos_nal);
+
+    av_freep(&s->cabac_state);
+
     av_frame_free(&s->tmp_frame);
     av_frame_free(&s->output_frame);
 
@@ -3000,6 +3136,26 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx)
         av_buffer_unref(&s->sps_list[i]);
     for (i = 0; i < FF_ARRAY_ELEMS(s->pps_list); i++)
         av_buffer_unref(&s->pps_list[i]);
+    s->sps = NULL;
+    s->pps = NULL;
+    s->vps = NULL;
+
+    av_buffer_unref(&s->current_sps);
+
+    av_freep(&s->sh.entry_point_offset);
+    av_freep(&s->sh.offset);
+    av_freep(&s->sh.size);
+
+    for (i = 1; i < s->threads_number; i++) {
+        HEVCLocalContext *lc = s->HEVClcList[i];
+        if (lc) {
+            av_freep(&s->HEVClcList[i]);
+            av_freep(&s->sList[i]);
+        }
+    }
+    if (s->HEVClc == s->HEVClcList[0])
+        s->HEVClc = NULL;
+    av_freep(&s->HEVClcList[0]);
 
     for (i = 0; i < s->nals_allocated; i++)
         av_freep(&s->nals[i].rbsp_buffer);
@@ -3016,6 +3172,16 @@ static av_cold int hevc_init_context(AVCodecContext *avctx)
 
     s->avctx = avctx;
 
+    s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
+    if (!s->HEVClc)
+        goto fail;
+    s->HEVClcList[0] = s->HEVClc;
+    s->sList[0] = s;
+
+    s->cabac_state = av_malloc(HEVC_CONTEXTS);
+    if (!s->cabac_state)
+        goto fail;
+
     s->tmp_frame = av_frame_alloc();
     if (!s->tmp_frame)
         goto fail;
@@ -3040,6 +3206,7 @@ static av_cold int hevc_init_context(AVCodecContext *avctx)
     ff_bswapdsp_init(&s->bdsp);
 
     s->context_initialized = 1;
+    s->eos = 0;
 
     return 0;
 
@@ -3070,6 +3237,8 @@ static int hevc_update_thread_context(AVCodecContext *dst,
         }
     }
 
+    if (s->sps != s0->sps)
+        s->sps = NULL;
     for (i = 0; i < FF_ARRAY_ELEMS(s->vps_list); i++) {
         av_buffer_unref(&s->vps_list[i]);
         if (s0->vps_list[i]) {
@@ -3097,17 +3266,29 @@ static int hevc_update_thread_context(AVCodecContext *dst,
         }
     }
 
+    av_buffer_unref(&s->current_sps);
+    if (s0->current_sps) {
+        s->current_sps = av_buffer_ref(s0->current_sps);
+        if (!s->current_sps)
+            return AVERROR(ENOMEM);
+    }
+
     if (s->sps != s0->sps)
-        ret = set_sps(s, s0->sps);
+        if ((ret = set_sps(s, s0->sps)) < 0)
+            return ret;
 
     s->seq_decode = s0->seq_decode;
     s->seq_output = s0->seq_output;
     s->pocTid0    = s0->pocTid0;
     s->max_ra     = s0->max_ra;
+    s->eos        = s0->eos;
 
     s->is_nalff        = s0->is_nalff;
     s->nal_length_size = s0->nal_length_size;
 
+    s->threads_number      = s0->threads_number;
+    s->threads_type        = s0->threads_type;
+
     if (s0->eos) {
         s->seq_decode = (s->seq_decode + 1) & 0xff;
         s->max_ra = INT_MAX;
@@ -3193,6 +3374,14 @@ static av_cold int hevc_decode_init(AVCodecContext *avctx)
     if (ret < 0)
         return ret;
 
+    s->enable_parallel_tiles = 0;
+    s->picture_struct = 0;
+
+    if(avctx->active_thread_type & FF_THREAD_SLICE)
+        s->threads_number = avctx->thread_count;
+    else
+        s->threads_number = 1;
+
     if (avctx->extradata_size > 0 && avctx->extradata) {
         ret = hevc_decode_extradata(s);
         if (ret < 0) {
@@ -3201,6 +3390,11 @@ static av_cold int hevc_decode_init(AVCodecContext *avctx)
         }
     }
 
+    /* frame threading is chosen only when active with more than one thread */
+    s->threads_type = ((avctx->active_thread_type & FF_THREAD_FRAME) &&
+                       avctx->thread_count > 1) ? FF_THREAD_FRAME
+                                                : FF_THREAD_SLICE;
+
     return 0;
 }
 
@@ -3232,12 +3426,15 @@ static const AVProfile profiles[] = {
     { FF_PROFILE_HEVC_MAIN,                 "Main"                },
     { FF_PROFILE_HEVC_MAIN_10,              "Main 10"             },
     { FF_PROFILE_HEVC_MAIN_STILL_PICTURE,   "Main Still Picture"  },
+    { FF_PROFILE_HEVC_REXT,                 "Rext"  },
     { FF_PROFILE_UNKNOWN },
 };
 
 static const AVOption options[] = {
     { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
         AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR },
+    { "strict-displaywin", "strictly apply default display window size", OFFSET(apply_defdispwin),
+        AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, PAR }, /* writes the same field as apply_defdispwin */
     { NULL },
 };
 
@@ -3262,6 +3459,6 @@ AVCodec ff_hevc_decoder = {
     .update_thread_context = hevc_update_thread_context,
     .init_thread_copy      = hevc_init_thread_copy,
     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_DELAY |
-                             CODEC_CAP_FRAME_THREADS,
+                             CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
     .profiles              = NULL_IF_CONFIG_SMALL(profiles),
 };
diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h
index 96dd80d..8420f38 100644
--- a/libavcodec/hevc.h
+++ b/libavcodec/hevc.h
@@ -3,29 +3,26 @@
  *
  * Copyright (C) 2012 - 2013 Guillaume Martres
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #ifndef AVCODEC_HEVC_H
 #define AVCODEC_HEVC_H
 
-#include <stddef.h>
-#include <stdint.h>
-
 #include "libavutil/buffer.h"
 #include "libavutil/md5.h"
 
@@ -33,6 +30,7 @@
 #include "bswapdsp.h"
 #include "cabac.h"
 #include "get_bits.h"
+#include "hevcpred.h"
 #include "hevcdsp.h"
 #include "internal.h"
 #include "thread.h"
@@ -41,6 +39,9 @@
 #define MAX_DPB_SIZE 16 // A.4.1
 #define MAX_REFS 16
 
+#define MAX_NB_THREADS 16
+#define SHIFT_CTB_WPP 2
+
 /**
  * 7.4.2.1
  */
@@ -60,7 +61,7 @@
 #define MAX_QP 51
 #define DEFAULT_INTRA_TC_OFFSET 2
 
-#define HEVC_CONTEXTS 183
+#define HEVC_CONTEXTS 199
 
 #define MRG_MAX_NUM_CANDS     5
 
@@ -70,6 +71,9 @@
 #define EPEL_EXTRA_BEFORE 1
 #define EPEL_EXTRA_AFTER  2
 #define EPEL_EXTRA        3
+#define QPEL_EXTRA_BEFORE 3
+#define QPEL_EXTRA_AFTER  4
+#define QPEL_EXTRA        7
 
 #define EDGE_EMU_BUFFER_STRIDE 80
 
@@ -79,13 +83,10 @@
 #define SAMPLE(tab, x, y) ((tab)[(y) * s->sps->width + (x)])
 #define SAMPLE_CTB(tab, x, y) ((tab)[(y) * min_cb_width + (x)])
 
-#define IS_IDR(s) (s->nal_unit_type == NAL_IDR_W_RADL || s->nal_unit_type == NAL_IDR_N_LP)
-#define IS_BLA(s) (s->nal_unit_type == NAL_BLA_W_RADL || s->nal_unit_type == NAL_BLA_W_LP || \
-                   s->nal_unit_type == NAL_BLA_N_LP)
-#define IS_IRAP(s) (s->nal_unit_type >= 16 && s->nal_unit_type <= 23)
-
-#define FFUDIV(a,b) (((a) > 0 ? (a) : (a) - (b) + 1) / (b))
-#define FFUMOD(a,b) ((a) - (b) * FFUDIV(a,b))
+#define IS_IDR(s) ((s)->nal_unit_type == NAL_IDR_W_RADL || (s)->nal_unit_type == NAL_IDR_N_LP)
+#define IS_BLA(s) ((s)->nal_unit_type == NAL_BLA_W_RADL || (s)->nal_unit_type == NAL_BLA_W_LP || \
+                   (s)->nal_unit_type == NAL_BLA_N_LP)
+#define IS_IRAP(s) ((s)->nal_unit_type >= 16 && (s)->nal_unit_type <= 23)
 
 /**
  * Table 7-3: NAL unit type codes
@@ -167,6 +168,8 @@ enum SyntaxElement {
     CBF_LUMA,
     CBF_CB_CR,
     TRANSFORM_SKIP_FLAG,
+    EXPLICIT_RDPCM_FLAG,
+    EXPLICIT_RDPCM_DIR_FLAG,
     LAST_SIGNIFICANT_COEFF_X_PREFIX,
     LAST_SIGNIFICANT_COEFF_Y_PREFIX,
     LAST_SIGNIFICANT_COEFF_X_SUFFIX,
@@ -177,6 +180,10 @@ enum SyntaxElement {
     COEFF_ABS_LEVEL_GREATER2_FLAG,
     COEFF_ABS_LEVEL_REMAINING,
     COEFF_SIGN_FLAG,
+    LOG2_RES_SCALE_ABS,
+    RES_SCALE_SIGN_FLAG,
+    CU_CHROMA_QP_OFFSET_FLAG,
+    CU_CHROMA_QP_OFFSET_IDX,
 };
 
 enum PartMode {
@@ -202,6 +209,13 @@ enum InterPredIdc {
     PRED_BI,
 };
 
+enum PredFlag { // NOTE(review): presumably the values kept in MvField.pred_flag - confirm
+    PF_INTRA = 0, ///< 0: neither the PF_L0 nor the PF_L1 bit is set
+    PF_L0,        ///< 1; presumably "reference list 0 only" - confirm
+    PF_L1,        ///< 2; presumably "reference list 1 only" - confirm
+    PF_BI,        ///< 3, numerically PF_L0 | PF_L1
+};
+
 enum IntraPredMode {
     INTRA_PLANAR = 0,
     INTRA_DC,
@@ -244,6 +258,7 @@ enum SAOType {
     SAO_NOT_APPLIED = 0,
     SAO_BAND,
     SAO_EDGE,
+    SAO_APPLIED
 };
 
 enum SAOEOClass {
@@ -381,7 +396,7 @@ typedef struct ScalingList {
 } ScalingList;
 
 typedef struct HEVCSPS {
-    int vps_id;
+    unsigned vps_id;
     int chroma_format_idc;
     uint8_t separate_colour_plane_flag;
 
@@ -442,6 +457,13 @@ typedef struct HEVCSPS {
     int max_transform_hierarchy_depth_inter;
     int max_transform_hierarchy_depth_intra;
 
+    int transform_skip_rotation_enabled_flag;
+    int transform_skip_context_enabled_flag;
+    int implicit_rdpcm_enabled_flag;
+    int explicit_rdpcm_enabled_flag;
+    int intra_smoothing_disabled_flag;
+    int persistent_rice_adaptation_enabled_flag;
+
     ///< coded frame dimension in various units
     int width;
     int height;
@@ -454,6 +476,7 @@ typedef struct HEVCSPS {
     int min_tb_height;
     int min_pu_width;
     int min_pu_height;
+    int tb_mask;
 
     int hshift[3];
     int vshift[3];
@@ -510,6 +533,15 @@ typedef struct HEVCPPS {
     int log2_parallel_merge_level; ///< log2_parallel_merge_level_minus2 + 2
     int num_extra_slice_header_bits;
     uint8_t slice_header_extension_present_flag;
+    uint8_t log2_max_transform_skip_block_size;
+    uint8_t cross_component_prediction_enabled_flag;
+    uint8_t chroma_qp_offset_list_enabled_flag;
+    uint8_t diff_cu_chroma_qp_offset_depth;
+    uint8_t chroma_qp_offset_list_len_minus1;
+    int8_t  cb_qp_offset_list[5];
+    int8_t  cr_qp_offset_list[5];
+    uint8_t log2_sao_offset_scale_luma;
+    uint8_t log2_sao_offset_scale_chroma;
 
     // Inferred parameters
     unsigned int *column_width;  ///< ColumnWidth
@@ -523,6 +555,7 @@ typedef struct HEVCPPS {
     int *tile_id;           ///< TileId
     int *tile_pos_rs;       ///< TilePosRS
     int *min_tb_addr_zs;    ///< MinTbAddrZS
+    int *min_tb_addr_zs_tab;///< MinTbAddrZS
 } HEVCPPS;
 
 typedef struct SliceHeader {
@@ -568,11 +601,16 @@ typedef struct SliceHeader {
     int slice_cb_qp_offset;
     int slice_cr_qp_offset;
 
+    uint8_t cu_chroma_qp_offset_enabled_flag;
+
     int beta_offset;    ///< beta_offset_div2 * 2
     int tc_offset;      ///< tc_offset_div2 * 2
 
     unsigned int max_num_merge_cand; ///< 5 - 5_minus_max_num_merge_cand
 
+    int *entry_point_offset;
+    int * offset;
+    int * size;
     int num_entry_point_offsets;
 
     int8_t slice_qp;
@@ -621,10 +659,9 @@ typedef struct Mv {
 } Mv;
 
 typedef struct MvField {
-    Mv mv[2];
+    DECLARE_ALIGNED(4, Mv, mv)[2];
     int8_t ref_idx[2];
-    int8_t pred_flag[2];
-    uint8_t is_intra;
+    int8_t pred_flag;
 } MvField;
 
 typedef struct NeighbourAvailable {
@@ -642,15 +679,25 @@ typedef struct PredictionUnit {
     uint8_t intra_pred_mode[4];
     Mv mvd;
     uint8_t merge_flag;
-    uint8_t intra_pred_mode_c;
+    uint8_t intra_pred_mode_c[4];
+    uint8_t chroma_mode_c[4];
 } PredictionUnit;
 
 typedef struct TransformUnit {
+    DECLARE_ALIGNED(32, int16_t, coeffs[2][MAX_TB_SIZE * MAX_TB_SIZE]);
     int cu_qp_delta;
 
+    int res_scale_val;
+
     // Inferred parameters;
-    int cur_intra_pred_mode;
+    int intra_pred_mode;
+    int intra_pred_mode_c;
+    int chroma_mode_c;
     uint8_t is_cu_qp_delta_coded;
+    uint8_t is_cu_chroma_qp_offset_coded;
+    int8_t  cu_qp_offset_cb;
+    int8_t  cu_qp_offset_cr;
+    uint8_t cross_pf;
 } TransformUnit;
 
 typedef struct DBParams {
@@ -661,6 +708,7 @@ typedef struct DBParams {
 #define HEVC_FRAME_FLAG_OUTPUT    (1 << 0)
 #define HEVC_FRAME_FLAG_SHORT_REF (1 << 1)
 #define HEVC_FRAME_FLAG_LONG_REF  (1 << 2)
+#define HEVC_FRAME_FLAG_BUMPING   (1 << 3)
 
 typedef struct HEVCFrame {
     AVFrame *frame;
@@ -698,24 +746,12 @@ typedef struct HEVCNAL {
     const uint8_t *data;
 } HEVCNAL;
 
-struct HEVCContext;
-
-typedef struct HEVCPredContext {
-    void (*intra_pred[4])(struct HEVCContext *s, int x0, int y0, int c_idx);
-
-    void (*pred_planar[4])(uint8_t *src, const uint8_t *top,
-                           const uint8_t *left, ptrdiff_t stride);
-    void (*pred_dc)(uint8_t *src, const uint8_t *top, const uint8_t *left,
-                    ptrdiff_t stride, int log2_size, int c_idx);
-    void (*pred_angular[4])(uint8_t *src, const uint8_t *top,
-                            const uint8_t *left, ptrdiff_t stride,
-                            int c_idx, int mode);
-} HEVCPredContext;
-
 typedef struct HEVCLocalContext {
     DECLARE_ALIGNED(16, int16_t, mc_buffer[(MAX_PB_SIZE + 7) * MAX_PB_SIZE]);
     uint8_t cabac_state[HEVC_CONTEXTS];
 
+    uint8_t stat_coeff[4];
+
     uint8_t first_qp_group;
 
     GetBitContext gb;
@@ -724,33 +760,50 @@ typedef struct HEVCLocalContext {
     int8_t qp_y;
     int8_t curr_qp_y;
 
+    int qPy_pred;
+
     TransformUnit tu;
 
     uint8_t ctb_left_flag;
     uint8_t ctb_up_flag;
     uint8_t ctb_up_right_flag;
     uint8_t ctb_up_left_flag;
-    int     start_of_tiles_x;
     int     end_of_tiles_x;
     int     end_of_tiles_y;
     /* +7 is for subpixel interpolation, *2 for high bit depths */
     DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[(MAX_PB_SIZE + 7) * EDGE_EMU_BUFFER_STRIDE * 2];
+    DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer2)[(MAX_PB_SIZE + 7) * EDGE_EMU_BUFFER_STRIDE * 2];
+
     CodingTree ct;
     CodingUnit cu;
     PredictionUnit pu;
     NeighbourAvailable na;
 
-    uint8_t slice_or_tiles_left_boundary;
-    uint8_t slice_or_tiles_up_boundary;
+#define BOUNDARY_LEFT_SLICE     (1 << 0)
+#define BOUNDARY_LEFT_TILE      (1 << 1)
+#define BOUNDARY_UPPER_SLICE    (1 << 2)
+#define BOUNDARY_UPPER_TILE     (1 << 3)
+    /* properties of the boundary of the current CTB for the purposes
+     * of the deblocking filter */
+    int boundary_flags;
 } HEVCLocalContext;
 
 typedef struct HEVCContext {
     const AVClass *c;  // needed by private avoptions
     AVCodecContext *avctx;
 
-    HEVCLocalContext HEVClc;
+    struct HEVCContext  *sList[MAX_NB_THREADS];
 
-    uint8_t cabac_state[HEVC_CONTEXTS];
+    HEVCLocalContext    *HEVClcList[MAX_NB_THREADS];
+    HEVCLocalContext    *HEVClc;
+
+    uint8_t             threads_type;
+    uint8_t             threads_number;
+
+    int                 width;
+    int                 height;
+
+    uint8_t *cabac_state;
 
     /** 1 if the independent slice segment header was successfully parsed */
     uint8_t slice_initialized;
@@ -767,6 +820,8 @@ typedef struct HEVCContext {
     AVBufferRef *sps_list[MAX_SPS_COUNT];
     AVBufferRef *pps_list[MAX_PPS_COUNT];
 
+    AVBufferRef *current_sps;
+
     AVBufferPool *tab_mvf_pool;
     AVBufferPool *rpl_tab_pool;
 
@@ -784,6 +839,7 @@ typedef struct HEVCContext {
     int pocTid0;
     int slice_idx; ///< number of the slice being currently decoded
     int eos;       ///< current packet contains an EOS/EOB NAL
+    int last_eos;  ///< last packet contains an EOS/EOB NAL
     int max_ra;
     int bs_width;
     int bs_height;
@@ -823,6 +879,18 @@ typedef struct HEVCContext {
     uint16_t seq_decode;
     uint16_t seq_output;
 
+    int enable_parallel_tiles;
+    int wpp_err;
+    int skipped_bytes;
+    int *skipped_bytes_pos;
+    int skipped_bytes_pos_size;
+
+    int *skipped_bytes_nal;
+    int **skipped_bytes_pos_nal;
+    int *skipped_bytes_pos_size_nal;
+
+    const uint8_t *data;
+
     HEVCNAL *nals;
     int nb_nals;
     int nals_allocated;
@@ -839,6 +907,8 @@ typedef struct HEVCContext {
                             ///< as a format defined in 14496-15
     int apply_defdispwin;
 
+    int active_seq_parameter_set_id;
+
     int nal_length_size;    ///< Number of bytes used for nal length (1, 2 or 4)
     int nuh_layer_id;
 
@@ -852,6 +922,8 @@ typedef struct HEVCContext {
     int sei_display_orientation_present;
     int sei_anticlockwise_rotation;
     int sei_hflip, sei_vflip;
+
+    int picture_struct;
 } HEVCContext;
 
 int ff_hevc_decode_short_term_rps(HEVCContext *s, ShortTermRPS *rps,
@@ -861,6 +933,9 @@ int ff_hevc_decode_nal_sps(HEVCContext *s);
 int ff_hevc_decode_nal_pps(HEVCContext *s);
 int ff_hevc_decode_nal_sei(HEVCContext *s);
 
+int ff_hevc_extract_rbsp(HEVCContext *s, const uint8_t *src, int length,
+                         HEVCNAL *nal);
+
 /**
  * Mark all frames in DPB as unused for reference.
  */
@@ -916,32 +991,11 @@ int ff_hevc_inter_pred_idc_decode(HEVCContext *s, int nPbW, int nPbH);
 int ff_hevc_ref_idx_lx_decode(HEVCContext *s, int num_ref_idx_lx);
 int ff_hevc_mvp_lx_flag_decode(HEVCContext *s);
 int ff_hevc_no_residual_syntax_flag_decode(HEVCContext *s);
-int ff_hevc_abs_mvd_greater0_flag_decode(HEVCContext *s);
-int ff_hevc_abs_mvd_greater1_flag_decode(HEVCContext *s);
-int ff_hevc_mvd_decode(HEVCContext *s);
-int ff_hevc_mvd_sign_flag_decode(HEVCContext *s);
 int ff_hevc_split_transform_flag_decode(HEVCContext *s, int log2_trafo_size);
 int ff_hevc_cbf_cb_cr_decode(HEVCContext *s, int trafo_depth);
 int ff_hevc_cbf_luma_decode(HEVCContext *s, int trafo_depth);
-int ff_hevc_transform_skip_flag_decode(HEVCContext *s, int c_idx);
-int ff_hevc_last_significant_coeff_x_prefix_decode(HEVCContext *s, int c_idx,
-                                                   int log2_size);
-int ff_hevc_last_significant_coeff_y_prefix_decode(HEVCContext *s, int c_idx,
-                                                   int log2_size);
-int ff_hevc_last_significant_coeff_suffix_decode(HEVCContext *s,
-                                                 int last_significant_coeff_prefix);
-int ff_hevc_significant_coeff_group_flag_decode(HEVCContext *s, int c_idx,
-                                                int ctx_cg);
-int ff_hevc_significant_coeff_flag_decode(HEVCContext *s, int c_idx, int x_c,
-                                          int y_c, int log2_trafo_size,
-                                          int scan_idx, int prev_sig);
-int ff_hevc_coeff_abs_level_greater1_flag_decode(HEVCContext *s, int c_idx,
-                                                 int ctx_set);
-int ff_hevc_coeff_abs_level_greater2_flag_decode(HEVCContext *s, int c_idx,
-                                                 int inc);
-int ff_hevc_coeff_abs_level_remaining(HEVCContext *s, int base_level,
-                                      int rc_rice_param);
-int ff_hevc_coeff_sign_flag(HEVCContext *s, uint8_t nb);
+int ff_hevc_log2_res_scale_abs(HEVCContext *s, int idx);
+int ff_hevc_res_scale_sign_flag(HEVCContext *s, int idx);
 
 /**
  * Get the number of candidate references for the current frame.
@@ -956,6 +1010,8 @@ int ff_hevc_set_new_ref(HEVCContext *s, AVFrame **frame, int poc);
  */
 int ff_hevc_output_frame(HEVCContext *s, AVFrame *frame, int flush);
 
+void ff_hevc_bump_frame(HEVCContext *s);
+
 void ff_hevc_unref_frame(HEVCContext *s, HEVCFrame *frame, int flags);
 
 void ff_hevc_set_neighbour_available(HEVCContext *s, int x0, int y0,
@@ -967,20 +1023,22 @@ void ff_hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0,
                               int nPbW, int nPbH, int log2_cb_size,
                               int part_idx, int merge_idx,
                               MvField *mv, int mvp_lx_flag, int LX);
-void ff_hevc_set_qPy(HEVCContext *s, int xC, int yC, int xBase, int yBase,
+void ff_hevc_set_qPy(HEVCContext *s, int xBase, int yBase,
                      int log2_cb_size);
 void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0,
-                                           int log2_trafo_size,
-                                           int slice_or_tiles_up_boundary,
-                                           int slice_or_tiles_left_boundary);
+                                           int log2_trafo_size);
 int ff_hevc_cu_qp_delta_sign_flag(HEVCContext *s);
 int ff_hevc_cu_qp_delta_abs(HEVCContext *s);
-void ff_hevc_hls_filter(HEVCContext *s, int x, int y);
+int ff_hevc_cu_chroma_qp_offset_flag(HEVCContext *s);
+int ff_hevc_cu_chroma_qp_offset_idx(HEVCContext *s);
+void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size);
 void ff_hevc_hls_filters(HEVCContext *s, int x_ctb, int y_ctb, int ctb_size);
+void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0,
+                                 int log2_trafo_size, enum ScanType scan_idx,
+                                 int c_idx);
 
-void ff_hevc_pps_free(HEVCPPS **ppps);
+void ff_hevc_hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size);
 
-void ff_hevc_pred_init(HEVCPredContext *hpc, int bit_depth);
 
 extern const uint8_t ff_hevc_qpel_extra_before[4];
 extern const uint8_t ff_hevc_qpel_extra_after[4];
diff --git a/libavcodec/hevc_cabac.c b/libavcodec/hevc_cabac.c
index ff55b02..2b3d8c0 100644
--- a/libavcodec/hevc_cabac.c
+++ b/libavcodec/hevc_cabac.c
@@ -4,20 +4,20 @@
  * Copyright (C) 2012 - 2013 Guillaume Martres
  * Copyright (C) 2012 - 2013 Gildas Cocherel
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,7 +27,7 @@
 #include "cabac_functions.h"
 #include "hevc.h"
 
-#define CABAC_MAX_BIN 100
+#define CABAC_MAX_BIN 31
 
 /**
  * number of bin by SyntaxElement.
@@ -66,65 +66,77 @@ av_unused static const int8_t num_bins_in_se[] = {
      2, // cbf_luma
      4, // cbf_cb, cbf_cr
      2, // transform_skip_flag[][]
+     2, // explicit_rdpcm_flag[][]
+     2, // explicit_rdpcm_dir_flag[][]
     18, // last_significant_coeff_x_prefix
     18, // last_significant_coeff_y_prefix
      0, // last_significant_coeff_x_suffix
      0, // last_significant_coeff_y_suffix
      4, // significant_coeff_group_flag
-    42, // significant_coeff_flag
+    44, // significant_coeff_flag
     24, // coeff_abs_level_greater1_flag
      6, // coeff_abs_level_greater2_flag
      0, // coeff_abs_level_remaining
      0, // coeff_sign_flag
+     8, // log2_res_scale_abs
+     2, // res_scale_sign_flag
+     1, // cu_chroma_qp_offset_flag
+     1, // cu_chroma_qp_offset_idx
 };
 
 /**
  * Offset to ctxIdx 0 in init_values and states, indexed by SyntaxElement.
  */
 static const int elem_offset[sizeof(num_bins_in_se)] = {
-      0,
-      1,
-      2,
-      2,
-      2,
-      2,
-      2,
-      2,
-      5,
-      6,
-      9,
-     12,
-     13,
-     17,
-     17,
-     18,
-     18,
-     18,
-     20,
-     21,
-     22,
-     27,
-     29,
-     31,
-     33,
-     35,
-     35,
-     35,
-     36,
-     37,
-     40,
-     42,
-     46,
-     48,
-     66,
-     84,
-     84,
-     84,
-     88,
-    130,
-    154,
-    160,
-    160,
+    0, // sao_merge_flag
+    1, // sao_type_idx
+    2, // sao_eo_class
+    2, // sao_band_position
+    2, // sao_offset_abs
+    2, // sao_offset_sign
+    2, // end_of_slice_flag
+    2, // split_coding_unit_flag
+    5, // cu_transquant_bypass_flag
+    6, // skip_flag
+    9, // cu_qp_delta
+    12, // pred_mode
+    13, // part_mode
+    17, // pcm_flag
+    17, // prev_intra_luma_pred_mode
+    18, // mpm_idx
+    18, // rem_intra_luma_pred_mode
+    18, // intra_chroma_pred_mode
+    20, // merge_flag
+    21, // merge_idx
+    22, // inter_pred_idc
+    27, // ref_idx_l0
+    29, // ref_idx_l1
+    31, // abs_mvd_greater0_flag
+    33, // abs_mvd_greater1_flag
+    35, // abs_mvd_minus2
+    35, // mvd_sign_flag
+    35, // mvp_lx_flag
+    36, // no_residual_data_flag
+    37, // split_transform_flag
+    40, // cbf_luma
+    42, // cbf_cb, cbf_cr
+    46, // transform_skip_flag[][]
+    48, // explicit_rdpcm_flag[][]
+    50, // explicit_rdpcm_dir_flag[][]
+    52, // last_significant_coeff_x_prefix
+    70, // last_significant_coeff_y_prefix
+    88, // last_significant_coeff_x_suffix
+    88, // last_significant_coeff_y_suffix
+    88, // significant_coeff_group_flag
+    92, // significant_coeff_flag
+    136, // coeff_abs_level_greater1_flag
+    160, // coeff_abs_level_greater2_flag
+    166, // coeff_abs_level_remaining
+    166, // coeff_sign_flag
+    166, // log2_res_scale_abs
+    174, // res_scale_sign_flag
+    176, // cu_chroma_qp_offset_flag
+    177, // cu_chroma_qp_offset_idx
 };
 
 #define CNU 154
@@ -178,6 +190,10 @@ static const uint8_t init_values[3][HEVC_CONTEXTS] = {
       94, 138, 182, 154,
       // transform_skip_flag
       139, 139,
+      // explicit_rdpcm_flag
+      139, 139,
+      // explicit_rdpcm_dir_flag
+      139, 139,
       // last_significant_coeff_x_prefix
       110, 110, 124, 125, 140, 153, 125, 127, 140, 109, 111, 143, 127, 111,
        79, 108, 123,  63,
@@ -190,11 +206,21 @@ static const uint8_t init_values[3][HEVC_CONTEXTS] = {
       111, 111, 125, 110, 110,  94, 124, 108, 124, 107, 125, 141, 179, 153,
       125, 107, 125, 141, 179, 153, 125, 107, 125, 141, 179, 153, 125, 140,
       139, 182, 182, 152, 136, 152, 136, 153, 136, 139, 111, 136, 139, 111,
+      141, 111,
       // coeff_abs_level_greater1_flag
       140,  92, 137, 138, 140, 152, 138, 139, 153,  74, 149,  92, 139, 107,
       122, 152, 140, 179, 166, 182, 140, 227, 122, 197,
       // coeff_abs_level_greater2_flag
-      138, 153, 136, 167, 152, 152, },
+      138, 153, 136, 167, 152, 152,
+      // log2_res_scale_abs
+      154, 154, 154, 154, 154, 154, 154, 154,
+      // res_scale_sign_flag
+      154, 154,
+      // cu_chroma_qp_offset_flag
+      154,
+      // cu_chroma_qp_offset_idx
+      154,
+    },
     { // sao_merge_flag
       153,
       // sao_type_idx
@@ -241,6 +267,10 @@ static const uint8_t init_values[3][HEVC_CONTEXTS] = {
       149, 107, 167, 154,
       // transform_skip_flag
       139, 139,
+      // explicit_rdpcm_flag
+      139, 139,
+      // explicit_rdpcm_dir_flag
+      139, 139,
       // last_significant_coeff_x_prefix
       125, 110,  94, 110,  95,  79, 125, 111, 110,  78, 110, 111, 111,  95,
        94, 108, 123, 108,
@@ -253,11 +283,21 @@ static const uint8_t init_values[3][HEVC_CONTEXTS] = {
       155, 154, 139, 153, 139, 123, 123,  63, 153, 166, 183, 140, 136, 153,
       154, 166, 183, 140, 136, 153, 154, 166, 183, 140, 136, 153, 154, 170,
       153, 123, 123, 107, 121, 107, 121, 167, 151, 183, 140, 151, 183, 140,
+      140, 140,
       // coeff_abs_level_greater1_flag
       154, 196, 196, 167, 154, 152, 167, 182, 182, 134, 149, 136, 153, 121,
       136, 137, 169, 194, 166, 167, 154, 167, 137, 182,
       // coeff_abs_level_greater2_flag
-      107, 167, 91, 122, 107, 167, },
+      107, 167, 91, 122, 107, 167,
+      // log2_res_scale_abs
+      154, 154, 154, 154, 154, 154, 154, 154,
+      // res_scale_sign_flag
+      154, 154,
+      // cu_chroma_qp_offset_flag
+      154,
+      // cu_chroma_qp_offset_idx
+      154,
+    },
     { // sao_merge_flag
       153,
       // sao_type_idx
@@ -304,6 +344,10 @@ static const uint8_t init_values[3][HEVC_CONTEXTS] = {
       149, 92, 167, 154,
       // transform_skip_flag
       139, 139,
+      // explicit_rdpcm_flag
+      139, 139,
+      // explicit_rdpcm_dir_flag
+      139, 139,
       // last_significant_coeff_x_prefix
       125, 110, 124, 110,  95,  94, 125, 111, 111,  79, 125, 126, 111, 111,
        79, 108, 123,  93,
@@ -316,11 +360,141 @@ static const uint8_t init_values[3][HEVC_CONTEXTS] = {
       170, 154, 139, 153, 139, 123, 123,  63, 124, 166, 183, 140, 136, 153,
       154, 166, 183, 140, 136, 153, 154, 166, 183, 140, 136, 153, 154, 170,
       153, 138, 138, 122, 121, 122, 121, 167, 151, 183, 140, 151, 183, 140,
+      140, 140,
       // coeff_abs_level_greater1_flag
       154, 196, 167, 167, 154, 152, 167, 182, 182, 134, 149, 136, 153, 121,
       136, 122, 169, 208, 166, 167, 154, 152, 167, 182,
       // coeff_abs_level_greater2_flag
-      107, 167, 91, 107, 107, 167, },
+      107, 167, 91, 107, 107, 167,
+      // log2_res_scale_abs
+      154, 154, 154, 154, 154, 154, 154, 154,
+      // res_scale_sign_flag
+      154, 154,
+      // cu_chroma_qp_offset_flag
+      154,
+      // cu_chroma_qp_offset_idx
+      154,
+    },
+};
+
+static const uint8_t scan_1x1[1] = { /* single-entry scan table: the only coordinate is 0 */
+    0,
+};
+
+static const uint8_t horiz_scan2x2_x[4] = { /* x coordinate of each position of a 2x2 grid visited row by row; pairs with horiz_scan2x2_y */
+    0, 1, 0, 1,
+};
+
+static const uint8_t horiz_scan2x2_y[4] = { /* y coordinate of each position of a 2x2 grid visited row by row; pairs with horiz_scan2x2_x */
+    0, 0, 1, 1
+};
+
+static const uint8_t horiz_scan4x4_x[16] = { /* x coordinate of each position of a 4x4 grid visited row by row; pairs with horiz_scan4x4_y */
+    0, 1, 2, 3,
+    0, 1, 2, 3,
+    0, 1, 2, 3,
+    0, 1, 2, 3,
+};
+
+static const uint8_t horiz_scan4x4_y[16] = { /* y coordinate of each position of a 4x4 grid visited row by row; pairs with horiz_scan4x4_x */
+    0, 0, 0, 0,
+    1, 1, 1, 1,
+    2, 2, 2, 2,
+    3, 3, 3, 3,
+};
+
+static const uint8_t horiz_scan8x8_inv[8][8] = { /* 8x8 position -> scan index; each 4x4 quadrant holds 16 consecutive indices. NOTE(review): presumably indexed [y][x] - confirm */
+    {  0,  1,  2,  3, 16, 17, 18, 19, },
+    {  4,  5,  6,  7, 20, 21, 22, 23, },
+    {  8,  9, 10, 11, 24, 25, 26, 27, },
+    { 12, 13, 14, 15, 28, 29, 30, 31, },
+    { 32, 33, 34, 35, 48, 49, 50, 51, },
+    { 36, 37, 38, 39, 52, 53, 54, 55, },
+    { 40, 41, 42, 43, 56, 57, 58, 59, },
+    { 44, 45, 46, 47, 60, 61, 62, 63, },
+};
+
+static const uint8_t diag_scan2x2_x[4] = { /* x coordinate of each position of a 2x2 grid in diagonal scan order; pairs with diag_scan2x2_y */
+    0, 0, 1, 1,
+};
+
+static const uint8_t diag_scan2x2_y[4] = { /* y coordinate of each position of a 2x2 grid in diagonal scan order; pairs with diag_scan2x2_x */
+    0, 1, 0, 1,
+};
+
+static const uint8_t diag_scan2x2_inv[2][2] = { /* inverse of diag_scan2x2_x/_y: [y][x] -> scan index */
+    { 0, 2, },
+    { 1, 3, },
+};
+
+const uint8_t ff_hevc_diag_scan4x4_x[16] = { /* x coordinate per scan index of a 4x4 block; each anti-diagonal is walked with x increasing and y decreasing */
+    0, 0, 1, 0,
+    1, 2, 0, 1,
+    2, 3, 1, 2,
+    3, 2, 3, 3,
+};
+
+const uint8_t ff_hevc_diag_scan4x4_y[16] = { /* y coordinate per scan index of a 4x4 block; pairs with ff_hevc_diag_scan4x4_x */
+    0, 1, 0, 2,
+    1, 0, 3, 2,
+    1, 0, 3, 2,
+    1, 3, 2, 3,
+};
+
+static const uint8_t diag_scan4x4_inv[4][4] = { /* inverse of ff_hevc_diag_scan4x4_x/_y: [y][x] -> scan index */
+    { 0,  2,  5,  9, },
+    { 1,  4,  8, 12, },
+    { 3,  7, 11, 14, },
+    { 6, 10, 13, 15, },
+};
+
+const uint8_t ff_hevc_diag_scan8x8_x[64] = { /* x coordinate per scan index of an 8x8 block scanned diagonally as a whole (not split into 4x4 groups) */
+    0, 0, 1, 0,
+    1, 2, 0, 1,
+    2, 3, 0, 1,
+    2, 3, 4, 0,
+    1, 2, 3, 4,
+    5, 0, 1, 2,
+    3, 4, 5, 6,
+    0, 1, 2, 3,
+    4, 5, 6, 7,
+    1, 2, 3, 4,
+    5, 6, 7, 2,
+    3, 4, 5, 6,
+    7, 3, 4, 5,
+    6, 7, 4, 5,
+    6, 7, 5, 6,
+    7, 6, 7, 7,
+};
+
+const uint8_t ff_hevc_diag_scan8x8_y[64] = { /* y coordinate per scan index of an 8x8 block; pairs with ff_hevc_diag_scan8x8_x */
+    0, 1, 0, 2,
+    1, 0, 3, 2,
+    1, 0, 4, 3,
+    2, 1, 0, 5,
+    4, 3, 2, 1,
+    0, 6, 5, 4,
+    3, 2, 1, 0,
+    7, 6, 5, 4,
+    3, 2, 1, 0,
+    7, 6, 5, 4,
+    3, 2, 1, 7,
+    6, 5, 4, 3,
+    2, 7, 6, 5,
+    4, 3, 7, 6,
+    5, 4, 7, 6,
+    5, 7, 6, 7,
+};
+
+static const uint8_t diag_scan8x8_inv[8][8] = { /* inverse of ff_hevc_diag_scan8x8_x/_y: [y][x] -> scan index */
+    {  0,  2,  5,  9, 14, 20, 27, 35, },
+    {  1,  4,  8, 13, 19, 26, 34, 42, },
+    {  3,  7, 12, 18, 25, 33, 41, 48, },
+    {  6, 11, 17, 24, 32, 40, 47, 53, },
+    { 10, 16, 23, 31, 39, 46, 52, 57, },
+    { 15, 22, 30, 38, 45, 51, 56, 60, },
+    { 21, 29, 37, 44, 50, 55, 59, 62, },
+    { 28, 36, 43, 49, 54, 58, 61, 63, },
 };
 
 void ff_hevc_save_states(HEVCContext *s, int ctb_addr_ts)
@@ -329,13 +503,13 @@ void ff_hevc_save_states(HEVCContext *s, int ctb_addr_ts)
         (ctb_addr_ts % s->sps->ctb_width == 2 ||
          (s->sps->ctb_width == 2 &&
           ctb_addr_ts % s->sps->ctb_width == 0))) {
-        memcpy(s->cabac_state, s->HEVClc.cabac_state, HEVC_CONTEXTS);
+        memcpy(s->cabac_state, s->HEVClc->cabac_state, HEVC_CONTEXTS);
     }
 }
 
 static void load_states(HEVCContext *s)
 {
-    memcpy(s->HEVClc.cabac_state, s->cabac_state, HEVC_CONTEXTS);
+    memcpy(s->HEVClc->cabac_state, s->cabac_state, HEVC_CONTEXTS); /* copy the HEVC_CONTEXTS states kept in s->cabac_state into the local context */
 }
 
 static void cabac_reinit(HEVCLocalContext *lc)
@@ -345,10 +519,10 @@ static void cabac_reinit(HEVCLocalContext *lc)
 
 static void cabac_init_decoder(HEVCContext *s)
 {
-    GetBitContext *gb = &s->HEVClc.gb;
+    GetBitContext *gb = &s->HEVClc->gb; /* bit reader of the current local context */
     skip_bits(gb, 1);
     align_get_bits(gb);
-    ff_init_cabac_decoder(&s->HEVClc.cc,
+    ff_init_cabac_decoder(&s->HEVClc->cc, /* start at the reader's current byte, covering the bytes left in gb */
                           gb->buffer + get_bits_count(gb) / 8,
                           (get_bits_left(gb) + 7) / 8);
 }
@@ -365,13 +539,16 @@ static void cabac_init_state(HEVCContext *s)
         int init_value = init_values[init_type][i];
         int m = (init_value >> 4) * 5 - 45;
         int n = ((init_value & 15) << 3) - 16;
-        int pre = 2 * (((m * av_clip_c(s->sh.slice_qp, 0, 51)) >> 4) + n) - 127;
+        int pre = 2 * (((m * av_clip(s->sh.slice_qp, 0, 51)) >> 4) + n) - 127;
 
         pre ^= pre >> 31;
         if (pre > 124)
             pre = 124 + (pre & 1);
-        s->HEVClc.cabac_state[i] = pre;
+        s->HEVClc->cabac_state[i] = pre;
     }
+
+    for (i = 0; i < 4; i++)
+        s->HEVClc->stat_coeff[i] = 0;
 }
 
 void ff_hevc_cabac_init(HEVCContext *s, int ctb_addr_ts)
@@ -395,13 +572,19 @@ void ff_hevc_cabac_init(HEVCContext *s, int ctb_addr_ts)
     } else {
         if (s->pps->tiles_enabled_flag &&
             s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[ctb_addr_ts - 1]) {
-            cabac_reinit(&s->HEVClc);
+            if (s->threads_number == 1)
+                cabac_reinit(s->HEVClc);
+            else
+                cabac_init_decoder(s);
             cabac_init_state(s);
         }
         if (s->pps->entropy_coding_sync_enabled_flag) {
             if (ctb_addr_ts % s->sps->ctb_width == 0) {
-                get_cabac_terminate(&s->HEVClc.cc);
-                cabac_reinit(&s->HEVClc);
+                get_cabac_terminate(&s->HEVClc->cc);
+                if (s->threads_number == 1)
+                    cabac_reinit(s->HEVClc);
+                else
+                    cabac_init_decoder(s);
 
                 if (s->sps->ctb_width == 1)
                     cabac_init_state(s);
@@ -412,7 +595,7 @@ void ff_hevc_cabac_init(HEVCContext *s, int ctb_addr_ts)
     }
 }
 
-#define GET_CABAC(ctx) get_cabac(&s->HEVClc.cc, &s->HEVClc.cabac_state[ctx])
+#define GET_CABAC(ctx) get_cabac(&s->HEVClc->cc, &s->HEVClc->cabac_state[ctx])
 
 int ff_hevc_sao_merge_flag_decode(HEVCContext *s)
 {
@@ -424,7 +607,7 @@ int ff_hevc_sao_type_idx_decode(HEVCContext *s)
     if (!GET_CABAC(elem_offset[SAO_TYPE_IDX]))
         return 0;
 
-    if (!get_cabac_bypass(&s->HEVClc.cc))
+    if (!get_cabac_bypass(&s->HEVClc->cc))
         return SAO_BAND;
     return SAO_EDGE;
 }
@@ -432,10 +615,10 @@ int ff_hevc_sao_type_idx_decode(HEVCContext *s)
 int ff_hevc_sao_band_position_decode(HEVCContext *s)
 {
     int i;
-    int value = get_cabac_bypass(&s->HEVClc.cc);
+    int value = get_cabac_bypass(&s->HEVClc->cc);
 
     for (i = 0; i < 4; i++)
-        value = (value << 1) | get_cabac_bypass(&s->HEVClc.cc);
+        value = (value << 1) | get_cabac_bypass(&s->HEVClc->cc);
     return value;
 }
 
@@ -444,26 +627,26 @@ int ff_hevc_sao_offset_abs_decode(HEVCContext *s)
     int i = 0;
     int length = (1 << (FFMIN(s->sps->bit_depth, 10) - 5)) - 1;
 
-    while (i < length && get_cabac_bypass(&s->HEVClc.cc))
+    while (i < length && get_cabac_bypass(&s->HEVClc->cc))
         i++;
     return i;
 }
 
 int ff_hevc_sao_offset_sign_decode(HEVCContext *s)
 {
-    return get_cabac_bypass(&s->HEVClc.cc);
+    return get_cabac_bypass(&s->HEVClc->cc); /* one bypass bin from the local CABAC decoder */
 }
 
 int ff_hevc_sao_eo_class_decode(HEVCContext *s)
 {
-    int ret = get_cabac_bypass(&s->HEVClc.cc) << 1;
-    ret    |= get_cabac_bypass(&s->HEVClc.cc);
+    int ret = get_cabac_bypass(&s->HEVClc->cc) << 1; /* first bypass bin becomes bit 1 */
+    ret    |= get_cabac_bypass(&s->HEVClc->cc);      /* second bypass bin becomes bit 0 */
     return ret;
 }
 
 int ff_hevc_end_of_slice_flag_decode(HEVCContext *s)
 {
-    return get_cabac_terminate(&s->HEVClc.cc);
+    return get_cabac_terminate(&s->HEVClc->cc); /* flag is read with get_cabac_terminate() on the local CABAC decoder */
 }
 
 int ff_hevc_cu_transquant_bypass_flag_decode(HEVCContext *s)
@@ -478,9 +661,9 @@ int ff_hevc_skip_flag_decode(HEVCContext *s, int x0, int y0, int x_cb, int y_cb)
     int x0b = x0 & ((1 << s->sps->log2_ctb_size) - 1);
     int y0b = y0 & ((1 << s->sps->log2_ctb_size) - 1);
 
-    if (s->HEVClc.ctb_left_flag || x0b)
+    if (s->HEVClc->ctb_left_flag || x0b)
         inc = !!SAMPLE_CTB(s->skip_flag, x_cb - 1, y_cb);
-    if (s->HEVClc.ctb_up_flag || y0b)
+    if (s->HEVClc->ctb_up_flag || y0b)
         inc += !!SAMPLE_CTB(s->skip_flag, x_cb, y_cb - 1);
 
     return GET_CABAC(elem_offset[SKIP_FLAG] + inc);
@@ -498,7 +681,7 @@ int ff_hevc_cu_qp_delta_abs(HEVCContext *s)
     }
     if (prefix_val >= 5) {
         int k = 0;
-        while (k < CABAC_MAX_BIN && get_cabac_bypass(&s->HEVClc.cc)) {
+        while (k < CABAC_MAX_BIN && get_cabac_bypass(&s->HEVClc->cc)) {
             suffix_val += 1 << k;
             k++;
         }
@@ -506,14 +689,30 @@ int ff_hevc_cu_qp_delta_abs(HEVCContext *s)
             av_log(s->avctx, AV_LOG_ERROR, "CABAC_MAX_BIN : %d\n", k);
 
         while (k--)
-            suffix_val += get_cabac_bypass(&s->HEVClc.cc) << k;
+            suffix_val += get_cabac_bypass(&s->HEVClc->cc) << k;
     }
     return prefix_val + suffix_val;
 }
 
 int ff_hevc_cu_qp_delta_sign_flag(HEVCContext *s)
 {
-    return get_cabac_bypass(&s->HEVClc.cc);
+    return get_cabac_bypass(&s->HEVClc->cc); /* one bypass bin from the local CABAC decoder */
+}
+
+int ff_hevc_cu_chroma_qp_offset_flag(HEVCContext *s)
+{
+    return GET_CABAC(elem_offset[CU_CHROMA_QP_OFFSET_FLAG]); /* one context-coded bin, using the element's single context */
+}
+
+int ff_hevc_cu_chroma_qp_offset_idx(HEVCContext *s)
+{
+    /* Truncated unary, one shared context: count leading 1 bins, reading at most chroma_qp_offset_list_len_minus1 (capped at 5) so parsing never runs past the list length signalled in the PPS. */
+    int c_max = FFMIN(5, s->pps->chroma_qp_offset_list_len_minus1);
+    int i = 0;
+    while (i < c_max && GET_CABAC(elem_offset[CU_CHROMA_QP_OFFSET_IDX]))
+        i++;
+
+    return i;
 }
 
 int ff_hevc_pred_mode_decode(HEVCContext *s)
@@ -529,9 +728,9 @@ int ff_hevc_split_coding_unit_flag_decode(HEVCContext *s, int ct_depth, int x0,
     int x_cb = x0 >> s->sps->log2_min_cb_size;
     int y_cb = y0 >> s->sps->log2_min_cb_size;
 
-    if (s->HEVClc.ctb_left_flag || x0b)
+    if (s->HEVClc->ctb_left_flag || x0b)
         depth_left = s->tab_ct_depth[(y_cb) * s->sps->min_cb_width + x_cb - 1];
-    if (s->HEVClc.ctb_up_flag || y0b)
+    if (s->HEVClc->ctb_up_flag || y0b)
         depth_top = s->tab_ct_depth[(y_cb - 1) * s->sps->min_cb_width + x_cb];
 
     inc += (depth_left > ct_depth);
@@ -545,7 +744,7 @@ int ff_hevc_part_mode_decode(HEVCContext *s, int log2_cb_size)
     if (GET_CABAC(elem_offset[PART_MODE])) // 1
         return PART_2Nx2N;
     if (log2_cb_size == s->sps->log2_min_cb_size) {
-        if (s->HEVClc.cu.pred_mode == MODE_INTRA) // 0
+        if (s->HEVClc->cu.pred_mode == MODE_INTRA) // 0
             return PART_NxN;
         if (GET_CABAC(elem_offset[PART_MODE] + 1)) // 01
             return PART_2NxN;
@@ -565,21 +764,21 @@ int ff_hevc_part_mode_decode(HEVCContext *s, int log2_cb_size)
     if (GET_CABAC(elem_offset[PART_MODE] + 1)) { // 01X, 01XX
         if (GET_CABAC(elem_offset[PART_MODE] + 3)) // 011
             return PART_2NxN;
-        if (get_cabac_bypass(&s->HEVClc.cc)) // 0101
+        if (get_cabac_bypass(&s->HEVClc->cc)) // 0101
             return PART_2NxnD;
         return PART_2NxnU; // 0100
     }
 
     if (GET_CABAC(elem_offset[PART_MODE] + 3)) // 001
         return PART_Nx2N;
-    if (get_cabac_bypass(&s->HEVClc.cc)) // 0001
+    if (get_cabac_bypass(&s->HEVClc->cc)) // 0001
         return PART_nRx2N;
     return PART_nLx2N;  // 0000
 }
 
 int ff_hevc_pcm_flag_decode(HEVCContext *s)
 {
-    return get_cabac_terminate(&s->HEVClc.cc);
+    return get_cabac_terminate(&s->HEVClc->cc); /* flag is read with get_cabac_terminate() on the local CABAC decoder */
 }
 
 int ff_hevc_prev_intra_luma_pred_flag_decode(HEVCContext *s)
@@ -590,7 +789,7 @@ int ff_hevc_prev_intra_luma_pred_flag_decode(HEVCContext *s)
 int ff_hevc_mpm_idx_decode(HEVCContext *s)
 {
     int i = 0;
-    while (i < 2 && get_cabac_bypass(&s->HEVClc.cc))
+    while (i < 2 && get_cabac_bypass(&s->HEVClc->cc)) /* count leading 1 bypass bins, stopping after two */
         i++;
     return i;
 }
@@ -598,10 +797,10 @@ int ff_hevc_mpm_idx_decode(HEVCContext *s)
 int ff_hevc_rem_intra_luma_pred_mode_decode(HEVCContext *s)
 {
     int i;
-    int value = get_cabac_bypass(&s->HEVClc.cc);
+    int value = get_cabac_bypass(&s->HEVClc->cc);
 
     for (i = 0; i < 4; i++)
-        value = (value << 1) | get_cabac_bypass(&s->HEVClc.cc);
+        value = (value << 1) | get_cabac_bypass(&s->HEVClc->cc);
     return value;
 }
 
@@ -611,8 +810,8 @@ int ff_hevc_intra_chroma_pred_mode_decode(HEVCContext *s)
     if (!GET_CABAC(elem_offset[INTRA_CHROMA_PRED_MODE]))
         return 4;
 
-    ret  = get_cabac_bypass(&s->HEVClc.cc) << 1;
-    ret |= get_cabac_bypass(&s->HEVClc.cc);
+    ret  = get_cabac_bypass(&s->HEVClc->cc) << 1;
+    ret |= get_cabac_bypass(&s->HEVClc->cc);
     return ret;
 }
 
@@ -621,7 +820,7 @@ int ff_hevc_merge_idx_decode(HEVCContext *s)
     int i = GET_CABAC(elem_offset[MERGE_IDX]);
 
     if (i != 0) {
-        while (i < s->sh.max_num_merge_cand-1 && get_cabac_bypass(&s->HEVClc.cc))
+        while (i < s->sh.max_num_merge_cand-1 && get_cabac_bypass(&s->HEVClc->cc))
             i++;
     }
     return i;
@@ -636,7 +835,7 @@ int ff_hevc_inter_pred_idc_decode(HEVCContext *s, int nPbW, int nPbH)
 {
     if (nPbW + nPbH == 12)
         return GET_CABAC(elem_offset[INTER_PRED_IDC] + 4);
-    if (GET_CABAC(elem_offset[INTER_PRED_IDC] + s->HEVClc.ct.depth))
+    if (GET_CABAC(elem_offset[INTER_PRED_IDC] + s->HEVClc->ct.depth))
         return PRED_BI;
 
     return GET_CABAC(elem_offset[INTER_PRED_IDC] + 4);
@@ -651,7 +850,7 @@ int ff_hevc_ref_idx_lx_decode(HEVCContext *s, int num_ref_idx_lx)
     while (i < max_ctx && GET_CABAC(elem_offset[REF_IDX_L0] + i))
         i++;
     if (i == 2) {
-        while (i < max && get_cabac_bypass(&s->HEVClc.cc))
+        while (i < max && get_cabac_bypass(&s->HEVClc->cc))
             i++;
     }
 
@@ -668,35 +867,35 @@ int ff_hevc_no_residual_syntax_flag_decode(HEVCContext *s)
     return GET_CABAC(elem_offset[NO_RESIDUAL_DATA_FLAG]);
 }
 
-int ff_hevc_abs_mvd_greater0_flag_decode(HEVCContext *s)
+static av_always_inline int abs_mvd_greater0_flag_decode(HEVCContext *s)
 {
     return GET_CABAC(elem_offset[ABS_MVD_GREATER0_FLAG]);
 }
 
-int ff_hevc_abs_mvd_greater1_flag_decode(HEVCContext *s)
+static av_always_inline int abs_mvd_greater1_flag_decode(HEVCContext *s)
 {
     return GET_CABAC(elem_offset[ABS_MVD_GREATER1_FLAG] + 1);
 }
 
-int ff_hevc_mvd_decode(HEVCContext *s)
+static av_always_inline int mvd_decode(HEVCContext *s)
 {
     int ret = 2;
     int k = 1;
 
-    while (k < CABAC_MAX_BIN && get_cabac_bypass(&s->HEVClc.cc)) {
+    while (k < CABAC_MAX_BIN && get_cabac_bypass(&s->HEVClc->cc)) {
         ret += 1 << k;
         k++;
     }
     if (k == CABAC_MAX_BIN)
         av_log(s->avctx, AV_LOG_ERROR, "CABAC_MAX_BIN : %d\n", k);
     while (k--)
-        ret += get_cabac_bypass(&s->HEVClc.cc) << k;
-    return get_cabac_bypass_sign(&s->HEVClc.cc, -ret);
+        ret += get_cabac_bypass(&s->HEVClc->cc) << k;
+    return get_cabac_bypass_sign(&s->HEVClc->cc, -ret);
 }
 
-int ff_hevc_mvd_sign_flag_decode(HEVCContext *s)
+static av_always_inline int mvd_sign_flag_decode(HEVCContext *s)
 {
-    return get_cabac_bypass_sign(&s->HEVClc.cc, -1);
+    return get_cabac_bypass_sign(&s->HEVClc->cc, -1);
 }
 
 int ff_hevc_split_transform_flag_decode(HEVCContext *s, int log2_trafo_size)
@@ -714,53 +913,73 @@ int ff_hevc_cbf_luma_decode(HEVCContext *s, int trafo_depth)
     return GET_CABAC(elem_offset[CBF_LUMA] + !trafo_depth);
 }
 
-int ff_hevc_transform_skip_flag_decode(HEVCContext *s, int c_idx)
+static int ff_hevc_transform_skip_flag_decode(HEVCContext *s, int c_idx)
 {
     return GET_CABAC(elem_offset[TRANSFORM_SKIP_FLAG] + !!c_idx);
 }
 
-#define LAST_SIG_COEFF(elem)                                                    \
-    int i = 0;                                                                  \
-    int max = (log2_size << 1) - 1;                                             \
-    int ctx_offset, ctx_shift;                                                  \
-                                                                                \
-    if (c_idx == 0) {                                                           \
-        ctx_offset = 3 * (log2_size - 2)  + ((log2_size - 1) >> 2);             \
-        ctx_shift = (log2_size + 1) >> 2;                                       \
-    } else {                                                                    \
-        ctx_offset = 15;                                                        \
-        ctx_shift = log2_size - 2;                                              \
-    }                                                                           \
-    while (i < max &&                                                           \
-           GET_CABAC(elem_offset[elem] + (i >> ctx_shift) + ctx_offset))        \
-        i++;                                                                    \
-    return i;
+static int explicit_rdpcm_flag_decode(HEVCContext *s, int c_idx)
+{
+    return GET_CABAC(elem_offset[EXPLICIT_RDPCM_FLAG] + !!c_idx); // context +0 when c_idx == 0, +1 for any other plane
+}
 
-int ff_hevc_last_significant_coeff_x_prefix_decode(HEVCContext *s, int c_idx,
-                                                   int log2_size)
+static int explicit_rdpcm_dir_flag_decode(HEVCContext *s, int c_idx)
 {
-    LAST_SIG_COEFF(LAST_SIGNIFICANT_COEFF_X_PREFIX)
+    return GET_CABAC(elem_offset[EXPLICIT_RDPCM_DIR_FLAG] + !!c_idx);
+}
+
+int ff_hevc_log2_res_scale_abs(HEVCContext *s, int idx) {
+    int bins;
+    /* Count the bins decoded as nonzero, at most 4; bin k uses context 4 * idx + k. */
+    for (bins = 0; bins < 4; bins++)
+        if (!GET_CABAC(elem_offset[LOG2_RES_SCALE_ABS] + 4 * idx + bins))
+            break;
+    return bins;
+}
+
+int ff_hevc_res_scale_sign_flag(HEVCContext *s, int idx) {
+    return GET_CABAC(elem_offset[RES_SCALE_SIGN_FLAG] + idx);
 }
 
-int ff_hevc_last_significant_coeff_y_prefix_decode(HEVCContext *s, int c_idx,
-                                                   int log2_size)
+static av_always_inline void last_significant_coeff_xy_prefix_decode(HEVCContext *s, int c_idx,
+                                                   int log2_size, int *last_scx_prefix, int *last_scy_prefix)
 {
-    LAST_SIG_COEFF(LAST_SIGNIFICANT_COEFF_Y_PREFIX)
+    int i = 0;
+    int max = (log2_size << 1) - 1;
+    int ctx_offset, ctx_shift;
+
+    if (!c_idx) {
+        ctx_offset = 3 * (log2_size - 2)  + ((log2_size - 1) >> 2);
+        ctx_shift = (log2_size + 1) >> 2;
+    } else {
+        ctx_offset = 15;
+        ctx_shift = log2_size - 2;
+    }
+    while (i < max &&
+           GET_CABAC(elem_offset[LAST_SIGNIFICANT_COEFF_X_PREFIX] + (i >> ctx_shift) + ctx_offset))
+        i++;
+    *last_scx_prefix = i;
+
+    i = 0;
+    while (i < max &&
+           GET_CABAC(elem_offset[LAST_SIGNIFICANT_COEFF_Y_PREFIX] + (i >> ctx_shift) + ctx_offset))
+        i++;
+    *last_scy_prefix = i;
 }
 
-int ff_hevc_last_significant_coeff_suffix_decode(HEVCContext *s,
+static av_always_inline int last_significant_coeff_suffix_decode(HEVCContext *s,
                                                  int last_significant_coeff_prefix)
 {
     int i;
     int length = (last_significant_coeff_prefix >> 1) - 1;
-    int value = get_cabac_bypass(&s->HEVClc.cc);
+    int value = get_cabac_bypass(&s->HEVClc->cc);
 
     for (i = 1; i < length; i++)
-        value = (value << 1) | get_cabac_bypass(&s->HEVClc.cc);
+        value = (value << 1) | get_cabac_bypass(&s->HEVClc->cc);
     return value;
 }
 
-int ff_hevc_significant_coeff_group_flag_decode(HEVCContext *s, int c_idx, int ctx_cg)
+static av_always_inline int significant_coeff_group_flag_decode(HEVCContext *s, int c_idx, int ctx_cg)
 {
     int inc;
 
@@ -768,58 +987,19 @@ int ff_hevc_significant_coeff_group_flag_decode(HEVCContext *s, int c_idx, int c
 
     return GET_CABAC(elem_offset[SIGNIFICANT_COEFF_GROUP_FLAG] + inc);
 }
-
-int ff_hevc_significant_coeff_flag_decode(HEVCContext *s, int c_idx, int x_c, int y_c,
-                                          int log2_trafo_size, int scan_idx, int prev_sig)
+static av_always_inline int significant_coeff_flag_decode(HEVCContext *s, int x_c, int y_c,
+                                           int offset, const uint8_t *ctx_idx_map)
 {
-    static const uint8_t ctx_idx_map[] = {
-        0, 1, 4, 5, 2, 3, 4, 5, 6, 6, 8, 8, 7, 7, 8, 8
-    };
-    int x_cg = x_c >> 2;
-    int y_cg = y_c >> 2;
-    int sig_ctx, inc;
-
-    if (x_c + y_c == 0) {
-        sig_ctx = 0;
-    } else if (log2_trafo_size == 2) {
-        sig_ctx = ctx_idx_map[(y_c << 2) + x_c];
-    } else {
-        switch (prev_sig) {
-        case 0: {
-                int x_off = x_c & 3;
-                int y_off = y_c & 3;
-                sig_ctx   = ((x_off + y_off) == 0) ? 2 : ((x_off + y_off) <= 2) ? 1 : 0;
-            }
-            break;
-        case 1:
-            sig_ctx = 2 - FFMIN(y_c & 3, 2);
-            break;
-        case 2:
-            sig_ctx = 2 - FFMIN(x_c & 3, 2);
-            break;
-        default:
-            sig_ctx = 2;
-        }
-
-        if (c_idx == 0 && (x_cg > 0 || y_cg > 0))
-            sig_ctx += 3;
-
-        if (log2_trafo_size == 3) {
-            sig_ctx += (scan_idx == SCAN_DIAG) ? 9 : 15;
-        } else {
-            sig_ctx += c_idx ? 12 : 21;
-        }
-    }
-
-    if (c_idx == 0)
-        inc = sig_ctx;
-    else
-        inc = sig_ctx + 27;
-
+    int inc = ctx_idx_map[(y_c << 2) + x_c] + offset;
     return GET_CABAC(elem_offset[SIGNIFICANT_COEFF_FLAG] + inc);
 }
 
-int ff_hevc_coeff_abs_level_greater1_flag_decode(HEVCContext *s, int c_idx, int inc)
+static av_always_inline int significant_coeff_flag_decode_0(HEVCContext *s, int c_idx, int offset)
+{   // c_idx is not read here: the context is selected by offset alone
+    return GET_CABAC(elem_offset[SIGNIFICANT_COEFF_FLAG] + offset);
+}
+
+static av_always_inline int coeff_abs_level_greater1_flag_decode(HEVCContext *s, int c_idx, int inc)
 {
 
     if (c_idx > 0)
@@ -828,7 +1008,7 @@ int ff_hevc_coeff_abs_level_greater1_flag_decode(HEVCContext *s, int c_idx, int
     return GET_CABAC(elem_offset[COEFF_ABS_LEVEL_GREATER1_FLAG] + inc);
 }
 
-int ff_hevc_coeff_abs_level_greater2_flag_decode(HEVCContext *s, int c_idx, int inc)
+static av_always_inline int coeff_abs_level_greater2_flag_decode(HEVCContext *s, int c_idx, int inc)
 {
     if (c_idx > 0)
         inc += 4;
@@ -836,37 +1016,572 @@ int ff_hevc_coeff_abs_level_greater2_flag_decode(HEVCContext *s, int c_idx, int
     return GET_CABAC(elem_offset[COEFF_ABS_LEVEL_GREATER2_FLAG] + inc);
 }
 
-int ff_hevc_coeff_abs_level_remaining(HEVCContext *s, int base_level, int rc_rice_param)
+static av_always_inline int coeff_abs_level_remaining_decode(HEVCContext *s, int rc_rice_param)
 {
     int prefix = 0;
     int suffix = 0;
     int last_coeff_abs_level_remaining;
     int i;
 
-    while (prefix < CABAC_MAX_BIN && get_cabac_bypass(&s->HEVClc.cc))
+    while (prefix < CABAC_MAX_BIN && get_cabac_bypass(&s->HEVClc->cc))
         prefix++;
     if (prefix == CABAC_MAX_BIN)
         av_log(s->avctx, AV_LOG_ERROR, "CABAC_MAX_BIN : %d\n", prefix);
     if (prefix < 3) {
         for (i = 0; i < rc_rice_param; i++)
-            suffix = (suffix << 1) | get_cabac_bypass(&s->HEVClc.cc);
+            suffix = (suffix << 1) | get_cabac_bypass(&s->HEVClc->cc);
         last_coeff_abs_level_remaining = (prefix << rc_rice_param) + suffix;
     } else {
         int prefix_minus3 = prefix - 3;
         for (i = 0; i < prefix_minus3 + rc_rice_param; i++)
-            suffix = (suffix << 1) | get_cabac_bypass(&s->HEVClc.cc);
+            suffix = (suffix << 1) | get_cabac_bypass(&s->HEVClc->cc);
         last_coeff_abs_level_remaining = (((1 << prefix_minus3) + 3 - 1)
                                               << rc_rice_param) + suffix;
     }
     return last_coeff_abs_level_remaining;
 }
 
-int ff_hevc_coeff_sign_flag(HEVCContext *s, uint8_t nb)
+static av_always_inline int coeff_sign_flag_decode(HEVCContext *s, uint8_t nb)
 {
     int i;
     int ret = 0;
 
     for (i = 0; i < nb; i++)
-        ret = (ret << 1) | get_cabac_bypass(&s->HEVClc.cc);
+        ret = (ret << 1) | get_cabac_bypass(&s->HEVClc->cc);
     return ret;
 }
+
+/**
+ * Decode the residual of one transform block and add it to the picture.
+ *
+ * Coefficient syntax is read with CABAC; unless cu_transquant_bypass_flag is
+ * set the levels are dequantized and clipped to the int16 range. The block is
+ * then passed through transform-skip / RDPCM / the inverse transform.
+ *
+ * @param x0, y0           block position, shifted by the plane's hshift/vshift
+ * @param log2_trafo_size  log2 of the block width and height
+ * @param scan_idx         SCAN_DIAG, SCAN_HORIZ; anything else is SCAN_VERT
+ * @param c_idx            plane index; 0 takes the luma code paths
+ */
+void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, int log2_trafo_size,
+                                 enum ScanType scan_idx, int c_idx)
+{
+#define GET_COORD(offset, n)                                    \
+    do {                                                        \
+        x_c = (x_cg << 2) + scan_x_off[n];                      \
+        y_c = (y_cg << 2) + scan_y_off[n];                      \
+    } while (0)
+    HEVCLocalContext *lc = s->HEVClc;
+    int transform_skip_flag = 0;
+
+    int last_significant_coeff_x, last_significant_coeff_y;
+    int last_scan_pos;
+    int n_end;
+    int num_coeff = 0;
+    int greater1_ctx = 1;
+
+    int num_last_subset;
+    int x_cg_last_sig, y_cg_last_sig;
+
+    const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off;
+
+    ptrdiff_t stride = s->frame->linesize[c_idx];
+    int hshift = s->sps->hshift[c_idx];
+    int vshift = s->sps->vshift[c_idx];
+    uint8_t *dst = &s->frame->data[c_idx][(y0 >> vshift) * stride +
+                                          ((x0 >> hshift) << s->sps->pixel_shift)];
+    int16_t *coeffs = lc->tu.coeffs[c_idx > 0];
+    uint8_t significant_coeff_group_flag[8][8] = {{0}};
+    int explicit_rdpcm_flag = 0;
+    int explicit_rdpcm_dir_flag = 0; // only read when explicit_rdpcm_flag is set
+
+    int trafo_size = 1 << log2_trafo_size;
+    int i;
+    int qp,shift,add,scale,scale_m;
+    static const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 };
+    const uint8_t *scale_matrix = NULL;
+    uint8_t dc_scale;
+    int pred_mode_intra = (c_idx == 0) ? lc->tu.intra_pred_mode :
+                                         lc->tu.intra_pred_mode_c;
+
+    memset(coeffs, 0, trafo_size * trafo_size * sizeof(int16_t));
+
+    // Derive QP for dequant
+    if (!lc->cu.cu_transquant_bypass_flag) {
+        static const int qp_c[] = { 29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37 };
+        static const uint8_t rem6[51 + 4 * 6 + 1] = {
+            0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
+            3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
+            0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
+            4, 5, 0, 1, 2, 3, 4, 5, 0, 1
+        };
+
+        static const uint8_t div6[51 + 4 * 6 + 1] = {
+            0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3,  3,  3,
+            3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6,  6,  6,
+            7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
+            10, 10, 11, 11, 11, 11, 11, 11, 12, 12
+        };
+        int qp_y = lc->qp_y;
+
+        if (s->pps->transform_skip_enabled_flag &&
+            log2_trafo_size <= s->pps->log2_max_transform_skip_block_size) {
+            transform_skip_flag = ff_hevc_transform_skip_flag_decode(s, c_idx);
+        }
+
+        if (c_idx == 0) {
+            qp = qp_y + s->sps->qp_bd_offset;
+        } else {
+            int qp_i, offset;
+
+            if (c_idx == 1)
+                offset = s->pps->cb_qp_offset + s->sh.slice_cb_qp_offset +
+                         lc->tu.cu_qp_offset_cb;
+            else
+                offset = s->pps->cr_qp_offset + s->sh.slice_cr_qp_offset +
+                         lc->tu.cu_qp_offset_cr;
+
+            qp_i = av_clip(qp_y + offset, - s->sps->qp_bd_offset, 57);
+            if (s->sps->chroma_format_idc == 1) {
+                if (qp_i < 30)
+                    qp = qp_i;
+                else if (qp_i > 43)
+                    qp = qp_i - 6;
+                else
+                    qp = qp_c[qp_i - 30];
+            } else {
+                if (qp_i > 51)
+                    qp = 51;
+                else
+                    qp = qp_i;
+            }
+
+            qp += s->sps->qp_bd_offset;
+        }
+
+        shift    = s->sps->bit_depth + log2_trafo_size - 5;
+        add      = 1 << (shift-1);
+        scale    = level_scale[rem6[qp]] << (div6[qp]);
+        scale_m  = 16; // default when no custom scaling lists.
+        dc_scale = 16;
+
+        if (s->sps->scaling_list_enable_flag && !(transform_skip_flag && log2_trafo_size > 2)) {
+            const ScalingList *sl = s->pps->scaling_list_data_present_flag ?
+            &s->pps->scaling_list : &s->sps->scaling_list;
+            int matrix_id = lc->cu.pred_mode != MODE_INTRA;
+
+            matrix_id = 3 * matrix_id + c_idx;
+
+            scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
+            if (log2_trafo_size >= 4)
+                dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
+        }
+    } else {
+        shift        = 0;
+        add          = 0;
+        scale        = 0;
+        dc_scale     = 0;
+    }
+
+    if (lc->cu.pred_mode == MODE_INTER && s->sps->explicit_rdpcm_enabled_flag &&
+        (transform_skip_flag || lc->cu.cu_transquant_bypass_flag)) {
+        explicit_rdpcm_flag = explicit_rdpcm_flag_decode(s, c_idx);
+        if (explicit_rdpcm_flag) {
+            explicit_rdpcm_dir_flag = explicit_rdpcm_dir_flag_decode(s, c_idx);
+        }
+    }
+
+    last_significant_coeff_xy_prefix_decode(s, c_idx, log2_trafo_size,
+                                           &last_significant_coeff_x, &last_significant_coeff_y);
+
+    if (last_significant_coeff_x > 3) {
+        int suffix = last_significant_coeff_suffix_decode(s, last_significant_coeff_x);
+        last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
+                                   (2 + (last_significant_coeff_x & 1)) + suffix;
+    }
+
+    if (last_significant_coeff_y > 3) {
+        int suffix = last_significant_coeff_suffix_decode(s, last_significant_coeff_y);
+        last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
+                                   (2 + (last_significant_coeff_y & 1)) + suffix;
+    }
+
+    if (scan_idx == SCAN_VERT)
+        FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
+
+    x_cg_last_sig = last_significant_coeff_x >> 2;
+    y_cg_last_sig = last_significant_coeff_y >> 2;
+
+    switch (scan_idx) {
+    case SCAN_DIAG: {
+        int last_x_c = last_significant_coeff_x & 3;
+        int last_y_c = last_significant_coeff_y & 3;
+
+        scan_x_off = ff_hevc_diag_scan4x4_x;
+        scan_y_off = ff_hevc_diag_scan4x4_y;
+        num_coeff = diag_scan4x4_inv[last_y_c][last_x_c];
+        if (trafo_size == 4) {
+            scan_x_cg = scan_1x1;
+            scan_y_cg = scan_1x1;
+        } else if (trafo_size == 8) {
+            num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
+            scan_x_cg = diag_scan2x2_x;
+            scan_y_cg = diag_scan2x2_y;
+        } else if (trafo_size == 16) {
+            num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
+            scan_x_cg = ff_hevc_diag_scan4x4_x;
+            scan_y_cg = ff_hevc_diag_scan4x4_y;
+        } else { // trafo_size == 32
+            num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
+            scan_x_cg = ff_hevc_diag_scan8x8_x;
+            scan_y_cg = ff_hevc_diag_scan8x8_y;
+        }
+        break;
+    }
+    case SCAN_HORIZ:
+        scan_x_cg = horiz_scan2x2_x;
+        scan_y_cg = horiz_scan2x2_y;
+        scan_x_off = horiz_scan4x4_x;
+        scan_y_off = horiz_scan4x4_y;
+        num_coeff = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
+        break;
+    default: //SCAN_VERT
+        scan_x_cg = horiz_scan2x2_y;
+        scan_y_cg = horiz_scan2x2_x;
+        scan_x_off = horiz_scan4x4_y;
+        scan_y_off = horiz_scan4x4_x;
+        num_coeff = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
+        break;
+    }
+    num_coeff++; // scan index of the last coefficient -> number of scan positions
+    num_last_subset = (num_coeff - 1) >> 4;
+
+    // Walk the 16-coefficient groups from the one holding the last coefficient down to group 0
+    for (i = num_last_subset; i >= 0; i--) {
+        int n, m;
+        int x_cg, y_cg, x_c, y_c, pos;
+        int implicit_non_zero_coeff = 0;
+        int64_t trans_coeff_level;
+        int prev_sig = 0;
+        int offset = i << 4;
+        int rice_init = 0;
+
+        uint8_t significant_coeff_flag_idx[16];
+        uint8_t nb_significant_coeff_flag = 0;
+
+        x_cg = scan_x_cg[i];
+        y_cg = scan_y_cg[i];
+
+        if ((i < num_last_subset) && (i > 0)) {
+            int ctx_cg = 0;
+            if (x_cg < (1 << (log2_trafo_size - 2)) - 1)
+                ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg];
+            if (y_cg < (1 << (log2_trafo_size - 2)) - 1)
+                ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1];
+
+            significant_coeff_group_flag[x_cg][y_cg] =
+                significant_coeff_group_flag_decode(s, c_idx, ctx_cg);
+            implicit_non_zero_coeff = 1;
+        } else {
+            significant_coeff_group_flag[x_cg][y_cg] =
+            ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) ||
+             (x_cg == 0 && y_cg == 0));
+        }
+
+        last_scan_pos = num_coeff - offset - 1;
+
+        if (i == num_last_subset) {
+            n_end = last_scan_pos - 1;
+            significant_coeff_flag_idx[0] = last_scan_pos;
+            nb_significant_coeff_flag = 1;
+        } else {
+            n_end = 15;
+        }
+
+        if (x_cg < ((1 << log2_trafo_size) - 1) >> 2)
+            prev_sig = !!significant_coeff_group_flag[x_cg + 1][y_cg];
+        if (y_cg < ((1 << log2_trafo_size) - 1) >> 2)
+            prev_sig += (!!significant_coeff_group_flag[x_cg][y_cg + 1] << 1);
+
+        if (significant_coeff_group_flag[x_cg][y_cg] && n_end >= 0) {
+            static const uint8_t ctx_idx_map[] = {
+                0, 1, 4, 5, 2, 3, 4, 5, 6, 6, 8, 8, 7, 7, 8, 8, // log2_trafo_size == 2
+                1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, // prev_sig == 0
+                2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, // prev_sig == 1
+                2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, // prev_sig == 2
+                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2  // default
+            };
+            const uint8_t *ctx_idx_map_p;
+            int scf_offset = 0;
+            if (s->sps->transform_skip_context_enabled_flag &&
+                (transform_skip_flag || lc->cu.cu_transquant_bypass_flag)) {
+                ctx_idx_map_p = &ctx_idx_map[4 * 16];
+                scf_offset = c_idx ? 14 + 27 : 40;
+            } else {
+                if (c_idx != 0)
+                    scf_offset = 27;
+                if (log2_trafo_size == 2) {
+                    ctx_idx_map_p = &ctx_idx_map[0];
+                } else {
+                    ctx_idx_map_p = &ctx_idx_map[(prev_sig + 1) << 4];
+                    if (c_idx == 0) {
+                        if ((x_cg > 0 || y_cg > 0))
+                            scf_offset += 3;
+                        if (log2_trafo_size == 3) {
+                            scf_offset += (scan_idx == SCAN_DIAG) ? 9 : 15;
+                        } else {
+                            scf_offset += 21;
+                        }
+                    } else {
+                        if (log2_trafo_size == 3)
+                            scf_offset += 9;
+                        else
+                            scf_offset += 12;
+                    }
+                }
+            }
+            for (n = n_end; n > 0; n--) {
+                x_c = scan_x_off[n];
+                y_c = scan_y_off[n];
+                if (significant_coeff_flag_decode(s, x_c, y_c, scf_offset, ctx_idx_map_p)) {
+                    significant_coeff_flag_idx[nb_significant_coeff_flag] = n;
+                    nb_significant_coeff_flag++;
+                    implicit_non_zero_coeff = 0;
+                }
+            }
+            if (implicit_non_zero_coeff == 0) {
+                if (s->sps->transform_skip_context_enabled_flag &&
+                    (transform_skip_flag || lc->cu.cu_transquant_bypass_flag)) {
+                    scf_offset = c_idx ? 16 + 27 : 42;
+                } else {
+                    if (i == 0) {
+                        if (c_idx == 0)
+                            scf_offset = 0;
+                        else
+                            scf_offset = 27;
+                    } else {
+                        scf_offset = 2 + scf_offset;
+                    }
+                }
+                if (significant_coeff_flag_decode_0(s, c_idx, scf_offset) == 1) {
+                    significant_coeff_flag_idx[nb_significant_coeff_flag] = 0;
+                    nb_significant_coeff_flag++;
+                }
+            } else {
+                significant_coeff_flag_idx[nb_significant_coeff_flag] = 0;
+                nb_significant_coeff_flag++;
+            }
+        }
+
+        n_end = nb_significant_coeff_flag;
+
+        if (n_end) {
+            int first_nz_pos_in_cg;
+            int last_nz_pos_in_cg;
+            int c_rice_param = 0;
+            int first_greater1_coeff_idx = -1;
+            uint8_t coeff_abs_level_greater1_flag[8];
+            uint16_t coeff_sign_flag;
+            int sum_abs = 0;
+            int sign_hidden;
+            int sb_type;
+
+            // initialize first elem of coeff_abs_level_greater1_flag
+            int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0;
+
+            if (s->sps->persistent_rice_adaptation_enabled_flag) {
+                if (!transform_skip_flag && !lc->cu.cu_transquant_bypass_flag)
+                    sb_type = 2 * (c_idx == 0 ? 1 : 0);
+                else
+                    sb_type = 2 * (c_idx == 0 ? 1 : 0) + 1;
+                c_rice_param = lc->stat_coeff[sb_type] / 4;
+            }
+
+            if (!(i == num_last_subset) && greater1_ctx == 0)
+                ctx_set++;
+            greater1_ctx = 1;
+            last_nz_pos_in_cg = significant_coeff_flag_idx[0];
+
+            for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) {
+                int inc = (ctx_set << 2) + greater1_ctx;
+                coeff_abs_level_greater1_flag[m] =
+                    coeff_abs_level_greater1_flag_decode(s, c_idx, inc);
+                if (coeff_abs_level_greater1_flag[m]) {
+                    greater1_ctx = 0;
+                    if (first_greater1_coeff_idx == -1)
+                        first_greater1_coeff_idx = m;
+                } else if (greater1_ctx > 0 && greater1_ctx < 3) {
+                    greater1_ctx++;
+                }
+            }
+            first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1];
+
+            if (lc->cu.cu_transquant_bypass_flag ||
+                (lc->cu.pred_mode ==  MODE_INTRA  &&
+                 s->sps->implicit_rdpcm_enabled_flag  &&  transform_skip_flag  &&
+                 (pred_mode_intra == 10 || pred_mode_intra  ==  26 )) ||
+                 explicit_rdpcm_flag)
+                sign_hidden = 0;
+            else
+                sign_hidden = (last_nz_pos_in_cg - first_nz_pos_in_cg >= 4);
+
+            if (first_greater1_coeff_idx != -1) {
+                coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set);
+            }
+            if (!s->pps->sign_data_hiding_flag || !sign_hidden ) {
+                coeff_sign_flag = coeff_sign_flag_decode(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag);
+            } else {
+                coeff_sign_flag = coeff_sign_flag_decode(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1));
+            }
+
+            for (m = 0; m < n_end; m++) {
+                n = significant_coeff_flag_idx[m];
+                GET_COORD(offset, n);
+                if (m < 8) {
+                    trans_coeff_level = 1 + coeff_abs_level_greater1_flag[m];
+                    if (trans_coeff_level == ((m == first_greater1_coeff_idx) ? 3 : 2)) {
+                        int last_coeff_abs_level_remaining = coeff_abs_level_remaining_decode(s, c_rice_param);
+
+                        trans_coeff_level += last_coeff_abs_level_remaining;
+                        if (trans_coeff_level > (3 << c_rice_param))
+                            c_rice_param = s->sps->persistent_rice_adaptation_enabled_flag ? c_rice_param + 1 : FFMIN(c_rice_param + 1, 4);
+                        if (s->sps->persistent_rice_adaptation_enabled_flag && !rice_init) {
+                            int c_rice_p_init = lc->stat_coeff[sb_type] / 4;
+                            if (last_coeff_abs_level_remaining >= (3 << c_rice_p_init))
+                                lc->stat_coeff[sb_type]++;
+                            else if (2 * last_coeff_abs_level_remaining < (1 << c_rice_p_init))
+                                if (lc->stat_coeff[sb_type] > 0)
+                                    lc->stat_coeff[sb_type]--;
+                            rice_init = 1;
+                        }
+                    }
+                } else {
+                    int last_coeff_abs_level_remaining = coeff_abs_level_remaining_decode(s, c_rice_param);
+
+                    trans_coeff_level = 1 + last_coeff_abs_level_remaining;
+                    if (trans_coeff_level > (3 << c_rice_param))
+                        c_rice_param = s->sps->persistent_rice_adaptation_enabled_flag ? c_rice_param + 1 : FFMIN(c_rice_param + 1, 4);
+                    if (s->sps->persistent_rice_adaptation_enabled_flag && !rice_init) {
+                        int c_rice_p_init = lc->stat_coeff[sb_type] / 4;
+                        if (last_coeff_abs_level_remaining >= (3 << c_rice_p_init))
+                            lc->stat_coeff[sb_type]++;
+                        else if (2 * last_coeff_abs_level_remaining < (1 << c_rice_p_init))
+                            if (lc->stat_coeff[sb_type] > 0)
+                                lc->stat_coeff[sb_type]--;
+                        rice_init = 1;
+                    }
+                }
+                if (s->pps->sign_data_hiding_flag && sign_hidden) {
+                    sum_abs += trans_coeff_level;
+                    if (n == first_nz_pos_in_cg && (sum_abs&1))
+                        trans_coeff_level = -trans_coeff_level;
+                }
+                if (coeff_sign_flag >> 15) // sign bits were left-aligned above; bit 15 is the current one
+                    trans_coeff_level = -trans_coeff_level;
+                coeff_sign_flag <<= 1;
+                if(!lc->cu.cu_transquant_bypass_flag) {
+                    if (s->sps->scaling_list_enable_flag && !(transform_skip_flag && log2_trafo_size > 2)) {
+                        if(y_c || x_c || log2_trafo_size < 4) {
+                            switch(log2_trafo_size) {
+                                case 3: pos = (y_c << 3) + x_c; break;
+                                case 4: pos = ((y_c >> 1) << 3) + (x_c >> 1); break;
+                                case 5: pos = ((y_c >> 2) << 3) + (x_c >> 2); break;
+                                default: pos = (y_c << 2) + x_c; break;
+                            }
+                            scale_m = scale_matrix[pos];
+                        } else {
+                            scale_m = dc_scale;
+                        }
+                    }
+                    trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift;
+                    if(trans_coeff_level < 0) { // saturate to [-32768, 32767]; both branches test bits 15..63
+                        if((~trans_coeff_level) & 0xffffffffffff8000)
+                            trans_coeff_level = -32768;
+                    } else {
+                        if(trans_coeff_level & 0xffffffffffff8000)
+                            trans_coeff_level = 32767;
+                    }
+                }
+                coeffs[y_c * trafo_size + x_c] = trans_coeff_level;
+            }
+        }
+    }
+
+    if (lc->cu.cu_transquant_bypass_flag) {
+        if (explicit_rdpcm_flag || (s->sps->implicit_rdpcm_enabled_flag &&
+                                    (pred_mode_intra == 10 || pred_mode_intra == 26))) {
+            int mode = s->sps->implicit_rdpcm_enabled_flag ? (pred_mode_intra == 26) : explicit_rdpcm_dir_flag;
+
+            s->hevcdsp.transform_rdpcm(coeffs, log2_trafo_size, mode);
+        }
+    } else {
+        if (transform_skip_flag) {
+            int rot = s->sps->transform_skip_rotation_enabled_flag &&
+                      log2_trafo_size == 2 &&
+                      lc->cu.pred_mode == MODE_INTRA;
+            if (rot) {
+                for (i = 0; i < 8; i++)
+                    FFSWAP(int16_t, coeffs[i], coeffs[16 - i - 1]);
+            }
+
+            s->hevcdsp.transform_skip(coeffs, log2_trafo_size);
+
+            if (explicit_rdpcm_flag || (s->sps->implicit_rdpcm_enabled_flag &&
+                                        lc->cu.pred_mode == MODE_INTRA &&
+                                        (pred_mode_intra == 10 || pred_mode_intra == 26))) {
+                int mode = explicit_rdpcm_flag ? explicit_rdpcm_dir_flag : (pred_mode_intra == 26);
+
+                s->hevcdsp.transform_rdpcm(coeffs, log2_trafo_size, mode);
+            }
+        } else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 && log2_trafo_size == 2) {
+            s->hevcdsp.idct_4x4_luma(coeffs);
+        } else {
+            int max_xy = FFMAX(last_significant_coeff_x, last_significant_coeff_y);
+            if (max_xy == 0)
+                s->hevcdsp.idct_dc[log2_trafo_size-2](coeffs);
+            else {
+                int col_limit = last_significant_coeff_x + last_significant_coeff_y + 4;
+                if (max_xy < 4)
+                    col_limit = FFMIN(4, col_limit);
+                else if (max_xy < 8)
+                    col_limit = FFMIN(8, col_limit);
+                else if (max_xy < 12)
+                    col_limit = FFMIN(24, col_limit);
+                s->hevcdsp.idct[log2_trafo_size-2](coeffs, col_limit);
+            }
+        }
+    }
+    if (lc->tu.cross_pf) {
+        int16_t *coeffs_y = lc->tu.coeffs[0];
+
+        for (i = 0; i < (trafo_size * trafo_size); i++) {
+            coeffs[i] = coeffs[i] + ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
+        }
+    }
+    s->hevcdsp.transform_add[log2_trafo_size-2](dst, coeffs, stride);
+}
+
+/* Decode one motion vector difference into lc->pu.mvd.
+ * x0, y0 and log2_cb_size are not used by this function. */
+void ff_hevc_hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size)
+{
+    HEVCLocalContext *lc = s->HEVClc;
+    /* Decode order: greater0 for x then y, greater1 for x then y, then the values. */
+    int abs_x = abs_mvd_greater0_flag_decode(s);
+    int abs_y = abs_mvd_greater0_flag_decode(s);
+
+    if (abs_x)
+        abs_x += abs_mvd_greater1_flag_decode(s);
+    if (abs_y)
+        abs_y += abs_mvd_greater1_flag_decode(s);
+
+    /* 2: value comes from mvd_decode(); 1: from mvd_sign_flag_decode(); 0: zero */
+    if (abs_x == 2)      lc->pu.mvd.x = mvd_decode(s);
+    else if (abs_x == 1) lc->pu.mvd.x = mvd_sign_flag_decode(s);
+    else if (abs_x == 0) lc->pu.mvd.x = 0;
+
+    if (abs_y == 2)      lc->pu.mvd.y = mvd_decode(s);
+    else if (abs_y == 1) lc->pu.mvd.y = mvd_sign_flag_decode(s);
+    else if (abs_y == 0) lc->pu.mvd.y = 0;
+}
+
diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c
index eb5594c..9f59170 100644
--- a/libavcodec/hevc_filter.c
+++ b/libavcodec/hevc_filter.c
@@ -5,20 +5,20 @@
  * Copyright (C) 2013 Seppo Tomperi
  * Copyright (C) 2013 Wassim Hamidouche
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -29,6 +29,8 @@
 #include "golomb.h"
 #include "hevc.h"
 
+#include "bit_depth_template.c"
+
 #define LUMA 0
 #define CB 1
 #define CR 2
@@ -58,29 +60,31 @@ static int chroma_tc(HEVCContext *s, int qp_y, int c_idx, int tc_offset)
     else
         offset = s->pps->cr_qp_offset;
 
-    qp_i = av_clip_c(qp_y + offset, 0, 57);
-    if (qp_i < 30)
-        qp = qp_i;
-    else if (qp_i > 43)
-        qp = qp_i - 6;
-    else
-        qp = qp_c[qp_i - 30];
+    qp_i = av_clip(qp_y + offset, 0, 57);
+    if (s->sps->chroma_format_idc == 1) {
+        if (qp_i < 30)
+            qp = qp_i;
+        else if (qp_i > 43)
+            qp = qp_i - 6;
+        else
+            qp = qp_c[qp_i - 30];
+    } else {
+        qp = av_clip(qp_i, 0, 51);
+    }
 
-    idxt = av_clip_c(qp + DEFAULT_INTRA_TC_OFFSET + tc_offset, 0, 53);
+    idxt = av_clip(qp + DEFAULT_INTRA_TC_OFFSET + tc_offset, 0, 53);
     return tctable[idxt];
 }
 
-static int get_qPy_pred(HEVCContext *s, int xC, int yC,
-                        int xBase, int yBase, int log2_cb_size)
+static int get_qPy_pred(HEVCContext *s, int xBase, int yBase, int log2_cb_size)
 {
-    HEVCLocalContext *lc     = &s->HEVClc;
+    HEVCLocalContext *lc     = s->HEVClc;
     int ctb_size_mask        = (1 << s->sps->log2_ctb_size) - 1;
     int MinCuQpDeltaSizeMask = (1 << (s->sps->log2_ctb_size -
                                       s->pps->diff_cu_qp_delta_depth)) - 1;
     int xQgBase              = xBase - (xBase & MinCuQpDeltaSizeMask);
     int yQgBase              = yBase - (yBase & MinCuQpDeltaSizeMask);
     int min_cb_width         = s->sps->min_cb_width;
-    int min_cb_height        = s->sps->min_cb_height;
     int x_cb                 = xQgBase >> s->sps->log2_min_cb_size;
     int y_cb                 = yQgBase >> s->sps->log2_min_cb_size;
     int availableA           = (xBase   & ctb_size_mask) &&
@@ -94,46 +98,7 @@ static int get_qPy_pred(HEVCContext *s, int xC, int yC,
         lc->first_qp_group = !lc->tu.is_cu_qp_delta_coded;
         qPy_pred = s->sh.slice_qp;
     } else {
-        qPy_pred = lc->qp_y;
-        if (log2_cb_size < s->sps->log2_ctb_size -
-                           s->pps->diff_cu_qp_delta_depth) {
-            static const int offsetX[8][8] = {
-                { -1, 1, 3, 1, 7, 1, 3, 1 },
-                {  0, 0, 0, 0, 0, 0, 0, 0 },
-                {  1, 3, 1, 3, 1, 3, 1, 3 },
-                {  2, 2, 2, 2, 2, 2, 2, 2 },
-                {  3, 5, 7, 5, 3, 5, 7, 5 },
-                {  4, 4, 4, 4, 4, 4, 4, 4 },
-                {  5, 7, 5, 7, 5, 7, 5, 7 },
-                {  6, 6, 6, 6, 6, 6, 6, 6 }
-            };
-            static const int offsetY[8][8] = {
-                { 7, 0, 1, 2, 3, 4, 5, 6 },
-                { 0, 1, 2, 3, 4, 5, 6, 7 },
-                { 1, 0, 3, 2, 5, 4, 7, 6 },
-                { 0, 1, 2, 3, 4, 5, 6, 7 },
-                { 3, 0, 1, 2, 7, 4, 5, 6 },
-                { 0, 1, 2, 3, 4, 5, 6, 7 },
-                { 1, 0, 3, 2, 5, 4, 7, 6 },
-                { 0, 1, 2, 3, 4, 5, 6, 7 }
-            };
-            int xC0b = (xC - (xC & ctb_size_mask)) >> s->sps->log2_min_cb_size;
-            int yC0b = (yC - (yC & ctb_size_mask)) >> s->sps->log2_min_cb_size;
-            int idxX = (xQgBase  & ctb_size_mask)  >> s->sps->log2_min_cb_size;
-            int idxY = (yQgBase  & ctb_size_mask)  >> s->sps->log2_min_cb_size;
-            int idx_mask = ctb_size_mask >> s->sps->log2_min_cb_size;
-            int x, y;
-
-            x = FFMIN(xC0b +  offsetX[idxX][idxY],             min_cb_width  - 1);
-            y = FFMIN(yC0b + (offsetY[idxX][idxY] & idx_mask), min_cb_height - 1);
-
-            if (xC0b == (lc->start_of_tiles_x >> s->sps->log2_min_cb_size) &&
-                offsetX[idxX][idxY] == -1) {
-                x = (lc->end_of_tiles_x >> s->sps->log2_min_cb_size) - 1;
-                y = yC0b - 1;
-            }
-            qPy_pred = s->qp_y_tab[y * min_cb_width + x];
-        }
+        qPy_pred = lc->qPy_pred;
     }
 
     // qPy_a
@@ -148,20 +113,22 @@ static int get_qPy_pred(HEVCContext *s, int xC, int yC,
     else
         qPy_b = s->qp_y_tab[x_cb + (y_cb - 1) * min_cb_width];
 
+    av_assert2(qPy_a >= -s->sps->qp_bd_offset && qPy_a < 52);
+    av_assert2(qPy_b >= -s->sps->qp_bd_offset && qPy_b < 52);
+
     return (qPy_a + qPy_b + 1) >> 1;
 }
 
-void ff_hevc_set_qPy(HEVCContext *s, int xC, int yC,
-                     int xBase, int yBase, int log2_cb_size)
+void ff_hevc_set_qPy(HEVCContext *s, int xBase, int yBase, int log2_cb_size) // stores its result in s->HEVClc->qp_y
 {
-    int qp_y = get_qPy_pred(s, xC, yC, xBase, yBase, log2_cb_size);
+    int qp_y = get_qPy_pred(s, xBase, yBase, log2_cb_size); // prediction; stored unchanged when cu_qp_delta is 0
 
-    if (s->HEVClc.tu.cu_qp_delta != 0) {
+    if (s->HEVClc->tu.cu_qp_delta != 0) {
         int off = s->sps->qp_bd_offset;
-        s->HEVClc.qp_y = FFUMOD(qp_y + s->HEVClc.tu.cu_qp_delta + 52 + 2 * off,
-                                52 + off) - off;
+        s->HEVClc->qp_y = FFUMOD(qp_y + s->HEVClc->tu.cu_qp_delta + 52 + 2 * off,
+                                 52 + off) - off; // NOTE(review): presumably FFUMOD yields a non-negative remainder, keeping qp_y in [-off, 51] - confirm
     } else
-        s->HEVClc.qp_y = qp_y;
+        s->HEVClc->qp_y = qp_y;
 }
 
 static int get_qPy(HEVCContext *s, int xC, int yC)
@@ -173,14 +140,46 @@ static int get_qPy(HEVCContext *s, int xC, int yC)
 }
 
 static void copy_CTB(uint8_t *dst, uint8_t *src,
-                     int width, int height, int stride)
+                     int width, int height, int stride_dst, int stride_src) // copies height rows of width bytes from src to dst
 {
     int i;
 
     for (i = 0; i < height; i++) {
         memcpy(dst, src, width);
-        dst += stride;
-        src += stride;
+        dst += stride_dst; // each buffer advances by its own row stride
+        src += stride_src;
+    }
+}
+
+// Copy every min-PU block flagged in s->is_pcm inside the given area from s->sao_frame back into
+// s->frame (plane c_idx); does nothing unless transquant bypass or PCM loop-filter disabling is enabled.
+static void restore_tqb_pixels(HEVCContext *s, int x0, int y0, int width, int height, int c_idx)
+{
+    if (s->pps->transquant_bypass_enable_flag || (s->sps->pcm.loop_filter_disable_flag && s->sps->pcm_enabled_flag)) {
+        int x, y, n;
+        ptrdiff_t stride_dst = s->sao_frame->linesize[c_idx];
+        ptrdiff_t stride_src = s->frame->linesize[c_idx];
+        int min_pu_size  = 1 << s->sps->log2_min_pu_size;
+        int hshift       = s->sps->hshift[c_idx];
+        int vshift       = s->sps->vshift[c_idx];
+        int x_min        = ((x0         ) >> s->sps->log2_min_pu_size); // area bounds in min-PU units
+        int y_min        = ((y0         ) >> s->sps->log2_min_pu_size);
+        int x_max        = ((x0 + width ) >> s->sps->log2_min_pu_size); // NOTE(review): sao_filter_CTB passes luma x0/y0 with per-plane width/height - confirm for chroma
+        int y_max        = ((y0 + height) >> s->sps->log2_min_pu_size);
+        int len          = (min_pu_size >> hshift) << s->sps->pixel_shift; // row length in bytes: scaled by pixel_shift like the offsets below
+        for (y = y_min; y < y_max; y++) {
+            for (x = x_min; x < x_max; x++) {
+                if (s->is_pcm[y * s->sps->min_pu_width + x]) {
+                    uint8_t *src = &s->frame->data[c_idx][    ((y << s->sps->log2_min_pu_size) >> vshift) * stride_src + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)]; // copy target, in s->frame
+                    uint8_t *dst = &s->sao_frame->data[c_idx][((y << s->sps->log2_min_pu_size) >> vshift) * stride_dst + (((x << s->sps->log2_min_pu_size) >> hshift) << s->sps->pixel_shift)]; // copy source, in s->sao_frame
+                    for (n = 0; n < (min_pu_size >> vshift); n++) {
+                        memcpy(src, dst, len);
+                        src += stride_src;
+                        dst += stride_dst;
+                    }
+                }
+            }
+        }
     }
 }
 
@@ -188,128 +187,127 @@ static void copy_CTB(uint8_t *dst, uint8_t *src,
 
 static void sao_filter_CTB(HEVCContext *s, int x, int y)
 {
-    //  TODO: This should be easily parallelizable
-    //  TODO: skip CBs when (cu_transquant_bypass_flag || (pcm_loop_filter_disable_flag && pcm_flag))
-    int c_idx = 0;
-    int class = 1, class_index;
+    int c_idx; // plane index, run over 0..2 by the loop at the end of the function
     int edges[4];  // 0 left 1 top 2 right 3 bottom
-    SAOParams *sao[4];
-    int classes[4];
-    int x_shift = 0, y_shift = 0;
-    int x_ctb = x >> s->sps->log2_ctb_size;
-    int y_ctb = y >> s->sps->log2_ctb_size;
-    int ctb_addr_rs = y_ctb * s->sps->ctb_width + x_ctb;
-    int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[ctb_addr_rs];
-
+    int x_ctb                = x >> s->sps->log2_ctb_size; // x, y are pixel positions; x_ctb, y_ctb are in CTB units
+    int y_ctb                = y >> s->sps->log2_ctb_size;
+    int ctb_addr_rs          = y_ctb * s->sps->ctb_width + x_ctb;
+    int ctb_addr_ts          = s->pps->ctb_addr_rs_to_ts[ctb_addr_rs];
+    SAOParams *sao           = &CTB(s->sao, x_ctb, y_ctb); // parameters of this CTB only
     // flags indicating unfilterable edges
-    uint8_t vert_edge[]  = { 0, 0, 0, 0 };
-    uint8_t horiz_edge[] = { 0, 0, 0, 0 };
-    uint8_t diag_edge[]  = { 0, 0, 0, 0 };
-    uint8_t lfase[3]; // current, above, left
-    uint8_t no_tile_filter = s->pps->tiles_enabled_flag &&
-                             !s->pps->loop_filter_across_tiles_enabled_flag;
-    uint8_t left_tile_edge = 0, up_tile_edge = 0;
-
-    sao[0]     = &CTB(s->sao, x_ctb, y_ctb);
+    uint8_t vert_edge[]      = { 0, 0 }; // [0] left, [1] right
+    uint8_t horiz_edge[]     = { 0, 0 }; // [0] top, [1] bottom
+    uint8_t diag_edge[]      = { 0, 0, 0, 0 }; // [0] top-left, [1] top-right, [2] bottom-right, [3] bottom-left
+    uint8_t lfase            = CTB(s->filter_slice_edges, x_ctb, y_ctb);
+    uint8_t no_tile_filter   = s->pps->tiles_enabled_flag &&
+                               !s->pps->loop_filter_across_tiles_enabled_flag;
+    uint8_t restore          = no_tile_filter || !lfase; // gates the boundary-flag setup below and indexes sao_edge_filter[]
+    uint8_t left_tile_edge   = 0;
+    uint8_t right_tile_edge  = 0;
+    uint8_t up_tile_edge     = 0;
+    uint8_t bottom_tile_edge = 0;
+
     edges[0]   = x_ctb == 0;
     edges[1]   = y_ctb == 0;
     edges[2]   = x_ctb == s->sps->ctb_width  - 1;
     edges[3]   = y_ctb == s->sps->ctb_height - 1;
-    lfase[0]   = CTB(s->filter_slice_edges, x_ctb, y_ctb);
-    classes[0] = 0;
-
-    if (!edges[0]) {
-        left_tile_edge = no_tile_filter && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1]];
-        sao[class] = &CTB(s->sao, x_ctb - 1, y_ctb);
-        vert_edge[0] = (!lfase[0] && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb)) || left_tile_edge;
-        vert_edge[2] = vert_edge[0];
-        lfase[2]     = CTB(s->filter_slice_edges, x_ctb - 1, y_ctb);
-        classes[class] = 2;
-        class++;
-        x_shift = 8;
-    }
-
-    if (!edges[1]) {
-        up_tile_edge = no_tile_filter && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
-        sao[class] = &CTB(s->sao, x_ctb, y_ctb - 1);
-        horiz_edge[0] = (!lfase[0] && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb, y_ctb - 1)) || up_tile_edge;
-        horiz_edge[1] = horiz_edge[0];
-        lfase[1] = CTB(s->filter_slice_edges, x_ctb, y_ctb - 1);
-        classes[class] = 1;
-        class++;
-        y_shift = 4;
 
+    if (restore) { // each flag is set when the neighbour is in another slice (with !lfase) or across a tile boundary
         if (!edges[0]) {
-            classes[class] = 3;
-            sao[class] = &CTB(s->sao, x_ctb - 1, y_ctb - 1);
-            class++;
-
-            // Tile check here is done current CTB row/col, not above/left like you'd expect,
-            //but that is because the tile boundary always extends through the whole pic
-            vert_edge[1] = (!lfase[1] && CTB(s->tab_slice_address, x_ctb, y_ctb - 1) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb - 1)) || left_tile_edge;
-            vert_edge[3] = vert_edge[1];
-            horiz_edge[2] = (!lfase[2] && CTB(s->tab_slice_address, x_ctb - 1, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb - 1)) || up_tile_edge;
-            horiz_edge[3] = horiz_edge[2];
-            diag_edge[0] = (!lfase[0] && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb - 1)) || left_tile_edge || up_tile_edge;
-            diag_edge[3] = diag_edge[0];
-
-            // Does left CTB comes after above CTB?
-            if (CTB(s->tab_slice_address, x_ctb - 1, y_ctb) >
-                CTB(s->tab_slice_address, x_ctb, y_ctb - 1)) {
-                diag_edge[2] = !lfase[2] || left_tile_edge || up_tile_edge;
-                diag_edge[1] = diag_edge[2];
-            } else if (CTB(s->tab_slice_address, x_ctb - 1, y_ctb) <
-                       CTB(s->tab_slice_address, x_ctb, y_ctb - 1)) {
-                diag_edge[1] = !lfase[1] || left_tile_edge || up_tile_edge;
-                diag_edge[2] = diag_edge[1];
-            } else {
-                // Same slice, only consider tiles
-                diag_edge[2] = left_tile_edge || up_tile_edge;
-                diag_edge[1] = diag_edge[2];
-            }
+            left_tile_edge  = no_tile_filter && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs-1]];
+            vert_edge[0]    = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb)) || left_tile_edge;
+        }
+        if (!edges[2]) {
+            right_tile_edge = no_tile_filter && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs+1]];
+            vert_edge[1]    = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb + 1, y_ctb)) || right_tile_edge;
+        }
+        if (!edges[1]) {
+            up_tile_edge     = no_tile_filter && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->sps->ctb_width]];
+            horiz_edge[0]    = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb, y_ctb - 1)) || up_tile_edge;
+        }
+        if (!edges[3]) {
+            bottom_tile_edge = no_tile_filter && s->pps->tile_id[ctb_addr_ts] != s->pps->tile_id[s->pps->ctb_addr_rs_to_ts[ctb_addr_rs + s->sps->ctb_width]];
+            horiz_edge[1]    = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb, y_ctb + 1)) || bottom_tile_edge;
+        }
+        if (!edges[0] && !edges[1]) {
+            diag_edge[0] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb - 1)) || left_tile_edge || up_tile_edge;
+        }
+        if (!edges[1] && !edges[2]) {
+            diag_edge[1] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb + 1, y_ctb - 1)) || right_tile_edge || up_tile_edge;
+        }
+        if (!edges[2] && !edges[3]) {
+            diag_edge[2] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb + 1, y_ctb + 1)) || right_tile_edge || bottom_tile_edge;
+        }
+        if (!edges[0] && !edges[3]) {
+            diag_edge[3] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb + 1)) || left_tile_edge || bottom_tile_edge;
         }
     }
 
     for (c_idx = 0; c_idx < 3; c_idx++) {
-        int chroma = c_idx ? 1 : 0;
-        int x0 = x >> chroma;
-        int y0 = y >> chroma;
-        int stride = s->frame->linesize[c_idx];
-        int ctb_size = (1 << (s->sps->log2_ctb_size)) >> s->sps->hshift[c_idx];
-        int width = FFMIN(ctb_size,
-                          (s->sps->width >> s->sps->hshift[c_idx]) - x0);
-        int height = FFMIN(ctb_size,
-                           (s->sps->height >> s->sps->vshift[c_idx]) - y0);
-
-        uint8_t *src = &s->frame->data[c_idx][y0 * stride + (x0 << s->sps->pixel_shift)];
-        uint8_t *dst = &s->sao_frame->data[c_idx][y0 * stride + (x0 << s->sps->pixel_shift)];
-        int offset = (y_shift >> chroma) * stride + ((x_shift >> chroma) << s->sps->pixel_shift);
-
-        copy_CTB(dst - offset, src - offset,
-                 (edges[2] ? width  + (x_shift >> chroma) : width)  << s->sps->pixel_shift,
-                 (edges[3] ? height + (y_shift >> chroma) : height), stride);
-
-        for (class_index = 0; class_index < class; class_index++) {
-
-            switch (sao[class_index]->type_idx[c_idx]) {
-            case SAO_BAND:
-                s->hevcdsp.sao_band_filter[classes[class_index]](dst, src,
-                                                                 stride,
-                                                                 sao[class_index],
-                                                                 edges, width,
-                                                                 height, c_idx);
-                break;
-            case SAO_EDGE:
-                s->hevcdsp.sao_edge_filter[classes[class_index]](dst, src,
-                                                                 stride,
-                                                                 sao[class_index],
-                                                                 edges, width,
-                                                                 height, c_idx,
-                                                                 vert_edge[classes[class_index]],
-                                                                 horiz_edge[classes[class_index]],
-                                                                 diag_edge[classes[class_index]]);
-                break;
+        int x0       = x >> s->sps->hshift[c_idx]; // CTB origin scaled by this plane's subsampling shifts
+        int y0       = y >> s->sps->vshift[c_idx];
+        int stride_src = s->frame->linesize[c_idx];
+        int stride_dst = s->sao_frame->linesize[c_idx];
+        int ctb_size_h = (1 << (s->sps->log2_ctb_size)) >> s->sps->hshift[c_idx];
+        int ctb_size_v = (1 << (s->sps->log2_ctb_size)) >> s->sps->vshift[c_idx];
+        int width    = FFMIN(ctb_size_h, (s->sps->width  >> s->sps->hshift[c_idx]) - x0); // clamped to what is left of the plane
+        int height   = FFMIN(ctb_size_v, (s->sps->height >> s->sps->vshift[c_idx]) - y0);
+        uint8_t *src = &s->frame->data[c_idx][y0 * stride_src + (x0 << s->sps->pixel_shift)]; // src is in s->frame, dst in s->sao_frame
+        uint8_t *dst = &s->sao_frame->data[c_idx][y0 * stride_dst + (x0 << s->sps->pixel_shift)];
+
+        switch (sao->type_idx[c_idx]) {
+        case SAO_BAND:
+            copy_CTB(dst, src, width << s->sps->pixel_shift, height, stride_dst, stride_src); // copy this CTB from s->frame into s->sao_frame before filtering
+            s->hevcdsp.sao_band_filter(src, dst,
+                                       stride_src, stride_dst,
+                                       sao,
+                                       edges, width,
+                                       height, c_idx);
+            restore_tqb_pixels(s, x, y, width, height, c_idx); // note: x, y are the unscaled positions while width, height are per plane
+            sao->type_idx[c_idx] = SAO_APPLIED; // mark as done; the SAO_EDGE case tests neighbouring CTBs for this value
+            break;
+        case SAO_EDGE:
+        {
+            uint8_t left_pixels = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb).type_idx[c_idx] != SAO_APPLIED); // 1 when a left neighbour exists and is not marked SAO_APPLIED
+            if (!edges[1]) {
+                uint8_t top_left  = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb-1).type_idx[c_idx] != SAO_APPLIED);
+                uint8_t top_right = !edges[2] && (CTB(s->sao, x_ctb+1, y_ctb-1).type_idx[c_idx] != SAO_APPLIED);
+                if (CTB(s->sao, x_ctb  , y_ctb-1).type_idx[c_idx] == 0) // NOTE(review): 0 is presumably the "SAO off" type - confirm against the SAO type enum
+                    memcpy( dst - stride_dst - (top_left << s->sps->pixel_shift),
+                            src - stride_src - (top_left << s->sps->pixel_shift),
+                            (top_left + width + top_right) << s->sps->pixel_shift);
+                else {
+                    if (top_left) // otherwise only the requested corner pixels of the row above are copied
+                        memcpy( dst - stride_dst - (1 << s->sps->pixel_shift),
+                                src - stride_src - (1 << s->sps->pixel_shift),
+                                1 << s->sps->pixel_shift);
+                    if(top_right)
+                        memcpy( dst - stride_dst + (width << s->sps->pixel_shift),
+                                src - stride_src + (width << s->sps->pixel_shift),
+                                1 << s->sps->pixel_shift);
+                }
+            }
+            if (!edges[3]) {                                                                // bottom and bottom right
+                uint8_t bottom_left = !edges[0] && (CTB(s->sao, x_ctb-1, y_ctb+1).type_idx[c_idx] != SAO_APPLIED);
+                memcpy( dst + height * stride_dst - (bottom_left << s->sps->pixel_shift),
+                        src + height * stride_src - (bottom_left << s->sps->pixel_shift),
+                        (width + 1 + bottom_left) << s->sps->pixel_shift);
             }
+            copy_CTB(dst - (left_pixels << s->sps->pixel_shift), // the CTB rows plus one extra pixel on the right and, if left_pixels, one on the left
+                     src - (left_pixels << s->sps->pixel_shift),
+                     (width + 1 + left_pixels) << s->sps->pixel_shift, height, stride_dst, stride_src);
+            s->hevcdsp.sao_edge_filter[restore](src, dst, // entry 1 is used whenever the boundary flags above may have been set
+                                                stride_src, stride_dst,
+                                                sao,
+                                                edges, width,
+                                                height, c_idx,
+                                                vert_edge,
+                                                horiz_edge,
+                                                diag_edge);
+            restore_tqb_pixels(s, x, y, width, height, c_idx);
+            sao->type_idx[c_idx] = SAO_APPLIED;
+            break;
+        }
         }
     }
 }
@@ -338,18 +336,21 @@ static int get_pcm(HEVCContext *s, int x, int y)
 static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0)
 {
     uint8_t *src;
-    int x, y, x_end, y_end, chroma;
+    int x, y;
+    int chroma;
     int c_tc[2], tc[2], beta;
     uint8_t no_p[2] = { 0 };
     uint8_t no_q[2] = { 0 };
 
     int log2_ctb_size = s->sps->log2_ctb_size;
+    int x_end, x_end2, y_end;
     int ctb_size        = 1 << log2_ctb_size;
     int ctb             = (x0 >> log2_ctb_size) +
                           (y0 >> log2_ctb_size) * s->sps->ctb_width;
     int cur_tc_offset   = s->deblock[ctb].tc_offset;
     int cur_beta_offset = s->deblock[ctb].beta_offset;
-    int tc_offset, left_tc_offset, beta_offset, left_beta_offset;
+    int left_tc_offset, left_beta_offset;
+    int tc_offset, beta_offset;
     int pcmf = (s->sps->pcm_enabled_flag &&
                 s->sps->pcm.loop_filter_disable_flag) ||
                s->pps->transquant_bypass_enable_flag;
@@ -357,6 +358,9 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0)
     if (x0) {
         left_tc_offset   = s->deblock[ctb - 1].tc_offset;
         left_beta_offset = s->deblock[ctb - 1].beta_offset;
+    } else {
+        left_tc_offset   = 0;
+        left_beta_offset = 0;
     }
 
     x_end = x0 + ctb_size;
@@ -369,11 +373,14 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0)
     tc_offset   = cur_tc_offset;
     beta_offset = cur_beta_offset;
 
-    // vertical filtering luma
+    x_end2 = x_end;
+    if (x_end2 != s->sps->width)
+        x_end2 -= 8;
     for (y = y0; y < y_end; y += 8) {
+        // vertical filtering luma
         for (x = x0 ? x0 : 8; x < x_end; x += 8) {
-            const int bs0 = s->vertical_bs[(x >> 3) + (y       >> 2) * s->bs_width];
-            const int bs1 = s->vertical_bs[(x >> 3) + ((y + 4) >> 2) * s->bs_width];
+            const int bs0 = s->vertical_bs[(x +  y      * s->bs_width) >> 2];
+            const int bs1 = s->vertical_bs[(x + (y + 4) * s->bs_width) >> 2];
             if (bs0 || bs1) {
                 const int qp = (get_qPy(s, x - 1, y)     + get_qPy(s, x, y)     + 1) >> 1;
 
@@ -396,45 +403,14 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0)
                                                        beta, tc, no_p, no_q);
             }
         }
-    }
 
-    // vertical filtering chroma
-    for (chroma = 1; chroma <= 2; chroma++) {
-        for (y = y0; y < y_end; y += 16) {
-            for (x = x0 ? x0 : 16; x < x_end; x += 16) {
-                const int bs0 = s->vertical_bs[(x >> 3) + (y       >> 2) * s->bs_width];
-                const int bs1 = s->vertical_bs[(x >> 3) + ((y + 8) >> 2) * s->bs_width];
-                if ((bs0 == 2) || (bs1 == 2)) {
-                    const int qp0 = (get_qPy(s, x - 1, y)     + get_qPy(s, x, y)     + 1) >> 1;
-                    const int qp1 = (get_qPy(s, x - 1, y + 8) + get_qPy(s, x, y + 8) + 1) >> 1;
+        if(!y)
+             continue;
 
-                    c_tc[0] = (bs0 == 2) ? chroma_tc(s, qp0, chroma, tc_offset) : 0;
-                    c_tc[1] = (bs1 == 2) ? chroma_tc(s, qp1, chroma, tc_offset) : 0;
-                    src     = &s->frame->data[chroma][y / 2 * s->frame->linesize[chroma] + ((x / 2) << s->sps->pixel_shift)];
-                    if (pcmf) {
-                        no_p[0] = get_pcm(s, x - 1, y);
-                        no_p[1] = get_pcm(s, x - 1, y + 8);
-                        no_q[0] = get_pcm(s, x, y);
-                        no_q[1] = get_pcm(s, x, y + 8);
-                        s->hevcdsp.hevc_v_loop_filter_chroma_c(src,
-                                                               s->frame->linesize[chroma],
-                                                               c_tc, no_p, no_q);
-                    } else
-                        s->hevcdsp.hevc_v_loop_filter_chroma(src,
-                                                             s->frame->linesize[chroma],
-                                                             c_tc, no_p, no_q);
-                }
-            }
-        }
-    }
-
-    // horizontal filtering luma
-    if (x_end != s->sps->width)
-        x_end -= 8;
-    for (y = y0 ? y0 : 8; y < y_end; y += 8) {
-        for (x = x0 ? x0 - 8 : 0; x < x_end; x += 8) {
-            const int bs0 = s->horizontal_bs[(x +     y * s->bs_width) >> 2];
-            const int bs1 = s->horizontal_bs[(x + 4 + y * s->bs_width) >> 2];
+        // horizontal filtering luma
+        for (x = x0 ? x0 - 8 : 0; x < x_end2; x += 8) {
+            const int bs0 = s->horizontal_bs[( x      + y * s->bs_width) >> 2];
+            const int bs1 = s->horizontal_bs[((x + 4) + y * s->bs_width) >> 2];
             if (bs0 || bs1) {
                 const int qp = (get_qPy(s, x, y - 1)     + get_qPy(s, x, y)     + 1) >> 1;
 
@@ -461,37 +437,61 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0)
         }
     }
 
-    // horizontal filtering chroma
     for (chroma = 1; chroma <= 2; chroma++) {
-        for (y = y0 ? y0 : 16; y < y_end; y += 16) {
-            for (x = x0 - 8; x < x_end; x += 16) {
-                int bs0, bs1;
-                // to make sure no memory access over boundary when x = -8
-                // TODO: simplify with row based deblocking
-                if (x < 0) {
-                    bs0 = 0;
-                    bs1 = s->horizontal_bs[(x + 8 + y * s->bs_width) >> 2];
-                } else if (x >= x_end - 8) {
-                    bs0 = s->horizontal_bs[(x +     y * s->bs_width) >> 2];
-                    bs1 = 0;
-                } else {
-                    bs0 = s->horizontal_bs[(x + y     * s->bs_width) >> 2];
-                    bs1 = s->horizontal_bs[(x + 8 + y * s->bs_width) >> 2];
+        int h = 1 << s->sps->hshift[chroma];
+        int v = 1 << s->sps->vshift[chroma];
+
+        // vertical filtering chroma
+        for (y = y0; y < y_end; y += (8 * v)) {
+            for (x = x0 ? x0 : 8 * h; x < x_end; x += (8 * h)) {
+                const int bs0 = s->vertical_bs[(x +  y            * s->bs_width) >> 2];
+                const int bs1 = s->vertical_bs[(x + (y + (4 * v)) * s->bs_width) >> 2];
+
+                if ((bs0 == 2) || (bs1 == 2)) {
+                    const int qp0 = (get_qPy(s, x - 1, y)           + get_qPy(s, x, y)           + 1) >> 1;
+                    const int qp1 = (get_qPy(s, x - 1, y + (4 * v)) + get_qPy(s, x, y + (4 * v)) + 1) >> 1;
+
+                    c_tc[0] = (bs0 == 2) ? chroma_tc(s, qp0, chroma, tc_offset) : 0;
+                    c_tc[1] = (bs1 == 2) ? chroma_tc(s, qp1, chroma, tc_offset) : 0;
+                    src       = &s->frame->data[chroma][(y >> s->sps->vshift[chroma]) * s->frame->linesize[chroma] + ((x >> s->sps->hshift[chroma]) << s->sps->pixel_shift)];
+                    if (pcmf) {
+                        no_p[0] = get_pcm(s, x - 1, y);
+                        no_p[1] = get_pcm(s, x - 1, y + (4 * v));
+                        no_q[0] = get_pcm(s, x, y);
+                        no_q[1] = get_pcm(s, x, y + (4 * v));
+                        s->hevcdsp.hevc_v_loop_filter_chroma_c(src,
+                                                               s->frame->linesize[chroma],
+                                                               c_tc, no_p, no_q);
+                    } else
+                        s->hevcdsp.hevc_v_loop_filter_chroma(src,
+                                                             s->frame->linesize[chroma],
+                                                             c_tc, no_p, no_q);
                 }
+            }
 
+            if(!y)
+                 continue;
+
+            // horizontal filtering chroma
+            tc_offset = x0 ? left_tc_offset : cur_tc_offset;
+            x_end2 = x_end;
+            if (x_end != s->sps->width)
+                x_end2 = x_end - 8 * h;
+            for (x = x0 ? x0 - 8 * h : 0; x < x_end2; x += (8 * h)) {
+                const int bs0 = s->horizontal_bs[( x          + y * s->bs_width) >> 2];
+                const int bs1 = s->horizontal_bs[((x + 4 * h) + y * s->bs_width) >> 2];
                 if ((bs0 == 2) || (bs1 == 2)) {
-                    const int qp0 = bs0 == 2 ? (get_qPy(s, x,     y - 1) + get_qPy(s, x,     y) + 1) >> 1 : 0;
-                    const int qp1 = bs1 == 2 ? (get_qPy(s, x + 8, y - 1) + get_qPy(s, x + 8, y) + 1) >> 1 : 0;
+                    const int qp0 = bs0 == 2 ? (get_qPy(s, x,           y - 1) + get_qPy(s, x,           y) + 1) >> 1 : 0;
+                    const int qp1 = bs1 == 2 ? (get_qPy(s, x + (4 * h), y - 1) + get_qPy(s, x + (4 * h), y) + 1) >> 1 : 0;
 
-                    tc_offset = x >= x0 ? cur_tc_offset : left_tc_offset;
                     c_tc[0]   = bs0 == 2 ? chroma_tc(s, qp0, chroma, tc_offset)     : 0;
                     c_tc[1]   = bs1 == 2 ? chroma_tc(s, qp1, chroma, cur_tc_offset) : 0;
-                    src       = &s->frame->data[chroma][y / 2 * s->frame->linesize[chroma] + ((x / 2) << s->sps->pixel_shift)];
+                    src       = &s->frame->data[chroma][(y >> s->sps->vshift[1]) * s->frame->linesize[chroma] + ((x >> s->sps->hshift[1]) << s->sps->pixel_shift)];
                     if (pcmf) {
-                        no_p[0] = get_pcm(s, x, y - 1);
-                        no_p[1] = get_pcm(s, x + 8, y - 1);
-                        no_q[0] = get_pcm(s, x, y);
-                        no_q[1] = get_pcm(s, x + 8, y);
+                        no_p[0] = get_pcm(s, x,           y - 1);
+                        no_p[1] = get_pcm(s, x + (4 * h), y - 1);
+                        no_q[0] = get_pcm(s, x,           y);
+                        no_q[1] = get_pcm(s, x + (4 * h), y);
                         s->hevcdsp.hevc_h_loop_filter_chroma_c(src,
                                                                s->frame->linesize[chroma],
                                                                c_tc, no_p, no_q);
@@ -505,138 +505,96 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0)
     }
 }
 
-static int boundary_strength(HEVCContext *s, MvField *curr,
-                             uint8_t curr_cbf_luma, MvField *neigh,
-                             uint8_t neigh_cbf_luma,
-                             RefPicList *neigh_refPicList,
-                             int tu_border)
+static int boundary_strength(HEVCContext *s, MvField *curr, MvField *neigh,
+                             RefPicList *neigh_refPicList)
 {
-    int mvs = curr->pred_flag[0] + curr->pred_flag[1];
-
-    if (tu_border) {
-        if (curr->is_intra || neigh->is_intra)
-            return 2;
-        if (curr_cbf_luma || neigh_cbf_luma)
-            return 1;
-    }
-
-    if (mvs == neigh->pred_flag[0] + neigh->pred_flag[1]) {
-        if (mvs == 2) {
-            // same L0 and L1
-            if (s->ref->refPicList[0].list[curr->ref_idx[0]] == neigh_refPicList[0].list[neigh->ref_idx[0]]  &&
-                s->ref->refPicList[0].list[curr->ref_idx[0]] == s->ref->refPicList[1].list[curr->ref_idx[1]] &&
-                neigh_refPicList[0].list[neigh->ref_idx[0]] == neigh_refPicList[1].list[neigh->ref_idx[1]]) {
-                if ((abs(neigh->mv[0].x - curr->mv[0].x) >= 4 || abs(neigh->mv[0].y - curr->mv[0].y) >= 4 ||
-                     abs(neigh->mv[1].x - curr->mv[1].x) >= 4 || abs(neigh->mv[1].y - curr->mv[1].y) >= 4) &&
-                    (abs(neigh->mv[1].x - curr->mv[0].x) >= 4 || abs(neigh->mv[1].y - curr->mv[0].y) >= 4 ||
-                     abs(neigh->mv[0].x - curr->mv[1].x) >= 4 || abs(neigh->mv[0].y - curr->mv[1].y) >= 4))
-                    return 1;
-                else
-                    return 0;
-            } else if (neigh_refPicList[0].list[neigh->ref_idx[0]] == s->ref->refPicList[0].list[curr->ref_idx[0]] &&
-                       neigh_refPicList[1].list[neigh->ref_idx[1]] == s->ref->refPicList[1].list[curr->ref_idx[1]]) {
-                if (abs(neigh->mv[0].x - curr->mv[0].x) >= 4 || abs(neigh->mv[0].y - curr->mv[0].y) >= 4 ||
-                    abs(neigh->mv[1].x - curr->mv[1].x) >= 4 || abs(neigh->mv[1].y - curr->mv[1].y) >= 4)
-                    return 1;
-                else
-                    return 0;
-            } else if (neigh_refPicList[1].list[neigh->ref_idx[1]] == s->ref->refPicList[0].list[curr->ref_idx[0]] &&
-                       neigh_refPicList[0].list[neigh->ref_idx[0]] == s->ref->refPicList[1].list[curr->ref_idx[1]]) {
-                if (abs(neigh->mv[1].x - curr->mv[0].x) >= 4 || abs(neigh->mv[1].y - curr->mv[0].y) >= 4 ||
-                    abs(neigh->mv[0].x - curr->mv[1].x) >= 4 || abs(neigh->mv[0].y - curr->mv[1].y) >= 4)
-                    return 1;
-                else
-                    return 0;
-            } else {
+    if (curr->pred_flag == PF_BI &&  neigh->pred_flag == PF_BI) {
+        // same L0 and L1
+        if (s->ref->refPicList[0].list[curr->ref_idx[0]] == neigh_refPicList[0].list[neigh->ref_idx[0]]  &&
+            s->ref->refPicList[0].list[curr->ref_idx[0]] == s->ref->refPicList[1].list[curr->ref_idx[1]] &&
+            neigh_refPicList[0].list[neigh->ref_idx[0]] == neigh_refPicList[1].list[neigh->ref_idx[1]]) {
+            if ((FFABS(neigh->mv[0].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[0].y) >= 4 ||
+                 FFABS(neigh->mv[1].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[1].y) >= 4) &&
+                (FFABS(neigh->mv[1].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[0].y) >= 4 ||
+                 FFABS(neigh->mv[0].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[1].y) >= 4))
                 return 1;
-            }
-        } else { // 1 MV
-            Mv A, B;
-            int ref_A, ref_B;
-
-            if (curr->pred_flag[0]) {
-                A     = curr->mv[0];
-                ref_A = s->ref->refPicList[0].list[curr->ref_idx[0]];
-            } else {
-                A     = curr->mv[1];
-                ref_A = s->ref->refPicList[1].list[curr->ref_idx[1]];
-            }
+            else
+                return 0;
+        } else if (neigh_refPicList[0].list[neigh->ref_idx[0]] == s->ref->refPicList[0].list[curr->ref_idx[0]] &&
+                   neigh_refPicList[1].list[neigh->ref_idx[1]] == s->ref->refPicList[1].list[curr->ref_idx[1]]) {
+            if (FFABS(neigh->mv[0].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[0].y) >= 4 ||
+                FFABS(neigh->mv[1].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[1].y) >= 4)
+                return 1;
+            else
+                return 0;
+        } else if (neigh_refPicList[1].list[neigh->ref_idx[1]] == s->ref->refPicList[0].list[curr->ref_idx[0]] &&
+                   neigh_refPicList[0].list[neigh->ref_idx[0]] == s->ref->refPicList[1].list[curr->ref_idx[1]]) {
+            if (FFABS(neigh->mv[1].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[0].y) >= 4 ||
+                FFABS(neigh->mv[0].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[1].y) >= 4)
+                return 1;
+            else
+                return 0;
+        } else {
+            return 1;
+        }
+    } else if ((curr->pred_flag != PF_BI) && (neigh->pred_flag != PF_BI)){ // 1 MV
+        Mv A, B;
+        int ref_A, ref_B;
+
+        if (curr->pred_flag & 1) {
+            A     = curr->mv[0];
+            ref_A = s->ref->refPicList[0].list[curr->ref_idx[0]];
+        } else {
+            A     = curr->mv[1];
+            ref_A = s->ref->refPicList[1].list[curr->ref_idx[1]];
+        }
 
-            if (neigh->pred_flag[0]) {
-                B     = neigh->mv[0];
-                ref_B = neigh_refPicList[0].list[neigh->ref_idx[0]];
-            } else {
-                B     = neigh->mv[1];
-                ref_B = neigh_refPicList[1].list[neigh->ref_idx[1]];
-            }
+        if (neigh->pred_flag & 1) {
+            B     = neigh->mv[0];
+            ref_B = neigh_refPicList[0].list[neigh->ref_idx[0]];
+        } else {
+            B     = neigh->mv[1];
+            ref_B = neigh_refPicList[1].list[neigh->ref_idx[1]];
+        }
 
-            if (ref_A == ref_B) {
-                if (abs(A.x - B.x) >= 4 || abs(A.y - B.y) >= 4)
-                    return 1;
-                else
-                    return 0;
-            } else
+        if (ref_A == ref_B) {
+            if (FFABS(A.x - B.x) >= 4 || FFABS(A.y - B.y) >= 4)
                 return 1;
-        }
+            else
+                return 0;
+        } else
+            return 1;
     }
 
     return 1;
 }
 
 void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0,
-                                           int log2_trafo_size,
-                                           int slice_or_tiles_up_boundary,
-                                           int slice_or_tiles_left_boundary)
+                                           int log2_trafo_size)
 {
+    HEVCLocalContext *lc = s->HEVClc;
     MvField *tab_mvf     = s->ref->tab_mvf;
     int log2_min_pu_size = s->sps->log2_min_pu_size;
     int log2_min_tu_size = s->sps->log2_min_tb_size;
     int min_pu_width     = s->sps->min_pu_width;
     int min_tu_width     = s->sps->min_tb_width;
     int is_intra = tab_mvf[(y0 >> log2_min_pu_size) * min_pu_width +
-                           (x0 >> log2_min_pu_size)].is_intra;
+                           (x0 >> log2_min_pu_size)].pred_flag == PF_INTRA;
     int i, j, bs;
 
     if (y0 > 0 && (y0 & 7) == 0) {
-        int yp_pu = (y0 - 1) >> log2_min_pu_size;
-        int yq_pu =  y0      >> log2_min_pu_size;
-        int yp_tu = (y0 - 1) >> log2_min_tu_size;
-        int yq_tu =  y0      >> log2_min_tu_size;
-
-        for (i = 0; i < (1 << log2_trafo_size); i += 4) {
-            int x_pu = (x0 + i) >> log2_min_pu_size;
-            int x_tu = (x0 + i) >> log2_min_tu_size;
-            MvField *top  = &tab_mvf[yp_pu * min_pu_width + x_pu];
-            MvField *curr = &tab_mvf[yq_pu * min_pu_width + x_pu];
-            uint8_t top_cbf_luma  = s->cbf_luma[yp_tu * min_tu_width + x_tu];
-            uint8_t curr_cbf_luma = s->cbf_luma[yq_tu * min_tu_width + x_tu];
+        int bd_ctby = y0 & ((1 << s->sps->log2_ctb_size) - 1);
+        int bd_slice = s->sh.slice_loop_filter_across_slices_enabled_flag ||
+                       !(lc->boundary_flags & BOUNDARY_UPPER_SLICE);
+        int bd_tiles = s->pps->loop_filter_across_tiles_enabled_flag ||
+                       !(lc->boundary_flags & BOUNDARY_UPPER_TILE);
+        if (((bd_slice && bd_tiles)  || bd_ctby)) {
+            int yp_pu = (y0 - 1) >> log2_min_pu_size;
+            int yq_pu =  y0      >> log2_min_pu_size;
+            int yp_tu = (y0 - 1) >> log2_min_tu_size;
+            int yq_tu =  y0      >> log2_min_tu_size;
             RefPicList *top_refPicList = ff_hevc_get_ref_list(s, s->ref,
-                                                              x0 + i, y0 - 1);
-
-            bs = boundary_strength(s, curr, curr_cbf_luma,
-                                   top, top_cbf_luma, top_refPicList, 1);
-            if (!s->sh.slice_loop_filter_across_slices_enabled_flag &&
-                (slice_or_tiles_up_boundary & 1) &&
-                (y0 % (1 << s->sps->log2_ctb_size)) == 0)
-                bs = 0;
-            else if (!s->pps->loop_filter_across_tiles_enabled_flag &&
-                     (slice_or_tiles_up_boundary & 2) &&
-                     (y0 % (1 << s->sps->log2_ctb_size)) == 0)
-                bs = 0;
-            if (y0 == 0 || s->sh.disable_deblocking_filter_flag == 1)
-                bs = 0;
-            if (bs)
-                s->horizontal_bs[((x0 + i) + y0 * s->bs_width) >> 2] = bs;
-        }
-    }
-
-    // bs for TU internal horizontal PU boundaries
-    if (log2_trafo_size > s->sps->log2_min_pu_size && !is_intra)
-        for (j = 8; j < (1 << log2_trafo_size); j += 8) {
-            int yp_pu = (y0 + j - 1) >> log2_min_pu_size;
-            int yq_pu = (y0 + j)     >> log2_min_pu_size;
-            int yp_tu = (y0 + j - 1) >> log2_min_tu_size;
-            int yq_tu = (y0 + j)     >> log2_min_tu_size;
+                                                              x0, y0 - 1);
 
             for (i = 0; i < (1 << log2_trafo_size); i += 4) {
                 int x_pu = (x0 + i) >> log2_min_pu_size;
@@ -645,102 +603,124 @@ void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0,
                 MvField *curr = &tab_mvf[yq_pu * min_pu_width + x_pu];
                 uint8_t top_cbf_luma  = s->cbf_luma[yp_tu * min_tu_width + x_tu];
                 uint8_t curr_cbf_luma = s->cbf_luma[yq_tu * min_tu_width + x_tu];
-                RefPicList *top_refPicList = ff_hevc_get_ref_list(s, s->ref,
-                                                                  x0 + i,
-                                                                  y0 + j - 1);
-
-                bs = boundary_strength(s, curr, curr_cbf_luma,
-                                       top, top_cbf_luma, top_refPicList, 0);
-                if (s->sh.disable_deblocking_filter_flag == 1)
-                    bs = 0;
-                if (bs)
-                    s->horizontal_bs[((x0 + i) + (y0 + j) * s->bs_width) >> 2] = bs;
+
+                if (curr->pred_flag == PF_INTRA || top->pred_flag == PF_INTRA)
+                    bs = 2;
+                else if (curr_cbf_luma || top_cbf_luma)
+                    bs = 1;
+                else
+                    bs = boundary_strength(s, curr, top, top_refPicList);
+                s->horizontal_bs[((x0 + i) + y0 * s->bs_width) >> 2] = bs;
             }
         }
+    }
 
     // bs for vertical TU boundaries
     if (x0 > 0 && (x0 & 7) == 0) {
-        int xp_pu = (x0 - 1) >> log2_min_pu_size;
-        int xq_pu =  x0      >> log2_min_pu_size;
-        int xp_tu = (x0 - 1) >> log2_min_tu_size;
-        int xq_tu =  x0      >> log2_min_tu_size;
-
-        for (i = 0; i < (1 << log2_trafo_size); i += 4) {
-            int y_pu      = (y0 + i) >> log2_min_pu_size;
-            int y_tu      = (y0 + i) >> log2_min_tu_size;
-            MvField *left = &tab_mvf[y_pu * min_pu_width + xp_pu];
-            MvField *curr = &tab_mvf[y_pu * min_pu_width + xq_pu];
-
-            uint8_t left_cbf_luma = s->cbf_luma[y_tu * min_tu_width + xp_tu];
-            uint8_t curr_cbf_luma = s->cbf_luma[y_tu * min_tu_width + xq_tu];
+        int bd_ctbx = x0 & ((1 << s->sps->log2_ctb_size) - 1);
+        int bd_slice = s->sh.slice_loop_filter_across_slices_enabled_flag ||
+                       !(lc->boundary_flags & BOUNDARY_LEFT_SLICE);
+        int bd_tiles = s->pps->loop_filter_across_tiles_enabled_flag ||
+                       !(lc->boundary_flags & BOUNDARY_LEFT_TILE);
+        if (((bd_slice && bd_tiles)  || bd_ctbx)) {
+            int xp_pu = (x0 - 1) >> log2_min_pu_size;
+            int xq_pu =  x0      >> log2_min_pu_size;
+            int xp_tu = (x0 - 1) >> log2_min_tu_size;
+            int xq_tu =  x0      >> log2_min_tu_size;
             RefPicList *left_refPicList = ff_hevc_get_ref_list(s, s->ref,
-                                                               x0 - 1, y0 + i);
-
-            bs = boundary_strength(s, curr, curr_cbf_luma,
-                                   left, left_cbf_luma, left_refPicList, 1);
-            if (!s->sh.slice_loop_filter_across_slices_enabled_flag &&
-                (slice_or_tiles_left_boundary & 1) &&
-                (x0 % (1 << s->sps->log2_ctb_size)) == 0)
-                bs = 0;
-            else if (!s->pps->loop_filter_across_tiles_enabled_flag &&
-                     (slice_or_tiles_left_boundary & 2) &&
-                     (x0 % (1 << s->sps->log2_ctb_size)) == 0)
-                bs = 0;
-            if (x0 == 0 || s->sh.disable_deblocking_filter_flag == 1)
-                bs = 0;
-            if (bs)
-                s->vertical_bs[(x0 >> 3) + ((y0 + i) >> 2) * s->bs_width] = bs;
+                                                               x0 - 1, y0);
+
+            for (i = 0; i < (1 << log2_trafo_size); i += 4) {
+                int y_pu      = (y0 + i) >> log2_min_pu_size;
+                int y_tu      = (y0 + i) >> log2_min_tu_size;
+                MvField *left = &tab_mvf[y_pu * min_pu_width + xp_pu];
+                MvField *curr = &tab_mvf[y_pu * min_pu_width + xq_pu];
+                uint8_t left_cbf_luma = s->cbf_luma[y_tu * min_tu_width + xp_tu];
+                uint8_t curr_cbf_luma = s->cbf_luma[y_tu * min_tu_width + xq_tu];
+
+                if (curr->pred_flag == PF_INTRA || left->pred_flag == PF_INTRA)
+                    bs = 2;
+                else if (curr_cbf_luma || left_cbf_luma)
+                    bs = 1;
+                else
+                    bs = boundary_strength(s, curr, left, left_refPicList);
+                s->vertical_bs[(x0 + (y0 + i) * s->bs_width) >> 2] = bs;
+            }
         }
     }
 
-    // bs for TU internal vertical PU boundaries
-    if (log2_trafo_size > log2_min_pu_size && !is_intra)
+    if (log2_trafo_size > log2_min_pu_size && !is_intra) {
+        RefPicList *refPicList = ff_hevc_get_ref_list(s, s->ref,
+                                                           x0,
+                                                           y0);
+        // bs for TU internal horizontal PU boundaries
+        for (j = 8; j < (1 << log2_trafo_size); j += 8) {
+            int yp_pu = (y0 + j - 1) >> log2_min_pu_size;
+            int yq_pu = (y0 + j)     >> log2_min_pu_size;
+
+            for (i = 0; i < (1 << log2_trafo_size); i += 4) {
+                int x_pu = (x0 + i) >> log2_min_pu_size;
+                MvField *top  = &tab_mvf[yp_pu * min_pu_width + x_pu];
+                MvField *curr = &tab_mvf[yq_pu * min_pu_width + x_pu];
+
+                bs = boundary_strength(s, curr, top, refPicList);
+                s->horizontal_bs[((x0 + i) + (y0 + j) * s->bs_width) >> 2] = bs;
+            }
+        }
+
+        // bs for TU internal vertical PU boundaries
         for (j = 0; j < (1 << log2_trafo_size); j += 4) {
             int y_pu = (y0 + j) >> log2_min_pu_size;
-            int y_tu = (y0 + j) >> log2_min_tu_size;
 
             for (i = 8; i < (1 << log2_trafo_size); i += 8) {
                 int xp_pu = (x0 + i - 1) >> log2_min_pu_size;
                 int xq_pu = (x0 + i)     >> log2_min_pu_size;
-                int xp_tu = (x0 + i - 1) >> log2_min_tu_size;
-                int xq_tu = (x0 + i)     >> log2_min_tu_size;
                 MvField *left = &tab_mvf[y_pu * min_pu_width + xp_pu];
                 MvField *curr = &tab_mvf[y_pu * min_pu_width + xq_pu];
-                uint8_t left_cbf_luma = s->cbf_luma[y_tu * min_tu_width + xp_tu];
-                uint8_t curr_cbf_luma = s->cbf_luma[y_tu * min_tu_width + xq_tu];
-                RefPicList *left_refPicList = ff_hevc_get_ref_list(s, s->ref,
-                                                                   x0 + i - 1,
-                                                                   y0 + j);
-
-                bs = boundary_strength(s, curr, curr_cbf_luma,
-                                       left, left_cbf_luma, left_refPicList, 0);
-                if (s->sh.disable_deblocking_filter_flag == 1)
-                    bs = 0;
-                if (bs)
-                    s->vertical_bs[((x0 + i) >> 3) + ((y0 + j) >> 2) * s->bs_width] = bs;
+
+                bs = boundary_strength(s, curr, left, refPicList);
+                s->vertical_bs[((x0 + i) + (y0 + j) * s->bs_width) >> 2] = bs;
             }
         }
+    }
 }
 
 #undef LUMA
 #undef CB
 #undef CR
 
-void ff_hevc_hls_filter(HEVCContext *s, int x, int y)
+void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size)
 {
+    int x_end = x >= s->sps->width  - ctb_size;
     deblocking_filter_CTB(s, x, y);
-    if (s->sps->sao_enabled)
-        sao_filter_CTB(s, x, y);
+    if (s->sps->sao_enabled) {
+        int y_end = y >= s->sps->height - ctb_size;
+        if (y && x)
+            sao_filter_CTB(s, x - ctb_size, y - ctb_size);
+        if (x && y_end)
+            sao_filter_CTB(s, x - ctb_size, y);
+        if (y && x_end) {
+            sao_filter_CTB(s, x, y - ctb_size);
+            if (s->threads_type & FF_THREAD_FRAME )
+                ff_thread_report_progress(&s->ref->tf, y, 0);
+        }
+        if (x_end && y_end) {
+            sao_filter_CTB(s, x , y);
+            if (s->threads_type & FF_THREAD_FRAME )
+                ff_thread_report_progress(&s->ref->tf, y + ctb_size, 0);
+        }
+    } else if (s->threads_type & FF_THREAD_FRAME && x_end)
+        ff_thread_report_progress(&s->ref->tf, y + ctb_size - 4, 0);
 }
 
 void ff_hevc_hls_filters(HEVCContext *s, int x_ctb, int y_ctb, int ctb_size)
 {
+    int x_end = x_ctb >= s->sps->width  - ctb_size;
+    int y_end = y_ctb >= s->sps->height - ctb_size;
     if (y_ctb && x_ctb)
-        ff_hevc_hls_filter(s, x_ctb - ctb_size, y_ctb - ctb_size);
-    if (y_ctb && x_ctb >= s->sps->width - ctb_size) {
-        ff_hevc_hls_filter(s, x_ctb, y_ctb - ctb_size);
-        ff_thread_report_progress(&s->ref->tf, y_ctb - ctb_size, 0);
-    }
-    if (x_ctb && y_ctb >= s->sps->height - ctb_size)
-        ff_hevc_hls_filter(s, x_ctb - ctb_size, y_ctb);
+        ff_hevc_hls_filter(s, x_ctb - ctb_size, y_ctb - ctb_size, ctb_size);
+    if (y_ctb && x_end)
+        ff_hevc_hls_filter(s, x_ctb, y_ctb - ctb_size, ctb_size);
+    if (x_ctb && y_end)
+        ff_hevc_hls_filter(s, x_ctb - ctb_size, y_ctb, ctb_size);
 }
diff --git a/libavcodec/hevc_mvs.c b/libavcodec/hevc_mvs.c
index 2fe4dbb..1d4c002 100644
--- a/libavcodec/hevc_mvs.c
+++ b/libavcodec/hevc_mvs.c
@@ -4,20 +4,20 @@
  * Copyright (C) 2012 - 2013 Guillaume Martres
  * Copyright (C) 2013 Anand Meher Kotra
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -41,7 +41,7 @@ static const uint8_t l0_l1_cand_idx[12][2] = {
 void ff_hevc_set_neighbour_available(HEVCContext *s, int x0, int y0,
                                      int nPbW, int nPbH)
 {
-    HEVCLocalContext *lc = &s->HEVClc;
+    HEVCLocalContext *lc = s->HEVClc;
     int x0b = x0 & ((1 << s->sps->log2_ctb_size) - 1);
     int y0b = y0 & ((1 << s->sps->log2_ctb_size) - 1);
 
@@ -52,8 +52,7 @@ void ff_hevc_set_neighbour_available(HEVCContext *s, int x0, int y0,
             ((x0b + nPbW) == (1 << s->sps->log2_ctb_size)) ?
                     lc->ctb_up_right_flag && !y0b : lc->na.cand_up;
     lc->na.cand_up_right =
-            ((x0b + nPbW) == (1 << s->sps->log2_ctb_size) ?
-                    lc->ctb_up_right_flag && !y0b : lc->na.cand_up )
+            lc->na.cand_up_right_sap
                      && (x0 + nPbW) < lc->end_of_tiles_x;
     lc->na.cand_bottom_left = ((y0 + nPbH) >= lc->end_of_tiles_y) ? 0 : lc->na.cand_left;
 }
@@ -61,56 +60,29 @@ void ff_hevc_set_neighbour_available(HEVCContext *s, int x0, int y0,
 /*
  * 6.4.1 Derivation process for z-scan order block availability
  */
-static int z_scan_block_avail(HEVCContext *s, int xCurr, int yCurr,
+static av_always_inline int z_scan_block_avail(HEVCContext *s, int xCurr, int yCurr,
                               int xN, int yN)
 {
 #define MIN_TB_ADDR_ZS(x, y)                                            \
-    s->pps->min_tb_addr_zs[(y) * s->sps->min_tb_width + (x)]
-    int Curr = MIN_TB_ADDR_ZS(xCurr >> s->sps->log2_min_tb_size,
-                              yCurr >> s->sps->log2_min_tb_size);
-    int N;
-
-    if (xN < 0 || yN < 0 ||
-        xN >= s->sps->width ||
-        yN >= s->sps->height)
-        return 0;
-
-    N = MIN_TB_ADDR_ZS(xN >> s->sps->log2_min_tb_size,
-                       yN >> s->sps->log2_min_tb_size);
-
-    return N <= Curr;
-}
-
-static int same_prediction_block(HEVCLocalContext *lc, int log2_cb_size,
-                                 int x0, int y0, int nPbW, int nPbH,
-                                 int xA1, int yA1, int partIdx)
-{
-    return !(nPbW << 1 == 1 << log2_cb_size &&
-             nPbH << 1 == 1 << log2_cb_size && partIdx == 1 &&
-             lc->cu.x + nPbW > xA1 &&
-             lc->cu.y + nPbH <= yA1);
-}
+    s->pps->min_tb_addr_zs[(y) * (s->sps->tb_mask+2) + (x)]
 
-/*
- * 6.4.2 Derivation process for prediction block availability
- */
-static int check_prediction_block_available(HEVCContext *s, int log2_cb_size,
-                                            int x0, int y0, int nPbW, int nPbH,
-                                            int xA1, int yA1, int partIdx)
-{
-    HEVCLocalContext *lc = &s->HEVClc;
-
-    if (lc->cu.x < xA1 && lc->cu.y < yA1 &&
-        (lc->cu.x + (1 << log2_cb_size)) > xA1 &&
-        (lc->cu.y + (1 << log2_cb_size)) > yA1)
-        return same_prediction_block(lc, log2_cb_size, x0, y0,
-                                     nPbW, nPbH, xA1, yA1, partIdx);
-    else
-        return z_scan_block_avail(s, x0, y0, xA1, yA1);
+    int xCurr_ctb = xCurr >> s->sps->log2_ctb_size;
+    int yCurr_ctb = yCurr >> s->sps->log2_ctb_size;
+    int xN_ctb    = xN    >> s->sps->log2_ctb_size;
+    int yN_ctb    = yN    >> s->sps->log2_ctb_size;
+    if( yN_ctb < yCurr_ctb || xN_ctb < xCurr_ctb )
+        return 1;
+    else {
+        int Curr = MIN_TB_ADDR_ZS((xCurr >> s->sps->log2_min_tb_size) & s->sps->tb_mask,
+                (yCurr >> s->sps->log2_min_tb_size) & s->sps->tb_mask);
+        int N    = MIN_TB_ADDR_ZS((xN >> s->sps->log2_min_tb_size) & s->sps->tb_mask,
+                (yN >> s->sps->log2_min_tb_size) & s->sps->tb_mask);
+        return N <= Curr;
+    }
 }
 
 //check if the two luma locations belong to the same mostion estimation region
-static int isDiffMER(HEVCContext *s, int xN, int yN, int xP, int yP)
+static av_always_inline int is_diff_mer(HEVCContext *s, int xN, int yN, int xP, int yP)
 {
     uint8_t plevel = s->pps->log2_parallel_merge_level;
 
@@ -118,21 +90,24 @@ static int isDiffMER(HEVCContext *s, int xN, int yN, int xP, int yP)
            yN >> plevel == yP >> plevel;
 }
 
+#define MATCH_MV(x) (AV_RN32A(&A.x) == AV_RN32A(&B.x))
 #define MATCH(x) (A.x == B.x)
 
 // check if the mv's and refidx are the same between A and B
-static int compareMVrefidx(struct MvField A, struct MvField B)
+static av_always_inline int compare_mv_ref_idx(struct MvField A, struct MvField B)
 {
-    if (A.pred_flag[0] && A.pred_flag[1] && B.pred_flag[0] && B.pred_flag[1])
-        return MATCH(ref_idx[0]) && MATCH(mv[0].x) && MATCH(mv[0].y) &&
-               MATCH(ref_idx[1]) && MATCH(mv[1].x) && MATCH(mv[1].y);
-
-    if (A.pred_flag[0] && !A.pred_flag[1] && B.pred_flag[0] && !B.pred_flag[1])
-        return MATCH(ref_idx[0]) && MATCH(mv[0].x) && MATCH(mv[0].y);
-
-    if (!A.pred_flag[0] && A.pred_flag[1] && !B.pred_flag[0] && B.pred_flag[1])
-        return MATCH(ref_idx[1]) && MATCH(mv[1].x) && MATCH(mv[1].y);
-
+    int a_pf = A.pred_flag;
+    int b_pf = B.pred_flag;
+    if (a_pf == b_pf) {
+        if (a_pf == PF_BI) {
+            return MATCH(ref_idx[0]) && MATCH_MV(mv[0]) &&
+                   MATCH(ref_idx[1]) && MATCH_MV(mv[1]);
+        } else if (a_pf == PF_L0) {
+            return MATCH(ref_idx[0]) && MATCH_MV(mv[0]);
+        } else if (a_pf == PF_L1) {
+            return MATCH(ref_idx[1]) && MATCH_MV(mv[1]);
+        }
+    }
     return 0;
 }
 
@@ -140,14 +115,14 @@ static av_always_inline void mv_scale(Mv *dst, Mv *src, int td, int tb)
 {
     int tx, scale_factor;
 
-    td = av_clip_int8_c(td);
-    tb = av_clip_int8_c(tb);
+    td = av_clip_int8(td);
+    tb = av_clip_int8(tb);
     tx = (0x4000 + abs(td / 2)) / td;
-    scale_factor = av_clip_c((tb * tx + 32) >> 6, -4096, 4095);
-    dst->x = av_clip_int16_c((scale_factor * src->x + 127 +
-                             (scale_factor * src->x < 0)) >> 8);
-    dst->y = av_clip_int16_c((scale_factor * src->y + 127 +
-                             (scale_factor * src->y < 0)) >> 8);
+    scale_factor = av_clip((tb * tx + 32) >> 6, -4096, 4095);
+    dst->x = av_clip_int16((scale_factor * src->x + 127 +
+                           (scale_factor * src->x < 0)) >> 8);
+    dst->y = av_clip_int16((scale_factor * src->y + 127 +
+                           (scale_factor * src->y < 0)) >> 8);
 }
 
 static int check_mvset(Mv *mvLXCol, Mv *mvCol,
@@ -168,10 +143,7 @@ static int check_mvset(Mv *mvLXCol, Mv *mvCol,
     col_poc_diff = colPic - refPicList_col[listCol].list[refidxCol];
     cur_poc_diff = poc    - refPicList[X].list[refIdxLx];
 
-    if (!col_poc_diff)
-        col_poc_diff = 1;  // error resilience
-
-    if (cur_lt || col_poc_diff == cur_poc_diff) {
+    if (cur_lt || col_poc_diff == cur_poc_diff || !col_poc_diff) {
         mvLXCol->x = mvCol->x;
         mvLXCol->y = mvCol->y;
     } else {
@@ -193,32 +165,30 @@ static int derive_temporal_colocated_mvs(HEVCContext *s, MvField temp_col,
 {
     RefPicList *refPicList = s->ref->refPicList;
 
-    if (temp_col.is_intra) {
-        mvLXCol->x = 0;
-        mvLXCol->y = 0;
+    if (temp_col.pred_flag == PF_INTRA)
         return 0;
-    }
 
-    if (temp_col.pred_flag[0] == 0)
+    if (!(temp_col.pred_flag & PF_L0))
         return CHECK_MVSET(1);
-    else if (temp_col.pred_flag[0] == 1 && temp_col.pred_flag[1] == 0)
+    else if (temp_col.pred_flag == PF_L0)
         return CHECK_MVSET(0);
-    else if (temp_col.pred_flag[0] == 1 && temp_col.pred_flag[1] == 1) {
+    else if (temp_col.pred_flag == PF_BI) {
         int check_diffpicount = 0;
-        int i = 0;
-        for (i = 0; i < refPicList[0].nb_refs; i++) {
-            if (refPicList[0].list[i] > s->poc)
-                check_diffpicount++;
-        }
-        for (i = 0; i < refPicList[1].nb_refs; i++) {
-            if (refPicList[1].list[i] > s->poc)
-                check_diffpicount++;
+        int i, j;
+        for (j = 0; j < 2; j++) {
+            for (i = 0; i < refPicList[j].nb_refs; i++) {
+                if (refPicList[j].list[i] > s->poc) {
+                    check_diffpicount++;
+                    break;
+                }
+            }
         }
-        if (check_diffpicount == 0 && X == 0)
-            return CHECK_MVSET(0);
-        else if (check_diffpicount == 0 && X == 1)
-            return CHECK_MVSET(1);
-        else {
+        if (!check_diffpicount) {
+            if (X==0)
+                return CHECK_MVSET(0);
+            else
+                return CHECK_MVSET(1);
+        } else {
             if (s->sh.collocated_list == L1)
                 return CHECK_MVSET(0);
             else
@@ -233,7 +203,8 @@ static int derive_temporal_colocated_mvs(HEVCContext *s, MvField temp_col,
     tab_mvf[(y) * min_pu_width + x]
 
 #define TAB_MVF_PU(v)                                                   \
-    TAB_MVF(x ## v ## _pu, y ## v ## _pu)
+    TAB_MVF(((x ## v) >> s->sps->log2_min_pu_size),                     \
+            ((y ## v) >> s->sps->log2_min_pu_size))
 
 #define DERIVE_TEMPORAL_COLOCATED_MVS                                   \
     derive_temporal_colocated_mvs(s, temp_col,                          \
@@ -266,13 +237,14 @@ static int temporal_luma_motion_vector(HEVCContext *s, int x0, int y0,
     x = x0 + nPbW;
     y = y0 + nPbH;
 
-    ff_thread_await_progress(&ref->tf, y, 0);
     if (tab_mvf &&
         (y0 >> s->sps->log2_ctb_size) == (y >> s->sps->log2_ctb_size) &&
         y < s->sps->height &&
         x < s->sps->width) {
-        x                  = ((x >> 4) << 4);
-        y                  = ((y >> 4) << 4);
+        x                 &= -16;
+        y                 &= -16;
+        if (s->threads_type == FF_THREAD_FRAME)
+            ff_thread_await_progress(&ref->tf, y, 0);
         x_pu               = x >> s->sps->log2_min_pu_size;
         y_pu               = y >> s->sps->log2_min_pu_size;
         temp_col           = TAB_MVF(x_pu, y_pu);
@@ -283,8 +255,10 @@ static int temporal_luma_motion_vector(HEVCContext *s, int x0, int y0,
     if (tab_mvf && !availableFlagLXCol) {
         x                  = x0 + (nPbW >> 1);
         y                  = y0 + (nPbH >> 1);
-        x                  = ((x >> 4) << 4);
-        y                  = ((y >> 4) << 4);
+        x                 &= -16;
+        y                 &= -16;
+        if (s->threads_type == FF_THREAD_FRAME)
+            ff_thread_await_progress(&ref->tf, y, 0);
         x_pu               = x >> s->sps->log2_min_pu_size;
         y_pu               = y >> s->sps->log2_min_pu_size;
         temp_col           = TAB_MVF(x_pu, y_pu);
@@ -294,15 +268,13 @@ static int temporal_luma_motion_vector(HEVCContext *s, int x0, int y0,
 }
 
 #define AVAILABLE(cand, v)                                      \
-    (cand && !TAB_MVF_PU(v).is_intra)
+    (cand && !(TAB_MVF_PU(v).pred_flag == PF_INTRA))
 
 #define PRED_BLOCK_AVAILABLE(v)                                 \
-    check_prediction_block_available(s, log2_cb_size,           \
-                                     x0, y0, nPbW, nPbH,        \
-                                     x ## v, y ## v, part_idx)
+    z_scan_block_avail(s, x0, y0, x ## v, y ## v)
 
 #define COMPARE_MV_REFIDX(a, b)                                 \
-    compareMVrefidx(TAB_MVF_PU(a), TAB_MVF_PU(b))
+    compare_mv_ref_idx(TAB_MVF_PU(a), TAB_MVF_PU(b))
 
 /*
  * 8.5.3.1.2  Derivation process for spatial merging candidates
@@ -311,9 +283,10 @@ static void derive_spatial_merge_candidates(HEVCContext *s, int x0, int y0,
                                             int nPbW, int nPbH,
                                             int log2_cb_size,
                                             int singleMCLFlag, int part_idx,
+                                            int merge_idx,
                                             struct MvField mergecandlist[])
 {
-    HEVCLocalContext *lc   = &s->HEVClc;
+    HEVCLocalContext *lc   = s->HEVClc;
     RefPicList *refPicList = s->ref->refPicList;
     MvField *tab_mvf       = s->ref->tab_mvf;
 
@@ -327,33 +300,21 @@ static void derive_spatial_merge_candidates(HEVCContext *s, int x0, int y0,
 
     const int xA1    = x0 - 1;
     const int yA1    = y0 + nPbH - 1;
-    const int xA1_pu = xA1 >> s->sps->log2_min_pu_size;
-    const int yA1_pu = yA1 >> s->sps->log2_min_pu_size;
 
     const int xB1    = x0 + nPbW - 1;
     const int yB1    = y0 - 1;
-    const int xB1_pu = xB1 >> s->sps->log2_min_pu_size;
-    const int yB1_pu = yB1 >> s->sps->log2_min_pu_size;
 
     const int xB0    = x0 + nPbW;
     const int yB0    = y0 - 1;
-    const int xB0_pu = xB0 >> s->sps->log2_min_pu_size;
-    const int yB0_pu = yB0 >> s->sps->log2_min_pu_size;
 
     const int xA0    = x0 - 1;
     const int yA0    = y0 + nPbH;
-    const int xA0_pu = xA0 >> s->sps->log2_min_pu_size;
-    const int yA0_pu = yA0 >> s->sps->log2_min_pu_size;
 
     const int xB2    = x0 - 1;
     const int yB2    = y0 - 1;
-    const int xB2_pu = xB2 >> s->sps->log2_min_pu_size;
-    const int yB2_pu = yB2 >> s->sps->log2_min_pu_size;
 
     const int nb_refs = (s->sh.slice_type == P_SLICE) ?
                         s->sh.nb_refs[0] : FFMIN(s->sh.nb_refs[0], s->sh.nb_refs[1]);
-    int check_MER   = 1;
-    int check_MER_1 = 1;
 
     int zero_idx = 0;
 
@@ -365,86 +326,77 @@ static void derive_spatial_merge_candidates(HEVCContext *s, int x0, int y0,
     int is_available_b0;
     int is_available_b1;
     int is_available_b2;
-    int check_B0;
-    int check_A0;
 
-    //first left spatial merge candidate
-    is_available_a1 = AVAILABLE(cand_left, A1);
 
     if (!singleMCLFlag && part_idx == 1 &&
         (lc->cu.part_mode == PART_Nx2N ||
          lc->cu.part_mode == PART_nLx2N ||
          lc->cu.part_mode == PART_nRx2N) ||
-        isDiffMER(s, xA1, yA1, x0, y0)) {
+        is_diff_mer(s, xA1, yA1, x0, y0)) {
         is_available_a1 = 0;
+    } else {
+        is_available_a1 = AVAILABLE(cand_left, A1);
+        if (is_available_a1) {
+            mergecandlist[nb_merge_cand] = TAB_MVF_PU(A1);
+            if (merge_idx == 0) return;
+            nb_merge_cand++;
+        }
     }
 
-    if (is_available_a1)
-        mergecandlist[nb_merge_cand++] = TAB_MVF_PU(A1);
-
-    // above spatial merge candidate
-    is_available_b1 = AVAILABLE(cand_up, B1);
-
     if (!singleMCLFlag && part_idx == 1 &&
         (lc->cu.part_mode == PART_2NxN ||
          lc->cu.part_mode == PART_2NxnU ||
          lc->cu.part_mode == PART_2NxnD) ||
-        isDiffMER(s, xB1, yB1, x0, y0)) {
+        is_diff_mer(s, xB1, yB1, x0, y0)) {
         is_available_b1 = 0;
+    } else {
+        is_available_b1 = AVAILABLE(cand_up, B1);
+        if (is_available_b1 &&
+            !(is_available_a1 && COMPARE_MV_REFIDX(B1, A1))) {
+            mergecandlist[nb_merge_cand] = TAB_MVF_PU(B1);
+            if (merge_idx == nb_merge_cand) return;
+            nb_merge_cand++;
+        }
     }
 
-    if (is_available_a1 && is_available_b1)
-        check_MER = !COMPARE_MV_REFIDX(B1, A1);
-
-    if (is_available_b1 && check_MER)
-        mergecandlist[nb_merge_cand++] = TAB_MVF_PU(B1);
-
     // above right spatial merge candidate
-    check_MER = 1;
-    check_B0  = PRED_BLOCK_AVAILABLE(B0);
-
-    is_available_b0 = check_B0 && AVAILABLE(cand_up_right, B0);
-
-    if (isDiffMER(s, xB0, yB0, x0, y0))
-        is_available_b0 = 0;
-
-    if (is_available_b1 && is_available_b0)
-        check_MER = !COMPARE_MV_REFIDX(B0, B1);
-
-    if (is_available_b0 && check_MER)
-        mergecandlist[nb_merge_cand++] = TAB_MVF_PU(B0);
+    is_available_b0 = AVAILABLE(cand_up_right, B0) &&
+                      xB0 < s->sps->width &&
+                      PRED_BLOCK_AVAILABLE(B0) &&
+                      !is_diff_mer(s, xB0, yB0, x0, y0);
+
+    if (is_available_b0 &&
+        !(is_available_b1 && COMPARE_MV_REFIDX(B0, B1))) {
+        mergecandlist[nb_merge_cand] = TAB_MVF_PU(B0);
+        if (merge_idx == nb_merge_cand) return;
+        nb_merge_cand++;
+    }
 
     // left bottom spatial merge candidate
-    check_MER = 1;
-    check_A0  = PRED_BLOCK_AVAILABLE(A0);
-
-    is_available_a0 = check_A0 && AVAILABLE(cand_bottom_left, A0);
-
-    if (isDiffMER(s, xA0, yA0, x0, y0))
-        is_available_a0 = 0;
-
-    if (is_available_a1 && is_available_a0)
-        check_MER = !COMPARE_MV_REFIDX(A0, A1);
-
-    if (is_available_a0 && check_MER)
-        mergecandlist[nb_merge_cand++] = TAB_MVF_PU(A0);
+    is_available_a0 = AVAILABLE(cand_bottom_left, A0) &&
+                      yA0 < s->sps->height &&
+                      PRED_BLOCK_AVAILABLE(A0) &&
+                      !is_diff_mer(s, xA0, yA0, x0, y0);
+
+    if (is_available_a0 &&
+        !(is_available_a1 && COMPARE_MV_REFIDX(A0, A1))) {
+        mergecandlist[nb_merge_cand] = TAB_MVF_PU(A0);
+        if (merge_idx == nb_merge_cand) return;
+        nb_merge_cand++;
+    }
 
     // above left spatial merge candidate
-    check_MER = 1;
-
-    is_available_b2 = AVAILABLE(cand_up_left, B2);
-
-    if (isDiffMER(s, xB2, yB2, x0, y0))
-        is_available_b2 = 0;
-
-    if (is_available_a1 && is_available_b2)
-        check_MER = !COMPARE_MV_REFIDX(B2, A1);
-
-    if (is_available_b1 && is_available_b2)
-        check_MER_1 = !COMPARE_MV_REFIDX(B2, B1);
-
-    if (is_available_b2 && check_MER && check_MER_1 && nb_merge_cand != 4)
-        mergecandlist[nb_merge_cand++] = TAB_MVF_PU(B2);
+    is_available_b2 = AVAILABLE(cand_up_left, B2) &&
+                      !is_diff_mer(s, xB2, yB2, x0, y0);
+
+    if (is_available_b2 &&
+        !(is_available_a1 && COMPARE_MV_REFIDX(B2, A1)) &&
+        !(is_available_b1 && COMPARE_MV_REFIDX(B2, B1)) &&
+        nb_merge_cand != 4) {
+        mergecandlist[nb_merge_cand] = TAB_MVF_PU(B2);
+        if (merge_idx == nb_merge_cand) return;
+        nb_merge_cand++;
+    }
 
     // temporal motion vector candidate
     if (s->sh.slice_temporal_mvp_enabled_flag &&
@@ -457,9 +409,7 @@ static void derive_spatial_merge_candidates(HEVCContext *s, int x0, int y0,
                                                        0, &mv_l1_col, 1) : 0;
 
         if (available_l0 || available_l1) {
-            mergecandlist[nb_merge_cand].is_intra     = 0;
-            mergecandlist[nb_merge_cand].pred_flag[0] = available_l0;
-            mergecandlist[nb_merge_cand].pred_flag[1] = available_l1;
+            mergecandlist[nb_merge_cand].pred_flag = available_l0 + (available_l1 << 1);
             if (available_l0) {
                 mergecandlist[nb_merge_cand].mv[0]      = mv_l0_col;
                 mergecandlist[nb_merge_cand].ref_idx[0] = 0;
@@ -468,6 +418,7 @@ static void derive_spatial_merge_candidates(HEVCContext *s, int x0, int y0,
                 mergecandlist[nb_merge_cand].mv[1]      = mv_l1_col;
                 mergecandlist[nb_merge_cand].ref_idx[1] = 0;
             }
+            if (merge_idx == nb_merge_cand) return;
             nb_merge_cand++;
         }
     }
@@ -477,7 +428,7 @@ static void derive_spatial_merge_candidates(HEVCContext *s, int x0, int y0,
     // combined bi-predictive merge candidates  (applies for B slices)
     if (s->sh.slice_type == B_SLICE && nb_orig_merge_cand > 1 &&
         nb_orig_merge_cand < s->sh.max_num_merge_cand) {
-        int comb_idx;
+        int comb_idx = 0;
 
         for (comb_idx = 0; nb_merge_cand < s->sh.max_num_merge_cand &&
                            comb_idx < nb_orig_merge_cand * (nb_orig_merge_cand - 1); comb_idx++) {
@@ -486,20 +437,16 @@ static void derive_spatial_merge_candidates(HEVCContext *s, int x0, int y0,
             MvField l0_cand = mergecandlist[l0_cand_idx];
             MvField l1_cand = mergecandlist[l1_cand_idx];
 
-            if (l0_cand.pred_flag[0] && l1_cand.pred_flag[1] &&
+            if ((l0_cand.pred_flag & PF_L0) && (l1_cand.pred_flag & PF_L1) &&
                 (refPicList[0].list[l0_cand.ref_idx[0]] !=
                  refPicList[1].list[l1_cand.ref_idx[1]] ||
-                 l0_cand.mv[0].x != l1_cand.mv[1].x ||
-                 l0_cand.mv[0].y != l1_cand.mv[1].y)) {
+                 AV_RN32A(&l0_cand.mv[0]) != AV_RN32A(&l1_cand.mv[1]))) {
                 mergecandlist[nb_merge_cand].ref_idx[0]   = l0_cand.ref_idx[0];
                 mergecandlist[nb_merge_cand].ref_idx[1]   = l1_cand.ref_idx[1];
-                mergecandlist[nb_merge_cand].pred_flag[0] = 1;
-                mergecandlist[nb_merge_cand].pred_flag[1] = 1;
-                mergecandlist[nb_merge_cand].mv[0].x      = l0_cand.mv[0].x;
-                mergecandlist[nb_merge_cand].mv[0].y      = l0_cand.mv[0].y;
-                mergecandlist[nb_merge_cand].mv[1].x      = l1_cand.mv[1].x;
-                mergecandlist[nb_merge_cand].mv[1].y      = l1_cand.mv[1].y;
-                mergecandlist[nb_merge_cand].is_intra     = 0;
+                mergecandlist[nb_merge_cand].pred_flag    = PF_BI;
+                AV_COPY32(&mergecandlist[nb_merge_cand].mv[0], &l0_cand.mv[0]);
+                AV_COPY32(&mergecandlist[nb_merge_cand].mv[1], &l1_cand.mv[1]);
+                if (merge_idx == nb_merge_cand) return;
                 nb_merge_cand++;
             }
         }
@@ -507,16 +454,13 @@ static void derive_spatial_merge_candidates(HEVCContext *s, int x0, int y0,
 
     // append Zero motion vector candidates
     while (nb_merge_cand < s->sh.max_num_merge_cand) {
-        mergecandlist[nb_merge_cand].pred_flag[0] = 1;
-        mergecandlist[nb_merge_cand].pred_flag[1] = s->sh.slice_type == B_SLICE;
-        mergecandlist[nb_merge_cand].mv[0].x      = 0;
-        mergecandlist[nb_merge_cand].mv[0].y      = 0;
-        mergecandlist[nb_merge_cand].mv[1].x      = 0;
-        mergecandlist[nb_merge_cand].mv[1].y      = 0;
-        mergecandlist[nb_merge_cand].is_intra     = 0;
+        mergecandlist[nb_merge_cand].pred_flag    = PF_L0 + ((s->sh.slice_type == B_SLICE) << 1);
+        AV_ZERO32(mergecandlist[nb_merge_cand].mv+0);
+        AV_ZERO32(mergecandlist[nb_merge_cand].mv+1);
         mergecandlist[nb_merge_cand].ref_idx[0]   = zero_idx < nb_refs ? zero_idx : 0;
         mergecandlist[nb_merge_cand].ref_idx[1]   = zero_idx < nb_refs ? zero_idx : 0;
 
+        if (merge_idx == nb_merge_cand) return;
         nb_merge_cand++;
         zero_idx++;
     }
@@ -531,10 +475,10 @@ void ff_hevc_luma_mv_merge_mode(HEVCContext *s, int x0, int y0, int nPbW,
 {
     int singleMCLFlag = 0;
     int nCS = 1 << log2_cb_size;
-    struct MvField mergecand_list[MRG_MAX_NUM_CANDS] = { { { { 0 } } } };
+    LOCAL_ALIGNED(4, MvField, mergecand_list, [MRG_MAX_NUM_CANDS]);
     int nPbW2 = nPbW;
     int nPbH2 = nPbH;
-    HEVCLocalContext *lc = &s->HEVClc;
+    HEVCLocalContext *lc = s->HEVClc;
 
     if (s->pps->log2_parallel_merge_level > 2 && nCS == 8) {
         singleMCLFlag = 1;
@@ -547,13 +491,12 @@ void ff_hevc_luma_mv_merge_mode(HEVCContext *s, int x0, int y0, int nPbW,
 
     ff_hevc_set_neighbour_available(s, x0, y0, nPbW, nPbH);
     derive_spatial_merge_candidates(s, x0, y0, nPbW, nPbH, log2_cb_size,
-                                    singleMCLFlag, part_idx, mergecand_list);
+                                    singleMCLFlag, part_idx,
+                                    merge_idx, mergecand_list);
 
-    if (mergecand_list[merge_idx].pred_flag[0] == 1 &&
-        mergecand_list[merge_idx].pred_flag[1] == 1 &&
+    if (mergecand_list[merge_idx].pred_flag == PF_BI &&
         (nPbW2 + nPbH2) == 12) {
-        mergecand_list[merge_idx].ref_idx[1]   = -1;
-        mergecand_list[merge_idx].pred_flag[1] = 0;
+        mergecand_list[merge_idx].pred_flag = PF_L0;
     }
 
     *mv = mergecand_list[merge_idx];
@@ -584,7 +527,7 @@ static int mv_mp_mode_mx(HEVCContext *s, int x, int y, int pred_flag_index,
 
     RefPicList *refPicList = s->ref->refPicList;
 
-    if (TAB_MVF(x, y).pred_flag[pred_flag_index] == 1 &&
+    if (((TAB_MVF(x, y).pred_flag) & (1 << pred_flag_index)) &&
         refPicList[pred_flag_index].list[TAB_MVF(x, y).ref_idx[pred_flag_index]] == refPicList[ref_idx_curr].list[ref_idx]) {
         *mv = TAB_MVF(x, y).mv[pred_flag_index];
         return 1;
@@ -599,82 +542,73 @@ static int mv_mp_mode_mx_lt(HEVCContext *s, int x, int y, int pred_flag_index,
     int min_pu_width = s->sps->min_pu_width;
 
     RefPicList *refPicList = s->ref->refPicList;
-    int currIsLongTerm     = refPicList[ref_idx_curr].isLongTerm[ref_idx];
 
-    int colIsLongTerm =
-        refPicList[pred_flag_index].isLongTerm[(TAB_MVF(x, y).ref_idx[pred_flag_index])];
+    if ((TAB_MVF(x, y).pred_flag) & (1 << pred_flag_index)) {
+        int currIsLongTerm     = refPicList[ref_idx_curr].isLongTerm[ref_idx];
 
-    if (TAB_MVF(x, y).pred_flag[pred_flag_index] &&
-        colIsLongTerm == currIsLongTerm) {
-        *mv = TAB_MVF(x, y).mv[pred_flag_index];
-        if (!currIsLongTerm)
-            dist_scale(s, mv, min_pu_width, x, y,
-                       pred_flag_index, ref_idx_curr, ref_idx);
-        return 1;
+        int colIsLongTerm =
+            refPicList[pred_flag_index].isLongTerm[(TAB_MVF(x, y).ref_idx[pred_flag_index])];
+
+        if (colIsLongTerm == currIsLongTerm) {
+            *mv = TAB_MVF(x, y).mv[pred_flag_index];
+            if (!currIsLongTerm)
+                dist_scale(s, mv, min_pu_width, x, y,
+                           pred_flag_index, ref_idx_curr, ref_idx);
+            return 1;
+        }
     }
     return 0;
 }
 
 #define MP_MX(v, pred, mx)                                      \
-    mv_mp_mode_mx(s, x ## v ## _pu, y ## v ## _pu, pred,        \
-                  &mx, ref_idx_curr, ref_idx)
+    mv_mp_mode_mx(s,                                            \
+                  (x ## v) >> s->sps->log2_min_pu_size,         \
+                  (y ## v) >> s->sps->log2_min_pu_size,         \
+                  pred, &mx, ref_idx_curr, ref_idx)
 
 #define MP_MX_LT(v, pred, mx)                                   \
-    mv_mp_mode_mx_lt(s, x ## v ## _pu, y ## v ## _pu, pred,     \
-                     &mx, ref_idx_curr, ref_idx)
+    mv_mp_mode_mx_lt(s,                                         \
+                     (x ## v) >> s->sps->log2_min_pu_size,      \
+                     (y ## v) >> s->sps->log2_min_pu_size,      \
+                     pred, &mx, ref_idx_curr, ref_idx)
 
 void ff_hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
                               int nPbH, int log2_cb_size, int part_idx,
                               int merge_idx, MvField *mv,
                               int mvp_lx_flag, int LX)
 {
-    HEVCLocalContext *lc = &s->HEVClc;
+    HEVCLocalContext *lc = s->HEVClc;
     MvField *tab_mvf = s->ref->tab_mvf;
     int isScaledFlag_L0 = 0;
-    int availableFlagLXA0 = 0;
-    int availableFlagLXB0 = 0;
+    int availableFlagLXA0 = 1;
+    int availableFlagLXB0 = 1;
     int numMVPCandLX = 0;
     int min_pu_width = s->sps->min_pu_width;
 
     int xA0, yA0;
-    int xA0_pu, yA0_pu;
     int is_available_a0;
-
     int xA1, yA1;
-    int xA1_pu, yA1_pu;
     int is_available_a1;
-
     int xB0, yB0;
-    int xB0_pu, yB0_pu;
     int is_available_b0;
-
     int xB1, yB1;
-    int xB1_pu = 0, yB1_pu = 0;
-    int is_available_b1 = 0;
-
+    int is_available_b1;
     int xB2, yB2;
-    int xB2_pu = 0, yB2_pu = 0;
-    int is_available_b2 = 0;
+    int is_available_b2;
+
     Mv mvpcand_list[2] = { { 0 } };
-    Mv mxA = { 0 };
-    Mv mxB = { 0 };
+    Mv mxA;
+    Mv mxB;
     int ref_idx_curr = 0;
     int ref_idx = 0;
     int pred_flag_index_l0;
     int pred_flag_index_l1;
-    int x0b = x0 & ((1 << s->sps->log2_ctb_size) - 1);
-    int y0b = y0 & ((1 << s->sps->log2_ctb_size) - 1);
-
-    int cand_up = (lc->ctb_up_flag || y0b);
-    int cand_left = (lc->ctb_left_flag || x0b);
-    int cand_up_left =
-            (!x0b && !y0b) ? lc->ctb_up_left_flag : cand_left && cand_up;
-    int cand_up_right =
-            (x0b + nPbW == (1 << s->sps->log2_ctb_size) ||
-             x0  + nPbW >= lc->end_of_tiles_x) ? lc->ctb_up_right_flag && !y0b
-                                               : cand_up;
-    int cand_bottom_left = (y0 + nPbH >= lc->end_of_tiles_y) ? 0 : cand_left;
 
+    const int cand_bottom_left = lc->na.cand_bottom_left;
+    const int cand_left        = lc->na.cand_left;
+    const int cand_up_left     = lc->na.cand_up_left;
+    const int cand_up          = lc->na.cand_up;
+    const int cand_up_right    = lc->na.cand_up_right_sap;
     ref_idx_curr       = LX;
     ref_idx            = mv->ref_idx[LX];
     pred_flag_index_l0 = LX;
@@ -683,92 +617,107 @@ void ff_hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
     // left bottom spatial candidate
     xA0 = x0 - 1;
     yA0 = y0 + nPbH;
-    xA0_pu = xA0 >> s->sps->log2_min_pu_size;
-    yA0_pu = yA0 >> s->sps->log2_min_pu_size;
 
-    is_available_a0 = PRED_BLOCK_AVAILABLE(A0) && AVAILABLE(cand_bottom_left, A0);
+    is_available_a0 = AVAILABLE(cand_bottom_left, A0) &&
+                      yA0 < s->sps->height &&
+                      PRED_BLOCK_AVAILABLE(A0);
 
     //left spatial merge candidate
     xA1    = x0 - 1;
     yA1    = y0 + nPbH - 1;
-    xA1_pu = xA1 >> s->sps->log2_min_pu_size;
-    yA1_pu = yA1 >> s->sps->log2_min_pu_size;
 
     is_available_a1 = AVAILABLE(cand_left, A1);
     if (is_available_a0 || is_available_a1)
         isScaledFlag_L0 = 1;
 
     if (is_available_a0) {
-        availableFlagLXA0 = MP_MX(A0, pred_flag_index_l0, mxA);
-        if (!availableFlagLXA0)
-            availableFlagLXA0 = MP_MX(A0, pred_flag_index_l1, mxA);
+        if (MP_MX(A0, pred_flag_index_l0, mxA)) {
+            goto b_candidates;
+        }
+        if (MP_MX(A0, pred_flag_index_l1, mxA)) {
+            goto b_candidates;
+        }
     }
 
-    if (is_available_a1 && !availableFlagLXA0) {
-        availableFlagLXA0 = MP_MX(A1, pred_flag_index_l0, mxA);
-        if (!availableFlagLXA0)
-            availableFlagLXA0 = MP_MX(A1, pred_flag_index_l1, mxA);
+    if (is_available_a1) {
+        if (MP_MX(A1, pred_flag_index_l0, mxA)) {
+            goto b_candidates;
+        }
+        if (MP_MX(A1, pred_flag_index_l1, mxA)) {
+            goto b_candidates;
+        }
     }
 
-    if (is_available_a0 && !availableFlagLXA0) {
-        availableFlagLXA0 = MP_MX_LT(A0, pred_flag_index_l0, mxA);
-        if (!availableFlagLXA0)
-            availableFlagLXA0 = MP_MX_LT(A0, pred_flag_index_l1, mxA);
+    if (is_available_a0) {
+        if (MP_MX_LT(A0, pred_flag_index_l0, mxA)) {
+            goto b_candidates;
+        }
+        if (MP_MX_LT(A0, pred_flag_index_l1, mxA)) {
+            goto b_candidates;
+        }
     }
 
-    if (is_available_a1 && !availableFlagLXA0) {
-        availableFlagLXA0 = MP_MX_LT(A1, pred_flag_index_l0, mxA);
-        if (!availableFlagLXA0)
-            availableFlagLXA0 = MP_MX_LT(A1, pred_flag_index_l1, mxA);
+    if (is_available_a1) {
+        if (MP_MX_LT(A1, pred_flag_index_l0, mxA)) {
+            goto b_candidates;
+        }
+        if (MP_MX_LT(A1, pred_flag_index_l1, mxA)) {
+            goto b_candidates;
+        }
     }
+    availableFlagLXA0 = 0;
 
+b_candidates:
     // B candidates
     // above right spatial merge candidate
     xB0    = x0 + nPbW;
     yB0    = y0 - 1;
-    xB0_pu = xB0 >> s->sps->log2_min_pu_size;
-    yB0_pu = yB0 >> s->sps->log2_min_pu_size;
 
-    is_available_b0 = PRED_BLOCK_AVAILABLE(B0) && AVAILABLE(cand_up_right, B0);
+    is_available_b0 =  AVAILABLE(cand_up_right, B0) &&
+                       xB0 < s->sps->width &&
+                       PRED_BLOCK_AVAILABLE(B0);
 
     if (is_available_b0) {
-        availableFlagLXB0 = MP_MX(B0, pred_flag_index_l0, mxB);
-        if (!availableFlagLXB0)
-            availableFlagLXB0 = MP_MX(B0, pred_flag_index_l1, mxB);
+        if (MP_MX(B0, pred_flag_index_l0, mxB)) {
+            goto scalef;
+        }
+        if (MP_MX(B0, pred_flag_index_l1, mxB)) {
+            goto scalef;
+        }
     }
 
-    if (!availableFlagLXB0) {
-        // above spatial merge candidate
-        xB1    = x0 + nPbW - 1;
-        yB1    = y0 - 1;
-        xB1_pu = xB1 >> s->sps->log2_min_pu_size;
-        yB1_pu = yB1 >> s->sps->log2_min_pu_size;
+    // above spatial merge candidate
+    xB1    = x0 + nPbW - 1;
+    yB1    = y0 - 1;
 
-        is_available_b1 = AVAILABLE(cand_up, B1);
+    is_available_b1 = AVAILABLE(cand_up, B1);
 
-        if (is_available_b1) {
-            availableFlagLXB0 = MP_MX(B1, pred_flag_index_l0, mxB);
-            if (!availableFlagLXB0)
-                availableFlagLXB0 = MP_MX(B1, pred_flag_index_l1, mxB);
+    if (is_available_b1) {
+        if (MP_MX(B1, pred_flag_index_l0, mxB)) {
+            goto scalef;
+        }
+        if (MP_MX(B1, pred_flag_index_l1, mxB)) {
+            goto scalef;
         }
     }
 
-    if (!availableFlagLXB0) {
-        // above left spatial merge candidate
-        xB2 = x0 - 1;
-        yB2 = y0 - 1;
-        xB2_pu = xB2 >> s->sps->log2_min_pu_size;
-        yB2_pu = yB2 >> s->sps->log2_min_pu_size;
-        is_available_b2 = AVAILABLE(cand_up_left, B2);
+    // above left spatial merge candidate
+    xB2 = x0 - 1;
+    yB2 = y0 - 1;
+    is_available_b2 = AVAILABLE(cand_up_left, B2);
 
-        if (is_available_b2) {
-            availableFlagLXB0 = MP_MX(B2, pred_flag_index_l0, mxB);
-            if (!availableFlagLXB0)
-                availableFlagLXB0 = MP_MX(B2, pred_flag_index_l1, mxB);
+    if (is_available_b2) {
+        if (MP_MX(B2, pred_flag_index_l0, mxB)) {
+            goto scalef;
+        }
+        if (MP_MX(B2, pred_flag_index_l1, mxB)) {
+            goto scalef;
         }
     }
+    availableFlagLXB0 = 0;
 
-    if (isScaledFlag_L0 == 0) {
+scalef:
+    if (!isScaledFlag_L0) {
         if (availableFlagLXB0) {
             availableFlagLXA0 = 1;
             mxA = mxB;
@@ -802,7 +751,8 @@ void ff_hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
         mvpcand_list[numMVPCandLX++] = mxB;
 
     //temporal motion vector prediction candidate
-    if (numMVPCandLX < 2 && s->sh.slice_temporal_mvp_enabled_flag) {
+    if (numMVPCandLX < 2 && s->sh.slice_temporal_mvp_enabled_flag &&
+        mvp_lx_flag == numMVPCandLX) {
         Mv mv_col;
         int available_col = temporal_luma_motion_vector(s, x0, y0, nPbW,
                                                         nPbH, ref_idx,
@@ -811,10 +761,5 @@ void ff_hevc_luma_mv_mvp_mode(HEVCContext *s, int x0, int y0, int nPbW,
             mvpcand_list[numMVPCandLX++] = mv_col;
     }
 
-    // insert zero motion vectors when the number of available candidates are less than 2
-    while (numMVPCandLX < 2)
-        mvpcand_list[numMVPCandLX++] = (Mv){ 0, 0 };
-
-    mv->mv[LX].x = mvpcand_list[mvp_lx_flag].x;
-    mv->mv[LX].y = mvpcand_list[mvp_lx_flag].y;
+    mv->mv[LX] = mvpcand_list[mvp_lx_flag];
 }
diff --git a/libavcodec/hevc_parser.c b/libavcodec/hevc_parser.c
index ac2c6f5..834b93b 100644
--- a/libavcodec/hevc_parser.c
+++ b/libavcodec/hevc_parser.c
@@ -3,20 +3,20 @@
  *
  * Copyright (C) 2012 - 2013 Guillaume Martres
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -24,9 +24,15 @@
 
 #include "parser.h"
 #include "hevc.h"
+#include "golomb.h"
 
 #define START_CODE 0x000001 ///< start_code_prefix_one_3bytes
 
+typedef struct HEVCParseContext {
+    HEVCContext  h;  ///< decoder-side state used by parse_nal_units() (parameter sets, slice header, NAL buffers)
+    ParseContext pc; ///< frame-boundary state used by hevc_find_frame_end() and hevc_parse()
+} HEVCParseContext;
+
 /**
  * Find the end of the current frame in the bitstream.
  * @return the position of the first byte of the next frame, or END_NOT_FOUND
@@ -35,7 +41,7 @@ static int hevc_find_frame_end(AVCodecParserContext *s, const uint8_t *buf,
                                int buf_size)
 {
     int i;
-    ParseContext *pc = s->priv_data;
+    ParseContext *pc = &((HEVCParseContext *)s->priv_data)->pc;
 
     for (i = 0; i < buf_size; i++) {
         int nut;
@@ -59,7 +65,6 @@ static int hevc_find_frame_end(AVCodecParserContext *s, const uint8_t *buf,
             if (first_slice_segment_in_pic_flag) {
                 if (!pc->frame_start_found) {
                     pc->frame_start_found = 1;
-                    s->key_frame = nut >= NAL_BLA_W_LP && nut <= NAL_CRA_NUT;
                 } else { // First slice of next frame found
                     pc->frame_start_found = 0;
                     return i - 5;
@@ -71,12 +76,194 @@ static int hevc_find_frame_end(AVCodecParserContext *s, const uint8_t *buf,
     return END_NOT_FOUND;
 }
 
-static int hevc_parse(AVCodecParserContext *s, AVCodecContext *avctx,
+/**
+ * Parse NAL units of found picture and decode some basic information.
+ * VPS/SPS/PPS/SEI NALs go to their decoders; parsing returns after the first slice segment header that is not a dependent segment.
+ * @param s parser context.
+ * @param avctx codec context.
+ * @param buf buffer with field/frame data.
+ * @param buf_size size of the buffer.
+ * @return 0 when buf_size is 0 or a slice header was parsed, a negative value on error (-1 when no slice NAL was found). */
+static inline int parse_nal_units(AVCodecParserContext *s, AVCodecContext *avctx,
+                      const uint8_t *buf, int buf_size)
+{
+    HEVCContext   *h  = &((HEVCParseContext *)s->priv_data)->h;
+    GetBitContext *gb = &h->HEVClc->gb;
+    SliceHeader   *sh = &h->sh;
+    const uint8_t *buf_end = buf + buf_size;
+    int state = -1, i; /* state: search state handed to avpriv_find_start_code() across iterations */
+    HEVCNAL *nal;
+
+    /* set some sane default values */
+    s->pict_type         = AV_PICTURE_TYPE_I;
+    s->key_frame         = 0;
+    s->picture_structure = AV_PICTURE_STRUCTURE_UNKNOWN;
+
+    h->avctx = avctx;
+
+    if (!buf_size)
+        return 0;
+
+    if (h->nals_allocated < 1) { /* first use: allocate the single NAL slot (h->nals[0]) this function works with */
+        HEVCNAL *tmp = av_realloc_array(h->nals, 1, sizeof(*tmp));
+        if (!tmp)
+            return AVERROR(ENOMEM);
+        h->nals = tmp;
+        memset(h->nals, 0, sizeof(*tmp));
+        h->nals_allocated = 1;
+    }
+
+    nal = &h->nals[0];
+
+    for (;;) {
+        int src_length, consumed;
+        buf = avpriv_find_start_code(buf, buf_end, &state);
+        if (--buf + 2 >= buf_end) /* buf is read as the 2-byte NAL header below; stop when fewer than 3 bytes remain */
+            break;
+        src_length = buf_end - buf;
+
+        h->nal_unit_type = (*buf >> 1) & 0x3f;
+        h->temporal_id   = (*(buf + 1) & 0x07) - 1; /* low 3 bits of the second header byte, minus one */
+        if (h->nal_unit_type <= NAL_CRA_NUT) {
+            // Do not walk the whole buffer just to decode slice segment header
+            if (src_length > 20)
+                src_length = 20;
+        }
+
+        consumed = ff_hevc_extract_rbsp(h, buf, src_length, nal);
+        if (consumed < 0)
+            return consumed;
+
+        init_get_bits8(gb, nal->data + 2, nal->size); /* reader starts past the 2 header bytes read above; NOTE(review): length is still nal->size, return value unchecked - confirm */
+        switch (h->nal_unit_type) {
+        case NAL_VPS:
+            ff_hevc_decode_nal_vps(h);
+            break;
+        case NAL_SPS:
+            ff_hevc_decode_nal_sps(h);
+            break;
+        case NAL_PPS:
+            ff_hevc_decode_nal_pps(h);
+            break;
+        case NAL_SEI_PREFIX:
+        case NAL_SEI_SUFFIX:
+            ff_hevc_decode_nal_sei(h);
+            break;
+        case NAL_TRAIL_N:
+        case NAL_TRAIL_R:
+        case NAL_TSA_N:
+        case NAL_TSA_R:
+        case NAL_STSA_N:
+        case NAL_STSA_R:
+        case NAL_RADL_N:
+        case NAL_RADL_R:
+        case NAL_RASL_N:
+        case NAL_RASL_R:
+        case NAL_BLA_W_LP:
+        case NAL_BLA_W_RADL:
+        case NAL_BLA_N_LP:
+        case NAL_IDR_W_RADL:
+        case NAL_IDR_N_LP:
+        case NAL_CRA_NUT: /* every slice segment NAL type listed above shares the header parsing below */
+            sh->first_slice_in_pic_flag = get_bits1(gb);
+            s->picture_structure = h->picture_struct;
+            s->field_order = h->picture_struct;
+
+            if (IS_IRAP(h)) {
+                s->key_frame = 1;
+                sh->no_output_of_prior_pics_flag = get_bits1(gb);
+            }
+
+            sh->pps_id = get_ue_golomb(gb);
+            if (sh->pps_id >= MAX_PPS_COUNT || !h->pps_list[sh->pps_id]) {
+                av_log(h->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
+                return AVERROR_INVALIDDATA;
+            }
+            h->pps = (HEVCPPS*)h->pps_list[sh->pps_id]->data;
+
+            if (h->pps->sps_id >= MAX_SPS_COUNT || !h->sps_list[h->pps->sps_id]) {
+                av_log(h->avctx, AV_LOG_ERROR, "SPS id out of range: %d\n", h->pps->sps_id);
+                return AVERROR_INVALIDDATA;
+            }
+            if (h->sps != (HEVCSPS*)h->sps_list[h->pps->sps_id]->data) { /* active SPS changed: switch SPS and its VPS */
+                h->sps = (HEVCSPS*)h->sps_list[h->pps->sps_id]->data;
+                h->vps = (HEVCVPS*)h->vps_list[h->sps->vps_id]->data; /* NOTE(review): vps_list entry is dereferenced without a NULL check here - confirm it is validated elsewhere */
+            }
+
+            if (!sh->first_slice_in_pic_flag) {
+                int slice_address_length;
+
+                if (h->pps->dependent_slice_segments_enabled_flag)
+                    sh->dependent_slice_segment_flag = get_bits1(gb);
+                else
+                    sh->dependent_slice_segment_flag = 0;
+
+                slice_address_length = av_ceil_log2_c(h->sps->ctb_width *
+                                                      h->sps->ctb_height);
+                sh->slice_segment_addr = get_bits(gb, slice_address_length);
+                if (sh->slice_segment_addr >= h->sps->ctb_width * h->sps->ctb_height) {
+                    av_log(h->avctx, AV_LOG_ERROR, "Invalid slice segment address: %u.\n",
+                           sh->slice_segment_addr);
+                    return AVERROR_INVALIDDATA;
+                }
+            } else
+                sh->dependent_slice_segment_flag = 0;
+
+            if (sh->dependent_slice_segment_flag)
+                break; /* dependent segment: leave the switch and move on to the next NAL */
+
+            for (i = 0; i < h->pps->num_extra_slice_header_bits; i++)
+                skip_bits(gb, 1); // slice_reserved_undetermined_flag[]
+
+            sh->slice_type = get_ue_golomb(gb);
+            if (!(sh->slice_type == I_SLICE || sh->slice_type == P_SLICE ||
+                  sh->slice_type == B_SLICE)) {
+                av_log(h->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
+                       sh->slice_type);
+                return AVERROR_INVALIDDATA;
+            }
+            s->pict_type = sh->slice_type == B_SLICE ? AV_PICTURE_TYPE_B :
+                           sh->slice_type == P_SLICE ? AV_PICTURE_TYPE_P :
+                                                       AV_PICTURE_TYPE_I;
+
+            if (h->pps->output_flag_present_flag)
+                sh->pic_output_flag = get_bits1(gb);
+
+            if (h->sps->separate_colour_plane_flag)
+                sh->colour_plane_id = get_bits(gb, 2);
+
+            if (!IS_IDR(h)) {
+                sh->pic_order_cnt_lsb = get_bits(gb, h->sps->log2_max_poc_lsb);
+                s->output_picture_number = h->poc = ff_hevc_compute_poc(h, sh->pic_order_cnt_lsb);
+            } else
+                s->output_picture_number = h->poc = 0;
+
+            if (h->temporal_id == 0 && /* remember this POC in pocTid0 only for temporal_id 0 and NAL types not excluded below */
+                h->nal_unit_type != NAL_TRAIL_N &&
+                h->nal_unit_type != NAL_TSA_N &&
+                h->nal_unit_type != NAL_STSA_N &&
+                h->nal_unit_type != NAL_RADL_N &&
+                h->nal_unit_type != NAL_RASL_N &&
+                h->nal_unit_type != NAL_RADL_R &&
+                h->nal_unit_type != NAL_RASL_R)
+                h->pocTid0 = h->poc;
+
+            return 0; /* no need to evaluate the rest */
+        }
+        buf += consumed; /* advance by the byte count ff_hevc_extract_rbsp() returned */
+    }
+    /* didn't find a picture! */
+    av_log(h->avctx, AV_LOG_ERROR, "missing picture in access unit\n");
+    return -1;
+}
+
+static int hevc_parse(AVCodecParserContext *s,
+                      AVCodecContext *avctx,
                       const uint8_t **poutbuf, int *poutbuf_size,
                       const uint8_t *buf, int buf_size)
 {
     int next;
-    ParseContext *pc = s->priv_data;
+    ParseContext *pc = &((HEVCParseContext *)s->priv_data)->pc;
 
     if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
         next = buf_size;
@@ -89,6 +276,8 @@ static int hevc_parse(AVCodecParserContext *s, AVCodecContext *avctx,
         }
     }
 
+    parse_nal_units(s, avctx, buf, buf_size);
+
     *poutbuf      = buf;
     *poutbuf_size = buf_size;
     return next;
@@ -116,10 +305,46 @@ static int hevc_split(AVCodecContext *avctx, const uint8_t *buf, int buf_size)
     return 0;
 }
 
+/* Parser init: allocate the HEVCLocalContext; AVERROR(ENOMEM) if that fails. */
+static int hevc_init(AVCodecParserContext *s)
+{
+    HEVCContext  *h  = &((HEVCParseContext *)s->priv_data)->h;
+    h->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
+    h->skipped_bytes_pos_size = INT_MAX;
+    return h->HEVClc ? 0 : AVERROR(ENOMEM); /* never report success with a NULL HEVClc */
+}
+
+static void hevc_close(AVCodecParserContext *s)
+{
+    int i;
+    HEVCContext  *h  = &((HEVCParseContext *)s->priv_data)->h;
+    ParseContext *pc = &((HEVCParseContext *)s->priv_data)->pc;
+    /* Parser teardown: free what the embedded HEVCContext and ParseContext hold. */
+    av_freep(&h->skipped_bytes_pos);
+    av_freep(&h->HEVClc);
+    av_freep(&pc->buffer);
+    /* Unreference every entry of the VPS, SPS and PPS lists. */
+    for (i = 0; i < FF_ARRAY_ELEMS(h->vps_list); i++)
+        av_buffer_unref(&h->vps_list[i]);
+    for (i = 0; i < FF_ARRAY_ELEMS(h->sps_list); i++)
+        av_buffer_unref(&h->sps_list[i]);
+    for (i = 0; i < FF_ARRAY_ELEMS(h->pps_list); i++)
+        av_buffer_unref(&h->pps_list[i]);
+    /* Unreference current_sps and clear the sps pointer. */
+    av_buffer_unref(&h->current_sps);
+    h->sps = NULL;
+    /* Free each NAL's rbsp_buffer, then the nals array, and reset the count. */
+    for (i = 0; i < h->nals_allocated; i++)
+        av_freep(&h->nals[i].rbsp_buffer);
+    av_freep(&h->nals);
+    h->nals_allocated = 0;
+}
+
 AVCodecParser ff_hevc_parser = {
     .codec_ids      = { AV_CODEC_ID_HEVC },
-    .priv_data_size = sizeof(ParseContext),
+    .priv_data_size = sizeof(HEVCParseContext),
+    .parser_init    = hevc_init,
     .parser_parse   = hevc_parse,
-    .parser_close   = ff_parse_close,
+    .parser_close   = hevc_close,
     .split          = hevc_split,
 };
diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index bc18990..f45dd8c 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -6,25 +6,24 @@
  * Copyright (C) 2012 - 2013 Gildas Cocherel
  * Copyright (C) 2013 Vittorio Giovara
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "libavutil/imgutils.h"
-
 #include "golomb.h"
 #include "hevc.h"
 
@@ -73,7 +72,7 @@ static const AVRational vui_sar[] = {
 int ff_hevc_decode_short_term_rps(HEVCContext *s, ShortTermRPS *rps,
                                   const HEVCSPS *sps, int is_slice_header)
 {
-    HEVCLocalContext *lc = &s->HEVClc;
+    HEVCLocalContext *lc = s->HEVClc;
     uint8_t rps_predict = 0;
     int delta_poc;
     int k0 = 0;
@@ -88,7 +87,8 @@ int ff_hevc_decode_short_term_rps(HEVCContext *s, ShortTermRPS *rps,
 
     if (rps_predict) {
         const ShortTermRPS *rps_ridx;
-        int delta_rps, abs_delta_rps;
+        int delta_rps;
+        unsigned abs_delta_rps;
         uint8_t use_delta_flag = 0;
         uint8_t delta_rps_sign;
 
@@ -106,6 +106,12 @@ int ff_hevc_decode_short_term_rps(HEVCContext *s, ShortTermRPS *rps,
 
         delta_rps_sign = get_bits1(gb);
         abs_delta_rps  = get_ue_golomb_long(gb) + 1;
+        if (abs_delta_rps < 1 || abs_delta_rps > 32768) {
+            av_log(s->avctx, AV_LOG_ERROR,
+                   "Invalid value of abs_delta_rps: %d\n",
+                   abs_delta_rps);
+            return AVERROR_INVALIDDATA;
+        }
         delta_rps      = (1 - (delta_rps_sign << 1)) * abs_delta_rps;
         for (i = 0; i <= rps_ridx->num_delta_pocs; i++) {
             int used = rps->used[k] = get_bits1(gb);
@@ -196,7 +202,8 @@ int ff_hevc_decode_short_term_rps(HEVCContext *s, ShortTermRPS *rps,
 static void decode_profile_tier_level(HEVCContext *s, PTLCommon *ptl)
 {
     int i;
-    GetBitContext *gb = &s->HEVClc.gb;
+    HEVCLocalContext *lc = s->HEVClc;
+    GetBitContext *gb = &lc->gb;
 
     ptl->profile_space = get_bits(gb, 2);
     ptl->tier_flag     = get_bits1(gb);
@@ -207,6 +214,8 @@ static void decode_profile_tier_level(HEVCContext *s, PTLCommon *ptl)
         av_log(s->avctx, AV_LOG_DEBUG, "Main 10 profile bitstream\n");
     else if (ptl->profile_idc == FF_PROFILE_HEVC_MAIN_STILL_PICTURE)
         av_log(s->avctx, AV_LOG_DEBUG, "Main Still Picture profile bitstream\n");
+    else if (ptl->profile_idc == FF_PROFILE_HEVC_REXT)
+        av_log(s->avctx, AV_LOG_DEBUG, "Range Extension profile bitstream\n");
     else
         av_log(s->avctx, AV_LOG_WARNING, "Unknown HEVC profile: %d\n", ptl->profile_idc);
 
@@ -225,7 +234,8 @@ static void decode_profile_tier_level(HEVCContext *s, PTLCommon *ptl)
 static void parse_ptl(HEVCContext *s, PTL *ptl, int max_num_sub_layers)
 {
     int i;
-    GetBitContext *gb = &s->HEVClc.gb;
+    HEVCLocalContext *lc = s->HEVClc;
+    GetBitContext *gb = &lc->gb;
     decode_profile_tier_level(s, &ptl->general_ptl);
     ptl->general_ptl.level_idc = get_bits(gb, 8);
 
@@ -233,7 +243,7 @@ static void parse_ptl(HEVCContext *s, PTL *ptl, int max_num_sub_layers)
         ptl->sub_layer_profile_present_flag[i] = get_bits1(gb);
         ptl->sub_layer_level_present_flag[i]   = get_bits1(gb);
     }
-    if (max_num_sub_layers - 1 > 0)
+    if (max_num_sub_layers - 1> 0)
         for (i = max_num_sub_layers - 1; i < 8; i++)
             skip_bits(gb, 2); // reserved_zero_2bits[i]
     for (i = 0; i < max_num_sub_layers - 1; i++) {
@@ -247,7 +257,7 @@ static void parse_ptl(HEVCContext *s, PTL *ptl, int max_num_sub_layers)
 static void decode_sublayer_hrd(HEVCContext *s, unsigned int nb_cpb,
                                 int subpic_params_present)
 {
-    GetBitContext *gb = &s->HEVClc.gb;
+    GetBitContext *gb = &s->HEVClc->gb;
     int i;
 
     for (i = 0; i < nb_cpb; i++) {
@@ -262,10 +272,10 @@ static void decode_sublayer_hrd(HEVCContext *s, unsigned int nb_cpb,
     }
 }
 
-static void decode_hrd(HEVCContext *s, int common_inf_present,
+static int decode_hrd(HEVCContext *s, int common_inf_present,
                        int max_sublayers)
 {
-    GetBitContext *gb = &s->HEVClc.gb;
+    GetBitContext *gb = &s->HEVClc->gb;
     int nal_params_present = 0, vcl_params_present = 0;
     int subpic_params_present = 0;
     int i;
@@ -309,20 +319,26 @@ static void decode_hrd(HEVCContext *s, int common_inf_present,
         else
             low_delay = get_bits1(gb);
 
-        if (!low_delay)
+        if (!low_delay) {
             nb_cpb = get_ue_golomb_long(gb) + 1;
+            if (nb_cpb < 1 || nb_cpb > 32) {
+                av_log(s->avctx, AV_LOG_ERROR, "nb_cpb %d invalid\n", nb_cpb);
+                return AVERROR_INVALIDDATA;
+            }
+        }
 
         if (nal_params_present)
             decode_sublayer_hrd(s, nb_cpb, subpic_params_present);
         if (vcl_params_present)
             decode_sublayer_hrd(s, nb_cpb, subpic_params_present);
     }
+    return 0;
 }
 
 int ff_hevc_decode_nal_vps(HEVCContext *s)
 {
     int i,j;
-    GetBitContext *gb = &s->HEVClc.gb;
+    GetBitContext *gb = &s->HEVClc->gb;
     int vps_id = 0;
     HEVCVPS *vps;
     AVBufferRef *vps_buf = av_buffer_allocz(sizeof(*vps));
@@ -369,7 +385,7 @@ int ff_hevc_decode_nal_vps(HEVCContext *s)
         vps->vps_num_reorder_pics[i]      = get_ue_golomb_long(gb);
         vps->vps_max_latency_increase[i]  = get_ue_golomb_long(gb) - 1;
 
-        if (vps->vps_max_dec_pic_buffering[i] > MAX_DPB_SIZE) {
+        if (vps->vps_max_dec_pic_buffering[i] > MAX_DPB_SIZE || !vps->vps_max_dec_pic_buffering[i]) {
             av_log(s->avctx, AV_LOG_ERROR, "vps_max_dec_pic_buffering_minus1 out of range: %d\n",
                    vps->vps_max_dec_pic_buffering[i] - 1);
             goto err;
@@ -384,6 +400,11 @@ int ff_hevc_decode_nal_vps(HEVCContext *s)
 
     vps->vps_max_layer_id   = get_bits(gb, 6);
     vps->vps_num_layer_sets = get_ue_golomb_long(gb) + 1;
+    if ((vps->vps_num_layer_sets - 1LL) * (vps->vps_max_layer_id + 1LL) > get_bits_left(gb)) {
+        av_log(s->avctx, AV_LOG_ERROR, "too many layer_id_included_flags\n");
+        goto err;
+    }
+
     for (i = 1; i < vps->vps_num_layer_sets; i++)
         for (j = 0; j <= vps->vps_max_layer_id; j++)
             skip_bits(gb, 1);  // layer_id_included_flag[i][j]
@@ -419,7 +440,7 @@ err:
 static void decode_vui(HEVCContext *s, HEVCSPS *sps)
 {
     VUI *vui          = &sps->vui;
-    GetBitContext *gb = &s->HEVClc.gb;
+    GetBitContext *gb = &s->HEVClc->gb;
     int sar_present;
 
     av_log(s->avctx, AV_LOG_DEBUG, "Decoding VUI\n");
@@ -499,6 +520,7 @@ static void decode_vui(HEVCContext *s, HEVCSPS *sps)
     }
 
     vui->vui_timing_info_present_flag = get_bits1(gb);
+
     if (vui->vui_timing_info_present_flag) {
         vui->vui_num_units_in_tick               = get_bits_long(gb, 32);
         vui->vui_time_scale                      = get_bits_long(gb, 32);
@@ -546,20 +568,25 @@ static void set_default_scaling_list_data(ScalingList *sl)
     memcpy(sl->sl[2][4], default_scaling_list_inter, 64);
     memcpy(sl->sl[2][5], default_scaling_list_inter, 64);
     memcpy(sl->sl[3][0], default_scaling_list_intra, 64);
-    memcpy(sl->sl[3][1], default_scaling_list_inter, 64);
+    memcpy(sl->sl[3][1], default_scaling_list_intra, 64);
+    memcpy(sl->sl[3][2], default_scaling_list_intra, 64);
+    memcpy(sl->sl[3][3], default_scaling_list_inter, 64);
+    memcpy(sl->sl[3][4], default_scaling_list_inter, 64);
+    memcpy(sl->sl[3][5], default_scaling_list_inter, 64);
 }
 
-static int scaling_list_data(HEVCContext *s, ScalingList *sl)
+static int scaling_list_data(HEVCContext *s, ScalingList *sl, HEVCSPS *sps)
 {
-    GetBitContext *gb = &s->HEVClc.gb;
-    uint8_t scaling_list_pred_mode_flag[4][6];
+    GetBitContext *gb = &s->HEVClc->gb;
+    uint8_t scaling_list_pred_mode_flag;
     int32_t scaling_list_dc_coef[2][6];
-    int size_id, matrix_id, i, pos;
+    int size_id, matrix_id, pos;
+    int i;
 
     for (size_id = 0; size_id < 4; size_id++)
-        for (matrix_id = 0; matrix_id < (size_id == 3 ? 2 : 6); matrix_id++) {
-            scaling_list_pred_mode_flag[size_id][matrix_id] = get_bits1(gb);
-            if (!scaling_list_pred_mode_flag[size_id][matrix_id]) {
+        for (matrix_id = 0; matrix_id < 6; matrix_id += ((size_id == 3) ? 3 : 1)) {
+            scaling_list_pred_mode_flag = get_bits1(gb);
+            if (!scaling_list_pred_mode_flag) {
                 unsigned int delta = get_ue_golomb_long(gb);
                 /* Only need to handle non-zero delta. Zero means default,
                  * which should already be in the arrays. */
@@ -603,13 +630,27 @@ static int scaling_list_data(HEVCContext *s, ScalingList *sl)
             }
         }
 
+    if (sps->chroma_format_idc == 3) {
+        for (i = 0; i < 64; i++) {
+            sl->sl[3][1][i] = sl->sl[2][1][i];
+            sl->sl[3][2][i] = sl->sl[2][2][i];
+            sl->sl[3][4][i] = sl->sl[2][4][i];
+            sl->sl[3][5][i] = sl->sl[2][5][i];
+        }
+        sl->sl_dc[1][1] = sl->sl_dc[0][1];
+        sl->sl_dc[1][2] = sl->sl_dc[0][2];
+        sl->sl_dc[1][4] = sl->sl_dc[0][4];
+        sl->sl_dc[1][5] = sl->sl_dc[0][5];
+    }
+
+
     return 0;
 }
 
 int ff_hevc_decode_nal_sps(HEVCContext *s)
 {
     const AVPixFmtDescriptor *desc;
-    GetBitContext *gb = &s->HEVClc.gb;
+    GetBitContext *gb = &s->HEVClc->gb;
     int ret = 0;
     unsigned int sps_id = 0;
     int log2_diff_max_min_transform_block_size;
@@ -661,8 +702,8 @@ int ff_hevc_decode_nal_sps(HEVCContext *s)
     }
 
     sps->chroma_format_idc = get_ue_golomb_long(gb);
-    if (sps->chroma_format_idc != 1) {
-        avpriv_report_missing_feature(s->avctx, "chroma_format_idc != 1\n");
+    if (!(sps->chroma_format_idc == 1 || sps->chroma_format_idc == 2 || sps->chroma_format_idc == 3)) {
+        avpriv_report_missing_feature(s->avctx, "chroma_format_idc != {1, 2, 3}\n");
         ret = AVERROR_PATCHWELCOME;
         goto err;
     }
@@ -670,6 +711,9 @@ int ff_hevc_decode_nal_sps(HEVCContext *s)
     if (sps->chroma_format_idc == 3)
         sps->separate_colour_plane_flag = get_bits1(gb);
 
+    if (sps->separate_colour_plane_flag)
+        sps->chroma_format_idc = 0;
+
     sps->width  = get_ue_golomb_long(gb);
     sps->height = get_ue_golomb_long(gb);
     if ((ret = av_image_check_size(sps->width,
@@ -711,20 +755,30 @@ int ff_hevc_decode_nal_sps(HEVCContext *s)
         goto err;
     }
 
-    if (sps->chroma_format_idc == 1) {
-        switch (sps->bit_depth) {
-        case 8:  sps->pix_fmt = AV_PIX_FMT_YUV420P;   break;
-        case 9:  sps->pix_fmt = AV_PIX_FMT_YUV420P9;  break;
-        case 10: sps->pix_fmt = AV_PIX_FMT_YUV420P10; break;
-        default:
-            av_log(s->avctx, AV_LOG_ERROR, "Unsupported bit depth: %d\n",
-                   sps->bit_depth);
-            ret = AVERROR_PATCHWELCOME;
-            goto err;
-        }
-    } else {
+    switch (sps->bit_depth) {
+    case 8:
+        if (sps->chroma_format_idc == 1) sps->pix_fmt = AV_PIX_FMT_YUV420P;
+        if (sps->chroma_format_idc == 2) sps->pix_fmt = AV_PIX_FMT_YUV422P;
+        if (sps->chroma_format_idc == 3) sps->pix_fmt = AV_PIX_FMT_YUV444P;
+       break;
+    case 9:
+        if (sps->chroma_format_idc == 1) sps->pix_fmt = AV_PIX_FMT_YUV420P9;
+        if (sps->chroma_format_idc == 2) sps->pix_fmt = AV_PIX_FMT_YUV422P9;
+        if (sps->chroma_format_idc == 3) sps->pix_fmt = AV_PIX_FMT_YUV444P9;
+        break;
+    case 10:
+        if (sps->chroma_format_idc == 1) sps->pix_fmt = AV_PIX_FMT_YUV420P10;
+        if (sps->chroma_format_idc == 2) sps->pix_fmt = AV_PIX_FMT_YUV422P10;
+        if (sps->chroma_format_idc == 3) sps->pix_fmt = AV_PIX_FMT_YUV444P10;
+        break;
+    case 12:
+        if (sps->chroma_format_idc == 1) sps->pix_fmt = AV_PIX_FMT_YUV420P12;
+        if (sps->chroma_format_idc == 2) sps->pix_fmt = AV_PIX_FMT_YUV422P12;
+        if (sps->chroma_format_idc == 3) sps->pix_fmt = AV_PIX_FMT_YUV444P12;
+        break;
+    default:
         av_log(s->avctx, AV_LOG_ERROR,
-               "non-4:2:0 support is currently unspecified.\n");
+               "4:2:0, 4:2:2, 4:4:4 supports are currently specified for 8, 10 and 12 bits.\n");
         return AVERROR_PATCHWELCOME;
     }
 
@@ -800,7 +854,7 @@ int ff_hevc_decode_nal_sps(HEVCContext *s)
         set_default_scaling_list_data(&sps->scaling_list);
 
         if (get_bits1(gb)) {
-            ret = scaling_list_data(s, &sps->scaling_list);
+            ret = scaling_list_data(s, &sps->scaling_list, sps);
             if (ret < 0)
                 goto err;
         }
@@ -855,8 +909,42 @@ int ff_hevc_decode_nal_sps(HEVCContext *s)
     vui_present = get_bits1(gb);
     if (vui_present)
         decode_vui(s, sps);
-    skip_bits1(gb); // sps_extension_flag
 
+    if (get_bits1(gb)) { // sps_extension_flag
+        int sps_extension_flag[1];
+        for (i = 0; i < 1; i++)
+            sps_extension_flag[i] = get_bits1(gb);
+        skip_bits(gb, 7); //sps_extension_7bits = get_bits(gb, 7);
+        if (sps_extension_flag[0]) {
+            int extended_precision_processing_flag;
+            int high_precision_offsets_enabled_flag;
+            int cabac_bypass_alignment_enabled_flag;
+
+            sps->transform_skip_rotation_enabled_flag = get_bits1(gb);
+            sps->transform_skip_context_enabled_flag  = get_bits1(gb);
+            sps->implicit_rdpcm_enabled_flag = get_bits1(gb);
+
+            sps->explicit_rdpcm_enabled_flag = get_bits1(gb);
+
+            extended_precision_processing_flag = get_bits1(gb);
+            if (extended_precision_processing_flag)
+                av_log(s->avctx, AV_LOG_WARNING,
+                   "extended_precision_processing_flag not yet implemented\n");
+
+            sps->intra_smoothing_disabled_flag       = get_bits1(gb);
+            high_precision_offsets_enabled_flag  = get_bits1(gb);
+            if (high_precision_offsets_enabled_flag)
+                av_log(s->avctx, AV_LOG_WARNING,
+                   "high_precision_offsets_enabled_flag not yet implemented\n");
+
+            sps->persistent_rice_adaptation_enabled_flag = get_bits1(gb);
+
+            cabac_bypass_alignment_enabled_flag  = get_bits1(gb);
+            if (cabac_bypass_alignment_enabled_flag)
+                av_log(s->avctx, AV_LOG_WARNING,
+                   "cabac_bypass_alignment_enabled_flag not yet implemented\n");
+        }
+    }
     if (s->apply_defdispwin) {
         sps->output_window.left_offset   += sps->vui.def_disp_win.left_offset;
         sps->output_window.right_offset  += sps->vui.def_disp_win.right_offset;
@@ -906,6 +994,7 @@ int ff_hevc_decode_nal_sps(HEVCContext *s)
     sps->min_tb_height = sps->height >> sps->log2_min_tb_size;
     sps->min_pu_width  = sps->width  >> sps->log2_min_pu_size;
     sps->min_pu_height = sps->height >> sps->log2_min_pu_size;
+    sps->tb_mask       = (1 << (sps->log2_ctb_size - sps->log2_min_tb_size)) - 1;
 
     sps->qp_bd_offset = 6 * (sps->bit_depth - 8);
 
@@ -956,6 +1045,12 @@ int ff_hevc_decode_nal_sps(HEVCContext *s)
             if (s->pps_list[i] && ((HEVCPPS*)s->pps_list[i]->data)->sps_id == sps_id)
                 av_buffer_unref(&s->pps_list[i]);
         }
+        if (s->sps_list[sps_id] && s->sps == (HEVCSPS*)s->sps_list[sps_id]->data) {
+            av_buffer_unref(&s->current_sps);
+            s->current_sps = av_buffer_ref(s->sps_list[sps_id]);
+            if (!s->current_sps)
+                s->sps = NULL;
+        }
         av_buffer_unref(&s->sps_list[sps_id]);
         s->sps_list[sps_id] = sps_buf;
     }
@@ -980,16 +1075,52 @@ static void hevc_pps_free(void *opaque, uint8_t *data)
     av_freep(&pps->ctb_addr_ts_to_rs);
     av_freep(&pps->tile_pos_rs);
     av_freep(&pps->tile_id);
-    av_freep(&pps->min_tb_addr_zs);
+    av_freep(&pps->min_tb_addr_zs_tab);
 
     av_freep(&pps);
 }
 
+/* Read the PPS range extension fields from the bitstream into pps.
+ * Returns 0 on success, AVERROR_INVALIDDATA on a rejected offset list length. */
+static int pps_range_extensions(HEVCContext *s, HEVCPPS *pps, HEVCSPS *sps) {
+    GetBitContext *gb = &s->HEVClc->gb;
+    int i;
+
+    if (pps->transform_skip_enabled_flag)
+        pps->log2_max_transform_skip_block_size = get_ue_golomb_long(gb) + 2;
+    pps->cross_component_prediction_enabled_flag = get_bits1(gb);
+    pps->chroma_qp_offset_list_enabled_flag = get_bits1(gb);
+    if (pps->chroma_qp_offset_list_enabled_flag) {
+        pps->diff_cu_chroma_qp_offset_depth = get_ue_golomb_long(gb);
+        pps->chroma_qp_offset_list_len_minus1 = get_ue_golomb_long(gb);
+        /* NOTE(review): 5 is rejected although the message says [0, 5]; confirm against the list array sizes */
+        if (pps->chroma_qp_offset_list_len_minus1 && pps->chroma_qp_offset_list_len_minus1 >= 5) {
+            av_log(s->avctx, AV_LOG_ERROR,
+                   "chroma_qp_offset_list_len_minus1 shall be in the range [0, 5].\n");
+            return AVERROR_INVALIDDATA;
+        }
+        for (i = 0; i <= pps->chroma_qp_offset_list_len_minus1; i++) {
+            pps->cb_qp_offset_list[i] = get_se_golomb_long(gb);
+            if (pps->cb_qp_offset_list[i])
+                av_log(s->avctx, AV_LOG_WARNING,
+                       "cb_qp_offset_list not tested yet.\n");
+            pps->cr_qp_offset_list[i] = get_se_golomb_long(gb);
+            if (pps->cr_qp_offset_list[i])
+                av_log(s->avctx, AV_LOG_WARNING,
+                       "cr_qp_offset_list not tested yet.\n");
+        }
+    }
+    pps->log2_sao_offset_scale_luma = get_ue_golomb_long(gb);
+    pps->log2_sao_offset_scale_chroma = get_ue_golomb_long(gb);
+
+    return(0);
+}
+
 int ff_hevc_decode_nal_pps(HEVCContext *s)
 {
-    GetBitContext *gb = &s->HEVClc.gb;
+    GetBitContext *gb = &s->HEVClc->gb;
     HEVCSPS      *sps = NULL;
-    int pic_area_in_ctbs, pic_area_in_min_tbs;
+    int pic_area_in_ctbs;
     int log2_diff_ctb_min_tb_size;
     int i, j, x, y, ctb_addr_rs, tile_id;
     int ret = 0;
@@ -1018,6 +1149,7 @@ int ff_hevc_decode_nal_pps(HEVCContext *s)
     pps->disable_dbf                           = 0;
     pps->beta_offset                           = 0;
     pps->tc_offset                             = 0;
+    pps->log2_max_transform_skip_block_size    = 2;
 
     // Coded parameters
     pps_id = get_ue_golomb_long(gb);
@@ -1164,7 +1296,7 @@ int ff_hevc_decode_nal_pps(HEVCContext *s)
     pps->scaling_list_data_present_flag = get_bits1(gb);
     if (pps->scaling_list_data_present_flag) {
         set_default_scaling_list_data(&pps->scaling_list);
-        ret = scaling_list_data(s, &pps->scaling_list);
+        ret = scaling_list_data(s, &pps->scaling_list, sps);
         if (ret < 0)
             goto err;
     }
@@ -1178,7 +1310,14 @@ int ff_hevc_decode_nal_pps(HEVCContext *s)
     }
 
     pps->slice_header_extension_present_flag = get_bits1(gb);
-    skip_bits1(gb);     // pps_extension_flag
+
+    if (get_bits1(gb)) { // pps_extension_present_flag
+        int pps_range_extensions_flag = get_bits1(gb);
+        /* int pps_extension_7bits = */ get_bits(gb, 7);
+        if (sps->ptl.general_ptl.profile_idc == FF_PROFILE_HEVC_REXT && pps_range_extensions_flag) {
+            pps_range_extensions(s, pps, sps);
+        }
+    }
 
     // Inferred parameters
     pps->col_bd   = av_malloc_array(pps->num_tile_columns + 1, sizeof(*pps->col_bd));
@@ -1228,14 +1367,13 @@ int ff_hevc_decode_nal_pps(HEVCContext *s)
      * 6.5
      */
     pic_area_in_ctbs     = sps->ctb_width    * sps->ctb_height;
-    pic_area_in_min_tbs  = sps->min_tb_width * sps->min_tb_height;
 
     pps->ctb_addr_rs_to_ts = av_malloc_array(pic_area_in_ctbs,    sizeof(*pps->ctb_addr_rs_to_ts));
     pps->ctb_addr_ts_to_rs = av_malloc_array(pic_area_in_ctbs,    sizeof(*pps->ctb_addr_ts_to_rs));
     pps->tile_id           = av_malloc_array(pic_area_in_ctbs,    sizeof(*pps->tile_id));
-    pps->min_tb_addr_zs    = av_malloc_array(pic_area_in_min_tbs, sizeof(*pps->min_tb_addr_zs));
+    pps->min_tb_addr_zs_tab = av_malloc_array((sps->tb_mask+2) * (sps->tb_mask+2), sizeof(*pps->min_tb_addr_zs_tab));
     if (!pps->ctb_addr_rs_to_ts || !pps->ctb_addr_ts_to_rs ||
-        !pps->tile_id || !pps->min_tb_addr_zs) {
+        !pps->tile_id || !pps->min_tb_addr_zs_tab) {
         ret = AVERROR(ENOMEM);
         goto err;
     }
@@ -1290,8 +1428,13 @@ int ff_hevc_decode_nal_pps(HEVCContext *s)
             pps->tile_pos_rs[j * pps->num_tile_columns + i] = pps->row_bd[j] * sps->ctb_width + pps->col_bd[i];
 
     log2_diff_ctb_min_tb_size = sps->log2_ctb_size - sps->log2_min_tb_size;
-    for (y = 0; y < sps->min_tb_height; y++) {
-        for (x = 0; x < sps->min_tb_width; x++) {
+    pps->min_tb_addr_zs = &pps->min_tb_addr_zs_tab[1*(sps->tb_mask+2)+1];
+    for (y = 0; y < sps->tb_mask+2; y++) {
+        pps->min_tb_addr_zs_tab[y*(sps->tb_mask+2)] = -1;
+        pps->min_tb_addr_zs_tab[y]    = -1;
+    }
+    for (y = 0; y < sps->tb_mask+1; y++) {
+        for (x = 0; x < sps->tb_mask+1; x++) {
             int tb_x        = x >> log2_diff_ctb_min_tb_size;
             int tb_y        = y >> log2_diff_ctb_min_tb_size;
             int ctb_addr_rs = sps->ctb_width * tb_y + tb_x;
@@ -1301,7 +1444,7 @@ int ff_hevc_decode_nal_pps(HEVCContext *s)
                 int m = 1 << i;
                 val += (m & x ? m * m : 0) + (m & y ? 2 * m * m : 0);
             }
-            pps->min_tb_addr_zs[y * sps->min_tb_width + x] = val;
+            pps->min_tb_addr_zs[y * (sps->tb_mask+2) + x] = val;
         }
     }
 
diff --git a/libavcodec/hevc_refs.c b/libavcodec/hevc_refs.c
index c924227..b3a9787 100644
--- a/libavcodec/hevc_refs.c
+++ b/libavcodec/hevc_refs.c
@@ -4,20 +4,20 @@
  * Copyright (C) 2012 - 2013 Guillaume Martres
  * Copyright (C) 2012 - 2013 Gildas Cocherel
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -51,16 +51,11 @@ void ff_hevc_unref_frame(HEVCContext *s, HEVCFrame *frame, int flags)
 
 RefPicList *ff_hevc_get_ref_list(HEVCContext *s, HEVCFrame *ref, int x0, int y0)
 {
-    if (x0 < 0 || y0 < 0) {
-        return s->ref->refPicList;
-    } else {
-        int x_cb         = x0 >> s->sps->log2_ctb_size;
-        int y_cb         = y0 >> s->sps->log2_ctb_size;
-        int pic_width_cb = (s->sps->width + (1 << s->sps->log2_ctb_size) - 1) >>
-                           s->sps->log2_ctb_size;
-        int ctb_addr_ts  = s->pps->ctb_addr_rs_to_ts[y_cb * pic_width_cb + x_cb];
-        return (RefPicList *)ref->rpl_tab[ctb_addr_ts];
-    }
+    int x_cb         = x0 >> s->sps->log2_ctb_size;
+    int y_cb         = y0 >> s->sps->log2_ctb_size;
+    int pic_width_cb = s->sps->ctb_width;
+    int ctb_addr_ts  = s->pps->ctb_addr_rs_to_ts[y_cb * pic_width_cb + x_cb];
+    return (RefPicList *)ref->rpl_tab[ctb_addr_ts];
 }
 
 void ff_hevc_clear_refs(HEVCContext *s)
@@ -109,8 +104,9 @@ static HEVCFrame *alloc_frame(HEVCContext *s)
         for (j = 0; j < frame->ctb_count; j++)
             frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data;
 
+        frame->frame->top_field_first  = s->picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD;
+        frame->frame->interlaced_frame = (s->picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD) || (s->picture_struct == AV_PICTURE_STRUCTURE_BOTTOM_FIELD);
         return frame;
-
 fail:
         ff_hevc_unref_frame(s, frame, ~0);
         return NULL;
@@ -162,6 +158,16 @@ int ff_hevc_output_frame(HEVCContext *s, AVFrame *out, int flush)
         int min_poc   = INT_MAX;
         int i, min_idx, ret;
 
+        if (s->sh.no_output_of_prior_pics_flag == 1) {
+            for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
+                HEVCFrame *frame = &s->DPB[i];
+                if (!(frame->flags & HEVC_FRAME_FLAG_BUMPING) && frame->poc != s->poc &&
+                        frame->sequence == s->seq_output) {
+                    ff_hevc_unref_frame(s, frame, HEVC_FRAME_FLAG_OUTPUT);
+                }
+            }
+        }
+
         for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
             HEVCFrame *frame = &s->DPB[i];
             if ((frame->flags & HEVC_FRAME_FLAG_OUTPUT) &&
@@ -181,16 +187,16 @@ int ff_hevc_output_frame(HEVCContext *s, AVFrame *out, int flush)
 
         if (nb_output) {
             HEVCFrame *frame = &s->DPB[min_idx];
-            const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->frame->format);
-            int pixel_shift;
-
-            if (!desc)
-                return AVERROR_BUG;
-
-            pixel_shift = desc->comp[0].depth_minus1 > 7;
-
-            ret = av_frame_ref(out, frame->frame);
-            ff_hevc_unref_frame(s, frame, HEVC_FRAME_FLAG_OUTPUT);
+            AVFrame *dst = out;
+            AVFrame *src = frame->frame;
+            const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(src->format);
+            int pixel_shift = !!(desc->comp[0].depth_minus1 > 7);
+
+            ret = av_frame_ref(out, src);
+            if (frame->flags & HEVC_FRAME_FLAG_BUMPING)
+                ff_hevc_unref_frame(s, frame, HEVC_FRAME_FLAG_OUTPUT | HEVC_FRAME_FLAG_BUMPING);
+            else
+                ff_hevc_unref_frame(s, frame, HEVC_FRAME_FLAG_OUTPUT);
             if (ret < 0)
                 return ret;
 
@@ -198,8 +204,8 @@ int ff_hevc_output_frame(HEVCContext *s, AVFrame *out, int flush)
                 int hshift = (i > 0) ? desc->log2_chroma_w : 0;
                 int vshift = (i > 0) ? desc->log2_chroma_h : 0;
                 int off = ((frame->window.left_offset >> hshift) << pixel_shift) +
-                          (frame->window.top_offset   >> vshift) * out->linesize[i];
-                out->data[i] += off;
+                          (frame->window.top_offset   >> vshift) * dst->linesize[i];
+                dst->data[i] += off;
             }
             av_log(s->avctx, AV_LOG_DEBUG,
                    "Output frame with POC %d.\n", frame->poc);
@@ -215,6 +221,46 @@ int ff_hevc_output_frame(HEVCContext *s, AVFrame *out, int flush)
     return 0;
 }
 
+void ff_hevc_bump_frame(HEVCContext *s)
+{
+    int dpb = 0;
+    int min_poc = INT_MAX;
+    int i;
+    /* Count DPB frames of the output sequence that have any flag set, skipping the current POC. */
+    for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
+        HEVCFrame *frame = &s->DPB[i];
+        if ((frame->flags) &&
+            frame->sequence == s->seq_output &&
+            frame->poc != s->poc) {
+            dpb++;
+        }
+    }
+    /* Count reached max_dec_pic_buffering of the last sub-layer: find the lowest POC among output-only frames. */
+    if (s->sps && dpb >= s->sps->temporal_layer[s->sps->max_sub_layers - 1].max_dec_pic_buffering) {
+        for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
+            HEVCFrame *frame = &s->DPB[i];
+            if ((frame->flags) &&
+                frame->sequence == s->seq_output &&
+                frame->poc != s->poc) {
+                if (frame->flags == HEVC_FRAME_FLAG_OUTPUT && frame->poc < min_poc) {
+                    min_poc = frame->poc;
+                }
+            }
+        }
+        /* Mark every frame still awaiting output with POC <= min_poc as bumping. */
+        for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
+            HEVCFrame *frame = &s->DPB[i];
+            if (frame->flags & HEVC_FRAME_FLAG_OUTPUT &&
+                frame->sequence == s->seq_output &&
+                frame->poc <= min_poc) {
+                frame->flags |= HEVC_FRAME_FLAG_BUMPING;
+            }
+        }
+        /* NOTE(review): dpb is a local that is not read after this decrement. */
+        dpb--;
+    }
+}
+
 static int init_slice_rpl(HEVCContext *s)
 {
     HEVCFrame *frame = s->ref;
@@ -361,7 +407,8 @@ static HEVCFrame *generate_missing_ref(HEVCContext *s, int poc)
     frame->sequence = s->seq_decode;
     frame->flags    = 0;
 
-    ff_thread_report_progress(&frame->tf, INT_MAX, 0);
+    if (s->threads_type == FF_THREAD_FRAME)
+        ff_thread_report_progress(&frame->tf, INT_MAX, 0);
 
     return frame;
 }
diff --git a/libavcodec/hevc_sei.c b/libavcodec/hevc_sei.c
index 978a0e3..5bb5c90 100644
--- a/libavcodec/hevc_sei.c
+++ b/libavcodec/hevc_sei.c
@@ -5,20 +5,20 @@
  * Copyright (C) 2012 - 2013 Gildas Cocherel
  * Copyright (C) 2013 Vittorio Giovara
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -28,10 +28,13 @@
 static void decode_nal_sei_decoded_picture_hash(HEVCContext *s)
 {
     int cIdx, i;
-    GetBitContext *gb = &s->HEVClc.gb;
-    uint8_t hash_type = get_bits(gb, 8);
+    uint8_t hash_type;
+    //uint16_t picture_crc;
+    //uint32_t picture_checksum;
+    GetBitContext *gb = &s->HEVClc->gb;
+    hash_type = get_bits(gb, 8);
 
-    for (cIdx = 0; cIdx < 3; cIdx++) {
+    for (cIdx = 0; cIdx < 3/*((s->sps->chroma_format_idc == 0) ? 1 : 3)*/; cIdx++) {
         if (hash_type == 0) {
             s->is_md5 = 1;
             for (i = 0; i < 16; i++)
@@ -40,7 +43,7 @@ static void decode_nal_sei_decoded_picture_hash(HEVCContext *s)
             // picture_crc = get_bits(gb, 16);
             skip_bits(gb, 16);
         } else if (hash_type == 2) {
-            // picture_checksum = get_bits(gb, 32);
+            // picture_checksum = get_bits_long(gb, 32);
             skip_bits(gb, 32);
         }
     }
@@ -48,7 +51,7 @@ static void decode_nal_sei_decoded_picture_hash(HEVCContext *s)
 
 static void decode_nal_sei_frame_packing_arrangement(HEVCContext *s)
 {
-    GetBitContext *gb = &s->HEVClc.gb;
+    GetBitContext *gb = &s->HEVClc->gb;
 
     get_ue_golomb(gb);                  // frame_packing_arrangement_id
     s->sei_frame_packing_present = !get_bits1(gb);
@@ -73,7 +76,7 @@ static void decode_nal_sei_frame_packing_arrangement(HEVCContext *s)
 
 static void decode_nal_sei_display_orientation(HEVCContext *s)
 {
-    GetBitContext *gb = &s->HEVClc.gb;
+    GetBitContext *gb = &s->HEVClc->gb;
 
     s->sei_display_orientation_present = !get_bits1(gb);
 
@@ -86,9 +89,59 @@ static void decode_nal_sei_display_orientation(HEVCContext *s)
     }
 }
 
+static int decode_pic_timing(HEVCContext *s)
+{
+    GetBitContext *gb = &s->HEVClc->gb;
+    HEVCSPS *sps;
+    /* Updates s->picture_struct from the payload when the active SPS VUI signals field info. */
+    if (!s->sps_list[s->active_seq_parameter_set_id])
+        return(AVERROR(ENOMEM)); // NOTE(review): nothing is allocated here; the SPS entry is simply absent
+    sps = (HEVCSPS*)s->sps_list[s->active_seq_parameter_set_id]->data;
+    /* Without frame_field_info_present_flag no bits are consumed and 1 is returned. */
+    if (sps->vui.frame_field_info_present_flag) {
+        int pic_struct = get_bits(gb, 4);
+        s->picture_struct = AV_PICTURE_STRUCTURE_UNKNOWN; // kept for every value other than 1 and 2
+        if (pic_struct == 2) {
+            av_log(s->avctx, AV_LOG_DEBUG, "BOTTOM Field\n");
+            s->picture_struct = AV_PICTURE_STRUCTURE_BOTTOM_FIELD;
+        } else if (pic_struct == 1) {
+            av_log(s->avctx, AV_LOG_DEBUG, "TOP Field\n");
+            s->picture_struct = AV_PICTURE_STRUCTURE_TOP_FIELD;
+        }
+        get_bits(gb, 2);                   // source_scan_type
+        get_bits(gb, 1);                   // duplicate_flag
+    }
+    return 1;
+}
+
+static int active_parameter_sets(HEVCContext *s)
+{
+    GetBitContext *gb = &s->HEVClc->gb;
+    int i, num_sps_ids_minus1;
+    unsigned active_seq_parameter_set_id;
+    /* Parses an active_parameter_sets SEI payload; returns 0, or <0 when a field is out of range. */
+    get_bits(gb, 4); // active_video_parameter_set_id
+    get_bits(gb, 1); // self_contained_cvs_flag
+    get_bits(gb, 1); // no_parameter_set_update_flag
+    num_sps_ids_minus1 = get_ue_golomb_long(gb); // num_sps_ids_minus1
+    if (num_sps_ids_minus1 < 0 || num_sps_ids_minus1 > 15) { // H.265 limits this field to 0..15
+        av_log(s->avctx, AV_LOG_ERROR, "num_sps_ids_minus1 %d invalid\n", num_sps_ids_minus1);
+        return AVERROR_INVALIDDATA;
+    }
+    active_seq_parameter_set_id = get_ue_golomb_long(gb);
+    if (active_seq_parameter_set_id >= MAX_SPS_COUNT) {
+        av_log(s->avctx, AV_LOG_ERROR, "active_parameter_set_id %u invalid\n", active_seq_parameter_set_id);
+        return AVERROR_INVALIDDATA;
+    }
+    s->active_seq_parameter_set_id = active_seq_parameter_set_id; // only the first listed id is kept
+    for (i = 1; i <= num_sps_ids_minus1; i++)
+        get_ue_golomb_long(gb); // active_seq_parameter_set_id[i]
+    return 0;
+}
+
 static int decode_nal_sei_message(HEVCContext *s)
 {
-    GetBitContext *gb = &s->HEVClc.gb;
+    GetBitContext *gb = &s->HEVClc->gb;
 
     int payload_type = 0;
     int payload_size = 0;
@@ -105,25 +158,33 @@ static int decode_nal_sei_message(HEVCContext *s)
         payload_size += byte;
     }
     if (s->nal_unit_type == NAL_SEI_PREFIX) {
-        if (payload_type == 256)
+        if (payload_type == 256 /*&& s->decode_checksum_sei*/) {
             decode_nal_sei_decoded_picture_hash(s);
-        else if (payload_type == 45)
+        } else if (payload_type == 45) {
             decode_nal_sei_frame_packing_arrangement(s);
-        else if (payload_type == 47)
+        } else if (payload_type == 47) {
             decode_nal_sei_display_orientation(s);
-        else {
+        } else if (payload_type == 1){
+            int ret = decode_pic_timing(s);
             av_log(s->avctx, AV_LOG_DEBUG, "Skipped PREFIX SEI %d\n", payload_type);
             skip_bits(gb, 8 * payload_size);
+            return ret;
+        } else if (payload_type == 129){
+            active_parameter_sets(s);
+            av_log(s->avctx, AV_LOG_DEBUG, "Skipped PREFIX SEI %d\n", payload_type);
+        } else {
+            av_log(s->avctx, AV_LOG_DEBUG, "Skipped PREFIX SEI %d\n", payload_type);
+            skip_bits(gb, 8*payload_size);
         }
     } else { /* nal_unit_type == NAL_SEI_SUFFIX */
-        if (payload_type == 132)
+        if (payload_type == 132 /* && s->decode_checksum_sei */)
             decode_nal_sei_decoded_picture_hash(s);
         else {
             av_log(s->avctx, AV_LOG_DEBUG, "Skipped SUFFIX SEI %d\n", payload_type);
             skip_bits(gb, 8 * payload_size);
         }
     }
-    return 0;
+    return 1;
 }
 
 static int more_rbsp_data(GetBitContext *gb)
@@ -133,8 +194,12 @@ static int more_rbsp_data(GetBitContext *gb)
 
 int ff_hevc_decode_nal_sei(HEVCContext *s)
 {
+    int ret;
+    /* Decode SEI messages until no RBSP data is left; returns 1, or the first negative error. */
     do {
-        decode_nal_sei_message(s);
-    } while (more_rbsp_data(&s->HEVClc.gb));
-    return 0;
+        ret = decode_nal_sei_message(s);
+        if (ret < 0)
+            return ret; // propagate the real error code instead of relabelling it as ENOMEM
+    } while (more_rbsp_data(&s->HEVClc->gb));
+    return 1;
 }
diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c
index 0abee9b..3eae541 100644
--- a/libavcodec/hevcdsp.c
+++ b/libavcodec/hevcdsp.c
@@ -2,21 +2,23 @@
  * HEVC video decoder
  *
  * Copyright (C) 2012 - 2013 Guillaume Martres
+ * Copyright (C) 2013 - 2014 Pierre-Edouard Lepere
  *
- * This file is part of Libav.
  *
- * Libav is free software; you can redistribute it and/or
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -89,14 +91,20 @@ static const int8_t transform[32][32] = {
       90, -90,  88, -85,  82, -78,  73, -67,  61, -54,  46, -38,  31, -22,  13,  -4 },
 };
 
-DECLARE_ALIGNED(16, const int8_t, ff_hevc_epel_filters[7][16]) = {
-    { -2, 58, 10, -2, -2, 58, 10, -2, -2, 58, 10, -2, -2, 58, 10, -2 },
-    { -4, 54, 16, -2, -4, 54, 16, -2, -4, 54, 16, -2, -4, 54, 16, -2 },
-    { -6, 46, 28, -4, -6, 46, 28, -4, -6, 46, 28, -4, -6, 46, 28, -4 },
-    { -4, 36, 36, -4, -4, 36, 36, -4, -4, 36, 36, -4, -4, 36, 36, -4 },
-    { -4, 28, 46, -6, -4, 28, 46, -6, -4, 28, 46, -6, -4, 28, 46, -6 },
-    { -2, 16, 54, -4, -2, 16, 54, -4, -2, 16, 54, -4, -2, 16, 54, -4 },
-    { -2, 10, 58, -2, -2, 10, 58, -2, -2, 10, 58, -2, -2, 10, 58, -2 },
+DECLARE_ALIGNED(16, const int8_t, ff_hevc_epel_filters[7][4]) = {
+    { -2, 58, 10, -2},
+    { -4, 54, 16, -2},
+    { -6, 46, 28, -4},
+    { -4, 36, 36, -4},
+    { -4, 28, 46, -6},
+    { -2, 16, 54, -4},
+    { -2, 10, 58, -2},
+};
+
+DECLARE_ALIGNED(16, const int8_t, ff_hevc_qpel_filters[3][16]) = {
+    { -1,  4,-10, 58, 17, -5,  1,  0, -1,  4,-10, 58, 17, -5,  1,  0},
+    { -1,  4,-11, 40, 40,-11,  4, -1, -1,  4,-11, 40, 40,-11,  4, -1},
+    {  0,  1, -5, 17, 58,-10,  4, -1,  0,  1, -5, 17, 58,-10,  4, -1}
 };
 
 #define BIT_DEPTH 8
@@ -111,62 +119,110 @@ DECLARE_ALIGNED(16, const int8_t, ff_hevc_epel_filters[7][16]) = {
 #include "hevcdsp_template.c"
 #undef BIT_DEPTH
 
+#define BIT_DEPTH 12
+#include "hevcdsp_template.c"
+#undef BIT_DEPTH
+
 void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
 {
 #undef FUNC
 #define FUNC(a, depth) a ## _ ## depth
 
+#undef PEL_FUNC /* PEL_FUNC stores a_depth in hevcdsp->dst1[0..9][idx1][idx2]; it uses a loop variable i from the enclosing scope */
+#define PEL_FUNC(dst1, idx1, idx2, a, depth)                                   \
+    for(i = 0 ; i < 10 ; i++)                                                  \
+{                                                                              \
+    hevcdsp->dst1[i][idx1][idx2] = a ## _ ## depth;                            \
+}
+
+#undef EPEL_FUNCS
+#define EPEL_FUNCS(depth)                                                     \
+    PEL_FUNC(put_hevc_epel, 0, 0, put_hevc_pel_pixels, depth);                \
+    PEL_FUNC(put_hevc_epel, 0, 1, put_hevc_epel_h, depth);                    \
+    PEL_FUNC(put_hevc_epel, 1, 0, put_hevc_epel_v, depth);                    \
+    PEL_FUNC(put_hevc_epel, 1, 1, put_hevc_epel_hv, depth)
+
+#undef EPEL_UNI_FUNCS
+#define EPEL_UNI_FUNCS(depth)                                                 \
+    PEL_FUNC(put_hevc_epel_uni, 0, 0, put_hevc_pel_uni_pixels, depth);        \
+    PEL_FUNC(put_hevc_epel_uni, 0, 1, put_hevc_epel_uni_h, depth);            \
+    PEL_FUNC(put_hevc_epel_uni, 1, 0, put_hevc_epel_uni_v, depth);            \
+    PEL_FUNC(put_hevc_epel_uni, 1, 1, put_hevc_epel_uni_hv, depth);           \
+    PEL_FUNC(put_hevc_epel_uni_w, 0, 0, put_hevc_pel_uni_w_pixels, depth);    \
+    PEL_FUNC(put_hevc_epel_uni_w, 0, 1, put_hevc_epel_uni_w_h, depth);        \
+    PEL_FUNC(put_hevc_epel_uni_w, 1, 0, put_hevc_epel_uni_w_v, depth);        \
+    PEL_FUNC(put_hevc_epel_uni_w, 1, 1, put_hevc_epel_uni_w_hv, depth)
+
+#undef EPEL_BI_FUNCS
+#define EPEL_BI_FUNCS(depth)                                                \
+    PEL_FUNC(put_hevc_epel_bi, 0, 0, put_hevc_pel_bi_pixels, depth);        \
+    PEL_FUNC(put_hevc_epel_bi, 0, 1, put_hevc_epel_bi_h, depth);            \
+    PEL_FUNC(put_hevc_epel_bi, 1, 0, put_hevc_epel_bi_v, depth);            \
+    PEL_FUNC(put_hevc_epel_bi, 1, 1, put_hevc_epel_bi_hv, depth);           \
+    PEL_FUNC(put_hevc_epel_bi_w, 0, 0, put_hevc_pel_bi_w_pixels, depth);    \
+    PEL_FUNC(put_hevc_epel_bi_w, 0, 1, put_hevc_epel_bi_w_h, depth);        \
+    PEL_FUNC(put_hevc_epel_bi_w, 1, 0, put_hevc_epel_bi_w_v, depth);        \
+    PEL_FUNC(put_hevc_epel_bi_w, 1, 1, put_hevc_epel_bi_w_hv, depth)
+
+#undef QPEL_FUNCS
+#define QPEL_FUNCS(depth)                                                     \
+    PEL_FUNC(put_hevc_qpel, 0, 0, put_hevc_pel_pixels, depth);                \
+    PEL_FUNC(put_hevc_qpel, 0, 1, put_hevc_qpel_h, depth);                    \
+    PEL_FUNC(put_hevc_qpel, 1, 0, put_hevc_qpel_v, depth);                    \
+    PEL_FUNC(put_hevc_qpel, 1, 1, put_hevc_qpel_hv, depth)
+
+#undef QPEL_UNI_FUNCS
+#define QPEL_UNI_FUNCS(depth)                                                 \
+    PEL_FUNC(put_hevc_qpel_uni, 0, 0, put_hevc_pel_uni_pixels, depth);        \
+    PEL_FUNC(put_hevc_qpel_uni, 0, 1, put_hevc_qpel_uni_h, depth);            \
+    PEL_FUNC(put_hevc_qpel_uni, 1, 0, put_hevc_qpel_uni_v, depth);            \
+    PEL_FUNC(put_hevc_qpel_uni, 1, 1, put_hevc_qpel_uni_hv, depth);           \
+    PEL_FUNC(put_hevc_qpel_uni_w, 0, 0, put_hevc_pel_uni_w_pixels, depth);    \
+    PEL_FUNC(put_hevc_qpel_uni_w, 0, 1, put_hevc_qpel_uni_w_h, depth);        \
+    PEL_FUNC(put_hevc_qpel_uni_w, 1, 0, put_hevc_qpel_uni_w_v, depth);        \
+    PEL_FUNC(put_hevc_qpel_uni_w, 1, 1, put_hevc_qpel_uni_w_hv, depth)
+
+#undef QPEL_BI_FUNCS
+#define QPEL_BI_FUNCS(depth)                                                  \
+    PEL_FUNC(put_hevc_qpel_bi, 0, 0, put_hevc_pel_bi_pixels, depth);          \
+    PEL_FUNC(put_hevc_qpel_bi, 0, 1, put_hevc_qpel_bi_h, depth);              \
+    PEL_FUNC(put_hevc_qpel_bi, 1, 0, put_hevc_qpel_bi_v, depth);              \
+    PEL_FUNC(put_hevc_qpel_bi, 1, 1, put_hevc_qpel_bi_hv, depth);             \
+    PEL_FUNC(put_hevc_qpel_bi_w, 0, 0, put_hevc_pel_bi_w_pixels, depth);      \
+    PEL_FUNC(put_hevc_qpel_bi_w, 0, 1, put_hevc_qpel_bi_w_h, depth);          \
+    PEL_FUNC(put_hevc_qpel_bi_w, 1, 0, put_hevc_qpel_bi_w_v, depth);          \
+    PEL_FUNC(put_hevc_qpel_bi_w, 1, 1, put_hevc_qpel_bi_w_hv, depth)
+
 #define HEVC_DSP(depth)                                                     \
     hevcdsp->put_pcm                = FUNC(put_pcm, depth);                 \
-    hevcdsp->transquant_bypass[0]   = FUNC(transquant_bypass4x4, depth);    \
-    hevcdsp->transquant_bypass[1]   = FUNC(transquant_bypass8x8, depth);    \
-    hevcdsp->transquant_bypass[2]   = FUNC(transquant_bypass16x16, depth);  \
-    hevcdsp->transquant_bypass[3]   = FUNC(transquant_bypass32x32, depth);  \
+    hevcdsp->transform_add[0]       = FUNC(transform_add4x4, depth);        \
+    hevcdsp->transform_add[1]       = FUNC(transform_add8x8, depth);        \
+    hevcdsp->transform_add[2]       = FUNC(transform_add16x16, depth);      \
+    hevcdsp->transform_add[3]       = FUNC(transform_add32x32, depth);      \
     hevcdsp->transform_skip         = FUNC(transform_skip, depth);          \
-    hevcdsp->transform_4x4_luma_add = FUNC(transform_4x4_luma_add, depth);  \
-    hevcdsp->transform_add[0]       = FUNC(transform_4x4_add, depth);       \
-    hevcdsp->transform_add[1]       = FUNC(transform_8x8_add, depth);       \
-    hevcdsp->transform_add[2]       = FUNC(transform_16x16_add, depth);     \
-    hevcdsp->transform_add[3]       = FUNC(transform_32x32_add, depth);     \
-                                                                            \
-    hevcdsp->sao_band_filter[0] = FUNC(sao_band_filter_0, depth);           \
-    hevcdsp->sao_band_filter[1] = FUNC(sao_band_filter_1, depth);           \
-    hevcdsp->sao_band_filter[2] = FUNC(sao_band_filter_2, depth);           \
-    hevcdsp->sao_band_filter[3] = FUNC(sao_band_filter_3, depth);           \
+    hevcdsp->transform_rdpcm        = FUNC(transform_rdpcm, depth);         \
+    hevcdsp->idct_4x4_luma          = FUNC(transform_4x4_luma, depth);      \
+    hevcdsp->idct[0]                = FUNC(idct_4x4, depth);                \
+    hevcdsp->idct[1]                = FUNC(idct_8x8, depth);                \
+    hevcdsp->idct[2]                = FUNC(idct_16x16, depth);              \
+    hevcdsp->idct[3]                = FUNC(idct_32x32, depth);              \
                                                                             \
-    hevcdsp->sao_edge_filter[0] = FUNC(sao_edge_filter_0, depth);           \
-    hevcdsp->sao_edge_filter[1] = FUNC(sao_edge_filter_1, depth);           \
-    hevcdsp->sao_edge_filter[2] = FUNC(sao_edge_filter_2, depth);           \
-    hevcdsp->sao_edge_filter[3] = FUNC(sao_edge_filter_3, depth);           \
-                                                                            \
-    hevcdsp->put_hevc_qpel[0][0] = FUNC(put_hevc_qpel_pixels, depth);       \
-    hevcdsp->put_hevc_qpel[0][1] = FUNC(put_hevc_qpel_h1, depth);           \
-    hevcdsp->put_hevc_qpel[0][2] = FUNC(put_hevc_qpel_h2, depth);           \
-    hevcdsp->put_hevc_qpel[0][3] = FUNC(put_hevc_qpel_h3, depth);           \
-    hevcdsp->put_hevc_qpel[1][0] = FUNC(put_hevc_qpel_v1, depth);           \
-    hevcdsp->put_hevc_qpel[1][1] = FUNC(put_hevc_qpel_h1v1, depth);         \
-    hevcdsp->put_hevc_qpel[1][2] = FUNC(put_hevc_qpel_h2v1, depth);         \
-    hevcdsp->put_hevc_qpel[1][3] = FUNC(put_hevc_qpel_h3v1, depth);         \
-    hevcdsp->put_hevc_qpel[2][0] = FUNC(put_hevc_qpel_v2, depth);           \
-    hevcdsp->put_hevc_qpel[2][1] = FUNC(put_hevc_qpel_h1v2, depth);         \
-    hevcdsp->put_hevc_qpel[2][2] = FUNC(put_hevc_qpel_h2v2, depth);         \
-    hevcdsp->put_hevc_qpel[2][3] = FUNC(put_hevc_qpel_h3v2, depth);         \
-    hevcdsp->put_hevc_qpel[3][0] = FUNC(put_hevc_qpel_v3, depth);           \
-    hevcdsp->put_hevc_qpel[3][1] = FUNC(put_hevc_qpel_h1v3, depth);         \
-    hevcdsp->put_hevc_qpel[3][2] = FUNC(put_hevc_qpel_h2v3, depth);         \
-    hevcdsp->put_hevc_qpel[3][3] = FUNC(put_hevc_qpel_h3v3, depth);         \
-                                                                            \
-    hevcdsp->put_hevc_epel[0][0] = FUNC(put_hevc_epel_pixels, depth);       \
-    hevcdsp->put_hevc_epel[0][1] = FUNC(put_hevc_epel_h, depth);            \
-    hevcdsp->put_hevc_epel[1][0] = FUNC(put_hevc_epel_v, depth);            \
-    hevcdsp->put_hevc_epel[1][1] = FUNC(put_hevc_epel_hv, depth);           \
-                                                                            \
-    hevcdsp->put_unweighted_pred   = FUNC(put_unweighted_pred, depth);      \
-    hevcdsp->put_weighted_pred_avg = FUNC(put_weighted_pred_avg, depth);    \
-                                                                            \
-    hevcdsp->weighted_pred         = FUNC(weighted_pred, depth);            \
-    hevcdsp->weighted_pred_avg     = FUNC(weighted_pred_avg, depth);        \
+    hevcdsp->idct_dc[0]             = FUNC(idct_4x4_dc, depth);             \
+    hevcdsp->idct_dc[1]             = FUNC(idct_8x8_dc, depth);             \
+    hevcdsp->idct_dc[2]             = FUNC(idct_16x16_dc, depth);           \
+    hevcdsp->idct_dc[3]             = FUNC(idct_32x32_dc, depth);           \
                                                                             \
+    hevcdsp->sao_band_filter    = FUNC(sao_band_filter_0, depth);              \
+    hevcdsp->sao_edge_filter[0] = FUNC(sao_edge_filter_0, depth);              \
+    hevcdsp->sao_edge_filter[1] = FUNC(sao_edge_filter_1, depth);              \
+                                                                               \
+    QPEL_FUNCS(depth);                                                         \
+    QPEL_UNI_FUNCS(depth);                                                     \
+    QPEL_BI_FUNCS(depth);                                                      \
+    EPEL_FUNCS(depth);                                                         \
+    EPEL_UNI_FUNCS(depth);                                                     \
+    EPEL_BI_FUNCS(depth);                                                      \
+                                                                               \
     hevcdsp->hevc_h_loop_filter_luma     = FUNC(hevc_h_loop_filter_luma, depth);   \
     hevcdsp->hevc_v_loop_filter_luma     = FUNC(hevc_v_loop_filter_luma, depth);   \
     hevcdsp->hevc_h_loop_filter_chroma   = FUNC(hevc_h_loop_filter_chroma, depth); \
@@ -174,7 +230,8 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
     hevcdsp->hevc_h_loop_filter_luma_c   = FUNC(hevc_h_loop_filter_luma, depth);   \
     hevcdsp->hevc_v_loop_filter_luma_c   = FUNC(hevc_v_loop_filter_luma, depth);   \
     hevcdsp->hevc_h_loop_filter_chroma_c = FUNC(hevc_h_loop_filter_chroma, depth); \
-    hevcdsp->hevc_v_loop_filter_chroma_c = FUNC(hevc_v_loop_filter_chroma, depth);
+    hevcdsp->hevc_v_loop_filter_chroma_c = FUNC(hevc_v_loop_filter_chroma, depth)
+int i = 0;
 
     switch (bit_depth) {
     case 9:
@@ -183,6 +240,9 @@ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
     case 10:
         HEVC_DSP(10);
         break;
+    case 12:
+        HEVC_DSP(12);
+        break;
     default:
         HEVC_DSP(8);
         break;
diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h
index aad96db..c18bc86 100644
--- a/libavcodec/hevcdsp.h
+++ b/libavcodec/hevcdsp.h
@@ -2,21 +2,23 @@
  * HEVC video decoder
  *
  * Copyright (C) 2012 - 2013 Guillaume Martres
+ * Copyright (C) 2013 - 2014 Pierre-Edouard Lepere
  *
- * This file is part of Libav.
  *
- * Libav is free software; you can redistribute it and/or
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -29,54 +31,68 @@ typedef struct SAOParams {
     int offset_abs[3][4];   ///< sao_offset_abs
     int offset_sign[3][4];  ///< sao_offset_sign
 
-    int band_position[3];   ///< sao_band_position
+    uint8_t band_position[3];   ///< sao_band_position
 
     int eo_class[3];        ///< sao_eo_class
 
-    int offset_val[3][5];   ///<SaoOffsetVal
+    int16_t offset_val[3][5];   ///<SaoOffsetVal
 
     uint8_t type_idx[3];    ///< sao_type_idx
 } SAOParams;
 
 typedef struct HEVCDSPContext {
-    void (*put_pcm)(uint8_t *dst, ptrdiff_t stride, int size,
-                    GetBitContext *gb, int pcm_bit_depth);
-
-    void (*transquant_bypass[4])(uint8_t *dst, int16_t *coeffs,
-                                 ptrdiff_t stride);
-
-    void (*transform_skip)(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
-    void (*transform_4x4_luma_add)(uint8_t *dst, int16_t *coeffs,
-                                   ptrdiff_t stride);
-    void (*transform_add[4])(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride);
-
-    void (*sao_band_filter[4])(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
-                               struct SAOParams *sao, int *borders,
-                               int width, int height, int c_idx);
-    void (*sao_edge_filter[4])(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
-                               struct SAOParams *sao, int *borders, int width,
-                               int height, int c_idx, uint8_t vert_edge,
-                               uint8_t horiz_edge, uint8_t diag_edge);
-
-    void (*put_hevc_qpel[4][4])(int16_t *dst, ptrdiff_t dststride, uint8_t *src,
-                                ptrdiff_t srcstride, int width, int height,
-                                int16_t *mcbuffer);
-    void (*put_hevc_epel[2][2])(int16_t *dst, ptrdiff_t dststride, uint8_t *src,
-                                ptrdiff_t srcstride, int width, int height,
-                                int mx, int my, int16_t *mcbuffer);
-
-    void (*put_unweighted_pred)(uint8_t *dst, ptrdiff_t dststride, int16_t *src,
-                                ptrdiff_t srcstride, int width, int height);
-    void (*put_weighted_pred_avg)(uint8_t *dst, ptrdiff_t dststride,
-                                  int16_t *src1, int16_t *src2,
-                                  ptrdiff_t srcstride, int width, int height);
-    void (*weighted_pred)(uint8_t denom, int16_t wlxFlag, int16_t olxFlag,
-                          uint8_t *dst, ptrdiff_t dststride, int16_t *src,
-                          ptrdiff_t srcstride, int width, int height);
-    void (*weighted_pred_avg)(uint8_t denom, int16_t wl0Flag, int16_t wl1Flag,
-                              int16_t ol0Flag, int16_t ol1Flag, uint8_t *dst,
-                              ptrdiff_t dststride, int16_t *src1, int16_t *src2,
-                              ptrdiff_t srcstride, int width, int height);
+    void (*put_pcm)(uint8_t *_dst, ptrdiff_t _stride, int width, int height,
+                    struct GetBitContext *gb, int pcm_bit_depth);
+
+    void (*transform_add[4])(uint8_t *_dst, int16_t *coeffs, ptrdiff_t _stride);
+
+    void (*transform_skip)(int16_t *coeffs, int16_t log2_size);
+
+    void (*transform_rdpcm)(int16_t *coeffs, int16_t log2_size, int mode);
+
+    void (*idct_4x4_luma)(int16_t *coeffs);
+
+    void (*idct[4])(int16_t *coeffs, int col_limit);
+
+    void (*idct_dc[4])(int16_t *coeffs);
+
+    void (*sao_band_filter)(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,
+                            struct SAOParams *sao, int *borders,
+                            int width, int height, int c_idx);
+
+    void (*sao_edge_filter[2])(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,
+                               struct SAOParams *sao, int *borders, int _width,
+                               int _height, int c_idx, uint8_t *vert_edge,
+                               uint8_t *horiz_edge, uint8_t *diag_edge);
+
+    void (*put_hevc_qpel[10][2][2])(int16_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
+                                    int height, intptr_t mx, intptr_t my, int width);
+    void (*put_hevc_qpel_uni[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
+                                        int height, intptr_t mx, intptr_t my, int width);
+    void (*put_hevc_qpel_uni_w[10][2][2])(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
+                                          int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width);
+
+    void (*put_hevc_qpel_bi[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride,
+                                       int16_t *src2, ptrdiff_t src2stride,
+                                       int height, intptr_t mx, intptr_t my, int width);
+    void (*put_hevc_qpel_bi_w[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride,
+                                         int16_t *src2, ptrdiff_t src2stride,
+                                         int height, int denom, int wx0, int wx1,
+                                         int ox0, int ox1, intptr_t mx, intptr_t my, int width);
+    void (*put_hevc_epel[10][2][2])(int16_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
+                                    int height, intptr_t mx, intptr_t my, int width);
+
+    void (*put_hevc_epel_uni[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride,
+                                        int height, intptr_t mx, intptr_t my, int width);
+    void (*put_hevc_epel_uni_w[10][2][2])(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
+                                          int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width);
+    void (*put_hevc_epel_bi[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride,
+                                       int16_t *src2, ptrdiff_t src2stride,
+                                       int height, intptr_t mx, intptr_t my, int width);
+    void (*put_hevc_epel_bi_w[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride,
+                                         int16_t *src2, ptrdiff_t src2stride,
+                                         int height, int denom, int wx0, int ox0, int wx1,
+                                         int ox1, intptr_t mx, intptr_t my, int width); /* NOTE(review): put_hevc_qpel_bi_w names these wx0, wx1, ox0, ox1, and ff_hevc_dsp_init installs put_hevc_pel_bi_w_pixels in both tables - confirm which order is intended */
 
     void (*hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
                                     int beta, int *tc,
@@ -104,8 +120,9 @@ typedef struct HEVCDSPContext {
 
 void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth);
 
-void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth);
+extern const int8_t ff_hevc_epel_filters[7][4];
+extern const int8_t ff_hevc_qpel_filters[3][16];
 
-extern const int8_t ff_hevc_epel_filters[7][16];
+void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth);
 
 #endif /* AVCODEC_HEVCDSP_H */
diff --git a/libavcodec/hevcdsp_template.c b/libavcodec/hevcdsp_template.c
index 8dcc83d..03a1c68 100644
--- a/libavcodec/hevcdsp_template.c
+++ b/libavcodec/hevcdsp_template.c
@@ -3,20 +3,20 @@
  *
  * Copyright (C) 2012 - 2013 Guillaume Martres
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -24,8 +24,10 @@
 #include "hevc.h"
 
 #include "bit_depth_template.c"
+#include "hevcdsp.h"
 
-static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int size,
+
+static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int width, int height,
                           GetBitContext *gb, int pcm_bit_depth)
 {
     int x, y;
@@ -33,14 +35,14 @@ static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int size,
 
     stride /= sizeof(pixel);
 
-    for (y = 0; y < size; y++) {
-        for (x = 0; x < size; x++)
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++)
             dst[x] = get_bits(gb, pcm_bit_depth) << (BIT_DEPTH - pcm_bit_depth);
         dst += stride;
     }
 }
 
-static void FUNC(transquant_bypass4x4)(uint8_t *_dst, int16_t *coeffs,
+static void FUNC(transform_add4x4)(uint8_t *_dst, int16_t *coeffs,
                                        ptrdiff_t stride)
 {
     int x, y;
@@ -57,7 +59,7 @@ static void FUNC(transquant_bypass4x4)(uint8_t *_dst, int16_t *coeffs,
     }
 }
 
-static void FUNC(transquant_bypass8x8)(uint8_t *_dst, int16_t *coeffs,
+static void FUNC(transform_add8x8)(uint8_t *_dst, int16_t *coeffs,
                                        ptrdiff_t stride)
 {
     int x, y;
@@ -74,7 +76,7 @@ static void FUNC(transquant_bypass8x8)(uint8_t *_dst, int16_t *coeffs,
     }
 }
 
-static void FUNC(transquant_bypass16x16)(uint8_t *_dst, int16_t *coeffs,
+static void FUNC(transform_add16x16)(uint8_t *_dst, int16_t *coeffs,
                                          ptrdiff_t stride)
 {
     int x, y;
@@ -91,7 +93,7 @@ static void FUNC(transquant_bypass16x16)(uint8_t *_dst, int16_t *coeffs,
     }
 }
 
-static void FUNC(transquant_bypass32x32)(uint8_t *_dst, int16_t *coeffs,
+static void FUNC(transform_add32x32)(uint8_t *_dst, int16_t *coeffs,
                                          ptrdiff_t stride)
 {
     int x, y;
@@ -108,24 +110,52 @@ static void FUNC(transquant_bypass32x32)(uint8_t *_dst, int16_t *coeffs,
     }
 }
 
-static void FUNC(transform_skip)(uint8_t *_dst, int16_t *coeffs,
-                                 ptrdiff_t stride)
+/* In-place running sum over a (1 << log2_size)^2 coefficient block; 'mode' picks vertical (non-zero) or horizontal (zero). */
+static void FUNC(transform_rdpcm)(int16_t *_coeffs, int16_t log2_size, int mode)
 {
-    pixel *dst = (pixel *)_dst;
-    int shift  = 13 - BIT_DEPTH;
-#if BIT_DEPTH <= 13
-    int offset = 1 << (shift - 1);
-#else
-    int offset = 0;
-#endif
+    int16_t *coeffs = (int16_t *) _coeffs;
     int x, y;
+    int size = 1 << log2_size;
+
+    if (mode) { /* non-zero: every row after the first accumulates the row directly above it */
+        coeffs += size; /* start at the second row; the first row is left untouched */
+        for (y = 0; y < size - 1; y++) {
+            for (x = 0; x < size; x++)
+                coeffs[x] += coeffs[x - size];
+            coeffs += size;
+        }
+    } else { /* zero: within each row, every element after the first accumulates its left neighbour */
+        for (y = 0; y < size; y++) {
+            for (x = 1; x < size; x++)
+                coeffs[x] += coeffs[x - 1];
+            coeffs += size;
+        }
+    }
+}
 
-    stride /= sizeof(pixel);
+static void FUNC(transform_skip)(int16_t *_coeffs, int16_t log2_size)
+{
+    int shift  = 15 - BIT_DEPTH - log2_size;
+    int x, y;
+    int size = 1 << log2_size;
+    int16_t *coeffs = _coeffs;
 
-    for (y = 0; y < 4 * 4; y += 4) {
-        for (x = 0; x < 4; x++)
-            dst[x] = av_clip_pixel(dst[x] + ((coeffs[y + x] + offset) >> shift));
-        dst += stride;
+    /* Rescale all size * size coefficients in place: rounded right shift when shift > 0, left shift by -shift otherwise. */
+    if (shift > 0) {
+        int offset = 1 << (shift - 1);
+        for (y = 0; y < size; y++) {
+            for (x = 0; x < size; x++) {
+                *coeffs = (*coeffs + offset) >> shift;
+                coeffs++;
+            }
+        }
+    } else { /* shift <= 0: scale up; left-shifting a negative int is undefined, so shift the uint16_t bit pattern */
+        for (y = 0; y < size; y++) {
+            for (x = 0; x < size; x++) {
+                *coeffs = *(uint16_t *)coeffs << -shift;
+                coeffs++;
+            }
+        }
     }
 }
 
@@ -149,17 +179,13 @@ static void FUNC(transform_skip)(uint8_t *_dst, int16_t *coeffs,
         assign(dst[3 * step], 55 * c0 + 29 * c2 - c3);                  \
     } while (0)
 
-static void FUNC(transform_4x4_luma_add)(uint8_t *_dst, int16_t *coeffs,
-                                         ptrdiff_t stride)
+static void FUNC(transform_4x4_luma)(int16_t *coeffs)
 {
     int i;
-    pixel *dst   = (pixel *)_dst;
     int shift    = 7;
     int add      = 1 << (shift - 1);
     int16_t *src = coeffs;
 
-    stride /= sizeof(pixel);
-
     for (i = 0; i < 4; i++) {
         TR_4x4_LUMA(src, src, 4, SCALE);
         src++;
@@ -168,323 +194,238 @@ static void FUNC(transform_4x4_luma_add)(uint8_t *_dst, int16_t *coeffs,
     shift = 20 - BIT_DEPTH;
     add   = 1 << (shift - 1);
     for (i = 0; i < 4; i++) {
-        TR_4x4_LUMA(dst, coeffs, 1, ADD_AND_SCALE);
+        TR_4x4_LUMA(coeffs, coeffs, 1, SCALE);
         coeffs += 4;
-        dst    += stride;
     }
 }
 
 #undef TR_4x4_LUMA
 
-#define TR_4(dst, src, dstep, sstep, assign)                            \
-    do {                                                                \
-        const int e0 = transform[8 * 0][0] * src[0 * sstep] +           \
-                       transform[8 * 2][0] * src[2 * sstep];            \
-        const int e1 = transform[8 * 0][1] * src[0 * sstep] +           \
-                       transform[8 * 2][1] * src[2 * sstep];            \
-        const int o0 = transform[8 * 1][0] * src[1 * sstep] +           \
-                       transform[8 * 3][0] * src[3 * sstep];            \
-        const int o1 = transform[8 * 1][1] * src[1 * sstep] +           \
-                       transform[8 * 3][1] * src[3 * sstep];            \
-                                                                        \
-        assign(dst[0 * dstep], e0 + o0);                                \
-        assign(dst[1 * dstep], e1 + o1);                                \
-        assign(dst[2 * dstep], e1 - o1);                                \
-        assign(dst[3 * dstep], e0 - o0);                                \
+#define TR_4(dst, src, dstep, sstep, assign, end)                              \
+    do {                                                                       \
+        const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep];              \
+        const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep];              \
+        const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep];              \
+        const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep];              \
+        /* e* combine src[0]/src[2], o* combine src[1]/src[3]; 'end' is never referenced here */ \
+        assign(dst[0 * dstep], e0 + o0);                                       \
+        assign(dst[1 * dstep], e1 + o1);                                       \
+        assign(dst[2 * dstep], e1 - o1);                                       \
+        assign(dst[3 * dstep], e0 - o0);                                       \
     } while (0)
 
-static void FUNC(transform_4x4_add)(uint8_t *_dst, int16_t *coeffs,
-                                    ptrdiff_t stride)
-{
-    int i;
-    pixel *dst   = (pixel *)_dst;
-    int shift    = 7;
-    int add      = 1 << (shift - 1);
-    int16_t *src = coeffs;
-
-    stride /= sizeof(pixel);
-
-    for (i = 0; i < 4; i++) {
-        TR_4(src, src, 4, 4, SCALE);
-        src++;
-    }
-
-    shift = 20 - BIT_DEPTH;
-    add   = 1 << (shift - 1);
-    for (i = 0; i < 4; i++) {
-        TR_4(dst, coeffs, 1, 1, ADD_AND_SCALE);
-        coeffs += 4;
-        dst    += stride;
-    }
-}
-
-#define TR_8(dst, src, dstep, sstep, assign)                      \
-    do {                                                          \
-        int i, j;                                                 \
-        int e_8[4];                                               \
-        int o_8[4] = { 0 };                                       \
-        for (i = 0; i < 4; i++)                                   \
-            for (j = 1; j < 8; j += 2)                            \
-                o_8[i] += transform[4 * j][i] * src[j * sstep];   \
-        TR_4(e_8, src, 1, 2 * sstep, SET);                        \
-                                                                  \
-        for (i = 0; i < 4; i++) {                                 \
-            assign(dst[i * dstep], e_8[i] + o_8[i]);              \
-            assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]);        \
-        }                                                         \
+#define TR_8(dst, src, dstep, sstep, assign, end)                              \
+    do {                                                                       \
+        int i, j;                                                              \
+        int e_8[4];                                                            \
+        int o_8[4] = { 0 };                                                    \
+        for (i = 0; i < 4; i++)                                                \
+            for (j = 1; j < end; j += 2)                                       \
+                o_8[i] += transform[4 * j][i] * src[j * sstep];                \
+        TR_4(e_8, src, 1, 2 * sstep, SET, 4);                                  \
+        /* o_8[] sums the odd src entries below 'end'; e_8[] is TR_4 over the even entries */ \
+        for (i = 0; i < 4; i++) {                                              \
+            assign(dst[i * dstep], e_8[i] + o_8[i]);                           \
+            assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]);                     \
+        }                                                                      \
     } while (0)
 
-#define TR_16(dst, src, dstep, sstep, assign)                     \
-    do {                                                          \
-        int i, j;                                                 \
-        int e_16[8];                                              \
-        int o_16[8] = { 0 };                                      \
-        for (i = 0; i < 8; i++)                                   \
-            for (j = 1; j < 16; j += 2)                           \
-                o_16[i] += transform[2 * j][i] * src[j * sstep];  \
-        TR_8(e_16, src, 1, 2 * sstep, SET);                       \
-                                                                  \
-        for (i = 0; i < 8; i++) {                                 \
-            assign(dst[i * dstep], e_16[i] + o_16[i]);            \
-            assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]);     \
-        }                                                         \
+#define TR_16(dst, src, dstep, sstep, assign, end)                             \
+    do {                                                                       \
+        int i, j;                                                              \
+        int e_16[8];                                                           \
+        int o_16[8] = { 0 };                                                   \
+        for (i = 0; i < 8; i++)                                                \
+            for (j = 1; j < end; j += 2)                                       \
+                o_16[i] += transform[2 * j][i] * src[j * sstep];               \
+        TR_8(e_16, src, 1, 2 * sstep, SET, 8);                                 \
+        /* o_16[] sums the odd src entries below 'end'; e_16[] is a full TR_8 (end = 8) over the even entries */ \
+        for (i = 0; i < 8; i++) {                                              \
+            assign(dst[i * dstep], e_16[i] + o_16[i]);                         \
+            assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]);                  \
+        }                                                                      \
     } while (0)
 
-#define TR_32(dst, src, dstep, sstep, assign)                     \
-    do {                                                          \
-        int i, j;                                                 \
-        int e_32[16];                                             \
-        int o_32[16] = { 0 };                                     \
-        for (i = 0; i < 16; i++)                                  \
-            for (j = 1; j < 32; j += 2)                           \
-                o_32[i] += transform[j][i] * src[j * sstep];      \
-        TR_16(e_32, src, 1, 2 * sstep, SET);                      \
-                                                                  \
-        for (i = 0; i < 16; i++) {                                \
-            assign(dst[i * dstep], e_32[i] + o_32[i]);            \
-            assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]);     \
-        }                                                         \
+#define TR_32(dst, src, dstep, sstep, assign, end)                             \
+    do {                                                                       \
+        int i, j;                                                              \
+        int e_32[16];                                                          \
+        int o_32[16] = { 0 };                                                  \
+        for (i = 0; i < 16; i++)                                               \
+            for (j = 1; j < end; j += 2)                                       \
+                o_32[i] += transform[j][i] * src[j * sstep];                   \
+        TR_16(e_32, src, 1, 2 * sstep, SET, end/2);                            \
+        /* o_32[] sums the odd src entries below 'end'; e_32[] is TR_16 over the even entries, limited to end/2 */ \
+        for (i = 0; i < 16; i++) {                                             \
+            assign(dst[i * dstep], e_32[i] + o_32[i]);                         \
+            assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]);                  \
+        }                                                                      \
     } while (0)
 
-
-
-static void FUNC(transform_8x8_add)(uint8_t *_dst, int16_t *coeffs,
-                                    ptrdiff_t stride)
-{
-    int i;
-    pixel *dst   = (pixel *)_dst;
-    int shift    = 7;
-    int add      = 1 << (shift - 1);
-    int16_t *src = coeffs;
-
-    stride /= sizeof(pixel);
-
-    for (i = 0; i < 8; i++) {
-        TR_8(src, src, 8, 8, SCALE);
-        src++;
-    }
-
-    shift = 20 - BIT_DEPTH;
-    add   = 1 << (shift - 1);
-    for (i = 0; i < 8; i++) {
-        TR_8(dst, coeffs, 1, 1, ADD_AND_SCALE);
-        coeffs += 8;
-        dst    += stride;
-    }
+#define IDCT_VAR4(H) /* declares no 'limit': TR_4 never expands its 'end' argument */ \
+    int      limit2   = FFMIN(col_limit + 4, H)
+#define IDCT_VAR8(H)                                                          \
+        int      limit   = FFMIN(col_limit, H);                               \
+        int      limit2   = FFMIN(col_limit + 4, H)
+#define IDCT_VAR16(H)   IDCT_VAR8(H)
+#define IDCT_VAR32(H)   IDCT_VAR8(H)
+/* IDCT(H) defines FUNC(idct_HxH)(coeffs, col_limit): two passes of TR_H over an HxH block, written back in place. */
+#define IDCT(H)                                                              \
+static void FUNC(idct_##H ##x ##H )(                                         \
+                   int16_t *coeffs, int col_limit) {                         \
+    int i;                                                                   \
+    int      shift   = 7;                                                    \
+    int      add     = 1 << (shift - 1);                                     \
+    int16_t *src     = coeffs;                                               \
+    IDCT_VAR ##H(H);                                                         \
+    /* pass 1: one TR_H per column (element stride H); limit2 drops by 4 at i = 4, 8, ... while below H */ \
+    for (i = 0; i < H; i++) {                                                \
+        TR_ ## H(src, src, H, H, SCALE, limit2);                             \
+        if (limit2 < H && i%4 == 0 && !!i)                                   \
+            limit2 -= 4;                                                     \
+        src++;                                                               \
+    }                                                                        \
+    /* pass 2: one TR_H per row (element stride 1), with shift/add recomputed from BIT_DEPTH */ \
+    shift   = 20 - BIT_DEPTH;                                                \
+    add     = 1 << (shift - 1);                                              \
+    for (i = 0; i < H; i++) {                                                \
+        TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit);                        \
+        coeffs += H;                                                         \
+    }                                                                        \
 }
 
-static void FUNC(transform_16x16_add)(uint8_t *_dst, int16_t *coeffs,
-                                      ptrdiff_t stride)
-{
-    int i;
-    pixel *dst   = (pixel *)_dst;
-    int shift    = 7;
-    int add      = 1 << (shift - 1);
-    int16_t *src = coeffs;
-
-    stride /= sizeof(pixel);
-
-    for (i = 0; i < 16; i++) {
-        TR_16(src, src, 16, 16, SCALE);
-        src++;
-    }
-
-    shift = 20 - BIT_DEPTH;
-    add   = 1 << (shift - 1);
-    for (i = 0; i < 16; i++) {
-        TR_16(dst, coeffs, 1, 1, ADD_AND_SCALE);
-        coeffs += 16;
-        dst    += stride;
-    }
+#define IDCT_DC(H)                                                           \
+static void FUNC(idct_##H ##x ##H ##_dc)(                                    \
+                   int16_t *coeffs) {                                        \
+    int i, j;                                                                \
+    int      shift   = 14 - BIT_DEPTH;                                       \
+    int      add     = 1 << (shift - 1);                                     \
+    int      coeff   = (((coeffs[0] + 1) >> 1) + add) >> shift;              \
+    /* only coeffs[0] is read; all H*H entries are overwritten with the same rounded value */ \
+    for (j = 0; j < H; j++) {                                                \
+        for (i = 0; i < H; i++) {                                            \
+            coeffs[i+j*H] = coeff;                                           \
+        }                                                                    \
+    }                                                                        \
 }
 
-static void FUNC(transform_32x32_add)(uint8_t *_dst, int16_t *coeffs,
-                                      ptrdiff_t stride)
-{
-    int i;
-    pixel *dst   = (pixel *)_dst;
-    int shift    = 7;
-    int add      = 1 << (shift - 1);
-    int16_t *src = coeffs;
+IDCT( 4) /* expand IDCT() once per block size: 4, 8, 16, 32 */
+IDCT( 8)
+IDCT(16)
+IDCT(32)
 
-    stride /= sizeof(pixel);
+IDCT_DC( 4) /* expand IDCT_DC() once per block size: 4, 8, 16, 32 */
+IDCT_DC( 8)
+IDCT_DC(16)
+IDCT_DC(32)
 
-    for (i = 0; i < 32; i++) {
-        TR_32(src, src, 32, 32, SCALE);
-        src++;
-    }
-    src   = coeffs;
-    shift = 20 - BIT_DEPTH;
-    add   = 1 << (shift - 1);
-    for (i = 0; i < 32; i++) {
-        TR_32(dst, coeffs, 1, 1, ADD_AND_SCALE);
-        coeffs += 32;
-        dst    += stride;
-    }
-}
+#undef TR_4
+#undef TR_8
+#undef TR_16
+#undef TR_32
 
-static void FUNC(sao_band_filter)(uint8_t *_dst, uint8_t *_src,
-                                  ptrdiff_t stride, SAOParams *sao,
+#undef SET
+#undef SCALE
+#undef ADD_AND_SCALE
+
+static void FUNC(sao_band_filter_0)(uint8_t *_dst, uint8_t *_src,
+                                  ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
                                   int *borders, int width, int height,
-                                  int c_idx, int class)
+                                  int c_idx) /* adds a per-band offset to each pixel of a width x height area; 'borders' is not read */
 {
     pixel *dst = (pixel *)_dst;
     pixel *src = (pixel *)_src;
     int offset_table[32] = { 0 };
     int k, y, x;
-    int chroma = !!c_idx;
     int shift  = BIT_DEPTH - 5;
-    int *sao_offset_val = sao->offset_val[c_idx];
+    int16_t *sao_offset_val = sao->offset_val[c_idx]; /* entries 1..4 are copied into offset_table below */
     int sao_left_class  = sao->band_position[c_idx];
-    int init_y = 0, init_x = 0;
 
-    stride /= sizeof(pixel);
-
-    switch (class) {
-    case 0:
-        if (!borders[2])
-            width -= (8 >> chroma) + 2;
-        if (!borders[3])
-            height -= (4 >> chroma) + 2;
-        break;
-    case 1:
-        init_y = -(4 >> chroma) - 2;
-        if (!borders[2])
-            width -= (8 >> chroma) + 2;
-        height = (4 >> chroma) + 2;
-        break;
-    case 2:
-        init_x = -(8 >> chroma) - 2;
-        width  =  (8 >> chroma) + 2;
-        if (!borders[3])
-            height -= (4 >> chroma) + 2;
-        break;
-    case 3:
-        init_y = -(4 >> chroma) - 2;
-        init_x = -(8 >> chroma) - 2;
-        width  =  (8 >> chroma) + 2;
-        height =  (4 >> chroma) + 2;
-        break;
-    }
+    stride_dst /= sizeof(pixel); /* divide by the pixel size so the strides can step the pixel pointers */
+    stride_src /= sizeof(pixel);
 
-    dst = dst + (init_y * stride + init_x);
-    src = src + (init_y * stride + init_x);
     for (k = 0; k < 4; k++)
         offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1];
     for (y = 0; y < height; y++) {
         for (x = 0; x < width; x++)
             dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]);
-        dst += stride;
-        src += stride;
+        dst += stride_dst; /* source and destination advance by their own strides */
+        src += stride_src;
     }
 }
 
-static void FUNC(sao_band_filter_0)(uint8_t *dst, uint8_t *src,
-                                    ptrdiff_t stride, SAOParams *sao,
-                                    int *borders, int width, int height,
-                                    int c_idx)
-{
-    FUNC(sao_band_filter)(dst, src, stride, sao, borders,
-                          width, height, c_idx, 0);
-}
+#define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1)) /* three-way compare: 1, 0 or -1 */
 
-static void FUNC(sao_band_filter_1)(uint8_t *dst, uint8_t *src,
-                                    ptrdiff_t stride, SAOParams *sao,
-                                    int *borders, int width, int height,
-                                    int c_idx)
-{
-    FUNC(sao_band_filter)(dst, src, stride, sao, borders,
-                          width, height, c_idx, 1);
-}
+static void FUNC(sao_edge_filter)(uint8_t *_dst, uint8_t *_src,
+                                  ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
+                                  int width, int height,
+                                  int c_idx, int init_x, int init_y) { /* compares each pixel with two neighbours picked by eo_class and adds the matching offset */
 
-static void FUNC(sao_band_filter_2)(uint8_t *dst, uint8_t *src,
-                                    ptrdiff_t stride, SAOParams *sao,
-                                    int *borders, int width, int height,
-                                    int c_idx)
-{
-    FUNC(sao_band_filter)(dst, src, stride, sao, borders,
-                          width, height, c_idx, 2);
-}
+    static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 }; /* indexed by 2 + diff0 + diff1, i.e. 0..4 */
+    static const int8_t pos[4][2][2] = {
+        { { -1,  0 }, {  1, 0 } }, // horizontal
+        { {  0, -1 }, {  0, 1 } }, // vertical
+        { { -1, -1 }, {  1, 1 } }, // 45 degree
+        { {  1, -1 }, { -1, 1 } }, // 135 degree
+    };
+    int16_t *sao_offset_val = sao->offset_val[c_idx];
+    int sao_eo_class    = sao->eo_class[c_idx];
+    pixel *dst = (pixel *)_dst;
+    pixel *src = (pixel *)_src;
 
-static void FUNC(sao_band_filter_3)(uint8_t *_dst, uint8_t *_src,
-                                    ptrdiff_t stride, SAOParams *sao,
-                                    int *borders, int width, int height,
-                                    int c_idx)
-{
-    FUNC(sao_band_filter)(_dst, _src, stride, sao, borders,
-                          width, height, c_idx, 3);
+    int y_stride_src = init_y * stride_src;
+    int y_stride_dst = init_y * stride_dst;
+    int pos_0_0  = pos[sao_eo_class][0][0];
+    int pos_0_1  = pos[sao_eo_class][0][1];
+    int pos_1_0  = pos[sao_eo_class][1][0];
+    int pos_1_1  = pos[sao_eo_class][1][1];
+    int x, y;
+    /* NOTE(review): strides index pixel pointers with no division by sizeof(pixel) here; presumably callers pass pixel units, confirm. */
+    int y_stride_0_1 = (init_y + pos_0_1) * stride_src;
+    int y_stride_1_1 = (init_y + pos_1_1) * stride_src;
+    for (y = init_y; y < height; y++) {
+        for (x = init_x; x < width; x++) {
+            int diff0             = CMP(src[x + y_stride_src], src[x + pos_0_0 + y_stride_0_1]);
+            int diff1             = CMP(src[x + y_stride_src], src[x + pos_1_0 + y_stride_1_1]);
+            int offset_val        = edge_idx[2 + diff0 + diff1];
+            dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + sao_offset_val[offset_val]);
+        }
+        y_stride_src += stride_src; /* all four row offsets move down one row per iteration */
+        y_stride_dst += stride_dst;
+        y_stride_0_1 += stride_src;
+        y_stride_1_1 += stride_src;
+    }
 }
 
 static void FUNC(sao_edge_filter_0)(uint8_t *_dst, uint8_t *_src,
-                                    ptrdiff_t stride, SAOParams *sao,
+                                    ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
                                     int *borders, int _width, int _height,
-                                    int c_idx, uint8_t vert_edge,
-                                    uint8_t horiz_edge, uint8_t diag_edge)
+                                    int c_idx, uint8_t *vert_edge,
+                                    uint8_t *horiz_edge, uint8_t *diag_edge)
 {
     int x, y;
     pixel *dst = (pixel *)_dst;
     pixel *src = (pixel *)_src;
-    int chroma = !!c_idx;
-    int *sao_offset_val = sao->offset_val[c_idx];
+    int16_t *sao_offset_val = sao->offset_val[c_idx];
     int sao_eo_class    = sao->eo_class[c_idx];
     int init_x = 0, init_y = 0, width = _width, height = _height;
 
-    static const int8_t pos[4][2][2] = {
-        { { -1,  0 }, {  1, 0 } }, // horizontal
-        { {  0, -1 }, {  0, 1 } }, // vertical
-        { { -1, -1 }, {  1, 1 } }, // 45 degree
-        { {  1, -1 }, { -1, 1 } }, // 135 degree
-    };
-    static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
-
-#define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1))
-
-    stride /= sizeof(pixel);
-
-    if (!borders[2])
-        width -= (8 >> chroma) + 2;
-    if (!borders[3])
-        height -= (4 >> chroma) + 2;
+    stride_dst /= sizeof(pixel);
+    stride_src /= sizeof(pixel);
 
-    dst = dst + (init_y * stride + init_x);
-    src = src + (init_y * stride + init_x);
-    init_y = init_x = 0;
     if (sao_eo_class != SAO_EO_VERT) {
         if (borders[0]) {
             int offset_val = sao_offset_val[0];
-            int y_stride   = 0;
             for (y = 0; y < height; y++) {
-                dst[y_stride] = av_clip_pixel(src[y_stride] + offset_val);
-                y_stride     += stride;
+                dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val);
             }
             init_x = 1;
         }
         if (borders[2]) {
             int offset_val = sao_offset_val[0];
-            int x_stride   = width - 1;
+            int offset     = width - 1;
             for (x = 0; x < height; x++) {
-                dst[x_stride] = av_clip_pixel(src[x_stride] + offset_val);
-                x_stride     += stride;
+                dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val);
             }
             width--;
         }
@@ -497,478 +438,694 @@ static void FUNC(sao_edge_filter_0)(uint8_t *_dst, uint8_t *_src,
             init_y = 1;
         }
         if (borders[3]) {
-            int offset_val = sao_offset_val[0];
-            int y_stride   = stride * (height - 1);
+            int offset_val   = sao_offset_val[0];
+            int y_stride_dst = stride_dst * (height - 1);
+            int y_stride_src = stride_src * (height - 1);
             for (x = init_x; x < width; x++)
-                dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + offset_val);
+                dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
             height--;
         }
     }
-    {
-        int y_stride = init_y * stride;
-        int pos_0_0  = pos[sao_eo_class][0][0];
-        int pos_0_1  = pos[sao_eo_class][0][1];
-        int pos_1_0  = pos[sao_eo_class][1][0];
-        int pos_1_1  = pos[sao_eo_class][1][1];
-
-        int y_stride_0_1 = (init_y + pos_0_1) * stride;
-        int y_stride_1_1 = (init_y + pos_1_1) * stride;
-        for (y = init_y; y < height; y++) {
-            for (x = init_x; x < width; x++) {
-                int diff0         = CMP(src[x + y_stride], src[x + pos_0_0 + y_stride_0_1]);
-                int diff1         = CMP(src[x + y_stride], src[x + pos_1_0 + y_stride_1_1]);
-                int offset_val    = edge_idx[2 + diff0 + diff1];
-                dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + sao_offset_val[offset_val]);
-            }
-            y_stride     += stride;
-            y_stride_0_1 += stride;
-            y_stride_1_1 += stride;
-        }
-    }
-
-    {
-        // Restore pixels that can't be modified
-        int save_upper_left = !diag_edge && sao_eo_class == SAO_EO_135D && !borders[0] && !borders[1];
-        if (vert_edge && sao_eo_class != SAO_EO_VERT)
-            for (y = init_y+save_upper_left; y< height; y++)
-                dst[y*stride] = src[y*stride];
-        if(horiz_edge && sao_eo_class != SAO_EO_HORIZ)
-            for(x = init_x+save_upper_left; x<width; x++)
-                dst[x] = src[x];
-        if(diag_edge && sao_eo_class == SAO_EO_135D)
-            dst[0] = src[0];
-    }
 
-#undef CMP
+    FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride_dst, stride_src, sao, width, height, c_idx, init_x, init_y);
 }
 
 static void FUNC(sao_edge_filter_1)(uint8_t *_dst, uint8_t *_src,
-                                    ptrdiff_t stride, SAOParams *sao,
+                                    ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao, // separate byte strides for dst and src
                                     int *borders, int _width, int _height,
-                                    int c_idx, uint8_t vert_edge,
-                                    uint8_t horiz_edge, uint8_t diag_edge)
+                                    int c_idx, uint8_t *vert_edge, // indexed [0] and [1] below (first / last column)
+                                    uint8_t *horiz_edge, uint8_t *diag_edge) // horiz_edge[0..1], diag_edge[0..3] are read below
 {
     int x, y;
     pixel *dst = (pixel *)_dst;
     pixel *src = (pixel *)_src;
-    int chroma = !!c_idx;
-    int *sao_offset_val = sao->offset_val[c_idx];
+    int16_t *sao_offset_val = sao->offset_val[c_idx]; // offset table for the component selected by c_idx
     int sao_eo_class    = sao->eo_class[c_idx];
     int init_x = 0, init_y = 0, width = _width, height = _height;
 
-    static const int8_t pos[4][2][2] = {
-        { { -1, 0  }, { 1,  0 } }, // horizontal
-        { { 0,  -1 }, { 0,  1 } }, // vertical
-        { { -1, -1 }, { 1,  1 } }, // 45 degree
-        { { 1,  -1 }, { -1, 1 } }, // 135 degree
-    };
-    static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
-
-#define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1))
-
-    stride /= sizeof(pixel);
-
-    init_y = -(4 >> chroma) - 2;
-    if (!borders[2])
-        width -= (8 >> chroma) + 2;
-    height = (4 >> chroma) + 2;
+    stride_dst /= sizeof(pixel); // byte stride -> stride in pixel units
+    stride_src /= sizeof(pixel); // byte stride -> stride in pixel units
 
-    dst = dst + (init_y * stride + init_x);
-    src = src + (init_y * stride + init_x);
-    init_y = init_x = 0;
     if (sao_eo_class != SAO_EO_VERT) {
         if (borders[0]) {
             int offset_val = sao_offset_val[0];
-            int y_stride   = 0;
             for (y = 0; y < height; y++) {
-                dst[y_stride] = av_clip_pixel(src[y_stride] + offset_val);
-                y_stride     += stride;
+                dst[y * stride_dst] = av_clip_pixel(src[y * stride_src] + offset_val); // first column: only sao_offset_val[0] is applied
             }
             init_x = 1;
         }
         if (borders[2]) {
             int offset_val = sao_offset_val[0];
-            int x_stride   = width - 1;
+            int offset     = width - 1; // index of the last column
             for (x = 0; x < height; x++) {
-                dst[x_stride] = av_clip_pixel(src[x_stride] + offset_val);
-                x_stride     += stride;
+                dst[x * stride_dst + offset] = av_clip_pixel(src[x * stride_src + offset] + offset_val); // x counts rows here
             }
             width--;
         }
     }
-    {
-        int y_stride = init_y * stride;
-        int pos_0_0  = pos[sao_eo_class][0][0];
-        int pos_0_1  = pos[sao_eo_class][0][1];
-        int pos_1_0  = pos[sao_eo_class][1][0];
-        int pos_1_1  = pos[sao_eo_class][1][1];
-
-        int y_stride_0_1 = (init_y + pos_0_1) * stride;
-        int y_stride_1_1 = (init_y + pos_1_1) * stride;
-        for (y = init_y; y < height; y++) {
-            for (x = init_x; x < width; x++) {
-                int diff0         = CMP(src[x + y_stride], src[x + pos_0_0 + y_stride_0_1]);
-                int diff1         = CMP(src[x + y_stride], src[x + pos_1_0 + y_stride_1_1]);
-                int offset_val    = edge_idx[2 + diff0 + diff1];
-                dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + sao_offset_val[offset_val]);
-            }
-            y_stride     += stride;
-            y_stride_0_1 += stride;
-            y_stride_1_1 += stride;
+    if (sao_eo_class != SAO_EO_HORIZ) {
+        if (borders[1]) {
+            int offset_val = sao_offset_val[0];
+            for (x = init_x; x < width; x++)
+                dst[x] = av_clip_pixel(src[x] + offset_val); // first row: only sao_offset_val[0] is applied
+            init_y = 1; // the main filter call below then starts at row 1
+        }
+        if (borders[3]) {
+            int offset_val   = sao_offset_val[0];
+            int y_stride_dst = stride_dst * (height - 1); // offset of the last row in dst
+            int y_stride_src = stride_src * (height - 1); // offset of the last row in src
+            for (x = init_x; x < width; x++)
+                dst[x + y_stride_dst] = av_clip_pixel(src[x + y_stride_src] + offset_val);
+            height--; // the main filter call below then stops before the last row
         }
     }
 
+    FUNC(sao_edge_filter)((uint8_t *)dst, (uint8_t *)src, stride_dst, stride_src, sao, width, height, c_idx, init_x, init_y); // main pass over the range left after the border handling above
+
     {
+        int save_upper_left  = !diag_edge[0] && sao_eo_class == SAO_EO_135D && !borders[0] && !borders[1]; // 1 => the restore loops below skip this corner
+        int save_upper_right = !diag_edge[1] && sao_eo_class == SAO_EO_45D  && !borders[1] && !borders[2];
+        int save_lower_right = !diag_edge[2] && sao_eo_class == SAO_EO_135D && !borders[2] && !borders[3];
+        int save_lower_left  = !diag_edge[3] && sao_eo_class == SAO_EO_45D  && !borders[0] && !borders[3];
+
         // Restore pixels that can't be modified
-        int save_lower_left = !diag_edge && sao_eo_class == SAO_EO_45D && !borders[0];
-        if(vert_edge && sao_eo_class != SAO_EO_VERT)
-            for(y = init_y; y< height-save_lower_left; y++)
-                dst[y*stride] = src[y*stride];
-        if(horiz_edge && sao_eo_class != SAO_EO_HORIZ)
-            for(x = init_x+save_lower_left; x<width; x++)
-                dst[(height-1)*stride+x] = src[(height-1)*stride+x];
-        if(diag_edge && sao_eo_class == SAO_EO_45D)
-            dst[stride*(height-1)] = src[stride*(height-1)];
+        if(vert_edge[0] && sao_eo_class != SAO_EO_VERT) { // copy src back over column 0
+            for(y = init_y+save_upper_left; y< height-save_lower_left; y++)
+                dst[y*stride_dst] = src[y*stride_src];
+        }
+        if(vert_edge[1] && sao_eo_class != SAO_EO_VERT) { // copy src back over column width-1
+            for(y = init_y+save_upper_right; y< height-save_lower_right; y++)
+                dst[y*stride_dst+width-1] = src[y*stride_src+width-1];
+        }
+
+        if(horiz_edge[0] && sao_eo_class != SAO_EO_HORIZ) { // copy src back over row 0
+            for(x = init_x+save_upper_left; x < width-save_upper_right; x++)
+                dst[x] = src[x];
+        }
+        if(horiz_edge[1] && sao_eo_class != SAO_EO_HORIZ) { // copy src back over row height-1
+            for(x = init_x+save_lower_left; x < width-save_lower_right; x++)
+                dst[(height-1)*stride_dst+x] = src[(height-1)*stride_src+x];
+        }
+        if(diag_edge[0] && sao_eo_class == SAO_EO_135D) // the four tests below each restore one corner sample
+            dst[0] = src[0];
+        if(diag_edge[1] && sao_eo_class == SAO_EO_45D)
+            dst[width-1] = src[width-1];
+        if(diag_edge[2] && sao_eo_class == SAO_EO_135D)
+            dst[stride_dst*(height-1)+width-1] = src[stride_src*(height-1)+width-1];
+        if(diag_edge[3] && sao_eo_class == SAO_EO_45D)
+            dst[stride_dst*(height-1)] = src[stride_src*(height-1)];
+
     }
+}
 
 #undef CMP
+
+////////////////////////////////////////////////////////////////////////////////
+//
+////////////////////////////////////////////////////////////////////////////////
+static void FUNC(put_hevc_pel_pixels)(int16_t *dst, ptrdiff_t dststride, // copy a width x height block into an int16_t buffer, scaled up
+                                      uint8_t *_src, ptrdiff_t _srcstride,
+                                      int height, intptr_t mx, intptr_t my, int width) // mx and my are not read in this function
+{
+    int x, y;
+    pixel *src          = (pixel *)_src; // view the byte pointer as pixel samples
+    ptrdiff_t srcstride = _srcstride / sizeof(pixel); // byte stride -> stride in pixel units
+
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++)
+            dst[x] = src[x] << (14 - BIT_DEPTH); // left-shift each sample by the gap between BIT_DEPTH and 14 bits
+        src += srcstride;
+        dst += dststride; // dststride is used directly as an int16_t element step
+    }
 }
 
-static void FUNC(sao_edge_filter_2)(uint8_t *_dst, uint8_t *_src,
-                                    ptrdiff_t stride, SAOParams *sao,
-                                    int *borders, int _width, int _height,
-                                    int c_idx, uint8_t vert_edge,
-                                    uint8_t horiz_edge, uint8_t diag_edge)
+static void FUNC(put_hevc_pel_uni_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, // plain row-by-row block copy
+                                          int height, intptr_t mx, intptr_t my, int width) // mx and my are not read in this function
+{
+    int y;
+    pixel *src          = (pixel *)_src;
+    ptrdiff_t srcstride = _srcstride / sizeof(pixel); // byte stride -> stride in pixel units
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel); // byte stride -> stride in pixel units
+
+    for (y = 0; y < height; y++) {
+        memcpy(dst, src, width * sizeof(pixel)); // copies one row of width pixels unchanged
+        src += srcstride;
+        dst += dststride;
+    }
+}
+
+static void FUNC(put_hevc_pel_bi_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, // combine src samples with the int16_t block src2
+                                         int16_t *src2, ptrdiff_t src2stride, // src2stride is used directly as an int16_t element step
+                                         int height, intptr_t mx, intptr_t my, int width) // mx and my are not read in this function
 {
     int x, y;
-    pixel *dst = (pixel *)_dst;
-    pixel *src = (pixel *)_src;
-    int chroma = !!c_idx;
-    int *sao_offset_val = sao->offset_val[c_idx];
-    int sao_eo_class    = sao->eo_class[c_idx];
-    int init_x = 0, init_y = 0, width = _width, height = _height;
+    pixel *src          = (pixel *)_src;
+    ptrdiff_t srcstride = _srcstride / sizeof(pixel); // byte stride -> stride in pixel units
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel); // byte stride -> stride in pixel units
 
-    static const int8_t pos[4][2][2] = {
-        { { -1,  0 }, {  1, 0 } }, // horizontal
-        { {  0, -1 }, {  0, 1 } }, // vertical
-        { { -1, -1 }, {  1, 1 } }, // 45 degree
-        { {  1, -1 }, { -1, 1 } }, // 135 degree
-    };
-    static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
+    int shift = 14  + 1 - BIT_DEPTH; // the extra 1 halves the sum of the two terms
+#if BIT_DEPTH < 14
+    int offset = 1 << (shift - 1); // rounding term: half of the divisor 1 << shift
+#else
+    int offset = 0;
+#endif
 
-#define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1))
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++)
+            dst[x] = av_clip_pixel(((src[x] << (14 - BIT_DEPTH)) + src2[x] + offset) >> shift); // scaled src + src2, rounded, shifted, clipped
+        src  += srcstride;
+        dst  += dststride;
+        src2 += src2stride;
+    }
+}
 
-    stride /= sizeof(pixel);
+static void FUNC(put_hevc_pel_uni_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, // weight each sample by wx, then add ox
+                                            int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width) // mx and my are not read in this function
+{
+    int x, y;
+    pixel *src          = (pixel *)_src;
+    ptrdiff_t srcstride = _srcstride / sizeof(pixel); // byte stride -> stride in pixel units
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel); // byte stride -> stride in pixel units
+    int shift = denom + 14 - BIT_DEPTH; // denom is added to the 14 - BIT_DEPTH scaling shift
+#if BIT_DEPTH < 14
+    int offset = 1 << (shift - 1); // rounding term: half of the divisor 1 << shift
+#else
+    int offset = 0;
+#endif
 
-    init_x = -(8 >> chroma) - 2;
-    width  =  (8 >> chroma) + 2;
-    if (!borders[3])
-        height -= (4 >> chroma) + 2;
+    ox     = ox * (1 << (BIT_DEPTH - 8)); // scale the offset by 2^(BIT_DEPTH - 8); multiplication keeps negative ox well-defined
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++)
+            dst[x] = av_clip_pixel((((src[x] << (14 - BIT_DEPTH)) * wx + offset) >> shift) + ox); // ox is added after the shift
+        src += srcstride;
+        dst += dststride;
+    }
+}
 
-    dst = dst + (init_y * stride + init_x);
-    src = src + (init_y * stride + init_x);
-    init_y = init_x = 0;
-    if (sao_eo_class != SAO_EO_HORIZ) {
-        if (borders[1]) {
-            int offset_val = sao_offset_val[0];
-            for (x = init_x; x < width; x++)
-                dst[x] = av_clip_pixel(src[x] + offset_val);
-            init_y = 1;
-        }
-        if (borders[3]) {
-            int offset_val = sao_offset_val[0];
-            int y_stride   = stride * (height - 1);
-            for (x = init_x; x < width; x++)
-                dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + offset_val);
-            height--;
+static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, // weighted combination of src samples and the int16_t block src2
+                                           int16_t *src2, ptrdiff_t src2stride, // src2stride is used directly as an int16_t element step
+                                           int height, int denom, int wx0, int wx1, // wx0 multiplies src2, wx1 multiplies src
+                                           int ox0, int ox1, intptr_t mx, intptr_t my, int width) // mx and my are not read in this function
+{
+    int x, y;
+    pixel *src          = (pixel *)_src;
+    ptrdiff_t srcstride = _srcstride / sizeof(pixel); // byte stride -> stride in pixel units
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel); // byte stride -> stride in pixel units
+
+    int shift = 14  + 1 - BIT_DEPTH;
+    int log2Wd = denom + shift - 1; // final right shift below is log2Wd + 1
+
+    ox0     = ox0 * (1 << (BIT_DEPTH - 8)); // offsets may be negative: scale by multiplying, never by <<
+    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++) {
+            dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + src2[x] * wx0 + ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
         }
+        src  += srcstride;
+        dst  += dststride;
+        src2 += src2stride;
     }
-    {
-        int y_stride = init_y * stride;
-        int pos_0_0  = pos[sao_eo_class][0][0];
-        int pos_0_1  = pos[sao_eo_class][0][1];
-        int pos_1_0  = pos[sao_eo_class][1][0];
-        int pos_1_1  = pos[sao_eo_class][1][1];
-
-        int y_stride_0_1 = (init_y + pos_0_1) * stride;
-        int y_stride_1_1 = (init_y + pos_1_1) * stride;
-        for (y = init_y; y < height; y++) {
-            for (x = init_x; x < width; x++) {
-                int diff0         = CMP(src[x + y_stride], src[x + pos_0_0 + y_stride_0_1]);
-                int diff1         = CMP(src[x + y_stride], src[x + pos_1_0 + y_stride_1_1]);
-                int offset_val    = edge_idx[2 + diff0 + diff1];
-                dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + sao_offset_val[offset_val]);
-            }
-            y_stride     += stride;
-            y_stride_0_1 += stride;
-            y_stride_1_1 += stride;
-        }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+//
+////////////////////////////////////////////////////////////////////////////////
+#define QPEL_FILTER(src, stride)                                               \
+    (filter[0] * src[x - 3 * stride] +                                         \
+     filter[1] * src[x - 2 * stride] +                                         \
+     filter[2] * src[x -     stride] +                                         \
+     filter[3] * src[x             ] +                                         \
+     filter[4] * src[x +     stride] +                                         \
+     filter[5] * src[x + 2 * stride] +                                         \
+     filter[6] * src[x + 3 * stride] +                                         \
+     filter[7] * src[x + 4 * stride])
+
+static void FUNC(put_hevc_qpel_h)(int16_t *dst,  ptrdiff_t dststride, // apply the 8-tap QPEL_FILTER along each row into an int16_t buffer
+                                  uint8_t *_src, ptrdiff_t _srcstride,
+                                  int height, intptr_t mx, intptr_t my, int width) // my is not read in this function
+{
+    int x, y;
+    pixel        *src       = (pixel*)_src;
+    ptrdiff_t     srcstride = _srcstride / sizeof(pixel); // byte stride -> stride in pixel units
+    const int8_t *filter    = ff_hevc_qpel_filters[mx - 1]; // coefficient row selected by mx (index mx - 1)
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++)
+            dst[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); // step 1: taps read src[x - 3] .. src[x + 4]
+        src += srcstride;
+        dst += dststride; // dststride is used directly as an int16_t element step
     }
+}
 
-    {
-        // Restore pixels that can't be modified
-        int save_upper_right = !diag_edge && sao_eo_class == SAO_EO_45D && !borders[1];
-        if(vert_edge && sao_eo_class != SAO_EO_VERT)
-            for(y = init_y+save_upper_right; y< height; y++)
-                dst[y*stride+width-1] = src[y*stride+width-1];
-        if(horiz_edge && sao_eo_class != SAO_EO_HORIZ)
-            for(x = init_x; x<width-save_upper_right; x++)
-                dst[x] = src[x];
-        if(diag_edge && sao_eo_class == SAO_EO_45D)
-            dst[width-1] = src[width-1];
+static void FUNC(put_hevc_qpel_v)(int16_t *dst,  ptrdiff_t dststride, // apply the 8-tap QPEL_FILTER down each column into an int16_t buffer
+                                  uint8_t *_src, ptrdiff_t _srcstride,
+                                  int height, intptr_t mx, intptr_t my, int width) // mx is not read in this function
+{
+    int x, y;
+    pixel        *src       = (pixel*)_src;
+    ptrdiff_t     srcstride = _srcstride / sizeof(pixel); // byte stride -> stride in pixel units
+    const int8_t *filter    = ff_hevc_qpel_filters[my - 1]; // coefficient row selected by my (index my - 1)
+    for (y = 0; y < height; y++)  {
+        for (x = 0; x < width; x++)
+            dst[x] = QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8); // step srcstride: taps read 3 rows above .. 4 rows below
+        src += srcstride;
+        dst += dststride; // dststride is used directly as an int16_t element step
     }
-#undef CMP
 }
 
-static void FUNC(sao_edge_filter_3)(uint8_t *_dst, uint8_t *_src,
-                                    ptrdiff_t stride, SAOParams *sao,
-                                    int *borders, int _width, int _height,
-                                    int c_idx, uint8_t vert_edge,
-                                    uint8_t horiz_edge, uint8_t diag_edge)
+static void FUNC(put_hevc_qpel_hv)(int16_t *dst, // two-pass filter: rows into tmp_array, then columns of tmp_array into dst
+                                   ptrdiff_t dststride,
+                                   uint8_t *_src,
+                                   ptrdiff_t _srcstride,
+                                   int height, intptr_t mx,
+                                   intptr_t my, int width)
 {
     int x, y;
-    pixel *dst = (pixel *)_dst;
-    pixel *src = (pixel *)_src;
-    int chroma = !!c_idx;
-    int *sao_offset_val = sao->offset_val[c_idx];
-    int sao_eo_class    = sao->eo_class[c_idx];
-    int init_x = 0, init_y = 0, width = _width, height = _height;
+    const int8_t *filter;
+    pixel *src = (pixel*)_src;
+    ptrdiff_t srcstride = _srcstride / sizeof(pixel); // byte stride -> stride in pixel units
+    int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; // intermediate rows, row pitch MAX_PB_SIZE
+    int16_t *tmp = tmp_array;
 
-    static const int8_t pos[4][2][2] = {
-        { { -1,  0 }, {  1, 0 } }, // horizontal
-        { {  0, -1 }, {  0, 1 } }, // vertical
-        { { -1, -1 }, {  1, 1 } }, // 45 degree
-        { {  1, -1 }, { -1, 1 } }, // 135 degree
-    };
-    static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
+    src   -= QPEL_EXTRA_BEFORE * srcstride; // start QPEL_EXTRA_BEFORE rows above the block
+    filter = ff_hevc_qpel_filters[mx - 1]; // first pass uses the row selected by mx
+    for (y = 0; y < height + QPEL_EXTRA; y++) { // QPEL_EXTRA additional rows feed the second pass
+        for (x = 0; x < width; x++)
+            tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
+        src += srcstride;
+        tmp += MAX_PB_SIZE;
+    }
 
-#define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 0 : -1))
+    tmp    = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; // rewind to the tmp row matching the first output row
+    filter = ff_hevc_qpel_filters[my - 1]; // second pass uses the row selected by my
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++)
+            dst[x] = QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6; // step MAX_PB_SIZE walks down the tmp column
+        tmp += MAX_PB_SIZE;
+        dst += dststride; // dststride is used directly as an int16_t element step
+    }
+}
 
-    stride /= sizeof(pixel);
+static void FUNC(put_hevc_qpel_uni_h)(uint8_t *_dst,  ptrdiff_t _dststride, // row-wise QPEL_FILTER, rounded and clipped straight to pixels
+                                      uint8_t *_src, ptrdiff_t _srcstride,
+                                      int height, intptr_t mx, intptr_t my, int width) // my is not read in this function
+{
+    int x, y;
+    pixel        *src       = (pixel*)_src;
+    ptrdiff_t     srcstride = _srcstride / sizeof(pixel); // byte stride -> stride in pixel units
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel); // byte stride -> stride in pixel units
+    const int8_t *filter    = ff_hevc_qpel_filters[mx - 1]; // coefficient row selected by mx (index mx - 1)
+    int shift = 14 - BIT_DEPTH;
+
+#if BIT_DEPTH < 14
+    int offset = 1 << (shift - 1); // rounding term: half of the divisor 1 << shift
+#else
+    int offset = 0;
+#endif
+
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++)
+            dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift); // filter, round, shift, clip
+        src += srcstride;
+        dst += dststride;
+    }
+}
 
-    init_y = -(4 >> chroma) - 2;
-    init_x = -(8 >> chroma) - 2;
-    width  =  (8 >> chroma) + 2;
-    height =  (4 >> chroma) + 2;
+static void FUNC(put_hevc_qpel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, // row-wise QPEL_FILTER combined with the int16_t block src2
+                                     int16_t *src2, ptrdiff_t src2stride, // src2stride is used directly as an int16_t element step
+                                     int height, intptr_t mx, intptr_t my, int width) // my is not read in this function
+{
+    int x, y;
+    pixel        *src       = (pixel*)_src;
+    ptrdiff_t     srcstride = _srcstride / sizeof(pixel); // byte stride -> stride in pixel units
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel); // byte stride -> stride in pixel units
 
+    const int8_t *filter    = ff_hevc_qpel_filters[mx - 1]; // coefficient row selected by mx (index mx - 1)
 
-    dst    = dst + (init_y * stride + init_x);
-    src    = src + (init_y * stride + init_x);
-    init_y = init_x = 0;
+    int shift = 14  + 1 - BIT_DEPTH; // the extra 1 halves the sum of the two terms
+#if BIT_DEPTH < 14
+    int offset = 1 << (shift - 1); // rounding term: half of the divisor 1 << shift
+#else
+    int offset = 0;
+#endif
 
-    {
-        int y_stride = init_y * stride;
-        int pos_0_0  = pos[sao_eo_class][0][0];
-        int pos_0_1  = pos[sao_eo_class][0][1];
-        int pos_1_0  = pos[sao_eo_class][1][0];
-        int pos_1_1  = pos[sao_eo_class][1][1];
-
-        int y_stride_0_1 = (init_y + pos_0_1) * stride;
-        int y_stride_1_1 = (init_y + pos_1_1) * stride;
-
-        for (y = init_y; y < height; y++) {
-            for (x = init_x; x < width; x++) {
-                int diff0         = CMP(src[x + y_stride], src[x + pos_0_0 + y_stride_0_1]);
-                int diff1         = CMP(src[x + y_stride], src[x + pos_1_0 + y_stride_1_1]);
-                int offset_val    = edge_idx[2 + diff0 + diff1];
-                dst[x + y_stride] = av_clip_pixel(src[x + y_stride] + sao_offset_val[offset_val]);
-            }
-            y_stride     += stride;
-            y_stride_0_1 += stride;
-            y_stride_1_1 += stride;
-        }
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++)
+            dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); // filtered src + src2, rounded, shifted, clipped
+        src  += srcstride;
+        dst  += dststride;
+        src2 += src2stride;
     }
+}
 
-    {
-        // Restore pixels that can't be modified
-        int save_lower_right = !diag_edge && sao_eo_class == SAO_EO_135D;
-        if(vert_edge && sao_eo_class != SAO_EO_VERT)
-            for(y = init_y; y< height-save_lower_right; y++)
-                dst[y*stride+width-1] = src[y*stride+width-1];
-        if(horiz_edge && sao_eo_class != SAO_EO_HORIZ)
-            for(x = init_x; x<width-save_lower_right; x++)
-                dst[(height-1)*stride+x] = src[(height-1)*stride+x];
-        if(diag_edge && sao_eo_class == SAO_EO_135D)
-            dst[stride*(height-1)+width-1] = src[stride*(height-1)+width-1];
+static void FUNC(put_hevc_qpel_uni_v)(uint8_t *_dst,  ptrdiff_t _dststride, // column-wise QPEL_FILTER, rounded and clipped straight to pixels
+                                     uint8_t *_src, ptrdiff_t _srcstride,
+                                     int height, intptr_t mx, intptr_t my, int width) // mx is not read in this function
+{
+    int x, y;
+    pixel        *src       = (pixel*)_src;
+    ptrdiff_t     srcstride = _srcstride / sizeof(pixel); // byte stride -> stride in pixel units
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel); // byte stride -> stride in pixel units
+    const int8_t *filter    = ff_hevc_qpel_filters[my - 1]; // coefficient row selected by my (index my - 1)
+    int shift = 14 - BIT_DEPTH;
+
+#if BIT_DEPTH < 14
+    int offset = 1 << (shift - 1); // rounding term: half of the divisor 1 << shift
+#else
+    int offset = 0;
+#endif
+
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++)
+            dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift); // step srcstride: taps span rows
+        src += srcstride;
+        dst += dststride;
     }
-#undef CMP
 }
 
-#undef SET
-#undef SCALE
-#undef ADD_AND_SCALE
-#undef TR_4
-#undef TR_8
-#undef TR_16
-#undef TR_32
 
-static void FUNC(put_hevc_qpel_pixels)(int16_t *dst, ptrdiff_t dststride,
+static void FUNC(put_hevc_qpel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, // column-wise QPEL_FILTER combined with the int16_t block src2
+                                     int16_t *src2, ptrdiff_t src2stride, // src2stride is used directly as an int16_t element step
+                                     int height, intptr_t mx, intptr_t my, int width) // mx is not read in this function
+{
+    int x, y;
+    pixel        *src       = (pixel*)_src;
+    ptrdiff_t     srcstride = _srcstride / sizeof(pixel); // byte stride -> stride in pixel units
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel); // byte stride -> stride in pixel units
+
+    const int8_t *filter    = ff_hevc_qpel_filters[my - 1]; // coefficient row selected by my (index my - 1)
+
+    int shift = 14 + 1 - BIT_DEPTH; // the extra 1 halves the sum of the two terms
+#if BIT_DEPTH < 14
+    int offset = 1 << (shift - 1); // rounding term: half of the divisor 1 << shift
+#else
+    int offset = 0;
+#endif
+
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++)
+            dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift); // filtered src + src2, rounded, shifted, clipped
+        src  += srcstride;
+        dst  += dststride;
+        src2 += src2stride;
+    }
+}
+
+static void FUNC(put_hevc_qpel_uni_hv)(uint8_t *_dst,  ptrdiff_t _dststride, // two-pass filter (rows, then columns) written as clipped pixels
                                        uint8_t *_src, ptrdiff_t _srcstride,
-                                       int width, int height, int16_t* mcbuffer)
+                                       int height, intptr_t mx, intptr_t my, int width) // mx picks the first-pass filter, my the second
 {
     int x, y;
-    pixel *src          = (pixel *)_src;
+    const int8_t *filter;
+    pixel *src = (pixel*)_src;
     ptrdiff_t srcstride = _srcstride / sizeof(pixel);
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel); // byte stride -> stride in pixel units
+    int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; // intermediate rows, row pitch MAX_PB_SIZE
+    int16_t *tmp = tmp_array;
+    int shift =  14 - BIT_DEPTH;
+
+#if BIT_DEPTH < 14
+    int offset = 1 << (shift - 1); // rounding term: half of the divisor 1 << shift
+#else
+    int offset = 0;
+#endif
+
+    src   -= QPEL_EXTRA_BEFORE * srcstride; // start QPEL_EXTRA_BEFORE rows above the block
+    filter = ff_hevc_qpel_filters[mx - 1];
+    for (y = 0; y < height + QPEL_EXTRA; y++) { // QPEL_EXTRA additional rows feed the second pass
+        for (x = 0; x < width; x++)
+            tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
+        src += srcstride;
+        tmp += MAX_PB_SIZE;
+    }
+
+    tmp    = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; // rewind to the tmp row matching the first output row
+    filter = ff_hevc_qpel_filters[my - 1];
 
     for (y = 0; y < height; y++) {
         for (x = 0; x < width; x++)
-            dst[x] = src[x] << (14 - BIT_DEPTH);
+            dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift); // column filter over tmp, round, shift, clip
+        tmp += MAX_PB_SIZE;
+        dst += dststride;
+    }
+}
+
+static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, // two-pass filter (rows, then columns) combined with src2
+                                      int16_t *src2, ptrdiff_t src2stride, // src2stride is used directly as an int16_t element step
+                                      int height, intptr_t mx, intptr_t my, int width) // mx picks the first-pass filter, my the second
+{
+    int x, y;
+    const int8_t *filter;
+    pixel *src = (pixel*)_src;
+    ptrdiff_t srcstride = _srcstride / sizeof(pixel); // byte stride -> stride in pixel units
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel); // byte stride -> stride in pixel units
+    int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; // intermediate rows, row pitch MAX_PB_SIZE
+    int16_t *tmp = tmp_array;
+    int shift = 14 + 1 - BIT_DEPTH; // the extra 1 halves the sum of the two terms
+#if BIT_DEPTH < 14
+    int offset = 1 << (shift - 1); // rounding term: half of the divisor 1 << shift
+#else
+    int offset = 0;
+#endif
+
+    src   -= QPEL_EXTRA_BEFORE * srcstride; // start QPEL_EXTRA_BEFORE rows above the block
+    filter = ff_hevc_qpel_filters[mx - 1];
+    for (y = 0; y < height + QPEL_EXTRA; y++) { // QPEL_EXTRA additional rows feed the second pass
+        for (x = 0; x < width; x++)
+            tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
+        src += srcstride;
+        tmp += MAX_PB_SIZE;
+    }
+
+    tmp    = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; // rewind to the tmp row matching the first output row
+    filter = ff_hevc_qpel_filters[my - 1];
+
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++)
+            dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift); // column filter over tmp + src2, rounded, shifted, clipped
+        tmp  += MAX_PB_SIZE;
+        dst  += dststride;
+        src2 += src2stride;
+    }
+}
+
+static void FUNC(put_hevc_qpel_uni_w_h)(uint8_t *_dst,  ptrdiff_t _dststride, // row-wise QPEL_FILTER, weighted by wx, then offset by ox
+                                        uint8_t *_src, ptrdiff_t _srcstride,
+                                        int height, int denom, int wx, int ox,
+                                        intptr_t mx, intptr_t my, int width) // my is not read in this function
+{
+    int x, y;
+    pixel        *src       = (pixel*)_src;
+    ptrdiff_t     srcstride = _srcstride / sizeof(pixel); // byte stride -> stride in pixel units
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel); // byte stride -> stride in pixel units
+    const int8_t *filter    = ff_hevc_qpel_filters[mx - 1]; // coefficient row selected by mx (index mx - 1)
+    int shift = denom + 14 - BIT_DEPTH; // denom is added to the 14 - BIT_DEPTH scaling shift
+#if BIT_DEPTH < 14
+    int offset = 1 << (shift - 1); // rounding term: half of the divisor 1 << shift
+#else
+    int offset = 0;
+#endif
+
+    ox = ox * (1 << (BIT_DEPTH - 8)); // scale the offset by 2^(BIT_DEPTH - 8); multiplication keeps negative ox well-defined
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++)
+            dst[x] = av_clip_pixel((((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox); // ox is added after the shift
         src += srcstride;
         dst += dststride;
     }
 }
 
-#define QPEL_FILTER_1(src, stride)      \
-    (1 * -src[x - 3 * stride] +         \
-     4 *  src[x - 2 * stride] -         \
-    10 *  src[x -     stride] +         \
-    58 *  src[x]              +         \
-    17 *  src[x +     stride] -         \
-     5 *  src[x + 2 * stride] +         \
-     1 *  src[x + 3 * stride])
-
-#define QPEL_FILTER_2(src, stride)      \
-    (1  * -src[x - 3 * stride] +        \
-     4  *  src[x - 2 * stride] -        \
-    11  *  src[x -     stride] +        \
-    40  *  src[x]              +        \
-    40  *  src[x +     stride] -        \
-    11  *  src[x + 2 * stride] +        \
-     4  *  src[x + 3 * stride] -        \
-     1  *  src[x + 4 * stride])
-
-#define QPEL_FILTER_3(src, stride)      \
-    (1  * src[x - 2 * stride] -         \
-     5  * src[x -     stride] +         \
-    17  * src[x]              +         \
-    58  * src[x + stride]     -         \
-    10  * src[x + 2 * stride] +         \
-     4  * src[x + 3 * stride] -         \
-     1  * src[x + 4 * stride])
-
-
-#define PUT_HEVC_QPEL_H(H)                                                     \
-static void FUNC(put_hevc_qpel_h ## H)(int16_t *dst,  ptrdiff_t dststride,     \
-                                       uint8_t *_src, ptrdiff_t _srcstride,    \
-                                       int width, int height,                  \
-                                       int16_t* mcbuffer)                      \
-{                                                                              \
-    int x, y;                                                                  \
-    pixel *src = (pixel*)_src;                                                 \
-    ptrdiff_t srcstride = _srcstride / sizeof(pixel);                          \
-                                                                               \
-    for (y = 0; y < height; y++) {                                             \
-        for (x = 0; x < width; x++)                                            \
-            dst[x] = QPEL_FILTER_ ## H(src, 1) >> (BIT_DEPTH - 8);             \
-        src += srcstride;                                                      \
-        dst += dststride;                                                      \
-    }                                                                          \
+static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
+                                       int16_t *src2, ptrdiff_t src2stride,
+                                       int height, int denom, int wx0, int wx1,
+                                       int ox0, int ox1, intptr_t mx, intptr_t my, int width)
+{
+    int x, y;
+    pixel        *src       = (pixel*)_src;
+    ptrdiff_t     srcstride = _srcstride / sizeof(pixel);
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel);
+
+    const int8_t *filter    = ff_hevc_qpel_filters[mx - 1];
+
+    int shift = 14  + 1 - BIT_DEPTH;
+    int log2Wd = denom + shift - 1; /* applied below with '*', not '<<': ox0 + ox1 + 1 is signed and shifting a negative value is undefined */
+
+    ox0     = ox0 * (1 << (BIT_DEPTH - 8));
+    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++)
+            dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
+                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
+        src  += srcstride;
+        dst  += dststride;
+        src2 += src2stride;
+    }
 }
 
-#define PUT_HEVC_QPEL_V(V)                                                     \
-static void FUNC(put_hevc_qpel_v ## V)(int16_t *dst,  ptrdiff_t dststride,     \
-                                       uint8_t *_src, ptrdiff_t _srcstride,    \
-                                       int width, int height,                  \
-                                       int16_t* mcbuffer)                      \
-{                                                                              \
-    int x, y;                                                                  \
-    pixel *src = (pixel*)_src;                                                 \
-    ptrdiff_t srcstride = _srcstride / sizeof(pixel);                          \
-                                                                               \
-    for (y = 0; y < height; y++)  {                                            \
-        for (x = 0; x < width; x++)                                            \
-            dst[x] = QPEL_FILTER_ ## V(src, srcstride) >> (BIT_DEPTH - 8);     \
-        src += srcstride;                                                      \
-        dst += dststride;                                                      \
-    }                                                                          \
+static void FUNC(put_hevc_qpel_uni_w_v)(uint8_t *_dst,  ptrdiff_t _dststride,
+                                        uint8_t *_src, ptrdiff_t _srcstride,
+                                        int height, int denom, int wx, int ox,
+                                        intptr_t mx, intptr_t my, int width)
+{
+    int x, y;
+    pixel        *src       = (pixel*)_src;
+    ptrdiff_t     srcstride = _srcstride / sizeof(pixel);
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel);
+    const int8_t *filter    = ff_hevc_qpel_filters[my - 1];
+    int shift = denom + 14 - BIT_DEPTH;
+#if BIT_DEPTH < 14
+    int offset = 1 << (shift - 1);
+#else
+    int offset = 0;
+#endif
+
+    ox = ox * (1 << (BIT_DEPTH - 8));
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++)
+            dst[x] = av_clip_pixel((((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
+        src += srcstride;
+        dst += dststride;
+    }
 }
 
-#define PUT_HEVC_QPEL_HV(H, V)                                                 \
-static void FUNC(put_hevc_qpel_h ## H ## v ## V)(int16_t *dst,                 \
-                                                 ptrdiff_t dststride,          \
-                                                 uint8_t *_src,                \
-                                                 ptrdiff_t _srcstride,         \
-                                                 int width, int height,        \
-                                                 int16_t* mcbuffer)            \
-{                                                                              \
-    int x, y;                                                                  \
-    pixel *src = (pixel*)_src;                                                 \
-    ptrdiff_t srcstride = _srcstride / sizeof(pixel);                          \
-                                                                               \
-    int16_t tmp_array[(MAX_PB_SIZE + 7) * MAX_PB_SIZE];                        \
-    int16_t *tmp = tmp_array;                                                  \
-                                                                               \
-    src -= ff_hevc_qpel_extra_before[V] * srcstride;                           \
-                                                                               \
-    for (y = 0; y < height + ff_hevc_qpel_extra[V]; y++) {                     \
-        for (x = 0; x < width; x++)                                            \
-            tmp[x] = QPEL_FILTER_ ## H(src, 1) >> (BIT_DEPTH - 8);             \
-        src += srcstride;                                                      \
-        tmp += MAX_PB_SIZE;                                                    \
-    }                                                                          \
-                                                                               \
-    tmp = tmp_array + ff_hevc_qpel_extra_before[V] * MAX_PB_SIZE;              \
-                                                                               \
-    for (y = 0; y < height; y++) {                                             \
-        for (x = 0; x < width; x++)                                            \
-            dst[x] = QPEL_FILTER_ ## V(tmp, MAX_PB_SIZE) >> 6;                 \
-        tmp += MAX_PB_SIZE;                                                    \
-        dst += dststride;                                                      \
-    }                                                                          \
+static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
+                                       int16_t *src2, ptrdiff_t src2stride,
+                                       int height, int denom, int wx0, int wx1,
+                                       int ox0, int ox1, intptr_t mx, intptr_t my, int width)
+{
+    int x, y;
+    pixel        *src       = (pixel*)_src;
+    ptrdiff_t     srcstride = _srcstride / sizeof(pixel);
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel);
+
+    const int8_t *filter    = ff_hevc_qpel_filters[my - 1];
+
+    int shift = 14 + 1 - BIT_DEPTH;
+    int log2Wd = denom + shift - 1; /* applied below with '*', not '<<': ox0 + ox1 + 1 is signed and shifting a negative value is undefined */
+
+    ox0     = ox0 * (1 << (BIT_DEPTH - 8));
+    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++)
+            dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
+                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
+        src  += srcstride;
+        dst  += dststride;
+        src2 += src2stride;
+    }
 }
 
-PUT_HEVC_QPEL_H(1)
-PUT_HEVC_QPEL_H(2)
-PUT_HEVC_QPEL_H(3)
-PUT_HEVC_QPEL_V(1)
-PUT_HEVC_QPEL_V(2)
-PUT_HEVC_QPEL_V(3)
-PUT_HEVC_QPEL_HV(1, 1)
-PUT_HEVC_QPEL_HV(1, 2)
-PUT_HEVC_QPEL_HV(1, 3)
-PUT_HEVC_QPEL_HV(2, 1)
-PUT_HEVC_QPEL_HV(2, 2)
-PUT_HEVC_QPEL_HV(2, 3)
-PUT_HEVC_QPEL_HV(3, 1)
-PUT_HEVC_QPEL_HV(3, 2)
-PUT_HEVC_QPEL_HV(3, 3)
-
-static void FUNC(put_hevc_epel_pixels)(int16_t *dst, ptrdiff_t dststride,
-                                       uint8_t *_src, ptrdiff_t _srcstride,
-                                       int width, int height, int mx, int my,
-                                       int16_t* mcbuffer)
+static void FUNC(put_hevc_qpel_uni_w_hv)(uint8_t *_dst,  ptrdiff_t _dststride,
+                                         uint8_t *_src, ptrdiff_t _srcstride,
+                                         int height, int denom, int wx, int ox,
+                                         intptr_t mx, intptr_t my, int width)
 {
     int x, y;
-    pixel *src          = (pixel *)_src;
+    const int8_t *filter;
+    pixel *src = (pixel*)_src;
     ptrdiff_t srcstride = _srcstride / sizeof(pixel);
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel);
+    int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
+    int16_t *tmp = tmp_array;
+    int shift = denom + 14 - BIT_DEPTH;
+#if BIT_DEPTH < 14
+    int offset = 1 << (shift - 1);
+#else
+    int offset = 0;
+#endif
 
-    for (y = 0; y < height; y++) {
+    src   -= QPEL_EXTRA_BEFORE * srcstride;
+    filter = ff_hevc_qpel_filters[mx - 1];
+    for (y = 0; y < height + QPEL_EXTRA; y++) {
         for (x = 0; x < width; x++)
-            dst[x] = src[x] << (14 - BIT_DEPTH);
+            tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
         src += srcstride;
+        tmp += MAX_PB_SIZE;
+    }
+
+    tmp    = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
+    filter = ff_hevc_qpel_filters[my - 1];
+
+    ox = ox * (1 << (BIT_DEPTH - 8));
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++)
+            dst[x] = av_clip_pixel((((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
+        tmp += MAX_PB_SIZE;
         dst += dststride;
     }
 }
 
-#define EPEL_FILTER(src, stride)                \
-    (filter_0 * src[x - stride] +               \
-     filter_1 * src[x]          +               \
-     filter_2 * src[x + stride] +               \
-     filter_3 * src[x + 2 * stride])
+static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
+                                        int16_t *src2, ptrdiff_t src2stride,
+                                        int height, int denom, int wx0, int wx1,
+                                        int ox0, int ox1, intptr_t mx, intptr_t my, int width)
+{
+    int x, y;
+    const int8_t *filter;
+    pixel *src = (pixel*)_src;
+    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel);
+    int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE];
+    int16_t *tmp = tmp_array;
+    int shift = 14 + 1 - BIT_DEPTH;
+    int log2Wd = denom + shift - 1; /* applied below with '*', not '<<': ox0 + ox1 + 1 is signed and shifting a negative value is undefined */
+
+    src   -= QPEL_EXTRA_BEFORE * srcstride;
+    filter = ff_hevc_qpel_filters[mx - 1];
+    for (y = 0; y < height + QPEL_EXTRA; y++) {
+        for (x = 0; x < width; x++)
+            tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
+        src += srcstride;
+        tmp += MAX_PB_SIZE;
+    }
+
+    tmp    = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
+    filter = ff_hevc_qpel_filters[my - 1];
+
+    ox0     = ox0 * (1 << (BIT_DEPTH - 8));
+    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++)
+            dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
+                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
+        tmp  += MAX_PB_SIZE;
+        dst  += dststride;
+        src2 += src2stride;
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+//
+////////////////////////////////////////////////////////////////////////////////
+#define EPEL_FILTER(src, stride)                                               \
+    (filter[0] * src[x - stride] +                                             \
+     filter[1] * src[x]          +                                             \
+     filter[2] * src[x + stride] +                                             \
+     filter[3] * src[x + 2 * stride])
 
 static void FUNC(put_hevc_epel_h)(int16_t *dst, ptrdiff_t dststride,
                                   uint8_t *_src, ptrdiff_t _srcstride,
-                                  int width, int height, int mx, int my,
-                                  int16_t* mcbuffer)
+                                  int height, intptr_t mx, intptr_t my, int width)
 {
     int x, y;
     pixel *src = (pixel *)_src;
     ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
     const int8_t *filter = ff_hevc_epel_filters[mx - 1];
-    int8_t filter_0 = filter[0];
-    int8_t filter_1 = filter[1];
-    int8_t filter_2 = filter[2];
-    int8_t filter_3 = filter[3];
     for (y = 0; y < height; y++) {
         for (x = 0; x < width; x++)
             dst[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
@@ -979,17 +1136,12 @@ static void FUNC(put_hevc_epel_h)(int16_t *dst, ptrdiff_t dststride,
 
 static void FUNC(put_hevc_epel_v)(int16_t *dst, ptrdiff_t dststride,
                                   uint8_t *_src, ptrdiff_t _srcstride,
-                                  int width, int height, int mx, int my,
-                                  int16_t* mcbuffer)
+                                  int height, intptr_t mx, intptr_t my, int width)
 {
     int x, y;
     pixel *src = (pixel *)_src;
     ptrdiff_t srcstride = _srcstride / sizeof(pixel);
     const int8_t *filter = ff_hevc_epel_filters[my - 1];
-    int8_t filter_0 = filter[0];
-    int8_t filter_1 = filter[1];
-    int8_t filter_2 = filter[2];
-    int8_t filter_3 = filter[3];
 
     for (y = 0; y < height; y++) {
         for (x = 0; x < width; x++)
@@ -1001,19 +1153,13 @@ static void FUNC(put_hevc_epel_v)(int16_t *dst, ptrdiff_t dststride,
 
 static void FUNC(put_hevc_epel_hv)(int16_t *dst, ptrdiff_t dststride,
                                    uint8_t *_src, ptrdiff_t _srcstride,
-                                   int width, int height, int mx, int my,
-                                   int16_t* mcbuffer)
+                                   int height, intptr_t mx, intptr_t my, int width)
 {
     int x, y;
     pixel *src = (pixel *)_src;
     ptrdiff_t srcstride = _srcstride / sizeof(pixel);
-    const int8_t *filter_h = ff_hevc_epel_filters[mx - 1];
-    const int8_t *filter_v = ff_hevc_epel_filters[my - 1];
-    int8_t filter_0 = filter_h[0];
-    int8_t filter_1 = filter_h[1];
-    int8_t filter_2 = filter_h[2];
-    int8_t filter_3 = filter_h[3];
-    int16_t tmp_array[(MAX_PB_SIZE + 3) * MAX_PB_SIZE];
+    const int8_t *filter = ff_hevc_epel_filters[mx - 1];
+    int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
     int16_t *tmp = tmp_array;
 
     src -= EPEL_EXTRA_BEFORE * srcstride;
@@ -1026,10 +1172,8 @@ static void FUNC(put_hevc_epel_hv)(int16_t *dst, ptrdiff_t dststride,
     }
 
     tmp      = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
-    filter_0 = filter_v[0];
-    filter_1 = filter_v[1];
-    filter_2 = filter_v[2];
-    filter_3 = filter_v[3];
+    filter = ff_hevc_epel_filters[my - 1];
+
     for (y = 0; y < height; y++) {
         for (x = 0; x < width; x++)
             dst[x] = EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
@@ -1038,37 +1182,91 @@ static void FUNC(put_hevc_epel_hv)(int16_t *dst, ptrdiff_t dststride,
     }
 }
 
-static void FUNC(put_unweighted_pred)(uint8_t *_dst, ptrdiff_t _dststride,
-                                      int16_t *src, ptrdiff_t srcstride,
-                                      int width, int height)
+static void FUNC(put_hevc_epel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
+                                      int height, intptr_t mx, intptr_t my, int width)
 {
     int x, y;
+    pixel *src = (pixel *)_src;
+    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
     pixel *dst          = (pixel *)_dst;
     ptrdiff_t dststride = _dststride / sizeof(pixel);
-
+    const int8_t *filter = ff_hevc_epel_filters[mx - 1];
     int shift = 14 - BIT_DEPTH;
 #if BIT_DEPTH < 14
     int offset = 1 << (shift - 1);
 #else
     int offset = 0;
 #endif
+
     for (y = 0; y < height; y++) {
         for (x = 0; x < width; x++)
-            dst[x] = av_clip_pixel((src[x] + offset) >> shift);
-        dst += dststride;
+            dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
         src += srcstride;
+        dst += dststride;
     }
 }
 
-static void FUNC(put_weighted_pred_avg)(uint8_t *_dst, ptrdiff_t _dststride,
-                                        int16_t *src1, int16_t *src2,
-                                        ptrdiff_t srcstride,
-                                        int width, int height)
+static void FUNC(put_hevc_epel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
+                                     int16_t *src2, ptrdiff_t src2stride,
+                                     int height, intptr_t mx, intptr_t my, int width)
 {
     int x, y;
+    pixel *src = (pixel *)_src;
+    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
     pixel *dst          = (pixel *)_dst;
     ptrdiff_t dststride = _dststride / sizeof(pixel);
+    const int8_t *filter = ff_hevc_epel_filters[mx - 1];
+    int shift = 14 + 1 - BIT_DEPTH;
+#if BIT_DEPTH < 14
+    int offset = 1 << (shift - 1);
+#else
+    int offset = 0;
+#endif
+
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++) {
+            dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
+        }
+        dst  += dststride;
+        src  += srcstride;
+        src2 += src2stride;
+    }
+}
 
+static void FUNC(put_hevc_epel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
+                                      int height, intptr_t mx, intptr_t my, int width)
+{
+    int x, y;
+    pixel *src = (pixel *)_src;
+    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel);
+    const int8_t *filter = ff_hevc_epel_filters[my - 1];
+    int shift = 14 - BIT_DEPTH;
+#if BIT_DEPTH < 14
+    int offset = 1 << (shift - 1);
+#else
+    int offset = 0;
+#endif
+
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++)
+            dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift);
+        src += srcstride;
+        dst += dststride;
+    }
+}
+
+static void FUNC(put_hevc_epel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
+                                     int16_t *src2, ptrdiff_t src2stride,
+                                     int height, intptr_t mx, intptr_t my, int width)
+{
+    int x, y;
+    pixel *src = (pixel *)_src;
+    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
+    const int8_t *filter = ff_hevc_epel_filters[my - 1];
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel);
     int shift = 14 + 1 - BIT_DEPTH;
 #if BIT_DEPTH < 14
     int offset = 1 << (shift - 1);
@@ -1078,71 +1276,273 @@ static void FUNC(put_weighted_pred_avg)(uint8_t *_dst, ptrdiff_t _dststride,
 
     for (y = 0; y < height; y++) {
         for (x = 0; x < width; x++)
-            dst[x] = av_clip_pixel((src1[x] + src2[x] + offset) >> shift);
+            dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
         dst  += dststride;
-        src1 += srcstride;
-        src2 += srcstride;
+        src  += srcstride;
+        src2 += src2stride;
+    }
+}
+
+static void FUNC(put_hevc_epel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
+                                       int height, intptr_t mx, intptr_t my, int width)
+{
+    int x, y;
+    pixel *src = (pixel *)_src;
+    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel);
+    const int8_t *filter = ff_hevc_epel_filters[mx - 1];
+    int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
+    int16_t *tmp = tmp_array;
+    int shift = 14 - BIT_DEPTH;
+#if BIT_DEPTH < 14
+    int offset = 1 << (shift - 1);
+#else
+    int offset = 0;
+#endif
+
+    src -= EPEL_EXTRA_BEFORE * srcstride;
+
+    for (y = 0; y < height + EPEL_EXTRA; y++) {
+        for (x = 0; x < width; x++)
+            tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
+        src += srcstride;
+        tmp += MAX_PB_SIZE;
+    }
+
+    tmp      = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
+    filter = ff_hevc_epel_filters[my - 1];
+
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++)
+            dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
+        tmp += MAX_PB_SIZE;
+        dst += dststride;
     }
 }
 
-static void FUNC(weighted_pred)(uint8_t denom, int16_t wlxFlag, int16_t olxFlag,
-                                uint8_t *_dst, ptrdiff_t _dststride,
-                                int16_t *src, ptrdiff_t srcstride,
-                                int width, int height)
+static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
+                                      int16_t *src2, ptrdiff_t src2stride,
+                                      int height, intptr_t mx, intptr_t my, int width)
 {
-    int shift, log2Wd, wx, ox, x, y, offset;
+    int x, y;
+    pixel *src = (pixel *)_src;
+    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
     pixel *dst          = (pixel *)_dst;
     ptrdiff_t dststride = _dststride / sizeof(pixel);
+    const int8_t *filter = ff_hevc_epel_filters[mx - 1];
+    int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
+    int16_t *tmp = tmp_array;
+    int shift = 14 + 1 - BIT_DEPTH;
+#if BIT_DEPTH < 14
+    int offset = 1 << (shift - 1);
+#else
+    int offset = 0;
+#endif
+
+    src -= EPEL_EXTRA_BEFORE * srcstride;
+
+    for (y = 0; y < height + EPEL_EXTRA; y++) {
+        for (x = 0; x < width; x++)
+            tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
+        src += srcstride;
+        tmp += MAX_PB_SIZE;
+    }
 
-    shift  = 14 - BIT_DEPTH;
-    log2Wd = denom + shift;
-    offset = 1 << (log2Wd - 1);
-    wx     = wlxFlag;
-    ox     = olxFlag * (1 << (BIT_DEPTH - 8));
+    tmp      = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
+    filter = ff_hevc_epel_filters[my - 1];
+
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++)
+            dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
+        tmp  += MAX_PB_SIZE;
+        dst  += dststride;
+        src2 += src2stride;
+    }
+}
+
+static void FUNC(put_hevc_epel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
+                                        int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
+{
+    int x, y;
+    pixel *src = (pixel *)_src;
+    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel);
+    const int8_t *filter = ff_hevc_epel_filters[mx - 1];
+    int shift = denom + 14 - BIT_DEPTH;
+#if BIT_DEPTH < 14
+    int offset = 1 << (shift - 1);
+#else
+    int offset = 0;
+#endif
 
+    ox     = ox * (1 << (BIT_DEPTH - 8));
     for (y = 0; y < height; y++) {
         for (x = 0; x < width; x++) {
-            if (log2Wd >= 1) {
-                dst[x] = av_clip_pixel(((src[x] * wx + offset) >> log2Wd) + ox);
-            } else {
-                dst[x] = av_clip_pixel(src[x] * wx + ox);
-            }
+            dst[x] = av_clip_pixel((((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
         }
         dst += dststride;
         src += srcstride;
     }
 }
 
-static void FUNC(weighted_pred_avg)(uint8_t denom,
-                                    int16_t wl0Flag, int16_t wl1Flag,
-                                    int16_t ol0Flag, int16_t ol1Flag,
-                                    uint8_t *_dst, ptrdiff_t _dststride,
-                                    int16_t *src1, int16_t *src2,
-                                    ptrdiff_t srcstride,
-                                    int width, int height)
+static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
+                                       int16_t *src2, ptrdiff_t src2stride,
+                                       int height, int denom, int wx0, int wx1,
+                                       int ox0, int ox1, intptr_t mx, intptr_t my, int width)
 {
-    int shift, log2Wd, w0, w1, o0, o1, x, y;
-    pixel *dst = (pixel *)_dst;
+    int x, y;
+    pixel *src = (pixel *)_src;
+    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel);
+    const int8_t *filter = ff_hevc_epel_filters[mx - 1];
+    int shift = 14 + 1 - BIT_DEPTH;
+    int log2Wd = denom + shift - 1; /* applied below with '*', not '<<': ox0 + ox1 + 1 is signed and shifting a negative value is undefined */
+
+    ox0     = ox0 * (1 << (BIT_DEPTH - 8));
+    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++)
+            dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
+                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
+        src  += srcstride;
+        dst  += dststride;
+        src2 += src2stride;
+    }
+}
+
+static void FUNC(put_hevc_epel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
+                                        int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
+{
+    int x, y;
+    pixel *src = (pixel *)_src;
+    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
+    pixel *dst          = (pixel *)_dst;
     ptrdiff_t dststride = _dststride / sizeof(pixel);
+    const int8_t *filter = ff_hevc_epel_filters[my - 1];
+    int shift = denom + 14 - BIT_DEPTH;
+#if BIT_DEPTH < 14
+    int offset = 1 << (shift - 1);
+#else
+    int offset = 0;
+#endif
+
+    ox     = ox * (1 << (BIT_DEPTH - 8));
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++) {
+            dst[x] = av_clip_pixel((((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
+        }
+        dst += dststride;
+        src += srcstride;
+    }
+}
 
-    shift  = 14 - BIT_DEPTH;
-    log2Wd = denom + shift;
-    w0     = wl0Flag;
-    w1     = wl1Flag;
-    o0     = ol0Flag * (1 << (BIT_DEPTH - 8));
-    o1     = ol1Flag * (1 << (BIT_DEPTH - 8));
+static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
+                                       int16_t *src2, ptrdiff_t src2stride,
+                                       int height, int denom, int wx0, int wx1,
+                                       int ox0, int ox1, intptr_t mx, intptr_t my, int width)
+{
+    int x, y;
+    pixel *src = (pixel *)_src;
+    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
+    const int8_t *filter = ff_hevc_epel_filters[my - 1];
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel);
+    int shift = 14 + 1 - BIT_DEPTH;
+    int log2Wd = denom + shift - 1; /* applied below with '*', not '<<': ox0 + ox1 + 1 is signed and shifting a negative value is undefined */
 
+    ox0     = ox0 * (1 << (BIT_DEPTH - 8));
+    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
     for (y = 0; y < height; y++) {
         for (x = 0; x < width; x++)
-            dst[x] = av_clip_pixel((src1[x] * w0 + src2[x] * w1 +
-                                    ((o0 + o1 + 1) << log2Wd)) >> (log2Wd + 1));
+            dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
+                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
+        src  += srcstride;
         dst  += dststride;
-        src1 += srcstride;
-        src2 += srcstride;
+        src2 += src2stride;
+    }
+}
+
+static void FUNC(put_hevc_epel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
+                                         int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
+{
+    int x, y;
+    pixel *src = (pixel *)_src;
+    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel);
+    const int8_t *filter = ff_hevc_epel_filters[mx - 1];
+    int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
+    int16_t *tmp = tmp_array;
+    int shift = denom + 14 - BIT_DEPTH;
+#if BIT_DEPTH < 14
+    int offset = 1 << (shift - 1);
+#else
+    int offset = 0;
+#endif
+
+    src -= EPEL_EXTRA_BEFORE * srcstride;
+
+    for (y = 0; y < height + EPEL_EXTRA; y++) {
+        for (x = 0; x < width; x++)
+            tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
+        src += srcstride;
+        tmp += MAX_PB_SIZE;
+    }
+
+    tmp      = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
+    filter = ff_hevc_epel_filters[my - 1];
+
+    ox     = ox * (1 << (BIT_DEPTH - 8));
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++)
+            dst[x] = av_clip_pixel((((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
+        tmp += MAX_PB_SIZE;
+        dst += dststride;
     }
 }
 
-// line zero
+static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
+                                        int16_t *src2, ptrdiff_t src2stride,
+                                        int height, int denom, int wx0, int wx1,
+                                        int ox0, int ox1, intptr_t mx, intptr_t my, int width)
+{
+    int x, y;
+    pixel *src = (pixel *)_src;
+    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
+    pixel *dst          = (pixel *)_dst;
+    ptrdiff_t dststride = _dststride / sizeof(pixel);
+    const int8_t *filter = ff_hevc_epel_filters[mx - 1];
+    int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE];
+    int16_t *tmp = tmp_array;
+    int shift = 14 + 1 - BIT_DEPTH;
+    int log2Wd = denom + shift - 1; /* applied below with '*', not '<<': ox0 + ox1 + 1 is signed and shifting a negative value is undefined */
+
+    src -= EPEL_EXTRA_BEFORE * srcstride;
+
+    for (y = 0; y < height + EPEL_EXTRA; y++) {
+        for (x = 0; x < width; x++)
+            tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
+        src += srcstride;
+        tmp += MAX_PB_SIZE;
+    }
+
+    tmp      = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
+    filter = ff_hevc_epel_filters[my - 1];
+
+    ox0     = ox0 * (1 << (BIT_DEPTH - 8));
+    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
+    for (y = 0; y < height; y++) {
+        for (x = 0; x < width; x++)
+            dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
+                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
+        tmp  += MAX_PB_SIZE;
+        dst  += dststride;
+        src2 += src2stride;
+    }
+}// line zero
 #define P3 pix[-4 * xstride]
 #define P2 pix[-3 * xstride]
 #define P1 pix[-2 * xstride]
diff --git a/libavcodec/hevcpred.c b/libavcodec/hevcpred.c
index 1ba2487..4598229 100644
--- a/libavcodec/hevcpred.c
+++ b/libavcodec/hevcpred.c
@@ -1,27 +1,29 @@
 /*
- * HEVC video decoder
+ * HEVC video Decoder
  *
  * Copyright (C) 2012 - 2013 Guillaume Martres
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "hevc.h"
 
+#include "hevcpred.h"
+
 #define BIT_DEPTH 8
 #include "hevcpred_template.c"
 #undef BIT_DEPTH
@@ -34,6 +36,10 @@
 #include "hevcpred_template.c"
 #undef BIT_DEPTH
 
+#define BIT_DEPTH 12
+#include "hevcpred_template.c"
+#undef BIT_DEPTH
+
 void ff_hevc_pred_init(HEVCPredContext *hpc, int bit_depth)
 {
 #undef FUNC
@@ -61,6 +67,9 @@ void ff_hevc_pred_init(HEVCPredContext *hpc, int bit_depth)
     case 10:
         HEVC_PRED(10);
         break;
+    case 12:
+        HEVC_PRED(12);
+        break;
     default:
         HEVC_PRED(8);
         break;
diff --git a/libavcodec/hevcpred.h b/libavcodec/hevcpred.h
new file mode 100644
index 0000000..7f14a76
--- /dev/null
+++ b/libavcodec/hevcpred.h
@@ -0,0 +1,45 @@
+/*
+ * HEVC video Decoder
+ *
+ * Copyright (C) 2012 - 2013 Guillaume Martres
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_HEVCPRED_H
+#define AVCODEC_HEVCPRED_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+struct HEVCContext;
+
+typedef struct HEVCPredContext { /* Table of intra-prediction function pointers; presumably filled in by ff_hevc_pred_init() -- confirm in hevcpred.c */
+    void (*intra_pred[4])(struct HEVCContext *s, int x0, int y0, int c_idx); /* four variants; index meaning not visible here -- presumably one per block size, TODO confirm */
+
+    void (*pred_planar[4])(uint8_t *src, const uint8_t *top,
+                           const uint8_t *left, ptrdiff_t stride); /* four variants with no size argument -- presumably one per block size, TODO confirm */
+    void (*pred_dc)(uint8_t *src, const uint8_t *top, const uint8_t *left,
+                    ptrdiff_t stride, int log2_size, int c_idx); /* single entry: the block size is passed at call time as log2_size */
+    void (*pred_angular[4])(uint8_t *src, const uint8_t *top,
+                            const uint8_t *left, ptrdiff_t stride,
+                            int c_idx, int mode); /* four variants; also takes the plane index (c_idx) and the prediction mode */
+} HEVCPredContext;
+
+void ff_hevc_pred_init(HEVCPredContext *hpc, int bit_depth); /* hevcpred.c dispatches on bit_depth and uses the 8-bit HEVC_PRED set for any value its switch does not list */
+
+#endif /* AVCODEC_HEVCPRED_H */
diff --git a/libavcodec/hevcpred_template.c b/libavcodec/hevcpred_template.c
index 53b9c59..8124230 100644
--- a/libavcodec/hevcpred_template.c
+++ b/libavcodec/hevcpred_template.c
@@ -3,28 +3,27 @@
  *
  * Copyright (C) 2012 - 2013 Guillaume Martres
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "libavutil/pixdesc.h"
 
-#include "hevc.h"
-
 #include "bit_depth_template.c"
+#include "hevcpred.h"
 
 #define POS(x, y) src[(x) + stride * (y)]
 
@@ -38,10 +37,9 @@ static av_always_inline void FUNC(intra_pred)(HEVCContext *s, int x0, int y0,
 #define MVF_PU(x, y) \
     MVF(PU(x0 + ((x) << hshift)), PU(y0 + ((y) << vshift)))
 #define IS_INTRA(x, y) \
-    MVF_PU(x, y).is_intra
+    (MVF_PU(x, y).pred_flag == PF_INTRA)
 #define MIN_TB_ADDR_ZS(x, y) \
-    s->pps->min_tb_addr_zs[(y) * s->sps->min_tb_width + (x)]
-
+    s->pps->min_tb_addr_zs[(y) * (s->sps->tb_mask+2) + (x)]
 #define EXTEND(ptr, val, len)         \
 do {                                  \
     pixel4 pix = PIXEL_SPLAT_X4(val); \
@@ -49,36 +47,43 @@ do {                                  \
         AV_WN4P(ptr + i, pix);        \
 } while (0)
 
+#define EXTEND_RIGHT_CIP(ptr, start, length)                                   \
+        for (i = start; i < (start) + (length); i += 4)                        \
+            if (!IS_INTRA(i, -1))                                              \
+                AV_WN4P(&ptr[i], a);                                           \
+            else                                                               \
+                a = PIXEL_SPLAT_X4(ptr[i+3])
 #define EXTEND_LEFT_CIP(ptr, start, length) \
-        for (i = (start); i > (start) - (length); i--) \
+        for (i = start; i > (start) - (length); i--) \
             if (!IS_INTRA(i - 1, -1)) \
                 ptr[i - 1] = ptr[i]
-#define EXTEND_RIGHT_CIP(ptr, start, length) \
-        for (i = (start); i < (start) + (length); i++) \
-            if (!IS_INTRA(i, -1)) \
-                ptr[i] = ptr[i - 1]
-#define EXTEND_UP_CIP(ptr, start, length) \
-        for (i = (start); i > (start) - (length); i--) \
-            if (!IS_INTRA(-1, i - 1)) \
-                ptr[i - 1] = ptr[i]
-#define EXTEND_UP_CIP_0(ptr, start, length) \
-        for (i = (start); i > (start) - (length); i--) \
-            ptr[i - 1] = ptr[i]
-#define EXTEND_DOWN_CIP(ptr, start, length) \
-        for (i = (start); i < (start) + (length); i++) \
-            if (!IS_INTRA(-1, i)) \
-                ptr[i] = ptr[i - 1]
-    HEVCLocalContext *lc = &s->HEVClc;
+#define EXTEND_UP_CIP(ptr, start, length)                                      \
+        for (i = (start); i > (start) - (length); i -= 4)                      \
+            if (!IS_INTRA(-1, i - 3))                                          \
+                AV_WN4P(&ptr[i - 3], a);                                       \
+            else                                                               \
+                a = PIXEL_SPLAT_X4(ptr[i - 3])
+#define EXTEND_DOWN_CIP(ptr, start, length)                                    \
+        for (i = start; i < (start) + (length); i += 4)                        \
+            if (!IS_INTRA(-1, i))                                              \
+                AV_WN4P(&ptr[i], a);                                           \
+            else                                                               \
+                a = PIXEL_SPLAT_X4(ptr[i + 3])
+
+    HEVCLocalContext *lc = s->HEVClc;
     int i;
     int hshift = s->sps->hshift[c_idx];
     int vshift = s->sps->vshift[c_idx];
     int size = (1 << log2_size);
-    int size_in_luma = size << hshift;
-    int size_in_tbs = size_in_luma >> s->sps->log2_min_tb_size;
+    int size_in_luma_h = size << hshift;
+    int size_in_tbs_h  = size_in_luma_h >> s->sps->log2_min_tb_size;
+    int size_in_luma_v = size << vshift;
+    int size_in_tbs_v  = size_in_luma_v >> s->sps->log2_min_tb_size;
     int x = x0 >> hshift;
     int y = y0 >> vshift;
-    int x_tb = x0 >> s->sps->log2_min_tb_size;
-    int y_tb = y0 >> s->sps->log2_min_tb_size;
+    int x_tb = (x0 >> s->sps->log2_min_tb_size) & s->sps->tb_mask;
+    int y_tb = (y0 >> s->sps->log2_min_tb_size) & s->sps->tb_mask;
+
     int cur_tb_addr = MIN_TB_ADDR_ZS(x_tb, y_tb);
 
     ptrdiff_t stride = s->frame->linesize[c_idx] / sizeof(pixel);
@@ -86,87 +91,77 @@ do {                                  \
 
     int min_pu_width = s->sps->min_pu_width;
 
-    enum IntraPredMode mode = c_idx ? lc->pu.intra_pred_mode_c :
-                              lc->tu.cur_intra_pred_mode;
-
-    pixel left_array[2 * MAX_TB_SIZE + 1];
-    pixel filtered_left_array[2 * MAX_TB_SIZE + 1];
-    pixel top_array[2 * MAX_TB_SIZE + 1];
-    pixel filtered_top_array[2 * MAX_TB_SIZE + 1];
-
-    pixel *left          = left_array + 1;
-    pixel *top           = top_array  + 1;
-    pixel *filtered_left = filtered_left_array + 1;
-    pixel *filtered_top  = filtered_top_array  + 1;
-
-    int cand_bottom_left = lc->na.cand_bottom_left && cur_tb_addr > MIN_TB_ADDR_ZS(x_tb - 1, y_tb + size_in_tbs);
+    enum IntraPredMode mode = c_idx ? lc->tu.intra_pred_mode_c :
+                              lc->tu.intra_pred_mode;
+    pixel4 a;
+    pixel  left_array[2 * MAX_TB_SIZE + 1];
+    pixel  filtered_left_array[2 * MAX_TB_SIZE + 1];
+    pixel  top_array[2 * MAX_TB_SIZE + 1];
+    pixel  filtered_top_array[2 * MAX_TB_SIZE + 1];
+
+    pixel  *left          = left_array + 1;
+    pixel  *top           = top_array  + 1;
+    pixel  *filtered_left = filtered_left_array + 1;
+    pixel  *filtered_top  = filtered_top_array  + 1;
+    int cand_bottom_left = lc->na.cand_bottom_left && cur_tb_addr > MIN_TB_ADDR_ZS( x_tb - 1, (y_tb + size_in_tbs_v) & s->sps->tb_mask);
     int cand_left        = lc->na.cand_left;
     int cand_up_left     = lc->na.cand_up_left;
     int cand_up          = lc->na.cand_up;
-    int cand_up_right    = lc->na.cand_up_right && cur_tb_addr > MIN_TB_ADDR_ZS(x_tb + size_in_tbs, y_tb - 1);
+    int cand_up_right    = lc->na.cand_up_right    && cur_tb_addr > MIN_TB_ADDR_ZS((x_tb + size_in_tbs_h) & s->sps->tb_mask, y_tb - 1);
 
-    int bottom_left_size = (FFMIN(y0 + 2 * size_in_luma, s->sps->height) -
-                            (y0 + size_in_luma)) >> vshift;
-    int top_right_size   = (FFMIN(x0 + 2 * size_in_luma, s->sps->width) -
-                            (x0 + size_in_luma)) >> hshift;
+    int bottom_left_size = (FFMIN(y0 + 2 * size_in_luma_v, s->sps->height) -
+                           (y0 + size_in_luma_v)) >> vshift;
+    int top_right_size   = (FFMIN(x0 + 2 * size_in_luma_h, s->sps->width) -
+                           (x0 + size_in_luma_h)) >> hshift;
 
     if (s->pps->constrained_intra_pred_flag == 1) {
-        int size_in_luma_pu = PU(size_in_luma);
+        int size_in_luma_pu_v = PU(size_in_luma_v);
+        int size_in_luma_pu_h = PU(size_in_luma_h);
         int on_pu_edge_x    = !(x0 & ((1 << s->sps->log2_min_pu_size) - 1));
         int on_pu_edge_y    = !(y0 & ((1 << s->sps->log2_min_pu_size) - 1));
-        if (!size_in_luma_pu)
-            size_in_luma_pu++;
+        if (!size_in_luma_pu_h)
+            size_in_luma_pu_h++;
         if (cand_bottom_left == 1 && on_pu_edge_x) {
             int x_left_pu   = PU(x0 - 1);
-            int y_bottom_pu = PU(y0 + size_in_luma);
-            int max = FFMIN(size_in_luma_pu, s->sps->min_pu_height - y_bottom_pu);
+            int y_bottom_pu = PU(y0 + size_in_luma_v);
+            int max = FFMIN(size_in_luma_pu_v, s->sps->min_pu_height - y_bottom_pu);
             cand_bottom_left = 0;
-            for (i = 0; i < max; i++)
-                cand_bottom_left |= MVF(x_left_pu, y_bottom_pu + i).is_intra;
+            for (i = 0; i < max; i += 2)
+                cand_bottom_left |= (MVF(x_left_pu, y_bottom_pu + i).pred_flag == PF_INTRA);
         }
         if (cand_left == 1 && on_pu_edge_x) {
             int x_left_pu   = PU(x0 - 1);
             int y_left_pu   = PU(y0);
-            int max = FFMIN(size_in_luma_pu, s->sps->min_pu_height - y_left_pu);
+            int max = FFMIN(size_in_luma_pu_v, s->sps->min_pu_height - y_left_pu);
             cand_left = 0;
-            for (i = 0; i < max; i++)
-                cand_left |= MVF(x_left_pu, y_left_pu + i).is_intra;
+            for (i = 0; i < max; i += 2)
+                cand_left |= (MVF(x_left_pu, y_left_pu + i).pred_flag == PF_INTRA);
         }
         if (cand_up_left == 1) {
             int x_left_pu   = PU(x0 - 1);
             int y_top_pu    = PU(y0 - 1);
-            cand_up_left = MVF(x_left_pu, y_top_pu).is_intra;
+            cand_up_left = MVF(x_left_pu, y_top_pu).pred_flag == PF_INTRA;
         }
         if (cand_up == 1 && on_pu_edge_y) {
             int x_top_pu    = PU(x0);
             int y_top_pu    = PU(y0 - 1);
-            int max = FFMIN(size_in_luma_pu, s->sps->min_pu_width - x_top_pu);
+            int max = FFMIN(size_in_luma_pu_h, s->sps->min_pu_width - x_top_pu);
             cand_up = 0;
-            for (i = 0; i < max; i++)
-                cand_up |= MVF(x_top_pu + i, y_top_pu).is_intra;
+            for (i = 0; i < max; i += 2)
+                cand_up |= (MVF(x_top_pu + i, y_top_pu).pred_flag == PF_INTRA);
         }
         if (cand_up_right == 1 && on_pu_edge_y) {
             int y_top_pu    = PU(y0 - 1);
-            int x_right_pu  = PU(x0 + size_in_luma);
-            int max = FFMIN(size_in_luma_pu, s->sps->min_pu_width - x_right_pu);
+            int x_right_pu  = PU(x0 + size_in_luma_h);
+            int max = FFMIN(size_in_luma_pu_h, s->sps->min_pu_width - x_right_pu);
             cand_up_right = 0;
-            for (i = 0; i < max; i++)
-                cand_up_right |= MVF(x_right_pu + i, y_top_pu).is_intra;
+            for (i = 0; i < max; i += 2)
+                cand_up_right |= (MVF(x_right_pu + i, y_top_pu).pred_flag == PF_INTRA);
         }
-        for (i = 0; i < 2 * MAX_TB_SIZE; i++) {
-            left[i] = 128;
-            top[i]  = 128;
-        }
-    }
-    if (cand_bottom_left) {
-        for (i = size; i < size + bottom_left_size; i++)
-            left[i] = POS(-1, i);
-        EXTEND(left + size + bottom_left_size, POS(-1, size + bottom_left_size - 1),
-               size - bottom_left_size);
+        memset(left, 128, 2 * MAX_TB_SIZE*sizeof(pixel));
+        memset(top , 128, 2 * MAX_TB_SIZE*sizeof(pixel));
+        top[-1] = 128;
     }
-    if (cand_left)
-        for (i = size - 1; i >= 0; i--)
-            left[i] = POS(-1, i);
     if (cand_up_left) {
         left[-1] = POS(-1, -1);
         top[-1]  = left[-1];
@@ -178,6 +173,15 @@ do {                                  \
         EXTEND(top + size + top_right_size, POS(size + top_right_size - 1, -1),
                size - top_right_size);
     }
+    if (cand_left)
+        for (i = 0; i < size; i++)
+            left[i] = POS(-1, i);
+    if (cand_bottom_left) {
+        for (i = size; i < size + bottom_left_size; i++)
+            left[i] = POS(-1, i);
+        EXTEND(left + size + bottom_left_size, POS(-1, size + bottom_left_size - 1),
+               size - bottom_left_size);
+    }
 
     if (s->pps->constrained_intra_pred_flag == 1) {
         if (cand_bottom_left || cand_left || cand_up_left || cand_up || cand_up_right) {
@@ -203,7 +207,6 @@ do {                                  \
                         j++;
                     EXTEND_LEFT_CIP(top, j, j + 1);
                     left[-1] = top[-1];
-                    j        = 0;
                 }
             } else {
                 j = 0;
@@ -217,24 +220,30 @@ do {                                  \
                         top[-1] = top[0];
                     }
                 left[-1] = top[-1];
-                j        = 0;
             }
+            left[-1] = top[-1];
             if (cand_bottom_left || cand_left) {
-                EXTEND_DOWN_CIP(left, j, size_max_y - j);
+                a = PIXEL_SPLAT_X4(left[-1]);
+                EXTEND_DOWN_CIP(left, 0, size_max_y);
             }
             if (!cand_left)
                 EXTEND(left, left[-1], size);
             if (!cand_bottom_left)
                 EXTEND(left + size, left[size - 1], size);
             if (x0 != 0 && y0 != 0) {
+                a = PIXEL_SPLAT_X4(left[size_max_y - 1]);
                 EXTEND_UP_CIP(left, size_max_y - 1, size_max_y);
+                if (!IS_INTRA(-1, - 1))
+                    left[-1] = left[0];
             } else if (x0 == 0) {
-                EXTEND_UP_CIP_0(left, size_max_y - 1, size_max_y);
+                EXTEND(left, 0, size_max_y);
             } else {
-                EXTEND_UP_CIP(left, size_max_y - 1, size_max_y - 1);
+                a = PIXEL_SPLAT_X4(left[size_max_y - 1]);
+                EXTEND_UP_CIP(left, size_max_y - 1, size_max_y);
             }
             top[-1] = left[-1];
             if (y0 != 0) {
+                a = PIXEL_SPLAT_X4(left[-1]);
                 EXTEND_RIGHT_CIP(top, 0, size_max_x);
             }
         }
@@ -278,40 +287,42 @@ do {                                  \
     top[-1] = left[-1];
 
     // Filtering process
-    if (c_idx == 0 && mode != INTRA_DC && size != 4) {
-        int intra_hor_ver_dist_thresh[] = { 7, 1, 0 };
-        int min_dist_vert_hor = FFMIN(FFABS((int)mode - 26),
-                                      FFABS((int)mode - 10));
-        if (min_dist_vert_hor > intra_hor_ver_dist_thresh[log2_size - 3]) {
-            int threshold = 1 << (BIT_DEPTH - 5);
-            if (s->sps->sps_strong_intra_smoothing_enable_flag &&
-                log2_size == 5 &&
-                FFABS(top[-1]  + top[63]  - 2 * top[31])  < threshold &&
-                FFABS(left[-1] + left[63] - 2 * left[31]) < threshold) {
-                // We can't just overwrite values in top because it could be
-                // a pointer into src
-                filtered_top[-1] = top[-1];
-                filtered_top[63] = top[63];
-                for (i = 0; i < 63; i++)
-                    filtered_top[i] = ((64 - (i + 1)) * top[-1] +
-                                             (i + 1)  * top[63] + 32) >> 6;
-                for (i = 0; i < 63; i++)
-                    left[i] = ((64 - (i + 1)) * left[-1] +
-                                     (i + 1)  * left[63] + 32) >> 6;
-                top = filtered_top;
-            } else {
-                filtered_left[2 * size - 1] = left[2 * size - 1];
-                filtered_top[2 * size - 1]  = top[2 * size - 1];
-                for (i = 2 * size - 2; i >= 0; i--)
-                    filtered_left[i] = (left[i + 1] + 2 * left[i] +
-                                        left[i - 1] + 2) >> 2;
-                filtered_top[-1]  =
-                filtered_left[-1] = (left[0] + 2 * left[-1] + top[0] + 2) >> 2;
-                for (i = 2 * size - 2; i >= 0; i--)
-                    filtered_top[i] = (top[i + 1] + 2 * top[i] +
-                                       top[i - 1] + 2) >> 2;
-                left = filtered_left;
-                top  = filtered_top;
+    if (!s->sps->intra_smoothing_disabled_flag && (c_idx == 0  || s->sps->chroma_format_idc == 3)) {
+        if (mode != INTRA_DC && size != 4){
+            int intra_hor_ver_dist_thresh[] = { 7, 1, 0 };
+            int min_dist_vert_hor = FFMIN(FFABS((int)(mode - 26U)),
+                                          FFABS((int)(mode - 10U)));
+            if (min_dist_vert_hor > intra_hor_ver_dist_thresh[log2_size - 3]) {
+                int threshold = 1 << (BIT_DEPTH - 5);
+                if (s->sps->sps_strong_intra_smoothing_enable_flag && c_idx == 0 &&
+                    log2_size == 5 &&
+                    FFABS(top[-1]  + top[63]  - 2 * top[31])  < threshold &&
+                    FFABS(left[-1] + left[63] - 2 * left[31]) < threshold) {
+                    // We can't just overwrite values in top because it could be
+                    // a pointer into src
+                    filtered_top[-1] = top[-1];
+                    filtered_top[63] = top[63];
+                    for (i = 0; i < 63; i++)
+                        filtered_top[i] = ((64 - (i + 1)) * top[-1] +
+                                           (i + 1)  * top[63] + 32) >> 6;
+                    for (i = 0; i < 63; i++)
+                        left[i] = ((64 - (i + 1)) * left[-1] +
+                                   (i + 1)  * left[63] + 32) >> 6;
+                    top = filtered_top;
+                } else {
+                    filtered_left[2 * size - 1] = left[2 * size - 1];
+                    filtered_top[2 * size - 1]  = top[2 * size - 1];
+                    for (i = 2 * size - 2; i >= 0; i--)
+                        filtered_left[i] = (left[i + 1] + 2 * left[i] +
+                                            left[i - 1] + 2) >> 2;
+                    filtered_top[-1]  =
+                    filtered_left[-1] = (left[0] + 2 * left[-1] + top[0] + 2) >> 2;
+                    for (i = 2 * size - 2; i >= 0; i--)
+                        filtered_top[i] = (top[i + 1] + 2 * top[i] +
+                                           top[i - 1] + 2) >> 2;
+                    left = filtered_left;
+                    top  = filtered_top;
+                }
             }
         }
     }
@@ -394,8 +405,8 @@ static void FUNC(pred_dc)(uint8_t *_src, const uint8_t *_top,
     a = PIXEL_SPLAT_X4(dc);
 
     for (i = 0; i < size; i++)
-        for (j = 0; j < size / 4; j++)
-            AV_WN4PA(&POS(j * 4, i), a);
+        for (j = 0; j < size; j+=4)
+            AV_WN4P(&POS(j, i), a);
 
     if (c_idx == 0 && size < 32) {
         POS(0, 0) = (left[0] + 2 * dc + top[0] + 2) >> 2;
@@ -427,7 +438,7 @@ static av_always_inline void FUNC(pred_angular)(uint8_t *_src,
     };
 
     int angle = intra_pred_angle[mode - 2];
-    pixel ref_array[3 * MAX_TB_SIZE + 1];
+    pixel ref_array[3 * MAX_TB_SIZE + 4];
     pixel *ref_tmp = ref_array + size;
     const pixel *ref;
     int last = (size * angle) >> 5;
@@ -435,8 +446,8 @@ static av_always_inline void FUNC(pred_angular)(uint8_t *_src,
     if (mode >= 18) {
         ref = top - 1;
         if (angle < 0 && last < -1) {
-            for (x = 0; x <= size; x++)
-                ref_tmp[x] = top[x - 1];
+            for (x = 0; x <= size; x += 4)
+                AV_WN4P(&ref_tmp[x], AV_RN4P(&top[x - 1]));
             for (x = last; x <= -1; x++)
                 ref_tmp[x] = left[-1 + ((x * inv_angle[mode - 11] + 128) >> 8)];
             ref = ref_tmp;
@@ -446,13 +457,19 @@ static av_always_inline void FUNC(pred_angular)(uint8_t *_src,
             int idx  = ((y + 1) * angle) >> 5;
             int fact = ((y + 1) * angle) & 31;
             if (fact) {
-                for (x = 0; x < size; x++) {
-                    POS(x, y) = ((32 - fact) * ref[x + idx + 1] +
-                                       fact  * ref[x + idx + 2] + 16) >> 5;
+                for (x = 0; x < size; x += 4) {
+                    POS(x    , y) = ((32 - fact) * ref[x + idx + 1] +
+                                           fact  * ref[x + idx + 2] + 16) >> 5;
+                    POS(x + 1, y) = ((32 - fact) * ref[x + 1 + idx + 1] +
+                                           fact  * ref[x + 1 + idx + 2] + 16) >> 5;
+                    POS(x + 2, y) = ((32 - fact) * ref[x + 2 + idx + 1] +
+                                           fact  * ref[x + 2 + idx + 2] + 16) >> 5;
+                    POS(x + 3, y) = ((32 - fact) * ref[x + 3 + idx + 1] +
+                                           fact  * ref[x + 3 + idx + 2] + 16) >> 5;
                 }
             } else {
-                for (x = 0; x < size; x++)
-                    POS(x, y) = ref[x + idx + 1];
+                for (x = 0; x < size; x += 4)
+                    AV_WN4P(&POS(x, y), AV_RN4P(&ref[x + idx + 1]));
             }
         }
         if (mode == 26 && c_idx == 0 && size < 32) {
@@ -462,8 +479,8 @@ static av_always_inline void FUNC(pred_angular)(uint8_t *_src,
     } else {
         ref = left - 1;
         if (angle < 0 && last < -1) {
-            for (x = 0; x <= size; x++)
-                ref_tmp[x] = left[x - 1];
+            for (x = 0; x <= size; x += 4)
+                AV_WN4P(&ref_tmp[x], AV_RN4P(&left[x - 1]));
             for (x = last; x <= -1; x++)
                 ref_tmp[x] = top[-1 + ((x * inv_angle[mode - 11] + 128) >> 8)];
             ref = ref_tmp;
@@ -483,8 +500,12 @@ static av_always_inline void FUNC(pred_angular)(uint8_t *_src,
             }
         }
         if (mode == 10 && c_idx == 0 && size < 32) {
-            for (x = 0; x < size; x++)
-                POS(x, 0) = av_clip_pixel(left[0] + ((top[x] - top[-1]) >> 1));
+            for (x = 0; x < size; x += 4) {
+                POS(x,     0) = av_clip_pixel(left[0] + ((top[x    ] - top[-1]) >> 1));
+                POS(x + 1, 0) = av_clip_pixel(left[0] + ((top[x + 1] - top[-1]) >> 1));
+                POS(x + 2, 0) = av_clip_pixel(left[0] + ((top[x + 2] - top[-1]) >> 1));
+                POS(x + 3, 0) = av_clip_pixel(left[0] + ((top[x + 3] - top[-1]) >> 1));
+            }
         }
     }
 }
diff --git a/libavcodec/hnm4video.c b/libavcodec/hnm4video.c
index b200e89..d8c51d0 100644
--- a/libavcodec/hnm4video.c
+++ b/libavcodec/hnm4video.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2012 David Kment
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -36,8 +36,8 @@
 
 typedef struct Hnm4VideoContext {
     uint8_t version;
-    uint16_t width;
-    uint16_t height;
+    int width;
+    int height;
     uint8_t *current;
     uint8_t *previous;
     uint8_t *buffer1;
@@ -78,7 +78,7 @@ static void unpack_intraframe(AVCodecContext *avctx, uint8_t *src,
         if (getbit(&gb, &bitbuf, &bits)) {
             if (writeoffset >= hnm->width * hnm->height) {
                 av_log(avctx, AV_LOG_ERROR,
-                       "Attempting to write out of bounds");
+                       "Attempting to write out of bounds\n");
                 break;
             }
             hnm->current[writeoffset++] = bytestream2_get_byte(&gb);
@@ -99,11 +99,11 @@ static void unpack_intraframe(AVCodecContext *avctx, uint8_t *src,
             count  += 2;
             offset += writeoffset;
             if (offset < 0 || offset + count >= hnm->width * hnm->height) {
-                av_log(avctx, AV_LOG_ERROR, "Attempting to read out of bounds");
+                av_log(avctx, AV_LOG_ERROR, "Attempting to read out of bounds\n");
                 break;
             } else if (writeoffset + count >= hnm->width * hnm->height) {
                 av_log(avctx, AV_LOG_ERROR,
-                       "Attempting to write out of bounds");
+                       "Attempting to write out of bounds\n");
                 break;
             }
             while (count--) {
@@ -146,7 +146,8 @@ static void decode_interframe_v4(AVCodecContext *avctx, uint8_t *src, uint32_t s
 {
     Hnm4VideoContext *hnm = avctx->priv_data;
     GetByteContext gb;
-    uint32_t writeoffset = 0, count, left, offset;
+    uint32_t writeoffset = 0;
+    int count, left, offset;
     uint8_t tag, previous, backline, backward, swap;
 
     bytestream2_init(&gb, src, size);
@@ -156,7 +157,12 @@ static void decode_interframe_v4(AVCodecContext *avctx, uint8_t *src, uint32_t s
         if (count == 0) {
             tag = bytestream2_get_byte(&gb) & 0xE0;
             tag = tag >> 5;
+
             if (tag == 0) {
+                if (writeoffset + 2 > hnm->width * hnm->height) {
+                    av_log(avctx, AV_LOG_ERROR, "writeoffset out of bounds\n");
+                    break;
+                }
                 hnm->current[writeoffset++] = bytestream2_get_byte(&gb);
                 hnm->current[writeoffset++] = bytestream2_get_byte(&gb);
             } else if (tag == 1) {
@@ -167,6 +173,10 @@ static void decode_interframe_v4(AVCodecContext *avctx, uint8_t *src, uint32_t s
                 writeoffset += count;
             } else if (tag == 3) {
                 count = bytestream2_get_byte(&gb) * 2;
+                if (writeoffset + count > hnm->width * hnm->height) {
+                    av_log(avctx, AV_LOG_ERROR, "writeoffset out of bounds\n");
+                    break;
+                }
                 while (count > 0) {
                     hnm->current[writeoffset++] = bytestream2_peek_byte(&gb);
                     count--;
@@ -175,6 +185,10 @@ static void decode_interframe_v4(AVCodecContext *avctx, uint8_t *src, uint32_t s
             } else {
                 break;
             }
+            if (writeoffset > hnm->width * hnm->height) {
+                av_log(avctx, AV_LOG_ERROR, "writeoffset out of bounds\n");
+                break;
+            }
         } else {
             previous = bytestream2_peek_byte(&gb) & 0x20;
             backline = bytestream2_peek_byte(&gb) & 0x40;
@@ -187,17 +201,28 @@ static void decode_interframe_v4(AVCodecContext *avctx, uint8_t *src, uint32_t s
 
             left = count;
 
-            if (!backward && offset + count >= hnm->width * hnm->height) {
-                av_log(avctx, AV_LOG_ERROR, "Attempting to read out of bounds");
+            if (!backward && offset + 2*count > hnm->width * hnm->height) {
+                av_log(avctx, AV_LOG_ERROR, "Attempting to read out of bounds\n");
                 break;
-            } else if (backward && offset >= hnm->width * hnm->height) {
-                av_log(avctx, AV_LOG_ERROR, "Attempting to read out of bounds");
+            } else if (backward && offset + 1 >= hnm->width * hnm->height) {
+                av_log(avctx, AV_LOG_ERROR, "Attempting to read out of bounds\n");
                 break;
-            } else if (writeoffset + count >= hnm->width * hnm->height) {
+            } else if (writeoffset + 2*count > hnm->width * hnm->height) {
                 av_log(avctx, AV_LOG_ERROR,
-                       "Attempting to write out of bounds");
+                       "Attempting to write out of bounds\n");
                 break;
             }
+            if(backward) {
+                if (offset < (!!backline)*(2 * hnm->width - 1) + 2*(left-1)) {
+                    av_log(avctx, AV_LOG_ERROR, "Attempting to read out of bounds\n");
+                    break;
+                }
+            } else {
+                if (offset < (!!backline)*(2 * hnm->width - 1)) {
+                    av_log(avctx, AV_LOG_ERROR, "Attempting to read out of bounds\n");
+                    break;
+                }
+            }
 
             if (previous) {
                 while (left > 0) {
@@ -262,6 +287,10 @@ static void decode_interframe_v4a(AVCodecContext *avctx, uint8_t *src,
             if (tag == 0) {
                 writeoffset += bytestream2_get_byte(&gb);
             } else if (tag == 1) {
+                if (writeoffset + hnm->width >= hnm->width * hnm->height) {
+                    av_log(avctx, AV_LOG_ERROR, "writeoffset out of bounds\n");
+                    break;
+                }
                 hnm->current[writeoffset]              = bytestream2_get_byte(&gb);
                 hnm->current[writeoffset + hnm->width] = bytestream2_get_byte(&gb);
                 writeoffset++;
@@ -270,6 +299,10 @@ static void decode_interframe_v4a(AVCodecContext *avctx, uint8_t *src,
             } else if (tag == 3) {
                 break;
             }
+            if (writeoffset > hnm->width * hnm->height) {
+                av_log(avctx, AV_LOG_ERROR, "writeoffset out of bounds\n");
+                break;
+            }
         } else {
             delta    = bytestream2_peek_byte(&gb) & 0x80;
             previous = bytestream2_peek_byte(&gb) & 0x40;
@@ -278,14 +311,19 @@ static void decode_interframe_v4a(AVCodecContext *avctx, uint8_t *src,
             offset  = writeoffset;
             offset += bytestream2_get_le16(&gb);
 
-            if (delta)
+            if (delta) {
+                if (offset < 0x10000) {
+                    av_log(avctx, AV_LOG_ERROR, "Attempting to read out of bounds\n");
+                    break;
+                }
                 offset -= 0x10000;
+            }
 
             if (offset + hnm->width + count >= hnm->width * hnm->height) {
-                av_log(avctx, AV_LOG_ERROR, "Attempting to read out of bounds");
+                av_log(avctx, AV_LOG_ERROR, "Attempting to read out of bounds\n");
                 break;
             } else if (writeoffset + hnm->width + count >= hnm->width * hnm->height) {
-                av_log(avctx, AV_LOG_ERROR, "Attempting to write out of bounds");
+                av_log(avctx, AV_LOG_ERROR, "Attempting to write out of bounds\n");
                 break;
             }
 
@@ -359,17 +397,23 @@ static int hnm_decode_frame(AVCodecContext *avctx, void *data,
     int ret;
     uint16_t chunk_id;
 
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
-        return ret;
+    if (avpkt->size < 8) {
+        av_log(avctx, AV_LOG_ERROR, "packet too small\n");
+        return AVERROR_INVALIDDATA;
     }
 
     chunk_id = AV_RL16(avpkt->data + 4);
 
     if (chunk_id == HNM4_CHUNK_ID_PL) {
         hnm_update_palette(avctx, avpkt->data, avpkt->size);
-        frame->palette_has_changed = 1;
     } else if (chunk_id == HNM4_CHUNK_ID_IZ) {
+        if (avpkt->size < 12) {
+            av_log(avctx, AV_LOG_ERROR, "packet too small\n");
+            return AVERROR_INVALIDDATA;
+        }
+        if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+            return ret;
+
         unpack_intraframe(avctx, avpkt->data + 12, avpkt->size - 12);
         memcpy(hnm->previous, hnm->current, hnm->width * hnm->height);
         if (hnm->version == 0x4a)
@@ -382,6 +426,9 @@ static int hnm_decode_frame(AVCodecContext *avctx, void *data,
         memcpy(frame->data[1], hnm->palette, 256 * 4);
         *got_frame = 1;
     } else if (chunk_id == HNM4_CHUNK_ID_IU) {
+        if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+            return ret;
+
         if (hnm->version == 0x4a) {
             decode_interframe_v4a(avctx, avpkt->data + 8, avpkt->size - 8);
             memcpy(hnm->processed, hnm->current, hnm->width * hnm->height);
@@ -421,7 +468,9 @@ static av_cold int hnm_decode_init(AVCodecContext *avctx)
     hnm->buffer2   = av_mallocz(avctx->width * avctx->height);
     hnm->processed = av_mallocz(avctx->width * avctx->height);
 
-    if (!hnm->buffer1 || !hnm->buffer2 || !hnm->processed) {
+    if (   !hnm->buffer1 || !hnm->buffer2 || !hnm->processed
+        || avctx->width * avctx->height == 0
+        || avctx->height % 2) {
         av_log(avctx, AV_LOG_ERROR, "av_mallocz() failed\n");
         av_freep(&hnm->buffer1);
         av_freep(&hnm->buffer2);
diff --git a/libavcodec/hpel_template.c b/libavcodec/hpel_template.c
index 81d3892..fccfe76 100644
--- a/libavcodec/hpel_template.c
+++ b/libavcodec/hpel_template.c
@@ -2,20 +2,20 @@
  * Copyright (c) 2000, 2001 Fabrice Bellard
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/hpeldsp.c b/libavcodec/hpeldsp.c
index 25694c5..7763760 100644
--- a/libavcodec/hpeldsp.c
+++ b/libavcodec/hpeldsp.c
@@ -5,20 +5,20 @@
  *
  * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -357,6 +357,8 @@ av_cold void ff_hpeldsp_init(HpelDSPContext *c, int flags)
 
     if (ARCH_AARCH64)
         ff_hpeldsp_init_aarch64(c, flags);
+    if (ARCH_ALPHA)
+        ff_hpeldsp_init_alpha(c, flags);
     if (ARCH_ARM)
         ff_hpeldsp_init_arm(c, flags);
     if (ARCH_PPC)
diff --git a/libavcodec/hpeldsp.h b/libavcodec/hpeldsp.h
index 7ffed1a..07c293a 100644
--- a/libavcodec/hpeldsp.h
+++ b/libavcodec/hpeldsp.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -69,7 +69,7 @@ typedef struct HpelDSPContext {
 
     /**
      * Halfpel motion compensation with no rounding (a+b)>>1.
-     * this is an array[2][4] of motion compensation functions for 2
+     * this is an array[4][4] of motion compensation functions for 2
      * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
      * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
      * @param block destination where the result is stored
@@ -77,7 +77,7 @@ typedef struct HpelDSPContext {
      * @param line_size number of bytes in a horizontal line of block
      * @param h height
      */
-    op_pixels_func put_no_rnd_pixels_tab[2][4];
+    op_pixels_func put_no_rnd_pixels_tab[4][4];
 
     /**
      * Halfpel motion compensation with no rounding (a+b)>>1.
@@ -95,6 +95,7 @@ typedef struct HpelDSPContext {
 void ff_hpeldsp_init(HpelDSPContext *c, int flags);
 
 void ff_hpeldsp_init_aarch64(HpelDSPContext *c, int flags);
+void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags);
 void ff_hpeldsp_init_arm(HpelDSPContext *c, int flags);
 void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags);
 void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags);
diff --git a/libavcodec/huffman.c b/libavcodec/huffman.c
index dec2197..2866eef 100644
--- a/libavcodec/huffman.c
+++ b/libavcodec/huffman.c
@@ -2,20 +2,20 @@
  * Copyright (c) 2006 Konstantin Shishkov
  * Copyright (c) 2007 Loren Merritt
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -52,18 +52,31 @@ static void heap_sift(HeapElem *h, int root, int size)
     }
 }
 
-void ff_huff_gen_len_table(uint8_t *dst, const uint64_t *stats)
+int ff_huff_gen_len_table(uint8_t *dst, const uint64_t *stats, int stats_size, int skip0)
 {
-    HeapElem h[256];
-    int up[2*256];
-    int len[2*256];
+    HeapElem *h  = av_malloc_array(sizeof(*h), stats_size);
+    int *up      = av_malloc_array(sizeof(*up) * 2, stats_size);
+    uint8_t *len = av_malloc_array(sizeof(*len) * 2, stats_size);
+    uint16_t *map= av_malloc_array(sizeof(*map), stats_size);
     int offset, i, next;
-    int size = 256;
+    int size = 0;
+    int ret = 0;
+
+    if (!h || !up || !len || !map) { // map is written below, so it must be checked too
+        ret = AVERROR(ENOMEM);
+        goto end;
+    }
+
+    for (i = 0; i<stats_size; i++) {
+        dst[i] = 255;
+        if (stats[i] || !skip0)
+            map[size++] = i;
+    }
 
     for (offset = 1; ; offset <<= 1) {
         for (i=0; i < size; i++) {
             h[i].name = i;
-            h[i].val = (stats[i] << 8) + offset;
+            h[i].val = (stats[map[i]] << 14) + offset;
         }
         for (i = size / 2 - 1; i >= 0; i--)
             heap_sift(h, i, size);
@@ -84,11 +97,17 @@ void ff_huff_gen_len_table(uint8_t *dst, const uint64_t *stats)
         for (i = 2 * size - 3; i >= size; i--)
             len[i] = len[up[i]] + 1;
         for (i = 0; i < size; i++) {
-            dst[i] = len[up[i]] + 1;
-            if (dst[i] >= 32) break;
+            dst[map[i]] = len[up[i]] + 1;
+            if (dst[map[i]] >= 32) break;
         }
         if (i==size) break;
     }
+end:
+    av_free(h);
+    av_free(up);
+    av_free(len);
+    av_free(map);
+    return ret;
 }
 
 static void get_tree_codes(uint32_t *bits, int16_t *lens, uint8_t *xlat,
@@ -114,7 +133,7 @@ static void get_tree_codes(uint32_t *bits, int16_t *lens, uint8_t *xlat,
     }
 }
 
-static int build_huff_tree(VLC *vlc, Node *nodes, int head, int flags)
+static int build_huff_tree(VLC *vlc, Node *nodes, int head, int flags, int nb_bits)
 {
     int no_zero_count = !(flags & FF_HUFFMAN_FLAG_ZERO_COUNT);
     uint32_t bits[256];
@@ -124,7 +143,7 @@ static int build_huff_tree(VLC *vlc, Node *nodes, int head, int flags)
 
     get_tree_codes(bits, lens, xlat, nodes, head, 0, 0,
                    &pos, no_zero_count);
-    return ff_init_vlc_sparse(vlc, 9, pos, lens, 2, 2, bits, 4, 4, xlat, 1, 1, 0);
+    return ff_init_vlc_sparse(vlc, nb_bits, pos, lens, 2, 2, bits, 4, 4, xlat, 1, 1, 0);
 }
 
 
@@ -132,7 +151,7 @@ static int build_huff_tree(VLC *vlc, Node *nodes, int head, int flags)
  * nodes size must be 2*nb_codes
  * first nb_codes nodes.count must be set
  */
-int ff_huff_build_tree(AVCodecContext *avctx, VLC *vlc, int nb_codes,
+int ff_huff_build_tree(AVCodecContext *avctx, VLC *vlc, int nb_codes, int nb_bits,
                        Node *nodes, HuffCmp cmp, int flags)
 {
     int i, j;
@@ -155,21 +174,22 @@ int ff_huff_build_tree(AVCodecContext *avctx, VLC *vlc, int nb_codes,
     cur_node = nb_codes;
     nodes[nb_codes*2-1].count = 0;
     for (i = 0; i < nb_codes * 2 - 1; i += 2) {
-        nodes[cur_node].sym = HNODE;
-        nodes[cur_node].count = nodes[i].count + nodes[i + 1].count;
-        nodes[cur_node].n0 = i;
-        for (j = cur_node; j > 0; j--) {
-            if (nodes[j].count > nodes[j - 1].count ||
-                (nodes[j].count == nodes[j - 1].count &&
-                 (!(flags & FF_HUFFMAN_FLAG_HNODE_FIRST) ||
-                  nodes[j].n0 == j - 1 || nodes[j].n0 == j - 2 ||
-                  (nodes[j].sym!=HNODE && nodes[j-1].sym!=HNODE))))
+        uint32_t cur_count = nodes[i].count + nodes[i+1].count;
+        // find correct place to insert new node, and
+        // make space for the new node while at it
+        for(j = cur_node; j > i + 2; j--){
+            if(cur_count > nodes[j-1].count ||
+               (cur_count == nodes[j-1].count &&
+                !(flags & FF_HUFFMAN_FLAG_HNODE_FIRST)))
                 break;
-            FFSWAP(Node, nodes[j], nodes[j - 1]);
+            nodes[j] = nodes[j - 1];
         }
+        nodes[j].sym = HNODE;
+        nodes[j].count = cur_count;
+        nodes[j].n0 = i;
         cur_node++;
     }
-    if (build_huff_tree(vlc, nodes, nb_codes * 2 - 2, flags) < 0) {
+    if (build_huff_tree(vlc, nodes, nb_codes * 2 - 2, flags, nb_bits) < 0) {
         av_log(avctx, AV_LOG_ERROR, "Error building tree\n");
         return -1;
     }
diff --git a/libavcodec/huffman.h b/libavcodec/huffman.h
index 043e6e3..6ab23ae 100644
--- a/libavcodec/huffman.h
+++ b/libavcodec/huffman.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2007  Aurelien Jacobs <aurel@gnuage.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -37,11 +37,12 @@ typedef struct Node {
 
 #define FF_HUFFMAN_FLAG_HNODE_FIRST 0x01
 #define FF_HUFFMAN_FLAG_ZERO_COUNT  0x02
+#define FF_HUFFMAN_BITS 10
 
 typedef int (*HuffCmp)(const void *va, const void *vb);
-int ff_huff_build_tree(AVCodecContext *avctx, VLC *vlc, int nb_codes,
+int ff_huff_build_tree(AVCodecContext *avctx, VLC *vlc, int nb_codes, int nb_bits,
                        Node *nodes, HuffCmp cmp, int flags);
 
-void ff_huff_gen_len_table(uint8_t *dst, const uint64_t *stats);
+int ff_huff_gen_len_table(uint8_t *dst, const uint64_t *stats, int n, int skip0);
 
 #endif /* AVCODEC_HUFFMAN_H */
diff --git a/libavcodec/huffyuv.c b/libavcodec/huffyuv.c
index da5c52f..4921555 100644
--- a/libavcodec/huffyuv.c
+++ b/libavcodec/huffyuv.c
@@ -1,25 +1,25 @@
 /*
  * huffyuv codec for libavcodec
  *
- * Copyright (c) 2002-2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2002-2014 Michael Niedermayer <michaelni@gmx.at>
  *
  * see http://www.pcisys.net/~melanson/codecs/huffyuv.txt for a description of
  * the algorithm used
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -36,13 +36,13 @@
 #include "bswapdsp.h"
 #include "huffyuv.h"
 
-int ff_huffyuv_generate_bits_table(uint32_t *dst, const uint8_t *len_table)
+int ff_huffyuv_generate_bits_table(uint32_t *dst, const uint8_t *len_table, int n)
 {
     int len, index;
     uint32_t bits = 0;
 
     for (len = 32; len > 0; len--) {
-        for (index = 0; index < 256; index++) {
+        for (index = 0; index < n; index++) {
             if (len_table[index] == len)
                 dst[index] = bits++;
         }
@@ -59,16 +59,11 @@ av_cold int ff_huffyuv_alloc_temp(HYuvContext *s)
 {
     int i;
 
-    if (s->bitstream_bpp<24) {
-        for (i=0; i<3; i++) {
-            s->temp[i]= av_malloc(s->width + 16);
-            if (!s->temp[i])
-                return AVERROR(ENOMEM);
-        }
-    } else {
-        s->temp[0]= av_mallocz(4*s->width + 16);
-        if (!s->temp[0])
+    for (i=0; i<3; i++) {
+        s->temp[i]= av_malloc(4*s->width + 16);
+        if (!s->temp[i])
             return AVERROR(ENOMEM);
+        s->temp16[i] = (uint16_t*)s->temp[i];
     }
     return 0;
 }
@@ -81,17 +76,20 @@ av_cold void ff_huffyuv_common_init(AVCodecContext *avctx)
     s->flags = avctx->flags;
 
     ff_bswapdsp_init(&s->bdsp);
+    ff_llviddsp_init(&s->llviddsp, avctx);
 
     s->width = avctx->width;
     s->height = avctx->height;
-    assert(s->width>0 && s->height>0);
+
+    av_assert1(s->width > 0 && s->height > 0);
 }
 
-void ff_huffyuv_common_end(HYuvContext *s)
+av_cold void ff_huffyuv_common_end(HYuvContext *s)
 {
     int i;
 
     for(i = 0; i < 3; i++) {
         av_freep(&s->temp[i]);
+        s->temp16[i] = NULL;
     }
 }
diff --git a/libavcodec/huffyuv.h b/libavcodec/huffyuv.h
index aed1537..2b3a1b3 100644
--- a/libavcodec/huffyuv.h
+++ b/libavcodec/huffyuv.h
@@ -1,23 +1,23 @@
 /*
- * Copyright (c) 2002-2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2002-2014 Michael Niedermayer <michaelni@gmx.at>
  *
  * see http://www.pcisys.net/~melanson/codecs/huffyuv.txt for a description of
  * the algorithm used
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -37,20 +37,13 @@
 #include "huffyuvdsp.h"
 #include "huffyuvencdsp.h"
 #include "put_bits.h"
+#include "lossless_videodsp.h"
 
-#define VLC_BITS 11
+#define VLC_BITS 12
 
-#if HAVE_BIGENDIAN
-#define B 3
-#define G 2
-#define R 1
-#define A 0
-#else
-#define B 0
-#define G 1
-#define R 2
-#define A 3
-#endif
+#define MAX_BITS 16
+#define MAX_N (1<<MAX_BITS)
+#define MAX_VLC_N 16384
 
 typedef enum Predictor {
     LEFT = 0,
@@ -69,27 +62,37 @@ typedef struct HYuvContext {
     int version;
     int yuy2;                               //use yuy2 instead of 422P
     int bgr32;                              //use bgr32 instead of bgr24
+    int bps;
+    int n;                                  // 1<<bps
+    int vlc_n;                              // number of vlc codes (FFMIN(1<<bps, MAX_VLC_N))
+    int alpha;
+    int chroma;
+    int yuv;
+    int chroma_h_shift;
+    int chroma_v_shift;
     int width, height;
     int flags;
     int context;
     int picture_number;
     int last_slice_end;
     uint8_t *temp[3];
-    uint64_t stats[3][256];
-    uint8_t len[3][256];
-    uint32_t bits[3][256];
+    uint16_t *temp16[3];                    ///< identical to temp but 16bit type
+    uint64_t stats[4][MAX_VLC_N];
+    uint8_t len[4][MAX_VLC_N];
+    uint32_t bits[4][MAX_VLC_N];
     uint32_t pix_bgr_map[1<<VLC_BITS];
-    VLC vlc[6];                             //Y,U,V,YY,YU,YV
+    VLC vlc[8];                             //Y,U,V,A,YY,YU,YV,AA
     uint8_t *bitstream_buffer;
     unsigned int bitstream_buffer_size;
     BswapDSPContext bdsp;
     HuffYUVDSPContext hdsp;
     HuffYUVEncDSPContext hencdsp;
+    LLVidDSPContext llviddsp;
 } HYuvContext;
 
 void ff_huffyuv_common_init(AVCodecContext *s);
 void ff_huffyuv_common_end(HYuvContext *s);
 int  ff_huffyuv_alloc_temp(HYuvContext *s);
-int ff_huffyuv_generate_bits_table(uint32_t *dst, const uint8_t *len_table);
+int ff_huffyuv_generate_bits_table(uint32_t *dst, const uint8_t *len_table, int n);
 
 #endif /* AVCODEC_HUFFYUV_H */
diff --git a/libavcodec/huffyuvdec.c b/libavcodec/huffyuvdec.c
index dc99d19..f552323 100644
--- a/libavcodec/huffyuvdec.c
+++ b/libavcodec/huffyuvdec.c
@@ -1,26 +1,28 @@
 /*
  * huffyuv decoder
  *
- * Copyright (c) 2002-2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2002-2014 Michael Niedermayer <michaelni@gmx.at>
  *
  * see http://www.pcisys.net/~melanson/codecs/huffyuv.txt for a description of
  * the algorithm used
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * yuva, gray, 4:4:4, 4:1:1, 4:1:0 and >8 bit per sample support sponsored by NOA
  */
 
 /**
@@ -28,17 +30,21 @@
  * huffyuv decoder
  */
 
+#define UNCHECKED_BITSTREAM_READER 1
+
 #include "avcodec.h"
 #include "get_bits.h"
 #include "huffyuv.h"
 #include "huffyuvdsp.h"
 #include "thread.h"
+#include "libavutil/pixdesc.h"
 
 #define classic_shift_luma_table_size 42
 static const unsigned char classic_shift_luma[classic_shift_luma_table_size + FF_INPUT_BUFFER_PADDING_SIZE] = {
     34, 36, 35, 69, 135, 232,   9, 16, 10, 24,  11,  23,  12,  16, 13, 10,
     14,  8, 15,  8,  16,   8,  17, 20, 16, 10, 207, 206, 205, 236, 11,  8,
-    10, 21,  9, 23,   8,   8, 199, 70, 69, 68,   0
+    10, 21,  9, 23,   8,   8, 199, 70, 69, 68,   0,
+  0,0,0,0,0,0,0,0,
 };
 
 #define classic_shift_chroma_table_size 59
@@ -46,7 +52,8 @@ static const unsigned char classic_shift_chroma[classic_shift_chroma_table_size
     66, 36,  37,  38, 39, 40,  41,  75,  76,  77, 110, 239, 144, 81, 82,  83,
     84, 85, 118, 183, 56, 57,  88,  89,  56,  89, 154,  57,  58, 57, 26, 141,
     57, 56,  58,  57, 58, 57, 184, 119, 214, 245, 116,  83,  82, 49, 80,  79,
-    78, 77,  44,  75, 41, 40,  39,  38,  37,  36,  34,  0
+    78, 77,  44,  75, 41, 40,  39,  38,  37,  36,  34,  0,
+  0,0,0,0,0,0,0,0,
 };
 
 static const unsigned char classic_add_luma[256] = {
@@ -87,16 +94,16 @@ static const unsigned char classic_add_chroma[256] = {
       6,  12,   8,  10,   7,   9,   6,   4,   6,   2,   2,   3,   3,   3,   3,   2,
 };
 
-static int read_len_table(uint8_t *dst, GetBitContext *gb)
+static int read_len_table(uint8_t *dst, GetBitContext *gb, int n)
 {
     int i, val, repeat;
 
-    for (i = 0; i < 256;) {
+    for (i = 0; i < n;) {
         repeat = get_bits(gb, 3);
         val    = get_bits(gb, 5);
         if (repeat == 0)
             repeat = get_bits(gb, 8);
-        if (i + repeat > 256 || get_bits_left(gb) < 0) {
+        if (i + repeat > n || get_bits_left(gb) < 0) {
             av_log(NULL, AV_LOG_ERROR, "Error reading huffman table\n");
             return AVERROR_INVALIDDATA;
         }
@@ -113,27 +120,32 @@ static int generate_joint_tables(HYuvContext *s)
     uint8_t len[1 << VLC_BITS];
     int ret;
 
-    if (s->bitstream_bpp < 24) {
+    if (s->bitstream_bpp < 24 || s->version > 2) {
         int p, i, y, u;
-        for (p = 0; p < 3; p++) {
-            for (i = y = 0; y < 256; y++) {
-                int len0  = s->len[0][y];
+        for (p = 0; p < 4; p++) {
+            int p0 = s->version > 2 ? p : 0;
+            for (i = y = 0; y < s->vlc_n; y++) {
+                int len0  = s->len[p0][y];
                 int limit = VLC_BITS - len0;
-                if (limit <= 0)
+                if (limit <= 0 || !len0)
+                    continue;
+                if ((sign_extend(y, 8) & (s->vlc_n-1)) != y)
                     continue;
-                for (u = 0; u < 256; u++) {
+                for (u = 0; u < s->vlc_n; u++) {
                     int len1 = s->len[p][u];
-                    if (len1 > limit)
+                    if (len1 > limit || !len1)
                         continue;
+                    if ((sign_extend(u, 8) & (s->vlc_n-1)) != u)
+                        continue;
+                    av_assert0(i < (1 << VLC_BITS));
                     len[i]     = len0 + len1;
-                    bits[i]    = (s->bits[0][y] << len1) + s->bits[p][u];
-                    symbols[i] = (y << 8) + u;
-                    if (symbols[i] != 0xffff) // reserved to mean "invalid"
+                    bits[i]    = (s->bits[p0][y] << len1) + s->bits[p][u];
+                    symbols[i] = (y << 8) + (u & 0xFF);
                         i++;
                 }
             }
-            ff_free_vlc(&s->vlc[3 + p]);
-            if ((ret = ff_init_vlc_sparse(&s->vlc[3 + p], VLC_BITS, i, len, 1, 1,
+            ff_free_vlc(&s->vlc[4 + p]);
+            if ((ret = ff_init_vlc_sparse(&s->vlc[4 + p], VLC_BITS, i, len, 1, 1,
                                           bits, 2, 2, symbols, 2, 2, 0)) < 0)
                 return ret;
         }
@@ -148,18 +160,19 @@ static int generate_joint_tables(HYuvContext *s)
         for (i = 0, g = -16; g < 16; g++) {
             int len0   = s->len[p0][g & 255];
             int limit0 = VLC_BITS - len0;
-            if (limit0 < 2)
+            if (limit0 < 2 || !len0)
                 continue;
             for (b = -16; b < 16; b++) {
                 int len1   = s->len[p1][b & 255];
                 int limit1 = limit0 - len1;
-                if (limit1 < 1)
+                if (limit1 < 1 || !len1)
                     continue;
                 code = (s->bits[p0][g & 255] << len1) + s->bits[p1][b & 255];
                 for (r = -16; r < 16; r++) {
                     int len2 = s->len[2][r & 255];
-                    if (len2 > limit1)
+                    if (len2 > limit1 || !len2)
                         continue;
+                    av_assert0(i < (1 << VLC_BITS));
                     len[i]  = len0 + len1 + len2;
                     bits[i] = (code << len2) + s->bits[2][r & 255];
                     if (s->decorrelate) {
@@ -175,8 +188,8 @@ static int generate_joint_tables(HYuvContext *s)
                 }
             }
         }
-        ff_free_vlc(&s->vlc[3]);
-        if ((ret = init_vlc(&s->vlc[3], VLC_BITS, i, len, 1, 1,
+        ff_free_vlc(&s->vlc[4]);
+        if ((ret = init_vlc(&s->vlc[4], VLC_BITS, i, len, 1, 1,
                             bits, 2, 2, 0)) < 0)
             return ret;
     }
@@ -187,18 +200,22 @@ static int read_huffman_tables(HYuvContext *s, const uint8_t *src, int length)
 {
     GetBitContext gb;
     int i, ret;
+    int count = 3;
 
     if ((ret = init_get_bits(&gb, src, length * 8)) < 0)
         return ret;
 
-    for (i = 0; i < 3; i++) {
-        if ((ret = read_len_table(s->len[i], &gb)) < 0)
+    if (s->version > 2)
+        count = 1 + s->alpha + 2*s->chroma;
+
+    for (i = 0; i < count; i++) {
+        if ((ret = read_len_table(s->len[i], &gb, s->vlc_n)) < 0)
             return ret;
-        if ((ret = ff_huffyuv_generate_bits_table(s->bits[i], s->len[i])) < 0)
+        if ((ret = ff_huffyuv_generate_bits_table(s->bits[i], s->len[i], s->vlc_n)) < 0)
             return ret;
         ff_free_vlc(&s->vlc[i]);
-        if ((ret = init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1,
-                            s->bits[i], 4, 4, 0)) < 0)
+        if ((ret = init_vlc(&s->vlc[i], VLC_BITS, s->vlc_n, s->len[i], 1, 1,
+                           s->bits[i], 4, 4, 0)) < 0)
             return ret;
     }
 
@@ -213,16 +230,14 @@ static int read_old_huffman_tables(HYuvContext *s)
     GetBitContext gb;
     int i, ret;
 
-    if ((ret = init_get_bits(&gb, classic_shift_luma,
-                             classic_shift_luma_table_size * 8)) < 0)
-        return ret;
-    if ((ret = read_len_table(s->len[0], &gb)) < 0)
+    init_get_bits(&gb, classic_shift_luma,
+                  classic_shift_luma_table_size * 8);
+    if ((ret = read_len_table(s->len[0], &gb, 256)) < 0)
         return ret;
 
-    if ((ret = init_get_bits(&gb, classic_shift_chroma,
-                             classic_shift_chroma_table_size * 8)) < 0)
-        return ret;
-    if ((ret = read_len_table(s->len[1], &gb)) < 0)
+    init_get_bits(&gb, classic_shift_chroma,
+                  classic_shift_chroma_table_size * 8);
+    if ((ret = read_len_table(s->len[1], &gb, 256)) < 0)
         return ret;
 
     for (i = 0; i < 256; i++)
@@ -237,7 +252,7 @@ static int read_old_huffman_tables(HYuvContext *s)
     memcpy(s->bits[2], s->bits[1], 256 * sizeof(uint32_t));
     memcpy(s->len[2], s->len[1], 256 * sizeof(uint8_t));
 
-    for (i = 0; i < 3; i++) {
+    for (i = 0; i < 4; i++) {
         ff_free_vlc(&s->vlc[i]);
         if ((ret = init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1,
                             s->bits[i], 4, 4, 0)) < 0)
@@ -255,23 +270,28 @@ static av_cold int decode_init(AVCodecContext *avctx)
     HYuvContext *s = avctx->priv_data;
     int ret;
 
-    ff_huffyuv_common_init(avctx);
     ff_huffyuvdsp_init(&s->hdsp);
-    memset(s->vlc, 0, 3 * sizeof(VLC));
+    memset(s->vlc, 0, 4 * sizeof(VLC));
 
-    s->interlaced = s->height > 288;
+    s->interlaced = avctx->height > 288;
     s->bgr32      = 1;
 
     if (avctx->extradata_size) {
         if ((avctx->bits_per_coded_sample & 7) &&
             avctx->bits_per_coded_sample != 12)
             s->version = 1; // do such files exist at all?
-        else
+        else if (avctx->extradata_size > 3 && avctx->extradata[3] == 0)
             s->version = 2;
+        else
+            s->version = 3;
     } else
         s->version = 0;
 
-    if (s->version == 2) {
+    s->bps = 8;
+    s->n = 1<<s->bps;
+    s->vlc_n = FFMIN(s->n, MAX_VLC_N);
+    s->chroma = 1;
+    if (s->version >= 2) {
         int method, interlace;
 
         if (avctx->extradata_size < 4)
@@ -280,9 +300,20 @@ static av_cold int decode_init(AVCodecContext *avctx)
         method           = avctx->extradata[0];
         s->decorrelate   = method & 64 ? 1 : 0;
         s->predictor     = method & 63;
-        s->bitstream_bpp = avctx->extradata[1];
-        if (s->bitstream_bpp == 0)
-            s->bitstream_bpp = avctx->bits_per_coded_sample & ~7;
+        if (s->version == 2) {
+            s->bitstream_bpp = avctx->extradata[1];
+            if (s->bitstream_bpp == 0)
+                s->bitstream_bpp = avctx->bits_per_coded_sample & ~7;
+        } else {
+            s->bps = (avctx->extradata[1] >> 4) + 1;
+            s->n = 1<<s->bps;
+            s->vlc_n = FFMIN(s->n, MAX_VLC_N);
+            s->chroma_h_shift = avctx->extradata[1] & 3;
+            s->chroma_v_shift = (avctx->extradata[1] >> 2) & 3;
+            s->yuv   = !!(avctx->extradata[2] & 1);
+            s->chroma= !!(avctx->extradata[2] & 3);
+            s->alpha = !!(avctx->extradata[2] & 4);
+        }
         interlace     = (avctx->extradata[2] & 0x30) >> 4;
         s->interlaced = (interlace == 1) ? 1 : (interlace == 2) ? 0 : s->interlaced;
         s->context    = avctx->extradata[2] & 0x40 ? 1 : 0;
@@ -320,29 +351,186 @@ static av_cold int decode_init(AVCodecContext *avctx)
             return ret;
     }
 
-    switch (s->bitstream_bpp) {
-    case 12:
-        avctx->pix_fmt = AV_PIX_FMT_YUV420P;
-        break;
-    case 16:
-        if (s->yuy2)
-            avctx->pix_fmt = AV_PIX_FMT_YUYV422;
-        else
-            avctx->pix_fmt = AV_PIX_FMT_YUV422P;
-        break;
-    case 24:
-    case 32:
-        if (s->bgr32)
+    if (s->version <= 2) {
+        switch (s->bitstream_bpp) {
+        case 12:
+            avctx->pix_fmt = AV_PIX_FMT_YUV420P;
+            s->yuv = 1;
+            break;
+        case 16:
+            if (s->yuy2)
+                avctx->pix_fmt = AV_PIX_FMT_YUYV422;
+            else
+                avctx->pix_fmt = AV_PIX_FMT_YUV422P;
+            s->yuv = 1;
+            break;
+        case 24:
+            if (s->bgr32)
+                avctx->pix_fmt = AV_PIX_FMT_0RGB32;
+            else
+                avctx->pix_fmt = AV_PIX_FMT_BGR24;
+            break;
+        case 32:
+            av_assert0(s->bgr32);
             avctx->pix_fmt = AV_PIX_FMT_RGB32;
-        else
-            avctx->pix_fmt = AV_PIX_FMT_BGR24;
-        break;
-    default:
-        return AVERROR_INVALIDDATA;
+            s->alpha = 1;
+            break;
+        default:
+            return AVERROR_INVALIDDATA;
+        }
+        av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt,
+                                         &s->chroma_h_shift,
+                                         &s->chroma_v_shift);
+    } else {
+        switch ( (s->chroma<<10) | (s->yuv<<9) | (s->alpha<<8) | ((s->bps-1)<<4) | s->chroma_h_shift | (s->chroma_v_shift<<2)) {
+        case 0x070:
+            avctx->pix_fmt = AV_PIX_FMT_GRAY8;
+            break;
+        case 0x0F0:
+            avctx->pix_fmt = AV_PIX_FMT_GRAY16;
+            break;
+        case 0x170:
+            avctx->pix_fmt = AV_PIX_FMT_GRAY8A;
+            break;
+        case 0x470:
+            avctx->pix_fmt = AV_PIX_FMT_GBRP;
+            break;
+        case 0x480:
+            avctx->pix_fmt = AV_PIX_FMT_GBRP9;
+            break;
+        case 0x490:
+            avctx->pix_fmt = AV_PIX_FMT_GBRP10;
+            break;
+        case 0x4B0:
+            avctx->pix_fmt = AV_PIX_FMT_GBRP12;
+            break;
+        case 0x4D0:
+            avctx->pix_fmt = AV_PIX_FMT_GBRP14;
+            break;
+        case 0x4F0:
+            avctx->pix_fmt = AV_PIX_FMT_GBRP16;
+            break;
+        case 0x570:
+            avctx->pix_fmt = AV_PIX_FMT_GBRAP;
+            break;
+        case 0x670:
+            avctx->pix_fmt = AV_PIX_FMT_YUV444P;
+            break;
+        case 0x680:
+            avctx->pix_fmt = AV_PIX_FMT_YUV444P9;
+            break;
+        case 0x690:
+            avctx->pix_fmt = AV_PIX_FMT_YUV444P10;
+            break;
+        case 0x6B0:
+            avctx->pix_fmt = AV_PIX_FMT_YUV444P12;
+            break;
+        case 0x6D0:
+            avctx->pix_fmt = AV_PIX_FMT_YUV444P14;
+            break;
+        case 0x6F0:
+            avctx->pix_fmt = AV_PIX_FMT_YUV444P16;
+            break;
+        case 0x671:
+            avctx->pix_fmt = AV_PIX_FMT_YUV422P;
+            break;
+        case 0x681:
+            avctx->pix_fmt = AV_PIX_FMT_YUV422P9;
+            break;
+        case 0x691:
+            avctx->pix_fmt = AV_PIX_FMT_YUV422P10;
+            break;
+        case 0x6B1:
+            avctx->pix_fmt = AV_PIX_FMT_YUV422P12;
+            break;
+        case 0x6D1:
+            avctx->pix_fmt = AV_PIX_FMT_YUV422P14;
+            break;
+        case 0x6F1:
+            avctx->pix_fmt = AV_PIX_FMT_YUV422P16;
+            break;
+        case 0x672:
+            avctx->pix_fmt = AV_PIX_FMT_YUV411P;
+            break;
+        case 0x674:
+            avctx->pix_fmt = AV_PIX_FMT_YUV440P;
+            break;
+        case 0x675:
+            avctx->pix_fmt = AV_PIX_FMT_YUV420P;
+            break;
+        case 0x685:
+            avctx->pix_fmt = AV_PIX_FMT_YUV420P9;
+            break;
+        case 0x695:
+            avctx->pix_fmt = AV_PIX_FMT_YUV420P10;
+            break;
+        case 0x6B5:
+            avctx->pix_fmt = AV_PIX_FMT_YUV420P12;
+            break;
+        case 0x6D5:
+            avctx->pix_fmt = AV_PIX_FMT_YUV420P14;
+            break;
+        case 0x6F5:
+            avctx->pix_fmt = AV_PIX_FMT_YUV420P16;
+            break;
+        case 0x67A:
+            avctx->pix_fmt = AV_PIX_FMT_YUV410P;
+            break;
+        case 0x770:
+            avctx->pix_fmt = AV_PIX_FMT_YUVA444P;
+            break;
+        case 0x780:
+            avctx->pix_fmt = AV_PIX_FMT_YUVA444P9;
+            break;
+        case 0x790:
+            avctx->pix_fmt = AV_PIX_FMT_YUVA444P10;
+            break;
+        case 0x7F0:
+            avctx->pix_fmt = AV_PIX_FMT_YUVA444P16;
+            break;
+        case 0x771:
+            avctx->pix_fmt = AV_PIX_FMT_YUVA422P;
+            break;
+        case 0x781:
+            avctx->pix_fmt = AV_PIX_FMT_YUVA422P9;
+            break;
+        case 0x791:
+            avctx->pix_fmt = AV_PIX_FMT_YUVA422P10;
+            break;
+        case 0x7F1:
+            avctx->pix_fmt = AV_PIX_FMT_YUVA422P16;
+            break;
+        case 0x775:
+            avctx->pix_fmt = AV_PIX_FMT_YUVA420P;
+            break;
+        case 0x785:
+            avctx->pix_fmt = AV_PIX_FMT_YUVA420P9;
+            break;
+        case 0x795:
+            avctx->pix_fmt = AV_PIX_FMT_YUVA420P10;
+            break;
+        case 0x7F5:
+            avctx->pix_fmt = AV_PIX_FMT_YUVA420P16;
+            break;
+        default:
+            return AVERROR_INVALIDDATA;
+        }
     }
 
-    if ((ret = ff_huffyuv_alloc_temp(s)) < 0)
+    ff_huffyuv_common_init(avctx);
+
+    if ((avctx->pix_fmt == AV_PIX_FMT_YUV422P || avctx->pix_fmt == AV_PIX_FMT_YUV420P) && avctx->width & 1) {
+        av_log(avctx, AV_LOG_ERROR, "width must be even for this colorspace\n");
+        return AVERROR_INVALIDDATA;
+    }
+    if (s->predictor == MEDIAN && avctx->pix_fmt == AV_PIX_FMT_YUV422P && avctx->width%4) {
+        av_log(avctx, AV_LOG_ERROR, "width must be a multiple of 4 this colorspace and predictor\n");
+        return AVERROR_INVALIDDATA;
+    }
+    if ((ret = ff_huffyuv_alloc_temp(s)) < 0) {
+        ff_huffyuv_common_end(s);
         return ret;
+    }
 
     return 0;
 }
@@ -352,13 +540,15 @@ static av_cold int decode_init_thread_copy(AVCodecContext *avctx)
     HYuvContext *s = avctx->priv_data;
     int i, ret;
 
-    if ((ret = ff_huffyuv_alloc_temp(s)) < 0)
+    if ((ret = ff_huffyuv_alloc_temp(s)) < 0) {
+        ff_huffyuv_common_end(s);
         return ret;
+    }
 
-    for (i = 0; i < 6; i++)
+    for (i = 0; i < 8; i++)
         s->vlc[i].table = NULL;
 
-    if (s->version == 2) {
+    if (s->version >= 2) {
         if ((ret = read_huffman_tables(s, avctx->extradata + 4,
                                        avctx->extradata_size)) < 0)
             return ret;
@@ -370,47 +560,153 @@ static av_cold int decode_init_thread_copy(AVCodecContext *avctx)
     return 0;
 }
 
-/* TODO instead of restarting the read when the code isn't in the first level
- * of the joint table, jump into the 2nd level of the individual table. */
+/** Subset of GET_VLC for use in hand-rolled VLC code */
+#define VLC_INTERN(dst, table, gb, name, bits, max_depth)   \
+    code = table[index][0];                                 \
+    n    = table[index][1];                                 \
+    if (max_depth > 1 && n < 0) {                           \
+        LAST_SKIP_BITS(name, gb, bits);                     \
+        UPDATE_CACHE(name, gb);                             \
+                                                            \
+        nb_bits = -n;                                       \
+        index   = SHOW_UBITS(name, gb, nb_bits) + code;     \
+        code    = table[index][0];                          \
+        n       = table[index][1];                          \
+        if (max_depth > 2 && n < 0) {                       \
+            LAST_SKIP_BITS(name, gb, nb_bits);              \
+            UPDATE_CACHE(name, gb);                         \
+                                                            \
+            nb_bits = -n;                                   \
+            index   = SHOW_UBITS(name, gb, nb_bits) + code; \
+            code    = table[index][0];                      \
+            n       = table[index][1];                      \
+        }                                                   \
+    }                                                       \
+    dst = code;                                             \
+    LAST_SKIP_BITS(name, gb, n)
+
+
+#define GET_VLC_DUAL(dst0, dst1, name, gb, dtable, table1, table2,  \
+                     bits, max_depth, OP)                           \
+    do {                                                            \
+        unsigned int index = SHOW_UBITS(name, gb, bits);            \
+        int          code, n = dtable[index][1];                    \
+                                                                    \
+        if (n<=0) {                                                 \
+            int nb_bits;                                            \
+            VLC_INTERN(dst0, table1, gb, name, bits, max_depth);    \
+                                                                    \
+            UPDATE_CACHE(re, gb);                                   \
+            index = SHOW_UBITS(name, gb, bits);                     \
+            VLC_INTERN(dst1, table2, gb, name, bits, max_depth);    \
+        } else {                                                    \
+            code = dtable[index][0];                                \
+            OP(dst0, dst1, code);                                   \
+            LAST_SKIP_BITS(name, gb, n);                            \
+        }                                                           \
+    } while (0)
+
+#define OP8bits(dst0, dst1, code) dst0 = code>>8; dst1 = code
+
 #define READ_2PIX(dst0, dst1, plane1)                                   \
-    {                                                                   \
-        uint16_t code = get_vlc2(&s->gb, s->vlc[3 + plane1].table,      \
-                                 VLC_BITS, 1);                          \
-        if (code != 0xffff) {                                           \
-            dst0 = code >> 8;                                           \
-            dst1 = code;                                                \
-        } else {                                                        \
-            dst0 = get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);      \
-            dst1 = get_vlc2(&s->gb, s->vlc[plane1].table, VLC_BITS, 3); \
-        }                                                               \
-    }
+    UPDATE_CACHE(re, &s->gb);                                           \
+    GET_VLC_DUAL(dst0, dst1, re, &s->gb, s->vlc[4+plane1].table,        \
+                 s->vlc[0].table, s->vlc[plane1].table, VLC_BITS, 3, OP8bits)
 
 static void decode_422_bitstream(HYuvContext *s, int count)
 {
-    int i;
-
+    int i, icount;
+    OPEN_READER(re, &s->gb);
     count /= 2;
 
-    if (count >= (get_bits_left(&s->gb)) / (31 * 4)) {
-        for (i = 0; i < count && get_bits_left(&s->gb) > 0; i++) {
+    icount = get_bits_left(&s->gb) / (32 * 4);
+    if (count >= icount) {
+        for (i = 0; i < icount; i++) {
             READ_2PIX(s->temp[0][2 * i],     s->temp[1][i], 1);
             READ_2PIX(s->temp[0][2 * i + 1], s->temp[2][i], 2);
         }
+        for (; i < count && get_bits_left(&s->gb) > 0; i++) {
+            READ_2PIX(s->temp[0][2 * i    ], s->temp[1][i], 1);
+            if (get_bits_left(&s->gb) <= 0) break;
+            READ_2PIX(s->temp[0][2 * i + 1], s->temp[2][i], 2);
+        }
+        for (; i < count; i++)
+            s->temp[0][2 * i    ] = s->temp[1][i] =
+            s->temp[0][2 * i + 1] = s->temp[2][i] = 0;
     } else {
         for (i = 0; i < count; i++) {
             READ_2PIX(s->temp[0][2 * i],     s->temp[1][i], 1);
             READ_2PIX(s->temp[0][2 * i + 1], s->temp[2][i], 2);
         }
     }
+    CLOSE_READER(re, &s->gb);
 }
 
-static void decode_gray_bitstream(HYuvContext *s, int count)
+#define READ_2PIX_PLANE(dst0, dst1, plane, OP) \
+    UPDATE_CACHE(re, &s->gb); \
+    GET_VLC_DUAL(dst0, dst1, re, &s->gb, s->vlc[4+plane].table, \
+                 s->vlc[plane].table, s->vlc[plane].table, VLC_BITS, 3, OP)
+
+#define OP14bits(dst0, dst1, code) dst0 = code>>8; dst1 = sign_extend(code, 8)
+
+/* TODO instead of restarting the read when the code isn't in the first level
+ * of the joint table, jump into the 2nd level of the individual table. */
+#define READ_2PIX_PLANE16(dst0, dst1, plane){\
+    dst0 = get_vlc2(&s->gb, s->vlc[plane].table, VLC_BITS, 3)<<2;\
+    dst0 += get_bits(&s->gb, 2);\
+    dst1 = get_vlc2(&s->gb, s->vlc[plane].table, VLC_BITS, 3)<<2;\
+    dst1 += get_bits(&s->gb, 2);\
+}
+static void decode_plane_bitstream(HYuvContext *s, int count, int plane)
 {
     int i;
 
     count /= 2;
 
-    if (count >= (get_bits_left(&s->gb)) / (31 * 2)) {
+    if (s->bps <= 8) {
+        OPEN_READER(re, &s->gb);
+        if (count >= (get_bits_left(&s->gb)) / (32 * 2)) {
+            for (i = 0; i < count && get_bits_left(&s->gb) > 0; i++) {
+                READ_2PIX_PLANE(s->temp[0][2 * i], s->temp[0][2 * i + 1], plane, OP8bits);
+            }
+        } else {
+            for(i=0; i<count; i++){
+                READ_2PIX_PLANE(s->temp[0][2 * i], s->temp[0][2 * i + 1], plane, OP8bits);
+            }
+        }
+        CLOSE_READER(re, &s->gb);
+    } else if (s->bps <= 14) {
+        OPEN_READER(re, &s->gb);
+        if (count >= (get_bits_left(&s->gb)) / (32 * 2)) {
+            for (i = 0; i < count && get_bits_left(&s->gb) > 0; i++) {
+                READ_2PIX_PLANE(s->temp16[0][2 * i], s->temp16[0][2 * i + 1], plane, OP14bits);
+            }
+        } else {
+            for(i=0; i<count; i++){
+                READ_2PIX_PLANE(s->temp16[0][2 * i], s->temp16[0][2 * i + 1], plane, OP14bits);
+            }
+        }
+        CLOSE_READER(re, &s->gb);
+    } else {
+        if (count >= (get_bits_left(&s->gb)) / (32 * 2)) {
+            for (i = 0; i < count && get_bits_left(&s->gb) > 0; i++) {
+                READ_2PIX_PLANE16(s->temp16[0][2 * i], s->temp16[0][2 * i + 1], plane);
+            }
+        } else {
+            for(i=0; i<count; i++){
+                READ_2PIX_PLANE16(s->temp16[0][2 * i], s->temp16[0][2 * i + 1], plane);
+            }
+        }
+    }
+}
+
+static void decode_gray_bitstream(HYuvContext *s, int count)
+{
+    int i;
+    OPEN_READER(re, &s->gb);
+    count /= 2;
+
+    if (count >= (get_bits_left(&s->gb)) / (32 * 2)) {
         for (i = 0; i < count && get_bits_left(&s->gb) > 0; i++) {
             READ_2PIX(s->temp[0][2 * i], s->temp[0][2 * i + 1], 0);
         }
@@ -419,30 +715,66 @@ static void decode_gray_bitstream(HYuvContext *s, int count)
             READ_2PIX(s->temp[0][2 * i], s->temp[0][2 * i + 1], 0);
         }
     }
+    CLOSE_READER(re, &s->gb);
 }
 
 static av_always_inline void decode_bgr_1(HYuvContext *s, int count,
                                           int decorrelate, int alpha)
 {
     int i;
-    for (i = 0; i < count; i++) {
-        int code = get_vlc2(&s->gb, s->vlc[3].table, VLC_BITS, 1);
-        if (code != -1) {
+    OPEN_READER(re, &s->gb);
+
+    for (i = 0; i < count && get_bits_left(&s->gb) > 0; i++) {
+        unsigned int index;
+        int code, n;
+
+        UPDATE_CACHE(re, &s->gb);
+        index = SHOW_UBITS(re, &s->gb, VLC_BITS);
+        n     = s->vlc[4].table[index][1];
+
+        if (n>0) {
+            code  = s->vlc[4].table[index][0];
             *(uint32_t *) &s->temp[0][4 * i] = s->pix_bgr_map[code];
-        } else if (decorrelate) {
-            s->temp[0][4 * i + G] = get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3);
-            s->temp[0][4 * i + B] = get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3) +
-                                    s->temp[0][4 * i + G];
-            s->temp[0][4 * i + R] = get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3) +
-                                    s->temp[0][4 * i + G];
+            LAST_SKIP_BITS(re, &s->gb, n);
         } else {
-            s->temp[0][4 * i + B] = get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);
-            s->temp[0][4 * i + G] = get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3);
-            s->temp[0][4 * i + R] = get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3);
+            int nb_bits;
+            if (decorrelate) {
+                VLC_INTERN(s->temp[0][4 * i + G], s->vlc[1].table,
+                           &s->gb, re, VLC_BITS, 3);
+
+                UPDATE_CACHE(re, &s->gb);
+                index = SHOW_UBITS(re, &s->gb, VLC_BITS);
+                VLC_INTERN(code, s->vlc[0].table, &s->gb, re, VLC_BITS, 3);
+                s->temp[0][4 * i + B] = code + s->temp[0][4 * i + G];
+
+                UPDATE_CACHE(re, &s->gb);
+                index = SHOW_UBITS(re, &s->gb, VLC_BITS);
+                VLC_INTERN(code, s->vlc[2].table, &s->gb, re, VLC_BITS, 3);
+                s->temp[0][4 * i + R] = code + s->temp[0][4 * i + G];
+            } else {
+                VLC_INTERN(s->temp[0][4 * i + B], s->vlc[0].table,
+                           &s->gb, re, VLC_BITS, 3);
+
+                UPDATE_CACHE(re, &s->gb);
+                index = SHOW_UBITS(re, &s->gb, VLC_BITS);
+                VLC_INTERN(s->temp[0][4 * i + G], s->vlc[1].table,
+                           &s->gb, re, VLC_BITS, 3);
+
+                UPDATE_CACHE(re, &s->gb);
+                index = SHOW_UBITS(re, &s->gb, VLC_BITS);
+                VLC_INTERN(s->temp[0][4 * i + R], s->vlc[2].table,
+                           &s->gb, re, VLC_BITS, 3);
+            }
+            if (alpha) {
+                UPDATE_CACHE(re, &s->gb);
+                index = SHOW_UBITS(re, &s->gb, VLC_BITS);
+                VLC_INTERN(s->temp[0][4 * i + A], s->vlc[2].table,
+                           &s->gb, re, VLC_BITS, 3);
+            } else
+                s->temp[0][4 * i + A] = 0;
         }
-        if (alpha)
-            s->temp[0][4 * i + A] = get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3);
     }
+    CLOSE_READER(re, &s->gb);
 }
 
 static void decode_bgr_bitstream(HYuvContext *s, int count)
@@ -488,6 +820,32 @@ static void draw_slice(HYuvContext *s, AVFrame *frame, int y)
     s->last_slice_end = y + h;
 }
 
+static int left_prediction(HYuvContext *s, uint8_t *dst, const uint8_t *src, int w, int acc)
+{
+    if (s->bps <= 8) {
+        return s->hdsp.add_hfyu_left_pred(dst, src, w, acc);
+    } else {
+        return s->llviddsp.add_hfyu_left_pred_int16((      uint16_t *)dst, (const uint16_t *)src, s->n-1, w, acc);
+    }
+}
+
+static void add_bytes(HYuvContext *s, uint8_t *dst, uint8_t *src, int w)
+{
+    if (s->bps <= 8) {
+        s->hdsp.add_bytes(dst, src, w);
+    } else {
+        s->llviddsp.add_int16((uint16_t*)dst, (const uint16_t*)src, s->n - 1, w);
+    }
+}
+
+static void add_median_prediction(HYuvContext *s, uint8_t *dst, const uint8_t *src, const uint8_t *diff, int w, int *left, int *left_top)
+{
+    if (s->bps <= 8) {
+        s->hdsp.add_hfyu_median_pred(dst, src, diff, w, left, left_top);
+    } else {
+        s->llviddsp.add_hfyu_median_pred_int16((uint16_t *)dst, (const uint16_t *)src, (const uint16_t *)diff, s->n-1, w, left, left_top);
+    }
+}
 static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                         AVPacket *avpkt)
 {
@@ -502,20 +860,17 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     AVFrame *const p = data;
     int table_size = 0, ret;
 
-    av_fast_malloc(&s->bitstream_buffer,
+    av_fast_padded_malloc(&s->bitstream_buffer,
                    &s->bitstream_buffer_size,
-                   buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
+                   buf_size);
     if (!s->bitstream_buffer)
         return AVERROR(ENOMEM);
 
-    memset(s->bitstream_buffer + buf_size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
     s->bdsp.bswap_buf((uint32_t *) s->bitstream_buffer,
                       (const uint32_t *) buf, buf_size / 4);
 
-    if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
         return ret;
-    }
 
     if (s->context) {
         table_size = read_huffman_tables(s, s->bitstream_buffer, buf_size);
@@ -536,7 +891,72 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
 
     s->last_slice_end = 0;
 
-    if (s->bitstream_bpp < 24) {
+    if (s->version > 2) {
+        int plane;
+        for(plane = 0; plane < 1 + 2*s->chroma + s->alpha; plane++) {
+            int left, lefttop, y;
+            int w = width;
+            int h = height;
+            int fake_stride = fake_ystride;
+
+            if (s->chroma && (plane == 1 || plane == 2)) {
+                w >>= s->chroma_h_shift;
+                h >>= s->chroma_v_shift;
+                fake_stride = plane == 1 ? fake_ustride : fake_vstride;
+            }
+
+            switch (s->predictor) {
+            case LEFT:
+            case PLANE:
+                decode_plane_bitstream(s, w, plane);
+                left = left_prediction(s, p->data[plane], s->temp[0], w, 0);
+
+                for (y = 1; y < h; y++) {
+                    uint8_t *dst = p->data[plane] + p->linesize[plane]*y;
+
+                    decode_plane_bitstream(s, w, plane);
+                    left = left_prediction(s, dst, s->temp[0], w, left);
+                    if (s->predictor == PLANE) {
+                        if (y > s->interlaced) {
+                            add_bytes(s, dst, dst - fake_stride, w);
+                        }
+                    }
+                }
+
+                break;
+            case MEDIAN:
+                decode_plane_bitstream(s, w, plane);
+                left= left_prediction(s, p->data[plane], s->temp[0], w, 0);
+
+                y = 1;
+
+                /* second line is left predicted for interlaced case */
+                if (s->interlaced) {
+                    decode_plane_bitstream(s, w, plane);
+                    left = left_prediction(s, p->data[plane] + p->linesize[plane], s->temp[0], w, left);
+                    y++;
+                }
+
+                lefttop = p->data[plane][0];
+                decode_plane_bitstream(s, w, plane);
+                add_median_prediction(s, p->data[plane] + fake_stride, p->data[plane], s->temp[0], w, &left, &lefttop);
+                y++;
+
+                for (; y<h; y++) {
+                    uint8_t *dst;
+
+                    decode_plane_bitstream(s, w, plane);
+
+                    dst = p->data[plane] + p->linesize[plane] * y;
+
+                    add_median_prediction(s, dst, dst - fake_stride, s->temp[0], w, &left, &lefttop);
+                }
+
+                break;
+            }
+        }
+        draw_slice(s, p, height);
+    } else if (s->bitstream_bpp < 24) {
         int y, cy;
         int lefty, leftu, leftv;
         int lefttopy, lefttopu, lefttopv;
@@ -547,7 +967,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             p->data[0][1] = get_bits(&s->gb, 8);
             p->data[0][0] = get_bits(&s->gb, 8);
 
-            avpriv_report_missing_feature(avctx, "YUY2 output");
+            av_log(avctx, AV_LOG_ERROR,
+                   "YUY2 output is not implemented yet\n");
             return AVERROR_PATCHWELCOME;
         } else {
             leftv         =
@@ -701,19 +1122,19 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         }
     } else {
         int y;
-        int leftr, leftg, leftb, lefta;
+        uint8_t left[4];
         const int last_line = (height - 1) * p->linesize[0];
 
         if (s->bitstream_bpp == 32) {
-            lefta = p->data[0][last_line + A] = get_bits(&s->gb, 8);
-            leftr = p->data[0][last_line + R] = get_bits(&s->gb, 8);
-            leftg = p->data[0][last_line + G] = get_bits(&s->gb, 8);
-            leftb = p->data[0][last_line + B] = get_bits(&s->gb, 8);
+            left[A] = p->data[0][last_line + A] = get_bits(&s->gb, 8);
+            left[R] = p->data[0][last_line + R] = get_bits(&s->gb, 8);
+            left[G] = p->data[0][last_line + G] = get_bits(&s->gb, 8);
+            left[B] = p->data[0][last_line + B] = get_bits(&s->gb, 8);
         } else {
-            leftr = p->data[0][last_line + R] = get_bits(&s->gb, 8);
-            leftg = p->data[0][last_line + G] = get_bits(&s->gb, 8);
-            leftb = p->data[0][last_line + B] = get_bits(&s->gb, 8);
-            lefta = p->data[0][last_line + A] = 255;
+            left[R] = p->data[0][last_line + R] = get_bits(&s->gb, 8);
+            left[G] = p->data[0][last_line + G] = get_bits(&s->gb, 8);
+            left[B] = p->data[0][last_line + B] = get_bits(&s->gb, 8);
+            left[A] = p->data[0][last_line + A] = 255;
             skip_bits(&s->gb, 8);
         }
 
@@ -723,18 +1144,16 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             case PLANE:
                 decode_bgr_bitstream(s, width - 1);
                 s->hdsp.add_hfyu_left_pred_bgr32(p->data[0] + last_line + 4,
-                                                 s->temp[0], width - 1, &leftr,
-                                                 &leftg, &leftb, &lefta);
+                                                 s->temp[0], width - 1, left);
 
                 for (y = s->height - 2; y >= 0; y--) { // Yes it is stored upside down.
                     decode_bgr_bitstream(s, width);
 
                     s->hdsp.add_hfyu_left_pred_bgr32(p->data[0] + p->linesize[0] * y,
-                                                     s->temp[0], width, &leftr,
-                                                     &leftg, &leftb, &lefta);
+                                                     s->temp[0], width, left);
                     if (s->predictor == PLANE) {
                         if (s->bitstream_bpp != 32)
-                            lefta = 0;
+                            left[A] = 0;
                         if ((y & s->interlaced) == 0 &&
                             y < s->height - 1 - s->interlaced) {
                             s->hdsp.add_bytes(p->data[0] + p->linesize[0] * y,
@@ -751,7 +1170,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                        "prediction type not supported!\n");
             }
         } else {
-            avpriv_report_missing_feature(avctx, "BGR24 output");
+            av_log(avctx, AV_LOG_ERROR,
+                   "BGR24 output is not implemented yet\n");
             return AVERROR_PATCHWELCOME;
         }
     }
@@ -770,7 +1190,7 @@ static av_cold int decode_end(AVCodecContext *avctx)
     ff_huffyuv_common_end(s);
     av_freep(&s->bitstream_buffer);
 
-    for (i = 0; i < 6; i++)
+    for (i = 0; i < 8; i++)
         ff_free_vlc(&s->vlc[i]);
 
     return 0;
diff --git a/libavcodec/huffyuvdsp.c b/libavcodec/huffyuvdsp.c
index b5a714d..e8a05f6 100644
--- a/libavcodec/huffyuvdsp.c
+++ b/libavcodec/huffyuvdsp.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,7 +27,7 @@
 #define pb_7f (~0UL / 255 * 0x7f)
 #define pb_80 (~0UL / 255 * 0x80)
 
-static void add_bytes_c(uint8_t *dst, uint8_t *src, int w)
+static void add_bytes_c(uint8_t *dst, uint8_t *src, intptr_t w)
 {
     long i;
 
@@ -41,7 +41,7 @@ static void add_bytes_c(uint8_t *dst, uint8_t *src, int w)
 }
 
 static void add_hfyu_median_pred_c(uint8_t *dst, const uint8_t *src1,
-                                   const uint8_t *diff, int w,
+                                   const uint8_t *diff, intptr_t w,
                                    int *left, int *left_top)
 {
     int i;
@@ -60,7 +60,7 @@ static void add_hfyu_median_pred_c(uint8_t *dst, const uint8_t *src1,
     *left_top = lt;
 }
 
-static int add_hfyu_left_pred_c(uint8_t *dst, const uint8_t *src, int w,
+static int add_hfyu_left_pred_c(uint8_t *dst, const uint8_t *src, intptr_t w,
                                 int acc)
 {
     int i;
@@ -81,22 +81,11 @@ static int add_hfyu_left_pred_c(uint8_t *dst, const uint8_t *src, int w,
     return acc;
 }
 
-#if HAVE_BIGENDIAN
-#define B 3
-#define G 2
-#define R 1
-#define A 0
-#else
-#define B 0
-#define G 1
-#define R 2
-#define A 3
-#endif
 static void add_hfyu_left_pred_bgr32_c(uint8_t *dst, const uint8_t *src,
-                                       int w, int *red, int *green,
-                                       int *blue, int *alpha)
+                                       intptr_t w, uint8_t *left)
 {
-    int i, r = *red, g = *green, b = *blue, a = *alpha;
+    int i;
+    uint8_t r = left[R], g = left[G], b = left[B], a = left[A];
 
     for (i = 0; i < w; i++) {
         b += src[4 * i + B];
@@ -110,15 +99,11 @@ static void add_hfyu_left_pred_bgr32_c(uint8_t *dst, const uint8_t *src,
         dst[4 * i + A] = a;
     }
 
-    *red   = r;
-    *green = g;
-    *blue  = b;
-    *alpha = a;
+    left[B] = b;
+    left[G] = g;
+    left[R] = r;
+    left[A] = a;
 }
-#undef B
-#undef G
-#undef R
-#undef A
 
 av_cold void ff_huffyuvdsp_init(HuffYUVDSPContext *c)
 {
diff --git a/libavcodec/huffyuvdsp.h b/libavcodec/huffyuvdsp.h
index 5e84e3a..78b2bca 100644
--- a/libavcodec/huffyuvdsp.h
+++ b/libavcodec/huffyuvdsp.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -21,17 +21,28 @@
 
 #include <stdint.h>
 
+#if HAVE_BIGENDIAN /* B/G/R/A: index of each channel inside a 4-byte pixel (used as src[4 * i + B], left[R], ...) */
+#define B 3
+#define G 2
+#define R 1
+#define A 0
+#else /* not big-endian: the same four channels in the opposite index order */
+#define B 0
+#define G 1
+#define R 2
+#define A 3
+#endif
+
 typedef struct HuffYUVDSPContext {
     void (*add_bytes)(uint8_t *dst /* align 16 */, uint8_t *src /* align 16 */,
-                      int w);
+                      intptr_t w);
     void (*add_hfyu_median_pred)(uint8_t *dst, const uint8_t *top,
-                                 const uint8_t *diff, int w,
+                                 const uint8_t *diff, intptr_t w,
                                  int *left, int *left_top);
     int (*add_hfyu_left_pred)(uint8_t *dst, const uint8_t *src,
-                              int w, int left);
+                              intptr_t w, int left);
     void (*add_hfyu_left_pred_bgr32)(uint8_t *dst, const uint8_t *src,
-                                     int w, int *red, int *green,
-                                     int *blue, int *alpha);
+                                     intptr_t w, uint8_t *left); /* left: running per-channel values indexed by B/G/R/A; the C implementation reads them on entry and stores them back before returning */
 } HuffYUVDSPContext;
 
 void ff_huffyuvdsp_init(HuffYUVDSPContext *c);
diff --git a/libavcodec/huffyuvenc.c b/libavcodec/huffyuvenc.c
index 47fe2a5..8d72b63 100644
--- a/libavcodec/huffyuvenc.c
+++ b/libavcodec/huffyuvenc.c
@@ -1,24 +1,26 @@
 /*
- * Copyright (c) 2002-2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2002-2014 Michael Niedermayer <michaelni@gmx.at>
  *
  * see http://www.pcisys.net/~melanson/codecs/huffyuv.txt for a description of
  * the algorithm used
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * yuva, gray, 4:4:4, 4:1:1, 4:1:0 and >8 bit per sample support sponsored by NOA
  */
 
 /**
@@ -30,32 +32,65 @@
 #include "huffyuv.h"
 #include "huffman.h"
 #include "huffyuvencdsp.h"
+#include "internal.h"
 #include "put_bits.h"
+#include "libavutil/pixdesc.h"
+
+static inline void diff_bytes(HYuvContext *s, uint8_t *dst,
+                              const uint8_t *src0, const uint8_t *src1, int w)
+{ /* Forwards dst/src0/src1/w to the DSP difference routine matching the sample depth s->bps. */
+    if (s->bps <= 8) { /* samples fit in a byte: byte-wise DSP routine */
+        s->hencdsp.diff_bytes(dst, src0, src1, w);
+    } else { /* deeper samples: buffers are reinterpreted as uint16_t; s->n - 1 is passed as an extra argument (presumably a sample mask - confirm against diff_int16) */
+        s->llviddsp.diff_int16((uint16_t *)dst, (const uint16_t *)src0, (const uint16_t *)src1, s->n - 1, w);
+    }
+}
 
 static inline int sub_left_prediction(HYuvContext *s, uint8_t *dst,
-                                      uint8_t *src, int w, int left)
+                                      const uint8_t *src, int w, int left) /* writes src[i] minus the previous sample to dst; returns the last source sample (or `left` unchanged when w is 0) */
 {
     int i;
-    if (w < 32) {
-        for (i = 0; i < w; i++) {
-            const int temp = src[i];
-            dst[i] = temp - left;
-            left   = temp;
+    if (s->bps <= 8) { /* samples are bytes */
+        if (w < 32) { /* short row: plain scalar loop */
+            for (i = 0; i < w; i++) {
+                const int temp = src[i];
+                dst[i] = temp - left;
+                left   = temp;
+            }
+            return left;
+        } else { /* first 16 samples by hand, remainder through the DSP routine with src shifted back by one as the predictor */
+            for (i = 0; i < 16; i++) {
+                const int temp = src[i];
+                dst[i] = temp - left;
+                left   = temp;
+            }
+            s->hencdsp.diff_bytes(dst + 16, src + 16, src + 15, w - 16);
+            return src[w-1];
         }
-        return left;
     } else {
-        for (i = 0; i < 16; i++) {
-            const int temp = src[i];
-            dst[i] = temp - left;
-            left   = temp;
+        const uint16_t *src16 = (const uint16_t *)src; /* deeper samples: same scheme on uint16_t views of both buffers */
+        uint16_t       *dst16 = (      uint16_t *)dst;
+        if (w < 32) {
+            for (i = 0; i < w; i++) {
+                const int temp = src16[i];
+                dst16[i] = temp - left;
+                left   = temp;
+            }
+            return left;
+        } else {
+            for (i = 0; i < 16; i++) {
+                const int temp = src16[i];
+                dst16[i] = temp - left;
+                left   = temp;
+            }
+            s->llviddsp.diff_int16(dst16 + 16, src16 + 16, src16 + 15, s->n - 1, w - 16);
+            return src16[w-1];
         }
-        s->hencdsp.diff_bytes(dst + 16, src + 16, src + 15, w - 16);
-        return src[w-1];
     }
 }
 
 static inline void sub_left_prediction_bgr32(HYuvContext *s, uint8_t *dst,
-                                             uint8_t *src, int w,
+                                             const uint8_t *src, int w,
                                              int *red, int *green, int *blue,
                                              int *alpha)
 {
@@ -117,20 +152,30 @@ static inline void sub_left_prediction_rgb24(HYuvContext *s, uint8_t *dst,
     *blue  = src[(w - 1) * 3 + 2];
 }
 
+static void sub_median_prediction(HYuvContext *s, uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top)
+{ /* Forwards all arguments to the median-prediction DSP routine matching the sample depth s->bps. */
+    if (s->bps <= 8) { /* byte samples */
+        s->hencdsp.sub_hfyu_median_pred(dst, src1, src2, w , left, left_top);
+    } else { /* deeper samples: uint16_t views of the buffers, with s->n - 1 passed as an extra argument (presumably a sample mask - confirm against the int16 routine) */
+        s->llviddsp.sub_hfyu_median_pred_int16((uint16_t *)dst, (const uint16_t *)src1, (const uint16_t *)src2, s->n - 1, w , left, left_top);
+    }
+}
+
 static int store_table(HYuvContext *s, const uint8_t *len, uint8_t *buf)
 {
     int i;
     int index = 0;
+    int n = s->vlc_n;
 
-    for (i = 0; i < 256;) {
+    for (i = 0; i < n;) {
         int val = len[i];
         int repeat = 0;
 
-        for (; i < 256 && len[i] == val && repeat < 255; i++)
+        for (; i < n && len[i] == val && repeat < 255; i++)
             repeat++;
 
-        assert(val < 32 && val >0 && repeat<256 && repeat>0);
-        if ( repeat > 7) {
+        av_assert0(val < 32 && val >0 && repeat < 256 && repeat>0);
+        if (repeat > 7) {
             buf[index++] = val;
             buf[index++] = repeat;
         } else {
@@ -141,16 +186,47 @@ static int store_table(HYuvContext *s, const uint8_t *len, uint8_t *buf)
     return index;
 }
 
+static int store_huffman_tables(HYuvContext *s, uint8_t *buf) /* rebuilds s->len[] and s->bits[] from s->stats[] and serialises the length tables into buf; returns the byte count written or a negative value on failure */
+{
+    int i, ret;
+    int size = 0;
+    int count = 3; /* three tables unless the version > 2 layout below applies */
+
+    if (s->version > 2)
+        count = 1 + s->alpha + 2*s->chroma; /* one table, plus one if alpha is set, plus two if chroma is set */
+
+    for (i = 0; i < count; i++) {
+        if ((ret = ff_huff_gen_len_table(s->len[i], s->stats[i], s->vlc_n, 0)) < 0)
+            return ret;
+
+        if (ff_huffyuv_generate_bits_table(s->bits[i], s->len[i], s->vlc_n) < 0) {
+            return -1; /* the helper's own return value is not propagated here */
+        }
+
+        size += store_table(s, s->len[i], buf + size); /* tables are packed back to back */
+    }
+    return size;
+}
+
 static av_cold int encode_init(AVCodecContext *avctx)
 {
     HYuvContext *s = avctx->priv_data;
     int i, j;
+    int ret;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
 
     ff_huffyuv_common_init(avctx);
     ff_huffyuvencdsp_init(&s->hencdsp);
 
-    avctx->extradata = av_mallocz(1024*30); // 256*3+4 == 772
-    avctx->stats_out = av_mallocz(1024*30); // 21*256*3(%llu ) + 3(\n) + 1(0) = 16132
+    avctx->extradata = av_mallocz(3*MAX_N + 4);
+    if (!avctx->extradata)
+        return AVERROR(ENOMEM);
+    if (s->flags&CODEC_FLAG_PASS1) {
+#define STATS_OUT_SIZE 21*MAX_N*3 + 4
+        avctx->stats_out = av_mallocz(STATS_OUT_SIZE); // 21*256*3(%llu ) + 3(\n) + 1(0) = 16132
+        if (!avctx->stats_out)
+            return AVERROR(ENOMEM);
+    }
     s->version = 2;
 
     avctx->coded_frame = av_frame_alloc();
@@ -160,15 +236,66 @@ static av_cold int encode_init(AVCodecContext *avctx)
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
     avctx->coded_frame->key_frame = 1;
 
+    s->bps = desc->comp[0].depth_minus1 + 1;
+    s->yuv = !(desc->flags & AV_PIX_FMT_FLAG_RGB) && desc->nb_components >= 2;
+    s->chroma = desc->nb_components > 2;
+    s->alpha = !!(desc->flags & AV_PIX_FMT_FLAG_ALPHA);
+    av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt,
+                                     &s->chroma_h_shift,
+                                     &s->chroma_v_shift);
+
     switch (avctx->pix_fmt) {
     case AV_PIX_FMT_YUV420P:
     case AV_PIX_FMT_YUV422P:
         if (s->width & 1) {
             av_log(avctx, AV_LOG_ERROR, "Width must be even for this colorspace.\n");
-            return -1;
+            return AVERROR(EINVAL);
         }
         s->bitstream_bpp = avctx->pix_fmt == AV_PIX_FMT_YUV420P ? 12 : 16;
         break;
+    case AV_PIX_FMT_YUV444P:
+    case AV_PIX_FMT_YUV410P:
+    case AV_PIX_FMT_YUV411P:
+    case AV_PIX_FMT_YUV440P:
+    case AV_PIX_FMT_GBRP:
+    case AV_PIX_FMT_GBRP9:
+    case AV_PIX_FMT_GBRP10:
+    case AV_PIX_FMT_GBRP12:
+    case AV_PIX_FMT_GBRP14:
+    case AV_PIX_FMT_GBRP16:
+    case AV_PIX_FMT_GRAY8:
+    case AV_PIX_FMT_GRAY16:
+    case AV_PIX_FMT_YUVA444P:
+    case AV_PIX_FMT_YUVA420P:
+    case AV_PIX_FMT_YUVA422P:
+    case AV_PIX_FMT_GBRAP:
+    case AV_PIX_FMT_GRAY8A:
+    case AV_PIX_FMT_YUV420P9:
+    case AV_PIX_FMT_YUV420P10:
+    case AV_PIX_FMT_YUV420P12:
+    case AV_PIX_FMT_YUV420P14:
+    case AV_PIX_FMT_YUV420P16:
+    case AV_PIX_FMT_YUV422P9:
+    case AV_PIX_FMT_YUV422P10:
+    case AV_PIX_FMT_YUV422P12:
+    case AV_PIX_FMT_YUV422P14:
+    case AV_PIX_FMT_YUV422P16:
+    case AV_PIX_FMT_YUV444P9:
+    case AV_PIX_FMT_YUV444P10:
+    case AV_PIX_FMT_YUV444P12:
+    case AV_PIX_FMT_YUV444P14:
+    case AV_PIX_FMT_YUV444P16:
+    case AV_PIX_FMT_YUVA420P9:
+    case AV_PIX_FMT_YUVA420P10:
+    case AV_PIX_FMT_YUVA420P16:
+    case AV_PIX_FMT_YUVA422P9:
+    case AV_PIX_FMT_YUVA422P10:
+    case AV_PIX_FMT_YUVA422P16:
+    case AV_PIX_FMT_YUVA444P9:
+    case AV_PIX_FMT_YUVA444P10:
+    case AV_PIX_FMT_YUVA444P16:
+        s->version = 3;
+        break;
     case AV_PIX_FMT_RGB32:
         s->bitstream_bpp = 32;
         break;
@@ -177,10 +304,13 @@ static av_cold int encode_init(AVCodecContext *avctx)
         break;
     default:
         av_log(avctx, AV_LOG_ERROR, "format not supported\n");
-        return -1;
+        return AVERROR(EINVAL);
     }
+    s->n = 1<<s->bps;
+    s->vlc_n = FFMIN(s->n, MAX_VLC_N);
+
     avctx->bits_per_coded_sample = s->bitstream_bpp;
-    s->decorrelate = s->bitstream_bpp >= 24;
+    s->decorrelate = s->bitstream_bpp >= 24 && !s->yuv && !(desc->flags & AV_PIX_FMT_FLAG_PLANAR);
     s->predictor = avctx->prediction_method;
     s->interlaced = avctx->flags&CODEC_FLAG_INTERLACED_ME ? 1 : 0;
     if (avctx->context_model == 1) {
@@ -189,7 +319,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
             av_log(avctx, AV_LOG_ERROR,
                    "context=1 is not compatible with "
                    "2 pass huffyuv encoding\n");
-            return -1;
+            return AVERROR(EINVAL);
         }
     }else s->context= 0;
 
@@ -198,45 +328,66 @@ static av_cold int encode_init(AVCodecContext *avctx)
             av_log(avctx, AV_LOG_ERROR,
                    "Error: YV12 is not supported by huffyuv; use "
                    "vcodec=ffvhuff or format=422p\n");
-            return -1;
+            return AVERROR(EINVAL);
         }
         if (avctx->context_model) {
             av_log(avctx, AV_LOG_ERROR,
                    "Error: per-frame huffman tables are not supported "
                    "by huffyuv; use vcodec=ffvhuff\n");
-            return -1;
+            return AVERROR(EINVAL);
+        }
+        if (s->version > 2) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Error: ver>2 is not supported "
+                   "by huffyuv; use vcodec=ffvhuff\n");
+            return AVERROR(EINVAL);
         }
         if (s->interlaced != ( s->height > 288 ))
             av_log(avctx, AV_LOG_INFO,
                    "using huffyuv 2.2.0 or newer interlacing flag\n");
     }
 
-    if (s->bitstream_bpp >= 24 && s->predictor == MEDIAN) {
+    if (s->version > 3 && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
+        av_log(avctx, AV_LOG_ERROR, "Ver > 3 is under development, files encoded with it may not be decodable with future versions!!!\n"
+               "Use vstrict=-2 / -strict -2 to use it anyway.\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (s->bitstream_bpp >= 24 && s->predictor == MEDIAN && s->version <= 2) {
         av_log(avctx, AV_LOG_ERROR,
                "Error: RGB is incompatible with median predictor\n");
-        return -1;
+        return AVERROR(EINVAL);
     }
 
     ((uint8_t*)avctx->extradata)[0] = s->predictor | (s->decorrelate << 6);
-    ((uint8_t*)avctx->extradata)[1] = s->bitstream_bpp;
     ((uint8_t*)avctx->extradata)[2] = s->interlaced ? 0x10 : 0x20;
     if (s->context)
         ((uint8_t*)avctx->extradata)[2] |= 0x40;
-    ((uint8_t*)avctx->extradata)[3] = 0;
+    if (s->version < 3) {
+        ((uint8_t*)avctx->extradata)[1] = s->bitstream_bpp;
+        ((uint8_t*)avctx->extradata)[3] = 0;
+    } else {
+        ((uint8_t*)avctx->extradata)[1] = ((s->bps-1)<<4) | s->chroma_h_shift | (s->chroma_v_shift<<2);
+        if (s->chroma)
+            ((uint8_t*)avctx->extradata)[2] |= s->yuv ? 1 : 2;
+        if (s->alpha)
+            ((uint8_t*)avctx->extradata)[2] |= 4;
+        ((uint8_t*)avctx->extradata)[3] = 1;
+    }
     s->avctx->extradata_size = 4;
 
     if (avctx->stats_in) {
         char *p = avctx->stats_in;
 
-        for (i = 0; i < 3; i++)
-            for (j = 0; j < 256; j++)
+        for (i = 0; i < 4; i++)
+            for (j = 0; j < s->vlc_n; j++)
                 s->stats[i][j] = 1;
 
         for (;;) {
-            for (i = 0; i < 3; i++) {
+            for (i = 0; i < 4; i++) {
                 char *next;
 
-                for (j = 0; j < 256; j++) {
+                for (j = 0; j < s->vlc_n; j++) {
                     s->stats[i][j] += strtol(p, &next, 0);
                     if (next == p) return -1;
                     p = next;
@@ -245,40 +396,37 @@ static av_cold int encode_init(AVCodecContext *avctx)
             if (p[0] == 0 || p[1] == 0 || p[2] == 0) break;
         }
     } else {
-        for (i = 0; i < 3; i++)
-            for (j = 0; j < 256; j++) {
-                int d = FFMIN(j, 256 - j);
+        for (i = 0; i < 4; i++)
+            for (j = 0; j < s->vlc_n; j++) {
+                int d = FFMIN(j, s->vlc_n - j);
 
-                s->stats[i][j] = 100000000 / (d + 1);
+                s->stats[i][j] = 100000000 / (d*d + 1);
             }
     }
 
-    for (i = 0; i < 3; i++) {
-        ff_huff_gen_len_table(s->len[i], s->stats[i]);
-
-        if (ff_huffyuv_generate_bits_table(s->bits[i], s->len[i]) < 0) {
-            return -1;
-        }
-
-        s->avctx->extradata_size +=
-            store_table(s, s->len[i], &((uint8_t*)s->avctx->extradata)[s->avctx->extradata_size]);
-    }
+    ret = store_huffman_tables(s, s->avctx->extradata + s->avctx->extradata_size);
+    if (ret < 0)
+        return ret;
+    s->avctx->extradata_size += ret;
 
     if (s->context) {
-        for (i = 0; i < 3; i++) {
+        for (i = 0; i < 4; i++) {
             int pels = s->width * s->height / (i ? 40 : 10);
-            for (j = 0; j < 256; j++) {
-                int d = FFMIN(j, 256 - j);
-                s->stats[i][j] = pels/(d + 1);
+            for (j = 0; j < s->vlc_n; j++) {
+                int d = FFMIN(j, s->vlc_n - j);
+                s->stats[i][j] = pels/(d*d + 1);
             }
         }
     } else {
-        for (i = 0; i < 3; i++)
-            for (j = 0; j < 256; j++)
+        for (i = 0; i < 4; i++)
+            for (j = 0; j < s->vlc_n; j++)
                 s->stats[i][j]= 0;
     }
 
-    ff_huffyuv_alloc_temp(s);
+    if (ff_huffyuv_alloc_temp(s)) {
+        ff_huffyuv_common_end(s);
+        return AVERROR(ENOMEM);
+    }
 
     s->picture_number=0;
 
@@ -339,6 +487,115 @@ static int encode_422_bitstream(HYuvContext *s, int offset, int count)
     return 0;
 }
 
+static int encode_plane_bitstream(HYuvContext *s, int count, int plane) /* codes the samples held in s->temp[0] / s->temp16[0] with the tables of index `plane`; returns 0, or -1 when the space check below fails */
+{
+    int i;
+
+    if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) < count * s->bps / 2) { /* bytes left in the bit writer vs. count * bps / 2 */
+        av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
+        return -1;
+    }
+
+#define LOAD2\
+            int y0 = s->temp[0][2 * i];\
+            int y1 = s->temp[0][2 * i + 1];
+#define LOAD2_14\
+            int y0 = s->temp16[0][2 * i] & mask;\
+            int y1 = s->temp16[0][2 * i + 1] & mask;
+#define LOAD2_16\
+            int y0 = s->temp16[0][2 * i];\
+            int y1 = s->temp16[0][2 * i + 1];
+#define STAT2\
+            s->stats[plane][y0]++;\
+            s->stats[plane][y1]++;
+#define STAT2_16\
+            s->stats[plane][y0>>2]++;\
+            s->stats[plane][y1>>2]++;
+#define WRITE2\
+            put_bits(&s->pb, s->len[plane][y0], s->bits[plane][y0]);\
+            put_bits(&s->pb, s->len[plane][y1], s->bits[plane][y1]);
+#define WRITE2_16\
+            put_bits(&s->pb, s->len[plane][y0>>2], s->bits[plane][y0>>2]);\
+            put_bits(&s->pb, 2, y0&3);\
+            put_bits(&s->pb, s->len[plane][y1>>2], s->bits[plane][y1>>2]);\
+            put_bits(&s->pb, 2, y1&3);
+
+    count /= 2; /* the loops below consume two samples per iteration; an odd trailing sample is not visited */
+
+    if (s->bps <= 8) { /* byte samples read from s->temp[0] */
+    if (s->flags & CODEC_FLAG_PASS1) { /* PASS1 flag set: count symbol occurrences */
+        for (i = 0; i < count; i++) {
+            LOAD2;
+            STAT2;
+        }
+    }
+    if (s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT) /* nothing is written to the bitstream in this mode */
+        return 0;
+
+    if (s->context) { /* context mode also updates the counts while writing */
+        for (i = 0; i < count; i++) {
+            LOAD2;
+            STAT2;
+            WRITE2;
+        }
+    } else {
+        for (i = 0; i < count; i++) {
+            LOAD2;
+            WRITE2;
+        }
+    }
+    } else if (s->bps <= 14) { /* 9..14-bit samples read from s->temp16[0] and masked with s->n - 1 */
+        int mask = s->n - 1;
+        if (s->flags & CODEC_FLAG_PASS1) {
+            for (i = 0; i < count; i++) {
+                LOAD2_14;
+                STAT2;
+            }
+        }
+        if (s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT)
+            return 0;
+
+        if (s->context) {
+            for (i = 0; i < count; i++) {
+                LOAD2_14;
+                STAT2;
+                WRITE2;
+            }
+        } else {
+            for (i = 0; i < count; i++) {
+                LOAD2_14;
+                WRITE2;
+            }
+        }
+    } else { /* deeper samples: tables are indexed by y >> 2 and the low two bits are written raw after each code */
+        if (s->flags & CODEC_FLAG_PASS1) {
+            for (i = 0; i < count; i++) {
+                LOAD2_16;
+                STAT2_16;
+            }
+        }
+        if (s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT)
+            return 0;
+
+        if (s->context) {
+            for (i = 0; i < count; i++) {
+                LOAD2_16;
+                STAT2_16;
+                WRITE2_16;
+            }
+        } else {
+            for (i = 0; i < count; i++) {
+                LOAD2_16;
+                WRITE2_16;
+            }
+        }
+    }
+#undef LOAD2 /* NOTE(review): only these three helpers are undefined; the _14/_16 variants stay defined past this point */
+#undef STAT2
+#undef WRITE2
+    return 0;
+}
+
 static int encode_gray_bitstream(HYuvContext *s, int count)
 {
     int i;
@@ -396,8 +653,8 @@ static inline int encode_bgra_bitstream(HYuvContext *s, int count, int planes)
 
 #define LOAD_GBRA                                                       \
     int g = s->temp[0][planes == 3 ? 3 * i + 1 : 4 * i + G];            \
-    int b = s->temp[0][planes == 3 ? 3 * i + 2 : 4 * i + B] - g & 0xFF; \
-    int r = s->temp[0][planes == 3 ? 3 * i + 0 : 4 * i + R] - g & 0xFF; \
+    int b =(s->temp[0][planes == 3 ? 3 * i + 2 : 4 * i + B] - g) & 0xFF;\
+    int r =(s->temp[0][planes == 3 ? 3 * i + 0 : 4 * i + R] - g) & 0xFF;\
     int a = s->temp[0][planes * i + A];
 
 #define STAT_BGRA                                                       \
@@ -448,22 +705,16 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     const AVFrame * const p = pict;
     int i, j, size = 0, ret;
 
-    if (!pkt->data &&
-        (ret = av_new_packet(pkt, width * height * 3 * 4 + FF_MIN_BUFFER_SIZE)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error allocating output packet.\n");
+    if ((ret = ff_alloc_packet2(avctx, pkt, width * height * 3 * 4 + FF_MIN_BUFFER_SIZE)) < 0)
         return ret;
-    }
 
     if (s->context) {
-        for (i = 0; i < 3; i++) {
-            ff_huff_gen_len_table(s->len[i], s->stats[i]);
-            if (ff_huffyuv_generate_bits_table(s->bits[i], s->len[i]) < 0)
-                return -1;
-            size += store_table(s, s->len[i], &pkt->data[size]);
-        }
+        size = store_huffman_tables(s, pkt->data);
+        if (size < 0)
+            return size;
 
-        for (i = 0; i < 3; i++)
-            for (j = 0; j < 256; j++)
+        for (i = 0; i < 4; i++)
+            for (j = 0; j < s->vlc_n; j++)
                 s->stats[i][j] >>= 1;
     }
 
@@ -631,6 +882,59 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
             }
             encode_bgra_bitstream(s, width, 3);
         }
+    } else if (s->version > 2) {
+        int plane;
+        for (plane = 0; plane < 1 + 2*s->chroma + s->alpha; plane++) {
+            int left, y;
+            int w = width;
+            int h = height;
+            int fake_stride = fake_ystride;
+
+            if (s->chroma && (plane == 1 || plane == 2)) {
+                w >>= s->chroma_h_shift;
+                h >>= s->chroma_v_shift;
+                fake_stride = plane == 1 ? fake_ustride : fake_vstride;
+            }
+
+            left = sub_left_prediction(s, s->temp[0], p->data[plane], w , 0);
+
+            encode_plane_bitstream(s, w, plane);
+
+            if (s->predictor==MEDIAN) {
+                int lefttop;
+                y = 1;
+                if (s->interlaced) {
+                    left = sub_left_prediction(s, s->temp[0], p->data[plane] + p->linesize[plane], w , left);
+
+                    encode_plane_bitstream(s, w, plane);
+                    y++;
+                }
+
+                lefttop = p->data[plane][0];
+
+                for (; y < h; y++) {
+                    uint8_t *dst = p->data[plane] + p->linesize[plane] * y;
+
+                    sub_median_prediction(s, s->temp[0], dst - fake_stride, dst, w , &left, &lefttop);
+
+                    encode_plane_bitstream(s, w, plane);
+                }
+            } else {
+                for (y = 1; y < h; y++) {
+                    uint8_t *dst = p->data[plane] + p->linesize[plane] * y;
+
+                    if (s->predictor == PLANE && s->interlaced < y) {
+                        diff_bytes(s, s->temp[1], dst, dst - fake_stride, w);
+
+                        left = sub_left_prediction(s, s->temp[0], s->temp[1], w , left);
+                    } else {
+                        left = sub_left_prediction(s, s->temp[0], dst, w , left);
+                    }
+
+                    encode_plane_bitstream(s, w, plane);
+                }
+            }
+        }
     } else {
         av_log(avctx, AV_LOG_ERROR, "Format not supported!\n");
     }
@@ -644,17 +948,19 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     if ((s->flags&CODEC_FLAG_PASS1) && (s->picture_number & 31) == 0) {
         int j;
         char *p = avctx->stats_out;
-        char *end = p + 1024*30;
-        for (i = 0; i < 3; i++) {
-            for (j = 0; j < 256; j++) {
+        char *end = p + STATS_OUT_SIZE;
+        for (i = 0; i < 4; i++) {
+            for (j = 0; j < s->vlc_n; j++) {
                 snprintf(p, end-p, "%"PRIu64" ", s->stats[i][j]);
                 p += strlen(p);
                 s->stats[i][j]= 0;
             }
             snprintf(p, end-p, "\n");
             p++;
+            if (end <= p)
+                return AVERROR(ENOMEM);
         }
-    } else
+    } else if (avctx->stats_out)
         avctx->stats_out[0] = '\0';
     if (!(s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT)) {
         flush_put_bits(&s->pb);
@@ -693,6 +999,7 @@ AVCodec ff_huffyuv_encoder = {
     .init           = encode_init,
     .encode2        = encode_frame,
     .close          = encode_end,
+    .capabilities   = CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
     .pix_fmts       = (const enum AVPixelFormat[]){
         AV_PIX_FMT_YUV422P, AV_PIX_FMT_RGB24,
         AV_PIX_FMT_RGB32, AV_PIX_FMT_NONE
@@ -709,8 +1016,23 @@ AVCodec ff_ffvhuff_encoder = {
     .init           = encode_init,
     .encode2        = encode_frame,
     .close          = encode_end,
+    .capabilities   = CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
     .pix_fmts       = (const enum AVPixelFormat[]){
-        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_RGB24,
+        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV411P,
+        AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV440P,
+        AV_PIX_FMT_GBRP,
+        AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14,
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY16,
+        AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P,
+        AV_PIX_FMT_GBRAP,
+        AV_PIX_FMT_GRAY8A,
+        AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV420P16,
+        AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV422P16,
+        AV_PIX_FMT_YUV444P9, AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV444P14, AV_PIX_FMT_YUV444P16,
+        AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA420P16,
+        AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA422P16,
+        AV_PIX_FMT_YUVA444P9, AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_YUVA444P16,
+        AV_PIX_FMT_RGB24,
         AV_PIX_FMT_RGB32, AV_PIX_FMT_NONE
     },
 };
diff --git a/libavcodec/huffyuvencdsp.c b/libavcodec/huffyuvencdsp.c
index 6c30877..95fcc19 100644
--- a/libavcodec/huffyuvencdsp.c
+++ b/libavcodec/huffyuvencdsp.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,12 +25,12 @@
 #define pb_7f (~0UL / 255 * 0x7f)
 #define pb_80 (~0UL / 255 * 0x80)
 
-static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w)
+static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w)
 {
     long i;
 
 #if !HAVE_FAST_UNALIGNED
-    if ((long) src2 & (sizeof(long) - 1)) {
+    if (((long)src1 | (long)src2) & (sizeof(long) - 1)) {
         for (i = 0; i + 7 < w; i += 8) {
             dst[i + 0] = src1[i + 0] - src2[i + 0];
             dst[i + 1] = src1[i + 1] - src2[i + 1];
diff --git a/libavcodec/huffyuvencdsp.h b/libavcodec/huffyuvencdsp.h
index 603c36f..3a49b4a 100644
--- a/libavcodec/huffyuvencdsp.h
+++ b/libavcodec/huffyuvencdsp.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -23,8 +23,8 @@
 
 typedef struct HuffYUVEncDSPContext {
     void (*diff_bytes)(uint8_t *dst /* align 16 */,
-                       uint8_t *src1 /* align 16 */,
-                       uint8_t *src2 /* align 1 */,
+                       const uint8_t *src1 /* align 16 */,
+                       const uint8_t *src2 /* align 1 */,
                        int w);
     /**
      * Subtract HuffYUV's variant of median prediction.
diff --git a/libavcodec/idcinvideo.c b/libavcodec/idcinvideo.c
index 70c98d6..80c7d07 100644
--- a/libavcodec/idcinvideo.c
+++ b/libavcodec/idcinvideo.c
@@ -2,20 +2,20 @@
  * id Quake II CIN Video Decoder
  * Copyright (C) 2003 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -76,7 +76,7 @@ typedef struct IdcinContext {
     uint32_t pal[256];
 } IdcinContext;
 
-/*
+/**
  * Find the lowest probability node in a Huffman table, and mark it as
  * being assigned to a higher probability.
  * @return the node index of the lowest unused node, or -1 if all nodes
@@ -170,7 +170,7 @@ static av_cold int idcin_decode_init(AVCodecContext *avctx)
     return 0;
 }
 
-static void idcin_decode_vlcs(IdcinContext *s, AVFrame *frame)
+static int idcin_decode_vlcs(IdcinContext *s, AVFrame *frame)
 {
     hnode *hnodes;
     long x, y;
@@ -189,7 +189,7 @@ static void idcin_decode_vlcs(IdcinContext *s, AVFrame *frame)
                 if(!bit_pos) {
                     if(dat_pos >= s->size) {
                         av_log(s->avctx, AV_LOG_ERROR, "Huffman decode error.\n");
-                        return;
+                        return -1;
                     }
                     bit_pos = 8;
                     v = s->buf[dat_pos++];
@@ -204,6 +204,8 @@ static void idcin_decode_vlcs(IdcinContext *s, AVFrame *frame)
             prev = node_num;
         }
     }
+
+    return 0;
 }
 
 static int idcin_decode_frame(AVCodecContext *avctx,
@@ -220,12 +222,11 @@ static int idcin_decode_frame(AVCodecContext *avctx,
     s->buf = buf;
     s->size = buf_size;
 
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "  id CIN Video: get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
-    idcin_decode_vlcs(s, frame);
+    if (idcin_decode_vlcs(s, frame))
+        return AVERROR_INVALIDDATA;
 
     if (pal) {
         frame->palette_has_changed = 1;
diff --git a/libavcodec/idctdsp.c b/libavcodec/idctdsp.c
index 9dbeba1..1c7a2aa 100644
--- a/libavcodec/idctdsp.c
+++ b/libavcodec/idctdsp.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -24,6 +24,7 @@
 #include "faanidct.h"
 #include "idctdsp.h"
 #include "simple_idct.h"
+#include "xvididct.h"
 
 av_cold void ff_init_scantable(uint8_t *permutation, ScanTable *st,
                                const uint8_t *src_scantable)
@@ -79,7 +80,7 @@ av_cold void ff_init_scantable_permutation(uint8_t *idct_permutation,
     }
 }
 
-static void put_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
+static void put_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
                                  int line_size)
 {
     int i;
@@ -100,8 +101,40 @@ static void put_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
     }
 }
 
+static void put_pixels_clamped4_c(const int16_t *block, uint8_t *av_restrict pixels,
+                                 int line_size)
+{
+    int i;
+
+    /* store the clamped coefficients as pixels */
+    for(i=0;i<4;i++) {
+        pixels[0] = av_clip_uint8(block[0]);
+        pixels[1] = av_clip_uint8(block[1]);
+        pixels[2] = av_clip_uint8(block[2]);
+        pixels[3] = av_clip_uint8(block[3]);
+
+        pixels += line_size;
+        block += 8;
+    }
+}
+
+static void put_pixels_clamped2_c(const int16_t *block, uint8_t *av_restrict pixels,
+                                 int line_size)
+{
+    int i;
+
+    /* store the clamped coefficients as pixels */
+    for(i=0;i<2;i++) {
+        pixels[0] = av_clip_uint8(block[0]);
+        pixels[1] = av_clip_uint8(block[1]);
+
+        pixels += line_size;
+        block += 8;
+    }
+}
+
 static void put_signed_pixels_clamped_c(const int16_t *block,
-                                        uint8_t *restrict pixels,
+                                        uint8_t *av_restrict pixels,
                                         int line_size)
 {
     int i, j;
@@ -121,7 +154,7 @@ static void put_signed_pixels_clamped_c(const int16_t *block,
     }
 }
 
-static void add_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
+static void add_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
                                  int line_size)
 {
     int i;
@@ -141,6 +174,36 @@ static void add_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
     }
 }
 
+static void add_pixels_clamped4_c(const int16_t *block, uint8_t *av_restrict pixels,
+                          int line_size)
+{
+    int i;
+
+    /* add the coefficients to the pixels and clamp */
+    for(i=0;i<4;i++) {
+        pixels[0] = av_clip_uint8(pixels[0] + block[0]);
+        pixels[1] = av_clip_uint8(pixels[1] + block[1]);
+        pixels[2] = av_clip_uint8(pixels[2] + block[2]);
+        pixels[3] = av_clip_uint8(pixels[3] + block[3]);
+        pixels += line_size;
+        block += 8;
+    }
+}
+
+static void add_pixels_clamped2_c(const int16_t *block, uint8_t *av_restrict pixels,
+                          int line_size)
+{
+    int i;
+
+    /* add the coefficients to the pixels and clamp */
+    for(i=0;i<2;i++) {
+        pixels[0] = av_clip_uint8(pixels[0] + block[0]);
+        pixels[1] = av_clip_uint8(pixels[1] + block[1]);
+        pixels += line_size;
+        block += 8;
+    }
+}
+
 static void jref_idct_put(uint8_t *dest, int line_size, int16_t *block)
 {
     ff_j_rev_dct(block);
@@ -152,17 +215,68 @@ static void jref_idct_add(uint8_t *dest, int line_size, int16_t *block)
     ff_j_rev_dct(block);
     add_pixels_clamped_c(block, dest, line_size);
 }
+static void ff_jref_idct4_put(uint8_t *dest, int line_size, int16_t *block)
+{
+    ff_j_rev_dct4 (block);
+    put_pixels_clamped4_c(block, dest, line_size);
+}
+static void ff_jref_idct4_add(uint8_t *dest, int line_size, int16_t *block)
+{
+    ff_j_rev_dct4 (block);
+    add_pixels_clamped4_c(block, dest, line_size);
+}
+
+static void ff_jref_idct2_put(uint8_t *dest, int line_size, int16_t *block)
+{
+    ff_j_rev_dct2 (block);
+    put_pixels_clamped2_c(block, dest, line_size);
+}
+static void ff_jref_idct2_add(uint8_t *dest, int line_size, int16_t *block)
+{
+    ff_j_rev_dct2 (block);
+    add_pixels_clamped2_c(block, dest, line_size);
+}
+
+static void ff_jref_idct1_put(uint8_t *dest, int line_size, int16_t *block)
+{
+    dest[0] = av_clip_uint8((block[0] + 4)>>3);
+}
+static void ff_jref_idct1_add(uint8_t *dest, int line_size, int16_t *block)
+{
+    dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
+}
 
 av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx)
 {
     const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;
 
-    if (avctx->bits_per_raw_sample == 10) {
-        c->idct_put  = ff_simple_idct_put_10;
-        c->idct_add  = ff_simple_idct_add_10;
-        c->idct      = ff_simple_idct_10;
+    if (avctx->lowres==1) {
+        c->idct_put  = ff_jref_idct4_put;
+        c->idct_add  = ff_jref_idct4_add;
+        c->idct      = ff_j_rev_dct4;
+        c->perm_type = FF_IDCT_PERM_NONE;
+    } else if (avctx->lowres==2) {
+        c->idct_put  = ff_jref_idct2_put;
+        c->idct_add  = ff_jref_idct2_add;
+        c->idct      = ff_j_rev_dct2;
+        c->perm_type = FF_IDCT_PERM_NONE;
+    } else if (avctx->lowres==3) {
+        c->idct_put  = ff_jref_idct1_put;
+        c->idct_add  = ff_jref_idct1_add;
+        c->idct      = ff_j_rev_dct1;
         c->perm_type = FF_IDCT_PERM_NONE;
     } else {
+        if (avctx->bits_per_raw_sample == 10) {
+            c->idct_put              = ff_simple_idct_put_10;
+            c->idct_add              = ff_simple_idct_add_10;
+            c->idct                  = ff_simple_idct_10;
+            c->perm_type             = FF_IDCT_PERM_NONE;
+        } else if (avctx->bits_per_raw_sample == 12) {
+            c->idct_put              = ff_simple_idct_put_12;
+            c->idct_add              = ff_simple_idct_add_12;
+            c->idct                  = ff_simple_idct_12;
+            c->perm_type             = FF_IDCT_PERM_NONE;
+        } else {
         if (avctx->idct_algo == FF_IDCT_INT) {
             c->idct_put  = jref_idct_put;
             c->idct_add  = jref_idct_add;
@@ -179,12 +293,18 @@ av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx)
             c->idct      = ff_simple_idct_8;
             c->perm_type = FF_IDCT_PERM_NONE;
         }
+        }
     }
 
     c->put_pixels_clamped        = put_pixels_clamped_c;
     c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
     c->add_pixels_clamped        = add_pixels_clamped_c;
 
+    if (CONFIG_MPEG4_DECODER && avctx->idct_algo == FF_IDCT_XVIDMMX)
+        ff_xvididct_init(c, avctx);
+
+    if (ARCH_ALPHA)
+        ff_idctdsp_init_alpha(c, avctx, high_bit_depth);
     if (ARCH_ARM)
         ff_idctdsp_init_arm(c, avctx, high_bit_depth);
     if (ARCH_PPC)
diff --git a/libavcodec/idctdsp.h b/libavcodec/idctdsp.h
index b88cc82..04510b1 100644
--- a/libavcodec/idctdsp.h
+++ b/libavcodec/idctdsp.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -97,6 +97,8 @@ typedef struct IDCTDSPContext {
 
 void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx);
 
+void ff_idctdsp_init_alpha(IDCTDSPContext *c, AVCodecContext *avctx,
+                           unsigned high_bit_depth);
 void ff_idctdsp_init_arm(IDCTDSPContext *c, AVCodecContext *avctx,
                          unsigned high_bit_depth);
 void ff_idctdsp_init_ppc(IDCTDSPContext *c, AVCodecContext *avctx,
diff --git a/libavcodec/iff.c b/libavcodec/iff.c
index 112c2ca..f08a0f7 100644
--- a/libavcodec/iff.c
+++ b/libavcodec/iff.c
@@ -1,28 +1,28 @@
 /*
- * IFF PBM/ILBM bitmap decoder
+ * IFF ACBM/DEEP/ILBM/PBM bitmap decoder
  * Copyright (c) 2010 Peter Ross <pross@xvid.org>
  * Copyright (c) 2010 Sebastian Vater <cdgs.basty@googlemail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
  * @file
- * IFF PBM/ILBM bitmap decoder
+ * IFF ACBM/DEEP/ILBM/PBM bitmap decoder
  */
 
 #include <stdint.h>
@@ -33,11 +33,30 @@
 #include "get_bits.h"
 #include "internal.h"
 
+// TODO: masking bits
+typedef enum {
+    MASK_NONE,
+    MASK_HAS_MASK,
+    MASK_HAS_TRANSPARENT_COLOR,
+    MASK_LASSO
+} mask_type;
+
 typedef struct {
     AVFrame *frame;
     int planesize;
     uint8_t * planebuf;
+    uint8_t * ham_buf;      ///< temporary buffer for planar to chunky conversion
+    uint32_t *ham_palbuf;   ///< HAM decode table
+    uint32_t *mask_buf;     ///< temporary buffer for palette indices
+    uint32_t *mask_palbuf;  ///< masking palette table
+    unsigned  compression;  ///< delta compression method used
+    unsigned  bpp;          ///< bits per plane to decode (differs from bits_per_coded_sample if HAM)
+    unsigned  ham;          ///< 0 if non-HAM or number of hold bits (6 for bpp > 6, 4 otherwise)
+    unsigned  flags;        ///< 1 for EHB, 0 if no extra half darkening
+    unsigned  transparency; ///< TODO: transparency color index in palette
+    unsigned  masking;      ///< TODO: masking method used
     int init; // 1 if buffer and palette data already initialized, 0 otherwise
+    int16_t   tvdc[16];     ///< TVDC lookup table
 } IffContext;
 
 #define LUT8_PART(plane, v)                             \
@@ -124,25 +143,180 @@ static av_always_inline uint32_t gray2rgb(const uint32_t x) {
  */
 static int cmap_read_palette(AVCodecContext *avctx, uint32_t *pal)
 {
+    IffContext *s = avctx->priv_data;
     int count, i;
+    const uint8_t *const palette = avctx->extradata + AV_RB16(avctx->extradata);
+    int palette_size = avctx->extradata_size - AV_RB16(avctx->extradata);
 
     if (avctx->bits_per_coded_sample > 8) {
-        av_log(avctx, AV_LOG_ERROR, "bit_per_coded_sample > 8 not supported\n");
+        av_log(avctx, AV_LOG_ERROR, "bits_per_coded_sample > 8 not supported\n");
         return AVERROR_INVALIDDATA;
     }
 
     count = 1 << avctx->bits_per_coded_sample;
     // If extradata is smaller than actually needed, fill the remaining with black.
-    count = FFMIN(avctx->extradata_size / 3, count);
+    count = FFMIN(palette_size / 3, count);
     if (count) {
         for (i = 0; i < count; i++)
-            pal[i] = 0xFF000000 | AV_RB24(avctx->extradata + i * 3);
+            pal[i] = 0xFF000000 | AV_RB24(palette + i*3);
+        if (s->flags && count >= 32) { // EHB
+            for (i = 0; i < 32; i++)
+                pal[i + 32] = 0xFF000000 | (AV_RB24(palette + i*3) & 0xFEFEFE) >> 1;
+            count = FFMAX(count, 64);
+        }
     } else { // Create gray-scale color palette for bps < 8
         count = 1 << avctx->bits_per_coded_sample;
 
         for (i = 0; i < count; i++)
             pal[i] = 0xFF000000 | gray2rgb((i * 255) >> avctx->bits_per_coded_sample);
     }
+    if (s->masking == MASK_HAS_MASK) {
+        memcpy(pal + (1 << avctx->bits_per_coded_sample), pal, count * 4);
+        for (i = 0; i < count; i++)
+            pal[i] &= 0xFFFFFF;
+    } else if (s->masking == MASK_HAS_TRANSPARENT_COLOR &&
+        s->transparency < 1 << avctx->bits_per_coded_sample)
+        pal[s->transparency] &= 0xFFFFFF;
+    return 0;
+}
+
+/**
+ * Extracts the IFF extra context and updates internal
+ * decoder structures.
+ *
+ * @param avctx the AVCodecContext to store the extracted extra context in
+ * @param avpkt the AVPacket to extract extra context from or NULL to use avctx
+ * @return >= 0 in case of success, a negative error code otherwise
+ */
+static int extract_header(AVCodecContext *const avctx,
+                          const AVPacket *const avpkt) {
+    const uint8_t *buf;
+    unsigned buf_size;
+    IffContext *s = avctx->priv_data;
+    int i, palette_size;
+
+    if (avctx->extradata_size < 2) {
+        av_log(avctx, AV_LOG_ERROR, "not enough extradata\n");
+        return AVERROR_INVALIDDATA;
+    }
+    palette_size = avctx->extradata_size - AV_RB16(avctx->extradata);
+
+    if (avpkt) {
+        int image_size;
+        if (avpkt->size < 2)
+            return AVERROR_INVALIDDATA;
+        image_size = avpkt->size - AV_RB16(avpkt->data);
+        buf = avpkt->data;
+        buf_size = bytestream_get_be16(&buf);
+        if (buf_size <= 1 || image_size <= 1) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Invalid image size received: %u -> image data offset: %d\n",
+                   buf_size, image_size);
+            return AVERROR_INVALIDDATA;
+        }
+    } else {
+        buf = avctx->extradata;
+        buf_size = bytestream_get_be16(&buf);
+        if (buf_size <= 1 || palette_size < 0) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Invalid palette size received: %u -> palette data offset: %d\n",
+                   buf_size, palette_size);
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    if (buf_size >= 41) {
+        s->compression  = bytestream_get_byte(&buf);
+        s->bpp          = bytestream_get_byte(&buf);
+        s->ham          = bytestream_get_byte(&buf);
+        s->flags        = bytestream_get_byte(&buf);
+        s->transparency = bytestream_get_be16(&buf);
+        s->masking      = bytestream_get_byte(&buf);
+        for (i = 0; i < 16; i++)
+            s->tvdc[i] = bytestream_get_be16(&buf);
+
+        if (s->masking == MASK_HAS_MASK) {
+            if (s->bpp >= 8 && !s->ham) {
+                avctx->pix_fmt = AV_PIX_FMT_RGB32;
+                av_freep(&s->mask_buf);
+                av_freep(&s->mask_palbuf);
+                s->mask_buf = av_malloc((s->planesize * 32) + FF_INPUT_BUFFER_PADDING_SIZE);
+                if (!s->mask_buf)
+                    return AVERROR(ENOMEM);
+                if (s->bpp > 16) {
+                    av_log(avctx, AV_LOG_ERROR, "bpp %d too large for palette\n", s->bpp);
+                    av_freep(&s->mask_buf);
+                    return AVERROR(ENOMEM);
+                }
+                s->mask_palbuf = av_malloc((2 << s->bpp) * sizeof(uint32_t) + FF_INPUT_BUFFER_PADDING_SIZE);
+                if (!s->mask_palbuf) {
+                    av_freep(&s->mask_buf);
+                    return AVERROR(ENOMEM);
+                }
+            }
+            s->bpp++;
+        } else if (s->masking != MASK_NONE && s->masking != MASK_HAS_TRANSPARENT_COLOR) {
+            av_log(avctx, AV_LOG_ERROR, "Masking not supported\n");
+            return AVERROR_PATCHWELCOME;
+        }
+        if (!s->bpp || s->bpp > 32) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid number of bitplanes: %u\n", s->bpp);
+            return AVERROR_INVALIDDATA;
+        } else if (s->ham >= 8) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid number of hold bits for HAM: %u\n", s->ham);
+            return AVERROR_INVALIDDATA;
+        }
+
+        av_freep(&s->ham_buf);
+        av_freep(&s->ham_palbuf);
+
+        if (s->ham) {
+            int i, count = FFMIN(palette_size / 3, 1 << s->ham);
+            int ham_count;
+            const uint8_t *const palette = avctx->extradata + AV_RB16(avctx->extradata);
+
+            s->ham_buf = av_malloc((s->planesize * 8) + FF_INPUT_BUFFER_PADDING_SIZE);
+            if (!s->ham_buf)
+                return AVERROR(ENOMEM);
+
+            ham_count = 8 * (1 << s->ham);
+            s->ham_palbuf = av_malloc((ham_count << !!(s->masking == MASK_HAS_MASK)) * sizeof (uint32_t) + FF_INPUT_BUFFER_PADDING_SIZE);
+            if (!s->ham_palbuf) {
+                av_freep(&s->ham_buf);
+                return AVERROR(ENOMEM);
+            }
+
+            if (count) { // HAM with color palette attached
+                // prefill with black, then load the palette; the HAM "take direct value" masks stay zero
+                memset(s->ham_palbuf, 0, (1 << s->ham) * 2 * sizeof (uint32_t));
+                for (i=0; i < count; i++) {
+                    s->ham_palbuf[i*2+1] = 0xFF000000 | AV_RL24(palette + i*3);
+                }
+                count = 1 << s->ham;
+            } else { // HAM with grayscale color palette
+                count = 1 << s->ham;
+                for (i=0; i < count; i++) {
+                    s->ham_palbuf[i*2]   = 0xFF000000; // take direct color value from palette
+                    s->ham_palbuf[i*2+1] = 0xFF000000 | av_le2ne32(gray2rgb((i * 255) >> s->ham));
+                }
+            }
+            for (i=0; i < count; i++) {
+                uint32_t tmp = i << (8 - s->ham);
+                tmp |= tmp >> s->ham;
+                s->ham_palbuf[(i+count)*2]     = 0xFF00FFFF; // just modify blue color component
+                s->ham_palbuf[(i+count*2)*2]   = 0xFFFFFF00; // just modify red color component
+                s->ham_palbuf[(i+count*3)*2]   = 0xFFFF00FF; // just modify green color component
+                s->ham_palbuf[(i+count)*2+1]   = 0xFF000000 | tmp << 16;
+                s->ham_palbuf[(i+count*2)*2+1] = 0xFF000000 | tmp;
+                s->ham_palbuf[(i+count*3)*2+1] = 0xFF000000 | tmp << 8;
+            }
+            if (s->masking == MASK_HAS_MASK) {
+                for (i = 0; i < ham_count; i++)
+                    s->ham_palbuf[(1 << s->bpp) + i] = s->ham_palbuf[i] | 0xFF000000;
+            }
+        }
+    }
+
     return 0;
 }
 
@@ -151,6 +325,8 @@ static av_cold int decode_end(AVCodecContext *avctx)
     IffContext *s = avctx->priv_data;
     av_frame_free(&s->frame);
     av_freep(&s->planebuf);
+    av_freep(&s->ham_buf);
+    av_freep(&s->ham_palbuf);
     return 0;
 }
 
@@ -160,11 +336,29 @@ static av_cold int decode_init(AVCodecContext *avctx)
     int err;
 
     if (avctx->bits_per_coded_sample <= 8) {
-        avctx->pix_fmt = (avctx->bits_per_coded_sample < 8 ||
-                          avctx->extradata_size) ? AV_PIX_FMT_PAL8
-                                                 : AV_PIX_FMT_GRAY8;
+        int palette_size;
+
+        if (avctx->extradata_size >= 2)
+            palette_size = avctx->extradata_size - AV_RB16(avctx->extradata);
+        else
+            palette_size = 0;
+        avctx->pix_fmt = (avctx->bits_per_coded_sample < 8) ||
+                         (avctx->extradata_size >= 2 && palette_size) ? AV_PIX_FMT_PAL8 : AV_PIX_FMT_GRAY8;
     } else if (avctx->bits_per_coded_sample <= 32) {
-        avctx->pix_fmt = AV_PIX_FMT_BGR32;
+        if (avctx->codec_tag == MKTAG('R', 'G', 'B', '8')) {
+            avctx->pix_fmt = AV_PIX_FMT_RGB32;
+        } else if (avctx->codec_tag == MKTAG('R', 'G', 'B', 'N')) {
+            avctx->pix_fmt = AV_PIX_FMT_RGB444;
+        } else if (avctx->codec_tag != MKTAG('D', 'E', 'E', 'P')) {
+            if (avctx->bits_per_coded_sample == 24) {
+                avctx->pix_fmt = AV_PIX_FMT_0BGR32;
+            } else if (avctx->bits_per_coded_sample == 32) {
+                avctx->pix_fmt = AV_PIX_FMT_BGR32;
+            } else {
+                avpriv_request_sample(avctx, "unknown bits_per_coded_sample");
+                return AVERROR_PATCHWELCOME;
+            }
+        }
     } else {
         return AVERROR_INVALIDDATA;
     }
@@ -176,12 +370,16 @@ static av_cold int decode_init(AVCodecContext *avctx)
     if (!s->planebuf)
         return AVERROR(ENOMEM);
 
+    s->bpp = avctx->bits_per_coded_sample;
     s->frame = av_frame_alloc();
     if (!s->frame) {
         decode_end(avctx);
         return AVERROR(ENOMEM);
     }
 
+    if ((err = extract_header(avctx, NULL)) < 0)
+        return err;
+
     return 0;
 }
 
@@ -195,6 +393,10 @@ static av_cold int decode_init(AVCodecContext *avctx)
 static void decodeplane8(uint8_t *dst, const uint8_t *buf, int buf_size, int plane)
 {
     const uint64_t *lut = plane8_lut[plane];
+    if (plane >= 8) {
+        av_log(NULL, AV_LOG_WARNING, "Ignoring extra planes beyond 8\n");
+        return;
+    }
     do {
         uint64_t v = AV_RN64A(dst) | lut[*buf++];
         AV_WN64A(dst, v);
@@ -227,6 +429,47 @@ static void decodeplane32(uint32_t *dst, const uint8_t *buf, int buf_size, int p
     } while (--buf_size);
 }
 
+#define DECODE_HAM_PLANE32(x)       \
+    first       = buf[x] << 1;      \
+    second      = buf[(x)+1] << 1;  \
+    delta      &= pal[first++];     \
+    delta      |= pal[first];       \
+    dst[x]      = delta;            \
+    delta      &= pal[second++];    \
+    delta      |= pal[second];      \
+    dst[(x)+1]  = delta
+
+/**
+ * Converts one line of HAM6/8-encoded chunky buffer to 24bpp.
+ *
+ * @param dst the destination 24bpp buffer
+ * @param buf the source 8bpp chunky buffer
+ * @param pal the HAM decode table
+ * @param buf_size the plane size in bytes
+ */
+static void decode_ham_plane32(uint32_t *dst, const uint8_t  *buf,
+                               const uint32_t *const pal, unsigned buf_size)
+{
+    uint32_t delta = pal[1]; /* first palette entry */
+    do {
+        uint32_t first, second;
+        DECODE_HAM_PLANE32(0);
+        DECODE_HAM_PLANE32(2);
+        DECODE_HAM_PLANE32(4);
+        DECODE_HAM_PLANE32(6);
+        buf += 8;
+        dst += 8;
+    } while (--buf_size);
+}
+
+static void lookup_pal_indicies(uint32_t *dst, const uint32_t *buf,
+                         const uint32_t *const pal, unsigned width)
+{
+    do {
+        *dst++ = pal[*buf++];
+    } while (--width);
+}
+
 /**
  * Decode one complete byterun1 encoded line.
  *
@@ -245,124 +488,384 @@ static int decode_byterun(uint8_t *dst, int dst_size,
         unsigned length;
         const int8_t value = *buf++;
         if (value >= 0) {
-            length = value + 1;
-            memcpy(dst + x, buf, FFMIN3(length, dst_size - x, buf_end - buf));
+            length = FFMIN3(value + 1, dst_size - x, buf_end - buf);
+            memcpy(dst + x, buf, length);
             buf += length;
         } else if (value > -128) {
-            length = -value + 1;
-            memset(dst + x, *buf++, FFMIN(length, dst_size - x));
+            length = FFMIN(-value + 1, dst_size - x);
+            memset(dst + x, *buf++, length);
         } else { // noop
             continue;
         }
         x += length;
     }
+    if (x < dst_size) {
+        av_log(NULL, AV_LOG_WARNING, "decode_byterun ended before plane size\n");
+        memset(dst+x, 0, dst_size - x);
+    }
     return buf - buf_start;
 }
 
-static int decode_frame_ilbm(AVCodecContext *avctx,
-                             void *data, int *got_frame,
-                             AVPacket *avpkt)
-{
-    IffContext *s          = avctx->priv_data;
-    const uint8_t *buf     = avpkt->data;
-    int buf_size           = avpkt->size;
-    const uint8_t *buf_end = buf + buf_size;
-    int y, plane, res;
+#define DECODE_RGBX_COMMON(type) /* store pixel length times at (x, y), moving to the next row at width */ \
+    if (!length) { /* zero run length: an extended count follows in the stream */ \
+        length = bytestream2_get_byte(gb); \
+        if (!length) { /* still zero: the count is a 16-bit big-endian value */ \
+            length = bytestream2_get_be16(gb); \
+            if (!length) /* all three counts zero: stop decoding */ \
+                return; \
+        } \
+    } \
+    for (i = 0; i < length; i++) { \
+        *(type *)(dst + y*linesize + x * sizeof(type)) = pixel; \
+        x += 1; \
+        if (x >= width) { \
+            y += 1; \
+            if (y >= height) /* frame is full: leave the calling function */ \
+                return; \
+            x = 0; \
+        } \
+    }
 
-    if ((res = ff_reget_buffer(avctx, s->frame)) < 0)
-        return res;
+/**
+ * Decode RGB8 buffer (run-length codes read from gb, 4 bytes each) into 32-bit pixels
+ * @param[out] dst Destination buffer
+ * @param width Width of destination buffer (pixels)
+ * @param height Height of destination buffer (pixels)
+ * @param linesize Line size of destination buffer (bytes)
+ */
+static void decode_rgb8(GetByteContext *gb, uint8_t *dst, int width, int height, int linesize)
+{
+    int x = 0, y = 0, i, length;
+    while (bytestream2_get_bytes_left(gb) >= 4) {
+        uint32_t pixel = 0xFF000000 | bytestream2_get_be24(gb); /* 24-bit value with the top byte forced to 0xFF */
+        length = bytestream2_get_byte(gb) & 0x7F; /* low 7 bits: run length, 0 selects an extended count */
+        DECODE_RGBX_COMMON(uint32_t)
+    }
+}
 
-    if (!s->init && avctx->bits_per_coded_sample <= 8 &&
-        avctx->pix_fmt != AV_PIX_FMT_GRAY8) {
-        if ((res = cmap_read_palette(avctx, (uint32_t *)s->frame->data[1])) < 0)
-            return res;
+/**
+ * Decode RGBN buffer (run-length codes read from gb, 2 bytes each) into 16-bit pixels
+ * @param[out] dst Destination buffer
+ * @param width Width of destination buffer (pixels)
+ * @param height Height of destination buffer (pixels)
+ * @param linesize Line size of destination buffer (bytes)
+ */
+static void decode_rgbn(GetByteContext *gb, uint8_t *dst, int width, int height, int linesize)
+{
+    int x = 0, y = 0, i, length;
+    while (bytestream2_get_bytes_left(gb) >= 2) {
+        uint32_t pixel = bytestream2_get_be16u(gb);
+        length = pixel & 0x7; /* low 3 bits: run length, 0 selects an extended count */
+        pixel >>= 4; /* keep the upper 12 bits as the stored pixel value */
+        DECODE_RGBX_COMMON(uint16_t)
     }
-    s->init = 1;
+}
 
-    if (avctx->codec_tag == MKTAG('I', 'L', 'B', 'M')) { // interleaved
-        if (avctx->pix_fmt == AV_PIX_FMT_PAL8 || avctx->pix_fmt == AV_PIX_FMT_GRAY8) {
-            for (y = 0; y < avctx->height; y++) {
-                uint8_t *row = &s->frame->data[0][y * s->frame->linesize[0]];
-                memset(row, 0, avctx->width);
-                for (plane = 0; plane < avctx->bits_per_coded_sample && buf < buf_end;
-                     plane++) {
-                    decodeplane8(row, buf, FFMIN(s->planesize, buf_end - buf), plane);
-                    buf += s->planesize;
+/**
+ * Decode DEEP RLE 32-bit buffer; copies are clipped to the current row and to the input
+ * @param[out] dst Destination buffer
+ * @param[in] src Source buffer
+ * @param src_size Source buffer size (bytes)
+ * @param width Width of destination buffer (pixels)
+ * @param height Height of destination buffer (pixels)
+ * @param linesize Line size of destination buffer (bytes)
+ */
+static void decode_deep_rle32(uint8_t *dst, const uint8_t *src, int src_size, int width, int height, int linesize)
+{
+    const uint8_t *src_end = src + src_size;
+    int x = 0, y = 0, i;
+    while (src_end - src >= 5) { /* one opcode byte plus at least one 32-bit pixel */
+        int opcode;
+        opcode = *(int8_t *)src++;
+        if (opcode >= 0) { /* literal run of opcode + 1 pixels */
+            int size = opcode + 1;
+            for (i = 0; i < size; i++) {
+                int length = FFMIN3(size - i, width - x, (src_end - src) / 4); /* never cross the row end or src_end */
+                memcpy(dst + y*linesize + x * 4, src, length * 4);
+                src += length * 4;
+                x += length;
+                i += length; /* NOTE(review): with the loop's own i++ a run split across rows loses one pixel per split - confirm */
+                if (x >= width) {
+                    x = 0;
+                    y += 1;
+                    if (y >= height)
+                        return;
                 }
             }
-        } else { // AV_PIX_FMT_BGR32
-            for (y = 0; y < avctx->height; y++) {
-                uint8_t *row = &s->frame->data[0][y * s->frame->linesize[0]];
-                memset(row, 0, avctx->width << 2);
-                for (plane = 0; plane < avctx->bits_per_coded_sample && buf < buf_end;
-                     plane++) {
-                    decodeplane32((uint32_t *)row, buf,
-                                  FFMIN(s->planesize, buf_end - buf), plane);
-                    buf += s->planesize;
+        } else { /* repeat the next 32-bit pixel -opcode + 1 times */
+            int size = -opcode + 1;
+            uint32_t pixel = AV_RN32(src); /* 4 bytes are available: guaranteed by the loop condition */
+            for (i = 0; i < size; i++) {
+                *(uint32_t *)(dst + y*linesize + x * 4) = pixel;
+                x += 1;
+                if (x >= width) {
+                    x = 0;
+                    y += 1;
+                    if (y >= height)
+                        return;
                 }
             }
-        }
-    } else if (avctx->pix_fmt == AV_PIX_FMT_PAL8 || avctx->pix_fmt == AV_PIX_FMT_GRAY8) { // IFF-PBM
-        for (y = 0; y < avctx->height && buf < buf_end; y++) {
-            uint8_t *row = &s->frame->data[0][y * s->frame->linesize[0]];
-            memcpy(row, buf, FFMIN(avctx->width, buf_end - buf));
-            buf += avctx->width + (avctx->width % 2); // padding if odd
+            src += 4;
         }
     }
+}
 
-    if ((res = av_frame_ref(data, s->frame)) < 0)
-        return res;
-
-    *got_frame = 1;
+/**
+ * Decode DEEP TVDC 32-bit buffer
+ * @param[out] dst Destination buffer
+ * @param[in] src Source buffer
+ * @param src_size Source buffer size (bytes)
+ * @param width Width of destination buffer (pixels)
+ * @param height Height of destination buffer (pixels)
+ * @param linesize Line size of destination buffer (bytes)
+ * @param[in] tvdc TVDC lookup table, indexed by a 4-bit nibble
+ */
+static void decode_deep_tvdc32(uint8_t *dst, const uint8_t *src, int src_size, int width, int height, int linesize, const int16_t *tvdc)
+{
+    int x = 0, y = 0, plane = 0;
+    int8_t pixel = 0; /* running value of the current byte plane */
+    int i, j;
+
+    for (i = 0; i < src_size * 2;) { /* i counts nibbles, not bytes */
+#define GETNIBBLE ((i & 1) ?  (src[i>>1] & 0xF) : (src[i>>1] >> 4))
+        int d = tvdc[GETNIBBLE]; /* table value for this nibble; 0 introduces a run */
+        i++;
+        if (d) {
+            pixel += d; /* add the table value to the running value */
+            dst[y * linesize + x*4 + plane] = pixel;
+            x++;
+        } else {
+            if (i >= src_size * 2) /* no nibble left to hold the run count */
+                return;
+            d = GETNIBBLE + 1; /* next nibble + 1 = how many times to repeat the running value */
+            i++;
+            d = FFMIN(d, width - x); /* do not run past the end of the row */
+            for (j = 0; j < d; j++) {
+                dst[y * linesize + x*4 + plane] = pixel;
+                x++;
+            }
+        }
+        if (x >= width) {
+            plane++; /* a row is coded as 4 successive byte planes */
+            if (plane >= 4) {
+                y++;
+                if (y >= height)
+                    return;
+                plane = 0;
+            }
+            x = 0;
+            pixel = 0;
+            i = (i + 1) & ~1; /* round up to an even nibble index, i.e. the next source byte */
+        }
+    }
+}
 
-    return buf_size;
+static int unsupported(AVCodecContext *avctx)
+{
+    IffContext *s = avctx->priv_data;
+    avpriv_request_sample(avctx, "bitmap (compression %i, bpp %i, ham %i)", s->compression, s->bpp, s->ham); /* name the unhandled combination */
+    return AVERROR_INVALIDDATA; /* decode_frame returns this value unchanged */
 }
 
-static int decode_frame_byterun1(AVCodecContext *avctx,
-                                 void *data, int *got_frame,
-                                 AVPacket *avpkt)
+static int decode_frame(AVCodecContext *avctx,
+                        void *data, int *got_frame,
+                        AVPacket *avpkt)
 {
     IffContext *s          = avctx->priv_data;
-    const uint8_t *buf     = avpkt->data;
-    int buf_size           = avpkt->size;
+    const uint8_t *buf     = avpkt->size >= 2 ? avpkt->data + AV_RB16(avpkt->data) : NULL;
+    const int buf_size     = avpkt->size >= 2 ? avpkt->size - AV_RB16(avpkt->data) : 0;
     const uint8_t *buf_end = buf + buf_size;
     int y, plane, res;
+    GetByteContext gb;
 
+    if ((res = extract_header(avctx, avpkt)) < 0)
+        return res;
     if ((res = ff_reget_buffer(avctx, s->frame)) < 0)
         return res;
-
     if (!s->init && avctx->bits_per_coded_sample <= 8 &&
-        avctx->pix_fmt != AV_PIX_FMT_GRAY8) {
+        avctx->pix_fmt == AV_PIX_FMT_PAL8) {
         if ((res = cmap_read_palette(avctx, (uint32_t *)s->frame->data[1])) < 0)
             return res;
+    } else if (!s->init && avctx->bits_per_coded_sample <= 8 &&
+               avctx->pix_fmt == AV_PIX_FMT_RGB32) {
+        if ((res = cmap_read_palette(avctx, s->mask_palbuf)) < 0)
+            return res;
     }
     s->init = 1;
 
-    if (avctx->codec_tag == MKTAG('I', 'L', 'B', 'M')) { // interleaved
-        if (avctx->pix_fmt == AV_PIX_FMT_PAL8 || avctx->pix_fmt == AV_PIX_FMT_GRAY8) {
-            for (y = 0; y < avctx->height; y++) {
+    switch (s->compression) {
+    case 0:
+        if (avctx->codec_tag == MKTAG('A', 'C', 'B', 'M')) {
+            if (avctx->pix_fmt == AV_PIX_FMT_PAL8 || avctx->pix_fmt == AV_PIX_FMT_GRAY8) {
+                memset(s->frame->data[0], 0, avctx->height * s->frame->linesize[0]);
+                for (plane = 0; plane < s->bpp; plane++) {
+                    for (y = 0; y < avctx->height && buf < buf_end; y++) {
+                        uint8_t *row = &s->frame->data[0][y * s->frame->linesize[0]];
+                        decodeplane8(row, buf, FFMIN(s->planesize, buf_end - buf), plane);
+                        buf += s->planesize;
+                    }
+                }
+            } else if (s->ham) { // HAM to AV_PIX_FMT_BGR32
+                memset(s->frame->data[0], 0, avctx->height * s->frame->linesize[0]);
+                for (y = 0; y < avctx->height; y++) {
+                    uint8_t *row = &s->frame->data[0][y * s->frame->linesize[0]];
+                    memset(s->ham_buf, 0, s->planesize * 8);
+                    for (plane = 0; plane < s->bpp; plane++) {
+                        const uint8_t * start = buf + (plane * avctx->height + y) * s->planesize;
+                        if (start >= buf_end)
+                            break;
+                        decodeplane8(s->ham_buf, start, FFMIN(s->planesize, buf_end - start), plane);
+                    }
+                    decode_ham_plane32((uint32_t *)row, s->ham_buf, s->ham_palbuf, s->planesize);
+                }
+            } else
+                return unsupported(avctx);
+        } else if (avctx->codec_tag == MKTAG('D', 'E', 'E', 'P')) {
+            const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
+            int raw_width = avctx->width * (av_get_bits_per_pixel(desc) >> 3);
+            int x;
+            for (y = 0; y < avctx->height && buf < buf_end; y++) {
                 uint8_t *row = &s->frame->data[0][y * s->frame->linesize[0]];
-                memset(row, 0, avctx->width);
-                for (plane = 0; plane < avctx->bits_per_coded_sample; plane++) {
-                    buf += decode_byterun(s->planebuf, s->planesize, buf, buf_end);
-                    decodeplane8(row, s->planebuf, s->planesize, plane);
+                memcpy(row, buf, FFMIN(raw_width, buf_end - buf));
+                buf += raw_width;
+                if (avctx->pix_fmt == AV_PIX_FMT_BGR32) {
+                    for (x = 0; x < avctx->width; x++)
+                        row[4 * x + 3] = row[4 * x + 3] & 0xF0 | (row[4 * x + 3] >> 4);
                 }
             }
-        } else { // AV_PIX_FMT_BGR32
-            for (y = 0; y < avctx->height; y++) {
-                uint8_t *row = &s->frame->data[0][y * s->frame->linesize[0]];
-                memset(row, 0, avctx->width << 2);
-                for (plane = 0; plane < avctx->bits_per_coded_sample; plane++) {
-                    buf += decode_byterun(s->planebuf, s->planesize, buf, buf_end);
-                    decodeplane32((uint32_t *)row, s->planebuf, s->planesize, plane);
+        } else if (avctx->codec_tag == MKTAG('I', 'L', 'B', 'M')) { // interleaved
+            if (avctx->pix_fmt == AV_PIX_FMT_PAL8 || avctx->pix_fmt == AV_PIX_FMT_GRAY8) {
+                for (y = 0; y < avctx->height; y++) {
+                    uint8_t *row = &s->frame->data[0][y * s->frame->linesize[0]];
+                    memset(row, 0, avctx->width);
+                    for (plane = 0; plane < s->bpp && buf < buf_end; plane++) {
+                        decodeplane8(row, buf, FFMIN(s->planesize, buf_end - buf), plane);
+                        buf += s->planesize;
+                    }
+                }
+            } else if (s->ham) { // HAM to AV_PIX_FMT_BGR32
+                for (y = 0; y < avctx->height; y++) {
+                    uint8_t *row = &s->frame->data[0][y * s->frame->linesize[0]];
+                    memset(s->ham_buf, 0, s->planesize * 8);
+                    for (plane = 0; plane < s->bpp && buf < buf_end; plane++) {
+                        decodeplane8(s->ham_buf, buf, FFMIN(s->planesize, buf_end - buf), plane);
+                        buf += s->planesize;
+                    }
+                    decode_ham_plane32((uint32_t *)row, s->ham_buf, s->ham_palbuf, s->planesize);
+                }
+            } else { // AV_PIX_FMT_BGR32
+                for (y = 0; y < avctx->height; y++) {
+                    uint8_t *row = &s->frame->data[0][y * s->frame->linesize[0]];
+                    memset(row, 0, avctx->width << 2);
+                    for (plane = 0; plane < s->bpp && buf < buf_end; plane++) {
+                        decodeplane32((uint32_t *)row, buf,
+                                      FFMIN(s->planesize, buf_end - buf), plane);
+                        buf += s->planesize;
+                    }
                 }
             }
+        } else if (avctx->codec_tag == MKTAG('P', 'B', 'M', ' ')) { // IFF-PBM
+            if (avctx->pix_fmt == AV_PIX_FMT_PAL8 || avctx->pix_fmt == AV_PIX_FMT_GRAY8) {
+                for (y = 0; y < avctx->height && buf_end > buf; y++) {
+                    uint8_t *row = &s->frame->data[0][y * s->frame->linesize[0]];
+                    memcpy(row, buf, FFMIN(avctx->width, buf_end - buf));
+                    buf += avctx->width + (avctx->width % 2); // padding if odd
+                }
+            } else if (s->ham) { // IFF-PBM: HAM to AV_PIX_FMT_BGR32
+                for (y = 0; y < avctx->height && buf_end > buf; y++) {
+                    uint8_t *row = &s->frame->data[0][y * s->frame->linesize[0]];
+                    memcpy(s->ham_buf, buf, FFMIN(avctx->width, buf_end - buf));
+                    buf += avctx->width + (avctx->width & 1); // padding if odd
+                    decode_ham_plane32((uint32_t *)row, s->ham_buf, s->ham_palbuf, s->planesize);
+                }
+            } else
+                return unsupported(avctx);
         }
-    } else {
-        for (y = 0; y < avctx->height; y++) {
-            uint8_t *row = &s->frame->data[0][y * s->frame->linesize[0]];
-            buf += decode_byterun(row, avctx->width, buf, buf_end);
+        break;
+    case 1:
+        if (avctx->codec_tag == MKTAG('I', 'L', 'B', 'M')) { // interleaved
+            if (avctx->pix_fmt == AV_PIX_FMT_PAL8 || avctx->pix_fmt == AV_PIX_FMT_GRAY8) {
+                for (y = 0; y < avctx->height; y++) {
+                    uint8_t *row = &s->frame->data[0][y * s->frame->linesize[0]];
+                    memset(row, 0, avctx->width);
+                    for (plane = 0; plane < s->bpp; plane++) {
+                        buf += decode_byterun(s->planebuf, s->planesize, buf, buf_end);
+                        decodeplane8(row, s->planebuf, s->planesize, plane);
+                    }
+                }
+            } else if (avctx->bits_per_coded_sample <= 8) { //8-bit (+ mask) to AV_PIX_FMT_BGR32
+                for (y = 0; y < avctx->height; y++) {
+                    uint8_t *row = &s->frame->data[0][y * s->frame->linesize[0]];
+                    memset(s->mask_buf, 0, avctx->width * sizeof(uint32_t));
+                    for (plane = 0; plane < s->bpp; plane++) {
+                        buf += decode_byterun(s->planebuf, s->planesize, buf, buf_end);
+                        decodeplane32(s->mask_buf, s->planebuf, s->planesize, plane);
+                    }
+                    lookup_pal_indicies((uint32_t *)row, s->mask_buf, s->mask_palbuf, avctx->width);
+                }
+            } else if (s->ham) { // HAM to AV_PIX_FMT_BGR32
+                for (y = 0; y < avctx->height; y++) {
+                    uint8_t *row = &s->frame->data[0][y * s->frame->linesize[0]];
+                    memset(s->ham_buf, 0, s->planesize * 8);
+                    for (plane = 0; plane < s->bpp; plane++) {
+                        buf += decode_byterun(s->planebuf, s->planesize, buf, buf_end);
+                        decodeplane8(s->ham_buf, s->planebuf, s->planesize, plane);
+                    }
+                    decode_ham_plane32((uint32_t *)row, s->ham_buf, s->ham_palbuf, s->planesize);
+                }
+            } else { // AV_PIX_FMT_BGR32
+                for (y = 0; y < avctx->height; y++) {
+                    uint8_t *row = &s->frame->data[0][y * s->frame->linesize[0]];
+                    memset(row, 0, avctx->width << 2);
+                    for (plane = 0; plane < s->bpp; plane++) {
+                        buf += decode_byterun(s->planebuf, s->planesize, buf, buf_end);
+                        decodeplane32((uint32_t *)row, s->planebuf, s->planesize, plane);
+                    }
+                }
+            }
+        } else if (avctx->codec_tag == MKTAG('P', 'B', 'M', ' ')) { // IFF-PBM
+            if (avctx->pix_fmt == AV_PIX_FMT_PAL8 || avctx->pix_fmt == AV_PIX_FMT_GRAY8) {
+                for (y = 0; y < avctx->height; y++) {
+                    uint8_t *row = &s->frame->data[0][y * s->frame->linesize[0]];
+                    buf += decode_byterun(row, avctx->width, buf, buf_end);
+                }
+            } else if (s->ham) { // IFF-PBM: HAM to AV_PIX_FMT_BGR32
+                for (y = 0; y < avctx->height; y++) {
+                    uint8_t *row = &s->frame->data[0][y * s->frame->linesize[0]];
+                    buf += decode_byterun(s->ham_buf, avctx->width, buf, buf_end);
+                    decode_ham_plane32((uint32_t *)row, s->ham_buf, s->ham_palbuf, s->planesize);
+                }
+            } else
+                return unsupported(avctx);
+        } else if (avctx->codec_tag == MKTAG('D', 'E', 'E', 'P')) { // IFF-DEEP
+            const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
+            if (av_get_bits_per_pixel(desc) == 32)
+                decode_deep_rle32(s->frame->data[0], buf, buf_size, avctx->width, avctx->height, s->frame->linesize[0]);
+            else
+                return unsupported(avctx);
         }
+        break;
+    case 4:
+        bytestream2_init(&gb, buf, buf_size);
+        if (avctx->codec_tag == MKTAG('R', 'G', 'B', '8'))
+            decode_rgb8(&gb, s->frame->data[0], avctx->width, avctx->height, s->frame->linesize[0]);
+        else if (avctx->codec_tag == MKTAG('R', 'G', 'B', 'N'))
+            decode_rgbn(&gb, s->frame->data[0], avctx->width, avctx->height, s->frame->linesize[0]);
+        else
+            return unsupported(avctx);
+        break;
+    case 5:
+        if (avctx->codec_tag == MKTAG('D', 'E', 'E', 'P')) {
+            const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
+            if (av_get_bits_per_pixel(desc) == 32)
+                decode_deep_tvdc32(s->frame->data[0], buf, buf_size, avctx->width, avctx->height, s->frame->linesize[0], s->tvdc);
+            else
+                return unsupported(avctx);
+        } else
+            return unsupported(avctx);
+        break;
+    default:
+        return unsupported(avctx);
     }
 
     if ((res = av_frame_ref(data, s->frame)) < 0)
@@ -373,26 +876,29 @@ static int decode_frame_byterun1(AVCodecContext *avctx,
     return buf_size;
 }
 
+#if CONFIG_IFF_ILBM_DECODER
 AVCodec ff_iff_ilbm_decoder = {
-    .name           = "iff_ilbm",
-    .long_name      = NULL_IF_CONFIG_SMALL("IFF ILBM"),
+    .name           = "iff",
+    .long_name      = NULL_IF_CONFIG_SMALL("IFF"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_IFF_ILBM,
     .priv_data_size = sizeof(IffContext),
     .init           = decode_init,
     .close          = decode_end,
-    .decode         = decode_frame_ilbm,
+    .decode         = decode_frame, /* same entry point as ff_iff_byterun1_decoder */
     .capabilities   = CODEC_CAP_DR1,
 };
-
+#endif /* CONFIG_IFF_ILBM_DECODER */
+#if CONFIG_IFF_BYTERUN1_DECODER
 AVCodec ff_iff_byterun1_decoder = {
-    .name           = "iff_byterun1",
-    .long_name      = NULL_IF_CONFIG_SMALL("IFF ByteRun1"),
+    .name           = "iff",
+    .long_name      = NULL_IF_CONFIG_SMALL("IFF"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_IFF_BYTERUN1,
     .priv_data_size = sizeof(IffContext),
     .init           = decode_init,
     .close          = decode_end,
-    .decode         = decode_frame_byterun1,
+    .decode         = decode_frame, /* same entry point as ff_iff_ilbm_decoder */
     .capabilities   = CODEC_CAP_DR1,
 };
+#endif /* CONFIG_IFF_BYTERUN1_DECODER */
diff --git a/libavcodec/iirfilter.c b/libavcodec/iirfilter.c
index 40a543d..a2d9d11 100644
--- a/libavcodec/iirfilter.c
+++ b/libavcodec/iirfilter.c
@@ -2,20 +2,20 @@
  * IIR filter
  * Copyright (c) 2008 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -313,6 +313,13 @@ av_cold void ff_iir_filter_free_coeffs(struct FFIIRFilterCoeffs *coeffs)
     av_free(coeffs);
 }
 
+void ff_iir_filter_init(FFIIRFilterContext *f) {
+    f->filter_flt = ff_iir_filter_flt; /* start with the generic C implementation */
+
+    if (HAVE_MIPSFPU) /* NOTE(review): presumably lets the MIPS code replace the pointer - confirm in ff_iir_filter_init_mips */
+        ff_iir_filter_init_mips(f);
+}
+
 #ifdef TEST
 #include <stdio.h>
 
diff --git a/libavcodec/iirfilter.h b/libavcodec/iirfilter.h
index bc65a96..4ea6642 100644
--- a/libavcodec/iirfilter.h
+++ b/libavcodec/iirfilter.h
@@ -2,20 +2,20 @@
  * IIR filter
  * Copyright (c) 2008 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -47,6 +47,29 @@ enum IIRFilterMode{
     FF_FILTER_MODE_BANDSTOP,
 };
 
+typedef struct FFIIRFilterContext { /* holds the filter function pointer set by ff_iir_filter_init() */
+    /**
+    * Perform IIR filtering on floating-point input samples.
+    *
+    * @param coeffs pointer to filter coefficients
+    * @param state  pointer to filter state
+    * @param size   input length
+    * @param src    source samples
+    * @param sstep  source stride
+    * @param dst    filtered samples (destination may be the same as input)
+    * @param dstep  destination stride
+    */
+    void (*filter_flt)(const struct FFIIRFilterCoeffs *coeffs,
+                        struct FFIIRFilterState *state, int size,
+                        const float *src, int sstep, float *dst, int dstep);
+} FFIIRFilterContext;
+
+/**
+ * Initialize FFIIRFilterContext
+ */
+void ff_iir_filter_init(FFIIRFilterContext *f);
+void ff_iir_filter_init_mips(FFIIRFilterContext *f);
+
 /**
  * Initialize filter coefficients.
  *
diff --git a/libavcodec/imc.c b/libavcodec/imc.c
index 41ca8c8..e6a087a 100644
--- a/libavcodec/imc.c
+++ b/libavcodec/imc.c
@@ -4,20 +4,20 @@
  * Copyright (c) 2006 Benjamin Larsson
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -38,6 +38,7 @@
 #include "libavutil/channel_layout.h"
 #include "libavutil/float_dsp.h"
 #include "libavutil/internal.h"
+#include "libavutil/libm.h"
 #include "avcodec.h"
 #include "bswapdsp.h"
 #include "get_bits.h"
@@ -356,7 +357,7 @@ static void imc_decode_level_coefficients(IMCContext *q, int *levlCoeffBuf,
     float tmp, tmp2;
     // maybe some frequency division thingy
 
-    flcoeffs1[0] = 20000.0 / pow (2, levlCoeffBuf[0] * 0.18945); // 0.18945 = log2(10) * 0.05703125
+    flcoeffs1[0] = 20000.0 / exp2 (levlCoeffBuf[0] * 0.18945); // 0.18945 = log2(10) * 0.05703125
     flcoeffs2[0] = log2f(flcoeffs1[0]);
     tmp  = flcoeffs1[0];
     tmp2 = flcoeffs2[0];
@@ -450,8 +451,13 @@ static int bit_allocation(IMCContext *q, IMCChannel *chctx,
     for (i = 0; i < BANDS; i++)
         highest = FFMAX(highest, chctx->flcoeffs1[i]);
 
-    for (i = 0; i < BANDS - 1; i++)
+    for (i = 0; i < BANDS - 1; i++) {
+        if (chctx->flcoeffs5[i] <= 0) {
+            av_log(NULL, AV_LOG_ERROR, "flcoeffs5 %f invalid\n", chctx->flcoeffs5[i]);
+            return AVERROR_INVALIDDATA;
+        }
         chctx->flcoeffs4[i] = chctx->flcoeffs3[i] - log2f(chctx->flcoeffs5[i]);
+    }
     chctx->flcoeffs4[BANDS - 1] = limit;
 
     highest = highest * 0.25;
@@ -887,6 +893,13 @@ static int imc_decode_block(AVCodecContext *avctx, IMCContext *q, int ch)
         imc_decode_level_coefficients2(q, chctx->levlCoeffBuf, chctx->old_floor,
                                        chctx->flcoeffs1, chctx->flcoeffs2);
 
+    for(i=0; i<BANDS; i++) {
+        if(chctx->flcoeffs1[i] > INT_MAX) {
+            av_log(avctx, AV_LOG_ERROR, "scalefactor out of range\n");
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
     memcpy(chctx->old_floor, chctx->flcoeffs1, 32 * sizeof(float));
 
     counter = 0;
@@ -1006,10 +1019,8 @@ static int imc_decode_frame(AVCodecContext *avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = COEFFS;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     for (i = 0; i < avctx->channels; i++) {
         q->out_samples = (float *)frame->extended_data[i];
@@ -1044,7 +1055,15 @@ static av_cold int imc_decode_close(AVCodecContext * avctx)
     return 0;
 }
 
+static av_cold void flush(AVCodecContext *avctx)
+{
+    IMCContext *q = avctx->priv_data;
+
+    q->chctx[0].decoder_reset =
+    q->chctx[1].decoder_reset = 1; /* set the reset flag on both channel contexts */
+}
 
+#if CONFIG_IMC_DECODER
 AVCodec ff_imc_decoder = {
     .name           = "imc",
     .long_name      = NULL_IF_CONFIG_SMALL("IMC (Intel Music Coder)"),
@@ -1054,11 +1073,13 @@ AVCodec ff_imc_decoder = {
     .init           = imc_decode_init,
     .close          = imc_decode_close,
     .decode         = imc_decode_frame,
+    .flush          = flush,
     .capabilities   = CODEC_CAP_DR1,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
 };
-
+#endif
+#if CONFIG_IAC_DECODER
 AVCodec ff_iac_decoder = {
     .name           = "iac",
     .long_name      = NULL_IF_CONFIG_SMALL("IAC (Indeo Audio Coder)"),
@@ -1068,7 +1089,9 @@ AVCodec ff_iac_decoder = {
     .init           = imc_decode_init,
     .close          = imc_decode_close,
     .decode         = imc_decode_frame,
+    .flush          = flush,
     .capabilities   = CODEC_CAP_DR1,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
 };
+#endif
diff --git a/libavcodec/imcdata.h b/libavcodec/imcdata.h
index 8e99391..64e7c71 100644
--- a/libavcodec/imcdata.h
+++ b/libavcodec/imcdata.h
@@ -4,20 +4,20 @@
  * Copyright (c) 2006 Benjamin Larsson
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/imgconvert.c b/libavcodec/imgconvert.c
index 365dc54..158bc73 100644
--- a/libavcodec/imgconvert.c
+++ b/libavcodec/imgconvert.c
@@ -2,20 +2,20 @@
  * Misc image conversion routines
  * Copyright (c) 2001, 2002, 2003 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -34,6 +34,7 @@
 #include "imgconvert.h"
 #include "internal.h"
 #include "mathops.h"
+#include "libavutil/avassert.h"
 #include "libavutil/colorspace.h"
 #include "libavutil/common.h"
 #include "libavutil/pixdesc.h"
@@ -42,121 +43,49 @@
 void avcodec_get_chroma_sub_sample(enum AVPixelFormat pix_fmt, int *h_shift, int *v_shift)
 {
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
+    av_assert0(desc); /* the lookup result is dereferenced right below, so it must not be NULL */
     *h_shift = desc->log2_chroma_w;
     *v_shift = desc->log2_chroma_h;
 }
 
-static int is_gray(const AVPixFmtDescriptor *desc)
-{
-    return desc->nb_components - (desc->flags & AV_PIX_FMT_FLAG_ALPHA) == 1;
-}
-
 int avcodec_get_pix_fmt_loss(enum AVPixelFormat dst_pix_fmt,
                              enum AVPixelFormat src_pix_fmt,
                              int has_alpha)
 {
-    const AVPixFmtDescriptor *src_desc = av_pix_fmt_desc_get(src_pix_fmt);
-    const AVPixFmtDescriptor *dst_desc = av_pix_fmt_desc_get(dst_pix_fmt);
-    int loss, i, nb_components = FFMIN(src_desc->nb_components,
-                                       dst_desc->nb_components);
-
-    /* compute loss */
-    loss = 0;
-
-    if (dst_pix_fmt == src_pix_fmt)
-        return 0;
-
-    for (i = 0; i < nb_components; i++)
-        if (src_desc->comp[i].depth_minus1 > dst_desc->comp[i].depth_minus1)
-            loss |= FF_LOSS_DEPTH;
-
-    if (dst_desc->log2_chroma_w > src_desc->log2_chroma_w ||
-        dst_desc->log2_chroma_h > src_desc->log2_chroma_h)
-        loss |= FF_LOSS_RESOLUTION;
-
-    if ((src_desc->flags & AV_PIX_FMT_FLAG_RGB) != (dst_desc->flags & AV_PIX_FMT_FLAG_RGB))
-        loss |= FF_LOSS_COLORSPACE;
-
-    if (has_alpha && !(dst_desc->flags & AV_PIX_FMT_FLAG_ALPHA) &&
-         (dst_desc->flags & AV_PIX_FMT_FLAG_ALPHA))
-        loss |= FF_LOSS_ALPHA;
-
-    if (dst_pix_fmt == AV_PIX_FMT_PAL8 && !is_gray(src_desc))
-        return loss | FF_LOSS_COLORQUANT;
-
-    if (src_desc->nb_components > dst_desc->nb_components)
-        if (is_gray(dst_desc))
-            loss |= FF_LOSS_CHROMA;
-
-    return loss;
+    return av_get_pix_fmt_loss(dst_pix_fmt, src_pix_fmt, has_alpha); /* pure forwarder: arguments and result pass through unchanged */
 }
 
-static enum AVPixelFormat avcodec_find_best_pix_fmt1(enum AVPixelFormat *pix_fmt_list,
-                                      enum AVPixelFormat src_pix_fmt,
-                                      int has_alpha,
-                                      int loss_mask)
+enum AVPixelFormat avcodec_find_best_pix_fmt_of_2(enum AVPixelFormat dst_pix_fmt1, enum AVPixelFormat dst_pix_fmt2,
+                                            enum AVPixelFormat src_pix_fmt, int has_alpha, int *loss_ptr)
 {
-    int dist, i, loss, min_dist;
-    enum AVPixelFormat dst_pix_fmt;
-
-    /* find exact color match with smallest size */
-    dst_pix_fmt = AV_PIX_FMT_NONE;
-    min_dist = 0x7fffffff;
-    i = 0;
-    while (pix_fmt_list[i] != AV_PIX_FMT_NONE) {
-        enum AVPixelFormat pix_fmt = pix_fmt_list[i];
-
-        if (i > AV_PIX_FMT_NB) {
-            av_log(NULL, AV_LOG_ERROR, "Pixel format list longer than expected, "
-                   "it is either not properly terminated or contains duplicates\n");
-            return AV_PIX_FMT_NONE;
-        }
-
-        loss = avcodec_get_pix_fmt_loss(pix_fmt, src_pix_fmt, has_alpha) & loss_mask;
-        if (loss == 0) {
-            dist = av_get_bits_per_pixel(av_pix_fmt_desc_get(pix_fmt));
-            if (dist < min_dist) {
-                min_dist = dist;
-                dst_pix_fmt = pix_fmt;
-            }
-        }
-        i++;
-    }
-    return dst_pix_fmt;
+    return av_find_best_pix_fmt_of_2(dst_pix_fmt1, dst_pix_fmt2, src_pix_fmt, has_alpha, loss_ptr); /* pure forwarder: loss_ptr is handed on as-is */
 }
 
-enum AVPixelFormat avcodec_find_best_pix_fmt2(enum AVPixelFormat *pix_fmt_list,
+#if AV_HAVE_INCOMPATIBLE_LIBAV_ABI /* this configuration: the name takes a format list */
+enum AVPixelFormat avcodec_find_best_pix_fmt2(const enum AVPixelFormat *pix_fmt_list,
                                             enum AVPixelFormat src_pix_fmt,
-                                            int has_alpha, int *loss_ptr)
+                                            int has_alpha, int *loss_ptr){
+    return avcodec_find_best_pix_fmt_of_list(pix_fmt_list, src_pix_fmt, has_alpha, loss_ptr); /* forwards to the list variant */
+}
+#else /* otherwise: the same name takes exactly two candidate formats */
+enum AVPixelFormat avcodec_find_best_pix_fmt2(enum AVPixelFormat dst_pix_fmt1, enum AVPixelFormat dst_pix_fmt2,
+                                            enum AVPixelFormat src_pix_fmt, int has_alpha, int *loss_ptr)
 {
-    enum AVPixelFormat dst_pix_fmt;
-    int loss_mask, i;
-    static const int loss_mask_order[] = {
-        ~0, /* no loss first */
-        ~FF_LOSS_ALPHA,
-        ~FF_LOSS_RESOLUTION,
-        ~(FF_LOSS_COLORSPACE | FF_LOSS_RESOLUTION),
-        ~FF_LOSS_COLORQUANT,
-        ~FF_LOSS_DEPTH,
-        0,
-    };
-
-    /* try with successive loss */
-    i = 0;
-    for(;;) {
-        loss_mask = loss_mask_order[i++];
-        dst_pix_fmt = avcodec_find_best_pix_fmt1(pix_fmt_list, src_pix_fmt,
-                                                 has_alpha, loss_mask);
-        if (dst_pix_fmt >= 0)
-            goto found;
-        if (loss_mask == 0)
-            break;
-    }
-    return AV_PIX_FMT_NONE;
- found:
-    if (loss_ptr)
-        *loss_ptr = avcodec_get_pix_fmt_loss(dst_pix_fmt, src_pix_fmt, has_alpha);
-    return dst_pix_fmt;
+    return avcodec_find_best_pix_fmt_of_2(dst_pix_fmt1, dst_pix_fmt2, src_pix_fmt, has_alpha, loss_ptr); /* forwards to the pairwise variant */
+}
+#endif /* AV_HAVE_INCOMPATIBLE_LIBAV_ABI */
+
+enum AVPixelFormat avcodec_find_best_pix_fmt_of_list(const enum AVPixelFormat *pix_fmt_list,
+                                            enum AVPixelFormat src_pix_fmt,
+                                            int has_alpha, int *loss_ptr){ /* best of an AV_PIX_FMT_NONE-terminated list, found pairwise */
+    int i, loss = loss_ptr ? *loss_ptr : 0; /* scratch copy; *loss_ptr is in (mask) / out (loss) for the helper */
+    enum AVPixelFormat best = AV_PIX_FMT_NONE;
+    for (i = 0; pix_fmt_list[i] != AV_PIX_FMT_NONE; i++) {
+        loss = loss_ptr ? *loss_ptr : 0; /* every candidate is judged against the caller's mask, not the last result */
+        best = avcodec_find_best_pix_fmt_of_2(best, pix_fmt_list[i], src_pix_fmt, has_alpha, &loss);
+    }
+    if (loss_ptr) *loss_ptr = loss; /* publish only the final winner's loss; an empty list leaves it unchanged */
+    return best;
 }
 
 /* 2x2 -> 1x1 */
@@ -248,8 +177,22 @@ void ff_shrink88(uint8_t *dst, int dst_wrap,
 /* return true if yuv planar */
 static inline int is_yuv_planar(const AVPixFmtDescriptor *desc)
 {
-    return (!(desc->flags & AV_PIX_FMT_FLAG_RGB) &&
-             (desc->flags & AV_PIX_FMT_FLAG_PLANAR));
+    int i;
+    int planes[4] = { 0 }; /* planes[n] becomes 1 once some component is stored in plane n */
+
+    if (     desc->flags & AV_PIX_FMT_FLAG_RGB
+        || !(desc->flags & AV_PIX_FMT_FLAG_PLANAR))
+        return 0; /* RGB formats and formats without the PLANAR flag never qualify */
+
+    /* set the used planes */
+    for (i = 0; i < desc->nb_components; i++)
+        planes[desc->comp[i].plane] = 1;
+
+    /* if there is an unused plane, the format is not planar */
+    for (i = 0; i < desc->nb_components; i++) /* only indices 0..nb_components-1 are inspected */
+        if (!planes[i])
+            return 0;
+    return 1; /* each of the first nb_components planes holds at least one component */
 }
 
 int av_picture_crop(AVPicture *dst, const AVPicture *src,
@@ -259,15 +202,23 @@ int av_picture_crop(AVPicture *dst, const AVPicture *src,
     int y_shift;
     int x_shift;
 
-    if (pix_fmt < 0 || pix_fmt >= AV_PIX_FMT_NB || !is_yuv_planar(desc))
+    if (pix_fmt < 0 || pix_fmt >= AV_PIX_FMT_NB)
         return -1;
 
     y_shift = desc->log2_chroma_h;
     x_shift = desc->log2_chroma_w;
 
+    if (is_yuv_planar(desc)) {
     dst->data[0] = src->data[0] + (top_band * src->linesize[0]) + left_band;
     dst->data[1] = src->data[1] + ((top_band >> y_shift) * src->linesize[1]) + (left_band >> x_shift);
     dst->data[2] = src->data[2] + ((top_band >> y_shift) * src->linesize[2]) + (left_band >> x_shift);
+    } else{
+        if(top_band % (1<<y_shift) || left_band % (1<<x_shift))
+            return -1;
+        if(left_band) //FIXME add support for this too
+            return -1;
+        dst->data[0] = src->data[0] + (top_band * src->linesize[0]) + left_band;
+    }
 
     dst->linesize[0] = src->linesize[0];
     dst->linesize[1] = src->linesize[1];
@@ -504,3 +455,31 @@ int avpicture_deinterlace(AVPicture *dst, const AVPicture *src,
 }
 
 #endif /* FF_API_DEINTERLACE */
+
+#ifdef TEST
+
+int main(void){ /* self-check: logs every named pixel format and verifies its alpha flag */
+    int i;
+    int err=0; /* becomes 1 on the first alpha-flag mismatch and is the exit status */
+    int skip = 0; /* length of the current run of values without a named descriptor */
+
+    for (i=0; i<AV_PIX_FMT_NB*2; i++) { /* also probes values in [AV_PIX_FMT_NB, 2*AV_PIX_FMT_NB) */
+        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(i);
+        if(!desc || !desc->name) {
+            skip ++;
+            continue;
+        }
+        if (skip) { /* a gap just ended: report its length once; a gap reaching the end of the range is never reported */
+            av_log(NULL, AV_LOG_INFO, "%3d unused pixel format values\n", skip);
+            skip = 0;
+        }
+        av_log(NULL, AV_LOG_INFO, "pix fmt %s yuv_plan:%d avg_bpp:%d\n", desc->name, is_yuv_planar(desc), av_get_padded_bits_per_pixel(desc));
+        if ((!(desc->flags & AV_PIX_FMT_FLAG_ALPHA)) != (desc->nb_components != 2 && desc->nb_components != 4)) { /* expectation: the ALPHA flag is set exactly for 2- and 4-component formats */
+            av_log(NULL, AV_LOG_ERROR, "Alpha flag mismatch\n");
+            err = 1;
+        }
+    }
+    return err;
+}
+
+#endif
diff --git a/libavcodec/imgconvert.h b/libavcodec/imgconvert.h
index 56d89b2..0ce626d 100644
--- a/libavcodec/imgconvert.h
+++ b/libavcodec/imgconvert.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/imx_dump_header_bsf.c b/libavcodec/imx_dump_header_bsf.c
index 5f5493f..be43fbc 100644
--- a/libavcodec/imx_dump_header_bsf.c
+++ b/libavcodec/imx_dump_header_bsf.c
@@ -2,20 +2,20 @@
  * imx dump header bitstream filter
  * Copyright (c) 2007 Baptiste Coudurier
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -53,7 +53,6 @@ static int imx_dump_header(AVBitStreamFilterContext *bsfc, AVCodecContext *avctx
 }
 
 AVBitStreamFilter ff_imx_dump_header_bsf = {
-    "imxdump",
-    0,
-    imx_dump_header,
+    .name   = "imxdump",
+    .filter = imx_dump_header, /* members not named here are zero-initialized by the designated initializer */
 };
diff --git a/libavcodec/indeo2.c b/libavcodec/indeo2.c
index 4221e9e..cccac44 100644
--- a/libavcodec/indeo2.c
+++ b/libavcodec/indeo2.c
@@ -2,20 +2,20 @@
  * Intel Indeo 2 codec
  * Copyright (c) 2005 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -149,10 +149,8 @@ static int ir2_decode_frame(AVCodecContext *avctx,
     AVFrame * const p    = s->picture;
     int start, ret;
 
-    if ((ret = ff_reget_buffer(avctx, p)) < 0) {
-        av_log(s->avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, p)) < 0)
         return ret;
-    }
 
     start = 48; /* hardcoded for now */
 
diff --git a/libavcodec/indeo2data.h b/libavcodec/indeo2data.h
index ed8d83c..0d6d82f 100644
--- a/libavcodec/indeo2data.h
+++ b/libavcodec/indeo2data.h
@@ -2,20 +2,20 @@
  * Intel Indeo 2 codec
  * copyright (c) 2005 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/indeo3.c b/libavcodec/indeo3.c
index a9c02b2..aa9c30a 100644
--- a/libavcodec/indeo3.c
+++ b/libavcodec/indeo3.c
@@ -2,20 +2,20 @@
  * Indeo Video v3 compatible decoder
  * Copyright (c) 2009 - 2011 Maxim Poliakovski
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -32,6 +32,7 @@
 #include "libavutil/imgutils.h"
 #include "libavutil/intreadwrite.h"
 #include "avcodec.h"
+#include "copy_block.h"
 #include "bytestream.h"
 #include "get_bits.h"
 #include "hpeldsp.h"
@@ -147,15 +148,26 @@ static av_cold void build_requant_tab(void)
 }
 
 
+static av_cold void free_frame_buffers(Indeo3DecodeContext *ctx) /* frees both buffers of all three planes */
+{
+    int p;
+
+    ctx->width = ctx->height = 0; /* the context no longer records a frame size once its buffers are gone */
+
+    for (p = 0; p < 3; p++) {
+        av_freep(&ctx->planes[p].buffers[0]);
+        av_freep(&ctx->planes[p].buffers[1]);
+        ctx->planes[p].pixels[0] = ctx->planes[p].pixels[1] = 0; /* presumably these point into the buffers just freed - confirm */
+    }
+}
+
+
 static av_cold int allocate_frame_buffers(Indeo3DecodeContext *ctx,
-                                          AVCodecContext *avctx)
+                                          AVCodecContext *avctx, int luma_width, int luma_height)
 {
-    int p, luma_width, luma_height, chroma_width, chroma_height;
+    int p, chroma_width, chroma_height;
     int luma_pitch, chroma_pitch, luma_size, chroma_size;
 
-    luma_width  = ctx->width;
-    luma_height = ctx->height;
-
     if (luma_width  < 16 || luma_width  > 640 ||
         luma_height < 16 || luma_height > 480 ||
         luma_width  &  3 || luma_height &   3) {
@@ -164,6 +176,9 @@ static av_cold int allocate_frame_buffers(Indeo3DecodeContext *ctx,
         return AVERROR_INVALIDDATA;
     }
 
+    ctx->width  = luma_width ;
+    ctx->height = luma_height;
+
     chroma_width  = FFALIGN(luma_width  >> 2, 4);
     chroma_height = FFALIGN(luma_height >> 2, 4);
 
@@ -187,6 +202,11 @@ static av_cold int allocate_frame_buffers(Indeo3DecodeContext *ctx,
         ctx->planes[p].buffers[0] = av_malloc(!p ? luma_size : chroma_size);
         ctx->planes[p].buffers[1] = av_malloc(!p ? luma_size : chroma_size);
 
+        if (!ctx->planes[p].buffers[0] || !ctx->planes[p].buffers[1]) {
+            free_frame_buffers(ctx);
+            return AVERROR(ENOMEM);
+        }
+
         /* fill the INTRA prediction lines with the middle pixel value = 64 */
         memset(ctx->planes[p].buffers[0], 0x40, ctx->planes[p].pitch);
         memset(ctx->planes[p].buffers[1], 0x40, ctx->planes[p].pitch);
@@ -201,19 +221,6 @@ static av_cold int allocate_frame_buffers(Indeo3DecodeContext *ctx,
     return 0;
 }
 
-
-static av_cold void free_frame_buffers(Indeo3DecodeContext *ctx)
-{
-    int p;
-
-    for (p = 0; p < 3; p++) {
-        av_freep(&ctx->planes[p].buffers[0]);
-        av_freep(&ctx->planes[p].buffers[1]);
-        ctx->planes[p].pixels[0] = ctx->planes[p].pixels[1] = 0;
-    }
-}
-
-
 /**
  *  Copy pixels of the cell(x + mv_x, y + mv_y) from the previous frame into
  *  the cell(x, y) in the current frame.
@@ -230,8 +237,11 @@ static int copy_cell(Indeo3DecodeContext *ctx, Plane *plane, Cell *cell)
     /* setup output and reference pointers */
     offset_dst  = (cell->ypos << 2) * plane->pitch + (cell->xpos << 2);
     dst         = plane->pixels[ctx->buf_sel] + offset_dst;
+    if(cell->mv_ptr){
     mv_y        = cell->mv_ptr[0];
     mv_x        = cell->mv_ptr[1];
+    }else
+        mv_x= mv_y= 0;
 
     /* -1 because there is an extra line on top for prediction */
     if ((cell->ypos << 2) + mv_y < -1 || (cell->xpos << 2) + mv_x < 0 ||
@@ -333,7 +343,7 @@ if (*data_ptr >= last_ptr) \
 
 #define RLE_BLOCK_COPY \
     if (cell->mv_ptr || !skip_flag) \
-        ctx->hdsp.put_pixels_tab[2][0](dst, ref, row_offset, 4 << v_zoom)
+        copy_block4(dst, ref, row_offset, row_offset, 4 << v_zoom)
 
 #define RLE_BLOCK_COPY_8 \
     pix64 = AV_RN64(ref);\
@@ -345,7 +355,7 @@ if (*data_ptr >= last_ptr) \
         fill_64(dst, pix64, 8, row_offset)
 
 #define RLE_LINES_COPY \
-    ctx->hdsp.put_pixels_tab[2][0](dst, ref, row_offset, num_lines << v_zoom)
+    copy_block4(dst, ref, row_offset, row_offset, num_lines << v_zoom)
 
 #define RLE_LINES_COPY_M10 \
     pix64 = AV_RN64(ref);\
@@ -589,6 +599,7 @@ static int decode_cell(Indeo3DecodeContext *ctx, AVCodecContext *avctx,
     /* setup output and reference pointers */
     offset = (cell->ypos << 2) * plane->pitch + (cell->xpos << 2);
     block  =  plane->pixels[ctx->buf_sel] + offset;
+
     if (!cell->mv_ptr) {
         /* use previous line as reference for INTRA cells */
         ref_block = block - plane->pitch;
@@ -643,7 +654,7 @@ static int decode_cell(Indeo3DecodeContext *ctx, AVCodecContext *avctx,
     /* of the predicted cell in order to avoid overflows. */
     if (vq_index >= 8 && ref_block) {
         for (x = 0; x < cell->width << 2; x++)
-            ref_block[x] = requant_tab[vq_index & 7][ref_block[x]];
+            ref_block[x] = requant_tab[vq_index & 7][ref_block[x] & 127];
     }
 
     error = IV3_NOERR;
@@ -771,7 +782,7 @@ static int parse_bintree(Indeo3DecodeContext *ctx, AVCodecContext *avctx,
             return AVERROR_INVALIDDATA;
     }
 
-    while (1) { /* loop until return */
+    while (get_bits_left(&ctx->gb) >= 2) { /* loop until return */
         RESYNC_BITSTREAM;
         switch (code = get_bits(&ctx->gb, 2)) {
         case H_SPLIT:
@@ -796,6 +807,7 @@ static int parse_bintree(Indeo3DecodeContext *ctx, AVCodecContext *avctx,
                 CHECK_CELL
                 if (!curr_cell.mv_ptr)
                     return AVERROR_INVALIDDATA;
+
                 ret = copy_cell(ctx, plane, &curr_cell);
                 return ret;
             }
@@ -806,6 +818,10 @@ static int parse_bintree(Indeo3DecodeContext *ctx, AVCodecContext *avctx,
                 /* get motion vector index and setup the pointer to the mv set */
                 if (!ctx->need_resync)
                     ctx->next_cell_data = &ctx->gb.buffer[(get_bits_count(&ctx->gb) + 7) >> 3];
+                if (ctx->next_cell_data >= ctx->last_byte) {
+                    av_log(avctx, AV_LOG_ERROR, "motion vector out of array\n");
+                    return AVERROR_INVALIDDATA;
+                }
                 mv_idx = *(ctx->next_cell_data++);
                 if (mv_idx >= ctx->num_vectors) {
                     av_log(avctx, AV_LOG_ERROR, "motion vector index out of range\n");
@@ -832,7 +848,7 @@ static int parse_bintree(Indeo3DecodeContext *ctx, AVCodecContext *avctx,
         }
     }//while
 
-    return 0;
+    return AVERROR_INVALIDDATA;
 }
 
 
@@ -845,13 +861,13 @@ static int decode_plane(Indeo3DecodeContext *ctx, AVCodecContext *avctx,
 
     /* each plane data starts with mc_vector_count field, */
     /* an optional array of motion vectors followed by the vq data */
-    num_vectors = bytestream_get_le32(&data);
+    num_vectors = bytestream_get_le32(&data); data_size -= 4;
     if (num_vectors > 256) {
         av_log(ctx->avctx, AV_LOG_ERROR,
                "Read invalid number of motion vectors %d\n", num_vectors);
         return AVERROR_INVALIDDATA;
     }
-    if (num_vectors * 2 >= data_size)
+    if (num_vectors * 2 > data_size)
         return AVERROR_INVALIDDATA;
 
     ctx->num_vectors = num_vectors;
@@ -862,7 +878,7 @@ static int decode_plane(Indeo3DecodeContext *ctx, AVCodecContext *avctx,
     ctx->skip_bits   = 0;
     ctx->need_resync = 0;
 
-    ctx->last_byte = data + data_size - 1;
+    ctx->last_byte = data + data_size;
 
     /* initialize the 1st cell and set its dimensions to whole plane */
     curr_cell.xpos   = curr_cell.ypos = 0;
@@ -937,14 +953,11 @@ static int decode_frame_headers(Indeo3DecodeContext *ctx, AVCodecContext *avctx,
                    "Invalid picture dimensions: %d x %d!\n", width, height);
             return AVERROR_INVALIDDATA;
         }
-
-        ctx->width  = width;
-        ctx->height = height;
-
         free_frame_buffers(ctx);
-        if ((res = allocate_frame_buffers(ctx, avctx)) < 0)
+        if ((res = allocate_frame_buffers(ctx, avctx, width, height)) < 0)
              return res;
-        ff_set_dimensions(avctx, width, height);
+        if ((res = ff_set_dimensions(avctx, width, height)) < 0)
+            return res;
     }
 
     y_offset = bytestream2_get_le32(&gb);
@@ -1039,17 +1052,13 @@ static av_cold int decode_init(AVCodecContext *avctx)
     Indeo3DecodeContext *ctx = avctx->priv_data;
 
     ctx->avctx     = avctx;
-    ctx->width     = avctx->width;
-    ctx->height    = avctx->height;
     avctx->pix_fmt = AV_PIX_FMT_YUV410P;
 
     build_requant_tab();
 
     ff_hpeldsp_init(&ctx->hdsp, avctx->flags);
 
-    allocate_frame_buffers(ctx, avctx);
-
-    return 0;
+    return allocate_frame_buffers(ctx, avctx, avctx->width, avctx->height);
 }
 
 
@@ -1085,6 +1094,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     /* use BS_BUFFER flag for buffer switching */
     ctx->buf_sel = (ctx->frame_flags >> BS_BUFFER) & 1;
 
+    if ((res = ff_get_buffer(avctx, frame, 0)) < 0)
+        return res;
+
     /* decode luma plane */
     if ((res = decode_plane(ctx, avctx, ctx->planes, ctx->y_data_ptr, ctx->y_data_size, 40)))
         return res;
@@ -1096,11 +1108,6 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     if ((res = decode_plane(ctx, avctx, &ctx->planes[2], ctx->v_data_ptr, ctx->v_data_size, 10)))
         return res;
 
-    if ((res = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(ctx->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
-        return res;
-    }
-
     output_plane(&ctx->planes[0], ctx->buf_sel,
                  frame->data[0], frame->linesize[0],
                  avctx->height);
diff --git a/libavcodec/indeo3data.h b/libavcodec/indeo3data.h
index 28c9bb6..e7e28a3 100644
--- a/libavcodec/indeo3data.h
+++ b/libavcodec/indeo3data.h
@@ -2,20 +2,20 @@
  * Indeo Video v3 compatible decoder
  * Copyright (c) 2009 - 2011 Maxim Poliakovski
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/indeo4.c b/libavcodec/indeo4.c
index 35f266e..ed4272b 100644
--- a/libavcodec/indeo4.c
+++ b/libavcodec/indeo4.c
@@ -2,20 +2,20 @@
  * Indeo Video Interactive v4 compatible decoder
  * Copyright (c) 2009-2011 Maxim Poliakovski
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -183,6 +183,7 @@ static int decode_pic_hdr(IVI45DecContext *ctx, AVCodecContext *avctx)
 
     /* decode subdivision of the planes */
     pic_conf.luma_bands = decode_plane_subdivision(&ctx->gb);
+    pic_conf.chroma_bands = 0;
     if (pic_conf.luma_bands)
         pic_conf.chroma_bands = decode_plane_subdivision(&ctx->gb);
     ctx->is_scalable = pic_conf.luma_bands != 1 || pic_conf.chroma_bands != 1;
@@ -270,6 +271,7 @@ static int decode_band_hdr(IVI45DecContext *ctx, IVIBandDesc *band,
 {
     int plane, band_num, indx, transform_id, scan_indx;
     int i;
+    int quant_mat;
 
     plane    = get_bits(&ctx->gb, 2);
     band_num = get_bits(&ctx->gb, 4);
@@ -327,6 +329,10 @@ static int decode_band_hdr(IVI45DecContext *ctx, IVIBandDesc *band,
                 return AVERROR_PATCHWELCOME;
             }
 
+            if (transform_id < 10 && band->blk_size < 8) {
+                av_log(avctx, AV_LOG_ERROR, "wrong transform size!\n");
+                return AVERROR_INVALIDDATA;
+            }
 #if IVI4_STREAM_ANALYSER
             if ((transform_id >= 0 && transform_id <= 2) || transform_id == 10)
                 ctx->uses_haar = 1;
@@ -335,13 +341,16 @@ static int decode_band_hdr(IVI45DecContext *ctx, IVIBandDesc *band,
             band->inv_transform = transforms[transform_id].inv_trans;
             band->dc_transform  = transforms[transform_id].dc_trans;
             band->is_2d_trans   = transforms[transform_id].is_2d_trans;
+
             if (transform_id < 10)
                 band->transform_size = 8;
             else
                 band->transform_size = 4;
 
-            if (band->blk_size != band->transform_size)
+            if (band->blk_size != band->transform_size) {
+                av_log(avctx, AV_LOG_ERROR, "transform and block size mismatch (%d != %d)\n", band->transform_size, band->blk_size);
                 return AVERROR_INVALIDDATA;
+            }
 
             scan_indx = get_bits(&ctx->gb, 4);
             if (scan_indx == 15) {
@@ -349,25 +358,29 @@ static int decode_band_hdr(IVI45DecContext *ctx, IVIBandDesc *band,
                 return AVERROR_INVALIDDATA;
             }
             if (scan_indx > 4 && scan_indx < 10) {
-                if (band->blk_size != 4)
+                if (band->blk_size != 4) {
+                    av_log(avctx, AV_LOG_ERROR, "mismatching scan table!\n");
                     return AVERROR_INVALIDDATA;
-            } else if (band->blk_size != 8)
+                }
+            } else if (band->blk_size != 8) {
+                av_log(avctx, AV_LOG_ERROR, "mismatching scan table!\n");
                 return AVERROR_INVALIDDATA;
+            }
 
             band->scan = scan_index_to_tab[scan_indx];
+            band->scan_size = band->blk_size;
 
-            band->quant_mat = get_bits(&ctx->gb, 5);
-            if (band->quant_mat >= FF_ARRAY_ELEMS(quant_index_to_tab)) {
-
-                if (band->quant_mat == 31)
-                    av_log(avctx, AV_LOG_ERROR,
-                           "Custom quant matrix encountered!\n");
-                else
-                    avpriv_request_sample(avctx, "Quantization matrix %d",
-                                          band->quant_mat);
-                band->quant_mat = -1;
+            quant_mat = get_bits(&ctx->gb, 5);
+            if (quant_mat == 31) {
+                av_log(avctx, AV_LOG_ERROR, "Custom quant matrix encountered!\n");
                 return AVERROR_INVALIDDATA;
             }
+            if (quant_mat >= FF_ARRAY_ELEMS(quant_index_to_tab)) {
+                avpriv_request_sample(avctx, "Quantization matrix %d",
+                                      quant_mat);
+                return AVERROR_INVALIDDATA;
+            }
+            band->quant_mat = quant_mat;
         } else {
             if (old_blk_size != band->blk_size) {
                 av_log(avctx, AV_LOG_ERROR,
@@ -375,10 +388,19 @@ static int decode_band_hdr(IVI45DecContext *ctx, IVIBandDesc *band,
                        "inherited\n");
                 return AVERROR_INVALIDDATA;
             }
-            if (band->quant_mat < 0) {
-                av_log(avctx, AV_LOG_ERROR, "Invalid quant_mat inherited\n");
-                return AVERROR_INVALIDDATA;
-            }
+        }
+        if (quant_index_to_tab[band->quant_mat] > 4 && band->blk_size == 4) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid quant matrix for 4x4 block encountered!\n");
+            band->quant_mat = 0;
+            return AVERROR_INVALIDDATA;
+        }
+        if (band->scan_size != band->blk_size) {
+            av_log(avctx, AV_LOG_ERROR, "mismatching scan table!\n");
+            return AVERROR_INVALIDDATA;
+        }
+        if (band->transform_size == 8 && band->blk_size < 8) {
+            av_log(avctx, AV_LOG_ERROR, "mismatching transform_size!\n");
+            return AVERROR_INVALIDDATA;
         }
 
         /* decode block huffman codebook */
@@ -422,6 +444,11 @@ static int decode_band_hdr(IVI45DecContext *ctx, IVIBandDesc *band,
 
     align_get_bits(&ctx->gb);
 
+    if (!band->scan) {
+        av_log(avctx, AV_LOG_ERROR, "band->scan not set\n");
+        return AVERROR_INVALIDDATA;
+    }
+
     return 0;
 }
 
@@ -440,7 +467,7 @@ static int decode_mb_info(IVI45DecContext *ctx, IVIBandDesc *band,
                           IVITile *tile, AVCodecContext *avctx)
 {
     int         x, y, mv_x, mv_y, mv_delta, offs, mb_offset, blks_per_mb,
-                mv_scale, mb_type_bits;
+                mv_scale, mb_type_bits, s;
     IVIMbInfo   *mb, *ref_mb;
     int         row_offset = band->mb_size * band->pitch;
 
@@ -455,6 +482,11 @@ static int decode_mb_info(IVI45DecContext *ctx, IVIBandDesc *band,
     mv_scale = (ctx->planes[0].bands[0].mb_size >> 3) - (band->mb_size >> 3);
     mv_x = mv_y = 0;
 
+    if (((tile->width + band->mb_size-1)/band->mb_size) * ((tile->height + band->mb_size-1)/band->mb_size) != tile->num_MBs) {
+        av_log(avctx, AV_LOG_ERROR, "num_MBs mismatch %d %d %d %d\n", tile->width, tile->height, band->mb_size, tile->num_MBs);
+        return AVERROR_INVALIDDATA; /* MB grid derived from the tile size must equal num_MBs */
+    }
+
     for (y = tile->ypos; y < tile->ypos + tile->height; y += band->mb_size) {
         mb_offset = offs;
 
@@ -494,8 +526,10 @@ static int decode_mb_info(IVI45DecContext *ctx, IVIBandDesc *band,
             } else {
                 if (band->inherit_mv) {
                     /* copy mb_type from corresponding reference mb */
-                    if (!ref_mb)
+                    if (!ref_mb) {
+                        av_log(avctx, AV_LOG_ERROR, "ref_mb unavailable\n");
                         return AVERROR_INVALIDDATA;
+                    }
                     mb->type = ref_mb->type;
                 } else if (ctx->frame_type == IVI4_FRAMETYPE_INTRA ||
                            ctx->frame_type == IVI4_FRAMETYPE_INTRA1) {
@@ -561,6 +595,15 @@ static int decode_mb_info(IVI45DecContext *ctx, IVIBandDesc *band,
                 }
             }
 
+            s= band->is_halfpel;
+            if (mb->type)
+            if ( x +  (mb->mv_x   >>s) +                 (y+               (mb->mv_y   >>s))*band->pitch < 0 ||
+                 x + ((mb->mv_x+s)>>s) + band->mb_size - 1
+                   + (y+band->mb_size - 1 +((mb->mv_y+s)>>s))*band->pitch > band->bufsize -1) {
+                av_log(avctx, AV_LOG_ERROR, "motion vector %d %d outside reference\n", x*s + mb->mv_x, y*s + mb->mv_y);
+                return AVERROR_INVALIDDATA;
+            }
+
             mb++;
             if (ref_mb)
                 ref_mb++;
diff --git a/libavcodec/indeo4data.h b/libavcodec/indeo4data.h
index 8311c68..d96ee98 100644
--- a/libavcodec/indeo4data.h
+++ b/libavcodec/indeo4data.h
@@ -2,20 +2,20 @@
  * Indeo Video Interactive 4 compatible decoder
  * Copyright (c) 2009-2010 Maxim Poliakovski
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/indeo5.c b/libavcodec/indeo5.c
index 5a112f9..71a54d5 100644
--- a/libavcodec/indeo5.c
+++ b/libavcodec/indeo5.c
@@ -2,20 +2,20 @@
  * Indeo Video Interactive v5 compatible decoder
  * Copyright (c) 2009 Maxim Poliakovski
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -58,7 +58,7 @@ enum {
  */
 static int decode_gop_header(IVI45DecContext *ctx, AVCodecContext *avctx)
 {
-    int             result, i, p, tile_size, pic_size_indx, mb_size, blk_size;
+    int             result, i, p, tile_size, pic_size_indx, mb_size, blk_size, is_scalable;
     int             quant_mat, blk_size_changed = 0;
     IVIBandDesc     *band, *band1, *band2;
     IVIPicConfig    pic_conf;
@@ -80,8 +80,8 @@ static int decode_gop_header(IVI45DecContext *ctx, AVCodecContext *avctx)
     /* num_levels * 3 + 1 */
     pic_conf.luma_bands   = get_bits(&ctx->gb, 2) * 3 + 1;
     pic_conf.chroma_bands = get_bits1(&ctx->gb)   * 3 + 1;
-    ctx->is_scalable = pic_conf.luma_bands != 1 || pic_conf.chroma_bands != 1;
-    if (ctx->is_scalable && (pic_conf.luma_bands != 4 || pic_conf.chroma_bands != 1)) {
+    is_scalable = pic_conf.luma_bands != 1 || pic_conf.chroma_bands != 1;
+    if (is_scalable && (pic_conf.luma_bands != 4 || pic_conf.chroma_bands != 1)) {
         av_log(avctx, AV_LOG_ERROR, "Scalability: unsupported subdivision! Luma bands: %d, chroma bands: %d\n",
                pic_conf.luma_bands, pic_conf.chroma_bands);
         return AVERROR_INVALIDDATA;
@@ -119,6 +119,7 @@ static int decode_gop_header(IVI45DecContext *ctx, AVCodecContext *avctx)
             return result;
         }
         ctx->pic_conf = pic_conf;
+        ctx->is_scalable = is_scalable;
         blk_size_changed = 1; /* force reallocation of the internal structures */
     }
 
@@ -132,6 +133,11 @@ static int decode_gop_header(IVI45DecContext *ctx, AVCodecContext *avctx)
             blk_size = 8 >> get_bits1(&ctx->gb);
             mb_size  = blk_size << !mb_size;
 
+            if (p==0 && blk_size==4) {
+                av_log(avctx, AV_LOG_ERROR, "4x4 luma blocks are unsupported!\n");
+                return AVERROR_PATCHWELCOME;
+            }
+
             blk_size_changed = mb_size != band->mb_size || blk_size != band->blk_size;
             if (blk_size_changed) {
                 band->mb_size  = mb_size;
@@ -184,8 +190,10 @@ static int decode_gop_header(IVI45DecContext *ctx, AVCodecContext *avctx)
             band->is_2d_trans = band->inv_transform == ff_ivi_inverse_slant_8x8 ||
                                 band->inv_transform == ff_ivi_inverse_slant_4x4;
 
-            if (band->transform_size != band->blk_size)
+            if (band->transform_size != band->blk_size) {
+                av_log(avctx, AV_LOG_ERROR, "transform and block size mismatch (%d != %d)\n", band->transform_size, band->blk_size);
                 return AVERROR_INVALIDDATA;
+            }
 
             /* select dequant matrix according to plane and band number */
             if (!p) {
@@ -195,6 +203,10 @@ static int decode_gop_header(IVI45DecContext *ctx, AVCodecContext *avctx)
             }
 
             if (band->blk_size == 8) {
+                if(quant_mat >= 5){
+                    av_log(avctx, AV_LOG_ERROR, "quant_mat %d too large!\n", quant_mat);
+                    return AVERROR_INVALIDDATA; /* quant_mat indexes the 8x8 quant tables below */
+                }
                 band->intra_base  = &ivi5_base_quant_8x8_intra[quant_mat][0];
                 band->inter_base  = &ivi5_base_quant_8x8_inter[quant_mat][0];
                 band->intra_scale = &ivi5_scale_quant_8x8_intra[quant_mat][0];
@@ -231,6 +243,7 @@ static int decode_gop_header(IVI45DecContext *ctx, AVCodecContext *avctx)
         band2->inv_transform = band1->inv_transform;
         band2->dc_transform  = band1->dc_transform;
         band2->is_2d_trans   = band1->is_2d_trans;
+        band2->transform_size= band1->transform_size;
     }
 
     /* reallocate internal structures if needed */
@@ -276,14 +289,18 @@ static int decode_gop_header(IVI45DecContext *ctx, AVCodecContext *avctx)
  *
  *  @param[in,out]  gb  the GetBit context
  */
-static inline void skip_hdr_extension(GetBitContext *gb)
+static inline int skip_hdr_extension(GetBitContext *gb)
 {
     int i, len;
 
     do {
         len = get_bits(gb, 8);
+        if (8*len > get_bits_left(gb))
+            return AVERROR_INVALIDDATA;
         for (i = 0; i < len; i++) skip_bits(gb, 8);
     } while(len);
+
+    return 0;
 }
 
 
@@ -321,6 +338,12 @@ static int decode_pic_hdr(IVI45DecContext *ctx, AVCodecContext *avctx)
         ctx->gop_invalid = 0;
     }
 
+    if (ctx->frame_type == FRAMETYPE_INTER_SCAL && !ctx->is_scalable) {
+        av_log(avctx, AV_LOG_ERROR, "Scalable inter frame in non scalable stream\n");
+        ctx->frame_type = FRAMETYPE_INTER;
+        return AVERROR_INVALIDDATA;
+    }
+
     if (ctx->frame_type != FRAMETYPE_NULL) {
         ctx->frame_flags = get_bits(&ctx->gb, 8);
 
@@ -431,7 +454,7 @@ static int decode_mb_info(IVI45DecContext *ctx, IVIBandDesc *band,
                           IVITile *tile, AVCodecContext *avctx)
 {
     int         x, y, mv_x, mv_y, mv_delta, offs, mb_offset,
-                mv_scale, blks_per_mb;
+                mv_scale, blks_per_mb, s;
     IVIMbInfo   *mb, *ref_mb;
     int         row_offset = band->mb_size * band->pitch;
 
@@ -477,7 +500,7 @@ static int decode_mb_info(IVI45DecContext *ctx, IVIBandDesc *band,
                 }
 
                 mb->mv_x = mb->mv_y = 0; /* no motion vector coded */
-                if (band->inherit_mv){
+                if (band->inherit_mv && ref_mb){
                     /* motion vector inheritance */
                     if (mv_scale) {
                         mb->mv_x = ivi_scale_mv(ref_mb->mv_x, mv_scale);
@@ -488,7 +511,7 @@ static int decode_mb_info(IVI45DecContext *ctx, IVIBandDesc *band,
                     }
                 }
             } else {
-                if (band->inherit_mv) {
+                if (band->inherit_mv && ref_mb) {
                     mb->type = ref_mb->type; /* copy mb_type from corresponding reference mb */
                 } else if (ctx->frame_type == FRAMETYPE_INTRA) {
                     mb->type = 0; /* mb_type is always INTRA for intra-frames */
@@ -514,7 +537,7 @@ static int decode_mb_info(IVI45DecContext *ctx, IVIBandDesc *band,
                 if (!mb->type) {
                     mb->mv_x = mb->mv_y = 0; /* there is no motion vector in intra-macroblocks */
                 } else {
-                    if (band->inherit_mv){
+                    if (band->inherit_mv && ref_mb){
                         /* motion vector inheritance */
                         if (mv_scale) {
                             mb->mv_x = ivi_scale_mv(ref_mb->mv_x, mv_scale);
@@ -537,6 +560,15 @@ static int decode_mb_info(IVI45DecContext *ctx, IVIBandDesc *band,
                 }
             }
 
+            s= band->is_halfpel;
+            if (mb->type)
+            if ( x +  (mb->mv_x   >>s) +                 (y+               (mb->mv_y   >>s))*band->pitch < 0 ||
+                 x + ((mb->mv_x+s)>>s) + band->mb_size - 1
+                   + (y+band->mb_size - 1 +((mb->mv_y+s)>>s))*band->pitch > band->bufsize - 1) {
+                av_log(avctx, AV_LOG_ERROR, "motion vector %d %d outside reference\n", x*s + mb->mv_x, y*s + mb->mv_y);
+                return AVERROR_INVALIDDATA;
+            }
+
             mb++;
             if (ref_mb)
                 ref_mb++;
@@ -647,7 +679,6 @@ static av_cold int decode_init(AVCodecContext *avctx)
     return 0;
 }
 
-
 AVCodec ff_indeo5_decoder = {
     .name           = "indeo5",
     .long_name      = NULL_IF_CONFIG_SMALL("Intel Indeo Video Interactive 5"),
diff --git a/libavcodec/indeo5data.h b/libavcodec/indeo5data.h
index f4252b5..a6217d0 100644
--- a/libavcodec/indeo5data.h
+++ b/libavcodec/indeo5data.h
@@ -2,20 +2,20 @@
  * Indeo Video Interactive 5 compatible decoder
  * Copyright (c) 2009 Maxim Poliakovski
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/intelh263dec.c b/libavcodec/intelh263dec.c
index e34da5c..5ed1cdb 100644
--- a/libavcodec/intelh263dec.c
+++ b/libavcodec/intelh263dec.c
@@ -1,20 +1,20 @@
 /*
  * H.263i decoder
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -58,14 +58,14 @@ int ff_intel_h263_decode_picture_header(MpegEncContext *s)
 
     s->pict_type = AV_PICTURE_TYPE_I + get_bits1(&s->gb);
 
-    s->unrestricted_mv = get_bits1(&s->gb);
-    s->h263_long_vectors = s->unrestricted_mv;
+    s->h263_long_vectors = get_bits1(&s->gb);
 
     if (get_bits1(&s->gb) != 0) {
         av_log(s->avctx, AV_LOG_ERROR, "SAC not supported\n");
         return -1;      /* SAC: off */
     }
     s->obmc= get_bits1(&s->gb);
+    s->unrestricted_mv = s->obmc || s->h263_long_vectors;
     s->pb_frame = get_bits1(&s->gb);
 
     if (format < 6) {
@@ -81,7 +81,7 @@ int ff_intel_h263_decode_picture_header(MpegEncContext *s)
         }
         if(get_bits(&s->gb, 2))
             av_log(s->avctx, AV_LOG_ERROR, "Bad value for reserved field\n");
-        s->loop_filter = get_bits1(&s->gb);
+        s->loop_filter = get_bits1(&s->gb) * !s->avctx->lowres;
         if(get_bits1(&s->gb))
             av_log(s->avctx, AV_LOG_ERROR, "Bad value for reserved field\n");
         if(get_bits1(&s->gb))
@@ -115,9 +115,8 @@ int ff_intel_h263_decode_picture_header(MpegEncContext *s)
     }
 
     /* PEI */
-    while (get_bits1(&s->gb) != 0) {
-        skip_bits(&s->gb, 8);
-    }
+    if (skip_1stop_8data_bits(&s->gb) < 0)
+        return AVERROR_INVALIDDATA;
     s->f_code = 1;
 
     s->y_dc_scale_table=
diff --git a/libavcodec/internal.h b/libavcodec/internal.h
index 3b2ae40..dcb2113 100644
--- a/libavcodec/internal.h
+++ b/libavcodec/internal.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -35,6 +35,14 @@
 
 #define FF_SANE_NB_CHANNELS 63U
 
+#if HAVE_AVX
+#   define STRIDE_ALIGN 32
+#elif HAVE_SIMD_ALIGN_16
+#   define STRIDE_ALIGN 16
+#else
+#   define STRIDE_ALIGN 8
+#endif
+
 typedef struct FramePool {
     /**
      * Pools for each data plane. For audio all the planes have the same size,
@@ -78,6 +86,14 @@ typedef struct AVCodecInternal {
      */
     int allocate_progress;
 
+#if FF_API_OLD_ENCODE_AUDIO
+    /**
+     * Internal sample count used by avcodec_encode_audio() to fabricate pts.
+     * Can be removed along with avcodec_encode_audio().
+     */
+    int64_t sample_count;
+#endif
+
     /**
      * An audio frame with less than required samples has been submitted and
      * padded with silence. Reject all subsequent frames.
@@ -97,6 +113,19 @@ typedef struct AVCodecInternal {
     AVPacket *pkt;
 
     /**
+     * Temporary buffer used by encoders to store their bitstream.
+     */
+    uint8_t *byte_buffer;
+    unsigned int byte_buffer_size;
+
+    void *frame_thread_encoder;
+
+    /**
+     * Number of audio samples to skip at the start of the next decoded frame
+     */
+    int skip_samples;
+
+    /**
      * hwaccel-specific private data
      */
     void *hwaccel_priv_data;
@@ -107,6 +136,8 @@ struct AVCodecDefault {
     const uint8_t *value;
 };
 
+extern const uint8_t ff_log2_run[41];
+
 /**
  * Return the index into tab at which {a,b} match elements {[0],[1]} of tab.
  * If there is no such matching pair then size is returned.
@@ -115,6 +146,18 @@ int ff_match_2uint16(const uint16_t (*tab)[2], int size, int a, int b);
 
 unsigned int avpriv_toupper4(unsigned int x);
 
+/**
+ * Do the setup of pkt_pts/pos and such that is needed for (re)get_buffer().
+ */
+int ff_init_buffer_info(AVCodecContext *s, AVFrame *frame);
+
+
+void avpriv_color_frame(AVFrame *frame, const int color[4]);
+
+extern volatile int ff_avcodec_locked;
+int ff_lock_avcodec(AVCodecContext *log_ctx);
+int ff_unlock_avcodec(void);
+
 int avpriv_lock_avformat(void);
 int avpriv_unlock_avformat(void);
 
@@ -132,6 +175,7 @@ int avpriv_unlock_avformat(void);
  * ensure the output packet data is large enough, whether provided by the user
  * or allocated in this function.
  *
+ * @param avctx   the AVCodecContext of the encoder
  * @param avpkt   the AVPacket
  *                If avpkt->data is already set, avpkt->size is checked
  *                to ensure it is large enough.
@@ -141,6 +185,8 @@ int avpriv_unlock_avformat(void);
  * @param size    the minimum required packet size
  * @return        0 on success, negative error code on failure
  */
+int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int64_t size);
+
 int ff_alloc_packet(AVPacket *avpkt, int size);
 
 /**
@@ -149,6 +195,8 @@ int ff_alloc_packet(AVPacket *avpkt, int size);
 static av_always_inline int64_t ff_samples_to_time_base(AVCodecContext *avctx,
                                                         int64_t samples)
 {
+    if(samples == AV_NOPTS_VALUE)
+        return AV_NOPTS_VALUE;
     return av_rescale_q(samples, (AVRational){ 1, avctx->sample_rate },
                         avctx->time_base);
 }
@@ -166,9 +214,25 @@ int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags);
  */
 int ff_reget_buffer(AVCodecContext *avctx, AVFrame *frame);
 
-const uint8_t *avpriv_find_start_code(const uint8_t *restrict p,
+int ff_thread_can_start_frame(AVCodecContext *avctx);
+
+int avpriv_h264_has_num_reorder_frames(AVCodecContext *avctx);
+
+/**
+ * Call avcodec_open2 recursively by decrementing the counter, unlocking the
+ * mutex, calling the function and then restoring them again. Assumes the
+ * mutex is already locked.
+ */
+int ff_codec_open2_recursive(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **options);
+
+/**
+ * Finalize buf into extradata and set its size appropriately.
+ */
+int avpriv_bprint_to_extradata(AVCodecContext *avctx, struct AVBPrint *buf);
+
+const uint8_t *avpriv_find_start_code(const uint8_t *p,
                                       const uint8_t *end,
-                                      uint32_t *restrict state);
+                                      uint32_t *state);
 
 /**
  * Check that the provided frame dimensions are valid and set them on the codec
diff --git a/libavcodec/interplayvideo.c b/libavcodec/interplayvideo.c
index 7d785e3..542fefe1 100644
--- a/libavcodec/interplayvideo.c
+++ b/libavcodec/interplayvideo.c
@@ -2,20 +2,20 @@
  * Interplay MVE Video Decoder
  * Copyright (C) 2003 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -72,10 +72,10 @@ static int copy_from(IpvideoContext *s, AVFrame *src, AVFrame *dst, int delta_x,
     int motion_offset = current_offset + delta_y * dst->linesize[0]
                        + delta_x * (1 + s->is_16bpp);
     if (motion_offset < 0) {
-        av_log(s->avctx, AV_LOG_ERROR, " Interplay video: motion offset < 0 (%d)\n", motion_offset);
+        av_log(s->avctx, AV_LOG_ERROR, "motion offset < 0 (%d)\n", motion_offset);
         return AVERROR_INVALIDDATA;
     } else if (motion_offset > s->upper_motion_limit_offset) {
-        av_log(s->avctx, AV_LOG_ERROR, " Interplay video: motion offset above limit (%d >= %d)\n",
+        av_log(s->avctx, AV_LOG_ERROR, "motion offset above limit (%d >= %d)\n",
             motion_offset, s->upper_motion_limit_offset);
         return AVERROR_INVALIDDATA;
     }
@@ -118,7 +118,7 @@ static int ipvideo_decode_block_opcode_0x2(IpvideoContext *s, AVFrame *frame)
         y =   8 + ((B - 56) / 29);
     }
 
-    av_dlog(NULL, "    motion byte = %d, (x, y) = (%d, %d)\n", B, x, y);
+    av_dlog(s->avctx, "motion byte = %d, (x, y) = (%d, %d)\n", B, x, y);
     return copy_from(s, s->second_last_frame, frame, x, y);
 }
 
@@ -144,7 +144,7 @@ static int ipvideo_decode_block_opcode_0x3(IpvideoContext *s, AVFrame *frame)
         y = -(  8 + ((B - 56) / 29));
     }
 
-    av_dlog(NULL, "    motion byte = %d, (x, y) = (%d, %d)\n", B, x, y);
+    av_dlog(s->avctx, "motion byte = %d, (x, y) = (%d, %d)\n", B, x, y);
     return copy_from(s, frame, frame, x, y);
 }
 
@@ -165,7 +165,7 @@ static int ipvideo_decode_block_opcode_0x4(IpvideoContext *s, AVFrame *frame)
     x = -8 + BL;
     y = -8 + BH;
 
-    av_dlog(NULL, "    motion byte = %d, (x, y) = (%d, %d)\n", B, x, y);
+    av_dlog(s->avctx, "motion byte = %d, (x, y) = (%d, %d)\n", B, x, y);
     return copy_from(s, s->last_frame, frame, x, y);
 }
 
@@ -178,14 +178,14 @@ static int ipvideo_decode_block_opcode_0x5(IpvideoContext *s, AVFrame *frame)
     x = bytestream2_get_byte(&s->stream_ptr);
     y = bytestream2_get_byte(&s->stream_ptr);
 
-    av_dlog(NULL, "    motion bytes = %d, %d\n", x, y);
+    av_dlog(s->avctx, "motion bytes = %d, %d\n", x, y);
     return copy_from(s, s->last_frame, frame, x, y);
 }
 
 static int ipvideo_decode_block_opcode_0x6(IpvideoContext *s, AVFrame *frame)
 {
     /* mystery opcode? skip multiple blocks? */
-    av_log(s->avctx, AV_LOG_ERROR, "  Interplay video: Help! Mystery opcode 0x6 seen\n");
+    av_log(s->avctx, AV_LOG_ERROR, "Help! Mystery opcode 0x6 seen\n");
 
     /* report success */
     return 0;
@@ -197,6 +197,11 @@ static int ipvideo_decode_block_opcode_0x7(IpvideoContext *s, AVFrame *frame)
     unsigned char P[2];
     unsigned int flags;
 
+    if (bytestream2_get_bytes_left(&s->stream_ptr) < 4) {
+        av_log(s->avctx, AV_LOG_ERROR, "too little data for opcode 0x7\n");
+        return AVERROR_INVALIDDATA;
+    }
+
     /* 2-color encoding */
     P[0] = bytestream2_get_byte(&s->stream_ptr);
     P[1] = bytestream2_get_byte(&s->stream_ptr);
@@ -236,6 +241,11 @@ static int ipvideo_decode_block_opcode_0x8(IpvideoContext *s, AVFrame *frame)
     unsigned char P[4];
     unsigned int flags = 0;
 
+    if (bytestream2_get_bytes_left(&s->stream_ptr) < 12) {
+        av_log(s->avctx, AV_LOG_ERROR, "too little data for opcode 0x8\n");
+        return AVERROR_INVALIDDATA;
+    }
+
     /* 2-color encoding for each 4x4 quadrant, or 2-color encoding on
      * either top and bottom or left and right halves */
     P[0] = bytestream2_get_byte(&s->stream_ptr);
@@ -308,6 +318,11 @@ static int ipvideo_decode_block_opcode_0x9(IpvideoContext *s, AVFrame *frame)
     int x, y;
     unsigned char P[4];
 
+    if (bytestream2_get_bytes_left(&s->stream_ptr) < 8) {
+        av_log(s->avctx, AV_LOG_ERROR, "too little data for opcode 0x9\n");
+        return AVERROR_INVALIDDATA;
+    }
+
     /* 4-color encoding */
     bytestream2_get_buffer(&s->stream_ptr, P, 4);
 
@@ -374,6 +389,11 @@ static int ipvideo_decode_block_opcode_0xA(IpvideoContext *s, AVFrame *frame)
     unsigned char P[8];
     int flags = 0;
 
+    if (bytestream2_get_bytes_left(&s->stream_ptr) < 16) {
+        av_log(s->avctx, AV_LOG_ERROR, "too little data for opcode 0xA\n");
+        return AVERROR_INVALIDDATA;
+    }
+
     bytestream2_get_buffer(&s->stream_ptr, P, 4);
 
     /* 4-color encoding for each 4x4 quadrant, or 4-color encoding on
@@ -467,6 +487,11 @@ static int ipvideo_decode_block_opcode_0xD(IpvideoContext *s, AVFrame *frame)
     int y;
     unsigned char P[2];
 
+    if (bytestream2_get_bytes_left(&s->stream_ptr) < 4) {
+        av_log(s->avctx, AV_LOG_ERROR, "too little data for opcode 0xD\n");
+        return AVERROR_INVALIDDATA;
+    }
+
     /* 4-color block encoding: each 4x4 block is a different color */
     for (y = 0; y < 8; y++) {
         if (!(y & 3)) {
@@ -528,7 +553,7 @@ static int ipvideo_decode_block_opcode_0x6_16(IpvideoContext *s, AVFrame *frame)
     x = bytestream2_get_byte(&s->stream_ptr);
     y = bytestream2_get_byte(&s->stream_ptr);
 
-    av_dlog(NULL, "    motion bytes = %d, %d\n", x, y);
+    av_dlog(s->avctx, "motion bytes = %d, %d\n", x, y);
     return copy_from(s, s->second_last_frame, frame, x, y);
 }
 
@@ -917,7 +942,7 @@ static void ipvideo_decode_opcodes(IpvideoContext *s, AVFrame *frame)
                 ret = ipvideo_decode_block16[opcode](s, frame);
             }
             if (ret != 0) {
-                av_log(s->avctx, AV_LOG_ERROR, " Interplay video: decode problem on frame %d, @ block (%d, %d)\n",
+                av_log(s->avctx, AV_LOG_ERROR, "decode problem on frame %d, @ block (%d, %d)\n",
                        s->avctx->frame_number, x, y);
                 return;
             }
@@ -925,7 +950,7 @@ static void ipvideo_decode_opcodes(IpvideoContext *s, AVFrame *frame)
     }
     if (bytestream2_get_bytes_left(&s->stream_ptr) > 1) {
         av_log(s->avctx, AV_LOG_ERROR,
-               "Interplay video: decode finished with %d bytes left over\n",
+               "decode finished with %d bytes left over\n",
                bytestream2_get_bytes_left(&s->stream_ptr));
     }
 }
@@ -970,14 +995,17 @@ static int ipvideo_decode_frame(AVCodecContext *avctx,
     if (buf_size < s->decoding_map_size)
         return buf_size;
 
+    if (av_packet_get_side_data(avpkt, AV_PKT_DATA_PARAM_CHANGE, NULL)) {
+        av_frame_unref(s->last_frame);
+        av_frame_unref(s->second_last_frame);
+    }
+
     s->decoding_map = buf;
     bytestream2_init(&s->stream_ptr, buf + s->decoding_map_size,
                      buf_size - s->decoding_map_size);
 
-    if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "  Interplay Video: get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
         return ret;
-    }
 
     if (!s->is_16bpp) {
         const uint8_t *pal = av_packet_get_side_data(avpkt, AV_PKT_DATA_PALETTE, NULL);
diff --git a/libavcodec/intrax8.c b/libavcodec/intrax8.c
index d37eb79..c6df8cf 100644
--- a/libavcodec/intrax8.c
+++ b/libavcodec/intrax8.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -21,6 +21,7 @@
  * @brief IntraX8 (J-Frame) subdecoder, used by WMV2 and VC-1
  */
 
+#include "libavutil/avassert.h"
 #include "avcodec.h"
 #include "error_resilience.h"
 #include "get_bits.h"
@@ -127,13 +128,13 @@ static inline void x8_select_ac_table(IntraX8Context * const w , int mode){
     MpegEncContext * const s= w->s;
     int table_index;
 
-    assert(mode<4);
+    av_assert2(mode<4);
 
     if( w->j_ac_vlc[mode] ) return;
 
     table_index = get_bits(&s->gb, 3);
     w->j_ac_vlc[mode] = &j_ac_vlc[w->quant<13][mode>>1][table_index];//2 modes use same tables
-    assert(w->j_ac_vlc[mode]);
+    av_assert2(w->j_ac_vlc[mode]);
 }
 
 static inline int x8_get_orient_vlc(IntraX8Context * w){
@@ -144,8 +145,6 @@ static inline int x8_get_orient_vlc(IntraX8Context * w){
         table_index = get_bits(&s->gb, 1+(w->quant<13) );
         w->j_orient_vlc = &j_orient_vlc[w->quant<13][table_index];
     }
-    assert(w->j_orient_vlc);
-    assert(w->j_orient_vlc->table);
 
     return get_vlc2(&s->gb, w->j_orient_vlc->table, OR_VLC_BITS, OR_VLC_MTD);
 }
@@ -267,15 +266,13 @@ static int x8_get_dc_rlf(IntraX8Context * const w,int const mode, int * const le
     MpegEncContext * const s= w->s;
     int i,e,c;
 
-    assert(mode<3);
+    av_assert2(mode<3);
     if( !w->j_dc_vlc[mode] ) {
         int table_index;
         table_index = get_bits(&s->gb, 3);
         //4 modes, same table
         w->j_dc_vlc[mode]= &j_dc_vlc[w->quant<13][table_index];
     }
-    assert(w->j_dc_vlc);
-    assert(w->j_dc_vlc[mode]->table);
 
     i=get_vlc2(&s->gb, w->j_dc_vlc[mode]->table, DC_VLC_BITS, DC_VLC_MTD);
 
@@ -328,7 +325,7 @@ static int x8_setup_spatial_predictor(IntraX8Context * const w, const int chroma
     if(chroma)
         return 0;
 
-    assert(w->orient < 3);
+    av_assert2(w->orient < 3);
     if(range < 2*w->quant){
         if( (w->edges&3) == 0){
             if(w->orient==1) w->orient=11;
@@ -345,8 +342,8 @@ static int x8_setup_spatial_predictor(IntraX8Context * const w, const int chroma
         };
         w->raw_orient=x8_get_orient_vlc(w);
         if(w->raw_orient<0) return -1;
-        assert(w->raw_orient < 12 );
-        assert(w->orient<3);
+        av_assert2(w->raw_orient < 12 );
+        av_assert2(w->orient<3);
         w->orient=prediction_table[w->orient][w->raw_orient];
     }
     return 0;
@@ -441,7 +438,7 @@ lut2[q>12][c]={
 static void x8_ac_compensation(IntraX8Context * const w, int const direction, int const dc_level){
     MpegEncContext * const s= w->s;
     int t;
-#define B(x, y) s->block[0][s->idsp.idct_permutation[(x) + (y) * 8]]
+#define B(x,y)  s->block[0][w->idct_permutation[(x)+(y)*8]]
 #define T(x)  ((x) * dc_level + 0x8000) >> 16;
     switch(direction){
     case 0:
@@ -538,7 +535,7 @@ static int x8_decode_intra_mb(IntraX8Context* const w, const int chroma){
     int use_quant_matrix;
     int sign;
 
-    assert(w->orient<12);
+    av_assert2(w->orient<12);
     s->bdsp.clear_block(s->block[0]);
 
     if(chroma){
@@ -647,7 +644,7 @@ static int x8_decode_intra_mb(IntraX8Context* const w, const int chroma){
                                             s->current_picture.f->linesize[!!chroma] );
     }
     if(!zeros_only)
-        s->idsp.idct_add(s->dest[chroma],
+        w->wdsp.idct_add(s->dest[chroma],
                          s->current_picture.f->linesize[!!chroma],
                          s->block[0]);
 
@@ -696,12 +693,16 @@ av_cold void ff_intrax8_common_init(IntraX8Context * w, MpegEncContext * const s
 
     w->s=s;
     x8_vlc_init();
-    assert(s->mb_width>0);
+    av_assert0(s->mb_width>0);
     w->prediction_table=av_mallocz(s->mb_width*2*2);//two rows, 2 blocks per cannon mb
 
-    ff_init_scantable(s->idsp.idct_permutation, &w->scantable[0], ff_wmv1_scantable[0]);
-    ff_init_scantable(s->idsp.idct_permutation, &w->scantable[1], ff_wmv1_scantable[2]);
-    ff_init_scantable(s->idsp.idct_permutation, &w->scantable[2], ff_wmv1_scantable[3]);
+    ff_wmv2dsp_init(&w->wdsp);
+    ff_init_scantable_permutation(w->idct_permutation,
+                                  w->wdsp.idct_perm);
+
+    ff_init_scantable(w->idct_permutation, &w->scantable[0], ff_wmv1_scantable[0]);
+    ff_init_scantable(w->idct_permutation, &w->scantable[1], ff_wmv1_scantable[2]);
+    ff_init_scantable(w->idct_permutation, &w->scantable[2], ff_wmv1_scantable[3]);
 
     ff_intrax8dsp_init(&w->dsp);
 }
@@ -721,6 +722,7 @@ av_cold void ff_intrax8_common_end(IntraX8Context * w)
  * The parent codec must call MPV_frame_start(), ff_er_frame_start() before calling this function.
  * The parent codec must call ff_er_frame_end(), MPV_frame_end() after calling this function.
  * This function does not use MPV_decode_mb().
+ * lowres decoding is theoretically impossible.
  * @param w pointer to IntraX8Context
  * @param dquant doubled quantizer, it would be odd in case of VC-1 halfpq==1.
  * @param quant_offset offset away from zero
@@ -728,7 +730,6 @@ av_cold void ff_intrax8_common_end(IntraX8Context * w)
 int ff_intrax8_decode_picture(IntraX8Context * const w, int dquant, int quant_offset){
     MpegEncContext * const s= w->s;
     int mb_xy;
-    assert(s);
     w->use_quant_matrix = get_bits1(&s->gb);
 
     w->dquant = dquant;
diff --git a/libavcodec/intrax8.h b/libavcodec/intrax8.h
index 6967317..9981785 100644
--- a/libavcodec/intrax8.h
+++ b/libavcodec/intrax8.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -22,6 +22,7 @@
 #include "get_bits.h"
 #include "mpegvideo.h"
 #include "intrax8dsp.h"
+#include "wmv2dsp.h"
 
 typedef struct IntraX8Context {
     VLC * j_ac_vlc[4];//they point to the static j_mb_vlc
@@ -32,6 +33,8 @@ typedef struct IntraX8Context {
 //set by ff_intrax8_common_init
     uint8_t * prediction_table;//2*(mb_w*2)
     ScanTable scantable[3];
+    WMV2DSPContext wdsp;
+    uint8_t idct_permutation[64];
 //set by the caller codec
     MpegEncContext * s;
     IntraX8DSPContext dsp;
diff --git a/libavcodec/intrax8dsp.c b/libavcodec/intrax8dsp.c
index 1115945..1b34f89 100644
--- a/libavcodec/intrax8dsp.c
+++ b/libavcodec/intrax8dsp.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/intrax8dsp.h b/libavcodec/intrax8dsp.h
index 5c3cc4a..1e4a3af 100644
--- a/libavcodec/intrax8dsp.h
+++ b/libavcodec/intrax8dsp.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/intrax8huf.h b/libavcodec/intrax8huf.h
index 6bf01f3..375906b 100644
--- a/libavcodec/intrax8huf.h
+++ b/libavcodec/intrax8huf.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ituh263dec.c b/libavcodec/ituh263dec.c
index dc3de30..e64c2bb 100644
--- a/libavcodec/ituh263dec.c
+++ b/libavcodec/ituh263dec.c
@@ -5,20 +5,20 @@
  * Copyright (c) 2001 Juan J. Sierralta P
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,6 +27,7 @@
  * h263 decoder.
  */
 
+#define UNCHECKED_BITSTREAM_READER 1
 #include <limits.h>
 
 #include "libavutil/attributes.h"
@@ -101,11 +102,9 @@ static VLC cbpc_b_vlc;
 /* XXX: find a better solution to handle static init */
 av_cold void ff_h263_decode_init_vlc(void)
 {
-    static int done = 0;
+    static volatile int done = 0;
 
     if (!done) {
-        done = 1;
-
         INIT_VLC_STATIC(&ff_h263_intra_MCBPC_vlc, INTRA_MCBPC_VLC_BITS, 9,
                  ff_h263_intra_MCBPC_bits, 1, 1,
                  ff_h263_intra_MCBPC_code, 1, 1, 72);
@@ -128,6 +127,7 @@ av_cold void ff_h263_decode_init_vlc(void)
         INIT_VLC_STATIC(&cbpc_b_vlc, CBPC_B_VLC_BITS, 4,
                  &ff_cbpc_b_tab[0][1], 2, 1,
                  &ff_cbpc_b_tab[0][0], 2, 1, 8);
+        done = 1;
     }
 }
 
@@ -201,27 +201,6 @@ static int h263_decode_gob_header(MpegEncContext *s)
 }
 
 /**
- * Find the next resync_marker.
- * @param p pointer to buffer to scan
- * @param end pointer to the end of the buffer
- * @return pointer to the next resync_marker, or end if none was found
- */
-const uint8_t *ff_h263_find_resync_marker(const uint8_t *restrict p, const uint8_t * restrict end)
-{
-    assert(p < end);
-
-    end-=2;
-    p++;
-    for(;p<end; p+=2){
-        if(!*p){
-            if     (!p[-1] && p[1]) return p - 1;
-            else if(!p[ 1] && p[2]) return p;
-        }
-    }
-    return end+2;
-}
-
-/**
  * Decode the group of blocks / video packet header.
  * @return bit position of the resync_marker, or <0 if none was found
  */
@@ -345,7 +324,7 @@ static void preview_obmc(MpegEncContext *s){
         s->block_index[i]+= 1;
     s->mb_x++;
 
-    assert(s->pict_type == AV_PICTURE_TYPE_P);
+    av_assert2(s->pict_type == AV_PICTURE_TYPE_P);
 
     do{
         if (get_bits1(&s->gb)) {
@@ -482,7 +461,7 @@ static int h263_decode_block(MpegEncContext * s, int16_t * block,
             level = get_bits(&s->gb, 8);
             if((level&0x7F) == 0){
                 av_log(s->avctx, AV_LOG_ERROR, "illegal dc %d at %d %d\n", level, s->mb_x, s->mb_y);
-                if(s->err_recognition & AV_EF_BITSTREAM)
+                if(s->err_recognition & (AV_EF_BITSTREAM|AV_EF_COMPLIANT))
                     return -1;
             }
             if (level == 255)
@@ -563,11 +542,13 @@ static int h263_skip_b_part(MpegEncContext *s, int cbp)
 {
     LOCAL_ALIGNED_16(int16_t, dblock, [64]);
     int i, mbi;
+    int bli[6];
 
     /* we have to set s->mb_intra to zero to decode B-part of PB-frame correctly
      * but real value should be restored in order to be used later (in OBMC condition)
      */
     mbi = s->mb_intra;
+    memcpy(bli, s->block_last_index, sizeof(bli));
     s->mb_intra = 0;
     for (i = 0; i < 6; i++) {
         if (h263_decode_block(s, dblock, i, cbp&32) < 0)
@@ -575,6 +556,7 @@ static int h263_skip_b_part(MpegEncContext *s, int cbp)
         cbp+=cbp;
     }
     s->mb_intra = mbi;
+    memcpy(s->block_last_index, bli, sizeof(bli));
     return 0;
 }
 
@@ -604,7 +586,7 @@ int ff_h263_decode_mb(MpegEncContext *s,
     const int xy= s->mb_x + s->mb_y * s->mb_stride;
     int cbpb = 0, pb_mv_count = 0;
 
-    assert(!s->h263_pred);
+    av_assert2(!s->h263_pred);
 
     if (s->pict_type == AV_PICTURE_TYPE_P) {
         do{
@@ -744,15 +726,13 @@ int ff_h263_decode_mb(MpegEncContext *s,
         }else
             cbp=0;
 
-        assert(!s->mb_intra);
+        av_assert2(!s->mb_intra);
 
         if(IS_QUANT(mb_type)){
             h263_decode_dquant(s);
         }
 
         if(IS_DIRECT(mb_type)){
-            if (!s->pp_time)
-                return AVERROR_INVALIDDATA;
             s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
             mb_type |= ff_mpeg4_set_direct_mv(s, 0, 0);
         }else{
@@ -871,6 +851,10 @@ int ff_h263_decode_picture_header(MpegEncContext *s)
 
     align_get_bits(&s->gb);
 
+    if (show_bits(&s->gb, 2) == 2 && s->avctx->frame_number == 0) {
+         av_log(s->avctx, AV_LOG_WARNING, "Header looks like RTP instead of H.263\n");
+    }
+
     startcode= get_bits(&s->gb, 22-8);
 
     for(i= get_bits_left(&s->gb); i>24; i-=8) {
@@ -960,6 +944,8 @@ int ff_h263_decode_picture_header(MpegEncContext *s)
             s->h263_aic = get_bits1(&s->gb); /* Advanced Intra Coding (AIC) */
             s->loop_filter= get_bits1(&s->gb);
             s->unrestricted_mv = s->umvplus || s->obmc || s->loop_filter;
+            if(s->avctx->lowres)
+                s->loop_filter = 0;
 
             s->h263_slice_structured= get_bits1(&s->gb);
             if (get_bits1(&s->gb) != 0) {
@@ -1027,6 +1013,7 @@ int ff_h263_decode_picture_header(MpegEncContext *s)
                 height = ff_h263_format[format][1];
                 s->avctx->sample_aspect_ratio= (AVRational){12,11};
             }
+            s->avctx->sample_aspect_ratio.den <<= s->ehc_mode;
             if ((width == 0) || (height == 0))
                 return -1;
             s->width = width;
@@ -1071,6 +1058,10 @@ int ff_h263_decode_picture_header(MpegEncContext *s)
         s->qscale = get_bits(&s->gb, 5);
     }
 
+    if (s->width == 0 || s->height == 0) {
+        av_log(s->avctx, AV_LOG_ERROR, "dimensions 0\n");
+        return -1;
+    }
     s->mb_width = (s->width  + 15) / 16;
     s->mb_height = (s->height  + 15) / 16;
     s->mb_num = s->mb_width * s->mb_height;
@@ -1099,9 +1090,8 @@ int ff_h263_decode_picture_header(MpegEncContext *s)
     }
 
     /* PEI */
-    while (get_bits1(&s->gb) != 0) {
-        skip_bits(&s->gb, 8);
-    }
+    if (skip_1stop_8data_bits(&s->gb) < 0)
+        return AVERROR_INVALIDDATA;
 
     if(s->h263_slice_structured){
         if (get_bits1(&s->gb) != 1) {
@@ -1127,7 +1117,7 @@ int ff_h263_decode_picture_header(MpegEncContext *s)
     }
 
         ff_h263_show_pict_info(s);
-    if (s->pict_type == AV_PICTURE_TYPE_I && s->codec_tag == AV_RL32("ZYGO")){
+    if (s->pict_type == AV_PICTURE_TYPE_I && s->codec_tag == AV_RL32("ZYGO") && get_bits_left(&s->gb) >= 85 + 13*3*16 + 50){
         int i,j;
         for(i=0; i<85; i++) av_log(s->avctx, AV_LOG_DEBUG, "%d", get_bits1(&s->gb));
         av_log(s->avctx, AV_LOG_DEBUG, "\n");
diff --git a/libavcodec/ituh263enc.c b/libavcodec/ituh263enc.c
index 5f15b2f..43ad080 100644
--- a/libavcodec/ituh263enc.c
+++ b/libavcodec/ituh263enc.c
@@ -5,20 +5,20 @@
  * Copyright (c) 2001 Juan J. Sierralta P
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -226,19 +226,11 @@ void ff_h263_encode_picture_header(MpegEncContext * s, int picture_number)
     if(s->h263_slice_structured){
         put_bits(&s->pb, 1, 1);
 
-        assert(s->mb_x == 0 && s->mb_y == 0);
+        av_assert1(s->mb_x == 0 && s->mb_y == 0);
         ff_h263_encode_mba(s);
 
         put_bits(&s->pb, 1, 1);
     }
-
-    if(s->h263_aic){
-         s->y_dc_scale_table=
-         s->c_dc_scale_table= ff_aic_dc_scale_table;
-    }else{
-        s->y_dc_scale_table=
-        s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
-    }
 }
 
 /**
@@ -268,7 +260,7 @@ void ff_h263_encode_gob_header(MpegEncContext * s, int mb_line)
 }
 
 /**
- * modify qscale so that encoding is acually possible in h263 (limit difference to -2..2)
+ * modify qscale so that encoding is actually possible in h263 (limit difference to -2..2)
  */
 void ff_clean_h263_qscales(MpegEncContext *s){
     int i;
@@ -393,7 +385,7 @@ static void h263_encode_block(MpegEncContext * s, int16_t * block, int n)
                 put_bits(&s->pb, 1, last);
                 put_bits(&s->pb, 6, run);
 
-                assert(slevel != 0);
+                av_assert2(slevel != 0);
 
                 if(level < 128)
                     put_sbits(&s->pb, 8, slevel);
@@ -546,7 +538,7 @@ void ff_h263_encode_mb(MpegEncContext * s,
             s->mv_bits+= get_bits_diff(s);
         }
     } else {
-        assert(s->mb_intra);
+        av_assert2(s->mb_intra);
 
         cbp = 0;
         if (s->h263_aic) {
@@ -725,8 +717,8 @@ static av_cold void init_uni_h263_rl_tab(RLTable *rl, uint32_t *bits_tab,
 {
     int slevel, run, last;
 
-    assert(MAX_LEVEL >= 64);
-    assert(MAX_RUN   >= 63);
+    av_assert0(MAX_LEVEL >= 64);
+    av_assert0(MAX_RUN   >= 63);
 
     for(slevel=-64; slevel<64; slevel++){
         if(slevel==0) continue;
@@ -815,12 +807,15 @@ av_cold void ff_h263_encode_init(MpegEncContext *s)
             s->min_qcoeff= -127;
             s->max_qcoeff=  127;
         }
-        s->y_dc_scale_table=
-        s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
         break;
     default: //nothing needed - default table already set in mpegvideo.c
         s->min_qcoeff= -127;
         s->max_qcoeff=  127;
+    }
+    if(s->h263_aic){
+         s->y_dc_scale_table=
+         s->c_dc_scale_table= ff_aic_dc_scale_table;
+    }else{
         s->y_dc_scale_table=
         s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
     }
diff --git a/libavcodec/ivi_common.c b/libavcodec/ivi_common.c
index 7c4d53e..3795081 100644
--- a/libavcodec/ivi_common.c
+++ b/libavcodec/ivi_common.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2009 Maxim Poliakovski
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -87,12 +87,9 @@ static int ivi_mc(IVIBandDesc *band, ivi_mc_func mc, ivi_mc_avg_func mc_avg,
     int ref_size = (mc_type > 1) * band->pitch + (mc_type & 1);
 
     if (mc_type != -1) {
-        if (offs < 0 || ref_offs < 0 || !band->ref_buf)
-            return AVERROR_INVALIDDATA;
-        if (buf_size - min_size < offs)
-            return AVERROR_INVALIDDATA;
-        if (buf_size - min_size - ref_size < ref_offs)
-            return AVERROR_INVALIDDATA;
+        av_assert0(offs >= 0 && ref_offs >= 0 && band->ref_buf);
+        av_assert0(buf_size - min_size >= offs);
+        av_assert0(buf_size - min_size - ref_size >= ref_offs);
     }
 
     if (mc_type2 == -1) {
@@ -136,7 +133,7 @@ static uint16_t inv_bits(uint16_t val, int nbits)
 
 /*
  *  Generate a huffman codebook from the given descriptor
- *  and convert it into the Libav VLC table.
+ *  and convert it into the FFmpeg VLC table.
  *
  *  @param[in]   cb    pointer to codebook descriptor
  *  @param[out]  vlc   where to place the generated VLC table
@@ -250,7 +247,7 @@ int ff_ivi_dec_huff_desc(GetBitContext *gb, int desc_coded, int which_tab,
             new_huff.xbits[i] = get_bits(gb, 4);
 
         /* Have we got the same custom table? Rebuild if not. */
-        if (ivi_huff_desc_cmp(&new_huff, &huff_tab->cust_desc)) {
+        if (ivi_huff_desc_cmp(&new_huff, &huff_tab->cust_desc) || !huff_tab->cust_tab.table) {
             ivi_huff_desc_copy(&huff_tab->cust_desc, &new_huff);
 
             if (huff_tab->cust_tab.table)
@@ -285,6 +282,7 @@ static av_cold void ivi_free_buffers(IVIPlaneDesc *planes)
     int p, b, t;
 
     for (p = 0; p < 3; p++) {
+        if (planes[p].bands)
         for (b = 0; b < planes[p].num_bands; b++) {
             av_freep(&planes[p].bands[b].bufs[0]);
             av_freep(&planes[p].bands[b].bufs[1]);
@@ -327,7 +325,7 @@ av_cold int ff_ivi_init_planes(IVIPlaneDesc *planes, const IVIPicConfig *cfg,
     planes[1].num_bands = planes[2].num_bands = cfg->chroma_bands;
 
     for (p = 0; p < 3; p++) {
-        planes[p].bands = av_mallocz(planes[p].num_bands * sizeof(IVIBandDesc));
+        planes[p].bands = av_mallocz_array(planes[p].num_bands, sizeof(IVIBandDesc));
         if (!planes[p].bands)
             return AVERROR(ENOMEM);
 
@@ -356,6 +354,7 @@ av_cold int ff_ivi_init_planes(IVIPlaneDesc *planes, const IVIPicConfig *cfg,
             band->aheight  = height_aligned;
             band->bufs[0]  = av_mallocz(buf_size);
             band->bufs[1]  = av_mallocz(buf_size);
+            band->bufsize  = buf_size/2;
             if (!band->bufs[0] || !band->bufs[1])
                 return AVERROR(ENOMEM);
 
@@ -397,14 +396,16 @@ static int ivi_init_tiles(IVIBandDesc *band, IVITile *ref_tile,
                                               band->mb_size);
 
             av_freep(&tile->mbs);
-            tile->mbs = av_malloc(tile->num_MBs * sizeof(IVIMbInfo));
+            tile->mbs = av_mallocz_array(tile->num_MBs, sizeof(IVIMbInfo));
             if (!tile->mbs)
                 return AVERROR(ENOMEM);
 
             tile->ref_mbs = 0;
             if (p || b) {
-                if (tile->num_MBs != ref_tile->num_MBs)
+                if (tile->num_MBs != ref_tile->num_MBs) {
+                    av_log(NULL, AV_LOG_DEBUG, "ref_tile mismatch\n");
                     return AVERROR_INVALIDDATA;
+                }
                 tile->ref_mbs = ref_tile->mbs;
                 ref_tile++;
             }
@@ -429,6 +430,8 @@ av_cold int ff_ivi_init_tiles(IVIPlaneDesc *planes,
             t_width  >>= 1;
             t_height >>= 1;
         }
+        if(t_width<=0 || t_height<=0)
+            return AVERROR(EINVAL);
 
         for (b = 0; b < planes[p].num_bands; b++) {
             band = &planes[p].bands[b];
@@ -437,7 +440,7 @@ av_cold int ff_ivi_init_tiles(IVIPlaneDesc *planes,
             band->num_tiles = x_tiles * y_tiles;
 
             av_freep(&band->tiles);
-            band->tiles = av_mallocz(band->num_tiles * sizeof(IVITile));
+            band->tiles = av_mallocz_array(band->num_tiles, sizeof(IVITile));
             if (!band->tiles)
                 return AVERROR(ENOMEM);
 
@@ -486,10 +489,6 @@ static int ivi_dc_transform(IVIBandDesc *band, int *prev_dc, int buf_offs,
     int buf_size = band->pitch * band->aheight - buf_offs;
     int min_size = (blk_size - 1) * band->pitch + blk_size;
 
-    if (!band->dc_transform)
-        return 0;
-
-
     if (min_size > buf_size)
         return AVERROR_INVALIDDATA;
 
@@ -583,6 +582,11 @@ static int ivi_decode_coded_blocks(GetBitContext *gb, IVIBandDesc *band,
         col_flags[0] |= !!*prev_dc;
     }
 
+    if(band->transform_size > band->blk_size){
+        av_log(NULL, AV_LOG_ERROR, "Too large transform\n");
+        return AVERROR_INVALIDDATA;
+    }
+
     /* apply inverse transform */
     band->inv_transform(trvec, band->buf + offs,
                         band->pitch, col_flags);
@@ -805,6 +809,22 @@ static int ivi_process_empty_tile(AVCodecContext *avctx, IVIBandDesc *band,
                     mb->mv_y = ref_mb->mv_y;
                 }
                 need_mc |= mb->mv_x || mb->mv_y; /* tracking non-zero motion vectors */
+                {
+                    int dmv_x, dmv_y, cx, cy;
+
+                    dmv_x = mb->mv_x >> band->is_halfpel;
+                    dmv_y = mb->mv_y >> band->is_halfpel;
+                    cx    = mb->mv_x &  band->is_halfpel;
+                    cy    = mb->mv_y &  band->is_halfpel;
+
+                    if (   mb->xpos + dmv_x < 0
+                        || mb->xpos + dmv_x + band->mb_size + cx > band->pitch
+                        || mb->ypos + dmv_y < 0
+                        || mb->ypos + dmv_y + band->mb_size + cy > band->aheight) {
+                        av_log(avctx, AV_LOG_ERROR, "MV out of bounds\n");
+                        return AVERROR_INVALIDDATA;
+                    }
+                }
             }
 
             mb++;
@@ -946,6 +966,10 @@ static int decode_band(IVI45DecContext *ctx,
         idx2 = band->corr[i * 2 + 1];
         FFSWAP(uint8_t, band->rv_map->runtab[idx1], band->rv_map->runtab[idx2]);
         FFSWAP(int16_t, band->rv_map->valtab[idx1], band->rv_map->valtab[idx2]);
+        if (idx1 == band->rv_map->eob_sym || idx2 == band->rv_map->eob_sym)
+            band->rv_map->eob_sym ^= idx1 ^ idx2;
+        if (idx1 == band->rv_map->esc_sym || idx2 == band->rv_map->esc_sym)
+            band->rv_map->esc_sym ^= idx1 ^ idx2;
     }
 
     pos = get_bits_count(&ctx->gb);
@@ -969,7 +993,8 @@ static int decode_band(IVI45DecContext *ctx,
             tile->data_size = ivi_dec_tile_data_size(&ctx->gb);
             if (!tile->data_size) {
                 av_log(avctx, AV_LOG_ERROR, "Tile data size is zero!\n");
-                return AVERROR_INVALIDDATA;
+                result = AVERROR_INVALIDDATA;
+                break;
             }
 
             result = ctx->decode_mb_info(ctx, band, tile, avctx);
@@ -1001,6 +1026,10 @@ static int decode_band(IVI45DecContext *ctx,
         idx2 = band->corr[i*2+1];
         FFSWAP(uint8_t, band->rv_map->runtab[idx1], band->rv_map->runtab[idx2]);
         FFSWAP(int16_t, band->rv_map->valtab[idx1], band->rv_map->valtab[idx2]);
+        if (idx1 == band->rv_map->eob_sym || idx2 == band->rv_map->eob_sym)
+            band->rv_map->eob_sym ^= idx1 ^ idx2;
+        if (idx1 == band->rv_map->esc_sym || idx2 == band->rv_map->esc_sym)
+            band->rv_map->esc_sym ^= idx1 ^ idx2;
     }
 
 #ifdef DEBUG
@@ -1068,6 +1097,7 @@ int ff_ivi_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     //{ START_TIMER;
 
     if (ctx->is_nonnull_frame(ctx)) {
+        ctx->buf_invalid[ctx->dst_buf] = 1;
         for (p = 0; p < 3; p++) {
             for (b = 0; b < ctx->planes[p].num_bands; b++) {
                 result = decode_band(ctx, &ctx->planes[p].bands[b], avctx);
@@ -1078,6 +1108,7 @@ int ff_ivi_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                 }
             }
         }
+        ctx->buf_invalid[ctx->dst_buf] = 0;
     } else {
         if (ctx->is_scalable)
             return AVERROR_INVALIDDATA;
@@ -1087,17 +1118,20 @@ int ff_ivi_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                 return AVERROR_INVALIDDATA;
         }
     }
+    if (ctx->buf_invalid[ctx->dst_buf])
+        return -1;
 
     //STOP_TIMER("decode_planes"); }
 
+    if (!ctx->is_nonnull_frame(ctx))
+        return buf_size;
+
     result = ff_set_dimensions(avctx, ctx->planes[0].width, ctx->planes[0].height);
     if (result < 0)
         return result;
 
-    if ((result = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((result = ff_get_buffer(avctx, frame, 0)) < 0)
         return result;
-    }
 
     if (ctx->is_scalable) {
         if (ctx->is_indeo4)
@@ -1121,7 +1155,11 @@ int ff_ivi_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     if (ctx->is_indeo4 && ctx->frame_type == IVI4_FRAMETYPE_INTRA) {
         int left;
 
-        while (get_bits(&ctx->gb, 8)); // skip version string
+            // skip version string
+        while (get_bits(&ctx->gb, 8)) {
+            if (get_bits_left(&ctx->gb) < 8)
+                return AVERROR_INVALIDDATA;
+        }
         left = get_bits_count(&ctx->gb) & 0x18;
         skip_bits_long(&ctx->gb, 64 - left);
         if (get_bits_left(&ctx->gb) > 18 &&
diff --git a/libavcodec/ivi_common.h b/libavcodec/ivi_common.h
index e2cc593..9fec960 100644
--- a/libavcodec/ivi_common.h
+++ b/libavcodec/ivi_common.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2009 Maxim Poliakovski
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -165,6 +165,7 @@ typedef struct IVIBandDesc {
     int             quant_mat;      ///< dequant matrix index
     int             glob_quant;     ///< quant base for this band
     const uint8_t   *scan;          ///< ptr to the scan pattern
+    int             scan_size;      ///< size of the scantable
 
     IVIHuffTab      blk_vlc;        ///< vlc table for decoding block data
 
@@ -264,6 +265,7 @@ typedef struct IVI45DecContext {
     int             (*is_nonnull_frame)(struct IVI45DecContext *ctx);
 
     int gop_invalid;
+    int buf_invalid[4];
 
     int is_indeo4;
 
diff --git a/libavcodec/ivi_dsp.c b/libavcodec/ivi_dsp.c
index ecc49b3..89121ac 100644
--- a/libavcodec/ivi_dsp.c
+++ b/libavcodec/ivi_dsp.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2009-2011 Maxim Poliakovski
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -38,7 +38,7 @@ void ff_ivi_recompose53(const IVIPlaneDesc *plane, uint8_t *dst,
     int32_t         b0_1, b0_2, b1_1, b1_2, b1_3, b2_1, b2_2, b2_3, b2_4, b2_5, b2_6;
     int32_t         b3_1, b3_2, b3_3, b3_4, b3_5, b3_6, b3_7, b3_8, b3_9;
     int32_t         pitch, back_pitch;
-    const short    *b0_ptr, *b1_ptr, *b2_ptr, *b3_ptr;
+    const short     *b0_ptr, *b1_ptr, *b2_ptr, *b3_ptr;
     const int       num_bands = 4;
 
     /* all bands should have the same pitch */
@@ -54,6 +54,9 @@ void ff_ivi_recompose53(const IVIPlaneDesc *plane, uint8_t *dst,
     b3_ptr = plane->bands[3].buf;
 
     for (y = 0; y < plane->height; y += 2) {
+
+        if (y+2 >= plane->height)
+            pitch= 0;
         /* load storage variables with values */
         if (num_bands > 0) {
             b0_1 = b0_ptr[0];
@@ -83,6 +86,13 @@ void ff_ivi_recompose53(const IVIPlaneDesc *plane, uint8_t *dst,
         }
 
         for (x = 0, indx = 0; x < plane->width; x+=2, indx++) {
+            if (x+2 >= plane->width) {
+                b0_ptr --;
+                b1_ptr --;
+                b2_ptr --;
+                b3_ptr --;
+            }
+
             /* some values calculated in the previous iterations can */
             /* be reused in the next ones, so do appropriate copying */
             b2_1 = b2_2; // b2[x-1,y  ] = b2[x,  y  ]
@@ -170,10 +180,10 @@ void ff_ivi_recompose53(const IVIPlaneDesc *plane, uint8_t *dst,
 
         back_pitch = -pitch;
 
-        b0_ptr += pitch;
-        b1_ptr += pitch;
-        b2_ptr += pitch;
-        b3_ptr += pitch;
+        b0_ptr += pitch + 1;
+        b1_ptr += pitch + 1;
+        b2_ptr += pitch + 1;
+        b3_ptr += pitch + 1;
     }
 }
 
@@ -181,7 +191,7 @@ void ff_ivi_recompose_haar(const IVIPlaneDesc *plane, uint8_t *dst,
                            const int dst_pitch)
 {
     int             x, y, indx, b0, b1, b2, b3, p0, p1, p2, p3;
-    const short    *b0_ptr, *b1_ptr, *b2_ptr, *b3_ptr;
+    const short     *b0_ptr, *b1_ptr, *b2_ptr, *b3_ptr;
     int32_t         pitch;
 
     /* all bands should have the same pitch */
@@ -225,15 +235,15 @@ void ff_ivi_recompose_haar(const IVIPlaneDesc *plane, uint8_t *dst,
 
 /** butterfly operation for the inverse Haar transform */
 #define IVI_HAAR_BFLY(s1, s2, o1, o2, t) \
-    t  = (s1 - s2) >> 1;\
-    o1 = (s1 + s2) >> 1;\
-    o2 = t;\
+    t  = ((s1) - (s2)) >> 1;\
+    o1 = ((s1) + (s2)) >> 1;\
+    o2 = (t);\
 
 /** inverse 8-point Haar transform */
 #define INV_HAAR8(s1, s5, s3, s7, s2, s4, s6, s8,\
                   d1, d2, d3, d4, d5, d6, d7, d8,\
                   t0, t1, t2, t3, t4, t5, t6, t7, t8) {\
-    t1 = s1 << 1; t5 = s5 << 1;\
+    t1 = (s1) << 1; t5 = (s5) << 1;\
     IVI_HAAR_BFLY(t1, t5, t1, t5, t0); IVI_HAAR_BFLY(t1, s3, t1, t3, t0);\
     IVI_HAAR_BFLY(t5, s7, t5, t7, t0); IVI_HAAR_BFLY(t1, s2, t1, t2, t0);\
     IVI_HAAR_BFLY(t3, s4, t3, t4, t0); IVI_HAAR_BFLY(t5, s6, t5, t6, t0);\
@@ -475,21 +485,21 @@ void ff_ivi_dc_haar_2d(const int32_t *in, int16_t *out, uint32_t pitch,
 
 /** butterfly operation for the inverse slant transform */
 #define IVI_SLANT_BFLY(s1, s2, o1, o2, t) \
-    t  = s1 - s2;\
-    o1 = s1 + s2;\
-    o2 = t;\
+    t  = (s1) - (s2);\
+    o1 = (s1) + (s2);\
+    o2 = (t);\
 
 /** This is a reflection a,b = 1/2, 5/4 for the inverse slant transform */
 #define IVI_IREFLECT(s1, s2, o1, o2, t) \
-    t  = ((s1 + s2*2 + 2) >> 2) + s1;\
-    o2 = ((s1*2 - s2 + 2) >> 2) - s2;\
-    o1 = t;\
+    t  = (((s1) + (s2)*2 + 2) >> 2) + (s1);\
+    o2 = (((s1)*2 - (s2) + 2) >> 2) - (s2);\
+    o1 = (t);\
 
 /** This is a reflection a,b = 1/2, 7/8 for the inverse slant transform */
 #define IVI_SLANT_PART4(s1, s2, o1, o2, t) \
-    t  = s2 + ((s1*4  - s2 + 4) >> 3);\
-    o2 = s1 + ((-s1 - s2*4 + 4) >> 3);\
-    o1 = t;\
+    t  = (s2) + (((s1)*4  - (s2) + 4) >> 3);\
+    o2 = (s1) + ((-(s1) - (s2)*4 + 4) >> 3);\
+    o1 = (t);\
 
 /** inverse slant8 transform */
 #define IVI_INV_SLANT8(s1, s4, s8, s5, s2, s6, s3, s7,\
@@ -547,7 +557,7 @@ void ff_ivi_inverse_slant_8x8(const int32_t *in, int16_t *out, uint32_t pitch, c
     }
 #undef COMPENSATE
 
-#define COMPENSATE(x) ((x + 1)>>1)
+#define COMPENSATE(x) (((x) + 1)>>1)
     src = tmp;
     for (i = 0; i < 8; i++) {
         if (!src[0] && !src[1] && !src[2] && !src[3] && !src[4] && !src[5] && !src[6] && !src[7]) {
@@ -587,7 +597,7 @@ void ff_ivi_inverse_slant_4x4(const int32_t *in, int16_t *out, uint32_t pitch, c
     }
 #undef COMPENSATE
 
-#define COMPENSATE(x) ((x + 1)>>1)
+#define COMPENSATE(x) (((x) + 1)>>1)
     src = tmp;
     for (i = 0; i < 4; i++) {
         if (!src[0] && !src[1] && !src[2] && !src[3]) {
@@ -621,7 +631,7 @@ void ff_ivi_row_slant8(const int32_t *in, int16_t *out, uint32_t pitch, const ui
     int     i;
     int     t0, t1, t2, t3, t4, t5, t6, t7, t8;
 
-#define COMPENSATE(x) ((x + 1)>>1)
+#define COMPENSATE(x) (((x) + 1)>>1)
     for (i = 0; i < 8; i++) {
         if (!in[0] && !in[1] && !in[2] && !in[3] && !in[4] && !in[5] && !in[6] && !in[7]) {
             memset(out, 0, 8*sizeof(out[0]));
@@ -663,7 +673,7 @@ void ff_ivi_col_slant8(const int32_t *in, int16_t *out, uint32_t pitch, const ui
     row4 = pitch << 2;
     row8 = pitch << 3;
 
-#define COMPENSATE(x) ((x + 1)>>1)
+#define COMPENSATE(x) (((x) + 1)>>1)
     for (i = 0; i < 8; i++) {
         if (flags[i]) {
             IVI_INV_SLANT8(in[0], in[8], in[16], in[24], in[32], in[40], in[48], in[56],
@@ -700,7 +710,7 @@ void ff_ivi_row_slant4(const int32_t *in, int16_t *out, uint32_t pitch, const ui
     int     i;
     int     t0, t1, t2, t3, t4;
 
-#define COMPENSATE(x) ((x + 1)>>1)
+#define COMPENSATE(x) (((x) + 1)>>1)
     for (i = 0; i < 4; i++) {
         if (!in[0] && !in[1] && !in[2] && !in[3]) {
             memset(out, 0, 4*sizeof(out[0]));
@@ -722,7 +732,7 @@ void ff_ivi_col_slant4(const int32_t *in, int16_t *out, uint32_t pitch, const ui
 
     row2 = pitch << 1;
 
-#define COMPENSATE(x) ((x + 1)>>1)
+#define COMPENSATE(x) (((x) + 1)>>1)
     for (i = 0; i < 4; i++) {
         if (flags[i]) {
             IVI_INV_SLANT4(in[0], in[4], in[8], in[12],
diff --git a/libavcodec/ivi_dsp.h b/libavcodec/ivi_dsp.h
index 11c2f5c..15562b6 100644
--- a/libavcodec/ivi_dsp.h
+++ b/libavcodec/ivi_dsp.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2009-2011 Maxim Poliakovski
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -64,6 +64,10 @@ void ff_ivi_recompose_haar(const IVIPlaneDesc *plane, uint8_t *dst,
  */
 void ff_ivi_inverse_haar_8x8(const int32_t *in, int16_t *out, uint32_t pitch,
                              const uint8_t *flags);
+void ff_ivi_inverse_haar_8x1(const int32_t *in, int16_t *out, uint32_t pitch,
+                             const uint8_t *flags);
+void ff_ivi_inverse_haar_1x8(const int32_t *in, int16_t *out, uint32_t pitch,
+                             const uint8_t *flags);
 
 /**
  *  one-dimensional inverse 8-point Haar transform on rows for Indeo 4
diff --git a/libavcodec/j2kenc.c b/libavcodec/j2kenc.c
new file mode 100644
index 0000000..ddb0b68
--- /dev/null
+++ b/libavcodec/j2kenc.c
@@ -0,0 +1,1055 @@
+/*
+ * JPEG2000 image encoder
+ * Copyright (c) 2007 Kamil Nowosad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * JPEG2000 image encoder
+ * @file
+ * @author Kamil Nowosad
+ */
+
+#include <float.h>
+#include "avcodec.h"
+#include "internal.h"
+#include "bytestream.h"
+#include "jpeg2000.h"
+#include "libavutil/common.h"
+
+#define NMSEDEC_BITS 7 ///< log2 of the number of entries in each lut_nmsedec_* table
+#define NMSEDEC_FRACBITS (NMSEDEC_BITS-1) ///< shift amount used by init_luts() and the getnmsedec_*() helpers
+#define WMSEDEC_SHIFT 13 ///< must be >= 13 (LAMBDA_SCALE shifts by WMSEDEC_SHIFT - 13)
+#define LAMBDA_SCALE (100000000LL << (WMSEDEC_SHIFT - 13))
+
+static int lut_nmsedec_ref [1<<NMSEDEC_BITS], ///< filled by init_luts(), read by getnmsedec_ref()
+           lut_nmsedec_ref0[1<<NMSEDEC_BITS], ///< filled by init_luts(), read by getnmsedec_ref()
+           lut_nmsedec_sig [1<<NMSEDEC_BITS], ///< filled by init_luts(), read by getnmsedec_sig()
+           lut_nmsedec_sig0[1<<NMSEDEC_BITS]; ///< filled by init_luts(), read by getnmsedec_sig()
+
+static const int dwt_norms[2][4][10] = { // [dwt_type][band][rlevel] (multiplied by 10000); rows with 9 initializers end in an implicit 0
+    {{10000, 19650, 41770,  84030, 169000, 338400,  676900, 1353000, 2706000, 5409000},
+     {20220, 39890, 83550, 170400, 342700, 686300, 1373000, 2746000, 5490000},
+     {20220, 39890, 83550, 170400, 342700, 686300, 1373000, 2746000, 5490000},
+     {20800, 38650, 83070, 171800, 347100, 695900, 1393000, 2786000, 5572000}},
+
+    {{10000, 15000, 27500, 53750, 106800, 213400, 426700, 853300, 1707000, 3413000},
+     {10380, 15920, 29190, 57030, 113300, 226400, 452500, 904800, 1809000},
+     {10380, 15920, 29190, 57030, 113300, 226400, 452500, 904800, 1809000},
+     { 7186,  9218, 15860, 30430,  60190, 120100, 240000, 479700,  959300}}
+};
+
+typedef struct {
+   Jpeg2000Component *comp; ///< array of ncomponents entries, allocated in init_tiles()
+} Jpeg2000Tile;
+
+typedef struct {
+    AVCodecContext *avctx;
+    const AVFrame *picture; ///< frame whose samples copy_frame() reads
+
+    int width, height; ///< image width and height
+    uint8_t cbps[4]; ///< bits per sample in particular components
+    int chroma_shift[2]; ///< log2 subsampling of components > 0: [0] is written as XRsiz, [1] as YRsiz (see put_siz())
+    uint8_t planar; ///< nonzero: copy_frame() reads one picture plane per component; zero: interleaved samples from plane 0
+    int ncomponents; ///< number of image components
+    int tile_width, tile_height; ///< tile size
+    int numXtiles, numYtiles; ///< tile grid dimensions, computed in init_tiles()
+
+    uint8_t *buf_start;
+    uint8_t *buf; ///< current write position of the put_*() routines
+    uint8_t *buf_end; ///< limit used by the space checks in put_siz()/put_cod()/put_qcd()/put_sot()
+    int bit_index; ///< bit position inside *buf for put_bits(): 0 = MSB, 8 = byte full
+
+    int64_t lambda;
+
+    Jpeg2000CodingStyle codsty;
+    Jpeg2000QuantStyle  qntsty;
+
+    Jpeg2000Tile *tile; ///< numXtiles * numYtiles tiles, allocated in init_tiles()
+} Jpeg2000EncoderContext;
+
+
+/* debug helpers, compiled out. NOTE(review): dump() reads s->nreslevels, which Jpeg2000EncoderContext does not declare; update before enabling */
+#if 0
+#undef ifprintf
+#undef printf
+
+static void nspaces(FILE *fd, int n) // write n spaces to fd
+{
+    while(n--) putc(' ', fd);
+}
+
+static void printcomp(Jpeg2000Component *comp) // print comp->i_data row by row through ff_jpeg2000_printv()
+{
+    int i;
+    for (i = 0; i < comp->y1 - comp->y0; i++)
+        ff_jpeg2000_printv(comp->i_data + i * (comp->x1 - comp->x0), comp->x1 - comp->x0);
+}
+
+static void dump(Jpeg2000EncoderContext *s, FILE *fd) // print the tile / component / reslevel / band / precinct geometry to fd
+{
+    int tileno, compno, reslevelno, bandno, precno;
+    fprintf(fd, "XSiz = %d, YSiz = %d, tile_width = %d, tile_height = %d\n"
+                "numXtiles = %d, numYtiles = %d, ncomponents = %d\n"
+                "tiles:\n",
+            s->width, s->height, s->tile_width, s->tile_height,
+            s->numXtiles, s->numYtiles, s->ncomponents);
+    for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++){
+        Jpeg2000Tile *tile = s->tile + tileno;
+        nspaces(fd, 2);
+        fprintf(fd, "tile %d:\n", tileno);
+        for(compno = 0; compno < s->ncomponents; compno++){
+            Jpeg2000Component *comp = tile->comp + compno;
+            nspaces(fd, 4);
+            fprintf(fd, "component %d:\n", compno);
+            nspaces(fd, 4);
+            fprintf(fd, "x0 = %d, x1 = %d, y0 = %d, y1 = %d\n",
+                        comp->x0, comp->x1, comp->y0, comp->y1);
+            for(reslevelno = 0; reslevelno < s->nreslevels; reslevelno++){
+                Jpeg2000ResLevel *reslevel = comp->reslevel + reslevelno;
+                nspaces(fd, 6);
+                fprintf(fd, "reslevel %d:\n", reslevelno);
+                nspaces(fd, 6);
+                fprintf(fd, "x0 = %d, x1 = %d, y0 = %d, y1 = %d, nbands = %d\n",
+                        reslevel->x0, reslevel->x1, reslevel->y0,
+                        reslevel->y1, reslevel->nbands);
+                for(bandno = 0; bandno < reslevel->nbands; bandno++){
+                    Jpeg2000Band *band = reslevel->band + bandno;
+                    nspaces(fd, 8);
+                    fprintf(fd, "band %d:\n", bandno);
+                    nspaces(fd, 8);
+                    fprintf(fd, "x0 = %d, x1 = %d, y0 = %d, y1 = %d,"
+                                "codeblock_width = %d, codeblock_height = %d cblknx = %d cblkny = %d\n",
+                                band->x0, band->x1,
+                                band->y0, band->y1,
+                                band->codeblock_width, band->codeblock_height,
+                                band->cblknx, band->cblkny);
+                    for (precno = 0; precno < reslevel->num_precincts_x * reslevel->num_precincts_y; precno++){
+                        Jpeg2000Prec *prec = band->prec + precno;
+                        nspaces(fd, 10);
+                        fprintf(fd, "prec %d:\n", precno);
+                        nspaces(fd, 10);
+                        fprintf(fd, "xi0 = %d, xi1 = %d, yi0 = %d, yi1 = %d\n",
+                                     prec->xi0, prec->xi1, prec->yi0, prec->yi1);
+                    }
+                }
+            }
+        }
+    }
+}
+#endif
+
+/* bitstream routines */
+
+/** Append the bit val to the output n times, MSB first; no-op for n <= 0. val is ORed in unmasked (callers pass 0 or 1); s->buf_end is not checked here. */
+static void put_bits(Jpeg2000EncoderContext *s, int val, int n) // TODO: optimize
+{
+    while (n-- > 0){
+        if (s->bit_index == 8) // current byte is full
+        {
+            s->bit_index = *s->buf == 0xff; // after a 0xff byte start at bit 1, leaving the next byte's top bit 0
+            *(++s->buf) = 0; // advance to a fresh, cleared byte
+        }
+        *s->buf |= val << (7 - s->bit_index++);
+    }
+}
+
+/** Write the n low-order bits of num to the bitstream, the highest of them first. */
+static void put_num(Jpeg2000EncoderContext *s, int num, int n)
+{
+    for (n--; n >= 0; n--)
+        put_bits(s, (num >> n) & 1, 1);
+}
+
+/** Byte-align the bit writer: when the current byte holds any bits, step past it. */
+static void j2k_flush(Jpeg2000EncoderContext *s)
+{
+    if (!s->bit_index)
+        return; /* nothing pending in the current byte */
+    s->buf++;
+    s->bit_index = 0;
+}
+
+/* tag tree routines */
+
+/** Code node's value against threshold: for node and its not-yet-visited ancestors, top-most first, emit zeros up to each value then a 1; stop with zeros up to threshold once a value reaches it. */
+static void tag_tree_code(Jpeg2000EncoderContext *s, Jpeg2000TgtNode *node, int threshold)
+{
+    Jpeg2000TgtNode *stack[30]; // node plus its unvisited ancestors; NOTE(review): depth is not checked against 30
+    int sp = 1, curval = 0;
+    stack[0] = node;
+
+    node = node->parent;
+    while(node){
+        if (node->vis){ // ancestor already handled by an earlier call: resume from its value
+            curval = node->val;
+            break;
+        }
+        node->vis++; // mark as visited for later calls
+        stack[sp++] = node;
+        node = node->parent;
+    }
+    while(--sp >= 0){ // pop from the top-most collected node down to the original node
+        if (stack[sp]->val >= threshold){
+            put_bits(s, 0, threshold - curval); // value not below threshold: only zeros, no terminating 1
+            break;
+        }
+        put_bits(s, 0, stack[sp]->val - curval);
+        put_bits(s, 1, 1);
+        curval = stack[sp]->val;
+    }
+}
+
+/** Propagate node's value towards the root: every ancestor holding a larger value is lowered to it. */
+static void tag_tree_update(Jpeg2000TgtNode *node)
+{
+    while (node->parent){
+        Jpeg2000TgtNode *parent = node->parent;
+        if (parent->val <= node->val)
+            break; /* this ancestor is already no larger: stop climbing */
+        /* lower the parent to the child's value and continue from the parent */
+        parent->val = node->val;
+        node = parent;
+    }
+}
+
+static int put_siz(Jpeg2000EncoderContext *s)
+{ /* Write the SIZ marker segment at s->buf; returns -1 if fewer than 40 + 3*ncomponents bytes remain, else 0. */
+    int i;
+
+    if (s->buf_end - s->buf < 40 + 3 * s->ncomponents) // marker (2) + segment (38 + 3 per component)
+        return -1;
+
+    bytestream_put_be16(&s->buf, JPEG2000_SIZ);
+    bytestream_put_be16(&s->buf, 38 + 3 * s->ncomponents); // Lsiz
+    bytestream_put_be16(&s->buf, 0); // Rsiz
+    bytestream_put_be32(&s->buf, s->width); // width
+    bytestream_put_be32(&s->buf, s->height); // height
+    bytestream_put_be32(&s->buf, 0); // X0Siz
+    bytestream_put_be32(&s->buf, 0); // Y0Siz
+
+    bytestream_put_be32(&s->buf, s->tile_width); // XTSiz
+    bytestream_put_be32(&s->buf, s->tile_height); // YTSiz
+    bytestream_put_be32(&s->buf, 0); // XT0Siz
+    bytestream_put_be32(&s->buf, 0); // YT0Siz
+    bytestream_put_be16(&s->buf, s->ncomponents); // CSiz
+
+    for (i = 0; i < s->ncomponents; i++){ // Ssiz_i XRsiz_i, YRsiz_i
+        bytestream_put_byte(&s->buf, 7); // hard-coded; s->cbps[] is not consulted (presumably depth - 1 for 8-bit unsigned; TODO confirm)
+        bytestream_put_byte(&s->buf, i?1<<s->chroma_shift[0]:1); // component 0 is never subsampled
+        bytestream_put_byte(&s->buf, i?1<<s->chroma_shift[1]:1);
+    }
+    return 0;
+}
+
+static int put_cod(Jpeg2000EncoderContext *s)
+{ /* Write the COD marker segment (14 bytes) from s->codsty; returns -1 if fewer than 14 bytes remain, else 0. */
+    Jpeg2000CodingStyle *codsty = &s->codsty;
+
+    if (s->buf_end - s->buf < 14)
+        return -1;
+
+    bytestream_put_be16(&s->buf, JPEG2000_COD);
+    bytestream_put_be16(&s->buf, 12); // Lcod
+    bytestream_put_byte(&s->buf, 0);  // Scod
+    // SGcod
+    bytestream_put_byte(&s->buf, 0); // progression level
+    bytestream_put_be16(&s->buf, 1); // num of layers
+    if(s->avctx->pix_fmt == AV_PIX_FMT_YUV444P){
+        bytestream_put_byte(&s->buf, 2); // ICT
+    }else{
+        bytestream_put_byte(&s->buf, 0); // unspecified
+    }
+    // SPcod
+    bytestream_put_byte(&s->buf, codsty->nreslevels - 1); // num of decomp. levels
+    bytestream_put_byte(&s->buf, codsty->log2_cblk_width-2); // cblk width
+    bytestream_put_byte(&s->buf, codsty->log2_cblk_height-2); // cblk height
+    bytestream_put_byte(&s->buf, 0); // cblk style
+    bytestream_put_byte(&s->buf, codsty->transform == FF_DWT53); // transformation: 1 for FF_DWT53, 0 otherwise
+    return 0;
+}
+
+static int put_qcd(Jpeg2000EncoderContext *s, int compno)
+{ /* Write the QCD marker segment from s->qntsty; returns -1 if it does not fit, else 0. compno is not used. */
+    int i, size;
+    Jpeg2000CodingStyle *codsty = &s->codsty;
+    Jpeg2000QuantStyle  *qntsty = &s->qntsty;
+
+    if (qntsty->quantsty == JPEG2000_QSTY_NONE)
+        size = 4 + 3 * (codsty->nreslevels-1); // Lqcd (2) + Sqcd (1) + one byte per band
+    else // QSTY_SE
+        size = 5 + 6 * (codsty->nreslevels-1); // Lqcd (2) + Sqcd (1) + two bytes per band
+
+    if (s->buf_end - s->buf < size + 2) // + 2 for the marker itself
+        return -1;
+
+    bytestream_put_be16(&s->buf, JPEG2000_QCD);
+    bytestream_put_be16(&s->buf, size);  // LQcd
+    bytestream_put_byte(&s->buf, (qntsty->nguardbits << 5) | qntsty->quantsty);  // Sqcd
+    if (qntsty->quantsty == JPEG2000_QSTY_NONE)
+        for (i = 0; i < codsty->nreslevels * 3 - 2; i++) // 1 band at level 0, 3 at each further level
+            bytestream_put_byte(&s->buf, qntsty->expn[i] << 3);
+    else // QSTY_SE
+        for (i = 0; i < codsty->nreslevels * 3 - 2; i++)
+            bytestream_put_be16(&s->buf, (qntsty->expn[i] << 11) | qntsty->mant[i]);
+    return 0;
+}
+
+static uint8_t *put_sot(Jpeg2000EncoderContext *s, int tileno)
+{ /* Write the SOT marker segment (12 bytes) for tileno; returns where Psot was written, or NULL if it does not fit. */
+    uint8_t *psotptr;
+
+    if (s->buf_end - s->buf < 12)
+        return NULL;
+
+    bytestream_put_be16(&s->buf, JPEG2000_SOT);
+    bytestream_put_be16(&s->buf, 10); // Lsot
+    bytestream_put_be16(&s->buf, tileno); // Isot
+
+    psotptr = s->buf; // remember the Psot position so the caller can patch it
+    bytestream_put_be32(&s->buf, 0); // Psot (filled in later)
+
+    bytestream_put_byte(&s->buf, 0); // TPsot
+    bytestream_put_byte(&s->buf, 1); // TNsot
+    return psotptr;
+}
+
+/**
+ * Compute the tile grid and the coordinates of every tile-component, allocate
+ * s->tile and each tile's component array, and initialize every component.
+ * @return 0 on success, AVERROR(ENOMEM) or the nonzero code of ff_jpeg2000_init_component()
+ */
+static int init_tiles(Jpeg2000EncoderContext *s)
+{
+    int tileno, tilex, tiley, compno;
+    Jpeg2000CodingStyle *codsty = &s->codsty;
+    Jpeg2000QuantStyle  *qntsty = &s->qntsty;
+
+    s->numXtiles = ff_jpeg2000_ceildiv(s->width, s->tile_width);
+    s->numYtiles = ff_jpeg2000_ceildiv(s->height, s->tile_height);
+    /* zero-filled so tiles not reached before an error return have comp == NULL rather than an indeterminate pointer */
+    s->tile = av_mallocz_array(s->numXtiles, s->numYtiles * sizeof(Jpeg2000Tile));
+    if (!s->tile)
+        return AVERROR(ENOMEM);
+    for (tileno = 0, tiley = 0; tiley < s->numYtiles; tiley++)
+        for (tilex = 0; tilex < s->numXtiles; tilex++, tileno++){
+            Jpeg2000Tile *tile = s->tile + tileno;
+
+            tile->comp = av_mallocz_array(s->ncomponents, sizeof(Jpeg2000Component));
+            if (!tile->comp)
+                return AVERROR(ENOMEM); /* nothing allocated so far is released here */
+            for (compno = 0; compno < s->ncomponents; compno++){
+                Jpeg2000Component *comp = tile->comp + compno;
+                int ret, i, j;
+                /* tile rectangle in image coordinates, clipped to the image size */
+                comp->coord[0][0] = comp->coord_o[0][0] = tilex * s->tile_width;
+                comp->coord[0][1] = comp->coord_o[0][1] = FFMIN((tilex+1)*s->tile_width, s->width);
+                comp->coord[1][0] = comp->coord_o[1][0] = tiley * s->tile_height;
+                comp->coord[1][1] = comp->coord_o[1][1] = FFMIN((tiley+1)*s->tile_height, s->height);
+                if (compno > 0) /* components after the first are scaled down by the chroma shifts */
+                    for (i = 0; i < 2; i++)
+                        for (j = 0; j < 2; j++)
+                            comp->coord[i][j] = comp->coord_o[i][j] = ff_jpeg2000_ceildivpow2(comp->coord[i][j], s->chroma_shift[i]);
+
+                if ((ret = ff_jpeg2000_init_component(comp,
+                                                codsty,
+                                                qntsty,
+                                                s->cbps[compno],
+                                                compno?1<<s->chroma_shift[0]:1,
+                                                compno?1<<s->chroma_shift[1]:1,
+                                                s->avctx
+                                               )) != 0)
+                    return ret;
+            }
+        }
+    return 0;
+}
+
+static void copy_frame(Jpeg2000EncoderContext *s)
+{ /* Copy the 8-bit samples of s->picture into each tile-component's i_data, subtracting 128 from every sample. */
+    int tileno, compno, i, y, x;
+    uint8_t *line;
+    for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++){
+        Jpeg2000Tile *tile = s->tile + tileno;
+        if (s->planar){ // one picture plane per component
+            for (compno = 0; compno < s->ncomponents; compno++){
+                Jpeg2000Component *comp = tile->comp + compno;
+                int *dst = comp->i_data;
+                line = s->picture->data[compno]
+                       + comp->coord[1][0] * s->picture->linesize[compno]
+                       + comp->coord[0][0]; // top-left sample of this tile-component in its plane
+                for (y = comp->coord[1][0]; y < comp->coord[1][1]; y++){
+                    uint8_t *ptr = line;
+                    for (x = comp->coord[0][0]; x < comp->coord[0][1]; x++)
+                        *dst++ = *ptr++ - (1 << 7);
+                    line += s->picture->linesize[compno];
+                }
+            }
+        } else{ // interleaved: ncomponents bytes per pixel in plane 0; component 0's rectangle is used for all
+            line = s->picture->data[0] + tile->comp[0].coord[1][0] * s->picture->linesize[0]
+                   + tile->comp[0].coord[0][0] * s->ncomponents;
+
+            i = 0; // running index into each component's i_data
+            for (y = tile->comp[0].coord[1][0]; y < tile->comp[0].coord[1][1]; y++){
+                uint8_t *ptr = line;
+                for (x = tile->comp[0].coord[0][0]; x < tile->comp[0].coord[0][1]; x++, i++){
+                    for (compno = 0; compno < s->ncomponents; compno++){
+                        tile->comp[compno].i_data[i] = *ptr++  - (1 << 7);
+                    }
+                }
+                line += s->picture->linesize[0];
+            }
+        }
+    }
+}
+
+static void init_quantization(Jpeg2000EncoderContext *s)
+{ /* Fill s->qntsty.expn[] / mant[] for every band; each component pass rewrites the same shared entries. */
+    int compno, reslevelno, bandno;
+    Jpeg2000QuantStyle  *qntsty = &s->qntsty;
+    Jpeg2000CodingStyle *codsty = &s->codsty;
+
+    for (compno = 0; compno < s->ncomponents; compno++){
+        int gbandno = 0; // band index across all resolution levels
+        for (reslevelno = 0; reslevelno < codsty->nreslevels; reslevelno++){
+            int nbands, lev = codsty->nreslevels - reslevelno - 1;
+            nbands = reslevelno ? 3 : 1;
+            for (bandno = 0; bandno < nbands; bandno++, gbandno++){
+                int expn, mant = 0; // mant is only computed for FF_DWT97_INT; keep it defined (0) otherwise
+
+                if (codsty->transform == FF_DWT97_INT){
+                    int bandpos = bandno + (reslevelno>0),
+                        ss = 81920000 / dwt_norms[0][bandpos][lev],
+                        log = av_log2(ss);
+                    mant = (11 - log < 0 ? ss >> log - 11 : ss << 11 - log) & 0x7ff; // ss normalized to 11 bits below its leading one
+                    expn = s->cbps[compno] - log + 13;
+                } else
+                    expn = ((bandno&2)>>1) + (reslevelno>0) + s->cbps[compno];
+
+                qntsty->expn[gbandno] = expn;
+                qntsty->mant[gbandno] = mant;
+            }
+        }
+    }
+}
+
+static void init_luts(void)
+{ /* Fill the four lut_nmsedec_* tables; same values as before, written so that no negative int is left-shifted. */
+    int i, a,
+        mask = ~((1<<NMSEDEC_FRACBITS)-1);
+
+    for (i = 0; i < (1 << NMSEDEC_BITS); i++){
+        lut_nmsedec_sig[i]  = FFMAX((3*i << (13-NMSEDEC_FRACBITS)) - (9 << 11), 0);
+        lut_nmsedec_sig0[i] = FFMAX(((i*i + (1 << (NMSEDEC_FRACBITS-1))) & mask) << 1, 0);
+
+        a = ((i >> (NMSEDEC_BITS-2)) & 2) + 1; /* 1 or 3 */
+        lut_nmsedec_ref[i]  = FFMAX((a-2) * (i << (13-NMSEDEC_FRACBITS))
+                                    + (1 << 13) - (a*a << 11), 0);
+        lut_nmsedec_ref0[i] = FFMAX(((i*i - (i << NMSEDEC_BITS) + (1 << (NMSEDEC_FRACBITS-1))
+                                      + (1 << (2*NMSEDEC_FRACBITS))) & mask) << 1, 0);
+    }
+}
+
+/* tier-1 routines */
+static int getnmsedec_sig(int x, int bpno)
+{
+    /* Table lookup: above NMSEDEC_FRACBITS index lut_nmsedec_sig with x shifted down, otherwise lut_nmsedec_sig0 with x itself. */
+    const int idx_mask = (1 << NMSEDEC_BITS) - 1;
+    return bpno <= NMSEDEC_FRACBITS ? lut_nmsedec_sig0[x & idx_mask] : lut_nmsedec_sig[(x >> (bpno - NMSEDEC_FRACBITS)) & idx_mask];
+}
+
+static int getnmsedec_ref(int x, int bpno)
+{
+    /* Table lookup: above NMSEDEC_FRACBITS index lut_nmsedec_ref with x shifted down, otherwise lut_nmsedec_ref0 with x itself. */
+    const int idx_mask = (1 << NMSEDEC_BITS) - 1;
+    return bpno <= NMSEDEC_FRACBITS ? lut_nmsedec_ref0[x & idx_mask] : lut_nmsedec_ref[(x >> (bpno - NMSEDEC_FRACBITS)) & idx_mask];
+}
+
+static void encode_sigpass(Jpeg2000T1Context *t1, int width, int height, int bandno, int *nmsedec, int bpno)
+{ /* For each coefficient without JPEG2000_T1_SIG but with JPEG2000_T1_SIG_NB set, code its bpno bit (and sign if set) and flag it JPEG2000_T1_VIS. */
+    int y0, x, y, mask = 1 << (bpno + NMSEDEC_FRACBITS);
+    for (y0 = 0; y0 < height; y0 += 4) // scan in stripes of 4 rows, column by column
+        for (x = 0; x < width; x++)
+            for (y = y0; y < height && y < y0+4; y++){
+                if (!(t1->flags[y+1][x+1] & JPEG2000_T1_SIG) && (t1->flags[y+1][x+1] & JPEG2000_T1_SIG_NB)){ // flags are indexed with a +1 offset
+                    int ctxno = ff_jpeg2000_getsigctxno(t1->flags[y+1][x+1], bandno),
+                        bit = t1->data[y][x] & mask ? 1 : 0;
+                    ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, bit);
+                    if (bit){ // coefficient becomes significant in this bit-plane
+                        int xorbit;
+                        int ctxno = ff_jpeg2000_getsgnctxno(t1->flags[y+1][x+1], &xorbit); // shadows the outer ctxno
+                        ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, (t1->flags[y+1][x+1] >> 15) ^ xorbit); // flags >> 15: presumably the sign; TODO confirm
+                        *nmsedec += getnmsedec_sig(t1->data[y][x], bpno + NMSEDEC_FRACBITS);
+                        ff_jpeg2000_set_significance(t1, x, y, t1->flags[y+1][x+1] >> 15);
+                    }
+                    t1->flags[y+1][x+1] |= JPEG2000_T1_VIS;
+                }
+            }
+}
+
+static void encode_refpass(Jpeg2000T1Context *t1, int width, int height, int *nmsedec, int bpno)
+{ /* For each coefficient with JPEG2000_T1_SIG set and JPEG2000_T1_VIS clear, code its bpno bit and flag it JPEG2000_T1_REF. */
+    int y0, x, y, mask = 1 << (bpno + NMSEDEC_FRACBITS);
+    for (y0 = 0; y0 < height; y0 += 4) // scan in stripes of 4 rows, column by column
+        for (x = 0; x < width; x++)
+            for (y = y0; y < height && y < y0+4; y++)
+                if ((t1->flags[y+1][x+1] & (JPEG2000_T1_SIG | JPEG2000_T1_VIS)) == JPEG2000_T1_SIG){ // flags are indexed with a +1 offset
+                    int ctxno = ff_jpeg2000_getrefctxno(t1->flags[y+1][x+1]);
+                    *nmsedec += getnmsedec_ref(t1->data[y][x], bpno + NMSEDEC_FRACBITS);
+                    ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, t1->data[y][x] & mask ? 1:0);
+                    t1->flags[y+1][x+1] |= JPEG2000_T1_REF;
+                }
+}
+
+static void encode_clnpass(Jpeg2000T1Context *t1, int width, int height, int bandno, int *nmsedec, int bpno)
+{
+    int y0, x, y, mask = 1 << (bpno + NMSEDEC_FRACBITS); // mask: the data bit coded by this cleanup pass
+    for (y0 = 0; y0 < height; y0 += 4) // stripes of four rows, visited column by column
+        for (x = 0; x < width; x++){
+            if (y0 + 3 < height && !(
+            (t1->flags[y0+1][x+1] & (JPEG2000_T1_SIG_NB | JPEG2000_T1_VIS | JPEG2000_T1_SIG)) ||
+            (t1->flags[y0+2][x+1] & (JPEG2000_T1_SIG_NB | JPEG2000_T1_VIS | JPEG2000_T1_SIG)) ||
+            (t1->flags[y0+3][x+1] & (JPEG2000_T1_SIG_NB | JPEG2000_T1_VIS | JPEG2000_T1_SIG)) ||
+            (t1->flags[y0+4][x+1] & (JPEG2000_T1_SIG_NB | JPEG2000_T1_VIS | JPEG2000_T1_SIG))))
+            { // complete 4-row column in which no sample carries SIG_NB, VIS or SIG
+                // aggregation mode: a single run-length decision covers the whole column
+                int rlen; // index of the first row whose masked bit is set, 4 if there is none
+                for (rlen = 0; rlen < 4; rlen++)
+                    if (t1->data[y0+rlen][x] & mask)
+                        break;
+                ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + MQC_CX_RL, rlen != 4); // 1 = the column contains a set bit
+                if (rlen == 4)
+                    continue;
+                ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + MQC_CX_UNI, rlen >> 1); // rlen follows as two bits, high bit first
+                ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + MQC_CX_UNI, rlen & 1);
+                for (y = y0 + rlen; y < y0 + 4; y++){
+                    if (!(t1->flags[y+1][x+1] & (JPEG2000_T1_SIG | JPEG2000_T1_VIS))){
+                        int ctxno = ff_jpeg2000_getsigctxno(t1->flags[y+1][x+1], bandno);
+                        if (y > y0 + rlen) // the bit of row y0+rlen is already implied by rlen, so it is not coded again
+                            ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, t1->data[y][x] & mask ? 1:0);
+                        if (t1->data[y][x] & mask){ // newly significant
+                            int xorbit;
+                            int ctxno = ff_jpeg2000_getsgnctxno(t1->flags[y+1][x+1], &xorbit);
+                            *nmsedec += getnmsedec_sig(t1->data[y][x], bpno + NMSEDEC_FRACBITS);
+                            ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, (t1->flags[y+1][x+1] >> 15) ^ xorbit); // flag word from bit 15 up (presumably the sign flag - confirm), XORed with xorbit
+                            ff_jpeg2000_set_significance(t1, x, y, t1->flags[y+1][x+1] >> 15);
+                        }
+                    }
+                    t1->flags[y+1][x+1] &= ~JPEG2000_T1_VIS; // clear the VIS mark on every sample of the column
+                }
+            } else{ // otherwise every sample of the column is examined individually
+                for (y = y0; y < y0 + 4 && y < height; y++){
+                    if (!(t1->flags[y+1][x+1] & (JPEG2000_T1_SIG | JPEG2000_T1_VIS))){
+                        int ctxno = ff_jpeg2000_getsigctxno(t1->flags[y+1][x+1], bandno);
+                        ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, t1->data[y][x] & mask ? 1:0);
+                        if (t1->data[y][x] & mask){ // newly significant
+                            int xorbit;
+                            int ctxno = ff_jpeg2000_getsgnctxno(t1->flags[y+1][x+1], &xorbit);
+                            *nmsedec += getnmsedec_sig(t1->data[y][x], bpno + NMSEDEC_FRACBITS);
+                            ff_mqc_encode(&t1->mqc, t1->mqc.cx_states + ctxno, (t1->flags[y+1][x+1] >> 15) ^ xorbit); // same sign coding as in aggregation mode
+                            ff_jpeg2000_set_significance(t1, x, y, t1->flags[y+1][x+1] >> 15);
+                        }
+                    }
+                    t1->flags[y+1][x+1] &= ~JPEG2000_T1_VIS; // clear the VIS mark
+                }
+            }
+        }
+}
+
+static void encode_cblk(Jpeg2000EncoderContext *s, Jpeg2000T1Context *t1, Jpeg2000Cblk *cblk, Jpeg2000Tile *tile,
+                        int width, int height, int bandpos, int lev)
+{
+    int pass_t = 2, passno, x, y, max=0, nmsedec, bpno; // pass_t: 0 = significance, 1 = refinement, 2 = cleanup
+    int64_t wmsedec = 0;                                // running weighted distortion, stored per pass
+    /* Codes the width x height samples in t1->data; fills cblk->nonzerobits, passes[], npasses and ninclpasses. */
+    for (y = 0; y < height+2; y++)
+        memset(t1->flags[y], 0, (width+2)*sizeof(*t1->flags[y])); // element-sized, so it stays correct whatever the flag type is
+
+    for (y = 0; y < height; y++){
+        for (x = 0; x < width; x++){
+            if (t1->data[y][x] < 0){ // keep the sign in the flag plane and the magnitude in data
+                t1->flags[y+1][x+1] |= JPEG2000_T1_SGN;
+                t1->data[y][x] = -t1->data[y][x];
+            }
+            max = FFMAX(max, t1->data[y][x]);
+        }
+    }
+
+    if (max == 0){
+        cblk->nonzerobits = 0;
+        bpno = 0; // an all-zero block still gets one (cleanup) pass
+    } else{
+        cblk->nonzerobits = av_log2(max) + 1 - NMSEDEC_FRACBITS;
+        bpno = cblk->nonzerobits - 1; // negative when max is below bit NMSEDEC_FRACBITS: no pass is coded then
+    }
+
+    ff_mqc_initenc(&t1->mqc, cblk->data);
+
+    for (passno = 0; bpno >= 0; passno++){ // top bit-plane gets only a cleanup pass, the others all three
+        nmsedec=0;
+
+        switch(pass_t){
+            case 0: encode_sigpass(t1, width, height, bandpos, &nmsedec, bpno);
+                    break;
+            case 1: encode_refpass(t1, width, height, &nmsedec, bpno);
+                    break;
+            case 2: encode_clnpass(t1, width, height, bandpos, &nmsedec, bpno);
+                    break;
+        }
+
+        cblk->passes[passno].rate = 3 + ff_mqc_length(&t1->mqc);
+        wmsedec += (int64_t)nmsedec << (2*bpno);
+        cblk->passes[passno].disto = wmsedec;
+
+        if (++pass_t == 3){ // after the cleanup pass move on to the next lower bit-plane
+            pass_t = 0;
+            bpno--;
+        }
+    }
+    cblk->npasses = passno;
+    cblk->ninclpasses = passno;
+    // TODO: optional flush on each pass
+    if (passno) // with zero passes there is no last pass: passes[-1] must not be written
+        cblk->passes[passno-1].rate = ff_mqc_flush(&t1->mqc);
+}
+
+/* tier-2 routines: */
+
+static void putnumpasses(Jpeg2000EncoderContext *s, int n)
+{
+    if (n > 36)                             // 37 and up: value 0xff80 | (n-37), width argument 16
+        put_num(s, 0xff80 | (n-37), 16);
+    else if (n > 5)                         // 6..36: value 0x1e0 | (n-6), width argument 9
+        put_num(s, 0x1e0 | (n-6), 9);
+    else if (n == 2)
+        put_num(s, 2, 2);
+    else if (n == 1)
+        put_num(s, 0, 1);
+    else                                    // 3..5 (and any n below 1): value 0xc | (n-3), width argument 4
+        put_num(s, 0xc | (n-3), 4);
+}
+
+
+static int encode_packet(Jpeg2000EncoderContext *s, Jpeg2000ResLevel *rlevel, int precno,
+                          uint8_t *expn, int numgbits)
+{
+    int bandno, empty = 1;
+    // Writes one packet (header, then code-block data) for precinct precno of rlevel; returns 0, or -1 when the output buffer is too small.
+    // init bitstream
+    *s->buf = 0;
+    s->bit_index = 0;
+
+    // header
+
+    // is the packet empty? (it is unless at least one band has a non-empty area)
+    for (bandno = 0; bandno < rlevel->nbands; bandno++){
+        if (rlevel->band[bandno].coord[0][0] < rlevel->band[bandno].coord[0][1]
+        &&  rlevel->band[bandno].coord[1][0] < rlevel->band[bandno].coord[1][1]){
+            empty = 0;
+            break;
+        }
+    }
+
+    put_bits(s, !empty, 1); // first header bit: 1 when the packet carries data
+    if (empty){
+        j2k_flush(s);
+        return 0;
+    }
+
+    for (bandno = 0; bandno < rlevel->nbands; bandno++){
+        Jpeg2000Band *band = rlevel->band + bandno;
+        Jpeg2000Prec *prec = band->prec + precno;
+        int yi, xi, pos;
+        int cblknw = prec->nb_codeblocks_width;
+
+        if (band->coord[0][0] == band->coord[0][1]
+        ||  band->coord[1][0] == band->coord[1][1])
+            continue; // bands with an empty area contribute nothing to the header
+
+        for (pos=0, yi = 0; yi < prec->nb_codeblocks_height; yi++){ // first sweep: load both tag trees with the per-code-block values
+            for (xi = 0; xi < cblknw; xi++, pos++){
+                prec->cblkincl[pos].val = prec->cblk[yi * cblknw + xi].ninclpasses == 0;
+                tag_tree_update(prec->cblkincl + pos);
+                prec->zerobits[pos].val = expn[bandno] + numgbits - 1 - prec->cblk[yi * cblknw + xi].nonzerobits;
+                tag_tree_update(prec->zerobits + pos);
+            }
+        }
+
+        for (pos=0, yi = 0; yi < prec->nb_codeblocks_height; yi++){ // second sweep: emit the header fields of each code-block
+            for (xi = 0; xi < cblknw; xi++, pos++){
+                int pad = 0, llen, length;
+                Jpeg2000Cblk *cblk = prec->cblk + yi * cblknw + xi;
+
+                if (s->buf_end - s->buf < 20) // approximately
+                    return -1;
+
+                // inclusion information
+                tag_tree_code(s, prec->cblkincl + pos, 1);
+                if (!cblk->ninclpasses)
+                    continue;
+                // zerobits information
+                tag_tree_code(s, prec->zerobits + pos, 100);
+                // number of passes
+                putnumpasses(s, cblk->ninclpasses);
+
+                length = cblk->passes[cblk->ninclpasses-1].rate; // byte count up to the last included pass
+                llen = av_log2(length) - av_log2(cblk->ninclpasses) - 2;
+                if (llen < 0){ // a negative llen becomes extra width (pad) for the length field instead
+                    pad = -llen;
+                    llen = 0;
+                }
+                // length of code block
+                put_bits(s, 1, llen);
+                put_bits(s, 0, 1);
+                put_num(s, length, av_log2(length)+1+pad);
+            }
+        }
+    }
+    j2k_flush(s);
+    for (bandno = 0; bandno < rlevel->nbands; bandno++){ // packet body: data of every included code-block, band by band
+        Jpeg2000Band *band = rlevel->band + bandno;
+        Jpeg2000Prec *prec = band->prec + precno;
+        int yi, cblknw = prec->nb_codeblocks_width;
+        for (yi =0; yi < prec->nb_codeblocks_height; yi++){
+            int xi;
+            for (xi = 0; xi < cblknw; xi++){
+                Jpeg2000Cblk *cblk = prec->cblk + yi * cblknw + xi;
+                if (cblk->ninclpasses){
+                    if (s->buf_end - s->buf < cblk->passes[cblk->ninclpasses-1].rate)
+                        return -1;
+                    bytestream_put_buffer(&s->buf, cblk->data, cblk->passes[cblk->ninclpasses-1].rate); // copy exactly the bytes announced in the header
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+static int encode_packets(Jpeg2000EncoderContext *s, Jpeg2000Tile *tile, int tileno)
+{
+    const Jpeg2000CodingStyle *cs = &s->codsty;
+    Jpeg2000QuantStyle *qs = &s->qntsty;
+    int res, comp, prec, err;
+
+    av_log(s->avctx, AV_LOG_DEBUG, "tier2\n");
+    // one packet per (resolution level, component, precinct), in that nesting order; stops at the first failure
+    for (res = 0; res < cs->nreslevels; res++){
+        uint8_t *expn = qs->expn + (res ? 3*res-2 : 0); // exponents of the bands of this resolution level
+        for (comp = 0; comp < s->ncomponents; comp++){
+            Jpeg2000ResLevel *rl = &s->tile[tileno].comp[comp].reslevel[res];
+            for (prec = 0; prec < rl->num_precincts_x * rl->num_precincts_y; prec++){
+                err = encode_packet(s, rl, prec, expn, qs->nguardbits);
+                if (err)
+                    return err;
+            }
+        }
+    }
+    av_log(s->avctx, AV_LOG_DEBUG, "after tier2\n");
+    return 0;
+}
+
+static int getcut(Jpeg2000Cblk *cblk, int64_t lambda, int dwt_norm)
+{
+    /* Returns how many leading passes to keep: a pass is accepted when its weighted distortion
+     * gain over the last accepted pass is at least lambda times its rate increase. */
+    int idx, cut = 0, base_rate = 0;
+    int64_t base_disto = 0;
+
+    for (idx = 0; idx < cblk->npasses; idx++){
+        const int     rate_gain  = cblk->passes[idx].rate  - base_rate;
+        const int64_t disto_gain = cblk->passes[idx].disto - base_disto;
+        if (((disto_gain * dwt_norm) >> WMSEDEC_SHIFT) * dwt_norm < rate_gain * lambda)
+            continue;
+        cut        = idx + 1;
+        base_rate  = cblk->passes[idx].rate; base_disto = cblk->passes[idx].disto;
+    }
+    return cut;
+}
+
+static void truncpasses(Jpeg2000EncoderContext *s, Jpeg2000Tile *tile)
+{
+    int precno, compno, reslevelno, bandno, cblkno, lev; // lev counts down from nreslevels-1 while reslevelno counts up
+    Jpeg2000CodingStyle *codsty = &s->codsty;
+    // Rate control: sets ninclpasses of every code-block of the tile to the cut chosen by getcut() for s->lambda.
+    for (compno = 0; compno < s->ncomponents; compno++){
+        Jpeg2000Component *comp = tile->comp + compno;
+
+        for (reslevelno = 0, lev = codsty->nreslevels-1; reslevelno < codsty->nreslevels; reslevelno++, lev--){
+            Jpeg2000ResLevel *reslevel = comp->reslevel + reslevelno;
+
+            for (precno = 0; precno < reslevel->num_precincts_x * reslevel->num_precincts_y; precno++){
+                for (bandno = 0; bandno < reslevel->nbands ; bandno++){
+                    int bandpos = bandno + (reslevelno > 0); // index 0 is used only at resolution level 0; later levels start at 1
+                    Jpeg2000Band *band = reslevel->band + bandno;
+                    Jpeg2000Prec *prec = band->prec + precno;
+
+                    for (cblkno = 0; cblkno < prec->nb_codeblocks_height * prec->nb_codeblocks_width; cblkno++){
+                        Jpeg2000Cblk *cblk = prec->cblk + cblkno;
+
+                        cblk->ninclpasses = getcut(cblk, s->lambda, // weight: dwt_norms entry times the band step size, scaled down by 2^15
+                                (int64_t)dwt_norms[codsty->transform == FF_DWT53][bandpos][lev] * (int64_t)band->i_stepsize >> 15);
+                    }
+                }
+            }
+        }
+    }
+}
+
+static int encode_tile(Jpeg2000EncoderContext *s, Jpeg2000Tile *tile, int tileno)
+{
+    int compno, reslevelno, bandno, ret;
+    Jpeg2000T1Context t1; // scratch code-block buffer, refilled for every code-block
+    Jpeg2000CodingStyle *codsty = &s->codsty;
+    for (compno = 0; compno < s->ncomponents; compno++){ // per component: DWT, then tier-1 coding of all code-blocks
+        Jpeg2000Component *comp = s->tile[tileno].comp + compno;
+
+        av_log(s->avctx, AV_LOG_DEBUG,"dwt\n");
+        if (ret = ff_dwt_encode(&comp->dwt, comp->i_data)) // a non-zero result aborts the tile and is returned as is
+            return ret;
+        av_log(s->avctx, AV_LOG_DEBUG,"after dwt -> tier1\n");
+
+        for (reslevelno = 0; reslevelno < codsty->nreslevels; reslevelno++){
+            Jpeg2000ResLevel *reslevel = comp->reslevel + reslevelno;
+
+            for (bandno = 0; bandno < reslevel->nbands ; bandno++){
+                Jpeg2000Band *band = reslevel->band + bandno;
+                Jpeg2000Prec *prec = band->prec; // we support only 1 precinct per band ATM in the encoder
+                int cblkx, cblky, cblkno=0, xx0, x0, xx1, y0, yy0, yy1, bandpos; // [xx0,xx1) x [yy0,yy1): current code-block inside comp->i_data
+                yy0 = bandno == 0 ? 0 : comp->reslevel[reslevelno-1].coord[1][1] - comp->reslevel[reslevelno-1].coord[1][0]; // band 0 starts at row 0, other bands below the previous level's height
+                y0 = yy0;
+                yy1 = FFMIN(ff_jpeg2000_ceildivpow2(band->coord[1][0] + 1, band->log2_cblk_height) << band->log2_cblk_height,
+                            band->coord[1][1]) - band->coord[1][0] + yy0; // end of the first code-block row: next code-block boundary or band end
+
+                if (band->coord[0][0] == band->coord[0][1] || band->coord[1][0] == band->coord[1][1])
+                    continue; // empty band
+
+                bandpos = bandno + (reslevelno > 0);
+
+                for (cblky = 0; cblky < prec->nb_codeblocks_height; cblky++){
+                    if (reslevelno == 0 || bandno == 1)
+                        xx0 = 0;
+                    else
+                        xx0 = comp->reslevel[reslevelno-1].coord[0][1] - comp->reslevel[reslevelno-1].coord[0][0]; // start right of the previous level's width
+                    x0 = xx0;
+                    xx1 = FFMIN(ff_jpeg2000_ceildivpow2(band->coord[0][0] + 1, band->log2_cblk_width) << band->log2_cblk_width,
+                                band->coord[0][1]) - band->coord[0][0] + xx0;
+
+                    for (cblkx = 0; cblkx < prec->nb_codeblocks_width; cblkx++, cblkno++){
+                        int y, x;
+                        if (codsty->transform == FF_DWT53){ // FF_DWT53: copy the coefficients, shifted left by NMSEDEC_FRACBITS
+                            for (y = yy0; y < yy1; y++){
+                                int *ptr = t1.data[y-yy0];
+                                for (x = xx0; x < xx1; x++){
+                                    *ptr++ = comp->i_data[(comp->coord[0][1] - comp->coord[0][0]) * y + x] << NMSEDEC_FRACBITS;
+                                }
+                            }
+                        } else{ // other transforms: scale each coefficient by a factor derived from the band step size
+                            for (y = yy0; y < yy1; y++){
+                                int *ptr = t1.data[y-yy0];
+                                for (x = xx0; x < xx1; x++){
+                                    *ptr = (comp->i_data[(comp->coord[0][1] - comp->coord[0][0]) * y + x]);
+                                    *ptr = (int64_t)*ptr * (int64_t)(16384 * 65536 / band->i_stepsize) >> 15 - NMSEDEC_FRACBITS; // '-' binds tighter than '>>': the shift count is 15 - NMSEDEC_FRACBITS
+                                    ptr++;
+                                }
+                            }
+                        }
+                        encode_cblk(s, &t1, prec->cblk + cblkno, tile, xx1 - xx0, yy1 - yy0,
+                                    bandpos, codsty->nreslevels - reslevelno - 1);
+                        xx0 = xx1; // advance one code-block to the right, clipped to the band width
+                        xx1 = FFMIN(xx1 + (1 << band->log2_cblk_width), band->coord[0][1] - band->coord[0][0] + x0);
+                    }
+                    yy0 = yy1; // advance one code-block row down, clipped to the band height
+                    yy1 = FFMIN(yy1 + (1 << band->log2_cblk_height), band->coord[1][1] - band->coord[1][0] + y0);
+                }
+            }
+        }
+        av_log(s->avctx, AV_LOG_DEBUG, "after tier1\n");
+    }
+
+    av_log(s->avctx, AV_LOG_DEBUG, "rate control\n");
+    truncpasses(s, tile); // choose how many passes of each code-block are kept
+    if (ret = encode_packets(s, tile, tileno)) // then write the packets; a non-zero result is returned as is
+        return ret;
+    av_log(s->avctx, AV_LOG_DEBUG, "after rate control\n");
+    return 0;
+}
+
+static void cleanup(Jpeg2000EncoderContext *s)
+{
+    int tileno, compno; // frees the components of every tile, then the tile array itself
+    Jpeg2000CodingStyle *codsty = &s->codsty;
+
+    if (!s->tile) // nothing to release, e.g. when initialisation stopped before the tiles were allocated
+        return;
+    for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++){
+        for (compno = 0; s->tile[tileno].comp && compno < s->ncomponents; compno++) // skip tiles whose component array is missing (assumes unset pointers are NULL - confirm in init_tiles)
+            ff_jpeg2000_cleanup(s->tile[tileno].comp + compno, codsty);
+        av_freep(&s->tile[tileno].comp);
+    }
+    av_freep(&s->tile);
+}
+
+static void reinit(Jpeg2000EncoderContext *s)
+{
+    int t, c; // t walks the tiles, c the components of each tile; every component is passed to ff_jpeg2000_reinit()
+    for (t = 0; t < s->numXtiles * s->numYtiles; t++){
+        Jpeg2000Component *comps = s->tile[t].comp;
+        for (c = 0; c < s->ncomponents; c++)
+            ff_jpeg2000_reinit(&comps[c], &s->codsty);
+    }
+}
+
+static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
+                        const AVFrame *pict, int *got_packet)
+{
+    int tileno, ret;
+    Jpeg2000EncoderContext *s = avctx->priv_data;
+    // Encodes pict into pkt as one codestream: SOC, main headers, one SOT/SOD section per tile, EOC.
+    if ((ret = ff_alloc_packet2(avctx, pkt, avctx->width*avctx->height*9 + FF_MIN_BUFFER_SIZE)) < 0) // NOTE(review): the size is computed in int - confirm it cannot overflow for large frames
+        return ret;
+
+    // init: the write cursor s->buf runs from pkt->data to s->buf_end
+    s->buf = s->buf_start = pkt->data;
+    s->buf_end = pkt->data + pkt->size;
+
+    s->picture = pict;
+
+    s->lambda = s->picture->quality * LAMBDA_SCALE; // rate-control multiplier later handed to getcut()
+
+    copy_frame(s);
+    reinit(s);
+
+    if (s->buf_end - s->buf < 2) // every marker write is preceded by a space check; -1 means buffer too small
+        return -1;
+    bytestream_put_be16(&s->buf, JPEG2000_SOC);
+    if (ret = put_siz(s))
+        return ret;
+    if (ret = put_cod(s))
+        return ret;
+    if (ret = put_qcd(s, 0))
+        return ret;
+
+    for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++){
+        uint8_t *psotptr; // position returned by put_sot(), patched once the tile has been written
+        if (!(psotptr = put_sot(s, tileno)))
+            return -1;
+        if (s->buf_end - s->buf < 2)
+            return -1;
+        bytestream_put_be16(&s->buf, JPEG2000_SOD);
+        if (ret = encode_tile(s, s->tile + tileno, tileno))
+            return ret;
+        bytestream_put_be32(&psotptr, s->buf - psotptr + 6); // presumably the tile-part length field - confirm against put_sot()
+    }
+    if (s->buf_end - s->buf < 2)
+        return -1;
+    bytestream_put_be16(&s->buf, JPEG2000_EOC);
+
+    av_log(s->avctx, AV_LOG_DEBUG, "end\n");
+    pkt->size = s->buf - s->buf_start; // shrink the packet to the bytes actually written
+    pkt->flags |= AV_PKT_FLAG_KEY;     // every packet is flagged as a key frame
+    *got_packet = 1;
+
+    return 0;
+}
+
+static av_cold int j2kenc_init(AVCodecContext *avctx)
+{
+    int i, ret;
+    Jpeg2000EncoderContext *s = avctx->priv_data;
+    Jpeg2000CodingStyle *codsty = &s->codsty;
+    Jpeg2000QuantStyle  *qntsty = &s->qntsty;
+    // Encoder init callback: fills in fixed coding defaults, derives the component layout from pix_fmt, builds tables and tiles.
+    s->avctx = avctx;
+    av_log(s->avctx, AV_LOG_DEBUG, "init\n");
+
+    // defaults:
+    // TODO: implement setting non-standard precinct size
+    memset(codsty->log2_prec_widths , 15, sizeof(codsty->log2_prec_widths )); // every precinct size exponent is set to 15
+    memset(codsty->log2_prec_heights, 15, sizeof(codsty->log2_prec_heights));
+    codsty->nreslevels2decode=
+    codsty->nreslevels       = 7;
+    codsty->log2_cblk_width  = 4; // code-blocks of 1 << 4 samples in each direction
+    codsty->log2_cblk_height = 4;
+    codsty->transform        = avctx->prediction_method ? FF_DWT53 : FF_DWT97_INT; // a non-zero prediction_method selects FF_DWT53
+
+    qntsty->nguardbits       = 1;
+
+    s->tile_width            = 256;
+    s->tile_height           = 256;
+
+    if (codsty->transform == FF_DWT53) // the quantization style follows the transform choice
+        qntsty->quantsty = JPEG2000_QSTY_NONE;
+    else
+        qntsty->quantsty = JPEG2000_QSTY_SE;
+
+    s->width = avctx->width;
+    s->height = avctx->height;
+
+    for (i = 0; i < 3; i++)
+        s->cbps[i] = 8; // 8 for each of up to three components
+
+    if (avctx->pix_fmt == AV_PIX_FMT_RGB24){
+        s->ncomponents = 3;
+    } else if (avctx->pix_fmt == AV_PIX_FMT_GRAY8){
+        s->ncomponents = 1;
+    } else{ // planar YUV
+        s->planar = 1;
+        s->ncomponents = 3;
+        avcodec_get_chroma_sub_sample(avctx->pix_fmt,
+                s->chroma_shift, s->chroma_shift + 1); // receives the two chroma shifts in chroma_shift[0] and [1]
+    }
+
+    ff_jpeg2000_init_tier1_luts();
+    ff_mqc_init_context_tables();
+    init_luts();
+
+    init_quantization(s);
+    if (ret=init_tiles(s)) // a non-zero result is returned to the caller unchanged
+        return ret;
+
+    av_log(s->avctx, AV_LOG_DEBUG, "after init\n");
+
+    return 0;
+}
+
+static int j2kenc_destroy(AVCodecContext *avctx)
+{
+    /* Close callback of the encoder: hands the private context to
+     * cleanup() and always reports success. */
+    cleanup(avctx->priv_data);
+    return 0;
+}
+
+AVCodec ff_jpeg2000_encoder = { // codec registration entry for the JPEG 2000 encoder defined above
+    .name           = "jpeg2000",
+    .long_name      = NULL_IF_CONFIG_SMALL("JPEG 2000"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_JPEG2000,
+    .priv_data_size = sizeof(Jpeg2000EncoderContext),
+    .init           = j2kenc_init,
+    .encode2        = encode_frame,
+    .close          = j2kenc_destroy,
+    .capabilities   = CODEC_CAP_EXPERIMENTAL, // the encoder is flagged as experimental
+    .pix_fmts       = (const enum AVPixelFormat[]) { // accepted input formats; the list ends with AV_PIX_FMT_NONE
+        AV_PIX_FMT_RGB24, AV_PIX_FMT_YUV444P, AV_PIX_FMT_GRAY8,
+/*      AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P,
+        AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,*/
+        AV_PIX_FMT_NONE
+    }
+};
diff --git a/libavcodec/jacosub.h b/libavcodec/jacosub.h
new file mode 100644
index 0000000..c3665ae
--- /dev/null
+++ b/libavcodec/jacosub.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2012 Clément Bœsch
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * JACOsub shared utils
+ */
+
+#ifndef AVCODEC_JACOSUB_H
+#define AVCODEC_JACOSUB_H
+
+#include "libavutil/common.h"
+
+#define JSS_MAX_LINESIZE 512
+
+static av_always_inline int jss_whitespace(char c)
+{
+    return (c <= '\r' && c >= '\t') || c == ' '; /* the control range \t..\r (\t \n \v \f \r), or a space */
+}
+
+static av_always_inline const char *jss_skip_whitespace(const char *p)
+{
+    const char *cur = p; /* ends on the first character that jss_whitespace() rejects (the NUL terminator is one) */
+    while (jss_whitespace(*cur)) cur++;
+    return cur;
+}
+
+#endif /* AVCODEC_JACOSUB_H */
diff --git a/libavcodec/jacosubdec.c b/libavcodec/jacosubdec.c
new file mode 100644
index 0000000..b64fac8
--- /dev/null
+++ b/libavcodec/jacosubdec.c
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2012 Clément Bœsch
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * JACOsub subtitle decoder
+ * @see http://unicorn.us.com/jacosub/jscripts.html
+ */
+
+#include <time.h>
+#include "ass.h"
+#include "jacosub.h"
+#include "libavutil/avstring.h"
+#include "libavutil/bprint.h"
+
+#undef time
+
+static int insert_text(AVBPrint *dst, const char *in, const char *arg)
+{
+    av_bprintf(dst, "%s", arg); // append the replacement text verbatim; in is not used
+    return 0;                   // no additional input characters are consumed
+}
+
+static int insert_datetime(AVBPrint *dst, const char *in, const char *arg)
+{
+    char buf[16] = {0};
+    time_t now = time(0);
+    struct tm ltime;
+    /* Appends the current local time formatted with the strftime() pattern in arg; consumes no input (returns 0). */
+#if HAVE_LOCALTIME_R
+    localtime_r(&now, &ltime);
+#else
+    ltime = *localtime(&now);
+#endif
+    if (strftime(buf, sizeof(buf), arg, &ltime)) // 0: the result did not fit (or is empty) and buf must not be used
+        av_bprintf(dst, "%s", buf);
+    return 0;
+}
+
+static int insert_color(AVBPrint *dst, const char *in, const char *arg)
+{
+    return *in != 0; // skip the one-character id that follows the code, but never step over the terminating NUL
+}
+
+static int insert_font(AVBPrint *dst, const char *in, const char *arg)
+{
+    return *in != 0; // skip the one-character id that follows the code, but never step over the terminating NUL
+}
+
+static const struct {
+    const char *from;   // JACOsub code as it appears in the input text
+    const char *arg;    // argument handed to func (replacement text or strftime format)
+    int (*func)(AVBPrint *dst, const char *in, const char *arg); // returns how many extra input characters to skip
+} ass_codes_map[] = {
+    {"\\~", "~",        insert_text},       // tilde doesn't need escaping
+    {"~",   "{\\h}",    insert_text},       // hard space
+    {"\\n", "\\N",      insert_text},       // newline
+    {"\\D", "%d %b %Y", insert_datetime},   // current date
+    {"\\T", "%H:%M",    insert_datetime},   // current time
+    {"\\N", "{\\r}",    insert_text},       // reset to default style
+    {"\\I", "{\\i1}",   insert_text},       // italic on
+    {"\\i", "{\\i0}",   insert_text},       // italic off
+    {"\\B", "{\\b1}",   insert_text},       // bold on
+    {"\\b", "{\\b0}",   insert_text},       // bold off
+    {"\\U", "{\\u1}",   insert_text},       // underline on
+    {"\\u", "{\\u0}",   insert_text},       // underline off
+    {"\\C", "",         insert_color},      // TODO: color
+    {"\\F", "",         insert_font},       // TODO: font
+};
+
+enum { // one vertical and one horizontal flag are OR-ed together and switched on in jacosub_to_ass()
+    ALIGN_VB = 1<<0, // vertical bottom, default
+    ALIGN_VM = 1<<1, // vertical middle
+    ALIGN_VT = 1<<2, // vertical top
+    ALIGN_JC = 1<<3, // justify center, default
+    ALIGN_JL = 1<<4, // justify left
+    ALIGN_JR = 1<<5, // justify right
+};
+
+static void jacosub_to_ass(AVCodecContext *avctx, AVBPrint *dst, const char *src)
+{
+    int i, valign = 0, halign = 0; // 0 means "no alignment directive seen"
+    char c = av_toupper(*src);
+    char directives[128] = {0};    // upper-cased copy of the leading directive word, if there is one
+
+    /* extract the optional directives */
+    if ((c >= 'A' && c <= 'Z') || c == '[') {
+        char *p    = directives;
+        char *pend = directives + sizeof(directives) - 1; // leaves room for the terminating NUL
+
+        do *p++ = av_toupper(*src++);
+        while (*src && !jss_whitespace(*src) && p < pend);
+        *p = 0;
+        src = jss_skip_whitespace(src);
+    }
+
+    /* handle directives (TODO: handle more of them, and more reliably) */
+    if      (strstr(directives, "VB")) valign = ALIGN_VB;
+    else if (strstr(directives, "VM")) valign = ALIGN_VM;
+    else if (strstr(directives, "VT")) valign = ALIGN_VT;
+    if      (strstr(directives, "JC")) halign = ALIGN_JC;
+    else if (strstr(directives, "JL")) halign = ALIGN_JL;
+    else if (strstr(directives, "JR")) halign = ALIGN_JR;
+    if (valign || halign) { // an {\anN} tag is written only when at least one alignment was given
+        if (!valign) valign = ALIGN_VB;
+        if (!halign) halign = ALIGN_JC;
+        switch (valign | halign) {
+        case ALIGN_VB | ALIGN_JL: av_bprintf(dst, "{\\an1}"); break; // bottom left
+        case ALIGN_VB | ALIGN_JC: av_bprintf(dst, "{\\an2}"); break; // bottom center
+        case ALIGN_VB | ALIGN_JR: av_bprintf(dst, "{\\an3}"); break; // bottom right
+        case ALIGN_VM | ALIGN_JL: av_bprintf(dst, "{\\an4}"); break; // middle left
+        case ALIGN_VM | ALIGN_JC: av_bprintf(dst, "{\\an5}"); break; // middle center
+        case ALIGN_VM | ALIGN_JR: av_bprintf(dst, "{\\an6}"); break; // middle right
+        case ALIGN_VT | ALIGN_JL: av_bprintf(dst, "{\\an7}"); break; // top left
+        case ALIGN_VT | ALIGN_JC: av_bprintf(dst, "{\\an8}"); break; // top center
+        case ALIGN_VT | ALIGN_JR: av_bprintf(dst, "{\\an9}"); break; // top right
+        }
+    }
+
+    /* process timed line */
+    while (*src && *src != '\n') {
+
+        /* text continues on the next line */
+        if (src[0] == '\\' && src[1] == '\n') {
+            src += 2;
+            while (jss_whitespace(*src))
+                src++;
+            continue;
+        }
+
+        /* special character codes */
+        for (i = 0; i < FF_ARRAY_ELEMS(ass_codes_map); i++) { // the first table entry that prefixes src wins
+            const char *from = ass_codes_map[i].from;
+            const char *arg  = ass_codes_map[i].arg;
+            size_t codemap_len = strlen(from);
+
+            if (!strncmp(src, from, codemap_len)) {
+                src += codemap_len;
+                src += ass_codes_map[i].func(dst, src, arg); // the handler reports how many further characters to skip
+                break;
+            }
+        }
+
+        /* simple char copy */
+        if (i == FF_ARRAY_ELEMS(ass_codes_map)) // no code matched
+            av_bprintf(dst, "%c", *src++);
+    }
+    av_bprintf(dst, "\r\n");
+}
+
+static int jacosub_decode_frame(AVCodecContext *avctx,
+                                void *data, int *got_sub_ptr, AVPacket *avpkt)
+{
+    AVSubtitle *sub = data;        // decodes one JACOsub packet into an ASS rectangle of sub
+    const char *ptr = avpkt->data;
+
+    if (avpkt->size <= 0)
+        goto end;
+
+    if (*ptr) {
+        AVBPrint buffer;
+        char *dec_sub;
+        int ret;
+        // skip timers: leading whitespace, then the two space-terminated fields in front of the text
+        ptr = jss_skip_whitespace(ptr);
+        ptr = strchr(ptr, ' '); if (!ptr) goto end; ptr++;
+        ptr = strchr(ptr, ' '); if (!ptr) goto end; ptr++;
+        av_bprint_init(&buffer, JSS_MAX_LINESIZE, JSS_MAX_LINESIZE);
+        jacosub_to_ass(avctx, &buffer, ptr);
+        if ((ret = av_bprint_finalize(&buffer, &dec_sub)) < 0) // on failure dec_sub must not be used
+            return ret;
+        ff_ass_add_rect(sub, dec_sub, avpkt->pts, avpkt->duration, 0);
+        av_free(dec_sub);
+    }
+
+end:
+    *got_sub_ptr = sub->num_rects > 0; // a subtitle is reported only if a rectangle was added
+    return avpkt->size;                // the whole packet counts as consumed
+}
+
+AVCodec ff_jacosub_decoder = { // codec registration entry for the JACOsub subtitle decoder defined above
+    .name           = "jacosub",
+    .long_name      = NULL_IF_CONFIG_SMALL("JACOsub subtitle"),
+    .type           = AVMEDIA_TYPE_SUBTITLE,
+    .id             = AV_CODEC_ID_JACOSUB,
+    .init           = ff_ass_subtitle_header_default, // shared ASS helper used as the init callback
+    .decode         = jacosub_decode_frame,
+};
diff --git a/libavcodec/jfdctint.c b/libavcodec/jfdctint.c
index ed6b7ff..6a39578 100644
--- a/libavcodec/jfdctint.c
+++ b/libavcodec/jfdctint.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/jfdctint_template.c b/libavcodec/jfdctint_template.c
index c6a1638..3ea2f5d 100644
--- a/libavcodec/jfdctint_template.c
+++ b/libavcodec/jfdctint_template.c
@@ -216,8 +216,8 @@ static av_always_inline void FUNC(row_fdct)(int16_t *data)
     tmp11 = tmp1 + tmp2;
     tmp12 = tmp1 - tmp2;
 
-    dataptr[0] = (int16_t) ((tmp10 + tmp11) << PASS1_BITS);
-    dataptr[4] = (int16_t) ((tmp10 - tmp11) << PASS1_BITS);
+    dataptr[0] = (int16_t) ((tmp10 + tmp11) * (1 << PASS1_BITS));
+    dataptr[4] = (int16_t) ((tmp10 - tmp11) * (1 << PASS1_BITS));
 
     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
     dataptr[2] = (int16_t) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
index fadb5cb..ede1a79 100644
--- a/libavcodec/jpeg2000.c
+++ b/libavcodec/jpeg2000.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2007 Kamil Nowosad
  * Copyright (c) 2013 Nicolas Bertrand <nicoinattendu@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,6 +26,7 @@
  */
 
 #include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
 #include "libavutil/common.h"
 #include "libavutil/mem.h"
 #include "avcodec.h"
@@ -41,8 +42,7 @@ static int32_t tag_tree_size(uint16_t w, uint16_t h)
     uint32_t res = 0;
     while (w > 1 || h > 1) {
         res += w * h;
-        if (res + 1 >= INT32_MAX)
-            return -1;
+        av_assert0(res + 1 < INT32_MAX);
         w = (w + 1) >> 1;
         h = (h + 1) >> 1;
     }
@@ -56,8 +56,6 @@ static Jpeg2000TgtNode *ff_jpeg2000_tag_tree_init(int w, int h)
     int32_t tt_size;
 
     tt_size = tag_tree_size(w, h);
-    if (tt_size == -1)
-        return NULL;
 
     t = res = av_mallocz_array(tt_size, sizeof(*t));
     if (!res)
@@ -82,6 +80,16 @@ static Jpeg2000TgtNode *ff_jpeg2000_tag_tree_init(int w, int h)
     return res;
 }
 
+static void tag_tree_zero(Jpeg2000TgtNode *t, int w, int h)
+{
+    int i, siz = tag_tree_size(w, h);
+    /* Clear val and vis on each of the siz nodes that tag_tree_size() reports for w x h. */
+    for (i = 0; i < siz; i++) {
+        t[i].val = 0;
+        t[i].vis = 0;
+    }
+}
+
 uint8_t ff_jpeg2000_sigctxno_lut[256][4];
 
 static int getsigctxno(int flag, int bandno)
@@ -96,45 +104,33 @@ static int getsigctxno(int flag, int bandno)
         ((flag & JPEG2000_T1_SIG_NW) ? 1 : 0) +
         ((flag & JPEG2000_T1_SIG_SE) ? 1 : 0) +
         ((flag & JPEG2000_T1_SIG_SW) ? 1 : 0);
+
     if (bandno < 3) {
         if (bandno == 1)
             FFSWAP(int, h, v);
-        if (h == 2)
-            return 8;
+        if (h == 2) return 8;
         if (h == 1) {
-            if (v >= 1)
-                return 7;
-            if (d >= 1)
-                return 6;
+            if (v >= 1) return 7;
+            if (d >= 1) return 6;
             return 5;
         }
-        if (v == 2)
-            return 4;
-        if (v == 1)
-            return 3;
-        if (d >= 2)
-            return 2;
-        if (d == 1)
-            return 1;
+        if (v == 2) return 4;
+        if (v == 1) return 3;
+        if (d >= 2) return 2;
+        if (d == 1) return 1;
     } else {
-        if (d >= 3)
-            return 8;
+        if (d >= 3) return 8;
         if (d == 2) {
-            if (h + v >= 1)
-                return 7;
+            if (h+v >= 1) return 7;
             return 6;
         }
         if (d == 1) {
-            if (h + v >= 2)
-                return 5;
-            if (h + v == 1)
-                return 4;
+            if (h+v >= 2) return 5;
+            if (h+v == 1) return 4;
             return 3;
         }
-        if (h + v >= 2)
-            return 2;
-        if (h + v == 1)
-            return 1;
+        if (h+v >= 2) return 2;
+        if (h+v == 1) return 1;
     }
     return 0;
 }
@@ -205,8 +201,8 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
     int reslevelno, bandno, gbandno = 0, ret, i, j;
     uint32_t csize;
 
-    if (!codsty->nreslevels2decode) {
-        av_log(avctx, AV_LOG_ERROR, "nreslevels2decode uninitialized\n");
+    if (codsty->nreslevels2decode <= 0) {
+        av_log(avctx, AV_LOG_ERROR, "nreslevels2decode %d invalid or uninitialized\n", codsty->nreslevels2decode);
         return AVERROR_INVALIDDATA;
     }
 
@@ -220,12 +216,12 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
 
     if (codsty->transform == FF_DWT97) {
         comp->i_data = NULL;
-        comp->f_data = av_malloc_array(csize, sizeof(*comp->f_data));
+        comp->f_data = av_mallocz_array(csize, sizeof(*comp->f_data));
         if (!comp->f_data)
             return AVERROR(ENOMEM);
     } else {
         comp->f_data = NULL;
-        comp->i_data = av_malloc_array(csize, sizeof(*comp->i_data));
+        comp->i_data = av_mallocz_array(csize, sizeof(*comp->i_data));
         if (!comp->i_data)
             return AVERROR(ENOMEM);
     }
@@ -254,7 +250,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
         else
             reslevel->nbands = 3;
 
-        /* Number of precincts wich span the tile for resolution level reslevelno
+        /* Number of precincts which span the tile for resolution level reslevelno
          * see B.6 in ISO/IEC 15444-1:2002 eq. B-16
          * num_precincts_x = |- trx_1 / 2 ^ log2_prec_width) -| - (trx_0 / 2 ^ log2_prec_width)
          * num_precincts_y = |- try_1 / 2 ^ log2_prec_width) -| - (try_0 / 2 ^ log2_prec_width)
@@ -325,7 +321,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
             if (!av_codec_is_encoder(avctx->codec))
                 band->f_stepsize *= 0.5;
 
-            band->i_stepsize = band->f_stepsize * (1 << 16);
+            band->i_stepsize = band->f_stepsize * (1 << 15);
 
             /* computation of tbx_0, tbx_1, tby_0, tby_1
              * see ISO/IEC 15444-1:2002 B.5 eq. B-15 and tbl B.1
@@ -374,7 +370,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
                 band->coord[1][j] = ff_jpeg2000_ceildiv(band->coord[1][j], dy);
 
             band->prec = av_mallocz_array(reslevel->num_precincts_x *
-                                          reslevel->num_precincts_y,
+                                          (uint64_t)reslevel->num_precincts_y,
                                           sizeof(*band->prec));
             if (!band->prec)
                 return AVERROR(ENOMEM);
@@ -430,7 +426,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
                     return AVERROR(ENOMEM);
 
                 prec->cblk = av_mallocz_array(prec->nb_codeblocks_width *
-                                              prec->nb_codeblocks_height,
+                                              (uint64_t)prec->nb_codeblocks_height,
                                               sizeof(*prec->cblk));
                 if (!prec->cblk)
                     return AVERROR(ENOMEM);
@@ -482,6 +478,27 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
     return 0;
 }
 
+void ff_jpeg2000_reinit(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty)
+{ /* Reset the tag trees and the code-block length/lblock of every precinct in comp. */
+    int reslevelno, bandno, cblkno, precno; /* walk reslevel -> band -> precinct -> code block */
+    for (reslevelno = 0; reslevelno < codsty->nreslevels; reslevelno++) {
+        Jpeg2000ResLevel *rlevel = comp->reslevel + reslevelno;
+        for (bandno = 0; bandno < rlevel->nbands; bandno++) {
+            Jpeg2000Band *band = rlevel->band + bandno;
+            for(precno = 0; precno < rlevel->num_precincts_x * rlevel->num_precincts_y; precno++) {
+                Jpeg2000Prec *prec = band->prec + precno;
+                tag_tree_zero(prec->zerobits, prec->nb_codeblocks_width, prec->nb_codeblocks_height);
+                tag_tree_zero(prec->cblkincl, prec->nb_codeblocks_width, prec->nb_codeblocks_height);
+                for (cblkno = 0; cblkno < prec->nb_codeblocks_width * prec->nb_codeblocks_height; cblkno++) {
+                    Jpeg2000Cblk *cblk = prec->cblk + cblkno;
+                    cblk->length = 0; /* nothing stored in cblk->data any more */
+                    cblk->lblock = 3; /* NOTE(review): presumably the initial Lblock value -- confirm against init_component */
+                }
+            }
+        }
+    }
+}
+
 void ff_jpeg2000_cleanup(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty)
 {
     int reslevelno, bandno, precno;
@@ -502,16 +519,12 @@ void ff_jpeg2000_cleanup(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty)
 
             band = reslevel->band + bandno;
             for (precno = 0; precno < reslevel->num_precincts_x * reslevel->num_precincts_y; precno++) {
-                Jpeg2000Prec *prec;
-
-                if (!band->prec)
-                    continue;
-
-                prec = band->prec + precno;
-                av_freep(&prec->zerobits);
-                av_freep(&prec->cblkincl);
-                av_freep(&prec->cblk);
-
+                if (band->prec) {
+                    Jpeg2000Prec *prec = band->prec + precno;
+                    av_freep(&prec->zerobits);
+                    av_freep(&prec->cblkincl);
+                    av_freep(&prec->cblk);
+                }
             }
 
             av_freep(&band->prec);
diff --git a/libavcodec/jpeg2000.h b/libavcodec/jpeg2000.h
index b96b7e2..acdba62 100644
--- a/libavcodec/jpeg2000.h
+++ b/libavcodec/jpeg2000.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2007 Kamil Nowosad
  * Copyright (c) 2013 Nicolas Bertrand <nicoinattendu@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -130,8 +130,8 @@ typedef struct Jpeg2000TgtNode {
 } Jpeg2000TgtNode;
 
 typedef struct Jpeg2000CodingStyle {
-    uint8_t nreslevels;       // number of resolution levels
-    uint8_t nreslevels2decode; // number of resolution levels to decode
+    int nreslevels;           // number of resolution levels
+    int nreslevels2decode;    // number of resolution levels to decode
     uint8_t log2_cblk_width,
             log2_cblk_height; // exponent of codeblock size
     uint8_t transform;        // DWT type
@@ -146,11 +146,16 @@ typedef struct Jpeg2000CodingStyle {
 
 typedef struct Jpeg2000QuantStyle {
     uint8_t expn[JPEG2000_MAX_DECLEVELS * 3];  // quantization exponent
-    uint32_t mant[JPEG2000_MAX_DECLEVELS * 3]; // quantization mantissa
+    uint16_t mant[JPEG2000_MAX_DECLEVELS * 3]; // quantization mantissa
     uint8_t quantsty;      // quantization style
     uint8_t nguardbits;    // number of guard bits
 } Jpeg2000QuantStyle;
 
+typedef struct Jpeg2000Pass { /* element type of Jpeg2000Cblk.passes[] */
+    uint16_t rate;  /* NOTE(review): presumably the byte count up to this coding pass -- confirm against users */
+    int64_t disto;  /* NOTE(review): presumably the distortion associated with this pass -- confirm */
+} Jpeg2000Pass;
+
 typedef struct Jpeg2000Cblk {
     uint8_t npasses;
     uint8_t ninclpasses; // number coding of passes included in codestream
@@ -160,6 +165,7 @@ typedef struct Jpeg2000Cblk {
     uint8_t lblock;
     uint8_t zero;
     uint8_t data[8192];
+    Jpeg2000Pass passes[100];
     uint16_t coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}}
 } Jpeg2000Cblk; // code block
 
@@ -252,6 +258,8 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
                                int cbps, int dx, int dy,
                                AVCodecContext *ctx);
 
+void ff_jpeg2000_reinit(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty);
+
 void ff_jpeg2000_cleanup(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty);
 
 #endif /* AVCODEC_JPEG2000_H */
diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 7d081ea..d6204bb 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2007 Kamil Nowosad
  * Copyright (c) 2013 Nicolas Bertrand <nicoinattendu@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -28,8 +28,10 @@
 #include <inttypes.h>
 
 #include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
 #include "libavutil/common.h"
 #include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
 #include "avcodec.h"
 #include "bytestream.h"
 #include "internal.h"
@@ -39,6 +41,7 @@
 #define JP2_SIG_TYPE    0x6A502020
 #define JP2_SIG_VALUE   0x0D0A870A
 #define JP2_CODESTREAM  0x6A703263
+#define JP2_HEADER      0x6A703268
 
 #define HAD_COC 0x01
 #define HAD_QCC 0x02
@@ -56,14 +59,14 @@ typedef struct Jpeg2000Tile {
     uint8_t             properties[4];
     Jpeg2000CodingStyle codsty[4];
     Jpeg2000QuantStyle  qntsty[4];
-    Jpeg2000TilePart    tile_part[3];
+    Jpeg2000TilePart    tile_part[4];
     uint16_t tp_idx;                    // Tile-part index
 } Jpeg2000Tile;
 
 typedef struct Jpeg2000DecoderContext {
     AVClass         *class;
     AVCodecContext  *avctx;
-    GetByteContext g;
+    GetByteContext  g;
 
     int             width, height;
     int             image_offset_x, image_offset_y;
@@ -74,6 +77,10 @@ typedef struct Jpeg2000DecoderContext {
     int             cdx[4], cdy[4];
     int             precision;
     int             ncomponents;
+    int             colour_space;
+    uint32_t        palette[256];
+    int8_t          pal8;
+    int             cdef[4];
     int             tile_width, tile_height;
     unsigned        numXtiles, numYtiles;
     int             maxtilelen;
@@ -83,7 +90,8 @@ typedef struct Jpeg2000DecoderContext {
 
     int             bit_index;
 
-    int16_t         curtileno;
+    int             curtileno;
+
     Jpeg2000Tile    *tile;
 
     /*options parameters*/
@@ -97,6 +105,7 @@ typedef struct Jpeg2000DecoderContext {
 static int get_bits(Jpeg2000DecoderContext *s, int n)
 {
     int res = 0;
+
     while (--n >= 0) {
         res <<= 1;
         if (s->bit_index == 0) {
@@ -154,12 +163,74 @@ static int tag_tree_decode(Jpeg2000DecoderContext *s, Jpeg2000TgtNode *node,
     return curval;
 }
 
+static int pix_fmt_match(enum AVPixelFormat pix_fmt, int components,
+                         int bpc, uint32_t log2_chroma_wh, int pal8)
+{
+    int match = 1;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
+    /* Nonzero iff pix_fmt has `components` components, each >= bpc bits, with matching subsampling and PAL flag. */
+    if (desc->nb_components != components) {
+        return 0;
+    }
+    /* log2_chroma_wh: 4 bits per component i; bits 4i..4i+1 = log2 vertical, bits 4i+2..4i+3 = log2 horizontal. */
+    switch (components) {
+    case 4:
+        match = match && desc->comp[3].depth_minus1 + 1 >= bpc &&
+                         (log2_chroma_wh >> 14 & 3) == 0 &&
+                         (log2_chroma_wh >> 12 & 3) == 0; /* 4th component must not be subsampled; fall through */
+    case 3:
+        match = match && desc->comp[2].depth_minus1 + 1 >= bpc &&
+                         (log2_chroma_wh >> 10 & 3) == desc->log2_chroma_w &&
+                         (log2_chroma_wh >>  8 & 3) == desc->log2_chroma_h; /* fall through */
+    case 2:
+        match = match && desc->comp[1].depth_minus1 + 1 >= bpc &&
+                         (log2_chroma_wh >>  6 & 3) == desc->log2_chroma_w &&
+                         (log2_chroma_wh >>  4 & 3) == desc->log2_chroma_h; /* fall through */
+
+    case 1:
+        match = match && desc->comp[0].depth_minus1 + 1 >= bpc &&
+                         (log2_chroma_wh >>  2 & 3) == 0 &&
+                         (log2_chroma_wh       & 3) == 0 &&
+                         (desc->flags & AV_PIX_FMT_FLAG_PAL) == pal8 * AV_PIX_FMT_FLAG_PAL;
+    }
+    return match;
+}
+
+// pix_fmts with lower bpp have to be listed before similar pix_fmts with
+// higher bpp: get_siz() selects the first entry accepted by pix_fmt_match().
+#define RGB_PIXEL_FORMATS   AV_PIX_FMT_PAL8,AV_PIX_FMT_RGB24,AV_PIX_FMT_RGBA,AV_PIX_FMT_RGB48,AV_PIX_FMT_RGBA64
+#define GRAY_PIXEL_FORMATS  AV_PIX_FMT_GRAY8,AV_PIX_FMT_GRAY8A,AV_PIX_FMT_GRAY16
+#define YUV_PIXEL_FORMATS   AV_PIX_FMT_YUV410P,AV_PIX_FMT_YUV411P,AV_PIX_FMT_YUVA420P, \
+                            AV_PIX_FMT_YUV420P,AV_PIX_FMT_YUV422P,AV_PIX_FMT_YUVA422P, \
+                            AV_PIX_FMT_YUV440P,AV_PIX_FMT_YUV444P,AV_PIX_FMT_YUVA444P, \
+                            AV_PIX_FMT_YUV420P9,AV_PIX_FMT_YUV422P9,AV_PIX_FMT_YUV444P9, \
+                            AV_PIX_FMT_YUVA420P9,AV_PIX_FMT_YUVA422P9,AV_PIX_FMT_YUVA444P9, \
+                            AV_PIX_FMT_YUV420P10,AV_PIX_FMT_YUV422P10,AV_PIX_FMT_YUV444P10, \
+                            AV_PIX_FMT_YUVA420P10,AV_PIX_FMT_YUVA422P10,AV_PIX_FMT_YUVA444P10, \
+                            AV_PIX_FMT_YUV420P12,AV_PIX_FMT_YUV422P12,AV_PIX_FMT_YUV444P12, \
+                            AV_PIX_FMT_YUV420P14,AV_PIX_FMT_YUV422P14,AV_PIX_FMT_YUV444P14, \
+                            AV_PIX_FMT_YUV420P16,AV_PIX_FMT_YUV422P16,AV_PIX_FMT_YUV444P16, \
+                            AV_PIX_FMT_YUVA420P16,AV_PIX_FMT_YUVA422P16,AV_PIX_FMT_YUVA444P16
+#define XYZ_PIXEL_FORMATS   AV_PIX_FMT_XYZ12
+
+static const enum AVPixelFormat rgb_pix_fmts[]  = {RGB_PIXEL_FORMATS};  /* searched when colour_space == 16 */
+static const enum AVPixelFormat gray_pix_fmts[] = {GRAY_PIXEL_FORMATS}; /* searched when colour_space == 17 */
+static const enum AVPixelFormat yuv_pix_fmts[]  = {YUV_PIXEL_FORMATS};  /* searched when colour_space == 18 */
+static const enum AVPixelFormat xyz_pix_fmts[]  = {XYZ_PIXEL_FORMATS};  /* searched for the DCINEMA 2K/4K profiles */
+static const enum AVPixelFormat all_pix_fmts[]  = {RGB_PIXEL_FORMATS,
+                                                   GRAY_PIXEL_FORMATS,
+                                                   YUV_PIXEL_FORMATS,
+                                                   XYZ_PIXEL_FORMATS};  /* searched for any other colour_space */
+
 /* marker segments */
 /* get sizes and offsets of image, tiles; number of components */
 static int get_siz(Jpeg2000DecoderContext *s)
 {
     int i;
     int ncomponents;
+    uint32_t log2_chroma_wh = 0;
+    const enum AVPixelFormat *possible_fmts = NULL;
+    int possible_fmts_nb = 0;
 
     if (bytestream2_get_bytes_left(&s->g) < 36)
         return AVERROR_INVALIDDATA;
@@ -175,13 +246,18 @@ static int get_siz(Jpeg2000DecoderContext *s)
     s->tile_offset_y  = bytestream2_get_be32u(&s->g); // YT0Siz
     ncomponents       = bytestream2_get_be16u(&s->g); // CSiz
 
+    if (s->image_offset_x || s->image_offset_y) {
+        avpriv_request_sample(s->avctx, "Support for image offsets");
+        return AVERROR_PATCHWELCOME;
+    }
+
     if (ncomponents <= 0) {
         av_log(s->avctx, AV_LOG_ERROR, "Invalid number of components: %d\n",
                s->ncomponents);
         return AVERROR_INVALIDDATA;
     }
 
-    if (ncomponents > 3) {
+    if (ncomponents > 4) {
         avpriv_request_sample(s->avctx, "Support for %d components",
                               s->ncomponents);
         return AVERROR_PATCHWELCOME;
@@ -189,8 +265,7 @@ static int get_siz(Jpeg2000DecoderContext *s)
 
     s->ncomponents = ncomponents;
 
-    if (s->tile_width <= 0 || s->tile_height <= 0 ||
-        s->tile_width > s->width || s->tile_height > s->height) {
+    if (s->tile_width <= 0 || s->tile_height <= 0) {
         av_log(s->avctx, AV_LOG_ERROR, "Invalid tile dimension %dx%d.\n",
                s->tile_width, s->tile_height);
         return AVERROR_INVALIDDATA;
@@ -206,21 +281,22 @@ static int get_siz(Jpeg2000DecoderContext *s)
         s->sgnd[i]   = !!(x & 0x80);
         s->cdx[i]    = bytestream2_get_byteu(&s->g);
         s->cdy[i]    = bytestream2_get_byteu(&s->g);
-
-        if (s->cdx[i] != 1 || s->cdy[i] != 1) {
-            avpriv_request_sample(s->avctx,
-                                  "CDxy values %d %d for component %d",
-                                  s->cdx[i], s->cdy[i], i);
-            if (!s->cdx[i] || !s->cdy[i])
-                return AVERROR_INVALIDDATA;
-            else
-                return AVERROR_PATCHWELCOME;
+        if (   !s->cdx[i] || s->cdx[i] == 3 || s->cdx[i] > 4
+            || !s->cdy[i] || s->cdy[i] == 3 || s->cdy[i] > 4) {
+            av_log(s->avctx, AV_LOG_ERROR, "Invalid sample separation %d/%d\n", s->cdx[i], s->cdy[i]);
+            return AVERROR_INVALIDDATA;
         }
+        log2_chroma_wh |= s->cdy[i] >> 1 << i * 4 | s->cdx[i] >> 1 << i * 4 + 2;
     }
 
     s->numXtiles = ff_jpeg2000_ceildiv(s->width  - s->tile_offset_x, s->tile_width);
     s->numYtiles = ff_jpeg2000_ceildiv(s->height - s->tile_offset_y, s->tile_height);
 
+    if (s->numXtiles * (uint64_t)s->numYtiles > INT_MAX/sizeof(*s->tile)) {
+        s->numXtiles = s->numYtiles = 0;
+        return AVERROR(EINVAL);
+    }
+
     s->tile = av_mallocz_array(s->numXtiles * s->numYtiles, sizeof(*s->tile));
     if (!s->tile) {
         s->numXtiles = s->numYtiles = 0;
@@ -241,36 +317,49 @@ static int get_siz(Jpeg2000DecoderContext *s)
     s->avctx->height = ff_jpeg2000_ceildivpow2(s->height - s->image_offset_y,
                                                s->reduction_factor);
 
-    switch (s->ncomponents) {
-    case 1:
-        if (s->precision > 8)
-            s->avctx->pix_fmt = AV_PIX_FMT_GRAY16;
-        else
-            s->avctx->pix_fmt = AV_PIX_FMT_GRAY8;
-        break;
-    case 3:
-        switch (s->avctx->profile) {
-        case FF_PROFILE_JPEG2000_DCINEMA_2K:
-        case FF_PROFILE_JPEG2000_DCINEMA_4K:
-            /* XYZ color-space for digital cinema profiles */
-            s->avctx->pix_fmt = AV_PIX_FMT_XYZ12;
+    if (s->avctx->profile == FF_PROFILE_JPEG2000_DCINEMA_2K ||
+        s->avctx->profile == FF_PROFILE_JPEG2000_DCINEMA_4K) {
+        possible_fmts = xyz_pix_fmts;
+        possible_fmts_nb = FF_ARRAY_ELEMS(xyz_pix_fmts);
+    } else {
+        switch (s->colour_space) {
+        case 16:
+            possible_fmts = rgb_pix_fmts;
+            possible_fmts_nb = FF_ARRAY_ELEMS(rgb_pix_fmts);
+            break;
+        case 17:
+            possible_fmts = gray_pix_fmts;
+            possible_fmts_nb = FF_ARRAY_ELEMS(gray_pix_fmts);
+            break;
+        case 18:
+            possible_fmts = yuv_pix_fmts;
+            possible_fmts_nb = FF_ARRAY_ELEMS(yuv_pix_fmts);
             break;
         default:
-            if (s->precision > 8)
-                s->avctx->pix_fmt = AV_PIX_FMT_RGB48;
-            else
-                s->avctx->pix_fmt = AV_PIX_FMT_RGB24;
+            possible_fmts = all_pix_fmts;
+            possible_fmts_nb = FF_ARRAY_ELEMS(all_pix_fmts);
             break;
         }
-        break;
-    case 4:
-        s->avctx->pix_fmt = AV_PIX_FMT_RGBA;
-        break;
-    default:
-        /* pixel format can not be identified */
-        s->avctx->pix_fmt = AV_PIX_FMT_NONE;
-        break;
     }
+    for (i = 0; i < possible_fmts_nb; ++i) {
+        if (pix_fmt_match(possible_fmts[i], ncomponents, s->precision, log2_chroma_wh, s->pal8)) {
+            s->avctx->pix_fmt = possible_fmts[i];
+            break;
+        }
+    }
+    if (i == possible_fmts_nb) {
+        av_log(s->avctx, AV_LOG_ERROR,
+               "Unknown pix_fmt, profile: %d, colour_space: %d, "
+               "components: %d, precision: %d, "
+               "cdx[1]: %d, cdy[1]: %d, cdx[2]: %d, cdy[2]: %d\n",
+               s->avctx->profile, s->colour_space, ncomponents, s->precision,
+               ncomponents > 2 ? s->cdx[1] : 0,
+               ncomponents > 2 ? s->cdy[1] : 0,
+               ncomponents > 2 ? s->cdx[2] : 0,
+               ncomponents > 2 ? s->cdy[2] : 0);
+        return AVERROR_PATCHWELCOME;
+    }
+    s->avctx->bits_per_raw_sample = s->precision;
     return 0;
 }
 
@@ -285,18 +374,26 @@ static int get_cox(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *c)
     /*  nreslevels = number of resolution levels
                    = number of decomposition level +1 */
     c->nreslevels = bytestream2_get_byteu(&s->g) + 1;
-
-    if (c->nreslevels > JPEG2000_MAX_RESLEVELS)
+    if (c->nreslevels >= JPEG2000_MAX_RESLEVELS) {
+        av_log(s->avctx, AV_LOG_ERROR, "nreslevels %d is invalid\n", c->nreslevels);
         return AVERROR_INVALIDDATA;
+    }
+
+    if (c->nreslevels <= s->reduction_factor) {
+        /* we are forced to update reduction_factor as its requested value is
+           not compatible with this bitstream, and as we might have used it
+           already in setup earlier we have to fail this frame until
+           reinitialization is implemented */
+        av_log(s->avctx, AV_LOG_ERROR, "reduction_factor too large for this bitstream, max is %d\n", c->nreslevels - 1);
+        s->reduction_factor = c->nreslevels - 1;
+        return AVERROR(EINVAL);
+    }
 
     /* compute number of resolution levels to decode */
-    if (c->nreslevels < s->reduction_factor)
-        c->nreslevels2decode = 1;
-    else
-        c->nreslevels2decode = c->nreslevels - s->reduction_factor;
+    c->nreslevels2decode = c->nreslevels - s->reduction_factor;
 
-    c->log2_cblk_width  = bytestream2_get_byteu(&s->g) + 2; // cblk width
-    c->log2_cblk_height = bytestream2_get_byteu(&s->g) + 2; // cblk height
+    c->log2_cblk_width  = (bytestream2_get_byteu(&s->g) & 15) + 2; // cblk width
+    c->log2_cblk_height = (bytestream2_get_byteu(&s->g) & 15) + 2; // cblk height
 
     if (c->log2_cblk_width > 10 || c->log2_cblk_height > 10 ||
         c->log2_cblk_width + c->log2_cblk_height > 12) {
@@ -304,10 +401,14 @@ static int get_cox(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *c)
         return AVERROR_INVALIDDATA;
     }
 
+    if (c->log2_cblk_width > 6 || c->log2_cblk_height > 6) {
+        avpriv_request_sample(s->avctx, "cblk size > 64");
+        return AVERROR_PATCHWELCOME;
+    }
+
     c->cblk_style = bytestream2_get_byteu(&s->g);
     if (c->cblk_style != 0) { // cblk style
-        avpriv_request_sample(s->avctx, "Support for extra cblk styles");
-        return AVERROR_PATCHWELCOME;
+        av_log(s->avctx, AV_LOG_WARNING, "extra cblk styles %X\n", c->cblk_style);
     }
     c->transform = bytestream2_get_byteu(&s->g); // DWT transformation type
     /* set integer 9/7 DWT in case of BITEXACT flag */
@@ -407,7 +508,7 @@ static int get_qcx(Jpeg2000DecoderContext *s, int n, Jpeg2000QuantStyle *q)
     if (q->quantsty == JPEG2000_QSTY_NONE) {
         n -= 3;
         if (bytestream2_get_bytes_left(&s->g) < n ||
-            n > JPEG2000_MAX_DECLEVELS)
+            n > JPEG2000_MAX_DECLEVELS*3)
             return AVERROR_INVALIDDATA;
         for (i = 0; i < n; i++)
             q->expn[i] = bytestream2_get_byteu(&s->g) >> 3;
@@ -425,7 +526,7 @@ static int get_qcx(Jpeg2000DecoderContext *s, int n, Jpeg2000QuantStyle *q)
     } else {
         n = (n - 3) >> 1;
         if (bytestream2_get_bytes_left(&s->g) < 2 * n ||
-            n > JPEG2000_MAX_DECLEVELS)
+            n > JPEG2000_MAX_DECLEVELS*3)
             return AVERROR_INVALIDDATA;
         for (i = 0; i < n; i++) {
             x          = bytestream2_get_be16u(&s->g);
@@ -443,6 +544,8 @@ static int get_qcd(Jpeg2000DecoderContext *s, int n, Jpeg2000QuantStyle *q,
     Jpeg2000QuantStyle tmp;
     int compno, ret;
 
+    memset(&tmp, 0, sizeof(tmp));
+
     if ((ret = get_qcx(s, n, &tmp)) < 0)
         return ret;
     for (compno = 0; compno < s->ncomponents; compno++)
@@ -485,14 +588,12 @@ static int get_sot(Jpeg2000DecoderContext *s, int n)
     if (bytestream2_get_bytes_left(&s->g) < 8)
         return AVERROR_INVALIDDATA;
 
+    s->curtileno = 0;
     Isot = bytestream2_get_be16u(&s->g);        // Isot
     if (Isot >= s->numXtiles * s->numYtiles)
         return AVERROR_INVALIDDATA;
 
-    if (Isot) {
-        avpriv_request_sample(s->avctx, "Support for more than one tile");
-        return AVERROR_PATCHWELCOME;
-    }
+    s->curtileno = Isot;
     Psot  = bytestream2_get_be32u(&s->g);       // Psot
     TPsot = bytestream2_get_byteu(&s->g);       // TPsot
 
@@ -682,7 +783,7 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s,
                 return ret;
             if (ret > sizeof(cblk->data)) {
                 avpriv_request_sample(s->avctx,
-                                      "Block with lengthinc greater than %zu",
+                                      "Block with lengthinc greater than %"SIZE_SPECIFIER"",
                                       sizeof(cblk->data));
                 return AVERROR_PATCHWELCOME;
             }
@@ -706,25 +807,18 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s,
         nb_code_blocks = prec->nb_codeblocks_height * prec->nb_codeblocks_width;
         for (cblkno = 0; cblkno < nb_code_blocks; cblkno++) {
             Jpeg2000Cblk *cblk = prec->cblk + cblkno;
-            if (bytestream2_get_bytes_left(&s->g) < cblk->lengthinc)
+            if (   bytestream2_get_bytes_left(&s->g) < cblk->lengthinc
+                || sizeof(cblk->data) < cblk->length + cblk->lengthinc + 2
+            ) {
+                av_log(s->avctx, AV_LOG_ERROR,
+                       "Block length %"PRIu16" or lengthinc %d is too large\n",
+                       cblk->length, cblk->lengthinc);
                 return AVERROR_INVALIDDATA;
-            /* Code-block data can be empty. In that case initialize data
-             * with 0xFFFF. */
-            if (cblk->lengthinc > 0) {
-                bytestream2_get_bufferu(&s->g, cblk->data, cblk->lengthinc);
-            } else {
-                cblk->data[0] = 0xFF;
-                cblk->data[1] = 0xFF;
             }
+
+            bytestream2_get_bufferu(&s->g, cblk->data + cblk->length, cblk->lengthinc);
             cblk->length   += cblk->lengthinc;
             cblk->lengthinc = 0;
-
-            if (cblk->length > sizeof(cblk->data)) {
-                av_log(s->avctx, AV_LOG_ERROR,
-                       "Block length %"PRIu16" > data size %zd\n",
-                       cblk->length, sizeof(cblk->data));
-                return AVERROR_INVALIDDATA;
-            }
         }
     }
     return 0;
@@ -738,6 +832,9 @@ static int jpeg2000_decode_packets(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile
 
     s->bit_index = 8;
     switch (tile->codsty[0].prog_order) {
+    case JPEG2000_PGOD_RLCP:
+        avpriv_request_sample(s->avctx, "Progression order RLCP");
+
     case JPEG2000_PGOD_LRCP:
         for (layno = 0; layno < tile->codsty[0].nlayers; layno++) {
             ok_reslevel = 1;
@@ -748,7 +845,7 @@ static int jpeg2000_decode_packets(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile
                     Jpeg2000QuantStyle *qntsty  = tile->qntsty + compno;
                     if (reslevelno < codsty->nreslevels) {
                         Jpeg2000ResLevel *rlevel = tile->comp[compno].reslevel +
-                                                   reslevelno;
+                                                reslevelno;
                         ok_reslevel = 1;
                         for (precno = 0; precno < rlevel->num_precincts_x * rlevel->num_precincts_y; precno++)
                             if ((ret = jpeg2000_decode_packet(s,
@@ -798,6 +895,10 @@ static int jpeg2000_decode_packets(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile
                         prcx   = ff_jpeg2000_ceildivpow2(x, reducedresno) >> rlevel->log2_prec_width;
                         prcy   = ff_jpeg2000_ceildivpow2(y, reducedresno) >> rlevel->log2_prec_height;
                         precno = prcx + rlevel->num_precincts_x * prcy;
+
+                        if (prcx >= rlevel->num_precincts_x || prcy >= rlevel->num_precincts_y)
+                            return AVERROR_PATCHWELCOME;
+
                         for (layno = 0; layno < tile->codsty[0].nlayers; layno++) {
                             if ((ret = jpeg2000_decode_packet(s, codsty, rlevel,
                                                               precno, layno,
@@ -811,11 +912,6 @@ static int jpeg2000_decode_packets(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile
         }
         break;
 
-    case JPEG2000_PGOD_RLCP:
-        avpriv_request_sample(s->avctx, "Progression order RLCP");
-        ret = AVERROR_PATCHWELCOME;
-        break;
-
     case JPEG2000_PGOD_RPCL:
         avpriv_request_sample(s->avctx, "Progression order RPCL");
         ret = AVERROR_PATCHWELCOME;
@@ -961,21 +1057,25 @@ static int decode_cblk(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *codsty,
     int bpass_csty_symbol           = codsty->cblk_style & JPEG2000_CBLK_BYPASS;
     int vert_causal_ctx_csty_symbol = codsty->cblk_style & JPEG2000_CBLK_VSC;
 
+    av_assert0(width  <= JPEG2000_MAX_CBLKW);
+    av_assert0(height <= JPEG2000_MAX_CBLKH);
+
     for (y = 0; y < height; y++)
         memset(t1->data[y], 0, width * sizeof(**t1->data));
 
     /* If code-block contains no compressed data: nothing to do. */
     if (!cblk->length)
         return 0;
+
     for (y = 0; y < height + 2; y++)
         memset(t1->flags[y], 0, (width + 2) * sizeof(**t1->flags));
 
+    cblk->data[cblk->length] = 0xff;
+    cblk->data[cblk->length+1] = 0xff;
     ff_mqc_initdec(&t1->mqc, cblk->data);
-    cblk->data[cblk->length]     = 0xff;
-    cblk->data[cblk->length + 1] = 0xff;
 
     while (passno--) {
-        switch (pass_t) {
+        switch(pass_t) {
         case 0:
             decode_sigpass(t1, width, height, bpno + 1, bandpos,
                            bpass_csty_symbol && (clnpass_cnt >= 4),
@@ -1037,7 +1137,7 @@ static void dequantization_int(int x, int y, Jpeg2000Cblk *cblk,
         int32_t *datap = &comp->i_data[(comp->coord[0][1] - comp->coord[0][0]) * (y + j) + x];
         int *src = t1->data[j];
         for (i = 0; i < w; ++i)
-            datap[i] = (src[i] * band->i_stepsize + (1 << 15)) >> 16;
+            datap[i] = (src[i] * band->i_stepsize + (1 << 14)) >> 15;
     }
 }
 
@@ -1062,6 +1162,12 @@ static void mct_decode(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile)
     int32_t *src[3],  i0,  i1,  i2;
     float   *srcf[3], i0f, i1f, i2f;
 
+    for (i = 1; i < 3; i++)
+        if (tile->codsty[0].transform != tile->codsty[i].transform) {
+            av_log(s->avctx, AV_LOG_ERROR, "Transforms mismatch, MCT not supported\n");
+            return;
+        }
+
     for (i = 0; i < 3; i++)
         if (tile->codsty[0].transform == FF_DWT97)
             srcf[i] = tile->comp[i].f_data;
@@ -1070,6 +1176,7 @@ static void mct_decode(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile)
 
     for (i = 0; i < 2; i++)
         csize *= tile->comp[0].coord[i][1] - tile->comp[0].coord[i][0];
+
     switch (tile->codsty[0].transform) {
     case FF_DWT97:
         for (i = 0; i < csize; i++) {
@@ -1109,24 +1216,29 @@ static void mct_decode(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile)
 static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
                                 AVFrame *picture)
 {
+    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
     int compno, reslevelno, bandno;
     int x, y;
+    int planar    = !!(pixdesc->flags & AV_PIX_FMT_FLAG_PLANAR);
+    int pixelsize = planar ? 1 : pixdesc->nb_components;
 
     uint8_t *line;
     Jpeg2000T1Context t1;
-    /* Loop on tile components */
 
+    /* Loop on tile components */
     for (compno = 0; compno < s->ncomponents; compno++) {
         Jpeg2000Component *comp     = tile->comp + compno;
         Jpeg2000CodingStyle *codsty = tile->codsty + compno;
+
         /* Loop on resolution levels */
         for (reslevelno = 0; reslevelno < codsty->nreslevels2decode; reslevelno++) {
             Jpeg2000ResLevel *rlevel = comp->reslevel + reslevelno;
             /* Loop on bands */
             for (bandno = 0; bandno < rlevel->nbands; bandno++) {
-                uint16_t nb_precincts, precno;
+                int nb_precincts, precno;
                 Jpeg2000Band *band = rlevel->band + bandno;
                 int cblkno = 0, bandpos;
+
                 bandpos = bandno + (reslevelno > 0);
 
                 if (band->coord[0][0] == band->coord[0][1] ||
@@ -1167,6 +1279,13 @@ static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
     if (tile->codsty[0].mct)
         mct_decode(s, tile);
 
+    if (s->cdef[0] < 0) {
+        for (x = 0; x < s->ncomponents; x++)
+            s->cdef[x] = x + 1;
+        if ((s->ncomponents & 1) == 0)
+            s->cdef[s->ncomponents-1] = 0;
+    }
+
     if (s->precision <= 8) {
         for (compno = 0; compno < s->ncomponents; compno++) {
             Jpeg2000Component *comp = tile->comp + compno;
@@ -1175,14 +1294,19 @@ static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
             int32_t *i_datap = comp->i_data;
             int cbps = s->cbps[compno];
             int w = tile->comp[compno].coord[0][1] - s->image_offset_x;
+            int plane = 0;
+
+            if (planar)
+                plane = s->cdef[compno] ? s->cdef[compno]-1 : (s->ncomponents-1);
+
 
             y    = tile->comp[compno].coord[1][0] - s->image_offset_y;
-            line = picture->data[0] + y * picture->linesize[0];
+            line = picture->data[plane] + y / s->cdy[compno] * picture->linesize[plane];
             for (; y < tile->comp[compno].coord[1][1] - s->image_offset_y; y += s->cdy[compno]) {
                 uint8_t *dst;
 
                 x   = tile->comp[compno].coord[0][0] - s->image_offset_x;
-                dst = line + x * s->ncomponents + compno;
+                dst = line + x / s->cdx[compno] * pixelsize + compno*!planar;
 
                 if (codsty->transform == FF_DWT97) {
                     for (; x < w; x += s->cdx[compno]) {
@@ -1191,7 +1315,7 @@ static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
                         val = av_clip(val, 0, (1 << cbps) - 1);
                         *dst = val << (8 - cbps);
                         datap++;
-                        dst += s->ncomponents;
+                        dst += pixelsize;
                     }
                 } else {
                     for (; x < w; x += s->cdx[compno]) {
@@ -1200,10 +1324,10 @@ static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
                         val = av_clip(val, 0, (1 << cbps) - 1);
                         *dst = val << (8 - cbps);
                         i_datap++;
-                        dst += s->ncomponents;
+                        dst += pixelsize;
                     }
                 }
-                line += picture->linesize[0];
+                line += picture->linesize[plane];
             }
         }
     } else {
@@ -1215,13 +1339,18 @@ static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
             uint16_t *linel;
             int cbps = s->cbps[compno];
             int w = tile->comp[compno].coord[0][1] - s->image_offset_x;
+            int plane = 0;
+
+            if (planar)
+                plane = s->cdef[compno] ? s->cdef[compno]-1 : (s->ncomponents-1);
 
             y     = tile->comp[compno].coord[1][0] - s->image_offset_y;
-            linel = (uint16_t *)picture->data[0] + y * (picture->linesize[0] >> 1);
+            linel = (uint16_t *)picture->data[plane] + y / s->cdy[compno] * (picture->linesize[plane] >> 1);
             for (; y < tile->comp[compno].coord[1][1] - s->image_offset_y; y += s->cdy[compno]) {
                 uint16_t *dst;
+
                 x   = tile->comp[compno].coord[0][0] - s->image_offset_x;
-                dst = linel + (x * s->ncomponents + compno);
+                dst = linel + (x / s->cdx[compno] * pixelsize + compno*!planar);
                 if (codsty->transform == FF_DWT97) {
                     for (; x < w; x += s-> cdx[compno]) {
                         int  val = lrintf(*datap) + (1 << (cbps - 1));
@@ -1230,7 +1359,7 @@ static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
                         /* align 12 bit values in little-endian mode */
                         *dst = val << (16 - cbps);
                         datap++;
-                        dst += s->ncomponents;
+                        dst += pixelsize;
                     }
                 } else {
                     for (; x < w; x += s-> cdx[compno]) {
@@ -1240,10 +1369,10 @@ static int jpeg2000_decode_tile(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
                         /* align 12 bit values in little-endian mode */
                         *dst = val << (16 - cbps);
                         i_datap++;
-                        dst += s->ncomponents;
+                        dst += pixelsize;
                     }
                 }
-                linel += picture->linesize[0] >> 1;
+                linel += picture->linesize[plane] >> 1;
             }
         }
     }
@@ -1255,15 +1384,19 @@ static void jpeg2000_dec_cleanup(Jpeg2000DecoderContext *s)
 {
     int tileno, compno;
     for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++) {
-        for (compno = 0; compno < s->ncomponents; compno++) {
-            Jpeg2000Component *comp     = s->tile[tileno].comp   + compno;
-            Jpeg2000CodingStyle *codsty = s->tile[tileno].codsty + compno;
+        if (s->tile[tileno].comp) {
+            for (compno = 0; compno < s->ncomponents; compno++) {
+                Jpeg2000Component *comp     = s->tile[tileno].comp   + compno;
+                Jpeg2000CodingStyle *codsty = s->tile[tileno].codsty + compno;
 
-            ff_jpeg2000_cleanup(comp, codsty);
+                ff_jpeg2000_cleanup(comp, codsty);
+            }
+            av_freep(&s->tile[tileno].comp);
         }
-        av_freep(&s->tile[tileno].comp);
     }
     av_freep(&s->tile);
+    memset(s->codsty, 0, sizeof(s->codsty));
+    memset(s->qntsty, 0, sizeof(s->qntsty));
     s->numXtiles = s->numYtiles = 0;
 }
 
@@ -1290,17 +1423,21 @@ static int jpeg2000_read_main_headers(Jpeg2000DecoderContext *s)
             Jpeg2000Tile *tile;
             Jpeg2000TilePart *tp;
 
-            if (s->curtileno < 0) {
-                av_log(s->avctx, AV_LOG_ERROR, "Missing SOT\n");
-                return AVERROR_INVALIDDATA;
-            }
             if (!s->tile) {
                 av_log(s->avctx, AV_LOG_ERROR, "Missing SIZ\n");
                 return AVERROR_INVALIDDATA;
             }
+            if (s->curtileno < 0) {
+                av_log(s->avctx, AV_LOG_ERROR, "Missing SOT\n");
+                return AVERROR_INVALIDDATA;
+            }
 
             tile = s->tile + s->curtileno;
             tp = tile->tile_part + tile->tp_idx;
+            if (tp->tp_end < s->g.buffer) {
+                av_log(s->avctx, AV_LOG_ERROR, "Invalid tpend\n");
+                return AVERROR_INVALIDDATA;
+            }
             bytestream2_init(&tp->tpg, s->g.buffer, tp->tp_end - s->g.buffer);
             bytestream2_skip(&s->g, tp->tp_end - s->g.buffer);
 
@@ -1309,13 +1446,15 @@ static int jpeg2000_read_main_headers(Jpeg2000DecoderContext *s)
         if (marker == JPEG2000_EOC)
             break;
 
-        len = bytestream2_get_be16u(&s->g);
+        len = bytestream2_get_be16(&s->g);
         if (len < 2 || bytestream2_get_bytes_left(&s->g) < len - 2)
             return AVERROR_INVALIDDATA;
 
         switch (marker) {
         case JPEG2000_SIZ:
             ret = get_siz(s);
+            if (!s->tile)
+                s->numXtiles = s->numYtiles = 0;
             break;
         case JPEG2000_COC:
             ret = get_coc(s, codsty, properties);
@@ -1331,6 +1470,7 @@ static int jpeg2000_read_main_headers(Jpeg2000DecoderContext *s)
             break;
         case JPEG2000_SOT:
             if (!(ret = get_sot(s, len))) {
+                av_assert1(s->curtileno >= 0);
                 codsty = s->tile[s->curtileno].codsty;
                 qntsty = s->tile[s->curtileno].qntsty;
                 properties = s->tile[s->curtileno].properties;
@@ -1383,26 +1523,101 @@ static int jpeg2000_read_bitstream_packets(Jpeg2000DecoderContext *s)
 
 static int jp2_find_codestream(Jpeg2000DecoderContext *s)
 {
-    uint32_t atom_size, atom;
-    int found_codestream = 0, search_range = 10;
+    uint32_t atom_size, atom, atom_end; /* current box: length, type, offset just past it */
+    int search_range = 10; /* number of non-header boxes that may be skipped before giving up */
 
-    while(!found_codestream && search_range
-          &&
-          bytestream2_get_bytes_left(&s->g) >= 8) {
+    while (search_range /* walk the top-level boxes until the codestream box is found */
+           &&
+           bytestream2_get_bytes_left(&s->g) >= 8) {
         atom_size = bytestream2_get_be32u(&s->g);
         atom      = bytestream2_get_be32u(&s->g);
-        if (atom == JP2_CODESTREAM) {
-            found_codestream = 1;
+        atom_end  = bytestream2_tell(&s->g) + atom_size - 8; /* the 8 header bytes are already consumed */
+
+        if (atom == JP2_CODESTREAM)
+            return 1; /* found: reader sits right after the codestream box header */
+
+        if (bytestream2_get_bytes_left(&s->g) < atom_size || atom_end < atom_size)
+            return 0; /* box runs past the input, or atom_end wrapped around */
+
+        if (atom == JP2_HEADER &&
+                   atom_size >= 16) {
+            uint32_t atom2_size, atom2, atom2_end; /* same triple for the sub-boxes of the header box */
+            do {
+                atom2_size = bytestream2_get_be32u(&s->g);
+                atom2      = bytestream2_get_be32u(&s->g);
+                atom2_end  = bytestream2_tell(&s->g) + atom2_size - 8;
+                if (atom2_size < 8 || atom2_end > atom_end || atom2_end < atom2_size)
+                    break; /* malformed sub-box: stop scanning this header box */
+                if (atom2 == JP2_CODESTREAM) {
+                    return 1;
+                } else if (atom2 == MKBETAG('c','o','l','r') && atom2_size >= 7) {
+                    int method = bytestream2_get_byteu(&s->g);
+                    bytestream2_skipu(&s->g, 2);
+                    if (method == 1) {
+                        s->colour_space = bytestream2_get_be32u(&s->g);
+                    }
+                } else if (atom2 == MKBETAG('p','c','l','r') && atom2_size >= 6) {
+                    int i, size, colour_count, colour_channels, colour_depth[3];
+                    uint32_t r, g, b;
+                    colour_count = bytestream2_get_be16u(&s->g);
+                    colour_channels = bytestream2_get_byteu(&s->g);
+                    // FIXME: Do not ignore channel_sign
+                    colour_depth[0] = (bytestream2_get_byteu(&s->g) & 0x7f) + 1;
+                    colour_depth[1] = (bytestream2_get_byteu(&s->g) & 0x7f) + 1;
+                    colour_depth[2] = (bytestream2_get_byteu(&s->g) & 0x7f) + 1;
+                    size = (colour_depth[0] + 7 >> 3) * colour_count + /* bytes taken by all palette entries */
+                           (colour_depth[1] + 7 >> 3) * colour_count +
+                           (colour_depth[2] + 7 >> 3) * colour_count;
+                    if (colour_count > 256   ||
+                        colour_channels != 3 ||
+                        colour_depth[0] > 16 ||
+                        colour_depth[1] > 16 ||
+                        colour_depth[2] > 16 ||
+                        atom2_size < size) {
+                        avpriv_request_sample(s->avctx, "Unknown palette");
+                        bytestream2_seek(&s->g, atom2_end, SEEK_SET);
+                        continue; /* jumps to the do-while condition, i.e. on to the next sub-box */
+                    }
+                    s->pal8 = 1;
+                    for (i = 0; i < colour_count; i++) {
+                        if (colour_depth[0] <= 8) {
+                            r = bytestream2_get_byteu(&s->g) << 8 - colour_depth[0]; /* left-align in 8 bits */
+                            r |= r >> colour_depth[0]; /* replicate the top bits into the vacated low bits */
+                        } else {
+                            r = bytestream2_get_be16u(&s->g) >> colour_depth[0] - 8;
+                        }
+                        if (colour_depth[1] <= 8) {
+                            g = bytestream2_get_byteu(&s->g) << 8 - colour_depth[1];
+                            g |= g >> colour_depth[1]; /* same replication, on the green value itself */
+                        } else {
+                            g = bytestream2_get_be16u(&s->g) >> colour_depth[1] - 8;
+                        }
+                        if (colour_depth[2] <= 8) {
+                            b = bytestream2_get_byteu(&s->g) << 8 - colour_depth[2];
+                            b |= b >> colour_depth[2]; /* same replication, on the blue value itself */
+                        } else {
+                            b = bytestream2_get_be16u(&s->g) >> colour_depth[2] - 8;
+                        }
+                        s->palette[i] = 0xffu << 24 | r << 16 | g << 8 | b; /* opaque alpha in the top byte */
+                    }
+                } else if (atom2 == MKBETAG('c','d','e','f') && atom2_size >= 2) {
+                    int n = bytestream2_get_be16u(&s->g);
+                    for (; n>0; n--) {
+                        int cn   = bytestream2_get_be16(&s->g);
+                        int av_unused typ  = bytestream2_get_be16(&s->g);
+                        int asoc = bytestream2_get_be16(&s->g);
+                        if (cn < 4 && asoc < 4) /* cn indexes s->cdef[], so it must always be bounded */
+                            s->cdef[cn] = asoc;
+                    }
+                }
+                bytestream2_seek(&s->g, atom2_end, SEEK_SET);
+            } while (atom_end - atom2_end >= 8); /* room left for another sub-box header */
         } else {
-            if (bytestream2_get_bytes_left(&s->g) < atom_size - 8)
-                return 0;
-            bytestream2_skipu(&s->g, atom_size - 8);
             search_range--;
         }
+        bytestream2_seek(&s->g, atom_end, SEEK_SET); /* continue with the next top-level box */
     }
 
-    if (found_codestream)
-        return 1;
     return 0;
 }
 
@@ -1416,7 +1631,8 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, void *data,
 
     s->avctx     = avctx;
     bytestream2_init(&s->g, avpkt->data, avpkt->size);
-    s->curtileno = 0; // TODO: only one tile in DCI JP2K. to implement for more tiles
+    s->curtileno = -1;
+    memset(s->cdef, -1, sizeof(s->cdef));
 
     if (bytestream2_get_bytes_left(&s->g) < 2) {
         ret = AVERROR_INVALIDDATA;
@@ -1438,6 +1654,9 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, void *data,
         bytestream2_seek(&s->g, 0, SEEK_SET);
     }
 
+    while (bytestream2_get_bytes_left(&s->g) >= 3 && bytestream2_peek_be16(&s->g) != JPEG2000_SOC)
+        bytestream2_skip(&s->g, 1);
+
     if (bytestream2_get_be16u(&s->g) != JPEG2000_SOC) {
         av_log(avctx, AV_LOG_ERROR, "SOC marker not present\n");
         ret = AVERROR_INVALIDDATA;
@@ -1447,15 +1666,14 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, void *data,
         goto end;
 
     /* get picture buffer */
-    if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "ff_thread_get_buffer() failed.\n");
+    if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
         goto end;
-    }
     picture->pict_type = AV_PICTURE_TYPE_I;
     picture->key_frame = 1;
 
     if (ret = jpeg2000_read_bitstream_packets(s))
         goto end;
+
     for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++)
         if (ret = jpeg2000_decode_tile(s, s->tile + tileno, picture))
             goto end;
@@ -1464,6 +1682,9 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, void *data,
 
     *got_frame = 1;
 
+    if (s->avctx->pix_fmt == AV_PIX_FMT_PAL8)
+        memcpy(picture->data[1], s->palette, 256 * sizeof(uint32_t));
+
     return bytestream2_tell(&s->g);
 
 end:
@@ -1495,7 +1716,7 @@ static const AVProfile profiles[] = {
     { FF_PROFILE_UNKNOWN },
 };
 
-static const AVClass class = {
+static const AVClass jpeg2000_class = {
     .class_name = "jpeg2000",
     .item_name  = av_default_item_name,
     .option     = options,
@@ -1511,6 +1732,7 @@ AVCodec ff_jpeg2000_decoder = {
     .priv_data_size   = sizeof(Jpeg2000DecoderContext),
     .init_static_data = jpeg2000_init_static_data,
     .decode           = jpeg2000_decode_frame,
-    .priv_class       = &class,
+    .priv_class       = &jpeg2000_class,
+    .max_lowres       = 5,
     .profiles         = NULL_IF_CONFIG_SMALL(profiles)
 };
diff --git a/libavcodec/jpeg2000dwt.c b/libavcodec/jpeg2000dwt.c
index 6642a53..ceceda3 100644
--- a/libavcodec/jpeg2000dwt.c
+++ b/libavcodec/jpeg2000dwt.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2007 Kamil Nowosad
  * Copyright (c) 2013 Nicolas Bertrand <nicoinattendu@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -51,7 +51,6 @@
 #define I_LFTG_K       80621
 #define I_LFTG_X      106544
 
-
 static inline void extend53(int *p, int i0, int i1)
 {
     p[i0 - 1] = p[i0 + 1];
@@ -80,6 +79,213 @@ static inline void extend97_int(int32_t *p, int i0, int i1)
     }
 }
 
+static void sd_1d53(int *p, int i0, int i1) // in-place 1-D 5/3 lifting pass over p[i0..i1), used by dwt_encode53
+{
+    int i;
+
+    if (i1 == i0 + 1) // a single sample is left untouched
+        return;
+
+    extend53(p, i0, i1); // fill entries outside [i0, i1) so the loops below may read past both ends
+
+    for (i = (i0+1)/2 - 1; i < (i1+1)/2; i++)
+        p[2*i+1] -= (p[2*i] + p[2*i+2]) >> 1; // odd samples: subtract the floored mean of both even neighbours
+    for (i = (i0+1)/2; i < (i1+1)/2; i++)
+        p[2*i] += (p[2*i-1] + p[2*i+1] + 2) >> 2; // even samples: add the rounded quarter-sum of both odd neighbours
+}
+
+static void dwt_encode53(DWTContext *s, int *t) // in-place 2-D 5/3 transform of the int samples in t
+{
+    int lev,
+        w = s->linelen[s->ndeclevels-1][0]; // row stride of t; NOTE(review): reads linelen[-1] if ndeclevels is 0 - confirm callers
+    int *line = s->i_linebuf;
+    line += 3; // leave headroom in front: sd_1d53/extend53 index below i0
+
+    for (lev = s->ndeclevels-1; lev >= 0; lev--){ // from the highest level index down to 0
+        int lh = s->linelen[lev][0], // horizontal length at this level
+            lv = s->linelen[lev][1], // vertical length at this level
+            mh = s->mod[lev][0], // horizontal start offset into the line buffer (presumably the parity of the first column)
+            mv = s->mod[lev][1], // vertical counterpart of mh
+            lp;
+        int *l;
+
+        // HOR_SD: transform each of the lv rows
+        l = line + mh;
+        for (lp = 0; lp < lv; lp++){
+            int i, j = 0;
+
+            for (i = 0; i < lh; i++)
+                l[i] = t[w*lp + i]; // copy row lp into the work buffer
+
+            sd_1d53(line, mh, mh + lh);
+
+            // copy back and deinterleave: l[mh], l[mh+2], ... first, then the run starting at l[1-mh]
+            for (i =   mh; i < lh; i+=2, j++)
+                t[w*lp + j] = l[i];
+            for (i = 1-mh; i < lh; i+=2, j++) // j keeps counting, so this half lands right after the first
+                t[w*lp + j] = l[i];
+        }
+
+        // VER_SD: transform each of the lh columns
+        l = line + mv;
+        for (lp = 0; lp < lh; lp++) {
+            int i, j = 0;
+
+            for (i = 0; i < lv; i++)
+                l[i] = t[w*i + lp]; // gather column lp into the work buffer
+
+            sd_1d53(line, mv, mv + lv);
+
+            // copy back and deinterleave: same two-run order as for the rows
+            for (i =   mv; i < lv; i+=2, j++)
+                t[w*j + lp] = l[i];
+            for (i = 1-mv; i < lv; i+=2, j++)
+                t[w*j + lp] = l[i];
+        }
+    }
+}
+static void sd_1d97_float(float *p, int i0, int i1) // in-place 1-D float 9/7 lifting pass, used by dwt_encode97_float
+{
+    int i;
+
+    if (i1 == i0 + 1) // a single sample is left untouched
+        return;
+
+    extend97_float(p, i0, i1); // boundary extension so the loops below may read outside [i0, i1)
+    i0++; i1++; // the loop limits below are derived from the incremented bounds
+
+    for (i = i0/2 - 2; i < i1/2 + 1; i++)
+        p[2*i+1] -= 1.586134 * (p[2*i] + p[2*i+2]); // pass 1: odd samples from their even neighbours
+    for (i = i0/2 - 1; i < i1/2 + 1; i++)
+        p[2*i] -= 0.052980 * (p[2*i-1] + p[2*i+1]); // pass 2: even samples from their odd neighbours
+    for (i = i0/2 - 1; i < i1/2; i++)
+        p[2*i+1] += 0.882911 * (p[2*i] + p[2*i+2]); // pass 3: odd samples again
+    for (i = i0/2; i < i1/2; i++)
+        p[2*i] += 0.443506 * (p[2*i-1] + p[2*i+1]); // pass 4: even samples again
+}
+
+static void dwt_encode97_float(DWTContext *s, float *t) // in-place 2-D float 9/7 transform of the samples in t
+{
+    int lev,
+        w = s->linelen[s->ndeclevels-1][0]; // row stride of t; NOTE(review): reads linelen[-1] if ndeclevels is 0 - confirm callers
+    float *line = s->f_linebuf;
+    line += 5; // leave headroom in front: sd_1d97_float indexes below i0
+
+    for (lev = s->ndeclevels-1; lev >= 0; lev--){ // from the highest level index down to 0
+        int lh = s->linelen[lev][0], // horizontal length at this level
+            lv = s->linelen[lev][1], // vertical length at this level
+            mh = s->mod[lev][0], // horizontal start offset into the line buffer (presumably the parity of the first column)
+            mv = s->mod[lev][1], // vertical counterpart of mh
+            lp;
+        float *l;
+
+        // HOR_SD: transform each of the lv rows
+        l = line + mh;
+        for (lp = 0; lp < lv; lp++){
+            int i, j = 0;
+
+            for (i = 0; i < lh; i++)
+                l[i] = t[w*lp + i]; // copy row lp into the work buffer
+
+            sd_1d97_float(line, mh, mh + lh);
+
+            // copy back and deinterleave, scaling each run by its own gain
+            for (i =   mh; i < lh; i+=2, j++)
+                t[w*lp + j] = F_LFTG_X * l[i] / 2; // first run: l[mh], l[mh+2], ...
+            for (i = 1-mh; i < lh; i+=2, j++)
+                t[w*lp + j] = F_LFTG_K * l[i] / 2; // second run, stored right after the first
+        }
+
+        // VER_SD: transform each of the lh columns
+        l = line + mv;
+        for (lp = 0; lp < lh; lp++) {
+            int i, j = 0;
+
+            for (i = 0; i < lv; i++)
+                l[i] = t[w*i + lp]; // gather column lp into the work buffer
+
+            sd_1d97_float(line, mv, mv + lv);
+
+            // copy back and deinterleave, same scaling as for the rows
+            for (i =   mv; i < lv; i+=2, j++)
+                t[w*j + lp] = F_LFTG_X * l[i] / 2;
+            for (i = 1-mv; i < lv; i+=2, j++)
+                t[w*j + lp] = F_LFTG_K * l[i] / 2;
+        }
+    }
+}
+
+static void sd_1d97_int(int *p, int i0, int i1) // in-place 1-D integer 9/7 lifting pass, used by dwt_encode97_int
+{
+    int i;
+
+    if (i1 == i0 + 1) // a single sample is left untouched
+        return;
+
+    extend97_int(p, i0, i1); // boundary extension so the loops below may read outside [i0, i1)
+    i0++; i1++; // the loop limits below are derived from the incremented bounds
+
+    for (i = i0/2 - 2; i < i1/2 + 1; i++) // each pass: multiply, add 1 << 15 for rounding, then >> 16
+        p[2 * i + 1] -= (I_LFTG_ALPHA * (p[2 * i]     + p[2 * i + 2]) + (1 << 15)) >> 16;
+    for (i = i0/2 - 1; i < i1/2 + 1; i++)
+        p[2 * i]     -= (I_LFTG_BETA  * (p[2 * i - 1] + p[2 * i + 1]) + (1 << 15)) >> 16;
+    for (i = i0/2 - 1; i < i1/2; i++)
+        p[2 * i + 1] += (I_LFTG_GAMMA * (p[2 * i]     + p[2 * i + 2]) + (1 << 15)) >> 16;
+    for (i = i0/2; i < i1/2; i++)
+        p[2 * i]     += (I_LFTG_DELTA * (p[2 * i - 1] + p[2 * i + 1]) + (1 << 15)) >> 16;
+}
+
+static void dwt_encode97_int(DWTContext *s, int *t) // in-place 2-D integer 9/7 transform of the samples in t
+{
+    int lev,
+        w = s->linelen[s->ndeclevels-1][0]; // row stride of t; NOTE(review): reads linelen[-1] if ndeclevels is 0 - confirm callers
+    int *line = s->i_linebuf;
+    line += 5; // leave headroom in front: sd_1d97_int indexes below i0
+
+    for (lev = s->ndeclevels-1; lev >= 0; lev--){ // from the highest level index down to 0
+        int lh = s->linelen[lev][0], // horizontal length at this level
+            lv = s->linelen[lev][1], // vertical length at this level
+            mh = s->mod[lev][0], // horizontal start offset into the line buffer (presumably the parity of the first column)
+            mv = s->mod[lev][1], // vertical counterpart of mh
+            lp;
+        int *l;
+
+        // HOR_SD: transform each of the lv rows
+        l = line + mh;
+        for (lp = 0; lp < lv; lp++){
+            int i, j = 0;
+
+            for (i = 0; i < lh; i++)
+                l[i] = t[w*lp + i]; // copy row lp into the work buffer
+
+            sd_1d97_int(line, mh, mh + lh);
+
+            // copy back and deinterleave; scaling is multiply, add 1 << 16 for rounding, then >> 17
+            for (i =   mh; i < lh; i+=2, j++)
+                t[w*lp + j] = ((l[i] * I_LFTG_X) + (1 << 16)) >> 17; // first run: l[mh], l[mh+2], ...
+            for (i = 1-mh; i < lh; i+=2, j++)
+                t[w*lp + j] = ((l[i] * I_LFTG_K) + (1 << 16)) >> 17; // second run, stored right after the first
+        }
+
+        // VER_SD: transform each of the lh columns
+        l = line + mv;
+        for (lp = 0; lp < lh; lp++) {
+            int i, j = 0;
+
+            for (i = 0; i < lv; i++)
+                l[i] = t[w*i + lp]; // gather column lp into the work buffer
+
+            sd_1d97_int(line, mv, mv + lv);
+
+            // copy back and deinterleave, same scaling as for the rows
+            for (i =   mv; i < lv; i+=2, j++)
+                t[w*j + lp] = ((l[i] * I_LFTG_X) + (1 << 16)) >> 17;
+            for (i = 1-mv; i < lv; i+=2, j++)
+                t[w*j + lp] = ((l[i] * I_LFTG_K) + (1 << 16)) >> 17;
+        }
+    }
+}
+
 static void sr_1d53(int *p, int i0, int i1)
 {
     int i;
@@ -312,17 +518,17 @@ int ff_jpeg2000_dwt_init(DWTContext *s, uint16_t border[2][2],
         }
     switch (type) {
     case FF_DWT97:
-        s->f_linebuf = av_malloc((maxlen + 12) * sizeof(*s->f_linebuf));
+        s->f_linebuf = av_malloc_array((maxlen + 12), sizeof(*s->f_linebuf));
         if (!s->f_linebuf)
             return AVERROR(ENOMEM);
         break;
      case FF_DWT97_INT:
-        s->i_linebuf = av_malloc((maxlen + 12) * sizeof(*s->i_linebuf));
+        s->i_linebuf = av_malloc_array((maxlen + 12), sizeof(*s->i_linebuf));
         if (!s->i_linebuf)
             return AVERROR(ENOMEM);
         break;
     case FF_DWT53:
-        s->i_linebuf = av_malloc((maxlen +  6) * sizeof(*s->i_linebuf));
+        s->i_linebuf = av_malloc_array((maxlen +  6), sizeof(*s->i_linebuf));
         if (!s->i_linebuf)
             return AVERROR(ENOMEM);
         break;
@@ -332,6 +538,21 @@ int ff_jpeg2000_dwt_init(DWTContext *s, uint16_t border[2][2],
     return 0;
 }
 
+int ff_dwt_encode(DWTContext *s, void *t) // run the encode-side transform selected by s->type on t, in place
+{
+    switch(s->type){
+        case FF_DWT97:
+            dwt_encode97_float(s, t); break; // t is used as float * here
+        case FF_DWT97_INT:
+            dwt_encode97_int(s, t); break; // t is used as int * here
+        case FF_DWT53:
+            dwt_encode53(s, t); break; // t is used as int * here
+        default:
+            return -1; // unknown transform type
+    }
+    return 0; // a known type was dispatched
+}
+
 int ff_dwt_decode(DWTContext *s, void *t)
 {
     switch (s->type) {
diff --git a/libavcodec/jpeg2000dwt.h b/libavcodec/jpeg2000dwt.h
index 9aaa18b..b5be812 100644
--- a/libavcodec/jpeg2000dwt.h
+++ b/libavcodec/jpeg2000dwt.h
@@ -2,20 +2,20 @@
  * Discrete wavelet transform
  * Copyright (c) 2007 Kamil Nowosad
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -57,6 +57,7 @@ typedef struct DWTContext {
 int ff_jpeg2000_dwt_init(DWTContext *s, uint16_t border[2][2],
                          int decomp_levels, int type);
 
+int ff_dwt_encode(DWTContext *s, void *t);
 int ff_dwt_decode(DWTContext *s, void *t);
 
 void ff_dwt_destroy(DWTContext *s);
diff --git a/libavcodec/jpegls.c b/libavcodec/jpegls.c
index 52a4500..216c486 100644
--- a/libavcodec/jpegls.c
+++ b/libavcodec/jpegls.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2003 Michael Niedermayer
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -38,10 +38,8 @@ void ff_jpegls_init_state(JLSState *state)
     for (state->qbpp = 0; (1 << state->qbpp) < state->range; state->qbpp++)
         ;
 
-    if (state->bpp < 8)
-        state->limit = 2 * state->bpp - state->qbpp + 16;
-    else
-        state->limit = 4 * state->bpp - state->qbpp;
+    state->bpp   = FFMAX(av_log2(state->maxval) + 1, 2);
+    state->limit = 2*(state->bpp + FFMAX(state->bpp, 8)) - state->qbpp;
 
     for (i = 0; i < 367; i++) {
         state->A[i] = FFMAX(state->range + 32 >> 6, 2);
diff --git a/libavcodec/jpegls.h b/libavcodec/jpegls.h
index eae3943..c8997c7 100644
--- a/libavcodec/jpegls.h
+++ b/libavcodec/jpegls.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2003 Michael Niedermayer
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -30,6 +30,7 @@
 
 #include "libavutil/common.h"
 #include "avcodec.h"
+#include "internal.h"
 
 typedef struct JpeglsContext {
     AVCodecContext *avctx;
@@ -40,11 +41,9 @@ typedef struct JLSState {
     int A[367], B[367], C[365], N[367];
     int limit, reset, bpp, qbpp, maxval, range;
     int near, twonear;
-    int run_index[3];
+    int run_index[4];
 } JLSState;
 
-extern const uint8_t ff_log2_run[32];
-
 /**
  * Calculate initial JPEG-LS parameters
  */
@@ -98,6 +97,8 @@ static inline void ff_jpegls_downscale_state(JLSState *state, int Q)
 static inline int ff_jpegls_update_state_regular(JLSState *state,
                                                  int Q, int err)
 {
+    if(FFABS(err) > 0xFFFF)
+        return -0x10000;
     state->A[Q] += FFABS(err);
     err         *= state->twonear;
     state->B[Q] += err;
diff --git a/libavcodec/jpeglsdec.c b/libavcodec/jpeglsdec.c
index df72ca3..fb3762d 100644
--- a/libavcodec/jpeglsdec.c
+++ b/libavcodec/jpeglsdec.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2003 Michael Niedermayer
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -39,7 +39,7 @@
  * (or test broken JPEG-LS decoder) and slow down ordinary decoding a bit.
  *
  * There is no Golomb code with length >= 32 bits possible, so check and
- * avoid situation of 32 zeros, Libav Golomb decoder is painfully slow
+ * avoid situation of 32 zeros, FFmpeg Golomb decoder is painfully slow
  * on this errors.
  */
 //#define JLS_BROKEN
@@ -50,27 +50,79 @@
 int ff_jpegls_decode_lse(MJpegDecodeContext *s)
 {
     int id;
+    int tid, wt, maxtab, i, j;
 
-    skip_bits(&s->gb, 16);  /* length: FIXME: verify field validity */
+    int len = get_bits(&s->gb, 16);
     id = get_bits(&s->gb, 8);
 
     switch (id) {
     case 1:
+        if (len < 13)
+            return AVERROR_INVALIDDATA;
+
         s->maxval = get_bits(&s->gb, 16);
         s->t1     = get_bits(&s->gb, 16);
         s->t2     = get_bits(&s->gb, 16);
         s->t3     = get_bits(&s->gb, 16);
         s->reset  = get_bits(&s->gb, 16);
 
+        if(s->avctx->debug & FF_DEBUG_PICT_INFO) {
+            av_log(s->avctx, AV_LOG_DEBUG, "Coding parameters maxval:%d T1:%d T2:%d T3:%d reset:%d\n",
+                   s->maxval, s->t1, s->t2, s->t3, s->reset);
+        }
+
 //        ff_jpegls_reset_coding_parameters(s, 0);
         //FIXME quant table?
         break;
     case 2:
+        s->palette_index = 0;
     case 3:
-        av_log(s->avctx, AV_LOG_ERROR, "palette not supported\n");
-        return AVERROR(ENOSYS);
+        tid= get_bits(&s->gb, 8);
+        wt = get_bits(&s->gb, 8);
+
+        if (len < 5)
+            return AVERROR_INVALIDDATA;
+
+        if (wt < 1 || wt > MAX_COMPONENTS) {
+            avpriv_request_sample(s->avctx, "wt %d", wt);
+            return AVERROR_PATCHWELCOME;
+        }
+
+        if (!s->maxval)
+            maxtab = 255;
+        else if ((5 + wt*(s->maxval+1)) < 65535)
+            maxtab = s->maxval;
+        else
+            maxtab = 65530/wt - 1;
+
+        if(s->avctx->debug & FF_DEBUG_PICT_INFO) {
+            av_log(s->avctx, AV_LOG_DEBUG, "LSE palette %d tid:%d wt:%d maxtab:%d\n", id, tid, wt, maxtab);
+        }
+        if (maxtab >= 256) {
+            avpriv_request_sample(s->avctx, ">8bit palette");
+            return AVERROR_PATCHWELCOME;
+        }
+        maxtab = FFMIN(maxtab, (len - 5) / wt + s->palette_index);
+
+        if (s->palette_index > maxtab)
+            return AVERROR_INVALIDDATA;
+
+        if ((s->avctx->pix_fmt == AV_PIX_FMT_GRAY8 || s->avctx->pix_fmt == AV_PIX_FMT_PAL8) &&
+            (s->picture_ptr->format == AV_PIX_FMT_GRAY8 || s->picture_ptr->format == AV_PIX_FMT_PAL8)) {
+            uint32_t *pal = (uint32_t *)s->picture_ptr->data[1];
+            s->picture_ptr->format =
+            s->avctx->pix_fmt = AV_PIX_FMT_PAL8;
+            for (i=s->palette_index; i<=maxtab; i++) {
+                pal[i] = 0;
+                for (j=0; j<wt; j++) {
+                    pal[i] |= get_bits(&s->gb, 8) << (8*(wt-j-1));
+                }
+            }
+            s->palette_index = i;
+        }
+        break;
     case 4:
-        av_log(s->avctx, AV_LOG_ERROR, "oversize image not supported\n");
+        avpriv_request_sample(s->avctx, "oversize image");
         return AVERROR(ENOSYS);
     default:
         av_log(s->avctx, AV_LOG_ERROR, "invalid id %d\n", id);
@@ -148,6 +200,8 @@ static inline int ls_get_code_runterm(GetBitContext *gb, JLSState *state,
         ret = ret >> 1;
     }
 
+    if(FFABS(ret) > 0xFFFF)
+        return -0x10000;
     /* update state */
     state->A[Q] += FFABS(ret) - RItype;
     ret         *= state->twonear;
@@ -207,6 +261,9 @@ static inline void ls_decode_line(JLSState *state, MJpegDecodeContext *s,
             r = ff_log2_run[state->run_index[comp]];
             if (r)
                 r = get_bits_long(&s->gb, r);
+            if (x + r * stride > w) {
+                r = (w - x) / stride;
+            }
             for (i = 0; i < r; i++) {
                 W(dst, x, Ra);
                 x += stride;
@@ -297,21 +354,23 @@ int ff_jpegls_decode_picture(MJpegDecodeContext *s, int near,
     else
         shift = point_transform + (16 - s->bits);
 
-    av_dlog(s->avctx,
-            "JPEG-LS params: %ix%i NEAR=%i MV=%i T(%i,%i,%i) "
-            "RESET=%i, LIMIT=%i, qbpp=%i, RANGE=%i\n",
-            s->width, s->height, state->near, state->maxval,
-            state->T1, state->T2, state->T3,
-            state->reset, state->limit, state->qbpp, state->range);
-    av_dlog(s->avctx, "JPEG params: ILV=%i Pt=%i BPP=%i, scan = %i\n",
-            ilv, point_transform, s->bits, s->cur_scan);
+    if (s->avctx->debug & FF_DEBUG_PICT_INFO) {
+        av_log(s->avctx, AV_LOG_DEBUG,
+               "JPEG-LS params: %ix%i NEAR=%i MV=%i T(%i,%i,%i) "
+               "RESET=%i, LIMIT=%i, qbpp=%i, RANGE=%i\n",
+                s->width, s->height, state->near, state->maxval,
+                state->T1, state->T2, state->T3,
+                state->reset, state->limit, state->qbpp, state->range);
+        av_log(s->avctx, AV_LOG_DEBUG, "JPEG params: ILV=%i Pt=%i BPP=%i, scan = %i\n",
+                ilv, point_transform, s->bits, s->cur_scan);
+    }
     if (ilv == 0) { /* separate planes */
         if (s->cur_scan > s->nb_components) {
             ret = AVERROR_INVALIDDATA;
             goto end;
         }
-        off    = s->cur_scan - 1;
         stride = (s->nb_components > 1) ? 3 : 1;
+        off    = av_clip(s->cur_scan - 1, 0, stride - 1);
         width  = s->width * stride;
         cur   += off;
         for (i = 0; i < s->height; i++) {
@@ -333,12 +392,13 @@ int ff_jpegls_decode_picture(MJpegDecodeContext *s, int near,
     } else if (ilv == 1) { /* line interleaving */
         int j;
         int Rc[3] = { 0, 0, 0 };
+        stride = (s->nb_components > 1) ? 3 : 1;
         memset(cur, 0, s->picture_ptr->linesize[0]);
-        width = s->width * 3;
+        width = s->width * stride;
         for (i = 0; i < s->height; i++) {
-            for (j = 0; j < 3; j++) {
+            for (j = 0; j < stride; j++) {
                 ls_decode_line(state, s, last + j, cur + j,
-                               Rc[j], width, 3, j, 8);
+                               Rc[j], width, stride, j, 8);
                 Rc[j] = last[j];
 
                 if (s->restart_interval && !--s->restart_count) {
@@ -355,6 +415,53 @@ int ff_jpegls_decode_picture(MJpegDecodeContext *s, int near,
         goto end;
     }
 
+    if (s->xfrm && s->nb_components == 3) {
+        int x, w;
+
+        w = s->width * s->nb_components;
+
+        if (s->bits <= 8) {
+            uint8_t *src = s->picture_ptr->data[0];
+
+            for (i = 0; i < s->height; i++) {
+                switch(s->xfrm) {
+                case 1:
+                    for (x = off; x < w; x += 3) {
+                        src[x  ] += src[x+1] + 128;
+                        src[x+2] += src[x+1] + 128;
+                    }
+                    break;
+                case 2:
+                    for (x = off; x < w; x += 3) {
+                        src[x  ] += src[x+1] + 128;
+                        src[x+2] += ((src[x  ] + src[x+1])>>1) + 128;
+                    }
+                    break;
+                case 3:
+                    for (x = off; x < w; x += 3) {
+                        int g = src[x+0] - ((src[x+2]+src[x+1])>>2) + 64;
+                        src[x+0] = src[x+2] + g + 128;
+                        src[x+2] = src[x+1] + g + 128;
+                        src[x+1] = g;
+                    }
+                    break;
+                case 4:
+                    for (x = off; x < w; x += 3) {
+                        int r    = src[x+0] - ((                       359 * (src[x+2]-128) + 490) >> 8);
+                        int g    = src[x+0] - (( 88 * (src[x+1]-128) - 183 * (src[x+2]-128) +  30) >> 8);
+                        int b    = src[x+0] + ((454 * (src[x+1]-128)                        + 574) >> 8);
+                        src[x+0] = av_clip_uint8(r);
+                        src[x+1] = av_clip_uint8(g);
+                        src[x+2] = av_clip_uint8(b);
+                    }
+                    break;
+                }
+                src += s->picture_ptr->linesize[0];
+            }
+        }else
+            avpriv_report_missing_feature(s->avctx, "16bit xfrm");
+    }
+
     if (shift) { /* we need to do point transform or normalize samples */
         int x, w;
 
diff --git a/libavcodec/jpeglsdec.h b/libavcodec/jpeglsdec.h
index d60a87b..0cafaba 100644
--- a/libavcodec/jpeglsdec.h
+++ b/libavcodec/jpeglsdec.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2003 Michael Niedermayer
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/jpeglsenc.c b/libavcodec/jpeglsenc.c
index 3af6412..2f02332 100644
--- a/libavcodec/jpeglsenc.c
+++ b/libavcodec/jpeglsenc.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2003 Michael Niedermayer
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,6 +27,7 @@
 
 #include "avcodec.h"
 #include "get_bits.h"
+#include "put_bits.h"
 #include "golomb.h"
 #include "internal.h"
 #include "mathops.h"
@@ -263,11 +264,9 @@ static int encode_picture_ls(AVCodecContext *avctx, AVPacket *pkt,
     else
         comps = 3;
 
-    if ((ret = ff_alloc_packet(pkt, avctx->width * avctx->height * comps * 4 +
-                               FF_MIN_BUFFER_SIZE)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
+    if ((ret = ff_alloc_packet2(avctx, pkt, avctx->width  *avctx->height * comps * 4 +
+                                FF_MIN_BUFFER_SIZE)) < 0)
         return ret;
-    }
 
     buf2 = av_malloc(pkt->size);
 
@@ -308,7 +307,11 @@ static int encode_picture_ls(AVCodecContext *avctx, AVPacket *pkt,
 
     ls_store_lse(state, &pb);
 
-    zero = av_mallocz(p->linesize[0]);
+    zero = av_mallocz(FFABS(p->linesize[0]));
+    if (!zero) {
+        av_free(state);
+        return AVERROR(ENOMEM);
+    }
     last = zero;
     cur  = p->data[0];
     if (avctx->pix_fmt == AV_PIX_FMT_GRAY8) {
@@ -359,8 +362,8 @@ static int encode_picture_ls(AVCodecContext *avctx, AVPacket *pkt,
         }
     }
 
-    av_free(zero);
-    av_free(state);
+    av_freep(&zero);
+    av_freep(&state);
 
     /* the specification says that after doing 0xff escaping unused bits in
      * the last byte must be set to 0, so just append 7 "optional" zero-bits
@@ -428,6 +431,7 @@ AVCodec ff_jpegls_encoder = {
     .id             = AV_CODEC_ID_JPEGLS,
     .init           = encode_init_ls,
     .close          = encode_close,
+    .capabilities   = CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
     .encode2        = encode_picture_ls,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_BGR24, AV_PIX_FMT_RGB24,
diff --git a/libavcodec/jrevdct.c b/libavcodec/jrevdct.c
index e6846a1..d4e6571 100644
--- a/libavcodec/jrevdct.c
+++ b/libavcodec/jrevdct.c
@@ -249,8 +249,8 @@ void ff_j_rev_dct(DCTBLOCK data)
       /* AC terms all zero */
       if (d0) {
           /* Compute a 32 bit value to assign. */
-          int16_t dcval = (int16_t) (d0 << PASS1_BITS);
-          register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
+          int16_t dcval = (int16_t) (d0 * (1 << PASS1_BITS));
+          register int v = (dcval & 0xffff) | ((dcval * (1 << 16)) & 0xffff0000);
 
           idataptr[0] = v;
           idataptr[1] = v;
@@ -272,8 +272,8 @@ void ff_j_rev_dct(DCTBLOCK data)
                     tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
                     tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
 
-                    tmp0 = (d0 + d4) << CONST_BITS;
-                    tmp1 = (d0 - d4) << CONST_BITS;
+                    tmp0 = (d0 + d4) * CONST_SCALE;
+                    tmp1 = (d0 - d4) * CONST_SCALE;
 
                     tmp10 = tmp0 + tmp3;
                     tmp13 = tmp0 - tmp3;
@@ -284,8 +284,8 @@ void ff_j_rev_dct(DCTBLOCK data)
                     tmp2 = MULTIPLY(-d6, FIX_1_306562965);
                     tmp3 = MULTIPLY(d6, FIX_0_541196100);
 
-                    tmp0 = (d0 + d4) << CONST_BITS;
-                    tmp1 = (d0 - d4) << CONST_BITS;
+                    tmp0 = (d0 + d4) * CONST_SCALE;
+                    tmp1 = (d0 - d4) * CONST_SCALE;
 
                     tmp10 = tmp0 + tmp3;
                     tmp13 = tmp0 - tmp3;
@@ -298,8 +298,8 @@ void ff_j_rev_dct(DCTBLOCK data)
                     tmp2 = MULTIPLY(d2, FIX_0_541196100);
                     tmp3 = MULTIPLY(d2, FIX_1_306562965);
 
-                    tmp0 = (d0 + d4) << CONST_BITS;
-                    tmp1 = (d0 - d4) << CONST_BITS;
+                    tmp0 = (d0 + d4) * CONST_SCALE;
+                    tmp1 = (d0 - d4) * CONST_SCALE;
 
                     tmp10 = tmp0 + tmp3;
                     tmp13 = tmp0 - tmp3;
@@ -307,8 +307,8 @@ void ff_j_rev_dct(DCTBLOCK data)
                     tmp12 = tmp1 - tmp2;
             } else {
                     /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
-                    tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
-                    tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
+                    tmp10 = tmp13 = (d0 + d4) * CONST_SCALE;
+                    tmp11 = tmp12 = (d0 - d4) * CONST_SCALE;
             }
       }
 
@@ -618,8 +618,8 @@ void ff_j_rev_dct(DCTBLOCK data)
                     tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
                     tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
 
-                    tmp0 = (d0 + d4) << CONST_BITS;
-                    tmp1 = (d0 - d4) << CONST_BITS;
+                    tmp0 = (d0 + d4) * CONST_SCALE;
+                    tmp1 = (d0 - d4) * CONST_SCALE;
 
                     tmp10 = tmp0 + tmp3;
                     tmp13 = tmp0 - tmp3;
@@ -630,8 +630,8 @@ void ff_j_rev_dct(DCTBLOCK data)
                     tmp2 = MULTIPLY(-d6, FIX_1_306562965);
                     tmp3 = MULTIPLY(d6, FIX_0_541196100);
 
-                    tmp0 = (d0 + d4) << CONST_BITS;
-                    tmp1 = (d0 - d4) << CONST_BITS;
+                    tmp0 = (d0 + d4) * CONST_SCALE;
+                    tmp1 = (d0 - d4) * CONST_SCALE;
 
                     tmp10 = tmp0 + tmp3;
                     tmp13 = tmp0 - tmp3;
@@ -644,8 +644,8 @@ void ff_j_rev_dct(DCTBLOCK data)
                     tmp2 = MULTIPLY(d2, FIX_0_541196100);
                     tmp3 = MULTIPLY(d2, FIX_1_306562965);
 
-                    tmp0 = (d0 + d4) << CONST_BITS;
-                    tmp1 = (d0 - d4) << CONST_BITS;
+                    tmp0 = (d0 + d4) * CONST_SCALE;
+                    tmp1 = (d0 - d4) * CONST_SCALE;
 
                     tmp10 = tmp0 + tmp3;
                     tmp13 = tmp0 - tmp3;
@@ -653,8 +653,8 @@ void ff_j_rev_dct(DCTBLOCK data)
                     tmp12 = tmp1 - tmp2;
             } else {
                     /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
-                    tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
-                    tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
+                    tmp10 = tmp13 = (d0 + d4) * CONST_SCALE;
+                    tmp11 = tmp12 = (d0 - d4) * CONST_SCALE;
             }
     }
 
@@ -940,3 +940,216 @@ void ff_j_rev_dct(DCTBLOCK data)
     dataptr++;                  /* advance pointer to next column */
   }
 }
+
+#undef DCTSIZE
+#define DCTSIZE 4
+#define DCTSTRIDE 8
+
+/* In-place 4x4 inverse DCT: reads and rewrites DCTSIZE values in each of DCTSIZE rows of data, rows being DCTSTRIDE apart. */
+void ff_j_rev_dct4(DCTBLOCK data)
+{
+  int32_t tmp0, tmp1, tmp2, tmp3, z1;
+  int32_t tmp10, tmp11, tmp12, tmp13;
+  int32_t d0, d2, d4, d6;
+  register int16_t *dataptr;
+  int rowctr;
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* Scaling is done by multiplication rather than <<: the operands may be negative and shifting those left is undefined. */
+  data[0] += 4; /* rounding bias: 4/8 is half an output unit, injected through the DC term before the final shift by 3 */
+
+  dataptr = data;
+
+  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any row in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * row DCT calculations can be simplified this way.
+     */
+
+    register int *idataptr = (int*)dataptr;
+
+    d0 = dataptr[0];
+    d2 = dataptr[1];
+    d4 = dataptr[2];
+    d6 = dataptr[3];
+
+    if ((d2 | d4 | d6) == 0) {
+      /* AC terms all zero */
+      if (d0) {
+          /* Compute a 32 bit value holding dcval in both 16 bit halves. */
+          int16_t dcval = (int16_t) (d0 * (1 << PASS1_BITS));
+          register int v = (dcval & 0xffff) | ((dcval * (1 << 16)) & 0xffff0000);
+
+          idataptr[0] = v;
+          idataptr[1] = v;
+      }
+
+      dataptr += DCTSTRIDE;     /* advance pointer to next row */
+      continue;
+    }
+
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+    if (d6) {
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
+                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
+                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
+                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
+
+                    tmp0 = (d0 + d4) * CONST_SCALE;
+                    tmp1 = (d0 - d4) * CONST_SCALE;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
+                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
+                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
+
+                    tmp0 = (d0 + d4) * CONST_SCALE;
+                    tmp1 = (d0 - d4) * CONST_SCALE;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            }
+    } else {
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
+                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
+                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
+
+                    tmp0 = (d0 + d4) * CONST_SCALE;
+                    tmp1 = (d0 - d4) * CONST_SCALE;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
+                    tmp10 = tmp13 = (d0 + d4) * CONST_SCALE;
+                    tmp11 = tmp12 = (d0 - d4) * CONST_SCALE;
+            }
+      }
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    dataptr[0] = (int16_t) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
+    dataptr[1] = (int16_t) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
+    dataptr[2] = (int16_t) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (int16_t) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
+
+    dataptr += DCTSTRIDE;       /* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns. */
+  /* Note that we must descale the results by a factor of 8 == 2**3, */
+  /* and also undo the PASS1_BITS scaling. */
+
+  dataptr = data;
+  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
+    /* Unlike pass 1, no all-zero shortcut is applied to columns here:
+     * every column goes through the even-part computation below, which
+     * only skips the multiplications for d2/d6 terms that are zero.
+     * The loop counter keeps the name rowctr but counts columns;
+     * dataptr advances by one element per iteration and the four
+     * inputs of a column are DCTSTRIDE elements apart.
+     */
+
+    d0 = dataptr[DCTSTRIDE*0];
+    d2 = dataptr[DCTSTRIDE*1];
+    d4 = dataptr[DCTSTRIDE*2];
+    d6 = dataptr[DCTSTRIDE*3];
+
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+    if (d6) {
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
+                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
+                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
+                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
+
+                    tmp0 = (d0 + d4) * CONST_SCALE;
+                    tmp1 = (d0 - d4) * CONST_SCALE;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
+                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
+                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
+
+                    tmp0 = (d0 + d4) * CONST_SCALE;
+                    tmp1 = (d0 - d4) * CONST_SCALE;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            }
+    } else {
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
+                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
+                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
+
+                    tmp0 = (d0 + d4) * CONST_SCALE;
+                    tmp1 = (d0 - d4) * CONST_SCALE;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
+                    tmp10 = tmp13 = (d0 + d4) * CONST_SCALE;
+                    tmp11 = tmp12 = (d0 - d4) * CONST_SCALE;
+            }
+    }
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    dataptr[DCTSTRIDE*0] = tmp10 >> (CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSTRIDE*1] = tmp11 >> (CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSTRIDE*2] = tmp12 >> (CONST_BITS+PASS1_BITS+3);
+    dataptr[DCTSTRIDE*3] = tmp13 >> (CONST_BITS+PASS1_BITS+3);
+
+    dataptr++;                  /* advance pointer to next column */
+  }
+}
+
+/* In-place 2x2 sum/difference transform of data[0], data[1] and the same two columns one DCTSTRIDE further on. */
+void ff_j_rev_dct2(DCTBLOCK data){
+  int row0_sum, row0_diff, row1_sum, row1_diff;  /* per-row sum and difference of the two columns */
+  data[0] += 4;  /* bias the DC term: it feeds all four outputs, so each >>3 below rounds */
+  row0_sum  = data[0]             + data[1];
+  row0_diff = data[0]             - data[1];
+  row1_sum  = data[DCTSTRIDE]     + data[DCTSTRIDE + 1];
+  row1_diff = data[DCTSTRIDE]     - data[DCTSTRIDE + 1];
+  /* combine the two rows vertically and scale the results down by 8 */
+  data[0]             = (row0_sum  + row1_sum)  >> 3;
+  data[1]             = (row0_diff + row1_diff) >> 3;
+  data[DCTSTRIDE]     = (row0_sum  - row1_sum)  >> 3;
+  data[DCTSTRIDE + 1] = (row0_diff - row1_diff) >> 3;
+}
+
+void ff_j_rev_dct1(DCTBLOCK data){ /* DC-only variant: rewrites data[0] in place and touches no other element */
+  data[0] = (data[0] + 4)>>3; /* scale down by 8; the +4 is added before the shift so the result rounds instead of truncating */
+}
+
+#undef FIX
+#undef CONST_BITS
diff --git a/libavcodec/jvdec.c b/libavcodec/jvdec.c
index bb347e0..47e8edc 100644
--- a/libavcodec/jvdec.c
+++ b/libavcodec/jvdec.c
@@ -2,20 +2,20 @@
  * Bitmap Brothers JV video decoder
  * Copyright (c) 2011 Peter Ross <pross@xvid.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -140,24 +140,28 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                         AVPacket *avpkt)
 {
     JvContext *s = avctx->priv_data;
-    int buf_size = avpkt->size;
     const uint8_t *buf = avpkt->data;
-    const uint8_t *buf_end = buf + buf_size;
+    const uint8_t *buf_end = buf + avpkt->size;
     int video_size, video_type, i, j, ret;
 
+    if (avpkt->size < 6)
+        return AVERROR_INVALIDDATA;
+
     video_size = AV_RL32(buf);
     video_type = buf[4];
     buf += 5;
 
     if (video_size) {
-        if ((ret = ff_reget_buffer(avctx, s->frame)) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
-            return ret;
+        if (video_size < 0 || video_size > avpkt->size - 5) {
+            av_log(avctx, AV_LOG_ERROR, "video size %d invalid\n", video_size);
+            return AVERROR_INVALIDDATA;
         }
+        if ((ret = ff_reget_buffer(avctx, s->frame)) < 0)
+            return ret;
 
         if (video_type == 0 || video_type == 1) {
             GetBitContext gb;
-            init_get_bits(&gb, buf, 8 * FFMIN(video_size, buf_end - buf));
+            init_get_bits(&gb, buf, 8 * video_size);
 
             for (j = 0; j < avctx->height; j += 8)
                 for (i = 0; i < avctx->width; i += 8)
@@ -167,12 +171,10 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
 
             buf += video_size;
         } else if (video_type == 2) {
-            if (buf + 1 <= buf_end) {
-                int v = *buf++;
-                for (j = 0; j < avctx->height; j++)
-                    memset(s->frame->data[0] + j * s->frame->linesize[0],
-                           v, avctx->width);
-            }
+            int v = *buf++;
+            for (j = 0; j < avctx->height; j++)
+                memset(s->frame->data[0] + j * s->frame->linesize[0],
+                       v, avctx->width);
         } else {
             av_log(avctx, AV_LOG_WARNING,
                    "unsupported frame type %i\n", video_type);
@@ -180,9 +182,10 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         }
     }
 
-    if (buf < buf_end) {
-        for (i = 0; i < AVPALETTE_COUNT && buf + 3 <= buf_end; i++) {
-            s->palette[i] = AV_RB24(buf) << 2;
+    if (buf_end - buf >= AVPALETTE_COUNT * 3) {
+        for (i = 0; i < AVPALETTE_COUNT; i++) {
+            uint32_t pal = AV_RB24(buf);
+            s->palette[i] = 0xFFU << 24 | pal << 2 | ((pal >> 4) & 0x30303);
             buf += 3;
         }
         s->palette_has_changed = 1;
@@ -200,7 +203,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         *got_frame = 1;
     }
 
-    return buf_size;
+    return avpkt->size;
 }
 
 static av_cold int decode_close(AVCodecContext *avctx)
diff --git a/libavcodec/kbdwin.c b/libavcodec/kbdwin.c
index 1b7313d..bf32aeb 100644
--- a/libavcodec/kbdwin.c
+++ b/libavcodec/kbdwin.c
@@ -1,22 +1,22 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include <assert.h>
+#include "libavutil/avassert.h"
 #include "libavutil/mathematics.h"
 #include "libavutil/attributes.h"
 #include "kbdwin.h"
@@ -30,7 +30,7 @@ av_cold void ff_kbd_window_init(float *window, float alpha, int n)
    double local_window[FF_KBD_WINDOW_MAX];
    double alpha2 = (alpha * M_PI / n) * (alpha * M_PI / n);
 
-   assert(n <= FF_KBD_WINDOW_MAX);
+   av_assert0(n <= FF_KBD_WINDOW_MAX);
 
    for (i = 0; i < n; i++) {
        tmp = i * (n - i) * alpha2;
@@ -45,3 +45,13 @@ av_cold void ff_kbd_window_init(float *window, float alpha, int n)
    for (i = 0; i < n; i++)
        window[i] = sqrt(local_window[i] / sum);
 }
+
+av_cold void ff_kbd_window_init_fixed(int32_t *window, float alpha, int n)
+{
+    int i;
+    float local_window[FF_KBD_WINDOW_MAX];
+    /* Build the float window via ff_kbd_window_init(), then convert each of the n entries to int32_t. */
+    ff_kbd_window_init(local_window, alpha, n);
+    for (i = 0; i < n; i++)
+        window[i] = (int)floor(2147483647.0 * local_window[i] + 0.5); /* scale by 2^31 - 1, round to nearest */
+}
diff --git a/libavcodec/kbdwin.h b/libavcodec/kbdwin.h
index 89b569a..4185c42 100644
--- a/libavcodec/kbdwin.h
+++ b/libavcodec/kbdwin.h
@@ -1,24 +1,26 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #ifndef AVCODEC_KBDWIN_H
 #define AVCODEC_KBDWIN_H
 
+#include <stdint.h>
+
 /**
  * Maximum window size for ff_kbd_window_init.
  */
@@ -31,5 +33,6 @@
  * @param   n       size of half window, max FF_KBD_WINDOW_MAX
  */
 void ff_kbd_window_init(float *window, float alpha, int n);
+void ff_kbd_window_init_fixed(int32_t *window, float alpha, int n);
 
 #endif /* AVCODEC_KBDWIN_H */
diff --git a/libavcodec/kgv1dec.c b/libavcodec/kgv1dec.c
index d58e775..b81ba75 100644
--- a/libavcodec/kgv1dec.c
+++ b/libavcodec/kgv1dec.c
@@ -2,20 +2,20 @@
  * Kega Game Video (KGV1) decoder
  * Copyright (c) 2010 Daniel Verkamp
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -31,7 +31,6 @@
 #include "internal.h"
 
 typedef struct {
-    AVCodecContext *avctx;
     uint16_t *frame_buffer;
     uint16_t *last_frame_buffer;
 } KgvContext;
@@ -52,7 +51,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     const uint8_t *buf_end = buf + avpkt->size;
     KgvContext * const c = avctx->priv_data;
     int offsets[8];
-    uint16_t *out, *prev;
+    uint8_t *out, *prev;
     int outcnt = 0, maxcnt;
     int w, h, i, res;
 
@@ -83,22 +82,21 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
 
     if ((res = ff_get_buffer(avctx, frame, 0)) < 0)
         return res;
-    out  = c->frame_buffer;
-    prev = c->last_frame_buffer;
+    out  = (uint8_t*)c->frame_buffer;
+    prev = (uint8_t*)c->last_frame_buffer;
 
     for (i = 0; i < 8; i++)
         offsets[i] = -1;
 
-    while (outcnt < maxcnt && buf_end - 2 > buf) {
+    while (outcnt < maxcnt && buf_end - 2 >= buf) {
         int code = AV_RL16(buf);
         buf += 2;
 
         if (!(code & 0x8000)) {
-            out[outcnt++] = code; // rgb555 pixel coded directly
+            AV_WN16A(&out[2 * outcnt], code); // rgb555 pixel coded directly
+            outcnt++;
         } else {
             int count;
-            int inp_off;
-            uint16_t *inp;
 
             if ((code & 0x6000) == 0x6000) {
                 // copy from previous frame
@@ -116,7 +114,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
 
                 start = (outcnt + offsets[oidx]) % maxcnt;
 
-                if (maxcnt - start < count)
+                if (maxcnt - start < count || maxcnt - outcnt < count)
                     break;
 
                 if (!prev) {
@@ -125,8 +123,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                     break;
                 }
 
-                inp = prev;
-                inp_off = start;
+                memcpy(out + 2 * outcnt, prev + 2 * start, 2 * count);
             } else {
                 // copy from earlier in this frame
                 int offset = (code & 0x1FFF) + 1;
@@ -141,19 +138,12 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                     count = 4 + *buf++;
                 }
 
-                if (outcnt < offset)
+                if (outcnt < offset || maxcnt - outcnt < count)
                     break;
 
-                inp = out;
-                inp_off = outcnt - offset;
-            }
-
-            if (maxcnt - outcnt < count)
-                break;
-
-            for (i = inp_off; i < count + inp_off; i++) {
-                out[outcnt++] = inp[i];
+                av_memcpy_backptr(out + 2 * outcnt, 2 * offset, 2 * count);
             }
+            outcnt += count;
         }
     }
 
@@ -172,9 +162,6 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
 
 static av_cold int decode_init(AVCodecContext *avctx)
 {
-    KgvContext * const c = avctx->priv_data;
-
-    c->avctx = avctx;
     avctx->pix_fmt = AV_PIX_FMT_RGB555;
 
     return 0;
diff --git a/libavcodec/kmvc.c b/libavcodec/kmvc.c
index 5da8bb2..f879c35 100644
--- a/libavcodec/kmvc.c
+++ b/libavcodec/kmvc.c
@@ -2,20 +2,20 @@
  * KMVC decoder
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -107,6 +107,10 @@ static int kmvc_decode_intra_8x8(KmvcContext * ctx, int w, int h)
                             val = bytestream2_get_byte(&ctx->g);
                             mx = val & 0xF;
                             my = val >> 4;
+                            if ((l0x-mx) + 320*(l0y-my) < 0 || (l0x-mx) + 320*(l0y-my) > 320*197 - 4) {
+                                av_log(ctx->avctx, AV_LOG_ERROR, "Invalid MV\n");
+                                return AVERROR_INVALIDDATA;
+                            }
                             for (j = 0; j < 16; j++)
                                 BLK(ctx->cur, l0x + (j & 3), l0y + (j >> 2)) =
                                     BLK(ctx->cur, l0x + (j & 3) - mx, l0y + (j >> 2) - my);
@@ -128,6 +132,10 @@ static int kmvc_decode_intra_8x8(KmvcContext * ctx, int w, int h)
                                     val = bytestream2_get_byte(&ctx->g);
                                     mx = val & 0xF;
                                     my = val >> 4;
+                                    if ((l1x-mx) + 320*(l1y-my) < 0 || (l1x-mx) + 320*(l1y-my) > 320*199 - 2) {
+                                        av_log(ctx->avctx, AV_LOG_ERROR, "Invalid MV\n");
+                                        return AVERROR_INVALIDDATA;
+                                    }
                                     BLK(ctx->cur, l1x, l1y) = BLK(ctx->cur, l1x - mx, l1y - my);
                                     BLK(ctx->cur, l1x + 1, l1y) =
                                         BLK(ctx->cur, l1x + 1 - mx, l1y - my);
@@ -199,6 +207,10 @@ static int kmvc_decode_inter_8x8(KmvcContext * ctx, int w, int h)
                             val = bytestream2_get_byte(&ctx->g);
                             mx = (val & 0xF) - 8;
                             my = (val >> 4) - 8;
+                            if ((l0x+mx) + 320*(l0y+my) < 0 || (l0x+mx) + 320*(l0y+my) > 320*197 - 4) {
+                                av_log(ctx->avctx, AV_LOG_ERROR, "Invalid MV\n");
+                                return AVERROR_INVALIDDATA;
+                            }
                             for (j = 0; j < 16; j++)
                                 BLK(ctx->cur, l0x + (j & 3), l0y + (j >> 2)) =
                                     BLK(ctx->prev, l0x + (j & 3) + mx, l0y + (j >> 2) + my);
@@ -220,6 +232,10 @@ static int kmvc_decode_inter_8x8(KmvcContext * ctx, int w, int h)
                                     val = bytestream2_get_byte(&ctx->g);
                                     mx = (val & 0xF) - 8;
                                     my = (val >> 4) - 8;
+                                    if ((l1x+mx) + 320*(l1y+my) < 0 || (l1x+mx) + 320*(l1y+my) > 320*199 - 2) {
+                                        av_log(ctx->avctx, AV_LOG_ERROR, "Invalid MV\n");
+                                        return AVERROR_INVALIDDATA;
+                                    }
                                     BLK(ctx->cur, l1x, l1y) = BLK(ctx->prev, l1x + mx, l1y + my);
                                     BLK(ctx->cur, l1x + 1, l1y) =
                                         BLK(ctx->prev, l1x + 1 + mx, l1y + my);
@@ -256,10 +272,8 @@ static int decode_frame(AVCodecContext * avctx, void *data, int *got_frame,
 
     bytestream2_init(&ctx->g, avpkt->data, avpkt->size);
 
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     header = bytestream2_get_byte(&ctx->g);
 
@@ -267,7 +281,7 @@ static int decode_frame(AVCodecContext * avctx, void *data, int *got_frame,
     if (bytestream2_peek_byte(&ctx->g) == 127) {
         bytestream2_skip(&ctx->g, 3);
         for (i = 0; i < 127; i++) {
-            ctx->pal[i + (header & 0x81)] = bytestream2_get_be24(&ctx->g);
+            ctx->pal[i + (header & 0x81)] = 0xFFU << 24 | bytestream2_get_be24(&ctx->g);
             bytestream2_skip(&ctx->g, 1);
         }
         bytestream2_seek(&ctx->g, -127 * 4 - 3, SEEK_CUR);
@@ -285,7 +299,7 @@ static int decode_frame(AVCodecContext * avctx, void *data, int *got_frame,
         frame->palette_has_changed = 1;
         // palette starts from index 1 and has 127 entries
         for (i = 1; i <= ctx->palsize; i++) {
-            ctx->pal[i] = bytestream2_get_be24(&ctx->g);
+            ctx->pal[i] = 0xFFU << 24 | bytestream2_get_be24(&ctx->g);
         }
     }
 
@@ -369,7 +383,7 @@ static av_cold int decode_init(AVCodecContext * avctx)
     c->prev = c->frm1;
 
     for (i = 0; i < 256; i++) {
-        c->pal[i] = i * 0x10101;
+        c->pal[i] = 0xFFU << 24 | i * 0x10101;
     }
 
     if (avctx->extradata_size < 12) {
@@ -378,7 +392,8 @@ static av_cold int decode_init(AVCodecContext * avctx)
         c->palsize = 127;
     } else {
         c->palsize = AV_RL16(avctx->extradata + 10);
-        if (c->palsize >= MAX_PALSIZE) {
+        if (c->palsize >= (unsigned)MAX_PALSIZE) {
+            c->palsize = 127;
             av_log(avctx, AV_LOG_ERROR, "KMVC palette too large\n");
             return AVERROR_INVALIDDATA;
         }
diff --git a/libavcodec/lagarith.c b/libavcodec/lagarith.c
index e9b0098..a08d7fd 100644
--- a/libavcodec/lagarith.c
+++ b/libavcodec/lagarith.c
@@ -2,20 +2,20 @@
  * Lagarith lossless decoder
  * Copyright (c) 2009 Nathan Caldwell <saintdev (at) gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -160,8 +160,8 @@ static int lag_read_prob_header(lag_rac *rac, GetBitContext *gb)
                 av_log(rac->avctx, AV_LOG_ERROR, "Invalid probability run encountered.\n");
                 return -1;
             }
-            if (prob > 257 - i)
-                prob = 257 - i;
+            if (prob > 256 - i)
+                prob = 256 - i;
             for (j = 0; j < prob; j++)
                 rac->prob[++i] = 0;
         }
@@ -177,7 +177,15 @@ static int lag_read_prob_header(lag_rac *rac, GetBitContext *gb)
 
     if (cumul_prob & (cumul_prob - 1)) {
         uint64_t mul = softfloat_reciprocal(cumul_prob);
-        for (i = 1; i < 257; i++) {
+        for (i = 1; i <= 128; i++) {
+            rac->prob[i] = softfloat_mul(rac->prob[i], mul);
+            scaled_cumul_prob += rac->prob[i];
+        }
+        if (scaled_cumul_prob <= 0) {
+            av_log(rac->avctx, AV_LOG_ERROR, "Scaled probabilities invalid\n");
+            return AVERROR_INVALIDDATA;
+        }
+        for (; i < 257; i++) {
             rac->prob[i] = softfloat_mul(rac->prob[i], mul);
             scaled_cumul_prob += rac->prob[i];
         }
@@ -251,11 +259,8 @@ static void lag_pred_line(LagarithContext *l, uint8_t *buf,
     int L, TL;
 
     if (!line) {
-        int i, align_width = (width - 1) & ~31;
         /* Left prediction only for first line */
-        L = l->hdsp.add_hfyu_left_pred(buf + 1, buf + 1, align_width, buf[0]);
-        for (i = align_width + 1; i < width; i++)
-            buf[i] += buf[i - 1];
+        L = l->hdsp.add_hfyu_left_pred(buf, buf, width, 0);
     } else {
         /* Left pixel is actually prev_row[width] */
         L = buf[width - stride - 1];
@@ -281,18 +286,12 @@ static void lag_pred_line_yuy2(LagarithContext *l, uint8_t *buf,
     int L, TL;
 
     if (!line) {
-        int i, align_width;
-        if (is_luma) {
-            buf++;
-            width--;
-        }
-
-        align_width = (width - 1) & ~31;
-        l->hdsp.add_hfyu_left_pred(buf + 1, buf + 1, align_width, buf[0]);
-
-        for (i = align_width + 1; i < width; i++)
-            buf[i] += buf[i - 1];
-
+        L= buf[0];
+        if (is_luma)
+            buf[0] = 0;
+        l->hdsp.add_hfyu_left_pred(buf, buf, width, 0);
+        if (is_luma)
+            buf[0] = L;
         return;
     }
     if (line == 1) {
@@ -371,6 +370,10 @@ static int lag_decode_zero_run_line(LagarithContext *l, uint8_t *dst,
     uint8_t mask2 = -(esc_count < 3);
     uint8_t *end = dst + (width - 2);
 
+    avpriv_request_sample(l->avctx, "zero_run_line");
+
+    memset(dst, 0, width);
+
 output_zeros:
     if (l->zeros_rem) {
         count = FFMIN(l->zeros_rem, width - i);
@@ -388,7 +391,7 @@ output_zeros:
         i = 0;
         while (!zero_run && dst + i < end) {
             i++;
-            if (src + i >= src_end)
+            if (i+2 >= src_end - src)
                 return AVERROR_INVALIDDATA;
             zero_run =
                 !(src[i] | (src[i + 1] & mask1) | (src[i + 2] & mask2));
@@ -408,7 +411,7 @@ output_zeros:
             dst += i;
         }
     }
-    return src_start - src;
+    return  src - src_start;
 }
 
 
@@ -421,22 +424,30 @@ static int lag_decode_arith_plane(LagarithContext *l, uint8_t *dst,
     int read = 0;
     uint32_t length;
     uint32_t offset = 1;
-    int esc_count = src[0];
+    int esc_count;
     GetBitContext gb;
     lag_rac rac;
     const uint8_t *src_end = src + src_size;
+    int ret;
 
     rac.avctx = l->avctx;
     l->zeros = 0;
 
+    if(src_size < 2)
+        return AVERROR_INVALIDDATA;
+
+    esc_count = src[0];
     if (esc_count < 4) {
         length = width * height;
+        if(src_size < 5)
+            return AVERROR_INVALIDDATA;
         if (esc_count && AV_RL32(src + 1) < length) {
             length = AV_RL32(src + 1);
             offset += 4;
         }
 
-        init_get_bits(&gb, src + offset, src_size * 8);
+        if ((ret = init_get_bits8(&gb, src + offset, src_size - offset)) < 0)
+            return ret;
 
         if (lag_read_prob_header(&rac, &gb) < 0)
             return -1;
@@ -453,6 +464,8 @@ static int lag_decode_arith_plane(LagarithContext *l, uint8_t *dst,
                    length);
     } else if (esc_count < 8) {
         esc_count -= 4;
+        src ++;
+        src_size --;
         if (esc_count > 0) {
             /* Zero run coding only, no range coding. */
             for (i = 0; i < height; i++) {
@@ -513,7 +526,7 @@ static int lag_decode_frame(AVCodecContext *avctx,
                             void *data, int *got_frame, AVPacket *avpkt)
 {
     const uint8_t *buf = avpkt->data;
-    int buf_size = avpkt->size;
+    unsigned int buf_size = avpkt->size;
     LagarithContext *l = avctx->priv_data;
     ThreadFrame frame = { .f = data };
     AVFrame *const p  = data;
@@ -522,6 +535,7 @@ static int lag_decode_frame(AVCodecContext *avctx,
     uint32_t offs[4];
     uint8_t *srcs[4], *dst;
     int i, j, planes = 3;
+    int ret;
 
     p->key_frame = 1;
 
@@ -533,18 +547,53 @@ static int lag_decode_frame(AVCodecContext *avctx,
     switch (frametype) {
     case FRAME_SOLID_RGBA:
         avctx->pix_fmt = AV_PIX_FMT_RGB32;
+    case FRAME_SOLID_GRAY:
+        if (frametype == FRAME_SOLID_GRAY)
+            if (avctx->bits_per_coded_sample == 24) {
+                avctx->pix_fmt = AV_PIX_FMT_RGB24;
+            } else {
+                avctx->pix_fmt = AV_PIX_FMT_0RGB32;
+                planes = 4;
+            }
 
-        if (ff_thread_get_buffer(avctx, &frame, 0) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
-            return -1;
-        }
+        if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
+            return ret;
 
         dst = p->data[0];
+        if (frametype == FRAME_SOLID_RGBA) {
         for (j = 0; j < avctx->height; j++) {
             for (i = 0; i < avctx->width; i++)
                 AV_WN32(dst + i * 4, offset_gu);
             dst += p->linesize[0];
         }
+        } else {
+            for (j = 0; j < avctx->height; j++) {
+                memset(dst, buf[1], avctx->width * planes);
+                dst += p->linesize[0];
+            }
+        }
+        break;
+    case FRAME_SOLID_COLOR:
+        if (avctx->bits_per_coded_sample == 24) {
+            avctx->pix_fmt = AV_PIX_FMT_RGB24;
+        } else {
+            avctx->pix_fmt = AV_PIX_FMT_RGB32;
+            offset_gu |= 0xFFU << 24;
+        }
+
+        if ((ret = ff_thread_get_buffer(avctx, &frame,0)) < 0)
+            return ret;
+
+        dst = p->data[0];
+        for (j = 0; j < avctx->height; j++) {
+            for (i = 0; i < avctx->width; i++)
+                if (avctx->bits_per_coded_sample == 24) {
+                    AV_WB24(dst + i * 3, offset_gu);
+                } else {
+                    AV_WN32(dst + i * 4, offset_gu);
+                }
+            dst += p->linesize[0];
+        }
         break;
     case FRAME_ARITH_RGBA:
         avctx->pix_fmt = AV_PIX_FMT_RGB32;
@@ -556,10 +605,8 @@ static int lag_decode_frame(AVCodecContext *avctx,
         if (frametype == FRAME_ARITH_RGB24 || frametype == FRAME_U_RGB24)
             avctx->pix_fmt = AV_PIX_FMT_RGB24;
 
-        if (ff_thread_get_buffer(avctx, &frame, 0) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
-            return -1;
-        }
+        if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
+            return ret;
 
         offs[0] = offset_bv;
         offs[1] = offset_gu;
@@ -574,14 +621,13 @@ static int lag_decode_frame(AVCodecContext *avctx,
         }
         for (i = 0; i < planes; i++)
             srcs[i] = l->rgb_planes + (i + 1) * l->rgb_stride * avctx->height - l->rgb_stride;
-        if (offset_ry >= buf_size ||
-            offset_gu >= buf_size ||
-            offset_bv >= buf_size ||
-            (planes == 4 && offs[3] >= buf_size)) {
-            av_log(avctx, AV_LOG_ERROR,
-                    "Invalid frame offsets\n");
-            return AVERROR_INVALIDDATA;
-        }
+        for (i = 0; i < planes; i++)
+            if (buf_size <= offs[i]) {
+                av_log(avctx, AV_LOG_ERROR,
+                        "Invalid frame offsets\n");
+                return AVERROR_INVALIDDATA;
+            }
+
         for (i = 0; i < planes; i++)
             lag_decode_arith_plane(l, srcs[i],
                                    avctx->width, avctx->height,
@@ -615,10 +661,8 @@ static int lag_decode_frame(AVCodecContext *avctx,
     case FRAME_ARITH_YUY2:
         avctx->pix_fmt = AV_PIX_FMT_YUV422P;
 
-        if (ff_thread_get_buffer(avctx, &frame, 0) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
-            return -1;
-        }
+        if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
+            return ret;
 
         if (offset_ry >= buf_size ||
             offset_gu >= buf_size ||
@@ -641,9 +685,10 @@ static int lag_decode_frame(AVCodecContext *avctx,
     case FRAME_ARITH_YV12:
         avctx->pix_fmt = AV_PIX_FMT_YUV420P;
 
-        if (ff_thread_get_buffer(avctx, &frame, 0) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
-            return -1;
+        if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
+            return ret;
+        if (buf_size <= offset_ry || buf_size <= offset_gu || buf_size <= offset_bv) {
+            return AVERROR_INVALIDDATA;
         }
 
         if (offset_ry >= buf_size ||
@@ -667,7 +712,7 @@ static int lag_decode_frame(AVCodecContext *avctx,
     default:
         av_log(avctx, AV_LOG_ERROR,
                "Unsupported Lagarith frame type: %#"PRIx8"\n", frametype);
-        return -1;
+        return AVERROR_PATCHWELCOME;
     }
 
     *got_frame = 1;
diff --git a/libavcodec/lagarithrac.c b/libavcodec/lagarithrac.c
index edfb18f..37ac2cf 100644
--- a/libavcodec/lagarithrac.c
+++ b/libavcodec/lagarithrac.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2009 Nathan Caldwell <saintdev (at) gmail.com>
  * Copyright (c) 2009 David Conrad
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -41,19 +41,16 @@ void ff_lag_rac_init(lag_rac *l, GetBitContext *gb, int length)
     left                = get_bits_left(gb) >> 3;
     l->bytestream_start =
     l->bytestream       = gb->buffer + get_bits_count(gb) / 8;
-    l->bytestream_end   = l->bytestream_start + FFMIN(length, left);
+    l->bytestream_end   = l->bytestream_start + left;
 
     l->range        = 0x80;
     l->low          = *l->bytestream >> 1;
-    l->hash_shift   = FFMAX(l->scale - 8, 0);
+    l->hash_shift   = FFMAX((int)l->scale - 10, 0);
 
-    for (i = j = 0; i < 256; i++) {
+    for (i = j = 0; i < 1024; i++) {
         unsigned r = i << l->hash_shift;
         while (l->prob[j + 1] <= r)
             j++;
         l->range_hash[i] = j;
     }
-
-    /* Add conversion factor to hash_shift so we don't have to in lag_get_rac. */
-    l->hash_shift += 23;
 }
diff --git a/libavcodec/lagarithrac.h b/libavcodec/lagarithrac.h
index e4f066e..dfdfea0 100644
--- a/libavcodec/lagarithrac.h
+++ b/libavcodec/lagarithrac.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2009 Nathan Caldwell <saintdev (at) gmail.com>
  * Copyright (c) 2009 David Conrad
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -48,7 +48,7 @@ typedef struct lag_rac {
     const uint8_t *bytestream_end;    /**< End position of input bytestream. */
 
     uint32_t prob[258];         /**< Table of cumulative probability for each symbol. */
-    uint8_t  range_hash[256];   /**< Hash table mapping upper byte to approximate symbol. */
+    uint8_t  range_hash[1024];   /**< Hash table mapping upper byte to approximate symbol. */
 } lag_rac;
 
 void ff_lag_rac_init(lag_rac *l, GetBitContext *gb, int length);
@@ -72,9 +72,8 @@ static inline void lag_rac_refill(lag_rac *l)
  */
 static inline uint8_t lag_get_rac(lag_rac *l)
 {
-    unsigned range_scaled, low_scaled, div;
+    unsigned range_scaled, low_scaled;
     int val;
-    uint8_t shift;
 
     lag_rac_refill(l);
 
@@ -85,18 +84,9 @@ static inline uint8_t lag_get_rac(lag_rac *l)
         if (l->low < range_scaled * l->prob[1]) {
             val = 0;
         } else {
-            /* FIXME __builtin_clz is ~20% faster here, but not allowed in generic code. */
-            shift = 30 - av_log2(range_scaled);
-            div = ((range_scaled << shift) + (1 << 23) - 1) >> 23;
-            /* low>>24 ensures that any cases too big for exact FASTDIV are
-             * under- rather than over-estimated
-             */
-            low_scaled = FASTDIV(l->low - (l->low >> 24), div);
-            shift -= l->hash_shift;
-            shift &= 31;
-            low_scaled = (low_scaled << shift) | (low_scaled >> (32 - shift));
-            /* low_scaled is now a lower bound of low/range_scaled */
-            val = l->range_hash[(uint8_t) low_scaled];
+            low_scaled = l->low / (range_scaled<<(l->hash_shift));
+
+            val = l->range_hash[low_scaled];
             while (l->low >= range_scaled * l->prob[val + 1])
                 val++;
         }
diff --git a/libavcodec/latm_parser.c b/libavcodec/latm_parser.c
index 6fdb897..3820f58 100644
--- a/libavcodec/latm_parser.c
+++ b/libavcodec/latm_parser.c
@@ -1,20 +1,20 @@
 /*
  * copyright (c) 2008 Paul Kendall <paul@kcbbs.gen.nz>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -50,7 +50,6 @@ static int latm_find_frame_end(AVCodecParserContext *s1, const uint8_t *buf,
     pic_found = pc->frame_start_found;
     state     = pc->state;
 
-    i = 0;
     if (!pic_found) {
         for (i = 0; i < buf_size; i++) {
             state = (state<<8) | buf[i];
diff --git a/libavcodec/lcl.h b/libavcodec/lcl.h
index 4e7e170..b60c0e9 100644
--- a/libavcodec/lcl.h
+++ b/libavcodec/lcl.h
@@ -2,20 +2,20 @@
  * LCL (LossLess Codec Library) Codec
  * Copyright (c) 2002-2004 Roberto Togni
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/lcldec.c b/libavcodec/lcldec.c
index 8923341..bfab58c 100644
--- a/libavcodec/lcldec.c
+++ b/libavcodec/lcldec.c
@@ -2,20 +2,20 @@
  * LCL (LossLess Codec Library) Codec
  * Copyright (c) 2002-2004 Roberto Togni
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -42,6 +42,7 @@
 #include <stdlib.h>
 
 #include "libavutil/mem.h"
+#include "libavutil/pixdesc.h"
 #include "avcodec.h"
 #include "bytestream.h"
 #include "internal.h"
@@ -95,7 +96,13 @@ static unsigned int mszh_decomp(const unsigned char * srcptr, int srclen, unsign
             ofs = FFMIN(ofs, destptr - destptr_bak);
             cnt *= 4;
             cnt = FFMIN(cnt, destptr_end - destptr);
-            av_memcpy_backptr(destptr, ofs, cnt);
+            if (ofs) {
+                av_memcpy_backptr(destptr, ofs, cnt);
+            } else {
+                // Not known what the correct behaviour is, but
+                // this at least avoids uninitialized data.
+                memset(destptr, 0, cnt);
+            }
             destptr += cnt;
         }
         maskbit >>= 1;
@@ -132,7 +139,7 @@ static int zlib_decomp(AVCodecContext *avctx, const uint8_t *src, int src_len, i
         av_log(avctx, AV_LOG_ERROR, "Inflate reset error: %d\n", zret);
         return AVERROR_UNKNOWN;
     }
-    c->zstream.next_in = src;
+    c->zstream.next_in = (uint8_t *)src;
     c->zstream.avail_in = src_len;
     c->zstream.next_out = c->decomp_buf + offset;
     c->zstream.avail_out = c->decomp_size - offset;
@@ -174,10 +181,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
     unsigned int mthread_inlen, mthread_outlen;
     unsigned int len = buf_size;
 
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     outptr = frame->data[0]; // Output image pointer
 
@@ -186,8 +191,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
     case AV_CODEC_ID_MSZH:
         switch (c->compression) {
         case COMP_MSZH:
-            if (c->flags & FLAG_MULTITHREAD) {
+            if (c->imgtype == IMGTYPE_RGB24 && len == width * height * 3) {
+                ;
+            } else if (c->flags & FLAG_MULTITHREAD) {
+                if (len < 8) { /* both 32-bit length fields read below must be present */
+                    av_log(avctx, AV_LOG_ERROR, "len %u is too small\n", len);
+                    return AVERROR_INVALIDDATA;
+                }
                 mthread_inlen = AV_RL32(buf);
                 mthread_inlen = FFMIN(mthread_inlen, len - 8);
                 mthread_outlen = AV_RL32(buf + 4);
                 mthread_outlen = FFMIN(mthread_outlen, c->decomp_size);
@@ -471,6 +482,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
     unsigned int max_basesize = FFALIGN(avctx->width,  4) *
                                 FFALIGN(avctx->height, 4);
     unsigned int max_decomp_size;
+    int subsample_h, subsample_v;
 
     if (avctx->extradata_size < 8) {
         av_log(avctx, AV_LOG_ERROR, "Extradata size too small.\n");
@@ -496,6 +508,10 @@ static av_cold int decode_init(AVCodecContext *avctx)
         max_decomp_size = max_basesize * 2;
         avctx->pix_fmt = AV_PIX_FMT_YUV422P;
         av_log(avctx, AV_LOG_DEBUG, "Image type is YUV 4:2:2.\n");
+        if (avctx->width % 4) {
+            avpriv_request_sample(avctx, "Unsupported dimensions"); /* no newline: the helper appends its own text */
+            return AVERROR_INVALIDDATA;
+        }
         break;
     case IMGTYPE_RGB24:
         c->decomp_size = basesize * 3;
@@ -526,6 +542,12 @@ static av_cold int decode_init(AVCodecContext *avctx)
         return AVERROR_INVALIDDATA;
     }
 
+    av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &subsample_h, &subsample_v);
+    if (avctx->width % (1<<subsample_h) || avctx->height % (1<<subsample_v)) {
+        avpriv_request_sample(avctx, "Unsupported dimensions"); /* no newline: the helper appends its own text */
+        return AVERROR_INVALIDDATA;
+    }
+
     /* Detect compression method */
     c->compression = (int8_t)avctx->extradata[5];
     switch (avctx->codec_id) {
diff --git a/libavcodec/lclenc.c b/libavcodec/lclenc.c
index 878d1e1..201d357 100644
--- a/libavcodec/lclenc.c
+++ b/libavcodec/lclenc.c
@@ -2,20 +2,20 @@
  * LCL (LossLess Codec Library) Codec
  * Copyright (c) 2002-2004 Roberto Togni
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -41,7 +41,9 @@
 #include <stdio.h>
 #include <stdlib.h>
 
+#include "libavutil/avassert.h"
 #include "avcodec.h"
+#include "internal.h"
 #include "lcl.h"
 #include "libavutil/internal.h"
 #include "libavutil/mem.h"
@@ -70,19 +72,15 @@ typedef struct LclEncContext {
  *
  */
 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
-                        const AVFrame *pict, int *got_packet)
+                        const AVFrame *p, int *got_packet)
 {
     LclEncContext *c = avctx->priv_data;
-    const AVFrame * const p = pict;
     int i, ret;
     int zret; // Zlib return code
     int max_size = deflateBound(&c->zstream, avctx->width * avctx->height * 3);
 
-    if (!pkt->data &&
-        (ret = av_new_packet(pkt, max_size)) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "Error allocating packet of size %d.\n", max_size);
-            return ret;
-    }
+    if ((ret = ff_alloc_packet2(avctx, pkt, max_size)) < 0)
+        return ret;
 
     if(avctx->pix_fmt != AV_PIX_FMT_BGR24){
         av_log(avctx, AV_LOG_ERROR, "Format not supported!\n");
@@ -131,9 +129,11 @@ static av_cold int encode_init(AVCodecContext *avctx)
 
     c->avctx= avctx;
 
-    assert(avctx->width && avctx->height);
+    av_assert0(avctx->width && avctx->height);
 
-    avctx->extradata= av_mallocz(8);
+    avctx->extradata = av_mallocz(8 + FF_INPUT_BUFFER_PADDING_SIZE);
+    if (!avctx->extradata)
+        return AVERROR(ENOMEM);
 
     avctx->coded_frame = av_frame_alloc();
     if (!avctx->coded_frame)
@@ -142,8 +142,9 @@ static av_cold int encode_init(AVCodecContext *avctx)
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
     avctx->coded_frame->key_frame = 1;
 
-    // Will be user settable someday
-    c->compression = 6;
+    c->compression = avctx->compression_level == FF_COMPRESSION_DEFAULT ?
+                            COMP_ZLIB_NORMAL :
+                            av_clip(avctx->compression_level, 0, 9);
     c->flags = 0;
     c->imgtype = IMGTYPE_RGB24;
     avctx->bits_per_coded_sample= 24;
@@ -196,5 +197,6 @@ AVCodec ff_zlib_encoder = {
     .init           = encode_init,
     .encode2        = encode_frame,
     .close          = encode_end,
+    .capabilities   = CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
     .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_BGR24, AV_PIX_FMT_NONE },
 };
diff --git a/libavcodec/libaacplus.c b/libavcodec/libaacplus.c
new file mode 100644
index 0000000..545e240
--- /dev/null
+++ b/libavcodec/libaacplus.c
@@ -0,0 +1,184 @@
+/*
+ * Interface to libaacplus for aac+ (sbr+ps) encoding
+ * Copyright (c) 2010 tipok <piratfm@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Interface to libaacplus for aac+ (sbr+ps) encoding.
+ */
+
+#include <aacplus.h>
+
+#include "avcodec.h"
+#include "internal.h"
+
+/** Private state of the libaacplus encoder wrapper. */
+typedef struct aacPlusAudioContext {
+    aacplusEncHandle aacplus_handle; ///< encoder handle returned by aacplusEncOpen()
+    unsigned long max_output_bytes;  ///< set by aacplusEncOpen(); size of each allocated packet
+    unsigned long samples_input;     ///< set by aacplusEncOpen(); sample count passed to each encode call
+} aacPlusAudioContext;
+
+/**
+ * Release the libaacplus encoder handle and the extradata buffer.
+ *
+ * A context whose handle was never opened is tolerated, so this also serves
+ * as the error cleanup path of aacPlus_encode_init().
+ */
+static av_cold int aacPlus_encode_close(AVCodecContext *avctx)
+{
+    aacPlusAudioContext *s = avctx->priv_data;
+
+    av_freep(&avctx->extradata);
+    if (s->aacplus_handle) {
+        aacplusEncClose(s->aacplus_handle);
+        s->aacplus_handle = NULL;
+    }
+
+    return 0;
+}
+
+/**
+ * Open and configure the libaacplus encoder.
+ *
+ * Only 1 or 2 channels and the LC profile (or an unset profile) are
+ * accepted. On success avctx->frame_size is set and, when
+ * CODEC_FLAG_GLOBAL_HEADER is requested, avctx->extradata receives the
+ * decoder specific info reported by libaacplus.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static av_cold int aacPlus_encode_init(AVCodecContext *avctx)
+{
+    aacPlusAudioContext *s = avctx->priv_data;
+    aacplusEncConfiguration *aacplus_cfg;
+    int ret;
+
+    /* number of channels */
+    if (avctx->channels < 1 || avctx->channels > 2) {
+        av_log(avctx, AV_LOG_ERROR, "encoding %d channel(s) is not allowed\n", avctx->channels);
+        return AVERROR(EINVAL);
+    }
+
+    if (avctx->profile != FF_PROFILE_AAC_LOW && avctx->profile != FF_PROFILE_UNKNOWN) {
+        av_log(avctx, AV_LOG_ERROR, "invalid AAC profile: %d, only LC supported\n", avctx->profile);
+        return AVERROR(EINVAL);
+    }
+
+    s->aacplus_handle = aacplusEncOpen(avctx->sample_rate, avctx->channels,
+                                       &s->samples_input, &s->max_output_bytes);
+    if (!s->aacplus_handle) {
+        av_log(avctx, AV_LOG_ERROR, "can't open encoder\n");
+        return AVERROR(EINVAL);
+    }
+
+    /* start from the library's current configuration and override our fields */
+    aacplus_cfg = aacplusEncGetCurrentConfiguration(s->aacplus_handle);
+
+    aacplus_cfg->bitRate = avctx->bit_rate;
+    aacplus_cfg->bandWidth = avctx->cutoff;
+    aacplus_cfg->outputFormat = !(avctx->flags & CODEC_FLAG_GLOBAL_HEADER);
+    aacplus_cfg->inputFormat = avctx->sample_fmt == AV_SAMPLE_FMT_FLT ? AACPLUS_INPUT_FLOAT : AACPLUS_INPUT_16BIT;
+    if (!aacplusEncSetConfiguration(s->aacplus_handle, aacplus_cfg)) {
+        av_log(avctx, AV_LOG_ERROR, "libaacplus doesn't support this output format!\n");
+        ret = AVERROR(EINVAL);
+        goto error;
+    }
+
+    avctx->frame_size = s->samples_input / avctx->channels;
+
+    /* Set decoder specific info */
+    avctx->extradata_size = 0;
+    if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {
+        unsigned char *buffer = NULL;
+        unsigned long decoder_specific_info_size;
+
+        if (aacplusEncGetDecoderSpecificInfo(s->aacplus_handle, &buffer,
+                                           &decoder_specific_info_size) == 1) {
+            /* zero-filled so the padding after the payload is initialized */
+            avctx->extradata = av_mallocz(decoder_specific_info_size + FF_INPUT_BUFFER_PADDING_SIZE);
+            if (!avctx->extradata) {
+                free(buffer);
+                ret = AVERROR(ENOMEM);
+                goto error;
+            }
+            avctx->extradata_size = decoder_specific_info_size;
+            memcpy(avctx->extradata, buffer, avctx->extradata_size);
+        }
+        free(buffer);
+    }
+    return 0;
+
+error:
+    /* release the handle opened above before reporting the failure */
+    aacPlus_encode_close(avctx);
+    return ret;
+}
+
+/**
+ * Encode one audio frame into a packet.
+ *
+ * The packet is allocated with max_output_bytes; samples_input samples are
+ * read from frame->data[0] and the value returned by aacplusEncEncode()
+ * becomes the packet size.
+ *
+ * @return 0 on success, a negative AVERROR code if packet allocation fails
+ */
+static int aacPlus_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
+                                const AVFrame *frame, int *got_packet)
+{
+    aacPlusAudioContext *s = avctx->priv_data;
+    int32_t *input_buffer = (int32_t *)frame->data[0];
+    int ret;
+
+    if ((ret = ff_alloc_packet2(avctx, pkt, s->max_output_bytes)) < 0)
+        return ret;
+
+    pkt->size = aacplusEncEncode(s->aacplus_handle, input_buffer,
+                                 s->samples_input, pkt->data, pkt->size);
+    *got_packet   = 1;
+    pkt->pts      = frame->pts;
+    return 0;
+}
+
+/** Profiles advertised by this encoder: LC only. */
+static const AVProfile profiles[] = {
+    { FF_PROFILE_AAC_LOW, "LC" },
+    { FF_PROFILE_UNKNOWN },
+};
+
+/** Codec registration for the libaacplus AAC encoder. */
+AVCodec ff_libaacplus_encoder = {
+    .name           = "libaacplus",
+    .long_name      = NULL_IF_CONFIG_SMALL("libaacplus AAC+ (Advanced Audio Codec with SBR+PS)"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_AAC,
+    .priv_data_size = sizeof(aacPlusAudioContext),
+    .init           = aacPlus_encode_init,
+    .encode2        = aacPlus_encode_frame,
+    .close          = aacPlus_encode_close,
+    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
+                                                     AV_SAMPLE_FMT_FLT,
+                                                     AV_SAMPLE_FMT_NONE },
+    .profiles       = profiles,
+    .channel_layouts = (const uint64_t[]) { AV_CH_LAYOUT_MONO,
+                                            AV_CH_LAYOUT_STEREO,
+                                            0 },
+};
diff --git a/libavcodec/libavcodec.v b/libavcodec/libavcodec.v
index bf14807..b4bd2c8 100644
--- a/libavcodec/libavcodec.v
+++ b/libavcodec/libavcodec.v
@@ -1,4 +1,33 @@
 LIBAVCODEC_$MAJOR {
         global: av*;
+                #deprecated, remove after next bump
+                audio_resample;
+                audio_resample_close;
+                ff_find_pix_fmt;
+                ff_framenum_to_drop_timecode;
+                ff_framenum_to_smtpe_timecode;
+                ff_raw_pix_fmt_tags;
+                ff_init_smtpe_timecode;
+                ff_fft*;
+                ff_mdct*;
+                ff_dct*;
+                ff_rdft*;
+                ff_prores_idct_put_10_sse2;
+                ff_simple_idct*;
+                ff_aanscales;
+                ff_faan*;
+                ff_mmx_idct;
+                ff_fdct*;
+                fdct_ifast;
+                j_rev_dct;
+                ff_mmxext_idct;
+                ff_idct_xvid*;
+                ff_jpeg_fdct*;
+                ff_dnxhd_get_cid_table;
+                ff_dnxhd_cid_table;
+                ff_idctdsp_init;
+                ff_fdctdsp_init;
+                ff_pixblockdsp_init;
+                ff_me_cmp_init;
         local:  *;
 };
diff --git a/libavcodec/libcelt_dec.c b/libavcodec/libcelt_dec.c
new file mode 100644
index 0000000..4e62fe5
--- /dev/null
+++ b/libavcodec/libcelt_dec.c
@@ -0,0 +1,140 @@
+/*
+ * Xiph CELT decoder using libcelt
+ * Copyright (c) 2011 Nicolas George
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <celt/celt.h>
+#include <celt/celt_header.h>
+#include "avcodec.h"
+#include "internal.h"
+#include "libavutil/intreadwrite.h"
+
+struct libcelt_context {
+    CELTMode *mode;    /* created by celt_mode_create() in init, destroyed in close */
+    CELTDecoder *dec;  /* decoder created on top of mode in init, destroyed in close */
+    int discard;       /* samples dropped from the front of the first decoded frame */
+};
+/* Translate a libcelt error code into an AVERROR code (EINVAL for unlisted codes). */
+static int ff_celt_error_to_averror(int err)
+{
+    switch (err) {
+        case CELT_BAD_ARG:          return AVERROR(EINVAL);
+#ifdef CELT_BUFFER_TOO_SMALL /* only mapped when the libcelt headers define it */
+        case CELT_BUFFER_TOO_SMALL: return AVERROR(ENOBUFS);
+#endif
+        case CELT_INTERNAL_ERROR:   return AVERROR(EFAULT);
+        case CELT_CORRUPTED_DATA:   return AVERROR_INVALIDDATA;
+        case CELT_UNIMPLEMENTED:    return AVERROR(ENOSYS);
+#ifdef ENOTRECOVERABLE /* without this errno, CELT_INVALID_STATE falls through to default */
+        case CELT_INVALID_STATE:    return AVERROR(ENOTRECOVERABLE);
+#endif
+        case CELT_ALLOC_FAIL:       return AVERROR(ENOMEM);
+        default:                    return AVERROR(EINVAL);
+    }
+}
+/* Return the version_id that celt_header_init() writes for the given mode. */
+static int ff_celt_bitstream_version_hack(CELTMode *mode)
+{
+    CELTHeader header = { .version_id = 0 };
+    celt_header_init(&header, mode, 960, 2); /* NOTE(review): 960 and 2 look like placeholders, only version_id is read back - confirm */
+    return header.version_id;
+}
+/* Create the CELT mode and decoder, then parse the optional extradata fields. */
+static av_cold int libcelt_dec_init(AVCodecContext *c)
+{
+    struct libcelt_context *celt = c->priv_data;
+    int err;
+
+    if (!c->channels || !c->frame_size ||
+        c->frame_size > INT_MAX / sizeof(int16_t) / c->channels) /* frame_size * channels * sizeof(int16_t) must fit in an int */
+        return AVERROR(EINVAL);
+    celt->mode = celt_mode_create(c->sample_rate, c->frame_size, &err);
+    if (!celt->mode)
+        return ff_celt_error_to_averror(err);
+    celt->dec = celt_decoder_create_custom(celt->mode, c->channels, &err);
+    if (!celt->dec) {
+        celt_mode_destroy(celt->mode); /* undo the mode creation before failing */
+        return ff_celt_error_to_averror(err);
+    }
+    if (c->extradata_size >= 4) { /* bytes 0-3: number of samples to discard, read with AV_RL32 */
+        celt->discard = AV_RL32(c->extradata);
+        if (celt->discard < 0 || celt->discard >= c->frame_size) {
+            av_log(c, AV_LOG_WARNING,
+                   "Invalid overlap (%d), ignored.\n", celt->discard);
+            celt->discard = 0;
+        }
+    }
+    if (c->extradata_size >= 8) { /* bytes 4-7: bitstream version; a mismatch only warns */
+        unsigned version = AV_RL32(c->extradata + 4);
+        unsigned lib_version = ff_celt_bitstream_version_hack(celt->mode);
+        if (version != lib_version)
+            av_log(c, AV_LOG_WARNING,
+                   "CELT bitstream version 0x%x may be "
+                   "improperly decoded by libcelt for version 0x%x.\n",
+                   version, lib_version);
+    }
+    c->sample_fmt = AV_SAMPLE_FMT_S16;
+    return 0;
+}
+/* Destroy the decoder and the mode created in libcelt_dec_init(). */
+static av_cold int libcelt_dec_close(AVCodecContext *c)
+{
+    struct libcelt_context *celt = c->priv_data;
+
+    celt_decoder_destroy(celt->dec);
+    celt_mode_destroy(celt->mode);
+    return 0;
+}
+/* Decode one packet into frame->data[0] as S16 samples; returns pkt->size or a negative AVERROR. */
+static int libcelt_dec_decode(AVCodecContext *c, void *data,
+                              int *got_frame_ptr, AVPacket *pkt)
+{
+    struct libcelt_context *celt = c->priv_data;
+    AVFrame *frame = data;
+    int err;
+    int16_t *pcm;
+
+    frame->nb_samples = c->frame_size; /* buffer is requested for a full frame_size samples */
+    if ((err = ff_get_buffer(c, frame, 0)) < 0)
+        return err;
+    pcm = (int16_t *)frame->data[0];
+    err = celt_decode(celt->dec, pkt->data, pkt->size, pcm, c->frame_size);
+    if (err < 0)
+        return ff_celt_error_to_averror(err);
+    if (celt->discard) { /* non-zero only until the first frame has been trimmed */
+        frame->nb_samples -= celt->discard;
+        memmove(pcm, pcm + celt->discard * c->channels,
+                frame->nb_samples * c->channels * sizeof(int16_t)); /* shift the kept samples to the buffer start */
+        celt->discard = 0;
+    }
+    *got_frame_ptr = 1;
+    return pkt->size; /* the whole packet is reported as consumed */
+}
+/* Decoder registration: binds the callbacks above to AV_CODEC_ID_CELT. */
+AVCodec ff_libcelt_decoder = {
+    .name           = "libcelt",
+    .long_name      = NULL_IF_CONFIG_SMALL("Xiph CELT decoder using libcelt"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_CELT,
+    .priv_data_size = sizeof(struct libcelt_context),
+    .init           = libcelt_dec_init,
+    .close          = libcelt_dec_close,
+    .decode         = libcelt_dec_decode,
+    .capabilities   = CODEC_CAP_DR1,
+};
diff --git a/libavcodec/libfaac.c b/libavcodec/libfaac.c
index 9b5b11a..477669a 100644
--- a/libavcodec/libfaac.c
+++ b/libavcodec/libfaac.c
@@ -2,20 +2,20 @@
  * Interface to libfaac for aac encoding
  * Copyright (c) 2002 Gildas Bazin <gbazin@netcourrier.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -41,7 +41,6 @@ typedef struct FaacAudioContext {
     AudioFrameQueue afq;
 } FaacAudioContext;
 
-
 static av_cold int Faac_encode_close(AVCodecContext *avctx)
 {
     FaacAudioContext *s = avctx->priv_data;
@@ -152,9 +151,20 @@ static av_cold int Faac_encode_init(AVCodecContext *avctx)
     }
 
     if (!faacEncSetConfiguration(s->faac_handle, faac_cfg)) {
-        av_log(avctx, AV_LOG_ERROR, "libfaac doesn't support this output format!\n");
-        ret = AVERROR(EINVAL);
-        goto error;
+        int i;
+        for (i = avctx->bit_rate/1000; i ; i--) {
+            faac_cfg->bitRate = 1000*i / avctx->channels;
+            if (faacEncSetConfiguration(s->faac_handle, faac_cfg))
+                break;
+        }
+        if (!i) {
+            av_log(avctx, AV_LOG_ERROR, "libfaac doesn't support this output format!\n");
+            ret = AVERROR(EINVAL);
+            goto error;
+        } else {
+            avctx->bit_rate = 1000*i;
+            av_log(avctx, AV_LOG_WARNING, "libfaac doesn't support the specified bitrate, using %dkbit/s instead\n", i);
+        }
     }
 
     avctx->delay = FAAC_DELAY_SAMPLES;
@@ -174,10 +184,8 @@ static int Faac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     int num_samples  = frame ? frame->nb_samples : 0;
     void *samples    = frame ? frame->data[0]    : NULL;
 
-    if ((ret = ff_alloc_packet(avpkt, (7 + 768) * avctx->channels))) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+    if ((ret = ff_alloc_packet2(avctx, avpkt, (7 + 768) * avctx->channels)) < 0)
         return ret;
-    }
 
     bytes_written = faacEncEncode(s->faac_handle, samples,
                                   num_samples * avctx->channels,
diff --git a/libavcodec/libfdk-aacdec.c b/libavcodec/libfdk-aacdec.c
index 27e5712..624d579 100644
--- a/libavcodec/libfdk-aacdec.c
+++ b/libavcodec/libfdk-aacdec.c
@@ -2,7 +2,7 @@
  * AAC decoder wrapper
  * Copyright (c) 2012 Martin Storsjo
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -223,10 +223,8 @@ static int fdk_aac_decode_frame(AVCodecContext *avctx, void *data,
 
     if (s->initialized) {
         frame->nb_samples = avctx->frame_size;
-        if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "ff_get_buffer() failed\n");
+        if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
             return ret;
-        }
         buf = frame->extended_data[0];
         buf_size = avctx->channels * frame->nb_samples *
                    av_get_bytes_per_sample(avctx->sample_fmt);
@@ -258,10 +256,8 @@ static int fdk_aac_decode_frame(AVCodecContext *avctx, void *data,
 
     if (tmpptr) {
         frame->nb_samples = avctx->frame_size;
-        if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "ff_get_buffer() failed\n");
+        if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
             goto end;
-        }
         memcpy(frame->extended_data[0], tmpptr,
                avctx->channels * avctx->frame_size *
                av_get_bytes_per_sample(avctx->sample_fmt));
diff --git a/libavcodec/libfdk-aacenc.c b/libavcodec/libfdk-aacenc.c
index 34717d4..b9c9a13 100644
--- a/libavcodec/libfdk-aacenc.c
+++ b/libavcodec/libfdk-aacenc.c
@@ -2,7 +2,7 @@
  * AAC encoder wrapper
  * Copyright (c) 2012 Martin Storsjo
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -342,10 +342,8 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     }
 
     /* The maximum packet size is 6144 bits aka 768 bytes per channel. */
-    if ((ret = ff_alloc_packet(avpkt, FFMAX(8192, 768 * avctx->channels)))) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+    if ((ret = ff_alloc_packet2(avctx, avpkt, FFMAX(8192, 768 * avctx->channels))) < 0)
         return ret;
-    }
 
     out_ptr                   = avpkt->data;
     out_buffer_size           = avpkt->size;
diff --git a/libavcodec/libgsmdec.c b/libavcodec/libgsmdec.c
index da95cc7..8740108 100644
--- a/libavcodec/libgsmdec.c
+++ b/libavcodec/libgsmdec.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2005 Alban Bedel <albeu@free.fr>
  * Copyright (c) 2006, 2007 Michel Bardiaux <mbardiaux@mediaxim.be>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -50,7 +50,8 @@ static av_cold int libgsm_decode_init(AVCodecContext *avctx) {
 
     avctx->channels       = 1;
     avctx->channel_layout = AV_CH_LAYOUT_MONO;
-    avctx->sample_rate    = 8000;
+    if (!avctx->sample_rate)
+        avctx->sample_rate = 8000;
     avctx->sample_fmt     = AV_SAMPLE_FMT_S16;
 
     s->state = gsm_create();
@@ -96,10 +97,8 @@ static int libgsm_decode_frame(AVCodecContext *avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = avctx->frame_size;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     samples = (int16_t *)frame->data[0];
 
     for (i = 0; i < avctx->frame_size / GSM_FRAME_SIZE; i++) {
@@ -124,6 +123,7 @@ static void libgsm_flush(AVCodecContext *avctx) {
         gsm_option(s->state, GSM_OPT_WAV49, &one);
 }
 
+#if CONFIG_LIBGSM_DECODER
 AVCodec ff_libgsm_decoder = {
     .name           = "libgsm",
     .long_name      = NULL_IF_CONFIG_SMALL("libgsm GSM"),
@@ -136,7 +136,8 @@ AVCodec ff_libgsm_decoder = {
     .flush          = libgsm_flush,
     .capabilities   = CODEC_CAP_DR1,
 };
-
+#endif
+#if CONFIG_LIBGSM_MS_DECODER
 AVCodec ff_libgsm_ms_decoder = {
     .name           = "libgsm_ms",
     .long_name      = NULL_IF_CONFIG_SMALL("libgsm GSM Microsoft variant"),
@@ -149,3 +150,4 @@ AVCodec ff_libgsm_ms_decoder = {
     .flush          = libgsm_flush,
     .capabilities   = CODEC_CAP_DR1,
 };
+#endif
diff --git a/libavcodec/libgsmenc.c b/libavcodec/libgsmenc.c
index 8f51321..b06ec64 100644
--- a/libavcodec/libgsmenc.c
+++ b/libavcodec/libgsmenc.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2005 Alban Bedel <albeu@free.fr>
  * Copyright (c) 2006, 2007 Michel Bardiaux <mbardiaux@mediaxim.be>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -40,6 +40,12 @@
 #include "internal.h"
 #include "gsm.h"
 
+static av_cold int libgsm_encode_close(AVCodecContext *avctx) {
+    gsm_destroy(avctx->priv_data); /* priv_data holds the handle returned by gsm_create() in init */
+    avctx->priv_data = NULL; /* clear the pointer once the handle is destroyed */
+    return 0;
+}
+
 static av_cold int libgsm_encode_init(AVCodecContext *avctx) {
     if (avctx->channels > 1) {
         av_log(avctx, AV_LOG_ERROR, "Mono required for GSM, got %d channels\n",
@@ -63,6 +69,8 @@ static av_cold int libgsm_encode_init(AVCodecContext *avctx) {
     }
 
     avctx->priv_data = gsm_create();
+    if (!avctx->priv_data)
+        goto error;
 
     switch(avctx->codec_id) {
     case AV_CODEC_ID_GSM:
@@ -78,12 +86,9 @@ static av_cold int libgsm_encode_init(AVCodecContext *avctx) {
     }
 
     return 0;
-}
-
-static av_cold int libgsm_encode_close(AVCodecContext *avctx) {
-    gsm_destroy(avctx->priv_data);
-    avctx->priv_data = NULL;
-    return 0;
+error:
+    libgsm_encode_close(avctx);
+    return -1;
 }
 
 static int libgsm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
@@ -93,10 +98,8 @@ static int libgsm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     gsm_signal *samples = (gsm_signal *)frame->data[0];
     struct gsm_state *state = avctx->priv_data;
 
-    if ((ret = ff_alloc_packet(avpkt, avctx->block_align))) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+    if ((ret = ff_alloc_packet2(avctx, avpkt, avctx->block_align)) < 0)
         return ret;
-    }
 
     switch(avctx->codec_id) {
     case AV_CODEC_ID_GSM:
@@ -112,6 +115,7 @@ static int libgsm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 }
 
 
+#if CONFIG_LIBGSM_ENCODER
 AVCodec ff_libgsm_encoder = {
     .name           = "libgsm",
     .long_name      = NULL_IF_CONFIG_SMALL("libgsm GSM"),
@@ -123,7 +127,8 @@ AVCodec ff_libgsm_encoder = {
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_NONE },
 };
-
+#endif
+#if CONFIG_LIBGSM_MS_ENCODER
 AVCodec ff_libgsm_ms_encoder = {
     .name           = "libgsm_ms",
     .long_name      = NULL_IF_CONFIG_SMALL("libgsm GSM Microsoft variant"),
@@ -135,3 +140,4 @@ AVCodec ff_libgsm_ms_encoder = {
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_NONE },
 };
+#endif
diff --git a/libavcodec/libilbc.c b/libavcodec/libilbc.c
index af693bd..898fe83 100644
--- a/libavcodec/libilbc.c
+++ b/libavcodec/libilbc.c
@@ -2,20 +2,20 @@
  * iLBC decoder/encoder stub
  * Copyright (c) 2012 Martin Storsjo
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -51,7 +51,10 @@ static const AVOption ilbc_dec_options[] = {
 };
 
 static const AVClass ilbc_dec_class = {
-    "libilbc", av_default_item_name, ilbc_dec_options, LIBAVUTIL_VERSION_INT
+    .class_name = "libilbc", /* designated initializers: each value is bound to its AVClass field by name, not by position */
+    .item_name  = av_default_item_name,
+    .option     = ilbc_dec_options, /* decoder option table */
+    .version    = LIBAVUTIL_VERSION_INT,
 };
 
 static av_cold int ilbc_decode_init(AVCodecContext *avctx)
@@ -90,10 +93,8 @@ static int ilbc_decode_frame(AVCodecContext *avctx, void *data,
     }
 
     frame->nb_samples = s->decoder.blockl;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     WebRtcIlbcfix_DecodeImpl((WebRtc_Word16*) frame->data[0],
                              (const WebRtc_UWord16*) buf, &s->decoder, 1);
@@ -127,7 +128,10 @@ static const AVOption ilbc_enc_options[] = {
 };
 
 static const AVClass ilbc_enc_class = {
-    "libilbc", av_default_item_name, ilbc_enc_options, LIBAVUTIL_VERSION_INT
+    .class_name = "libilbc", /* designated initializers: each value is bound to its AVClass field by name, not by position */
+    .item_name  = av_default_item_name,
+    .option     = ilbc_enc_options, /* encoder option table */
+    .version    = LIBAVUTIL_VERSION_INT,
 };
 
 static av_cold int ilbc_encode_init(AVCodecContext *avctx)
@@ -163,10 +167,8 @@ static int ilbc_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     ILBCEncContext *s = avctx->priv_data;
     int ret;
 
-    if ((ret = ff_alloc_packet(avpkt, 50))) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+    if ((ret = ff_alloc_packet2(avctx, avpkt, 50)) < 0)
         return ret;
-    }
 
     WebRtcIlbcfix_EncodeImpl((WebRtc_UWord16*) avpkt->data, (const WebRtc_Word16*) frame->data[0], &s->encoder);
 
diff --git a/libavcodec/libmp3lame.c b/libavcodec/libmp3lame.c
index dee1909..fed82e2 100644
--- a/libavcodec/libmp3lame.c
+++ b/libavcodec/libmp3lame.c
@@ -2,20 +2,20 @@
  * Interface to libmp3lame for mp3 encoding
  * Copyright (c) 2002 Lennert Buytenhek <buytenh@gnu.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -38,7 +38,7 @@
 #include "mpegaudio.h"
 #include "mpegaudiodecheader.h"
 
-#define BUFFER_SIZE (7200 + 2 * MPA_FRAME_SIZE + MPA_FRAME_SIZE / 4)
+#define BUFFER_SIZE (7200 + 2 * MPA_FRAME_SIZE + MPA_FRAME_SIZE / 4+1000) // FIXME: Buffer size too small? Adding 1000 to make up for it.
 
 typedef struct LAMEContext {
     AVClass *class;
@@ -97,6 +97,7 @@ static av_cold int mp3lame_encode_init(AVCodecContext *avctx)
     if ((s->gfp = lame_init()) == NULL)
         return AVERROR(ENOMEM);
 
+
     lame_set_num_channels(s->gfp, avctx->channels);
     lame_set_mode(s->gfp, avctx->channels > 1 ? s->joint_stereo ? JOINT_STEREO : STEREO : MONO);
 
@@ -208,6 +209,8 @@ static int mp3lame_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
         default:
             return AVERROR_BUG;
         }
+    } else if (!s->afq.frame_alloc) {
+        lame_result = 0;
     } else {
         lame_result = lame_encode_flush(s->gfp, s->buffer + s->buffer_index,
                                         s->buffer_size - s->buffer_index);
@@ -251,10 +254,8 @@ static int mp3lame_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     av_dlog(avctx, "in:%d packet-len:%d index:%d\n", avctx->frame_size, len,
             s->buffer_index);
     if (len <= s->buffer_index) {
-        if ((ret = ff_alloc_packet(avpkt, len))) {
-            av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+        if ((ret = ff_alloc_packet2(avctx, avpkt, len)) < 0)
             return ret;
-        }
         memcpy(avpkt->data, s->buffer, len);
         s->buffer_index -= len;
         memmove(s->buffer, s->buffer + len, s->buffer_index);
@@ -272,9 +273,9 @@ static int mp3lame_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 #define OFFSET(x) offsetof(LAMEContext, x)
 #define AE AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
-    { "reservoir", "Use bit reservoir.", OFFSET(reservoir), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, AE },
-    { "joint_stereo", "Use joint stereo.", OFFSET(joint_stereo), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, AE },
-    { "abr", "Use ABR", OFFSET(abr), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
+    { "reservoir",    "use bit reservoir", OFFSET(reservoir),    AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, AE },
+    { "joint_stereo", "use joint stereo",  OFFSET(joint_stereo), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, AE },
+    { "abr",          "use ABR",           OFFSET(abr),          AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
     { NULL },
 };
 
diff --git a/libavcodec/libopencore-amr.c b/libavcodec/libopencore-amr.c
index 6b45959..4c7b6af 100644
--- a/libavcodec/libopencore-amr.c
+++ b/libavcodec/libopencore-amr.c
@@ -2,20 +2,20 @@
  * AMR Audio decoder stub
  * Copyright (c) 2003 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -31,7 +31,8 @@ static int amr_decode_fix_avctx(AVCodecContext *avctx)
 {
     const int is_amr_wb = 1 + (avctx->codec_id == AV_CODEC_ID_AMR_WB);
 
-    avctx->sample_rate = 8000 * is_amr_wb;
+    if (!avctx->sample_rate)
+        avctx->sample_rate = 8000 * is_amr_wb;
 
     if (avctx->channels > 1) {
         avpriv_report_missing_feature(avctx, "multi-channel AMR");
@@ -103,10 +104,8 @@ static int amr_nb_decode_frame(AVCodecContext *avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = 160;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     dec_mode    = (buf[0] >> 3) & 0x000F;
     packet_size = block_size[dec_mode] + 1;
@@ -181,7 +180,7 @@ static const AVOption options[] = {
     { NULL }
 };
 
-static const AVClass class = {
+static const AVClass amrnb_class = { /* AVClass named "libopencore_amrnb"; the AMR-NB encoder's .priv_class points at it */
     "libopencore_amrnb", av_default_item_name, options, LIBAVUTIL_VERSION_INT
 };
 
@@ -189,7 +188,7 @@ static av_cold int amr_nb_encode_init(AVCodecContext *avctx)
 {
     AMRContext *s = avctx->priv_data;
 
-    if (avctx->sample_rate != 8000) {
+    if (avctx->sample_rate != 8000 && avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL) {
         av_log(avctx, AV_LOG_ERROR, "Only 8000Hz sample rate supported\n");
         return AVERROR(ENOSYS);
     }
@@ -206,7 +205,6 @@ static av_cold int amr_nb_encode_init(AVCodecContext *avctx)
     s->enc_state = Encoder_Interface_init(s->enc_dtx);
     if (!s->enc_state) {
         av_log(avctx, AV_LOG_ERROR, "Encoder_Interface_init error\n");
-        av_freep(&avctx->coded_frame);
         return -1;
     }
 
@@ -238,14 +236,12 @@ static int amr_nb_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
         s->enc_bitrate = avctx->bit_rate;
     }
 
-    if ((ret = ff_alloc_packet(avpkt, 32))) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+    if ((ret = ff_alloc_packet2(avctx, avpkt, 32)) < 0)
         return ret;
-    }
 
     if (frame) {
         if (frame->nb_samples < avctx->frame_size) {
-            flush_buf = av_mallocz(avctx->frame_size * sizeof(*flush_buf));
+            flush_buf = av_mallocz_array(avctx->frame_size, sizeof(*flush_buf));
             if (!flush_buf)
                 return AVERROR(ENOMEM);
             memcpy(flush_buf, samples, frame->nb_samples * sizeof(*flush_buf));
@@ -260,7 +256,7 @@ static int amr_nb_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     } else {
         if (s->enc_last_frame < 0)
             return 0;
-        flush_buf = av_mallocz(avctx->frame_size * sizeof(*flush_buf));
+        flush_buf = av_mallocz_array(avctx->frame_size, sizeof(*flush_buf));
         if (!flush_buf)
             return AVERROR(ENOMEM);
         samples = flush_buf;
@@ -270,7 +266,7 @@ static int amr_nb_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     written = Encoder_Interface_Encode(s->enc_state, s->enc_mode, samples,
                                        avpkt->data, 0);
     av_dlog(avctx, "amr_nb_encode_frame encoded %u bytes, bitrate %u, first byte was %#02x\n",
-            written, s->enc_mode, frame[0]);
+            written, s->enc_mode, avpkt->data[0]);
 
     /* Get the next frame pts/duration */
     ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
@@ -294,7 +290,7 @@ AVCodec ff_libopencore_amrnb_encoder = {
     .capabilities   = CODEC_CAP_DELAY | CODEC_CAP_SMALL_LAST_FRAME,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_NONE },
-    .priv_class     = &class,
+    .priv_class     = &amrnb_class,
 };
 #endif /* CONFIG_LIBOPENCORE_AMRNB_ENCODER */
 
@@ -336,10 +332,8 @@ static int amr_wb_decode_frame(AVCodecContext *avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = 320;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     mode        = (buf[0] >> 3) & 0x000F;
     packet_size = block_size[mode];
@@ -349,6 +343,10 @@ static int amr_wb_decode_frame(AVCodecContext *avctx, void *data,
                buf_size, packet_size + 1);
         return AVERROR_INVALIDDATA;
     }
+    if (!packet_size) {
+        av_log(avctx, AV_LOG_ERROR, "amr packet_size invalid\n");
+        return AVERROR_INVALIDDATA;
+    }
 
     D_IF_decode(s->state, buf, (short *)frame->data[0], _good_frame);
 
diff --git a/libavcodec/libopenjpegdec.c b/libavcodec/libopenjpegdec.c
index c7697a5..0cf46e6 100644
--- a/libavcodec/libopenjpegdec.c
+++ b/libavcodec/libopenjpegdec.c
@@ -2,20 +2,20 @@
  * JPEG 2000 decoding support via OpenJPEG
  * Copyright (c) 2009 Jaikrishnan Menon <realityman@gmx.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,7 +25,6 @@
  */
 
 #define  OPJ_STATIC
-#include <openjpeg.h>
 
 #include "libavutil/common.h"
 #include "libavutil/imgutils.h"
@@ -37,6 +36,12 @@
 #include "internal.h"
 #include "thread.h"
 
+#if HAVE_OPENJPEG_1_5_OPENJPEG_H
+# include <openjpeg-1.5/openjpeg.h>
+#else
+# include <openjpeg.h>
+#endif
+
 #define JP2_SIG_TYPE    0x6A502020
 #define JP2_SIG_VALUE   0x0D0A870A
 
@@ -48,70 +53,69 @@
 #define GRAY_PIXEL_FORMATS AV_PIX_FMT_GRAY8, AV_PIX_FMT_YA8,                  \
                            AV_PIX_FMT_GRAY16
 
-#define YUV_PIXEL_FORMATS  AV_PIX_FMT_YUV410P,   AV_PIX_FMT_YUV411P,          \
-                           AV_PIX_FMT_YUVA420P,                               \
-                           AV_PIX_FMT_YUV420P,   AV_PIX_FMT_YUV422P,          \
-                           AV_PIX_FMT_YUV440P,   AV_PIX_FMT_YUV444P,          \
-                           AV_PIX_FMT_YUV420P9,  AV_PIX_FMT_YUV422P9,         \
-                           AV_PIX_FMT_YUV444P9,                               \
-                           AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10,        \
-                           AV_PIX_FMT_YUV444P10,                              \
-                           AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16,        \
-                           AV_PIX_FMT_YUV444P16
+#define YUV_PIXEL_FORMATS  AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUVA420P, \
+                           AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA422P, \
+                           AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVA444P, \
+                           AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9, \
+                           AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9, \
+                           AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, \
+                           AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10, \
+                           AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12, \
+                           AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14, \
+                           AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16, \
+                           AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16
 
 #define XYZ_PIXEL_FORMATS  AV_PIX_FMT_XYZ12
 
-static const enum AVPixelFormat rgb_pix_fmts[] = {
+static const enum AVPixelFormat libopenjpeg_rgb_pix_fmts[]  = { /* candidates tried when the image color space is CLRSPC_SRGB */
     RGB_PIXEL_FORMATS
 };
-static const enum AVPixelFormat gray_pix_fmts[] = {
+static const enum AVPixelFormat libopenjpeg_gray_pix_fmts[] = { /* candidates tried for CLRSPC_GRAY */
     GRAY_PIXEL_FORMATS
 };
-static const enum AVPixelFormat yuv_pix_fmts[] = {
+static const enum AVPixelFormat libopenjpeg_yuv_pix_fmts[]  = { /* candidates tried for CLRSPC_SYCC */
     YUV_PIXEL_FORMATS
 };
-static const enum AVPixelFormat any_pix_fmts[] = {
+static const enum AVPixelFormat libopenjpeg_all_pix_fmts[]  = { /* fallback for any other color space: union of the RGB, gray, YUV and XYZ lists */
     RGB_PIXEL_FORMATS, GRAY_PIXEL_FORMATS, YUV_PIXEL_FORMATS, XYZ_PIXEL_FORMATS
 };
 
 typedef struct {
     AVClass *class;
     opj_dparameters_t dec_params;
-    int lowres;
     int lowqual;
 } LibOpenJPEGContext;
 
-static int libopenjpeg_matches_pix_fmt(const opj_image_t *img,
-                                       enum AVPixelFormat pix_fmt)
+static inline int libopenjpeg_matches_pix_fmt(const opj_image_t *image, enum AVPixelFormat pix_fmt)
 {
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
     int match = 1;
 
-    if (desc->nb_components != img->numcomps) {
+    if (desc->nb_components != image->numcomps) {
         return 0;
     }
 
     switch (desc->nb_components) {
     case 4:
         match = match &&
-                desc->comp[3].depth_minus1 + 1 >= img->comps[3].prec &&
-                1 == img->comps[3].dx &&
-                1 == img->comps[3].dy;
+                desc->comp[3].depth_minus1 + 1 >= image->comps[3].prec &&
+                1 == image->comps[3].dx &&
+                1 == image->comps[3].dy;
     case 3:
         match = match &&
-                desc->comp[2].depth_minus1 + 1 >= img->comps[2].prec &&
-                1 << desc->log2_chroma_w == img->comps[2].dx &&
-                1 << desc->log2_chroma_h == img->comps[2].dy;
+                desc->comp[2].depth_minus1 + 1 >= image->comps[2].prec &&
+                1 << desc->log2_chroma_w == image->comps[2].dx &&
+                1 << desc->log2_chroma_h == image->comps[2].dy;
     case 2:
         match = match &&
-                desc->comp[1].depth_minus1 + 1 >= img->comps[1].prec &&
-                1 << desc->log2_chroma_w == img->comps[1].dx &&
-                1 << desc->log2_chroma_h == img->comps[1].dy;
+                desc->comp[1].depth_minus1 + 1 >= image->comps[1].prec &&
+                1 << desc->log2_chroma_w == image->comps[1].dx &&
+                1 << desc->log2_chroma_h == image->comps[1].dy;
     case 1:
         match = match &&
-                desc->comp[0].depth_minus1 + 1 >= img->comps[0].prec &&
-                1 == img->comps[0].dx &&
-                1 == img->comps[0].dy;
+                desc->comp[0].depth_minus1 + 1 >= image->comps[0].prec &&
+                1 == image->comps[0].dx &&
+                1 == image->comps[0].dy;
     default:
         break;
     }
@@ -119,28 +123,27 @@ static int libopenjpeg_matches_pix_fmt(const opj_image_t *img,
     return match;
 }
 
-static enum AVPixelFormat libopenjpeg_guess_pix_fmt(const opj_image_t *image)
-{
+static inline enum AVPixelFormat libopenjpeg_guess_pix_fmt(const opj_image_t *image) {
     int index;
     const enum AVPixelFormat *possible_fmts = NULL;
     int possible_fmts_nb = 0;
 
     switch (image->color_space) {
     case CLRSPC_SRGB:
-        possible_fmts    = rgb_pix_fmts;
-        possible_fmts_nb = FF_ARRAY_ELEMS(rgb_pix_fmts);
+        possible_fmts    = libopenjpeg_rgb_pix_fmts;
+        possible_fmts_nb = FF_ARRAY_ELEMS(libopenjpeg_rgb_pix_fmts);
         break;
     case CLRSPC_GRAY:
-        possible_fmts    = gray_pix_fmts;
-        possible_fmts_nb = FF_ARRAY_ELEMS(gray_pix_fmts);
+        possible_fmts    = libopenjpeg_gray_pix_fmts;
+        possible_fmts_nb = FF_ARRAY_ELEMS(libopenjpeg_gray_pix_fmts);
         break;
     case CLRSPC_SYCC:
-        possible_fmts    = yuv_pix_fmts;
-        possible_fmts_nb = FF_ARRAY_ELEMS(yuv_pix_fmts);
+        possible_fmts    = libopenjpeg_yuv_pix_fmts;
+        possible_fmts_nb = FF_ARRAY_ELEMS(libopenjpeg_yuv_pix_fmts);
         break;
     default:
-        possible_fmts    = any_pix_fmts;
-        possible_fmts_nb = FF_ARRAY_ELEMS(any_pix_fmts);
+        possible_fmts    = libopenjpeg_all_pix_fmts;
+        possible_fmts_nb = FF_ARRAY_ELEMS(libopenjpeg_all_pix_fmts);
         break;
     }
 
@@ -167,40 +170,36 @@ static inline int libopenjpeg_ispacked(enum AVPixelFormat pix_fmt)
     return 1;
 }
 
-static void libopenjpeg_copy_to_packed8(AVFrame *picture, opj_image_t *image)
-{
+static inline void libopenjpeg_copy_to_packed8(AVFrame *picture, opj_image_t *image) { /* interleaves every image component into picture->data[0], one byte per sample; component data is read with picture->width as its row stride */
     uint8_t *img_ptr;
     int index, x, y, c;
-
     for (y = 0; y < picture->height; y++) {
         index   = y * picture->width;
         img_ptr = picture->data[0] + y * picture->linesize[0];
         for (x = 0; x < picture->width; x++, index++)
             for (c = 0; c < image->numcomps; c++)
-                *img_ptr++ = image->comps[c].data[index];
+                *img_ptr++ = 0x80 * image->comps[c].sgnd + image->comps[c].data[index]; /* offsets the sample by 0x80 * sgnd; presumably sgnd is 1 for signed components, re-centering them into the unsigned byte range -- confirm against OpenJPEG */
     }
 }
 
-static void libopenjpeg_copy_to_packed16(AVFrame *picture, opj_image_t *image)
-{
+static inline void libopenjpeg_copy_to_packed16(AVFrame *picture, opj_image_t *image) { /* interleaves every image component into picture->data[0] as 16-bit samples; component data is read with picture->width as its row stride */
     uint16_t *img_ptr;
     int index, x, y, c;
     int adjust[4];
-
     for (x = 0; x < image->numcomps; x++)
-        adjust[x] = FFMAX(FFMIN(16 - image->comps[x].prec, 8), 0);
+        adjust[x] = FFMAX(FFMIN(av_pix_fmt_desc_get(picture->format)->comp[x].depth_minus1 + 1 - image->comps[x].prec, 8), 0); /* left shift = pixel-format depth minus coded precision, clamped to 0..8; NOTE(review): adjust[] has 4 slots, assumes numcomps <= 4 -- confirm the caller guarantees it */
 
     for (y = 0; y < picture->height; y++) {
         index   = y * picture->width;
         img_ptr = (uint16_t *) (picture->data[0] + y * picture->linesize[0]);
         for (x = 0; x < picture->width; x++, index++)
             for (c = 0; c < image->numcomps; c++)
-                *img_ptr++ = image->comps[c].data[index] << adjust[c];
+                *img_ptr++ = (1 << image->comps[c].prec - 1) * image->comps[c].sgnd +
+                             (unsigned)image->comps[c].data[index] << adjust[c]; /* precedence: '-' and '+' bind tighter than '<<', so this is ((1 << (prec - 1)) * sgnd + sample) << adjust[c] */
     }
 }
 
-static void libopenjpeg_copyto8(AVFrame *picture, opj_image_t *image)
-{
+static inline void libopenjpeg_copyto8(AVFrame *picture, opj_image_t *image) {
     int *comp_data;
     uint8_t *img_ptr;
     int index, x, y;
@@ -210,7 +209,7 @@ static void libopenjpeg_copyto8(AVFrame *picture, opj_image_t *image)
         for (y = 0; y < image->comps[index].h; y++) {
             img_ptr = picture->data[index] + y * picture->linesize[index];
             for (x = 0; x < image->comps[index].w; x++) {
-                *img_ptr = (uint8_t) *comp_data;
+                *img_ptr = 0x80 * image->comps[index].sgnd + *comp_data;
                 img_ptr++;
                 comp_data++;
             }
@@ -218,18 +217,21 @@ static void libopenjpeg_copyto8(AVFrame *picture, opj_image_t *image)
     }
 }
 
-static void libopenjpeg_copyto16(AVFrame *p, opj_image_t *image)
-{
+static inline void libopenjpeg_copyto16(AVFrame *picture, opj_image_t *image) {
     int *comp_data;
     uint16_t *img_ptr;
     int index, x, y;
+    int adjust[4];
+    for (x = 0; x < image->numcomps; x++)
+        adjust[x] = FFMAX(FFMIN(av_pix_fmt_desc_get(picture->format)->comp[x].depth_minus1 + 1 - image->comps[x].prec, 8), 0);
 
     for (index = 0; index < image->numcomps; index++) {
         comp_data = image->comps[index].data;
         for (y = 0; y < image->comps[index].h; y++) {
-            img_ptr = (uint16_t *)(p->data[index] + y * p->linesize[index]);
+            img_ptr = (uint16_t *)(picture->data[index] + y * picture->linesize[index]);
             for (x = 0; x < image->comps[index].w; x++) {
-                *img_ptr = *comp_data;
+                *img_ptr = (1 << image->comps[index].prec - 1) * image->comps[index].sgnd +
+                           (unsigned)*comp_data << adjust[index];
                 img_ptr++;
                 comp_data++;
             }
@@ -283,9 +285,7 @@ static int libopenjpeg_decode_frame(AVCodecContext *avctx,
         return AVERROR_UNKNOWN;
     }
     opj_set_event_mgr((opj_common_ptr) dec, NULL, NULL);
-
     ctx->dec_params.cp_limit_decoding = LIMIT_TO_MAIN_HEADER;
-    ctx->dec_params.cp_reduce         = ctx->lowres;
     ctx->dec_params.cp_layer          = ctx->lowqual;
     // Tie decoder with decoding parameters
     opj_setup_decoder(dec, &ctx->dec_params);
@@ -311,11 +311,6 @@ static int libopenjpeg_decode_frame(AVCodecContext *avctx,
     width  = image->x1 - image->x0;
     height = image->y1 - image->y0;
 
-    if (ctx->lowres) {
-        width  = (width + (1 << ctx->lowres) - 1) >> ctx->lowres;
-        height = (height + (1 << ctx->lowres) - 1) >> ctx->lowres;
-    }
-
     ret = ff_set_dimensions(avctx, width, height);
     if (ret < 0)
         goto done;
@@ -329,20 +324,17 @@ static int libopenjpeg_decode_frame(AVCodecContext *avctx,
 
     if (avctx->pix_fmt == AV_PIX_FMT_NONE) {
         av_log(avctx, AV_LOG_ERROR, "Unable to determine pixel format\n");
-        ret = AVERROR_INVALIDDATA;
         goto done;
     }
-
     for (i = 0; i < image->numcomps; i++)
         if (image->comps[i].prec > avctx->bits_per_raw_sample)
             avctx->bits_per_raw_sample = image->comps[i].prec;
 
-    if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "ff_thread_get_buffer() failed\n");
+    if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
         goto done;
-    }
 
     ctx->dec_params.cp_limit_decoding = NO_LIMITATION;
+    ctx->dec_params.cp_reduce = avctx->lowres;
     // Tie decoder with decoding parameters.
     opj_setup_decoder(dec, &ctx->dec_params);
     stream = opj_cio_open((opj_common_ptr) dec, buf, buf_size);
@@ -416,12 +408,10 @@ done:
 static const AVOption options[] = {
     { "lowqual", "Limit the number of layers used for decoding",
         OFFSET(lowqual), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VD },
-    { "lowres",  "Lower the decoding resolution by a power of two",
-        OFFSET(lowres),  AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VD },
     { NULL },
 };
 
-static const AVClass class = {
+static const AVClass openjpeg_class = {
     .class_name = "libopenjpeg",
     .item_name  = av_default_item_name,
     .option     = options,
@@ -437,5 +427,6 @@ AVCodec ff_libopenjpeg_decoder = {
     .init           = libopenjpeg_decode_init,
     .decode         = libopenjpeg_decode_frame,
     .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
-    .priv_class     = &class,
+    .max_lowres     = 31,
+    .priv_class     = &openjpeg_class,
 };
diff --git a/libavcodec/libopenjpegenc.c b/libavcodec/libopenjpegenc.c
index 2c2109f..66633f4 100644
--- a/libavcodec/libopenjpegenc.c
+++ b/libavcodec/libopenjpegenc.c
@@ -1,21 +1,21 @@
 /*
  * JPEG 2000 encoding support via OpenJPEG
- * Copyright (c) 2011 Michael Bradshaw <mbradshaw@sorensonmedia.com>
+ * Copyright (c) 2011 Michael Bradshaw <mjbshaw gmail com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,8 +25,8 @@
  */
 
 #define  OPJ_STATIC
-#include <openjpeg.h>
 
+#include "libavutil/avassert.h"
 #include "libavutil/common.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/intreadwrite.h"
@@ -34,9 +34,16 @@
 #include "avcodec.h"
 #include "internal.h"
 
+#if HAVE_OPENJPEG_1_5_OPENJPEG_H
+# include <openjpeg-1.5/openjpeg.h>
+#else
+# include <openjpeg.h>
+#endif
+
 typedef struct {
     AVClass *avclass;
     opj_image_t *image;
+    opj_cio_t *stream;
     opj_cparameters_t enc_params;
     opj_cinfo_t *compress;
     opj_event_mgr_t event_mgr;
@@ -66,37 +73,41 @@ static void info_callback(const char *msg, void *data)
     av_log(data, AV_LOG_DEBUG, "%s\n", msg);
 }
 
-static opj_image_t *libopenjpeg_create_image(AVCodecContext *avctx,
-                                             opj_cparameters_t *parameters)
+static opj_image_t *mj2_create_image(AVCodecContext *avctx, opj_cparameters_t *parameters)
 {
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
-    opj_image_cmptparm_t *cmptparm;
-    OPJ_COLOR_SPACE color_space;
+    opj_image_cmptparm_t cmptparm[4] = {{0}};
     opj_image_t *img;
     int i;
     int sub_dx[4];
     int sub_dy[4];
-    int numcomps = desc->nb_components;
+    int numcomps;
+    OPJ_COLOR_SPACE color_space = CLRSPC_UNKNOWN;
 
-    sub_dx[0] =
-    sub_dx[3] = 1;
-    sub_dy[0] =
-    sub_dy[3] = 1;
-    sub_dx[1] =
-    sub_dx[2] = 1 << desc->log2_chroma_w;
-    sub_dy[1] =
-    sub_dy[2] = 1 << desc->log2_chroma_h;
+    sub_dx[0] = sub_dx[3] = 1;
+    sub_dy[0] = sub_dy[3] = 1;
+    sub_dx[1] = sub_dx[2] = 1 << desc->log2_chroma_w;
+    sub_dy[1] = sub_dy[2] = 1 << desc->log2_chroma_h;
+
+    numcomps = desc->nb_components;
 
     switch (avctx->pix_fmt) {
     case AV_PIX_FMT_GRAY8:
-    case AV_PIX_FMT_GRAY16:
     case AV_PIX_FMT_YA8:
+    case AV_PIX_FMT_GRAY16:
         color_space = CLRSPC_GRAY;
         break;
     case AV_PIX_FMT_RGB24:
     case AV_PIX_FMT_RGBA:
     case AV_PIX_FMT_RGB48:
     case AV_PIX_FMT_RGBA64:
+    case AV_PIX_FMT_GBR24P:
+    case AV_PIX_FMT_GBRP9:
+    case AV_PIX_FMT_GBRP10:
+    case AV_PIX_FMT_GBRP12:
+    case AV_PIX_FMT_GBRP14:
+    case AV_PIX_FMT_GBRP16:
+    case AV_PIX_FMT_XYZ12:
         color_space = CLRSPC_SRGB;
         break;
     case AV_PIX_FMT_YUV410P:
@@ -106,15 +117,32 @@ static opj_image_t *libopenjpeg_create_image(AVCodecContext *avctx,
     case AV_PIX_FMT_YUV440P:
     case AV_PIX_FMT_YUV444P:
     case AV_PIX_FMT_YUVA420P:
+    case AV_PIX_FMT_YUVA422P:
+    case AV_PIX_FMT_YUVA444P:
     case AV_PIX_FMT_YUV420P9:
     case AV_PIX_FMT_YUV422P9:
     case AV_PIX_FMT_YUV444P9:
+    case AV_PIX_FMT_YUVA420P9:
+    case AV_PIX_FMT_YUVA422P9:
+    case AV_PIX_FMT_YUVA444P9:
     case AV_PIX_FMT_YUV420P10:
     case AV_PIX_FMT_YUV422P10:
     case AV_PIX_FMT_YUV444P10:
+    case AV_PIX_FMT_YUVA420P10:
+    case AV_PIX_FMT_YUVA422P10:
+    case AV_PIX_FMT_YUVA444P10:
+    case AV_PIX_FMT_YUV420P12:
+    case AV_PIX_FMT_YUV422P12:
+    case AV_PIX_FMT_YUV444P12:
+    case AV_PIX_FMT_YUV420P14:
+    case AV_PIX_FMT_YUV422P14:
+    case AV_PIX_FMT_YUV444P14:
     case AV_PIX_FMT_YUV420P16:
     case AV_PIX_FMT_YUV422P16:
     case AV_PIX_FMT_YUV444P16:
+    case AV_PIX_FMT_YUVA420P16:
+    case AV_PIX_FMT_YUVA422P16:
+    case AV_PIX_FMT_YUVA444P16:
         color_space = CLRSPC_SYCC;
         break;
     default:
@@ -124,24 +152,25 @@ static opj_image_t *libopenjpeg_create_image(AVCodecContext *avctx,
         return NULL;
     }
 
-    cmptparm = av_mallocz(numcomps * sizeof(*cmptparm));
-    if (!cmptparm) {
-        av_log(avctx, AV_LOG_ERROR, "Not enough memory");
-        return NULL;
-    }
-
     for (i = 0; i < numcomps; i++) {
         cmptparm[i].prec = desc->comp[i].depth_minus1 + 1;
         cmptparm[i].bpp  = desc->comp[i].depth_minus1 + 1;
         cmptparm[i].sgnd = 0;
-        cmptparm[i].dx   = sub_dx[i];
-        cmptparm[i].dy   = sub_dy[i];
-        cmptparm[i].w    = avctx->width / sub_dx[i];
-        cmptparm[i].h    = avctx->height / sub_dy[i];
+        cmptparm[i].dx = sub_dx[i];
+        cmptparm[i].dy = sub_dy[i];
+        cmptparm[i].w = (avctx->width + sub_dx[i] - 1) / sub_dx[i]; // ceil(width / dx)
+        cmptparm[i].h = (avctx->height + sub_dy[i] - 1) / sub_dy[i]; // ceil(height / dy)
     }
 
     img = opj_image_create(numcomps, cmptparm, color_space);
-    av_freep(&cmptparm);
+    if (!img) // creation failed: return NULL instead of dereferencing it below
+        return NULL;
+
+    // (x0, y0) is the top left corner of the image; (x1, y1) is the width, height of the reference grid
+    img->x0 = 0;
+    img->y0 = 0;
+    img->x1 = (avctx->width  - 1) * parameters->subsampling_dx + 1;
+    img->y1 = (avctx->height - 1) * parameters->subsampling_dy + 1;
     return img;
 }
 
@@ -152,16 +181,45 @@ static av_cold int libopenjpeg_encode_init(AVCodecContext *avctx)
 
     opj_set_default_encoder_parameters(&ctx->enc_params);
 
-    ctx->enc_params.cp_rsiz          = ctx->profile;
-    ctx->enc_params.mode             = !!avctx->global_quality;
-    ctx->enc_params.cp_cinema        = ctx->cinema_mode;
-    ctx->enc_params.prog_order       = ctx->prog_order;
-    ctx->enc_params.numresolution    = ctx->numresolution;
-    ctx->enc_params.cp_disto_alloc   = ctx->disto_alloc;
-    ctx->enc_params.cp_fixed_alloc   = ctx->fixed_alloc;
+    ctx->enc_params.cp_rsiz = ctx->profile;
+    ctx->enc_params.mode = !!avctx->global_quality; /* 1 if a global quality is set, else 0 */
+    ctx->enc_params.cp_cinema = ctx->cinema_mode;
+    ctx->enc_params.prog_order = ctx->prog_order;
+    ctx->enc_params.numresolution = ctx->numresolution;
+    ctx->enc_params.cp_disto_alloc = ctx->disto_alloc;
+    ctx->enc_params.cp_fixed_alloc = ctx->fixed_alloc;
     ctx->enc_params.cp_fixed_quality = ctx->fixed_quality;
-    ctx->enc_params.tcp_numlayers    = ctx->numlayers;
-    ctx->enc_params.tcp_rates[0]     = FFMAX(avctx->compression_level, 0) * 2;
+    ctx->enc_params.tcp_numlayers = ctx->numlayers;
+    ctx->enc_params.tcp_rates[0] = FFMAX(avctx->compression_level, 0) * 2; /* negative levels clamp to 0 */
+
+    if (ctx->cinema_mode > 0) { /* cinema mode requested: pin the parameters below */
+        ctx->enc_params.irreversible = 1;
+        ctx->enc_params.tcp_mct = 1;
+        ctx->enc_params.tile_size_on = 0;
+        /* no subsampling */
+        ctx->enc_params.cp_tdx=1;
+        ctx->enc_params.cp_tdy=1;
+        ctx->enc_params.subsampling_dx = 1;
+        ctx->enc_params.subsampling_dy = 1;
+        /* Tile and Image shall be at (0,0) */
+        ctx->enc_params.cp_tx0 = 0;
+        ctx->enc_params.cp_ty0 = 0;
+        ctx->enc_params.image_offset_x0 = 0;
+        ctx->enc_params.image_offset_y0 = 0;
+        /* Codeblock size = 32x32 */
+        ctx->enc_params.cblockw_init = 32;
+        ctx->enc_params.cblockh_init = 32;
+        ctx->enc_params.csty |= 0x01; /* NOTE(review): meaning of bit 0 is not shown here -- confirm in openjpeg.h */
+        /* No ROI */
+        ctx->enc_params.roi_compno = -1;
+
+        if (ctx->enc_params.prog_order != CPRL) { /* any other user-selected order is overridden */
+            av_log(avctx, AV_LOG_ERROR, "prog_order forced to CPRL\n"); /* NOTE(review): non-fatal, yet logged at ERROR level */
+            ctx->enc_params.prog_order = CPRL;
+        }
+        ctx->enc_params.tp_flag = 'C';
+        ctx->enc_params.tp_on = 1;
+    }
 
     ctx->compress = opj_create_compress(ctx->format);
     if (!ctx->compress) {
@@ -169,118 +227,247 @@ static av_cold int libopenjpeg_encode_init(AVCodecContext *avctx)
         return AVERROR(ENOMEM);
     }
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame) {
-        av_log(avctx, AV_LOG_ERROR, "Error allocating coded frame\n");
-        goto fail;
-    }
-
-    ctx->image = libopenjpeg_create_image(avctx, &ctx->enc_params);
+    ctx->image = mj2_create_image(avctx, &ctx->enc_params); // image is created once and reused for every frame
     if (!ctx->image) {
         av_log(avctx, AV_LOG_ERROR, "Error creating the mj2 image\n");
         err = AVERROR(EINVAL);
         goto fail;
     }
+    opj_setup_encoder(ctx->compress, &ctx->enc_params, ctx->image); // done once here instead of per frame
+
+    ctx->stream = opj_cio_open((opj_common_ptr) ctx->compress, NULL, 0); // single output stream kept in the context
+    if (!ctx->stream) {
+        av_log(avctx, AV_LOG_ERROR, "Error creating the cio stream\n");
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    avctx->coded_frame = av_frame_alloc();
+    if (!avctx->coded_frame) {
+        av_log(avctx, AV_LOG_ERROR, "Error allocating coded frame\n");
+        goto fail; // NOTE(review): err is not assigned on this path; it keeps its earlier value -- confirm that is an error code
+    }
 
+    memset(&ctx->event_mgr, 0, sizeof(opj_event_mgr_t)); // clear all handlers before installing ours
     ctx->event_mgr.info_handler    = info_callback;
-    ctx->event_mgr.error_handler   = error_callback;
+    ctx->event_mgr.error_handler = error_callback;
     ctx->event_mgr.warning_handler = warning_callback;
     opj_set_event_mgr((opj_common_ptr) ctx->compress, &ctx->event_mgr, avctx);
 
     return 0;
 
 fail:
-    av_freep(&ctx->compress);
+    opj_cio_close(ctx->stream); // NOTE(review): may still be unset here; assumes the close call accepts NULL -- confirm
+    ctx->stream = NULL;
+    opj_destroy_compress(ctx->compress);
+    ctx->compress = NULL;
+    opj_image_destroy(ctx->image); // NOTE(review): NULL when image creation itself failed -- confirm this is tolerated
+    ctx->image = NULL;
     av_freep(&avctx->coded_frame);
     return err;
 }
 
-static void libopenjpeg_copy_packed8(AVCodecContext *avctx,
-                                     const AVFrame *frame, opj_image_t *image)
+static int libopenjpeg_copy_packed8(AVCodecContext *avctx, const AVFrame *frame, opj_image_t *image)
 {
     int compno;
-    int x, y;
-    int image_index, frame_index;
+    int x;
+    int y;
+    int *image_line; // start of the current row inside the component's data
+    int frame_index; // byte offset into the interleaved plane frame->data[0]
     const int numcomps = image->numcomps;
 
-    for (compno = 0; compno < numcomps; ++compno)
+    for (compno = 0; compno < numcomps; ++compno) { // validate every component before copying anything
+        if (image->comps[compno].w > frame->linesize[0] / numcomps) {
+            av_log(avctx, AV_LOG_ERROR, "Error: frame's linesize is too small for the image\n");
+            return 0; // failure
+        }
+    }
+
+    for (compno = 0; compno < numcomps; ++compno) {
         for (y = 0; y < avctx->height; ++y) {
-            image_index = y * avctx->width;
+            image_line = image->comps[compno].data + y * image->comps[compno].w;
             frame_index = y * frame->linesize[0] + compno;
             for (x = 0; x < avctx->width; ++x) {
-                image->comps[compno].data[image_index++] =
-                    frame->data[0][frame_index];
+                image_line[x] = frame->data[0][frame_index]; // de-interleave: one component per pass
                 frame_index += numcomps;
             }
+            for (; x < image->comps[compno].w; ++x) { // pad the row by repeating its last sample
+                image_line[x] = image_line[x - 1];
+            }
         }
+        for (; y < image->comps[compno].h; ++y) { // pad remaining rows by copying the row above
+            image_line = image->comps[compno].data + y * image->comps[compno].w;
+            for (x = 0; x < image->comps[compno].w; ++x) {
+                image_line[x] = image_line[x - image->comps[compno].w];
+            }
+        }
+    }
+
+    return 1; // success
 }
 
-static void libopenjpeg_copy_packed16(AVCodecContext *avctx,
-                                      const AVFrame *frame, opj_image_t *image)
+// Packed copy for 12-bit XYZ: reads 16-bit words and drops the low 4 bits of each sample.
+static int libopenjpeg_copy_packed12(AVCodecContext *avctx, const AVFrame *frame, opj_image_t *image)
 {
     int compno;
     int x, y;
-    int image_index, frame_index;
+    int *image_line; // start of the current row inside the component's data
+    int frame_index; // index in 16-bit units into frame_ptr
     const int numcomps  = image->numcomps;
     uint16_t *frame_ptr = (uint16_t *)frame->data[0];
 
-    for (compno = 0; compno < numcomps; ++compno)
+    for (compno = 0; compno < numcomps; ++compno) { // validate every component before copying anything
+        if (image->comps[compno].w > frame->linesize[0] / numcomps) { // NOTE(review): no /2 here, unlike the row stride below -- confirm
+            av_log(avctx, AV_LOG_ERROR, "Error: frame's linesize is too small for the image\n");
+            return 0; // failure
+        }
+    }
+
+    for (compno = 0; compno < numcomps; ++compno) {
+        for (y = 0; y < avctx->height; ++y) {
+            image_line = image->comps[compno].data + y * image->comps[compno].w;
+            frame_index = y * (frame->linesize[0] / 2) + compno; // linesize halved to step in 16-bit words
+            for (x = 0; x < avctx->width; ++x) {
+                image_line[x] = frame_ptr[frame_index] >> 4; // keep the upper 12 bits
+                frame_index += numcomps;
+            }
+            for (; x < image->comps[compno].w; ++x) { // pad the row by repeating its last sample
+                image_line[x] = image_line[x - 1];
+            }
+        }
+        for (; y < image->comps[compno].h; ++y) { // pad remaining rows by copying the row above
+            image_line = image->comps[compno].data + y * image->comps[compno].w;
+            for (x = 0; x < image->comps[compno].w; ++x) {
+                image_line[x] = image_line[x - image->comps[compno].w];
+            }
+        }
+    }
+
+    return 1; // success
+}
+
+static int libopenjpeg_copy_packed16(AVCodecContext *avctx, const AVFrame *frame, opj_image_t *image)
+{
+    int compno;
+    int x;
+    int y;
+    int *image_line; // start of the current row inside the component's data
+    int frame_index; // index in 16-bit units into frame_ptr
+    const int numcomps = image->numcomps;
+    uint16_t *frame_ptr = (uint16_t*)frame->data[0];
+
+    for (compno = 0; compno < numcomps; ++compno) { // validate every component before copying anything
+        if (image->comps[compno].w > frame->linesize[0] / numcomps) { // NOTE(review): no /2 here, unlike the row stride below -- confirm
+            av_log(avctx, AV_LOG_ERROR, "Error: frame's linesize is too small for the image\n");
+            return 0; // failure
+        }
+    }
+
+    for (compno = 0; compno < numcomps; ++compno) {
         for (y = 0; y < avctx->height; ++y) {
-            image_index = y * avctx->width;
+            image_line = image->comps[compno].data + y * image->comps[compno].w;
             frame_index = y * (frame->linesize[0] / 2) + compno;
             for (x = 0; x < avctx->width; ++x) {
-                image->comps[compno].data[image_index++] =
-                    frame_ptr[frame_index];
+                image_line[x] = frame_ptr[frame_index]; // full 16-bit sample, no shift
                 frame_index += numcomps;
             }
+            for (; x < image->comps[compno].w; ++x) { // pad the row by repeating its last sample
+                image_line[x] = image_line[x - 1];
+            }
         }
+        for (; y < image->comps[compno].h; ++y) { // pad remaining rows by copying the row above
+            image_line = image->comps[compno].data + y * image->comps[compno].w;
+            for (x = 0; x < image->comps[compno].w; ++x) {
+                image_line[x] = image_line[x - image->comps[compno].w];
+            }
+        }
+    }
+
+    return 1; // success
 }
 
-static void libopenjpeg_copy_unpacked8(AVCodecContext *avctx,
-                                       const AVFrame *frame, opj_image_t *image)
+static int libopenjpeg_copy_unpacked8(AVCodecContext *avctx, const AVFrame *frame, opj_image_t *image)
 {
     int compno;
-    int x, y;
-    int width, height;
-    int image_index, frame_index;
+    int x;
+    int y;
+    int width;  // columns actually read from the frame for this component
+    int height; // rows actually read from the frame for this component
+    int *image_line; // start of the current row inside the component's data
+    int frame_index; // byte offset into frame->data[compno]
     const int numcomps = image->numcomps;
 
     for (compno = 0; compno < numcomps; ++compno) {
+        if (image->comps[compno].w > frame->linesize[compno]) { // each plane is checked against its own linesize
+            av_log(avctx, AV_LOG_ERROR, "Error: frame's linesize is too small for the image\n");
+            return 0; // failure
+        }
+    }
+
+    for (compno = 0; compno < numcomps; ++compno) {
         width  = avctx->width / image->comps[compno].dx;
         height = avctx->height / image->comps[compno].dy;
         for (y = 0; y < height; ++y) {
-            image_index = y * width;
+            image_line = image->comps[compno].data + y * image->comps[compno].w; // row stride is the image width, not 'width'
             frame_index = y * frame->linesize[compno];
             for (x = 0; x < width; ++x)
-                image->comps[compno].data[image_index++] =
-                    frame->data[compno][frame_index++];
+                image_line[x] = frame->data[compno][frame_index++];
+            for (; x < image->comps[compno].w; ++x) { // columns past the truncated 'width' repeat the last copied sample
+                image_line[x] = image_line[x - 1];
+            }
+        }
+        for (; y < image->comps[compno].h; ++y) { // rows past the truncated 'height' copy the row above
+            image_line = image->comps[compno].data + y * image->comps[compno].w;
+            for (x = 0; x < image->comps[compno].w; ++x) {
+                image_line[x] = image_line[x - image->comps[compno].w];
+            }
         }
     }
+
+    return 1; // success
 }
 
-static void libopenjpeg_copy_unpacked16(AVCodecContext *avctx,
-                                        const AVFrame *frame,
-                                        opj_image_t *image)
+static int libopenjpeg_copy_unpacked16(AVCodecContext *avctx, const AVFrame *frame, opj_image_t *image)
 {
     int compno;
-    int x, y;
-    int width, height;
-    int image_index, frame_index;
+    int x;
+    int y;
+    int width;  // columns actually read from the frame for this component
+    int height; // rows actually read from the frame for this component
+    int *image_line; // start of the current row inside the component's data
+    int frame_index; // index in 16-bit units into frame_ptr
     const int numcomps = image->numcomps;
     uint16_t *frame_ptr;
 
     for (compno = 0; compno < numcomps; ++compno) {
+        if (image->comps[compno].w > frame->linesize[compno]) { // NOTE(review): no /2 here, unlike the row stride below -- confirm
+            av_log(avctx, AV_LOG_ERROR, "Error: frame's linesize is too small for the image\n");
+            return 0; // failure
+        }
+    }
+
+    for (compno = 0; compno < numcomps; ++compno) {
         width     = avctx->width / image->comps[compno].dx;
         height    = avctx->height / image->comps[compno].dy;
         frame_ptr = (uint16_t *)frame->data[compno];
         for (y = 0; y < height; ++y) {
-            image_index = y * width;
+            image_line = image->comps[compno].data + y * image->comps[compno].w; // row stride is the image width, not 'width'
             frame_index = y * (frame->linesize[compno] / 2);
             for (x = 0; x < width; ++x)
-                image->comps[compno].data[image_index++] =
-                    frame_ptr[frame_index++];
+                image_line[x] = frame_ptr[frame_index++];
+            for (; x < image->comps[compno].w; ++x) { // columns past the truncated 'width' repeat the last copied sample
+                image_line[x] = image_line[x - 1];
+            }
+        }
+        for (; y < image->comps[compno].h; ++y) { // rows past the truncated 'height' copy the row above
+            image_line = image->comps[compno].data + y * image->comps[compno].w;
+            for (x = 0; x < image->comps[compno].w; ++x) {
+                image_line[x] = image_line[x - image->comps[compno].w];
+            }
         }
     }
+
+    return 1; // success
 }
 
 static int libopenjpeg_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
@@ -289,25 +476,45 @@ static int libopenjpeg_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     LibOpenJPEGContext *ctx = avctx->priv_data;
     opj_cinfo_t *compress   = ctx->compress;
     opj_image_t *image      = ctx->image;
-    opj_cio_t *stream;
+    opj_cio_t *stream       = ctx->stream; // opened once in encode_init and reused for every frame
+    int cpyresult = 0; // set by the copy helpers: 1 on success, 0 on failure
     int ret, len;
-
-    // x0, y0 is the top left corner of the image
-    // x1, y1 is the width, height of the reference grid
-    image->x0 = 0;
-    image->y0 = 0;
-    image->x1 = (avctx->width - 1) * ctx->enc_params.subsampling_dx + 1;
-    image->y1 = (avctx->height - 1) * ctx->enc_params.subsampling_dy + 1;
+    AVFrame *gbrframe; // only used for the planar GBR formats below
 
     switch (avctx->pix_fmt) {
     case AV_PIX_FMT_RGB24:
     case AV_PIX_FMT_RGBA:
     case AV_PIX_FMT_YA8:
-        libopenjpeg_copy_packed8(avctx, frame, image);
+        cpyresult = libopenjpeg_copy_packed8(avctx, frame, image);
+        break;
+    case AV_PIX_FMT_XYZ12:
+        cpyresult = libopenjpeg_copy_packed12(avctx, frame, image);
         break;
     case AV_PIX_FMT_RGB48:
     case AV_PIX_FMT_RGBA64:
-        libopenjpeg_copy_packed16(avctx, frame, image);
+        cpyresult = libopenjpeg_copy_packed16(avctx, frame, image);
+        break;
+    case AV_PIX_FMT_GBR24P:
+    case AV_PIX_FMT_GBRP9:
+    case AV_PIX_FMT_GBRP10:
+    case AV_PIX_FMT_GBRP12:
+    case AV_PIX_FMT_GBRP14:
+    case AV_PIX_FMT_GBRP16:
+        gbrframe = av_frame_clone(frame); // scratch frame whose plane pointers can be reordered
+        if (!gbrframe)
+            return AVERROR(ENOMEM);
+        gbrframe->data[0] = frame->data[2]; // reorder planes 2,0,1 -> 0,1,2 so the image receives R,G,B
+        gbrframe->data[1] = frame->data[0];
+        gbrframe->data[2] = frame->data[1];
+        gbrframe->linesize[0] = frame->linesize[2]; // linesizes follow the same permutation
+        gbrframe->linesize[1] = frame->linesize[0];
+        gbrframe->linesize[2] = frame->linesize[1];
+        if (avctx->pix_fmt == AV_PIX_FMT_GBR24P) { // 8-bit planar; every other format in this group takes the 16-bit path
+            cpyresult = libopenjpeg_copy_unpacked8(avctx, gbrframe, image);
+        } else {
+            cpyresult = libopenjpeg_copy_unpacked16(avctx, gbrframe, image);
+        }
+        av_frame_free(&gbrframe);
         break;
     case AV_PIX_FMT_GRAY8:
     case AV_PIX_FMT_YUV410P:
@@ -317,19 +524,36 @@ static int libopenjpeg_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     case AV_PIX_FMT_YUV440P:
     case AV_PIX_FMT_YUV444P:
     case AV_PIX_FMT_YUVA420P:
-        libopenjpeg_copy_unpacked8(avctx, frame, image);
+    case AV_PIX_FMT_YUVA422P:
+    case AV_PIX_FMT_YUVA444P:
+        cpyresult = libopenjpeg_copy_unpacked8(avctx, frame, image);
         break;
     case AV_PIX_FMT_GRAY16:
     case AV_PIX_FMT_YUV420P9:
     case AV_PIX_FMT_YUV422P9:
     case AV_PIX_FMT_YUV444P9:
+    case AV_PIX_FMT_YUVA420P9:
+    case AV_PIX_FMT_YUVA422P9:
+    case AV_PIX_FMT_YUVA444P9:
     case AV_PIX_FMT_YUV444P10:
     case AV_PIX_FMT_YUV422P10:
     case AV_PIX_FMT_YUV420P10:
+    case AV_PIX_FMT_YUVA444P10:
+    case AV_PIX_FMT_YUVA422P10:
+    case AV_PIX_FMT_YUVA420P10:
+    case AV_PIX_FMT_YUV420P12:
+    case AV_PIX_FMT_YUV422P12:
+    case AV_PIX_FMT_YUV444P12:
+    case AV_PIX_FMT_YUV420P14:
+    case AV_PIX_FMT_YUV422P14:
+    case AV_PIX_FMT_YUV444P14:
     case AV_PIX_FMT_YUV444P16:
     case AV_PIX_FMT_YUV422P16:
     case AV_PIX_FMT_YUV420P16:
-        libopenjpeg_copy_unpacked16(avctx, frame, image);
+    case AV_PIX_FMT_YUVA444P16:
+    case AV_PIX_FMT_YUVA422P16:
+    case AV_PIX_FMT_YUVA420P16:
+        cpyresult = libopenjpeg_copy_unpacked16(avctx, frame, image);
         break;
     default:
         av_log(avctx, AV_LOG_ERROR,
@@ -339,29 +563,26 @@ static int libopenjpeg_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         break;
     }
 
-    opj_setup_encoder(compress, &ctx->enc_params, image);
-    stream = opj_cio_open((opj_common_ptr) compress, NULL, 0);
-    if (!stream) {
-        av_log(avctx, AV_LOG_ERROR, "Error creating the cio stream\n");
-        return AVERROR(ENOMEM);
+    if (!cpyresult) { // the copy helpers return 0 when the frame's linesize is too small
+        av_log(avctx, AV_LOG_ERROR,
+               "Could not copy the frame data to the internal image buffer\n");
+        return -1;
     }
 
+    cio_seek(stream, 0); // rewind the reused stream so this frame is written from offset 0
     if (!opj_encode(compress, stream, image, NULL)) {
-        opj_cio_close(stream);
         av_log(avctx, AV_LOG_ERROR, "Error during the opj encode\n");
         return -1;
     }
 
     len = cio_tell(stream);
-    if ((ret = ff_alloc_packet(pkt, len)) < 0) {
-        opj_cio_close(stream);
+    if ((ret = ff_alloc_packet2(avctx, pkt, len)) < 0) { // stream stays open: it is closed in encode_close
         return ret;
     }
 
     memcpy(pkt->data, stream->buffer, len);
     pkt->flags |= AV_PKT_FLAG_KEY;
     *got_packet = 1;
-    opj_cio_close(stream);
     return 0;
 }
 
@@ -369,8 +590,12 @@ static av_cold int libopenjpeg_encode_close(AVCodecContext *avctx)
 {
     LibOpenJPEGContext *ctx = avctx->priv_data;
 
+    opj_cio_close(ctx->stream); // the stream opened in encode_init is released here
+    ctx->stream = NULL; // clear each handle after releasing it
     opj_destroy_compress(ctx->compress);
+    ctx->compress = NULL;
     opj_image_destroy(ctx->image);
+    ctx->image = NULL;
     av_freep(&avctx->coded_frame);
     return 0;
 }
@@ -396,15 +621,15 @@ static const AVOption options[] = {
     { "rpcl",          NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = RPCL        }, 0,         0,           VE, "prog_order"  },
     { "pcrl",          NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = PCRL        }, 0,         0,           VE, "prog_order"  },
     { "cprl",          NULL,                0,                     AV_OPT_TYPE_CONST, { .i64 = CPRL        }, 0,         0,           VE, "prog_order"  },
-    { "numresolution", NULL,                OFFSET(numresolution), AV_OPT_TYPE_INT,   { .i64 = 6           }, 1,         10,          VE },
-    { "numlayers",     NULL,                OFFSET(numlayers),     AV_OPT_TYPE_INT,   { .i64 = 1           }, 1,         10,          VE },
-    { "disto_alloc",   NULL,                OFFSET(disto_alloc),   AV_OPT_TYPE_INT,   { .i64 = 1           }, 0,         1,           VE },
-    { "fixed_alloc",   NULL,                OFFSET(fixed_alloc),   AV_OPT_TYPE_INT,   { .i64 = 0           }, 0,         1,           VE },
-    { "fixed_quality", NULL,                OFFSET(fixed_quality), AV_OPT_TYPE_INT,   { .i64 = 0           }, 0,         1,           VE },
+    { "numresolution", NULL,                OFFSET(numresolution), AV_OPT_TYPE_INT,   { .i64 = 6           }, 1,         INT_MAX,     VE                },
+    { "numlayers",     NULL,                OFFSET(numlayers),     AV_OPT_TYPE_INT,   { .i64 = 1           }, 1,         10,          VE                },
+    { "disto_alloc",   NULL,                OFFSET(disto_alloc),   AV_OPT_TYPE_INT,   { .i64 = 1           }, 0,         1,           VE                },
+    { "fixed_alloc",   NULL,                OFFSET(fixed_alloc),   AV_OPT_TYPE_INT,   { .i64 = 0           }, 0,         1,           VE                },
+    { "fixed_quality", NULL,                OFFSET(fixed_quality), AV_OPT_TYPE_INT,   { .i64 = 0           }, 0,         1,           VE                },
     { NULL },
 };
 
-static const AVClass class = {
+static const AVClass openjpeg_class = {
     .class_name = "libopenjpeg",
     .item_name  = av_default_item_name,
     .option     = options,
@@ -420,18 +645,25 @@ AVCodec ff_libopenjpeg_encoder = {
     .init           = libopenjpeg_encode_init,
     .encode2        = libopenjpeg_encode_frame,
     .close          = libopenjpeg_encode_close,
-    .capabilities   = 0,
+    .capabilities   = CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_RGB24, AV_PIX_FMT_RGBA, AV_PIX_FMT_RGB48,
-        AV_PIX_FMT_RGBA64,
-        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY16, AV_PIX_FMT_YA8,
+        AV_PIX_FMT_RGBA64, AV_PIX_FMT_GBR24P,
+        AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_YA8, AV_PIX_FMT_GRAY16,
         AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA420P,
-        AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
-        AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P,
+        AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVA422P,
+        AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUVA444P,
         AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
+        AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9,
         AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
+        AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10,
+        AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12,
+        AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14,
         AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
+        AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16,
+        AV_PIX_FMT_XYZ12,
         AV_PIX_FMT_NONE
     },
-    .priv_class     = &class,
+    .priv_class     = &openjpeg_class,
 };
diff --git a/libavcodec/libopus.c b/libavcodec/libopus.c
index b511415..16395c7 100644
--- a/libavcodec/libopus.c
+++ b/libavcodec/libopus.c
@@ -2,20 +2,20 @@
  * libopus encoder/decoder common code
  * Copyright (c) 2012 Nicolas George
  *
- * This file is part of libav.
+ * This file is part of FFmpeg.
  *
- * libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/libopus.h b/libavcodec/libopus.h
index b08257d..a8223d1 100644
--- a/libavcodec/libopus.h
+++ b/libavcodec/libopus.h
@@ -2,20 +2,20 @@
  * libopus encoder/decoder common code
  * Copyright (c) 2012 Nicolas George
  *
- * This file is part of libav.
+ * This file is part of FFmpeg.
  *
- * libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/libopusdec.c b/libavcodec/libopusdec.c
index 398450f..8436302 100644
--- a/libavcodec/libopusdec.c
+++ b/libavcodec/libopusdec.c
@@ -2,20 +2,20 @@
  * Opus decoder using libopus
  * Copyright (c) 2012 Nicolas George
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -32,6 +32,10 @@
 
 struct libopus_context {
     OpusMSDecoder *dec;
+    int pre_skip;
+#ifndef OPUS_SET_GAIN
+    union { int i; double d; } gain;
+#endif
 };
 
 #define OPUS_HEAD_SIZE 19
@@ -49,6 +53,7 @@ static av_cold int libopus_decode_init(AVCodecContext *avc)
                           ff_vorbis_channel_layouts[avc->channels - 1];
 
     if (avc->extradata_size >= OPUS_HEAD_SIZE) {
+        opus->pre_skip = AV_RL16(avc->extradata + 10);
         gain_db     = sign_extend(AV_RL16(avc->extradata + 16), 16);
         channel_map = AV_RL8 (avc->extradata + 18);
     }
@@ -73,7 +78,7 @@ static av_cold int libopus_decode_init(AVCodecContext *avc)
         const uint8_t *vorbis_offset = ff_vorbis_channel_layout_offsets[avc->channels - 1];
         int ch;
 
-        /* Remap channels from vorbis order to libav order */
+        /* Remap channels from vorbis order to ffmpeg order */
         for (ch = 0; ch < avc->channels; ch++)
             mapping_arr[ch] = mapping[vorbis_offset[ch]];
         mapping = mapping_arr;
@@ -88,12 +93,23 @@ static av_cold int libopus_decode_init(AVCodecContext *avc)
         return ff_opus_error_to_averror(ret);
     }
 
+#ifdef OPUS_SET_GAIN /* libopus can apply the header gain itself */
     ret = opus_multistream_decoder_ctl(opus->dec, OPUS_SET_GAIN(gain_db));
     if (ret != OPUS_OK)
         av_log(avc, AV_LOG_WARNING, "Failed to set gain: %s\n",
                opus_strerror(ret));
+#else /* no OPUS_SET_GAIN: precompute the factor, libopus_decode() applies it */
+    {
+        double gain_lin = pow(10, gain_db / (20.0 * 256)); /* gain_db is in 1/256 dB steps -> linear factor */
+        if (avc->sample_fmt == AV_SAMPLE_FMT_FLT)
+            opus->gain.d = gain_lin;
+        else
+            opus->gain.i = FFMIN(gain_lin * 65536, INT_MAX); /* scaled by 2^16, capped at INT_MAX */
+    }
+#endif
 
-    avc->delay = 3840;  /* Decoder delay (in samples) at 48kHz */
+    /* Decoder delay (in samples) at 48kHz, taken from the header's pre-skip */
+    avc->delay = avc->internal->skip_samples = opus->pre_skip;
 
     return 0;
 }
@@ -116,11 +132,8 @@ static int libopus_decode(AVCodecContext *avc, void *data,
     int ret, nb_samples;
 
     frame->nb_samples = MAX_FRAME_SIZE;
-    ret = ff_get_buffer(avc, frame, 0);
-    if (ret < 0) {
-        av_log(avc, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avc, frame, 0)) < 0)
         return ret;
-    }
 
     if (avc->sample_fmt == AV_SAMPLE_FMT_S16)
         nb_samples = opus_multistream_decode(opus->dec, pkt->data, pkt->size,
@@ -137,6 +150,21 @@ static int libopus_decode(AVCodecContext *avc, void *data,
         return ff_opus_error_to_averror(nb_samples);
     }
 
+#ifndef OPUS_SET_GAIN /* libopus cannot apply the gain: scale the decoded samples here */
+    {
+        int i = avc->channels * nb_samples; /* total sample count across all channels in data[0] */
+        if (avc->sample_fmt == AV_SAMPLE_FMT_FLT) {
+            float *pcm = (float *)frame->data[0];
+            for (; i > 0; i--, pcm++)
+                *pcm = av_clipf(*pcm * opus->gain.d, -1, 1); /* clip to [-1, 1] after scaling */
+        } else {
+            int16_t *pcm = (int16_t *)frame->data[0];
+            for (; i > 0; i--, pcm++)
+                *pcm = av_clip_int16(((int64_t)opus->gain.i * *pcm) >> 16); /* undo the 2^16 scale, then saturate */
+        }
+    }
+#endif
+
     frame->nb_samples = nb_samples;
     *got_frame_ptr    = 1;
 
 
@@ -148,6 +176,9 @@ static void libopus_flush(AVCodecContext *avc)
     struct libopus_context *opus = avc->priv_data;
 
     opus_multistream_decoder_ctl(opus->dec, OPUS_RESET_STATE);
+    /* The stream can have been extracted by a tool that is not Opus-aware.
+       Therefore, any packet can become the first of the stream. */
+    avc->internal->skip_samples = opus->pre_skip;
 }
 
 AVCodec ff_libopus_decoder = {
diff --git a/libavcodec/libopusenc.c b/libavcodec/libopusenc.c
index 9af8bcd..8ceb877 100644
--- a/libavcodec/libopusenc.c
+++ b/libavcodec/libopusenc.c
@@ -2,20 +2,20 @@
  * Opus encoder using libopus
  * Copyright (c) 2012 Nathan Caldwell
  *
- * This file is part of libav.
+ * This file is part of FFmpeg.
  *
- * libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -65,8 +65,8 @@ static const uint8_t opus_vorbis_channel_map[8][8] = {
     { 0, 6, 1, 2, 3, 4, 5, 7 },
 };
 
-/* libav to libopus channel order mapping, passed to libopus */
-static const uint8_t libav_libopus_channel_map[8][8] = {
+/* libavcodec to libopus channel order mapping, passed to libopus */
+static const uint8_t libavcodec_libopus_channel_map[8][8] = {
     { 0 },
     { 0, 1 },
     { 0, 1, 2 },
@@ -107,6 +107,13 @@ static int libopus_configure_encoder(AVCodecContext *avctx, OpusMSEncoder *enc,
 {
     int ret;
 
+    if (avctx->global_quality) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Quality-based encoding not supported, "
+               "please specify a bitrate and VBR setting.\n");
+        return AVERROR(EINVAL);
+    }
+
     ret = opus_multistream_encoder_ctl(enc, OPUS_SET_BITRATE(avctx->bit_rate));
     if (ret != OPUS_OK) {
         av_log(avctx, AV_LOG_ERROR,
@@ -149,7 +156,7 @@ static int libopus_configure_encoder(AVCodecContext *avctx, OpusMSEncoder *enc,
     return OPUS_OK;
 }
 
-static int av_cold libopus_encode_init(AVCodecContext *avctx)
+static av_cold int libopus_encode_init(AVCodecContext *avctx)
 {
     LibopusEncContext *opus = avctx->priv_data;
     const uint8_t *channel_mapping;
@@ -159,7 +166,7 @@ static int av_cold libopus_encode_init(AVCodecContext *avctx)
 
     coupled_stream_count = opus_coupled_streams[avctx->channels - 1];
     opus->stream_count   = avctx->channels - coupled_stream_count;
-    channel_mapping      = libav_libopus_channel_map[avctx->channels - 1];
+    channel_mapping      = libavcodec_libopus_channel_map[avctx->channels - 1];
 
     /* FIXME: Opus can handle up to 255 channels. However, the mapping for
      * anything greater than 8 is undefined. */
@@ -306,6 +313,7 @@ static int libopus_encode(AVCodecContext *avctx, AVPacket *avpkt,
                               av_get_bytes_per_sample(avctx->sample_fmt);
     uint8_t *audio;
     int ret;
+    int discard_padding;
 
     if (frame) {
         ff_af_queue_add(&opus->afq, frame);
@@ -324,10 +332,8 @@ static int libopus_encode(AVCodecContext *avctx, AVPacket *avpkt,
     /* Maximum packet size taken from opusenc in opus-tools. 60ms packets
      * consist of 3 frames in one packet. The maximum frame size is 1275
      * bytes along with the largest possible packet header of 7 bytes. */
-    if (ret = ff_alloc_packet(avpkt, (1275 * 3 + 7) * opus->stream_count)) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+    if ((ret = ff_alloc_packet2(avctx, avpkt, (1275 * 3 + 7) * opus->stream_count)) < 0)
         return ret;
-    }
 
     if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT)
         ret = opus_multistream_encode_float(opus->enc, (float *)audio,
@@ -349,12 +355,31 @@ static int libopus_encode(AVCodecContext *avctx, AVPacket *avpkt,
     ff_af_queue_remove(&opus->afq, opus->opts.packet_size,
                        &avpkt->pts, &avpkt->duration);
 
+    discard_padding = opus->opts.packet_size - avpkt->duration;
+    // Check if subtraction resulted in an overflow
+    if ((discard_padding < opus->opts.packet_size) != (avpkt->duration > 0)) {
+        /* avpkt is owned by the caller: release its payload, never the struct */
+        av_free_packet(avpkt);
+        return AVERROR(EINVAL);
+    }
+    if (discard_padding > 0) {
+        /* 10-byte skip-samples side data; the padding count goes at offset 4 */
+        uint8_t *side_data = av_packet_new_side_data(avpkt,
+                                                     AV_PKT_DATA_SKIP_SAMPLES,
+                                                     10);
+        if (!side_data) {
+            av_free_packet(avpkt);
+            return AVERROR(ENOMEM);
+        }
+        AV_WL32(side_data + 4, discard_padding);
+    }
+
     *got_packet_ptr = 1;
 
     return 0;
 }
 
-static int av_cold libopus_encode_close(AVCodecContext *avctx)
+static av_cold int libopus_encode_close(AVCodecContext *avctx)
 {
     LibopusEncContext *opus = avctx->priv_data;
 
diff --git a/libavcodec/libschroedinger.c b/libavcodec/libschroedinger.c
index fc9188c..9f0b25c 100644
--- a/libavcodec/libschroedinger.c
+++ b/libavcodec/libschroedinger.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2008 BBC, Anuradha Suraparaju <asuraparaju at gmail dot com >
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -191,7 +191,10 @@ SchroFrame *ff_create_schro_frame(AVCodecContext *avctx,
     uv_height = y_height >> (SCHRO_FRAME_FORMAT_V_SHIFT(schro_frame_fmt));
 
     p_pic = av_mallocz(sizeof(AVPicture));
-    avpicture_alloc(p_pic, avctx->pix_fmt, y_width, y_height);
+    if (!p_pic || avpicture_alloc(p_pic, avctx->pix_fmt, y_width, y_height) < 0) {
+        av_free(p_pic);
+        return NULL;
+    }
 
     p_frame         = schro_frame_new();
     p_frame->format = schro_frame_fmt;
diff --git a/libavcodec/libschroedinger.h b/libavcodec/libschroedinger.h
index 5481f92..12fe57c 100644
--- a/libavcodec/libschroedinger.h
+++ b/libavcodec/libschroedinger.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2008 BBC, Anuradha Suraparaju <asuraparaju at gmail dot com >
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/libschroedingerdec.c b/libavcodec/libschroedingerdec.c
index 7e258e3..f20633a 100644
--- a/libavcodec/libschroedingerdec.c
+++ b/libavcodec/libschroedingerdec.c
@@ -2,20 +2,20 @@
  * Dirac decoder support via Schroedinger libraries
  * Copyright (c) 2008 BBC, Anuradha Suraparaju <asuraparaju at gmail dot com >
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -37,10 +37,6 @@
 #include "internal.h"
 #include "libschroedinger.h"
 
-#undef NDEBUG
-#include <assert.h>
-
-
 #include <schroedinger/schro.h>
 #include <schroedinger/schrodebug.h>
 #include <schroedinger/schrovideoformat.h>
@@ -134,7 +130,7 @@ static SchroBuffer *find_next_parse_unit(SchroParseUnitContext *parse_ctx)
 }
 
 /**
-* Returns Libav chroma format.
+* Returns FFmpeg chroma format.
 */
 static enum AVPixelFormat get_chroma_format(SchroChromaFormat schro_pix_fmt)
 {
@@ -179,7 +175,7 @@ static void libschroedinger_handle_first_access_unit(AVCodecContext *avctx)
 
     p_schro_params->format = schro_decoder_get_video_format(decoder);
 
-    /* Tell Libav about sequence details. */
+    /* Tell FFmpeg about sequence details. */
     if (av_image_check_size(p_schro_params->format->width,
                             p_schro_params->format->height, 0, avctx) < 0) {
         av_log(avctx, AV_LOG_ERROR, "invalid dimensions (%dx%d)\n",
@@ -310,10 +306,10 @@ static int libschroedinger_decode_frame(AVCodecContext *avctx,
     framewithpts = ff_schro_queue_pop(&p_schro_params->dec_frame_queue);
 
     if (framewithpts && framewithpts->frame) {
-        if (ff_get_buffer(avctx, avframe, 0) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "Unable to allocate buffer\n");
-            return AVERROR(ENOMEM);
-        }
+        int ret;
+
+        if ((ret = ff_get_buffer(avctx, avframe, 0)) < 0)
+            return ret;
 
         memcpy(avframe->data[0],
                framewithpts->frame->components[0].data,
diff --git a/libavcodec/libschroedingerenc.c b/libavcodec/libschroedingerenc.c
index 3dc1481..294fb06 100644
--- a/libavcodec/libschroedingerenc.c
+++ b/libavcodec/libschroedingerenc.c
@@ -2,20 +2,20 @@
  * Dirac encoder support via Schroedinger libraries
  * Copyright (c) 2008 BBC, Anuradha Suraparaju <asuraparaju at gmail dot com >
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -32,6 +32,7 @@
 #include <schroedinger/schrovideoformat.h>
 
 #include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
 #include "avcodec.h"
 #include "internal.h"
 #include "libschroedinger.h"
@@ -378,10 +379,8 @@ static int libschroedinger_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     pkt_size = p_frame_output->size;
     if (last_frame_in_sequence && p_schro_params->enc_buf_size > 0)
         pkt_size += p_schro_params->enc_buf_size;
-    if ((ret = ff_alloc_packet(pkt, pkt_size)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet of size %d.\n", pkt_size);
+    if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
         goto error;
-    }
 
     memcpy(pkt->data, p_frame_output->p_encbuf, p_frame_output->size);
     avctx->coded_frame->key_frame = p_frame_output->key_frame;
diff --git a/libavcodec/libshine.c b/libavcodec/libshine.c
new file mode 100644
index 0000000..48333bb
--- /dev/null
+++ b/libavcodec/libshine.c
@@ -0,0 +1,149 @@
+/*
+ * Interface to libshine for mp3 encoding
+ * Copyright (c) 2012 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <shine/layer3.h>
+
+#include "libavutil/intreadwrite.h"
+#include "audio_frame_queue.h"
+#include "avcodec.h"
+#include "internal.h"
+#include "mpegaudio.h"
+#include "mpegaudiodecheader.h"
+
+#define BUFFER_SIZE (4096 * 20)
+
+typedef struct SHINEContext { /* private state of the libshine encoder wrapper */
+    shine_config_t  config;   /* settings handed to shine_initialise() */
+    shine_t         shine;    /* encoder handle returned by shine_initialise() */
+    uint8_t         buffer[BUFFER_SIZE]; /* encoded bytes not yet sent as packets */
+    int             buffer_index;        /* number of valid bytes in buffer */
+    AudioFrameQueue afq;      /* provides pts/duration for the output packets */
+} SHINEContext;
+
+static av_cold int libshine_encode_init(AVCodecContext *avctx)
+{
+    SHINEContext *s     = avctx->priv_data;
+    shine_config_t *cfg = &s->config;
+
+    if (avctx->channels < 1 || avctx->channels > 2) {
+        av_log(avctx, AV_LOG_ERROR, "only mono or stereo is supported\n");
+        return AVERROR(EINVAL);
+    }
+    /* Start from libshine's MPEG defaults, then apply the caller's settings. */
+    shine_set_config_mpeg_defaults(&cfg->mpeg);
+    if (avctx->bit_rate)
+        cfg->mpeg.bitr = avctx->bit_rate / 1000;
+    cfg->mpeg.mode       = avctx->channels == 2 ? STEREO : MONO;
+    cfg->wave.samplerate = avctx->sample_rate;
+    cfg->wave.channels   = avctx->channels == 2 ? PCM_STEREO : PCM_MONO;
+    if (shine_check_config(cfg->wave.samplerate, cfg->mpeg.bitr) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "invalid configuration\n");
+        return AVERROR(EINVAL);
+    }
+    if (!(s->shine = shine_initialise(cfg)))
+        return AVERROR(ENOMEM);
+    avctx->frame_size = shine_samples_per_pass(s->shine);
+    ff_af_queue_init(avctx, &s->afq);
+    return 0;
+}
+
+static int libshine_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
+                                 const AVFrame *frame, int *got_packet_ptr)
+{
+    SHINEContext *s = avctx->priv_data;
+    MPADecodeHeader hdr;
+    unsigned char *data;
+    long written;
+    int ret, len;
+
+    /* Encode one input frame, or flush libshine when frame is NULL. */
+    if (frame)
+        data = shine_encode_buffer(s->shine, (int16_t **)frame->data, &written);
+    else
+        data = shine_flush(s->shine, &written);
+    if (written < 0)
+        return AVERROR_EXTERNAL;
+    if (written > 0) {
+        /* Append the new bytes to the staging buffer; packets are cut below. */
+        if (s->buffer_index + written > BUFFER_SIZE) {
+            av_log(avctx, AV_LOG_ERROR, "internal buffer too small\n");
+            return AVERROR_BUG;
+        }
+        memcpy(s->buffer + s->buffer_index, data, written);
+        s->buffer_index += written;
+    }
+    if (frame && (ret = ff_af_queue_add(&s->afq, frame)) < 0)
+        return ret;
+
+    /* A packet needs a complete 4-byte header and a queued input frame. */
+    if (s->buffer_index < 4 || !s->afq.frame_count)
+        return 0;
+    if (avpriv_mpegaudio_decode_header(&hdr, AV_RB32(s->buffer))) {
+        av_log(avctx, AV_LOG_ERROR, "free format output not supported\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    /* Output one frame only once all hdr.frame_size bytes are buffered. */
+    len = hdr.frame_size;
+    if (len <= s->buffer_index) {
+        if ((ret = ff_alloc_packet2(avctx, avpkt, len)) < 0)
+            return ret;
+        memcpy(avpkt->data, s->buffer, len);
+        s->buffer_index -= len;
+        memmove(s->buffer, s->buffer + len, s->buffer_index);
+        ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
+                           &avpkt->duration);
+        avpkt->size = len;
+        *got_packet_ptr = 1;
+    }
+    return 0;
+}
+
+static av_cold int libshine_encode_close(AVCodecContext *avctx)
+{
+    SHINEContext *ctx = avctx->priv_data;
+
+    ff_af_queue_close(&ctx->afq);   /* close the audio frame queue */
+    shine_close(ctx->shine);        /* close the libshine encoder */
+    return 0;
+}
+
+static const int libshine_sample_rates[] = { /* used as .supported_samplerates */
+    44100, 48000, 32000, 0 /* the trailing 0 ends the list */
+};
+
+AVCodec ff_libshine_encoder = { /* codec table entry for the libshine MP3 encoder */
+    .name                  = "libshine",
+    .long_name             = NULL_IF_CONFIG_SMALL("libshine MP3 (MPEG audio layer 3)"),
+    .type                  = AVMEDIA_TYPE_AUDIO,
+    .id                    = AV_CODEC_ID_MP3,
+    .priv_data_size        = sizeof(SHINEContext), /* size of avctx->priv_data */
+    .init                  = libshine_encode_init,
+    .encode2               = libshine_encode_frame, /* flushes libshine on a NULL frame */
+    .close                 = libshine_encode_close,
+    .capabilities          = CODEC_CAP_DELAY,
+    .sample_fmts           = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16P,
+                                                            AV_SAMPLE_FMT_NONE },
+    .supported_samplerates = libshine_sample_rates,
+    .channel_layouts       = (const uint64_t[]) { AV_CH_LAYOUT_MONO,
+                                                  AV_CH_LAYOUT_STEREO,
+                                                  0 },
+};
diff --git a/libavcodec/libspeexdec.c b/libavcodec/libspeexdec.c
index d00696e..5e149a5 100644
--- a/libavcodec/libspeexdec.c
+++ b/libavcodec/libspeexdec.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2008 David Conrad
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -43,14 +43,24 @@ static av_cold int libspeex_decode_init(AVCodecContext *avctx)
     SpeexHeader *header = NULL;
     int spx_mode;
 
-    avctx->sample_fmt = AV_SAMPLE_FMT_S16;
     if (avctx->extradata && avctx->extradata_size >= 80) {
         header = speex_packet_to_header(avctx->extradata,
                                         avctx->extradata_size);
         if (!header)
             av_log(avctx, AV_LOG_WARNING, "Invalid Speex header\n");
     }
-    if (header) {
+    if (avctx->codec_tag == MKTAG('S', 'P', 'X', 'N')) {
+        if (!avctx->extradata || avctx->extradata && avctx->extradata_size < 47) {
+            av_log(avctx, AV_LOG_ERROR, "Missing or invalid extradata.\n");
+            return AVERROR_INVALIDDATA;
+        }
+        if (avctx->extradata[37] != 10) {
+            av_log(avctx, AV_LOG_ERROR, "Unsupported quality mode.\n");
+            return AVERROR_PATCHWELCOME;
+        }
+        spx_mode           = 0;
+    } else if (header) {
+        avctx->sample_rate = header->rate;
         avctx->channels    = header->nb_channels;
         spx_mode           = header->mode;
         speex_header_free(header);
@@ -73,8 +83,9 @@ static av_cold int libspeex_decode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_ERROR, "Unknown Speex mode %d", spx_mode);
         return AVERROR_INVALIDDATA;
     }
-    avctx->sample_rate = 8000 << spx_mode;
     s->frame_size      =  160 << spx_mode;
+    if (!avctx->sample_rate)
+        avctx->sample_rate = 8000 << spx_mode;
 
     if (avctx->channels < 1 || avctx->channels > 2) {
         /* libspeex can handle mono or stereo if initialized as stereo */
@@ -113,13 +124,12 @@ static int libspeex_decode_frame(AVCodecContext *avctx, void *data,
     AVFrame *frame     = data;
     int16_t *output;
     int ret, consumed = 0;
+    avctx->sample_fmt = AV_SAMPLE_FMT_S16;
 
     /* get output buffer */
     frame->nb_samples = s->frame_size;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     output = (int16_t *)frame->data[0];
 
     /* if there is not enough data left for the smallest possible frame or the
@@ -149,6 +159,8 @@ static int libspeex_decode_frame(AVCodecContext *avctx, void *data,
 
     *got_frame_ptr = 1;
 
+    if (!avctx->bit_rate)
+        speex_decoder_ctl(s->dec_state, SPEEX_GET_BITRATE, &avctx->bit_rate);
     return consumed;
 }
 
diff --git a/libavcodec/libspeexenc.c b/libavcodec/libspeexenc.c
index 651d7ac..aba4618 100644
--- a/libavcodec/libspeexenc.c
+++ b/libavcodec/libspeexenc.c
@@ -2,20 +2,20 @@
  * Copyright (C) 2009 Justin Ruggles
  * Copyright (c) 2009 Xuggle Incorporated
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -92,6 +92,7 @@
 #include "internal.h"
 #include "audio_frame_queue.h"
 
+/* TODO: Think about converting abr, vad, dtx and such flags to a bit field */
 typedef struct {
     AVClass *class;             ///< AVClass for private options
     SpeexBits bits;             ///< libspeex bitwriter context
@@ -293,10 +294,8 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     /* write output if all frames for the packet have been encoded */
     if (s->pkt_frame_count == s->frames_per_packet) {
         s->pkt_frame_count = 0;
-        if ((ret = ff_alloc_packet(avpkt, speex_bits_nbytes(&s->bits)))) {
-            av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+        if ((ret = ff_alloc_packet2(avctx, avpkt, speex_bits_nbytes(&s->bits))) < 0)
             return ret;
-        }
         ret = speex_bits_write(&s->bits, avpkt->data, avpkt->size);
         speex_bits_reset(&s->bits);
 
@@ -335,7 +334,7 @@ static const AVOption options[] = {
     { NULL },
 };
 
-static const AVClass class = {
+static const AVClass speex_class = {
     .class_name = "libspeex",
     .item_name  = av_default_item_name,
     .option     = options,
@@ -364,6 +363,6 @@ AVCodec ff_libspeex_encoder = {
                                            AV_CH_LAYOUT_STEREO,
                                            0 },
     .supported_samplerates = (const int[]){ 8000, 16000, 32000, 0 },
-    .priv_class     = &class,
+    .priv_class     = &speex_class,
     .defaults       = defaults,
 };
diff --git a/libavcodec/libstagefright.cpp b/libavcodec/libstagefright.cpp
new file mode 100644
index 0000000..346cc9c
--- /dev/null
+++ b/libavcodec/libstagefright.cpp
@@ -0,0 +1,590 @@
+/*
+ * Interface to the Android Stagefright library for
+ * H/W accelerated H.264 decoding
+ *
+ * Copyright (C) 2011 Mohamed Naufal
+ * Copyright (C) 2011 Martin Storsjö
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <binder/ProcessState.h>
+#include <media/stagefright/MetaData.h>
+#include <media/stagefright/MediaBufferGroup.h>
+#include <media/stagefright/MediaDebug.h>
+#include <media/stagefright/MediaDefs.h>
+#include <media/stagefright/OMXClient.h>
+#include <media/stagefright/OMXCodec.h>
+#include <utils/List.h>
+#include <new>
+#include <map>
+
+extern "C" {
+#include "avcodec.h"
+#include "libavutil/imgutils.h"
+#include "internal.h"
+}
+
+#define OMX_QCOM_COLOR_FormatYVU420SemiPlanar 0x7FA30C00
+
+using namespace android;
+
+struct Frame {  // element type of StagefrightContext::in_queue and out_queue
+    status_t status;   // OK, or the error/stream status this entry carries
+    size_t size;       // number of bytes in buffer
+    int64_t time;      // stored as kKeyTime on the MediaBuffer by CustomSource::read()
+    int key;           // stored as kKeyIsSyncFrame by CustomSource::read()
+    uint8_t *buffer;   // input data; freed by CustomSource::read() after copying
+    AVFrame *vframe;   // decoded picture allocated in decode_thread()
+};
+
+struct TimeStamp {  // value type of ts_map, looked up by a buffer's kKeyTime value
+    int64_t pts;               // copied to the output AVFrame's pts
+    int64_t reordered_opaque;  // copied to the output AVFrame's reordered_opaque
+};
+
+class CustomSource;
+
+struct StagefrightContext {  // codec private data (avctx->priv_data)
+    AVCodecContext *avctx;
+    AVBitStreamFilterContext *bsfc;  // "h264_mp4toannexb" filter opened in Stagefright_init()
+    uint8_t* orig_extradata;
+    int orig_extradata_size;         // avctx->extradata_size as seen at init
+    sp<MediaSource> *source;
+    List<Frame*> *in_queue, *out_queue;   // in: drained by CustomSource::read(); out: filled by decode_thread()
+    pthread_mutex_t in_mutex, out_mutex;  // taken around in_queue / out_queue accesses
+    pthread_cond_t condition;             // waited on while in_queue is empty
+    pthread_t decode_thread_id;
+
+    Frame *end_frame;   // spare Frame used by decode_thread() when it cannot allocate one
+    bool source_done;
+    volatile sig_atomic_t thread_started, thread_exited, stop_decode;  // thread_exited: set when decode_thread() returns; stop_decode: ends its loop
+
+    AVFrame *prev_frame;
+    std::map<int64_t, TimeStamp> *ts_map;  // kKeyTime value -> timestamps of that frame
+    int64_t frame_index;
+
+    uint8_t *dummy_buf;
+    int dummy_bufsize;
+
+    OMXClient *client;
+    sp<MediaSource> *decoder;        // decode_thread() reads decoded buffers from it
+    const char *decoder_component;   // compared against the "OMX.SEC" prefix in decode_thread()
+};
+
+class CustomSource : public MediaSource {  // MediaSource whose read() hands out the Frames queued in s->in_queue
+public:
+    CustomSource(AVCodecContext *avctx, sp<MetaData> meta) {
+        s = (StagefrightContext*)avctx->priv_data;
+        source_meta = meta;
+        frame_size  = (avctx->width * avctx->height * 3) / 2;  // 1.5 bytes per pixel
+        buf_group.add_buffer(new MediaBuffer(frame_size));     // the only buffer added to the group
+    }
+
+    virtual sp<MetaData> getFormat() {  // returns the MetaData given to the constructor
+        return source_meta;
+    }
+
+    virtual status_t start(MetaData *params) {  // nothing to set up
+        return OK;
+    }
+
+    virtual status_t stop() {  // nothing to tear down
+        return OK;
+    }
+
+    virtual status_t read(MediaBuffer **buffer,
+                          const MediaSource::ReadOptions *options) {
+        Frame *frame;
+        status_t ret;
+
+        if (s->thread_exited)  // decode_thread() has returned: report end of stream
+            return ERROR_END_OF_STREAM;
+        pthread_mutex_lock(&s->in_mutex);
+
+        while (s->in_queue->empty())  // block until a Frame has been queued
+            pthread_cond_wait(&s->condition, &s->in_mutex);
+
+        frame = *s->in_queue->begin();
+        ret = frame->status;
+        // NOTE(review): frame->size is never compared with frame_size before the memcpy below - confirm the producer bounds it
+        if (ret == OK) {
+            ret = buf_group.acquire_buffer(buffer);
+            if (ret == OK) {
+                memcpy((*buffer)->data(), frame->buffer, frame->size);
+                (*buffer)->set_range(0, frame->size);
+                (*buffer)->meta_data()->clear();
+                (*buffer)->meta_data()->setInt32(kKeyIsSyncFrame,frame->key);
+                (*buffer)->meta_data()->setInt64(kKeyTime, frame->time);
+            } else {
+                av_log(s->avctx, AV_LOG_ERROR, "Failed to acquire MediaBuffer\n");
+            }
+            av_freep(&frame->buffer);  // only reached when the Frame's status was OK
+        }
+
+        s->in_queue->erase(s->in_queue->begin());  // the head entry is consumed even when acquire_buffer failed
+        pthread_mutex_unlock(&s->in_mutex);
+
+        av_freep(&frame);
+        return ret;  // the Frame's status, or the acquire_buffer() result
+    }
+
+private:
+    MediaBufferGroup buf_group;  // source of the MediaBuffers returned by read()
+    sp<MetaData> source_meta;
+    StagefrightContext *s;
+    int frame_size;              // capacity of the MediaBuffer created in the constructor
+};
+
+void* decode_thread(void *arg)  // thread body: arg is the AVCodecContext; reads from s->decoder and fills s->out_queue
+{
+    AVCodecContext *avctx = (AVCodecContext*)arg;
+    StagefrightContext *s = (StagefrightContext*)avctx->priv_data;
+    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(avctx->pix_fmt);
+    Frame* frame;
+    MediaBuffer *buffer;
+    int32_t w, h;
+    int decode_done = 0;
+    int ret;
+    int src_linesize[3];
+    const uint8_t *src_data[3];
+    int64_t out_frame_index = 0;
+
+    do {
+        buffer = NULL;
+        frame = (Frame*)av_mallocz(sizeof(Frame));
+        if (!frame) {  // out of memory: report it through the spare end_frame and stop
+            frame         = s->end_frame;
+            frame->status = AVERROR(ENOMEM);
+            decode_done   = 1;
+            s->end_frame  = NULL;
+            goto push_frame;
+        }
+        frame->status = (*s->decoder)->read(&buffer);
+        if (frame->status == OK) {
+            sp<MetaData> outFormat = (*s->decoder)->getFormat();
+            outFormat->findInt32(kKeyWidth , &w);
+            outFormat->findInt32(kKeyHeight, &h);
+            frame->vframe = av_frame_alloc();
+            if (!frame->vframe) {
+                frame->status = AVERROR(ENOMEM);
+                decode_done   = 1;
+                buffer->release();
+                goto push_frame;
+            }
+            ret = ff_get_buffer(avctx, frame->vframe, AV_GET_BUFFER_FLAG_REF);
+            if (ret < 0) {  // the Frame is still pushed so the error reaches the consumer
+                frame->status = ret;
+                decode_done   = 1;
+                buffer->release();
+                goto push_frame;
+            }
+
+            // The OMX.SEC decoder doesn't signal the modified width/height
+            if (s->decoder_component && !strncmp(s->decoder_component, "OMX.SEC", 7) &&
+                (w & 15 || h & 15)) {
+                if (((w + 15)&~15) * ((h + 15)&~15) * 3/2 == buffer->range_length()) {
+                    w = (w + 15)&~15;
+                    h = (h + 15)&~15;
+                }
+            }
+
+            if (!avctx->width || !avctx->height || avctx->width > w || avctx->height > h) {
+                avctx->width  = w;
+                avctx->height = h;
+            }
+
+            src_linesize[0] = av_image_get_linesize(avctx->pix_fmt, w, 0);
+            src_linesize[1] = av_image_get_linesize(avctx->pix_fmt, w, 1);
+            src_linesize[2] = av_image_get_linesize(avctx->pix_fmt, w, 2);
+            // the three planes are read back to back from buffer; -(-h >> shift) is the chroma row count rounded up
+            src_data[0] = (uint8_t*)buffer->data();
+            src_data[1] = src_data[0] + src_linesize[0] * h;
+            src_data[2] = src_data[1] + src_linesize[1] * -(-h>>pix_desc->log2_chroma_h);
+            av_image_copy(frame->vframe->data, frame->vframe->linesize,
+                          src_data, src_linesize,
+                          avctx->pix_fmt, avctx->width, avctx->height);
+            // restore the timestamps stored in ts_map under this buffer's kKeyTime value
+            buffer->meta_data()->findInt64(kKeyTime, &out_frame_index);
+            if (out_frame_index && s->ts_map->count(out_frame_index) > 0) {
+                frame->vframe->pts = (*s->ts_map)[out_frame_index].pts;
+                frame->vframe->reordered_opaque = (*s->ts_map)[out_frame_index].reordered_opaque;
+                s->ts_map->erase(out_frame_index);
+            }
+            buffer->release();
+            } else if (frame->status == INFO_FORMAT_CHANGED) {  // nothing to output: drop this Frame and read again
+                if (buffer)
+                    buffer->release();
+                av_free(frame);
+                continue;
+            } else {  // any other status ends the loop; the Frame is still pushed with that status
+                decode_done = 1;
+            }
+push_frame:
+        while (true) {  // poll every 10 ms until out_queue holds fewer than 10 entries
+            pthread_mutex_lock(&s->out_mutex);
+            if (s->out_queue->size() >= 10) {
+                pthread_mutex_unlock(&s->out_mutex);
+                usleep(10000);
+                continue;
+            }
+            break;
+        }
+        s->out_queue->push_back(frame);  // out_mutex is still held from the loop above
+        pthread_mutex_unlock(&s->out_mutex);
+    } while (!decode_done && !s->stop_decode);
+
+    s->thread_exited = true;  // checked by CustomSource::read()
+
+    return 0;
+}
+/** Open the h264_mp4toannexb BSF, connect to OMX and start the decoder, then map its color
+ *  format to a pix_fmt. Returns 0 on success, a negative value on failure. */
+static av_cold int Stagefright_init(AVCodecContext *avctx)
+{
+    StagefrightContext *s = (StagefrightContext*)avctx->priv_data;
+    sp<MetaData> meta, outFormat;
+    int32_t colorFormat = 0;
+    int ret;
+
+    if (!avctx->extradata || !avctx->extradata_size || avctx->extradata[0] != 1)
+        return -1;
+
+    s->avctx = avctx;
+    s->bsfc  = av_bitstream_filter_init("h264_mp4toannexb");
+    if (!s->bsfc) {
+        av_log(avctx, AV_LOG_ERROR, "Cannot open the h264_mp4toannexb BSF!\n");
+        return -1;
+    }
+
+    s->orig_extradata_size = avctx->extradata_size; // Stagefright_close() restores this copy
+    s->orig_extradata = (uint8_t*) av_mallocz(avctx->extradata_size +
+                                              FF_INPUT_BUFFER_PADDING_SIZE);
+    if (!s->orig_extradata) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+    memcpy(s->orig_extradata, avctx->extradata, avctx->extradata_size);
+
+    meta = new MetaData;
+    if (meta == NULL) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_AVC);
+    meta->setInt32(kKeyWidth, avctx->width);
+    meta->setInt32(kKeyHeight, avctx->height);
+    meta->setData(kKeyAVCC, kTypeAVCC, avctx->extradata, avctx->extradata_size);
+
+    android::ProcessState::self()->startThreadPool();
+
+    s->source    = new sp<MediaSource>();
+    *s->source   = new CustomSource(avctx, meta);
+    s->in_queue  = new List<Frame*>;
+    s->out_queue = new List<Frame*>;
+    s->ts_map    = new std::map<int64_t, TimeStamp>;
+    s->client    = new OMXClient;
+    s->end_frame = (Frame*)av_mallocz(sizeof(Frame));
+    if (s->source == NULL || !s->in_queue || !s->out_queue || !s->client ||
+        !s->ts_map || !s->end_frame) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    if (s->client->connect() !=  OK) {
+        av_log(avctx, AV_LOG_ERROR, "Cannot connect OMX client\n");
+        ret = -1;
+        goto fail;
+    }
+
+    s->decoder  = new sp<MediaSource>();
+    *s->decoder = OMXCodec::Create(s->client->interface(), meta, false, *s->source,
+                                  NULL, OMXCodec::kClientNeedsFramebuffer);
+    if (*s->decoder == NULL || (*s->decoder)->start() !=  OK) { // Create() can fail too
+        av_log(avctx, AV_LOG_ERROR, "Cannot start decoder\n");
+        ret = -1;
+        s->client->disconnect();
+        goto fail;
+    }
+
+    outFormat = (*s->decoder)->getFormat();
+    outFormat->findInt32(kKeyColorFormat, &colorFormat);
+    if (colorFormat == OMX_QCOM_COLOR_FormatYVU420SemiPlanar ||
+        colorFormat == OMX_COLOR_FormatYUV420SemiPlanar)
+        avctx->pix_fmt = AV_PIX_FMT_NV21;
+    else if (colorFormat == OMX_COLOR_FormatYCbYCr)
+        avctx->pix_fmt = AV_PIX_FMT_YUYV422;
+    else if (colorFormat == OMX_COLOR_FormatCbYCrY)
+        avctx->pix_fmt = AV_PIX_FMT_UYVY422;
+    else
+        avctx->pix_fmt = AV_PIX_FMT_YUV420P;
+
+    outFormat->findCString(kKeyDecoderComponent, &s->decoder_component);
+    if (s->decoder_component)
+        s->decoder_component = av_strdup(s->decoder_component);
+
+    pthread_mutex_init(&s->in_mutex, NULL);
+    pthread_mutex_init(&s->out_mutex, NULL);
+    pthread_cond_init(&s->condition, NULL);
+    return 0;
+
+fail:
+    av_bitstream_filter_close(s->bsfc);
+    av_freep(&s->orig_extradata);
+    av_freep(&s->end_frame);
+    delete s->in_queue;
+    delete s->out_queue;
+    delete s->ts_map;
+    delete s->client;
+    return ret;
+}
+/** Push one packet (or EOF when avpkt->data is NULL) onto in_queue and pop a decoded frame from
+ *  out_queue if one is ready. Returns orig_size, 0 at end of stream, or a negative value on error. */
+static int Stagefright_decode_frame(AVCodecContext *avctx, void *data,
+                                    int *got_frame, AVPacket *avpkt)
+{
+    StagefrightContext *s = (StagefrightContext*)avctx->priv_data;
+    Frame *frame;
+    status_t status;
+    int orig_size = avpkt->size;
+    AVPacket pkt = *avpkt;
+    AVFrame *ret_frame;
+
+    if (!s->thread_started) {
+        pthread_create(&s->decode_thread_id, NULL, &decode_thread, avctx);
+        s->thread_started = true;
+    }
+
+    if (avpkt && avpkt->data) {
+        av_bitstream_filter_filter(s->bsfc, avctx, NULL, &pkt.data, &pkt.size,
+                                   avpkt->data, avpkt->size, avpkt->flags & AV_PKT_FLAG_KEY);
+        avpkt = &pkt;
+    }
+
+    if (!s->source_done) {
+        if(!s->dummy_buf) {
+            s->dummy_buf = (uint8_t*)av_malloc(avpkt->size);
+            if (!s->dummy_buf)
+                return AVERROR(ENOMEM);
+            s->dummy_bufsize = avpkt->size;
+            memcpy(s->dummy_buf, avpkt->data, avpkt->size);
+        }
+
+        frame = (Frame*)av_mallocz(sizeof(Frame));
+        if (!frame)
+            return AVERROR(ENOMEM);
+        if (avpkt->data) {
+            frame->status  = OK;
+            frame->size    = avpkt->size;
+            frame->key     = avpkt->flags & AV_PKT_FLAG_KEY ? 1 : 0;
+            frame->buffer  = (uint8_t*)av_malloc(avpkt->size);
+            if (!frame->buffer) {
+                av_freep(&frame);
+                return AVERROR(ENOMEM);
+            }
+            uint8_t *ptr = avpkt->data;
+            // The OMX.SEC decoder fails without this.
+            if (avpkt->size == orig_size + avctx->extradata_size) {
+                ptr += avctx->extradata_size;
+                frame->size = orig_size;
+            }
+            memcpy(frame->buffer, ptr, frame->size); // copy exactly what frame->size advertises
+            if (avpkt == &pkt)
+                av_free(avpkt->data);
+            frame->time = ++s->frame_index;
+            (*s->ts_map)[s->frame_index].pts = avpkt->pts;
+            (*s->ts_map)[s->frame_index].reordered_opaque = avctx->reordered_opaque;
+        } else {
+            frame->status  = ERROR_END_OF_STREAM;
+            s->source_done = true;
+        }
+
+        while (true) {
+            if (s->thread_exited) {
+                s->source_done = true;
+                av_freep(&frame->buffer); // never queued, so it must be released here
+                av_freep(&frame);
+                break;
+            }
+            pthread_mutex_lock(&s->in_mutex);
+            if (s->in_queue->size() >= 10) {
+                pthread_mutex_unlock(&s->in_mutex);
+                usleep(10000);
+                continue;
+            }
+            s->in_queue->push_back(frame);
+            pthread_cond_signal(&s->condition);
+            pthread_mutex_unlock(&s->in_mutex);
+            break;
+        }
+    }
+    while (true) {
+        pthread_mutex_lock(&s->out_mutex);
+        if (!s->out_queue->empty()) break;
+        pthread_mutex_unlock(&s->out_mutex);
+        if (!s->source_done)
+            return orig_size;
+        usleep(10000);
+    }
+
+    frame = *s->out_queue->begin();
+    s->out_queue->erase(s->out_queue->begin());
+    pthread_mutex_unlock(&s->out_mutex);
+
+    ret_frame = frame->vframe;
+    status  = frame->status;
+    av_freep(&frame);
+
+    if (status == ERROR_END_OF_STREAM)
+        return 0;
+    if (status != OK) {
+        if (status == AVERROR(ENOMEM))
+            return status;
+        av_log(avctx, AV_LOG_ERROR, "Decode failed: %x\n", status);
+        return -1;
+    }
+
+    av_frame_free(&s->prev_frame);
+    s->prev_frame = ret_frame;
+
+    *got_frame = 1;
+    *(AVFrame*)data = *ret_frame;
+    return orig_size;
+}
+/** Stop decode_thread(), drain both queues, stop the decoder and release the context; returns 0. */
+static av_cold int Stagefright_close(AVCodecContext *avctx)
+{
+    StagefrightContext *s = (StagefrightContext*)avctx->priv_data;
+    Frame *frame;
+
+    if (s->thread_started) {
+        if (!s->thread_exited) {
+            s->stop_decode = 1; // checked by decode_thread()'s loop condition
+
+            // Make sure decode_thread() doesn't get stuck (it waits while out_queue holds 10 frames)
+            pthread_mutex_lock(&s->out_mutex);
+            while (!s->out_queue->empty()) {
+                frame = *s->out_queue->begin();
+                s->out_queue->erase(s->out_queue->begin());
+                if (frame->vframe)
+                    av_frame_free(&frame->vframe);
+                av_freep(&frame);
+            }
+            pthread_mutex_unlock(&s->out_mutex);
+
+            // Feed a dummy frame prior to signalling EOF.
+            // This is required to terminate the decoder(OMX.SEC)
+            // when only one frame is read during stream info detection.
+            if (s->dummy_buf && (frame = (Frame*)av_mallocz(sizeof(Frame)))) {
+                frame->status = OK;
+                frame->size   = s->dummy_bufsize;
+                frame->key    = 1;
+                frame->buffer = s->dummy_buf; // the queued frame takes over the buffer
+                pthread_mutex_lock(&s->in_mutex);
+                s->in_queue->push_back(frame);
+                pthread_cond_signal(&s->condition);
+                pthread_mutex_unlock(&s->in_mutex);
+                s->dummy_buf = NULL;
+            }
+
+            pthread_mutex_lock(&s->in_mutex);
+            s->end_frame->status = ERROR_END_OF_STREAM;
+            s->in_queue->push_back(s->end_frame);
+            pthread_cond_signal(&s->condition);
+            pthread_mutex_unlock(&s->in_mutex);
+            s->end_frame = NULL; // queued above, so the av_freep() further down skips it
+        }
+
+        pthread_join(s->decode_thread_id, NULL);
+
+        if (s->prev_frame)
+            av_frame_free(&s->prev_frame);
+
+        s->thread_started = false;
+    }
+
+    while (!s->in_queue->empty()) {
+        frame = *s->in_queue->begin();
+        s->in_queue->erase(s->in_queue->begin());
+        if (frame->size)
+            av_freep(&frame->buffer);
+        av_freep(&frame);
+    }
+
+    while (!s->out_queue->empty()) {
+        frame = *s->out_queue->begin();
+        s->out_queue->erase(s->out_queue->begin());
+        if (frame->vframe)
+            av_frame_free(&frame->vframe);
+        av_freep(&frame);
+    }
+
+    (*s->decoder)->stop();
+    s->client->disconnect();
+
+    if (s->decoder_component)
+        av_freep(&s->decoder_component);
+    av_freep(&s->dummy_buf);
+    av_freep(&s->end_frame);
+
+    // Reset the extradata back to the original mp4 format, so that
+    // the next invocation (both when decoding and when called from
+    // av_find_stream_info) get the original mp4 format extradata.
+    av_freep(&avctx->extradata);
+    avctx->extradata = s->orig_extradata;
+    avctx->extradata_size = s->orig_extradata_size;
+
+    delete s->in_queue;
+    delete s->out_queue;
+    delete s->ts_map;
+    delete s->client;
+    delete s->decoder;
+    delete s->source;
+
+    pthread_mutex_destroy(&s->in_mutex);
+    pthread_mutex_destroy(&s->out_mutex);
+    pthread_cond_destroy(&s->condition);
+    av_bitstream_filter_close(s->bsfc);
+    return 0;
+}
+/* Positional initializer: every value must stay in AVCodec member order. */
+AVCodec ff_libstagefright_h264_decoder = {
+    "libstagefright_h264", //name
+    NULL_IF_CONFIG_SMALL("libstagefright H.264"), //long_name
+    AVMEDIA_TYPE_VIDEO, //type
+    AV_CODEC_ID_H264, //id
+    CODEC_CAP_DELAY, //capabilities
+    NULL, //supported_framerates
+    NULL, //pix_fmts
+    NULL, //supported_samplerates
+    NULL, //sample_fmts
+    NULL, //channel_layouts
+    0,    //max_lowres
+    NULL, //priv_class
+    NULL, //profiles
+    sizeof(StagefrightContext), //priv_data_size
+    NULL, //next
+    NULL, //init_thread_copy
+    NULL, //update_thread_context
+    NULL, //defaults
+    NULL, //init_static_data
+    Stagefright_init, //init
+    NULL, //encode
+    NULL, //encode2
+    Stagefright_decode_frame, //decode
+    Stagefright_close, //close
+};
diff --git a/libavcodec/libtheoraenc.c b/libavcodec/libtheoraenc.c
index 75b0a16..4c90822 100644
--- a/libavcodec/libtheoraenc.c
+++ b/libavcodec/libtheoraenc.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2006 Paul Richards <paul.richards@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -30,7 +30,7 @@
  * and o_ prefixes on variables which are libogg types.
  */
 
-/* Libav includes */
+/* FFmpeg includes */
 #include "libavutil/common.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/pixdesc.h"
@@ -96,7 +96,7 @@ static int get_stats(AVCodecContext *avctx, int eos)
     bytes = th_encode_ctl(h->t_state, TH_ENCCTL_2PASS_OUT, &buf, sizeof(buf));
     if (bytes < 0) {
         av_log(avctx, AV_LOG_ERROR, "Error getting first pass stats\n");
-        return -1;
+        return AVERROR_EXTERNAL;
     }
     if (!eos) {
         h->stats = av_fast_realloc(h->stats, &h->stats_size,
@@ -113,7 +113,7 @@ static int get_stats(AVCodecContext *avctx, int eos)
     return 0;
 #else
     av_log(avctx, AV_LOG_ERROR, "libtheora too old to support 2pass\n");
-    return -1;
+    return AVERROR(ENOSYS);
 #endif
 }
 
@@ -127,7 +127,7 @@ static int submit_stats(AVCodecContext *avctx)
     if (!h->stats) {
         if (!avctx->stats_in) {
             av_log(avctx, AV_LOG_ERROR, "No statsfile for second pass\n");
-            return -1;
+            return AVERROR(EINVAL);
         }
         h->stats_size = strlen(avctx->stats_in) * 3/4;
         h->stats      = av_malloc(h->stats_size);
@@ -139,7 +139,7 @@ static int submit_stats(AVCodecContext *avctx)
                               h->stats_size - h->stats_offset);
         if (bytes < 0) {
             av_log(avctx, AV_LOG_ERROR, "Error submitting stats\n");
-            return -1;
+            return AVERROR_EXTERNAL;
         }
         if (!bytes)
             return 0;
@@ -148,7 +148,7 @@ static int submit_stats(AVCodecContext *avctx)
     return 0;
 #else
     av_log(avctx, AV_LOG_ERROR, "libtheora too old to support 2pass\n");
-    return -1;
+    return AVERROR(ENOSYS);
 #endif
 }
 
@@ -160,6 +160,7 @@ static av_cold int encode_init(AVCodecContext* avc_context)
     unsigned int offset;
     TheoraContext *h = avc_context->priv_data;
     uint32_t gop_size = avc_context->gop_size;
+    int ret;
 
     /* Set up the theora_info struct */
     th_info_init(&t_info);
@@ -196,17 +197,16 @@ static av_cold int encode_init(AVCodecContext* avc_context)
         t_info.pixel_fmt = TH_PF_444;
     else {
         av_log(avc_context, AV_LOG_ERROR, "Unsupported pix_fmt\n");
-        return -1;
+        return AVERROR(EINVAL);
     }
-    av_pix_fmt_get_chroma_sub_sample(avc_context->pix_fmt,
-                                     &h->uv_hshift, &h->uv_vshift);
+    avcodec_get_chroma_sub_sample(avc_context->pix_fmt, &h->uv_hshift, &h->uv_vshift);
 
     if (avc_context->flags & CODEC_FLAG_QSCALE) {
-        /* to be constant with the libvorbis implementation, clip global_quality to 0 - 10
-           Theora accepts a quality parameter p, which is:
-                * 0 <= p <=63
-                * an int value
-         */
+        /* Clip global_quality in QP units to the [0 - 10] range
+           to be consistent with the libvorbis implementation.
+           Theora accepts a quality parameter which is an int value in
+           the [0 - 63] range.
+        */
         t_info.quality        = av_clipf(avc_context->global_quality / (float)FF_QP2LAMBDA, 0, 10) * 6.3;
         t_info.target_bitrate = 0;
     } else {
@@ -218,7 +218,7 @@ static av_cold int encode_init(AVCodecContext* avc_context)
     h->t_state = th_encode_alloc(&t_info);
     if (!h->t_state) {
         av_log(avc_context, AV_LOG_ERROR, "theora_encode_init failed\n");
-        return -1;
+        return AVERROR_EXTERNAL;
     }
 
     h->keyframe_mask = (1 << t_info.keyframe_granule_shift) - 1;
@@ -228,16 +228,16 @@ static av_cold int encode_init(AVCodecContext* avc_context)
     if (th_encode_ctl(h->t_state, TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE,
                       &gop_size, sizeof(gop_size))) {
         av_log(avc_context, AV_LOG_ERROR, "Error setting GOP size\n");
-        return -1;
+        return AVERROR_EXTERNAL;
     }
 
     // need to enable 2 pass (via TH_ENCCTL_2PASS_) before encoding headers
     if (avc_context->flags & CODEC_FLAG_PASS1) {
-        if (get_stats(avc_context, 0))
-            return -1;
+        if ((ret = get_stats(avc_context, 0)) < 0)
+            return ret;
     } else if (avc_context->flags & CODEC_FLAG_PASS2) {
-        if (submit_stats(avc_context))
-            return -1;
+        if ((ret = submit_stats(avc_context)) < 0)
+            return ret;
     }
 
     /*
@@ -253,8 +253,8 @@ static av_cold int encode_init(AVCodecContext* avc_context)
     th_comment_init(&t_comment);
 
     while (th_encode_flushheader(h->t_state, &t_comment, &o_packet))
-        if (concatenate_packet(&offset, avc_context, &o_packet))
-            return -1;
+        if ((ret = concatenate_packet(&offset, avc_context, &o_packet)) < 0)
+            return ret;
 
     th_comment_clear(&t_comment);
 
@@ -276,8 +276,8 @@ static int encode_frame(AVCodecContext* avc_context, AVPacket *pkt,
     if (!frame) {
         th_encode_packetout(h->t_state, 1, &o_packet);
         if (avc_context->flags & CODEC_FLAG_PASS1)
-            if (get_stats(avc_context, 1))
-                return -1;
+            if ((ret = get_stats(avc_context, 1)) < 0)
+                return ret;
         return 0;
     }
 
@@ -290,8 +290,8 @@ static int encode_frame(AVCodecContext* avc_context, AVPacket *pkt,
     }
 
     if (avc_context->flags & CODEC_FLAG_PASS2)
-        if (submit_stats(avc_context))
-            return -1;
+        if ((ret = submit_stats(avc_context)) < 0)
+            return ret;
 
     /* Now call into theora_encode_YUVin */
     result = th_encode_ycbcr_in(h->t_state, t_yuv_buffer);
@@ -309,12 +309,12 @@ static int encode_frame(AVCodecContext* avc_context, AVPacket *pkt,
             break;
         }
         av_log(avc_context, AV_LOG_ERROR, "theora_encode_YUVin failed (%s) [%d]\n", message, result);
-        return -1;
+        return AVERROR_EXTERNAL;
     }
 
     if (avc_context->flags & CODEC_FLAG_PASS1)
-        if (get_stats(avc_context, 0))
-            return -1;
+        if ((ret = get_stats(avc_context, 0)) < 0)
+            return ret;
 
     /* Pick up returned ogg_packet */
     result = th_encode_packetout(h->t_state, 0, &o_packet);
@@ -327,14 +327,12 @@ static int encode_frame(AVCodecContext* avc_context, AVPacket *pkt,
         break;
     default:
         av_log(avc_context, AV_LOG_ERROR, "theora_encode_packetout failed [%d]\n", result);
-        return -1;
+        return AVERROR_EXTERNAL;
     }
 
     /* Copy ogg_packet content out to buffer */
-    if ((ret = ff_alloc_packet(pkt, o_packet.bytes)) < 0) {
-        av_log(avc_context, AV_LOG_ERROR, "Error getting output packet of size %ld.\n", o_packet.bytes);
+    if ((ret = ff_alloc_packet2(avc_context, pkt, o_packet.bytes)) < 0)
         return ret;
-    }
     memcpy(pkt->data, o_packet.packet, o_packet.bytes);
 
     // HACK: assumes no encoder delay, this is true until libtheora becomes
diff --git a/libavcodec/libtwolame.c b/libavcodec/libtwolame.c
index def5fee..e26454b 100644
--- a/libavcodec/libtwolame.c
+++ b/libavcodec/libtwolame.c
@@ -2,20 +2,20 @@
  * Interface to libtwolame for mp2 encoding
  * Copyright (c) 2012 Paul B Mahol
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -77,6 +77,10 @@ static av_cold int twolame_encode_init(AVCodecContext *avctx)
     twolame_set_num_channels(s->glopts, avctx->channels);
     twolame_set_in_samplerate(s->glopts, avctx->sample_rate);
     twolame_set_out_samplerate(s->glopts, avctx->sample_rate);
+
+    if (!avctx->bit_rate)
+        avctx->bit_rate = avctx->sample_rate < 28000 ? 160000 : 384000;
+
     if (avctx->flags & CODEC_FLAG_QSCALE || !avctx->bit_rate) {
         twolame_set_VBR(s->glopts, TRUE);
         twolame_set_VBR_level(s->glopts,
@@ -102,7 +106,7 @@ static int twolame_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     TWOLAMEContext *s = avctx->priv_data;
     int ret;
 
-    if ((ret = ff_alloc_packet(avpkt, MPA_MAX_CODED_FRAME_SIZE)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, MPA_MAX_CODED_FRAME_SIZE)) < 0)
         return ret;
 
     if (frame) {
@@ -190,7 +194,7 @@ static const AVClass twolame_class = {
 };
 
 static const AVCodecDefault twolame_defaults[] = {
-    { "b", "384000" },
+    { "b", "0" },
     { NULL },
 };
 
diff --git a/libavcodec/libutvideo.h b/libavcodec/libutvideo.h
new file mode 100644
index 0000000..5fb1174
--- /dev/null
+++ b/libavcodec/libutvideo.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2011-2012 Derek Buitenhuis
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation;
+ * version 2 of the License.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Known FOURCCs:
+ *     'ULY0' (YCbCr 4:2:0), 'ULY2' (YCbCr 4:2:2), 'ULRG' (RGB), 'ULRA' (RGBA),
+ *     'ULH0' (YCbCr 4:2:0 BT.709), 'ULH2' (YCbCr 4:2:2 BT.709)
+ */
+
+#ifndef AVCODEC_LIBUTVIDEO_H
+#define AVCODEC_LIBUTVIDEO_H
+
+#include <stdlib.h>
+#include <utvideo/utvideo.h>
+#include <utvideo/Codec.h>
+
+/*
+ * Ut Video version 12.0.0 changed the RGB format names and removed
+ * the _WIN names, so if the new names are absent, define them
+ * against the old names so compatibility with pre-v12 versions
+ * is maintained.
+ */
+#if !defined(UTVF_NFCC_BGR_BU)
+#define UTVF_NFCC_BGR_BU UTVF_RGB24_WIN
+#endif
+
+#if !defined(UTVF_NFCC_BGRA_BU)
+#define UTVF_NFCC_BGRA_BU UTVF_RGB32_WIN
+#endif
+
+/*
+ * Ut Video version 13.0.1 introduced new BT.709 variants.
+ * Special-case these and only use them if v13 is detected.
+ */
+#if defined(UTVF_HDYC)
+#define UTV_BT709
+#endif
+/* Ut Video extradata; the decoder fills it from four little-endian 32-bit words, in this order. */
+typedef struct {
+    uint32_t version;
+    uint32_t original_format;
+    uint32_t frameinfo_size;
+    uint32_t flags;            /* the decoder treats bit 0x800 as "interlaced" */
+} UtVideoExtra;
+/* Private codec context used by the libutvideo decoder and encoder wrappers. */
+typedef struct {
+    CCodec *codec;             /* Ut Video codec instance (decoder: CCodec::CreateInstance()) */
+    unsigned int buf_size;     /* size of buffer, in bytes */
+    uint8_t *buffer;           /* decoder: the picture buffer DecodeFrame() writes into */
+} UtVideoContext;
+
+#endif /* AVCODEC_LIBUTVIDEO_H */
diff --git a/libavcodec/libutvideodec.cpp b/libavcodec/libutvideodec.cpp
new file mode 100644
index 0000000..60dbd15
--- /dev/null
+++ b/libavcodec/libutvideodec.cpp
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2011 Derek Buitenhuis
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation;
+ * version 2 of the License.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Known FOURCCs:
+ *     'ULY0' (YCbCr 4:2:0), 'ULY2' (YCbCr 4:2:2), 'ULRG' (RGB), 'ULRA' (RGBA),
+ *     'ULH0' (YCbCr 4:2:0 BT.709), 'ULH2' (YCbCr 4:2:2 BT.709)
+ */
+
+extern "C" {
+#include "avcodec.h"
+}
+
+#include "libutvideo.h"
+#include "get_bits.h"
+/** Decoder init: parse the 16-byte extradata, map the FOURCC to a pix_fmt, allocate the picture
+ *  buffer and output frame, then start Ut Video. Returns 0 or a negative AVERROR code. */
+static av_cold int utvideo_decode_init(AVCodecContext *avctx)
+{
+    UtVideoContext *utv = (UtVideoContext *)avctx->priv_data;
+    UtVideoExtra info;
+    int format;
+    int begin_ret;
+
+    if (avctx->extradata_size != 4*4) {
+        av_log(avctx, AV_LOG_ERROR, "Extradata size mismatch.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* Read extradata */
+    info.version = AV_RL32(avctx->extradata);
+    info.original_format = AV_RL32(avctx->extradata + 4);
+    info.frameinfo_size = AV_RL32(avctx->extradata + 8);
+    info.flags = AV_RL32(avctx->extradata + 12);
+
+    /* Pick format based on FOURCC */
+    switch (avctx->codec_tag) {
+#ifdef UTV_BT709
+    case MKTAG('U', 'L', 'H', '0'):
+        avctx->pix_fmt = AV_PIX_FMT_YUV420P;
+        avctx->colorspace = AVCOL_SPC_BT709;
+        format = UTVF_YV12;
+        break;
+    case MKTAG('U', 'L', 'H', '2'):
+        avctx->pix_fmt = AV_PIX_FMT_YUYV422;
+        avctx->colorspace = AVCOL_SPC_BT709;
+        format = UTVF_YUY2;
+        break;
+#endif
+    case MKTAG('U', 'L', 'Y', '0'):
+        avctx->pix_fmt = AV_PIX_FMT_YUV420P;
+        format = UTVF_YV12;
+        break;
+    case MKTAG('U', 'L', 'Y', '2'):
+        avctx->pix_fmt = AV_PIX_FMT_YUYV422;
+        format = UTVF_YUY2;
+        break;
+    case MKTAG('U', 'L', 'R', 'G'):
+        avctx->pix_fmt = AV_PIX_FMT_BGR24;
+        format = UTVF_NFCC_BGR_BU;
+        break;
+    case MKTAG('U', 'L', 'R', 'A'):
+        avctx->pix_fmt = AV_PIX_FMT_RGB32;
+        format = UTVF_NFCC_BGRA_BU;
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Not a Ut Video FOURCC: %X\n", avctx->codec_tag);
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* Only allocate the buffer once */
+    utv->buf_size = avpicture_get_size(avctx->pix_fmt, avctx->width, avctx->height);
+    utv->buffer = (uint8_t *)av_malloc(utv->buf_size * sizeof(uint8_t));
+    if (utv->buffer == NULL) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to allocate output buffer.\n");
+        return AVERROR(ENOMEM);
+    }
+
+    /* Allocate the output frame; it is dereferenced right below, so it must not be NULL */
+    avctx->coded_frame = av_frame_alloc();
+    if (!avctx->coded_frame) {
+        av_freep(&utv->buffer);
+        return AVERROR(ENOMEM);
+    }
+
+    /* Ut Video only supports 8-bit */
+    avctx->bits_per_raw_sample = 8;
+
+    /* Is it interlaced? */
+    avctx->coded_frame->interlaced_frame = info.flags & 0x800 ? 1 : 0;
+    /* Apparently Ut Video doesn't store this info... */
+    avctx->coded_frame->top_field_first = 1;
+
+    /*
+     * Create a Ut Video instance. Since the function wants
+     * an "interface name" string, pass it the name of the lib.
+     */
+    utv->codec = CCodec::CreateInstance(UNFCC(avctx->codec_tag), "libavcodec");
+
+    /* Initialize Decoding */
+    begin_ret = utv->codec->DecodeBegin(format, avctx->width, avctx->height,
+                            CBGROSSWIDTH_WINDOWS, &info, sizeof(UtVideoExtra));
+    /* Check to see if the decoder initialized properly */
+    if (begin_ret != 0) {
+        av_log(avctx, AV_LOG_ERROR, "Could not initialize decoder: %d\n", begin_ret);
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+/** Decode one packet into utv->buffer and point the output frame at it; returns avpkt->size. */
+static int utvideo_decode_frame(AVCodecContext *avctx, void *data,
+                                int *got_frame, AVPacket *avpkt)
+{
+    UtVideoContext *utv = (UtVideoContext *)avctx->priv_data;
+    AVFrame *frame = avctx->coded_frame;
+    const int width  = avctx->width;
+    const int height = avctx->height;
+
+    /* Every frame is reported as an intra keyframe */
+    frame->key_frame = 1;
+    frame->pict_type = AV_PICTURE_TYPE_I;
+    frame->reference = 0;
+
+    /* Let Ut Video decode the packet into our buffer */
+    utv->codec->DecodeFrame(utv->buffer, avpkt->data, true);
+
+    /* Describe the buffer layout for the pixel format chosen in init */
+    if (avctx->pix_fmt == AV_PIX_FMT_YUV420P) {
+        /* data[2] directly follows the luma plane and data[1] comes after data[2] */
+        frame->data[0]     = utv->buffer;
+        frame->data[2]     = utv->buffer + (width * height);
+        frame->data[1]     = frame->data[2] + (width * height / 4);
+        frame->linesize[0] = width;
+        frame->linesize[1] = frame->linesize[2] = width / 2;
+    } else if (avctx->pix_fmt == AV_PIX_FMT_YUYV422) {
+        frame->data[0]     = utv->buffer;
+        frame->linesize[0] = width * 2;
+    } else if (avctx->pix_fmt == AV_PIX_FMT_BGR24 || avctx->pix_fmt == AV_PIX_FMT_RGB32) {
+        /* Ut Video uses bottom-up BGR: negative stride, starting one row before the buffer end */
+        const int bpp = avctx->pix_fmt == AV_PIX_FMT_BGR24 ? 3 : 4;
+        frame->linesize[0] = -1 * width * bpp;
+        frame->data[0]     = utv->buffer + utv->buf_size + frame->linesize[0];
+    }
+
+    /* NOTE(review): av_frame_move_ref() is documented to reset its source frame, so the flags
+     * set on coded_frame in init presumably only reach the first output - confirm. */
+    *got_frame = 1;
+    av_frame_move_ref((AVFrame*)data, frame);
+
+    return avpkt->size;
+}
+/** Decoder teardown: free the output frame and buffer, then end and delete the codec; returns 0. */
+static av_cold int utvideo_decode_close(AVCodecContext *avctx)
+{
+    UtVideoContext *ctx = (UtVideoContext *)avctx->priv_data;
+    CCodec *instance    = ctx->codec;
+
+    /* Release the output frame and the picture buffer */
+    av_frame_free(&avctx->coded_frame);
+    av_freep(&ctx->buffer);
+
+    /* Shut the Ut Video decoder down, then destroy the instance */
+    instance->DecodeEnd();
+    CCodec::DeleteInstance(instance);
+    return 0;
+}
+/* Positional initializer: every value must stay in AVCodec member order. */
+AVCodec ff_libutvideo_decoder = {
+    "libutvideo", //name
+    NULL_IF_CONFIG_SMALL("Ut Video"), //long_name
+    AVMEDIA_TYPE_VIDEO, //type
+    AV_CODEC_ID_UTVIDEO, //id
+    0,    //capabilities
+    NULL, //supported_framerates
+    NULL, //pix_fmts
+    NULL, //supported_samplerates
+    NULL, //sample_fmts
+    NULL, //channel_layouts
+    0,    //max_lowres
+    NULL, //priv_class
+    NULL, //profiles
+    sizeof(UtVideoContext), //priv_data_size
+    NULL, //next
+    NULL, //init_thread_copy
+    NULL, //update_thread_context
+    NULL, //defaults
+    NULL, //init_static_data
+    utvideo_decode_init, //init
+    NULL, //encode
+    NULL, //encode2
+    utvideo_decode_frame, //decode
+    utvideo_decode_close, //close
+};
diff --git a/libavcodec/libutvideoenc.cpp b/libavcodec/libutvideoenc.cpp
new file mode 100644
index 0000000..f0d5619
--- /dev/null
+++ b/libavcodec/libutvideoenc.cpp
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2012 Derek Buitenhuis
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation;
+ * version 2 of the License.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Known FOURCCs:
+ *     'ULY0' (YCbCr 4:2:0), 'ULY2' (YCbCr 4:2:2), 'ULRG' (RGB), 'ULRA' (RGBA),
+ *     'ULH0' (YCbCr 4:2:0 BT.709), 'ULH2' (YCbCr 4:2:2 BT.709)
+ */
+
+extern "C" {
+#include "libavutil/avassert.h"
+#include "avcodec.h"
+#include "internal.h"
+}
+
+#include "libutvideo.h"
+#include "put_bits.h"
+
+static av_cold int utvideo_encode_init(AVCodecContext *avctx)
+{
+    UtVideoContext *utv = (UtVideoContext *)avctx->priv_data;
+    UtVideoExtra *info;
+    uint32_t flags, in_format;
+
+    switch (avctx->pix_fmt) {
+    case AV_PIX_FMT_YUV420P:
+        in_format = UTVF_YV12;
+        avctx->bits_per_coded_sample = 12;
+        if (avctx->colorspace == AVCOL_SPC_BT709)
+            avctx->codec_tag = MKTAG('U', 'L', 'H', '0');
+        else
+            avctx->codec_tag = MKTAG('U', 'L', 'Y', '0');
+        break;
+    case AV_PIX_FMT_YUYV422:
+        in_format = UTVF_YUYV;
+        avctx->bits_per_coded_sample = 16;
+        if (avctx->colorspace == AVCOL_SPC_BT709)
+            avctx->codec_tag = MKTAG('U', 'L', 'H', '2');
+        else
+            avctx->codec_tag = MKTAG('U', 'L', 'Y', '2');
+        break;
+    case AV_PIX_FMT_BGR24:
+        in_format = UTVF_NFCC_BGR_BU;
+        avctx->bits_per_coded_sample = 24;
+        avctx->codec_tag = MKTAG('U', 'L', 'R', 'G');
+        break;
+    case AV_PIX_FMT_RGB32:
+        in_format = UTVF_NFCC_BGRA_BU;
+        avctx->bits_per_coded_sample = 32;
+        avctx->codec_tag = MKTAG('U', 'L', 'R', 'A');
+        break;
+    default:
+        return AVERROR(EINVAL);
+    }
+
+    /* Check before we alloc anything */
+    if (avctx->prediction_method != 0 && avctx->prediction_method != 2) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid prediction method.\n");
+        return AVERROR(EINVAL);
+    }
+
+    flags = ((avctx->prediction_method + 1) << 8) | (avctx->thread_count - 1);
+
+    avctx->coded_frame = av_frame_alloc();
+    if (!avctx->coded_frame)
+        return AVERROR(ENOMEM);
+
+    /* Alloc extradata buffer */
+    info = (UtVideoExtra *)av_malloc(sizeof(*info));
+    if (info == NULL) {
+        av_log(avctx, AV_LOG_ERROR, "Could not allocate extradata buffer.\n");
+        av_frame_free(&avctx->coded_frame);
+        return AVERROR(ENOMEM);
+    }
+
+    /* Staging buffer: each input frame is repacked into it before it is
+     * passed to EncodeFrame(); its size also sizes the output packet. */
+    utv->buf_size = avpicture_get_size(avctx->pix_fmt,
+                                       avctx->width, avctx->height);
+    utv->buffer = (uint8_t *)av_malloc(utv->buf_size);
+    if (utv->buffer == NULL) {
+        av_log(avctx, AV_LOG_ERROR, "Could not allocate output buffer.\n");
+        av_free(info);
+        av_frame_free(&avctx->coded_frame);
+        return AVERROR(ENOMEM);
+    }
+
+    /*
+     * Create a Ut Video instance. Since the function wants
+     * an "interface name" string, pass it the name of the lib.
+     */
+    utv->codec = CCodec::CreateInstance(UNFCC(avctx->codec_tag), "libavcodec");
+
+    /* Initialize encoder */
+    utv->codec->EncodeBegin(in_format, avctx->width, avctx->height,
+                            CBGROSSWIDTH_WINDOWS);
+
+    /* Get extradata from encoder */
+    avctx->extradata_size = utv->codec->EncodeGetExtraDataSize();
+    utv->codec->EncodeGetExtraData(info, avctx->extradata_size, in_format,
+                                   avctx->width, avctx->height,
+                                   CBGROSSWIDTH_WINDOWS);
+    avctx->extradata = (uint8_t *)info;
+
+    /* Set flags */
+    utv->codec->SetState(&flags, sizeof(flags));
+
+    return 0;
+}
+
+static int utvideo_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
+                                const AVFrame *pic, int *got_packet)
+{
+    UtVideoContext *utv = (UtVideoContext *)avctx->priv_data;
+    int w = avctx->width, h = avctx->height;
+    int ret, rgb_size, i;
+    bool keyframe;
+    uint8_t *y, *u, *v;
+    uint8_t *dst;
+
+    /* Alloc output packet */
+    if ((ret = ff_alloc_packet2(avctx, pkt, utv->buf_size)) < 0)
+        return ret;
+
+    dst = pkt->data;
+
+    /* Copy the input frame into the Ut Video friendly staging buffer */
+    switch (avctx->pix_fmt) {
+    case AV_PIX_FMT_YUV420P:
+        y = utv->buffer;
+        u = y + w * h;
+        v = u + w * h / 4;
+        for (i = 0; i < h; i++) {
+            memcpy(y, pic->data[0] + i * pic->linesize[0], w);
+            y += w;
+        }
+        for (i = 0; i < h / 2; i++) { /* data[2] is stored right after luma, then data[1] */
+            memcpy(u, pic->data[2] + i * pic->linesize[2], w >> 1);
+            memcpy(v, pic->data[1] + i * pic->linesize[1], w >> 1);
+            u += w >> 1;
+            v += w >> 1;
+        }
+        break;
+    case AV_PIX_FMT_YUYV422:
+        for (i = 0; i < h; i++)
+            memcpy(utv->buffer + i * (w << 1),
+                   pic->data[0] + i * pic->linesize[0], w << 1);
+        break;
+    case AV_PIX_FMT_BGR24:
+    case AV_PIX_FMT_RGB32:
+        /* Ut Video takes bottom-up BGR */
+        rgb_size = avctx->pix_fmt == AV_PIX_FMT_BGR24 ? 3 : 4;
+        for (i = 0; i < h; i++)
+            memcpy(utv->buffer + (h - i - 1) * w * rgb_size,
+                   pic->data[0] + i * pic->linesize[0],
+                   w * rgb_size);
+        break;
+    default:
+        return AVERROR(EINVAL);
+    }
+
+    /* Encode frame */
+    pkt->size = utv->codec->EncodeFrame(dst, &keyframe, utv->buffer);
+
+    if (!pkt->size) {
+        av_log(avctx, AV_LOG_ERROR, "EncodeFrame failed!\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /*
+     * Ut Video is intra-only and every frame is a keyframe,
+     * and the API always returns true. In case something
+     * drastic changes in the future, such as inter support,
+     * assert that this is true.
+     */
+    av_assert2(keyframe == true);
+    avctx->coded_frame->key_frame = 1;
+    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+
+    pkt->flags |= AV_PKT_FLAG_KEY;
+    *got_packet = 1;
+    return 0;
+}
+
+static av_cold int utvideo_encode_close(AVCodecContext *avctx)
+{
+    UtVideoContext *utv = (UtVideoContext *)avctx->priv_data;
+
+    av_frame_free(&avctx->coded_frame); /* allocated with av_frame_alloc() in init */
+    av_freep(&avctx->extradata);
+    av_freep(&utv->buffer);
+
+    utv->codec->EncodeEnd();
+    CCodec::DeleteInstance(utv->codec);
+
+    return 0;
+}
+
+AVCodec ff_libutvideo_encoder = {
+    "libutvideo",
+    NULL_IF_CONFIG_SMALL("Ut Video"),
+    AVMEDIA_TYPE_VIDEO,
+    AV_CODEC_ID_UTVIDEO,
+    CODEC_CAP_AUTO_THREADS | CODEC_CAP_LOSSLESS,
+    NULL, /* supported_framerates */
+    (const enum AVPixelFormat[]) {
+        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUYV422, AV_PIX_FMT_BGR24,
+        AV_PIX_FMT_RGB32, AV_PIX_FMT_NONE
+    },
+    NULL, /* supported_samplerates */
+    NULL, /* sample_fmts */
+    NULL, /* channel_layouts */
+    0,    /* max_lowres */
+    NULL, /* priv_class */
+    NULL, /* profiles */
+    sizeof(UtVideoContext),
+    NULL, /* next */
+    NULL, /* init_thread_copy */
+    NULL, /* update_thread_context */
+    NULL, /* defaults */
+    NULL, /* init_static_data */
+    utvideo_encode_init,
+    NULL, /* encode */
+    utvideo_encode_frame,
+    NULL, /* decode */
+    utvideo_encode_close,
+    NULL, /* flush */
+};
diff --git a/libavcodec/libvo-aacenc.c b/libavcodec/libvo-aacenc.c
index 9450792..04f9902 100644
--- a/libavcodec/libvo-aacenc.c
+++ b/libavcodec/libvo-aacenc.c
@@ -2,20 +2,20 @@
  * AAC encoder wrapper
  * Copyright (c) 2010 Martin Storsjo
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -153,10 +153,8 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
             return ret;
     }
 
-    if ((ret = ff_alloc_packet(avpkt, FFMAX(8192, 768 * avctx->channels)))) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+    if ((ret = ff_alloc_packet2(avctx, avpkt, FFMAX(8192, 768 * avctx->channels))) < 0)
         return ret;
-    }
 
     input.Buffer  = samples;
     input.Length  = 2 * avctx->channels * avctx->frame_size;
@@ -179,6 +177,13 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     return 0;
 }
 
+/* duplicated from avpriv_mpeg4audio_sample_rates to avoid shared build
+ * failures */
+static const int mpeg4audio_sample_rates[16] = {
+    96000, 88200, 64000, 48000, 44100, 32000,
+    24000, 22050, 16000, 12000, 11025, 8000, 7350
+};
+
 AVCodec ff_libvo_aacenc_encoder = {
     .name           = "libvo_aacenc",
     .long_name      = NULL_IF_CONFIG_SMALL("Android VisualOn AAC (Advanced Audio Coding)"),
@@ -188,6 +193,7 @@ AVCodec ff_libvo_aacenc_encoder = {
     .init           = aac_encode_init,
     .encode2        = aac_encode_frame,
     .close          = aac_encode_close,
+    .supported_samplerates = mpeg4audio_sample_rates,
     .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_NONE },
diff --git a/libavcodec/libvo-amrwbenc.c b/libavcodec/libvo-amrwbenc.c
index b255ba5..4216a41 100644
--- a/libavcodec/libvo-amrwbenc.c
+++ b/libavcodec/libvo-amrwbenc.c
@@ -2,20 +2,20 @@
  * AMR Audio encoder stub
  * Copyright (c) 2003 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -45,7 +45,7 @@ static const AVOption options[] = {
     { NULL }
 };
 
-static const AVClass class = {
+static const AVClass amrwb_class = {
     "libvo_amrwbenc", av_default_item_name, options, LIBAVUTIL_VERSION_INT
 };
 
@@ -79,7 +79,7 @@ static av_cold int amr_wb_encode_init(AVCodecContext *avctx)
 {
     AMRWBContext *s = avctx->priv_data;
 
-    if (avctx->sample_rate != 16000) {
+    if (avctx->sample_rate != 16000 && avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL) {
         av_log(avctx, AV_LOG_ERROR, "Only 16000Hz sample rate supported\n");
         return AVERROR(ENOSYS);
     }
@@ -115,10 +115,8 @@ static int amr_wb_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     const int16_t *samples = (const int16_t *)frame->data[0];
     int size, ret;
 
-    if ((ret = ff_alloc_packet(avpkt, MAX_PACKET_SIZE))) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+    if ((ret = ff_alloc_packet2(avctx, avpkt, MAX_PACKET_SIZE)) < 0)
         return ret;
-    }
 
     if (s->last_bitrate != avctx->bit_rate) {
         s->mode         = get_wb_bitrate_mode(avctx->bit_rate, avctx);
@@ -150,5 +148,5 @@ AVCodec ff_libvo_amrwbenc_encoder = {
     .close          = amr_wb_encode_close,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_NONE },
-    .priv_class     = &class,
+    .priv_class     = &amrwb_class,
 };
diff --git a/libavcodec/libvorbisdec.c b/libavcodec/libvorbisdec.c
new file mode 100644
index 0000000..b703b65
--- /dev/null
+++ b/libavcodec/libvorbisdec.c
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2002 Mark Hills <mark@pogo.org.uk>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <vorbis/vorbisenc.h>
+
+#include "avcodec.h"
+#include "bytestream.h"
+#include "internal.h"
+
+typedef struct OggVorbisDecContext {
+    vorbis_info vi;                     /**< stream info parsed from headers */
+    vorbis_dsp_state vd;                /**< DSP state used for synthesis    */
+    vorbis_block vb;                    /**< vorbis_block used for synthesis */
+    vorbis_comment vc;                  /**< VorbisComment info              */
+    ogg_packet op;                      /**< ogg packet handed to libvorbis  */
+} OggVorbisDecContext;
+
+static int oggvorbis_decode_init(AVCodecContext *avccontext) {
+    OggVorbisDecContext *context = avccontext->priv_data ;
+    uint8_t *p= avccontext->extradata;
+    int i, hsizes[3];
+    unsigned char *headers[3], *extradata = avccontext->extradata;
+
+    vorbis_info_init(&context->vi) ;
+    vorbis_comment_init(&context->vc) ;
+
+    if(! avccontext->extradata_size || ! p) {
+        av_log(avccontext, AV_LOG_ERROR, "vorbis extradata absent\n");
+        return -1;
+    }
+
+    if(p[0] == 0 && p[1] == 30) { /* each header carries a 16-bit big-endian length prefix */
+        for(i = 0; i < 3; i++){
+            hsizes[i] = bytestream_get_be16((const uint8_t **)&p);
+            headers[i] = p;
+            p += hsizes[i];
+        }
+    } else if(*p == 2) { /* first two sizes stored as runs of 0xFF plus a final byte */
+        unsigned int offset = 1;
+        p++;
+        for(i=0; i<2; i++) {
+            hsizes[i] = 0;
+            while((*p == 0xFF) && (offset < avccontext->extradata_size)) {
+                hsizes[i] += 0xFF;
+                offset++;
+                p++;
+            }
+            if(offset >= avccontext->extradata_size - 1) {
+                av_log(avccontext, AV_LOG_ERROR,
+                       "vorbis header sizes damaged\n");
+                return -1;
+            }
+            hsizes[i] += *p;
+            offset++;
+            p++;
+        }
+        hsizes[2] = avccontext->extradata_size - hsizes[0]-hsizes[1]-offset;
+        if(hsizes[2] < 0) { /* first two headers overrun the extradata */
+            av_log(avccontext, AV_LOG_ERROR,
+                   "vorbis header sizes damaged\n");
+            return -1;
+        }
+        headers[0] = extradata + offset;
+        headers[1] = extradata + offset + hsizes[0];
+        headers[2] = extradata + offset + hsizes[0] + hsizes[1];
+    } else {
+        av_log(avccontext, AV_LOG_ERROR,
+               "vorbis initial header len is wrong: %d\n", *p);
+        return -1;
+    }
+
+    for(i=0; i<3; i++){
+        context->op.b_o_s= i==0;
+        context->op.bytes = hsizes[i];
+        context->op.packet = headers[i];
+        if(vorbis_synthesis_headerin(&context->vi, &context->vc, &context->op)<0){
+            av_log(avccontext, AV_LOG_ERROR, "%d. vorbis header damaged\n", i+1);
+            return -1;
+        }
+    }
+
+    avccontext->channels = context->vi.channels;
+    avccontext->sample_rate = context->vi.rate;
+    avccontext->sample_fmt = AV_SAMPLE_FMT_S16;
+    avccontext->time_base= (AVRational){1, avccontext->sample_rate};
+
+    vorbis_synthesis_init(&context->vd, &context->vi);
+    vorbis_block_init(&context->vd, &context->vb);
+
+    return 0 ;
+}
+
+
+static inline int conv(int samples, float **pcm, char *buf, int channels) {
+    int i, j;
+    ogg_int16_t *ptr, *data = (ogg_int16_t*)buf ;
+    float *mono ;
+
+    for(i = 0 ; i < channels ; i++){
+        ptr = &data[i];
+        mono = pcm[i] ;
+
+        for(j = 0 ; j < samples ; j++) {
+            *ptr = av_clip_int16(mono[j] * 32767.f);
+            ptr += channels;
+        }
+    }
+
+    return 0 ;
+}
+
+static int oggvorbis_decode_frame(AVCodecContext *avccontext, void *data,
+                        int *got_frame_ptr, AVPacket *avpkt)
+{
+    OggVorbisDecContext *context = avccontext->priv_data ;
+    AVFrame *frame = data;
+    float **pcm ;
+    ogg_packet *op= &context->op;
+    int samples, total_samples, total_bytes;
+    int ret;
+    int16_t *output;
+
+    if(!avpkt->size){
+    //FIXME flush
+        return 0;
+    }
+
+    frame->nb_samples = 8192*4;
+    if ((ret = ff_get_buffer(avccontext, frame, 0)) < 0)
+        return ret;
+    output = (int16_t *)frame->data[0];
+
+
+    op->packet = avpkt->data;
+    op->bytes  = avpkt->size;
+
+//    av_log(avccontext, AV_LOG_DEBUG, "%d %d %d %"PRId64" %"PRId64" %d %d\n", op->bytes, op->b_o_s, op->e_o_s, op->granulepos, op->packetno, buf_size, context->vi.rate);
+
+/*    for(i=0; i<op->bytes; i++)
+      av_log(avccontext, AV_LOG_DEBUG, "%02X ", op->packet[i]);
+    av_log(avccontext, AV_LOG_DEBUG, "\n");*/
+
+    if(vorbis_synthesis(&context->vb, op) == 0)
+        vorbis_synthesis_blockin(&context->vd, &context->vb) ;
+
+    total_samples = 0 ;
+    total_bytes = 0 ;
+
+    while((samples = vorbis_synthesis_pcmout(&context->vd, &pcm)) > 0) {
+        conv(samples, pcm, (char*)output + total_bytes, context->vi.channels) ;
+        total_bytes += samples * 2 * context->vi.channels ;
+        total_samples += samples ;
+        vorbis_synthesis_read(&context->vd, samples) ;
+    }
+
+    frame->nb_samples = total_samples;
+    *got_frame_ptr   = total_samples > 0;
+    return avpkt->size;
+}
+
+
+static int oggvorbis_decode_close(AVCodecContext *avccontext) {
+    OggVorbisDecContext *context = avccontext->priv_data ;
+    vorbis_block_clear(&context->vb) ; /* vb/vd come from init; clear them before vi */
+    vorbis_dsp_clear(&context->vd) ;
+    vorbis_info_clear(&context->vi) ;
+    vorbis_comment_clear(&context->vc) ;
+    return 0 ;
+}
+
+
+AVCodec ff_libvorbis_decoder = {
+    .name           = "libvorbis",
+    .long_name      = NULL_IF_CONFIG_SMALL("libvorbis"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_VORBIS,
+    .priv_data_size = sizeof(OggVorbisDecContext),
+    .init           = oggvorbis_decode_init,
+    .decode         = oggvorbis_decode_frame,
+    .close          = oggvorbis_decode_close,
+    .capabilities   = CODEC_CAP_DELAY,
+};
diff --git a/libavcodec/libvorbis.c b/libavcodec/libvorbisenc.c
index 4b4caaa..ed6baa9 100644
--- a/libavcodec/libvorbis.c
+++ b/libavcodec/libvorbisenc.c
@@ -1,42 +1,34 @@
 /*
- * copyright (c) 2002 Mark Hills <mark@pogo.org.uk>
+ * Copyright (c) 2002 Mark Hills <mark@pogo.org.uk>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-/**
- * @file
- * Vorbis encoding support via libvorbisenc.
- * @author Mark Hills <mark@pogo.org.uk>
- */
-
 #include <vorbis/vorbisenc.h>
 
+#include "libavutil/avassert.h"
 #include "libavutil/fifo.h"
 #include "libavutil/opt.h"
 #include "avcodec.h"
 #include "audio_frame_queue.h"
-#include "bytestream.h"
 #include "internal.h"
 #include "vorbis.h"
 #include "vorbis_parser.h"
 
-#undef NDEBUG
-#include <assert.h>
 
 /* Number of samples the user should send in each call.
  * This value is used because it is the LCD of all possible frame sizes, so
@@ -47,7 +39,7 @@
 
 #define BUFFER_SIZE (1024 * 64)
 
-typedef struct LibvorbisContext {
+typedef struct LibvorbisEncContext {
     AVClass *av_class;                  /**< class for AVOptions            */
     vorbis_info vi;                     /**< vorbis_info used during init   */
     vorbis_dsp_state vd;                /**< DSP state used for analysis    */
@@ -56,14 +48,13 @@ typedef struct LibvorbisContext {
     int eof;                            /**< end-of-file flag               */
     int dsp_initialized;                /**< vd has been initialized        */
     vorbis_comment vc;                  /**< VorbisComment info             */
-    ogg_packet op;                      /**< ogg packet                     */
     double iblock;                      /**< impulse block bias option      */
     VorbisParseContext vp;              /**< parse context to get durations */
     AudioFrameQueue afq;                /**< frame queue for timestamps     */
-} LibvorbisContext;
+} LibvorbisEncContext;
 
 static const AVOption options[] = {
-    { "iblock", "Sets the impulse block bias", offsetof(LibvorbisContext, iblock), AV_OPT_TYPE_DOUBLE, { .dbl = 0 }, -15, 0, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
+    { "iblock", "Sets the impulse block bias", offsetof(LibvorbisEncContext, iblock), AV_OPT_TYPE_DOUBLE, { .dbl = 0 }, -15, 0, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
     { NULL }
 };
 
@@ -72,8 +63,12 @@ static const AVCodecDefault defaults[] = {
     { NULL },
 };
 
-static const AVClass class = { "libvorbis", av_default_item_name, options, LIBAVUTIL_VERSION_INT };
-
+static const AVClass vorbis_class = {
+    .class_name = "libvorbis",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
 
 static int vorbis_error_to_averror(int ov_err)
 {
@@ -87,7 +82,7 @@ static int vorbis_error_to_averror(int ov_err)
 
 static av_cold int libvorbis_setup(vorbis_info *vi, AVCodecContext *avctx)
 {
-    LibvorbisContext *s = avctx->priv_data;
+    LibvorbisEncContext *s = avctx->priv_data;
     double cfreq;
     int ret;
 
@@ -117,14 +112,14 @@ static av_cold int libvorbis_setup(vorbis_info *vi, AVCodecContext *avctx)
         /* variable bitrate by estimate, disable slow rate management */
         if (minrate == -1 && maxrate == -1)
             if ((ret = vorbis_encode_ctl(vi, OV_ECTL_RATEMANAGE2_SET, NULL)))
-                goto error;
+                goto error; /* should not happen */
     }
 
     /* cutoff frequency */
     if (avctx->cutoff > 0) {
         cfreq = avctx->cutoff / 1000.0;
         if ((ret = vorbis_encode_ctl(vi, OV_ECTL_LOWPASS_SET, &cfreq)))
-            goto error;
+            goto error; /* should not happen */
     }
 
     /* impulse block bias */
@@ -133,6 +128,35 @@ static av_cold int libvorbis_setup(vorbis_info *vi, AVCodecContext *avctx)
             goto error;
     }
 
+    if (avctx->channels == 3 &&
+            avctx->channel_layout != (AV_CH_LAYOUT_STEREO|AV_CH_FRONT_CENTER) ||
+        avctx->channels == 4 &&
+            avctx->channel_layout != AV_CH_LAYOUT_2_2 &&
+            avctx->channel_layout != AV_CH_LAYOUT_QUAD ||
+        avctx->channels == 5 &&
+            avctx->channel_layout != AV_CH_LAYOUT_5POINT0 &&
+            avctx->channel_layout != AV_CH_LAYOUT_5POINT0_BACK ||
+        avctx->channels == 6 &&
+            avctx->channel_layout != AV_CH_LAYOUT_5POINT1 &&
+            avctx->channel_layout != AV_CH_LAYOUT_5POINT1_BACK ||
+        avctx->channels == 7 &&
+            avctx->channel_layout != (AV_CH_LAYOUT_5POINT1|AV_CH_BACK_CENTER) ||
+        avctx->channels == 8 &&
+            avctx->channel_layout != AV_CH_LAYOUT_7POINT1) {
+        if (avctx->channel_layout) {
+            char name[32];
+            av_get_channel_layout_string(name, sizeof(name), avctx->channels,
+                                         avctx->channel_layout);
+            av_log(avctx, AV_LOG_ERROR, "%s not supported by Vorbis: "
+                                             "output stream will have incorrect "
+                                             "channel layout.\n", name);
+        } else {
+            av_log(avctx, AV_LOG_WARNING, "No channel layout specified. The encoder "
+                                               "will use Vorbis channel layout for "
+                                               "%d channels.\n", avctx->channels);
+        }
+    }
+
     if ((ret = vorbis_encode_setup_init(vi)))
         goto error;
 
@@ -149,7 +173,7 @@ static int xiph_len(int l)
 
 static av_cold int libvorbis_encode_close(AVCodecContext *avctx)
 {
-    LibvorbisContext *s = avctx->priv_data;
+    LibvorbisEncContext *s = avctx->priv_data;
 
     /* notify vorbisenc this is EOF */
     if (s->dsp_initialized)
@@ -159,7 +183,7 @@ static av_cold int libvorbis_encode_close(AVCodecContext *avctx)
     vorbis_dsp_clear(&s->vd);
     vorbis_info_clear(&s->vi);
 
-    av_fifo_free(s->pkt_fifo);
+    av_fifo_freep(&s->pkt_fifo);
     ff_af_queue_close(&s->afq);
     av_freep(&avctx->extradata);
 
@@ -168,7 +192,7 @@ static av_cold int libvorbis_encode_close(AVCodecContext *avctx)
 
 static av_cold int libvorbis_encode_init(AVCodecContext *avctx)
 {
-    LibvorbisContext *s = avctx->priv_data;
+    LibvorbisEncContext *s = avctx->priv_data;
     ogg_packet header, header_comm, header_code;
     uint8_t *p;
     unsigned int offset;
@@ -192,7 +216,8 @@ static av_cold int libvorbis_encode_init(AVCodecContext *avctx)
     }
 
     vorbis_comment_init(&s->vc);
-    vorbis_comment_add_tag(&s->vc, "encoder", LIBAVCODEC_IDENT);
+    if (!(avctx->flags & CODEC_FLAG_BITEXACT))
+        vorbis_comment_add_tag(&s->vc, "encoder", LIBAVCODEC_IDENT);
 
     if ((ret = vorbis_analysis_headerout(&s->vd, &s->vc, &header, &header_comm,
                                          &header_code))) {
@@ -219,7 +244,7 @@ static av_cold int libvorbis_encode_init(AVCodecContext *avctx)
     offset += header_comm.bytes;
     memcpy(&p[offset], header_code.packet, header_code.bytes);
     offset += header_code.bytes;
-    assert(offset == avctx->extradata_size);
+    av_assert0(offset == avctx->extradata_size);
 
     if ((ret = avpriv_vorbis_parse_extradata(avctx, &s->vp)) < 0) {
         av_log(avctx, AV_LOG_ERROR, "invalid extradata\n");
@@ -246,7 +271,7 @@ error:
 static int libvorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                                   const AVFrame *frame, int *got_packet_ptr)
 {
-    LibvorbisContext *s = avctx->priv_data;
+    LibvorbisEncContext *s = avctx->priv_data;
     ogg_packet op;
     int ret, duration;
 
@@ -270,7 +295,7 @@ static int libvorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
         if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
             return ret;
     } else {
-        if (!s->eof)
+        if (!s->eof && s->afq.frame_alloc)
             if ((ret = vorbis_analysis_wrote(&s->vd, 0)) < 0) {
                 av_log(avctx, AV_LOG_ERROR, "error in vorbis_analysis_wrote()\n");
                 return vorbis_error_to_averror(ret);
@@ -288,7 +313,7 @@ static int libvorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
         /* add any available packets to the output packet buffer */
         while ((ret = vorbis_bitrate_flushpacket(&s->vd, &op)) == 1) {
             if (av_fifo_space(s->pkt_fifo) < sizeof(ogg_packet) + op.bytes) {
-                av_log(avctx, AV_LOG_ERROR, "packet buffer is too small");
+                av_log(avctx, AV_LOG_ERROR, "packet buffer is too small\n");
                 return AVERROR_BUG;
             }
             av_fifo_generic_write(s->pkt_fifo, &op, sizeof(ogg_packet), NULL);
@@ -310,10 +335,8 @@ static int libvorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 
     av_fifo_generic_read(s->pkt_fifo, &op, sizeof(ogg_packet), NULL);
 
-    if ((ret = ff_alloc_packet(avpkt, op.bytes))) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+    if ((ret = ff_alloc_packet2(avctx, avpkt, op.bytes)) < 0)
         return ret;
-    }
     av_fifo_generic_read(s->pkt_fifo, avpkt->data, op.bytes, NULL);
 
     avpkt->pts = ff_samples_to_time_base(avctx, op.granulepos);
@@ -322,9 +345,12 @@ static int libvorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     if (duration > 0) {
         /* we do not know encoder delay until we get the first packet from
          * libvorbis, so we have to update the AudioFrameQueue counts */
-        if (!avctx->delay) {
+        if (!avctx->delay && s->afq.frames) {
             avctx->delay              = duration;
-            s->afq.remaining_delay   += duration;
+            av_assert0(!s->afq.remaining_delay);
+            s->afq.frames->duration  += duration;
+            if (s->afq.frames->pts != AV_NOPTS_VALUE)
+                s->afq.frames->pts       -= duration;
             s->afq.remaining_samples += duration;
         }
         ff_af_queue_remove(&s->afq, duration, &avpkt->pts, &avpkt->duration);
@@ -336,16 +362,16 @@ static int libvorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 
 AVCodec ff_libvorbis_encoder = {
     .name           = "libvorbis",
-    .long_name      = NULL_IF_CONFIG_SMALL("libvorbis Vorbis"),
+    .long_name      = NULL_IF_CONFIG_SMALL("libvorbis"),
     .type           = AVMEDIA_TYPE_AUDIO,
     .id             = AV_CODEC_ID_VORBIS,
-    .priv_data_size = sizeof(LibvorbisContext),
+    .priv_data_size = sizeof(LibvorbisEncContext),
     .init           = libvorbis_encode_init,
     .encode2        = libvorbis_encode_frame,
     .close          = libvorbis_encode_close,
     .capabilities   = CODEC_CAP_DELAY,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
-    .priv_class     = &class,
+    .priv_class     = &vorbis_class,
     .defaults       = defaults,
 };
diff --git a/libavcodec/libvpx.c b/libavcodec/libvpx.c
index 20f4484..5d29893 100644
--- a/libavcodec/libvpx.c
+++ b/libavcodec/libvpx.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2013 Guillaume Martres <smarter@ubuntu.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -22,14 +22,9 @@
 
 #include "libvpx.h"
 
-int ff_vp9_check_experimental(AVCodecContext *avctx)
+av_cold void ff_vp9_init_static(AVCodec *codec)
 {
-    if (avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL &&
-        (vpx_codec_version_major() < 1 ||
-         (vpx_codec_version_major() == 1 && vpx_codec_version_minor() < 3))) {
-        av_log(avctx, AV_LOG_ERROR,
-               "Non-experimental support of VP9 requires libvpx >= 1.3.0\n");
-        return AVERROR_EXPERIMENTAL;
-    }
-    return 0;
+    if (    vpx_codec_version_major() < 1
+        || (vpx_codec_version_major() == 1 && vpx_codec_version_minor() < 3))
+        codec->capabilities |= CODEC_CAP_EXPERIMENTAL;
 }
diff --git a/libavcodec/libvpx.h b/libavcodec/libvpx.h
index cb1ed09..36a275c 100644
--- a/libavcodec/libvpx.h
+++ b/libavcodec/libvpx.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2013 Guillaume Martres <smarter@ubuntu.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -23,6 +23,6 @@
 
 #include "avcodec.h"
 
-int ff_vp9_check_experimental(AVCodecContext *avctx);
+void ff_vp9_init_static(AVCodec *codec);
 
 #endif /* AVCODEC_LIBVPX_H */
diff --git a/libavcodec/libvpxdec.c b/libavcodec/libvpxdec.c
index 6052207..94e1e4d 100644
--- a/libavcodec/libvpxdec.c
+++ b/libavcodec/libvpxdec.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2010, Google, Inc.
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -97,7 +97,7 @@ static int vp8_decode(AVCodecContext *avctx,
         }
         if ((ret = ff_get_buffer(avctx, picture, 0)) < 0)
             return ret;
-        av_image_copy(picture->data, picture->linesize, img->planes,
+        av_image_copy(picture->data, picture->linesize, (const uint8_t **)img->planes,
                       img->stride, avctx->pix_fmt, img->d_w, img->d_h);
         *got_frame           = 1;
     }
@@ -133,9 +133,6 @@ AVCodec ff_libvpx_vp8_decoder = {
 #if CONFIG_LIBVPX_VP9_DECODER
 static av_cold int vp9_init(AVCodecContext *avctx)
 {
-    int ret;
-    if ((ret = ff_vp9_check_experimental(avctx)))
-        return ret;
     return vpx_init(avctx, &vpx_codec_vp9_dx_algo);
 }
 
@@ -148,6 +145,7 @@ AVCodec ff_libvpx_vp9_decoder = {
     .init           = vp9_init,
     .close          = vp8_free,
     .decode         = vp8_decode,
-    .capabilities   = CODEC_CAP_AUTO_THREADS,
+    .capabilities   = CODEC_CAP_AUTO_THREADS | CODEC_CAP_DR1,
+    .init_static_data = ff_vp9_init_static,
 };
 #endif /* CONFIG_LIBVPX_VP9_DECODER */
diff --git a/libavcodec/libvpxenc.c b/libavcodec/libvpxenc.c
index 99f8b3e..3dddffd 100644
--- a/libavcodec/libvpxenc.c
+++ b/libavcodec/libvpxenc.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2010, Google, Inc.
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -30,9 +30,11 @@
 
 #include "avcodec.h"
 #include "internal.h"
+#include "libavutil/avassert.h"
 #include "libvpx.h"
 #include "libavutil/base64.h"
 #include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
 #include "libavutil/mathematics.h"
 #include "libavutil/opt.h"
 
@@ -43,11 +45,16 @@
 struct FrameListData {
     void *buf;                       /**< compressed data buffer */
     size_t sz;                       /**< length of compressed data */
+    void *buf_alpha;
+    size_t sz_alpha;
     int64_t pts;                     /**< time stamp to show frame
                                           (in timebase units) */
     unsigned long duration;          /**< duration to show frame
                                           (in timebase units) */
     uint32_t flags;                  /**< flags for this frame */
+    uint64_t sse[4];
+    int have_sse;                    /**< true if we have pending sse[] */
+    uint64_t frame_number;
     struct FrameListData *next;
 };
 
@@ -55,17 +62,40 @@ typedef struct VP8EncoderContext {
     AVClass *class;
     struct vpx_codec_ctx encoder;
     struct vpx_image rawimg;
+    struct vpx_codec_ctx encoder_alpha;
+    struct vpx_image rawimg_alpha;
+    uint8_t is_alpha;
     struct vpx_fixed_buf twopass_stats;
-    unsigned long deadline; //i.e., RT/GOOD/BEST
+    int deadline; //i.e., RT/GOOD/BEST
+    uint64_t sse[4];
+    int have_sse; /**< true if we have pending sse[] */
+    uint64_t frame_number;
     struct FrameListData *coded_frame_list;
+
     int cpu_used;
+    /**
+     * VP8 specific flags, see VP8F_* below.
+     */
+    int flags;
+#define VP8F_ERROR_RESILIENT 0x00000001 ///< Enable measures appropriate for streaming over lossy links
+#define VP8F_AUTO_ALT_REF    0x00000002 ///< Enable automatic alternate reference frame generation
+
     int auto_alt_ref;
+
     int arnr_max_frames;
     int arnr_strength;
     int arnr_type;
+
     int lag_in_frames;
     int error_resilient;
     int crf;
+    int max_intra_rate;
+
+    // VP9-only
+    int lossless;
+    int tile_columns;
+    int tile_rows;
+    int frame_parallel;
 } VP8Context;
 
 /** String mappings for enum vp8e_enc_control_id */
@@ -87,6 +117,13 @@ static const char *const ctlidstr[] = {
     [VP8E_SET_ARNR_STRENGTH]     = "VP8E_SET_ARNR_STRENGTH",
     [VP8E_SET_ARNR_TYPE]         = "VP8E_SET_ARNR_TYPE",
     [VP8E_SET_CQ_LEVEL]          = "VP8E_SET_CQ_LEVEL",
+    [VP8E_SET_MAX_INTRA_BITRATE_PCT] = "VP8E_SET_MAX_INTRA_BITRATE_PCT",
+#if CONFIG_LIBVPX_VP9_ENCODER
+    [VP9E_SET_LOSSLESS]                = "VP9E_SET_LOSSLESS",
+    [VP9E_SET_TILE_COLUMNS]            = "VP9E_SET_TILE_COLUMNS",
+    [VP9E_SET_TILE_ROWS]               = "VP9E_SET_TILE_ROWS",
+    [VP9E_SET_FRAME_PARALLEL_DECODING] = "VP9E_SET_FRAME_PARALLEL_DECODING",
+#endif
 };
 
 static av_cold void log_encoder_error(AVCodecContext *avctx, const char *desc)
@@ -121,7 +158,7 @@ static av_cold void dump_enc_cfg(AVCodecContext *avctx,
            width, "g_lag_in_frames:",   cfg->g_lag_in_frames);
     av_log(avctx, level, "rate control settings\n"
            "  %*s%u\n  %*s%u\n  %*s%u\n  %*s%u\n"
-           "  %*s%d\n  %*s%p(%zu)\n  %*s%u\n",
+           "  %*s%d\n  %*s%p(%"SIZE_SPECIFIER")\n  %*s%u\n",
            width, "rc_dropframe_thresh:",   cfg->rc_dropframe_thresh,
            width, "rc_resize_allowed:",     cfg->rc_resize_allowed,
            width, "rc_resize_up_thresh:",   cfg->rc_resize_up_thresh,
@@ -168,6 +205,8 @@ static void coded_frame_add(void *list, struct FrameListData *cx_frame)
 static av_cold void free_coded_frame(struct FrameListData *cx_frame)
 {
     av_freep(&cx_frame->buf);
+    /* av_freep() is a no-op when the pointer is NULL, so no guard is needed */
+    av_freep(&cx_frame->buf_alpha);
     av_freep(&cx_frame);
 }
 
@@ -208,6 +247,8 @@ static av_cold int vp8_free(AVCodecContext *avctx)
     VP8Context *ctx = avctx->priv_data;
 
     vpx_codec_destroy(&ctx->encoder);
+    if (ctx->is_alpha)
+        vpx_codec_destroy(&ctx->encoder_alpha);
     av_freep(&ctx->twopass_stats.buf);
     av_freep(&avctx->coded_frame);
     av_freep(&avctx->stats_out);
@@ -220,16 +261,28 @@ static av_cold int vpx_init(AVCodecContext *avctx,
 {
     VP8Context *ctx = avctx->priv_data;
     struct vpx_codec_enc_cfg enccfg;
+    struct vpx_codec_enc_cfg enccfg_alpha;
+    vpx_codec_flags_t flags = (avctx->flags & CODEC_FLAG_PSNR) ? VPX_CODEC_USE_PSNR : 0;
     int res;
 
     av_log(avctx, AV_LOG_INFO, "%s\n", vpx_codec_version_str());
     av_log(avctx, AV_LOG_VERBOSE, "%s\n", vpx_codec_build_config());
 
+    if (avctx->pix_fmt == AV_PIX_FMT_YUVA420P)
+        ctx->is_alpha = 1;
+
     if ((res = vpx_codec_enc_config_default(iface, &enccfg, 0)) != VPX_CODEC_OK) {
         av_log(avctx, AV_LOG_ERROR, "Failed to get config: %s\n",
                vpx_codec_err_to_string(res));
         return AVERROR(EINVAL);
     }
+
+    if(!avctx->bit_rate)
+        if(avctx->rc_max_rate || avctx->rc_buffer_size || avctx->rc_initial_buffer_occupancy) {
+            av_log( avctx, AV_LOG_ERROR, "Rate control parameters set without a bitrate\n");
+            return AVERROR(EINVAL);
+        }
+
     dump_enc_cfg(avctx, &enccfg);
 
     enccfg.g_w            = avctx->width;
@@ -237,9 +290,7 @@ static av_cold int vpx_init(AVCodecContext *avctx,
     enccfg.g_timebase.num = avctx->time_base.num;
     enccfg.g_timebase.den = avctx->time_base.den;
     enccfg.g_threads      = avctx->thread_count;
-
-    if (ctx->lag_in_frames >= 0)
-        enccfg.g_lag_in_frames = ctx->lag_in_frames;
+    enccfg.g_lag_in_frames= ctx->lag_in_frames;
 
     if (avctx->flags & CODEC_FLAG_PASS1)
         enccfg.g_pass = VPX_RC_FIRST_PASS;
@@ -248,28 +299,47 @@ static av_cold int vpx_init(AVCodecContext *avctx,
     else
         enccfg.g_pass = VPX_RC_ONE_PASS;
 
-    if (!avctx->bit_rate)
-        avctx->bit_rate = enccfg.rc_target_bitrate * 1000;
-    else
-        enccfg.rc_target_bitrate = av_rescale_rnd(avctx->bit_rate, 1, 1000,
-                                              AV_ROUND_NEAR_INF);
-
-    if (ctx->crf)
-        enccfg.rc_end_usage = VPX_CQ;
-    else if (avctx->rc_min_rate == avctx->rc_max_rate &&
-             avctx->rc_min_rate == avctx->bit_rate)
+    if (avctx->rc_min_rate == avctx->rc_max_rate &&
+        avctx->rc_min_rate == avctx->bit_rate && avctx->bit_rate)
         enccfg.rc_end_usage = VPX_CBR;
+    else if (ctx->crf)
+        enccfg.rc_end_usage = VPX_CQ;
 
-    if (avctx->qmin > 0)
+    if (avctx->bit_rate) {
+        enccfg.rc_target_bitrate = av_rescale_rnd(avctx->bit_rate, 1, 1000,
+                                                AV_ROUND_NEAR_INF);
+    } else {
+        if (enccfg.rc_end_usage == VPX_CQ) {
+            enccfg.rc_target_bitrate = 1000000;
+        } else {
+            avctx->bit_rate = enccfg.rc_target_bitrate * 1000;
+            av_log(avctx, AV_LOG_WARNING,
+                   "Neither bitrate nor constrained quality specified, using default bitrate of %dkbit/sec\n",
+                   enccfg.rc_target_bitrate);
+        }
+    }
+
+    if (avctx->qmin >= 0)
         enccfg.rc_min_quantizer = avctx->qmin;
-    if (avctx->qmax > 0)
+    if (avctx->qmax >= 0)
         enccfg.rc_max_quantizer = avctx->qmax;
+
+    if (enccfg.rc_end_usage == VPX_CQ) {
+        if (ctx->crf < enccfg.rc_min_quantizer || ctx->crf > enccfg.rc_max_quantizer) {
+                av_log(avctx, AV_LOG_ERROR,
+                       "CQ level must be between minimum and maximum quantizer value (%d-%d)\n",
+                       enccfg.rc_min_quantizer, enccfg.rc_max_quantizer);
+                return AVERROR(EINVAL);
+        }
+    }
+
     enccfg.rc_dropframe_thresh = avctx->frame_skip_threshold;
 
     //0-100 (0 => CBR, 100 => VBR)
     enccfg.rc_2pass_vbr_bias_pct           = round(avctx->qcompress * 100);
-    enccfg.rc_2pass_vbr_minsection_pct     =
-        avctx->rc_min_rate * 100LL / avctx->bit_rate;
+    if (avctx->bit_rate)
+        enccfg.rc_2pass_vbr_minsection_pct     =
+            avctx->rc_min_rate * 100LL / avctx->bit_rate;
     if (avctx->rc_max_rate)
         enccfg.rc_2pass_vbr_maxsection_pct =
             avctx->rc_max_rate * 100LL / avctx->bit_rate;
@@ -281,6 +351,7 @@ static av_cold int vpx_init(AVCodecContext *avctx,
         enccfg.rc_buf_initial_sz =
             avctx->rc_initial_buffer_occupancy * 1000LL / avctx->bit_rate;
     enccfg.rc_buf_optimal_sz     = enccfg.rc_buf_sz * 5 / 6;
+    enccfg.rc_undershoot_pct     = round(avctx->rc_buffer_aggressivity * 100);
 
     //_enc_init() will balk if kf_min_dist differs from max w/VPX_KF_AUTO
     if (avctx->keyint_min >= 0 && avctx->keyint_min == avctx->gop_size)
@@ -302,7 +373,7 @@ static av_cold int vpx_init(AVCodecContext *avctx,
         ctx->twopass_stats.buf = av_malloc(ctx->twopass_stats.sz);
         if (!ctx->twopass_stats.buf) {
             av_log(avctx, AV_LOG_ERROR,
-                   "Stat buffer alloc (%zu bytes) failed\n",
+                   "Stat buffer alloc (%"SIZE_SPECIFIER" bytes) failed\n",
                    ctx->twopass_stats.sz);
             return AVERROR(ENOMEM);
         }
@@ -323,20 +394,30 @@ static av_cold int vpx_init(AVCodecContext *avctx,
    if (avctx->profile != FF_PROFILE_UNKNOWN)
        enccfg.g_profile = avctx->profile;
 
-    enccfg.g_error_resilient = ctx->error_resilient;
+    enccfg.g_error_resilient = ctx->error_resilient || ctx->flags & VP8F_ERROR_RESILIENT;
 
     dump_enc_cfg(avctx, &enccfg);
     /* Construct Encoder Context */
-    res = vpx_codec_enc_init(&ctx->encoder, iface, &enccfg, 0);
+    res = vpx_codec_enc_init(&ctx->encoder, iface, &enccfg, flags);
     if (res != VPX_CODEC_OK) {
         log_encoder_error(avctx, "Failed to initialize encoder");
         return AVERROR(EINVAL);
     }
 
+    if (ctx->is_alpha) {
+        enccfg_alpha = enccfg;
+        res = vpx_codec_enc_init(&ctx->encoder_alpha, iface, &enccfg_alpha, flags);
+        if (res != VPX_CODEC_OK) {
+            log_encoder_error(avctx, "Failed to initialize alpha encoder");
+            return AVERROR(EINVAL);
+        }
+    }
+
     //codec control failures are currently treated only as warnings
     av_log(avctx, AV_LOG_DEBUG, "vpx_codec_control\n");
-    if (ctx->cpu_used != INT_MIN)
-        codecctl_int(avctx, VP8E_SET_CPUUSED,          ctx->cpu_used);
+    codecctl_int(avctx, VP8E_SET_CPUUSED,          ctx->cpu_used);
+    if (ctx->flags & VP8F_AUTO_ALT_REF)
+        ctx->auto_alt_ref = 1;
     if (ctx->auto_alt_ref >= 0)
         codecctl_int(avctx, VP8E_SET_ENABLEAUTOALTREF, ctx->auto_alt_ref);
     if (ctx->arnr_max_frames >= 0)
@@ -346,14 +427,36 @@ static av_cold int vpx_init(AVCodecContext *avctx,
     if (ctx->arnr_type >= 0)
         codecctl_int(avctx, VP8E_SET_ARNR_TYPE,        ctx->arnr_type);
     codecctl_int(avctx, VP8E_SET_NOISE_SENSITIVITY, avctx->noise_reduction);
-    codecctl_int(avctx, VP8E_SET_TOKEN_PARTITIONS,  av_log2(avctx->slices));
+    if (avctx->codec_id == AV_CODEC_ID_VP8)
+        codecctl_int(avctx, VP8E_SET_TOKEN_PARTITIONS,  av_log2(avctx->slices));
     codecctl_int(avctx, VP8E_SET_STATIC_THRESHOLD,  avctx->mb_threshold);
     codecctl_int(avctx, VP8E_SET_CQ_LEVEL,          ctx->crf);
+    if (ctx->max_intra_rate >= 0)
+        codecctl_int(avctx, VP8E_SET_MAX_INTRA_BITRATE_PCT, ctx->max_intra_rate);
+
+#if CONFIG_LIBVPX_VP9_ENCODER
+    if (avctx->codec_id == AV_CODEC_ID_VP9) {
+        if (ctx->lossless >= 0)
+            codecctl_int(avctx, VP9E_SET_LOSSLESS, ctx->lossless);
+        if (ctx->tile_columns >= 0)
+            codecctl_int(avctx, VP9E_SET_TILE_COLUMNS, ctx->tile_columns);
+        if (ctx->tile_rows >= 0)
+            codecctl_int(avctx, VP9E_SET_TILE_ROWS, ctx->tile_rows);
+        if (ctx->frame_parallel >= 0)
+            codecctl_int(avctx, VP9E_SET_FRAME_PARALLEL_DECODING, ctx->frame_parallel);
+    }
+#endif
+
+    av_log(avctx, AV_LOG_DEBUG, "Using deadline: %d\n", ctx->deadline);
 
     //provide dummy value to initialize wrapper, values will be updated each _encode()
     vpx_img_wrap(&ctx->rawimg, VPX_IMG_FMT_I420, avctx->width, avctx->height, 1,
                  (unsigned char*)1);
 
+    if (ctx->is_alpha)
+        vpx_img_wrap(&ctx->rawimg_alpha, VPX_IMG_FMT_I420, avctx->width, avctx->height, 1,
+                     (unsigned char*)1);
+
     avctx->coded_frame = av_frame_alloc();
     if (!avctx->coded_frame) {
         av_log(avctx, AV_LOG_ERROR, "Error allocating coded frame\n");
@@ -364,13 +467,39 @@ static av_cold int vpx_init(AVCodecContext *avctx,
 }
 
 static inline void cx_pktcpy(struct FrameListData *dst,
-                             const struct vpx_codec_cx_pkt *src)
+                             const struct vpx_codec_cx_pkt *src,
+                             const struct vpx_codec_cx_pkt *src_alpha,
+                             VP8Context *ctx)
 {
     dst->pts      = src->data.frame.pts;
     dst->duration = src->data.frame.duration;
     dst->flags    = src->data.frame.flags;
     dst->sz       = src->data.frame.sz;
     dst->buf      = src->data.frame.buf;
+    dst->have_sse = 0;
+    /* For alt-ref frame, don't store PSNR or increment frame_number */
+    if (!(dst->flags & VPX_FRAME_IS_INVISIBLE)) {
+        dst->frame_number = ++ctx->frame_number;
+        dst->have_sse = ctx->have_sse;
+        if (ctx->have_sse) {
+            /* associate last-seen SSE to the frame. */
+            /* Transfers ownership from ctx to dst. */
+            /* WARNING! This makes the assumption that PSNR_PKT comes
+               just before the frame it refers to! */
+            memcpy(dst->sse, ctx->sse, sizeof(dst->sse));
+            ctx->have_sse = 0;
+        }
+    } else {
+        dst->frame_number = -1;   /* sanity marker */
+    }
+    if (src_alpha) {
+        dst->buf_alpha = src_alpha->data.frame.buf;
+        dst->sz_alpha = src_alpha->data.frame.sz;
+    }
+    else {
+        dst->buf_alpha = NULL;
+        dst->sz_alpha = 0;
+    }
 }
 
 /**
@@ -383,7 +512,8 @@ static inline void cx_pktcpy(struct FrameListData *dst,
 static int storeframe(AVCodecContext *avctx, struct FrameListData *cx_frame,
                       AVPacket *pkt, AVFrame *coded_frame)
 {
-    int ret = ff_alloc_packet(pkt, cx_frame->sz);
+    int ret = ff_alloc_packet2(avctx, pkt, cx_frame->sz);
+    uint8_t *side_data;
     if (ret >= 0) {
         memcpy(pkt->data, cx_frame->buf, pkt->size);
         pkt->pts = pkt->dts    = cx_frame->pts;
@@ -395,9 +525,32 @@ static int storeframe(AVCodecContext *avctx, struct FrameListData *cx_frame,
             pkt->flags            |= AV_PKT_FLAG_KEY;
         } else
             coded_frame->pict_type = AV_PICTURE_TYPE_P;
+
+        if (cx_frame->have_sse) {
+            int i;
+            /* Beware of the Y/U/V/all order! */
+            coded_frame->error[0] = cx_frame->sse[1];
+            coded_frame->error[1] = cx_frame->sse[2];
+            coded_frame->error[2] = cx_frame->sse[3];
+            coded_frame->error[3] = 0;    // alpha
+            for (i = 0; i < 4; ++i) {
+                avctx->error[i] += coded_frame->error[i];
+            }
+            cx_frame->have_sse = 0;
+        }
+        if (cx_frame->sz_alpha > 0) {
+            side_data = av_packet_new_side_data(pkt,
+                                                AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL,
+                                                cx_frame->sz_alpha + 8);
+            if (!side_data) {
+                /* pkt itself was handed in by the caller: release its payload only */
+                av_free_packet(pkt);
+                return AVERROR(ENOMEM);
+            }
+            AV_WB64(side_data, 1);
+            memcpy(side_data + 8, cx_frame->buf_alpha, cx_frame->sz_alpha);
+        }
     } else {
-        av_log(avctx, AV_LOG_ERROR,
-               "Error getting output packet of size %zu.\n", cx_frame->sz);
         return ret;
     }
     return pkt->size;
@@ -416,7 +569,9 @@ static int queue_frames(AVCodecContext *avctx, AVPacket *pkt_out,
 {
     VP8Context *ctx = avctx->priv_data;
     const struct vpx_codec_cx_pkt *pkt;
+    const struct vpx_codec_cx_pkt *pkt_alpha = NULL;
     const void *iter = NULL;
+    const void *iter_alpha = NULL;
     int size = 0;
 
     if (ctx->coded_frame_list) {
@@ -431,7 +586,9 @@ static int queue_frames(AVCodecContext *avctx, AVPacket *pkt_out,
 
     /* consume all available output from the encoder before returning. buffers
        are only good through the next vpx_codec call */
-    while ((pkt = vpx_codec_get_cx_data(&ctx->encoder, &iter))) {
+    while ((pkt = vpx_codec_get_cx_data(&ctx->encoder, &iter)) &&
+            (!ctx->is_alpha ||
+             (ctx->is_alpha && (pkt_alpha = vpx_codec_get_cx_data(&ctx->encoder_alpha, &iter_alpha))))) {
         switch (pkt->kind) {
         case VPX_CODEC_CX_FRAME_PKT:
             if (!size) {
@@ -439,8 +596,8 @@ static int queue_frames(AVCodecContext *avctx, AVPacket *pkt_out,
 
                 /* avoid storing the frame when the list is empty and we haven't yet
                    provided a frame for output */
-                assert(!ctx->coded_frame_list);
-                cx_pktcpy(&cx_frame, pkt);
+                av_assert0(!ctx->coded_frame_list);
+                cx_pktcpy(&cx_frame, pkt, pkt_alpha, ctx);
                 size = storeframe(avctx, &cx_frame, pkt_out, coded_frame);
                 if (size < 0)
                     return size;
@@ -453,16 +610,28 @@ static int queue_frames(AVCodecContext *avctx, AVPacket *pkt_out,
                            "Frame queue element alloc failed\n");
                     return AVERROR(ENOMEM);
                 }
-                cx_pktcpy(cx_frame, pkt);
+                cx_pktcpy(cx_frame, pkt, pkt_alpha, ctx);
                 cx_frame->buf = av_malloc(cx_frame->sz);
 
                 if (!cx_frame->buf) {
                     av_log(avctx, AV_LOG_ERROR,
-                           "Data buffer alloc (%zu bytes) failed\n",
+                           "Data buffer alloc (%"SIZE_SPECIFIER" bytes) failed\n",
                            cx_frame->sz);
+                    av_free(cx_frame);
                     return AVERROR(ENOMEM);
                 }
                 memcpy(cx_frame->buf, pkt->data.frame.buf, pkt->data.frame.sz);
+                if (ctx->is_alpha) {
+                    cx_frame->buf_alpha = av_malloc(cx_frame->sz_alpha);
+                    if (!cx_frame->buf_alpha) {
+                        av_log(avctx, AV_LOG_ERROR,
+                               "Data buffer alloc (%"SIZE_SPECIFIER" bytes) failed\n",
+                               cx_frame->sz_alpha);
+                        free_coded_frame(cx_frame);
+                        return AVERROR(ENOMEM);
+                    }
+                    memcpy(cx_frame->buf_alpha, pkt_alpha->data.frame.buf, pkt_alpha->data.frame.sz);
+                }
                 coded_frame_add(&ctx->coded_frame_list, cx_frame);
             }
             break;
@@ -481,7 +650,14 @@ static int queue_frames(AVCodecContext *avctx, AVPacket *pkt_out,
             stats->sz += pkt->data.twopass_stats.sz;
             break;
         }
-        case VPX_CODEC_PSNR_PKT: //FIXME add support for CODEC_FLAG_PSNR
+        case VPX_CODEC_PSNR_PKT:
+            av_assert0(!ctx->have_sse);
+            ctx->sse[0] = pkt->data.psnr.sse[0];
+            ctx->sse[1] = pkt->data.psnr.sse[1];
+            ctx->sse[2] = pkt->data.psnr.sse[2];
+            ctx->sse[3] = pkt->data.psnr.sse[3];
+            ctx->have_sse = 1;
+            break;
         case VPX_CODEC_CUSTOM_PKT:
             //ignore unsupported/unrecognized packet types
             break;
@@ -496,6 +672,7 @@ static int vp8_encode(AVCodecContext *avctx, AVPacket *pkt,
 {
     VP8Context *ctx = avctx->priv_data;
     struct vpx_image *rawimg = NULL;
+    struct vpx_image *rawimg_alpha = NULL;
     int64_t timestamp = 0;
     int res, coded_size;
     vpx_enc_frame_flags_t flags = 0;
@@ -508,6 +685,26 @@ static int vp8_encode(AVCodecContext *avctx, AVPacket *pkt,
         rawimg->stride[VPX_PLANE_Y] = frame->linesize[0];
         rawimg->stride[VPX_PLANE_U] = frame->linesize[1];
         rawimg->stride[VPX_PLANE_V] = frame->linesize[2];
+        if (ctx->is_alpha) {
+            /* alpha is fed to the second encoder as luma with constant 0x80 chroma */
+            uint8_t *u_plane = av_malloc(frame->linesize[1] * frame->height);
+            uint8_t *v_plane = av_malloc(frame->linesize[2] * frame->height);
+            if (!u_plane || !v_plane) {
+                av_free(u_plane);
+                av_free(v_plane);
+                return AVERROR(ENOMEM);
+            }
+            memset(u_plane, 0x80, frame->linesize[1] * frame->height);
+            memset(v_plane, 0x80, frame->linesize[2] * frame->height);
+            rawimg_alpha = &ctx->rawimg_alpha;
+            rawimg_alpha->planes[VPX_PLANE_Y] = frame->data[3];
+            rawimg_alpha->planes[VPX_PLANE_U] = u_plane;
+            rawimg_alpha->planes[VPX_PLANE_V] = v_plane;
+            /* data[3] has to be walked with its own stride, not the luma one */
+            rawimg_alpha->stride[VPX_PLANE_Y] = frame->linesize[3];
+            rawimg_alpha->stride[VPX_PLANE_U] = frame->linesize[1];
+            rawimg_alpha->stride[VPX_PLANE_V] = frame->linesize[2];
+        }
         timestamp                   = frame->pts;
         if (frame->pict_type == AV_PICTURE_TYPE_I)
             flags |= VPX_EFLAG_FORCE_KF;
@@ -519,6 +710,16 @@ static int vp8_encode(AVCodecContext *avctx, AVPacket *pkt,
         log_encoder_error(avctx, "Error encoding frame");
         return AVERROR_INVALIDDATA;
     }
+
+    if (ctx->is_alpha) {
+        res = vpx_codec_encode(&ctx->encoder_alpha, rawimg_alpha, timestamp,
+                               avctx->ticks_per_frame, flags, ctx->deadline);
+        if (res != VPX_CODEC_OK) {
+            log_encoder_error(avctx, "Error encoding alpha frame");
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
     coded_size = queue_frames(avctx, pkt, avctx->coded_frame);
 
     if (!frame && avctx->flags & CODEC_FLAG_PASS1) {
@@ -534,39 +735,81 @@ static int vp8_encode(AVCodecContext *avctx, AVPacket *pkt,
                          ctx->twopass_stats.sz);
     }
 
+    if (rawimg_alpha) {
+        av_free(rawimg_alpha->planes[VPX_PLANE_U]);
+        av_free(rawimg_alpha->planes[VPX_PLANE_V]);
+    }
+
     *got_packet = !!coded_size;
     return 0;
 }
 
 #define OFFSET(x) offsetof(VP8Context, x)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
-static const AVOption options[] = {
-    { "cpu-used",        "Quality/Speed ratio modifier",           OFFSET(cpu_used),        AV_OPT_TYPE_INT, {.i64 = 1}, INT_MIN, INT_MAX, VE},
-    { "auto-alt-ref",    "Enable use of alternate reference "
-                         "frames (2-pass only)",                   OFFSET(auto_alt_ref),    AV_OPT_TYPE_INT, {.i64 = -1},      -1,      1,       VE},
-    { "lag-in-frames",   "Number of frames to look ahead for "
-                         "alternate reference frame selection",    OFFSET(lag_in_frames),   AV_OPT_TYPE_INT, {.i64 = -1},      -1,      INT_MAX, VE},
-    { "arnr-maxframes",  "altref noise reduction max frame count", OFFSET(arnr_max_frames), AV_OPT_TYPE_INT, {.i64 = -1},      -1,      INT_MAX, VE},
-    { "arnr-strength",   "altref noise reduction filter strength", OFFSET(arnr_strength),   AV_OPT_TYPE_INT, {.i64 = -1},      -1,      INT_MAX, VE},
-    { "arnr-type",       "altref noise reduction filter type",     OFFSET(arnr_type),       AV_OPT_TYPE_INT, {.i64 = -1},      -1,      INT_MAX, VE, "arnr_type"},
-    { "backward",        NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1}, 0, 0, VE, "arnr_type" },
-    { "forward",         NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2}, 0, 0, VE, "arnr_type" },
-    { "centered",        NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 3}, 0, 0, VE, "arnr_type" },
-    { "deadline",        "Time to spend encoding, in microseconds.", OFFSET(deadline),      AV_OPT_TYPE_INT, {.i64 = VPX_DL_GOOD_QUALITY}, INT_MIN, INT_MAX, VE, "quality"},
-    { "best",            NULL, 0, AV_OPT_TYPE_CONST, {.i64 = VPX_DL_BEST_QUALITY}, 0, 0, VE, "quality"},
-    { "good",            NULL, 0, AV_OPT_TYPE_CONST, {.i64 = VPX_DL_GOOD_QUALITY}, 0, 0, VE, "quality"},
-    { "realtime",        NULL, 0, AV_OPT_TYPE_CONST, {.i64 = VPX_DL_REALTIME},     0, 0, VE, "quality"},
-    { "error-resilient", "Error resilience configuration", OFFSET(error_resilient), AV_OPT_TYPE_FLAGS, {.i64 = 0}, INT_MIN, INT_MAX, VE, "er"},
-#ifdef VPX_ERROR_RESILIENT_DEFAULT
-    { "default",         "Improve resiliency against losses of whole frames", 0, AV_OPT_TYPE_CONST, {.i64 = VPX_ERROR_RESILIENT_DEFAULT}, 0, 0, VE, "er"},
-    { "partitions",      "The frame partitions are independently decodable "
-                         "by the bool decoder, meaning that partitions can be decoded even "
-                         "though earlier partitions have been lost. Note that intra predicition"
-                         " is still done over the partition boundary.",       0, AV_OPT_TYPE_CONST, {.i64 = VPX_ERROR_RESILIENT_PARTITIONS}, 0, 0, VE, "er"},
+
+#ifndef VPX_ERROR_RESILIENT_DEFAULT
+#define VPX_ERROR_RESILIENT_DEFAULT 1
+#define VPX_ERROR_RESILIENT_PARTITIONS 2
 #endif
-    { "crf",              "Select the quality for constant quality mode", offsetof(VP8Context, crf), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 63, VE },
+
+#define COMMON_OPTIONS \
+    { "cpu-used",        "Quality/Speed ratio modifier",           OFFSET(cpu_used),        AV_OPT_TYPE_INT, {.i64 = 1},       -16,     16,      VE}, \
+    { "auto-alt-ref",    "Enable use of alternate reference " \
+                         "frames (2-pass only)",                   OFFSET(auto_alt_ref),    AV_OPT_TYPE_INT, {.i64 = -1},      -1,      1,       VE}, \
+    { "lag-in-frames",   "Number of frames to look ahead for " \
+                         "alternate reference frame selection",    OFFSET(lag_in_frames),   AV_OPT_TYPE_INT, {.i64 = -1},      -1,      INT_MAX, VE}, \
+    { "arnr-maxframes",  "altref noise reduction max frame count", OFFSET(arnr_max_frames), AV_OPT_TYPE_INT, {.i64 = -1},      -1,      INT_MAX, VE}, \
+    { "arnr-strength",   "altref noise reduction filter strength", OFFSET(arnr_strength),   AV_OPT_TYPE_INT, {.i64 = -1},      -1,      INT_MAX, VE}, \
+    { "arnr-type",       "altref noise reduction filter type",     OFFSET(arnr_type),       AV_OPT_TYPE_INT, {.i64 = -1},      -1,      INT_MAX, VE, "arnr_type"}, \
+    { "backward",        NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1}, 0, 0, VE, "arnr_type" }, \
+    { "forward",         NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2}, 0, 0, VE, "arnr_type" }, \
+    { "centered",        NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 3}, 0, 0, VE, "arnr_type" }, \
+    { "deadline",        "Time to spend encoding, in microseconds.", OFFSET(deadline),      AV_OPT_TYPE_INT, {.i64 = VPX_DL_GOOD_QUALITY}, INT_MIN, INT_MAX, VE, "quality"}, \
+    { "best",            NULL, 0, AV_OPT_TYPE_CONST, {.i64 = VPX_DL_BEST_QUALITY}, 0, 0, VE, "quality"}, \
+    { "good",            NULL, 0, AV_OPT_TYPE_CONST, {.i64 = VPX_DL_GOOD_QUALITY}, 0, 0, VE, "quality"}, \
+    { "realtime",        NULL, 0, AV_OPT_TYPE_CONST, {.i64 = VPX_DL_REALTIME},     0, 0, VE, "quality"}, \
+    { "error-resilient", "Error resilience configuration", OFFSET(error_resilient), AV_OPT_TYPE_FLAGS, {.i64 = 0}, INT_MIN, INT_MAX, VE, "er"}, \
+    { "max-intra-rate",  "Maximum I-frame bitrate (pct) 0=unlimited",  OFFSET(max_intra_rate),  AV_OPT_TYPE_INT,  {.i64 = -1}, -1,      INT_MAX, VE}, \
+    { "default",         "Improve resiliency against losses of whole frames", 0, AV_OPT_TYPE_CONST, {.i64 = VPX_ERROR_RESILIENT_DEFAULT}, 0, 0, VE, "er"}, \
+    { "partitions",      "The frame partitions are independently decodable " \
+                         "by the bool decoder, meaning that partitions can be decoded even " \
+                         "though earlier partitions have been lost. Note that intra predicition" \
+                         " is still done over the partition boundary.",       0, AV_OPT_TYPE_CONST, {.i64 = VPX_ERROR_RESILIENT_PARTITIONS}, 0, 0, VE, "er"}, \
+    { "crf",              "Select the quality for constant quality mode", offsetof(VP8Context, crf), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 63, VE }, \
+
+#define LEGACY_OPTIONS \
+    {"speed", "", offsetof(VP8Context, cpu_used), AV_OPT_TYPE_INT, {.i64 = 1}, -16, 16, VE}, \
+    {"quality", "", offsetof(VP8Context, deadline), AV_OPT_TYPE_INT, {.i64 = VPX_DL_GOOD_QUALITY}, INT_MIN, INT_MAX, VE, "quality"}, \
+    {"vp8flags", "", offsetof(VP8Context, flags), FF_OPT_TYPE_FLAGS, {.i64 = 0}, 0, UINT_MAX, VE, "flags"}, \
+    {"error_resilient", "enable error resilience", 0, FF_OPT_TYPE_CONST, {.dbl = VP8F_ERROR_RESILIENT}, INT_MIN, INT_MAX, VE, "flags"}, \
+    {"altref", "enable use of alternate reference frames (VP8/2-pass only)", 0, FF_OPT_TYPE_CONST, {.dbl = VP8F_AUTO_ALT_REF}, INT_MIN, INT_MAX, VE, "flags"}, \
+    {"arnr_max_frames", "altref noise reduction max frame count", offsetof(VP8Context, arnr_max_frames), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 15, VE}, \
+    {"arnr_strength", "altref noise reduction filter strength", offsetof(VP8Context, arnr_strength), AV_OPT_TYPE_INT, {.i64 = 3}, 0, 6, VE}, \
+    {"arnr_type", "altref noise reduction filter type", offsetof(VP8Context, arnr_type), AV_OPT_TYPE_INT, {.i64 = 3}, 1, 3, VE}, \
+    {"rc_lookahead", "Number of frames to look ahead for alternate reference frame selection", offsetof(VP8Context, lag_in_frames), AV_OPT_TYPE_INT, {.i64 = 25}, 0, 25, VE}, \
+
+#if CONFIG_LIBVPX_VP8_ENCODER
+static const AVOption vp8_options[] = {
+    COMMON_OPTIONS
+    LEGACY_OPTIONS
     { NULL }
 };
+#endif
+
+#if CONFIG_LIBVPX_VP9_ENCODER
+static const AVOption vp9_options[] = {
+    COMMON_OPTIONS
+    { "lossless",        "Lossless mode",                               OFFSET(lossless),        AV_OPT_TYPE_INT, {.i64 = -1}, -1, 1, VE},
+    { "tile-columns",    "Number of tile columns to use, log2",         OFFSET(tile_columns),    AV_OPT_TYPE_INT, {.i64 = -1}, -1, 6, VE},
+    { "tile-rows",       "Number of tile rows to use, log2",            OFFSET(tile_rows),       AV_OPT_TYPE_INT, {.i64 = -1}, -1, 2, VE},
+    { "frame-parallel",  "Enable frame parallel decodability features", OFFSET(frame_parallel),  AV_OPT_TYPE_INT, {.i64 = -1}, -1, 1, VE},
+    LEGACY_OPTIONS
+    { NULL }
+};
+#endif
+
+#undef COMMON_OPTIONS
+#undef LEGACY_OPTIONS
 
 static const AVCodecDefault defaults[] = {
     { "qmin",             "-1" },
@@ -583,9 +826,9 @@ static av_cold int vp8_init(AVCodecContext *avctx)
 }
 
 static const AVClass class_vp8 = {
-    .class_name = "libvpx encoder",
+    .class_name = "libvpx-vp8 encoder",
     .item_name  = av_default_item_name,
-    .option     = options,
+    .option     = vp8_options,
     .version    = LIBAVUTIL_VERSION_INT,
 };
 
@@ -599,7 +842,7 @@ AVCodec ff_libvpx_vp8_encoder = {
     .encode2        = vp8_encode,
     .close          = vp8_free,
     .capabilities   = CODEC_CAP_DELAY | CODEC_CAP_AUTO_THREADS,
-    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
+    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUVA420P, AV_PIX_FMT_NONE },
     .priv_class     = &class_vp8,
     .defaults       = defaults,
 };
@@ -608,16 +851,13 @@ AVCodec ff_libvpx_vp8_encoder = {
 #if CONFIG_LIBVPX_VP9_ENCODER
 static av_cold int vp9_init(AVCodecContext *avctx)
 {
-    int ret;
-    if ((ret = ff_vp9_check_experimental(avctx)))
-        return ret;
     return vpx_init(avctx, &vpx_codec_vp9_cx_algo);
 }
 
 static const AVClass class_vp9 = {
-    .class_name = "libvpx encoder",
+    .class_name = "libvpx-vp9 encoder",
     .item_name  = av_default_item_name,
-    .option     = options,
+    .option     = vp9_options,
     .version    = LIBAVUTIL_VERSION_INT,
 };
 
@@ -634,5 +874,6 @@ AVCodec ff_libvpx_vp9_encoder = {
     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
     .priv_class     = &class_vp9,
     .defaults       = defaults,
+    .init_static_data = ff_vp9_init_static,
 };
 #endif /* CONFIG_LIBVPX_VP9_ENCODER */
diff --git a/libavcodec/libwavpackenc.c b/libavcodec/libwavpackenc.c
index 34ec013..77d98a2 100644
--- a/libavcodec/libwavpackenc.c
+++ b/libavcodec/libwavpackenc.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/libwebpenc.c b/libavcodec/libwebpenc.c
index b981f48..5283da5 100644
--- a/libavcodec/libwebpenc.c
+++ b/libavcodec/libwebpenc.c
@@ -2,20 +2,20 @@
  * WebP encoding support via libwebp
  * Copyright (c) 2013 Justin Ruggles <justin.ruggles@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index 4f44a06..edf6fc6 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -2,23 +2,24 @@
  * H.264 encoding using the x264 library
  * Copyright (C) 2005  Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/eval.h"
 #include "libavutil/internal.h"
 #include "libavutil/opt.h"
 #include "libavutil/mem.h"
@@ -48,7 +49,10 @@ typedef struct X264Context {
     char *preset;
     char *tune;
     char *profile;
+    char *level;
     int fastfirstpass;
+    char *wpredp;
+    char *x264opts;
     float crf;
     float crf_max;
     int cqp;
@@ -108,16 +112,21 @@ static int encode_nals(AVCodecContext *ctx, AVPacket *pkt,
     for (i = 0; i < nnal; i++)
         size += nals[i].i_payload;
 
-    if ((ret = ff_alloc_packet(pkt, size)) < 0)
+    if ((ret = ff_alloc_packet2(ctx, pkt, size)) < 0)
         return ret;
 
     p = pkt->data;
 
     /* Write the SEI as part of the first frame. */
     if (x4->sei_size > 0 && nnal > 0) {
+        if (x4->sei_size > size) {
+            av_log(ctx, AV_LOG_ERROR, "Error: nal buffer is too small\n");
+            return -1;
+        }
         memcpy(p, x4->sei, x4->sei_size);
         p += x4->sei_size;
         x4->sei_size = 0;
+        av_freep(&x4->sei);
     }
 
     for (i = 0; i < nnal; i++){
@@ -128,23 +137,42 @@ static int encode_nals(AVCodecContext *ctx, AVPacket *pkt,
     return 1;
 }
 
+static int avfmt2_num_planes(int avfmt)
+{
+    switch (avfmt) {
+    case AV_PIX_FMT_YUV420P:
+    case AV_PIX_FMT_YUVJ420P:
+    case AV_PIX_FMT_YUV420P9:
+    case AV_PIX_FMT_YUV420P10:
+    case AV_PIX_FMT_YUV444P:
+        return 3;
+
+    case AV_PIX_FMT_BGR24:
+    case AV_PIX_FMT_RGB24:
+        return 1;
+
+    default:
+        return 3;
+    }
+}
+
 static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
                       int *got_packet)
 {
     X264Context *x4 = ctx->priv_data;
     x264_nal_t *nal;
     int nnal, i, ret;
-    x264_picture_t pic_out;
+    x264_picture_t pic_out = {0};
     AVFrameSideData *side_data;
 
     x264_picture_init( &x4->pic );
     x4->pic.img.i_csp   = x4->params.i_csp;
     if (x264_bit_depth > 8)
         x4->pic.img.i_csp |= X264_CSP_HIGH_DEPTH;
-    x4->pic.img.i_plane = 3;
+    x4->pic.img.i_plane = avfmt2_num_planes(ctx->pix_fmt);
 
     if (frame) {
-        for (i = 0; i < 3; i++) {
+        for (i = 0; i < x4->pic.img.i_plane; i++) {
             x4->pic.img.plane[i]    = frame->data[i];
             x4->pic.img.i_stride[i] = frame->linesize[i];
         }
@@ -155,7 +183,7 @@ static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
             frame->pict_type == AV_PICTURE_TYPE_P ? X264_TYPE_P :
             frame->pict_type == AV_PICTURE_TYPE_B ? X264_TYPE_B :
                                             X264_TYPE_AUTO;
-        if (x4->params.b_tff != frame->top_field_first) {
+        if (x4->params.b_interlaced && x4->params.b_tff != frame->top_field_first) {
             x4->params.b_tff = frame->top_field_first;
             x264_encoder_reconfig(x4->enc, &x4->params);
         }
@@ -187,6 +215,7 @@ static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
         }
 
         if (x4->params.rc.i_rc_method == X264_RC_CQP &&
+            x4->cqp >= 0 &&
             x4->params.rc.i_qp_constant != x4->cqp) {
             x4->params.rc.i_qp_constant = x4->cqp;
             x264_encoder_reconfig(x4->enc, &x4->params);
@@ -282,6 +311,20 @@ static av_cold int X264_close(AVCodecContext *avctx)
     return 0;
 }
 
+#define OPT_STR(opt, param)                                                   \
+    do {                                                                      \
+        int ret;                                                              \
+        if (param!=NULL && (ret = x264_param_parse(&x4->params, opt, param)) < 0) { \
+            if(ret == X264_PARAM_BAD_NAME)                                    \
+                av_log(avctx, AV_LOG_ERROR,                                   \
+                        "bad option '%s': '%s'\n", opt, param);               \
+            else                                                              \
+                av_log(avctx, AV_LOG_ERROR,                                   \
+                        "bad value for '%s': '%s'\n", opt, param);            \
+            return -1;                                                        \
+        }                                                                     \
+    } while (0)
+
 static int convert_pix_fmt(enum AVPixelFormat pix_fmt)
 {
     switch (pix_fmt) {
@@ -290,10 +333,19 @@ static int convert_pix_fmt(enum AVPixelFormat pix_fmt)
     case AV_PIX_FMT_YUV420P9:
     case AV_PIX_FMT_YUV420P10: return X264_CSP_I420;
     case AV_PIX_FMT_YUV422P:
+    case AV_PIX_FMT_YUVJ422P:
     case AV_PIX_FMT_YUV422P10: return X264_CSP_I422;
     case AV_PIX_FMT_YUV444P:
+    case AV_PIX_FMT_YUVJ444P:
     case AV_PIX_FMT_YUV444P9:
     case AV_PIX_FMT_YUV444P10: return X264_CSP_I444;
+#ifdef X264_CSP_BGR
+    case AV_PIX_FMT_BGR24:
+        return X264_CSP_BGR;
+
+    case AV_PIX_FMT_RGB24:
+        return X264_CSP_RGB;
+#endif
     case AV_PIX_FMT_NV12:      return X264_CSP_NV12;
     case AV_PIX_FMT_NV16:
     case AV_PIX_FMT_NV20:      return X264_CSP_NV16;
@@ -310,14 +362,29 @@ static int convert_pix_fmt(enum AVPixelFormat pix_fmt)
 static av_cold int X264_init(AVCodecContext *avctx)
 {
     X264Context *x4 = avctx->priv_data;
+    int sw,sh;
+
+    if (avctx->global_quality > 0)
+        av_log(avctx, AV_LOG_WARNING, "-qscale is ignored, -crf is recommended.\n");
 
     x264_param_default(&x4->params);
 
     x4->params.b_deblocking_filter         = avctx->flags & CODEC_FLAG_LOOP_FILTER;
 
+    x4->params.rc.f_pb_factor             = avctx->b_quant_factor;
+    x4->params.analyse.i_chroma_qp_offset = avctx->chromaoffset;
     if (x4->preset || x4->tune)
         if (x264_param_default_preset(&x4->params, x4->preset, x4->tune) < 0) {
+            int i;
             av_log(avctx, AV_LOG_ERROR, "Error setting preset/tune %s/%s.\n", x4->preset, x4->tune);
+            av_log(avctx, AV_LOG_INFO, "Possible presets:");
+            for (i = 0; x264_preset_names[i]; i++)
+                av_log(avctx, AV_LOG_INFO, " %s", x264_preset_names[i]);
+            av_log(avctx, AV_LOG_INFO, "\n");
+            av_log(avctx, AV_LOG_INFO, "Possible tunes:");
+            for (i = 0; x264_tune_names[i]; i++)
+                av_log(avctx, AV_LOG_INFO, " %s", x264_tune_names[i]);
+            av_log(avctx, AV_LOG_INFO, "\n");
             return AVERROR(EINVAL);
         }
 
@@ -329,6 +396,8 @@ static av_cold int X264_init(AVCodecContext *avctx)
     x4->params.i_log_level          = X264_LOG_DEBUG;
     x4->params.i_csp                = convert_pix_fmt(avctx->pix_fmt);
 
+    OPT_STR("weightp", x4->wpredp);
+
     if (avctx->bit_rate) {
         x4->params.rc.i_bitrate   = avctx->bit_rate / 1000;
         x4->params.rc.i_rc_method = X264_RC_ABR;
@@ -357,10 +426,10 @@ static av_cold int X264_init(AVCodecContext *avctx)
             (float)avctx->rc_initial_buffer_occupancy / avctx->rc_buffer_size;
     }
 
+    OPT_STR("level", x4->level);
+
     if (avctx->i_quant_factor > 0)
         x4->params.rc.f_ip_factor         = 1 / fabs(avctx->i_quant_factor);
-    x4->params.rc.f_pb_factor             = avctx->b_quant_factor;
-    x4->params.analyse.i_chroma_qp_offset = avctx->chromaoffset;
 
     if (avctx->me_method == ME_EPZS)
         x4->params.analyse.i_me_method = X264_ME_DIA;
@@ -391,6 +460,28 @@ static av_cold int X264_init(AVCodecContext *avctx)
         x4->params.rc.f_qcompress       = avctx->qcompress; /* 0.0 => cbr, 1.0 => constant qp */
     if (avctx->refs >= 0)
         x4->params.i_frame_reference    = avctx->refs;
+    else if (x4->level) {
+        int i;
+        int mbn = FF_CEIL_RSHIFT(avctx->width, 4) * FF_CEIL_RSHIFT(avctx->height, 4);
+        int level_id = -1;
+        char *tail;
+        int scale = X264_BUILD < 129 ? 384 : 1;
+
+        if (!strcmp(x4->level, "1b")) {
+            level_id = 9;
+        } else if (strlen(x4->level) <= 3){
+            level_id = av_strtod(x4->level, &tail) * 10 + 0.5;
+            if (*tail)
+                level_id = -1;
+        }
+        if (level_id <= 0)
+            av_log(avctx, AV_LOG_WARNING, "Failed to parse level\n");
+
+        for (i = 0; i<x264_levels[i].level_idc; i++)
+            if (x264_levels[i].level_idc == level_id)
+                x4->params.i_frame_reference = av_clip(x264_levels[i].dpb / mbn / scale, 1, x4->params.i_frame_reference);
+    }
+
     if (avctx->trellis >= 0)
         x4->params.analyse.i_trellis    = avctx->trellis;
     if (avctx->me_range >= 0)
@@ -454,25 +545,68 @@ static av_cold int X264_init(AVCodecContext *avctx)
 
     if (x4->slice_max_size >= 0)
         x4->params.i_slice_max_size =  x4->slice_max_size;
+    else {
+        /*
+         * Allow x264 to be instructed through AVCodecContext about the maximum
+         * size of the RTP payload. For example, this enables the production of
+         * payload suitable for the H.264 RTP packetization-mode 0 i.e. single
+         * NAL unit per RTP packet.
+         */
+        if (avctx->rtp_payload_size)
+            x4->params.i_slice_max_size = avctx->rtp_payload_size;
+    }
 
     if (x4->fastfirstpass)
         x264_param_apply_fastfirstpass(&x4->params);
 
+    /* Allow specifying the x264 profile through AVCodecContext. */
+    if (!x4->profile)
+        switch (avctx->profile) {
+        case FF_PROFILE_H264_BASELINE:
+            x4->profile = av_strdup("baseline");
+            break;
+        case FF_PROFILE_H264_HIGH:
+            x4->profile = av_strdup("high");
+            break;
+        case FF_PROFILE_H264_HIGH_10:
+            x4->profile = av_strdup("high10");
+            break;
+        case FF_PROFILE_H264_HIGH_422:
+            x4->profile = av_strdup("high422");
+            break;
+        case FF_PROFILE_H264_HIGH_444:
+            x4->profile = av_strdup("high444");
+            break;
+        case FF_PROFILE_H264_MAIN:
+            x4->profile = av_strdup("main");
+            break;
+        default:
+            break;
+        }
+
     if (x4->nal_hrd >= 0)
         x4->params.i_nal_hrd = x4->nal_hrd;
 
     if (x4->profile)
         if (x264_param_apply_profile(&x4->params, x4->profile) < 0) {
+            int i;
             av_log(avctx, AV_LOG_ERROR, "Error setting profile %s.\n", x4->profile);
+            av_log(avctx, AV_LOG_INFO, "Possible profiles:");
+            for (i = 0; x264_profile_names[i]; i++)
+                av_log(avctx, AV_LOG_INFO, " %s", x264_profile_names[i]);
+            av_log(avctx, AV_LOG_INFO, "\n");
             return AVERROR(EINVAL);
         }
 
     x4->params.i_width          = avctx->width;
     x4->params.i_height         = avctx->height;
-    x4->params.vui.i_sar_width  = avctx->sample_aspect_ratio.num;
-    x4->params.vui.i_sar_height = avctx->sample_aspect_ratio.den;
-    x4->params.i_fps_num = x4->params.i_timebase_den = avctx->time_base.den;
-    x4->params.i_fps_den = x4->params.i_timebase_num = avctx->time_base.num;
+    av_reduce(&sw, &sh, avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den, 4096);
+    x4->params.vui.i_sar_width  = sw;
+    x4->params.vui.i_sar_height = sh;
+    x4->params.i_timebase_den = avctx->time_base.den;
+    x4->params.i_timebase_num = avctx->time_base.num;
+    x4->params.i_fps_num = avctx->time_base.den;
+    x4->params.i_fps_den = avctx->time_base.num * avctx->ticks_per_frame;
 
     x4->params.analyse.b_psnr = avctx->flags & CODEC_FLAG_PSNR;
 
@@ -487,11 +621,33 @@ static av_cold int X264_init(AVCodecContext *avctx)
     x4->params.i_slice_count  = avctx->slices;
 
     x4->params.vui.b_fullrange = avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
+                                 avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
+                                 avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
                                  avctx->color_range == AVCOL_RANGE_JPEG;
 
+    if (avctx->colorspace != AVCOL_SPC_UNSPECIFIED)
+        x4->params.vui.i_colmatrix = avctx->colorspace;
+    if (avctx->color_primaries != AVCOL_PRI_UNSPECIFIED)
+        x4->params.vui.i_colorprim = avctx->color_primaries;
+    if (avctx->color_trc != AVCOL_TRC_UNSPECIFIED)
+        x4->params.vui.i_transfer  = avctx->color_trc;
+
     if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER)
         x4->params.b_repeat_headers = 0;
 
+    if(x4->x264opts){
+        const char *p= x4->x264opts;
+        while(p){
+            char param[256]={0}, val[256]={0};
+            if(sscanf(p, "%255[^:=]=%255[^:]", param, val) == 1){
+                OPT_STR(param, "1");
+            }else
+                OPT_STR(param, val);
+            p= strchr(p, ':');
+            p+=!!p;
+        }
+    }
+
     if (x4->x264_params) {
         AVDictionary *dict    = NULL;
         AVDictionaryEntry *en = NULL;
@@ -554,7 +710,9 @@ static const enum AVPixelFormat pix_fmts_8bit[] = {
     AV_PIX_FMT_YUV420P,
     AV_PIX_FMT_YUVJ420P,
     AV_PIX_FMT_YUV422P,
+    AV_PIX_FMT_YUVJ422P,
     AV_PIX_FMT_YUV444P,
+    AV_PIX_FMT_YUVJ444P,
     AV_PIX_FMT_NV12,
     AV_PIX_FMT_NV16,
     AV_PIX_FMT_NONE
@@ -571,6 +729,13 @@ static const enum AVPixelFormat pix_fmts_10bit[] = {
     AV_PIX_FMT_NV20,
     AV_PIX_FMT_NONE
 };
+static const enum AVPixelFormat pix_fmts_8bit_rgb[] = {
+#ifdef X264_CSP_BGR
+    AV_PIX_FMT_BGR24,
+    AV_PIX_FMT_RGB24,
+#endif
+    AV_PIX_FMT_NONE
+};
 
 static av_cold void X264_init_static(AVCodec *codec)
 {
@@ -589,6 +754,10 @@ static const AVOption options[] = {
     { "tune",          "Tune the encoding params (cf. x264 --fullhelp)",  OFFSET(tune),          AV_OPT_TYPE_STRING, { 0 }, 0, 0, VE},
     { "profile",       "Set profile restrictions (cf. x264 --fullhelp) ", OFFSET(profile),       AV_OPT_TYPE_STRING, { 0 }, 0, 0, VE},
     { "fastfirstpass", "Use fast settings when encoding first pass",      OFFSET(fastfirstpass), AV_OPT_TYPE_INT,    { .i64 = 1 }, 0, 1, VE},
+    {"level", "Specify level (as defined by Annex A)", OFFSET(level), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE},
+    {"passlogfile", "Filename for 2 pass stats", OFFSET(stats), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE},
+    {"wpredp", "Weighted prediction for P-frames", OFFSET(wpredp), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE},
+    {"x264opts", "x264 options", OFFSET(x264opts), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, VE},
     { "crf",           "Select the quality for constant quality mode",    OFFSET(crf),           AV_OPT_TYPE_FLOAT,  {.dbl = -1 }, -1, FLT_MAX, VE },
     { "crf_max",       "In CRF mode, prevents VBV from lowering quality beyond this point.",OFFSET(crf_max), AV_OPT_TYPE_FLOAT, {.dbl = -1 }, -1, FLT_MAX, VE },
     { "qp",            "Constant quantization parameter rate control method",OFFSET(cqp),        AV_OPT_TYPE_INT,    { .i64 = -1 }, -1, INT_MAX, VE },
@@ -638,16 +807,24 @@ static const AVOption options[] = {
     { NULL },
 };
 
-static const AVClass class = {
+static const AVClass x264_class = {
     .class_name = "libx264",
     .item_name  = av_default_item_name,
     .option     = options,
     .version    = LIBAVUTIL_VERSION_INT,
 };
 
+static const AVClass rgbclass = {
+    .class_name = "libx264rgb",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 static const AVCodecDefault x264_defaults[] = {
     { "b",                "0" },
     { "bf",               "-1" },
+    { "flags2",           "0" },
     { "g",                "-1" },
     { "i_qfactor",        "-1" },
     { "qmin",             "-1" },
@@ -655,6 +832,7 @@ static const AVCodecDefault x264_defaults[] = {
     { "qdiff",            "-1" },
     { "qblur",            "-1" },
     { "qcomp",            "-1" },
+//     { "rc_lookahead",     "-1" },
     { "refs",             "-1" },
     { "sc_threshold",     "-1" },
     { "trellis",          "-1" },
@@ -683,7 +861,22 @@ AVCodec ff_libx264_encoder = {
     .encode2          = X264_frame,
     .close            = X264_close,
     .capabilities     = CODEC_CAP_DELAY | CODEC_CAP_AUTO_THREADS,
-    .priv_class       = &class,
+    .priv_class       = &x264_class,
     .defaults         = x264_defaults,
     .init_static_data = X264_init_static,
 };
+
+AVCodec ff_libx264rgb_encoder = {
+    .name           = "libx264rgb",
+    .long_name      = NULL_IF_CONFIG_SMALL("libx264 H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 RGB"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_H264,
+    .priv_data_size = sizeof(X264Context),
+    .init           = X264_init,
+    .encode2        = X264_frame,
+    .close          = X264_close,
+    .capabilities   = CODEC_CAP_DELAY | CODEC_CAP_AUTO_THREADS,
+    .priv_class     = &rgbclass,
+    .defaults       = x264_defaults,
+    .pix_fmts       = pix_fmts_8bit_rgb,
+};
diff --git a/libavcodec/libx265.c b/libavcodec/libx265.c
index 026f6ff..4cf8c85 100644
--- a/libavcodec/libx265.c
+++ b/libavcodec/libx265.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2013-2014 Derek Buitenhuis
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -228,7 +228,7 @@ static int libx265_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     ret = x265_encoder_encode(ctx->encoder, &nal, &nnal,
                               pic ? &x265pic : NULL, &x265pic_out);
     if (ret < 0)
-        return AVERROR_UNKNOWN;
+        return AVERROR_EXTERNAL;
 
     if (!nnal)
         return 0;
diff --git a/libavcodec/libxavs.c b/libavcodec/libxavs.c
index 7a74e36..92dcece 100644
--- a/libavcodec/libxavs.c
+++ b/libavcodec/libxavs.c
@@ -2,20 +2,20 @@
  * AVS encoding using the xavs library
  * Copyright (C) 2010 Amanda, Y.N. Wu <amanda11192003@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -88,10 +88,8 @@ static int encode_nals(AVCodecContext *ctx, AVPacket *pkt,
     for (i = 0; i < nnal; i++)
         size += nals[i].i_payload;
 
-    if ((ret = ff_alloc_packet(pkt, size)) < 0) {
-        av_log(ctx, AV_LOG_ERROR, "Error getting output packet of size %d.\n", size);
+    if ((ret = ff_alloc_packet2(ctx, pkt, size)) < 0)
         return ret;
-    }
     p = pkt->data;
 
     /* Write the SEI as part of the first frame. */
@@ -145,7 +143,7 @@ static int XAVS_frame(AVCodecContext *avctx, AVPacket *pkt,
 
     if (!ret) {
         if (!frame && !(x4->end_of_stream)) {
-            if ((ret = ff_alloc_packet(pkt, 4)) < 0)
+            if ((ret = ff_alloc_packet2(avctx, pkt, 4)) < 0)
                 return ret;
 
             pkt->data[0] = 0x0;
@@ -153,7 +151,7 @@ static int XAVS_frame(AVCodecContext *avctx, AVPacket *pkt,
             pkt->data[2] = 0x01;
             pkt->data[3] = 0xb1;
             pkt->dts = 2*x4->pts_buffer[(x4->out_frame_count-1)%(avctx->max_b_frames+1)] -
-                       x4->pts_buffer[(x4->out_frame_count-2)%(avctx->max_b_frames+1)];
+                         x4->pts_buffer[(x4->out_frame_count-2)%(avctx->max_b_frames+1)];
             x4->end_of_stream = END_OF_STREAM;
             *got_packet = 1;
         }
@@ -355,7 +353,7 @@ static av_cold int XAVS_init(AVCodecContext *avctx)
     if (!x4->enc)
         return -1;
 
-    if (!(x4->pts_buffer = av_mallocz((avctx->max_b_frames+1) * sizeof(*x4->pts_buffer))))
+    if (!(x4->pts_buffer = av_mallocz_array((avctx->max_b_frames+1), sizeof(*x4->pts_buffer))))
         return AVERROR(ENOMEM);
 
     avctx->coded_frame = av_frame_alloc();
@@ -394,10 +392,10 @@ static av_cold int XAVS_init(AVCodecContext *avctx)
 #define OFFSET(x) offsetof(XavsContext, x)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
-    { "crf",           "Select the quality for constant quality mode",    OFFSET(crf),           AV_OPT_TYPE_FLOAT,  {-1 }, -1, FLT_MAX, VE },
+    { "crf",           "Select the quality for constant quality mode",    OFFSET(crf),           AV_OPT_TYPE_FLOAT,  {.dbl = -1 }, -1, FLT_MAX, VE },
     { "qp",            "Constant quantization parameter rate control method",OFFSET(cqp),        AV_OPT_TYPE_INT,    {.i64 = -1 }, -1, INT_MAX, VE },
     { "b-bias",        "Influences how often B-frames are used",          OFFSET(b_bias),        AV_OPT_TYPE_INT,    {.i64 = INT_MIN}, INT_MIN, INT_MAX, VE },
-    { "cplxblur",      "Reduce fluctuations in QP (before curve compression)", OFFSET(cplxblur), AV_OPT_TYPE_FLOAT,  {-1 }, -1, FLT_MAX, VE},
+    { "cplxblur",      "Reduce fluctuations in QP (before curve compression)", OFFSET(cplxblur), AV_OPT_TYPE_FLOAT,  {.dbl = -1 }, -1, FLT_MAX, VE},
     { "direct-pred",   "Direct MV prediction mode",                       OFFSET(direct_pred),   AV_OPT_TYPE_INT,    {.i64 = -1 }, -1, INT_MAX, VE, "direct-pred" },
     { "none",          NULL,      0,    AV_OPT_TYPE_CONST, { .i64 = XAVS_DIRECT_PRED_NONE },     0, 0, VE, "direct-pred" },
     { "spatial",       NULL,      0,    AV_OPT_TYPE_CONST, { .i64 = XAVS_DIRECT_PRED_SPATIAL },  0, 0, VE, "direct-pred" },
@@ -410,7 +408,7 @@ static const AVOption options[] = {
     { NULL },
 };
 
-static const AVClass class = {
+static const AVClass xavs_class = {
     .class_name = "libxavs",
     .item_name  = av_default_item_name,
     .option     = options,
@@ -433,6 +431,6 @@ AVCodec ff_libxavs_encoder = {
     .close          = XAVS_close,
     .capabilities   = CODEC_CAP_DELAY | CODEC_CAP_AUTO_THREADS,
     .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
-    .priv_class     = &class,
+    .priv_class     = &xavs_class,
     .defaults       = xavs_defaults,
 };
diff --git a/libavcodec/libxvid.c b/libavcodec/libxvid.c
index ddeceac..b521e01 100644
--- a/libavcodec/libxvid.c
+++ b/libavcodec/libxvid.c
@@ -2,20 +2,20 @@
  * Interface to xvidcore for mpeg4 encoding
  * Copyright (c) 2004 Adam Thayer <krevnik@comcast.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,14 +26,23 @@
  */
 
 #include <xvid.h>
-#include <unistd.h>
 #include "avcodec.h"
+#include "internal.h"
+#include "libavutil/file.h"
 #include "libavutil/cpu.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/mathematics.h"
 #include "libxvid.h"
 #include "mpegvideo.h"
 
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#if HAVE_IO_H
+#include <io.h>
+#endif
+
 /**
  * Buffer management macros.
  */
@@ -46,7 +55,7 @@
  * This stores all the private context for the codec.
  */
 struct xvid_context {
-    AVClass *class;                /**< Handle for Xvid encoder */
+    AVClass *class;
     void *encoder_handle;          /**< Handle for Xvid encoder */
     int xsize;                     /**< Frame x size */
     int ysize;                     /**< Frame y size */
@@ -58,6 +67,7 @@ struct xvid_context {
     char *twopassbuffer;           /**< Character buffer for two-pass */
     char *old_twopassbuffer;       /**< Old character buffer (two-pass) */
     char *twopassfile;             /**< second pass temp file name */
+    int twopassfd;
     unsigned char *intra_matrix;   /**< P-Frame Quant Matrix */
     unsigned char *inter_matrix;   /**< I-Frame Quant Matrix */
     int lumi_aq;                   /**< Lumi masking as an aq method */
@@ -75,6 +85,8 @@ struct xvid_ff_pass1 {
     struct xvid_context *context;   /**< Pointer to private context */
 };
 
+static int xvid_encode_close(AVCodecContext *avctx);
+
 /*
  * Xvid 2-Pass Kludge Section
  *
@@ -105,7 +117,7 @@ static int xvid_ff_2pass_create(xvid_plg_create_t * param,
     /* This is because we can safely prevent a buffer overflow */
     log[0] = 0;
     snprintf(log, BUFFER_REMAINING(log),
-        "# avconv 2-pass log file, using xvid codec\n");
+        "# ffmpeg 2-pass log file, using xvid codec\n");
     snprintf(BUFFER_CAT(log), BUFFER_REMAINING(log),
         "# Do not modify. libxvidcore version: %d.%d.%d\n\n",
         XVID_VERSION_MAJOR(XVID_VERSION),
@@ -352,46 +364,47 @@ static av_cold int xvid_encode_init(AVCodecContext *avctx)  {
     uint16_t *intra, *inter;
     int fd;
 
-    xvid_plugin_single_t single       = { 0 };
-    struct xvid_ff_pass1 rc2pass1     = { 0 };
-    xvid_plugin_2pass2_t rc2pass2     = { 0 };
-    xvid_plugin_lumimasking_t masking_l = { 0 }; /* For lumi masking */
-    xvid_plugin_lumimasking_t masking_v = { 0 }; /* For variance AQ */
-    xvid_plugin_ssim_t ssim           = { 0 };
-    xvid_gbl_init_t xvid_gbl_init     = { 0 };
-    xvid_enc_create_t xvid_enc_create = { 0 };
-    xvid_enc_plugin_t plugins[7];
-
-    /* Bring in VOP flags from avconv command-line */
-    x->vop_flags = XVID_VOP_HALFPEL; /* Bare minimum quality */
+    xvid_plugin_single_t      single          = { 0 };
+    struct xvid_ff_pass1      rc2pass1        = { 0 };
+    xvid_plugin_2pass2_t      rc2pass2        = { 0 };
+    xvid_plugin_lumimasking_t masking_l       = { 0 }; /* For lumi masking */
+    xvid_plugin_lumimasking_t masking_v       = { 0 }; /* For variance AQ */
+    xvid_plugin_ssim_t        ssim            = { 0 };
+    xvid_gbl_init_t           xvid_gbl_init   = { 0 };
+    xvid_enc_create_t         xvid_enc_create = { 0 };
+    xvid_enc_plugin_t         plugins[4];
+
+    x->twopassfd = -1;
+
+    /* Bring in VOP flags from ffmpeg command-line */
+    x->vop_flags = XVID_VOP_HALFPEL;              /* Bare minimum quality */
     if( xvid_flags & CODEC_FLAG_4MV )
-        x->vop_flags |= XVID_VOP_INTER4V; /* Level 3 */
-    if( avctx->trellis
-        )
-        x->vop_flags |= XVID_VOP_TRELLISQUANT; /* Level 5 */
+        x->vop_flags    |= XVID_VOP_INTER4V;      /* Level 3 */
+    if( avctx->trellis)
+        x->vop_flags    |= XVID_VOP_TRELLISQUANT; /* Level 5 */
     if( xvid_flags & CODEC_FLAG_AC_PRED )
-        x->vop_flags |= XVID_VOP_HQACPRED; /* Level 6 */
+        x->vop_flags    |= XVID_VOP_HQACPRED;     /* Level 6 */
     if( xvid_flags & CODEC_FLAG_GRAY )
-        x->vop_flags |= XVID_VOP_GREYSCALE;
+        x->vop_flags    |= XVID_VOP_GREYSCALE;
 
     /* Decide which ME quality setting to use */
     x->me_flags = 0;
     switch( avctx->me_method ) {
        case ME_FULL:   /* Quality 6 */
-           x->me_flags |=  XVID_ME_EXTSEARCH16
-                       |   XVID_ME_EXTSEARCH8;
+           x->me_flags  |=  XVID_ME_EXTSEARCH16
+                        |   XVID_ME_EXTSEARCH8;
 
        case ME_EPZS:   /* Quality 4 */
-           x->me_flags |=  XVID_ME_ADVANCEDDIAMOND8
-                       |   XVID_ME_HALFPELREFINE8
-                       |   XVID_ME_CHROMA_PVOP
-                       |   XVID_ME_CHROMA_BVOP;
+           x->me_flags  |=  XVID_ME_ADVANCEDDIAMOND8
+                        |   XVID_ME_HALFPELREFINE8
+                        |   XVID_ME_CHROMA_PVOP
+                        |   XVID_ME_CHROMA_BVOP;
 
        case ME_LOG:    /* Quality 2 */
        case ME_PHODS:
        case ME_X1:
-           x->me_flags |=  XVID_ME_ADVANCEDDIAMOND16
-                       |   XVID_ME_HALFPELREFINE16;
+           x->me_flags  |=  XVID_ME_ADVANCEDDIAMOND16
+                        |   XVID_ME_HALFPELREFINE16;
 
        case ME_ZERO:   /* Quality 0 */
        default:
@@ -402,21 +415,21 @@ static av_cold int xvid_encode_init(AVCodecContext *avctx)  {
     switch( avctx->mb_decision ) {
        case 2:
            x->vop_flags |= XVID_VOP_MODEDECISION_RD;
-           x->me_flags |=  XVID_ME_HALFPELREFINE8_RD
-                       |   XVID_ME_QUARTERPELREFINE8_RD
-                       |   XVID_ME_EXTSEARCH_RD
-                       |   XVID_ME_CHECKPREDICTION_RD;
+           x->me_flags  |=  XVID_ME_HALFPELREFINE8_RD
+                        |   XVID_ME_QUARTERPELREFINE8_RD
+                        |   XVID_ME_EXTSEARCH_RD
+                        |   XVID_ME_CHECKPREDICTION_RD;
        case 1:
            if( !(x->vop_flags & XVID_VOP_MODEDECISION_RD) )
                x->vop_flags |= XVID_VOP_FAST_MODEDECISION_RD;
-           x->me_flags |=  XVID_ME_HALFPELREFINE16_RD
-                       |   XVID_ME_QUARTERPELREFINE16_RD;
+           x->me_flags  |=  XVID_ME_HALFPELREFINE16_RD
+                        |   XVID_ME_QUARTERPELREFINE16_RD;
 
        default:
            break;
     }
 
-    /* Bring in VOL flags from avconv command-line */
+    /* Bring in VOL flags from ffmpeg command-line */
 #if FF_API_GMC
     if (avctx->flags & CODEC_FLAG_GMC)
         x->gmc = 1;
@@ -424,12 +437,12 @@ static av_cold int xvid_encode_init(AVCodecContext *avctx)  {
 
     x->vol_flags = 0;
     if (x->gmc) {
-        x->vol_flags |= XVID_VOL_GMC;
-        x->me_flags |= XVID_ME_GME_REFINE;
+        x->vol_flags    |= XVID_VOL_GMC;
+        x->me_flags     |= XVID_ME_GME_REFINE;
     }
     if( xvid_flags & CODEC_FLAG_QPEL ) {
-        x->vol_flags |= XVID_VOL_QUARTERPEL;
-        x->me_flags |= XVID_ME_QUARTERPELREFINE16;
+        x->vol_flags    |= XVID_VOL_QUARTERPEL;
+        x->me_flags     |= XVID_ME_QUARTERPELREFINE16;
         if( x->vop_flags & XVID_VOP_INTER4V )
             x->me_flags |= XVID_ME_QUARTERPELREFINE8;
     }
@@ -473,7 +486,7 @@ static av_cold int xvid_encode_init(AVCodecContext *avctx)  {
         if( x->twopassbuffer == NULL || x->old_twopassbuffer == NULL ) {
             av_log(avctx, AV_LOG_ERROR,
                 "Xvid: Cannot allocate 2-pass log buffers\n");
-            return -1;
+            goto fail;
         }
         x->twopassbuffer[0] = x->old_twopassbuffer[0] = 0;
 
@@ -484,28 +497,27 @@ static av_cold int xvid_encode_init(AVCodecContext *avctx)  {
         rc2pass2.version = XVID_VERSION;
         rc2pass2.bitrate = avctx->bit_rate;
 
-        fd = ff_tempfile("xvidff.", &x->twopassfile);
+        fd = av_tempfile("xvidff.", &x->twopassfile, 0, avctx);
         if( fd == -1 ) {
             av_log(avctx, AV_LOG_ERROR,
                 "Xvid: Cannot write 2-pass pipe\n");
-            return -1;
+            goto fail;
         }
+        x->twopassfd = fd;
 
         if( avctx->stats_in == NULL ) {
             av_log(avctx, AV_LOG_ERROR,
                 "Xvid: No 2-pass information loaded for second pass\n");
-            return -1;
+            goto fail;
         }
 
         if( strlen(avctx->stats_in) >
               write(fd, avctx->stats_in, strlen(avctx->stats_in)) ) {
-            close(fd);
             av_log(avctx, AV_LOG_ERROR,
                 "Xvid: Cannot write to 2-pass pipe\n");
-            return -1;
+            goto fail;
         }
 
-        close(fd);
         rc2pass2.filename = x->twopassfile;
         plugins[xvid_enc_create.num_plugins].func = xvid_plugin_2pass2;
         plugins[xvid_enc_create.num_plugins].param = &rc2pass2;
@@ -523,12 +535,6 @@ static av_cold int xvid_encode_init(AVCodecContext *avctx)  {
     if (avctx->lumi_masking != 0.0)
         x->lumi_aq = 1;
 
-    if (x->lumi_aq && x->variance_aq) {
-        x->variance_aq = 0;
-        av_log(avctx, AV_LOG_WARNING,
-               "variance_aq is ignored when lumi_aq is set.\n");
-    }
-
     /* Luminance Masking */
     if (x->lumi_aq) {
         masking_l.method = 0;
@@ -549,6 +555,11 @@ static av_cold int xvid_encode_init(AVCodecContext *avctx)  {
         xvid_enc_create.num_plugins++;
     }
 
+    if( x->lumi_aq && x->variance_aq ) /* both AQ plugins stay registered; this only informs the user */
+        av_log(avctx, AV_LOG_INFO,
+               "Both lumi_aq and variance_aq are enabled. The resulting quality "
+               "will be the worse one of the two effects made by the AQ.\n");
+
     /* SSIM */
     if (x->ssim) {
         plugins[xvid_enc_create.num_plugins].func = xvid_plugin_ssim;
@@ -632,11 +643,13 @@ static av_cold int xvid_encode_init(AVCodecContext *avctx)  {
     xvid_enc_create.bquant_ratio = 100 * avctx->b_quant_factor;
     if( avctx->max_b_frames > 0  && !x->quicktime_format ) xvid_enc_create.global |= XVID_GLOBAL_PACKED;
 
+    av_assert0(xvid_enc_create.num_plugins + (!!x->ssim) + (!!x->variance_aq) + (!!x->lumi_aq) <= FF_ARRAY_ELEMS(plugins));
+
     /* Create encoder context */
     xerr = xvid_encore(NULL, XVID_ENC_CREATE, &xvid_enc_create, NULL);
     if( xerr ) {
         av_log(avctx, AV_LOG_ERROR, "Xvid: Could not create encoder reference\n");
-        return -1;
+        goto fail;
     }
 
     x->encoder_handle = xvid_enc_create.handle;
@@ -645,6 +658,9 @@ static av_cold int xvid_encode_init(AVCodecContext *avctx)  {
         return AVERROR(ENOMEM);
 
     return 0;
+fail:
+    xvid_encode_close(avctx);
+    return -1;
 }
 
 static int xvid_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
@@ -660,11 +676,8 @@ static int xvid_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     xvid_enc_frame_t xvid_enc_frame = { 0 };
     xvid_enc_stats_t xvid_enc_stats = { 0 };
 
-    if (!user_packet &&
-        (ret = av_new_packet(pkt, mb_width*mb_height*MAX_MB_BYTES + FF_MIN_BUFFER_SIZE)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
+    if ((ret = ff_alloc_packet2(avctx, pkt, mb_width*mb_height*MAX_MB_BYTES + FF_MIN_BUFFER_SIZE)) < 0)
         return ret;
-    }
 
     /* Start setting up the frame */
     xvid_enc_frame.version = XVID_VERSION;
@@ -677,7 +690,7 @@ static int xvid_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     /* Initialize input image fields */
     if( avctx->pix_fmt != AV_PIX_FMT_YUV420P ) {
         av_log(avctx, AV_LOG_ERROR, "Xvid: Color spaces other than 420p not supported\n");
-        return -1;
+        return AVERROR(EINVAL);
     }
 
     xvid_enc_frame.input.csp = XVID_CSP_PLANAR; /* YUV420P */
@@ -698,11 +711,13 @@ static int xvid_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                                           XVID_TYPE_AUTO;
 
     /* Pixel aspect ratio setting */
-    if (avctx->sample_aspect_ratio.num < 1 || avctx->sample_aspect_ratio.num > 255 ||
-        avctx->sample_aspect_ratio.den < 1 || avctx->sample_aspect_ratio.den > 255) {
-        av_log(avctx, AV_LOG_ERROR, "Invalid pixel aspect ratio %i/%i\n",
+    if (avctx->sample_aspect_ratio.num < 0 || avctx->sample_aspect_ratio.num > 255 ||
+        avctx->sample_aspect_ratio.den < 0 || avctx->sample_aspect_ratio.den > 255) {
+        av_log(avctx, AV_LOG_WARNING,
+               "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
-        return -1;
+        av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
+                   avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
     }
     xvid_enc_frame.par = XVID_PAR_EXT;
     xvid_enc_frame.par_width  = avctx->sample_aspect_ratio.num;
@@ -762,23 +777,31 @@ static int xvid_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         if (!xerr)
             return 0;
         av_log(avctx, AV_LOG_ERROR, "Xvid: Encoding Error Occurred: %i\n", xerr);
-        return -1;
+        return AVERROR_EXTERNAL;
     }
 }
 
 static av_cold int xvid_encode_close(AVCodecContext *avctx) {
     struct xvid_context *x = avctx->priv_data;
 
-    xvid_encore(x->encoder_handle, XVID_ENC_DESTROY, NULL, NULL);
+    if(x->encoder_handle) /* xvid_encode_init's fail path calls this function before a handle has been created */
+        xvid_encore(x->encoder_handle, XVID_ENC_DESTROY, NULL, NULL);
+    x->encoder_handle = NULL;
 
     av_freep(&avctx->extradata);
     if( x->twopassbuffer != NULL ) {
-        av_free(x->twopassbuffer);
-        av_free(x->old_twopassbuffer);
+        av_freep(&x->twopassbuffer);
+        av_freep(&x->old_twopassbuffer);
+        avctx->stats_out = NULL; /* presumably pointed into one of the buffers just freed -- confirm */
+    }
+    if (x->twopassfd>=0) { /* -1 (set at the top of xvid_encode_init) means no temp file was opened */
+        unlink(x->twopassfile);
+        close(x->twopassfd);
+        x->twopassfd = -1;
     }
-    av_free(x->twopassfile);
-    av_free(x->intra_matrix);
-    av_free(x->inter_matrix);
+    av_freep(&x->twopassfile);
+    av_freep(&x->intra_matrix);
+    av_freep(&x->inter_matrix);
 
     return 0;
 }
diff --git a/libavcodec/libxvid.h b/libavcodec/libxvid.h
index 413d353..90ecd6f 100644
--- a/libavcodec/libxvid.h
+++ b/libavcodec/libxvid.h
@@ -1,20 +1,20 @@
 /*
  * copyright (C) 2006 Corey Hickey
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/libxvid_rc.c b/libavcodec/libxvid_rc.c
index 7f4a89d..ae6ddb8 100644
--- a/libavcodec/libxvid_rc.c
+++ b/libavcodec/libxvid_rc.c
@@ -3,74 +3,41 @@
  *
  * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "config.h"
 #include <xvid.h>
-#include <unistd.h>
-#if !HAVE_MKSTEMP
-#include <fcntl.h>
-#endif
-
 #include "libavutil/attributes.h"
-#include "libavutil/internal.h"
+#include "libavutil/file.h"
 #include "avcodec.h"
 #include "libxvid.h"
 #include "mpegvideo.h"
 
-#undef NDEBUG
-#include <assert.h>
-
-/* Wrapper to work around the lack of mkstemp() on mingw.
- * Also, tries to create file in /tmp first, if possible.
- * *prefix can be a character constant; *filename will be allocated internally.
- * @return file descriptor of opened file (or -1 on error)
- * and opened file name in **filename. */
-int ff_tempfile(const char *prefix, char **filename) {
-    int fd=-1;
-#if !HAVE_MKSTEMP
-    *filename = tempnam(".", prefix);
-#else
-    size_t len = strlen(prefix) + 12; /* room for "/tmp/" and "XXXXXX\0" */
-    *filename = av_malloc(len);
+#if HAVE_UNISTD_H
+#include <unistd.h>
 #endif
-    /* -----common section-----*/
-    if (*filename == NULL) {
-        av_log(NULL, AV_LOG_ERROR, "ff_tempfile: Cannot allocate file name\n");
-        return -1;
-    }
-#if !HAVE_MKSTEMP
-    fd = avpriv_open(*filename, O_RDWR | O_BINARY | O_CREAT, 0444);
-#else
-    snprintf(*filename, len, "/tmp/%sXXXXXX", prefix);
-    fd = mkstemp(*filename);
-    if (fd < 0) {
-        snprintf(*filename, len, "./%sXXXXXX", prefix);
-        fd = mkstemp(*filename);
-    }
+
+#if HAVE_IO_H
+#include <io.h>
 #endif
-    /* -----common section-----*/
-    if (fd < 0) {
-        av_log(NULL, AV_LOG_ERROR, "ff_tempfile: Cannot open temporary file %s\n", *filename);
-        return -1;
-    }
-    return fd; /* success */
-}
+
+#undef NDEBUG
+#include <assert.h>
 
 av_cold int ff_xvid_rate_control_init(MpegEncContext *s)
 {
@@ -79,7 +46,7 @@ av_cold int ff_xvid_rate_control_init(MpegEncContext *s)
     xvid_plg_create_t xvid_plg_create = { 0 };
     xvid_plugin_2pass2_t xvid_2pass2  = { 0 };
 
-    fd=ff_tempfile("xvidrc.", &tmp_name);
+    fd=av_tempfile("xvidrc.", &tmp_name, 0, s->avctx);
     if (fd == -1) {
         av_log(NULL, AV_LOG_ERROR, "Can't create temporary pass2 file.\n");
         return -1;
@@ -96,7 +63,13 @@ av_cold int ff_xvid_rate_control_init(MpegEncContext *s)
             frame_types[rce->pict_type], (int)lrintf(rce->qscale / FF_QP2LAMBDA), rce->i_count, s->mb_num - rce->i_count - rce->skip_count,
             rce->skip_count, (rce->i_tex_bits + rce->p_tex_bits + rce->misc_bits+7)/8, (rce->header_bits+rce->mv_bits+7)/8);
 
-        write(fd, tmp, strlen(tmp));
+        if (write(fd, tmp, strlen(tmp)) < 0) {
+            int write_err = AVERROR(errno); /* capture now: av_log(), av_free() and close() may overwrite errno */
+            av_log(NULL, AV_LOG_ERROR, "Error %s writing 2pass logfile\n", strerror(errno));
+            av_free(tmp_name);
+            close(fd);
+            return write_err;
+        }
     }
 
     close(fd);
diff --git a/libavcodec/libzvbi-teletextdec.c b/libavcodec/libzvbi-teletextdec.c
new file mode 100644
index 0000000..e65e3fb
--- /dev/null
+++ b/libavcodec/libzvbi-teletextdec.c
@@ -0,0 +1,570 @@
+/*
+ * Teletext decoding for ffmpeg
+ * Copyright (c) 2005-2010, 2012 Wolfram Gloger
+ * Copyright (c) 2013 Marton Balint
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "libavcodec/ass.h"
+#include "libavutil/opt.h"
+#include "libavutil/bprint.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/log.h"
+
+#include <libzvbi.h>
+
+#define TEXT_MAXSZ    (25 * (56 + 1) * 4 + 2)
+#define VBI_NB_COLORS 40
+#define RGBA(r,g,b,a) (((a) << 24) | ((r) << 16) | ((g) << 8) | (b))
+#define VBI_R(rgba)   (((rgba) >> 0) & 0xFF)
+#define VBI_G(rgba)   (((rgba) >> 8) & 0xFF)
+#define VBI_B(rgba)   (((rgba) >> 16) & 0xFF)
+#define VBI_A(rgba)   (((rgba) >> 24) & 0xFF)
+#define MAX_BUFFERED_PAGES 25
+#define BITMAP_CHAR_WIDTH  12
+#define BITMAP_CHAR_HEIGHT 10
+#define MAX_SLICES 64
+
+typedef struct TeletextPage /* element type of TeletextContext.pages */
+{
+    AVSubtitleRect *sub_rect; /* rectangle built for this page; NOTE(review): ownership not visible here */
+    int pgno;                 /* page number -- presumably as reported by libzvbi; confirm */
+    int subno;                /* subpage number -- presumably as reported by libzvbi; confirm */
+    int64_t pts;              /* timestamp of the page; NOTE(review): time base not visible here */
+} TeletextPage;
+
+typedef struct TeletextContext /* private state of the libzvbi teletext decoder */
+{
+    AVClass        *class;
+    char           *pgno;
+    int             x_offset;
+    int             y_offset;
+    int             format_id; /* 0 = bitmap, 1 = text/ass */
+    int             chop_top;  /* presumably forwarded as the chop_top argument of gen_sub_*() (rows dropped from the top) -- confirm */
+    int             sub_duration; /* in msec */
+    int             transparent_bg; /* nonzero: fix_transparency() also clears background pixels of opaque/semi-transparent cells */
+    int             chop_spaces;    /* nonzero: gen_sub_text() strips leading/trailing spaces row by row */
+
+    int             lines_processed;
+    TeletextPage    *pages;    /* buffered pages; presumably nb_pages valid entries -- confirm */
+    int             nb_pages;
+    int64_t         pts;       /* rescaled from AV_TIME_BASE_Q in create_ass_text() */
+    int             handler_ret;
+
+    vbi_decoder *   vbi;
+#ifdef DEBUG
+    vbi_export *    ex;        /* only present in DEBUG builds */
+#endif
+    vbi_sliced      sliced[MAX_SLICES];
+} TeletextContext;
+
+/* Returns len reduced by the run of trailing ' ' bytes in t[0..len). The scan also
+ * stops (keeping that space) when the byte in front of it has bit 7 set. */
+static int chop_spaces_utf8(const unsigned char* t, int len)
+{
+    const unsigned char *end = t + len; /* one past the last byte still kept */
+    for (; len > 0; --len, --end)
+        if (end[-1] != ' ' || (len > 1 && (end[-2] & 0x80)))
+            break;
+    return len;
+}
+
+static void subtitle_rect_free(AVSubtitleRect **sub_rect) /* frees the rect's buffers, then the rect */
+{
+    av_freep(&(*sub_rect)->ass);           /* ASS text, if any */
+    av_freep(&(*sub_rect)->pict.data[1]);  /* second picture plane */
+    av_freep(&(*sub_rect)->pict.data[0]);  /* first picture plane */
+    av_freep(sub_rect);                    /* finally the rect itself, through the caller's pointer */
+}
+
+/* Wrap text in an ASS dialog line timed from ctx->pts / ctx->sub_duration and hand it to
+ * av_bprint_finalize() for storage in *ass. Returns 0, or a negative AVERROR code on failure. */
+static int create_ass_text(TeletextContext *ctx, const char *text, char **ass)
+{
+    const AVRational centisec = { 1, 100 }; /* both timestamps are rescaled to 1/100 s */
+    const int ts_start    = av_rescale_q(ctx->pts,          AV_TIME_BASE_Q,        centisec);
+    const int ts_duration = av_rescale_q(ctx->sub_duration, (AVRational){1, 1000}, centisec);
+    AVBPrint escaped, dialog;
+    int err;
+
+    /* Stage 1: escape the plain text. */
+    av_bprint_init(&escaped, 0, AV_BPRINT_SIZE_UNLIMITED);
+    ff_ass_bprint_text_event(&escaped, text, strlen(text), "", 0);
+    if (!av_bprint_is_complete(&escaped)) {
+        av_bprint_finalize(&escaped, NULL);
+        return AVERROR(ENOMEM);
+    }
+
+    /* Stage 2: format the dialog line around the escaped text, which is released right after. */
+    av_bprint_init(&dialog, 0, AV_BPRINT_SIZE_UNLIMITED);
+    ff_ass_bprint_dialog(&dialog, escaped.str, ts_start, ts_duration, 0);
+    av_bprint_finalize(&escaped, NULL);
+    if (!av_bprint_is_complete(&dialog)) {
+        av_bprint_finalize(&dialog, NULL);
+        return AVERROR(ENOMEM);
+    }
+
+    /* Stage 3: finalize into *ass; a negative result is passed through, anything else becomes 0. */
+    err = av_bprint_finalize(&dialog, ass);
+    return err < 0 ? err : 0;
+}
+
+/* Draw a page as text: print the page region, optionally trim each row, store it in sub_rect as an ASS event. */
+static int gen_sub_text(TeletextContext *ctx, AVSubtitleRect *sub_rect, vbi_page *page, int chop_top)
+{
+    const char *in;
+    AVBPrint buf;
+    char *vbi_text = av_malloc(TEXT_MAXSZ);
+    int sz;
+
+    if (!vbi_text)
+        return AVERROR(ENOMEM);
+
+    sz = vbi_print_page_region(page, vbi_text, TEXT_MAXSZ-1, "UTF-8",
+                                   /*table mode*/ TRUE, FALSE,
+                                   0,             chop_top,
+                                   page->columns, page->rows-chop_top);
+    if (sz <= 0) {
+        av_log(ctx, AV_LOG_ERROR, "vbi_print error\n");
+        av_free(vbi_text);
+        return AVERROR_EXTERNAL;
+    }
+    vbi_text[sz] = '\0'; /* the call above was given TEXT_MAXSZ-1; assumes it never returns more than that */
+    in  = vbi_text;
+    av_bprint_init(&buf, 0, TEXT_MAXSZ);
+
+    if (ctx->chop_spaces) {
+        for (;;) {
+            int nl, sz; /* this sz shadows the function-level sz */
+
+            // skip leading spaces and newlines
+            in += strspn(in, " \n");
+            // compute end of row: a '\n' only counts when the byte before it has bit 7 clear
+            for (nl = 0; in[nl]; ++nl)
+                if (in[nl] == '\n' && (nl==0 || !(in[nl-1] & 0x80)))
+                    break;
+            if (!in[nl]) /* hit the terminating NUL; NOTE(review): a last row without a trailing '\n' is not appended */
+                break;
+            // skip trailing spaces
+            sz = chop_spaces_utf8(in, nl);
+            av_bprint_append_data(&buf, in, sz);
+            av_bprintf(&buf, "\n");
+            in += nl;
+        }
+    } else {
+        av_bprintf(&buf, "%s\n", vbi_text);
+    }
+    av_free(vbi_text);
+
+    if (!av_bprint_is_complete(&buf)) {
+        av_bprint_finalize(&buf, NULL);
+        return AVERROR(ENOMEM);
+    }
+
+    if (buf.len) { /* non-empty text becomes an ASS rect, empty text a SUBTITLE_NONE rect */
+        int ret;
+        sub_rect->type = SUBTITLE_ASS;
+        if ((ret = create_ass_text(ctx, buf.str, &sub_rect->ass)) < 0) {
+            av_bprint_finalize(&buf, NULL);
+            return ret;
+        }
+        av_log(ctx, AV_LOG_DEBUG, "subtext:%s:txetbus\n", sub_rect->ass);
+    } else {
+        sub_rect->type = SUBTITLE_NONE;
+    }
+    av_bprint_finalize(&buf, NULL);
+    return 0;
+}
+
+static void fix_transparency(TeletextContext *ctx, AVSubtitleRect *sub_rect, vbi_page *page,
+                             int chop_top, uint8_t transparent_color, int resx, int resy) /* resx is not used in the body */
+{
+    int iy;
+
+    // Hack for transparency, inspired by VLC code: rewrite pixel bytes to transparent_color one character cell at a time.
+    for (iy = 0; iy < resy; iy++) {
+        uint8_t *pixel = sub_rect->pict.data[0] + iy * sub_rect->pict.linesize[0];
+        vbi_char *vc = page->text + (iy / BITMAP_CHAR_HEIGHT + chop_top) * page->columns; /* text row covering pixel row iy */
+        vbi_char *vcnext = vc + page->columns;
+        for (; vc < vcnext; vc++) {
+            uint8_t *pixelnext = pixel + BITMAP_CHAR_WIDTH; /* start of the next cell on this pixel row */
+            switch (vc->opacity) {
+                case VBI_TRANSPARENT_SPACE: /* whole cell becomes transparent */
+                    memset(pixel, transparent_color, BITMAP_CHAR_WIDTH);
+                    break;
+                case VBI_OPAQUE:
+                case VBI_SEMI_TRANSPARENT:
+                    if (!ctx->transparent_bg) /* leave the cell untouched unless transparent_bg is set */
+                        break;
+                case VBI_TRANSPARENT_FULL: /* also reached by fall-through from the two cases above */
+                    for(; pixel < pixelnext; pixel++)
+                        if (*pixel == vc->background) /* only pixels equal to the cell's background value change */
+                            *pixel = transparent_color;
+                    break;
+            }
+            pixel = pixelnext; /* resync: not every case advances pixel itself */
+        }
+    }
+}
+
+/* Draw a page as a PAL8 bitmap into sub_rect; a page made only of transparent spaces yields SUBTITLE_NONE. Returns 0 or a negative error code. */
+static int gen_sub_bitmap(TeletextContext *ctx, AVSubtitleRect *sub_rect, vbi_page *page, int chop_top)
+{
+    int resx = page->columns * BITMAP_CHAR_WIDTH; /* bitmap width in pixels */
+    int resy = (page->rows - chop_top) * BITMAP_CHAR_HEIGHT; /* bitmap height in pixels, without the chopped top rows */
+    uint8_t ci, cmax = 0; /* cmax stays 0 while no visible character has been found */
+    int ret;
+    vbi_char *vc = page->text + (chop_top * page->columns); /* first character after the chopped rows */
+    vbi_char *vcend = page->text + (page->rows * page->columns); /* one past the last character of the page */
+
+    for (; vc < vcend; vc++) {
+        if (vc->opacity != VBI_TRANSPARENT_SPACE) {
+            cmax = VBI_NB_COLORS; /* page has content; index cmax is later used as the transparent palette entry */
+            break;
+        }
+    }
+
+    if (cmax == 0) {
+        av_log(ctx, AV_LOG_DEBUG, "dropping empty page %3x\n", page->pgno);
+        sub_rect->type = SUBTITLE_NONE;
+        return 0;
+    }
+
+    if ((ret = avpicture_alloc(&sub_rect->pict, AV_PIX_FMT_PAL8, resx, resy)) < 0)
+        return ret;
+    // Yes, we want to allocate the palette on our own because AVSubtitle works this way
+    sub_rect->pict.data[1] = NULL;
+
+    vbi_draw_vt_page_region(page, VBI_PIXFMT_PAL8,
+                            sub_rect->pict.data[0], sub_rect->pict.linesize[0],
+                            0, chop_top, page->columns, page->rows - chop_top,
+                            /*reveal*/ 1, /*flash*/ 1);
+
+    fix_transparency(ctx, sub_rect, page, chop_top, cmax, resx, resy);
+    sub_rect->x = ctx->x_offset;
+    sub_rect->y = ctx->y_offset + chop_top * BITMAP_CHAR_HEIGHT; /* shift down by the height of the chopped rows */
+    sub_rect->w = resx;
+    sub_rect->h = resy;
+    sub_rect->nb_colors = (int)cmax + 1; /* page colours plus the extra transparent entry */
+    sub_rect->pict.data[1] = av_mallocz(AVPALETTE_SIZE);
+    if (!sub_rect->pict.data[1]) {
+        av_freep(&sub_rect->pict.data[0]); /* release the pixel buffer allocated above */
+        return AVERROR(ENOMEM);
+    }
+    for (ci = 0; ci < cmax; ci++) { /* copy the page colour map into the palette */
+        int r, g, b, a;
+
+        r = VBI_R(page->color_map[ci]);
+        g = VBI_G(page->color_map[ci]);
+        b = VBI_B(page->color_map[ci]);
+        a = VBI_A(page->color_map[ci]);
+        ((uint32_t *)sub_rect->pict.data[1])[ci] = RGBA(r, g, b, a);
+        av_dlog(ctx, "palette %0x\n", ((uint32_t *)sub_rect->pict.data[1])[ci]);
+    }
+    ((uint32_t *)sub_rect->pict.data[1])[cmax] = RGBA(0, 0, 0, 0); /* entry written by fix_transparency(); all components zero */
+    sub_rect->type = SUBTITLE_BITMAP;
+    return 0;
+}
+/* Callback registered for VBI_EVENT_TTX_PAGE: fetch the announced page, render it and append it to ctx->pages; failures are stored in ctx->handler_ret. */
+static void handler(vbi_event *ev, void *user_data)
+{
+    TeletextContext *ctx = user_data;
+    TeletextPage *new_pages;
+    vbi_page page;
+    int res;
+    char pgno_str[12];
+    vbi_subno subno;
+    vbi_page_type vpt;
+    int chop_top;
+    char *lang;
+
+    snprintf(pgno_str, sizeof pgno_str, "%03x", ev->ev.ttx_page.pgno); /* page number as hex text, used for filtering and logging */
+    av_log(ctx, AV_LOG_DEBUG, "decoded page %s.%02x\n",
+           pgno_str, ev->ev.ttx_page.subno & 0xFF);
+
+    if (strcmp(ctx->pgno, "*") && !strstr(ctx->pgno, pgno_str)) /* page filter: "*" accepts all, otherwise substring match in the option string */
+        return;
+    if (ctx->handler_ret < 0) /* an earlier page in this call already failed */
+        return;
+
+    res = vbi_fetch_vt_page(ctx->vbi, &page,
+                            ev->ev.ttx_page.pgno,
+                            ev->ev.ttx_page.subno,
+                            VBI_WST_LEVEL_3p5, 25, TRUE);
+
+    if (!res) /* nothing fetched, so there is no page to unref */
+        return;
+
+#ifdef DEBUG
+    fprintf(stderr, "\nSaving res=%d dy0=%d dy1=%d...\n",
+            res, page.dirty.y0, page.dirty.y1);
+    fflush(stderr);
+
+    if (!vbi_export_stdio(ctx->ex, stderr, &page))
+        fprintf(stderr, "failed: %s\n", vbi_export_errstr(ctx->ex));
+#endif
+
+    vpt = vbi_classify_page(ctx->vbi, ev->ev.ttx_page.pgno, &subno, &lang);
+    chop_top = ctx->chop_top || /* 0 or 1: drop the top row if requested, or for multi-row subtitle pages */
+        ((page.rows > 1) && (vpt == VBI_SUBTITLE_PAGE));
+
+    av_log(ctx, AV_LOG_DEBUG, "%d x %d page chop:%d\n",
+           page.columns, page.rows, chop_top);
+
+    if (ctx->nb_pages < MAX_BUFFERED_PAGES) {
+        if ((new_pages = av_realloc_array(ctx->pages, ctx->nb_pages + 1, sizeof(TeletextPage)))) { /* grow the queue by one slot */
+            TeletextPage *cur_page = new_pages + ctx->nb_pages; /* the new, not yet counted slot */
+            ctx->pages = new_pages;
+            cur_page->sub_rect = av_mallocz(sizeof(*cur_page->sub_rect));
+            cur_page->pts = ctx->pts;
+            cur_page->pgno = ev->ev.ttx_page.pgno;
+            cur_page->subno = ev->ev.ttx_page.subno;
+            if (cur_page->sub_rect) {
+                res = (ctx->format_id == 0) ? /* format_id 0 renders a bitmap, anything else renders text */
+                    gen_sub_bitmap(ctx, cur_page->sub_rect, &page, chop_top) :
+                    gen_sub_text  (ctx, cur_page->sub_rect, &page, chop_top);
+                if (res < 0) {
+                    av_freep(&cur_page->sub_rect);
+                    ctx->handler_ret = res;
+                } else {
+                    ctx->pages[ctx->nb_pages++] = *cur_page; /* cur_page already points at this slot; the effective change is the count */
+                }
+            } else {
+                ctx->handler_ret = AVERROR(ENOMEM);
+            }
+        } else {
+            ctx->handler_ret = AVERROR(ENOMEM);
+        }
+    } else {
+        //TODO: If multiple packets contain more than one page, pages may get queued up, and this may happen...
+        av_log(ctx, AV_LOG_ERROR, "Buffered too many pages, dropping page %s.\n", pgno_str);
+        ctx->handler_ret = AVERROR(ENOSYS);
+    }
+
+    vbi_unref_page(&page); /* release the page obtained from vbi_fetch_vt_page() */
+}
+/* Return nonzero when data_identifier lies in 0x10..0x1F or 0x99..0x9B, the values this decoder accepts as teletext. */
+static inline int data_identifier_is_teletext(int data_identifier) {
+    /* See EN 301 775 section 4.4.2. */
+    return (data_identifier >= 0x10 && data_identifier <= 0x1F ||
+            data_identifier >= 0x99 && data_identifier <= 0x9B);
+}
+/* Walk the data units in buf (id byte, length byte, payload) and fill ctx->sliced; returns the number of lines stored or AVERROR_INVALIDDATA. */
+static int slice_to_vbi_lines(TeletextContext *ctx, uint8_t* buf, int size)
+{
+    int lines = 0;
+    while (size >= 2 && lines < MAX_SLICES) { /* need the two header bytes and a free slot */
+        int data_unit_id     = buf[0];
+        int data_unit_length = buf[1];
+        if (data_unit_length + 2 > size) /* unit claims more bytes than remain */
+            return AVERROR_INVALIDDATA;
+        if (data_unit_id == 0x02 || data_unit_id == 0x03) { /* only these ids are converted; other units are skipped below */
+            if (data_unit_length != 0x2c) /* converted units must carry exactly 44 payload bytes */
+                return AVERROR_INVALIDDATA;
+            else {
+                int line_offset  = buf[2] & 0x1f; /* low 5 bits */
+                int field_parity = buf[2] & 0x20; /* bit 5 */
+                int i;
+                ctx->sliced[lines].id = VBI_SLICED_TELETEXT_B;
+                ctx->sliced[lines].line = (line_offset > 0 ? (line_offset + (field_parity ? 0 : 313)) : 0); /* 313 is added when the parity bit is clear; offset 0 maps to line 0 */
+                for (i = 0; i < 42; i++)
+                    ctx->sliced[lines].data[i] = vbi_rev8(buf[4 + i]); /* 42 bytes from offset 4; presumably bit-reversed by vbi_rev8 - confirm in zvbi docs */
+                lines++;
+            }
+        }
+        size -= data_unit_length + 2; /* advance past header and payload */
+        buf += data_unit_length + 2;
+    }
+    if (size)
+        av_log(ctx, AV_LOG_WARNING, "%d bytes remained after slicing data\n", size);
+    return lines;
+}
+/* Decode callback: feed the packet's teletext lines to zvbi, then hand out the oldest queued page (if any) as an AVSubtitle. */
+static int teletext_decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *pkt)
+{
+    TeletextContext *ctx = avctx->priv_data;
+    AVSubtitle      *sub = data;
+    int             ret = 0;
+
+    if (!ctx->vbi) { /* the zvbi decoder is created on first use */
+        if (!(ctx->vbi = vbi_decoder_new()))
+            return AVERROR(ENOMEM);
+        if (!vbi_event_handler_add(ctx->vbi, VBI_EVENT_TTX_PAGE, handler, ctx)) {
+            vbi_decoder_delete(ctx->vbi);
+            ctx->vbi = NULL;
+            return AVERROR(ENOMEM);
+        }
+    }
+
+    if (avctx->pkt_timebase.den && pkt->pts != AV_NOPTS_VALUE)
+        ctx->pts = av_rescale_q(pkt->pts, avctx->pkt_timebase, AV_TIME_BASE_Q); /* handler() stamps new pages with this value */
+
+    if (pkt->size) {
+        int lines;
+        const int full_pes_size = pkt->size + 45; /* PES header is 45 bytes */
+
+        // We allow unreasonably big packets, even if the standard only allows a max size of 1472
+        if (full_pes_size < 184 || full_pes_size > 65504 || full_pes_size % 184 != 0)
+            return AVERROR_INVALIDDATA;
+
+        ctx->handler_ret = pkt->size; /* default result; handler() replaces it with a negative code on failure */
+
+        if (data_identifier_is_teletext(*pkt->data)) { /* first byte is the data identifier */
+            if ((lines = slice_to_vbi_lines(ctx, pkt->data + 1, pkt->size - 1)) < 0)
+                return lines;
+            av_dlog(avctx, "ctx=%p buf_size=%d lines=%u pkt_pts=%7.3f\n",
+                    ctx, pkt->size, lines, (double)pkt->pts/90000.0); /* NOTE(review): lines is an int but is printed with %u */
+            if (lines > 0) {
+#ifdef DEBUG
+                int i;
+                av_log(avctx, AV_LOG_DEBUG, "line numbers:");
+                for(i = 0; i < lines; i++)
+                    av_log(avctx, AV_LOG_DEBUG, " %d", ctx->sliced[i].line);
+                av_log(avctx, AV_LOG_DEBUG, "\n");
+#endif
+                vbi_decode(ctx->vbi, ctx->sliced, lines, 0.0); /* presumably invokes handler() for completed pages - confirm in zvbi docs */
+                ctx->lines_processed += lines;
+            }
+        }
+        ctx->pts = AV_NOPTS_VALUE;
+        ret = ctx->handler_ret;
+    }
+
+    if (ret < 0)
+        return ret;
+
+    // is there a subtitle to pass?
+    if (ctx->nb_pages) {
+        int i;
+        sub->format = ctx->format_id;
+        sub->start_display_time = 0;
+        sub->end_display_time = ctx->sub_duration;
+        sub->num_rects = 0;
+        sub->pts = ctx->pages->pts; /* ctx->pages points at the oldest queued page */
+
+        if (ctx->pages->sub_rect->type != SUBTITLE_NONE) {
+            sub->rects = av_malloc(sizeof(*sub->rects));
+            if (sub->rects) {
+                sub->num_rects = 1;
+                sub->rects[0] = ctx->pages->sub_rect; /* the rect pointer is handed over to sub */
+            } else {
+                ret = AVERROR(ENOMEM);
+            }
+        } else {
+            av_log(avctx, AV_LOG_DEBUG, "sending empty sub\n");
+            sub->rects = NULL;
+        }
+        if (!sub->rects) // no rect was passed
+            subtitle_rect_free(&ctx->pages->sub_rect);
+
+        for (i = 0; i < ctx->nb_pages - 1; i++) /* remove the head by shifting the remaining pages down */
+            ctx->pages[i] = ctx->pages[i + 1];
+        ctx->nb_pages--;
+
+        if (ret >= 0)
+            *data_size = 1; /* a subtitle was produced */
+    } else
+        *data_size = 0;
+
+    return ret;
+}
+/* Init callback: check the zvbi version, set the picture size for bitmap output and reset the decoder state. */
+static int teletext_init_decoder(AVCodecContext *avctx)
+{
+    TeletextContext *ctx = avctx->priv_data;
+    unsigned int maj, min, rev;
+
+    vbi_version(&maj, &min, &rev);
+    if (!(maj > 0 || min > 2 || min == 2 && rev >= 26)) { /* accept 0.2.26 and anything newer */
+        av_log(avctx, AV_LOG_ERROR, "decoder needs zvbi version >= 0.2.26.\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    if (ctx->format_id == 0) { /* bitmap output: 41 x 25 character cells */
+        avctx->width  = 41 * BITMAP_CHAR_WIDTH;
+        avctx->height = 25 * BITMAP_CHAR_HEIGHT;
+    }
+
+    ctx->vbi = NULL; /* the zvbi decoder is not created here */
+    ctx->pts = AV_NOPTS_VALUE;
+
+#ifdef DEBUG
+    {
+        char *t;
+        ctx->ex = vbi_export_new("text", &t);
+    }
+#endif
+    av_log(avctx, AV_LOG_VERBOSE, "page filter: %s\n", ctx->pgno);
+    return (ctx->format_id == 1) ? ff_ass_subtitle_header_default(avctx) : 0; /* text output installs the default ASS header */
+}
+/* Close callback: free every queued page, the page array and the zvbi decoder, and reset the stored pts. Always returns 0. */
+static int teletext_close_decoder(AVCodecContext *avctx)
+{
+    TeletextContext *ctx = avctx->priv_data;
+
+    av_dlog(avctx, "lines_total=%u\n", ctx->lines_processed);
+    while (ctx->nb_pages) /* free the rects from the last queued page down to the first */
+        subtitle_rect_free(&ctx->pages[--ctx->nb_pages].sub_rect);
+    av_freep(&ctx->pages);
+
+    vbi_decoder_delete(ctx->vbi);
+    ctx->vbi = NULL; /* a NULL vbi makes teletext_decode_frame() create a new decoder */
+    ctx->pts = AV_NOPTS_VALUE;
+    return 0;
+}
+/* Flush callback: drops all buffered state by running the same teardown as teletext_close_decoder(). */
+static void teletext_flush(AVCodecContext *avctx)
+{
+    teletext_close_decoder(avctx); /* its return value (always 0) is ignored */
+}
+/* User options of the decoder; each entry maps an option name to a TeletextContext field via OFFSET(). */
+#define OFFSET(x) offsetof(TeletextContext, x)
+#define SD AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_DECODING_PARAM
+static const AVOption options[] = {
+    {"txt_page",        "list of teletext page numbers to decode, * is all", OFFSET(pgno),           AV_OPT_TYPE_STRING, {.str = "*"},      0, 0,        SD},
+    {"txt_chop_top",    "discards the top teletext line",                    OFFSET(chop_top),       AV_OPT_TYPE_INT,    {.i64 = 1},        0, 1,        SD},
+    {"txt_format",      "format of the subtitles (bitmap or text)",          OFFSET(format_id),      AV_OPT_TYPE_INT,    {.i64 = 0},        0, 1,        SD,  "txt_format"},
+    {"bitmap",          NULL,                                                0,                      AV_OPT_TYPE_CONST,  {.i64 = 0},        0, 0,        SD,  "txt_format"},
+    {"text",            NULL,                                                0,                      AV_OPT_TYPE_CONST,  {.i64 = 1},        0, 0,        SD,  "txt_format"},
+    {"txt_left",        "x offset of generated bitmaps",                     OFFSET(x_offset),       AV_OPT_TYPE_INT,    {.i64 = 0},        0, 65535,    SD},
+    {"txt_top",         "y offset of generated bitmaps",                     OFFSET(y_offset),       AV_OPT_TYPE_INT,    {.i64 = 0},        0, 65535,    SD},
+    {"txt_chop_spaces", "chops leading and trailing spaces from text",       OFFSET(chop_spaces),    AV_OPT_TYPE_INT,    {.i64 = 1},        0, 1,        SD},
+    {"txt_duration",    "display duration of teletext pages in msecs",       OFFSET(sub_duration),   AV_OPT_TYPE_INT,    {.i64 = 30000},    0, 86400000, SD},
+    {"txt_transparent", "force transparent background of the teletext",      OFFSET(transparent_bg), AV_OPT_TYPE_INT,    {.i64 = 0},        0, 1,        SD},
+    { NULL }, /* terminating entry */
+};
+/* AVClass that ties the options table above to this decoder's private context. */
+static const AVClass teletext_class = {
+    .class_name = "libzvbi_teletextdec",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+/* Codec descriptor: registers the init/close/decode/flush callbacks defined above for AV_CODEC_ID_DVB_TELETEXT. */
+AVCodec ff_libzvbi_teletext_decoder = {
+    .name      = "libzvbi_teletextdec",
+    .long_name = NULL_IF_CONFIG_SMALL("Libzvbi DVB teletext decoder"),
+    .type      = AVMEDIA_TYPE_SUBTITLE,
+    .id        = AV_CODEC_ID_DVB_TELETEXT,
+    .priv_data_size = sizeof(TeletextContext),
+    .init      = teletext_init_decoder,
+    .close     = teletext_close_decoder,
+    .decode    = teletext_decode_frame,
+    .capabilities = CODEC_CAP_DELAY,
+    .flush     = teletext_flush,
+    .priv_class= &teletext_class,
+};
diff --git a/libavcodec/ljpegenc.c b/libavcodec/ljpegenc.c
index fbb024b..19ae151 100644
--- a/libavcodec/ljpegenc.c
+++ b/libavcodec/ljpegenc.c
@@ -8,20 +8,20 @@
  * aspecting, new decode_frame mechanism and apple mjpeg-b support
  *                                  by Alex Beregszaszi
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -77,7 +77,7 @@ static int ljpeg_encode_bgr(AVCodecContext *avctx, PutBitContext *pb,
         const int modified_predictor = y ? predictor : 1;
         uint8_t *ptr = frame->data[0] + (linesize * y);
 
-        if (pb->buf_end - pb->buf - (put_bits_count(pb) >> 3) < width * 3 * 3) {
+        if (pb->buf_end - pb->buf - (put_bits_count(pb) >> 3) < width * 3 * 4) {
             av_log(avctx, AV_LOG_ERROR, "encoded frame too large\n");
             return -1;
         }
@@ -86,9 +86,15 @@ static int ljpeg_encode_bgr(AVCodecContext *avctx, PutBitContext *pb,
             top[i]= left[i]= topleft[i]= buffer[0][i];
 
         for (x = 0; x < width; x++) {
-            buffer[x][1] =  ptr[3 * x + 0] -     ptr[3 * x + 1] + 0x100;
-            buffer[x][2] =  ptr[3 * x + 2] -     ptr[3 * x + 1] + 0x100;
-            buffer[x][0] = (ptr[3 * x + 0] + 2 * ptr[3 * x + 1] + ptr[3 * x + 2]) >> 2;
+            if(avctx->pix_fmt == AV_PIX_FMT_BGR24){
+                buffer[x][1] =  ptr[3 * x + 0] -     ptr[3 * x + 1] + 0x100;
+                buffer[x][2] =  ptr[3 * x + 2] -     ptr[3 * x + 1] + 0x100;
+                buffer[x][0] = (ptr[3 * x + 0] + 2 * ptr[3 * x + 1] + ptr[3 * x + 2]) >> 2;
+            }else{
+                buffer[x][1] =  ptr[4 * x + 0] -     ptr[4 * x + 1] + 0x100;
+                buffer[x][2] =  ptr[4 * x + 2] -     ptr[4 * x + 1] + 0x100;
+                buffer[x][0] = (ptr[4 * x + 0] + 2 * ptr[4 * x + 1] + ptr[4 * x + 2]) >> 2;
+            }
 
             for (i = 0; i < 3; i++) {
                 int pred, diff;
@@ -213,25 +219,28 @@ static int ljpeg_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     int max_pkt_size = FF_MIN_BUFFER_SIZE;
     int ret, header_bits;
 
-    if (avctx->pix_fmt == AV_PIX_FMT_BGR24)
-        max_pkt_size += width * height * 3 * 3;
+    if(    avctx->pix_fmt == AV_PIX_FMT_BGR0
+        || avctx->pix_fmt == AV_PIX_FMT_BGRA
+        || avctx->pix_fmt == AV_PIX_FMT_BGR24)
+        max_pkt_size += width * height * 3 * 4;
     else {
         max_pkt_size += mb_width * mb_height * 3 * 4
                         * s->hsample[0] * s->vsample[0];
     }
-    if ((ret = ff_alloc_packet(pkt, max_pkt_size)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet of size %d.\n", max_pkt_size);
+
+    if ((ret = ff_alloc_packet2(avctx, pkt, max_pkt_size)) < 0)
         return ret;
-    }
 
     init_put_bits(&pb, pkt->data, pkt->size);
 
     ff_mjpeg_encode_picture_header(avctx, &pb, &s->scantable,
-                                   s->matrix);
+                                   s->matrix, s->matrix);
 
     header_bits = put_bits_count(&pb);
 
-    if (avctx->pix_fmt == AV_PIX_FMT_BGR24)
+    if(    avctx->pix_fmt == AV_PIX_FMT_BGR0
+        || avctx->pix_fmt == AV_PIX_FMT_BGRA
+        || avctx->pix_fmt == AV_PIX_FMT_BGR24)
         ret = ljpeg_encode_bgr(avctx, &pb, pict);
     else
         ret = ljpeg_encode_yuv(avctx, &pb, pict);
@@ -240,6 +249,7 @@ static int ljpeg_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
     emms_c();
 
+    ff_mjpeg_escape_FF(&pb, header_bits >> 3);
     ff_mjpeg_encode_picture_trailer(&pb, header_bits);
 
     flush_put_bits(&pb);
@@ -263,7 +273,6 @@ static av_cold int ljpeg_encode_close(AVCodecContext *avctx)
 static av_cold int ljpeg_encode_init(AVCodecContext *avctx)
 {
     LJpegEncContext *s = avctx->priv_data;
-    int chroma_v_shift, chroma_h_shift;
 
     if ((avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
          avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
@@ -289,21 +298,7 @@ static av_cold int ljpeg_encode_init(AVCodecContext *avctx)
     ff_init_scantable(s->idsp.idct_permutation, &s->scantable,
                       ff_zigzag_direct);
 
-    av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
-                                     &chroma_v_shift);
-
-    if (avctx->pix_fmt   == AV_PIX_FMT_BGR24) {
-        s->vsample[0] = s->hsample[0] =
-        s->vsample[1] = s->hsample[1] =
-        s->vsample[2] = s->hsample[2] = 1;
-    } else {
-        s->vsample[0] = 2;
-        s->vsample[1] = 2 >> chroma_v_shift;
-        s->vsample[2] = 2 >> chroma_v_shift;
-        s->hsample[0] = 2;
-        s->hsample[1] = 2 >> chroma_h_shift;
-        s->hsample[2] = 2 >> chroma_h_shift;
-    }
+    ff_mjpeg_init_hvsample(avctx, s->hsample, s->vsample);
 
     ff_mjpeg_build_huffman_codes(s->huff_size_dc_luminance,
                                  s->huff_code_dc_luminance,
@@ -326,12 +321,10 @@ AVCodec ff_ljpeg_encoder = {
     .init           = ljpeg_encode_init,
     .encode2        = ljpeg_encode_frame,
     .close          = ljpeg_encode_close,
-    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUVJ420P,
-                                                    AV_PIX_FMT_YUVJ422P,
-                                                    AV_PIX_FMT_YUVJ444P,
-                                                    AV_PIX_FMT_BGR24,
-                                                    AV_PIX_FMT_YUV420P,
-                                                    AV_PIX_FMT_YUV422P,
-                                                    AV_PIX_FMT_YUV444P,
-                                                    AV_PIX_FMT_NONE },
+    .capabilities   = CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
+    .pix_fmts       = (const enum AVPixelFormat[]){
+        AV_PIX_FMT_BGR24   , AV_PIX_FMT_BGRA    , AV_PIX_FMT_BGR0,
+        AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ422P,
+        AV_PIX_FMT_YUV420P , AV_PIX_FMT_YUV444P , AV_PIX_FMT_YUV422P,
+        AV_PIX_FMT_NONE},
 };
diff --git a/libavcodec/loco.c b/libavcodec/loco.c
index 6be081d..bf52c49 100644
--- a/libavcodec/loco.c
+++ b/libavcodec/loco.c
@@ -2,20 +2,20 @@
  * LOCO codec
  * Copyright (c) 2005 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -132,7 +132,10 @@ static int loco_decode_plane(LOCOContext *l, uint8_t *data, int width, int heigh
     int val;
     int i, j;
 
-    init_get_bits(&rc.gb, buf, buf_size*8);
+    if(buf_size<=0)
+        return -1;
+
+    init_get_bits8(&rc.gb, buf, buf_size);
     rc.save  = 0;
     rc.run   = 0;
     rc.run2  = 0;
@@ -175,88 +178,70 @@ static int decode_frame(AVCodecContext *avctx,
     AVFrame * const p     = data;
     int decoded, ret;
 
-    if ((ret = ff_get_buffer(avctx, p, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
         return ret;
-    }
     p->key_frame = 1;
 
+#define ADVANCE_BY_DECODED do { \
+    if (decoded < 0 || decoded >= buf_size) goto buf_too_small; \
+    buf += decoded; buf_size -= decoded; \
+} while(0)
     switch(l->mode) {
     case LOCO_CYUY2: case LOCO_YUY2: case LOCO_UYVY:
         decoded = loco_decode_plane(l, p->data[0], avctx->width, avctx->height,
                                     p->linesize[0], buf, buf_size, 1);
-        if (decoded >= buf_size)
-            goto buf_too_small;
-        buf += decoded; buf_size -= decoded;
-
+        ADVANCE_BY_DECODED;
         decoded = loco_decode_plane(l, p->data[1], avctx->width / 2, avctx->height,
                                     p->linesize[1], buf, buf_size, 1);
-        if (decoded >= buf_size)
-            goto buf_too_small;
-        buf += decoded; buf_size -= decoded;
-
+        ADVANCE_BY_DECODED;
         decoded = loco_decode_plane(l, p->data[2], avctx->width / 2, avctx->height,
                                     p->linesize[2], buf, buf_size, 1);
         break;
     case LOCO_CYV12: case LOCO_YV12:
         decoded = loco_decode_plane(l, p->data[0], avctx->width, avctx->height,
                                     p->linesize[0], buf, buf_size, 1);
-        if (decoded >= buf_size)
-            goto buf_too_small;
-        buf += decoded; buf_size -= decoded;
-
+        ADVANCE_BY_DECODED;
         decoded = loco_decode_plane(l, p->data[2], avctx->width / 2, avctx->height / 2,
                                     p->linesize[2], buf, buf_size, 1);
-        if (decoded >= buf_size)
-            goto buf_too_small;
-        buf += decoded; buf_size -= decoded;
-
+        ADVANCE_BY_DECODED;
         decoded = loco_decode_plane(l, p->data[1], avctx->width / 2, avctx->height / 2,
                                     p->linesize[1], buf, buf_size, 1);
         break;
     case LOCO_CRGB: case LOCO_RGB:
         decoded = loco_decode_plane(l, p->data[0] + p->linesize[0]*(avctx->height-1), avctx->width, avctx->height,
                                     -p->linesize[0], buf, buf_size, 3);
-        if (decoded >= buf_size)
-            goto buf_too_small;
-        buf += decoded; buf_size -= decoded;
-
+        ADVANCE_BY_DECODED;
         decoded = loco_decode_plane(l, p->data[0] + p->linesize[0]*(avctx->height-1) + 1, avctx->width, avctx->height,
                                     -p->linesize[0], buf, buf_size, 3);
-        if (decoded >= buf_size)
-            goto buf_too_small;
-        buf += decoded; buf_size -= decoded;
-
+        ADVANCE_BY_DECODED;
         decoded = loco_decode_plane(l, p->data[0] + p->linesize[0]*(avctx->height-1) + 2, avctx->width, avctx->height,
                                     -p->linesize[0], buf, buf_size, 3);
         break;
+    case LOCO_CRGBA:
     case LOCO_RGBA:
-        decoded = loco_decode_plane(l, p->data[0], avctx->width, avctx->height,
-                                    p->linesize[0], buf, buf_size, 4);
-        if (decoded >= buf_size)
-            goto buf_too_small;
-        buf += decoded; buf_size -= decoded;
-
-        decoded = loco_decode_plane(l, p->data[0] + 1, avctx->width, avctx->height,
-                                    p->linesize[0], buf, buf_size, 4);
-        if (decoded >= buf_size)
-            goto buf_too_small;
-        buf += decoded; buf_size -= decoded;
-
-        decoded = loco_decode_plane(l, p->data[0] + 2, avctx->width, avctx->height,
-                                    p->linesize[0], buf, buf_size, 4);
-        if (decoded >= buf_size)
-            goto buf_too_small;
-        buf += decoded; buf_size -= decoded;
-
-        decoded = loco_decode_plane(l, p->data[0] + 3, avctx->width, avctx->height,
-                                    p->linesize[0], buf, buf_size, 4);
+        decoded = loco_decode_plane(l, p->data[0] + p->linesize[0]*(avctx->height-1), avctx->width, avctx->height,
+                                    -p->linesize[0], buf, buf_size, 4);
+        ADVANCE_BY_DECODED;
+        decoded = loco_decode_plane(l, p->data[0] + p->linesize[0]*(avctx->height-1) + 1, avctx->width, avctx->height,
+                                    -p->linesize[0], buf, buf_size, 4);
+        ADVANCE_BY_DECODED;
+        decoded = loco_decode_plane(l, p->data[0] + p->linesize[0]*(avctx->height-1) + 2, avctx->width, avctx->height,
+                                    -p->linesize[0], buf, buf_size, 4);
+        ADVANCE_BY_DECODED;
+        decoded = loco_decode_plane(l, p->data[0] + p->linesize[0]*(avctx->height-1) + 3, avctx->width, avctx->height,
+                                    -p->linesize[0], buf, buf_size, 4);
         break;
+    default:
+        av_assert0(0);
     }
 
+    if (decoded < 0 || decoded > buf_size)
+        goto buf_too_small;
+    buf_size -= decoded;
+
     *got_frame      = 1;
 
-    return buf_size;
+    return avpkt->size - buf_size;
 buf_too_small:
     av_log(avctx, AV_LOG_ERROR, "Input data too small.\n");
     return AVERROR(EINVAL);
@@ -303,7 +288,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
         break;
     case LOCO_CRGBA:
     case LOCO_RGBA:
-        avctx->pix_fmt = AV_PIX_FMT_RGB32;
+        avctx->pix_fmt = AV_PIX_FMT_BGRA;
         break;
     default:
         av_log(avctx, AV_LOG_INFO, "Unknown colorspace, index = %i\n", l->mode);
diff --git a/libavcodec/lossless_audiodsp.c b/libavcodec/lossless_audiodsp.c
new file mode 100644
index 0000000..32f4c9e
--- /dev/null
+++ b/libavcodec/lossless_audiodsp.c
@@ -0,0 +1,49 @@
+/*
+ * Monkey's Audio lossless audio decoder
+ * Copyright (c) 2007 Benjamin Zores <ben@geexbox.org>
+ *  based upon libdemac from Dave Chapman.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "lossless_audiodsp.h"
+/* C version: return the sum of v1[i] * v2[i] over order elements while also doing v1[i] += mul * v3[i] in the same pass. */
+static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2,
+                                              const int16_t *v3,
+                                              int order, int mul)
+{
+    int res = 0;
+
+    while (order--) {
+        res   += *v1 * *v2++; /* the product uses v1[i] before it is updated */
+        *v1++ += mul * *v3++; /* in-place multiply-add, stored back as int16_t */
+    }
+    return res;
+}
+/* Fill the DSP context with the C implementation, then call the init function of each architecture whose ARCH_* macro is nonzero. */
+av_cold void ff_llauddsp_init(LLAudDSPContext *c)
+{
+    c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c; /* portable default */
+
+    if (ARCH_ARM)
+        ff_llauddsp_init_arm(c); /* presumably replaces the pointer with an optimized version - not visible here */
+    if (ARCH_PPC)
+        ff_llauddsp_init_ppc(c);
+    if (ARCH_X86)
+        ff_llauddsp_init_x86(c);
+}
diff --git a/libavcodec/apedsp.h b/libavcodec/lossless_audiodsp.h
index 64e2749..4c27502 100644
--- a/libavcodec/apedsp.h
+++ b/libavcodec/lossless_audiodsp.h
@@ -3,29 +3,29 @@
  * Copyright (c) 2007 Benjamin Zores <ben@geexbox.org>
  *  based upon libdemac from Dave Chapman.
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVCODEC_APEDSP_H
-#define AVCODEC_APEDSP_H
+#ifndef AVCODEC_LLAUDDSP_H
+#define AVCODEC_LLAUDDSP_H
 
 #include <stdint.h>
 
-typedef struct APEDSPContext {
+typedef struct LLAudDSPContext {
     /**
      * Calculate scalar product of v1 and v2,
      * and v1[i] += v3[i] * mul
@@ -35,10 +35,11 @@ typedef struct APEDSPContext {
                                             const int16_t *v2,
                                             const int16_t *v3,
                                             int len, int mul);
-} APEDSPContext;
+} LLAudDSPContext;
 
-void ff_apedsp_init_arm(APEDSPContext *c);
-void ff_apedsp_init_ppc(APEDSPContext *c);
-void ff_apedsp_init_x86(APEDSPContext *c);
+void ff_llauddsp_init(LLAudDSPContext *c);
+void ff_llauddsp_init_arm(LLAudDSPContext *c);
+void ff_llauddsp_init_ppc(LLAudDSPContext *c);
+void ff_llauddsp_init_x86(LLAudDSPContext *c);
 
-#endif /* AVCODEC_APEDSP_H */
+#endif /* AVCODEC_LLAUDDSP_H */
diff --git a/libavcodec/lossless_videodsp.c b/libavcodec/lossless_videodsp.c
new file mode 100644
index 0000000..3491621
--- /dev/null
+++ b/libavcodec/lossless_videodsp.c
@@ -0,0 +1,128 @@
+/*
+ * Lossless video DSP utils
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "avcodec.h"
+#include "lossless_videodsp.h"
+#include "libavcodec/mathops.h"
+
+static void add_int16_c(uint16_t *dst, const uint16_t *src, unsigned mask, int w){ /* dst[i] = (dst[i] + src[i]) & mask for 0 <= i < w, as spelled out by the scalar tail loop */
+    long i; /* sample index; after the word loop it points at the first sample not yet processed */
+    unsigned long pw_lsb = (mask >> 1) * 0x0001000100010001ULL; /* (mask >> 1) copied into every 16-bit lane of a long */
+    unsigned long pw_msb = pw_lsb +  0x0001000100010001ULL; /* pw_lsb + 1 per lane: the top bit of mask, assuming mask has the form 2^n - 1 (TODO confirm with callers) */
+    for (i = 0; i <= w - (int)sizeof(long)/2; i += sizeof(long)/2) { /* sizeof(long)/2 samples per iteration */
+        long a = *(long*)(src+i); /* NOTE(review): samples are read and written through long*, so this relies on suitably aligned dst/src */
+        long b = *(long*)(dst+i);
+        *(long*)(dst+i) = ((a&pw_lsb) + (b&pw_lsb)) ^ ((a^b)&pw_msb); /* add the low bits lane by lane (the carry cannot leave the lane), then xor in the sum of the top bits */
+    }
+    for(; i<w; i++) /* samples that did not fill a whole long, one at a time */
+        dst[i] = (dst[i] + src[i]) & mask;
+}
+
+static void diff_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w){ /* dst[i] = (src1[i] - src2[i]) & mask for 0 <= i < w */
+    long i; /* set by whichever loop runs first, then continued by the scalar tail loop */
+#if !HAVE_FAST_UNALIGNED /* only when unaligned access is not fast: test src2's alignment before taking the word path */
+    if((long)src2 & (sizeof(long)-1)){ /* src2 is not long-aligned: scalar path, four samples per iteration */
+        for(i=0; i+3<w; i+=4){
+            dst[i+0] = (src1[i+0]-src2[i+0]) & mask;
+            dst[i+1] = (src1[i+1]-src2[i+1]) & mask;
+            dst[i+2] = (src1[i+2]-src2[i+2]) & mask;
+            dst[i+3] = (src1[i+3]-src2[i+3]) & mask;
+        }
+    }else
+#endif
+    {
+        unsigned long pw_lsb = (mask >> 1) * 0x0001000100010001ULL; /* (mask >> 1) copied into every 16-bit lane of a long */
+        unsigned long pw_msb = pw_lsb +  0x0001000100010001ULL; /* pw_lsb + 1 per lane: the top bit of mask, assuming mask has the form 2^n - 1 (TODO confirm) */
+        /* Word path: sizeof(long)/2 samples per iteration. NOTE(review): dst and src1 are accessed through long* without any alignment test here. */
+        for (i = 0; i <= w - (int)sizeof(long)/2; i += sizeof(long)/2) {
+            long a = *(long*)(src1+i);
+            long b = *(long*)(src2+i);
+            *(long*)(dst+i) = ((a|pw_msb) - (b&pw_lsb)) ^ ((a^b^pw_msb)&pw_msb); /* forcing the top bit on in a stops the subtraction borrowing across lanes; the xor then restores the correct top bit */
+        }
+    }
+    for (; i<w; i++) /* leftover samples from either path */
+        dst[i] = (src1[i] - src2[i]) & mask;
+}
+
+static void add_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top){ /* dst[i] = (prediction + diff[i]) & mask for 0 <= i < w; *left and *left_top are read on entry and updated on exit */
+    int i;
+    uint16_t l, lt; /* l: last value written to dst; lt: previous src sample */
+    /* Load the running state supplied by the caller (narrowed from int to uint16_t). */
+    l  = *left;
+    lt = *left_top;
+    /* mid_pred() is defined outside this file; presumably it returns the median of its three arguments -- TODO confirm in mathops.h. */
+    for(i=0; i<w; i++){
+        l  = (mid_pred(l, src[i], (l + src[i] - lt) & mask) + diff[i]) & mask; /* candidates: l, src[i], and the masked gradient l + src[i] - lt */
+        lt = src[i]; /* the current src sample becomes the next iteration's lt */
+        dst[i] = l;
+    }
+    /* Write the state back so a following call can continue from here. */
+    *left     = l;
+    *left_top = lt;
+}
+
+static void sub_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w, int *left, int *left_top){ /* dst[i] = (src2[i] - prediction) & mask for 0 <= i < w; *left and *left_top are read on entry and updated on exit */
+    int i;
+    uint16_t l, lt; /* l: previous src2 sample; lt: previous src1 sample */
+    /* Load the running state supplied by the caller (narrowed from int to uint16_t). */
+    l  = *left;
+    lt = *left_top;
+    /* mid_pred() is defined outside this file; presumably it returns the median of its three arguments -- TODO confirm in mathops.h. */
+    for(i=0; i<w; i++){
+        const int pred = mid_pred(l, src1[i], (l + src1[i] - lt) & mask); /* candidates: l, src1[i], and the masked gradient l + src1[i] - lt */
+        lt = src1[i];
+        l  = src2[i]; /* the actual sample, not the residual, is carried to the next iteration */
+        dst[i] = (l - pred) & mask;
+    }
+    /* Write the state back so a following call can continue from here. */
+    *left     = l;
+    *left_top = lt;
+}
+
+static int add_hfyu_left_pred_int16_c(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned acc){ /* running sum: acc += src[i]; dst[i] = acc & mask for 0 <= i < w; returns the final acc */
+    int i;
+    /* Main loop handles two samples per iteration: i is advanced once in the body and once by the for statement. */
+    for(i=0; i<w-1; i++){
+        acc+= src[i];
+        dst[i]= acc & mask;
+        i++;
+        acc+= src[i];
+        dst[i]= acc & mask;
+    }
+    /* Picks up the last sample when w is odd. */
+    for(; i<w; i++){
+        acc+= src[i];
+        dst[i]= acc & mask;
+    }
+    /* Only the stored values are masked; acc itself is returned unmasked (converted from unsigned to int). */
+    return acc;
+}
+
+
+void ff_llviddsp_init(LLVidDSPContext *c, AVCodecContext *avctx) /* Fill *c with the C routines of this file, then run the x86 init call. */
+{
+    c->add_int16 = add_int16_c;
+    c->diff_int16= diff_int16_c;
+    c->add_hfyu_left_pred_int16   = add_hfyu_left_pred_int16_c;
+    c->add_hfyu_median_pred_int16 = add_hfyu_median_pred_int16_c;
+    c->sub_hfyu_median_pred_int16 = sub_hfyu_median_pred_int16_c;
+    /* avctx is not used here, only forwarded; the x86 call runs last so it may overwrite the pointers set above (presumably with SIMD versions -- confirm in the x86 file). */
+    if (ARCH_X86)
+        ff_llviddsp_init_x86(c, avctx);
+}
diff --git a/libavcodec/lossless_videodsp.h b/libavcodec/lossless_videodsp.h
new file mode 100644
index 0000000..040902e
--- /dev/null
+++ b/libavcodec/lossless_videodsp.h
@@ -0,0 +1,40 @@
+/*
+ * Lossless video DSP utils
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#ifndef AVCODEC_LOSSLESS_VIDEODSP_H
+#define AVCODEC_LOSSLESS_VIDEODSP_H
+
+#include "avcodec.h"
+#include "libavutil/cpu.h"
+
+typedef struct LLVidDSPContext { /* Function-pointer table for routines on rows of 16-bit samples; every routine takes a bit mask and a width w counted in samples. Filled by ff_llviddsp_init(). */
+    void (*add_int16)(uint16_t *dst/*align 16*/, const uint16_t *src/*align 16*/, unsigned mask, int w);
+    void (*diff_int16)(uint16_t *dst/*align 16*/, const uint16_t *src1/*align 16*/, const uint16_t *src2/*align 1*/, unsigned mask, int w);
+    /* Prediction helpers: in the median variants, left and left_top point to state that the C versions read on entry and write back on exit. */
+    void (*sub_hfyu_median_pred_int16)(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w, int *left, int *left_top);
+    void (*add_hfyu_median_pred_int16)(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top);
+    int  (*add_hfyu_left_pred_int16)(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned left);
+} LLVidDSPContext;
+
+void ff_llviddsp_init(LLVidDSPContext *llviddsp, AVCodecContext *avctx);
+void ff_llviddsp_init_x86(LLVidDSPContext *llviddsp, AVCodecContext *avctx);
+
+#endif //AVCODEC_LOSSLESS_VIDEODSP_H
diff --git a/libavcodec/lpc.c b/libavcodec/lpc.c
index fbd1bdf..a6f2377 100644
--- a/libavcodec/lpc.c
+++ b/libavcodec/lpc.c
@@ -2,28 +2,29 @@
  * LPC utility code
  * Copyright (c) 2006  Justin Ruggles <justin.ruggles@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "libavutil/common.h"
-#include "libavutil/lls.h"
+#include "libavutil/lls2.h"
 
 #define LPC_USE_DOUBLE
 #include "lpc.h"
+#include "libavutil/avassert.h"
 
 
 /**
@@ -38,7 +39,7 @@ static void lpc_apply_welch_window_c(const int32_t *data, int len,
 
     /* The optimization in commit fa4ed8c does not support odd len.
      * If someone wants odd len extend that change. */
-    assert(!(len & 1));
+    av_assert2(!(len & 1));
 
     n2 = (len >> 1);
     c = 2.0 / (len - 1.0);
@@ -179,8 +180,9 @@ int ff_lpc_calc_coefs(LPCContext *s,
     int i, j, pass = 0;
     int opt_order;
 
-    assert(max_order >= MIN_LPC_ORDER && max_order <= MAX_LPC_ORDER &&
+    av_assert2(max_order >= MIN_LPC_ORDER && max_order <= MAX_LPC_ORDER &&
            lpc_type > FF_LPC_TYPE_FIXED);
+    av_assert0(lpc_type == FF_LPC_TYPE_CHOLESKY || lpc_type == FF_LPC_TYPE_LEVINSON);
 
     /* reinit LPC context if parameters have changed */
     if (blocksize != s->blocksize || max_order != s->max_order ||
@@ -189,6 +191,9 @@ int ff_lpc_calc_coefs(LPCContext *s,
         ff_lpc_init(s, blocksize, max_order, lpc_type);
     }
 
+    if(lpc_passes <= 0)
+        lpc_passes = 2;
+
     if (lpc_type == FF_LPC_TYPE_LEVINSON || (lpc_type == FF_LPC_TYPE_CHOLESKY && lpc_passes > 1)) {
         s->lpc_apply_welch_window(samples, blocksize, s->windowed_samples);
 
@@ -203,7 +208,7 @@ int ff_lpc_calc_coefs(LPCContext *s,
     }
 
     if (lpc_type == FF_LPC_TYPE_CHOLESKY) {
-        LLSModel m[2];
+        LLSModel2 m[2];
         LOCAL_ALIGNED(32, double, var, [FFALIGN(MAX_LPC_ORDER+1,4)]);
         double av_uninit(weight);
         memset(var, 0, FFALIGN(MAX_LPC_ORDER+1,4)*sizeof(*var));
@@ -212,7 +217,7 @@ int ff_lpc_calc_coefs(LPCContext *s,
             m[0].coeff[max_order-1][j] = -lpc[max_order-1][j];
 
         for(; pass<lpc_passes; pass++){
-            avpriv_init_lls(&m[pass&1], max_order);
+            avpriv_init_lls2(&m[pass&1], max_order);
 
             weight=0;
             for(i=max_order; i<blocksize; i++){
@@ -233,7 +238,7 @@ int ff_lpc_calc_coefs(LPCContext *s,
 
                 m[pass&1].update_lls(&m[pass&1], var);
             }
-            avpriv_solve_lls(&m[pass&1], 0.001, 0);
+            avpriv_solve_lls2(&m[pass&1], 0.001, 0);
         }
 
         for(i=0; i<max_order; i++){
@@ -244,6 +249,7 @@ int ff_lpc_calc_coefs(LPCContext *s,
         for(i=max_order-1; i>0; i--)
             ref[i] = ref[i-1] - ref[i];
     }
+
     opt_order = max_order;
 
     if(omethod == ORDER_METHOD_EST) {
diff --git a/libavcodec/lpc.h b/libavcodec/lpc.h
index c41a1f8..c323230 100644
--- a/libavcodec/lpc.h
+++ b/libavcodec/lpc.h
@@ -2,20 +2,20 @@
  * LPC utility code
  * Copyright (c) 2006  Justin Ruggles <justin.ruggles@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -23,6 +23,7 @@
 #define AVCODEC_LPC_H
 
 #include <stdint.h>
+#include "libavutil/avassert.h"
 
 #define ORDER_METHOD_EST     0
 #define ORDER_METHOD_2LEVEL  1
@@ -66,7 +67,7 @@ typedef struct LPCContext {
     /**
      * Perform autocorrelation on input samples with delay of 0 to lag.
      * @param data  input samples.
-     *              constraints: no alignment needed, but must have have at
+     *              constraints: no alignment needed, but must have at
      *              least lag*sizeof(double) valid bytes preceding it, and
      *              size must be at least (len+1)*sizeof(double) if data is
      *              16-byte aligned or (len+2)*sizeof(double) if data is
@@ -155,6 +156,8 @@ static inline int compute_lpc_coefs(const LPC_TYPE *autoc, int max_order,
     LPC_TYPE err;
     LPC_TYPE *lpc_last = lpc;
 
+    av_assert2(normalize || !fail);
+
     if (normalize)
         err = *autoc++;
 
diff --git a/libavcodec/lsp.c b/libavcodec/lsp.c
index 8a05aed..17f59ea 100644
--- a/libavcodec/lsp.c
+++ b/libavcodec/lsp.c
@@ -4,20 +4,20 @@
  * Copyright (c) 2007 Reynaldo H. Verdejo Pinochet (QCELP decoder)
  * Copyright (c) 2008 Vladimir Voroshilov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,6 +27,8 @@
 #define FRAC_BITS 14
 #include "mathops.h"
 #include "lsp.h"
+#include "libavcodec/mips/lsp_mips.h"
+#include "libavutil/avassert.h"
 
 void ff_acelp_reorder_lsf(int16_t* lsfq, int lsfq_min_distance, int lsfq_min, int lsfq_max, int lp_order)
 {
@@ -73,7 +75,7 @@ static int16_t ff_cos(uint16_t arg)
     uint8_t offset= arg;
     uint8_t ind = arg >> 8;
 
-    assert(arg <= 0x3fff);
+    av_assert2(arg <= 0x3fff);
 
     return tab_cos[ind] + (offset * (tab_cos[ind+1] - tab_cos[ind]) >> 8);
 }
@@ -173,7 +175,11 @@ void ff_acelp_lp_decode(int16_t* lp_1st, int16_t* lp_2nd, const int16_t* lsp_2nd
 
     /* LSP values for first subframe (3.2.5 of G.729, Equation 24)*/
     for(i=0; i<lp_order; i++)
+#ifdef G729_BITEXACT
+        lsp_1st[i] = (lsp_2nd[i] >> 1) + (lsp_prev[i] >> 1);
+#else
         lsp_1st[i] = (lsp_2nd[i] + lsp_prev[i]) >> 1;
+#endif
 
     ff_acelp_lsp2lpc(lp_1st, lsp_1st, lp_order >> 1);
 
@@ -181,6 +187,7 @@ void ff_acelp_lp_decode(int16_t* lp_1st, int16_t* lp_2nd, const int16_t* lsp_2nd
     ff_acelp_lsp2lpc(lp_2nd, lsp_2nd, lp_order >> 1);
 }
 
+#ifndef ff_lsp2polyf
 void ff_lsp2polyf(const double *lsp, double *f, int lp_half_order)
 {
     int i, j;
@@ -197,13 +204,14 @@ void ff_lsp2polyf(const double *lsp, double *f, int lp_half_order)
         f[1] += val;
     }
 }
+#endif /* ff_lsp2polyf */
 
 void ff_acelp_lspd2lpc(const double *lsp, float *lpc, int lp_half_order)
 {
     double pa[MAX_LP_HALF_ORDER+1], qa[MAX_LP_HALF_ORDER+1];
     float *lpc2 = lpc + (lp_half_order << 1) - 1;
 
-    assert(lp_half_order <= MAX_LP_HALF_ORDER);
+    av_assert2(lp_half_order <= MAX_LP_HALF_ORDER);
 
     ff_lsp2polyf(lsp,     pa, lp_half_order);
     ff_lsp2polyf(lsp + 1, qa, lp_half_order);
diff --git a/libavcodec/lsp.h b/libavcodec/lsp.h
index 4b95567..46a2d47 100644
--- a/libavcodec/lsp.h
+++ b/libavcodec/lsp.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2008 Vladimir Voroshilov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/lzw.c b/libavcodec/lzw.c
index fae5687..6832c12 100644
--- a/libavcodec/lzw.c
+++ b/libavcodec/lzw.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2003 Fabrice Bellard
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -93,12 +93,6 @@ static int lzw_get_code(struct LZWState * s)
     return c & s->curmask;
 }
 
-int ff_lzw_size_read(LZWState *p)
-{
-    struct LZWState *s = p;
-    return bytestream2_tell(&s->gb);
-}
-
 void ff_lzw_decode_tail(LZWState *p)
 {
     struct LZWState *s = (struct LZWState *)p;
diff --git a/libavcodec/lzw.h b/libavcodec/lzw.h
index d925d35..4653c1c 100644
--- a/libavcodec/lzw.h
+++ b/libavcodec/lzw.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2003 Fabrice Bellard
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -47,7 +47,6 @@ void ff_lzw_decode_open(LZWState **p);
 void ff_lzw_decode_close(LZWState **p);
 int ff_lzw_decode_init(LZWState *s, int csize, const uint8_t *buf, int buf_size, int mode);
 int ff_lzw_decode(LZWState *s, uint8_t *buf, int len);
-int ff_lzw_size_read(LZWState *lzw);
 void ff_lzw_decode_tail(LZWState *lzw);
 
 /** LZW encode state */
diff --git a/libavcodec/lzwenc.c b/libavcodec/lzwenc.c
index 7c37bf2..03080ee 100644
--- a/libavcodec/lzwenc.c
+++ b/libavcodec/lzwenc.c
@@ -2,20 +2,20 @@
  * LZW encoder
  * Copyright (c) 2007 Bartlomiej Wolowiec
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -77,7 +77,7 @@ static inline int hash(int head, const int add)
     head ^= (add << LZW_HASH_SHIFT);
     if (head >= LZW_HASH_SIZE)
         head -= LZW_HASH_SIZE;
-    assert(head >= 0 && head < LZW_HASH_SIZE);
+    av_assert2(head >= 0 && head < LZW_HASH_SIZE);
     return head;
 }
 
@@ -112,7 +112,7 @@ static inline int hashOffset(const int head)
  */
 static inline void writeCode(LZWEncodeState * s, int c)
 {
-    assert(0 <= c && c < 1 << s->bits);
+    av_assert2(0 <= c && c < 1 << s->bits);
     s->put_bits(&s->pb, s->bits, c);
 }
 
@@ -208,7 +208,7 @@ void ff_lzw_encode_init(LZWEncodeState *s, uint8_t *outbuf, int outsize,
     s->maxbits = maxbits;
     init_put_bits(&s->pb, outbuf, outsize);
     s->bufsize = outsize;
-    assert(s->maxbits >= 9 && s->maxbits <= LZW_MAXBITS);
+    av_assert0(s->maxbits >= 9 && s->maxbits <= LZW_MAXBITS);
     s->maxcode = 1 << s->maxbits;
     s->output_bytes = 0;
     s->last_code = LZW_PREFIX_EMPTY;
@@ -263,6 +263,9 @@ int ff_lzw_encode_flush(LZWEncodeState *s,
     if (s->last_code != -1)
         writeCode(s, s->last_code);
     writeCode(s, s->end_code);
+    if (s->mode == FF_LZW_GIF)
+        s->put_bits(&s->pb, 1, 0);
+
     lzw_flush_put_bits(&s->pb);
     s->last_code = -1;
 
diff --git a/libavcodec/mace.c b/libavcodec/mace.c
index 25c6b70..6eaa296 100644
--- a/libavcodec/mace.c
+++ b/libavcodec/mace.c
@@ -2,20 +2,20 @@
  * MACE decoder
  * Copyright (c) 2002 Laszlo Torok <torokl@alpha.dfmk.hu>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -244,12 +244,17 @@ static int mace_decode_frame(AVCodecContext *avctx, void *data,
     int i, j, k, l, ret;
     int is_mace3 = (avctx->codec_id == AV_CODEC_ID_MACE3);
 
+    if (buf_size % (avctx->channels << is_mace3)) {
+        av_log(avctx, AV_LOG_ERROR, "buffer size %d is odd\n", buf_size);
+        buf_size -= buf_size % (avctx->channels << is_mace3);
+        if (!buf_size)
+            return AVERROR_INVALIDDATA;
+    }
+
     /* get output buffer */
     frame->nb_samples = 3 * (buf_size << (1 - is_mace3)) / avctx->channels;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     samples = (int16_t **)frame->extended_data;
 
     for(i = 0; i < avctx->channels; i++) {
diff --git a/libavcodec/mathops.h b/libavcodec/mathops.h
index 7af13e1..87fca0c 100644
--- a/libavcodec/mathops.h
+++ b/libavcodec/mathops.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2001, 2002 Fabrice Bellard
  * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #ifndef AVCODEC_MATHOPS_H
@@ -39,8 +39,6 @@ extern const uint8_t ff_zigzag_direct[64];
 #   include "arm/mathops.h"
 #elif ARCH_AVR32
 #   include "avr32/mathops.h"
-#elif ARCH_BFIN
-#   include "bfin/mathops.h"
 #elif ARCH_MIPS
 #   include "mips/mathops.h"
 #elif ARCH_PPC
diff --git a/libavcodec/mathtables.c b/libavcodec/mathtables.c
index 47695bc..a07ac50 100644
--- a/libavcodec/mathtables.c
+++ b/libavcodec/mathtables.c
@@ -1,18 +1,20 @@
 /*
- * This file is part of Libav.
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * Libav is free software; you can redistribute it and/or
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mdct_fixed.c b/libavcodec/mdct_fixed.c
index 9e06861..a32cb00 100644
--- a/libavcodec/mdct_fixed.c
+++ b/libavcodec/mdct_fixed.c
@@ -1,22 +1,23 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #define FFT_FLOAT 0
+#define FFT_FIXED_32 0
 #include "mdct_template.c"
 
 /* same as ff_mdct_calcw_c with double-width unscaled output */
diff --git a/libavcodec/mdct_fixed_32.c b/libavcodec/mdct_fixed_32.c
new file mode 100644
index 0000000..5a34dfe
--- /dev/null
+++ b/libavcodec/mdct_fixed_32.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Authors:  Stanislav Ocovaj (socovaj@mips.com)
+ *           Goran Cordasic   (goran@mips.com)
+ *           Djordje Pesut    (djordje@mips.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define FFT_FLOAT 0
+#define FFT_FIXED_32 1
+#include "mdct_template.c"
diff --git a/libavcodec/mdct_float.c b/libavcodec/mdct_float.c
index a0a62b3..cff2d21 100644
--- a/libavcodec/mdct_float.c
+++ b/libavcodec/mdct_float.c
@@ -1,20 +1,21 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #define FFT_FLOAT 1
+#define FFT_FIXED_32 0
 #include "mdct_template.c"
diff --git a/libavcodec/mdct_template.c b/libavcodec/mdct_template.c
index bad890e..7fa8bcc 100644
--- a/libavcodec/mdct_template.c
+++ b/libavcodec/mdct_template.c
@@ -2,20 +2,20 @@
  * MDCT/IMDCT transforms
  * Copyright (c) 2002 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -34,7 +34,11 @@
 #if FFT_FLOAT
 #   define RSCALE(x) (x)
 #else
+#if FFT_FIXED_32
+#   define RSCALE(x) (((x) + 32) >> 6) /* add 32 (half of 1 << 6), then shift right by 6 */
+#else /* FFT_FIXED_32 */
 #   define RSCALE(x) ((x) >> 1)
+#endif /* FFT_FIXED_32 */
 #endif
 
 /**
@@ -56,7 +60,7 @@ av_cold int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale)
     if (ff_fft_init(s, s->mdct_bits - 2, inverse) < 0)
         goto fail;
 
-    s->tcos = av_malloc(n/2 * sizeof(FFTSample));
+    s->tcos = av_malloc_array(n/2, sizeof(FFTSample));
     if (!s->tcos)
         goto fail;
 
diff --git a/libavcodec/mdec.c b/libavcodec/mdec.c
index 6b70e37..051adae 100644
--- a/libavcodec/mdec.c
+++ b/libavcodec/mdec.c
@@ -4,20 +4,20 @@
  *
  * based upon code from Sebastian Jedruszkiewicz <elf@frogger.rules.pl>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -29,6 +29,7 @@
 
 #include "avcodec.h"
 #include "blockdsp.h"
+#include "bswapdsp.h"
 #include "idctdsp.h"
 #include "mpegvideo.h"
 #include "mpeg12.h"
@@ -37,6 +38,7 @@
 typedef struct MDECContext {
     AVCodecContext *avctx;
     BlockDSPContext bdsp;
+    BswapDSPContext bbdsp;
     IDCTDSPContext idsp;
     ThreadFrame frame;
     GetBitContext gb;
@@ -124,7 +126,7 @@ static inline int mdec_decode_block_intra(MDECContext *a, int16_t *block, int n)
 static inline int decode_mb(MDECContext *a, int16_t block[6][64])
 {
     int i, ret;
-    const int block_index[6] = { 5, 4, 0, 1, 2, 3 };
+    static const int block_index[6] = { 5, 4, 0, 1, 2, 3 };
 
     a->bdsp.clear_blocks(block[0]);
 
@@ -166,23 +168,19 @@ static int decode_frame(AVCodecContext *avctx,
     const uint8_t *buf    = avpkt->data;
     int buf_size          = avpkt->size;
     ThreadFrame frame     = { .f = data };
-    int i, ret;
+    int ret;
 
-    if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
         return ret;
-    }
     frame.f->pict_type = AV_PICTURE_TYPE_I;
     frame.f->key_frame = 1;
 
-    av_fast_malloc(&a->bitstream_buffer, &a->bitstream_buffer_size, buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    av_fast_padded_malloc(&a->bitstream_buffer, &a->bitstream_buffer_size, buf_size);
     if (!a->bitstream_buffer)
         return AVERROR(ENOMEM);
-    for (i = 0; i < buf_size; i += 2) {
-        a->bitstream_buffer[i]     = buf[i + 1];
-        a->bitstream_buffer[i + 1] = buf[i];
-    }
-    init_get_bits(&a->gb, a->bitstream_buffer, buf_size * 8);
+    a->bbdsp.bswap16_buf((uint16_t *)a->bitstream_buffer, (uint16_t *)buf, (buf_size + 1) / 2);
+    if ((ret = init_get_bits8(&a->gb, a->bitstream_buffer, buf_size)) < 0)
+        return ret;
 
     /* skip over 4 preamble bytes in stream (typically 0xXX 0xXX 0x00 0x38) */
     skip_bits(&a->gb, 32);
@@ -216,6 +214,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
     a->avctx           = avctx;
 
     ff_blockdsp_init(&a->bdsp, avctx);
+    ff_bswapdsp_init(&a->bbdsp);
     ff_idctdsp_init(&a->idsp, avctx);
     ff_mpeg12_init_vlcs();
     ff_init_scantable(a->idsp.idct_permutation, &a->scantable,
diff --git a/libavcodec/me_cmp.c b/libavcodec/me_cmp.c
index 9fcc937..1355a23 100644
--- a/libavcodec/me_cmp.c
+++ b/libavcodec/me_cmp.c
@@ -1,22 +1,27 @@
 /*
- * This file is part of Libav.
+ * DSP utils
+ * Copyright (c) 2000, 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * Libav is free software; you can redistribute it and/or
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "libavutil/attributes.h"
+#include "libavutil/internal.h"
 #include "avcodec.h"
 #include "copy_block.h"
 #include "simple_idct.h"
@@ -407,6 +412,14 @@ void ff_set_cmp(MECmpContext *c, me_cmp_func *cmp, int type)
         case FF_CMP_NSSE:
             cmp[i] = c->nsse[i];
             break;
+#if CONFIG_DWT
+        case FF_CMP_W53:
+            cmp[i]= c->w53[i];
+            break;
+        case FF_CMP_W97:
+            cmp[i]= c->w97[i];
+            break;
+#endif
         default:
             av_log(NULL, AV_LOG_ERROR,
                    "internal error in cmp function selection\n");
@@ -434,7 +447,7 @@ static int hadamard8_diff8x8_c(MpegEncContext *s, uint8_t *dst,
 {
     int i, temp[64], sum = 0;
 
-    assert(h == 8);
+    av_assert2(h == 8);
 
     for (i = 0; i < 8; i++) {
         // FIXME: try pointer walks
@@ -486,7 +499,7 @@ static int hadamard8_intra8x8_c(MpegEncContext *s, uint8_t *src,
 {
     int i, temp[64], sum = 0;
 
-    assert(h == 8);
+    av_assert2(h == 8);
 
     for (i = 0; i < 8; i++) {
         // FIXME: try pointer walks
@@ -538,7 +551,7 @@ static int dct_sad8x8_c(MpegEncContext *s, uint8_t *src1,
 {
     LOCAL_ALIGNED_16(int16_t, temp, [64]);
 
-    assert(h == 8);
+    av_assert2(h == 8);
 
     s->pdsp.diff_pixels(temp, src1, src2, stride);
     s->fdsp.fdct(temp);
@@ -605,7 +618,7 @@ static int dct_max8x8_c(MpegEncContext *s, uint8_t *src1,
     LOCAL_ALIGNED_16(int16_t, temp, [64]);
     int sum = 0, i;
 
-    assert(h == 8);
+    av_assert2(h == 8);
 
     s->pdsp.diff_pixels(temp, src1, src2, stride);
     s->fdsp.fdct(temp);
@@ -623,7 +636,7 @@ static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1,
     int16_t *const bak = temp + 64;
     int sum = 0, i;
 
-    assert(h == 8);
+    av_assert2(h == 8);
     s->mb_intra = 0;
 
     s->pdsp.diff_pixels(temp, src1, src2, stride);
@@ -652,7 +665,7 @@ static int rd8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
     const int esc_length = s->ac_esc_length;
     uint8_t *length, *last_length;
 
-    assert(h == 8);
+    av_assert2(h == 8);
 
     copy_block8(lsrc1, src1, 8, stride, 8);
     copy_block8(lsrc2, src2, 8, stride, 8);
@@ -696,7 +709,7 @@ static int rd8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
 
         level = temp[i] + 64;
 
-        assert(level - 64);
+        av_assert2(level - 64);
 
         if ((level & (~127)) == 0) {
             bits += last_length[UNI_AC_ENC_INDEX(run, level)];
@@ -727,7 +740,7 @@ static int bit8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
     const int esc_length = s->ac_esc_length;
     uint8_t *length, *last_length;
 
-    assert(h == 8);
+    av_assert2(h == 8);
 
     s->pdsp.diff_pixels(temp, src1, src2, stride);
 
@@ -768,7 +781,7 @@ static int bit8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
 
         level = temp[i] + 64;
 
-        assert(level - 64);
+        av_assert2(level - 64);
 
         if ((level & (~127)) == 0)
             bits += last_length[UNI_AC_ENC_INDEX(run, level)];
@@ -801,20 +814,24 @@ static int vsad_intra ## size ## _c(MpegEncContext *c,                  \
 VSAD_INTRA(8)
 VSAD_INTRA(16)
 
-static int vsad16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
-                    int stride, int h)
-{
-    int score = 0, x, y;
-
-    for (y = 1; y < h; y++) {
-        for (x = 0; x < 16; x++)
-            score += FFABS(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]);
-        s1 += stride;
-        s2 += stride;
-    }
-
-    return score;
+#define VSAD(size) /* vertical-gradient SAD of (s1 - s2), size columns wide */ \
+static int vsad ## size ## _c(MpegEncContext *c, /* c is not used */           \
+                              uint8_t *s1, uint8_t *s2,                        \
+                              int stride, int h)                               \
+{                                                                              \
+    int score = 0, x, y;                                                       \
+                                                                               \
+    for (y = 1; y < h; y++) { /* h - 1 pairs of adjacent rows */               \
+        for (x = 0; x < size; x++)                                             \
+            score += FFABS(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]);   \
+        s1 += stride;                                                          \
+        s2 += stride;                                                          \
+    }                                                                          \
+                                                                               \
+    return score;                                                              \
 }
+VSAD(8)
+VSAD(16)
 
 #define SQ(a) ((a) * (a))
 #define VSSE_INTRA(size)                                                \
@@ -839,20 +856,23 @@ static int vsse_intra ## size ## _c(MpegEncContext *c,                  \
 VSSE_INTRA(8)
 VSSE_INTRA(16)
 
-static int vsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
-                    int stride, int h)
-{
-    int score = 0, x, y;
-
-    for (y = 1; y < h; y++) {
-        for (x = 0; x < 16; x++)
-            score += SQ(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]);
-        s1 += stride;
-        s2 += stride;
-    }
-
-    return score;
+#define VSSE(size) /* vertical-gradient SSE of (s1 - s2), size columns wide */ \
+static int vsse ## size ## _c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,     \
+                    int stride, int h)                                         \
+{                                                                              \
+    int score = 0, x, y;                                                       \
+                                                                               \
+    for (y = 1; y < h; y++) { /* h - 1 pairs of adjacent rows */               \
+        for (x = 0; x < size; x++)                                             \
+            score += SQ(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]);      \
+        s1 += stride;                                                          \
+        s2 += stride;                                                          \
+    }                                                                          \
+                                                                               \
+    return score;                                                              \
 }
+VSSE(8)
+VSSE(16)
 
 #define WRAPPER8_16_SQ(name8, name16)                                   \
 static int name16(MpegEncContext *s, uint8_t *dst, uint8_t *src,        \
@@ -890,8 +910,31 @@ av_cold void ff_me_cmp_init_static(void)
         ff_square_tab[i] = (i - 256) * (i - 256);
 }
 
+/* Verify 16-byte stack alignment: 0 if OK, else -1 (logged at most once). */
+int ff_check_alignment(void)
+{
+    static int did_fail = 0;
+    LOCAL_ALIGNED_16(int, aligned, [4]);
+
+    if (!((intptr_t)aligned & 15))
+        return 0;
+    if (!did_fail) {
+#if HAVE_MMX || HAVE_ALTIVEC
+        av_log(NULL, AV_LOG_ERROR,
+            "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
+            "and may be very slow or crash. This is not a bug in libavcodec,\n"
+            "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
+            "Do not report crashes to FFmpeg developers.\n");
+#endif
+        did_fail=1;
+    }
+    return -1;
+}
+
 av_cold void ff_me_cmp_init(MECmpContext *c, AVCodecContext *avctx)
 {
+    ff_check_alignment();
+
     c->sum_abs_dctelem = sum_abs_dctelem_c;
 
     /* TODO [0] 16  [1] 8 */
@@ -925,14 +968,21 @@ av_cold void ff_me_cmp_init(MECmpContext *c, AVCodecContext *avctx)
     SET_CMP_FUNC(rd)
     SET_CMP_FUNC(bit)
     c->vsad[0] = vsad16_c;
+    c->vsad[1] = vsad8_c;
     c->vsad[4] = vsad_intra16_c;
     c->vsad[5] = vsad_intra8_c;
     c->vsse[0] = vsse16_c;
+    c->vsse[1] = vsse8_c;
     c->vsse[4] = vsse_intra16_c;
     c->vsse[5] = vsse_intra8_c;
     c->nsse[0] = nsse16_c;
     c->nsse[1] = nsse8_c;
+#if CONFIG_SNOW_DECODER || CONFIG_SNOW_ENCODER
+    ff_dsputil_init_dwt(c);
+#endif
 
+    if (ARCH_ALPHA)
+        ff_me_cmp_init_alpha(c, avctx);
     if (ARCH_ARM)
         ff_me_cmp_init_arm(c, avctx);
     if (ARCH_PPC)
diff --git a/libavcodec/me_cmp.h b/libavcodec/me_cmp.h
index 05ae30b..0e3b922 100644
--- a/libavcodec/me_cmp.h
+++ b/libavcodec/me_cmp.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,6 +25,20 @@
 
 extern uint32_t ff_square_tab[512];
 
+
+/* minimum alignment rules ;)
+ * If you notice errors in the align stuff, need more alignment for some ASM code
+ * for some CPU or need to use a function with less aligned data then send a mail
+ * to the ffmpeg-devel mailing list, ...
+ *
+ * !warning These alignments might not match reality (an attribute((align))
+ * may be missing somewhere).
+ * I (Michael) did not check them, these are just the alignments which I think
+ * could be reached easily ...
+ *
+ * !future video codecs might need functions with less strict alignment
+ */
+
 struct MpegEncContext;
 /* Motion estimation:
  * h is limited to { width / 2, width, 2 * width },
@@ -48,6 +62,8 @@ typedef struct MECmpContext {
     me_cmp_func vsad[6];
     me_cmp_func vsse[6];
     me_cmp_func nsse[6];
+    me_cmp_func w53[6];
+    me_cmp_func w97[6];
     me_cmp_func dct_max[6];
     me_cmp_func dct264_sad[6];
 
@@ -63,11 +79,16 @@ typedef struct MECmpContext {
 
 void ff_me_cmp_init_static(void);
 
+int ff_check_alignment(void);
+
 void ff_me_cmp_init(MECmpContext *c, AVCodecContext *avctx);
+void ff_me_cmp_init_alpha(MECmpContext *c, AVCodecContext *avctx);
 void ff_me_cmp_init_arm(MECmpContext *c, AVCodecContext *avctx);
 void ff_me_cmp_init_ppc(MECmpContext *c, AVCodecContext *avctx);
 void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx);
 
 void ff_set_cmp(MECmpContext *c, me_cmp_func *cmp, int type);
 
+void ff_dsputil_init_dwt(MECmpContext *c);
+
 #endif /* AVCODEC_ME_CMP_H */
diff --git a/libavcodec/metasound.c b/libavcodec/metasound.c
index dd9ffe0..2dab135 100644
--- a/libavcodec/metasound.c
+++ b/libavcodec/metasound.c
@@ -4,20 +4,20 @@
  * based on TwinVQ decoder
  * Copyright (c) 2009 Vitor Sessak
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -236,7 +236,7 @@ static int metasound_read_bitstream(AVCodecContext *avctx, TwinVQContext *tctx,
             skip_bits(&gb, 4 - (get_bits_count(&gb) & 3));
     }
 
-    return 0;
+    return (get_bits_count(&gb) + 7) / 8;
 }
 
 typedef struct MetasoundProps {
diff --git a/libavcodec/metasound_data.c b/libavcodec/metasound_data.c
index 8aa53e5..ed23cdf 100644
--- a/libavcodec/metasound_data.c
+++ b/libavcodec/metasound_data.c
@@ -2,20 +2,20 @@
  * MetaSound decoder
  * Copyright (c) 2013 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -11208,6 +11208,14 @@ static const int16_t fcb16m[] = {
       -688,   -209,    915,    622,  -1038,   -474,   -343,    -91,
       -173,   -104,    255,     96,   1547,    773,   -625,   2272,
        -90,   -509,   -527,   -247,   -147,   -234,    -45,    166,
+        0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0,
 };
 
 static const int16_t fcb16sl[] = {
diff --git a/libavcodec/metasound_data.h b/libavcodec/metasound_data.h
index 4925516..5c33411 100644
--- a/libavcodec/metasound_data.h
+++ b/libavcodec/metasound_data.h
@@ -2,20 +2,20 @@
  * MetaSound decoder
  * Copyright (c) 2013 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/microdvddec.c b/libavcodec/microdvddec.c
new file mode 100644
index 0000000..f3c640f
--- /dev/null
+++ b/libavcodec/microdvddec.c
@@ -0,0 +1,377 @@
+/*
+ * Copyright (c) 2012 Clément Bœsch
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * MicroDVD subtitle decoder
+ *
+ * Based on the specifications found here:
+ * https://trac.videolan.org/vlc/ticket/1825#comment:6
+ */
+
+#include "libavutil/avstring.h"
+#include "libavutil/parseutils.h"
+#include "libavutil/bprint.h"
+#include "avcodec.h"
+#include "ass.h"
+
+static int indexof(const char *s, int c) /* offset of first c in s, or -1 */
+{
+    const char *f = strchr(s, c); /* keep the const qualifier of s */
+    return f ? (f - s) : -1;
+}
+
+struct microdvd_tag {
+    char key;               /* tag letter stored by the parser; 0 while the slot is unused */
+    int persistent;         /* one of the MICRODVD_PERSISTENT_* values */
+    uint32_t data1;         /* numeric payload: style bits, color, size, position flag or 1st coordinate */
+    uint32_t data2;         /* second numeric payload, only written for the 'o' tag */
+    char *data_string;      /* text payload ('f' and 'h' tags); points into the parsed input */
+    int data_string_len;    /* number of chars of data_string that belong to the tag */
+};
+
+#define MICRODVD_PERSISTENT_OFF     0  /* slot is cleared by microdvd_close_no_persistent_tags() */
+#define MICRODVD_PERSISTENT_ON      1  /* persistent tag that has not been emitted yet */
+#define MICRODVD_PERSISTENT_OPENED  2  /* persistent tag already emitted; microdvd_open_tags() skips it */
+
+// Color, Font, Size, cHarset, stYle (y and Y), Position, cOordinate; a key's index here is its slot in tags[]
+#define MICRODVD_TAGS "cfshyYpo"
+
+static void microdvd_set_tag(struct microdvd_tag *tags, struct microdvd_tag tag)
+{
+    /* the slot is the position of the key inside MICRODVD_TAGS */
+    int slot = indexof(MICRODVD_TAGS, tag.key);
+
+    if (slot >= 0)
+        tags[slot] = tag; /* keys not listed in MICRODVD_TAGS are dropped */
+}
+
+// italic, bold, underline, strike-through; bit n of a style tag's data1 stands for letter n
+#define MICRODVD_STYLES "ibus"
+
+/* Parse leading "{X:value}" tags of s into tags[] (slot order: MICRODVD_TAGS).
+ * Returns the position after the last accepted tag; parsing stops at the first
+ * unknown or malformed tag, which is left unconsumed. data_string aliases s. */
+static char *microdvd_load_tags(struct microdvd_tag *tags, char *s)
+{
+    while (*s == '{') {
+        char *start = s;
+        char tag_char = *(s + 1);
+        struct microdvd_tag tag = {0};
+
+        if (!tag_char || *(s + 2) != ':')
+            break;
+        s += 3;
+
+        switch (tag_char) {
+        /* Style */
+        case 'Y':
+            tag.persistent = MICRODVD_PERSISTENT_ON; /* fall through */
+        case 'y':
+            while (*s && *s != '}') {
+                int style_index = indexof(MICRODVD_STYLES, *s);
+                if (style_index >= 0)
+                    tag.data1 |= (1 << style_index);
+                s++;
+            }
+            if (*s != '}')
+                break;
+            /* keep 'y' and 'Y' apart so that {y:ib}{Y:us} is handled */
+            tag.key = tag_char;
+            break;
+
+        /* Color */
+        case 'C':
+            tag.persistent = MICRODVD_PERSISTENT_ON; /* fall through */
+        case 'c':
+            if (*s == '$')
+                s++;
+            tag.data1 = strtol(s, &s, 16) & 0x00ffffff;
+            if (*s != '}')
+                break;
+            tag.key = 'c';
+            break;
+
+        /* Font name */
+        case 'F':
+            tag.persistent = MICRODVD_PERSISTENT_ON; /* fall through */
+        case 'f': {
+            int len = indexof(s, '}');
+            if (len < 0)
+                break;
+            tag.data_string = s;
+            tag.data_string_len = len;
+            s += len;
+            tag.key = 'f';
+            break;
+        }
+
+        /* Font size */
+        case 'S':
+            tag.persistent = MICRODVD_PERSISTENT_ON; /* fall through */
+        case 's':
+            tag.data1 = strtol(s, &s, 10);
+            if (*s != '}')
+                break;
+            tag.key = 's';
+            break;
+
+        /* Charset */
+        case 'H': {
+            //TODO: not yet handled, just parsed.
+            int len = indexof(s, '}');
+            if (len < 0)
+                break;
+            tag.data_string = s;
+            tag.data_string_len = len;
+            s += len;
+            tag.key = 'h';
+            break;
+        }
+
+        /* Position */
+        case 'P':
+            tag.persistent = MICRODVD_PERSISTENT_ON;
+            tag.data1 = (*s && *s++ == '1'); /* never step past the NUL */
+            if (*s != '}')
+                break;
+            tag.key = 'p';
+            break;
+
+        /* Coordinates */
+        case 'o':
+            tag.persistent = MICRODVD_PERSISTENT_ON;
+            tag.data1 = strtol(s, &s, 10);
+            if (*s != ',')
+                break;
+            s++;
+            tag.data2 = strtol(s, &s, 10);
+            if (*s != '}')
+                break;
+            tag.key = 'o';
+            break;
+
+        default:    /* Unknown tag, we consider it's text */
+            break;
+        }
+
+        if (tag.key == 0)
+            return start;
+
+        microdvd_set_tag(tags, tag);
+        s++;
+    }
+    return s;
+}
+
+/* Append ASS override codes for every tag that has not been emitted yet.
+ * Slots already in the OPENED state are skipped; persistent tags (ON) are
+ * moved to OPENED afterwards so that later calls do not repeat them.
+ * A slot whose key matches no case below (e.g. 0) emits nothing. */
+static void microdvd_open_tags(AVBPrint *new_line, struct microdvd_tag *tags)
+{
+    int n, bit;
+    for (n = 0; n < sizeof(MICRODVD_TAGS) - 1; n++) {
+        struct microdvd_tag *t = &tags[n];
+
+        if (t->persistent == MICRODVD_PERSISTENT_OPENED)
+            continue;
+        switch (t->key) {
+        case 'Y':
+        case 'y':
+            for (bit = 0; bit < sizeof(MICRODVD_STYLES) - 1; bit++)
+                if (t->data1 & (1 << bit))
+                    av_bprintf(new_line, "{\\%c1}", MICRODVD_STYLES[bit]);
+            break;
+        case 'c':
+            av_bprintf(new_line, "{\\c&H%06X&}", t->data1);
+            break;
+        case 'f':
+            av_bprintf(new_line, "{\\fn%.*s}",
+                       t->data_string_len, t->data_string);
+            break;
+        case 's':
+            av_bprintf(new_line, "{\\fs%d}", t->data1);
+            break;
+        case 'p':
+            if (t->data1 == 0)
+                av_bprintf(new_line, "{\\an8}");
+            break;
+        case 'o':
+            av_bprintf(new_line, "{\\pos(%d,%d)}", t->data1, t->data2);
+            break;
+        }
+        if (t->persistent == MICRODVD_PERSISTENT_ON)
+            t->persistent = MICRODVD_PERSISTENT_OPENED;
+    }
+}
+
+/* Emit the ASS codes that undo every non-persistent tag (slots are walked in
+ * reverse order) and clear the key of each such slot. */
+static void microdvd_close_no_persistent_tags(AVBPrint *new_line,
+                                              struct microdvd_tag *tags)
+{
+    int n, bit;
+
+    for (n = sizeof(MICRODVD_TAGS) - 2; n >= 0; n--) {
+        struct microdvd_tag *t = &tags[n];
+
+        if (t->persistent != MICRODVD_PERSISTENT_OFF)
+            continue;
+        switch (t->key) {
+        case 'y':
+            for (bit = sizeof(MICRODVD_STYLES) - 2; bit >= 0; bit--)
+                if (t->data1 & (1 << bit))
+                    av_bprintf(new_line, "{\\%c0}", MICRODVD_STYLES[bit]);
+            break;
+        case 'c':
+            av_bprintf(new_line, "{\\c}");
+            break;
+        case 'f':
+            av_bprintf(new_line, "{\\fn}");
+            break;
+        case 's':
+            av_bprintf(new_line, "{\\fs}");
+            break;
+        }
+        t->key = 0;
+    }
+}
+
+/* Convert one MicroDVD packet into an ASS dialog line added to the AVSubtitle.
+ * Returns avpkt->size, or a negative AVERROR code on failure. */
+static int microdvd_decode_frame(AVCodecContext *avctx,
+                                 void *data, int *got_sub_ptr, AVPacket *avpkt)
+{
+    AVSubtitle *sub = data;
+    AVBPrint new_line;
+    char c;
+    char *decoded_sub;
+    char *line = avpkt->data;
+    char *end = avpkt->data + avpkt->size;
+    struct microdvd_tag tags[sizeof(MICRODVD_TAGS) - 1] = {{0}};
+
+    if (avpkt->size <= 0)
+        return avpkt->size;
+
+    /* To be removed later */
+    if (sscanf(line, "{%*d}{%*[0123456789]}%c", &c) == 1 &&
+        line[avpkt->size - 1] == '\n') {
+        av_log(avctx, AV_LOG_ERROR, "AVPacket is not clean (contains timing "
+               "information and a trailing line break). You need to upgrade "
+               "your libavformat or sanitize your packet.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    av_bprint_init(&new_line, 0, 2048);
+
+    // subtitle content
+    while (line < end && *line) {
+        // parse MicroDVD tags, and open them in ASS
+        line = microdvd_load_tags(tags, line);
+        microdvd_open_tags(&new_line, tags);
+
+        // simple copy until EOL or forced carriage return
+        while (line < end && *line && *line != '|') {
+            av_bprint_chars(&new_line, *line, 1);
+            line++;
+        }
+
+        // line split
+        if (line < end && *line == '|') {
+            microdvd_close_no_persistent_tags(&new_line, tags);
+            av_bprintf(&new_line, "\\N");
+            line++;
+        }
+    }
+    if (new_line.len) {
+        int ret;
+        av_bprintf(&new_line, "\r\n");
+        if ((ret = av_bprint_finalize(&new_line, &decoded_sub)) < 0)
+            return ret; /* no string is handed back on failure */
+        if (*decoded_sub) {
+            int ts_start    = av_rescale_q(avpkt->pts, avctx->time_base, (AVRational){1,100});
+            int ts_duration = avpkt->duration != -1 ?
+                              av_rescale_q(avpkt->duration, avctx->time_base, (AVRational){1,100}) : -1;
+            ff_ass_add_rect(sub, decoded_sub, ts_start, ts_duration, 0);
+        }
+        av_free(decoded_sub);
+    }
+
+    *got_sub_ptr = sub->num_rects > 0;
+    return avpkt->size;
+}
+
+/* Build the ASS header; tags parsed from extradata override the defaults. */
+static int microdvd_init(AVCodecContext *avctx)
+{
+    int n, bit;
+    AVBPrint font_buf;
+    int font_size    = ASS_DEFAULT_FONT_SIZE;
+    int color        = ASS_DEFAULT_COLOR;
+    int bold         = ASS_DEFAULT_BOLD;
+    int italic       = ASS_DEFAULT_ITALIC;
+    int underline    = ASS_DEFAULT_UNDERLINE;
+    int alignment    = ASS_DEFAULT_ALIGNMENT;
+    struct microdvd_tag tags[sizeof(MICRODVD_TAGS) - 1] = {{0}};
+
+    av_bprint_init(&font_buf, 0, AV_BPRINT_SIZE_AUTOMATIC);
+    av_bprintf(&font_buf, "%s", ASS_DEFAULT_FONT);
+
+    if (avctx->extradata) {
+        microdvd_load_tags(tags, avctx->extradata);
+        for (n = 0; n < sizeof(MICRODVD_TAGS) - 1; n++) {
+            const struct microdvd_tag *t = &tags[n];
+            switch (av_tolower(t->key)) {
+            case 'y':
+                for (bit = 0; bit < sizeof(MICRODVD_STYLES) - 1; bit++) {
+                    if (!(t->data1 & (1 << bit)))
+                        continue;
+                    switch (MICRODVD_STYLES[bit]) {
+                    case 'i': italic    = 1; break;
+                    case 'b': bold      = 1; break;
+                    case 'u': underline = 1; break;
+                    }
+                }
+                break;
+            case 'c': color     = t->data1; break;
+            case 's': font_size = t->data1; break;
+            case 'p': alignment =        8; break;
+            case 'f':
+                av_bprint_clear(&font_buf);
+                av_bprintf(&font_buf, "%.*s",
+                           t->data_string_len, t->data_string);
+                break;
+            }
+        }
+    }
+    return ff_ass_subtitle_header(avctx, font_buf.str, font_size, color,
+                                  ASS_DEFAULT_BACK_COLOR, bold, italic,
+                                  underline, alignment);
+}
+
+AVCodec ff_microdvd_decoder = {
+    .name         = "microdvd",
+    .long_name    = NULL_IF_CONFIG_SMALL("MicroDVD subtitle"),
+    .type         = AVMEDIA_TYPE_SUBTITLE,
+    .id           = AV_CODEC_ID_MICRODVD,
+    .init         = microdvd_init,          /* builds the ASS header, honoring tags in extradata */
+    .decode       = microdvd_decode_frame,  /* turns one packet into an ASS rect */
+};
diff --git a/libavcodec/mimic.c b/libavcodec/mimic.c
index 88ee5d3..24724fa 100644
--- a/libavcodec/mimic.c
+++ b/libavcodec/mimic.c
@@ -2,20 +2,20 @@
  * Copyright (C) 2005  Ole André Vadla Ravnås <oleavr@gmail.com>
  * Copyright (C) 2008  Ramiro Polla
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -120,7 +120,8 @@ static av_cold int mimic_decode_end(AVCodecContext *avctx)
     MimicContext *ctx = avctx->priv_data;
     int i;
 
-    av_free(ctx->swap_buf);
+    av_freep(&ctx->swap_buf);
+    ctx->swap_buf_size = 0;
 
     for (i = 0; i < FF_ARRAY_ELEMS(ctx->frames); i++) {
         if (ctx->frames[i].f)
@@ -181,7 +182,7 @@ static int mimic_decode_update_thread_context(AVCodecContext *avctx, const AVCod
 
     for (i = 0; i < FF_ARRAY_ELEMS(dst->frames); i++) {
         ff_thread_release_buffer(avctx, &dst->frames[i]);
-        if (src->frames[i].f->data[0]) {
+        if (i != src->next_cur_index && src->frames[i].f->data[0]) {
             ret = ff_thread_ref_frame(&dst->frames[i], &src->frames[i]);
             if (ret < 0)
                 return ret;
@@ -257,7 +258,7 @@ static int vlc_decode_block(MimicContext *ctx, int num_coeffs, int qscale)
 
         value = get_bits(&ctx->gb, num_bits);
 
-        /* Libav's IDCT behaves somewhat different from the original code, so
+        /* FFmpeg's IDCT behaves somewhat different from the original code, so
          * a factor of 4 was added to the input */
 
         coeff = vlcdec_lookup[num_bits][value];
@@ -394,8 +395,8 @@ static int mimic_decode_frame(AVCodecContext *avctx, void *data,
         avctx->height  = height;
         avctx->pix_fmt = AV_PIX_FMT_YUV420P;
         for (i = 0; i < 3; i++) {
-            ctx->num_vblocks[i] = -((-height) >> (3 + !!i));
-            ctx->num_hblocks[i] =     width   >> (3 + !!i);
+            ctx->num_vblocks[i] = FF_CEIL_RSHIFT(height,   3 + !!i);
+            ctx->num_hblocks[i] =                width >> (3 + !!i);
         }
     } else if (width != ctx->avctx->width || height != ctx->avctx->height) {
         avpriv_request_sample(avctx, "Resolution changing");
@@ -411,10 +412,8 @@ static int mimic_decode_frame(AVCodecContext *avctx, void *data,
     ctx->frames[ctx->cur_index].f->pict_type = is_pframe ? AV_PICTURE_TYPE_P :
                                                            AV_PICTURE_TYPE_I;
     if ((res = ff_thread_get_buffer(avctx, &ctx->frames[ctx->cur_index],
-                                    AV_GET_BUFFER_FLAG_REF)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+                                    AV_GET_BUFFER_FLAG_REF)) < 0)
         return res;
-    }
 
     ctx->next_prev_index = ctx->cur_index;
     ctx->next_cur_index  = (ctx->cur_index - 1) & 15;
diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile
new file mode 100644
index 0000000..6537b43
--- /dev/null
+++ b/libavcodec/mips/Makefile
@@ -0,0 +1,20 @@
+MIPSFPU-OBJS-$(CONFIG_AMRNB_DECODER)      += mips/acelp_filters_mips.o     \
+                                             mips/celp_filters_mips.o      \
+                                             mips/celp_math_mips.o         \
+                                             mips/acelp_vectors_mips.o
+MIPSFPU-OBJS-$(CONFIG_AMRWB_DECODER)      += mips/acelp_filters_mips.o     \
+                                             mips/celp_filters_mips.o      \
+                                             mips/amrwbdec_mips.o          \
+                                             mips/celp_math_mips.o         \
+                                             mips/acelp_vectors_mips.o
+MIPSFPU-OBJS-$(CONFIG_MPEGAUDIODSP)       += mips/mpegaudiodsp_mips_float.o
+MIPSDSPR1-OBJS-$(CONFIG_MPEGAUDIODSP)     += mips/mpegaudiodsp_mips_fixed.o
+MIPSFPU-OBJS-$(CONFIG_FFT)                += mips/fft_mips.o
+MIPSFPU-OBJS                              += mips/fmtconvert_mips.o
+OBJS-$(CONFIG_AC3DSP)                     += mips/ac3dsp_mips.o
+OBJS-$(CONFIG_AAC_DECODER)                += mips/aacdec_mips.o            \
+                                             mips/aacsbr_mips.o            \
+                                             mips/sbrdsp_mips.o            \
+                                             mips/aacpsdsp_mips.o
+MIPSDSPR1-OBJS-$(CONFIG_AAC_ENCODER)      += mips/aaccoder_mips.o
+MIPSFPU-OBJS-$(CONFIG_AAC_ENCODER)        += mips/iirfilter_mips.o
diff --git a/libavcodec/mips/aaccoder_mips.c b/libavcodec/mips/aaccoder_mips.c
new file mode 100644
index 0000000..d6210d1
--- /dev/null
+++ b/libavcodec/mips/aaccoder_mips.c
@@ -0,0 +1,2498 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Stanislav Ocovaj (socovaj@mips.com)
+ *          Szabolcs Pal     (sabolc@mips.com)
+ *
+ * AAC coefficients encoder optimized for MIPS floating-point architecture
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/aaccoder.c
+ */
+
+#include "libavutil/libm.h"
+
+#include <float.h>
+#include "libavutil/mathematics.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/put_bits.h"
+#include "libavcodec/aac.h"
+#include "libavcodec/aacenc.h"
+#include "libavcodec/aactab.h"
+
+#if HAVE_INLINE_ASM
+typedef struct BandCodingPath { /* NOTE(review): not used in the visible part of this file; field meanings below are inferred from names only - confirm against libavcodec/aaccoder.c */
+    int prev_idx; /* presumably the index of the preceding path entry - confirm */
+    float cost;   /* presumably the cost accumulated along this path - confirm */
+    int run;      /* presumably a run length - confirm */
+} BandCodingPath;
+
+static const uint8_t run_value_bits_long[64] = { /* 5 for indices 0..30, 10 for indices 31..62, 15 for index 63 */
+     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
+};
+
+static const uint8_t run_value_bits_short[16] = { /* 3 for indices 0..6, 6 for indices 7..14, 9 for index 15 */
+    3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
+};
+
+static const uint8_t *run_value_bits[2] = { /* [0] = 64-entry long table, [1] = 16-entry short table */
+    run_value_bits_long, run_value_bits_short
+};
+
+static const uint8_t uquad_sign_bits[81] = { /* entry n = number of non-zero digits of n written as four base-3 digits (81 = 3^4) */
+    0, 1, 1, 1, 2, 2, 1, 2, 2,
+    1, 2, 2, 2, 3, 3, 2, 3, 3,
+    1, 2, 2, 2, 3, 3, 2, 3, 3,
+    1, 2, 2, 2, 3, 3, 2, 3, 3,
+    2, 3, 3, 3, 4, 4, 3, 4, 4,
+    2, 3, 3, 3, 4, 4, 3, 4, 4,
+    1, 2, 2, 2, 3, 3, 2, 3, 3,
+    2, 3, 3, 3, 4, 4, 3, 4, 4,
+    2, 3, 3, 3, 4, 4, 3, 4, 4
+};
+
+static const uint8_t upair7_sign_bits[64] = { /* 8x8 grid: entry n = (n / 8 != 0) + (n % 8 != 0) */
+    0, 1, 1, 1, 1, 1, 1, 1,
+    1, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2,
+};
+
+static const uint8_t upair12_sign_bits[169] = { /* 13x13 grid: entry n = (n / 13 != 0) + (n % 13 != 0) */
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+};
+
+static const uint8_t esc_sign_bits[289] = { /* 17x17 grid: entry n = (n / 17 != 0) + (n % 17 != 0) */
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+};
+
+static void abs_pow34_v(float *out, const float *in, const int size) { /* out[k] = |in[k]|^(3/4) for each processed k */
+#ifndef USE_REALLY_FULL_SEARCH /* when this macro is defined the function body is empty and out is left untouched */
+    int i;
+    float a, b, c, d;
+    float ax, bx, cx, dx;
+
+    for (i = 0; i < size; i += 4) { /* four elements per step: indices i..i+3 are always accessed, even when size is not a multiple of 4 */
+        a = fabsf(in[i  ]);
+        b = fabsf(in[i+1]);
+        c = fabsf(in[i+2]);
+        d = fabsf(in[i+3]);
+
+        ax = sqrtf(a); /* |x|^(1/2) */
+        bx = sqrtf(b);
+        cx = sqrtf(c);
+        dx = sqrtf(d);
+
+        a = a * ax; /* |x|^(3/2) */
+        b = b * bx;
+        c = c * cx;
+        d = d * dx;
+
+        out[i  ] = sqrtf(a); /* sqrt(|x|^(3/2)) = |x|^(3/4) */
+        out[i+1] = sqrtf(b);
+        out[i+2] = sqrtf(c);
+        out[i+3] = sqrtf(d);
+    }
+#endif /* USE_REALLY_FULL_SEARCH */
+}
+
+static float find_max_val(int group_len, int swb_size, const float *scaled) {
+    /* Maximum over the first swb_size entries of each of group_len windows
+     * spaced 128 floats apart; the running maximum starts at 0.0f. */
+    float peak = 0.0f;
+    int win, k;
+    for (win = 0; win < group_len; win++)
+        for (k = 0; k < swb_size; k++)
+            peak = FFMAX(peak, scaled[win * 128 + k]);
+    return peak;
+}
+
+static int find_min_book(float maxval, int sf) {
+    /* Scale maxval by Q^(3/4), add the 0.4054 rounding offset, truncate to
+     * int, and map that level to a codebook number: 0, 1, 3, 5, 7, 9 or 11. */
+    const float q     = ff_aac_pow2sf_tab[POW_SF2_ZERO - sf + SCALE_ONE_POS - SCALE_DIV_512];
+    const float q34   = sqrtf(q * sqrtf(q));
+    const int   level = maxval * q34 + 0.4054f;
+    if (level ==  0) return 0;
+    if (level ==  1) return 1;
+    if (level ==  2) return 3;
+    if (level <=  4) return 5; /* any negative level also lands here */
+    if (level <=  7) return 7;
+    if (level <= 12) return 9;
+    return 11;
+}
+
+/**
+ * Functions derived from the template function and optimized for quantizing and encoding a band.
+ */
+static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,
+                                                     PutBitContext *pb, const float *in,
+                                                     const float *scaled, int size, int scale_idx,
+                                                     int cb, const float lambda, const float uplim,
+                                                     int *bits)
+{ /* Quantizes in[] four values at a time to -1, 0 or 1 and writes one codeword per group of four to pb; lambda, uplim and bits are not used here. */
+    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; /* multiplier for the |x|^(3/4) values, looked up from scale_idx */
+    int i;
+    int qc1, qc2, qc3, qc4;
+
+    uint8_t  *p_bits  = (uint8_t  *)ff_aac_spectral_bits[cb-1];  /* bit counts passed to put_bits for table cb-1 */
+    uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1]; /* code values passed to put_bits for table cb-1 */
+
+    abs_pow34_v(s->scoefs, in, size);
+    scaled = s->scoefs; /* the scaled argument supplied by the caller is replaced by the values just computed */
+    for (i = 0; i < size; i += 4) { /* indices i..i+3 are accessed on every iteration */
+        int curidx;
+        int *in_int = (int *)&in[i]; /* lets the asm load the raw bit patterns of in[i..i+3] */
+
+        qc1 = scaled[i  ] * Q34 + 0.4054f; /* float-to-int conversion truncates; 0.4054 acts as the rounding offset */
+        qc2 = scaled[i+1] * Q34 + 0.4054f;
+        qc3 = scaled[i+2] * Q34 + 0.4054f;
+        qc4 = scaled[i+3] * Q34 + 0.4054f;
+
+        __asm__ volatile (
+            ".set push                      \n\t"
+            ".set noreorder                 \n\t"
+
+            "slt    %[qc1], $zero,  %[qc1]  \n\t" /* qc = (0 < qc): every positive level becomes 1, everything else 0 */
+            "slt    %[qc2], $zero,  %[qc2]  \n\t"
+            "slt    %[qc3], $zero,  %[qc3]  \n\t"
+            "slt    %[qc4], $zero,  %[qc4]  \n\t"
+            "lw     $t0,    0(%[in_int])    \n\t" /* t0..t3 = raw words of in[i..i+3] */
+            "lw     $t1,    4(%[in_int])    \n\t"
+            "lw     $t2,    8(%[in_int])    \n\t"
+            "lw     $t3,    12(%[in_int])   \n\t"
+            "srl    $t0,    $t0,    31      \n\t" /* keep only bit 31, the sign bit */
+            "srl    $t1,    $t1,    31      \n\t"
+            "srl    $t2,    $t2,    31      \n\t"
+            "srl    $t3,    $t3,    31      \n\t"
+            "subu   $t4,    $zero,  %[qc1]  \n\t" /* t4..t7 = -qc */
+            "subu   $t5,    $zero,  %[qc2]  \n\t"
+            "subu   $t6,    $zero,  %[qc3]  \n\t"
+            "subu   $t7,    $zero,  %[qc4]  \n\t"
+            "movn   %[qc1], $t4,    $t0     \n\t" /* qc = -qc where the input's sign bit was set */
+            "movn   %[qc2], $t5,    $t1     \n\t"
+            "movn   %[qc3], $t6,    $t2     \n\t"
+            "movn   %[qc4], $t7,    $t3     \n\t"
+
+            ".set pop                       \n\t"
+
+            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
+              [qc3]"+r"(qc3), [qc4]"+r"(qc4)
+            : [in_int]"r"(in_int)
+            : "t0", "t1", "t2", "t3",
+              "t4", "t5", "t6", "t7",
+              "memory"
+        );
+
+        curidx = qc1; /* curidx = 27*qc1 + 9*qc2 + 3*qc3 + qc4, built up step by step */
+        curidx *= 3;
+        curidx += qc2;
+        curidx *= 3;
+        curidx += qc3;
+        curidx *= 3;
+        curidx += qc4;
+        curidx += 40; /* 40 = 27 + 9 + 3 + 1, moving the range from -40..40 to 0..80 */
+
+        put_bits(pb, p_bits[curidx], p_codes[curidx]);
+    }
+}
+
+static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
+                                                     PutBitContext *pb, const float *in,
+                                                     const float *scaled, int size, int scale_idx,
+                                                     int cb, const float lambda, const float uplim,
+                                                     int *bits)
+{ /* Quantizes in[] four values at a time to magnitudes capped at 2 and writes one codeword plus appended sign bits per group to pb; lambda, uplim and bits are not used here. */
+    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; /* multiplier for the |x|^(3/4) values, looked up from scale_idx */
+    int i;
+    int qc1, qc2, qc3, qc4;
+
+    uint8_t  *p_bits  = (uint8_t  *)ff_aac_spectral_bits[cb-1];  /* bit counts for table cb-1 */
+    uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1]; /* code values for table cb-1 */
+
+    abs_pow34_v(s->scoefs, in, size);
+    scaled = s->scoefs; /* the scaled argument supplied by the caller is replaced by the values just computed */
+    for (i = 0; i < size; i += 4) { /* indices i..i+3 are accessed on every iteration */
+        int curidx, sign, count;
+        int *in_int = (int *)&in[i]; /* lets the asm load the raw bit patterns of in[i..i+3] */
+        uint8_t v_bits;
+        unsigned int v_codes;
+
+        qc1 = scaled[i  ] * Q34 + 0.4054f; /* float-to-int conversion truncates; 0.4054 acts as the rounding offset */
+        qc2 = scaled[i+1] * Q34 + 0.4054f;
+        qc3 = scaled[i+2] * Q34 + 0.4054f;
+        qc4 = scaled[i+3] * Q34 + 0.4054f;
+
+        __asm__ volatile (
+            ".set push                              \n\t"
+            ".set noreorder                         \n\t"
+
+            "ori    $t4,        $zero,      2       \n\t" /* t4 = 2, the cap */
+            "ori    %[sign],    $zero,      0       \n\t" /* sign = 0 */
+            "slt    $t0,        $t4,        %[qc1]  \n\t" /* t0..t3 = (qc > 2) */
+            "slt    $t1,        $t4,        %[qc2]  \n\t"
+            "slt    $t2,        $t4,        %[qc3]  \n\t"
+            "slt    $t3,        $t4,        %[qc4]  \n\t"
+            "movn   %[qc1],     $t4,        $t0     \n\t" /* qc = 2 where qc exceeded 2 */
+            "movn   %[qc2],     $t4,        $t1     \n\t"
+            "movn   %[qc3],     $t4,        $t2     \n\t"
+            "movn   %[qc4],     $t4,        $t3     \n\t"
+            "lw     $t0,        0(%[in_int])        \n\t" /* t0..t3 = raw words of in[i..i+3] */
+            "lw     $t1,        4(%[in_int])        \n\t"
+            "lw     $t2,        8(%[in_int])        \n\t"
+            "lw     $t3,        12(%[in_int])       \n\t"
+            "slt    $t0,        $t0,        $zero   \n\t" /* t0 = 1 when the word is negative as an integer, i.e. sign bit set */
+            "movn   %[sign],    $t0,        %[qc1]  \n\t" /* take that bit only when qc1 != 0 */
+            "slt    $t1,        $t1,        $zero   \n\t"
+            "slt    $t2,        $t2,        $zero   \n\t"
+            "slt    $t3,        $t3,        $zero   \n\t"
+            "sll    $t0,        %[sign],    1       \n\t" /* candidate: sign shifted left with in[i+1]'s bit appended */
+            "or     $t0,        $t0,        $t1     \n\t"
+            "movn   %[sign],    $t0,        %[qc2]  \n\t" /* committed only when qc2 != 0 */
+            "slt    $t4,        $zero,      %[qc1]  \n\t" /* t4, t1, count = (qc1 > 0), (qc2 > 0), (qc3 > 0) */
+            "slt    $t1,        $zero,      %[qc2]  \n\t"
+            "slt    %[count],   $zero,      %[qc3]  \n\t"
+            "sll    $t0,        %[sign],    1       \n\t"
+            "or     $t0,        $t0,        $t2     \n\t"
+            "movn   %[sign],    $t0,        %[qc3]  \n\t" /* append in[i+2]'s bit when qc3 != 0 */
+            "slt    $t2,        $zero,      %[qc4]  \n\t"
+            "addu   %[count],   %[count],   $t4     \n\t"
+            "addu   %[count],   %[count],   $t1     \n\t"
+            "sll    $t0,        %[sign],    1       \n\t"
+            "or     $t0,        $t0,        $t3     \n\t"
+            "movn   %[sign],    $t0,        %[qc4]  \n\t" /* append in[i+3]'s bit when qc4 != 0 */
+            "addu   %[count],   %[count],   $t2     \n\t" /* count = how many of qc1..qc4 are greater than zero */
+
+            ".set pop                               \n\t"
+
+            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
+              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
+              [sign]"=&r"(sign), [count]"=&r"(count)
+            : [in_int]"r"(in_int)
+            : "t0", "t1", "t2", "t3", "t4",
+              "memory"
+        );
+
+        curidx = qc1; /* curidx = 27*qc1 + 9*qc2 + 3*qc3 + qc4 */
+        curidx *= 3;
+        curidx += qc2;
+        curidx *= 3;
+        curidx += qc3;
+        curidx *= 3;
+        curidx += qc4;
+
+        v_codes = (p_codes[curidx] << count) | (sign & ((1 << count) - 1)); /* code value followed by the low count bits of sign */
+        v_bits  = p_bits[curidx] + count;
+        put_bits(pb, v_bits, v_codes);
+    }
+}
+
+static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,
+                                                     PutBitContext *pb, const float *in,
+                                                     const float *scaled, int size, int scale_idx,
+                                                     int cb, const float lambda, const float uplim,
+                                                     int *bits)
+{ /* Quantizes in[] four values at a time to -4..4, looks up one code per pair and writes both pair codes with a single put_bits call; lambda, uplim and bits are not used here. */
+    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; /* multiplier for the |x|^(3/4) values, looked up from scale_idx */
+    int i;
+    int qc1, qc2, qc3, qc4;
+
+    uint8_t  *p_bits  = (uint8_t  *)ff_aac_spectral_bits[cb-1];  /* bit counts for table cb-1 */
+    uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1]; /* code values for table cb-1 */
+
+    abs_pow34_v(s->scoefs, in, size);
+    scaled = s->scoefs; /* the scaled argument supplied by the caller is replaced by the values just computed */
+    for (i = 0; i < size; i += 4) { /* indices i..i+3 are accessed on every iteration */
+        int curidx, curidx2;
+        int *in_int = (int *)&in[i]; /* lets the asm load the raw bit patterns of in[i..i+3] */
+        uint8_t v_bits;
+        unsigned int v_codes;
+
+        qc1 = scaled[i  ] * Q34 + 0.4054f; /* float-to-int conversion truncates; 0.4054 acts as the rounding offset */
+        qc2 = scaled[i+1] * Q34 + 0.4054f;
+        qc3 = scaled[i+2] * Q34 + 0.4054f;
+        qc4 = scaled[i+3] * Q34 + 0.4054f;
+
+        __asm__ volatile (
+            ".set push                      \n\t"
+            ".set noreorder                 \n\t"
+
+            "ori    $t4,    $zero,  4       \n\t" /* t4 = 4, the cap */
+            "slt    $t0,    $t4,    %[qc1]  \n\t" /* t0..t3 = (qc > 4) */
+            "slt    $t1,    $t4,    %[qc2]  \n\t"
+            "slt    $t2,    $t4,    %[qc3]  \n\t"
+            "slt    $t3,    $t4,    %[qc4]  \n\t"
+            "movn   %[qc1], $t4,    $t0     \n\t" /* qc = 4 where qc exceeded 4 */
+            "movn   %[qc2], $t4,    $t1     \n\t"
+            "movn   %[qc3], $t4,    $t2     \n\t"
+            "movn   %[qc4], $t4,    $t3     \n\t"
+            "lw     $t0,    0(%[in_int])    \n\t" /* t0..t3 = raw words of in[i..i+3] */
+            "lw     $t1,    4(%[in_int])    \n\t"
+            "lw     $t2,    8(%[in_int])    \n\t"
+            "lw     $t3,    12(%[in_int])   \n\t"
+            "srl    $t0,    $t0,    31      \n\t" /* keep only bit 31, the sign bit */
+            "srl    $t1,    $t1,    31      \n\t"
+            "srl    $t2,    $t2,    31      \n\t"
+            "srl    $t3,    $t3,    31      \n\t"
+            "subu   $t4,    $zero,  %[qc1]  \n\t" /* t4..t7 = -qc */
+            "subu   $t5,    $zero,  %[qc2]  \n\t"
+            "subu   $t6,    $zero,  %[qc3]  \n\t"
+            "subu   $t7,    $zero,  %[qc4]  \n\t"
+            "movn   %[qc1], $t4,    $t0     \n\t" /* qc = -qc where the input's sign bit was set */
+            "movn   %[qc2], $t5,    $t1     \n\t"
+            "movn   %[qc3], $t6,    $t2     \n\t"
+            "movn   %[qc4], $t7,    $t3     \n\t"
+
+            ".set pop                       \n\t"
+
+            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
+              [qc3]"+r"(qc3), [qc4]"+r"(qc4)
+            : [in_int]"r"(in_int)
+            : "t0", "t1", "t2", "t3",
+              "t4", "t5", "t6", "t7",
+              "memory"
+        );
+
+        curidx = 9 * qc1; /* index for the first pair: 9*qc1 + qc2 + 40 */
+        curidx += qc2 + 40; /* 40 = 9*4 + 4, moving the range from -40..40 to 0..80 */
+
+        curidx2 = 9 * qc3; /* index for the second pair, built the same way */
+        curidx2 += qc4 + 40;
+
+        v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]); /* first pair's code placed above the second pair's code */
+        v_bits  = p_bits[curidx] + p_bits[curidx2];
+        put_bits(pb, v_bits, v_codes);
+    }
+}
+
+static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s,
+                                                      PutBitContext *pb, const float *in,
+                                                      const float *scaled, int size, int scale_idx,
+                                                      int cb, const float lambda, const float uplim,
+                                                      int *bits)
+{ /* Quantizes in[] four values at a time to magnitudes capped at 7 and writes, for each pair, one code followed by that pair's sign bits; lambda, uplim and bits are not used here. */
+    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; /* multiplier for the |x|^(3/4) values, looked up from scale_idx */
+    int i;
+    int qc1, qc2, qc3, qc4;
+
+    uint8_t  *p_bits  = (uint8_t*) ff_aac_spectral_bits[cb-1];  /* bit counts for table cb-1 */
+    uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1]; /* code values for table cb-1 */
+
+    abs_pow34_v(s->scoefs, in, size);
+    scaled = s->scoefs; /* the scaled argument supplied by the caller is replaced by the values just computed */
+    for (i = 0; i < size; i += 4) { /* indices i..i+3 are accessed on every iteration */
+        int curidx, sign1, count1, sign2, count2;
+        int *in_int = (int *)&in[i]; /* lets the asm load the raw bit patterns of in[i..i+3] */
+        uint8_t v_bits;
+        unsigned int v_codes;
+
+        qc1 = scaled[i  ] * Q34 + 0.4054f; /* float-to-int conversion truncates; 0.4054 acts as the rounding offset */
+        qc2 = scaled[i+1] * Q34 + 0.4054f;
+        qc3 = scaled[i+2] * Q34 + 0.4054f;
+        qc4 = scaled[i+3] * Q34 + 0.4054f;
+
+        __asm__ volatile (
+            ".set push                              \n\t"
+            ".set noreorder                         \n\t"
+
+            "ori    $t4,        $zero,      7       \n\t" /* t4 = 7, the cap */
+            "ori    %[sign1],   $zero,      0       \n\t" /* sign1 = sign2 = 0 */
+            "ori    %[sign2],   $zero,      0       \n\t"
+            "slt    $t0,        $t4,        %[qc1]  \n\t" /* t0..t3 = (qc > 7) */
+            "slt    $t1,        $t4,        %[qc2]  \n\t"
+            "slt    $t2,        $t4,        %[qc3]  \n\t"
+            "slt    $t3,        $t4,        %[qc4]  \n\t"
+            "movn   %[qc1],     $t4,        $t0     \n\t" /* qc = 7 where qc exceeded 7 */
+            "movn   %[qc2],     $t4,        $t1     \n\t"
+            "movn   %[qc3],     $t4,        $t2     \n\t"
+            "movn   %[qc4],     $t4,        $t3     \n\t"
+            "lw     $t0,        0(%[in_int])        \n\t" /* t0..t3 = raw words of in[i..i+3] */
+            "lw     $t1,        4(%[in_int])        \n\t"
+            "lw     $t2,        8(%[in_int])        \n\t"
+            "lw     $t3,        12(%[in_int])       \n\t"
+            "slt    $t0,        $t0,        $zero   \n\t" /* t0 = 1 when in[i]'s sign bit is set */
+            "movn   %[sign1],   $t0,        %[qc1]  \n\t" /* taken only when qc1 != 0 */
+            "slt    $t2,        $t2,        $zero   \n\t"
+            "movn   %[sign2],   $t2,        %[qc3]  \n\t" /* same for in[i+2] and qc3 */
+            "slt    $t1,        $t1,        $zero   \n\t"
+            "sll    $t0,        %[sign1],   1       \n\t"
+            "or     $t0,        $t0,        $t1     \n\t"
+            "movn   %[sign1],   $t0,        %[qc2]  \n\t" /* append in[i+1]'s sign bit when qc2 != 0 */
+            "slt    $t3,        $t3,        $zero   \n\t"
+            "sll    $t0,        %[sign2],   1       \n\t"
+            "or     $t0,        $t0,        $t3     \n\t"
+            "movn   %[sign2],   $t0,        %[qc4]  \n\t" /* append in[i+3]'s sign bit when qc4 != 0 */
+            "slt    %[count1],  $zero,      %[qc1]  \n\t"
+            "slt    $t1,        $zero,      %[qc2]  \n\t"
+            "slt    %[count2],  $zero,      %[qc3]  \n\t"
+            "slt    $t2,        $zero,      %[qc4]  \n\t"
+            "addu   %[count1],  %[count1],  $t1     \n\t" /* count1 = (qc1 > 0) + (qc2 > 0) */
+            "addu   %[count2],  %[count2],  $t2     \n\t" /* count2 = (qc3 > 0) + (qc4 > 0) */
+
+            ".set pop                               \n\t"
+
+            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
+              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
+              [sign1]"=&r"(sign1), [count1]"=&r"(count1),
+              [sign2]"=&r"(sign2), [count2]"=&r"(count2)
+            : [in_int]"r"(in_int)
+            : "t0", "t1", "t2", "t3", "t4",
+              "memory"
+        );
+
+        curidx  = 8 * qc1; /* first pair: index 8*qc1 + qc2, range 0..63 */
+        curidx += qc2;
+
+        v_codes = (p_codes[curidx] << count1) | sign1; /* code value followed by count1 sign bits */
+        v_bits  = p_bits[curidx] + count1;
+        put_bits(pb, v_bits, v_codes);
+
+        curidx  = 8 * qc3; /* second pair: index 8*qc3 + qc4 */
+        curidx += qc4;
+
+        v_codes = (p_codes[curidx] << count2) | sign2; /* code value followed by count2 sign bits */
+        v_bits  = p_bits[curidx] + count2;
+        put_bits(pb, v_bits, v_codes);
+    }
+}
+
+static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s,
+                                                       PutBitContext *pb, const float *in,
+                                                       const float *scaled, int size, int scale_idx,
+                                                       int cb, const float lambda, const float uplim,
+                                                       int *bits)
+{ /* Quantizes in[] four values at a time to magnitudes capped at 12 and writes, for each pair, one code followed by that pair's sign bits; lambda, uplim and bits are not used here. */
+    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; /* multiplier for the |x|^(3/4) values, looked up from scale_idx */
+    int i;
+    int qc1, qc2, qc3, qc4;
+
+    uint8_t  *p_bits  = (uint8_t*) ff_aac_spectral_bits[cb-1];  /* bit counts for table cb-1 */
+    uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1]; /* code values for table cb-1 */
+
+    abs_pow34_v(s->scoefs, in, size);
+    scaled = s->scoefs; /* the scaled argument supplied by the caller is replaced by the values just computed */
+    for (i = 0; i < size; i += 4) { /* indices i..i+3 are accessed on every iteration */
+        int curidx, sign1, count1, sign2, count2;
+        int *in_int = (int *)&in[i]; /* lets the asm load the raw bit patterns of in[i..i+3] */
+        uint8_t v_bits;
+        unsigned int v_codes;
+
+        qc1 = scaled[i  ] * Q34 + 0.4054f; /* float-to-int conversion truncates; 0.4054 acts as the rounding offset */
+        qc2 = scaled[i+1] * Q34 + 0.4054f;
+        qc3 = scaled[i+2] * Q34 + 0.4054f;
+        qc4 = scaled[i+3] * Q34 + 0.4054f;
+
+        __asm__ volatile (
+            ".set push                              \n\t"
+            ".set noreorder                         \n\t"
+
+            "ori    $t4,        $zero,      12      \n\t" /* t4 = 12, the cap */
+            "ori    %[sign1],   $zero,      0       \n\t" /* sign1 = sign2 = 0 */
+            "ori    %[sign2],   $zero,      0       \n\t"
+            "slt    $t0,        $t4,        %[qc1]  \n\t" /* t0..t3 = (qc > 12) */
+            "slt    $t1,        $t4,        %[qc2]  \n\t"
+            "slt    $t2,        $t4,        %[qc3]  \n\t"
+            "slt    $t3,        $t4,        %[qc4]  \n\t"
+            "movn   %[qc1],     $t4,        $t0     \n\t" /* qc = 12 where qc exceeded 12 */
+            "movn   %[qc2],     $t4,        $t1     \n\t"
+            "movn   %[qc3],     $t4,        $t2     \n\t"
+            "movn   %[qc4],     $t4,        $t3     \n\t"
+            "lw     $t0,        0(%[in_int])        \n\t" /* t0..t3 = raw words of in[i..i+3] */
+            "lw     $t1,        4(%[in_int])        \n\t"
+            "lw     $t2,        8(%[in_int])        \n\t"
+            "lw     $t3,        12(%[in_int])       \n\t"
+            "slt    $t0,        $t0,        $zero   \n\t" /* t0 = 1 when in[i]'s sign bit is set */
+            "movn   %[sign1],   $t0,        %[qc1]  \n\t" /* taken only when qc1 != 0 */
+            "slt    $t2,        $t2,        $zero   \n\t"
+            "movn   %[sign2],   $t2,        %[qc3]  \n\t" /* same for in[i+2] and qc3 */
+            "slt    $t1,        $t1,        $zero   \n\t"
+            "sll    $t0,        %[sign1],   1       \n\t"
+            "or     $t0,        $t0,        $t1     \n\t"
+            "movn   %[sign1],   $t0,        %[qc2]  \n\t" /* append in[i+1]'s sign bit when qc2 != 0 */
+            "slt    $t3,        $t3,        $zero   \n\t"
+            "sll    $t0,        %[sign2],   1       \n\t"
+            "or     $t0,        $t0,        $t3     \n\t"
+            "movn   %[sign2],   $t0,        %[qc4]  \n\t" /* append in[i+3]'s sign bit when qc4 != 0 */
+            "slt    %[count1],  $zero,      %[qc1]  \n\t"
+            "slt    $t1,        $zero,      %[qc2]  \n\t"
+            "slt    %[count2],  $zero,      %[qc3]  \n\t"
+            "slt    $t2,        $zero,      %[qc4]  \n\t"
+            "addu   %[count1],  %[count1],  $t1     \n\t" /* count1 = (qc1 > 0) + (qc2 > 0) */
+            "addu   %[count2],  %[count2],  $t2     \n\t" /* count2 = (qc3 > 0) + (qc4 > 0) */
+
+            ".set pop                               \n\t"
+
+            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
+              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
+              [sign1]"=&r"(sign1), [count1]"=&r"(count1),
+              [sign2]"=&r"(sign2), [count2]"=&r"(count2)
+            : [in_int]"r"(in_int)
+            : "t0", "t1", "t2", "t3", "t4",
+              "memory"
+        );
+
+        curidx  = 13 * qc1; /* first pair: index 13*qc1 + qc2, range 0..168 */
+        curidx += qc2;
+
+        v_codes = (p_codes[curidx] << count1) | sign1; /* code value followed by count1 sign bits */
+        v_bits  = p_bits[curidx] + count1;
+        put_bits(pb, v_bits, v_codes);
+
+        curidx  = 13 * qc3; /* second pair: index 13*qc3 + qc4 */
+        curidx += qc4;
+
+        v_codes = (p_codes[curidx] << count2) | sign2; /* code value followed by count2 sign bits */
+        v_bits  = p_bits[curidx] + count2;
+        put_bits(pb, v_bits, v_codes);
+    }
+}
+
+static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
+                                                   PutBitContext *pb, const float *in,
+                                                   const float *scaled, int size, int scale_idx,
+                                                   int cb, const float lambda, const float uplim,
+                                                   int *bits)
+{ /* Quantizes in[0..size) four values per iteration and writes the codes to pb; lambda, uplim and bits are unused. */
+    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
+    int i;
+    int qc1, qc2, qc3, qc4; /* quantized magnitudes of the four values being processed */
+
+    uint8_t  *p_bits    = (uint8_t* )ff_aac_spectral_bits[cb-1];
+    uint16_t *p_codes   = (uint16_t*)ff_aac_spectral_codes[cb-1];
+    float    *p_vectors = (float*   )ff_aac_codebook_vectors[cb-1]; /* only compared against 64.0f below */
+
+    abs_pow34_v(s->scoefs, in, size); /* fills s->scoefs from in; presumably |in|^(3/4), confirm in abs_pow34_v */
+    scaled = s->scoefs; /* the scaled argument passed by the caller is discarded */
+
+    if (cb < 11) { /* this branch never emits the extra escape bits */
+        for (i = 0; i < size; i += 4) {
+            int curidx, curidx2, sign1, count1, sign2, count2;
+            int *in_int = (int *)&in[i]; /* raw words of in[i..i+3]; the asm only tests whether they are negative */
+            uint8_t v_bits;
+            unsigned int v_codes;
+
+            qc1 = scaled[i  ] * Q34 + 0.4054f; /* 0.4054f is added before the float-to-int conversion */
+            qc2 = scaled[i+1] * Q34 + 0.4054f;
+            qc3 = scaled[i+2] * Q34 + 0.4054f;
+            qc4 = scaled[i+3] * Q34 + 0.4054f;
+
+            __asm__ volatile (
+                ".set push                                  \n\t"
+                ".set noreorder                             \n\t"
+
+                "ori        $t4,        $zero,      16      \n\t" /* $t4 = 16, upper limit for qc1..qc4 */
+                "ori        %[sign1],   $zero,      0       \n\t" /* sign1 = sign2 = 0 */
+                "ori        %[sign2],   $zero,      0       \n\t"
+                "slt        $t0,        $t4,        %[qc1]  \n\t" /* $t0..$t3 = (16 < qc) */
+                "slt        $t1,        $t4,        %[qc2]  \n\t"
+                "slt        $t2,        $t4,        %[qc3]  \n\t"
+                "slt        $t3,        $t4,        %[qc4]  \n\t"
+                "movn       %[qc1],     $t4,        $t0     \n\t" /* qc = 16 wherever it exceeded 16 */
+                "movn       %[qc2],     $t4,        $t1     \n\t"
+                "movn       %[qc3],     $t4,        $t2     \n\t"
+                "movn       %[qc4],     $t4,        $t3     \n\t"
+                "lw         $t0,        0(%[in_int])        \n\t" /* $t0..$t3 = raw words of in[i..i+3] */
+                "lw         $t1,        4(%[in_int])        \n\t"
+                "lw         $t2,        8(%[in_int])        \n\t"
+                "lw         $t3,        12(%[in_int])       \n\t"
+                "slt        $t0,        $t0,        $zero   \n\t" /* $t0 = 1 if the word of in[i] is negative */
+                "movn       %[sign1],   $t0,        %[qc1]  \n\t" /* recorded only when qc1 != 0 */
+                "slt        $t2,        $t2,        $zero   \n\t"
+                "movn       %[sign2],   $t2,        %[qc3]  \n\t"
+                "slt        $t1,        $t1,        $zero   \n\t"
+                "sll        $t0,        %[sign1],   1       \n\t" /* make room for the second sign bit */
+                "or         $t0,        $t0,        $t1     \n\t"
+                "movn       %[sign1],   $t0,        %[qc2]  \n\t" /* appended only when qc2 != 0 */
+                "slt        $t3,        $t3,        $zero   \n\t"
+                "sll        $t0,        %[sign2],   1       \n\t"
+                "or         $t0,        $t0,        $t3     \n\t"
+                "movn       %[sign2],   $t0,        %[qc4]  \n\t"
+                "slt        %[count1],  $zero,      %[qc1]  \n\t" /* count = number of positive qc in each pair */
+                "slt        $t1,        $zero,      %[qc2]  \n\t"
+                "slt        %[count2],  $zero,      %[qc3]  \n\t"
+                "slt        $t2,        $zero,      %[qc4]  \n\t"
+                "addu       %[count1],  %[count1],  $t1     \n\t"
+                "addu       %[count2],  %[count2],  $t2     \n\t"
+
+                ".set pop                                   \n\t"
+
+                : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
+                  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
+                  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
+                  [sign2]"=&r"(sign2), [count2]"=&r"(count2)
+                : [in_int]"r"(in_int)
+                : "t0", "t1", "t2", "t3", "t4",
+                  "memory"
+            );
+
+            curidx = 17 * qc1; /* pair index = 17 * first + second */
+            curidx += qc2;
+            curidx2 = 17 * qc3;
+            curidx2 += qc4;
+
+            v_codes = (p_codes[curidx] << count1) | sign1; /* codeword followed by count1 sign bits */
+            v_bits  = p_bits[curidx] + count1;
+            put_bits(pb, v_bits, v_codes);
+
+            v_codes = (p_codes[curidx2] << count2) | sign2;
+            v_bits  = p_bits[curidx2] + count2;
+            put_bits(pb, v_bits, v_codes);
+        }
+    } else {
+        for (i = 0; i < size; i += 4) {
+            int curidx, curidx2, sign1, count1, sign2, count2;
+            int *in_int = (int *)&in[i]; /* raw words of in[i..i+3]; the asm only tests whether they are negative */
+            uint8_t v_bits;
+            unsigned int v_codes;
+            int c1, c2, c3, c4; /* derived from qc before the clamp to 16; used for the extra bits below */
+
+            qc1 = scaled[i  ] * Q34 + 0.4054f; /* 0.4054f is added before the float-to-int conversion */
+            qc2 = scaled[i+1] * Q34 + 0.4054f;
+            qc3 = scaled[i+2] * Q34 + 0.4054f;
+            qc4 = scaled[i+3] * Q34 + 0.4054f;
+
+            __asm__ volatile (
+                ".set push                                  \n\t"
+                ".set noreorder                             \n\t"
+
+                "ori        $t4,        $zero,      16      \n\t" /* $t4 = 16, upper limit for qc1..qc4 */
+                "ori        %[sign1],   $zero,      0       \n\t"
+                "ori        %[sign2],   $zero,      0       \n\t"
+                "shll_s.w   %[c1],      %[qc1],     18      \n\t" /* c = qc << 18 using the DSP saturating shift ... */
+                "shll_s.w   %[c2],      %[qc2],     18      \n\t"
+                "shll_s.w   %[c3],      %[qc3],     18      \n\t"
+                "shll_s.w   %[c4],      %[qc4],     18      \n\t"
+                "srl        %[c1],      %[c1],      18      \n\t" /* ... then shifted back right by 18 */
+                "srl        %[c2],      %[c2],      18      \n\t"
+                "srl        %[c3],      %[c3],      18      \n\t"
+                "srl        %[c4],      %[c4],      18      \n\t"
+                "slt        $t0,        $t4,        %[qc1]  \n\t" /* $t0..$t3 = (16 < qc) */
+                "slt        $t1,        $t4,        %[qc2]  \n\t"
+                "slt        $t2,        $t4,        %[qc3]  \n\t"
+                "slt        $t3,        $t4,        %[qc4]  \n\t"
+                "movn       %[qc1],     $t4,        $t0     \n\t" /* qc = 16 wherever it exceeded 16 */
+                "movn       %[qc2],     $t4,        $t1     \n\t"
+                "movn       %[qc3],     $t4,        $t2     \n\t"
+                "movn       %[qc4],     $t4,        $t3     \n\t"
+                "lw         $t0,        0(%[in_int])        \n\t" /* $t0..$t3 = raw words of in[i..i+3] */
+                "lw         $t1,        4(%[in_int])        \n\t"
+                "lw         $t2,        8(%[in_int])        \n\t"
+                "lw         $t3,        12(%[in_int])       \n\t"
+                "slt        $t0,        $t0,        $zero   \n\t" /* $t0 = 1 if the word of in[i] is negative */
+                "movn       %[sign1],   $t0,        %[qc1]  \n\t" /* recorded only when qc1 != 0 */
+                "slt        $t2,        $t2,        $zero   \n\t"
+                "movn       %[sign2],   $t2,        %[qc3]  \n\t"
+                "slt        $t1,        $t1,        $zero   \n\t"
+                "sll        $t0,        %[sign1],   1       \n\t" /* make room for the second sign bit */
+                "or         $t0,        $t0,        $t1     \n\t"
+                "movn       %[sign1],   $t0,        %[qc2]  \n\t" /* appended only when qc2 != 0 */
+                "slt        $t3,        $t3,        $zero   \n\t"
+                "sll        $t0,        %[sign2],   1       \n\t"
+                "or         $t0,        $t0,        $t3     \n\t"
+                "movn       %[sign2],   $t0,        %[qc4]  \n\t"
+                "slt        %[count1],  $zero,      %[qc1]  \n\t" /* count = number of positive qc in each pair */
+                "slt        $t1,        $zero,      %[qc2]  \n\t"
+                "slt        %[count2],  $zero,      %[qc3]  \n\t"
+                "slt        $t2,        $zero,      %[qc4]  \n\t"
+                "addu       %[count1],  %[count1],  $t1     \n\t"
+                "addu       %[count2],  %[count2],  $t2     \n\t"
+
+                ".set pop                                   \n\t"
+
+                : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
+                  [qc3]"+r"(qc3), [qc4]"+r"(qc4),
+                  [sign1]"=&r"(sign1), [count1]"=&r"(count1),
+                  [sign2]"=&r"(sign2), [count2]"=&r"(count2),
+                  [c1]"=&r"(c1), [c2]"=&r"(c2),
+                  [c3]"=&r"(c3), [c4]"=&r"(c4)
+                : [in_int]"r"(in_int)
+                : "t0", "t1", "t2", "t3", "t4",
+                  "memory"
+            );
+
+            curidx = 17 * qc1; /* pair index = 17 * first + second */
+            curidx += qc2;
+
+            curidx2 = 17 * qc3;
+            curidx2 += qc4;
+
+            v_codes = (p_codes[curidx] << count1) | sign1; /* codeword followed by count1 sign bits */
+            v_bits  = p_bits[curidx] + count1;
+            put_bits(pb, v_bits, v_codes);
+
+            if (p_vectors[curidx*2  ] == 64.0f) { /* vector entry is 64.0f: write extra bits built from c1 */
+                int len = av_log2(c1);
+                v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 << len) - 1)); /* (len - 4) ones, a zero, then the low len bits of c1 */
+                put_bits(pb, len * 2 - 3, v_codes);
+            }
+            if (p_vectors[curidx*2+1] == 64.0f) { /* same for the second value of the pair */
+                int len = av_log2(c2);
+                v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 << len) - 1));
+                put_bits(pb, len*2-3, v_codes);
+            }
+
+            v_codes = (p_codes[curidx2] << count2) | sign2; /* second pair: codeword followed by count2 sign bits */
+            v_bits  = p_bits[curidx2] + count2;
+            put_bits(pb, v_bits, v_codes);
+
+            if (p_vectors[curidx2*2  ] == 64.0f) {
+                int len = av_log2(c3);
+                v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 << len) - 1));
+                put_bits(pb, len* 2 - 3, v_codes);
+            }
+            if (p_vectors[curidx2*2+1] == 64.0f) {
+                int len = av_log2(c4);
+                v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 << len) - 1));
+                put_bits(pb, len * 2 - 3, v_codes);
+            }
+        }
+    }
+}
+
+static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s,
+                                                         PutBitContext *pb, const float *in,
+                                                         const float *scaled, int size, int scale_idx,
+                                                         int cb, const float lambda, const float uplim,
+                                                         int *bits) = {
+    [ 0] = NULL,                                        /* no encoder is registered at index 0 */
+    [ 1] = quantize_and_encode_band_cost_SQUAD_mips,
+    [ 2] = quantize_and_encode_band_cost_SQUAD_mips,
+    [ 3] = quantize_and_encode_band_cost_UQUAD_mips,
+    [ 4] = quantize_and_encode_band_cost_UQUAD_mips,
+    [ 5] = quantize_and_encode_band_cost_SPAIR_mips,
+    [ 6] = quantize_and_encode_band_cost_SPAIR_mips,
+    [ 7] = quantize_and_encode_band_cost_UPAIR7_mips,
+    [ 8] = quantize_and_encode_band_cost_UPAIR7_mips,
+    [ 9] = quantize_and_encode_band_cost_UPAIR12_mips,
+    [10] = quantize_and_encode_band_cost_UPAIR12_mips,
+    [11] = quantize_and_encode_band_cost_ESC_mips,      /* last entry; the table has 12 slots */
+};
+
+#define quantize_and_encode_band_cost(ctx_, pb_, in_, scaled_, size_,   \
+                                      sf_idx_, cb_, lambda_, uplim_,    \
+                                      bits_) /* dispatches on cb_ */    \
+    quantize_and_encode_band_cost_arr[cb_](ctx_, pb_, in_, scaled_,     \
+                                           size_, sf_idx_, cb_,         \
+                                           lambda_, uplim_, bits_)
+
+static void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb,
+                                          const float *in, int size, int scale_idx,
+                                          int cb, const float lambda)
+{
+    /* Calls the table entry for cb with scaled = NULL, uplim = INFINITY and bits = NULL. */
+    quantize_and_encode_band_cost_arr[cb](s, pb, in, NULL, size, scale_idx, cb, lambda, INFINITY, NULL);
+}
+
+/**
+ * Per-codebook functions derived from the template function and optimized to compute only the number of bits
+ */
+static float get_band_numbits_ZERO_mips(struct AACEncContext *s,
+                                        PutBitContext *pb, const float *in,
+                                        const float *scaled, int size, int scale_idx,
+                                        int cb, const float lambda, const float uplim,
+                                        int *bits)
+{
+    return 0.0f;    /* always reports zero bits; every argument is ignored */
+}
+
+static float get_band_numbits_SQUAD_mips(struct AACEncContext *s,
+                                         PutBitContext *pb, const float *in,
+                                         const float *scaled, int size, int scale_idx,
+                                         int cb, const float lambda, const float uplim,
+                                         int *bits)
+{ /* Returns the sum of the p_bits entries selected for in[0..size), one lookup per four values; s, pb, lambda, uplim and bits are unused. */
+    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
+    int i;
+    int qc1, qc2, qc3, qc4;
+    int curbits = 0; /* running total, converted to float on return */
+
+    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
+
+    for (i = 0; i < size; i += 4) {
+        int curidx;
+        int *in_int = (int *)&in[i]; /* raw words of in[i..i+3]; the asm only extracts bit 31 */
+
+        qc1 = scaled[i  ] * Q34 + 0.4054f; /* 0.4054f is added before the float-to-int conversion */
+        qc2 = scaled[i+1] * Q34 + 0.4054f;
+        qc3 = scaled[i+2] * Q34 + 0.4054f;
+        qc4 = scaled[i+3] * Q34 + 0.4054f;
+
+        __asm__ volatile (
+            ".set push                      \n\t"
+            ".set noreorder                 \n\t"
+
+            "slt    %[qc1], $zero,  %[qc1]  \n\t" /* qc = (0 < qc), i.e. 0 or 1 */
+            "slt    %[qc2], $zero,  %[qc2]  \n\t"
+            "slt    %[qc3], $zero,  %[qc3]  \n\t"
+            "slt    %[qc4], $zero,  %[qc4]  \n\t"
+            "lw     $t0,    0(%[in_int])    \n\t" /* $t0..$t3 = raw words of in[i..i+3] */
+            "lw     $t1,    4(%[in_int])    \n\t"
+            "lw     $t2,    8(%[in_int])    \n\t"
+            "lw     $t3,    12(%[in_int])   \n\t"
+            "srl    $t0,    $t0,    31      \n\t" /* keep only bit 31 of each word */
+            "srl    $t1,    $t1,    31      \n\t"
+            "srl    $t2,    $t2,    31      \n\t"
+            "srl    $t3,    $t3,    31      \n\t"
+            "subu   $t4,    $zero,  %[qc1]  \n\t" /* $t4..$t7 = -qc */
+            "subu   $t5,    $zero,  %[qc2]  \n\t"
+            "subu   $t6,    $zero,  %[qc3]  \n\t"
+            "subu   $t7,    $zero,  %[qc4]  \n\t"
+            "movn   %[qc1], $t4,    $t0     \n\t" /* qc = -qc where bit 31 was set, so qc is -1, 0 or 1 */
+            "movn   %[qc2], $t5,    $t1     \n\t"
+            "movn   %[qc3], $t6,    $t2     \n\t"
+            "movn   %[qc4], $t7,    $t3     \n\t"
+
+            ".set pop                       \n\t"
+
+            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
+              [qc3]"+r"(qc3), [qc4]"+r"(qc4)
+            : [in_int]"r"(in_int)
+            : "t0", "t1", "t2", "t3",
+              "t4", "t5", "t6", "t7",
+              "memory"
+        );
+
+        curidx = qc1; /* builds 27*qc1 + 9*qc2 + 3*qc3 + qc4 step by step */
+        curidx *= 3;
+        curidx += qc2;
+        curidx *= 3;
+        curidx += qc3;
+        curidx *= 3;
+        curidx += qc4;
+        curidx += 40; /* moves the -40..40 range to 0..80 */
+
+        curbits += p_bits[curidx];
+    }
+    return curbits;
+}
+
+static float get_band_numbits_UQUAD_mips(struct AACEncContext *s,
+                                         PutBitContext *pb, const float *in,
+                                         const float *scaled, int size, int scale_idx,
+                                         int cb, const float lambda, const float uplim,
+                                         int *bits)
+{ /* Returns the summed p_bits and uquad_sign_bits entries, one lookup per four values; s, pb, in, lambda, uplim and bits are unused. */
+    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
+    int i;
+    int curbits = 0; /* running total, converted to float on return */
+    int qc1, qc2, qc3, qc4;
+
+    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
+
+    for (i = 0; i < size; i += 4) {
+        int curidx;
+
+        qc1 = scaled[i  ] * Q34 + 0.4054f; /* 0.4054f is added before the float-to-int conversion */
+        qc2 = scaled[i+1] * Q34 + 0.4054f;
+        qc3 = scaled[i+2] * Q34 + 0.4054f;
+        qc4 = scaled[i+3] * Q34 + 0.4054f;
+
+        __asm__ volatile (
+            ".set push                      \n\t"
+            ".set noreorder                 \n\t"
+
+            "ori    $t4,    $zero,  2       \n\t" /* $t4 = 2, upper limit for qc1..qc4 */
+            "slt    $t0,    $t4,    %[qc1]  \n\t" /* $t0..$t3 = (2 < qc) */
+            "slt    $t1,    $t4,    %[qc2]  \n\t"
+            "slt    $t2,    $t4,    %[qc3]  \n\t"
+            "slt    $t3,    $t4,    %[qc4]  \n\t"
+            "movn   %[qc1], $t4,    $t0     \n\t" /* qc = 2 wherever it exceeded 2 */
+            "movn   %[qc2], $t4,    $t1     \n\t"
+            "movn   %[qc3], $t4,    $t2     \n\t"
+            "movn   %[qc4], $t4,    $t3     \n\t"
+
+            ".set pop                       \n\t"
+
+            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
+              [qc3]"+r"(qc3), [qc4]"+r"(qc4)
+            :
+            : "t0", "t1", "t2", "t3", "t4"
+        );
+
+        curidx = qc1; /* builds 27*qc1 + 9*qc2 + 3*qc3 + qc4 step by step */
+        curidx *= 3;
+        curidx += qc2;
+        curidx *= 3;
+        curidx += qc3;
+        curidx *= 3;
+        curidx += qc4;
+
+        curbits += p_bits[curidx];
+        curbits += uquad_sign_bits[curidx]; /* second table indexed by the same curidx; defined outside this function */
+    }
+    return curbits;
+}
+
+static float get_band_numbits_SPAIR_mips(struct AACEncContext *s,
+                                         PutBitContext *pb, const float *in,
+                                         const float *scaled, int size, int scale_idx,
+                                         int cb, const float lambda, const float uplim,
+                                         int *bits)
+{ /* Returns the summed p_bits entries, two pair lookups per four values; s, pb, lambda, uplim and bits are unused. */
+    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
+    int i;
+    int qc1, qc2, qc3, qc4;
+    int curbits = 0; /* running total, converted to float on return */
+
+    uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
+
+    for (i = 0; i < size; i += 4) {
+        int curidx, curidx2;
+        int *in_int = (int *)&in[i]; /* raw words of in[i..i+3]; the asm only extracts bit 31 */
+
+        qc1 = scaled[i  ] * Q34 + 0.4054f; /* 0.4054f is added before the float-to-int conversion */
+        qc2 = scaled[i+1] * Q34 + 0.4054f;
+        qc3 = scaled[i+2] * Q34 + 0.4054f;
+        qc4 = scaled[i+3] * Q34 + 0.4054f;
+
+        __asm__ volatile (
+            ".set push                      \n\t"
+            ".set noreorder                 \n\t"
+
+            "ori    $t4,    $zero,  4       \n\t" /* $t4 = 4, upper limit for qc1..qc4 */
+            "slt    $t0,    $t4,    %[qc1]  \n\t" /* $t0..$t3 = (4 < qc) */
+            "slt    $t1,    $t4,    %[qc2]  \n\t"
+            "slt    $t2,    $t4,    %[qc3]  \n\t"
+            "slt    $t3,    $t4,    %[qc4]  \n\t"
+            "movn   %[qc1], $t4,    $t0     \n\t" /* qc = 4 wherever it exceeded 4 */
+            "movn   %[qc2], $t4,    $t1     \n\t"
+            "movn   %[qc3], $t4,    $t2     \n\t"
+            "movn   %[qc4], $t4,    $t3     \n\t"
+            "lw     $t0,    0(%[in_int])    \n\t" /* $t0..$t3 = raw words of in[i..i+3] */
+            "lw     $t1,    4(%[in_int])    \n\t"
+            "lw     $t2,    8(%[in_int])    \n\t"
+            "lw     $t3,    12(%[in_int])   \n\t"
+            "srl    $t0,    $t0,    31      \n\t" /* keep only bit 31 of each word */
+            "srl    $t1,    $t1,    31      \n\t"
+            "srl    $t2,    $t2,    31      \n\t"
+            "srl    $t3,    $t3,    31      \n\t"
+            "subu   $t4,    $zero,  %[qc1]  \n\t" /* $t4 is reused from here on: $t4..$t7 = -qc */
+            "subu   $t5,    $zero,  %[qc2]  \n\t"
+            "subu   $t6,    $zero,  %[qc3]  \n\t"
+            "subu   $t7,    $zero,  %[qc4]  \n\t"
+            "movn   %[qc1], $t4,    $t0     \n\t" /* qc = -qc where bit 31 was set */
+            "movn   %[qc2], $t5,    $t1     \n\t"
+            "movn   %[qc3], $t6,    $t2     \n\t"
+            "movn   %[qc4], $t7,    $t3     \n\t"
+
+            ".set pop                       \n\t"
+
+            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
+              [qc3]"+r"(qc3), [qc4]"+r"(qc4)
+            : [in_int]"r"(in_int)
+            : "t0", "t1", "t2", "t3",
+              "t4", "t5", "t6", "t7",
+              "memory"
+        );
+
+        curidx  = 9 * qc1; /* pair index = 9 * first + second + 40 */
+        curidx += qc2 + 40;
+
+        curidx2  = 9 * qc3;
+        curidx2 += qc4 + 40;
+
+        curbits += p_bits[curidx] + p_bits[curidx2];
+    }
+    return curbits;
+}
+
+static float get_band_numbits_UPAIR7_mips(struct AACEncContext *s,
+                                          PutBitContext *pb, const float *in,
+                                          const float *scaled, int size, int scale_idx,
+                                          int cb, const float lambda, const float uplim,
+                                          int *bits)
+{ /* Returns the summed p_bits and upair7_sign_bits entries, two pair lookups per four values; s, pb, in, lambda, uplim and bits are unused. */
+    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
+    int i;
+    int qc1, qc2, qc3, qc4;
+    int curbits = 0; /* running total, converted to float on return */
+
+    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
+
+    for (i = 0; i < size; i += 4) {
+        int curidx, curidx2;
+
+        qc1 = scaled[i  ] * Q34 + 0.4054f; /* 0.4054f is added before the float-to-int conversion */
+        qc2 = scaled[i+1] * Q34 + 0.4054f;
+        qc3 = scaled[i+2] * Q34 + 0.4054f;
+        qc4 = scaled[i+3] * Q34 + 0.4054f;
+
+        __asm__ volatile (
+            ".set push                      \n\t"
+            ".set noreorder                 \n\t"
+
+            "ori    $t4,    $zero,  7       \n\t" /* $t4 = 7, upper limit for qc1..qc4 */
+            "slt    $t0,    $t4,    %[qc1]  \n\t" /* $t0..$t3 = (7 < qc) */
+            "slt    $t1,    $t4,    %[qc2]  \n\t"
+            "slt    $t2,    $t4,    %[qc3]  \n\t"
+            "slt    $t3,    $t4,    %[qc4]  \n\t"
+            "movn   %[qc1], $t4,    $t0     \n\t" /* qc = 7 wherever it exceeded 7 */
+            "movn   %[qc2], $t4,    $t1     \n\t"
+            "movn   %[qc3], $t4,    $t2     \n\t"
+            "movn   %[qc4], $t4,    $t3     \n\t"
+
+            ".set pop                       \n\t"
+
+            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
+              [qc3]"+r"(qc3), [qc4]"+r"(qc4)
+            :
+            : "t0", "t1", "t2", "t3", "t4"
+        );
+
+        curidx  = 8 * qc1; /* pair index = 8 * first + second */
+        curidx += qc2;
+
+        curidx2  = 8 * qc3;
+        curidx2 += qc4;
+
+        curbits += p_bits[curidx] +
+                   upair7_sign_bits[curidx] + /* second table indexed by the same pair index; defined outside this function */
+                   p_bits[curidx2] +
+                   upair7_sign_bits[curidx2];
+    }
+    return curbits;
+}
+
+static float get_band_numbits_UPAIR12_mips(struct AACEncContext *s,
+                                           PutBitContext *pb, const float *in,
+                                           const float *scaled, int size, int scale_idx,
+                                           int cb, const float lambda, const float uplim,
+                                           int *bits)
+{ /* Returns the summed p_bits and upair12_sign_bits entries, two pair lookups per four values; s, pb, in, lambda, uplim and bits are unused. */
+    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
+    int i;
+    int qc1, qc2, qc3, qc4;
+    int curbits = 0; /* running total, converted to float on return */
+
+    uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
+
+    for (i = 0; i < size; i += 4) {
+        int curidx, curidx2;
+
+        qc1 = scaled[i  ] * Q34 + 0.4054f; /* 0.4054f is added before the float-to-int conversion */
+        qc2 = scaled[i+1] * Q34 + 0.4054f;
+        qc3 = scaled[i+2] * Q34 + 0.4054f;
+        qc4 = scaled[i+3] * Q34 + 0.4054f;
+
+        __asm__ volatile (
+            ".set push                      \n\t"
+            ".set noreorder                 \n\t"
+
+            "ori    $t4,    $zero,  12      \n\t" /* $t4 = 12, upper limit for qc1..qc4 */
+            "slt    $t0,    $t4,    %[qc1]  \n\t" /* $t0..$t3 = (12 < qc) */
+            "slt    $t1,    $t4,    %[qc2]  \n\t"
+            "slt    $t2,    $t4,    %[qc3]  \n\t"
+            "slt    $t3,    $t4,    %[qc4]  \n\t"
+            "movn   %[qc1], $t4,    $t0     \n\t" /* qc = 12 wherever it exceeded 12 */
+            "movn   %[qc2], $t4,    $t1     \n\t"
+            "movn   %[qc3], $t4,    $t2     \n\t"
+            "movn   %[qc4], $t4,    $t3     \n\t"
+
+            ".set pop                       \n\t"
+
+            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
+              [qc3]"+r"(qc3), [qc4]"+r"(qc4)
+            :
+            : "t0", "t1", "t2", "t3", "t4"
+        );
+
+        curidx  = 13 * qc1; /* pair index = 13 * first + second */
+        curidx += qc2;
+
+        curidx2  = 13 * qc3;
+        curidx2 += qc4;
+
+        curbits += p_bits[curidx] +
+                   p_bits[curidx2] +
+                   upair12_sign_bits[curidx] + /* second table indexed by the same pair index; defined outside this function */
+                   upair12_sign_bits[curidx2];
+    }
+    return curbits;
+}
+
+static float get_band_numbits_ESC_mips(struct AACEncContext *s,
+                                       PutBitContext *pb, const float *in,
+                                       const float *scaled, int size, int scale_idx,
+                                       int cb, const float lambda, const float uplim,
+                                       int *bits)
+{ /* Returns the summed p_bits, esc_sign_bits and per-value extra bit counts; s, pb, in, lambda, uplim and bits are unused. */
+    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
+    int i;
+    int qc1, qc2, qc3, qc4;
+    int curbits = 0; /* running total, converted to float on return */
+
+    uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
+
+    for (i = 0; i < size; i += 4) {
+        int curidx, curidx2;
+        int cond0, cond1, cond2, cond3; /* (15 < qc) flags, later negated into all-ones/zero masks */
+        int c1, c2, c3, c4; /* end up holding the extra bit count of each value, or 0 */
+
+        qc1 = scaled[i  ] * Q34 + 0.4054f; /* 0.4054f is added before the float-to-int conversion */
+        qc2 = scaled[i+1] * Q34 + 0.4054f;
+        qc3 = scaled[i+2] * Q34 + 0.4054f;
+        qc4 = scaled[i+3] * Q34 + 0.4054f;
+
+        __asm__ volatile (
+            ".set push                                  \n\t"
+            ".set noreorder                             \n\t"
+
+            "ori        $t4,        $zero,  15          \n\t" /* $t4 = 15, comparison threshold */
+            "ori        $t5,        $zero,  16          \n\t" /* $t5 = 16, value substituted above the threshold */
+            "shll_s.w   %[c1],      %[qc1], 18          \n\t" /* c = qc << 18 using the DSP saturating shift ... */
+            "shll_s.w   %[c2],      %[qc2], 18          \n\t"
+            "shll_s.w   %[c3],      %[qc3], 18          \n\t"
+            "shll_s.w   %[c4],      %[qc4], 18          \n\t"
+            "srl        %[c1],      %[c1],  18          \n\t" /* ... then shifted back right by 18 */
+            "srl        %[c2],      %[c2],  18          \n\t"
+            "srl        %[c3],      %[c3],  18          \n\t"
+            "srl        %[c4],      %[c4],  18          \n\t"
+            "slt        %[cond0],   $t4,    %[qc1]      \n\t" /* cond = (15 < qc) */
+            "slt        %[cond1],   $t4,    %[qc2]      \n\t"
+            "slt        %[cond2],   $t4,    %[qc3]      \n\t"
+            "slt        %[cond3],   $t4,    %[qc4]      \n\t"
+            "movn       %[qc1],     $t5,    %[cond0]    \n\t" /* qc = 16 wherever cond is set */
+            "movn       %[qc2],     $t5,    %[cond1]    \n\t"
+            "movn       %[qc3],     $t5,    %[cond2]    \n\t"
+            "movn       %[qc4],     $t5,    %[cond3]    \n\t"
+            "ori        $t5,        $zero,  31          \n\t" /* $t5 is reused: now 31 */
+            "clz        %[c1],      %[c1]               \n\t" /* c = count of leading zeros of c */
+            "clz        %[c2],      %[c2]               \n\t"
+            "clz        %[c3],      %[c3]               \n\t"
+            "clz        %[c4],      %[c4]               \n\t"
+            "subu       %[c1],      $t5,    %[c1]       \n\t" /* c = 31 - clz, the index of the highest set bit */
+            "subu       %[c2],      $t5,    %[c2]       \n\t"
+            "subu       %[c3],      $t5,    %[c3]       \n\t"
+            "subu       %[c4],      $t5,    %[c4]       \n\t"
+            "sll        %[c1],      %[c1],  1           \n\t" /* c = 2 * c ... */
+            "sll        %[c2],      %[c2],  1           \n\t"
+            "sll        %[c3],      %[c3],  1           \n\t"
+            "sll        %[c4],      %[c4],  1           \n\t"
+            "addiu      %[c1],      %[c1],  -3          \n\t" /* ... minus 3 */
+            "addiu      %[c2],      %[c2],  -3          \n\t"
+            "addiu      %[c3],      %[c3],  -3          \n\t"
+            "addiu      %[c4],      %[c4],  -3          \n\t"
+            "subu       %[cond0],   $zero,  %[cond0]    \n\t" /* cond = -cond: all ones if it was 1, else 0 */
+            "subu       %[cond1],   $zero,  %[cond1]    \n\t"
+            "subu       %[cond2],   $zero,  %[cond2]    \n\t"
+            "subu       %[cond3],   $zero,  %[cond3]    \n\t"
+            "and        %[c1],      %[c1],  %[cond0]    \n\t" /* c is kept only where qc exceeded 15, otherwise 0 */
+            "and        %[c2],      %[c2],  %[cond1]    \n\t"
+            "and        %[c3],      %[c3],  %[cond2]    \n\t"
+            "and        %[c4],      %[c4],  %[cond3]    \n\t"
+
+            ".set pop                                   \n\t"
+
+            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
+              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
+              [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
+              [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
+              [c1]"=&r"(c1), [c2]"=&r"(c2),
+              [c3]"=&r"(c3), [c4]"=&r"(c4)
+            :
+            : "t4", "t5"
+        );
+
+        curidx = 17 * qc1; /* pair index = 17 * first + second */
+        curidx += qc2;
+
+        curidx2 = 17 * qc3;
+        curidx2 += qc4;
+
+        curbits += p_bits[curidx];
+        curbits += esc_sign_bits[curidx]; /* second table indexed by the same pair index; defined outside this function */
+        curbits += p_bits[curidx2];
+        curbits += esc_sign_bits[curidx2];
+
+        curbits += c1; /* add the extra bit counts computed in the asm block */
+        curbits += c2;
+        curbits += c3;
+        curbits += c4;
+    }
+    return curbits;
+}
+
+static float (*const get_band_numbits_arr[])(struct AACEncContext *s,
+                                             PutBitContext *pb, const float *in,
+                                             const float *scaled, int size, int scale_idx,
+                                             int cb, const float lambda, const float uplim,
+                                             int *bits) = {
+    [ 0] = get_band_numbits_ZERO_mips,      /* unlike the encode table, index 0 has a handler */
+    [ 1] = get_band_numbits_SQUAD_mips,
+    [ 2] = get_band_numbits_SQUAD_mips,
+    [ 3] = get_band_numbits_UQUAD_mips,
+    [ 4] = get_band_numbits_UQUAD_mips,
+    [ 5] = get_band_numbits_SPAIR_mips,
+    [ 6] = get_band_numbits_SPAIR_mips,
+    [ 7] = get_band_numbits_UPAIR7_mips,
+    [ 8] = get_band_numbits_UPAIR7_mips,
+    [ 9] = get_band_numbits_UPAIR12_mips,
+    [10] = get_band_numbits_UPAIR12_mips,
+    [11] = get_band_numbits_ESC_mips,       /* last entry; the table has 12 slots */
+};
+
+#define get_band_numbits(ctx_, pb_, in_, scaled_, size_, sf_idx_,       \
+                         cb_, lambda_, uplim_,                          \
+                         bits_) /* dispatches on cb_ */                 \
+    get_band_numbits_arr[cb_](ctx_, pb_, in_, scaled_,                  \
+                              size_, sf_idx_, cb_,                      \
+                              lambda_, uplim_, bits_)
+
+static float quantize_band_cost_bits(struct AACEncContext *s, const float *in,
+                                     const float *scaled, int size, int scale_idx,
+                                     int cb, const float lambda, const float uplim,
+                                     int *bits)
+{   /* Forwards to the bit-count routine stored at index cb; NULL is passed as the PutBitContext. */
+    return get_band_numbits_arr[cb](s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
+}
+
+/**
+ * Per-codebook functions derived from the template function and optimized to compute the band cost
+ */
+#if HAVE_MIPSFPU
+static float get_band_cost_ZERO_mips(struct AACEncContext *s,
+                                     PutBitContext *pb, const float *in,
+                                     const float *scaled, int size, int scale_idx,
+                                     int cb, const float lambda, const float uplim,
+                                     int *bits)
+{
+    /* Cost is the sum of squared inputs times lambda; zero bits are reported through bits. */
+    float energy = 0;
+    int k;
+    for (k = 0; k < size; k += 4) {
+        energy += in[k]     * in[k];
+        energy += in[k + 1] * in[k + 1];
+        energy += in[k + 2] * in[k + 2];
+        energy += in[k + 3] * in[k + 3];
+    }
+    if (bits != NULL)
+        *bits = 0;
+    return energy * lambda;
+}
+
+static float get_band_cost_SQUAD_mips(struct AACEncContext *s,
+                                      PutBitContext *pb, const float *in,
+                                      const float *scaled, int size, int scale_idx,
+                                      int cb, const float lambda, const float uplim,
+                                      int *bits)
+{ /* Returns squared error * lambda + bit count for in[0..size), four values per lookup; s, pb and uplim are unused. */
+    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
+    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
+    int i;
+    float cost = 0; /* accumulated squared differences */
+    int qc1, qc2, qc3, qc4;
+    int curbits = 0; /* accumulated p_bits entries */
+
+    uint8_t *p_bits  = (uint8_t *)ff_aac_spectral_bits[cb-1];
+    float   *p_codes = (float   *)ff_aac_codebook_vectors[cb-1]; /* despite its name, this points at the codebook vectors */
+
+    for (i = 0; i < size; i += 4) {
+        const float *vec;
+        int curidx;
+        int   *in_int = (int   *)&in[i]; /* raw words of in[i..i+3]; the first asm block only extracts bit 31 */
+        float *in_pos = (float *)&in[i]; /* same address, loaded as floats by the second asm block */
+        float di0, di1, di2, di3;
+
+        qc1 = scaled[i  ] * Q34 + 0.4054f; /* 0.4054f is added before the float-to-int conversion */
+        qc2 = scaled[i+1] * Q34 + 0.4054f;
+        qc3 = scaled[i+2] * Q34 + 0.4054f;
+        qc4 = scaled[i+3] * Q34 + 0.4054f;
+
+        __asm__ volatile (
+            ".set push                                  \n\t"
+            ".set noreorder                             \n\t"
+
+            "slt        %[qc1], $zero,  %[qc1]          \n\t" /* qc = (0 < qc), i.e. 0 or 1 */
+            "slt        %[qc2], $zero,  %[qc2]          \n\t"
+            "slt        %[qc3], $zero,  %[qc3]          \n\t"
+            "slt        %[qc4], $zero,  %[qc4]          \n\t"
+            "lw         $t0,    0(%[in_int])            \n\t" /* $t0..$t3 = raw words of in[i..i+3] */
+            "lw         $t1,    4(%[in_int])            \n\t"
+            "lw         $t2,    8(%[in_int])            \n\t"
+            "lw         $t3,    12(%[in_int])           \n\t"
+            "srl        $t0,    $t0,    31              \n\t" /* keep only bit 31 of each word */
+            "srl        $t1,    $t1,    31              \n\t"
+            "srl        $t2,    $t2,    31              \n\t"
+            "srl        $t3,    $t3,    31              \n\t"
+            "subu       $t4,    $zero,  %[qc1]          \n\t" /* $t4..$t7 = -qc */
+            "subu       $t5,    $zero,  %[qc2]          \n\t"
+            "subu       $t6,    $zero,  %[qc3]          \n\t"
+            "subu       $t7,    $zero,  %[qc4]          \n\t"
+            "movn       %[qc1], $t4,    $t0             \n\t" /* qc = -qc where bit 31 was set, so qc is -1, 0 or 1 */
+            "movn       %[qc2], $t5,    $t1             \n\t"
+            "movn       %[qc3], $t6,    $t2             \n\t"
+            "movn       %[qc4], $t7,    $t3             \n\t"
+
+            ".set pop                                   \n\t"
+
+            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
+              [qc3]"+r"(qc3), [qc4]"+r"(qc4)
+            : [in_int]"r"(in_int)
+            : "t0", "t1", "t2", "t3",
+              "t4", "t5", "t6", "t7",
+              "memory"
+        );
+
+        curidx = qc1; /* builds 27*qc1 + 9*qc2 + 3*qc3 + qc4 step by step */
+        curidx *= 3;
+        curidx += qc2;
+        curidx *= 3;
+        curidx += qc3;
+        curidx *= 3;
+        curidx += qc4;
+        curidx += 40; /* moves the -40..40 range to 0..80 */
+
+        curbits += p_bits[curidx];
+        vec     = &p_codes[curidx*4]; /* four consecutive floats per index */
+
+        __asm__ volatile (
+            ".set push                                  \n\t"
+            ".set noreorder                             \n\t"
+
+            "lwc1       $f0,    0(%[in_pos])            \n\t" /* even registers hold in[i+k], odd registers hold vec[k] */
+            "lwc1       $f1,    0(%[vec])               \n\t"
+            "lwc1       $f2,    4(%[in_pos])            \n\t"
+            "lwc1       $f3,    4(%[vec])               \n\t"
+            "lwc1       $f4,    8(%[in_pos])            \n\t"
+            "lwc1       $f5,    8(%[vec])               \n\t"
+            "lwc1       $f6,    12(%[in_pos])           \n\t"
+            "lwc1       $f7,    12(%[vec])              \n\t"
+            "nmsub.s    %[di0], $f0,    $f1,    %[IQ]   \n\t" /* di = in[i+k] - vec[k] * IQ (per the MIPS nmsub.s definition) */
+            "nmsub.s    %[di1], $f2,    $f3,    %[IQ]   \n\t"
+            "nmsub.s    %[di2], $f4,    $f5,    %[IQ]   \n\t"
+            "nmsub.s    %[di3], $f6,    $f7,    %[IQ]   \n\t"
+
+            ".set pop                                   \n\t"
+
+            : [di0]"=&f"(di0), [di1]"=&f"(di1),
+              [di2]"=&f"(di2), [di3]"=&f"(di3)
+            : [in_pos]"r"(in_pos), [vec]"r"(vec),
+              [IQ]"f"(IQ)
+            : "$f0", "$f1", "$f2", "$f3",
+              "$f4", "$f5", "$f6", "$f7",
+              "memory"
+        );
+
+        cost += di0 * di0 + di1 * di1
+                + di2 * di2 + di3 * di3;
+    }
+
+    if (bits) /* the bit count is reported only when the caller supplied a destination */
+        *bits = curbits;
+    return cost * lambda + curbits;
+}
+
+static float get_band_cost_UQUAD_mips(struct AACEncContext *s, /* s is not referenced in the body */
+                                      PutBitContext *pb, const float *in, /* pb is not referenced; in: values compared against the reconstruction */
+                                      const float *scaled, int size, int scale_idx, /* scaled: source of the quantized indices; size is walked in steps of 4 */
+                                      int cb, const float lambda, const float uplim, /* tables are selected with cb-1; uplim is not referenced */
+                                      int *bits) /* optional output: receives curbits when non-NULL */
+{ /* Returns cost * lambda + curbits, with cost = sum of (|in[k]| - vec[k] * IQ)^2 over the band. */
+    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; /* multiplier applied to scaled[] before conversion to int */
+    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; /* multiplier applied to the codebook vector entries */
+    int i;
+    float cost = 0; /* running sum of squared differences */
+    int curbits = 0; /* running bit count */
+    int qc1, qc2, qc3, qc4;
+
+    uint8_t *p_bits  = (uint8_t*)ff_aac_spectral_bits[cb-1]; /* per-index values added to curbits */
+    float   *p_codes = (float  *)ff_aac_codebook_vectors[cb-1]; /* read as 4 floats per index */
+
+    for (i = 0; i < size; i += 4) { /* four values per iteration */
+        const float *vec;
+        int curidx;
+        float *in_pos = (float *)&in[i];
+        float di0, di1, di2, di3;
+
+        qc1 = scaled[i  ] * Q34 + 0.4054f; /* float result is converted to int on assignment */
+        qc2 = scaled[i+1] * Q34 + 0.4054f;
+        qc3 = scaled[i+2] * Q34 + 0.4054f;
+        qc4 = scaled[i+3] * Q34 + 0.4054f;
+
+        __asm__ volatile ( /* clamp qc1..qc4 to at most 2 */
+            ".set push                                  \n\t"
+            ".set noreorder                             \n\t"
+
+            "ori        $t4,    $zero,  2               \n\t" /* t4 = 2 */
+            "slt        $t0,    $t4,    %[qc1]          \n\t" /* t0 = (2 < qc1) */
+            "slt        $t1,    $t4,    %[qc2]          \n\t" /* t1 = (2 < qc2) */
+            "slt        $t2,    $t4,    %[qc3]          \n\t" /* t2 = (2 < qc3) */
+            "slt        $t3,    $t4,    %[qc4]          \n\t" /* t3 = (2 < qc4) */
+            "movn       %[qc1], $t4,    $t0             \n\t" /* if t0 != 0: qc1 = 2 */
+            "movn       %[qc2], $t4,    $t1             \n\t" /* if t1 != 0: qc2 = 2 */
+            "movn       %[qc3], $t4,    $t2             \n\t" /* if t2 != 0: qc3 = 2 */
+            "movn       %[qc4], $t4,    $t3             \n\t" /* if t3 != 0: qc4 = 2 */
+
+            ".set pop                                   \n\t"
+
+            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
+              [qc3]"+r"(qc3), [qc4]"+r"(qc4)
+            :
+            : "t0", "t1", "t2", "t3", "t4" /* scratch registers named explicitly in the asm text */
+        );
+
+        curidx = qc1; /* curidx = ((qc1 * 3 + qc2) * 3 + qc3) * 3 + qc4 */
+        curidx *= 3;
+        curidx += qc2;
+        curidx *= 3;
+        curidx += qc3;
+        curidx *= 3;
+        curidx += qc4;
+
+        curbits += p_bits[curidx];
+        curbits += uquad_sign_bits[curidx]; /* second per-index table, defined outside this function */
+        vec     = &p_codes[curidx*4]; /* the four floats belonging to curidx */
+
+        __asm__ volatile ( /* di_k = |in[i+k]| - vec[k] * IQ for k = 0..3 */
+            ".set push                                  \n\t"
+            ".set noreorder                             \n\t"
+
+            "lwc1       %[di0], 0(%[in_pos])            \n\t" /* di0 = in[i]   */
+            "lwc1       %[di1], 4(%[in_pos])            \n\t" /* di1 = in[i+1] */
+            "lwc1       %[di2], 8(%[in_pos])            \n\t" /* di2 = in[i+2] */
+            "lwc1       %[di3], 12(%[in_pos])           \n\t" /* di3 = in[i+3] */
+            "abs.s      %[di0], %[di0]                  \n\t" /* di0 = |di0| */
+            "abs.s      %[di1], %[di1]                  \n\t" /* di1 = |di1| */
+            "abs.s      %[di2], %[di2]                  \n\t" /* di2 = |di2| */
+            "abs.s      %[di3], %[di3]                  \n\t" /* di3 = |di3| */
+            "lwc1       $f0,    0(%[vec])               \n\t" /* f0 = vec[0] */
+            "lwc1       $f1,    4(%[vec])               \n\t" /* f1 = vec[1] */
+            "lwc1       $f2,    8(%[vec])               \n\t" /* f2 = vec[2] */
+            "lwc1       $f3,    12(%[vec])              \n\t" /* f3 = vec[3] */
+            "nmsub.s    %[di0], %[di0], $f0,    %[IQ]   \n\t" /* di0 = di0 - f0 * IQ */
+            "nmsub.s    %[di1], %[di1], $f1,    %[IQ]   \n\t" /* di1 = di1 - f1 * IQ */
+            "nmsub.s    %[di2], %[di2], $f2,    %[IQ]   \n\t" /* di2 = di2 - f2 * IQ */
+            "nmsub.s    %[di3], %[di3], $f3,    %[IQ]   \n\t" /* di3 = di3 - f3 * IQ */
+
+            ".set pop                                   \n\t"
+
+            : [di0]"=&f"(di0), [di1]"=&f"(di1),
+              [di2]"=&f"(di2), [di3]"=&f"(di3)
+            : [in_pos]"r"(in_pos), [vec]"r"(vec),
+              [IQ]"f"(IQ)
+            : "$f0", "$f1", "$f2", "$f3", /* FP scratch registers named explicitly in the asm text */
+              "memory"
+        );
+
+        cost += di0 * di0 + di1 * di1 /* accumulate the squared differences */
+                + di2 * di2 + di3 * di3;
+    }
+
+    if (bits) /* the bit count is reported only when the caller passed a pointer */
+        *bits = curbits;
+    return cost * lambda + curbits;
+}
+
+static float get_band_cost_SPAIR_mips(struct AACEncContext *s, /* s is not referenced in the body */
+                                      PutBitContext *pb, const float *in, /* pb is not referenced; in: values compared against the reconstruction, also read for their sign bits */
+                                      const float *scaled, int size, int scale_idx, /* scaled: source of the quantized indices; size is walked in steps of 4 */
+                                      int cb, const float lambda, const float uplim, /* tables are selected with cb-1; uplim is not referenced */
+                                      int *bits) /* optional output: receives curbits when non-NULL */
+{ /* Returns cost * lambda + curbits, with cost = sum of (in[k] - vec[k] * IQ)^2 over the band. */
+    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; /* multiplier applied to scaled[] before conversion to int */
+    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; /* multiplier applied to the codebook vector entries */
+    int i;
+    float cost = 0; /* running sum of squared differences */
+    int qc1, qc2, qc3, qc4;
+    int curbits = 0; /* running bit count */
+
+    uint8_t *p_bits  = (uint8_t *)ff_aac_spectral_bits[cb-1]; /* per-index values added to curbits */
+    float   *p_codes = (float   *)ff_aac_codebook_vectors[cb-1]; /* read as 2 floats per index */
+
+    for (i = 0; i < size; i += 4) { /* four values, i.e. two pairs, per iteration */
+        const float *vec, *vec2;
+        int curidx, curidx2;
+        int   *in_int = (int   *)&in[i]; /* same address as in_pos, used by the asm for integer loads of the float words */
+        float *in_pos = (float *)&in[i];
+        float di0, di1, di2, di3;
+
+        qc1 = scaled[i  ] * Q34 + 0.4054f; /* float result is converted to int on assignment */
+        qc2 = scaled[i+1] * Q34 + 0.4054f;
+        qc3 = scaled[i+2] * Q34 + 0.4054f;
+        qc4 = scaled[i+3] * Q34 + 0.4054f;
+
+        __asm__ volatile ( /* clamp qc1..qc4 to at most 4, then negate each one whose input word has bit 31 set */
+            ".set push                                  \n\t"
+            ".set noreorder                             \n\t"
+
+            "ori        $t4,    $zero,  4               \n\t" /* t4 = 4 */
+            "slt        $t0,    $t4,    %[qc1]          \n\t" /* t0 = (4 < qc1) */
+            "slt        $t1,    $t4,    %[qc2]          \n\t" /* t1 = (4 < qc2) */
+            "slt        $t2,    $t4,    %[qc3]          \n\t" /* t2 = (4 < qc3) */
+            "slt        $t3,    $t4,    %[qc4]          \n\t" /* t3 = (4 < qc4) */
+            "movn       %[qc1], $t4,    $t0             \n\t" /* if t0 != 0: qc1 = 4 */
+            "movn       %[qc2], $t4,    $t1             \n\t" /* if t1 != 0: qc2 = 4 */
+            "movn       %[qc3], $t4,    $t2             \n\t" /* if t2 != 0: qc3 = 4 */
+            "movn       %[qc4], $t4,    $t3             \n\t" /* if t3 != 0: qc4 = 4 */
+            "lw         $t0,    0(%[in_int])            \n\t" /* t0 = raw word of in[i]   */
+            "lw         $t1,    4(%[in_int])            \n\t" /* t1 = raw word of in[i+1] */
+            "lw         $t2,    8(%[in_int])            \n\t" /* t2 = raw word of in[i+2] */
+            "lw         $t3,    12(%[in_int])           \n\t" /* t3 = raw word of in[i+3] */
+            "srl        $t0,    $t0,    31              \n\t" /* t0 = bit 31 (float sign bit) */
+            "srl        $t1,    $t1,    31              \n\t" /* t1 = bit 31 */
+            "srl        $t2,    $t2,    31              \n\t" /* t2 = bit 31 */
+            "srl        $t3,    $t3,    31              \n\t" /* t3 = bit 31 */
+            "subu       $t4,    $zero,  %[qc1]          \n\t" /* t4 = -qc1 */
+            "subu       $t5,    $zero,  %[qc2]          \n\t" /* t5 = -qc2 */
+            "subu       $t6,    $zero,  %[qc3]          \n\t" /* t6 = -qc3 */
+            "subu       $t7,    $zero,  %[qc4]          \n\t" /* t7 = -qc4 */
+            "movn       %[qc1], $t4,    $t0             \n\t" /* if sign bit set: qc1 = -qc1 */
+            "movn       %[qc2], $t5,    $t1             \n\t" /* if sign bit set: qc2 = -qc2 */
+            "movn       %[qc3], $t6,    $t2             \n\t" /* if sign bit set: qc3 = -qc3 */
+            "movn       %[qc4], $t7,    $t3             \n\t" /* if sign bit set: qc4 = -qc4 */
+
+            ".set pop                                   \n\t"
+
+            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
+              [qc3]"+r"(qc3), [qc4]"+r"(qc4)
+            : [in_int]"r"(in_int)
+            : "t0", "t1", "t2", "t3", /* scratch registers named explicitly in the asm text */
+              "t4", "t5", "t6", "t7",
+              "memory"
+        );
+
+        curidx = 9 * qc1; /* first pair: curidx = 9 * qc1 + qc2 + 40 */
+        curidx += qc2 + 40; /* 40 = 9 * 4 + 4; presumably biases the signed pair so that (-4, -4) lands on index 0 */
+
+        curidx2 = 9 * qc3; /* second pair: curidx2 = 9 * qc3 + qc4 + 40 */
+        curidx2 += qc4 + 40;
+
+        curbits += p_bits[curidx];
+        curbits += p_bits[curidx2];
+
+        vec     = &p_codes[curidx*2]; /* the two floats belonging to curidx */
+        vec2    = &p_codes[curidx2*2]; /* the two floats belonging to curidx2 */
+
+        __asm__ volatile ( /* di_k = in[i+k] - (vector entry) * IQ; no absolute value is taken here */
+            ".set push                                  \n\t"
+            ".set noreorder                             \n\t"
+
+            "lwc1       $f0,    0(%[in_pos])            \n\t" /* f0 = in[i]   */
+            "lwc1       $f1,    0(%[vec])               \n\t" /* f1 = vec[0]  */
+            "lwc1       $f2,    4(%[in_pos])            \n\t" /* f2 = in[i+1] */
+            "lwc1       $f3,    4(%[vec])               \n\t" /* f3 = vec[1]  */
+            "lwc1       $f4,    8(%[in_pos])            \n\t" /* f4 = in[i+2] */
+            "lwc1       $f5,    0(%[vec2])              \n\t" /* f5 = vec2[0] */
+            "lwc1       $f6,    12(%[in_pos])           \n\t" /* f6 = in[i+3] */
+            "lwc1       $f7,    4(%[vec2])              \n\t" /* f7 = vec2[1] */
+            "nmsub.s    %[di0], $f0,    $f1,    %[IQ]   \n\t" /* di0 = f0 - f1 * IQ */
+            "nmsub.s    %[di1], $f2,    $f3,    %[IQ]   \n\t" /* di1 = f2 - f3 * IQ */
+            "nmsub.s    %[di2], $f4,    $f5,    %[IQ]   \n\t" /* di2 = f4 - f5 * IQ */
+            "nmsub.s    %[di3], $f6,    $f7,    %[IQ]   \n\t" /* di3 = f6 - f7 * IQ */
+
+            ".set pop                                   \n\t"
+
+            : [di0]"=&f"(di0), [di1]"=&f"(di1),
+              [di2]"=&f"(di2), [di3]"=&f"(di3)
+            : [in_pos]"r"(in_pos), [vec]"r"(vec),
+              [vec2]"r"(vec2), [IQ]"f"(IQ)
+            : "$f0", "$f1", "$f2", "$f3", /* FP scratch registers named explicitly in the asm text */
+              "$f4", "$f5", "$f6", "$f7",
+              "memory"
+        );
+
+        cost += di0 * di0 + di1 * di1 /* accumulate the squared differences */
+                + di2 * di2 + di3 * di3;
+    }
+
+    if (bits) /* the bit count is reported only when the caller passed a pointer */
+        *bits = curbits;
+    return cost * lambda + curbits;
+}
+
+static float get_band_cost_UPAIR7_mips(struct AACEncContext *s, /* s is not referenced in the body */
+                                       PutBitContext *pb, const float *in, /* pb is not referenced; in: values compared against the reconstruction */
+                                       const float *scaled, int size, int scale_idx, /* scaled: source of the quantized indices; size is walked in steps of 4 */
+                                       int cb, const float lambda, const float uplim, /* tables are selected with cb-1; uplim is not referenced */
+                                       int *bits) /* optional output: receives curbits when non-NULL */
+{ /* Returns cost * lambda + curbits, with cost = sum of (|in[k]| - vec[k] * IQ)^2 over the band. */
+    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; /* multiplier applied to scaled[] before conversion to int */
+    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; /* multiplier applied to the codebook vector entries */
+    int i;
+    float cost = 0; /* running sum of squared differences */
+    int qc1, qc2, qc3, qc4;
+    int curbits = 0; /* running bit count */
+
+    uint8_t *p_bits  = (uint8_t *)ff_aac_spectral_bits[cb-1]; /* per-index values added to curbits */
+    float   *p_codes = (float   *)ff_aac_codebook_vectors[cb-1]; /* read as 2 floats per index */
+
+    for (i = 0; i < size; i += 4) { /* four values, i.e. two pairs, per iteration */
+        const float *vec, *vec2;
+        int curidx, curidx2, sign1, count1, sign2, count2; /* sign1/count1/sign2/count2 are written by the asm but never read afterwards in this function */
+        int   *in_int = (int   *)&in[i]; /* same address as in_pos, used by the asm for integer loads of the float words */
+        float *in_pos = (float *)&in[i];
+        float di0, di1, di2, di3;
+
+        qc1 = scaled[i  ] * Q34 + 0.4054f; /* float result is converted to int on assignment */
+        qc2 = scaled[i+1] * Q34 + 0.4054f;
+        qc3 = scaled[i+2] * Q34 + 0.4054f;
+        qc4 = scaled[i+3] * Q34 + 0.4054f;
+
+        __asm__ volatile ( /* clamp qc1..qc4 to at most 7; also derive per-pair sign words and nonzero counts */
+            ".set push                                          \n\t"
+            ".set noreorder                                     \n\t"
+
+            "ori        $t4,        $zero,      7               \n\t" /* t4 = 7 */
+            "ori        %[sign1],   $zero,      0               \n\t" /* sign1 = 0 */
+            "ori        %[sign2],   $zero,      0               \n\t" /* sign2 = 0 */
+            "slt        $t0,        $t4,        %[qc1]          \n\t" /* t0 = (7 < qc1) */
+            "slt        $t1,        $t4,        %[qc2]          \n\t" /* t1 = (7 < qc2) */
+            "slt        $t2,        $t4,        %[qc3]          \n\t" /* t2 = (7 < qc3) */
+            "slt        $t3,        $t4,        %[qc4]          \n\t" /* t3 = (7 < qc4) */
+            "movn       %[qc1],     $t4,        $t0             \n\t" /* if t0 != 0: qc1 = 7 */
+            "movn       %[qc2],     $t4,        $t1             \n\t" /* if t1 != 0: qc2 = 7 */
+            "movn       %[qc3],     $t4,        $t2             \n\t" /* if t2 != 0: qc3 = 7 */
+            "movn       %[qc4],     $t4,        $t3             \n\t" /* if t3 != 0: qc4 = 7 */
+            "lw         $t0,        0(%[in_int])                \n\t" /* t0 = raw word of in[i]   */
+            "lw         $t1,        4(%[in_int])                \n\t" /* t1 = raw word of in[i+1] */
+            "lw         $t2,        8(%[in_int])                \n\t" /* t2 = raw word of in[i+2] */
+            "lw         $t3,        12(%[in_int])               \n\t" /* t3 = raw word of in[i+3] */
+            "slt        $t0,        $t0,        $zero           \n\t" /* t0 = (word < 0), i.e. sign bit of in[i] */
+            "movn       %[sign1],   $t0,        %[qc1]          \n\t" /* if qc1 != 0: sign1 = t0 */
+            "slt        $t2,        $t2,        $zero           \n\t" /* t2 = sign bit of in[i+2] */
+            "movn       %[sign2],   $t2,        %[qc3]          \n\t" /* if qc3 != 0: sign2 = t2 */
+            "slt        $t1,        $t1,        $zero           \n\t" /* t1 = sign bit of in[i+1] */
+            "sll        $t0,        %[sign1],   1               \n\t" /* t0 = sign1 << 1 */
+            "or         $t0,        $t0,        $t1             \n\t" /* t0 |= t1 */
+            "movn       %[sign1],   $t0,        %[qc2]          \n\t" /* if qc2 != 0: sign1 = (sign1 << 1) | t1 */
+            "slt        $t3,        $t3,        $zero           \n\t" /* t3 = sign bit of in[i+3] */
+            "sll        $t0,        %[sign2],   1               \n\t" /* t0 = sign2 << 1 */
+            "or         $t0,        $t0,        $t3             \n\t" /* t0 |= t3 */
+            "movn       %[sign2],   $t0,        %[qc4]          \n\t" /* if qc4 != 0: sign2 = (sign2 << 1) | t3 */
+            "slt        %[count1],  $zero,      %[qc1]          \n\t" /* count1 = (0 < qc1) */
+            "slt        $t1,        $zero,      %[qc2]          \n\t" /* t1 = (0 < qc2) */
+            "slt        %[count2],  $zero,      %[qc3]          \n\t" /* count2 = (0 < qc3) */
+            "slt        $t2,        $zero,      %[qc4]          \n\t" /* t2 = (0 < qc4) */
+            "addu       %[count1],  %[count1],  $t1             \n\t" /* count1 += t1 */
+            "addu       %[count2],  %[count2],  $t2             \n\t" /* count2 += t2 */
+
+            ".set pop                                           \n\t"
+
+            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
+              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
+              [sign1]"=&r"(sign1), [count1]"=&r"(count1),
+              [sign2]"=&r"(sign2), [count2]"=&r"(count2)
+            : [in_int]"r"(in_int)
+            : "t0", "t1", "t2", "t3", "t4", /* scratch registers named explicitly in the asm text */
+              "memory"
+        );
+
+        curidx = 8 * qc1; /* first pair: curidx = 8 * qc1 + qc2 */
+        curidx += qc2;
+
+        curidx2 = 8 * qc3; /* second pair: curidx2 = 8 * qc3 + qc4 */
+        curidx2 += qc4;
+
+        curbits += p_bits[curidx];
+        curbits += upair7_sign_bits[curidx]; /* second per-index table, defined outside this function */
+        vec     = &p_codes[curidx*2]; /* the two floats belonging to curidx */
+
+        curbits += p_bits[curidx2];
+        curbits += upair7_sign_bits[curidx2];
+        vec2    = &p_codes[curidx2*2]; /* the two floats belonging to curidx2 */
+
+        __asm__ volatile ( /* di_k = |in[i+k]| - (vector entry) * IQ for k = 0..3 */
+            ".set push                                          \n\t"
+            ".set noreorder                                     \n\t"
+
+            "lwc1       %[di0],     0(%[in_pos])                \n\t" /* di0 = in[i]   */
+            "lwc1       %[di1],     4(%[in_pos])                \n\t" /* di1 = in[i+1] */
+            "lwc1       %[di2],     8(%[in_pos])                \n\t" /* di2 = in[i+2] */
+            "lwc1       %[di3],     12(%[in_pos])               \n\t" /* di3 = in[i+3] */
+            "abs.s      %[di0],     %[di0]                      \n\t" /* di0 = |di0| */
+            "abs.s      %[di1],     %[di1]                      \n\t" /* di1 = |di1| */
+            "abs.s      %[di2],     %[di2]                      \n\t" /* di2 = |di2| */
+            "abs.s      %[di3],     %[di3]                      \n\t" /* di3 = |di3| */
+            "lwc1       $f0,        0(%[vec])                   \n\t" /* f0 = vec[0]  */
+            "lwc1       $f1,        4(%[vec])                   \n\t" /* f1 = vec[1]  */
+            "lwc1       $f2,        0(%[vec2])                  \n\t" /* f2 = vec2[0] */
+            "lwc1       $f3,        4(%[vec2])                  \n\t" /* f3 = vec2[1] */
+            "nmsub.s    %[di0],     %[di0],     $f0,    %[IQ]   \n\t" /* di0 = di0 - f0 * IQ */
+            "nmsub.s    %[di1],     %[di1],     $f1,    %[IQ]   \n\t" /* di1 = di1 - f1 * IQ */
+            "nmsub.s    %[di2],     %[di2],     $f2,    %[IQ]   \n\t" /* di2 = di2 - f2 * IQ */
+            "nmsub.s    %[di3],     %[di3],     $f3,    %[IQ]   \n\t" /* di3 = di3 - f3 * IQ */
+
+            ".set pop                                           \n\t"
+
+            : [di0]"=&f"(di0), [di1]"=&f"(di1),
+              [di2]"=&f"(di2), [di3]"=&f"(di3)
+            : [in_pos]"r"(in_pos), [vec]"r"(vec),
+              [vec2]"r"(vec2), [IQ]"f"(IQ)
+            : "$f0", "$f1", "$f2", "$f3", /* FP scratch registers named explicitly in the asm text */
+              "memory"
+        );
+
+        cost += di0 * di0 + di1 * di1 /* accumulate the squared differences */
+                + di2 * di2 + di3 * di3;
+    }
+
+    if (bits) /* the bit count is reported only when the caller passed a pointer */
+        *bits = curbits;
+    return cost * lambda + curbits;
+}
+
+static float get_band_cost_UPAIR12_mips(struct AACEncContext *s, /* s is not referenced in the body */
+                                        PutBitContext *pb, const float *in, /* pb is not referenced; in: values compared against the reconstruction */
+                                        const float *scaled, int size, int scale_idx, /* scaled: source of the quantized indices; size is walked in steps of 4 */
+                                        int cb, const float lambda, const float uplim, /* tables are selected with cb-1; uplim is not referenced */
+                                        int *bits) /* optional output: receives curbits when non-NULL */
+{ /* Returns cost * lambda + curbits, with cost = sum of (|in[k]| - vec[k] * IQ)^2 over the band. */
+    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; /* multiplier applied to scaled[] before conversion to int */
+    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; /* multiplier applied to the codebook vector entries */
+    int i;
+    float cost = 0; /* running sum of squared differences */
+    int qc1, qc2, qc3, qc4;
+    int curbits = 0; /* running bit count */
+
+    uint8_t *p_bits  = (uint8_t *)ff_aac_spectral_bits[cb-1]; /* per-index values added to curbits */
+    float   *p_codes = (float   *)ff_aac_codebook_vectors[cb-1]; /* read as 2 floats per index */
+
+    for (i = 0; i < size; i += 4) { /* four values, i.e. two pairs, per iteration */
+        const float *vec, *vec2;
+        int curidx, curidx2;
+        int sign1, count1, sign2, count2; /* written by the asm but never read afterwards in this function */
+        int   *in_int = (int   *)&in[i]; /* same address as in_pos, used by the asm for integer loads of the float words */
+        float *in_pos = (float *)&in[i];
+        float di0, di1, di2, di3;
+
+        qc1 = scaled[i  ] * Q34 + 0.4054f; /* float result is converted to int on assignment */
+        qc2 = scaled[i+1] * Q34 + 0.4054f;
+        qc3 = scaled[i+2] * Q34 + 0.4054f;
+        qc4 = scaled[i+3] * Q34 + 0.4054f;
+
+        __asm__ volatile ( /* clamp qc1..qc4 to at most 12; also derive per-pair sign words and nonzero counts */
+            ".set push                                          \n\t"
+            ".set noreorder                                     \n\t"
+
+            "ori        $t4,        $zero,      12              \n\t" /* t4 = 12 */
+            "ori        %[sign1],   $zero,      0               \n\t" /* sign1 = 0 */
+            "ori        %[sign2],   $zero,      0               \n\t" /* sign2 = 0 */
+            "slt        $t0,        $t4,        %[qc1]          \n\t" /* t0 = (12 < qc1) */
+            "slt        $t1,        $t4,        %[qc2]          \n\t" /* t1 = (12 < qc2) */
+            "slt        $t2,        $t4,        %[qc3]          \n\t" /* t2 = (12 < qc3) */
+            "slt        $t3,        $t4,        %[qc4]          \n\t" /* t3 = (12 < qc4) */
+            "movn       %[qc1],     $t4,        $t0             \n\t" /* if t0 != 0: qc1 = 12 */
+            "movn       %[qc2],     $t4,        $t1             \n\t" /* if t1 != 0: qc2 = 12 */
+            "movn       %[qc3],     $t4,        $t2             \n\t" /* if t2 != 0: qc3 = 12 */
+            "movn       %[qc4],     $t4,        $t3             \n\t" /* if t3 != 0: qc4 = 12 */
+            "lw         $t0,        0(%[in_int])                \n\t" /* t0 = raw word of in[i]   */
+            "lw         $t1,        4(%[in_int])                \n\t" /* t1 = raw word of in[i+1] */
+            "lw         $t2,        8(%[in_int])                \n\t" /* t2 = raw word of in[i+2] */
+            "lw         $t3,        12(%[in_int])               \n\t" /* t3 = raw word of in[i+3] */
+            "slt        $t0,        $t0,        $zero           \n\t" /* t0 = (word < 0), i.e. sign bit of in[i] */
+            "movn       %[sign1],   $t0,        %[qc1]          \n\t" /* if qc1 != 0: sign1 = t0 */
+            "slt        $t2,        $t2,        $zero           \n\t" /* t2 = sign bit of in[i+2] */
+            "movn       %[sign2],   $t2,        %[qc3]          \n\t" /* if qc3 != 0: sign2 = t2 */
+            "slt        $t1,        $t1,        $zero           \n\t" /* t1 = sign bit of in[i+1] */
+            "sll        $t0,        %[sign1],   1               \n\t" /* t0 = sign1 << 1 */
+            "or         $t0,        $t0,        $t1             \n\t" /* t0 |= t1 */
+            "movn       %[sign1],   $t0,        %[qc2]          \n\t" /* if qc2 != 0: sign1 = (sign1 << 1) | t1 */
+            "slt        $t3,        $t3,        $zero           \n\t" /* t3 = sign bit of in[i+3] */
+            "sll        $t0,        %[sign2],   1               \n\t" /* t0 = sign2 << 1 */
+            "or         $t0,        $t0,        $t3             \n\t" /* t0 |= t3 */
+            "movn       %[sign2],   $t0,        %[qc4]          \n\t" /* if qc4 != 0: sign2 = (sign2 << 1) | t3 */
+            "slt        %[count1],  $zero,      %[qc1]          \n\t" /* count1 = (0 < qc1) */
+            "slt        $t1,        $zero,      %[qc2]          \n\t" /* t1 = (0 < qc2) */
+            "slt        %[count2],  $zero,      %[qc3]          \n\t" /* count2 = (0 < qc3) */
+            "slt        $t2,        $zero,      %[qc4]          \n\t" /* t2 = (0 < qc4) */
+            "addu       %[count1],  %[count1],  $t1             \n\t" /* count1 += t1 */
+            "addu       %[count2],  %[count2],  $t2             \n\t" /* count2 += t2 */
+
+            ".set pop                                           \n\t"
+
+            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
+              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
+              [sign1]"=&r"(sign1), [count1]"=&r"(count1),
+              [sign2]"=&r"(sign2), [count2]"=&r"(count2)
+            : [in_int]"r"(in_int)
+            : "t0", "t1", "t2", "t3", "t4", /* scratch registers named explicitly in the asm text */
+              "memory"
+        );
+
+        curidx = 13 * qc1; /* first pair: curidx = 13 * qc1 + qc2 */
+        curidx += qc2;
+
+        curidx2 = 13 * qc3; /* second pair: curidx2 = 13 * qc3 + qc4 */
+        curidx2 += qc4;
+
+        curbits += p_bits[curidx];
+        curbits += p_bits[curidx2];
+        curbits += upair12_sign_bits[curidx]; /* second per-index table, defined outside this function */
+        curbits += upair12_sign_bits[curidx2];
+        vec     = &p_codes[curidx*2]; /* the two floats belonging to curidx */
+        vec2    = &p_codes[curidx2*2]; /* the two floats belonging to curidx2 */
+
+        __asm__ volatile ( /* di_k = |in[i+k]| - (vector entry) * IQ for k = 0..3 */
+            ".set push                                          \n\t"
+            ".set noreorder                                     \n\t"
+
+            "lwc1       %[di0],     0(%[in_pos])                \n\t" /* di0 = in[i]   */
+            "lwc1       %[di1],     4(%[in_pos])                \n\t" /* di1 = in[i+1] */
+            "lwc1       %[di2],     8(%[in_pos])                \n\t" /* di2 = in[i+2] */
+            "lwc1       %[di3],     12(%[in_pos])               \n\t" /* di3 = in[i+3] */
+            "abs.s      %[di0],     %[di0]                      \n\t" /* di0 = |di0| */
+            "abs.s      %[di1],     %[di1]                      \n\t" /* di1 = |di1| */
+            "abs.s      %[di2],     %[di2]                      \n\t" /* di2 = |di2| */
+            "abs.s      %[di3],     %[di3]                      \n\t" /* di3 = |di3| */
+            "lwc1       $f0,        0(%[vec])                   \n\t" /* f0 = vec[0]  */
+            "lwc1       $f1,        4(%[vec])                   \n\t" /* f1 = vec[1]  */
+            "lwc1       $f2,        0(%[vec2])                  \n\t" /* f2 = vec2[0] */
+            "lwc1       $f3,        4(%[vec2])                  \n\t" /* f3 = vec2[1] */
+            "nmsub.s    %[di0],     %[di0],     $f0,    %[IQ]   \n\t" /* di0 = di0 - f0 * IQ */
+            "nmsub.s    %[di1],     %[di1],     $f1,    %[IQ]   \n\t" /* di1 = di1 - f1 * IQ */
+            "nmsub.s    %[di2],     %[di2],     $f2,    %[IQ]   \n\t" /* di2 = di2 - f2 * IQ */
+            "nmsub.s    %[di3],     %[di3],     $f3,    %[IQ]   \n\t" /* di3 = di3 - f3 * IQ */
+
+            ".set pop                                           \n\t"
+
+            : [di0]"=&f"(di0), [di1]"=&f"(di1),
+              [di2]"=&f"(di2), [di3]"=&f"(di3)
+            : [in_pos]"r"(in_pos), [vec]"r"(vec),
+              [vec2]"r"(vec2), [IQ]"f"(IQ)
+            : "$f0", "$f1", "$f2", "$f3", /* FP scratch registers named explicitly in the asm text */
+              "memory"
+        );
+
+        cost += di0 * di0 + di1 * di1 /* accumulate the squared differences */
+                + di2 * di2 + di3 * di3;
+    }
+
+    if (bits) /* the bit count is reported only when the caller passed a pointer */
+        *bits = curbits;
+    return cost * lambda + curbits;
+}
+
+static float get_band_cost_ESC_mips(struct AACEncContext *s, /* s is not referenced in the body */
+                                    PutBitContext *pb, const float *in, /* pb is not referenced; in: values whose magnitudes are compared against the reconstruction */
+                                    const float *scaled, int size, int scale_idx, /* scaled: source of the quantized indices; size is walked in steps of 4 */
+                                    int cb, const float lambda, const float uplim, /* tables are selected with cb-1; uplim is not referenced */
+                                    int *bits) /* optional output: receives curbits when non-NULL */
+{ /* Returns cost * lambda + curbits; values whose index exceeds 15 take the escape path below. */
+    const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; /* multiplier applied to scaled[] before conversion to int */
+    const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; /* multiplier applied to reconstructed magnitudes */
+    const float CLIPPED_ESCAPE = 165140.0f * IQ; /* on the escape path, magnitudes at or above this are compared against this value itself */
+    int i;
+    float cost = 0; /* running sum of squared differences */
+    int qc1, qc2, qc3, qc4;
+    int curbits = 0; /* running bit count */
+
+    uint8_t *p_bits  = (uint8_t*)ff_aac_spectral_bits[cb-1]; /* per-index values added to curbits */
+    float   *p_codes = (float*  )ff_aac_codebook_vectors[cb-1]; /* read as 2 floats per index */
+
+    for (i = 0; i < size; i += 4) { /* four values, i.e. two pairs, per iteration */
+        const float *vec, *vec2;
+        int curidx, curidx2;
+        float t1, t2, t3, t4; /* |in[i..i+3]| */
+        float di1, di2, di3, di4; /* per-value differences */
+        int cond0, cond1, cond2, cond3; /* 1 when the matching qc exceeded 15, else 0 */
+        int c1, c2, c3, c4; /* saturated copies of qc1..qc4 taken before the clamp to 16 */
+
+        qc1 = scaled[i  ] * Q34 + 0.4054f; /* float result is converted to int on assignment */
+        qc2 = scaled[i+1] * Q34 + 0.4054f;
+        qc3 = scaled[i+2] * Q34 + 0.4054f;
+        qc4 = scaled[i+3] * Q34 + 0.4054f;
+
+        __asm__ volatile ( /* c_k = saturated qc_k; cond_k = (qc_k > 15); qc_k = 16 where cond_k is set */
+            ".set push                                  \n\t"
+            ".set noreorder                             \n\t"
+
+            "ori        $t4,        $zero,  15          \n\t" /* t4 = 15 */
+            "ori        $t5,        $zero,  16          \n\t" /* t5 = 16 */
+            "shll_s.w   %[c1],      %[qc1], 18          \n\t" /* saturating left shift by 18 (MIPS DSP ASE instruction) */
+            "shll_s.w   %[c2],      %[qc2], 18          \n\t"
+            "shll_s.w   %[c3],      %[qc3], 18          \n\t"
+            "shll_s.w   %[c4],      %[qc4], 18          \n\t"
+            "srl        %[c1],      %[c1],  18          \n\t" /* shift back: for non-negative qc1 this leaves min(qc1, 8191) */
+            "srl        %[c2],      %[c2],  18          \n\t"
+            "srl        %[c3],      %[c3],  18          \n\t"
+            "srl        %[c4],      %[c4],  18          \n\t"
+            "slt        %[cond0],   $t4,    %[qc1]      \n\t" /* cond0 = (15 < qc1) */
+            "slt        %[cond1],   $t4,    %[qc2]      \n\t" /* cond1 = (15 < qc2) */
+            "slt        %[cond2],   $t4,    %[qc3]      \n\t" /* cond2 = (15 < qc3) */
+            "slt        %[cond3],   $t4,    %[qc4]      \n\t" /* cond3 = (15 < qc4) */
+            "movn       %[qc1],     $t5,    %[cond0]    \n\t" /* if cond0: qc1 = 16 */
+            "movn       %[qc2],     $t5,    %[cond1]    \n\t" /* if cond1: qc2 = 16 */
+            "movn       %[qc3],     $t5,    %[cond2]    \n\t" /* if cond2: qc3 = 16 */
+            "movn       %[qc4],     $t5,    %[cond3]    \n\t" /* if cond3: qc4 = 16 */
+
+            ".set pop                                   \n\t"
+
+            : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
+              [qc3]"+r"(qc3), [qc4]"+r"(qc4),
+              [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
+              [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
+              [c1]"=&r"(c1), [c2]"=&r"(c2),
+              [c3]"=&r"(c3), [c4]"=&r"(c4)
+            :
+            : "t4", "t5" /* scratch registers named explicitly in the asm text */
+        );
+
+        curidx = 17 * qc1; /* first pair: curidx = 17 * qc1 + qc2 */
+        curidx += qc2;
+
+        curidx2 = 17 * qc3; /* second pair: curidx2 = 17 * qc3 + qc4 */
+        curidx2 += qc4;
+
+        curbits += p_bits[curidx];
+        curbits += esc_sign_bits[curidx]; /* second per-index table, defined outside this function */
+        vec     = &p_codes[curidx*2]; /* the two floats belonging to curidx */
+
+        curbits += p_bits[curidx2];
+        curbits += esc_sign_bits[curidx2];
+        vec2     = &p_codes[curidx2*2]; /* the two floats belonging to curidx2 */
+
+        curbits += (av_log2(c1) * 2 - 3) & (-cond0); /* -cond is 0 or all ones, so the extra bits count only when cond is set */
+        curbits += (av_log2(c2) * 2 - 3) & (-cond1);
+        curbits += (av_log2(c3) * 2 - 3) & (-cond2);
+        curbits += (av_log2(c4) * 2 - 3) & (-cond3);
+
+        t1 = fabsf(in[i  ]);
+        t2 = fabsf(in[i+1]);
+        t3 = fabsf(in[i+2]);
+        t4 = fabsf(in[i+3]);
+
+        if (cond0) { /* escape path for the first value */
+            if (t1 >= CLIPPED_ESCAPE) {
+                di1 = t1 - CLIPPED_ESCAPE; /* compare against the clipping value */
+            } else {
+                di1 = t1 - c1 * cbrtf(c1) * IQ; /* c * cbrt(c) equals c^(4/3) */
+            }
+        } else
+            di1 = t1 - vec[0] * IQ; /* regular path: compare against the codebook entry */
+
+        if (cond1) { /* same three-way choice for the second value */
+            if (t2 >= CLIPPED_ESCAPE) {
+                di2 = t2 - CLIPPED_ESCAPE;
+            } else {
+                di2 = t2 - c2 * cbrtf(c2) * IQ;
+            }
+        } else
+            di2 = t2 - vec[1] * IQ;
+
+        if (cond2) { /* third value, using the second pair's vector */
+            if (t3 >= CLIPPED_ESCAPE) {
+                di3 = t3 - CLIPPED_ESCAPE;
+            } else {
+                di3 = t3 - c3 * cbrtf(c3) * IQ;
+            }
+        } else
+            di3 = t3 - vec2[0] * IQ;
+
+        if (cond3) { /* fourth value */
+            if (t4 >= CLIPPED_ESCAPE) {
+                di4 = t4 - CLIPPED_ESCAPE;
+            } else {
+                di4 = t4 - c4 * cbrtf(c4) * IQ;
+            }
+        } else
+            di4 = t4 - vec2[1]*IQ;
+
+        cost += di1 * di1 + di2 * di2 /* accumulate the squared differences */
+                + di3 * di3 + di4 * di4;
+    }
+
+    if (bits) /* the bit count is reported only when the caller passed a pointer */
+        *bits = curbits;
+    return cost * lambda + curbits;
+}
+
+static float (*const get_band_cost_arr[])(struct AACEncContext *s, /* cost routines, one slot per codebook index */
+                                          PutBitContext *pb, const float *in,
+                                          const float *scaled, int size, int scale_idx,
+                                          int cb, const float lambda, const float uplim,
+                                          int *bits) = {
+    [ 0] = get_band_cost_ZERO_mips,
+    [ 1] = get_band_cost_SQUAD_mips,
+    [ 2] = get_band_cost_SQUAD_mips,
+    [ 3] = get_band_cost_UQUAD_mips,
+    [ 4] = get_band_cost_UQUAD_mips,
+    [ 5] = get_band_cost_SPAIR_mips,
+    [ 6] = get_band_cost_SPAIR_mips,
+    [ 7] = get_band_cost_UPAIR7_mips,
+    [ 8] = get_band_cost_UPAIR7_mips,
+    [ 9] = get_band_cost_UPAIR12_mips,
+    [10] = get_band_cost_UPAIR12_mips,
+    [11] = get_band_cost_ESC_mips,
+};
+
+#define get_band_cost(ctx, bitctx, coefs, scoefs,                       \
+                      len, sf_idx, book,                                \
+                      lam, limit, nbits)                                \
+    get_band_cost_arr[book](ctx, bitctx, coefs, scoefs,                 \
+                            len, sf_idx, book,                          \
+                            lam, limit, nbits) /* dispatches through get_band_cost_arr; 'book' is expanded twice, so pass an expression without side effects */
+
+static float quantize_band_cost(struct AACEncContext *s, const float *in, const float *scaled,
+                                int size, int scale_idx, int cb,
+                                const float lambda, const float uplim, int *bits)
+{
+    /* Forwards to get_band_cost() with NULL in the PutBitContext slot. */
+    const float band_cost = get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
+    return band_cost;
+}
+
+static void search_for_quantizers_twoloop_mips(AVCodecContext *avctx,
+                                               AACEncContext *s,
+                                               SingleChannelElement *sce,
+                                               const float lambda)
+{
+    int start = 0, i, w, w2, g;
+    int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels;
+    float dists[128] = { 0 }, uplims[128];
+    float maxvals[128];
+    int fflag, minscaler;
+    int its  = 0;
+    int allz = 0;
+    float minthr = INFINITY;
+
+    destbits = FFMIN(destbits, 5800);
+    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+        for (g = 0;  g < sce->ics.num_swb; g++) {
+            int nz = 0;
+            float uplim = 0.0f;
+            for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
+                uplim += band->threshold;
+                if (band->energy <= band->threshold || band->threshold == 0.0f) {
+                    sce->zeroes[(w+w2)*16+g] = 1;
+                    continue;
+                }
+                nz = 1;
+            }
+            uplims[w*16+g] = uplim *512;
+            sce->zeroes[w*16+g] = !nz;
+            if (nz)
+                minthr = FFMIN(minthr, uplim);
+            allz |= nz;
+        }
+    }
+    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+        for (g = 0;  g < sce->ics.num_swb; g++) {
+            if (sce->zeroes[w*16+g]) {
+                sce->sf_idx[w*16+g] = SCALE_ONE_POS;
+                continue;
+            }
+            sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
+        }
+    }
+
+    if (!allz)
+        return;
+    abs_pow34_v(s->scoefs, sce->coeffs, 1024);
+
+    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+        start = w*128;
+        for (g = 0;  g < sce->ics.num_swb; g++) {
+            const float *scaled = s->scoefs + start;
+            maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
+            start += sce->ics.swb_sizes[g];
+        }
+    }
+
+    do {
+        int tbits, qstep;
+        minscaler = sce->sf_idx[0];
+        qstep = its ? 1 : 32;
+        do {
+            int prev = -1;
+            tbits = 0;
+            fflag = 0;
+
+            if (qstep > 1) {
+                for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+                    start = w*128;
+                    for (g = 0;  g < sce->ics.num_swb; g++) {
+                        const float *coefs = sce->coeffs + start;
+                        const float *scaled = s->scoefs + start;
+                        int bits = 0;
+                        int cb;
+
+                        if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
+                            start += sce->ics.swb_sizes[g];
+                            continue;
+                        }
+                        minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
+                        cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
+                        for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+                            int b;
+                            bits += quantize_band_cost_bits(s, coefs + w2*128,
+                                                            scaled + w2*128,
+                                                            sce->ics.swb_sizes[g],
+                                                            sce->sf_idx[w*16+g],
+                                                            cb,
+                                                            1.0f,
+                                                            INFINITY,
+                                                            &b);
+                        }
+                        if (prev != -1) {
+                            bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
+                        }
+                        tbits += bits;
+                        start += sce->ics.swb_sizes[g];
+                        prev = sce->sf_idx[w*16+g];
+                    }
+                }
+            }
+            else {
+                for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+                    start = w*128;
+                    for (g = 0;  g < sce->ics.num_swb; g++) {
+                        const float *coefs = sce->coeffs + start;
+                        const float *scaled = s->scoefs + start;
+                        int bits = 0;
+                        int cb;
+                        float dist = 0.0f;
+
+                        if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
+                            start += sce->ics.swb_sizes[g];
+                            continue;
+                        }
+                        minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
+                        cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
+                        for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
+                            int b;
+                            dist += quantize_band_cost(s, coefs + w2*128,
+                                                       scaled + w2*128,
+                                                       sce->ics.swb_sizes[g],
+                                                       sce->sf_idx[w*16+g],
+                                                       cb,
+                                                       1.0f,
+                                                       INFINITY,
+                                                       &b);
+                            bits += b;
+                        }
+                        dists[w*16+g] = dist - bits;
+                        if (prev != -1) {
+                            bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
+                        }
+                        tbits += bits;
+                        start += sce->ics.swb_sizes[g];
+                        prev = sce->sf_idx[w*16+g];
+                    }
+                }
+            }
+            if (tbits > destbits) {
+                for (i = 0; i < 128; i++)
+                    if (sce->sf_idx[i] < 218 - qstep)
+                        sce->sf_idx[i] += qstep;
+            } else {
+                for (i = 0; i < 128; i++)
+                    if (sce->sf_idx[i] > 60 - qstep)
+                        sce->sf_idx[i] -= qstep;
+            }
+            qstep >>= 1;
+            if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
+                qstep = 1;
+        } while (qstep);
+
+        fflag = 0;
+        minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
+        for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
+            for (g = 0; g < sce->ics.num_swb; g++) {
+                int prevsc = sce->sf_idx[w*16+g];
+                if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) {
+                    if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1))
+                        sce->sf_idx[w*16+g]--;
+                    else
+                        sce->sf_idx[w*16+g]-=2;
+                }
+                sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
+                sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
+                if (sce->sf_idx[w*16+g] != prevsc)
+                    fflag = 1;
+                sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
+            }
+        }
+        its++;
+    } while (fflag && its < 10);
+}
+
+static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe,
+                               const float lambda)
+{   /* Fills cpe->ms_mask[]: 1 for a band when the mid/side pair has a lower quantize_band_cost() total than left/right. */
+    int start = 0, i, w, w2, g;   /* start: coefficient offset of band g; only ever advanced (see note at the bottom) */
+    float M[128], S[128];         /* mid and side spectra of the band currently being evaluated */
+    float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3; /* four 128-float scratch areas in s->scoefs for abs_pow34_v() output */
+    SingleChannelElement *sce0 = &cpe->ch[0];
+    SingleChannelElement *sce1 = &cpe->ch[1];
+    if (!cpe->common_window)      /* when common_window is not set the mask is left untouched */
+        return;
+    for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) { /* one pass per window group */
+        for (g = 0;  g < sce0->ics.num_swb; g++) {
+            if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) { /* bands zeroed in either channel are not evaluated */
+                float dist1 = 0.0f, dist2 = 0.0f; /* dist1: left/right total, dist2: mid/side total */
+                for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
+                    FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
+                    FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
+                    float minthr = FFMIN(band0->threshold, band1->threshold); /* used to weight the side signal below */
+                    float maxthr = FFMAX(band0->threshold, band1->threshold); /* used to weight the mid signal below */
+                    for (i = 0; i < sce0->ics.swb_sizes[g]; i+=4) { /* unrolled by 4; NOTE(review): assumes swb_sizes[g] is a multiple of 4 and at most 128 - confirm */
+                        M[i  ] = (sce0->coeffs[start+w2*128+i  ] /* mid = average of the two channels */
+                                + sce1->coeffs[start+w2*128+i  ]) * 0.5;
+                        M[i+1] = (sce0->coeffs[start+w2*128+i+1]
+                                + sce1->coeffs[start+w2*128+i+1]) * 0.5;
+                        M[i+2] = (sce0->coeffs[start+w2*128+i+2]
+                                + sce1->coeffs[start+w2*128+i+2]) * 0.5;
+                        M[i+3] = (sce0->coeffs[start+w2*128+i+3]
+                                + sce1->coeffs[start+w2*128+i+3]) * 0.5;
+
+                        S[i  ] =  M[i  ] /* side = mid minus channel 1, i.e. half the channel difference */
+                                - sce1->coeffs[start+w2*128+i  ];
+                        S[i+1] =  M[i+1]
+                                - sce1->coeffs[start+w2*128+i+1];
+                        S[i+2] =  M[i+2]
+                                - sce1->coeffs[start+w2*128+i+2];
+                        S[i+3] =  M[i+3]
+                                - sce1->coeffs[start+w2*128+i+3];
+                   }
+                    abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]); /* companion inputs for the four cost calls below */
+                    abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
+                    abs_pow34_v(M34, M,                         sce0->ics.swb_sizes[g]);
+                    abs_pow34_v(S34, S,                         sce0->ics.swb_sizes[g]);
+                    dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128, /* channel 0 as-is, weighted by its own threshold */
+                                                L34,
+                                                sce0->ics.swb_sizes[g],
+                                                sce0->sf_idx[(w+w2)*16+g],
+                                                sce0->band_type[(w+w2)*16+g],
+                                                lambda / band0->threshold, INFINITY, NULL);
+                    dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128, /* channel 1 as-is, weighted by its own threshold */
+                                                R34,
+                                                sce1->ics.swb_sizes[g],
+                                                sce1->sf_idx[(w+w2)*16+g],
+                                                sce1->band_type[(w+w2)*16+g],
+                                                lambda / band1->threshold, INFINITY, NULL);
+                    dist2 += quantize_band_cost(s, M, /* mid signal with channel 0's scalefactor and codebook */
+                                                M34,
+                                                sce0->ics.swb_sizes[g],
+                                                sce0->sf_idx[(w+w2)*16+g],
+                                                sce0->band_type[(w+w2)*16+g],
+                                                lambda / maxthr, INFINITY, NULL);
+                    dist2 += quantize_band_cost(s, S, /* side signal with channel 1's scalefactor and codebook */
+                                                S34,
+                                                sce1->ics.swb_sizes[g],
+                                                sce1->sf_idx[(w+w2)*16+g],
+                                                sce1->band_type[(w+w2)*16+g],
+                                                lambda / minthr, INFINITY, NULL);
+                }
+                cpe->ms_mask[w*16+g] = dist2 < dist1; /* strictly smaller mid/side total selects M/S for the band */
+            }
+            start += sce0->ics.swb_sizes[g]; /* NOTE(review): start is never reset per window group, while the quantizer search above this function sets start = w*128 for each group; confirm the offsets when group_len > 1 */
+        }
+    }
+}
+#endif /* HAVE_MIPSFPU */
+
+static void codebook_trellis_rate_mips(AACEncContext *s, SingleChannelElement *sce,
+                                       int win, int group_len, const float lambda)
+{   /* Searches codebooks 0..11 for the bands of the group starting at window win, writes the chosen (codebook, run) sections to s->pb and updates sce->band_type / sce->zeroes. */
+    BandCodingPath path[120][12]; /* path[n][cb]: state after the first n bands with codebook cb in use on the last one */
+    int w, swb, cb, start, size;
+    int i, j;
+    const int max_sfb  = sce->ics.max_sfb;
+    const int run_bits = sce->ics.num_windows == 1 ? 5 : 3; /* width of one run-length field */
+    const int run_esc  = (1 << run_bits) - 1;               /* all-ones field value; emitted repeatedly for long runs (see the write loop) */
+    int idx, ppos, count;
+    int stackrun[120], stackcb[120], stack_len; /* sections gathered while backtracking, last section first */
+    float next_minbits = INFINITY;              /* lowest cost over all codebooks at the band just processed */
+    int next_mincb = 0;                         /* codebook that achieved next_minbits */
+
+    abs_pow34_v(s->scoefs, sce->coeffs, 1024); /* fill s->scoefs for the cost calls below */
+    start = win*128;                           /* coefficient offset of window win */
+    for (cb = 0; cb < 12; cb++) {
+        path[0][cb].cost     = run_bits+4; /* every path starts by paying one section header: 4-bit codebook plus one run field */
+        path[0][cb].prev_idx = -1;
+        path[0][cb].run      = 0;
+    }
+    for (swb = 0; swb < max_sfb; swb++) {
+        size = sce->ics.swb_sizes[swb];
+        if (sce->zeroes[win*16 + swb]) { /* band flagged zero: only codebook 0 gets a real cost */
+            float cost_stay_here = path[swb][0].cost;
+            float cost_get_here  = next_minbits + run_bits + 4; /* switch from the cheapest predecessor and pay a new header */
+            if (   run_value_bits[sce->ics.num_windows == 8][path[swb][0].run]
+                != run_value_bits[sce->ics.num_windows == 8][path[swb][0].run+1])
+                cost_stay_here += run_bits; /* table entry differs for run+1: extending the run costs another run field */
+            if (cost_get_here < cost_stay_here) {
+                path[swb+1][0].prev_idx = next_mincb;
+                path[swb+1][0].cost     = cost_get_here;
+                path[swb+1][0].run      = 1;
+            } else {
+                path[swb+1][0].prev_idx = 0;
+                path[swb+1][0].cost     = cost_stay_here;
+                path[swb+1][0].run      = path[swb][0].run + 1;
+            }
+            next_minbits = path[swb+1][0].cost;
+            next_mincb = 0;
+            for (cb = 1; cb < 12; cb++) {
+                path[swb+1][cb].cost = 61450; /* NOTE(review): presumably a "practically unreachable" cost for disallowed codebooks - confirm the origin of this constant */
+                path[swb+1][cb].prev_idx = -1;
+                path[swb+1][cb].run = 0;
+            }
+        } else {
+            float minbits = next_minbits; /* cheapest state of the previous band, used for the "switch codebook" transition */
+            int mincb = next_mincb;
+            int startcb = sce->band_type[win*16+swb]; /* codebooks below the band's current band_type are not tried */
+            next_minbits = INFINITY;
+            next_mincb = 0;
+            for (cb = 0; cb < startcb; cb++) {
+                path[swb+1][cb].cost = 61450; /* same disallowed-codebook cost as in the zero-band branch */
+                path[swb+1][cb].prev_idx = -1;
+                path[swb+1][cb].run = 0;
+            }
+            for (cb = startcb; cb < 12; cb++) {
+                float cost_stay_here, cost_get_here;
+                float bits = 0.0f;
+                for (w = 0; w < group_len; w++) { /* sum the band's bit count over every window of the group */
+                    bits += quantize_band_cost_bits(s, sce->coeffs + start + w*128,
+                                                    s->scoefs + start + w*128, size,
+                                                    sce->sf_idx[(win+w)*16+swb], cb,
+                                                    0, INFINITY, NULL);
+                }
+                cost_stay_here = path[swb][cb].cost + bits;              /* keep codebook cb from the previous band */
+                cost_get_here  = minbits            + bits + run_bits + 4; /* start a new section with codebook cb */
+                if (   run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
+                    != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
+                    cost_stay_here += run_bits; /* table entry differs for run+1: extending the run costs another run field */
+                if (cost_get_here < cost_stay_here) {
+                    path[swb+1][cb].prev_idx = mincb;
+                    path[swb+1][cb].cost     = cost_get_here;
+                    path[swb+1][cb].run      = 1;
+                } else {
+                    path[swb+1][cb].prev_idx = cb;
+                    path[swb+1][cb].cost     = cost_stay_here;
+                    path[swb+1][cb].run      = path[swb][cb].run + 1;
+                }
+                if (path[swb+1][cb].cost < next_minbits) { /* remember the cheapest state for the next band */
+                    next_minbits = path[swb+1][cb].cost;
+                    next_mincb = cb;
+                }
+            }
+        }
+        start += sce->ics.swb_sizes[swb];
+    }
+
+    stack_len = 0;
+    idx       = 0;
+    for (cb = 1; cb < 12; cb++) /* pick the cheapest codebook at the final band */
+        if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
+            idx = cb;
+    ppos = max_sfb;
+    while (ppos > 0) { /* backtrack: each iteration records one whole section */
+        av_assert1(idx >= 0);
+        cb = idx;
+        stackrun[stack_len] = path[ppos][cb].run;
+        stackcb [stack_len] = cb;
+        idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx; /* predecessor codebook is stored on the first band of this run */
+        ppos -= path[ppos][cb].run;
+        stack_len++;
+    }
+
+    start = 0; /* reused from here on as a band index inside the group */
+    for (i = stack_len - 1; i >= 0; i--) { /* stack holds the last section first, so walk it backwards */
+        put_bits(&s->pb, 4, stackcb[i]); /* 4-bit codebook number of the section */
+        count = stackrun[i];
+        memset(sce->zeroes + win*16 + start, !stackcb[i], count); /* bands coded with codebook 0 become zero bands */
+        for (j = 0; j < count; j++) {
+            sce->band_type[win*16 + start] =  stackcb[i];
+            start++;
+        }
+        while (count >= run_esc) { /* long run: emit the all-ones field and continue with the remainder */
+            put_bits(&s->pb, run_bits, run_esc);
+            count -= run_esc;
+        }
+        put_bits(&s->pb, run_bits, count); /* final field, always below run_esc */
+    }
+}
+#endif /* HAVE_INLINE_ASM */
+
+void ff_aac_coder_init_mips(AACEncContext *c) { /* Installs the MIPS routines of this file into c->coder; does nothing without HAVE_INLINE_ASM or when aac_coder != 2. */
+#if HAVE_INLINE_ASM
+    AACCoefficientsEncoder *e = c->coder;
+    int option = c->options.aac_coder;
+
+    if (option == 2) { /* NOTE(review): 2 presumably selects the two-loop coder, judging by the function installed below - confirm */
+        e->quantize_and_encode_band = quantize_and_encode_band_mips;
+        e->encode_window_bands_info = codebook_trellis_rate_mips;
+#if HAVE_MIPSFPU
+        e->search_for_quantizers    = search_for_quantizers_twoloop_mips; /* these two are installed only when HAVE_MIPSFPU is set */
+        e->search_for_ms            = search_for_ms_mips;
+#endif /* HAVE_MIPSFPU */
+    }
+#endif /* HAVE_INLINE_ASM */
+}
diff --git a/libavcodec/mips/aacdec_mips.c b/libavcodec/mips/aacdec_mips.c
new file mode 100644
index 0000000..e403366
--- /dev/null
+++ b/libavcodec/mips/aacdec_mips.c
@@ -0,0 +1,831 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Authors:  Darko Laus      (darko@mips.com)
+ *           Djordje Pesut   (djordje@mips.com)
+ *           Mirjana Vulin   (mvulin@mips.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/aacdec.c
+ */
+
+#include "libavcodec/aac.h"
+#include "aacdec_mips.h"
+#include "libavcodec/aactab.h"
+#include "libavcodec/sinewin.h"
+
+#if HAVE_INLINE_ASM
+static av_always_inline int lcg_random(unsigned previous_val)
+{   /* One linear congruential step: previous_val * 1664525 + 1013904223 in unsigned (wrapping) arithmetic. */
+    union { unsigned u; int s; } v = { previous_val * 1664525u + 1013904223 }; /* initialises member u; the union lets the same bits be read back as int */
+    return v.s; /* signed view of the new state */
+}
+
+static void imdct_and_windowing_mips(AACContext *ac, SingleChannelElement *sce)
+{
+    IndividualChannelStream *ics = &sce->ics;
+    float *in    = sce->coeffs;
+    float *out   = sce->ret;
+    float *saved = sce->saved;
+    const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
+    const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
+    const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
+    float *buf  = ac->buf_mdct;
+    int i;
+
+    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+        for (i = 0; i < 1024; i += 128)
+            ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i);
+    } else
+        ac->mdct.imdct_half(&ac->mdct, buf, in);
+
+    /* window overlapping
+     * NOTE: To simplify the overlapping code, all 'meaningless' short to long
+     * and long to short transitions are considered to be short to short
+     * transitions. This leaves just two cases (long to long and short to short)
+     * with a little special sauce for EIGHT_SHORT_SEQUENCE.
+     */
+    if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
+            (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
+        ac->fdsp.vector_fmul_window(    out,               saved,            buf,         lwindow_prev, 512);
+    } else {
+        {
+            float *buf1 = saved;
+            float *buf2 = out;
+            int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+            int loop_end;
+
+            /* loop unrolled 8 times */
+            __asm__ volatile (
+                ".set push                               \n\t"
+                ".set noreorder                          \n\t"
+                "addiu   %[loop_end], %[src],      1792  \n\t"
+            "1:                                          \n\t"
+                "lw      %[temp0],    0(%[src])          \n\t"
+                "lw      %[temp1],    4(%[src])          \n\t"
+                "lw      %[temp2],    8(%[src])          \n\t"
+                "lw      %[temp3],    12(%[src])         \n\t"
+                "lw      %[temp4],    16(%[src])         \n\t"
+                "lw      %[temp5],    20(%[src])         \n\t"
+                "lw      %[temp6],    24(%[src])         \n\t"
+                "lw      %[temp7],    28(%[src])         \n\t"
+                "addiu   %[src],      %[src],      32    \n\t"
+                "sw      %[temp0],    0(%[dst])          \n\t"
+                "sw      %[temp1],    4(%[dst])          \n\t"
+                "sw      %[temp2],    8(%[dst])          \n\t"
+                "sw      %[temp3],    12(%[dst])         \n\t"
+                "sw      %[temp4],    16(%[dst])         \n\t"
+                "sw      %[temp5],    20(%[dst])         \n\t"
+                "sw      %[temp6],    24(%[dst])         \n\t"
+                "sw      %[temp7],    28(%[dst])         \n\t"
+                "bne     %[src],      %[loop_end], 1b    \n\t"
+                " addiu  %[dst],      %[dst],      32    \n\t"
+                ".set pop                                \n\t"
+
+                : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+                  [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+                  [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+                  [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
+                  [loop_end]"=&r"(loop_end), [src]"+r"(buf1),
+                  [dst]"+r"(buf2)
+                :
+                : "memory"
+            );
+        }
+
+        if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+            {
+                float wi;
+                float wj;
+                int i;
+                float temp0, temp1, temp2, temp3;
+                float *dst0 = out + 448 + 0*128;
+                float *dst1 = dst0 + 64 + 63;
+                float *dst2 = saved + 63;
+                float *win0 = (float*)swindow;
+                float *win1 = win0 + 64 + 63;
+                float *win0_prev = (float*)swindow_prev;
+                float *win1_prev = win0_prev + 64 + 63;
+                float *src0_prev = saved + 448;
+                float *src1_prev = buf + 0*128 + 63;
+                float *src0 = buf + 0*128 + 64;
+                float *src1 = buf + 1*128 + 63;
+
+                for(i = 0; i < 64; i++)
+                {
+                    temp0 = src0_prev[0];
+                    temp1 = src1_prev[0];
+                    wi = *win0_prev;
+                    wj = *win1_prev;
+                    temp2 = src0[0];
+                    temp3 = src1[0];
+                    dst0[0] = temp0 * wj - temp1 * wi;
+                    dst1[0] = temp0 * wi + temp1 * wj;
+
+                    wi = *win0;
+                    wj = *win1;
+
+                    temp0 = src0[128];
+                    temp1 = src1[128];
+                    dst0[128] = temp2 * wj - temp3 * wi;
+                    dst1[128] = temp2 * wi + temp3 * wj;
+
+                    temp2 = src0[256];
+                    temp3 = src1[256];
+                    dst0[256] = temp0 * wj - temp1 * wi;
+                    dst1[256] = temp0 * wi + temp1 * wj;
+                    dst0[384] = temp2 * wj - temp3 * wi;
+                    dst1[384] = temp2 * wi + temp3 * wj;
+
+                    temp0 = src0[384];
+                    temp1 = src1[384];
+                    dst0[512] = temp0 * wj - temp1 * wi;
+                    dst2[0] = temp0 * wi + temp1 * wj;
+
+                    src0++;
+                    src1--;
+                    src0_prev++;
+                    src1_prev--;
+                    win0++;
+                    win1--;
+                    win0_prev++;
+                    win1_prev--;
+                    dst0++;
+                    dst1--;
+                    dst2--;
+                }
+            }
+        } else {
+            ac->fdsp.vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, 64);
+            {
+                float *buf1 = buf + 64;
+                float *buf2 = out + 576;
+                int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+                int loop_end;
+
+                /* loop unrolled 8 times */
+                __asm__ volatile (
+                    ".set push                               \n\t"
+                    ".set noreorder                          \n\t"
+                    "addiu   %[loop_end], %[src],      1792  \n\t"
+                "1:                                          \n\t"
+                    "lw      %[temp0],    0(%[src])          \n\t"
+                    "lw      %[temp1],    4(%[src])          \n\t"
+                    "lw      %[temp2],    8(%[src])          \n\t"
+                    "lw      %[temp3],    12(%[src])         \n\t"
+                    "lw      %[temp4],    16(%[src])         \n\t"
+                    "lw      %[temp5],    20(%[src])         \n\t"
+                    "lw      %[temp6],    24(%[src])         \n\t"
+                    "lw      %[temp7],    28(%[src])         \n\t"
+                    "addiu   %[src],      %[src],      32    \n\t"
+                    "sw      %[temp0],    0(%[dst])          \n\t"
+                    "sw      %[temp1],    4(%[dst])          \n\t"
+                    "sw      %[temp2],    8(%[dst])          \n\t"
+                    "sw      %[temp3],    12(%[dst])         \n\t"
+                    "sw      %[temp4],    16(%[dst])         \n\t"
+                    "sw      %[temp5],    20(%[dst])         \n\t"
+                    "sw      %[temp6],    24(%[dst])         \n\t"
+                    "sw      %[temp7],    28(%[dst])         \n\t"
+                    "bne     %[src],      %[loop_end], 1b    \n\t"
+                    " addiu  %[dst],      %[dst],      32    \n\t"
+                    ".set pop                                \n\t"
+
+                    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+                      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+                      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+                      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
+                      [loop_end]"=&r"(loop_end), [src]"+r"(buf1),
+                      [dst]"+r"(buf2)
+                    :
+                    : "memory"
+                );
+            }
+        }
+    }
+
+    // buffer update
+    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+        ac->fdsp.vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 64);
+        ac->fdsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
+        ac->fdsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
+        {
+            float *buf1 = buf + 7*128 + 64;
+            float *buf2 = saved + 448;
+            int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+            int loop_end;
+
+            /* loop unrolled 8 times */
+            __asm__ volatile (
+                ".set push                                \n\t"
+                ".set noreorder                           \n\t"
+                "addiu   %[loop_end], %[src],       256   \n\t"
+            "1:                                           \n\t"
+                "lw      %[temp0],    0(%[src])           \n\t"
+                "lw      %[temp1],    4(%[src])           \n\t"
+                "lw      %[temp2],    8(%[src])           \n\t"
+                "lw      %[temp3],    12(%[src])          \n\t"
+                "lw      %[temp4],    16(%[src])          \n\t"
+                "lw      %[temp5],    20(%[src])          \n\t"
+                "lw      %[temp6],    24(%[src])          \n\t"
+                "lw      %[temp7],    28(%[src])          \n\t"
+                "addiu   %[src],      %[src],       32    \n\t"
+                "sw      %[temp0],    0(%[dst])           \n\t"
+                "sw      %[temp1],    4(%[dst])           \n\t"
+                "sw      %[temp2],    8(%[dst])           \n\t"
+                "sw      %[temp3],    12(%[dst])          \n\t"
+                "sw      %[temp4],    16(%[dst])          \n\t"
+                "sw      %[temp5],    20(%[dst])          \n\t"
+                "sw      %[temp6],    24(%[dst])          \n\t"
+                "sw      %[temp7],    28(%[dst])          \n\t"
+                "bne     %[src],      %[loop_end],  1b    \n\t"
+                " addiu  %[dst],      %[dst],       32    \n\t"
+                ".set pop                                 \n\t"
+
+                : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+                  [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+                  [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+                  [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
+                  [loop_end]"=&r"(loop_end), [src]"+r"(buf1),
+                  [dst]"+r"(buf2)
+                :
+                : "memory"
+            );
+        }
+    } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
+        float *buf1 = buf + 512;
+        float *buf2 = saved;
+        int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+        int loop_end;
+
+        /* loop unrolled 8 times */
+        __asm__ volatile (
+            ".set push                                \n\t"
+            ".set noreorder                           \n\t"
+            "addiu   %[loop_end], %[src],       1792  \n\t"
+        "1:                                           \n\t"
+            "lw      %[temp0],    0(%[src])           \n\t"
+            "lw      %[temp1],    4(%[src])           \n\t"
+            "lw      %[temp2],    8(%[src])           \n\t"
+            "lw      %[temp3],    12(%[src])          \n\t"
+            "lw      %[temp4],    16(%[src])          \n\t"
+            "lw      %[temp5],    20(%[src])          \n\t"
+            "lw      %[temp6],    24(%[src])          \n\t"
+            "lw      %[temp7],    28(%[src])          \n\t"
+            "addiu   %[src],      %[src],       32    \n\t"
+            "sw      %[temp0],    0(%[dst])           \n\t"
+            "sw      %[temp1],    4(%[dst])           \n\t"
+            "sw      %[temp2],    8(%[dst])           \n\t"
+            "sw      %[temp3],    12(%[dst])          \n\t"
+            "sw      %[temp4],    16(%[dst])          \n\t"
+            "sw      %[temp5],    20(%[dst])          \n\t"
+            "sw      %[temp6],    24(%[dst])          \n\t"
+            "sw      %[temp7],    28(%[dst])          \n\t"
+            "bne     %[src],      %[loop_end],  1b    \n\t"
+            " addiu  %[dst],      %[dst],       32    \n\t"
+            ".set pop                                 \n\t"
+
+            : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+              [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+              [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+              [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
+              [loop_end]"=&r"(loop_end), [src]"+r"(buf1),
+              [dst]"+r"(buf2)
+            :
+            : "memory"
+        );
+        {
+            float *buf1 = buf + 7*128 + 64;
+            float *buf2 = saved + 448;
+            int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+            int loop_end;
+
+            /* loop unrolled 8 times */
+            __asm__ volatile (
+                ".set push                                 \n\t"
+                ".set noreorder                            \n\t"
+                "addiu   %[loop_end], %[src],        256   \n\t"
+            "1:                                            \n\t"
+                "lw      %[temp0],    0(%[src])            \n\t"
+                "lw      %[temp1],    4(%[src])            \n\t"
+                "lw      %[temp2],    8(%[src])            \n\t"
+                "lw      %[temp3],    12(%[src])           \n\t"
+                "lw      %[temp4],    16(%[src])           \n\t"
+                "lw      %[temp5],    20(%[src])           \n\t"
+                "lw      %[temp6],    24(%[src])           \n\t"
+                "lw      %[temp7],    28(%[src])           \n\t"
+                "addiu   %[src],      %[src],        32    \n\t"
+                "sw      %[temp0],    0(%[dst])            \n\t"
+                "sw      %[temp1],    4(%[dst])            \n\t"
+                "sw      %[temp2],    8(%[dst])            \n\t"
+                "sw      %[temp3],    12(%[dst])           \n\t"
+                "sw      %[temp4],    16(%[dst])           \n\t"
+                "sw      %[temp5],    20(%[dst])           \n\t"
+                "sw      %[temp6],    24(%[dst])           \n\t"
+                "sw      %[temp7],    28(%[dst])           \n\t"
+                "bne     %[src],      %[loop_end],   1b    \n\t"
+                " addiu  %[dst],      %[dst],        32    \n\t"
+                ".set pop                                  \n\t"
+
+                : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+                  [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+                  [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+                  [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
+                  [loop_end]"=&r"(loop_end), [src]"+r"(buf1),
+                  [dst]"+r"(buf2)
+                :
+                : "memory"
+            );
+        }
+    } else { // LONG_STOP or ONLY_LONG
+        float *buf1 = buf + 512;
+        float *buf2 = saved;
+        int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+        int loop_end;
+
+        /* loop unrolled 8 times */
+        __asm__ volatile (
+            ".set push                                 \n\t"
+            ".set noreorder                            \n\t"
+            "addiu   %[loop_end], %[src],        2048  \n\t"
+        "1:                                            \n\t"
+            "lw      %[temp0],    0(%[src])            \n\t"
+            "lw      %[temp1],    4(%[src])            \n\t"
+            "lw      %[temp2],    8(%[src])            \n\t"
+            "lw      %[temp3],    12(%[src])           \n\t"
+            "lw      %[temp4],    16(%[src])           \n\t"
+            "lw      %[temp5],    20(%[src])           \n\t"
+            "lw      %[temp6],    24(%[src])           \n\t"
+            "lw      %[temp7],    28(%[src])           \n\t"
+            "addiu   %[src],      %[src],        32    \n\t"
+            "sw      %[temp0],    0(%[dst])            \n\t"
+            "sw      %[temp1],    4(%[dst])            \n\t"
+            "sw      %[temp2],    8(%[dst])            \n\t"
+            "sw      %[temp3],    12(%[dst])           \n\t"
+            "sw      %[temp4],    16(%[dst])           \n\t"
+            "sw      %[temp5],    20(%[dst])           \n\t"
+            "sw      %[temp6],    24(%[dst])           \n\t"
+            "sw      %[temp7],    28(%[dst])           \n\t"
+            "bne     %[src],      %[loop_end],   1b    \n\t"
+            " addiu  %[dst],      %[dst],        32    \n\t"
+            ".set pop                                  \n\t"
+
+            : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+              [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+              [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+              [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
+              [loop_end]"=&r"(loop_end), [src]"+r"(buf1),
+              [dst]"+r"(buf2)
+            :
+            : "memory"
+        );
+    }
+}
+
+static void apply_ltp_mips(AACContext *ac, SingleChannelElement *sce)
+{
+    const LongTermPrediction *ltp = &sce->ics.ltp;
+    const uint16_t *offsets = sce->ics.swb_offset;
+    int i, sfb;
+    int j, k;
+    /* Adds the long-term prediction to sce->coeffs; nothing is done for EIGHT_SHORT_SEQUENCE. */
+    if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
+        float *predTime = sce->ret;
+        float *predFreq = ac->buf_mdct;
+        float *p_predTime;
+        int16_t num_samples = 2048;
+        /* predTime receives 2048 floats: num_samples predicted values, then zeros. */
+        if (ltp->lag < 1024)
+            num_samples = ltp->lag + 1024;
+        /* Always executed (not part of the if): tail length split into groups of 4 plus remainder. */
+        j = (2048 - num_samples) >> 2;
+        k = (2048 - num_samples) & 3;
+        p_predTime = &predTime[num_samples];
+        for (i = 0; i < num_samples; i++)
+            predTime[i] = sce->ltp_state[i + 2048 - ltp->lag] * ltp->coef;
+        for (i = 0; i < j; i++) {
+            /* clear four words of the tail per pass by storing $0 and advancing the pointer */
+            /* loop unrolled 4 times */
+            __asm__ volatile (
+                "sw      $0,              0(%[p_predTime])        \n\t"
+                "sw      $0,              4(%[p_predTime])        \n\t"
+                "sw      $0,              8(%[p_predTime])        \n\t"
+                "sw      $0,              12(%[p_predTime])       \n\t"
+                "addiu   %[p_predTime],   %[p_predTime],     16   \n\t"
+
+                : [p_predTime]"+r"(p_predTime)
+                :
+                : "memory"
+            );
+        }
+        for (i = 0; i < k; i++) {
+            /* remaining 0..3 words of the tail, one word per pass */
+            __asm__ volatile (
+                "sw      $0,              0(%[p_predTime])        \n\t"
+                "addiu   %[p_predTime],   %[p_predTime],     4    \n\t"
+
+                : [p_predTime]"+r"(p_predTime)
+                :
+                : "memory"
+            );
+        }
+
+        ac->windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics);
+
+        if (sce->tns.present)
+            ac->apply_tns(predFreq, &sce->tns, &sce->ics, 0);
+        /* accumulate predFreq only in the scalefactor bands flagged in ltp->used */
+        for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
+            if (ltp->used[sfb])
+                for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
+                    sce->coeffs[i] += predFreq[i];
+    }
+}
+
+#if HAVE_MIPSFPU
+static void update_ltp_mips(AACContext *ac, SingleChannelElement *sce)
+{
+    IndividualChannelStream *ics = &sce->ics;
+    float *saved     = sce->saved;
+    float *saved_ltp = sce->coeffs;
+    const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
+    const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
+    int i;
+    int loop_end, loop_end1, loop_end2;
+    float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11;
+    /* Fills saved_ltp (sce->coeffs) according to the window sequence, then updates the three 1024-float parts of sce->ltp_state. */
+    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+        float *buf = saved;
+        float *buf0 = saved_ltp;
+        float *p_saved_ltp = saved_ltp + 576;
+        float *ptr1 = &saved_ltp[512];
+        float *ptr2 = &ac->buf_mdct[1023];
+        float *ptr3 = (float*)&swindow[63];
+        loop_end1 = (int)(p_saved_ltp + 448);
+        /* loop_end1 is the end address of the zero-fill below (p_saved_ltp + 448 floats), kept in an int */
+        /* copy 2048 bytes (512 floats) from saved to saved_ltp; loop unrolled 8 times */
+        __asm__ volatile (
+            ".set push                                     \n\t"
+            ".set noreorder                                \n\t"
+            "addiu   %[loop_end],   %[src],         2048   \n\t"
+        "1:                                                \n\t"
+            "lw      %[temp0],      0(%[src])              \n\t"
+            "lw      %[temp1],      4(%[src])              \n\t"
+            "lw      %[temp2],      8(%[src])              \n\t"
+            "lw      %[temp3],      12(%[src])             \n\t"
+            "lw      %[temp4],      16(%[src])             \n\t"
+            "lw      %[temp5],      20(%[src])             \n\t"
+            "lw      %[temp6],      24(%[src])             \n\t"
+            "lw      %[temp7],      28(%[src])             \n\t"
+            "addiu   %[src],        %[src],         32     \n\t"
+            "sw      %[temp0],      0(%[dst])              \n\t"
+            "sw      %[temp1],      4(%[dst])              \n\t"
+            "sw      %[temp2],      8(%[dst])              \n\t"
+            "sw      %[temp3],      12(%[dst])             \n\t"
+            "sw      %[temp4],      16(%[dst])             \n\t"
+            "sw      %[temp5],      20(%[dst])             \n\t"
+            "sw      %[temp6],      24(%[dst])             \n\t"
+            "sw      %[temp7],      28(%[dst])             \n\t"
+            "bne     %[src],        %[loop_end],    1b     \n\t"
+            " addiu  %[dst],        %[dst],         32     \n\t"
+            ".set pop                                      \n\t"
+            /* NOTE(review): temp0..temp7 are declared float above but bound to "r" operands for lw/sw here - confirm this is intended */
+            : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+              [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+              [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+              [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
+              [loop_end]"=&r"(loop_end), [src]"+r"(buf),
+              [dst]"+r"(buf0)
+            :
+            : "memory"
+        );
+
+        /* zero saved_ltp[576..1023], eight words per pass; loop unrolled 8 times */
+        __asm__ volatile (
+        "1:                                                   \n\t"
+            "sw     $0,              0(%[p_saved_ltp])        \n\t"
+            "sw     $0,              4(%[p_saved_ltp])        \n\t"
+            "sw     $0,              8(%[p_saved_ltp])        \n\t"
+            "sw     $0,              12(%[p_saved_ltp])       \n\t"
+            "sw     $0,              16(%[p_saved_ltp])       \n\t"
+            "sw     $0,              20(%[p_saved_ltp])       \n\t"
+            "sw     $0,              24(%[p_saved_ltp])       \n\t"
+            "sw     $0,              28(%[p_saved_ltp])       \n\t"
+            "addiu  %[p_saved_ltp],  %[p_saved_ltp],     32   \n\t"
+            "bne    %[p_saved_ltp],  %[loop_end1],       1b   \n\t"
+
+            : [p_saved_ltp]"+r"(p_saved_ltp)
+            : [loop_end1]"r"(loop_end1)
+            : "memory"
+        );
+
+        ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     &swindow[64],      64);
+        for (i = 0; i < 16; i++){
+            /* saved_ltp[512 + n] = buf_mdct[1023 - n] * swindow[63 - n], four n per pass (64 total); loop unrolled 4 times */
+            __asm__ volatile (
+                "lwc1    %[temp0],    0(%[ptr2])                \n\t"
+                "lwc1    %[temp1],    -4(%[ptr2])               \n\t"
+                "lwc1    %[temp2],    -8(%[ptr2])               \n\t"
+                "lwc1    %[temp3],    -12(%[ptr2])              \n\t"
+                "lwc1    %[temp4],    0(%[ptr3])                \n\t"
+                "lwc1    %[temp5],    -4(%[ptr3])               \n\t"
+                "lwc1    %[temp6],    -8(%[ptr3])               \n\t"
+                "lwc1    %[temp7],    -12(%[ptr3])              \n\t"
+                "mul.s   %[temp8],    %[temp0],     %[temp4]    \n\t"
+                "mul.s   %[temp9],    %[temp1],     %[temp5]    \n\t"
+                "mul.s   %[temp10],   %[temp2],     %[temp6]    \n\t"
+                "mul.s   %[temp11],   %[temp3],     %[temp7]    \n\t"
+                "swc1    %[temp8],    0(%[ptr1])                \n\t"
+                "swc1    %[temp9],    4(%[ptr1])                \n\t"
+                "swc1    %[temp10],   8(%[ptr1])                \n\t"
+                "swc1    %[temp11],   12(%[ptr1])               \n\t"
+                "addiu   %[ptr1],     %[ptr1],      16          \n\t"
+                "addiu   %[ptr2],     %[ptr2],      -16         \n\t"
+                "addiu   %[ptr3],     %[ptr3],      -16         \n\t"
+
+                : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
+                  [temp2]"=&f"(temp2), [temp3]"=&f"(temp3),
+                  [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
+                  [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
+                  [temp8]"=&f"(temp8), [temp9]"=&f"(temp9),
+                  [temp10]"=&f"(temp10), [temp11]"=&f"(temp11),
+                  [ptr1]"+r"(ptr1), [ptr2]"+r"(ptr2), [ptr3]"+r"(ptr3)
+                :
+                : "memory"
+            );
+        }
+    } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
+        float *buff0 = saved;
+        float *buff1 = saved_ltp;
+        float *ptr1 = &saved_ltp[512];
+        float *ptr2 = &ac->buf_mdct[1023];
+        float *ptr3 = (float*)&swindow[63];
+        loop_end = (int)(saved + 448);
+        /* copy saved[0..447] to saved_ltp[0..447] and, in the same pass, zero saved_ltp[576..1023] (byte offset 2304 from dst) */
+        /* loop unrolled 8 times */
+        __asm__ volatile (
+            ".set push                                  \n\t"
+            ".set noreorder                             \n\t"
+        "1:                                             \n\t"
+            "lw      %[temp0],    0(%[src])             \n\t"
+            "lw      %[temp1],    4(%[src])             \n\t"
+            "lw      %[temp2],    8(%[src])             \n\t"
+            "lw      %[temp3],    12(%[src])            \n\t"
+            "lw      %[temp4],    16(%[src])            \n\t"
+            "lw      %[temp5],    20(%[src])            \n\t"
+            "lw      %[temp6],    24(%[src])            \n\t"
+            "lw      %[temp7],    28(%[src])            \n\t"
+            "addiu   %[src],      %[src],         32    \n\t"
+            "sw      %[temp0],    0(%[dst])             \n\t"
+            "sw      %[temp1],    4(%[dst])             \n\t"
+            "sw      %[temp2],    8(%[dst])             \n\t"
+            "sw      %[temp3],    12(%[dst])            \n\t"
+            "sw      %[temp4],    16(%[dst])            \n\t"
+            "sw      %[temp5],    20(%[dst])            \n\t"
+            "sw      %[temp6],    24(%[dst])            \n\t"
+            "sw      %[temp7],    28(%[dst])            \n\t"
+            "sw      $0,          2304(%[dst])          \n\t"
+            "sw      $0,          2308(%[dst])          \n\t"
+            "sw      $0,          2312(%[dst])          \n\t"
+            "sw      $0,          2316(%[dst])          \n\t"
+            "sw      $0,          2320(%[dst])          \n\t"
+            "sw      $0,          2324(%[dst])          \n\t"
+            "sw      $0,          2328(%[dst])          \n\t"
+            "sw      $0,          2332(%[dst])          \n\t"
+            "bne     %[src],      %[loop_end],    1b    \n\t"
+            " addiu  %[dst],      %[dst],         32    \n\t"
+            ".set pop                                   \n\t"
+
+            : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+              [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+              [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+              [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
+              [src]"+r"(buff0), [dst]"+r"(buff1)
+            : [loop_end]"r"(loop_end)
+            : "memory"
+        );
+        ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     &swindow[64],      64);
+        for (i = 0; i < 16; i++){
+            /* saved_ltp[512 + n] = buf_mdct[1023 - n] * swindow[63 - n], four n per pass (64 total); loop unrolled 4 times */
+            __asm__ volatile (
+                "lwc1    %[temp0],    0(%[ptr2])                \n\t"
+                "lwc1    %[temp1],    -4(%[ptr2])               \n\t"
+                "lwc1    %[temp2],    -8(%[ptr2])               \n\t"
+                "lwc1    %[temp3],    -12(%[ptr2])              \n\t"
+                "lwc1    %[temp4],    0(%[ptr3])                \n\t"
+                "lwc1    %[temp5],    -4(%[ptr3])               \n\t"
+                "lwc1    %[temp6],    -8(%[ptr3])               \n\t"
+                "lwc1    %[temp7],    -12(%[ptr3])              \n\t"
+                "mul.s   %[temp8],    %[temp0],     %[temp4]    \n\t"
+                "mul.s   %[temp9],    %[temp1],     %[temp5]    \n\t"
+                "mul.s   %[temp10],   %[temp2],     %[temp6]    \n\t"
+                "mul.s   %[temp11],   %[temp3],     %[temp7]    \n\t"
+                "swc1    %[temp8],    0(%[ptr1])                \n\t"
+                "swc1    %[temp9],    4(%[ptr1])                \n\t"
+                "swc1    %[temp10],   8(%[ptr1])                \n\t"
+                "swc1    %[temp11],   12(%[ptr1])               \n\t"
+                "addiu   %[ptr1],     %[ptr1],      16          \n\t"
+                "addiu   %[ptr2],     %[ptr2],      -16         \n\t"
+                "addiu   %[ptr3],     %[ptr3],      -16         \n\t"
+
+                : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
+                  [temp2]"=&f"(temp2), [temp3]"=&f"(temp3),
+                  [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
+                  [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
+                  [temp8]"=&f"(temp8), [temp9]"=&f"(temp9),
+                  [temp10]"=&f"(temp10), [temp11]"=&f"(temp11),
+                  [ptr1]"+r"(ptr1), [ptr2]"+r"(ptr2), [ptr3]"+r"(ptr3)
+                :
+                : "memory"
+            );
+        }
+    } else { // LONG_STOP or ONLY_LONG
+        float *ptr1, *ptr2, *ptr3;
+        ac->fdsp.vector_fmul_reverse(saved_ltp,       ac->buf_mdct + 512,     &lwindow[512],     512);
+
+        ptr1 = &saved_ltp[512];
+        ptr2 = &ac->buf_mdct[1023];
+        ptr3 = (float*)&lwindow[511];
+
+        for (i = 0; i < 512; i+=4){
+            /* saved_ltp[512 + n] = buf_mdct[1023 - n] * lwindow[511 - n] for n = 0..511; loop unrolled 4 times */
+            __asm__ volatile (
+                "lwc1    %[temp0],    0(%[ptr2])                \n\t"
+                "lwc1    %[temp1],    -4(%[ptr2])               \n\t"
+                "lwc1    %[temp2],    -8(%[ptr2])               \n\t"
+                "lwc1    %[temp3],    -12(%[ptr2])              \n\t"
+                "lwc1    %[temp4],    0(%[ptr3])                \n\t"
+                "lwc1    %[temp5],    -4(%[ptr3])               \n\t"
+                "lwc1    %[temp6],    -8(%[ptr3])               \n\t"
+                "lwc1    %[temp7],    -12(%[ptr3])              \n\t"
+                "mul.s   %[temp8],    %[temp0],     %[temp4]    \n\t"
+                "mul.s   %[temp9],    %[temp1],     %[temp5]    \n\t"
+                "mul.s   %[temp10],   %[temp2],     %[temp6]    \n\t"
+                "mul.s   %[temp11],   %[temp3],     %[temp7]    \n\t"
+                "swc1    %[temp8],    0(%[ptr1])                \n\t"
+                "swc1    %[temp9],    4(%[ptr1])                \n\t"
+                "swc1    %[temp10],   8(%[ptr1])                \n\t"
+                "swc1    %[temp11],   12(%[ptr1])               \n\t"
+                "addiu   %[ptr1],     %[ptr1],      16          \n\t"
+                "addiu   %[ptr2],     %[ptr2],      -16         \n\t"
+                "addiu   %[ptr3],     %[ptr3],      -16         \n\t"
+
+                : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
+                  [temp2]"=&f"(temp2), [temp3]"=&f"(temp3),
+                  [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
+                  [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
+                  [temp8]"=&f"(temp8), [temp9]"=&f"(temp9),
+                  [temp10]"=&f"(temp10), [temp11]"=&f"(temp11),
+                  [ptr1]"+r"(ptr1), [ptr2]"+r"(ptr2),
+                  [ptr3]"+r"(ptr3)
+                :
+                : "memory"
+            );
+        }
+    }
+
+    {
+        float *buf1 = sce->ltp_state+1024;
+        float *buf2 = sce->ltp_state;
+        float *buf3 = sce->ret;
+        float *buf4 = sce->ltp_state+1024;
+        float *buf5 = saved_ltp;
+        float *buf6 = sce->ltp_state+2048;
+        /* three 4096-byte copies, in this order: ltp_state+1024 -> ltp_state, ret -> ltp_state+1024, saved_ltp -> ltp_state+2048 */
+        /* loops unrolled 8 times */
+        __asm__ volatile (
+            ".set push                                    \n\t"
+            ".set noreorder                               \n\t"
+            "addiu   %[loop_end],   %[src],         4096  \n\t"
+            "addiu   %[loop_end1],  %[src1],        4096  \n\t"
+            "addiu   %[loop_end2],  %[src2],        4096  \n\t"
+        "1:                                               \n\t"
+            "lw      %[temp0],      0(%[src])             \n\t"
+            "lw      %[temp1],      4(%[src])             \n\t"
+            "lw      %[temp2],      8(%[src])             \n\t"
+            "lw      %[temp3],      12(%[src])            \n\t"
+            "lw      %[temp4],      16(%[src])            \n\t"
+            "lw      %[temp5],      20(%[src])            \n\t"
+            "lw      %[temp6],      24(%[src])            \n\t"
+            "lw      %[temp7],      28(%[src])            \n\t"
+            "addiu   %[src],        %[src],         32    \n\t"
+            "sw      %[temp0],      0(%[dst])             \n\t"
+            "sw      %[temp1],      4(%[dst])             \n\t"
+            "sw      %[temp2],      8(%[dst])             \n\t"
+            "sw      %[temp3],      12(%[dst])            \n\t"
+            "sw      %[temp4],      16(%[dst])            \n\t"
+            "sw      %[temp5],      20(%[dst])            \n\t"
+            "sw      %[temp6],      24(%[dst])            \n\t"
+            "sw      %[temp7],      28(%[dst])            \n\t"
+            "bne     %[src],        %[loop_end],    1b    \n\t"
+            " addiu  %[dst],        %[dst],         32    \n\t"
+        "2:                                               \n\t"
+            "lw      %[temp0],      0(%[src1])            \n\t"
+            "lw      %[temp1],      4(%[src1])            \n\t"
+            "lw      %[temp2],      8(%[src1])            \n\t"
+            "lw      %[temp3],      12(%[src1])           \n\t"
+            "lw      %[temp4],      16(%[src1])           \n\t"
+            "lw      %[temp5],      20(%[src1])           \n\t"
+            "lw      %[temp6],      24(%[src1])           \n\t"
+            "lw      %[temp7],      28(%[src1])           \n\t"
+            "addiu   %[src1],       %[src1],        32    \n\t"
+            "sw      %[temp0],      0(%[dst1])            \n\t"
+            "sw      %[temp1],      4(%[dst1])            \n\t"
+            "sw      %[temp2],      8(%[dst1])            \n\t"
+            "sw      %[temp3],      12(%[dst1])           \n\t"
+            "sw      %[temp4],      16(%[dst1])           \n\t"
+            "sw      %[temp5],      20(%[dst1])           \n\t"
+            "sw      %[temp6],      24(%[dst1])           \n\t"
+            "sw      %[temp7],      28(%[dst1])           \n\t"
+            "bne     %[src1],       %[loop_end1],   2b    \n\t"
+            " addiu  %[dst1],       %[dst1],        32    \n\t"
+        "3:                                               \n\t"
+            "lw      %[temp0],      0(%[src2])            \n\t"
+            "lw      %[temp1],      4(%[src2])            \n\t"
+            "lw      %[temp2],      8(%[src2])            \n\t"
+            "lw      %[temp3],      12(%[src2])           \n\t"
+            "lw      %[temp4],      16(%[src2])           \n\t"
+            "lw      %[temp5],      20(%[src2])           \n\t"
+            "lw      %[temp6],      24(%[src2])           \n\t"
+            "lw      %[temp7],      28(%[src2])           \n\t"
+            "addiu   %[src2],       %[src2],        32    \n\t"
+            "sw      %[temp0],      0(%[dst2])            \n\t"
+            "sw      %[temp1],      4(%[dst2])            \n\t"
+            "sw      %[temp2],      8(%[dst2])            \n\t"
+            "sw      %[temp3],      12(%[dst2])           \n\t"
+            "sw      %[temp4],      16(%[dst2])           \n\t"
+            "sw      %[temp5],      20(%[dst2])           \n\t"
+            "sw      %[temp6],      24(%[dst2])           \n\t"
+            "sw      %[temp7],      28(%[dst2])           \n\t"
+            "bne     %[src2],       %[loop_end2],   3b    \n\t"
+            " addiu  %[dst2],       %[dst2],        32    \n\t"
+            ".set pop                                     \n\t"
+
+            : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+              [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+              [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+              [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
+              [loop_end]"=&r"(loop_end), [loop_end1]"=&r"(loop_end1),
+              [loop_end2]"=&r"(loop_end2), [src]"+r"(buf1),
+              [dst]"+r"(buf2), [src1]"+r"(buf3), [dst1]"+r"(buf4),
+              [src2]"+r"(buf5), [dst2]"+r"(buf6)
+            :
+            : "memory"
+        );
+    }
+}
+#endif /* HAVE_MIPSFPU */
+#endif /* HAVE_INLINE_ASM */
+
+void ff_aacdec_init_mips(AACContext *c)
+{
+#if HAVE_INLINE_ASM /* hook the MIPS inline-asm routines into the decoder context */
+    c->apply_ltp           = apply_ltp_mips;
+    c->imdct_and_windowing = imdct_and_windowing_mips;
+#if HAVE_MIPSFPU
+    c->update_ltp          = update_ltp_mips; /* only defined under HAVE_MIPSFPU */
+#endif /* HAVE_MIPSFPU */
+#endif /* HAVE_INLINE_ASM */
+}
diff --git a/libavcodec/mips/aacdec_mips.h b/libavcodec/mips/aacdec_mips.h
new file mode 100644
index 0000000..9ba3079
--- /dev/null
+++ b/libavcodec/mips/aacdec_mips.h
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Authors:  Darko Laus      (darko@mips.com)
+ *           Djordje Pesut   (djordje@mips.com)
+ *           Mirjana Vulin   (mvulin@mips.com)
+ *
+ * AAC decoding functions optimized for MIPS
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/aacdec.c
+ */
+
+#ifndef AVCODEC_MIPS_AACDEC_FLOAT_H
+#define AVCODEC_MIPS_AACDEC_FLOAT_H
+
+#include "libavcodec/aac.h"
+
+#if HAVE_INLINE_ASM && HAVE_MIPSFPU
+static inline float *VMUL2_mips(float *dst, const float *v, unsigned idx,
+                           const float *scale)
+{
+    float temp0, temp1, temp2;
+    int temp3, temp4;
+    float *ret;
+    /* dst[0] = v[idx & 15] * *scale; dst[1] = v[(idx >> 4) & 15] * *scale; returns dst + 2. */
+    __asm__ volatile(
+        "andi    %[temp3],  %[idx],       15           \n\t"
+        "ext     %[temp4],  %[idx],       4,      4    \n\t"
+        "sll     %[temp3],  %[temp3],     2            \n\t"
+        "sll     %[temp4],  %[temp4],     2            \n\t"
+        "lwc1    %[temp2],  0(%[scale])                \n\t"
+        "lwxc1   %[temp0],  %[temp3](%[v])             \n\t"
+        "lwxc1   %[temp1],  %[temp4](%[v])             \n\t"
+        "mul.s   %[temp0],  %[temp0],     %[temp2]     \n\t"
+        "mul.s   %[temp1],  %[temp1],     %[temp2]     \n\t"
+        "addiu   %[ret],    %[dst],       8            \n\t"
+        "swc1    %[temp0],  0(%[dst])                  \n\t"
+        "swc1    %[temp1],  4(%[dst])                  \n\t"
+        /* temp3/temp4 hold the two indices scaled to byte offsets (<< 2) for the indexed loads */
+        : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
+          [temp2]"=&f"(temp2), [temp3]"=&r"(temp3),
+          [temp4]"=&r"(temp4), [ret]"=&r"(ret)
+        : [idx]"r"(idx), [scale]"r"(scale), [v]"r"(v),
+          [dst]"r"(dst)
+        : "memory"
+    );
+    return ret;
+}
+
+static inline float *VMUL4_mips(float *dst, const float *v, unsigned idx,
+                           const float *scale)
+{
+    int temp0, temp1, temp2, temp3;
+    float temp4, temp5, temp6, temp7, temp8;
+    float *ret;
+    /* dst[n] = v[(idx >> (2 * n)) & 3] * *scale for n = 0..3; returns dst + 4. */
+    __asm__ volatile(
+        "andi    %[temp0],  %[idx],       3           \n\t"
+        "ext     %[temp1],  %[idx],       2,      2   \n\t"
+        "ext     %[temp2],  %[idx],       4,      2   \n\t"
+        "ext     %[temp3],  %[idx],       6,      2   \n\t"
+        "sll     %[temp0],  %[temp0],     2           \n\t"
+        "sll     %[temp1],  %[temp1],     2           \n\t"
+        "sll     %[temp2],  %[temp2],     2           \n\t"
+        "sll     %[temp3],  %[temp3],     2           \n\t"
+        "lwc1    %[temp4],  0(%[scale])               \n\t"
+        "lwxc1   %[temp5],  %[temp0](%[v])            \n\t"
+        "lwxc1   %[temp6],  %[temp1](%[v])            \n\t"
+        "lwxc1   %[temp7],  %[temp2](%[v])            \n\t"
+        "lwxc1   %[temp8],  %[temp3](%[v])            \n\t"
+        "mul.s   %[temp5],  %[temp5],     %[temp4]    \n\t"
+        "mul.s   %[temp6],  %[temp6],     %[temp4]    \n\t"
+        "mul.s   %[temp7],  %[temp7],     %[temp4]    \n\t"
+        "mul.s   %[temp8],  %[temp8],     %[temp4]    \n\t"
+        "addiu   %[ret],    %[dst],       16          \n\t"
+        "swc1    %[temp5],  0(%[dst])                 \n\t"
+        "swc1    %[temp6],  4(%[dst])                 \n\t"
+        "swc1    %[temp7],  8(%[dst])                 \n\t"
+        "swc1    %[temp8],  12(%[dst])                \n\t"
+        /* temp0..temp3 hold the four 2-bit indices scaled to byte offsets (<< 2) for the indexed loads */
+        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+          [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+          [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
+          [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
+          [temp8]"=&f"(temp8), [ret]"=&r"(ret)
+        : [idx]"r"(idx), [scale]"r"(scale), [v]"r"(v),
+          [dst]"r"(dst)
+        : "memory"
+    );
+    return ret;
+}
+
+static inline float *VMUL2S_mips(float *dst, const float *v, unsigned idx,
+                            unsigned sign, const float *scale)
+{
+    int temp0, temp1, temp2, temp3, temp4, temp5;
+    float temp6, temp7, temp8, temp9;
+    float *ret;
+    /* dst[0] = v[idx & 15] * s0; dst[1] = v[(idx >> 4) & 15] * s1; s0/s1 are *scale with bit 31 XORed by sign bit 1 / bit 0; returns dst + 2. */
+    __asm__ volatile(
+        "andi    %[temp0],  %[idx],       15         \n\t"
+        "ext     %[temp1],  %[idx],       4,     4   \n\t"
+        "lw      %[temp4],  0(%[scale])              \n\t"
+        "srl     %[temp2],  %[sign],      1          \n\t"
+        "sll     %[temp3],  %[sign],      31         \n\t"
+        "sll     %[temp2],  %[temp2],     31         \n\t"
+        "sll     %[temp0],  %[temp0],     2          \n\t"
+        "sll     %[temp1],  %[temp1],     2          \n\t"
+        "lwxc1   %[temp8],  %[temp0](%[v])           \n\t"
+        "lwxc1   %[temp9],  %[temp1](%[v])           \n\t"
+        "xor     %[temp5],  %[temp4],     %[temp2]   \n\t"
+        "xor     %[temp4],  %[temp4],     %[temp3]   \n\t"
+        "mtc1    %[temp5],  %[temp6]                 \n\t"
+        "mtc1    %[temp4],  %[temp7]                 \n\t"
+        "mul.s   %[temp8],  %[temp8],     %[temp6]   \n\t"
+        "mul.s   %[temp9],  %[temp9],     %[temp7]   \n\t"
+        "addiu   %[ret],    %[dst],       8          \n\t"
+        "swc1    %[temp8],  0(%[dst])                \n\t"
+        "swc1    %[temp9],  4(%[dst])                \n\t"
+        /* *scale is loaded as raw bits with lw, XORed in integer registers, then moved to the FPU with mtc1 */
+        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+          [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+          [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
+          [temp8]"=&f"(temp8), [temp9]"=&f"(temp9),
+          [ret]"=&r"(ret)
+        : [idx]"r"(idx), [scale]"r"(scale), [v]"r"(v),
+          [dst]"r"(dst), [sign]"r"(sign)
+        : "memory"
+    );
+    return ret;
+}
+
+static inline float *VMUL4S_mips(float *dst, const float *v, unsigned idx,
+                            unsigned sign, const float *scale)
+{
+    int temp0, temp1, temp2, temp3, temp4;
+    float temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17;
+    float *ret;
+    unsigned int mask = 1U << 31;
+    /* Stores four products v[2-bit field of idx] * (*scale with bit 31 toggled by the current top bit of sign) to dst[0..3]; returns dst + 4. */
+    __asm__ volatile(
+        "lw      %[temp0],   0(%[scale])               \n\t" /* raw 32-bit pattern of *scale */
+        "and     %[temp1],   %[idx],       3           \n\t" /* idx bits 0-1 */
+        "ext     %[temp2],   %[idx],       2,      2   \n\t" /* idx bits 2-3 */
+        "ext     %[temp3],   %[idx],       4,      2   \n\t" /* idx bits 4-5 */
+        "ext     %[temp4],   %[idx],       6,      2   \n\t" /* idx bits 6-7 */
+        "sll     %[temp1],   %[temp1],     2           \n\t" /* element indices -> byte offsets */
+        "sll     %[temp2],   %[temp2],     2           \n\t"
+        "sll     %[temp3],   %[temp3],     2           \n\t"
+        "sll     %[temp4],   %[temp4],     2           \n\t"
+        "lwxc1   %[temp10],  %[temp1](%[v])            \n\t" /* the four v[] operands */
+        "lwxc1   %[temp11],  %[temp2](%[v])            \n\t"
+        "lwxc1   %[temp12],  %[temp3](%[v])            \n\t"
+        "lwxc1   %[temp13],  %[temp4](%[v])            \n\t"
+        "and     %[temp1],   %[sign],      %[mask]     \n\t" /* bit 31 of sign decides the flip for dst[0] */
+        "ext     %[temp2],   %[idx],       12,     1   \n\t" /* idx bits 12, 13, 14: shift amounts (0 or 1) */
+        "ext     %[temp3],   %[idx],       13,     1   \n\t" /* applied to sign before dst[1], dst[2], dst[3] */
+        "ext     %[temp4],   %[idx],       14,     1   \n\t"
+        "sllv    %[sign],    %[sign],      %[temp2]    \n\t"
+        "xor     %[temp1],   %[temp0],     %[temp1]    \n\t" /* scale bits with bit 31 toggled -> multiplier 0 */
+        "and     %[temp2],   %[sign],      %[mask]     \n\t"
+        "mtc1    %[temp1],   %[temp14]                 \n\t"
+        "xor     %[temp2],   %[temp0],     %[temp2]    \n\t" /* multiplier 1 */
+        "sllv    %[sign],    %[sign],      %[temp3]    \n\t"
+        "mtc1    %[temp2],   %[temp15]                 \n\t"
+        "and     %[temp3],   %[sign],      %[mask]     \n\t"
+        "sllv    %[sign],    %[sign],      %[temp4]    \n\t"
+        "xor     %[temp3],   %[temp0],     %[temp3]    \n\t" /* multiplier 2 */
+        "and     %[temp4],   %[sign],      %[mask]     \n\t"
+        "mtc1    %[temp3],   %[temp16]                 \n\t"
+        "xor     %[temp4],   %[temp0],     %[temp4]    \n\t" /* multiplier 3 */
+        "mtc1    %[temp4],   %[temp17]                 \n\t"
+        "mul.s   %[temp10],  %[temp10],    %[temp14]   \n\t"
+        "mul.s   %[temp11],  %[temp11],    %[temp15]   \n\t"
+        "mul.s   %[temp12],  %[temp12],    %[temp16]   \n\t"
+        "mul.s   %[temp13],  %[temp13],    %[temp17]   \n\t"
+        "addiu   %[ret],     %[dst],       16          \n\t" /* ret = dst + 4 floats */
+        "swc1    %[temp10],  0(%[dst])                 \n\t"
+        "swc1    %[temp11],  4(%[dst])                 \n\t"
+        "swc1    %[temp12],  8(%[dst])                 \n\t"
+        "swc1    %[temp13],  12(%[dst])                \n\t"
+        /* sign is an in/out operand because the sllv instructions shift the local copy in place */
+        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+          [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+          [temp4]"=&r"(temp4), [temp10]"=&f"(temp10),
+          [temp11]"=&f"(temp11), [temp12]"=&f"(temp12),
+          [temp13]"=&f"(temp13), [temp14]"=&f"(temp14),
+          [temp15]"=&f"(temp15), [temp16]"=&f"(temp16),
+          [temp17]"=&f"(temp17), [ret]"=&r"(ret),
+          [sign]"+r"(sign)
+        : [idx]"r"(idx), [scale]"r"(scale), [v]"r"(v),
+          [dst]"r"(dst), [mask]"r"(mask)
+        : "memory"
+    );
+    return ret;
+}
+
+#define VMUL2 VMUL2_mips   /* generic names -> the MIPS inline-asm versions defined in this header */
+#define VMUL4 VMUL4_mips   /* NOTE(review): presumably the including decoder skips its own C versions when these are defined - confirm */
+#define VMUL2S VMUL2S_mips
+#define VMUL4S VMUL4S_mips
+#endif /* HAVE_INLINE_ASM && HAVE_MIPSFPU */
+
+#endif /* AVCODEC_MIPS_AACDEC_FLOAT_H */
diff --git a/libavcodec/mips/aacpsdsp_mips.c b/libavcodec/mips/aacpsdsp_mips.c
new file mode 100644
index 0000000..4730a7f
--- /dev/null
+++ b/libavcodec/mips/aacpsdsp_mips.c
@@ -0,0 +1,459 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Authors:  Darko Laus      (darko@mips.com)
+ *           Djordje Pesut   (djordje@mips.com)
+ *           Mirjana Vulin   (mvulin@mips.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/aacpsdsp.c
+ */
+
+#include "config.h"
+#include "libavcodec/aacpsdsp.h"
+
+#if HAVE_INLINE_ASM
+static void ps_hybrid_analysis_ileave_mips(float (*out)[32][2], float L[2][38][64],
+                                        int i, int len)
+{
+    int temp0, temp1, temp2, temp3;
+    int temp4, temp5, temp6, temp7;
+    float *out1=&out[i][0][0];
+    float *L1=&L[0][0][i];
+    float *j=out1+ len*2;
+    /* For every row from i up to 63 and every k in [0, len): out[i][k][0] = L[0][k][i], out[i][k][1] = L[1][k][i]. */
+    for (; i < 64; i++) {
+        /* j is the end of the current out row; the asm runs until out1 == j, so len must be a positive multiple of 4 */
+        /* each pass copies 4 consecutive k (8 floats); L[0] and L[1] lie 9728 bytes apart, successive k 256 bytes apart */
+        __asm__ volatile (
+        "1:                                          \n\t"
+            "lw      %[temp0],   0(%[L1])            \n\t"
+            "lw      %[temp1],   9728(%[L1])         \n\t"
+            "lw      %[temp2],   256(%[L1])          \n\t"
+            "lw      %[temp3],   9984(%[L1])         \n\t"
+            "lw      %[temp4],   512(%[L1])          \n\t"
+            "lw      %[temp5],   10240(%[L1])        \n\t"
+            "lw      %[temp6],   768(%[L1])          \n\t"
+            "lw      %[temp7],   10496(%[L1])        \n\t"
+            "sw      %[temp0],   0(%[out1])          \n\t"
+            "sw      %[temp1],   4(%[out1])          \n\t"
+            "sw      %[temp2],   8(%[out1])          \n\t"
+            "sw      %[temp3],   12(%[out1])         \n\t"
+            "sw      %[temp4],   16(%[out1])         \n\t"
+            "sw      %[temp5],   20(%[out1])         \n\t"
+            "sw      %[temp6],   24(%[out1])         \n\t"
+            "sw      %[temp7],   28(%[out1])         \n\t"
+            "addiu   %[out1],    %[out1],      32    \n\t"
+            "addiu   %[L1],      %[L1],        1024  \n\t"
+            "bne     %[out1],    %[j],         1b    \n\t"
+            /* the loop tests at the bottom, so the body always runs at least once */
+            : [out1]"+r"(out1), [L1]"+r"(L1), [j]"+r"(j),
+              [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+              [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+              [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+              [temp6]"=&r"(temp6), [temp7]"=&r"(temp7)
+            : [len]"r"(len)
+            : "memory"
+        );
+        out1-=(len<<1)-64;  /* rewind the 2*len floats just written, then step one out row (64 floats) */
+        L1-=(len<<6)-1;     /* rewind len rows of 64 floats, then step to the next column */
+        j+=64;              /* out rows are 64 floats apart whatever len is; len*2 only matched that for len == 32 */
+    }
+}
+
+static void ps_hybrid_synthesis_deint_mips(float out[2][38][64],
+                                        float (*in)[32][2],
+                                        int i, int len)
+{
+    int n;
+    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+    float *out1 = (float*)out + i;
+    float *out2 = (float*)out + 2432 + i;
+    float *in1 = (float*)in + 64 * i;
+    float *in2 = (float*)in + 64 * i + 1;
+    /* For every row from i up to 63: out[0][n][i] = in[i][n][0] and out[1][n][i] = in[i][n][1]; out2 starts 2432 (= 38 * 64) floats after out1. */
+    for (; i < 64; i++) {
+        for (n = 0; n < 7; n++) {
+            /* NOTE(review): len is never read; 7 passes here plus the final block below always cover n = 0..31 */
+            /* one pass copies 4 consecutive n (8 floats read, 4 stored per plane) */
+            __asm__ volatile (
+                 "lw      %[temp0],   0(%[in1])               \n\t"
+                 "lw      %[temp1],   0(%[in2])               \n\t"
+                 "lw      %[temp2],   8(%[in1])               \n\t"
+                 "lw      %[temp3],   8(%[in2])               \n\t"
+                 "lw      %[temp4],   16(%[in1])              \n\t"
+                 "lw      %[temp5],   16(%[in2])              \n\t"
+                 "lw      %[temp6],   24(%[in1])              \n\t"
+                 "lw      %[temp7],   24(%[in2])              \n\t"
+                 "addiu   %[out1],    %[out1],         1024   \n\t"
+                 "addiu   %[out2],    %[out2],         1024   \n\t"
+                 "addiu   %[in1],     %[in1],          32     \n\t"
+                 "addiu   %[in2],     %[in2],          32     \n\t"
+                 "sw      %[temp0],   -1024(%[out1])          \n\t"
+                 "sw      %[temp1],   -1024(%[out2])          \n\t"
+                 "sw      %[temp2],   -768(%[out1])           \n\t"
+                 "sw      %[temp3],   -768(%[out2])           \n\t"
+                 "sw      %[temp4],   -512(%[out1])           \n\t"
+                 "sw      %[temp5],   -512(%[out2])           \n\t"
+                 "sw      %[temp6],   -256(%[out1])           \n\t"
+                 "sw      %[temp7],   -256(%[out2])           \n\t"
+                 /* pointers are bumped before the stores, hence the negative offsets; rows of out are 256 bytes apart */
+                 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+                   [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+                   [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+                   [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
+                   [out1]"+r"(out1), [out2]"+r"(out2),
+                   [in1]"+r"(in1), [in2]"+r"(in2)
+                 :
+                 : "memory"
+            );
+        }
+        /* eighth pass (n = 28..31): same copy, but out1/out2 also step back to row 0 of the next column */
+        __asm__ volatile (
+            "lw      %[temp0],   0(%[in1])               \n\t"
+            "lw      %[temp1],   0(%[in2])               \n\t"
+            "lw      %[temp2],   8(%[in1])               \n\t"
+            "lw      %[temp3],   8(%[in2])               \n\t"
+            "lw      %[temp4],   16(%[in1])              \n\t"
+            "lw      %[temp5],   16(%[in2])              \n\t"
+            "lw      %[temp6],   24(%[in1])              \n\t"
+            "lw      %[temp7],   24(%[in2])              \n\t"
+            "addiu   %[out1],    %[out1],        -7164   \n\t" /* -(7 * 1024) + 4: undo the 7 passes, advance one float */
+            "addiu   %[out2],    %[out2],        -7164   \n\t"
+            "addiu   %[in1],     %[in1],         32      \n\t"
+            "addiu   %[in2],     %[in2],         32      \n\t"
+            "sw      %[temp0],   7164(%[out1])           \n\t" /* offsets 7164 + k * 256 reach rows 28..31 of the old column */
+            "sw      %[temp1],   7164(%[out2])           \n\t"
+            "sw      %[temp2],   7420(%[out1])           \n\t"
+            "sw      %[temp3],   7420(%[out2])           \n\t"
+            "sw      %[temp4],   7676(%[out1])           \n\t"
+            "sw      %[temp5],   7676(%[out2])           \n\t"
+            "sw      %[temp6],   7932(%[out1])           \n\t"
+            "sw      %[temp7],   7932(%[out2])           \n\t"
+            /* after 8 passes in1/in2 have advanced 256 bytes, i.e. one full in[i] row of 32 complex floats */
+            : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+              [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+              [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+              [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
+              [out1]"+r"(out1), [out2]"+r"(out2),
+              [in1]"+r"(in1), [in2]"+r"(in2)
+            :
+            : "memory"
+        );
+    }
+}
+
+#if HAVE_MIPSFPU
+static void ps_add_squares_mips(float *dst, const float (*src)[2], int n)
+{
+    int i;
+    float temp0, temp1, temp2, temp3, temp4, temp5;
+    float temp6, temp7, temp8, temp9, temp10, temp11;
+    float *src0 = (float*)&src[0][0];
+    float *dst0 = &dst[0];
+    /* dst[k] += src[k][0]^2 + src[k][1]^2 for k in [0, n); the bound follows n instead of a fixed 32 elements. */
+    for (i = 0; i + 4 <= n; i += 4) { /* asm body handles 4 elements per pass and advances dst0/src0 itself */
+        __asm__ volatile (
+            "lwc1     %[temp0],    0(%[src0])                          \n\t"
+            "lwc1     %[temp1],    4(%[src0])                          \n\t"
+            "lwc1     %[temp2],    8(%[src0])                          \n\t"
+            "lwc1     %[temp3],    12(%[src0])                         \n\t"
+            "lwc1     %[temp4],    16(%[src0])                         \n\t"
+            "lwc1     %[temp5],    20(%[src0])                         \n\t"
+            "lwc1     %[temp6],    24(%[src0])                         \n\t"
+            "lwc1     %[temp7],    28(%[src0])                         \n\t"
+            "lwc1     %[temp8],    0(%[dst0])                          \n\t"
+            "lwc1     %[temp9],    4(%[dst0])                          \n\t"
+            "lwc1     %[temp10],   8(%[dst0])                          \n\t"
+            "lwc1     %[temp11],   12(%[dst0])                         \n\t"
+            "mul.s    %[temp1],    %[temp1],    %[temp1]               \n\t" /* im^2 */
+            "mul.s    %[temp3],    %[temp3],    %[temp3]               \n\t"
+            "mul.s    %[temp5],    %[temp5],    %[temp5]               \n\t"
+            "mul.s    %[temp7],    %[temp7],    %[temp7]               \n\t"
+            "madd.s   %[temp0],    %[temp1],    %[temp0],   %[temp0]   \n\t" /* re * re + im^2 */
+            "madd.s   %[temp2],    %[temp3],    %[temp2],   %[temp2]   \n\t"
+            "madd.s   %[temp4],    %[temp5],    %[temp4],   %[temp4]   \n\t"
+            "madd.s   %[temp6],    %[temp7],    %[temp6],   %[temp6]   \n\t"
+            "add.s    %[temp0],    %[temp8],    %[temp0]               \n\t" /* accumulate onto the old dst value */
+            "add.s    %[temp2],    %[temp9],    %[temp2]               \n\t"
+            "add.s    %[temp4],    %[temp10],   %[temp4]               \n\t"
+            "add.s    %[temp6],    %[temp11],   %[temp6]               \n\t"
+            "swc1     %[temp0],    0(%[dst0])                          \n\t"
+            "swc1     %[temp2],    4(%[dst0])                          \n\t"
+            "swc1     %[temp4],    8(%[dst0])                          \n\t"
+            "swc1     %[temp6],    12(%[dst0])                         \n\t"
+            "addiu    %[dst0],     %[dst0],     16                     \n\t"
+            "addiu    %[src0],     %[src0],     32                     \n\t"
+            : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
+              [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
+              [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
+              [temp9]"=&f"(temp9), [dst0]"+r"(dst0), [src0]"+r"(src0),
+              [temp10]"=&f"(temp10), [temp11]"=&f"(temp11)
+            :
+            : "memory"
+        );
+    }
+    for (; i < n; i++, src0 += 2) /* scalar tail for the last n % 4 elements */
+        *dst0++ += src0[0] * src0[0] + src0[1] * src0[1];
+}
+
+static void ps_mul_pair_single_mips(float (*dst)[2], float (*src0)[2], float *src1,
+                                 int n)
+{
+    float temp0, temp1, temp2;
+    float *p_d = &dst[0][0], *p_s0 = &src0[0][0], *p_s1 = &src1[0], *last;
+    /* dst[k][c] = src0[k][c] * src1[k] for k in [0, n) and c in {0, 1}; nothing is touched when n <= 0. */
+    if (n <= 0)
+        return;
+    /* bne tests p_s1 before the delay-slot addiu advances it, so the sentinel is the last element, not one past it */
+    last = p_s1 + n - 1;
+    __asm__ volatile(
+        ".set push                                      \n\t"
+        ".set noreorder                                 \n\t"
+        "1:                                             \n\t"
+        "lwc1     %[temp2],   0(%[p_s1])                \n\t" /* src1[k] */
+        "lwc1     %[temp0],   0(%[p_s0])                \n\t" /* src0[k][0] */
+        "lwc1     %[temp1],   4(%[p_s0])                \n\t" /* src0[k][1] */
+        "addiu    %[p_d],     %[p_d],       8           \n\t"
+        "mul.s    %[temp0],   %[temp0],     %[temp2]    \n\t"
+        "mul.s    %[temp1],   %[temp1],     %[temp2]    \n\t"
+        "addiu    %[p_s0],    %[p_s0],      8           \n\t"
+        "swc1     %[temp0],   -8(%[p_d])                \n\t" /* p_d was already bumped, hence the negative offsets */
+        "swc1     %[temp1],   -4(%[p_d])                \n\t"
+        "bne      %[p_s1],    %[last],      1b          \n\t" /* loop again unless element k was the last one */
+        " addiu   %[p_s1],    %[p_s1],      4           \n\t" /* delay slot: runs after the comparison */
+        ".set pop                                       \n\t"
+
+        : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
+          [temp2]"=&f"(temp2), [p_d]"+r"(p_d),
+          [p_s0]"+r"(p_s0), [p_s1]"+r"(p_s1)
+        : [last]"r"(last)
+        : "memory"
+    );
+}
+
+static void ps_decorrelate_mips(float (*out)[2], float (*delay)[2],
+                             float (*ap_delay)[PS_QMF_TIME_SLOTS + PS_MAX_AP_DELAY][2],
+                             const float phi_fract[2], float (*Q_fract)[2],
+                             const float *transient_gain,
+                             float g_decay_slope,
+                             int len)
+{
+    float *p_delay = &delay[0][0];
+    float *p_out = &out[0][0];
+    float *p_ap_delay = &ap_delay[0][0][0];
+    float *p_t_gain = (float*)transient_gain;
+    float *p_Q_fract = &Q_fract[0][0];
+    float ag0, ag1, ag2;
+    float phi_fract0 = phi_fract[0];
+    float phi_fract1 = phi_fract[1];
+    float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
+    /* Per complex sample: multiply delay[n] by phi_fract, run it through three cascaded stages that read and update ap_delay, store transient_gain[n] * result in out[n]. */
+    len = (int)((int*)p_delay + (len << 1)); /* len becomes the end address of delay[]; NOTE(review): pointer narrowed to int */
+    /* the loop tests at the bottom (body runs at least once); NOTE(review): offsets below assume ap_delay rows are 296 bytes apart - confirm */
+    /* merged 2 loops */
+    __asm__ volatile(
+        ".set    push                                                    \n\t"
+        ".set    noreorder                                               \n\t"
+        "li.s    %[ag0],        0.65143905753106                         \n\t" /* ag0..ag2 = fixed constants ... */
+        "li.s    %[ag1],        0.56471812200776                         \n\t"
+        "li.s    %[ag2],        0.48954165955695                         \n\t"
+        "mul.s   %[ag0],        %[ag0],        %[g_decay_slope]          \n\t" /* ... scaled by g_decay_slope */
+        "mul.s   %[ag1],        %[ag1],        %[g_decay_slope]          \n\t"
+        "mul.s   %[ag2],        %[ag2],        %[g_decay_slope]          \n\t"
+    "1:                                                                  \n\t"
+        "lwc1    %[temp0],      0(%[p_delay])                            \n\t" /* delay[n] re */
+        "lwc1    %[temp1],      4(%[p_delay])                            \n\t" /* delay[n] im */
+        "lwc1    %[temp4],      16(%[p_ap_delay])                        \n\t" /* stage 0 input: 2 complex entries ahead */
+        "lwc1    %[temp5],      20(%[p_ap_delay])                        \n\t"
+        "mul.s   %[temp3],      %[temp0],      %[phi_fract1]             \n\t"
+        "lwc1    %[temp6],      0(%[p_Q_fract])                          \n\t" /* Q_fract[0] re */
+        "mul.s   %[temp2],      %[temp1],      %[phi_fract1]             \n\t"
+        "lwc1    %[temp7],      4(%[p_Q_fract])                          \n\t" /* Q_fract[0] im */
+        "madd.s  %[temp3],      %[temp3],      %[temp1], %[phi_fract0]   \n\t" /* temp3 = re * phi1 + im * phi0 */
+        "msub.s  %[temp2],      %[temp2],      %[temp0], %[phi_fract0]   \n\t" /* temp2 = re * phi0 - im * phi1 */
+        "mul.s   %[temp8],      %[temp5],      %[temp7]                  \n\t"
+        "mul.s   %[temp9],      %[temp4],      %[temp7]                  \n\t"
+        "lwc1    %[temp7],      12(%[p_Q_fract])                         \n\t" /* Q_fract[1] im */
+        "mul.s   %[temp0],      %[ag0],        %[temp2]                  \n\t"
+        "mul.s   %[temp1],      %[ag0],        %[temp3]                  \n\t"
+        "msub.s  %[temp8],      %[temp8],      %[temp4], %[temp6]        \n\t"
+        "lwc1    %[temp4],      304(%[p_ap_delay])                       \n\t" /* stage 1 input */
+        "madd.s  %[temp9],      %[temp9],      %[temp5], %[temp6]        \n\t"
+        "lwc1    %[temp5],      308(%[p_ap_delay])                       \n\t"
+        "sub.s   %[temp0],      %[temp8],      %[temp0]                  \n\t"
+        "sub.s   %[temp1],      %[temp9],      %[temp1]                  \n\t"
+        "madd.s  %[temp2],      %[temp2],      %[ag0],   %[temp0]        \n\t"
+        "lwc1    %[temp6],      8(%[p_Q_fract])                          \n\t" /* Q_fract[1] re */
+        "madd.s  %[temp3],      %[temp3],      %[ag0],   %[temp1]        \n\t"
+        "mul.s   %[temp8],      %[temp5],      %[temp7]                  \n\t"
+        "mul.s   %[temp9],      %[temp4],      %[temp7]                  \n\t"
+        "lwc1    %[temp7],      20(%[p_Q_fract])                         \n\t" /* Q_fract[2] im */
+        "msub.s  %[temp8],      %[temp8],      %[temp4], %[temp6]        \n\t"
+        "swc1    %[temp2],      40(%[p_ap_delay])                        \n\t" /* stage 0 state written 5 complex entries ahead */
+        "mul.s   %[temp2],      %[ag1],        %[temp0]                  \n\t"
+        "swc1    %[temp3],      44(%[p_ap_delay])                        \n\t"
+        "mul.s   %[temp3],      %[ag1],        %[temp1]                  \n\t"
+        "lwc1    %[temp4],      592(%[p_ap_delay])                       \n\t" /* stage 2 input */
+        "madd.s  %[temp9],      %[temp9],      %[temp5], %[temp6]        \n\t"
+        "lwc1    %[temp5],      596(%[p_ap_delay])                       \n\t"
+        "sub.s   %[temp2],      %[temp8],      %[temp2]                  \n\t"
+        "sub.s   %[temp3],      %[temp9],      %[temp3]                  \n\t"
+        "lwc1    %[temp6],      16(%[p_Q_fract])                         \n\t" /* Q_fract[2] re */
+        "madd.s  %[temp0],      %[temp0],      %[ag1],   %[temp2]        \n\t"
+        "madd.s  %[temp1],      %[temp1],      %[ag1],   %[temp3]        \n\t"
+        "mul.s   %[temp8],      %[temp5],      %[temp7]                  \n\t"
+        "mul.s   %[temp9],      %[temp4],      %[temp7]                  \n\t"
+        "msub.s  %[temp8],      %[temp8],      %[temp4], %[temp6]        \n\t"
+        "madd.s  %[temp9],      %[temp9],      %[temp5], %[temp6]        \n\t"
+        "swc1    %[temp0],      336(%[p_ap_delay])                       \n\t" /* stage 1 state (40 + 296) */
+        "mul.s   %[temp0],      %[ag2],        %[temp2]                  \n\t"
+        "swc1    %[temp1],      340(%[p_ap_delay])                       \n\t"
+        "mul.s   %[temp1],      %[ag2],        %[temp3]                  \n\t"
+        "lwc1    %[temp4],      0(%[p_t_gain])                           \n\t" /* transient_gain[n] */
+        "sub.s   %[temp0],      %[temp8],      %[temp0]                  \n\t"
+        "addiu   %[p_ap_delay], %[p_ap_delay], 8                         \n\t"
+        "sub.s   %[temp1],      %[temp9],      %[temp1]                  \n\t"
+        "addiu   %[p_t_gain],   %[p_t_gain],   4                         \n\t"
+        "madd.s  %[temp2],      %[temp2],      %[ag2],   %[temp0]        \n\t"
+        "addiu   %[p_delay],    %[p_delay],    8                         \n\t"
+        "madd.s  %[temp3],      %[temp3],      %[ag2],   %[temp1]        \n\t"
+        "addiu   %[p_out],      %[p_out],      8                         \n\t"
+        "mul.s   %[temp5],      %[temp4],      %[temp0]                  \n\t"
+        "mul.s   %[temp6],      %[temp4],      %[temp1]                  \n\t"
+        "swc1    %[temp2],      624(%[p_ap_delay])                       \n\t" /* stage 2 state: 632 before the pointer bump (40 + 2 * 296) */
+        "swc1    %[temp3],      628(%[p_ap_delay])                       \n\t"
+        "swc1    %[temp5],      -8(%[p_out])                             \n\t" /* out[n] re */
+        "swc1    %[temp6],      -4(%[p_out])                             \n\t" /* out[n] im */
+        "bne     %[p_delay],    %[len],        1b                        \n\t" /* p_delay was advanced above, so exactly len samples are processed */
+        " swc1   %[temp6],      -4(%[p_out])                             \n\t" /* delay slot: repeats the previous store */
+        ".set    pop                                                     \n\t"
+        /* p_Q_fract is declared in/out although no instruction above modifies it */
+        : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
+          [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
+          [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
+          [temp9]"=&f"(temp9), [p_delay]"+r"(p_delay), [p_ap_delay]"+r"(p_ap_delay),
+          [p_Q_fract]"+r"(p_Q_fract), [p_t_gain]"+r"(p_t_gain), [p_out]"+r"(p_out),
+          [ag0]"=&f"(ag0), [ag1]"=&f"(ag1), [ag2]"=&f"(ag2)
+        : [phi_fract0]"f"(phi_fract0), [phi_fract1]"f"(phi_fract1),
+          [len]"r"(len), [g_decay_slope]"f"(g_decay_slope)
+        : "memory"
+    );
+}
+
+static void ps_stereo_interpolate_mips(float (*l)[2], float (*r)[2],
+                                    float h[2][4], float h_step[2][4],
+                                    int len)
+{
+    float h0 = h[0][0];
+    float h1 = h[0][1];
+    float h2 = h[0][2];
+    float h3 = h[0][3];
+    float hs0 = h_step[0][0];
+    float hs1 = h_step[0][1];
+    float hs2 = h_step[0][2];
+    float hs3 = h_step[0][3];
+    float temp0, temp1, temp2, temp3;
+    float l_re, l_im, r_re, r_im;
+    /* In place, per complex sample: h0..h3 += hs0..hs3, then l' = h0 * l + h2 * r and r' = h1 * l + h3 * r; only h[0] and h_step[0] are read. */
+    len = (int)((int*)l + (len << 1)); /* len becomes the end address of l[]; NOTE(review): pointer narrowed to int */
+    /* the loop tests at the bottom, so the body runs at least once */
+    __asm__ volatile(
+        ".set    push                                     \n\t"
+        ".set    noreorder                                \n\t"
+    "1:                                                   \n\t"
+        "add.s   %[h0],     %[h0],     %[hs0]             \n\t" /* coefficients are stepped before they are used */
+        "lwc1    %[l_re],   0(%[l])                       \n\t"
+        "add.s   %[h1],     %[h1],     %[hs1]             \n\t"
+        "lwc1    %[r_re],   0(%[r])                       \n\t"
+        "add.s   %[h2],     %[h2],     %[hs2]             \n\t"
+        "lwc1    %[l_im],   4(%[l])                       \n\t"
+        "add.s   %[h3],     %[h3],     %[hs3]             \n\t"
+        "lwc1    %[r_im],   4(%[r])                       \n\t"
+        "mul.s   %[temp0],  %[h0],     %[l_re]            \n\t"
+        "addiu   %[l],      %[l],      8                  \n\t"
+        "mul.s   %[temp2],  %[h1],     %[l_re]            \n\t"
+        "addiu   %[r],      %[r],      8                  \n\t"
+        "madd.s  %[temp0],  %[temp0],  %[h2],   %[r_re]   \n\t" /* new l re = h0 * l_re + h2 * r_re */
+        "madd.s  %[temp2],  %[temp2],  %[h3],   %[r_re]   \n\t" /* new r re = h1 * l_re + h3 * r_re */
+        "mul.s   %[temp1],  %[h0],     %[l_im]            \n\t"
+        "mul.s   %[temp3],  %[h1],     %[l_im]            \n\t"
+        "madd.s  %[temp1],  %[temp1],  %[h2],   %[r_im]   \n\t" /* new l im = h0 * l_im + h2 * r_im */
+        "madd.s  %[temp3],  %[temp3],  %[h3],   %[r_im]   \n\t" /* new r im = h1 * l_im + h3 * r_im */
+        "swc1    %[temp0],  -8(%[l])                      \n\t" /* l and r were already bumped, hence the negative offsets */
+        "swc1    %[temp2],  -8(%[r])                      \n\t"
+        "swc1    %[temp1],  -4(%[l])                      \n\t"
+        "bne     %[l],      %[len],    1b                 \n\t"
+        " swc1   %[temp3],  -4(%[r])                      \n\t" /* delay slot: last store of the sample */
+        ".set    pop                                      \n\t"
+        /* h0..h3, l and r are in/out operands: the asm updates them on every pass */
+        : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
+          [temp2]"=&f"(temp2), [temp3]"=&f"(temp3),
+          [h0]"+f"(h0), [h1]"+f"(h1), [h2]"+f"(h2),
+          [h3]"+f"(h3), [l]"+r"(l), [r]"+r"(r),
+          [l_re]"=&f"(l_re), [l_im]"=&f"(l_im),
+          [r_re]"=&f"(r_re), [r_im]"=&f"(r_im)
+        : [hs0]"f"(hs0), [hs1]"f"(hs1), [hs2]"f"(hs2),
+          [hs3]"f"(hs3), [len]"r"(len)
+        : "memory"
+    );
+}
+#endif /* HAVE_MIPSFPU */
+#endif /* HAVE_INLINE_ASM */
+
+void ff_psdsp_init_mips(PSDSPContext *s)
+{   /* Point the PSDSPContext callbacks at the MIPS inline-asm routines of this file. */
+#if HAVE_INLINE_ASM && HAVE_MIPSFPU /* routines that use FPU instructions */
+    s->stereo_interpolate[0]  = ps_stereo_interpolate_mips;
+    s->decorrelate            = ps_decorrelate_mips;
+    s->mul_pair_single        = ps_mul_pair_single_mips;
+    s->add_squares            = ps_add_squares_mips;
+#endif /* HAVE_INLINE_ASM && HAVE_MIPSFPU */
+#if HAVE_INLINE_ASM                 /* lw/sw copy loops, no FPU needed */
+    s->hybrid_synthesis_deint = ps_hybrid_synthesis_deint_mips;
+    s->hybrid_analysis_ileave = ps_hybrid_analysis_ileave_mips;
+#endif /* HAVE_INLINE_ASM */
+}
diff --git a/libavcodec/mips/aacpsy_mips.h b/libavcodec/mips/aacpsy_mips.h
new file mode 100644
index 0000000..d1353c4
--- /dev/null
+++ b/libavcodec/mips/aacpsy_mips.h
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Bojan Zivkovic   (bojan@mips.com)
+ *
+ * AAC encoder psychoacoustic model routines optimized
+ * for MIPS floating-point architecture
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/aacpsy.c
+ */
+
+#ifndef AVCODEC_MIPS_AACPSY_MIPS_H
+#define AVCODEC_MIPS_AACPSY_MIPS_H
+
+#if HAVE_INLINE_ASM && HAVE_MIPSFPU && ( PSY_LAME_FIR_LEN == 21 )
+/**
+ * Compute energy, thr and nz_lines for every band of one channel, four
+ * coefficients per step (band sizes presumably multiples of 4 -- TODO confirm).
+ */
+static void calc_thr_3gpp_mips(const FFPsyWindowInfo *wi, const int num_bands,
+                               AacPsyChannel *pch, const uint8_t *band_sizes,
+                               const float *coefs)
+{
+    int i, w, g, start = 0; /* start: index in coefs of the current band */
+    for (w = 0; w < wi->num_windows*16; w += 16) { /* window n owns band[16*n ..] */
+        for (g = 0; g < num_bands; g++) {
+            AacPsyBand *band = &pch->band[w+g];
+            float form_factor = 0.0f; /* running sum of sqrt(|coef|) */
+            float Temp;
+            band->energy = 0.0f;
+            for (i = 0; i < band_sizes[g]; i+=4) {
+                float a, b, c, d, ax, bx, cx, dx;
+                float *cf = (float *)&coefs[start+i];
+                __asm__ volatile (
+                    "lwc1   %[a],   0(%[cf])                \n\t" /* load 4 coefficients */
+                    "lwc1   %[b],   4(%[cf])                \n\t"
+                    "lwc1   %[c],   8(%[cf])                \n\t"
+                    "lwc1   %[d],   12(%[cf])               \n\t"
+                    "abs.s  %[a],   %[a]                    \n\t" /* x = |x| */
+                    "abs.s  %[b],   %[b]                    \n\t"
+                    "abs.s  %[c],   %[c]                    \n\t"
+                    "abs.s  %[d],   %[d]                    \n\t"
+                    "sqrt.s %[ax],  %[a]                    \n\t" /* sqrt(|x|) */
+                    "sqrt.s %[bx],  %[b]                    \n\t"
+                    "sqrt.s %[cx],  %[c]                    \n\t"
+                    "sqrt.s %[dx],  %[d]                    \n\t"
+                    "madd.s %[e],   %[e],   %[a],   %[a]    \n\t" /* energy += x*x */
+                    "madd.s %[e],   %[e],   %[b],   %[b]    \n\t"
+                    "madd.s %[e],   %[e],   %[c],   %[c]    \n\t"
+                    "madd.s %[e],   %[e],   %[d],   %[d]    \n\t"
+                    "add.s  %[f],   %[f],   %[ax]           \n\t" /* form_factor += sqrt(|x|) */
+                    "add.s  %[f],   %[f],   %[bx]           \n\t"
+                    "add.s  %[f],   %[f],   %[cx]           \n\t"
+                    "add.s  %[f],   %[f],   %[dx]           \n\t"
+                    : [a]"=&f"(a), [b]"=&f"(b),
+                      [c]"=&f"(c), [d]"=&f"(d),
+                      [e]"+f"(band->energy), [f]"+f"(form_factor),
+                      [ax]"=&f"(ax), [bx]"=&f"(bx),
+                      [cx]"=&f"(cx), [dx]"=&f"(dx)
+                    : [cf]"r"(cf)
+                    : "memory"
+                );
+            }
+            /* Zero energy would divide by zero and store 0 * inf = NaN in nz_lines. */
+            Temp = band->energy > 0.0f ?
+                   sqrtf((float)band_sizes[g] / band->energy) : 0.0f;
+            band->thr      = band->energy * 0.001258925f;
+            band->nz_lines = form_factor * sqrtf(Temp);
+            start += band_sizes[g];
+        }
+    }
+}
+
+static void psy_hp_filter_mips(const float *firbuf, float *hpfsmpl, const float * psy_fir_coeffs)
+{   /* FIR-filter firbuf into hpfsmpl: AAC_BLOCK_SIZE_LONG outputs, four per asm pass. */
+    float sum1, sum2, sum3, sum4;   /* accumulators for the four outputs of one pass */
+    float *fb = (float*)firbuf;     /* input cursor; the asm only loads through it */
+    float *fb_end = fb + AAC_BLOCK_SIZE_LONG;   /* the loop exits when fb equals this */
+    float *hp = hpfsmpl;            /* output cursor */
+    /* out[n] = 32768 * (in[n+10] + sum over k=0..4 of coeffk * (in[n+1+2k] + in[n+20-2k])) */
+    float coeff0 = psy_fir_coeffs[1];   /* only the odd-indexed coefficients are read */
+    float coeff1 = psy_fir_coeffs[3];
+    float coeff2 = psy_fir_coeffs[5];
+    float coeff3 = psy_fir_coeffs[7];
+    float coeff4 = psy_fir_coeffs[9];
+    /* The last pass loads up to fb_end[19], so firbuf must hold AAC_BLOCK_SIZE_LONG + 20 floats. */
+    __asm__ volatile (
+        ".set push                                          \n\t"
+        ".set noreorder                                     \n\t" /* the branch delay slot is filled by hand below */
+
+        "li.s   $f12,       32768                           \n\t" /* $f12 = output scale */
+        "1:                                                 \n\t"
+        "lwc1   $f0,        40(%[fb])                       \n\t" /* in[10]: centre sample of sum1 */
+        "lwc1   $f1,        4(%[fb])                        \n\t"
+        "lwc1   $f2,        80(%[fb])                       \n\t"
+        "lwc1   $f3,        44(%[fb])                       \n\t"
+        "lwc1   $f4,        8(%[fb])                        \n\t"
+        "madd.s %[sum1],    $f0,        $f1,    %[coeff0]   \n\t" /* sum1 = in[10] + in[1]*coeff0 */
+        "lwc1   $f5,        84(%[fb])                       \n\t"
+        "lwc1   $f6,        48(%[fb])                       \n\t"
+        "madd.s %[sum2],    $f3,        $f4,    %[coeff0]   \n\t" /* sum2 = in[11] + in[2]*coeff0 */
+        "lwc1   $f7,        12(%[fb])                       \n\t"
+        "madd.s %[sum1],    %[sum1],    $f2,    %[coeff0]   \n\t"
+        "lwc1   $f8,        88(%[fb])                       \n\t"
+        "lwc1   $f9,        52(%[fb])                       \n\t"
+        "madd.s %[sum2],    %[sum2],    $f5,    %[coeff0]   \n\t"
+        "madd.s %[sum3],    $f6,        $f7,    %[coeff0]   \n\t" /* sum3 = in[12] + in[3]*coeff0 */
+        "lwc1   $f10,       16(%[fb])                       \n\t"
+        "lwc1   $f11,       92(%[fb])                       \n\t"
+        "madd.s %[sum1],    %[sum1],    $f7,    %[coeff1]   \n\t"
+        "lwc1   $f1,        72(%[fb])                       \n\t"
+        "madd.s %[sum3],    %[sum3],    $f8,    %[coeff0]   \n\t"
+        "madd.s %[sum4],    $f9,        $f10,   %[coeff0]   \n\t" /* sum4 = in[13] + in[4]*coeff0 */
+        "madd.s %[sum2],    %[sum2],    $f10,   %[coeff1]   \n\t"
+        "madd.s %[sum1],    %[sum1],    $f1,    %[coeff1]   \n\t"
+        "lwc1   $f4,        76(%[fb])                       \n\t"
+        "lwc1   $f8,        20(%[fb])                       \n\t"
+        "madd.s %[sum4],    %[sum4],    $f11,   %[coeff0]   \n\t"
+        "lwc1   $f11,       24(%[fb])                       \n\t"
+        "madd.s %[sum2],    %[sum2],    $f4,    %[coeff1]   \n\t"
+        "madd.s %[sum1],    %[sum1],    $f8,    %[coeff2]   \n\t"
+        "madd.s %[sum3],    %[sum3],    $f8,    %[coeff1]   \n\t"
+        "madd.s %[sum4],    %[sum4],    $f11,   %[coeff1]   \n\t"
+        "lwc1   $f7,        64(%[fb])                       \n\t"
+        "madd.s %[sum2],    %[sum2],    $f11,   %[coeff2]   \n\t"
+        "lwc1   $f10,       68(%[fb])                       \n\t"
+        "madd.s %[sum3],    %[sum3],    $f2,    %[coeff1]   \n\t"
+        "madd.s %[sum4],    %[sum4],    $f5,    %[coeff1]   \n\t"
+        "madd.s %[sum1],    %[sum1],    $f7,    %[coeff2]   \n\t"
+        "madd.s %[sum2],    %[sum2],    $f10,   %[coeff2]   \n\t"
+        "lwc1   $f2,        28(%[fb])                       \n\t"
+        "lwc1   $f5,        32(%[fb])                       \n\t"
+        "lwc1   $f8,        56(%[fb])                       \n\t"
+        "lwc1   $f11,       60(%[fb])                       \n\t"
+        "madd.s %[sum3],    %[sum3],    $f2,    %[coeff2]   \n\t"
+        "madd.s %[sum4],    %[sum4],    $f5,    %[coeff2]   \n\t"
+        "madd.s %[sum1],    %[sum1],    $f2,    %[coeff3]   \n\t"
+        "madd.s %[sum2],    %[sum2],    $f5,    %[coeff3]   \n\t"
+        "madd.s %[sum3],    %[sum3],    $f1,    %[coeff2]   \n\t"
+        "madd.s %[sum4],    %[sum4],    $f4,    %[coeff2]   \n\t"
+        "madd.s %[sum1],    %[sum1],    $f8,    %[coeff3]   \n\t"
+        "madd.s %[sum2],    %[sum2],    $f11,   %[coeff3]   \n\t"
+        "lwc1   $f1,        36(%[fb])                       \n\t" /* last load of this pass */
+        "addiu  %[fb],      %[fb],      16                  \n\t" /* fb += 4 floats; only registers are used from here on */
+        "madd.s %[sum4],    %[sum4],    $f0,    %[coeff3]   \n\t"
+        "madd.s %[sum3],    %[sum3],    $f1,    %[coeff3]   \n\t"
+        "madd.s %[sum1],    %[sum1],    $f1,    %[coeff4]   \n\t"
+        "madd.s %[sum2],    %[sum2],    $f0,    %[coeff4]   \n\t"
+        "madd.s %[sum4],    %[sum4],    $f10,   %[coeff3]   \n\t"
+        "madd.s %[sum3],    %[sum3],    $f7,    %[coeff3]   \n\t"
+        "madd.s %[sum1],    %[sum1],    $f6,    %[coeff4]   \n\t"
+        "madd.s %[sum2],    %[sum2],    $f9,    %[coeff4]   \n\t"
+        "madd.s %[sum4],    %[sum4],    $f6,    %[coeff4]   \n\t"
+        "madd.s %[sum3],    %[sum3],    $f3,    %[coeff4]   \n\t"
+        "mul.s  %[sum1],    %[sum1],    $f12                \n\t" /* scale finished sums by 32768 */
+        "mul.s  %[sum2],    %[sum2],    $f12                \n\t"
+        "madd.s %[sum4],    %[sum4],    $f11,   %[coeff4]   \n\t"
+        "madd.s %[sum3],    %[sum3],    $f8,    %[coeff4]   \n\t"
+        "swc1   %[sum1],    0(%[hp])                        \n\t" /* out[n]   */
+        "swc1   %[sum2],    4(%[hp])                        \n\t" /* out[n+1] */
+        "mul.s  %[sum4],    %[sum4],    $f12                \n\t"
+        "mul.s  %[sum3],    %[sum3],    $f12                \n\t"
+        "swc1   %[sum4],    12(%[hp])                       \n\t" /* out[n+3] */
+        "swc1   %[sum3],    8(%[hp])                        \n\t" /* out[n+2] */
+        "bne    %[fb],      %[fb_end],  1b                  \n\t" /* exits only on exact equality, 16 bytes per pass */
+        " addiu %[hp],      %[hp],      16                  \n\t" /* delay slot: hp += 4 floats on every pass */
+
+        ".set pop                                           \n\t"
+
+        : [sum1]"=&f"(sum1), [sum2]"=&f"(sum2),
+          [sum3]"=&f"(sum3), [sum4]"=&f"(sum4),
+          [fb]"+r"(fb), [hp]"+r"(hp)
+        : [coeff0]"f"(coeff0), [coeff1]"f"(coeff1),
+          [coeff2]"f"(coeff2), [coeff3]"f"(coeff3),
+          [coeff4]"f"(coeff4), [fb_end]"r"(fb_end)
+        : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", /* hard-coded scratch registers used above */
+          "$f7", "$f8", "$f9", "$f10", "$f11", "$f12",
+          "memory"
+    );
+}
+
+#define calc_thr_3gpp calc_thr_3gpp_mips
+#define psy_hp_filter psy_hp_filter_mips
+
+#endif /* HAVE_INLINE_ASM && HAVE_MIPSFPU && ( PSY_LAME_FIR_LEN == 21 ) */
+#endif /* AVCODEC_MIPS_AACPSY_MIPS_H */
diff --git a/libavcodec/mips/aacsbr_mips.c b/libavcodec/mips/aacsbr_mips.c
new file mode 100644
index 0000000..53a5fd0
--- /dev/null
+++ b/libavcodec/mips/aacsbr_mips.c
@@ -0,0 +1,618 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Authors:  Djordje Pesut   (djordje@mips.com)
+ *           Mirjana Vulin   (mvulin@mips.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/aacsbr.c
+ */
+
+#include "libavcodec/aac.h"
+#include "libavcodec/aacsbr.h"
+
+#define ENVELOPE_ADJUSTMENT_OFFSET 2
+
+#if HAVE_INLINE_ASM
+static int sbr_lf_gen_mips(AACContext *ac, SpectralBandReplication *sbr,
+                      float X_low[32][40][2], const float W[2][32][32][2],
+                      int buf_idx)
+{   /* Clear X_low, then refill parts of it from W; always returns 0 (ac is unused). */
+    int i, k;
+    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; /* words in flight during the copies */
+    float *p_x_low = &X_low[0][8][0];   /* write cursor for the W[buf_idx] copy */
+    float *p_w = (float*)&W[buf_idx][0][0][0];
+    float *p_x1_low = &X_low[0][0][0];  /* zeroing cursor, later write cursor for the W[1-buf_idx] copy */
+    float *p_w1 = (float*)&W[1-buf_idx][24][0][0];
+    /* 2560 floats = 32*40*2: one past the end of X_low */
+    float *loop_end=p_x1_low + 2560;
+    /* Step 1: store zero words over all of X_low, 32 bytes per pass. */
+    /* loop unrolled 8 times */
+    __asm__ volatile (
+    "1:                                                 \n\t"
+        "sw     $0,            0(%[p_x1_low])           \n\t"
+        "sw     $0,            4(%[p_x1_low])           \n\t"
+        "sw     $0,            8(%[p_x1_low])           \n\t"
+        "sw     $0,            12(%[p_x1_low])          \n\t"
+        "sw     $0,            16(%[p_x1_low])          \n\t"
+        "sw     $0,            20(%[p_x1_low])          \n\t"
+        "sw     $0,            24(%[p_x1_low])          \n\t"
+        "sw     $0,            28(%[p_x1_low])          \n\t"
+        "addiu  %[p_x1_low],   %[p_x1_low],      32     \n\t"
+        "bne    %[p_x1_low],   %[loop_end],      1b     \n\t"
+        "addiu  %[p_x1_low],   %[p_x1_low],      -10240 \n\t" /* rewind 10240 bytes = 2560 floats */
+        /* NOTE(review): no ".set noreorder" here; presumably the assembler fills the bne delay slot so the rewind runs once -- confirm */
+        : [p_x1_low]"+r"(p_x1_low)
+        : [loop_end]"r"(loop_end)
+        : "memory"
+    );
+    /* Step 2: X_low[k][8+i] = W[buf_idx][i][k] for i = 0..31 and k < sbr->kx[1]. */
+    for (k = 0; k < sbr->kx[1]; k++) {
+        for (i = 0; i < 32; i+=4) {
+            /* four consecutive i per pass; W[..][i] rows are 256 bytes (32*2 floats) apart */
+            __asm__ volatile (
+                "lw     %[temp0],   0(%[p_w])               \n\t"
+                "lw     %[temp1],   4(%[p_w])               \n\t"
+                "lw     %[temp2],   256(%[p_w])             \n\t"
+                "lw     %[temp3],   260(%[p_w])             \n\t"
+                "lw     %[temp4],   512(%[p_w])             \n\t"
+                "lw     %[temp5],   516(%[p_w])             \n\t"
+                "lw     %[temp6],   768(%[p_w])             \n\t"
+                "lw     %[temp7],   772(%[p_w])             \n\t"
+                "sw     %[temp0],   0(%[p_x_low])           \n\t"
+                "sw     %[temp1],   4(%[p_x_low])           \n\t"
+                "sw     %[temp2],   8(%[p_x_low])           \n\t"
+                "sw     %[temp3],   12(%[p_x_low])          \n\t"
+                "sw     %[temp4],   16(%[p_x_low])          \n\t"
+                "sw     %[temp5],   20(%[p_x_low])          \n\t"
+                "sw     %[temp6],   24(%[p_x_low])          \n\t"
+                "sw     %[temp7],   28(%[p_x_low])          \n\t"
+                "addiu  %[p_x_low], %[p_x_low],     32      \n\t"
+                "addiu  %[p_w],     %[p_w],         1024    \n\t"
+
+                : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+                  [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+                  [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+                  [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
+                  [p_w]"+r"(p_w), [p_x_low]"+r"(p_x_low)
+                :
+                : "memory"
+            );
+        }
+        p_x_low += 16;  /* 64 floats written + 16 = 80 floats: same position in X_low[k+1] */
+        p_w -= 2046;    /* back 2048 floats (8 passes of 1024 bytes), forward 2: column k+1 */
+    }
+    /* Step 3: X_low[k][i] = W[1-buf_idx][24+i][k] for i = 0..7 and k < sbr->kx[0]. */
+    for (k = 0; k < sbr->kx[0]; k++) {
+        for (i = 0; i < 2; i++) {
+
+            /* four consecutive W rows per pass, two passes per k */
+            __asm__ volatile (
+                "lw     %[temp0],    0(%[p_w1])             \n\t"
+                "lw     %[temp1],    4(%[p_w1])             \n\t"
+                "lw     %[temp2],    256(%[p_w1])           \n\t"
+                "lw     %[temp3],    260(%[p_w1])           \n\t"
+                "lw     %[temp4],    512(%[p_w1])           \n\t"
+                "lw     %[temp5],    516(%[p_w1])           \n\t"
+                "lw     %[temp6],    768(%[p_w1])           \n\t"
+                "lw     %[temp7],    772(%[p_w1])           \n\t"
+                "sw     %[temp0],    0(%[p_x1_low])         \n\t"
+                "sw     %[temp1],    4(%[p_x1_low])         \n\t"
+                "sw     %[temp2],    8(%[p_x1_low])         \n\t"
+                "sw     %[temp3],    12(%[p_x1_low])        \n\t"
+                "sw     %[temp4],    16(%[p_x1_low])        \n\t"
+                "sw     %[temp5],    20(%[p_x1_low])        \n\t"
+                "sw     %[temp6],    24(%[p_x1_low])        \n\t"
+                "sw     %[temp7],    28(%[p_x1_low])        \n\t"
+                "addiu  %[p_x1_low], %[p_x1_low],   32      \n\t"
+                "addiu  %[p_w1],     %[p_w1],       1024    \n\t"
+
+                : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+                  [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+                  [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+                  [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
+                  [p_w1]"+r"(p_w1), [p_x1_low]"+r"(p_x1_low)
+                :
+                : "memory"
+            );
+        }
+        p_x1_low += 64; /* 16 floats written + 64 = 80 floats: start of X_low[k+1] */
+        p_w1 -= 510;    /* back 512 floats (2 passes of 1024 bytes), forward 2: column k+1 */
+    }
+    return 0;
+}
+
+static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64],
+                     const float Y0[38][64][2], const float Y1[38][64][2],
+                     const float X_low[32][40][2], int ch)
+{   /* Zero X, then fill it column by column from X_low, Y0 and Y1; always returns 0. */
+    int k, i;
+    const int i_f = 32;
+    int temp0, temp1, temp2, temp3;
+    const float *X_low1, *Y01, *Y11;
+    float *x1=&X[0][0][0];
+    float *j=x1+4864;   /* one past the end of X: 4864 floats = 2*38*64 */
+    const int i_Temp = FFMAX(2*sbr->data[ch].t_env_num_env_old - i_f, 0);
+    /* Rows [0, i_Temp) of X are filled from X_low/Y0, rows from i_Temp on from X_low/Y1. */
+    /* Zero all of X; loop unrolled 8 times */
+    __asm__ volatile (
+    "1:                                       \n\t"
+        "sw     $0,      0(%[x1])             \n\t"
+        "sw     $0,      4(%[x1])             \n\t"
+        "sw     $0,      8(%[x1])             \n\t"
+        "sw     $0,      12(%[x1])            \n\t"
+        "sw     $0,      16(%[x1])            \n\t"
+        "sw     $0,      20(%[x1])            \n\t"
+        "sw     $0,      24(%[x1])            \n\t"
+        "sw     $0,      28(%[x1])            \n\t"
+        "addiu  %[x1],   %[x1],      32       \n\t"
+        "bne    %[x1],   %[j],       1b       \n\t"
+        "addiu  %[x1],   %[x1],      -19456   \n\t" /* rewind 19456 bytes = 4864 floats */
+        /* NOTE(review): no ".set noreorder" here; presumably the assembler fills the bne delay slot so the rewind runs once -- confirm */
+        : [x1]"+r"(x1)
+        : [j]"r"(j)
+        : "memory"
+    );
+
+    if (i_Temp != 0) {  /* the asm loops below run at least once, so skip them when there is nothing to copy */
+        /* X[0][i][k], X[1][i][k] = X_low[k][i+2][0], X_low[k][i+2][1] for i < i_Temp, k < kx[0] */
+        X_low1=&X_low[0][2][0];
+
+        for (k = 0; k < sbr->kx[0]; k++) {
+
+            __asm__ volatile (
+                "move    %[i],        $zero                  \n\t"
+            "2:                                              \n\t"
+                "lw      %[temp0],    0(%[X_low1])           \n\t"
+                "lw      %[temp1],    4(%[X_low1])           \n\t"
+                "sw      %[temp0],    0(%[x1])               \n\t"
+                "sw      %[temp1],    9728(%[x1])            \n\t" /* +9728 bytes = 38*64 floats: same slot in X[1] */
+                "addiu   %[x1],       %[x1],         256     \n\t" /* next row of X: 64 floats */
+                "addiu   %[X_low1],   %[X_low1],     8       \n\t"
+                "addiu   %[i],        %[i],          1       \n\t"
+                "bne     %[i],        %[i_Temp],     2b      \n\t"
+
+                : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i),
+                  [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
+                : [i_Temp]"r"(i_Temp)
+                : "memory"
+            );
+            x1-=(i_Temp<<6)-1;          /* back i_Temp rows of 64 floats, forward one column */
+            X_low1-=(i_Temp<<1)-80;     /* back i_Temp pairs, forward 80 floats: X_low[k+1][2] */
+        }
+        /* Same rows, columns k = kx[0] .. kx[0]+m[0]-1: the pairs come from Y0[32+i][k]. */
+        x1=&X[0][0][k];
+        Y01=(float*)&Y0[32][k][0];
+
+        for (; k < sbr->kx[0] + sbr->m[0]; k++) {
+            __asm__ volatile (
+                "move    %[i],       $zero               \n\t"
+            "3:                                          \n\t"
+                "lw      %[temp0],   0(%[Y01])           \n\t"
+                "lw      %[temp1],   4(%[Y01])           \n\t"
+                "sw      %[temp0],   0(%[x1])            \n\t"
+                "sw      %[temp1],   9728(%[x1])         \n\t"
+                "addiu   %[x1],      %[x1],      256     \n\t"
+                "addiu   %[Y01],     %[Y01],     512     \n\t" /* next row of Y0: 64*2 floats */
+                "addiu   %[i],       %[i],       1       \n\t"
+                "bne     %[i],       %[i_Temp],  3b      \n\t"
+
+                : [x1]"+r"(x1), [Y01]"+r"(Y01), [i]"=&r"(i),
+                  [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
+                : [i_Temp]"r"(i_Temp)
+                : "memory"
+            );
+            x1 -=(i_Temp<<6)-1;         /* back i_Temp rows, forward one column */
+            Y01 -=(i_Temp<<7)-2;        /* back i_Temp rows of 128 floats, forward one pair */
+        }
+    }
+    /* Rows i_Temp..37, columns k < kx[1]: the pairs come from X_low[k][i+2]. */
+    x1=&X[0][i_Temp][0];
+    X_low1=&X_low[0][i_Temp+2][0];
+    temp3=38;   /* exclusive row bound for loop 4 */
+
+    for (k = 0; k < sbr->kx[1]; k++) {
+
+        __asm__ volatile (
+            "move    %[i],       %[i_Temp]              \n\t"
+        "4:                                             \n\t"
+            "lw      %[temp0],   0(%[X_low1])           \n\t"
+            "lw      %[temp1],   4(%[X_low1])           \n\t"
+            "sw      %[temp0],   0(%[x1])               \n\t"
+            "sw      %[temp1],   9728(%[x1])            \n\t"
+            "addiu   %[x1],      %[x1],         256     \n\t"
+            "addiu   %[X_low1],  %[X_low1],     8       \n\t"
+            "addiu   %[i],       %[i],          1       \n\t"
+            "bne     %[i],       %[temp3],      4b      \n\t"
+            /* temp2 is declared as an output below but no instruction above references it */
+            : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i),
+              [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+              [temp2]"=&r"(temp2)
+            : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3)
+            : "memory"
+        );
+        x1 -= ((38-i_Temp)<<6)-1;           /* back the rows just written, forward one column */
+        X_low1 -= ((38-i_Temp)<<1)- 80;     /* back the pairs just read, forward 80 floats: next k */
+    }
+    /* Rows i_Temp..31, columns k = kx[1] .. kx[1]+m[1]-1: the pairs come from Y1[i][k]. */
+    x1=&X[0][i_Temp][k];
+    Y11=&Y1[i_Temp][k][0];
+    temp2=32;   /* exclusive row bound for loop 5 */
+
+    for (; k < sbr->kx[1] + sbr->m[1]; k++) {
+
+        __asm__ volatile (
+           "move    %[i],       %[i_Temp]               \n\t"
+        "5:                                             \n\t"
+           "lw      %[temp0],   0(%[Y11])               \n\t"
+           "lw      %[temp1],   4(%[Y11])               \n\t"
+           "sw      %[temp0],   0(%[x1])                \n\t"
+           "sw      %[temp1],   9728(%[x1])             \n\t"
+           "addiu   %[x1],      %[x1],          256     \n\t"
+           "addiu   %[Y11],     %[Y11],         512     \n\t"
+           "addiu   %[i],       %[i],           1       \n\t"
+           "bne     %[i],       %[temp2],       5b      \n\t"
+           /* temp3 is passed as an input below but no instruction above references it */
+           : [x1]"+r"(x1), [Y11]"+r"(Y11), [i]"=&r"(i),
+             [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
+           : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3),
+             [temp2]"r"(temp2)
+           : "memory"
+        );
+
+        x1 -= ((32-i_Temp)<<6)-1;       /* back the rows just written, forward one column */
+        Y11 -= ((32-i_Temp)<<7)-2;      /* back the rows just read, forward one pair */
+   }
+      return 0;
+}
+
+#if HAVE_MIPSFPU
+/**
+ * Copy the per-envelope gain[] / q_m[] rows into the g_temp / q_temp history,
+ * then per time slot scale into Y1 through dsp.hf_g_filt and add either noise
+ * (dsp.hf_apply_noise) or the s_m terms; stores the updated indices in ch_data.
+ */
+static void sbr_hf_assemble_mips(float Y1[38][64][2],
+                            const float X_high[64][40][2],
+                            SpectralBandReplication *sbr, SBRData *ch_data,
+                            const int e_a[2])
+{
+    int e, i, j, m;
+    const int h_SL = 4 * !sbr->bs_smoothing_mode;   /* 4 history rows when smoothing is on, else 0 */
+    const int kx = sbr->kx[1];
+    const int m_max = sbr->m[1];
+    static const float h_smooth[5] = {
+        0.33333333333333,
+        0.30150283239582,
+        0.21816949906249,
+        0.11516383427084,
+        0.03183050093751,
+    };
+    float (*g_temp)[48] = ch_data->g_temp, (*q_temp)[48] = ch_data->q_temp;
+    int indexnoise = ch_data->f_indexnoise;
+    int indexsine  = ch_data->f_indexsine;
+    float *g_temp1, *q_temp1, *pok, *pok1;
+    float temp1, temp2, temp3, temp4;   /* NOTE(review): bound to "r" operands for lw/sw below; an integer type may be intended */
+    int size = m_max;
+    /* Prime the h_SL history rows that precede the first envelope's rows. */
+    if (sbr->reset) {
+        for (i = 0; i < h_SL; i++) {
+            memcpy(g_temp[i + 2*ch_data->t_env[0]], sbr->gain[0], m_max * sizeof(sbr->gain[0][0]));
+            memcpy(q_temp[i + 2*ch_data->t_env[0]], sbr->q_m[0],  m_max * sizeof(sbr->q_m[0][0]));
+        }
+    } else if (h_SL) {
+        memcpy(g_temp[2*ch_data->t_env[0]], g_temp[2*ch_data->t_env_num_env_old], 4*sizeof(g_temp[0]));
+        memcpy(q_temp[2*ch_data->t_env[0]], q_temp[2*ch_data->t_env_num_env_old], 4*sizeof(q_temp[0]));
+    }
+    /* Copy gain[e] / q_m[e] (m_max words each) into row h_SL + i for every slot i of envelope e. */
+    for (e = 0; e < ch_data->bs_num_env; e++) {
+        for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
+            g_temp1 = g_temp[h_SL + i];
+            pok = sbr->gain[e];
+            q_temp1 = q_temp[h_SL + i];
+            pok1 = sbr->q_m[e];
+            /* groups of four words first; loop unrolled 4 times */
+            for (j=0; j<(size>>2); j++) {
+                __asm__ volatile (
+                    "lw      %[temp1],   0(%[pok])               \n\t"
+                    "lw      %[temp2],   4(%[pok])               \n\t"
+                    "lw      %[temp3],   8(%[pok])               \n\t"
+                    "lw      %[temp4],   12(%[pok])              \n\t"
+                    "sw      %[temp1],   0(%[g_temp1])           \n\t"
+                    "sw      %[temp2],   4(%[g_temp1])           \n\t"
+                    "sw      %[temp3],   8(%[g_temp1])           \n\t"
+                    "sw      %[temp4],   12(%[g_temp1])          \n\t"
+                    "lw      %[temp1],   0(%[pok1])              \n\t"
+                    "lw      %[temp2],   4(%[pok1])              \n\t"
+                    "lw      %[temp3],   8(%[pok1])              \n\t"
+                    "lw      %[temp4],   12(%[pok1])             \n\t"
+                    "sw      %[temp1],   0(%[q_temp1])           \n\t"
+                    "sw      %[temp2],   4(%[q_temp1])           \n\t"
+                    "sw      %[temp3],   8(%[q_temp1])           \n\t"
+                    "sw      %[temp4],   12(%[q_temp1])          \n\t"
+                    "addiu   %[pok],     %[pok],           16    \n\t"
+                    "addiu   %[g_temp1], %[g_temp1],       16    \n\t"
+                    "addiu   %[pok1],    %[pok1],          16    \n\t"
+                    "addiu   %[q_temp1], %[q_temp1],       16    \n\t"
+
+                    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+                      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
+                      [pok]"+r"(pok), [g_temp1]"+r"(g_temp1),
+                      [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1)
+                    :
+                    : "memory"
+                );
+            }
+
+            for (j=0; j<(size&3); j++) {    /* remaining 0..3 words, one at a time */
+                __asm__ volatile (
+                    "lw      %[temp1],   0(%[pok])              \n\t"
+                    "lw      %[temp2],   0(%[pok1])             \n\t"
+                    "sw      %[temp1],   0(%[g_temp1])          \n\t"
+                    "sw      %[temp2],   0(%[q_temp1])          \n\t"
+                    "addiu   %[pok],     %[pok],          4     \n\t"
+                    "addiu   %[g_temp1], %[g_temp1],      4     \n\t"
+                    "addiu   %[pok1],    %[pok1],         4     \n\t"
+                    "addiu   %[q_temp1], %[q_temp1],      4     \n\t"
+
+                    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+                      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
+                      [pok]"+r"(pok), [g_temp1]"+r"(g_temp1),
+                      [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1)
+                    :
+                    : "memory"
+                );
+            }
+        }
+    }
+    /* Second pass: per slot, pick or smooth the gain/noise rows, scale into Y1, then add noise or s_m. */
+    for (e = 0; e < ch_data->bs_num_env; e++) {
+        for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
+            LOCAL_ALIGNED_16(float, g_filt_tab, [48]);
+            LOCAL_ALIGNED_16(float, q_filt_tab, [48]);
+            float *g_filt, *q_filt;
+            if (h_SL && e != e_a[0] && e != e_a[1]) {  /* smooth over rows i+h_SL .. i with h_smooth */
+                g_filt = g_filt_tab;
+                q_filt = q_filt_tab;
+                for (m = 0; m < m_max; m++) {
+                    const int idx1 = i + h_SL;
+                    g_filt[m] = 0.0f;
+                    q_filt[m] = 0.0f;
+                    for (j = 0; j <= h_SL; j++) {
+                        g_filt[m] += g_temp[idx1 - j][m] * h_smooth[j];
+                        q_filt[m] += q_temp[idx1 - j][m] * h_smooth[j];
+                    }
+                }
+            } else {
+                g_filt = g_temp[i + h_SL];
+                q_filt = q_temp[i];
+            }
+
+            sbr->dsp.hf_g_filt(Y1[i] + kx, X_high + kx, g_filt, m_max,
+                               i + ENVELOPE_ADJUSTMENT_OFFSET);
+
+            if (e != e_a[0] && e != e_a[1]) {
+                sbr->dsp.hf_apply_noise[indexsine](Y1[i] + kx, sbr->s_m[e],
+                                                   q_filt, indexnoise,
+                                                   kx, m_max);
+            } else {
+                int idx = indexsine&1;  /* which component of each Y1 pair is touched */
+                int A = (1-((indexsine+(kx & 1))&2));  /* +1 or -1 */
+                int B = (A^(-idx)) + idx;              /* A when idx is 0, -A when idx is 1 */
+                float *out = &Y1[i][kx][idx];
+                float *in  = sbr->s_m[e];
+                float temp0, temp1, temp2, temp3, temp4, temp5;
+                float A_f = (float)A;
+                float B_f = (float)B;
+
+                for (m = 0; m+1 < m_max; m+=2) {    /* two entries per pass; the asm advances in and out */
+                    temp2 = out[0];
+                    temp3 = out[2];
+
+                    __asm__ volatile(
+                        "lwc1    %[temp0],  0(%[in])                     \n\t"
+                        "lwc1    %[temp1],  4(%[in])                     \n\t"
+                        "madd.s  %[temp4],  %[temp2],  %[temp0], %[A_f]  \n\t" /* out[0] + in[0]*A */
+                        "madd.s  %[temp5],  %[temp3],  %[temp1], %[B_f]  \n\t" /* out[2] + in[1]*B */
+                        "swc1    %[temp4],  0(%[out])                    \n\t"
+                        "swc1    %[temp5],  8(%[out])                    \n\t"
+                        "addiu   %[in],     %[in],     8                 \n\t"
+                        "addiu   %[out],    %[out],    16                \n\t"
+
+                        : [temp0]"=&f" (temp0), [temp1]"=&f"(temp1),
+                          [temp4]"=&f" (temp4), [temp5]"=&f"(temp5),
+                          [in]"+r"(in), [out]"+r"(out)
+                        : [A_f]"f"(A_f), [B_f]"f"(B_f), [temp2]"f"(temp2),
+                          [temp3]"f"(temp3)
+                        : "memory"
+                    );
+                }
+                /* Odd m_max: in/out already point at the leftover entry, so index 0 (not 2*m / m). */
+                if(m_max&1)
+                    out[0] += in[0] * A;
+            }
+            indexnoise = (indexnoise + m_max) & 0x1ff;
+            indexsine = (indexsine + 1) & 3;
+        }
+    }
+    ch_data->f_indexnoise = indexnoise;
+    ch_data->f_indexsine  = indexsine;
+}
+
+static void sbr_hf_inverse_filter_mips(SBRDSPContext *dsp,
+                                  float (*alpha0)[2], float (*alpha1)[2],
+                                  const float X_low[32][40][2], int k0)
+{
+    int k;
+    float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, c;
+    float *phi1, *alpha_1, *alpha_0, res1, res2, temp_real, temp_im;
+    /* For every k < k0: autocorrelate X_low[k] into phi, then fill alpha1[k] and alpha0[k]. */
+    c = 1.000001f; /* divisor of the squared-magnitude term in dk */
+    /* Operand order used below: madd.s d,r,s,t gives r + s*t; nmsub.s d,r,s,t gives r - s*t. */
+    for (k = 0; k < k0; k++) {
+        LOCAL_ALIGNED_16(float, phi, [3], [2][2]);
+        float dk;
+        phi1 = &phi[0][0][0]; /* flat view: phi[a][b][c] is phi1[4*a + 2*b + c] */
+        alpha_1 = &alpha1[k][0]; /* {re, im} pair written for this k */
+        alpha_0 = &alpha0[k][0]; /* {re, im} pair written for this k */
+        dsp->autocorrelate(X_low[k], phi);
+        /* dk = phi[2][1][0]*phi[1][0][0] - (phi[1][1][0]^2 + phi[1][1][1]^2) / c; phi[0][..] is preloaded too. */
+        __asm__ volatile (
+            "lwc1    %[temp0],  40(%[phi1])                       \n\t"
+            "lwc1    %[temp1],  16(%[phi1])                       \n\t"
+            "lwc1    %[temp2],  24(%[phi1])                       \n\t"
+            "lwc1    %[temp3],  28(%[phi1])                       \n\t"
+            "mul.s   %[dk],     %[temp0],    %[temp1]             \n\t"
+            "lwc1    %[temp4],  0(%[phi1])                        \n\t"
+            "mul.s   %[res2],   %[temp2],    %[temp2]             \n\t"
+            "lwc1    %[temp5],  4(%[phi1])                        \n\t"
+            "madd.s  %[res2],   %[res2],     %[temp3],  %[temp3]  \n\t"
+            "lwc1    %[temp6],  8(%[phi1])                        \n\t"
+            "div.s   %[res2],   %[res2],     %[c]                 \n\t"
+            "lwc1    %[temp0],  12(%[phi1])                       \n\t"
+            "sub.s   %[dk],     %[dk],       %[res2]              \n\t"
+            /* kept for later: temp0=phi[0][1][1] temp1=phi[1][0][0] temp2,3=phi[1][1][0..1] temp4,5=phi[0][0][0..1] temp6=phi[0][1][0] */
+            : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
+              [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
+              [temp6]"=&f"(temp6), [res2]"=&f"(res2), [dk]"=&f"(dk)
+            : [phi1]"r"(phi1), [c]"f"(c)
+            : "memory"
+        );
+        /* dk is the divisor for alpha1[k]; when it is exactly zero the pair is set to 0 instead. */
+        if (!dk) {
+            alpha_1[0] = 0;
+            alpha_1[1] = 0;
+        } else {
+            __asm__ volatile (
+                "mul.s   %[temp_real], %[temp4],     %[temp2]            \n\t"
+                "nmsub.s %[temp_real], %[temp_real], %[temp5], %[temp3]  \n\t"
+                "nmsub.s %[temp_real], %[temp_real], %[temp6], %[temp1]  \n\t"
+                "mul.s   %[temp_im],   %[temp4],     %[temp3]            \n\t"
+                "madd.s  %[temp_im],   %[temp_im],   %[temp5], %[temp2]  \n\t"
+                "nmsub.s %[temp_im],   %[temp_im],   %[temp0], %[temp1]  \n\t"
+                "div.s   %[temp_real], %[temp_real], %[dk]               \n\t"
+                "div.s   %[temp_im],   %[temp_im],   %[dk]               \n\t"
+                "swc1    %[temp_real], 0(%[alpha_1])                     \n\t"
+                "swc1    %[temp_im],   4(%[alpha_1])                     \n\t"
+                /* alpha1[k] = (phi[0][0] * phi[1][1] - phi[0][1] * phi[1][0][0]) / dk, with complex phi[0][0], phi[1][1], phi[0][1] */
+                : [temp_real]"=&f" (temp_real), [temp_im]"=&f"(temp_im)
+                : [phi1]"r"(phi1), [temp0]"f"(temp0), [temp1]"f"(temp1),
+                  [temp2]"f"(temp2), [temp3]"f"(temp3), [temp4]"f"(temp4),
+                  [temp5]"f"(temp5), [temp6]"f"(temp6),
+                  [alpha_1]"r"(alpha_1), [dk]"f"(dk)
+                : "memory"
+            );
+        }
+        /* phi1[4] is phi[1][0][0], the divisor for alpha0[k]; zero again means "store 0". */
+        if (!phi1[4]) {
+            alpha_0[0] = 0;
+            alpha_0[1] = 0;
+        } else {
+            __asm__ volatile (
+                "lwc1    %[temp6],     0(%[alpha_1])                     \n\t"
+                "lwc1    %[temp7],     4(%[alpha_1])                     \n\t"
+                "mul.s   %[temp_real], %[temp6],     %[temp2]            \n\t"
+                "add.s   %[temp_real], %[temp_real], %[temp4]            \n\t"
+                "madd.s  %[temp_real], %[temp_real], %[temp7], %[temp3]  \n\t"
+                "mul.s   %[temp_im],   %[temp7],     %[temp2]            \n\t"
+                "add.s   %[temp_im],   %[temp_im],   %[temp5]            \n\t"
+                "nmsub.s %[temp_im],   %[temp_im],   %[temp6], %[temp3]  \n\t"
+                "div.s   %[temp_real], %[temp_real], %[temp1]            \n\t"
+                "div.s   %[temp_im],   %[temp_im],   %[temp1]            \n\t"
+                "neg.s   %[temp_real], %[temp_real]                      \n\t"
+                "neg.s   %[temp_im],   %[temp_im]                        \n\t"
+                "swc1    %[temp_real], 0(%[alpha_0])                     \n\t"
+                "swc1    %[temp_im],   4(%[alpha_0])                     \n\t"
+                /* alpha0[k] = -(phi[0][0] + alpha1[k] * conj(phi[1][1])) / phi[1][0][0]; alpha1[k] is re-read from memory */
+                : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im),
+                  [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
+                  [res1]"=&f"(res1), [res2]"=&f"(res2) /* listed as outputs, but no instruction above writes them */
+                : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0),
+                  [temp0]"f"(temp0), [temp1]"f"(temp1), [temp2]"f"(temp2),
+                  [temp3]"f"(temp3), [temp4]"f"(temp4), [temp5]"f"(temp5)
+                : "memory"
+            );
+        }
+        /* res1 = |alpha1[k]|^2 and res2 = |alpha0[k]|^2, computed from the values just stored. */
+        __asm__ volatile (
+            "lwc1    %[temp1],      0(%[alpha_1])                           \n\t"
+            "lwc1    %[temp2],      4(%[alpha_1])                           \n\t"
+            "lwc1    %[temp_real],  0(%[alpha_0])                           \n\t"
+            "lwc1    %[temp_im],    4(%[alpha_0])                           \n\t"
+            "mul.s   %[res1],       %[temp1],      %[temp1]                 \n\t"
+            "madd.s  %[res1],       %[res1],       %[temp2],    %[temp2]    \n\t"
+            "mul.s   %[res2],       %[temp_real],  %[temp_real]             \n\t"
+            "madd.s  %[res2],       %[res2],       %[temp_im],  %[temp_im]  \n\t"
+
+            : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im),
+              [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
+              [res1]"=&f"(res1), [res2]"=&f"(res2)
+            : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0)
+            : "memory"
+        );
+        /* Both coefficient pairs are zeroed as soon as either squared magnitude reaches 16. */
+        if (res1 >= 16.0f || res2 >= 16.0f) {
+            alpha_1[0] = 0;
+            alpha_1[1] = 0;
+            alpha_0[0] = 0;
+            alpha_0[1] = 0;
+        }
+    }
+}
+#endif /* HAVE_MIPSFPU */
+#endif /* HAVE_INLINE_ASM */
+
+void ff_aacsbr_func_ptr_init_mips(AACSBRContext *c)
+{   /* Point the SBR hooks of c at the MIPS routines available in this build. */
+#if HAVE_INLINE_ASM && HAVE_MIPSFPU /* routines that also use the FPU */
+    c->sbr_hf_assemble       = sbr_hf_assemble_mips;
+    c->sbr_hf_inverse_filter = sbr_hf_inverse_filter_mips;
+#endif /* HAVE_INLINE_ASM && HAVE_MIPSFPU */
+#if HAVE_INLINE_ASM                 /* routines that need inline asm only */
+    c->sbr_x_gen             = sbr_x_gen_mips;
+    c->sbr_lf_gen            = sbr_lf_gen_mips;
+#endif /* HAVE_INLINE_ASM */
+}
diff --git a/libavcodec/mips/aacsbr_mips.h b/libavcodec/mips/aacsbr_mips.h
new file mode 100644
index 0000000..8e6ad7d
--- /dev/null
+++ b/libavcodec/mips/aacsbr_mips.h
@@ -0,0 +1,493 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Authors:  Djordje Pesut   (djordje@mips.com)
+ *           Mirjana Vulin   (mvulin@mips.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/aacsbr.c
+ */
+
+#ifndef AVCODEC_MIPS_AACSBR_FLOAT_H
+#define AVCODEC_MIPS_AACSBR_FLOAT_H
+
+#include "libavcodec/aac.h"
+#include "libavcodec/sbr.h"
+
+#if HAVE_INLINE_ASM
+/**
+ * MIPS inline-assembly variant of the SBR QMF analysis routine.
+ *
+ * The history buffer is refreshed first, using word copies in assembly:
+ * x[0..287] receives x[1024..1311], then x[288..1311] receives in[0..1023].
+ * Afterwards 32 slots are processed; slot i hands x + 32 * i to the window
+ * multiply and lets the remaining helpers turn z into W[buf_idx][i].
+ *
+ * The copy goes through integer registers (lw/sw), so the float values are
+ * moved bit for bit.
+ *
+ * @param fdsp    provides vector_fmul_reverse()
+ * @param mdct    provides imdct_half() and is passed to it
+ * @param sbrdsp  provides sum64x5(), qmf_pre_shuffle(), qmf_post_shuffle()
+ * @param in      1024 input floats, only read
+ * @param x       history buffer, touched up to x[1311]
+ * @param z       work buffer handed to every helper
+ * @param W       W[buf_idx][i] is handed to qmf_post_shuffle() for slot i
+ * @param buf_idx first index into W
+ */
+static void sbr_qmf_analysis_mips(AVFloatDSPContext *fdsp, FFTContext *mdct,
+                             SBRDSPContext *sbrdsp, const float *in, float *x,
+                             float z[320], float W[2][32][32][2], int buf_idx)
+{
+    int pass, slot;
+    int blocks;
+    float *dst;
+    float *src;
+    int a0, a1, a2, a3, a4, a5, a6, a7;
+
+    for (pass = 0; pass < 2; pass++) {
+        if (pass == 0) {
+            /* x[0..287] <- x[1024..1311] */
+            dst    = x;
+            src    = x + 1024;
+            blocks = 36;
+        } else {
+            /* x[288..1311] <- in[0..1023] */
+            dst    = x + 288;
+            src    = (float*)in;
+            blocks = 128;
+        }
+
+        /* each asm block moves eight 32-bit words and advances both pointers */
+        while (blocks-- > 0) {
+            __asm__ volatile(
+                "lw      %[temp0],   0(%[w1])         \n\t"
+                "lw      %[temp1],   4(%[w1])         \n\t"
+                "lw      %[temp2],   8(%[w1])         \n\t"
+                "lw      %[temp3],   12(%[w1])        \n\t"
+                "lw      %[temp4],   16(%[w1])        \n\t"
+                "lw      %[temp5],   20(%[w1])        \n\t"
+                "lw      %[temp6],   24(%[w1])        \n\t"
+                "lw      %[temp7],   28(%[w1])        \n\t"
+                "sw      %[temp0],   0(%[w0])         \n\t"
+                "sw      %[temp1],   4(%[w0])         \n\t"
+                "sw      %[temp2],   8(%[w0])         \n\t"
+                "sw      %[temp3],   12(%[w0])        \n\t"
+                "sw      %[temp4],   16(%[w0])        \n\t"
+                "sw      %[temp5],   20(%[w0])        \n\t"
+                "sw      %[temp6],   24(%[w0])        \n\t"
+                "sw      %[temp7],   28(%[w0])        \n\t"
+                "addiu   %[w0],      %[w0],     32    \n\t"
+                "addiu   %[w1],      %[w1],     32    \n\t"
+
+                : [w0]"+r"(dst), [w1]"+r"(src),
+                  [temp0]"=&r"(a0), [temp1]"=&r"(a1),
+                  [temp2]"=&r"(a2), [temp3]"=&r"(a3),
+                  [temp4]"=&r"(a4), [temp5]"=&r"(a5),
+                  [temp6]"=&r"(a6), [temp7]"=&r"(a7)
+                :
+                : "memory"
+            );
+        }
+    }
+
+    /* numTimeSlots*RATE = 16*2 slots, as 960 sample frames are not supported */
+    slot = 0;
+    while (slot < 32) {
+        fdsp->vector_fmul_reverse(z, sbr_qmf_window_ds, x, 320);
+        sbrdsp->sum64x5(z);
+        sbrdsp->qmf_pre_shuffle(z);
+        mdct->imdct_half(mdct, z, z+64);
+        sbrdsp->qmf_post_shuffle(W[buf_idx][slot], z);
+        /* the next slot starts 32 floats further into the history */
+        x += 32;
+        slot++;
+    }
+}
+
+#if HAVE_MIPSFPU
+static void sbr_qmf_synthesis_mips(FFTContext *mdct,
+                              SBRDSPContext *sbrdsp, AVFloatDSPContext *fdsp,
+                              float *out, float X[2][38][64],
+                              float mdct_buf[2][64],
+                              float *v0, int *v_off, const unsigned int div)
+{
+    int i, n;
+    const float *sbr_qmf_window = div ? sbr_qmf_window_ds : sbr_qmf_window_us;
+    const int step = 128 >> div;
+    float *v;
+    float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13;
+    float temp14, temp15, temp16, temp17, temp18, temp19;
+    float *vv0, *s0, *dst;
+    dst = out;
+
+    for (i = 0; i < 32; i++) {
+        if (*v_off < step) {
+            int saved_samples = (1280 - 128) >> div;
+            memcpy(&v0[SBR_SYNTHESIS_BUF_SIZE - saved_samples], v0, saved_samples * sizeof(float));
+            *v_off = SBR_SYNTHESIS_BUF_SIZE - saved_samples - step;
+        } else {
+            *v_off -= step;
+        }
+        v = v0 + *v_off;
+        if (div) {
+            for (n = 0; n < 32; n++) {
+                X[0][i][   n] = -X[0][i][n];
+                X[0][i][32+n] =  X[1][i][31-n];
+            }
+            mdct->imdct_half(mdct, mdct_buf[0], X[0][i]);
+            sbrdsp->qmf_deint_neg(v, mdct_buf[0]);
+        } else {
+            sbrdsp->neg_odd_64(X[1][i]);
+            mdct->imdct_half(mdct, mdct_buf[0], X[0][i]);
+            mdct->imdct_half(mdct, mdct_buf[1], X[1][i]);
+            sbrdsp->qmf_deint_bfly(v, mdct_buf[1], mdct_buf[0]);
+        }
+
+        if(div == 0)
+        {
+            float *v0_end;
+            vv0 = v;
+            v0_end = v + 60;
+            s0 = (float*)sbr_qmf_window;
+
+            /* 10 calls of function vector_fmul_add merged into one loop
+               and loop unrolled 4 times */
+            __asm__ volatile(
+                ".set    push                                           \n\t"
+                ".set    noreorder                                      \n\t"
+                "lwc1    %[temp4],   0(%[v0])                           \n\t"
+                "lwc1    %[temp5],   0(%[s0])                           \n\t"
+                "lwc1    %[temp6],   4(%[v0])                           \n\t"
+                "lwc1    %[temp7],   4(%[s0])                           \n\t"
+                "lwc1    %[temp8],   8(%[v0])                           \n\t"
+                "lwc1    %[temp9],   8(%[s0])                           \n\t"
+                "lwc1    %[temp10],  12(%[v0])                          \n\t"
+                "lwc1    %[temp11],  12(%[s0])                          \n\t"
+                "lwc1    %[temp12],  768(%[v0])                         \n\t"
+                "lwc1    %[temp13],  256(%[s0])                         \n\t"
+                "lwc1    %[temp14],  772(%[v0])                         \n\t"
+                "lwc1    %[temp15],  260(%[s0])                         \n\t"
+                "lwc1    %[temp16],  776(%[v0])                         \n\t"
+                "lwc1    %[temp17],  264(%[s0])                         \n\t"
+                "lwc1    %[temp18],  780(%[v0])                         \n\t"
+                "lwc1    %[temp19],  268(%[s0])                         \n\t"
+            "1:                                                         \n\t"
+                "mul.s   %[temp0],   %[temp4],   %[temp5]               \n\t"
+                "lwc1    %[temp4],   1024(%[v0])                        \n\t"
+                "mul.s   %[temp1],   %[temp6],   %[temp7]               \n\t"
+                "lwc1    %[temp5],   512(%[s0])                         \n\t"
+                "mul.s   %[temp2],   %[temp8],   %[temp9]               \n\t"
+                "lwc1    %[temp6],   1028(%[v0])                        \n\t"
+                "mul.s   %[temp3],   %[temp10],  %[temp11]              \n\t"
+                "lwc1    %[temp7],   516(%[s0])                         \n\t"
+                "madd.s  %[temp0],   %[temp0],   %[temp12],  %[temp13]  \n\t"
+                "lwc1    %[temp8],   1032(%[v0])                        \n\t"
+                "madd.s  %[temp1],   %[temp1],   %[temp14],  %[temp15]  \n\t"
+                "lwc1    %[temp9],   520(%[s0])                         \n\t"
+                "madd.s  %[temp2],   %[temp2],   %[temp16],  %[temp17]  \n\t"
+                "lwc1    %[temp10],  1036(%[v0])                        \n\t"
+                "madd.s  %[temp3],   %[temp3],   %[temp18],  %[temp19]  \n\t"
+                "lwc1    %[temp11],  524(%[s0])                         \n\t"
+                "lwc1    %[temp12],  1792(%[v0])                        \n\t"
+                "lwc1    %[temp13],  768(%[s0])                         \n\t"
+                "lwc1    %[temp14],  1796(%[v0])                        \n\t"
+                "lwc1    %[temp15],  772(%[s0])                         \n\t"
+                "lwc1    %[temp16],  1800(%[v0])                        \n\t"
+                "lwc1    %[temp17],  776(%[s0])                         \n\t"
+                "lwc1    %[temp18],  1804(%[v0])                        \n\t"
+                "lwc1    %[temp19],  780(%[s0])                         \n\t"
+                "madd.s  %[temp0],   %[temp0],   %[temp4],   %[temp5]   \n\t"
+                "lwc1    %[temp4],   2048(%[v0])                        \n\t"
+                "madd.s  %[temp1],   %[temp1],   %[temp6],   %[temp7]   \n\t"
+                "lwc1    %[temp5],   1024(%[s0])                        \n\t"
+                "madd.s  %[temp2],   %[temp2],   %[temp8],   %[temp9]   \n\t"
+                "lwc1    %[temp6],   2052(%[v0])                        \n\t"
+                "madd.s  %[temp3],   %[temp3],   %[temp10],  %[temp11]  \n\t"
+                "lwc1    %[temp7],   1028(%[s0])                        \n\t"
+                "madd.s  %[temp0],   %[temp0],   %[temp12],  %[temp13]  \n\t"
+                "lwc1    %[temp8],   2056(%[v0])                        \n\t"
+                "madd.s  %[temp1],   %[temp1],   %[temp14],  %[temp15]  \n\t"
+                "lwc1    %[temp9],   1032(%[s0])                        \n\t"
+                "madd.s  %[temp2],   %[temp2],   %[temp16],  %[temp17]  \n\t"
+                "lwc1    %[temp10],  2060(%[v0])                        \n\t"
+                "madd.s  %[temp3],   %[temp3],   %[temp18],  %[temp19]  \n\t"
+                "lwc1    %[temp11],  1036(%[s0])                        \n\t"
+                "lwc1    %[temp12],  2816(%[v0])                        \n\t"
+                "lwc1    %[temp13],  1280(%[s0])                        \n\t"
+                "lwc1    %[temp14],  2820(%[v0])                        \n\t"
+                "lwc1    %[temp15],  1284(%[s0])                        \n\t"
+                "lwc1    %[temp16],  2824(%[v0])                        \n\t"
+                "lwc1    %[temp17],  1288(%[s0])                        \n\t"
+                "lwc1    %[temp18],  2828(%[v0])                        \n\t"
+                "lwc1    %[temp19],  1292(%[s0])                        \n\t"
+                "madd.s  %[temp0],   %[temp0],   %[temp4],   %[temp5]   \n\t"
+                "lwc1    %[temp4],   3072(%[v0])                        \n\t"
+                "madd.s  %[temp1],   %[temp1],   %[temp6],   %[temp7]   \n\t"
+                "lwc1    %[temp5],   1536(%[s0])                        \n\t"
+                "madd.s  %[temp2],   %[temp2],   %[temp8],   %[temp9]   \n\t"
+                "lwc1    %[temp6],   3076(%[v0])                        \n\t"
+                "madd.s  %[temp3],   %[temp3],   %[temp10],  %[temp11]  \n\t"
+                "lwc1    %[temp7],   1540(%[s0])                        \n\t"
+                "madd.s  %[temp0],   %[temp0],   %[temp12],  %[temp13]  \n\t"
+                "lwc1    %[temp8],   3080(%[v0])                        \n\t"
+                "madd.s  %[temp1],   %[temp1],   %[temp14],  %[temp15]  \n\t"
+                "lwc1    %[temp9],   1544(%[s0])                        \n\t"
+                "madd.s  %[temp2],   %[temp2],   %[temp16],  %[temp17]  \n\t"
+                "lwc1    %[temp10],  3084(%[v0])                        \n\t"
+                "madd.s  %[temp3],   %[temp3],   %[temp18],  %[temp19]  \n\t"
+                "lwc1    %[temp11],  1548(%[s0])                        \n\t"
+                "lwc1    %[temp12],  3840(%[v0])                        \n\t"
+                "lwc1    %[temp13],  1792(%[s0])                        \n\t"
+                "lwc1    %[temp14],  3844(%[v0])                        \n\t"
+                "lwc1    %[temp15],  1796(%[s0])                        \n\t"
+                "lwc1    %[temp16],  3848(%[v0])                        \n\t"
+                "lwc1    %[temp17],  1800(%[s0])                        \n\t"
+                "lwc1    %[temp18],  3852(%[v0])                        \n\t"
+                "lwc1    %[temp19],  1804(%[s0])                        \n\t"
+                "madd.s  %[temp0],   %[temp0],   %[temp4],   %[temp5]   \n\t"
+                "lwc1    %[temp4],   4096(%[v0])                        \n\t"
+                "madd.s  %[temp1],   %[temp1],   %[temp6],   %[temp7]   \n\t"
+                "lwc1    %[temp5],   2048(%[s0])                        \n\t"
+                "madd.s  %[temp2],   %[temp2],   %[temp8],   %[temp9]   \n\t"
+                "lwc1    %[temp6],   4100(%[v0])                        \n\t"
+                "madd.s  %[temp3],   %[temp3],   %[temp10],  %[temp11]  \n\t"
+                "lwc1    %[temp7],   2052(%[s0])                        \n\t"
+                "madd.s  %[temp0],   %[temp0],   %[temp12],  %[temp13]  \n\t"
+                "lwc1    %[temp8],   4104(%[v0])                        \n\t"
+                "addiu   %[dst],     %[dst],     16                     \n\t"
+                "madd.s  %[temp1],   %[temp1],   %[temp14],  %[temp15]  \n\t"
+                "lwc1    %[temp9],   2056(%[s0])                        \n\t"
+                "addiu   %[s0],      %[s0],      16                     \n\t"
+                "madd.s  %[temp2],   %[temp2],   %[temp16],  %[temp17]  \n\t"
+                "lwc1    %[temp10],  4108(%[v0])                        \n\t"
+                "addiu   %[v0],      %[v0],      16                     \n\t"
+                "madd.s  %[temp3],   %[temp3],   %[temp18],  %[temp19]  \n\t"
+                "lwc1    %[temp11],  2044(%[s0])                        \n\t"
+                "lwc1    %[temp12],  4848(%[v0])                        \n\t"
+                "lwc1    %[temp13],  2288(%[s0])                        \n\t"
+                "lwc1    %[temp14],  4852(%[v0])                        \n\t"
+                "lwc1    %[temp15],  2292(%[s0])                        \n\t"
+                "lwc1    %[temp16],  4856(%[v0])                        \n\t"
+                "lwc1    %[temp17],  2296(%[s0])                        \n\t"
+                "lwc1    %[temp18],  4860(%[v0])                        \n\t"
+                "lwc1    %[temp19],  2300(%[s0])                        \n\t"
+                "madd.s  %[temp0],   %[temp0],   %[temp4],   %[temp5]   \n\t"
+                "lwc1    %[temp4],   0(%[v0])                           \n\t"
+                "madd.s  %[temp1],   %[temp1],   %[temp6],   %[temp7]   \n\t"
+                "lwc1    %[temp5],   0(%[s0])                           \n\t"
+                "madd.s  %[temp2],   %[temp2],   %[temp8],   %[temp9]   \n\t"
+                "lwc1    %[temp6],   4(%[v0])                           \n\t"
+                "madd.s  %[temp3],   %[temp3],   %[temp10],  %[temp11]  \n\t"
+                "lwc1    %[temp7],   4(%[s0])                           \n\t"
+                "madd.s  %[temp0],   %[temp0],   %[temp12],  %[temp13]  \n\t"
+                "lwc1    %[temp8],   8(%[v0])                           \n\t"
+                "madd.s  %[temp1],   %[temp1],   %[temp14],  %[temp15]  \n\t"
+                "lwc1    %[temp9],   8(%[s0])                           \n\t"
+                "madd.s  %[temp2],   %[temp2],   %[temp16],  %[temp17]  \n\t"
+                "lwc1    %[temp10],  12(%[v0])                          \n\t"
+                "madd.s  %[temp3],   %[temp3],   %[temp18],  %[temp19]  \n\t"
+                "lwc1    %[temp11],  12(%[s0])                          \n\t"
+                "lwc1    %[temp12],  768(%[v0])                         \n\t"
+                "lwc1    %[temp13],  256(%[s0])                         \n\t"
+                "lwc1    %[temp14],  772(%[v0])                         \n\t"
+                "lwc1    %[temp15],  260(%[s0])                         \n\t"
+                "lwc1    %[temp16],  776(%[v0])                         \n\t"
+                "lwc1    %[temp17],  264(%[s0])                         \n\t"
+                "lwc1    %[temp18],  780(%[v0])                         \n\t"
+                "lwc1    %[temp19],  268(%[s0])                         \n\t"
+                "swc1    %[temp0],   -16(%[dst])                        \n\t"
+                "swc1    %[temp1],   -12(%[dst])                        \n\t"
+                "swc1    %[temp2],   -8(%[dst])                         \n\t"
+                "bne     %[v0],      %[v0_end],  1b                     \n\t"
+                " swc1   %[temp3],   -4(%[dst])                         \n\t"
+                "mul.s   %[temp0],   %[temp4],   %[temp5]               \n\t"
+                "lwc1    %[temp4],   1024(%[v0])                        \n\t"
+                "mul.s   %[temp1],   %[temp6],   %[temp7]               \n\t"
+                "lwc1    %[temp5],   512(%[s0])                         \n\t"
+                "mul.s   %[temp2],   %[temp8],   %[temp9]               \n\t"
+                "lwc1    %[temp6],   1028(%[v0])                        \n\t"
+                "mul.s   %[temp3],   %[temp10],  %[temp11]              \n\t"
+                "lwc1    %[temp7],   516(%[s0])                         \n\t"
+                "madd.s  %[temp0],   %[temp0],   %[temp12],  %[temp13]  \n\t"
+                "lwc1    %[temp8],   1032(%[v0])                        \n\t"
+                "madd.s  %[temp1],   %[temp1],   %[temp14],  %[temp15]  \n\t"
+                "lwc1    %[temp9],   520(%[s0])                         \n\t"
+                "madd.s  %[temp2],   %[temp2],   %[temp16],  %[temp17]  \n\t"
+                "lwc1    %[temp10],  1036(%[v0])                        \n\t"
+                "madd.s  %[temp3],   %[temp3],   %[temp18],  %[temp19]  \n\t"
+                "lwc1    %[temp11],  524(%[s0])                         \n\t"
+                "lwc1    %[temp12],  1792(%[v0])                        \n\t"
+                "lwc1    %[temp13],  768(%[s0])                         \n\t"
+                "lwc1    %[temp14],  1796(%[v0])                        \n\t"
+                "lwc1    %[temp15],  772(%[s0])                         \n\t"
+                "lwc1    %[temp16],  1800(%[v0])                        \n\t"
+                "lwc1    %[temp17],  776(%[s0])                         \n\t"
+                "lwc1    %[temp18],  1804(%[v0])                        \n\t"
+                "lwc1    %[temp19],  780(%[s0])                         \n\t"
+                "madd.s  %[temp0],   %[temp0],   %[temp4],   %[temp5]   \n\t"
+                "lwc1    %[temp4],   2048(%[v0])                        \n\t"
+                "madd.s  %[temp1],   %[temp1],   %[temp6],   %[temp7]   \n\t"
+                "lwc1    %[temp5],   1024(%[s0])                        \n\t"
+                "madd.s  %[temp2],   %[temp2],   %[temp8],   %[temp9]   \n\t"
+                "lwc1    %[temp6],   2052(%[v0])                        \n\t"
+                "madd.s  %[temp3],   %[temp3],   %[temp10],  %[temp11]  \n\t"
+                "lwc1    %[temp7],   1028(%[s0])                        \n\t"
+                "madd.s  %[temp0],   %[temp0],   %[temp12],  %[temp13]  \n\t"
+                "lwc1    %[temp8],   2056(%[v0])                        \n\t"
+                "madd.s  %[temp1],   %[temp1],   %[temp14],  %[temp15]  \n\t"
+                "lwc1    %[temp9],   1032(%[s0])                        \n\t"
+                "madd.s  %[temp2],   %[temp2],   %[temp16],  %[temp17]  \n\t"
+                "lwc1    %[temp10],  2060(%[v0])                        \n\t"
+                "madd.s  %[temp3],   %[temp3],   %[temp18],  %[temp19]  \n\t"
+                "lwc1    %[temp11],  1036(%[s0])                        \n\t"
+                "lwc1    %[temp12],  2816(%[v0])                        \n\t"
+                "lwc1    %[temp13],  1280(%[s0])                        \n\t"
+                "lwc1    %[temp14],  2820(%[v0])                        \n\t"
+                "lwc1    %[temp15],  1284(%[s0])                        \n\t"
+                "lwc1    %[temp16],  2824(%[v0])                        \n\t"
+                "lwc1    %[temp17],  1288(%[s0])                        \n\t"
+                "lwc1    %[temp18],  2828(%[v0])                        \n\t"
+                "lwc1    %[temp19],  1292(%[s0])                        \n\t"
+                "madd.s  %[temp0],   %[temp0],   %[temp4],   %[temp5]   \n\t"
+                "lwc1    %[temp4],   3072(%[v0])                        \n\t"
+                "madd.s  %[temp1],   %[temp1],   %[temp6],   %[temp7]   \n\t"
+                "lwc1    %[temp5],   1536(%[s0])                        \n\t"
+                "madd.s  %[temp2],   %[temp2],   %[temp8],   %[temp9]   \n\t"
+                "lwc1    %[temp6],   3076(%[v0])                        \n\t"
+                "madd.s  %[temp3],   %[temp3],   %[temp10],  %[temp11]  \n\t"
+                "lwc1    %[temp7],   1540(%[s0])                        \n\t"
+                "madd.s  %[temp0],   %[temp0],   %[temp12],  %[temp13]  \n\t"
+                "lwc1    %[temp8],   3080(%[v0])                        \n\t"
+                "madd.s  %[temp1],   %[temp1],   %[temp14],  %[temp15]  \n\t"
+                "lwc1    %[temp9],   1544(%[s0])                        \n\t"
+                "madd.s  %[temp2],   %[temp2],   %[temp16],  %[temp17]  \n\t"
+                "lwc1    %[temp10],  3084(%[v0])                        \n\t"
+                "madd.s  %[temp3],   %[temp3],   %[temp18],  %[temp19]  \n\t"
+                "lwc1    %[temp11],  1548(%[s0])                        \n\t"
+                "lwc1    %[temp12],  3840(%[v0])                        \n\t"
+                "lwc1    %[temp13],  1792(%[s0])                        \n\t"
+                "lwc1    %[temp14],  3844(%[v0])                        \n\t"
+                "lwc1    %[temp15],  1796(%[s0])                        \n\t"
+                "lwc1    %[temp16],  3848(%[v0])                        \n\t"
+                "lwc1    %[temp17],  1800(%[s0])                        \n\t"
+                "lwc1    %[temp18],  3852(%[v0])                        \n\t"
+                "lwc1    %[temp19],  1804(%[s0])                        \n\t"
+                "madd.s  %[temp0],   %[temp0],   %[temp4],   %[temp5]   \n\t"
+                "lwc1    %[temp4],   4096(%[v0])                        \n\t"
+                "madd.s  %[temp1],   %[temp1],   %[temp6],   %[temp7]   \n\t"
+                "lwc1    %[temp5],   2048(%[s0])                        \n\t"
+                "madd.s  %[temp2],   %[temp2],   %[temp8],   %[temp9]   \n\t"
+                "lwc1    %[temp6],   4100(%[v0])                        \n\t"
+                "madd.s  %[temp3],   %[temp3],   %[temp10],  %[temp11]  \n\t"
+                "lwc1    %[temp7],   2052(%[s0])                        \n\t"
+                "madd.s  %[temp0],   %[temp0],   %[temp12],  %[temp13]  \n\t"
+                "lwc1    %[temp8],   4104(%[v0])                        \n\t"
+                "madd.s  %[temp1],   %[temp1],   %[temp14],  %[temp15]  \n\t"
+                "lwc1    %[temp9],   2056(%[s0])                        \n\t"
+                "madd.s  %[temp2],   %[temp2],   %[temp16],  %[temp17]  \n\t"
+                "lwc1    %[temp10],  4108(%[v0])                        \n\t"
+                "madd.s  %[temp3],   %[temp3],   %[temp18],  %[temp19]  \n\t"
+                "lwc1    %[temp11],  2060(%[s0])                        \n\t"
+                "lwc1    %[temp12],  4864(%[v0])                        \n\t"
+                "lwc1    %[temp13],  2304(%[s0])                        \n\t"
+                "lwc1    %[temp14],  4868(%[v0])                        \n\t"
+                "lwc1    %[temp15],  2308(%[s0])                        \n\t"
+                "madd.s  %[temp0],   %[temp0],   %[temp4],   %[temp5]   \n\t"
+                "lwc1    %[temp16],  4872(%[v0])                        \n\t"
+                "madd.s  %[temp1],   %[temp1],   %[temp6],   %[temp7]   \n\t"
+                "lwc1    %[temp17],  2312(%[s0])                        \n\t"
+                "madd.s  %[temp2],   %[temp2],   %[temp8],   %[temp9]   \n\t"
+                "lwc1    %[temp18],  4876(%[v0])                        \n\t"
+                "madd.s  %[temp3],   %[temp3],   %[temp10],  %[temp11]  \n\t"
+                "lwc1    %[temp19],  2316(%[s0])                        \n\t"
+                "madd.s  %[temp0],   %[temp0],   %[temp12],  %[temp13]  \n\t"
+                "addiu   %[dst],     %[dst],     16                     \n\t"
+                "madd.s  %[temp1],   %[temp1],   %[temp14],  %[temp15]  \n\t"
+                "madd.s  %[temp2],   %[temp2],   %[temp16],  %[temp17]  \n\t"
+                "madd.s  %[temp3],   %[temp3],   %[temp18],  %[temp19]  \n\t"
+                "swc1    %[temp0],   -16(%[dst])                        \n\t"
+                "swc1    %[temp1],   -12(%[dst])                        \n\t"
+                "swc1    %[temp2],   -8(%[dst])                         \n\t"
+                "swc1    %[temp3],   -4(%[dst])                         \n\t"
+                ".set    pop                                            \n\t"
+
+                : [dst]"+r"(dst), [v0]"+r"(vv0), [s0]"+r"(s0),
+                  [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
+                  [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
+                  [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
+                  [temp9]"=&f"(temp9), [temp10]"=&f"(temp10), [temp11]"=&f"(temp11),
+                  [temp12]"=&f"(temp12), [temp13]"=&f"(temp13), [temp14]"=&f"(temp14),
+                  [temp15]"=&f"(temp15), [temp16]"=&f"(temp16), [temp17]"=&f"(temp17),
+                  [temp18]"=&f"(temp18), [temp19]"=&f"(temp19)
+                : [v0_end]"r"(v0_end)
+                : "memory"
+            );
+        }
+        else
+        {
+            fdsp->vector_fmul   (out, v                , sbr_qmf_window                       , 64 >> div);
+            fdsp->vector_fmul_add(out, v + ( 192 >> div), sbr_qmf_window + ( 64 >> div), out   , 64 >> div);
+            fdsp->vector_fmul_add(out, v + ( 256 >> div), sbr_qmf_window + (128 >> div), out   , 64 >> div);
+            fdsp->vector_fmul_add(out, v + ( 448 >> div), sbr_qmf_window + (192 >> div), out   , 64 >> div);
+            fdsp->vector_fmul_add(out, v + ( 512 >> div), sbr_qmf_window + (256 >> div), out   , 64 >> div);
+            fdsp->vector_fmul_add(out, v + ( 704 >> div), sbr_qmf_window + (320 >> div), out   , 64 >> div);
+            fdsp->vector_fmul_add(out, v + ( 768 >> div), sbr_qmf_window + (384 >> div), out   , 64 >> div);
+            fdsp->vector_fmul_add(out, v + ( 960 >> div), sbr_qmf_window + (448 >> div), out   , 64 >> div);
+            fdsp->vector_fmul_add(out, v + (1024 >> div), sbr_qmf_window + (512 >> div), out   , 64 >> div);
+            fdsp->vector_fmul_add(out, v + (1216 >> div), sbr_qmf_window + (576 >> div), out   , 64 >> div);
+            out += 64 >> div;
+        }
+    }
+}
+
+#define sbr_qmf_analysis sbr_qmf_analysis_mips    /* later uses of the plain name expand to the _mips name */
+#define sbr_qmf_synthesis sbr_qmf_synthesis_mips  /* same substitution for the synthesis routine */
+
+#endif /* HAVE_MIPSFPU */
+#endif /* HAVE_INLINE_ASM */
+
+#endif /* AVCODEC_MIPS_AACSBR_FLOAT_H */
diff --git a/libavcodec/mips/ac3dsp_mips.c b/libavcodec/mips/ac3dsp_mips.c
new file mode 100644
index 0000000..f33c6f1
--- /dev/null
+++ b/libavcodec/mips/ac3dsp_mips.c
@@ -0,0 +1,412 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Authors:  Branimir Vasic (bvasic@mips.com)
+ *           Nedeljko Babic (nbabic@mips.com)
+ *
+ * Various AC-3 DSP Utils optimized for MIPS
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/ac3dsp.c
+ */
+
+#include "config.h"
+#include "libavcodec/ac3dsp.h"
+#include "libavcodec/ac3.h"
+
+
+#if HAVE_INLINE_ASM
+#if HAVE_MIPSDSPR1
+static void ac3_bit_alloc_calc_bap_mips(int16_t *mask, int16_t *psd,
+                                        int start, int end,
+                                        int snr_offset, int floor,
+                                        const uint8_t *bap_tab, uint8_t *bap)
+{   /* For each bin from start up to end: bap[bin] = bap_tab[clip((psd[bin] - m) >> 5, 0, 63)], m computed per band. */
+    int band, band_end, cond;
+    int m, address1, address2;
+    int16_t *psd1, *psd_end;
+    uint8_t *bap1;
+    /* snr_offset == -960 is a special case: AC3_MAX_COEFS bytes of bap are zeroed and nothing else is done. */
+    if (snr_offset == -960) {
+        memset(bap, 0, AC3_MAX_COEFS);
+        return;
+    }
+    /* psd1 and bap1 are the read/write cursors; the asm below advances them together. */
+    psd1 = &psd[start];
+    bap1 = &bap[start];
+    band = ff_ac3_bin_to_band_tab[start];
+    /* One pass of the do/while per band; band_end is the band's end bin, clamped to 'end'. */
+    do {
+        m = (FFMAX(mask[band] - snr_offset - floor, 0) & 0x1FE0) + floor;
+        band_end = ff_ac3_band_start_tab[++band];
+        band_end = FFMIN(band_end, end);
+        psd_end = psd + band_end - 1;   /* last element of the band: "psd1 < psd_end" means at least two remain */
+        /* address1/address2 hold the table index and then the loaded bap value; cond is the loop flag. */
+        __asm__ volatile (
+            "slt        %[cond],        %[psd1],        %[psd_end]  \n\t"
+            "beqz       %[cond],        1f                          \n\t"   /* fewer than two left: go to the single-element code */
+            "2:                                                     \n\t"   /* main loop: two elements per pass, interleaved */
+            "lh         %[address1],    0(%[psd1])                  \n\t"
+            "lh         %[address2],    2(%[psd1])                  \n\t"
+            "addiu      %[psd1],        %[psd1],        4           \n\t"
+            "subu       %[address1],    %[address1],    %[m]        \n\t"
+            "sra        %[address1],    %[address1],    5           \n\t"
+            "addiu      %[address1],    %[address1],    -32         \n\t"
+            "shll_s.w   %[address1],    %[address1],    26          \n\t"   /* saturating shift; with the -32/+32 bias and the sra by 26 this clips to 0..63 */
+            "subu       %[address2],    %[address2],    %[m]        \n\t"
+            "sra        %[address2],    %[address2],    5           \n\t"
+            "sra        %[address1],    %[address1],    26          \n\t"
+            "addiu      %[address1],    %[address1],    32          \n\t"
+            "lbux       %[address1],    %[address1](%[bap_tab])     \n\t"   /* address1 = bap_tab[address1] */
+            "addiu      %[address2],    %[address2],    -32         \n\t"
+            "shll_s.w   %[address2],    %[address2],    26          \n\t"
+            "sb         %[address1],    0(%[bap1])                  \n\t"
+            "slt        %[cond],        %[psd1],        %[psd_end]  \n\t"   /* loop test computed early, consumed by the bnez below */
+            "sra        %[address2],    %[address2],    26          \n\t"
+            "addiu      %[address2],    %[address2],    32          \n\t"
+            "lbux       %[address2],    %[address2](%[bap_tab])     \n\t"
+            "sb         %[address2],    1(%[bap1])                  \n\t"
+            "addiu      %[bap1],        %[bap1],        2           \n\t"
+            "bnez       %[cond],        2b                          \n\t"
+            "addiu      %[psd_end],     %[psd_end],     2           \n\t"   /* psd_end now points one element past the band */
+            "slt        %[cond],        %[psd1],        %[psd_end]  \n\t"
+            "beqz       %[cond],        3f                          \n\t"   /* nothing left over: skip the tail */
+            "1:                                                     \n\t"   /* tail: exactly one element, same computation as above */
+            "lh         %[address1],    0(%[psd1])                  \n\t"
+            "addiu      %[psd1],        %[psd1],        2           \n\t"
+            "subu       %[address1],    %[address1],    %[m]        \n\t"
+            "sra        %[address1],    %[address1],    5           \n\t"
+            "addiu      %[address1],    %[address1],    -32         \n\t"
+            "shll_s.w   %[address1],    %[address1],    26          \n\t"
+            "sra        %[address1],    %[address1],    26          \n\t"
+            "addiu      %[address1],    %[address1],    32          \n\t"
+            "lbux       %[address1],    %[address1](%[bap_tab])     \n\t"
+            "sb         %[address1],    0(%[bap1])                  \n\t"
+            "addiu      %[bap1],        %[bap1],        1           \n\t"
+            "3:                                                     \n\t"
+            /* NOTE(review): the first beqz jumps to 1: even if zero elements remain; presumably callers ensure start < end - confirm. */
+            : [address1]"=&r"(address1), [address2]"=&r"(address2),
+              [cond]"=&r"(cond), [bap1]"+r"(bap1),
+              [psd1]"+r"(psd1), [psd_end]"+r"(psd_end)
+            : [m]"r"(m), [bap_tab]"r"(bap_tab)
+            : "memory"
+        );
+    } while (end > band_end);
+}
+
+static void ac3_update_bap_counts_mips(uint16_t mant_cnt[16], uint8_t *bap,
+                                       int len)
+{   /* Increments mant_cnt[bap[k]] once for each of the len bytes of bap, walking from the last byte to the first. */
+    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+    /* temp2 = read cursor (one past the next byte), temp3 = len & 3, temp4 = bap + temp3; addresses live in ints (32-bit pointers assumed). */
+    __asm__ volatile (
+        "andi   %[temp3],   %[len],         3               \n\t"
+        "addu   %[temp2],   %[bap],         %[len]          \n\t"
+        "addu   %[temp4],   %[bap],         %[temp3]        \n\t"
+        "beq    %[temp2],   %[temp4],       4f              \n\t"   /* no complete group of four: only the remainder loop runs */
+        "1:                                                 \n\t"   /* main loop: four entries per pass */
+        "lbu    %[temp0],   -1(%[temp2])                    \n\t"
+        "lbu    %[temp5],   -2(%[temp2])                    \n\t"
+        "lbu    %[temp6],   -3(%[temp2])                    \n\t"
+        "sll    %[temp0],   %[temp0],       1               \n\t"   /* byte value * 2 = offset of a uint16_t counter */
+        "addu   %[temp0],   %[mant_cnt],    %[temp0]        \n\t"
+        "sll    %[temp5],   %[temp5],       1               \n\t"
+        "addu   %[temp5],   %[mant_cnt],    %[temp5]        \n\t"
+        "lhu    %[temp1],   0(%[temp0])                     \n\t"
+        "sll    %[temp6],   %[temp6],       1               \n\t"
+        "addu   %[temp6],   %[mant_cnt],    %[temp6]        \n\t"
+        "addiu  %[temp1],   %[temp1],       1               \n\t"
+        "sh     %[temp1],   0(%[temp0])                     \n\t"   /* each counter is stored before the next one is loaded */
+        "lhu    %[temp1],   0(%[temp5])                     \n\t"
+        "lbu    %[temp7],   -4(%[temp2])                    \n\t"
+        "addiu  %[temp2],   %[temp2],       -4              \n\t"
+        "addiu  %[temp1],   %[temp1],       1               \n\t"
+        "sh     %[temp1],   0(%[temp5])                     \n\t"
+        "lhu    %[temp1],   0(%[temp6])                     \n\t"
+        "sll    %[temp7],   %[temp7],       1               \n\t"
+        "addu   %[temp7],   %[mant_cnt],    %[temp7]        \n\t"
+        "addiu  %[temp1],   %[temp1],1                      \n\t"
+        "sh     %[temp1],   0(%[temp6])                     \n\t"
+        "lhu    %[temp1],   0(%[temp7])                     \n\t"
+        "addiu  %[temp1],   %[temp1],       1               \n\t"
+        "sh     %[temp1],   0(%[temp7])                     \n\t"
+        "bne    %[temp2],   %[temp4],       1b              \n\t"
+        "4:                                                 \n\t"
+        "beqz   %[temp3],   2f                              \n\t"   /* len & 3 == 0: no remainder */
+        "3:                                                 \n\t"   /* remainder loop: one entry per pass, temp3 counts down */
+        "addiu  %[temp3],   %[temp3],       -1              \n\t"
+        "lbu    %[temp0],   -1(%[temp2])                    \n\t"
+        "addiu  %[temp2],   %[temp2],       -1              \n\t"
+        "sll    %[temp0],   %[temp0],       1               \n\t"
+        "addu   %[temp0],   %[mant_cnt],    %[temp0]        \n\t"
+        "lhu    %[temp1],   0(%[temp0])                     \n\t"
+        "addiu  %[temp1],   %[temp1],       1               \n\t"
+        "sh     %[temp1],   0(%[temp0])                     \n\t"
+        "bgtz   %[temp3],   3b                              \n\t"
+        "2:                                                 \n\t"
+
+        : [temp0] "=&r" (temp0), [temp1] "=&r" (temp1),
+          [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
+          [temp4] "=&r" (temp4), [temp5] "=&r" (temp5),
+          [temp6] "=&r" (temp6), [temp7] "=&r" (temp7)
+        : [len] "r" (len), [bap] "r" (bap),
+          [mant_cnt] "r" (mant_cnt)
+        : "memory"
+    );
+}
+#endif
+
+#if HAVE_MIPSFPU && HAVE_MIPS32R2
+static void float_to_fixed24_mips(int32_t *dst, const float *src, unsigned int len)
+{   /* Writes dst[k] = (int32_t)(src[k] * 2^24) for len values, eight values per loop pass. */
+    const float scale = 1 << 24;    /* 2^24 as a float */
+    float src0, src1, src2, src3, src4, src5, src6, src7;
+    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+    /* len is unsigned and tested only after "len -= 8": the loop runs at least once and stops only at exactly 0, so len must be a nonzero multiple of 8. */
+    do {
+        __asm__ volatile (
+            "lwc1       %[src0],    0(%[src])               \n\t"   /* load eight consecutive floats */
+            "lwc1       %[src1],    4(%[src])               \n\t"
+            "lwc1       %[src2],    8(%[src])               \n\t"
+            "lwc1       %[src3],    12(%[src])              \n\t"
+            "lwc1       %[src4],    16(%[src])              \n\t"
+            "lwc1       %[src5],    20(%[src])              \n\t"
+            "lwc1       %[src6],    24(%[src])              \n\t"
+            "lwc1       %[src7],    28(%[src])              \n\t"
+            "mul.s      %[src0],    %[src0],    %[scale]    \n\t"   /* scale each value by 2^24 */
+            "mul.s      %[src1],    %[src1],    %[scale]    \n\t"
+            "mul.s      %[src2],    %[src2],    %[scale]    \n\t"
+            "mul.s      %[src3],    %[src3],    %[scale]    \n\t"
+            "mul.s      %[src4],    %[src4],    %[scale]    \n\t"
+            "mul.s      %[src5],    %[src5],    %[scale]    \n\t"
+            "mul.s      %[src6],    %[src6],    %[scale]    \n\t"
+            "mul.s      %[src7],    %[src7],    %[scale]    \n\t"
+            "cvt.w.s    %[src0],    %[src0]                 \n\t"   /* float -> 32-bit integer; NOTE(review): presumably uses the current FPU rounding mode - confirm */
+            "cvt.w.s    %[src1],    %[src1]                 \n\t"
+            "cvt.w.s    %[src2],    %[src2]                 \n\t"
+            "cvt.w.s    %[src3],    %[src3]                 \n\t"
+            "cvt.w.s    %[src4],    %[src4]                 \n\t"
+            "cvt.w.s    %[src5],    %[src5]                 \n\t"
+            "cvt.w.s    %[src6],    %[src6]                 \n\t"
+            "cvt.w.s    %[src7],    %[src7]                 \n\t"
+            "mfc1       %[temp0],   %[src0]                 \n\t"   /* move the integer bit patterns to general registers */
+            "mfc1       %[temp1],   %[src1]                 \n\t"
+            "mfc1       %[temp2],   %[src2]                 \n\t"
+            "mfc1       %[temp3],   %[src3]                 \n\t"
+            "mfc1       %[temp4],   %[src4]                 \n\t"
+            "mfc1       %[temp5],   %[src5]                 \n\t"
+            "mfc1       %[temp6],   %[src6]                 \n\t"
+            "mfc1       %[temp7],   %[src7]                 \n\t"
+            "sw         %[temp0],   0(%[dst])               \n\t"   /* store the eight results */
+            "sw         %[temp1],   4(%[dst])               \n\t"
+            "sw         %[temp2],   8(%[dst])               \n\t"
+            "sw         %[temp3],   12(%[dst])              \n\t"
+            "sw         %[temp4],   16(%[dst])              \n\t"
+            "sw         %[temp5],   20(%[dst])              \n\t"
+            "sw         %[temp6],   24(%[dst])              \n\t"
+            "sw         %[temp7],   28(%[dst])              \n\t"
+
+            : [dst] "+r" (dst), [src] "+r" (src),
+              [src0] "=&f" (src0), [src1] "=&f" (src1),
+              [src2] "=&f" (src2), [src3] "=&f" (src3),
+              [src4] "=&f" (src4), [src5] "=&f" (src5),
+              [src6] "=&f" (src6), [src7] "=&f" (src7),
+              [temp0] "=r" (temp0), [temp1] "=r" (temp1),
+              [temp2] "=r" (temp2), [temp3] "=r" (temp3),
+              [temp4] "=r" (temp4), [temp5] "=r" (temp5),
+              [temp6] "=r" (temp6), [temp7] "=r" (temp7)
+            : [scale] "f" (scale)
+            : "memory"
+        );
+        src = src + 8;      /* the asm only uses fixed offsets 0..28; the pointers are advanced here in C */
+        dst = dst + 8;
+        len -= 8;
+    } while (len > 0);
+}
+
+static void ac3_downmix_mips(float **samples, float (*matrix)[2],
+                          int out_ch, int in_ch, int len)
+{   /* In-place downmix of in_ch channels to out_ch (2 or 1) channels, four samples per pass; other out_ch values store nothing. */
+    int i, j, i1, i2, i3;
+    float v0, v1, v2, v3;
+    float v4, v5, v6, v7;
+    float samples0, samples1, samples2, samples3, matrix_j, matrix_j2;
+    float *samples_p,*matrix_p, **samples_x, **samples_end, **samples_sw;
+    /* Both loops are tested at the bottom with bne, so len must be a nonzero multiple of 4 and in_ch at least 1. */
+    __asm__ volatile(
+        ".set   push                                                \n\t"
+        ".set   noreorder                                           \n\t"
+        /* noreorder: the instruction written after each branch (indented by one space) executes in its delay slot. */
+        "li     %[i1],          2                                   \n\t"
+        "sll    %[len],         2                                   \n\t"   /* len becomes a byte count; i, i1, i2, i3 are byte offsets */
+        "move   %[i],           $zero                               \n\t"
+        "sll    %[j],           %[in_ch],               2           \n\t"   /* byte size of in_ch entries of samples[] (4-byte pointers assumed) */
+
+        "bne    %[out_ch],      %[i1],                  3f          \n\t"   // if (out_ch == 2)
+        " li    %[i2],          1                                   \n\t"   /* delay slot: constant 1 for the out_ch test at 3: */
+
+        "2:                                                         \n\t"   // start of the for loop (for (i = 0; i < len; i+=4))
+        "move   %[matrix_p],    %[matrix]                           \n\t"
+        "move   %[samples_x],   %[samples]                          \n\t"
+        "mtc1   $zero,          %[v0]                               \n\t"   /* clear the eight accumulators */
+        "mtc1   $zero,          %[v1]                               \n\t"
+        "mtc1   $zero,          %[v2]                               \n\t"
+        "mtc1   $zero,          %[v3]                               \n\t"
+        "mtc1   $zero,          %[v4]                               \n\t"
+        "mtc1   $zero,          %[v5]                               \n\t"
+        "mtc1   $zero,          %[v6]                               \n\t"
+        "mtc1   $zero,          %[v7]                               \n\t"
+        "addiu  %[i1],          %[i],                  4            \n\t"
+        "addiu  %[i2],          %[i],                  8            \n\t"
+        "lw     %[samples_p],   0(%[samples_x])                     \n\t"
+        "addiu  %[i3],          %[i],                  12           \n\t"
+        "addu   %[samples_end], %[samples_x],          %[j]         \n\t"
+        "move   %[samples_sw],  %[samples_p]                        \n\t"   /* remember samples[0] as the first output buffer */
+
+        "1:                                                         \n\t"   // start of the inner for loop (for (j = 0; j < in_ch; j++))
+        "lwc1   %[matrix_j],    0(%[matrix_p])                      \n\t"
+        "lwc1   %[matrix_j2],   4(%[matrix_p])                      \n\t"
+        "lwxc1  %[samples0],    %[i](%[samples_p])                  \n\t"
+        "lwxc1  %[samples1],    %[i1](%[samples_p])                 \n\t"
+        "lwxc1  %[samples2],    %[i2](%[samples_p])                 \n\t"
+        "lwxc1  %[samples3],    %[i3](%[samples_p])                 \n\t"
+        "addiu  %[matrix_p],    8                                   \n\t"   /* next float[2] row of matrix */
+        "addiu  %[samples_x],   4                                   \n\t"   /* next channel pointer */
+        "madd.s %[v0],          %[v0],  %[samples0],    %[matrix_j] \n\t"   /* v0..v3 accumulate sample * matrix[j][0] */
+        "madd.s %[v1],          %[v1],  %[samples1],    %[matrix_j] \n\t"
+        "madd.s %[v2],          %[v2],  %[samples2],    %[matrix_j] \n\t"
+        "madd.s %[v3],          %[v3],  %[samples3],    %[matrix_j] \n\t"
+        "madd.s %[v4],          %[v4],  %[samples0],    %[matrix_j2]\n\t"   /* v4..v7 accumulate sample * matrix[j][1] */
+        "madd.s %[v5],          %[v5],  %[samples1],    %[matrix_j2]\n\t"
+        "madd.s %[v6],          %[v6],  %[samples2],    %[matrix_j2]\n\t"
+        "madd.s %[v7],          %[v7],  %[samples3],    %[matrix_j2]\n\t"
+        "bne    %[samples_x],   %[samples_end],         1b          \n\t"
+        " lw    %[samples_p],   0(%[samples_x])                     \n\t"   /* delay slot: executed on exit too, result then overwritten below */
+        /* v0..v3 are stored through samples_sw (samples[0]); v4..v7 through samples[1]. */
+        "lw     %[samples_p],   4(%[samples])                       \n\t"
+        "swxc1  %[v0],          %[i](%[samples_sw])                 \n\t"
+        "swxc1  %[v1],          %[i1](%[samples_sw])                \n\t"
+        "swxc1  %[v2],          %[i2](%[samples_sw])                \n\t"
+        "swxc1  %[v3],          %[i3](%[samples_sw])                \n\t"
+        "swxc1  %[v4],          %[i](%[samples_p])                  \n\t"
+        "addiu  %[i],           16                                  \n\t"   /* i1..i3 still hold the offsets of this pass */
+        "swxc1  %[v5],          %[i1](%[samples_p])                 \n\t"
+        "swxc1  %[v6],          %[i2](%[samples_p])                 \n\t"
+        "bne    %[i],           %[len],                 2b          \n\t"
+        " swxc1 %[v7],          %[i3](%[samples_p])                 \n\t"
+
+        "3:                                                         \n\t"
+        "bne    %[out_ch],      %[i2],                  6f          \n\t"   // if (out_ch == 1)
+        " nop                                                       \n\t"
+
+        "5:                                                         \n\t"   // start of the outer for loop (for (i = 0; i < len; i+=4))
+        "move   %[matrix_p],    %[matrix]                           \n\t"
+        "move   %[samples_x],   %[samples]                          \n\t"
+        "mtc1   $zero,          %[v0]                               \n\t"
+        "mtc1   $zero,          %[v1]                               \n\t"
+        "mtc1   $zero,          %[v2]                               \n\t"
+        "mtc1   $zero,          %[v3]                               \n\t"
+        "addiu  %[i1],          %[i],                  4            \n\t"
+        "addiu  %[i2],          %[i],                  8            \n\t"
+        "lw     %[samples_p],   0(%[samples_x])                     \n\t"
+        "addiu  %[i3],          %[i],                  12           \n\t"
+        "addu   %[samples_end], %[samples_x],          %[j]         \n\t"
+        "move   %[samples_sw],  %[samples_p]                        \n\t"
+
+        "4:                                                         \n\t"   // start of the inner for loop (for (j = 0; j < in_ch; j++))
+        "lwc1   %[matrix_j],    0(%[matrix_p])                      \n\t"   /* only matrix[j][0] is used in the single-output case */
+        "lwxc1  %[samples0],    %[i](%[samples_p])                  \n\t"
+        "lwxc1  %[samples1],    %[i1](%[samples_p])                 \n\t"
+        "lwxc1  %[samples2],    %[i2](%[samples_p])                 \n\t"
+        "lwxc1  %[samples3],    %[i3](%[samples_p])                 \n\t"
+        "addiu  %[matrix_p],    8                                   \n\t"
+        "addiu  %[samples_x],   4                                   \n\t"
+        "madd.s %[v0],          %[v0],  %[samples0],    %[matrix_j] \n\t"
+        "madd.s %[v1],          %[v1],  %[samples1],    %[matrix_j] \n\t"
+        "madd.s %[v2],          %[v2],  %[samples2],    %[matrix_j] \n\t"
+        "madd.s %[v3],          %[v3],  %[samples3],    %[matrix_j] \n\t"
+        "bne    %[samples_x],   %[samples_end],         4b          \n\t"
+        " lw    %[samples_p],   0(%[samples_x])                     \n\t"
+        /* The four sums are stored back into samples[0] through samples_sw. */
+        "swxc1  %[v0],          %[i](%[samples_sw])                 \n\t"
+        "addiu  %[i],           16                                  \n\t"
+        "swxc1  %[v1],          %[i1](%[samples_sw])                \n\t"
+        "swxc1  %[v2],          %[i2](%[samples_sw])                \n\t"
+        "bne    %[i],           %[len],                 5b          \n\t"
+        " swxc1 %[v3],          %[i3](%[samples_sw])                \n\t"
+        "6:                                                         \n\t"
+
+        ".set   pop"
+        :[samples_p]"=&r"(samples_p), [matrix_j]"=&f"(matrix_j), [matrix_j2]"=&f"(matrix_j2),
+         [samples0]"=&f"(samples0), [samples1]"=&f"(samples1),
+         [samples2]"=&f"(samples2), [samples3]"=&f"(samples3),
+         [v0]"=&f"(v0), [v1]"=&f"(v1), [v2]"=&f"(v2), [v3]"=&f"(v3),
+         [v4]"=&f"(v4), [v5]"=&f"(v5), [v6]"=&f"(v6), [v7]"=&f"(v7),
+         [samples_x]"=&r"(samples_x), [matrix_p]"=&r"(matrix_p),
+         [samples_end]"=&r"(samples_end), [samples_sw]"=&r"(samples_sw),
+         [i1]"=&r"(i1), [i2]"=&r"(i2), [i3]"=&r"(i3), [i]"=&r"(i),
+         [j]"=&r"(j), [len]"+r"(len)
+        :[samples]"r"(samples), [matrix]"r"(matrix),
+         [in_ch]"r"(in_ch), [out_ch]"r"(out_ch)
+        :"memory"
+    );
+}
+#endif
+#endif /* HAVE_INLINE_ASM */
+
+void ff_ac3dsp_init_mips(AC3DSPContext *c, int bit_exact) {   /* installs the MIPS routines into c; bit_exact is not referenced here */
+#if HAVE_INLINE_ASM                     /* every override below is written with inline asm */
+#if HAVE_MIPSDSPR1
+    c->bit_alloc_calc_bap = ac3_bit_alloc_calc_bap_mips;
+    c->update_bap_counts  = ac3_update_bap_counts_mips;
+#endif
+#if HAVE_MIPSFPU && HAVE_MIPS32R2
+    c->float_to_fixed24 = float_to_fixed24_mips;
+    c->downmix          = ac3_downmix_mips;
+#endif
+#endif
+    /* Fields of c that are not assigned above are left as the caller set them. */
+}
diff --git a/libavcodec/mips/acelp_filters_mips.c b/libavcodec/mips/acelp_filters_mips.c
new file mode 100644
index 0000000..c8d980a
--- /dev/null
+++ b/libavcodec/mips/acelp_filters_mips.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Nedeljko Babic (nbabic@mips.com)
+ *
+ * various filters for ACELP-based codecs optimized for MIPS
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/acelp_filters.c
+ */
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavcodec/acelp_filters.h"
+
+#if HAVE_INLINE_ASM
+/* MIPS inline-asm routine installed as ACELPFContext.acelp_interpolatef. */
+static void ff_acelp_interpolatef_mips(float *out, const float *in,
+                           const float *filter_coeffs, int precision,
+                           int frac_pos, int filter_length, int length)
+{
+    int n, i;
+    int prec = precision * 4; /* coefficient stride in bytes (4-byte floats) */
+    int fc_offset = precision - frac_pos;
+    float in_val_p, in_val_m, fc_val_p, fc_val_m;
+
+    for (n = 0; n < length; n++) {
+        /* Four running pointers (forward and backward input samples plus
+         * their coefficients) keep address arithmetic out of the inner loop;
+         * each asm pass adds one forward and one backward tap product to v. */
+        const float *p_in_p = &in[n];
+        const float *p_in_m = &in[n-1];
+        const float *p_filter_coeffs_p = &filter_coeffs[frac_pos];
+        const float *p_filter_coeffs_m = filter_coeffs + fc_offset;
+        float v = 0;
+
+        for (i = 0; i < filter_length;i++) {
+            __asm__ volatile (
+                "lwc1   %[in_val_p],           0(%[p_in_p])                    \n\t"
+                "lwc1   %[fc_val_p],           0(%[p_filter_coeffs_p])         \n\t"
+                "lwc1   %[in_val_m],           0(%[p_in_m])                    \n\t"
+                "lwc1   %[fc_val_m],           0(%[p_filter_coeffs_m])         \n\t"
+                "addiu  %[p_in_p],             %[p_in_p],              4       \n\t"
+                "madd.s %[v],%[v],             %[in_val_p],%[fc_val_p]         \n\t"
+                "addiu  %[p_in_m],             %[p_in_m],              -4      \n\t"
+                "addu   %[p_filter_coeffs_p],  %[p_filter_coeffs_p],   %[prec] \n\t"
+                "addu   %[p_filter_coeffs_m],  %[p_filter_coeffs_m],   %[prec] \n\t"
+                "madd.s %[v],%[v],%[in_val_m], %[fc_val_m]                     \n\t"
+                /* v carries the running sum between passes: read-write */
+                : [v] "+f" (v),[p_in_p] "+r" (p_in_p), [p_in_m] "+r" (p_in_m),
+                  [p_filter_coeffs_p] "+r" (p_filter_coeffs_p),
+                  [in_val_p] "=&f" (in_val_p), [in_val_m] "=&f" (in_val_m),
+                  [fc_val_p] "=&f" (fc_val_p), [fc_val_m] "=&f" (fc_val_m),
+                  [p_filter_coeffs_m] "+r" (p_filter_coeffs_m)
+                : [prec] "r" (prec)
+                : "memory"
+            );
+        }
+        out[n] = v;
+    }
+}
+
+static void ff_acelp_apply_order_2_transfer_function_mips(float *out, const float *in,
+                                              const float zero_coeffs[2],
+                                              const float pole_coeffs[2],
+                                              float gain, float mem[2], int n)
+{
+    /* Order-2 transfer function filter; mem[0..1] is loaded up front and
+     * stored back after the loop. The loop is unrolled eight times and exits
+     * only when n reaches exactly 0, so n must be a multiple of 8. */
+
+    __asm__ volatile (
+        "lwc1   $f0,    0(%[mem])                                              \n\t"
+        "blez   %[n],   ff_acelp_apply_order_2_transfer_function_end%=         \n\t"
+        "lwc1   $f1,    4(%[mem])                                              \n\t"
+        "lwc1   $f2,    0(%[pole_coeffs])                                      \n\t"
+        "lwc1   $f3,    4(%[pole_coeffs])                                      \n\t"
+        "lwc1   $f4,    0(%[zero_coeffs])                                      \n\t"
+        "lwc1   $f5,    4(%[zero_coeffs])                                      \n\t"
+
+        "ff_acelp_apply_order_2_transfer_function_madd%=:                      \n\t"
+
+        "lwc1   $f6,    0(%[in])                                               \n\t"
+        "mul.s  $f9,    $f3,      $f1                                          \n\t"
+        "mul.s  $f7,    $f2,      $f0                                          \n\t"
+        "msub.s $f7,    $f7,      %[gain], $f6                                 \n\t"
+        "sub.s  $f7,    $f7,      $f9                                          \n\t"
+        "madd.s $f8,    $f7,      $f4,     $f0                                 \n\t"
+        "madd.s $f8,    $f8,      $f5,     $f1                                 \n\t"
+        "lwc1   $f11,   4(%[in])                                               \n\t"
+        "mul.s  $f12,   $f3,      $f0                                          \n\t"
+        "mul.s  $f13,   $f2,      $f7                                          \n\t"
+        "msub.s $f13,   $f13,     %[gain], $f11                                \n\t"
+        "sub.s  $f13,   $f13,     $f12                                         \n\t"
+        "madd.s $f14,   $f13,     $f4,     $f7                                 \n\t"
+        "madd.s $f14,   $f14,     $f5,     $f0                                 \n\t"
+        "swc1   $f8,    0(%[out])                                              \n\t"
+        "lwc1   $f6,    8(%[in])                                               \n\t"
+        "mul.s  $f9,    $f3,      $f7                                          \n\t"
+        "mul.s  $f15,   $f2,      $f13                                         \n\t"
+        "msub.s $f15,   $f15,     %[gain], $f6                                 \n\t"
+        "sub.s  $f15,   $f15,     $f9                                          \n\t"
+        "madd.s $f8,    $f15,     $f4,     $f13                                \n\t"
+        "madd.s $f8,    $f8,      $f5,     $f7                                 \n\t"
+        "swc1   $f14,   4(%[out])                                              \n\t"
+        "lwc1   $f11,   12(%[in])                                              \n\t"
+        "mul.s  $f12,   $f3,      $f13                                         \n\t"
+        "mul.s  $f16,   $f2,      $f15                                         \n\t"
+        "msub.s $f16,   $f16,     %[gain], $f11                                \n\t"
+        "sub.s  $f16,   $f16,     $f12                                         \n\t"
+        "madd.s $f14,   $f16,     $f4,     $f15                                \n\t"
+        "madd.s $f14,   $f14,     $f5,     $f13                                \n\t"
+        "swc1   $f8,    8(%[out])                                              \n\t"
+        "lwc1   $f6,    16(%[in])                                              \n\t"
+        "mul.s  $f9,    $f3,      $f15                                         \n\t"
+        "mul.s  $f7,    $f2,      $f16                                         \n\t"
+        "msub.s $f7,    $f7,      %[gain], $f6                                 \n\t"
+        "sub.s  $f7,    $f7,      $f9                                          \n\t"
+        "madd.s $f8,    $f7,      $f4,     $f16                                \n\t"
+        "madd.s $f8,    $f8,      $f5,     $f15                                \n\t"
+        "swc1   $f14,   12(%[out])                                             \n\t"
+        "lwc1   $f11,   20(%[in])                                              \n\t"
+        "mul.s  $f12,   $f3,      $f16                                         \n\t"
+        "mul.s  $f13,   $f2,      $f7                                          \n\t"
+        "msub.s $f13,   $f13,     %[gain], $f11                                \n\t"
+        "sub.s  $f13,   $f13,     $f12                                         \n\t"
+        "madd.s $f14,   $f13,     $f4,     $f7                                 \n\t"
+        "madd.s $f14,   $f14,     $f5,     $f16                                \n\t"
+        "swc1   $f8,    16(%[out])                                             \n\t"
+        "lwc1   $f6,    24(%[in])                                              \n\t"
+        "mul.s  $f9,    $f3,      $f7                                          \n\t"
+        "mul.s  $f15,   $f2,      $f13                                         \n\t"
+        "msub.s $f15,   $f15,     %[gain], $f6                                 \n\t"
+        "sub.s  $f1,    $f15,     $f9                                          \n\t"
+        "madd.s $f8,    $f1,      $f4,     $f13                                \n\t"
+        "madd.s $f8,    $f8,      $f5,     $f7                                 \n\t"
+        "swc1   $f14,   20(%[out])                                             \n\t"
+        "lwc1   $f11,   28(%[in])                                              \n\t"
+        "mul.s  $f12,   $f3,      $f13                                         \n\t"
+        "mul.s  $f16,   $f2,      $f1                                          \n\t"
+        "msub.s $f16,   $f16,     %[gain], $f11                                \n\t"
+        "sub.s  $f0,    $f16,     $f12                                         \n\t"
+        "madd.s $f14,   $f0,      $f4,     $f1                                 \n\t"
+        "madd.s $f14,   $f14,     $f5,     $f13                                \n\t"
+        "swc1   $f8,    24(%[out])                                             \n\t"
+        "addiu  %[out], 32                                                     \n\t"
+        "addiu  %[in],  32                                                     \n\t"
+        "addiu  %[n],   -8                                                     \n\t"
+        "swc1   $f14,   -4(%[out])                                             \n\t"
+        "bnez   %[n],   ff_acelp_apply_order_2_transfer_function_madd%=        \n\t"
+        "swc1   $f1,    4(%[mem])                                              \n\t"
+        "swc1   $f0,    0(%[mem])                                              \n\t"
+
+        "ff_acelp_apply_order_2_transfer_function_end%=:                       \n\t"
+
+         : [out] "+r" (out),
+           [in] "+r" (in), [gain] "+f" (gain),
+           [n] "+r" (n), [mem] "+r" (mem)
+         : [zero_coeffs] "r" (zero_coeffs),
+           [pole_coeffs] "r" (pole_coeffs)
+         : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5",
+           "$f6", "$f7",  "$f8", "$f9", "$f10", "$f11",
+           "$f12", "$f13", "$f14", "$f15", "$f16", "memory"
+    );
+}
+#endif /* HAVE_INLINE_ASM */
+
+/* Install the inline-asm ACELP filter routines from this file, if built. */
+void ff_acelp_filter_init_mips(ACELPFContext *c) {
+#if HAVE_INLINE_ASM
+    c->acelp_apply_order_2_transfer_function   = ff_acelp_apply_order_2_transfer_function_mips;
+    c->acelp_interpolatef                      = ff_acelp_interpolatef_mips;
+#endif /* HAVE_INLINE_ASM */
+}
diff --git a/libavcodec/mips/acelp_vectors_mips.c b/libavcodec/mips/acelp_vectors_mips.c
new file mode 100644
index 0000000..8770df8
--- /dev/null
+++ b/libavcodec/mips/acelp_vectors_mips.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Nedeljko Babic (nbabic@mips.com)
+ *
+ * adaptive and fixed codebook vector operations for ACELP-based codecs
+ * optimized for MIPS
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/acelp_vectors.c
+ */
+#include "config.h"
+#include "libavcodec/acelp_vectors.h"
+
+#if HAVE_INLINE_ASM
+static void ff_weighted_vector_sumf_mips(
+                  float *out, const float *in_a, const float *in_b,
+                  float weight_coeff_a, float weight_coeff_b, int length)
+{
+    const float *a_end = in_a + length;
+    /* out[i] = weight_coeff_a * in_a[i] + weight_coeff_b * in_b[i]; the loop is
+     * unrolled two times and exits on in_a == a_end, so length must be even */
+    __asm__ volatile (
+        "blez   %[length], ff_weighted_vector_sumf_end%=                     \n\t"
+
+        "ff_weighted_vector_sumf_madd%=:                                     \n\t"
+        "lwc1   $f0,       0(%[in_a])                                        \n\t"
+        "lwc1   $f3,       4(%[in_a])                                        \n\t"
+        "lwc1   $f1,       0(%[in_b])                                        \n\t"
+        "lwc1   $f4,       4(%[in_b])                                        \n\t"
+        "mul.s  $f2,       %[weight_coeff_a], $f0                            \n\t"
+        "mul.s  $f5,       %[weight_coeff_a], $f3                            \n\t"
+        "madd.s $f2,       $f2,               %[weight_coeff_b], $f1         \n\t"
+        "madd.s $f5,       $f5,               %[weight_coeff_b], $f4         \n\t"
+        "addiu  %[in_a],   8                                                 \n\t"
+        "addiu  %[in_b],   8                                                 \n\t"
+        "swc1   $f2,       0(%[out])                                         \n\t"
+        "swc1   $f5,       4(%[out])                                         \n\t"
+        "addiu  %[out],    8                                                 \n\t"
+        "bne   %[in_a],    %[a_end],          ff_weighted_vector_sumf_madd%= \n\t"
+
+        "ff_weighted_vector_sumf_end%=:                                      \n\t"
+
+        : [out] "+r" (out), [in_a] "+r" (in_a),   [in_b] "+r" (in_b)
+        : [weight_coeff_a] "f" (weight_coeff_a),
+          [weight_coeff_b] "f" (weight_coeff_b),
+          [length] "r" (length), [a_end]"r"(a_end)
+        : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "memory"
+    );
+}
+#endif /* HAVE_INLINE_ASM */
+
+/* Install the inline-asm weighted_vector_sumf routine from this file, if built. */
+void ff_acelp_vectors_init_mips(ACELPVContext *c) {
+#if HAVE_INLINE_ASM
+    c->weighted_vector_sumf = ff_weighted_vector_sumf_mips;
+#endif /* HAVE_INLINE_ASM */
+}
diff --git a/libavcodec/mips/amrwbdec_mips.c b/libavcodec/mips/amrwbdec_mips.c
new file mode 100644
index 0000000..1d6ed2d
--- /dev/null
+++ b/libavcodec/mips/amrwbdec_mips.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Nedeljko Babic (nbabic@mips.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/amrwbdec.c
+ */
+#include "libavutil/avutil.h"
+#include "libavcodec/amrwbdata.h"
+#include "amrwbdec_mips.h"
+
+#if HAVE_INLINE_ASM
+void hb_fir_filter_mips(float *out, const float fir_coef[HB_FIR_SIZE + 1],
+                          float mem[HB_FIR_SIZE], const float *in)
+{
+    int i;
+    float data[AMRWB_SFR_SIZE_16k + HB_FIR_SIZE]; // past and current samples
+
+    memcpy(data, mem, HB_FIR_SIZE * sizeof(float));
+    memcpy(data + HB_FIR_SIZE, in, AMRWB_SFR_SIZE_16k * sizeof(float));
+
+    for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) {
+        float output;
+        float * p_data = (data+i);
+
+        /*
+         * output = sum of p_data[k] * fir_coef[k] over 31 taps (byte offsets
+         * 0..120); fully unrolled and scheduled to minimize pipeline stalls
+         */
+        __asm__ volatile(
+            "mtc1       $zero,     %[output]                      \n\t"
+            "lwc1       $f0,       0(%[p_data])                   \n\t"
+            "lwc1       $f1,       0(%[fir_coef])                 \n\t"
+            "lwc1       $f2,       4(%[p_data])                   \n\t"
+            "madd.s     %[output], %[output],       $f0, $f1      \n\t"
+            "lwc1       $f3,       4(%[fir_coef])                 \n\t"
+            "lwc1       $f4,       8(%[p_data])                   \n\t"
+            "madd.s     %[output], %[output],       $f2, $f3      \n\t"
+            "lwc1       $f5,       8(%[fir_coef])                 \n\t"
+
+            "lwc1       $f0,       12(%[p_data])                  \n\t"
+            "lwc1       $f1,       12(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output],       $f4, $f5      \n\t"
+            "lwc1       $f2,       16(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output],       $f0, $f1      \n\t"
+            "lwc1       $f3,       16(%[fir_coef])                \n\t"
+            "lwc1       $f4,       20(%[p_data])                  \n\t"
+            "lwc1       $f5,       20(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output],       $f2, $f3      \n\t"
+
+            "lwc1       $f0,       24(%[p_data])                  \n\t"
+            "lwc1       $f1,       24(%[fir_coef])                \n\t"
+            "lwc1       $f2,       28(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output],       $f4, $f5      \n\t"
+            "lwc1       $f3,       28(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output],       $f0, $f1      \n\t"
+            "lwc1       $f4,       32(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output],       $f2, $f3      \n\t"
+            "lwc1       $f5,       32(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output],       $f4, $f5      \n\t"
+
+            "lwc1       $f0,       36(%[p_data])                  \n\t"
+            "lwc1       $f1,       36(%[fir_coef])                \n\t"
+            "lwc1       $f2,       40(%[p_data])                  \n\t"
+            "lwc1       $f3,       40(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output],       $f0, $f1      \n\t"
+            "lwc1       $f4,       44(%[p_data])                  \n\t"
+            "lwc1       $f5,       44(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output],       $f2, $f3      \n\t"
+
+            "lwc1       $f0,       48(%[p_data])                  \n\t"
+            "lwc1       $f1,       48(%[fir_coef])                \n\t"
+            "lwc1       $f2,       52(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output],       $f4, $f5      \n\t"
+            "lwc1       $f3,       52(%[fir_coef])                \n\t"
+            "lwc1       $f4,       56(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output],       $f0, $f1      \n\t"
+            "lwc1       $f5,       56(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output],       $f2, $f3      \n\t"
+
+            "lwc1       $f0,       60(%[p_data])                  \n\t"
+            "lwc1       $f1,       60(%[fir_coef])                \n\t"
+            "lwc1       $f2,       64(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output],       $f4, $f5      \n\t"
+            "lwc1       $f3,       64(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output],       $f0, $f1      \n\t"
+            "lwc1       $f4,       68(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output],       $f2, $f3      \n\t"
+            "lwc1       $f5,       68(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output],       $f4, $f5      \n\t"
+
+            "lwc1       $f0,       72(%[p_data])                  \n\t"
+            "lwc1       $f1,       72(%[fir_coef])                \n\t"
+            "lwc1       $f2,       76(%[p_data])                  \n\t"
+            "lwc1       $f3,       76(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output],       $f0, $f1      \n\t"
+            "lwc1       $f4,       80(%[p_data])                  \n\t"
+            "lwc1       $f5,       80(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output],       $f2, $f3      \n\t"
+
+            "lwc1       $f0,       84(%[p_data])                  \n\t"
+            "lwc1       $f1,       84(%[fir_coef])                \n\t"
+            "lwc1       $f2,       88(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output],       $f4, $f5      \n\t"
+            "lwc1       $f3,       88(%[fir_coef])                \n\t"
+            "lwc1       $f4,       92(%[p_data])                  \n\t"
+            "madd.s     %[output], %[output],       $f0, $f1      \n\t"
+            "lwc1       $f5,       92(%[fir_coef])                \n\t"
+            "madd.s     %[output], %[output],       $f2, $f3      \n\t"
+
+            "lwc1       $f0,       96(%[p_data])                  \n\t"
+            "lwc1       $f1,       96(%[fir_coef])                \n\t"
+            "lwc1       $f2,       100(%[p_data])                 \n\t"
+            "madd.s     %[output], %[output],       $f4, $f5      \n\t"
+            "lwc1       $f3,       100(%[fir_coef])               \n\t"
+            "lwc1       $f4,       104(%[p_data])                 \n\t"
+            "madd.s     %[output], %[output],       $f0, $f1      \n\t"
+            "lwc1       $f5,       104(%[fir_coef])               \n\t"
+            "madd.s     %[output], %[output],       $f2, $f3      \n\t"
+
+            "lwc1       $f0,       108(%[p_data])                 \n\t"
+            "lwc1       $f1,       108(%[fir_coef])               \n\t"
+            "madd.s     %[output], %[output],       $f4, $f5      \n\t"
+            "lwc1       $f2,       112(%[p_data])                 \n\t"
+            "lwc1       $f3,       112(%[fir_coef])               \n\t"
+            "madd.s     %[output], %[output],       $f0, $f1      \n\t"
+            "lwc1       $f4,       116(%[p_data])                 \n\t"
+            "lwc1       $f5,       116(%[fir_coef])               \n\t"
+            "lwc1       $f0,       120(%[p_data])                 \n\t"
+            "madd.s     %[output], %[output],       $f2, $f3      \n\t"
+            "lwc1       $f1,       120(%[fir_coef])               \n\t"
+            "madd.s     %[output], %[output],       $f4, $f5      \n\t"
+            "madd.s     %[output], %[output],       $f0, $f1      \n\t"
+
+            : [output]"=&f"(output)
+            : [fir_coef]"r"(fir_coef), [p_data]"r"(p_data)
+            : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "memory"
+        );
+        out[i] = output;
+    }
+    memcpy(mem, data + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE * sizeof(float));
+}
+#endif /* HAVE_INLINE_ASM */
diff --git a/libavcodec/mips/amrwbdec_mips.h b/libavcodec/mips/amrwbdec_mips.h
new file mode 100644
index 0000000..a469918
--- /dev/null
+++ b/libavcodec/mips/amrwbdec_mips.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Nedeljko Babic (nbabic@mips.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/amrwbdec.c
+ */
+#ifndef AVCODEC_AMRWBDEC_MIPS_H
+#define AVCODEC_AMRWBDEC_MIPS_H
+#include "config.h"
+
+#if HAVE_MIPSFPU && HAVE_INLINE_ASM
+void hb_fir_filter_mips(float *out, const float fir_coef[],
+                          float mem[], const float *in);
+#define hb_fir_filter hb_fir_filter_mips /* map the generic name to the MIPS version */
+#endif /* HAVE_MIPSFPU && HAVE_INLINE_ASM */
+
+#endif /* AVCODEC_AMRWBDEC_MIPS_H  */
diff --git a/libavcodec/mips/celp_filters_mips.c b/libavcodec/mips/celp_filters_mips.c
new file mode 100644
index 0000000..ef5b07b
--- /dev/null
+++ b/libavcodec/mips/celp_filters_mips.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Nedeljko Babic (nbabic@mips.com)
+ *
+ * various filters for CELP-based codecs optimized for MIPS
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/celp_filters.c
+ */
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/common.h"
+#include "libavcodec/celp_filters.h"
+
+#if HAVE_INLINE_ASM
+static void ff_celp_lp_synthesis_filterf_mips(float *out,
+                                  const float *filter_coeffs,
+                                  const float* in, int buffer_length,
+                                  int filter_length)
+{
+    int i,n;
+    /* out[n] = in[n] - sum(i = 1..filter_length) filter_coeffs[i-1] * out[n-i] */
+    float out0, out1, out2, out3;
+    float old_out0, old_out1, old_out2, old_out3;
+    float a,b,c;
+    const float *p_filter_coeffs;
+    float *p_out;
+    /* a, b, c are applied by the last asm block of the 4-sample loop to fix up out1..out3 */
+    a = filter_coeffs[0];
+    b = filter_coeffs[1];
+    c = filter_coeffs[2];
+    b -= filter_coeffs[0] * filter_coeffs[0];
+    c -= filter_coeffs[1] * filter_coeffs[0];
+    c -= filter_coeffs[0] * b;
+    /* the four outputs preceding out[0] are read, so the caller must provide that history */
+    old_out0 = out[-4];
+    old_out1 = out[-3];
+    old_out2 = out[-2];
+    old_out3 = out[-1];
+    for (n = 0; n <= buffer_length - 4; n+=4) { /* four output samples per pass */
+        p_filter_coeffs = filter_coeffs;
+        p_out = out;
+        /* start every output from its input sample */
+        out0 = in[0];
+        out1 = in[1];
+        out2 = in[2];
+        out3 = in[3];
+        /* coefficients 0..3 against the four previous outputs; nmsub.s is used as d = a - x * y */
+        __asm__ volatile(
+            "lwc1       $f2,     8(%[filter_coeffs])                        \n\t"
+            "lwc1       $f1,     4(%[filter_coeffs])                        \n\t"
+            "lwc1       $f0,     0(%[filter_coeffs])                        \n\t"
+            "nmsub.s    %[out0], %[out0],             $f2, %[old_out1]      \n\t"
+            "nmsub.s    %[out1], %[out1],             $f2, %[old_out2]      \n\t"
+            "nmsub.s    %[out2], %[out2],             $f2, %[old_out3]      \n\t"
+            "lwc1       $f3,     12(%[filter_coeffs])                       \n\t" /* NOTE(review): read unconditionally; presumably filter_length >= 4 */
+            "nmsub.s    %[out0], %[out0],             $f1, %[old_out2]      \n\t"
+            "nmsub.s    %[out1], %[out1],             $f1, %[old_out3]      \n\t"
+            "nmsub.s    %[out2], %[out2],             $f3, %[old_out2]      \n\t"
+            "nmsub.s    %[out0], %[out0],             $f0, %[old_out3]      \n\t"
+            "nmsub.s    %[out3], %[out3],             $f3, %[old_out3]      \n\t"
+            "nmsub.s    %[out1], %[out1],             $f3, %[old_out1]      \n\t"
+            "nmsub.s    %[out0], %[out0],             $f3, %[old_out0]      \n\t"
+
+            : [out0]"+f"(out0), [out1]"+f"(out1),
+              [out2]"+f"(out2), [out3]"+f"(out3)
+            : [old_out0]"f"(old_out0), [old_out1]"f"(old_out1),
+              [old_out2]"f"(old_out2), [old_out3]"f"(old_out3),
+              [filter_coeffs]"r"(filter_coeffs)
+            : "$f0", "$f1", "$f2", "$f3", "$f4", "memory"
+        );
+        /* remaining coefficients, two per pass, walking p_out back through older outputs */
+        for (i = 5; i <= filter_length; i += 2) { /* NOTE(review): presumably filter_length is even -- confirm */
+            __asm__ volatile(
+                "lwc1    %[old_out3], -20(%[p_out])                         \n\t" /* 5 floats before p_out */
+                "lwc1    $f5,         16(%[p_filter_coeffs])                \n\t"
+                "addiu   %[p_out],    -8                                    \n\t"
+                "addiu   %[p_filter_coeffs], 8                              \n\t"
+                "nmsub.s %[out1],     %[out1],      $f5, %[old_out0]        \n\t"
+                "nmsub.s %[out3],     %[out3],      $f5, %[old_out2]        \n\t"
+                "lwc1    $f4,         12(%[p_filter_coeffs])                \n\t" /* offsets here follow the addiu above */
+                "lwc1    %[old_out2], -16(%[p_out])                         \n\t"
+                "nmsub.s %[out0],     %[out0],      $f5, %[old_out3]        \n\t"
+                "nmsub.s %[out2],     %[out2],      $f5, %[old_out1]        \n\t"
+                "nmsub.s %[out1],     %[out1],      $f4, %[old_out3]        \n\t"
+                "nmsub.s %[out3],     %[out3],      $f4, %[old_out1]        \n\t"
+                "mov.s   %[old_out1], %[old_out3]                           \n\t"
+                "nmsub.s %[out0],     %[out0],      $f4, %[old_out2]        \n\t"
+                "nmsub.s %[out2],     %[out2],      $f4, %[old_out0]        \n\t"
+
+                : [out0]"+f"(out0), [out1]"+f"(out1),
+                  [out2]"+f"(out2), [out3]"+f"(out3), [old_out0]"+f"(old_out0),
+                  [old_out1]"+f"(old_out1), [old_out2]"+f"(old_out2),
+                  [old_out3]"+f"(old_out3),[p_filter_coeffs]"+r"(p_filter_coeffs),
+                  [p_out]"+r"(p_out)
+                :
+                : "$f4", "$f5", "memory"
+            );
+            FFSWAP(float, old_out0, old_out2); /* with the mov.s above, shifts the history window for the next pass */
+        }
+        /* fold the just-computed out0..out2 into out1..out3 using the precomputed a, b, c */
+        __asm__ volatile(
+            "nmsub.s    %[out3], %[out3], %[a], %[out2]                     \n\t"
+            "nmsub.s    %[out2], %[out2], %[a], %[out1]                     \n\t"
+            "nmsub.s    %[out3], %[out3], %[b], %[out1]                     \n\t"
+            "nmsub.s    %[out1], %[out1], %[a], %[out0]                     \n\t"
+            "nmsub.s    %[out2], %[out2], %[b], %[out0]                     \n\t"
+            "nmsub.s    %[out3], %[out3], %[c], %[out0]                     \n\t"
+
+            : [out0]"+f"(out0), [out1]"+f"(out1),
+              [out2]"+f"(out2), [out3]"+f"(out3)
+            : [a]"f"(a), [b]"f"(b), [c]"f"(c)
+        );
+        /* store the four results; they also become the history for the next pass */
+        out[0] = out0;
+        out[1] = out1;
+        out[2] = out2;
+        out[3] = out3;
+
+        old_out0 = out0;
+        old_out1 = out1;
+        old_out2 = out2;
+        old_out3 = out3;
+
+        out += 4;
+        in  += 4;
+    }
+    /* rewind to the original base pointers, then finish the leftover samples one at a time */
+    out -= n;
+    in -= n;
+    for (; n < buffer_length; n++) {
+        float out_val, out_val_i, fc_val;
+        p_filter_coeffs = filter_coeffs;
+        p_out = &out[n];
+        out_val = in[n];
+        for (i = 1; i <= filter_length; i++) { /* one coefficient and one earlier output per pass */
+            __asm__ volatile(
+                "lwc1    %[fc_val],          0(%[p_filter_coeffs])                        \n\t"
+                "lwc1    %[out_val_i],       -4(%[p_out])                                 \n\t"
+                "addiu   %[p_filter_coeffs], 4                                            \n\t"
+                "addiu   %[p_out],           -4                                           \n\t"
+                "nmsub.s %[out_val],         %[out_val],          %[fc_val], %[out_val_i] \n\t"
+
+                : [fc_val]"=&f"(fc_val), [out_val]"+f"(out_val),
+                  [out_val_i]"=&f"(out_val_i), [p_out]"+r"(p_out),
+                  [p_filter_coeffs]"+r"(p_filter_coeffs)
+                :
+                : "memory"
+            );
+        }
+        out[n] = out_val;
+    }
+}
+
+static void ff_celp_lp_zero_synthesis_filterf_mips(float *out,
+                                       const float *filter_coeffs,
+                                       const float *in, int buffer_length,
+                                       int filter_length)
+{
+    int i,n;
+    float sum_out8, sum_out7, sum_out6, sum_out5, sum_out4, fc_val;
+    float sum_out3, sum_out2, sum_out1;
+    const float *p_filter_coeffs, *p_in;
+    /* out[n] = in[n] + sum(i = 1..filter_length) filter_coeffs[i-1] * in[n-i]; samples before in[0] are read */
+    for (n = 0; n < buffer_length; n+=8) { /* NOTE(review): in/out[n..n+7] are touched unconditionally; presumably buffer_length is a multiple of 8 */
+        p_in = &in[n];
+        p_filter_coeffs = filter_coeffs;
+        sum_out8 = in[n+7];
+        sum_out7 = in[n+6];
+        sum_out6 = in[n+5];
+        sum_out5 = in[n+4];
+        sum_out4 = in[n+3];
+        sum_out3 = in[n+2];
+        sum_out2 = in[n+1];
+        sum_out1 = in[n];
+        i = filter_length;
+        /* The asm loop tests i only after its body has run, so i (= filter_length)
+        * is assumed to be greater than 0 on entry.
+        * outer loop is unrolled eight times so there is less memory access
+        * inner loop is unrolled two times (two coefficients per pass; presumably filter_length is even)
+        */
+        __asm__ volatile(
+            "filt_lp_inner%=:                                               \n\t"
+            "lwc1   %[fc_val],   0(%[p_filter_coeffs])                      \n\t"
+            "lwc1   $f7,         6*4(%[p_in])                               \n\t"
+            "lwc1   $f6,         5*4(%[p_in])                               \n\t"
+            "lwc1   $f5,         4*4(%[p_in])                               \n\t"
+            "lwc1   $f4,         3*4(%[p_in])                               \n\t"
+            "lwc1   $f3,         2*4(%[p_in])                               \n\t"
+            "lwc1   $f2,         4(%[p_in])                                 \n\t"
+            "lwc1   $f1,         0(%[p_in])                                 \n\t"
+            "lwc1   $f0,         -4(%[p_in])                                \n\t" /* one sample before p_in */
+            "addiu  %[i],        -2                                         \n\t" /* two coefficients consumed per pass */
+            "madd.s %[sum_out8], %[sum_out8],          %[fc_val], $f7       \n\t"
+            "madd.s %[sum_out7], %[sum_out7],          %[fc_val], $f6       \n\t"
+            "madd.s %[sum_out6], %[sum_out6],          %[fc_val], $f5       \n\t"
+            "madd.s %[sum_out5], %[sum_out5],          %[fc_val], $f4       \n\t"
+            "madd.s %[sum_out4], %[sum_out4],          %[fc_val], $f3       \n\t"
+            "madd.s %[sum_out3], %[sum_out3],          %[fc_val], $f2       \n\t"
+            "madd.s %[sum_out2], %[sum_out2],          %[fc_val], $f1       \n\t"
+            "madd.s %[sum_out1], %[sum_out1],          %[fc_val], $f0       \n\t"
+            "lwc1   %[fc_val],   4(%[p_filter_coeffs])                      \n\t" /* second coefficient of the pair */
+            "lwc1   $f7,         -8(%[p_in])                                \n\t" /* two samples before p_in */
+            "addiu  %[p_filter_coeffs], 8                                   \n\t"
+            "addiu  %[p_in],     -8                                         \n\t"
+            "madd.s %[sum_out8], %[sum_out8],          %[fc_val], $f6       \n\t"
+            "madd.s %[sum_out7], %[sum_out7],          %[fc_val], $f5       \n\t"
+            "madd.s %[sum_out6], %[sum_out6],          %[fc_val], $f4       \n\t"
+            "madd.s %[sum_out5], %[sum_out5],          %[fc_val], $f3       \n\t"
+            "madd.s %[sum_out4], %[sum_out4],          %[fc_val], $f2       \n\t"
+            "madd.s %[sum_out3], %[sum_out3],          %[fc_val], $f1       \n\t"
+            "madd.s %[sum_out2], %[sum_out2],          %[fc_val], $f0       \n\t"
+            "madd.s %[sum_out1], %[sum_out1],          %[fc_val], $f7       \n\t"
+            "bgtz   %[i],        filt_lp_inner%=                            \n\t" /* repeat while coefficients remain */
+
+            : [sum_out8]"+f"(sum_out8), [sum_out7]"+f"(sum_out7),
+              [sum_out6]"+f"(sum_out6), [sum_out5]"+f"(sum_out5),
+              [sum_out4]"+f"(sum_out4), [sum_out3]"+f"(sum_out3),
+              [sum_out2]"+f"(sum_out2), [sum_out1]"+f"(sum_out1),
+              [fc_val]"=&f"(fc_val), [p_filter_coeffs]"+r"(p_filter_coeffs),
+              [p_in]"+r"(p_in), [i]"+r"(i)
+            :
+            : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "memory"
+        );
+        /* write back the eight accumulated outputs */
+        out[n+7] = sum_out8;
+        out[n+6] = sum_out7;
+        out[n+5] = sum_out6;
+        out[n+4] = sum_out5;
+        out[n+3] = sum_out4;
+        out[n+2] = sum_out3;
+        out[n+1] = sum_out2;
+        out[n] = sum_out1;
+    }
+}
+#endif /* HAVE_INLINE_ASM */
+
+void ff_celp_filter_init_mips(CELPFContext *c)
+{
+#if HAVE_INLINE_ASM /* without inline asm the context is left untouched */
+    c->celp_lp_synthesis_filterf        = ff_celp_lp_synthesis_filterf_mips;
+    c->celp_lp_zero_synthesis_filterf   = ff_celp_lp_zero_synthesis_filterf_mips;
+#endif /* HAVE_INLINE_ASM */
+}
diff --git a/libavcodec/mips/celp_math_mips.c b/libavcodec/mips/celp_math_mips.c
new file mode 100644
index 0000000..d7ccc23
--- /dev/null
+++ b/libavcodec/mips/celp_math_mips.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Nedeljko Babic (nbabic@mips.com)
+ *
+ * Math operations optimized for MIPS
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/celp_math.c
+ */
+#include "config.h"
+#include "libavcodec/celp_math.h"
+
+#if HAVE_INLINE_ASM
+static float ff_dot_productf_mips(const float* a, const float* b,
+                                              int length)
+{
+    float sum;
+    const float* a_end = a + length;
+    /* returns a[0]*b[0] + ... + a[length-1]*b[length-1]; the sum stays zero when length <= 0 */
+    __asm__ volatile (
+        "mtc1   $zero,      %[sum]                              \n\t" /* sum = 0 */
+        "blez   %[length],  ff_dot_productf_end%=               \n\t" /* skip the loop when there is nothing to add */
+        "ff_dot_productf_madd%=:                                \n\t"
+        "lwc1   $f2,        0(%[a])                             \n\t"
+        "lwc1   $f1,        0(%[b])                             \n\t"
+        "addiu  %[a],       %[a],      4                        \n\t"
+        "addiu  %[b],       %[b],      4                        \n\t"
+        "madd.s %[sum],     %[sum],    $f1, $f2                 \n\t" /* sum += (*a) * (*b) */
+        "bne   %[a],        %[a_end],  ff_dot_productf_madd%=   \n\t" /* until a reaches a_end */
+        "ff_dot_productf_end%=:                                 \n\t"
+
+        : [sum] "=&f" (sum), [a] "+r" (a), [b] "+r" (b)
+        : [a_end]"r"(a_end), [length] "r" (length)
+        : "$f1", "$f2", "memory"
+    );
+    return sum;
+}
+#endif /* HAVE_INLINE_ASM */
+
+void ff_celp_math_init_mips(CELPMContext *c)
+{
+#if HAVE_INLINE_ASM /* without inline asm the context is left untouched */
+    c->dot_productf = ff_dot_productf_mips;
+#endif /* HAVE_INLINE_ASM */
+}
diff --git a/libavcodec/mips/compute_antialias_fixed.h b/libavcodec/mips/compute_antialias_fixed.h
new file mode 100644
index 0000000..a967f67
--- /dev/null
+++ b/libavcodec/mips/compute_antialias_fixed.h
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Bojan Zivkovic (bojan@mips.com)
+ *
+ * Compute antialias function optimised for MIPS fixed-point architecture
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/mpegaudiodec.c
+ */
+
+#ifndef AVCODEC_MIPS_COMPUTE_ANTIALIAS_FIXED_H
+#define AVCODEC_MIPS_COMPUTE_ANTIALIAS_FIXED_H
+
+#if HAVE_INLINE_ASM
+static void compute_antialias_mips_fixed(MPADecodeContext *s,
+                                        GranuleDef *g)
+{
+    int32_t *ptr, *csa;
+    int n, i;
+    int MAX_lo = 0xffffffff; /* all-ones word written to the low half of the msub accumulators */
+    /* Rewrites g->sb_hybrid in place: n passes, each updating ptr[-8..7] around a seam, 18 values apart. */
+    /* we antialias only "long" bands */
+    if (g->block_type == 2) {
+        if (!g->switch_point)
+            return;
+        /* XXX: check this for 8000Hz case */
+        n = 1;
+    } else {
+        n = SBLIMIT - 1;
+    }
+
+    /* the first seam sits 18 values into sb_hybrid */
+    ptr = g->sb_hybrid + 18;
+
+    for(i = n;i > 0;i--) {
+        int tmp0, tmp1, tmp2, tmp00, tmp11;
+        int temp_reg1, temp_reg2, temp_reg3, temp_reg4, temp_reg5, temp_reg6;
+        csa = &csa_table[0][0]; /* read as a flat array at offsets 4*j + {0, 2, 3} */
+        /* For j = 0..7, with x = ptr[-1-j] and y = ptr[j] as loaded:
+         *   $acA = $acB = (x + y) * csa[4j]; then lo($acA) = MAX_lo and lo($acB) = 0;
+         *   ptr[-1-j] = hi($acA - y * csa[4j+2]) << 2; ptr[j] = hi($acB + x * csa[4j+3]) << 2.
+         * instructions are scheduled to minimize pipeline stall. */
+        __asm__ volatile (
+            "lw   %[tmp0],      -1*4(%[ptr])                            \n\t" /* j = 0 starts */
+            "lw   %[tmp1],      0*4(%[ptr])                             \n\t"
+            "lw   %[temp_reg1], 0*4(%[csa])                             \n\t"
+            "lw   %[temp_reg2], 2*4(%[csa])                             \n\t"
+            "add  %[tmp2],      %[tmp0],      %[tmp1]                   \n\t"
+            "lw   %[temp_reg3], 3*4(%[csa])                             \n\t"
+            "mult $ac0,         %[tmp2],      %[temp_reg1]              \n\t"
+            "mult $ac1,         %[tmp2],      %[temp_reg1]              \n\t"
+            "lw   %[tmp00],     -2*4(%[ptr])                            \n\t" /* j = 1 starts */
+            "lw   %[tmp11],     1*4(%[ptr])                             \n\t"
+            "lw   %[temp_reg4], 4*4(%[csa])                             \n\t"
+            "mtlo %[MAX_lo],    $ac0                                    \n\t"
+            "mtlo $zero,        $ac1                                    \n\t"
+            "msub $ac0,         %[tmp1],      %[temp_reg2]              \n\t"
+            "madd $ac1,         %[tmp0],      %[temp_reg3]              \n\t"
+            "add  %[tmp2],      %[tmp00],     %[tmp11]                  \n\t"
+            "lw   %[temp_reg5], 6*4(%[csa])                             \n\t"
+            "mult $ac2,         %[tmp2],      %[temp_reg4]              \n\t"
+            "mult $ac3,         %[tmp2],      %[temp_reg4]              \n\t"
+            "mfhi %[temp_reg1], $ac0                                    \n\t"
+            "mfhi %[temp_reg2], $ac1                                    \n\t"
+            "lw   %[temp_reg6], 7*4(%[csa])                             \n\t"
+            "mtlo %[MAX_lo],    $ac2                                    \n\t"
+            "msub $ac2,         %[tmp11],     %[temp_reg5]              \n\t"
+            "mtlo $zero,        $ac3                                    \n\t"
+            "madd $ac3,         %[tmp00],     %[temp_reg6]              \n\t"
+            "sll  %[temp_reg1], %[temp_reg1], 2                         \n\t"
+            "sw   %[temp_reg1], -1*4(%[ptr])                            \n\t"
+            "mfhi %[temp_reg4], $ac2                                    \n\t"
+            "sll  %[temp_reg2], %[temp_reg2], 2                         \n\t"
+            "mfhi %[temp_reg5], $ac3                                    \n\t"
+            "sw   %[temp_reg2], 0*4(%[ptr])                             \n\t"
+            "lw   %[tmp0],      -3*4(%[ptr])                            \n\t" /* j = 2 starts */
+            "lw   %[tmp1],      2*4(%[ptr])                             \n\t"
+            "lw   %[temp_reg1], 8*4(%[csa])                             \n\t"
+            "sll  %[temp_reg4], %[temp_reg4], 2                         \n\t"
+            "add  %[tmp2],      %[tmp0],      %[tmp1]                   \n\t"
+            "sll  %[temp_reg5], %[temp_reg5], 2                         \n\t"
+            "mult $ac0,         %[tmp2],      %[temp_reg1]              \n\t"
+            "mult $ac1,         %[tmp2],      %[temp_reg1]              \n\t"
+            "sw   %[temp_reg4], -2*4(%[ptr])                            \n\t"
+            "sw   %[temp_reg5], 1*4(%[ptr])                             \n\t"
+            "lw   %[temp_reg2], 10*4(%[csa])                            \n\t"
+            "mtlo %[MAX_lo],    $ac0                                    \n\t"
+            "lw   %[temp_reg3], 11*4(%[csa])                            \n\t"
+            "msub $ac0,         %[tmp1],      %[temp_reg2]              \n\t"
+            "mtlo $zero,        $ac1                                    \n\t"
+            "madd $ac1,         %[tmp0],      %[temp_reg3]              \n\t"
+            "lw   %[tmp00],     -4*4(%[ptr])                            \n\t" /* j = 3 starts */
+            "lw   %[tmp11],     3*4(%[ptr])                             \n\t"
+            "mfhi %[temp_reg1], $ac0                                    \n\t"
+            "lw   %[temp_reg4], 12*4(%[csa])                            \n\t"
+            "mfhi %[temp_reg2], $ac1                                    \n\t"
+            "add  %[tmp2],      %[tmp00],     %[tmp11]                  \n\t"
+            "mult $ac2,         %[tmp2],      %[temp_reg4]              \n\t"
+            "mult $ac3,         %[tmp2],      %[temp_reg4]              \n\t"
+            "lw   %[temp_reg5], 14*4(%[csa])                            \n\t"
+            "lw   %[temp_reg6], 15*4(%[csa])                            \n\t"
+            "sll  %[temp_reg1], %[temp_reg1], 2                         \n\t"
+            "mtlo %[MAX_lo],    $ac2                                    \n\t"
+            "msub $ac2,         %[tmp11],     %[temp_reg5]              \n\t"
+            "mtlo $zero,        $ac3                                    \n\t"
+            "madd $ac3,         %[tmp00],     %[temp_reg6]              \n\t"
+            "sll  %[temp_reg2], %[temp_reg2], 2                         \n\t"
+            "sw   %[temp_reg1], -3*4(%[ptr])                            \n\t"
+            "mfhi %[temp_reg4], $ac2                                    \n\t"
+            "sw   %[temp_reg2], 2*4(%[ptr])                             \n\t"
+            "mfhi %[temp_reg5], $ac3                                    \n\t"
+            "lw   %[tmp0],      -5*4(%[ptr])                            \n\t" /* j = 4 starts */
+            "lw   %[tmp1],      4*4(%[ptr])                             \n\t"
+            "lw   %[temp_reg1], 16*4(%[csa])                            \n\t"
+            "lw   %[temp_reg2], 18*4(%[csa])                            \n\t"
+            "add  %[tmp2],      %[tmp0],      %[tmp1]                   \n\t"
+            "lw   %[temp_reg3], 19*4(%[csa])                            \n\t"
+            "mult $ac0,         %[tmp2],      %[temp_reg1]              \n\t"
+            "mult $ac1,         %[tmp2],      %[temp_reg1]              \n\t"
+            "sll  %[temp_reg4], %[temp_reg4], 2                         \n\t"
+            "sll  %[temp_reg5], %[temp_reg5], 2                         \n\t"
+            "sw   %[temp_reg4], -4*4(%[ptr])                            \n\t"
+            "mtlo %[MAX_lo],    $ac0                                    \n\t"
+            "msub $ac0,         %[tmp1],      %[temp_reg2]              \n\t"
+            "mtlo $zero,        $ac1                                    \n\t"
+            "madd $ac1,         %[tmp0],      %[temp_reg3]              \n\t"
+            "sw   %[temp_reg5], 3*4(%[ptr])                             \n\t"
+            "lw   %[tmp00],     -6*4(%[ptr])                            \n\t" /* j = 5 starts */
+            "mfhi %[temp_reg1], $ac0                                    \n\t"
+            "lw   %[tmp11],     5*4(%[ptr])                             \n\t"
+            "mfhi %[temp_reg2], $ac1                                    \n\t"
+            "lw   %[temp_reg4], 20*4(%[csa])                            \n\t"
+            "add  %[tmp2],      %[tmp00],     %[tmp11]                  \n\t"
+            "lw   %[temp_reg5], 22*4(%[csa])                            \n\t"
+            "mult $ac2,         %[tmp2],      %[temp_reg4]              \n\t"
+            "mult $ac3,         %[tmp2],      %[temp_reg4]              \n\t"
+            "lw   %[temp_reg6], 23*4(%[csa])                            \n\t"
+            "sll  %[temp_reg1], %[temp_reg1], 2                         \n\t"
+            "sll  %[temp_reg2], %[temp_reg2], 2                         \n\t"
+            "mtlo %[MAX_lo],    $ac2                                    \n\t"
+            "msub $ac2,         %[tmp11],     %[temp_reg5]              \n\t"
+            "mtlo $zero,        $ac3                                    \n\t"
+            "madd $ac3,         %[tmp00],     %[temp_reg6]              \n\t"
+            "sw   %[temp_reg1], -5*4(%[ptr])                            \n\t"
+            "sw   %[temp_reg2], 4*4(%[ptr])                             \n\t"
+            "mfhi %[temp_reg4], $ac2                                    \n\t"
+            "lw   %[tmp0],      -7*4(%[ptr])                            \n\t" /* j = 6 starts */
+            "mfhi %[temp_reg5], $ac3                                    \n\t"
+            "lw   %[tmp1],      6*4(%[ptr])                             \n\t"
+            "lw   %[temp_reg1], 24*4(%[csa])                            \n\t"
+            "lw   %[temp_reg2], 26*4(%[csa])                            \n\t"
+            "add  %[tmp2],      %[tmp0],      %[tmp1]                   \n\t"
+            "lw   %[temp_reg3], 27*4(%[csa])                            \n\t"
+            "mult $ac0,         %[tmp2],      %[temp_reg1]              \n\t"
+            "mult $ac1,         %[tmp2],      %[temp_reg1]              \n\t"
+            "sll  %[temp_reg4], %[temp_reg4], 2                         \n\t"
+            "sll  %[temp_reg5], %[temp_reg5], 2                         \n\t"
+            "sw   %[temp_reg4], -6*4(%[ptr])                            \n\t"
+            "mtlo %[MAX_lo],    $ac0                                    \n\t"
+            "msub $ac0,         %[tmp1],      %[temp_reg2]              \n\t"
+            "mtlo $zero,        $ac1                                    \n\t"
+            "madd $ac1,         %[tmp0],      %[temp_reg3]              \n\t"
+            "sw   %[temp_reg5], 5*4(%[ptr])                             \n\t"
+            "lw   %[tmp00],     -8*4(%[ptr])                            \n\t" /* j = 7 starts */
+            "mfhi %[temp_reg1], $ac0                                    \n\t"
+            "lw   %[tmp11],     7*4(%[ptr])                             \n\t"
+            "mfhi %[temp_reg2], $ac1                                    \n\t"
+            "lw   %[temp_reg4], 28*4(%[csa])                            \n\t"
+            "add  %[tmp2],      %[tmp00],     %[tmp11]                  \n\t"
+            "lw   %[temp_reg5], 30*4(%[csa])                            \n\t"
+            "mult $ac2,         %[tmp2],      %[temp_reg4]              \n\t"
+            "mult $ac3,         %[tmp2],      %[temp_reg4]              \n\t"
+            "lw   %[temp_reg6], 31*4(%[csa])                            \n\t"
+            "sll  %[temp_reg1], %[temp_reg1], 2                         \n\t"
+            "sll  %[temp_reg2], %[temp_reg2], 2                         \n\t"
+            "mtlo %[MAX_lo],    $ac2                                    \n\t"
+            "msub $ac2,         %[tmp11],     %[temp_reg5]              \n\t"
+            "mtlo $zero,        $ac3                                    \n\t"
+            "madd $ac3,         %[tmp00],     %[temp_reg6]              \n\t"
+            "sw   %[temp_reg1], -7*4(%[ptr])                            \n\t"
+            "sw   %[temp_reg2], 6*4(%[ptr])                             \n\t"
+            "mfhi %[temp_reg4], $ac2                                    \n\t"
+            "mfhi %[temp_reg5], $ac3                                    \n\t"
+            "sll  %[temp_reg4], %[temp_reg4], 2                         \n\t"
+            "sll  %[temp_reg5], %[temp_reg5], 2                         \n\t"
+            "sw   %[temp_reg4], -8*4(%[ptr])                            \n\t"
+            "sw   %[temp_reg5], 7*4(%[ptr])                             \n\t"
+
+            : [tmp0] "=&r" (tmp0), [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2),
+              [tmp00] "=&r" (tmp00), [tmp11] "=&r" (tmp11),
+              [temp_reg1] "=&r" (temp_reg1), [temp_reg2] "=&r" (temp_reg2),
+              [temp_reg3] "=&r" (temp_reg3), [temp_reg4] "=&r" (temp_reg4),
+              [temp_reg5] "=&r" (temp_reg5), [temp_reg6] "=&r" (temp_reg6)
+            : [csa] "r" (csa), [ptr] "r" (ptr),
+              [MAX_lo] "r" (MAX_lo)
+            : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo",
+              "$ac3hi", "$ac3lo"
+         );
+
+        ptr += 18; /* move to the next seam */
+    }
+}
+#define compute_antialias compute_antialias_mips_fixed /* code after this point that names compute_antialias gets the MIPS version */
+#endif /* HAVE_INLINE_ASM */
+
+#endif /* AVCODEC_MIPS_COMPUTE_ANTIALIAS_FIXED_H */
diff --git a/libavcodec/mips/compute_antialias_float.h b/libavcodec/mips/compute_antialias_float.h
new file mode 100644
index 0000000..0f6f03f
--- /dev/null
+++ b/libavcodec/mips/compute_antialias_float.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Bojan Zivkovic (bojan@mips.com)
+ *
+ * Compute antialias function optimised for MIPS floating-point architecture
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/mpegaudiodec.c
+ */
+
+#ifndef AVCODEC_MIPS_COMPUTE_ANTIALIAS_FLOAT_H
+#define AVCODEC_MIPS_COMPUTE_ANTIALIAS_FLOAT_H
+
+#if HAVE_INLINE_ASM
+static void compute_antialias_mips_float(MPADecodeContext *s, /* s is not referenced in this body */
+                                        GranuleDef *g)        /* reads g->block_type and g->switch_point; rewrites g->sb_hybrid in place */
+{
+    float *ptr, *ptr_end;
+    float *csa = &csa_table[0][0]; /* flat view of the coefficient table; the asm reads csa[4*j] and csa[4*j+1] */
+    int n; /* assigned below but never read afterwards; the loop bound is carried by ptr_end */
+    /* scratch values for the asm loop: in1..in8 receive loaded operands, out1..out4 the results */
+    float in1, in2, in3, in4, in5, in6, in7, in8;
+    float out1, out2, out3, out4;
+
+    ptr = g->sb_hybrid + 18; /* the asm addresses ptr[-8..7], i.e. 8 floats on each side of this position */
+    /* we antialias only "long" bands: block_type 2 returns early unless switch_point is set */
+    if (g->block_type == 2) {
+        if (!g->switch_point)
+            return;
+        /* XXX: check this for 8000Hz case */
+        n = 1;
+        ptr_end = ptr + 18; /* exactly one loop iteration */
+    } else {
+        n = 31;
+        ptr_end = ptr + 558; /* 558 = 31 * 18: thirty-one loop iterations */
+    }
+
+    /**  For j = 0..7, with a = ptr[-1-j], b = ptr[j], c0 = csa[4*j], c1 = csa[4*j+1]:
+    *    ptr[-1-j] = a*c0 - b*c1 and ptr[j] = a*c1 + b*c0; then ptr advances by 18 floats until it equals ptr_end.
+    *    Instructions are scheduled to minimize pipeline stalls, so the eight pairs are interleaved. */
+
+    __asm__ volatile (
+        "compute_antialias_float_loop%=:                                \t\n" /* %= gives each asm instance a unique label */
+        "lwc1    %[in1],  -1*4(%[ptr])                                  \t\n" /* j = 0: a  */
+        "lwc1    %[in2],  0(%[csa])                                     \t\n" /* j = 0: c0 */
+        "lwc1    %[in3],  1*4(%[csa])                                   \t\n" /* j = 0: c1 */
+        "lwc1    %[in4],  0(%[ptr])                                     \t\n" /* j = 0: b  */
+        "lwc1    %[in5],  -2*4(%[ptr])                                  \t\n" /* j = 1 operands go to in5..in8 */
+        "lwc1    %[in6],  4*4(%[csa])                                   \t\n"
+        "mul.s   %[out1], %[in1],  %[in2]                               \t\n" /* a*c0 */
+        "mul.s   %[out2], %[in1],  %[in3]                               \t\n" /* a*c1 */
+        "lwc1    %[in7],  5*4(%[csa])                                   \t\n"
+        "lwc1    %[in8],  1*4(%[ptr])                                   \t\n"
+        "nmsub.s %[out1], %[out1], %[in3], %[in4]                       \t\n" /* out1 = a*c0 - c1*b */
+        "madd.s  %[out2], %[out2], %[in2], %[in4]                       \t\n" /* out2 = a*c1 + c0*b */
+        "mul.s   %[out3], %[in5],  %[in6]                               \t\n"
+        "mul.s   %[out4], %[in5],  %[in7]                               \t\n"
+        "lwc1    %[in1],  -3*4(%[ptr])                                  \t\n" /* j = 2 reuses in1..in4 */
+        "swc1    %[out1], -1*4(%[ptr])                                  \t\n"
+        "swc1    %[out2], 0(%[ptr])                                     \t\n"
+        "nmsub.s %[out3], %[out3], %[in7], %[in8]                       \t\n"
+        "madd.s  %[out4], %[out4], %[in6], %[in8]                       \t\n"
+        "lwc1    %[in2],  8*4(%[csa])                                   \t\n"
+        "swc1    %[out3], -2*4(%[ptr])                                  \t\n"
+        "swc1    %[out4], 1*4(%[ptr])                                   \t\n"
+        "lwc1    %[in3],  9*4(%[csa])                                   \t\n"
+        "lwc1    %[in4],  2*4(%[ptr])                                   \t\n"
+        "mul.s   %[out1], %[in1],  %[in2]                               \t\n"
+        "lwc1    %[in5],  -4*4(%[ptr])                                  \t\n" /* j = 3 reuses in5..in8 */
+        "lwc1    %[in6],  12*4(%[csa])                                  \t\n"
+        "mul.s   %[out2], %[in1],  %[in3]                               \t\n"
+        "lwc1    %[in7],  13*4(%[csa])                                  \t\n"
+        "nmsub.s %[out1], %[out1], %[in3], %[in4]                       \t\n"
+        "lwc1    %[in8],  3*4(%[ptr])                                   \t\n"
+        "mul.s   %[out3], %[in5],  %[in6]                               \t\n"
+        "madd.s  %[out2], %[out2], %[in2], %[in4]                       \t\n"
+        "mul.s   %[out4], %[in5],  %[in7]                               \t\n"
+        "swc1    %[out1], -3*4(%[ptr])                                  \t\n"
+        "lwc1    %[in1],  -5*4(%[ptr])                                  \t\n" /* j = 4 */
+        "nmsub.s %[out3], %[out3], %[in7], %[in8]                       \t\n"
+        "swc1    %[out2], 2*4(%[ptr])                                   \t\n"
+        "madd.s  %[out4], %[out4], %[in6], %[in8]                       \t\n"
+        "lwc1    %[in2],  16*4(%[csa])                                  \t\n"
+        "lwc1    %[in3],  17*4(%[csa])                                  \t\n"
+        "swc1    %[out3], -4*4(%[ptr])                                  \t\n"
+        "lwc1    %[in4],  4*4(%[ptr])                                   \t\n"
+        "swc1    %[out4], 3*4(%[ptr])                                   \t\n"
+        "mul.s   %[out1], %[in1],  %[in2]                               \t\n"
+        "mul.s   %[out2], %[in1],  %[in3]                               \t\n"
+        "lwc1    %[in5],  -6*4(%[ptr])                                  \t\n" /* j = 5 */
+        "lwc1    %[in6],  20*4(%[csa])                                  \t\n"
+        "lwc1    %[in7],  21*4(%[csa])                                  \t\n"
+        "nmsub.s %[out1], %[out1], %[in3], %[in4]                       \t\n"
+        "madd.s  %[out2], %[out2], %[in2], %[in4]                       \t\n"
+        "lwc1    %[in8],  5*4(%[ptr])                                   \t\n"
+        "mul.s   %[out3], %[in5],  %[in6]                               \t\n"
+        "mul.s   %[out4], %[in5],  %[in7]                               \t\n"
+        "swc1    %[out1], -5*4(%[ptr])                                  \t\n"
+        "swc1    %[out2], 4*4(%[ptr])                                   \t\n"
+        "lwc1    %[in1],  -7*4(%[ptr])                                  \t\n" /* j = 6 */
+        "nmsub.s %[out3], %[out3], %[in7], %[in8]                       \t\n"
+        "madd.s  %[out4], %[out4], %[in6], %[in8]                       \t\n"
+        "lwc1    %[in2],  24*4(%[csa])                                  \t\n"
+        "lwc1    %[in3],  25*4(%[csa])                                  \t\n"
+        "lwc1    %[in4],  6*4(%[ptr])                                   \t\n"
+        "swc1    %[out3], -6*4(%[ptr])                                  \t\n"
+        "swc1    %[out4], 5*4(%[ptr])                                   \t\n"
+        "mul.s   %[out1], %[in1],  %[in2]                               \t\n"
+        "lwc1    %[in5],  -8*4(%[ptr])                                  \t\n" /* j = 7 */
+        "mul.s   %[out2], %[in1],  %[in3]                               \t\n"
+        "lwc1    %[in6],  28*4(%[csa])                                  \t\n"
+        "lwc1    %[in7],  29*4(%[csa])                                  \t\n"
+        "nmsub.s %[out1], %[out1], %[in3], %[in4]                       \t\n"
+        "lwc1    %[in8],  7*4(%[ptr])                                   \t\n"
+        "madd.s  %[out2], %[out2], %[in2], %[in4]                       \t\n"
+        "mul.s   %[out3], %[in5],  %[in6]                               \t\n"
+        "mul.s   %[out4], %[in5],  %[in7]                               \t\n"
+        "swc1    %[out1], -7*4(%[ptr])                                  \t\n"
+        "swc1    %[out2], 6*4(%[ptr])                                   \t\n"
+        "addiu   %[ptr],  %[ptr],  72                                   \t\n" /* ptr += 18 floats (72 bytes) before the last two stores */
+        "nmsub.s %[out3], %[out3], %[in7], %[in8]                       \t\n"
+        "madd.s  %[out4], %[out4], %[in6], %[in8]                       \t\n"
+        "swc1    %[out3], -26*4(%[ptr])                                 \t\n" /* -26 = -8 - 18: the old ptr[-8] */
+        "swc1    %[out4], -11*4(%[ptr])                                 \t\n" /* -11 =  7 - 18: the old ptr[7]  */
+        "bne     %[ptr],  %[ptr_end],  compute_antialias_float_loop%=   \t\n" /* repeat until ptr reaches ptr_end */
+
+        : [ptr] "+r" (ptr),
+          [in1] "=&f" (in1), [in2] "=&f" (in2),
+          [in3] "=&f" (in3), [in4] "=&f" (in4),
+          [in5] "=&f" (in5), [in6] "=&f" (in6),
+          [in7] "=&f" (in7), [in8] "=&f" (in8),
+          [out1] "=&f" (out1), [out2] "=&f" (out2),
+          [out3] "=&f" (out3), [out4] "=&f" (out4)
+        : [csa] "r" (csa), [ptr_end] "r" (ptr_end)
+        : "memory" /* the asm stores through ptr, which the operand list alone does not express */
+    );
+}
+#define compute_antialias compute_antialias_mips_float
+#endif /* HAVE_INLINE_ASM */
+
+#endif /* AVCODEC_MIPS_COMPUTE_ANTIALIAS_FLOAT_H */
diff --git a/libavcodec/mips/fft_mips.c b/libavcodec/mips/fft_mips.c
new file mode 100644
index 0000000..d240f1f
--- /dev/null
+++ b/libavcodec/mips/fft_mips.c
@@ -0,0 +1,534 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Stanislav Ocovaj (socovaj@mips.com)
+ * Author:  Zoran Lukic (zoranl@mips.com)
+ *
+ * Optimized MDCT/IMDCT and FFT transforms
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "config.h"
+#include "libavcodec/fft.h"
+#include "libavcodec/fft_table.h"
+
+/**
+ * FFT transform
+ */
+
+#if HAVE_INLINE_ASM
+static void ff_fft_calc_mips(FFTContext *s, FFTComplex *z) // transforms z in place; only s->nbits is read from s
+{
+    int nbits, i, n, num_transforms, offset, step;
+    int n4, n2, n34;
+    FFTSample tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+    FFTComplex *tmpz;
+    float w_re, w_im;
+    float *w_re_ptr, *w_im_ptr;
+    const int fft_size = (1 << s->nbits);
+    int s_n = s->nbits; // overwritten with 16 - s->nbits by the first asm block and not used afterwards
+    int tem1, tem2;
+    float pom,  pom1,  pom2,  pom3;
+    float temp, temp1, temp3, temp4;
+    FFTComplex * tmpz_n2, * tmpz_n34, * tmpz_n4;
+    FFTComplex * tmpz_n2_i, * tmpz_n34_i, * tmpz_n4_i, * tmpz_i;
+
+    /**
+    * The asm below computes num_transforms = (0x2aab >> (16 - s->nbits)) | 1;   (10923 == 0x2aab)
+    */
+    __asm__ volatile (
+        "li   %[tem1], 16                                      \n\t"
+        "sub  %[s_n],  %[tem1], %[s_n]                         \n\t"
+        "li   %[tem2], 10923                                   \n\t"
+        "srav %[tem2], %[tem2], %[s_n]                         \n\t"
+        "ori  %[num_t],%[tem2], 1                              \n\t"
+        : [num_t]"=r"(num_transforms), [s_n]"+r"(s_n),
+          [tem1]"=&r"(tem1), [tem2]"=&r"(tem2)
+    );
+
+
+    for (n=0; n<num_transforms; n++) { // first pass, plain C: each iteration rewrites tmpz[0..3]
+        offset = fft_offsets_lut[n] << 2;
+        tmpz = z + offset;
+
+        tmp1 = tmpz[0].re + tmpz[1].re;
+        tmp5 = tmpz[2].re + tmpz[3].re;
+        tmp2 = tmpz[0].im + tmpz[1].im;
+        tmp6 = tmpz[2].im + tmpz[3].im;
+        tmp3 = tmpz[0].re - tmpz[1].re;
+        tmp8 = tmpz[2].im - tmpz[3].im;
+        tmp4 = tmpz[0].im - tmpz[1].im;
+        tmp7 = tmpz[2].re - tmpz[3].re;
+
+        tmpz[0].re = tmp1 + tmp5;
+        tmpz[2].re = tmp1 - tmp5;
+        tmpz[0].im = tmp2 + tmp6;
+        tmpz[2].im = tmp2 - tmp6;
+        tmpz[1].re = tmp3 + tmp8;
+        tmpz[3].re = tmp3 - tmp8;
+        tmpz[1].im = tmp4 - tmp7;
+        tmpz[3].im = tmp4 + tmp7;
+
+    }
+
+    if (fft_size < 8) // sizes below 8 are finished after the first pass
+        return;
+
+    num_transforms = (num_transforms >> 1) | 1;
+
+    for (n=0; n<num_transforms; n++) { // second pass: each iteration rewrites tmpz[0..7] (byte offsets 0..60)
+        offset = fft_offsets_lut[n] << 3;
+        tmpz = z + offset;
+
+        // byte offsets assume FFTComplex is two packed 32-bit floats, .re at +0 and .im at +4 -- confirm against fft.h
+        __asm__ volatile (
+            "lwc1  %[tmp1], 32(%[tmpz])                     \n\t"
+            "lwc1  %[pom],  40(%[tmpz])                     \n\t"
+            "lwc1  %[tmp3], 48(%[tmpz])                     \n\t"
+            "lwc1  %[pom1], 56(%[tmpz])                     \n\t"
+            "lwc1  %[tmp2], 36(%[tmpz])                     \n\t"
+            "lwc1  %[pom2], 44(%[tmpz])                     \n\t"
+            "lwc1  %[pom3], 60(%[tmpz])                     \n\t"
+            "lwc1  %[tmp4], 52(%[tmpz])                     \n\t"
+            "add.s %[tmp1], %[tmp1],    %[pom]              \n\t"  // tmp1 = tmpz[4].re + tmpz[5].re;
+            "add.s %[tmp3], %[tmp3],    %[pom1]             \n\t"  // tmp3 = tmpz[6].re + tmpz[7].re;
+            "add.s %[tmp2], %[tmp2],    %[pom2]             \n\t"  // tmp2 = tmpz[4].im + tmpz[5].im;
+            "lwc1  %[pom],  40(%[tmpz])                     \n\t"
+            "add.s %[tmp4], %[tmp4],    %[pom3]             \n\t"  // tmp4 = tmpz[6].im + tmpz[7].im;
+            "add.s %[tmp5], %[tmp1],    %[tmp3]             \n\t"  // tmp5 = tmp1 + tmp3;
+            "sub.s %[tmp7], %[tmp1],    %[tmp3]             \n\t"  // tmp7 = tmp1 - tmp3;
+            "lwc1  %[tmp1], 32(%[tmpz])                     \n\t"
+            "lwc1  %[pom1], 44(%[tmpz])                     \n\t"
+            "add.s %[tmp6], %[tmp2],    %[tmp4]             \n\t"  // tmp6 = tmp2 + tmp4;
+            "sub.s %[tmp8], %[tmp2],    %[tmp4]             \n\t"  // tmp8 = tmp2 - tmp4;
+            "lwc1  %[tmp2], 36(%[tmpz])                     \n\t"
+            "lwc1  %[pom2], 56(%[tmpz])                     \n\t"
+            "lwc1  %[pom3], 60(%[tmpz])                     \n\t"
+            "lwc1  %[tmp3], 48(%[tmpz])                     \n\t"
+            "lwc1  %[tmp4], 52(%[tmpz])                     \n\t"
+            "sub.s %[tmp1], %[tmp1],    %[pom]              \n\t"  // tmp1 = tmpz[4].re - tmpz[5].re;
+            "lwc1  %[pom],  0(%[tmpz])                      \n\t"
+            "sub.s %[tmp2], %[tmp2],    %[pom1]             \n\t"  // tmp2 = tmpz[4].im - tmpz[5].im;
+            "sub.s %[tmp3], %[tmp3],    %[pom2]             \n\t"  // tmp3 = tmpz[6].re - tmpz[7].re;
+            "lwc1  %[pom2], 4(%[tmpz])                      \n\t"
+            "sub.s %[pom1], %[pom],     %[tmp5]             \n\t"
+            "sub.s %[tmp4], %[tmp4],    %[pom3]             \n\t"  // tmp4 = tmpz[6].im - tmpz[7].im;
+            "add.s %[pom3], %[pom],     %[tmp5]             \n\t"
+            "sub.s %[pom],  %[pom2],    %[tmp6]             \n\t"
+            "add.s %[pom2], %[pom2],    %[tmp6]             \n\t"
+            "swc1  %[pom1], 32(%[tmpz])                     \n\t"  // tmpz[4].re = tmpz[0].re - tmp5;
+            "swc1  %[pom3], 0(%[tmpz])                      \n\t"  // tmpz[0].re = tmpz[0].re + tmp5;
+            "swc1  %[pom],  36(%[tmpz])                     \n\t"  // tmpz[4].im = tmpz[0].im - tmp6;
+            "swc1  %[pom2], 4(%[tmpz])                      \n\t"  // tmpz[0].im = tmpz[0].im + tmp6;
+            "lwc1  %[pom1], 16(%[tmpz])                     \n\t"
+            "lwc1  %[pom3], 20(%[tmpz])                     \n\t"
+            "li.s  %[pom],  0.7071067812                    \n\t"  // float pom = 0.7071067812f;
+            "add.s %[temp1],%[tmp1],    %[tmp2]             \n\t"
+            "sub.s %[temp], %[pom1],    %[tmp8]             \n\t"
+            "add.s %[pom2], %[pom3],    %[tmp7]             \n\t"
+            "sub.s %[temp3],%[tmp3],    %[tmp4]             \n\t"
+            "sub.s %[temp4],%[tmp2],    %[tmp1]             \n\t"
+            "swc1  %[temp], 48(%[tmpz])                     \n\t"  // tmpz[6].re = tmpz[2].re - tmp8;
+            "swc1  %[pom2], 52(%[tmpz])                     \n\t"  // tmpz[6].im = tmpz[2].im + tmp7;
+            "add.s %[pom1], %[pom1],    %[tmp8]             \n\t"
+            "sub.s %[pom3], %[pom3],    %[tmp7]             \n\t"
+            "add.s %[tmp3], %[tmp3],    %[tmp4]             \n\t"
+            "mul.s %[tmp5], %[pom],     %[temp1]            \n\t"  // tmp5 = pom * (tmp1 + tmp2);
+            "mul.s %[tmp7], %[pom],     %[temp3]            \n\t"  // tmp7 = pom * (tmp3 - tmp4);
+            "mul.s %[tmp6], %[pom],     %[temp4]            \n\t"  // tmp6 = pom * (tmp2 - tmp1);
+            "mul.s %[tmp8], %[pom],     %[tmp3]             \n\t"  // tmp8 = pom * (tmp3 + tmp4);
+            "swc1  %[pom1], 16(%[tmpz])                     \n\t"  // tmpz[2].re = tmpz[2].re + tmp8;
+            "swc1  %[pom3], 20(%[tmpz])                     \n\t"  // tmpz[2].im = tmpz[2].im - tmp7;
+            "add.s %[tmp1], %[tmp5],    %[tmp7]             \n\t"  // tmp1 = tmp5 + tmp7;
+            "sub.s %[tmp3], %[tmp5],    %[tmp7]             \n\t"  // tmp3 = tmp5 - tmp7;
+            "add.s %[tmp2], %[tmp6],    %[tmp8]             \n\t"  // tmp2 = tmp6 + tmp8;
+            "sub.s %[tmp4], %[tmp6],    %[tmp8]             \n\t"  // tmp4 = tmp6 - tmp8;
+            "lwc1  %[temp], 8(%[tmpz])                      \n\t"
+            "lwc1  %[temp1],12(%[tmpz])                     \n\t"
+            "lwc1  %[pom],  24(%[tmpz])                     \n\t"
+            "lwc1  %[pom2], 28(%[tmpz])                     \n\t"
+            "sub.s %[temp4],%[temp],    %[tmp1]             \n\t"
+            "sub.s %[temp3],%[temp1],   %[tmp2]             \n\t"
+            "add.s %[temp], %[temp],    %[tmp1]             \n\t"
+            "add.s %[temp1],%[temp1],   %[tmp2]             \n\t"
+            "sub.s %[pom1], %[pom],     %[tmp4]             \n\t"
+            "add.s %[pom3], %[pom2],    %[tmp3]             \n\t"
+            "add.s %[pom],  %[pom],     %[tmp4]             \n\t"
+            "sub.s %[pom2], %[pom2],    %[tmp3]             \n\t"
+            "swc1  %[temp4],40(%[tmpz])                     \n\t"  // tmpz[5].re = tmpz[1].re - tmp1;
+            "swc1  %[temp3],44(%[tmpz])                     \n\t"  // tmpz[5].im = tmpz[1].im - tmp2;
+            "swc1  %[temp], 8(%[tmpz])                      \n\t"  // tmpz[1].re = tmpz[1].re + tmp1;
+            "swc1  %[temp1],12(%[tmpz])                     \n\t"  // tmpz[1].im = tmpz[1].im + tmp2;
+            "swc1  %[pom1], 56(%[tmpz])                     \n\t"  // tmpz[7].re = tmpz[3].re - tmp4;
+            "swc1  %[pom3], 60(%[tmpz])                     \n\t"  // tmpz[7].im = tmpz[3].im + tmp3;
+            "swc1  %[pom],  24(%[tmpz])                     \n\t"  // tmpz[3].re = tmpz[3].re + tmp4;
+            "swc1  %[pom2], 28(%[tmpz])                     \n\t"  // tmpz[3].im = tmpz[3].im - tmp3;
+            : [tmp1]"=&f"(tmp1), [pom]"=&f"(pom),   [pom1]"=&f"(pom1), [pom2]"=&f"(pom2),
+              [tmp3]"=&f"(tmp3), [tmp2]"=&f"(tmp2), [tmp4]"=&f"(tmp4), [tmp5]"=&f"(tmp5),  [tmp7]"=&f"(tmp7),
+              [tmp6]"=&f"(tmp6), [tmp8]"=&f"(tmp8), [pom3]"=&f"(pom3),[temp]"=&f"(temp), [temp1]"=&f"(temp1),
+              [temp3]"=&f"(temp3), [temp4]"=&f"(temp4)
+            : [tmpz]"r"(tmpz)
+            : "memory"
+        );
+    }
+
+    step = 1 << (MAX_LOG2_NFFT - 4); // stride into ff_cos_65536 for the first remaining pass; halved after each pass
+    n4 = 4;                          // quarter length of the current pass; doubled after each pass
+
+    for (nbits=4; nbits<=s->nbits; nbits++) { // remaining passes, one per value of nbits from 4 up to s->nbits
+        /*
+        * The asm below computes num_transforms = (num_transforms >> 1) | 1;
+        */
+        __asm__ volatile (
+            "sra %[num_t], %[num_t], 1               \n\t"
+            "ori %[num_t], %[num_t], 1               \n\t"
+
+            : [num_t] "+r" (num_transforms)
+        );
+        n2  = 2 * n4;
+        n34 = 3 * n4;
+
+        for (n=0; n<num_transforms; n++) {
+            offset = fft_offsets_lut[n] << nbits;
+            tmpz = z + offset;
+
+            tmpz_n2  = tmpz +  n2;
+            tmpz_n4  = tmpz +  n4;
+            tmpz_n34 = tmpz +  n34;
+
+            // i == 0 case: combines tmpz[0], tmpz[n4], tmpz[n2], tmpz[n34] without any w_re/w_im multiplies
+            __asm__ volatile (
+                "lwc1  %[pom1], 0(%[tmpz_n2])            \n\t"
+                "lwc1  %[pom],  0(%[tmpz_n34])           \n\t"
+                "lwc1  %[pom2], 4(%[tmpz_n2])            \n\t"
+                "lwc1  %[pom3], 4(%[tmpz_n34])           \n\t"
+                "lwc1  %[temp1],0(%[tmpz])               \n\t"
+                "lwc1  %[temp3],4(%[tmpz])               \n\t"
+                "add.s %[tmp5], %[pom1],      %[pom]     \n\t"   //  tmp5 = tmpz[ n2].re + tmpz[n34].re;
+                "sub.s %[tmp1], %[pom1],      %[pom]     \n\t"   //  tmp1 = tmpz[ n2].re - tmpz[n34].re;
+                "add.s %[tmp6], %[pom2],      %[pom3]    \n\t"   //  tmp6 = tmpz[ n2].im + tmpz[n34].im;
+                "sub.s %[tmp2], %[pom2],      %[pom3]    \n\t"   //  tmp2 = tmpz[ n2].im - tmpz[n34].im;
+                "sub.s %[temp], %[temp1],     %[tmp5]    \n\t"
+                "add.s %[temp1],%[temp1],     %[tmp5]    \n\t"
+                "sub.s %[temp4],%[temp3],     %[tmp6]    \n\t"
+                "add.s %[temp3],%[temp3],     %[tmp6]    \n\t"
+                "swc1  %[temp], 0(%[tmpz_n2])            \n\t"   //  tmpz[ n2].re = tmpz[ 0].re - tmp5;
+                "swc1  %[temp1],0(%[tmpz])               \n\t"   //  tmpz[  0].re = tmpz[ 0].re + tmp5;
+                "lwc1  %[pom1], 0(%[tmpz_n4])            \n\t"
+                "swc1  %[temp4],4(%[tmpz_n2])            \n\t"   //  tmpz[ n2].im = tmpz[ 0].im - tmp6;
+                "lwc1  %[temp], 4(%[tmpz_n4])            \n\t"
+                "swc1  %[temp3],4(%[tmpz])               \n\t"   //  tmpz[  0].im = tmpz[ 0].im + tmp6;
+                "sub.s %[pom],  %[pom1],      %[tmp2]    \n\t"
+                "add.s %[pom1], %[pom1],      %[tmp2]    \n\t"
+                "add.s %[temp1],%[temp],      %[tmp1]    \n\t"
+                "sub.s %[temp], %[temp],      %[tmp1]    \n\t"
+                "swc1  %[pom],  0(%[tmpz_n34])           \n\t"   //  tmpz[n34].re = tmpz[n4].re - tmp2;
+                "swc1  %[pom1], 0(%[tmpz_n4])            \n\t"   //  tmpz[ n4].re = tmpz[n4].re + tmp2;
+                "swc1  %[temp1],4(%[tmpz_n34])           \n\t"   //  tmpz[n34].im = tmpz[n4].im + tmp1;
+                "swc1  %[temp], 4(%[tmpz_n4])            \n\t"   //  tmpz[ n4].im = tmpz[n4].im - tmp1;
+                : [tmp5]"=&f"(tmp5),
+                  [tmp1]"=&f"(tmp1), [pom]"=&f"(pom),        [pom1]"=&f"(pom1),        [pom2]"=&f"(pom2),
+                  [tmp2]"=&f"(tmp2), [tmp6]"=&f"(tmp6),          [pom3]"=&f"(pom3),
+                  [temp]"=&f"(temp), [temp1]"=&f"(temp1),     [temp3]"=&f"(temp3),       [temp4]"=&f"(temp4)
+                : [tmpz]"r"(tmpz), [tmpz_n2]"r"(tmpz_n2), [tmpz_n34]"r"(tmpz_n34), [tmpz_n4]"r"(tmpz_n4)
+                : "memory"
+            );
+
+            w_re_ptr = (float*)(ff_cos_65536 + step);                    // walks forward by step per i
+            w_im_ptr = (float*)(ff_cos_65536 + MAX_FFT_SIZE/4 - step);   // walks backward by step per i
+
+            for (i=1; i<n4; i++) { // i >= 1: same combination, with the n2+i and n34+i inputs first multiplied by (w_re, w_im)
+                w_re = w_re_ptr[0];
+                w_im = w_im_ptr[0];
+                tmpz_n2_i = tmpz_n2  + i;
+                tmpz_n4_i = tmpz_n4  + i;
+                tmpz_n34_i= tmpz_n34 + i;
+                tmpz_i    = tmpz     + i;
+
+                __asm__ volatile (
+                    "lwc1     %[temp],  0(%[tmpz_n2_i])               \n\t"
+                    "lwc1     %[temp1], 4(%[tmpz_n2_i])               \n\t"
+                    "lwc1     %[pom],   0(%[tmpz_n34_i])              \n\t"
+                    "lwc1     %[pom1],  4(%[tmpz_n34_i])              \n\t"
+                    "mul.s    %[temp3], %[w_im],    %[temp]           \n\t"
+                    "mul.s    %[temp4], %[w_im],    %[temp1]          \n\t"
+                    "mul.s    %[pom2],  %[w_im],    %[pom1]           \n\t"
+                    "mul.s    %[pom3],  %[w_im],    %[pom]            \n\t"
+                    "msub.s   %[tmp2],  %[temp3],   %[w_re], %[temp1] \n\t"  // tmp2 = w_re * tmpz[ n2+i].im - w_im * tmpz[ n2+i].re;
+                    "madd.s   %[tmp1],  %[temp4],   %[w_re], %[temp]  \n\t"  // tmp1 = w_re * tmpz[ n2+i].re + w_im * tmpz[ n2+i].im;
+                    "msub.s   %[tmp3],  %[pom2],    %[w_re], %[pom]   \n\t"  // tmp3 = w_re * tmpz[n34+i].re - w_im * tmpz[n34+i].im;
+                    "madd.s   %[tmp4],  %[pom3],    %[w_re], %[pom1]  \n\t"  // tmp4 = w_re * tmpz[n34+i].im + w_im * tmpz[n34+i].re;
+                    "lwc1     %[temp],  0(%[tmpz_i])                  \n\t"
+                    "lwc1     %[pom],   4(%[tmpz_i])                  \n\t"
+                    "add.s    %[tmp5],  %[tmp1],    %[tmp3]           \n\t"  // tmp5 = tmp1 + tmp3;
+                    "sub.s    %[tmp1],  %[tmp1],    %[tmp3]           \n\t"  // tmp1 = tmp1 - tmp3;
+                    "add.s    %[tmp6],  %[tmp2],    %[tmp4]           \n\t"  // tmp6 = tmp2 + tmp4;
+                    "sub.s    %[tmp2],  %[tmp2],    %[tmp4]           \n\t"  // tmp2 = tmp2 - tmp4;
+                    "sub.s    %[temp1], %[temp],    %[tmp5]           \n\t"
+                    "add.s    %[temp],  %[temp],    %[tmp5]           \n\t"
+                    "sub.s    %[pom1],  %[pom],     %[tmp6]           \n\t"
+                    "add.s    %[pom],   %[pom],     %[tmp6]           \n\t"
+                    "lwc1     %[temp3], 0(%[tmpz_n4_i])               \n\t"
+                    "lwc1     %[pom2],  4(%[tmpz_n4_i])               \n\t"
+                    "swc1     %[temp1], 0(%[tmpz_n2_i])               \n\t"  // tmpz[ n2+i].re = tmpz[   i].re - tmp5;
+                    "swc1     %[temp],  0(%[tmpz_i])                  \n\t"  // tmpz[    i].re = tmpz[   i].re + tmp5;
+                    "swc1     %[pom1],  4(%[tmpz_n2_i])               \n\t"  // tmpz[ n2+i].im = tmpz[   i].im - tmp6;
+                    "swc1     %[pom] ,  4(%[tmpz_i])                  \n\t"  // tmpz[    i].im = tmpz[   i].im + tmp6;
+                    "sub.s    %[temp4], %[temp3],   %[tmp2]           \n\t"
+                    "add.s    %[pom3],  %[pom2],    %[tmp1]           \n\t"
+                    "add.s    %[temp3], %[temp3],   %[tmp2]           \n\t"
+                    "sub.s    %[pom2],  %[pom2],    %[tmp1]           \n\t"
+                    "swc1     %[temp4], 0(%[tmpz_n34_i])              \n\t"  // tmpz[n34+i].re = tmpz[n4+i].re - tmp2;
+                    "swc1     %[pom3],  4(%[tmpz_n34_i])              \n\t"  // tmpz[n34+i].im = tmpz[n4+i].im + tmp1;
+                    "swc1     %[temp3], 0(%[tmpz_n4_i])               \n\t"  // tmpz[ n4+i].re = tmpz[n4+i].re + tmp2;
+                    "swc1     %[pom2],  4(%[tmpz_n4_i])               \n\t"  // tmpz[ n4+i].im = tmpz[n4+i].im - tmp1;
+                    : [tmp1]"=&f"(tmp1), [tmp2]"=&f" (tmp2), [temp]"=&f"(temp), [tmp3]"=&f"(tmp3),
+                      [tmp4]"=&f"(tmp4), [tmp5]"=&f"(tmp5), [tmp6]"=&f"(tmp6),
+                      [temp1]"=&f"(temp1), [temp3]"=&f"(temp3), [temp4]"=&f"(temp4),
+                      [pom]"=&f"(pom), [pom1]"=&f"(pom1), [pom2]"=&f"(pom2), [pom3]"=&f"(pom3)
+                    : [w_re]"f"(w_re), [w_im]"f"(w_im),
+                      [tmpz_i]"r"(tmpz_i),[tmpz_n2_i]"r"(tmpz_n2_i),
+                      [tmpz_n34_i]"r"(tmpz_n34_i), [tmpz_n4_i]"r"(tmpz_n4_i)
+                    : "memory"
+                );
+                w_re_ptr += step;
+                w_im_ptr -= step;
+            }
+        }
+        step >>= 1;
+        n4   <<= 1;
+    }
+}
+
+/**
+ * MDCT/IMDCT transforms: ff_imdct_half_mips() pre-rotates input into output
+ * (viewed as FFTComplex z[]), runs s->fft_calc on z, then post-rotates z in place.
+ */
+static void ff_imdct_half_mips(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    int k, n8, n4, n2, n, j;
+    const uint16_t *revtab = s->revtab;
+    const FFTSample *tcos = s->tcos;
+    const FFTSample *tsin = s->tsin;
+    const FFTSample *in1, *in2, *in3, *in4;
+    FFTComplex *z = (FFTComplex *)output; /* output reinterpreted as complex pairs */
+
+    int j1;
+    const float *tcos1, *tsin1, *tcos2, *tsin2;
+    float temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8,
+        temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
+    FFTComplex *z1, *z2;
+
+    n = 1 << s->mdct_bits;
+    n2 = n >> 1;
+    n4 = n >> 2;
+    n8 = n >> 3;
+
+    /* pre rotation: each pass fills z[revtab[k]] and z[revtab[k + 1]] */
+    in1 = input;            /* in1/in3 walk forward 4 floats per pass */
+    in2 = input + n2 - 1;   /* in2/in4 walk backward 4 floats per pass */
+    in3 = input + 2;
+    in4 = input + n2 - 3;
+
+    tcos1 = tcos;
+    tsin1 = tsin;
+
+    /* n4 = 64 or 128 */
+    for(k = 0; k < n4; k += 2) {
+        j  = revtab[k    ];
+        j1 = revtab[k + 1];
+
+        __asm__ volatile (
+            "lwc1           %[temp1],       0(%[in2])                           \t\n"
+            "lwc1           %[temp2],       0(%[tcos1])                         \t\n"
+            "lwc1           %[temp3],       0(%[tsin1])                         \t\n"
+            "lwc1           %[temp4],       0(%[in1])                           \t\n"
+            "lwc1           %[temp5],       0(%[in4])                           \t\n"
+            "mul.s          %[temp9],       %[temp1],   %[temp2]                \t\n"
+            "mul.s          %[temp10],      %[temp1],   %[temp3]                \t\n"
+            "lwc1           %[temp6],       4(%[tcos1])                         \t\n"
+            "lwc1           %[temp7],       4(%[tsin1])                         \t\n"
+            "nmsub.s        %[temp9],       %[temp9],   %[temp4],   %[temp3]    \t\n"
+            "madd.s         %[temp10],      %[temp10],  %[temp4],   %[temp2]    \t\n"
+            "mul.s          %[temp11],      %[temp5],   %[temp6]                \t\n"
+            "mul.s          %[temp12],      %[temp5],   %[temp7]                \t\n"
+            "lwc1           %[temp8],       0(%[in3])                           \t\n"
+            "addiu          %[tcos1],       %[tcos1],   8                       \t\n"
+            "addiu          %[tsin1],       %[tsin1],   8                       \t\n"
+            "addiu          %[in1],         %[in1],     16                      \t\n"
+            "nmsub.s        %[temp11],      %[temp11],  %[temp8],   %[temp7]    \t\n"
+            "madd.s         %[temp12],      %[temp12],  %[temp8],   %[temp6]    \t\n"
+            "addiu          %[in2],         %[in2],     -16                     \t\n"
+            "addiu          %[in3],         %[in3],     16                      \t\n"
+            "addiu          %[in4],         %[in4],     -16                     \t\n"
+
+            : [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
+              [temp3]"=&f"(temp3), [temp4]"=&f"(temp4),
+              [temp5]"=&f"(temp5), [temp6]"=&f"(temp6),
+              [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
+              [temp9]"=&f"(temp9), [temp10]"=&f"(temp10),
+              [temp11]"=&f"(temp11), [temp12]"=&f"(temp12),
+              [tsin1]"+r"(tsin1), [tcos1]"+r"(tcos1),
+              [in1]"+r"(in1), [in2]"+r"(in2),
+              [in3]"+r"(in3), [in4]"+r"(in4)
+            :
+            : "memory"
+        );
+
+        z[j ].re = temp9;   /* in2 * tcos[k]     - in1 * tsin[k]     (values loaded this pass) */
+        z[j ].im = temp10;  /* in2 * tsin[k]     + in1 * tcos[k]     */
+        z[j1].re = temp11;  /* in4 * tcos[k + 1] - in3 * tsin[k + 1] */
+        z[j1].im = temp12;  /* in4 * tsin[k + 1] + in3 * tcos[k + 1] */
+    }
+
+    s->fft_calc(s, z); /* transform z through the context's fft_calc hook */
+
+    /* post rotation + reordering: z1 walks down from n8 - 2, z2 walks up from n8 */
+    /* n8 = 32 or 64 */
+    for(k = 0; k < n8; k += 2) {
+        tcos1 = &tcos[n8 - k - 2];
+        tsin1 = &tsin[n8 - k - 2];
+        tcos2 = &tcos[n8 + k];
+        tsin2 = &tsin[n8 + k];
+        z1 = &z[n8 - k - 2];
+        z2 = &z[n8 + k    ];
+
+        __asm__ volatile (
+            "lwc1       %[temp1],   12(%[z1])                           \t\n"
+            "lwc1       %[temp2],   4(%[tsin1])                         \t\n"
+            "lwc1       %[temp3],   4(%[tcos1])                         \t\n"
+            "lwc1       %[temp4],   8(%[z1])                            \t\n"
+            "lwc1       %[temp5],   4(%[z1])                            \t\n"
+            "mul.s      %[temp9],   %[temp1],   %[temp2]                \t\n"
+            "mul.s      %[temp10],  %[temp1],   %[temp3]                \t\n"
+            "lwc1       %[temp6],   0(%[tsin1])                         \t\n"
+            "lwc1       %[temp7],   0(%[tcos1])                         \t\n"
+            "nmsub.s    %[temp9],   %[temp9],   %[temp4],   %[temp3]    \t\n"
+            "madd.s     %[temp10],  %[temp10],  %[temp4],   %[temp2]    \t\n"
+            "mul.s      %[temp11],  %[temp5],   %[temp6]                \t\n"
+            "mul.s      %[temp12],  %[temp5],   %[temp7]                \t\n"
+            "lwc1       %[temp8],   0(%[z1])                            \t\n"
+            "lwc1       %[temp1],   4(%[z2])                            \t\n"
+            "lwc1       %[temp2],   0(%[tsin2])                         \t\n"
+            "lwc1       %[temp3],   0(%[tcos2])                         \t\n"
+            "nmsub.s    %[temp11],  %[temp11],  %[temp8],   %[temp7]    \t\n"
+            "madd.s     %[temp12],  %[temp12],  %[temp8],   %[temp6]    \t\n"
+            "mul.s      %[temp13],  %[temp1],   %[temp2]                \t\n"
+            "mul.s      %[temp14],  %[temp1],   %[temp3]                \t\n"
+            "lwc1       %[temp4],   0(%[z2])                            \t\n"
+            "lwc1       %[temp5],   12(%[z2])                           \t\n"
+            "lwc1       %[temp6],   4(%[tsin2])                         \t\n"
+            "lwc1       %[temp7],   4(%[tcos2])                         \t\n"
+            "nmsub.s    %[temp13],  %[temp13],  %[temp4],   %[temp3]    \t\n"
+            "madd.s     %[temp14],  %[temp14],  %[temp4],   %[temp2]    \t\n"
+            "mul.s      %[temp15],  %[temp5],   %[temp6]                \t\n"
+            "mul.s      %[temp16],  %[temp5],   %[temp7]                \t\n"
+            "lwc1       %[temp8],   8(%[z2])                            \t\n"
+            "nmsub.s    %[temp15],  %[temp15],  %[temp8],   %[temp7]    \t\n"
+            "madd.s     %[temp16],  %[temp16],  %[temp8],   %[temp6]    \t\n"
+            : [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
+              [temp3]"=&f"(temp3), [temp4]"=&f"(temp4),
+              [temp5]"=&f"(temp5), [temp6]"=&f"(temp6),
+              [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
+              [temp9]"=&f"(temp9), [temp10]"=&f"(temp10),
+              [temp11]"=&f"(temp11), [temp12]"=&f"(temp12),
+              [temp13]"=&f"(temp13), [temp14]"=&f"(temp14),
+              [temp15]"=&f"(temp15), [temp16]"=&f"(temp16)
+            : [z1]"r"(z1), [z2]"r"(z2),
+              [tsin1]"r"(tsin1), [tcos1]"r"(tcos1),
+              [tsin2]"r"(tsin2), [tcos2]"r"(tcos2)
+            : "memory"
+        );
+
+        z1[1].re = temp9;
+        z1[1].im = temp14;  /* computed from z2[0]: .im values are exchanged across the pair */
+        z2[0].re = temp13;
+        z2[0].im = temp10;  /* computed from z1[1] */
+
+        z1[0].re = temp11;
+        z1[0].im = temp16;  /* computed from z2[1] */
+        z2[1].re = temp15;
+        z2[1].im = temp12;  /* computed from z1[0] */
+    }
+}
+
+/**
+ * Full inverse MDCT of size N = 1 << s->mdct_bits, built on ff_imdct_half_mips().
+ * @param output N samples; the half transform is written starting at output + N/4
+ * @param input  N/2 samples
+ */
+static void ff_imdct_calc_mips(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    const int n  = 1 << s->mdct_bits;
+    const int n2 = n >> 1;
+    const int n4 = n >> 2;
+    int i;
+
+    ff_imdct_half_mips(s, output + n4, input);
+
+    for (i = 0; i < n4; i += 4) {
+        output[i    ] = -output[n2 - 1 - i];
+        output[i + 1] = -output[n2 - 2 - i];
+        output[i + 2] = -output[n2 - 3 - i];
+        output[i + 3] = -output[n2 - 4 - i];
+        /* last quarter: reversed copy of the samples starting at n2 */
+        output[n - 1 - i] = output[n2 + i    ];
+        output[n - 2 - i] = output[n2 + i + 1];
+        output[n - 3 - i] = output[n2 + i + 2];
+        output[n - 4 - i] = output[n2 + i + 3];
+    }
+}
+#endif /* HAVE_INLINE_ASM */
+
+av_cold void ff_fft_init_mips(FFTContext *s)
+{
+    int n=0;
+    /* table setup below runs unconditionally; only the hook overrides need inline asm */
+    ff_fft_lut_init(fft_offsets_lut, 0, 1 << 16, &n);
+    ff_init_ff_cos_tabs(16);
+
+#if HAVE_INLINE_ASM
+    s->fft_calc     = ff_fft_calc_mips;
+#if CONFIG_MDCT
+    s->imdct_calc   = ff_imdct_calc_mips;
+    s->imdct_half   = ff_imdct_half_mips;
+#endif /* CONFIG_MDCT */
+#endif /* HAVE_INLINE_ASM */
+}
diff --git a/libavcodec/mips/fmtconvert_mips.c b/libavcodec/mips/fmtconvert_mips.c
new file mode 100644
index 0000000..8a0265f
--- /dev/null
+++ b/libavcodec/mips/fmtconvert_mips.c
@@ -0,0 +1,342 @@
+/*
+ * Format Conversion Utils for MIPS
+ *
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Zoran Lukic (zoranl@mips.com)
+ * Author:  Nedeljko Babic (nbabic@mips.com)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "config.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/fmtconvert.h"
+
+#if HAVE_INLINE_ASM
+#if HAVE_MIPSDSPR1
+static void float_to_int16_mips(int16_t *dst, const float *src, long len)
+{
+    const float *src_end = src + len;
+    int ret0, ret1, ret2, ret3, ret4, ret5, ret6, ret7;
+    float src0, src1, src2, src3, src4, src5, src6, src7;
+
+    /*
+     * 8x unrolled: len == 0 is skipped up front; otherwise len must be a multiple of 8,
+     * because the loop only exits when src == src_end after stepping 8 floats per pass */
+    __asm__ volatile(
+        "beq        %[len],  $zero,   fti16_end%=   \n\t"
+        "fti16_lp%=:                                \n\t"
+        "lwc1       %[src0], 0(%[src])              \n\t"
+        "lwc1       %[src1], 4(%[src])              \n\t"
+        "lwc1       %[src2], 8(%[src])              \n\t"
+        "lwc1       %[src3], 12(%[src])             \n\t"
+        "cvt.w.s    %[src0], %[src0]                \n\t"
+        "cvt.w.s    %[src1], %[src1]                \n\t"
+        "cvt.w.s    %[src2], %[src2]                \n\t"
+        "cvt.w.s    %[src3], %[src3]                \n\t"
+        "mfc1       %[ret0], %[src0]                \n\t"
+        "mfc1       %[ret1], %[src1]                \n\t"
+        "mfc1       %[ret2], %[src2]                \n\t"
+        "mfc1       %[ret3], %[src3]                \n\t"
+        "lwc1       %[src4], 16(%[src])             \n\t"
+        "lwc1       %[src5], 20(%[src])             \n\t"
+        "lwc1       %[src6], 24(%[src])             \n\t"
+        "lwc1       %[src7], 28(%[src])             \n\t"
+        "cvt.w.s    %[src4], %[src4]                \n\t"
+        "cvt.w.s    %[src5], %[src5]                \n\t"
+        "cvt.w.s    %[src6], %[src6]                \n\t"
+        "cvt.w.s    %[src7], %[src7]                \n\t"
+        "addiu      %[src],  32                     \n\t"
+        "shll_s.w   %[ret0], %[ret0], 16            \n\t"
+        "shll_s.w   %[ret1], %[ret1], 16            \n\t"
+        "shll_s.w   %[ret2], %[ret2], 16            \n\t"
+        "shll_s.w   %[ret3], %[ret3], 16            \n\t"
+        "srl        %[ret0], %[ret0], 16            \n\t"
+        "srl        %[ret1], %[ret1], 16            \n\t"
+        "srl        %[ret2], %[ret2], 16            \n\t"
+        "srl        %[ret3], %[ret3], 16            \n\t"
+        "sh         %[ret0], 0(%[dst])              \n\t"
+        "sh         %[ret1], 2(%[dst])              \n\t"
+        "sh         %[ret2], 4(%[dst])              \n\t"
+        "sh         %[ret3], 6(%[dst])              \n\t"
+        "mfc1       %[ret4], %[src4]                \n\t"
+        "mfc1       %[ret5], %[src5]                \n\t"
+        "mfc1       %[ret6], %[src6]                \n\t"
+        "mfc1       %[ret7], %[src7]                \n\t"
+        "shll_s.w   %[ret4], %[ret4], 16            \n\t"
+        "shll_s.w   %[ret5], %[ret5], 16            \n\t"
+        "shll_s.w   %[ret6], %[ret6], 16            \n\t"
+        "shll_s.w   %[ret7], %[ret7], 16            \n\t"
+        "srl        %[ret4], %[ret4], 16            \n\t"
+        "srl        %[ret5], %[ret5], 16            \n\t"
+        "srl        %[ret6], %[ret6], 16            \n\t"
+        "srl        %[ret7], %[ret7], 16            \n\t"
+        "sh         %[ret4], 8(%[dst])              \n\t"
+        "sh         %[ret5], 10(%[dst])             \n\t"
+        "sh         %[ret6], 12(%[dst])             \n\t"
+        "sh         %[ret7], 14(%[dst])             \n\t"
+        "addiu      %[dst],  16                     \n\t"
+        "bne        %[src],  %[src_end], fti16_lp%= \n\t"
+        "fti16_end%=:                               \n\t"
+        : [ret0]"=&r"(ret0), [ret1]"=&r"(ret1), [ret2]"=&r"(ret2), [ret3]"=&r"(ret3),
+          [ret4]"=&r"(ret4), [ret5]"=&r"(ret5), [ret6]"=&r"(ret6), [ret7]"=&r"(ret7),
+          [src0]"=&f"(src0), [src1]"=&f"(src1), [src2]"=&f"(src2), [src3]"=&f"(src3),
+          [src4]"=&f"(src4), [src5]"=&f"(src5), [src6]"=&f"(src6), [src7]"=&f"(src7),
+          [src]"+r"(src), [dst]"+r"(dst)
+        : [src_end]"r"(src_end), [len]"r"(len)
+        : "memory"
+    );
+}
+
+static void float_to_int16_interleave_mips(int16_t *dst, const float **src, long len,
+        int channels)
+{
+    int   c, ch2 = channels <<1; /* ch2: bytes between successive samples of one channel in dst */
+    int ret0, ret1, ret2, ret3, ret4, ret5, ret6, ret7;
+    float src0, src1, src2, src3, src4, src5, src6, src7;
+    int16_t *dst_ptr0, *dst_ptr1, *dst_ptr2, *dst_ptr3;
+    int16_t *dst_ptr4, *dst_ptr5, *dst_ptr6, *dst_ptr7;
+    const float *src_ptr, *src_ptr2, *src_end;
+    /* both loops are bottom-tested: len must be > 0, and a multiple of 8 in the generic path */
+    if (channels == 2) {
+        src_ptr = &src[0][0];
+        src_ptr2 = &src[1][0];
+        src_end = src_ptr + len;
+
+        __asm__ volatile (
+            "fti16i2_lp%=:                                   \n\t"
+            "lwc1       %[src0],    0(%[src_ptr])            \n\t"
+            "lwc1       %[src1],    0(%[src_ptr2])           \n\t"
+            "addiu      %[src_ptr], 4                        \n\t"
+            "cvt.w.s    %[src0],    %[src0]                  \n\t" /* in place: no hard-coded FP regs */
+            "cvt.w.s    %[src1],    %[src1]                  \n\t"
+            "mfc1       %[ret0],    %[src0]                  \n\t"
+            "mfc1       %[ret1],    %[src1]                  \n\t"
+            "shll_s.w   %[ret0],    %[ret0], 16              \n\t"
+            "shll_s.w   %[ret1],    %[ret1], 16              \n\t"
+            "addiu      %[src_ptr2], 4                       \n\t"
+            "srl        %[ret0],    %[ret0], 16              \n\t"
+            "srl        %[ret1],    %[ret1], 16              \n\t"
+            "sh         %[ret0],    0(%[dst])                \n\t"
+            "sh         %[ret1],    2(%[dst])                \n\t"
+            "addiu      %[dst],     4                        \n\t"
+            "bne        %[src_ptr], %[src_end], fti16i2_lp%= \n\t"
+            : [ret0]"=&r"(ret0), [ret1]"=&r"(ret1),
+              [src0]"=&f"(src0), [src1]"=&f"(src1),
+              [src_ptr]"+r"(src_ptr), [src_ptr2]"+r"(src_ptr2),
+              [dst]"+r"(dst)
+            : [src_end]"r"(src_end)
+            : "memory"
+        );
+    } else {
+        for (c = 0; c < channels; c++) {
+            src_ptr  = &src[c][0];
+            dst_ptr0 = &dst[c];
+            src_end = src_ptr + len;
+            /*
+             * 8x unrolled: each pass stores 8 samples of channel c, ch2 bytes apart in dst
+             */
+            __asm__ volatile(
+                "fti16i_lp%=:                                     \n\t"
+                "lwc1       %[src0], 0(%[src_ptr])                \n\t"
+                "lwc1       %[src1], 4(%[src_ptr])                \n\t"
+                "lwc1       %[src2], 8(%[src_ptr])                \n\t"
+                "lwc1       %[src3], 12(%[src_ptr])               \n\t"
+                "cvt.w.s    %[src0], %[src0]                      \n\t"
+                "cvt.w.s    %[src1], %[src1]                      \n\t"
+                "cvt.w.s    %[src2], %[src2]                      \n\t"
+                "cvt.w.s    %[src3], %[src3]                      \n\t"
+                "mfc1       %[ret0], %[src0]                      \n\t"
+                "mfc1       %[ret1], %[src1]                      \n\t"
+                "mfc1       %[ret2], %[src2]                      \n\t"
+                "mfc1       %[ret3], %[src3]                      \n\t"
+                "lwc1       %[src4], 16(%[src_ptr])               \n\t"
+                "lwc1       %[src5], 20(%[src_ptr])               \n\t"
+                "lwc1       %[src6], 24(%[src_ptr])               \n\t"
+                "lwc1       %[src7], 28(%[src_ptr])               \n\t"
+                "addu       %[dst_ptr1], %[dst_ptr0], %[ch2]      \n\t"
+                "addu       %[dst_ptr2], %[dst_ptr1], %[ch2]      \n\t"
+                "addu       %[dst_ptr3], %[dst_ptr2], %[ch2]      \n\t"
+                "addu       %[dst_ptr4], %[dst_ptr3], %[ch2]      \n\t"
+                "addu       %[dst_ptr5], %[dst_ptr4], %[ch2]      \n\t"
+                "addu       %[dst_ptr6], %[dst_ptr5], %[ch2]      \n\t"
+                "addu       %[dst_ptr7], %[dst_ptr6], %[ch2]      \n\t"
+                "addiu      %[src_ptr],  32                       \n\t"
+                "cvt.w.s    %[src4], %[src4]                      \n\t"
+                "cvt.w.s    %[src5], %[src5]                      \n\t"
+                "cvt.w.s    %[src6], %[src6]                      \n\t"
+                "cvt.w.s    %[src7], %[src7]                      \n\t"
+                "shll_s.w   %[ret0], %[ret0], 16                  \n\t"
+                "shll_s.w   %[ret1], %[ret1], 16                  \n\t"
+                "shll_s.w   %[ret2], %[ret2], 16                  \n\t"
+                "shll_s.w   %[ret3], %[ret3], 16                  \n\t"
+                "srl        %[ret0], %[ret0], 16                  \n\t"
+                "srl        %[ret1], %[ret1], 16                  \n\t"
+                "srl        %[ret2], %[ret2], 16                  \n\t"
+                "srl        %[ret3], %[ret3], 16                  \n\t"
+                "sh         %[ret0], 0(%[dst_ptr0])               \n\t"
+                "sh         %[ret1], 0(%[dst_ptr1])               \n\t"
+                "sh         %[ret2], 0(%[dst_ptr2])               \n\t"
+                "sh         %[ret3], 0(%[dst_ptr3])               \n\t"
+                "mfc1       %[ret4], %[src4]                      \n\t"
+                "mfc1       %[ret5], %[src5]                      \n\t"
+                "mfc1       %[ret6], %[src6]                      \n\t"
+                "mfc1       %[ret7], %[src7]                      \n\t"
+                "shll_s.w   %[ret4], %[ret4], 16                  \n\t"
+                "shll_s.w   %[ret5], %[ret5], 16                  \n\t"
+                "shll_s.w   %[ret6], %[ret6], 16                  \n\t"
+                "shll_s.w   %[ret7], %[ret7], 16                  \n\t"
+                "srl        %[ret4], %[ret4], 16                  \n\t"
+                "srl        %[ret5], %[ret5], 16                  \n\t"
+                "srl        %[ret6], %[ret6], 16                  \n\t"
+                "srl        %[ret7], %[ret7], 16                  \n\t"
+                "sh         %[ret4], 0(%[dst_ptr4])               \n\t"
+                "sh         %[ret5], 0(%[dst_ptr5])               \n\t"
+                "sh         %[ret6], 0(%[dst_ptr6])               \n\t"
+                "sh         %[ret7], 0(%[dst_ptr7])               \n\t"
+                "addu       %[dst_ptr0], %[dst_ptr7], %[ch2]      \n\t"
+                "bne        %[src_ptr],  %[src_end],  fti16i_lp%= \n\t"
+                : [ret0]"=&r"(ret0), [ret1]"=&r"(ret1), [ret2]"=&r"(ret2), [ret3]"=&r"(ret3),
+                  [ret4]"=&r"(ret4), [ret5]"=&r"(ret5), [ret6]"=&r"(ret6), [ret7]"=&r"(ret7),
+                  [src0]"=&f"(src0), [src1]"=&f"(src1), [src2]"=&f"(src2), [src3]"=&f"(src3),
+                  [src4]"=&f"(src4), [src5]"=&f"(src5), [src6]"=&f"(src6), [src7]"=&f"(src7),
+                  [dst_ptr1]"=&r"(dst_ptr1), [dst_ptr2]"=&r"(dst_ptr2), [dst_ptr3]"=&r"(dst_ptr3),
+                  [dst_ptr4]"=&r"(dst_ptr4), [dst_ptr5]"=&r"(dst_ptr5), [dst_ptr6]"=&r"(dst_ptr6),
+                  [dst_ptr7]"=&r"(dst_ptr7), [dst_ptr0]"+r"(dst_ptr0), [src_ptr]"+r"(src_ptr)
+                : [ch2]"r"(ch2), [src_end]"r"(src_end)
+                : "memory"
+            );
+        }
+    }
+}
+#endif /* HAVE_MIPSDSPR1 */
+
+static void int32_to_float_fmul_scalar_mips(float *dst, const int *src,
+        float mul, int len)
+{
+    /*
+     * Computes dst[i] = src[i] * mul; the locals below are scratch operands for the asm.
+     */
+    float temp1, temp3, temp5, temp7, temp9, temp11, temp13, temp15;
+
+    int rpom1, rpom2, rpom11, rpom21, rpom12, rpom22, rpom13, rpom23;
+    const int *src_end = src + len;
+    /*
+     * 8x unrolled, bottom-tested loop with no entry guard: len must be a non-zero multiple of 8
+     */
+    __asm__ volatile (
+        "i32tf_lp%=:                                    \n\t"
+        "lw       %[rpom11],     0(%[src])              \n\t"
+        "lw       %[rpom21],     4(%[src])              \n\t"
+        "lw       %[rpom1],      8(%[src])              \n\t"
+        "lw       %[rpom2],      12(%[src])             \n\t"
+        "mtc1     %[rpom11],     %[temp1]               \n\t"
+        "mtc1     %[rpom21],     %[temp3]               \n\t"
+        "mtc1     %[rpom1],      %[temp5]               \n\t"
+        "mtc1     %[rpom2],      %[temp7]               \n\t"
+
+        "lw       %[rpom13],     16(%[src])             \n\t"
+        "lw       %[rpom23],     20(%[src])             \n\t"
+        "lw       %[rpom12],     24(%[src])             \n\t"
+        "lw       %[rpom22],     28(%[src])             \n\t"
+        "mtc1     %[rpom13],     %[temp9]               \n\t"
+        "mtc1     %[rpom23],     %[temp11]              \n\t"
+        "mtc1     %[rpom12],     %[temp13]              \n\t"
+        "mtc1     %[rpom22],     %[temp15]              \n\t"
+
+        "addiu    %[src],        32                     \n\t"
+        "cvt.s.w  %[temp1],      %[temp1]               \n\t"
+        "cvt.s.w  %[temp3],      %[temp3]               \n\t"
+        "cvt.s.w  %[temp5],      %[temp5]               \n\t"
+        "cvt.s.w  %[temp7],      %[temp7]               \n\t"
+
+        "cvt.s.w  %[temp9],      %[temp9]               \n\t"
+        "cvt.s.w  %[temp11],     %[temp11]              \n\t"
+        "cvt.s.w  %[temp13],     %[temp13]              \n\t"
+        "cvt.s.w  %[temp15],     %[temp15]              \n\t"
+
+        "mul.s   %[temp1],       %[temp1],    %[mul]    \n\t"
+        "mul.s   %[temp3],       %[temp3],    %[mul]    \n\t"
+        "mul.s   %[temp5],       %[temp5],    %[mul]    \n\t"
+        "mul.s   %[temp7],       %[temp7],    %[mul]    \n\t"
+
+        "mul.s   %[temp9],       %[temp9],    %[mul]    \n\t"
+        "mul.s   %[temp11],      %[temp11],   %[mul]    \n\t"
+        "mul.s   %[temp13],      %[temp13],   %[mul]    \n\t"
+        "mul.s   %[temp15],      %[temp15],   %[mul]    \n\t"
+
+        "swc1    %[temp1],       0(%[dst])              \n\t" /*dst[i] = src[i] * mul;    */
+        "swc1    %[temp3],       4(%[dst])              \n\t" /*dst[i+1] = src[i+1] * mul;*/
+        "swc1    %[temp5],       8(%[dst])              \n\t" /*dst[i+2] = src[i+2] * mul;*/
+        "swc1    %[temp7],       12(%[dst])             \n\t" /*dst[i+3] = src[i+3] * mul;*/
+
+        "swc1    %[temp9],       16(%[dst])             \n\t" /*dst[i+4] = src[i+4] * mul;*/
+        "swc1    %[temp11],      20(%[dst])             \n\t" /*dst[i+5] = src[i+5] * mul;*/
+        "swc1    %[temp13],      24(%[dst])             \n\t" /*dst[i+6] = src[i+6] * mul;*/
+        "swc1    %[temp15],      28(%[dst])             \n\t" /*dst[i+7] = src[i+7] * mul;*/
+        "addiu   %[dst],        32                      \n\t"
+        "bne     %[src],        %[src_end], i32tf_lp%=  \n\t"
+        : [temp1]"=&f"(temp1),   [temp11]"=&f"(temp11),
+          [temp13]"=&f"(temp13), [temp15]"=&f"(temp15),
+          [temp3]"=&f"(temp3),   [temp5]"=&f"(temp5),
+          [temp7]"=&f"(temp7),   [temp9]"=&f"(temp9),
+          [rpom1]"=&r"(rpom1),   [rpom2]"=&r"(rpom2),
+          [rpom11]"=&r"(rpom11), [rpom21]"=&r"(rpom21),
+          [rpom12]"=&r"(rpom12), [rpom22]"=&r"(rpom22),
+          [rpom13]"=&r"(rpom13), [rpom23]"=&r"(rpom23),
+          [dst]"+r"(dst),       [src]"+r"(src)
+        : [mul]"f"(mul),        [src_end]"r"(src_end)
+        : "memory"
+    );
+}
+#endif /* HAVE_INLINE_ASM */
+
+av_cold void ff_fmt_convert_init_mips(FmtConvertContext *c)
+{
+#if HAVE_INLINE_ASM
+    c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_mips;
+#if HAVE_MIPSDSPR1
+    c->float_to_int16 = float_to_int16_mips;
+    c->float_to_int16_interleave = float_to_int16_interleave_mips;
+#endif /* HAVE_MIPSDSPR1 */
+#endif /* HAVE_INLINE_ASM */
+}
diff --git a/libavcodec/mips/iirfilter_mips.c b/libavcodec/mips/iirfilter_mips.c
new file mode 100644
index 0000000..a5646cd
--- /dev/null
+++ b/libavcodec/mips/iirfilter_mips.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Bojan Zivkovic (bojan@mips.com)
+ *
+ * IIR filter optimized for MIPS floating-point architecture
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+ /**
+ * @file
+ * Reference: libavcodec/iirfilter.c
+ */
+
+#include "libavcodec/iirfilter.h"
+
+#if HAVE_INLINE_ASM
+typedef struct FFIIRFilterCoeffs {
+    int   order; /* selects the order-2, order-4 or generic path in ff_iir_filter_flt_mips */
+    float gain;  /* multiplies each input sample */
+    int   *cx;   /* integer weights applied to state values when forming an output sample */
+    float *cy;   /* weights applied to state values when forming the next state value */
+} FFIIRFilterCoeffs;
+/* NOTE(review): layout presumably has to match the definitions in iirfilter.c - confirm */
+typedef struct FFIIRFilterState {
+    float x[1];  /* indexed up to order - 1 by the filter, so storage must exceed one element */
+} FFIIRFilterState;
+
+static void ff_iir_filter_flt_mips(const struct FFIIRFilterCoeffs *c,
+                                   struct FFIIRFilterState *s, int size,
+                                   const float *src, int sstep, float *dst, int dstep)
+{
+    if (c->order == 2) { /* plain C path, one sample per pass */
+        int i;
+        const float *src0 = src;
+        float       *dst0 = dst;
+        for (i = 0; i < size; i++) {
+            float in = *src0 * c->gain  + s->x[0] * c->cy[0] + s->x[1] * c->cy[1];
+            *dst0 = s->x[0] + in + s->x[1] * c->cx[1];
+            s->x[0] = s->x[1];
+            s->x[1] = in;
+            src0 += sstep;
+            dst0 += dstep;
+        }
+    } else if (c->order == 4) { /* FPU inline-asm path */
+        int i;
+        const float *src0 = src;
+        float       *dst0 = dst;
+        float four = 4.0;
+        float six  = 6.0;
+        for (i = 0; i < size; i += 4) { /* always a whole group of 4 samples per pass */
+            float in1, in2, in3, in4;
+            float res1, res2, res3, res4;
+            float *x  = s->x;
+            float *cy = c->cy;
+            float gain = c->gain;
+            float src0_0 = src0[0      ]; /* inputs are sstep elements apart */
+            float src0_1 = src0[sstep  ];
+            float src0_2 = src0[2*sstep];
+            float src0_3 = src0[3*sstep];
+
+            __asm__ volatile (
+                "lwc1   $f0,        0(%[cy])                    \n\t"
+                "lwc1   $f4,        0(%[x])                     \n\t"
+                "lwc1   $f5,        4(%[x])                     \n\t"
+                "lwc1   $f6,        8(%[x])                     \n\t"
+                "lwc1   $f7,        12(%[x])                    \n\t"
+                "mul.s  %[in1],     %[src0_0],  %[gain]         \n\t"
+                "mul.s  %[in2],     %[src0_1],  %[gain]         \n\t"
+                "mul.s  %[in3],     %[src0_2],  %[gain]         \n\t"
+                "mul.s  %[in4],     %[src0_3],  %[gain]         \n\t"
+                "lwc1   $f1,        4(%[cy])                    \n\t"
+                "madd.s %[in1],     %[in1],     $f0,    $f4     \n\t"
+                "madd.s %[in2],     %[in2],     $f0,    $f5     \n\t"
+                "madd.s %[in3],     %[in3],     $f0,    $f6     \n\t"
+                "madd.s %[in4],     %[in4],     $f0,    $f7     \n\t"
+                "lwc1   $f2,        8(%[cy])                    \n\t"
+                "madd.s %[in1],     %[in1],     $f1,    $f5     \n\t"
+                "madd.s %[in2],     %[in2],     $f1,    $f6     \n\t"
+                "madd.s %[in3],     %[in3],     $f1,    $f7     \n\t"
+                "lwc1   $f3,        12(%[cy])                   \n\t"
+                "add.s  $f8,        $f5,        $f7             \n\t"
+                "madd.s %[in1],     %[in1],     $f2,    $f6     \n\t"
+                "madd.s %[in2],     %[in2],     $f2,    $f7     \n\t"
+                "mul.s  $f9,        $f6,        %[six]          \n\t"
+                "mul.s  $f10,       $f7,        %[six]          \n\t"
+                "madd.s %[in1],     %[in1],     $f3,    $f7     \n\t"
+                "madd.s %[in2],     %[in2],     $f3,    %[in1]  \n\t"
+                "madd.s %[in3],     %[in3],     $f2,    %[in1]  \n\t"
+                "madd.s %[in4],     %[in4],     $f1,    %[in1]  \n\t"
+                "add.s  %[res1],    $f4,        %[in1]          \n\t"
+                "swc1   %[in1],     0(%[x])                     \n\t"
+                "add.s  $f0,        $f6,        %[in1]          \n\t"
+                "madd.s %[in3],     %[in3],     $f3,    %[in2]  \n\t"
+                "madd.s %[in4],     %[in4],     $f2,    %[in2]  \n\t"
+                "add.s  %[res2],    $f5,        %[in2]          \n\t"
+                "madd.s %[res1],    %[res1],    $f8,    %[four] \n\t"
+                "add.s  $f8,        $f7,        %[in2]          \n\t"
+                "swc1   %[in2],     4(%[x])                     \n\t"
+                "madd.s %[in4],     %[in4],     $f3,    %[in3]  \n\t"
+                "add.s  %[res3],    $f6,        %[in3]          \n\t"
+                "add.s  %[res1],    %[res1],    $f9             \n\t"
+                "madd.s %[res2],    %[res2],    $f0,    %[four] \n\t"
+                "swc1   %[in3],     8(%[x])                     \n\t"
+                "add.s  %[res4],    $f7,        %[in4]          \n\t"
+                "madd.s %[res3],    %[res3],    $f8,    %[four] \n\t"
+                "swc1   %[in4],     12(%[x])                    \n\t"
+                "add.s  %[res2],    %[res2],    $f10            \n\t"
+                "add.s  $f8,        %[in1],     %[in3]          \n\t"
+                "madd.s %[res3],    %[res3],    %[in1], %[six]  \n\t"
+                "madd.s %[res4],    %[res4],    $f8,    %[four] \n\t"
+                "madd.s %[res4],    %[res4],    %[in2], %[six]  \n\t"
+
+                : [in1]"=&f"(in1), [in2]"=&f"(in2),
+                  [in3]"=&f"(in3), [in4]"=&f"(in4),
+                  [res1]"=&f"(res1), [res2]"=&f"(res2),
+                  [res3]"=&f"(res3), [res4]"=&f"(res4)
+                : [src0_0]"f"(src0_0), [src0_1]"f"(src0_1),
+                  [src0_2]"f"(src0_2), [src0_3]"f"(src0_3),
+                  [gain]"f"(gain), [x]"r"(x), [cy]"r"(cy),
+                  [four]"f"(four), [six]"f"(six)
+                : "$f0", "$f1", "$f2", "$f3",
+                  "$f4", "$f5", "$f6", "$f7",
+                  "$f8", "$f9", "$f10",
+                  "memory"
+            );
+
+            dst0[0      ] = res1; /* outputs are dstep elements apart, like dst0 += 4*dstep below */
+            dst0[dstep  ] = res2;
+            dst0[2*dstep] = res3;
+            dst0[3*dstep] = res4;
+
+            src0 += 4*sstep;
+            dst0 += 4*dstep;
+        }
+    } else { /* any other order: plain C path, one sample per pass */
+        int i;
+        const float *src0 = src;
+        float       *dst0 = dst;
+        for (i = 0; i < size; i++) {
+            int j;
+            float in, res;
+            in = *src0 * c->gain;
+            for(j = 0; j < c->order; j++)
+                in += c->cy[j] * s->x[j];
+            res = s->x[0] + in + s->x[c->order >> 1] * c->cx[c->order >> 1];
+            for(j = 1; j < c->order >> 1; j++)
+                res += (s->x[j] + s->x[c->order - j]) * c->cx[j];
+            for(j = 0; j < c->order - 1; j++)
+                s->x[j] = s->x[j + 1];
+            *dst0 = res;
+            s->x[c->order - 1] = in;
+            src0 += sstep;
+            dst0 += dstep;
+        }
+    }
+}
+#endif /* HAVE_INLINE_ASM */
+
+void ff_iir_filter_init_mips(FFIIRFilterContext *f) {
+#if HAVE_INLINE_ASM
+    f->filter_flt = ff_iir_filter_flt_mips; /* only the float hook is overridden here */
+#endif /* HAVE_INLINE_ASM */
+}
diff --git a/libavcodec/mips/lsp_mips.h b/libavcodec/mips/lsp_mips.h
new file mode 100644
index 0000000..c55ef03
--- /dev/null
+++ b/libavcodec/mips/lsp_mips.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Nedeljko Babic (nbabic@mips.com)
+ *
+ * LSP routines for ACELP-based codecs optimized for MIPS
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/lsp.c
+ */
+#ifndef AVCODEC_LSP_MIPS_H
+#define AVCODEC_LSP_MIPS_H
+
+#if HAVE_MIPSFPU && HAVE_INLINE_ASM
+static av_always_inline void ff_lsp2polyf_mips(const double *lsp, double *f, int lp_half_order)
+{ /* Fills f[0..lp_half_order] from lsp[0], lsp[2], ..., lsp[2*lp_half_order-2] using MIPS FPU inline asm. */
+    int i, j = 0;
+    double * p_fi = f; /* holds &f[i-2] on entry to each outer iteration; advanced by the asm */
+    double * p_f = 0; /* scratch pointer, overwritten by the asm before it is dereferenced */
+
+    f[0] = 1.0;
+    f[1] = -2 * lsp[0];
+    lsp -= 2; /* rebase so that lsp[2*i] below addresses the caller's lsp[2*i-2] */
+
+    for(i=2; i<=lp_half_order; i++)
+    {
+        double tmp, f_j_2, f_j_1, f_j;
+        double val = lsp[2*i];
+        /* The asm turns val into -2*val, stores f[i], then updates f[i-1] down to f[2] in place. */
+        __asm__ volatile(
+            "move   %[p_f],     %[p_fi]                         \n\t" /* p_f = &f[i-2] */
+            "add.d  %[val],     %[val],     %[val]              \n\t"
+            "addiu  %[p_fi],    8                               \n\t" /* advance one double for the next i */
+            "ldc1   %[f_j_1],   0(%[p_f])                       \n\t" /* f[i-2] */
+            "ldc1   %[f_j],     8(%[p_f])                       \n\t" /* f[i-1] */
+            "neg.d  %[val],     %[val]                          \n\t" /* val = -2 * lsp entry */
+            "add.d  %[tmp],     %[f_j_1],   %[f_j_1]            \n\t"
+            "madd.d %[tmp],     %[tmp],     %[f_j], %[val]      \n\t" /* tmp = 2*f[i-2] + f[i-1]*val */
+            "addiu  %[j],       %[i], -2                        \n\t" /* inner loop trip count */
+            "ldc1   %[f_j_2],   -8(%[p_f])                      \n\t" /* NOTE(review): for i == 2 this loads f[-1]; the value is unused in that case */
+            "sdc1   %[tmp],     16(%[p_f])                      \n\t" /* f[i] = tmp */
+            "beqz   %[j],       ff_lsp2polyf_lp_j_end%=         \n\t"
+            "ff_lsp2polyf_lp_j%=:                               \n\t"
+            "add.d  %[tmp],     %[f_j],     %[f_j_2]            \n\t"
+            "madd.d %[tmp],     %[tmp],     %[f_j_1], %[val]    \n\t" /* tmp = f_j + f_j_2 + f_j_1*val */
+            "mov.d  %[f_j],     %[f_j_1]                        \n\t" /* slide the three-value window down by one */
+            "addiu  %[j],       -1                              \n\t"
+            "mov.d  %[f_j_1],   %[f_j_2]                        \n\t"
+            "ldc1   %[f_j_2],   -16(%[p_f])                     \n\t" /* NOTE(review): on the last pass this address is f[-1] */
+            "sdc1   %[tmp],     8(%[p_f])                       \n\t" /* write the updated coefficient back */
+            "addiu  %[p_f],     -8                              \n\t"
+            "bgtz   %[j],       ff_lsp2polyf_lp_j%=             \n\t"
+            "ff_lsp2polyf_lp_j_end%=:                           \n\t"
+
+            : [f_j_2]"=&f"(f_j_2), [f_j_1]"=&f"(f_j_1), [val]"+f"(val),
+              [tmp]"=&f"(tmp), [f_j]"=&f"(f_j), [p_f]"+r"(p_f),
+              [j]"+r"(j), [p_fi]"+r"(p_fi)
+            : [i]"r"(i)
+            : "memory"
+        );
+        f[1] += val; /* val was negated and doubled by the asm above */
+    }
+}
+#define ff_lsp2polyf ff_lsp2polyf_mips
+#endif /* HAVE_MIPSFPU && HAVE_INLINE_ASM */
+#endif /* AVCODEC_LSP_MIPS_H */
diff --git a/libavcodec/mips/mathops.h b/libavcodec/mips/mathops.h
index dd80f68..368290a 100644
--- a/libavcodec/mips/mathops.h
+++ b/libavcodec/mips/mathops.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mips/mpegaudiodsp_mips_fixed.c b/libavcodec/mips/mpegaudiodsp_mips_fixed.c
new file mode 100644
index 0000000..047a833
--- /dev/null
+++ b/libavcodec/mips/mpegaudiodsp_mips_fixed.c
@@ -0,0 +1,907 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Bojan Zivkovic (bojan@mips.com)
+ *
+ * MPEG Audio decoder optimized for MIPS fixed-point architecture
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/mpegaudiodsp_template.c
+ */
+
+#include <string.h>
+
+#include "libavcodec/mpegaudiodsp.h"
+
+static void ff_mpadsp_apply_window_mips_fixed(int32_t *synth_buf, int32_t *window,
+                               int *dither_state, int16_t *samples, int incr)
+{ /* Accumulates window[] * synth_buf[] products and stores 32 clamped int16 outputs at samples[k*incr], k = 0..31. */
+    register const int32_t *w, *w2, *p;
+    int j;
+    int16_t *samples2;
+    int w_asm, p_asm, w_asm1, p_asm1, w_asm2, p_asm2;
+    int w2_asm, w2_asm1, *p_temp1, *p_temp2;
+    int sum1 = 0;
+    int const min_asm = -32768, max_asm = 32767; /* int16 clamp limits used by the slt/movn pairs */
+    int temp1, temp2 = 0, temp3 = 0;
+    int64_t sum;
+
+    /* duplicate the first 32 entries at offset 512 to avoid wrap */
+    memcpy(synth_buf + 512, synth_buf, 32 * sizeof(*synth_buf));
+    samples2 = samples + 31 * incr; /* second output cursor, walks backwards from output 31 */
+    w = window;
+    w2 = window + 31;
+    sum = *dither_state;
+    p = synth_buf + 16;
+    p_temp1 = synth_buf + 16;
+    p_temp2 = synth_buf + 48;
+    temp1 = sum; /* loaded into LO by the first mtlo below */
+
+    /**
+    * The round_sample() call of the original code is not used here;
+    * it is replaced by the appropriate assembly instructions.
+    */
+    __asm__ volatile ( /* output 0: eight added and eight subtracted products */
+         "mthi   $zero                                                    \n\t"
+         "mtlo   %[temp1]                                                 \n\t"
+         "lw     %[w_asm],  0(%[w])                                       \n\t"
+         "lw     %[p_asm],  0(%[p])                                       \n\t"
+         "lw     %[w_asm1], 64*4(%[w])                                    \n\t"
+         "lw     %[p_asm1], 64*4(%[p])                                    \n\t"
+         "lw     %[w_asm2], 128*4(%[w])                                   \n\t"
+         "lw     %[p_asm2], 128*4(%[p])                                   \n\t"
+         "madd   %[w_asm],  %[p_asm]                                      \n\t"
+         "madd   %[w_asm1], %[p_asm1]                                     \n\t"
+         "madd   %[w_asm2], %[p_asm2]                                     \n\t"
+         "lw     %[w_asm],  192*4(%[w])                                   \n\t"
+         "lw     %[p_asm],  192*4(%[p])                                   \n\t"
+         "lw     %[w_asm1], 256*4(%[w])                                   \n\t"
+         "lw     %[p_asm1], 256*4(%[p])                                   \n\t"
+         "lw     %[w_asm2], 320*4(%[w])                                   \n\t"
+         "lw     %[p_asm2], 320*4(%[p])                                   \n\t"
+         "madd   %[w_asm],  %[p_asm]                                      \n\t"
+         "madd   %[w_asm1], %[p_asm1]                                     \n\t"
+         "madd   %[w_asm2], %[p_asm2]                                     \n\t"
+         "lw     %[w_asm],  384*4(%[w])                                   \n\t"
+         "lw     %[p_asm],  384*4(%[p])                                   \n\t"
+         "lw     %[w_asm1], 448*4(%[w])                                   \n\t"
+         "lw     %[p_asm1], 448*4(%[p])                                   \n\t"
+         "lw     %[w_asm2], 32*4(%[w])                                    \n\t"
+         "lw     %[p_asm2], 32*4(%[p])                                    \n\t"
+         "madd   %[w_asm],  %[p_asm]                                      \n\t"
+         "madd   %[w_asm1], %[p_asm1]                                     \n\t"
+         "msub   %[w_asm2], %[p_asm2]                                     \n\t" /* from here on the products are subtracted */
+         "lw     %[w_asm],  96*4(%[w])                                    \n\t"
+         "lw     %[p_asm],  96*4(%[p])                                    \n\t"
+         "lw     %[w_asm1], 160*4(%[w])                                   \n\t"
+         "lw     %[p_asm1], 160*4(%[p])                                   \n\t"
+         "lw     %[w_asm2], 224*4(%[w])                                   \n\t"
+         "lw     %[p_asm2], 224*4(%[p])                                   \n\t"
+         "msub   %[w_asm],  %[p_asm]                                      \n\t"
+         "msub   %[w_asm1], %[p_asm1]                                     \n\t"
+         "msub   %[w_asm2], %[p_asm2]                                     \n\t"
+         "lw     %[w_asm],  288*4(%[w])                                   \n\t"
+         "lw     %[p_asm],  288*4(%[p])                                   \n\t"
+         "lw     %[w_asm1], 352*4(%[w])                                   \n\t"
+         "lw     %[p_asm1], 352*4(%[p])                                   \n\t"
+         "msub   %[w_asm],  %[p_asm]                                      \n\t"
+         "lw     %[w_asm],  480*4(%[w])                                   \n\t"
+         "lw     %[p_asm],  480*4(%[p])                                   \n\t"
+         "lw     %[w_asm2], 416*4(%[w])                                   \n\t"
+         "lw     %[p_asm2], 416*4(%[p])                                   \n\t"
+         "msub   %[w_asm],  %[p_asm]                                      \n\t"
+         "msub   %[w_asm1], %[p_asm1]                                     \n\t"
+         "msub   %[w_asm2], %[p_asm2]                                     \n\t"
+
+         /* The round_sample function from the original code is eliminated
+          * and replaced with the appropriate assembly instructions.
+          * Code example:
+
+         "extr.w  %[sum1],$ac0,24                                       \n\t"
+         "mflo %[temp3],  $ac0                                          \n\t"
+         "and  %[temp1],  %[temp3],  0x00ffffff                         \n\t"
+         "slt  %[temp2],  %[sum1],   %[min_asm]                         \n\t"
+         "movn %[sum1],   %[min_asm],%[temp2]                           \n\t"
+         "slt  %[temp2],  %[max_asm],%[sum1]                            \n\t"
+         "movn %[sum1],   %[max_asm],%[temp2]                           \n\t"
+         "sh   %[sum1],   0(%[samples])                                 \n\t"
+         */
+
+         "extr.w %[sum1],   $ac0,       24                                \n\t" /* sum1 = accumulator shifted right by 24 */
+         "mflo   %[temp3]                                                 \n\t"
+         "addi   %[w],      %[w],       4                                 \n\t" /* w now points at window[1] for the loop below */
+         "and    %[temp1],  %[temp3],   0x00ffffff                        \n\t" /* low 24 bits of LO, carried into the next asm block */
+         "slt    %[temp2],  %[sum1],    %[min_asm]                        \n\t"
+         "movn   %[sum1],   %[min_asm], %[temp2]                          \n\t" /* clamp below at min_asm */
+         "slt    %[temp2],  %[max_asm], %[sum1]                           \n\t"
+         "movn   %[sum1],   %[max_asm], %[temp2]                          \n\t" /* clamp above at max_asm */
+         "sh     %[sum1],   0(%[samples])                                 \n\t"
+
+        : [w_asm] "=&r" (w_asm), [p_asm] "=&r" (p_asm), [w_asm1] "=&r" (w_asm1),
+          [p_asm1] "=&r" (p_asm1), [temp1] "+r" (temp1), [temp2] "+r" (temp2),
+          [w_asm2] "=&r" (w_asm2), [p_asm2] "=&r" (p_asm2),
+          [sum1] "+r" (sum1), [w] "+r" (w), [temp3] "+r" (temp3)
+        : [p] "r" (p), [samples] "r" (samples), [min_asm] "r" (min_asm),
+          [max_asm] "r" (max_asm)
+        : "memory", "hi","lo"
+     );
+
+     samples += incr;
+
+    /* we calculate two samples at the same time to avoid one memory
+       access per two samples */
+
+    for(j = 1; j < 16; j++) { /* 15 iterations, each storing one value at samples and one at samples2 */
+        __asm__ volatile ( /* $ac0 feeds samples, $ac1 feeds samples2 */
+             "mthi   $0,         $ac1                                      \n\t"
+             "mtlo   $0,         $ac1                                      \n\t"
+             "mthi   $0                                                    \n\t"
+             "mtlo   %[temp1]                                              \n\t"
+             "addi   %[p_temp1], %[p_temp1],       4                       \n\t" /* p_temp1 = synth_buf + 16 + j */
+             "lw     %[w_asm],   0(%[w])                                   \n\t"
+             "lw     %[p_asm],   0(%[p_temp1])                             \n\t"
+             "lw     %[w2_asm],  0(%[w2])                                  \n\t"
+             "lw     %[w_asm1],  64*4(%[w])                                \n\t"
+             "lw     %[p_asm1],  64*4(%[p_temp1])                          \n\t"
+             "lw     %[w2_asm1], 64*4(%[w2])                               \n\t"
+             "madd   %[w_asm],   %[p_asm]                                  \n\t"
+             "msub   $ac1,       %[w2_asm],        %[p_asm]                \n\t" /* same synth value, window read through w2 */
+             "madd   %[w_asm1],  %[p_asm1]                                 \n\t"
+             "msub   $ac1,       %[w2_asm1],       %[p_asm1]               \n\t"
+             "lw     %[w_asm],   128*4(%[w])                               \n\t"
+             "lw     %[p_asm],   128*4(%[p_temp1])                         \n\t"
+             "lw     %[w2_asm],  128*4(%[w2])                              \n\t"
+             "lw     %[w_asm1],  192*4(%[w])                               \n\t"
+             "lw     %[p_asm1],  192*4(%[p_temp1])                         \n\t"
+             "lw     %[w2_asm1], 192*4(%[w2])                              \n\t"
+             "madd   %[w_asm],   %[p_asm]                                  \n\t"
+             "msub   $ac1,       %[w2_asm],        %[p_asm]                \n\t"
+             "madd   %[w_asm1],  %[p_asm1]                                 \n\t"
+             "msub   $ac1,       %[w2_asm1],       %[p_asm1]               \n\t"
+             "lw     %[w_asm],   256*4(%[w])                               \n\t"
+             "lw     %[p_asm],   256*4(%[p_temp1])                         \n\t"
+             "lw     %[w2_asm],  256*4(%[w2])                              \n\t"
+             "lw     %[w_asm1],  320*4(%[w])                               \n\t"
+             "lw     %[p_asm1],  320*4(%[p_temp1])                         \n\t"
+             "lw     %[w2_asm1], 320*4(%[w2])                              \n\t"
+             "madd   %[w_asm],   %[p_asm]                                  \n\t"
+             "msub   $ac1,       %[w2_asm],        %[p_asm]                \n\t"
+             "madd   %[w_asm1],  %[p_asm1]                                 \n\t"
+             "msub   $ac1,       %[w2_asm1],       %[p_asm1]               \n\t"
+             "lw     %[w_asm],   384*4(%[w])                               \n\t"
+             "lw     %[p_asm],   384*4(%[p_temp1])                         \n\t"
+             "lw     %[w2_asm],  384*4(%[w2])                              \n\t"
+             "lw     %[w_asm1],  448*4(%[w])                               \n\t"
+             "lw     %[p_asm1],  448*4(%[p_temp1])                         \n\t"
+             "lw     %[w2_asm1], 448*4(%[w2])                              \n\t"
+             "madd   %[w_asm],   %[p_asm]                                  \n\t"
+             "msub   $ac1,       %[w2_asm],        %[p_asm]                \n\t"
+             "madd   %[w_asm1],  %[p_asm1]                                 \n\t"
+             "msub   $ac1,       %[w2_asm1],       %[p_asm1]               \n\t"
+             "addi   %[p_temp2], %[p_temp2],       -4                      \n\t" /* p_temp2 = synth_buf + 48 - j */
+             "lw     %[w_asm],   32*4(%[w])                                \n\t"
+             "lw     %[p_asm],   0(%[p_temp2])                             \n\t"
+             "lw     %[w2_asm],  32*4(%[w2])                               \n\t"
+             "lw     %[w_asm1],  96*4(%[w])                                \n\t"
+             "lw     %[p_asm1],  64*4(%[p_temp2])                          \n\t"
+             "lw     %[w2_asm1], 96*4(%[w2])                               \n\t"
+             "msub   %[w_asm],   %[p_asm]                                  \n\t" /* second half: both accumulators subtract */
+             "msub   $ac1,       %[w2_asm],        %[p_asm]                \n\t"
+             "msub   %[w_asm1],  %[p_asm1]                                 \n\t"
+             "msub   $ac1,       %[w2_asm1],       %[p_asm1]               \n\t"
+             "lw     %[w_asm],   160*4(%[w])                               \n\t"
+             "lw     %[p_asm],   128*4(%[p_temp2])                         \n\t"
+             "lw     %[w2_asm],  160*4(%[w2])                              \n\t"
+             "lw     %[w_asm1],  224*4(%[w])                               \n\t"
+             "lw     %[p_asm1],  192*4(%[p_temp2])                         \n\t"
+             "lw     %[w2_asm1], 224*4(%[w2])                              \n\t"
+             "msub   %[w_asm],   %[p_asm]                                  \n\t"
+             "msub   $ac1,       %[w2_asm],        %[p_asm]                \n\t"
+             "msub   %[w_asm1],  %[p_asm1]                                 \n\t"
+             "msub   $ac1,       %[w2_asm1],       %[p_asm1]               \n\t"
+             "lw     %[w_asm],   288*4(%[w])                               \n\t"
+             "lw     %[p_asm],   256*4(%[p_temp2])                         \n\t"
+             "lw     %[w2_asm],  288*4(%[w2])                              \n\t"
+             "lw     %[w_asm1],  352*4(%[w])                               \n\t"
+             "lw     %[p_asm1],  320*4(%[p_temp2])                         \n\t"
+             "lw     %[w2_asm1], 352*4(%[w2])                              \n\t"
+             "msub   %[w_asm],   %[p_asm]                                  \n\t"
+             "msub   $ac1,       %[w2_asm],        %[p_asm]                \n\t"
+             "msub   %[w_asm1],  %[p_asm1]                                 \n\t"
+             "msub   $ac1,       %[w2_asm1],       %[p_asm1]               \n\t"
+             "lw     %[w_asm],   416*4(%[w])                               \n\t"
+             "lw     %[p_asm],   384*4(%[p_temp2])                         \n\t"
+             "lw     %[w2_asm],  416*4(%[w2])                              \n\t"
+             "lw     %[w_asm1],  480*4(%[w])                               \n\t"
+             "lw     %[p_asm1],  448*4(%[p_temp2])                         \n\t"
+             "lw     %[w2_asm1], 480*4(%[w2])                              \n\t"
+             "msub   %[w_asm],   %[p_asm]                                  \n\t"
+             "msub   %[w_asm1],  %[p_asm1]                                 \n\t"
+             "msub   $ac1,       %[w2_asm],        %[p_asm]                \n\t"
+             "msub   $ac1,       %[w2_asm1],       %[p_asm1]               \n\t"
+             "addi   %[w],       %[w],             4                       \n\t"
+             "addi   %[w2],      %[w2],            -4                      \n\t"
+             "mflo   %[temp2]                                              \n\t"
+             "extr.w %[sum1],    $ac0,             24                      \n\t"
+             "li     %[temp3],   1                                         \n\t"
+             "and    %[temp1],   %[temp2],         0x00ffffff              \n\t" /* low 24 bits left in $ac0's LO */
+             "madd   $ac1,       %[temp1],         %[temp3]                \n\t" /* add them (times 1) to $ac1 */
+             "slt    %[temp2],   %[sum1],          %[min_asm]              \n\t"
+             "movn   %[sum1],    %[min_asm],       %[temp2]                \n\t"
+             "slt    %[temp2],   %[max_asm],       %[sum1]                 \n\t"
+             "movn   %[sum1],    %[max_asm],       %[temp2]                \n\t"
+             "sh     %[sum1],    0(%[samples])                             \n\t"
+             "mflo   %[temp3],   $ac1                                      \n\t"
+             "extr.w %[sum1],    $ac1,             24                      \n\t"
+             "and    %[temp1],   %[temp3],         0x00ffffff              \n\t" /* low 24 bits of $ac1's LO seed the next block's mtlo */
+             "slt    %[temp2],   %[sum1],          %[min_asm]              \n\t"
+             "movn   %[sum1],    %[min_asm],       %[temp2]                \n\t"
+             "slt    %[temp2],   %[max_asm],       %[sum1]                 \n\t"
+             "movn   %[sum1],    %[max_asm],       %[temp2]                \n\t"
+             "sh     %[sum1],    0(%[samples2])                            \n\t"
+
+            : [w_asm] "=&r" (w_asm), [p_asm] "=&r" (p_asm), [w_asm1] "=&r" (w_asm1),
+              [p_asm1] "=&r" (p_asm1), [w2_asm1] "=&r" (w2_asm1),
+              [w2_asm] "=&r" (w2_asm), [temp1] "+r" (temp1), [temp2] "+r" (temp2),
+              [p_temp1] "+r" (p_temp1), [p_temp2] "+r" (p_temp2), [sum1] "+r" (sum1),
+              [w] "+r" (w), [w2] "+r" (w2), [samples] "+r" (samples),
+              [samples2] "+r" (samples2), [temp3] "+r" (temp3)
+            : [min_asm] "r" (min_asm), [max_asm] "r" (max_asm)
+            : "memory", "hi", "lo", "$ac1hi", "$ac1lo"
+        );
+
+        samples += incr;
+        samples2 -= incr;
+    }
+
+    p = synth_buf + 32;
+
+    __asm__ volatile ( /* output 16 (samples has advanced 16 * incr by now): eight subtracted products only */
+        "mthi   $0                                                        \n\t"
+        "mtlo   %[temp1]                                                  \n\t"
+        "lw     %[w_asm],  32*4(%[w])                                     \n\t"
+        "lw     %[p_asm],  0(%[p])                                        \n\t"
+        "lw     %[w_asm1], 96*4(%[w])                                     \n\t"
+        "lw     %[p_asm1], 64*4(%[p])                                     \n\t"
+        "lw     %[w_asm2], 160*4(%[w])                                    \n\t"
+        "lw     %[p_asm2], 128*4(%[p])                                    \n\t"
+        "msub   %[w_asm],  %[p_asm]                                       \n\t"
+        "msub   %[w_asm1], %[p_asm1]                                      \n\t"
+        "msub   %[w_asm2], %[p_asm2]                                      \n\t"
+        "lw     %[w_asm],  224*4(%[w])                                    \n\t"
+        "lw     %[p_asm],  192*4(%[p])                                    \n\t"
+        "lw     %[w_asm1], 288*4(%[w])                                    \n\t"
+        "lw     %[p_asm1], 256*4(%[p])                                    \n\t"
+        "lw     %[w_asm2], 352*4(%[w])                                    \n\t"
+        "lw     %[p_asm2], 320*4(%[p])                                    \n\t"
+        "msub   %[w_asm],  %[p_asm]                                       \n\t"
+        "msub   %[w_asm1], %[p_asm1]                                      \n\t"
+        "msub   %[w_asm2], %[p_asm2]                                      \n\t"
+        "lw     %[w_asm],  416*4(%[w])                                    \n\t"
+        "lw     %[p_asm],  384*4(%[p])                                    \n\t"
+        "lw     %[w_asm1], 480*4(%[w])                                    \n\t"
+        "lw     %[p_asm1], 448*4(%[p])                                    \n\t"
+        "msub   %[w_asm],  %[p_asm]                                       \n\t"
+        "msub   %[w_asm1], %[p_asm1]                                      \n\t"
+        "extr.w %[sum1],   $ac0,       24                                 \n\t"
+        "mflo   %[temp2]                                                  \n\t"
+        "and    %[temp1],  %[temp2],   0x00ffffff                         \n\t" /* low 24 bits of LO, written to *dither_state below */
+        "slt    %[temp2],  %[sum1],    %[min_asm]                         \n\t"
+        "movn   %[sum1],   %[min_asm], %[temp2]                           \n\t"
+        "slt    %[temp2],  %[max_asm], %[sum1]                            \n\t"
+        "movn   %[sum1],   %[max_asm], %[temp2]                           \n\t"
+        "sh     %[sum1],   0(%[samples])                                  \n\t"
+
+        : [w_asm] "=&r" (w_asm), [p_asm] "=&r" (p_asm), [w_asm1] "=&r" (w_asm1),
+          [p_asm1] "=&r" (p_asm1), [temp1] "+r" (temp1), [temp2] "+r" (temp2),
+          [w_asm2] "=&r" (w_asm2), [p_asm2] "=&r" (p_asm2), [sum1] "+r" (sum1)
+        : [w] "r" (w), [p] "r" (p), [samples] "r" (samples), [min_asm] "r" (min_asm),
+          [max_asm] "r" (max_asm)
+        : "memory", "hi", "lo", "$ac1hi", "$ac1lo"
+     );
+
+    *dither_state= temp1; /* store the low 24 bits of the final accumulator back to the caller */
+}
+
+static void imdct36_mips_fixed(int *out, int *buf, int *in, int *win)
+{
+    int j;
+    int t0, t1, t2, t3, s0, s1, s2, s3;
+    int tmp[18], *tmp1, *in1;
+    /* temporary variables */
+    int temp_reg1, temp_reg2, temp_reg3, temp_reg4, temp_reg5, temp_reg6;
+    int t4, t5, t6, t8, t7;
+
+   /* values defined in macros and tables are
+    * eliminated - they are directly loaded in appropriate variables
+    */
+    int const C_1  =  4229717092; /* cos(pi*1/18)*2  */
+    int const C_2  =  4035949074; /* cos(pi*2/18)*2  */
+    int const C_3  =  575416510;  /* -cos(pi*3/18)*2 */
+    int const C_3A =  3719550786; /* cos(pi*3/18)*2  */
+    int const C_4  =  1004831466; /* -cos(pi*4/18)*2 */
+    int const C_5  =  1534215534; /* -cos(pi*5/18)*2 */
+    int const C_7  = -1468965330; /* -cos(pi*7/18)*2 */
+    int const C_8  = -745813244;  /* -cos(pi*8/18)*2 */
+
+   /*
+    * instructions of the first two loops are reorganized and loops are unrolled,
+    * in order to eliminate unnecessary readings and writings in array
+    */
+
+    __asm__ volatile (
+        "lw   %[t1], 17*4(%[in])                                         \n\t"
+        "lw   %[t2], 16*4(%[in])                                         \n\t"
+        "lw   %[t3], 15*4(%[in])                                         \n\t"
+        "lw   %[t4], 14*4(%[in])                                         \n\t"
+        "addu %[t1], %[t1],      %[t2]                                   \n\t"
+        "addu %[t2], %[t2],      %[t3]                                   \n\t"
+        "addu %[t3], %[t3],      %[t4]                                   \n\t"
+        "lw   %[t5], 13*4(%[in])                                         \n\t"
+        "addu %[t1], %[t1],      %[t3]                                   \n\t"
+        "sw   %[t2], 16*4(%[in])                                         \n\t"
+        "lw   %[t6], 12*4(%[in])                                         \n\t"
+        "sw   %[t1], 17*4(%[in])                                         \n\t"
+        "addu %[t4], %[t4],      %[t5]                                   \n\t"
+        "addu %[t5], %[t5],      %[t6]                                   \n\t"
+        "lw   %[t7], 11*4(%[in])                                         \n\t"
+        "addu %[t3], %[t3],      %[t5]                                   \n\t"
+        "sw   %[t4], 14*4(%[in])                                         \n\t"
+        "lw   %[t8], 10*4(%[in])                                         \n\t"
+        "sw   %[t3], 15*4(%[in])                                         \n\t"
+        "addu %[t6], %[t6],      %[t7]                                   \n\t"
+        "addu %[t7], %[t7],      %[t8]                                   \n\t"
+        "sw   %[t6], 12*4(%[in])                                         \n\t"
+        "addu %[t5], %[t5],      %[t7]                                   \n\t"
+        "lw   %[t1], 9*4(%[in])                                          \n\t"
+        "lw   %[t2], 8*4(%[in])                                          \n\t"
+        "sw   %[t5], 13*4(%[in])                                         \n\t"
+        "addu %[t8], %[t8],      %[t1]                                   \n\t"
+        "addu %[t1], %[t1],      %[t2]                                   \n\t"
+        "sw   %[t8], 10*4(%[in])                                         \n\t"
+        "addu %[t7], %[t7],      %[t1]                                   \n\t"
+        "lw   %[t3], 7*4(%[in])                                          \n\t"
+        "lw   %[t4], 6*4(%[in])                                          \n\t"
+        "sw   %[t7], 11*4(%[in])                                         \n\t"
+        "addu %[t2], %[t2],      %[t3]                                   \n\t"
+        "addu %[t3], %[t3],      %[t4]                                   \n\t"
+        "sw   %[t2], 8*4(%[in])                                          \n\t"
+        "addu %[t1], %[t1],      %[t3]                                   \n\t"
+        "lw   %[t5], 5*4(%[in])                                          \n\t"
+        "lw   %[t6], 4*4(%[in])                                          \n\t"
+        "sw   %[t1], 9*4(%[in])                                          \n\t"
+        "addu %[t4], %[t4],      %[t5]                                   \n\t"
+        "addu %[t5], %[t5],      %[t6]                                   \n\t"
+        "sw   %[t4], 6*4(%[in])                                          \n\t"
+        "addu %[t3], %[t3],      %[t5]                                   \n\t"
+        "lw   %[t7], 3*4(%[in])                                          \n\t"
+        "lw   %[t8], 2*4(%[in])                                          \n\t"
+        "sw   %[t3], 7*4(%[in])                                          \n\t"
+        "addu %[t6], %[t6],      %[t7]                                   \n\t"
+        "addu %[t7], %[t7],      %[t8]                                   \n\t"
+        "sw   %[t6], 4*4(%[in])                                          \n\t"
+        "addu %[t5], %[t5],      %[t7]                                   \n\t"
+        "lw   %[t1], 1*4(%[in])                                          \n\t"
+        "lw   %[t2], 0*4(%[in])                                          \n\t"
+        "sw   %[t5], 5*4(%[in])                                          \n\t"
+        "addu %[t8], %[t8],      %[t1]                                   \n\t"
+        "addu %[t1], %[t1],      %[t2]                                   \n\t"
+        "sw   %[t8], 2*4(%[in])                                          \n\t"
+        "addu %[t7], %[t7],      %[t1]                                   \n\t"
+        "sw   %[t7], 3*4(%[in])                                          \n\t"
+        "sw   %[t1], 1*4(%[in])                                          \n\t"
+
+        : [in] "+r" (in), [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3),
+          [t4] "=&r" (t4), [t5] "=&r" (t5), [t6] "=&r" (t6),
+          [t7] "=&r" (t7), [t8] "=&r" (t8)
+        :
+        : "memory"
+    );
+
+    for(j = 0; j < 2; j++) {
+
+        tmp1 = tmp + j;
+        in1 = in + j;
+
+         /**
+         *  Original constants are multiplied by two in advance
+         *  for assembly optimization (e.g. C_2 = 2 * C2).
+         *  That can lead to overflow in operations where they are used.
+         *
+         *  Example of the solution:
+         *
+         *  in original code:
+         *  t0 = ((int64_t)(in1[2*2] + in1[2*4]) * (int64_t)(2*C2))>>32
+         *
+         *  in assembly:
+         *  C_2 = 2 * C2;
+         *   .
+         *   .
+         *  "lw   %[t7],       4*4(%[in1])                               \n\t"
+         *  "lw   %[t8],       8*4(%[in1])                               \n\t"
+         *  "addu %[temp_reg2],%[t7],       %[t8]                        \n\t"
+         *  "multu %[C_2],     %[temp_reg2]                              \n\t"
+         *  "mfhi %[temp_reg1]                                           \n\t"
+         *  "sra  %[temp_reg2],%[temp_reg2],31                           \n\t"
+         *  "move %[t0],       $0                                        \n\t"
+         *  "movn %[t0],       %[C_2],      %[temp_reg2]                 \n\t"
+         *  "sub  %[t0],       %[temp_reg1],%[t0]                        \n\t"
+         */
+
+        __asm__ volatile (
+            "lw    %[t7],        4*4(%[in1])                               \n\t"
+            "lw    %[t8],        8*4(%[in1])                               \n\t"
+            "lw    %[t6],        16*4(%[in1])                              \n\t"
+            "lw    %[t4],        0*4(%[in1])                               \n\t"
+            "addu  %[temp_reg2], %[t7],        %[t8]                       \n\t"
+            "addu  %[t2],        %[t6],        %[t8]                       \n\t"
+            "multu %[C_2],       %[temp_reg2]                              \n\t"
+            "lw    %[t5],        12*4(%[in1])                              \n\t"
+            "sub   %[t2],        %[t2],        %[t7]                       \n\t"
+            "sub   %[t1],        %[t4],        %[t5]                       \n\t"
+            "sra   %[t3],        %[t5],        1                           \n\t"
+            "sra   %[temp_reg1], %[t2],        1                           \n\t"
+            "addu  %[t3],        %[t3],        %[t4]                       \n\t"
+            "sub   %[temp_reg1], %[t1],        %[temp_reg1]                \n\t"
+            "sra   %[temp_reg2], %[temp_reg2], 31                          \n\t"
+            "sw    %[temp_reg1], 6*4(%[tmp1])                              \n\t"
+            "move  %[t0],        $0                                        \n\t"
+            "movn  %[t0],        %[C_2],       %[temp_reg2]                \n\t"
+            "mfhi  %[temp_reg1]                                            \n\t"
+            "addu  %[t1],        %[t1],        %[t2]                       \n\t"
+            "sw    %[t1],        16*4(%[tmp1])                             \n\t"
+            "sub   %[temp_reg4], %[t8],        %[t6]                       \n\t"
+            "add   %[temp_reg2], %[t7],        %[t6]                       \n\t"
+            "mult  $ac1,         %[C_8],       %[temp_reg4]                \n\t"
+            "multu $ac2,         %[C_4],       %[temp_reg2]                \n\t"
+            "sub   %[t0],        %[temp_reg1], %[t0]                       \n\t"
+            "sra   %[temp_reg1], %[temp_reg2], 31                          \n\t"
+            "move  %[t2],        $0                                        \n\t"
+            "movn  %[t2],        %[C_4],       %[temp_reg1]                \n\t"
+            "mfhi  %[t1],        $ac1                                      \n\t"
+            "mfhi  %[temp_reg1], $ac2                                      \n\t"
+            "lw    %[t6],        10*4(%[in1])                              \n\t"
+            "lw    %[t8],        14*4(%[in1])                              \n\t"
+            "lw    %[t7],        2*4(%[in1])                               \n\t"
+            "lw    %[t4],        6*4(%[in1])                               \n\t"
+            "sub   %[temp_reg3], %[t3],        %[t0]                       \n\t"
+            "add   %[temp_reg4], %[t3],        %[t0]                       \n\t"
+            "sub   %[temp_reg1], %[temp_reg1], %[temp_reg2]                \n\t"
+            "add   %[temp_reg4], %[temp_reg4], %[t1]                       \n\t"
+            "sub   %[t2],        %[temp_reg1], %[t2]                       \n\t"
+            "sw    %[temp_reg4], 2*4(%[tmp1])                              \n\t"
+            "sub   %[temp_reg3], %[temp_reg3], %[t2]                       \n\t"
+            "add   %[temp_reg1], %[t3],        %[t2]                       \n\t"
+            "sw    %[temp_reg3], 10*4(%[tmp1])                             \n\t"
+            "sub   %[temp_reg1], %[temp_reg1], %[t1]                       \n\t"
+            "addu  %[temp_reg2], %[t6],        %[t8]                       \n\t"
+            "sw    %[temp_reg1], 14*4(%[tmp1])                             \n\t"
+            "sub   %[temp_reg2], %[temp_reg2], %[t7]                       \n\t"
+            "addu  %[temp_reg3], %[t7],        %[t6]                       \n\t"
+            "multu $ac3,         %[C_3],       %[temp_reg2]                \n\t"
+            "multu %[C_1],       %[temp_reg3]                              \n\t"
+            "sra   %[temp_reg1], %[temp_reg2], 31                          \n\t"
+            "move  %[t1],        $0                                        \n\t"
+            "sra   %[temp_reg3], %[temp_reg3], 31                          \n\t"
+            "movn  %[t1],        %[C_3],       %[temp_reg1]                \n\t"
+            "mfhi  %[temp_reg1], $ac3                                      \n\t"
+            "mfhi  %[temp_reg4]                                            \n\t"
+            "move  %[t2],        $0                                        \n\t"
+            "movn  %[t2],        %[C_1],       %[temp_reg3]                \n\t"
+            "sub   %[temp_reg3], %[t6],        %[t8]                       \n\t"
+            "sub   %[t2],        %[temp_reg4], %[t2]                       \n\t"
+            "multu $ac1,         %[C_7],       %[temp_reg3]                \n\t"
+            "sub   %[temp_reg1], %[temp_reg1], %[temp_reg2]                \n\t"
+            "sra   %[temp_reg4], %[temp_reg3], 31                          \n\t"
+            "sub   %[t1],        %[temp_reg1], %[t1]                       \n\t"
+            "move  %[t3],        $0                                        \n\t"
+            "sw    %[t1],        4*4(%[tmp1])                              \n\t"
+            "movn  %[t3],        %[C_7],       %[temp_reg4]                \n\t"
+            "multu $ac2,         %[C_3A],      %[t4]                       \n\t"
+            "add   %[temp_reg2], %[t7],        %[t8]                       \n\t"
+            "move  %[t1],        $0                                        \n\t"
+            "mfhi  %[temp_reg4], $ac1                                      \n\t"
+            "multu $ac3,%[C_5],  %[temp_reg2]                              \n\t"
+            "move  %[t0],        $0                                        \n\t"
+            "sra   %[temp_reg1], %[temp_reg2], 31                          \n\t"
+            "movn  %[t1],%[C_5], %[temp_reg1]                              \n\t"
+            "sub   %[temp_reg4], %[temp_reg4], %[temp_reg3]                \n\t"
+            "mfhi  %[temp_reg1], $ac3                                      \n\t"
+            "sra   %[temp_reg3], %[t4],        31                          \n\t"
+            "movn  %[t0],        %[C_3A],      %[temp_reg3]                \n\t"
+            "mfhi  %[temp_reg3], $ac2                                      \n\t"
+            "sub   %[t3],        %[temp_reg4], %[t3]                       \n\t"
+            "add   %[temp_reg4], %[t3],        %[t2]                       \n\t"
+            "sub   %[temp_reg1], %[temp_reg1], %[temp_reg2]                \n\t"
+            "sub   %[t1],        %[temp_reg1], %[t1]                       \n\t"
+            "sub   %[t0],        %[temp_reg3], %[t0]                       \n\t"
+            "add   %[temp_reg1], %[t2],        %[t1]                       \n\t"
+            "add   %[temp_reg4], %[temp_reg4], %[t0]                       \n\t"
+            "sub   %[temp_reg2], %[t3],        %[t1]                       \n\t"
+            "sw    %[temp_reg4], 0*4(%[tmp1])                              \n\t"
+            "sub   %[temp_reg1], %[temp_reg1], %[t0]                       \n\t"
+            "sub   %[temp_reg2], %[temp_reg2], %[t0]                       \n\t"
+            "sw    %[temp_reg1], 12*4(%[tmp1])                             \n\t"
+            "sw    %[temp_reg2], 8*4(%[tmp1])                              \n\t"
+
+            : [t7] "=&r" (t7), [temp_reg1] "=&r" (temp_reg1),
+              [temp_reg2] "=&r" (temp_reg2), [temp_reg4] "=&r" (temp_reg4),
+              [temp_reg3] "=&r" (temp_reg3), [t8] "=&r" (t8), [t0] "=&r" (t0),
+              [t4] "=&r" (t4), [t5] "=&r" (t5), [t6] "=&r"(t6), [t2] "=&r" (t2),
+              [t3] "=&r" (t3), [t1] "=&r" (t1)
+            : [C_2] "r" (C_2), [in1] "r" (in1), [tmp1] "r" (tmp1), [C_8] "r" (C_8),
+              [C_4] "r" (C_4), [C_3] "r" (C_3), [C_1] "r" (C_1), [C_7] "r" (C_7),
+              [C_3A] "r" (C_3A), [C_5] "r" (C_5)
+            : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo",
+              "$ac3hi", "$ac3lo"
+         );
+    }
+
+    /**
+    * The loop is unrolled four times.
+    *
+    * The values defined in the tables icos36[] and icos36h[] are not loaded
+    * from those tables; they are loaded directly into registers as immediates.
+    *
+    */
+
+    __asm__ volatile (
+        "lw     %[t2],        1*4(%[tmp])                                  \n\t"
+        "lw     %[t3],        3*4(%[tmp])                                  \n\t"
+        "lw     %[t0],        0*4(%[tmp])                                  \n\t"
+        "lw     %[t1],        2*4(%[tmp])                                  \n\t"
+        "addu   %[temp_reg1], %[t3],        %[t2]                          \n\t"
+        "li     %[temp_reg2], 0x807D2B1E                                   \n\t"
+        "move   %[s1],        $0                                           \n\t"
+        "multu  %[temp_reg2], %[temp_reg1]                                 \n\t"
+        "sra    %[temp_reg1], %[temp_reg1], 31                             \n\t"
+        "movn   %[s1],        %[temp_reg2], %[temp_reg1]                   \n\t"
+        "sub    %[temp_reg3], %[t3],        %[t2]                          \n\t"
+        "li     %[temp_reg4], 0x2de5151                                    \n\t"
+        "mfhi   %[temp_reg2]                                               \n\t"
+        "addu   %[s0],        %[t1],        %[t0]                          \n\t"
+        "lw     %[temp_reg5], 9*4(%[win])                                  \n\t"
+        "mult   $ac1,         %[temp_reg4], %[temp_reg3]                   \n\t"
+        "lw     %[temp_reg6], 4*9*4(%[buf])                                \n\t"
+        "sub    %[s2],        %[t1],        %[t0]                          \n\t"
+        "lw     %[temp_reg3], 29*4(%[win])                                 \n\t"
+        "subu   %[s1],        %[temp_reg2], %[s1]                          \n\t"
+        "lw     %[temp_reg4], 28*4(%[win])                                 \n\t"
+        "add    %[t0],        %[s0],        %[s1]                          \n\t"
+        "extr.w %[s3],        $ac1,23                                      \n\t"
+        "mult   $ac2,         %[t0],        %[temp_reg3]                   \n\t"
+        "sub    %[t1],        %[s0],        %[s1]                          \n\t"
+        "lw     %[temp_reg1], 4*8*4(%[buf])                                \n\t"
+        "mult   %[t1],        %[temp_reg5]                                 \n\t"
+        "lw     %[temp_reg2], 8*4(%[win])                                  \n\t"
+        "mfhi   %[temp_reg3], $ac2                                         \n\t"
+        "mult   $ac3,         %[t0],        %[temp_reg4]                   \n\t"
+        "add    %[t0],        %[s2],        %[s3]                          \n\t"
+        "mfhi   %[temp_reg5]                                               \n\t"
+        "mult   $ac1,         %[t1],        %[temp_reg2]                   \n\t"
+        "sub    %[t1],        %[s2],        %[s3]                          \n\t"
+        "sw     %[temp_reg3], 4*9*4(%[buf])                                \n\t"
+        "mfhi   %[temp_reg4], $ac3                                         \n\t"
+        "lw     %[temp_reg3], 37*4(%[win])                                 \n\t"
+        "mfhi   %[temp_reg2], $ac1                                         \n\t"
+        "add    %[temp_reg5], %[temp_reg5], %[temp_reg6]                   \n\t"
+        "lw     %[temp_reg6], 17*4(%[win])                                 \n\t"
+        "sw     %[temp_reg5], 32*9*4(%[out])                               \n\t"
+        "sw     %[temp_reg4], 4*8*4(%[buf])                                \n\t"
+        "mult   %[t1],        %[temp_reg6]                                 \n\t"
+        "add    %[temp_reg1], %[temp_reg1], %[temp_reg2]                   \n\t"
+        "lw     %[temp_reg2], 0*4(%[win])                                  \n\t"
+        "lw     %[temp_reg5], 4*17*4(%[buf])                               \n\t"
+        "sw     %[temp_reg1], 8*32*4(%[out])                               \n\t"
+        "mfhi   %[temp_reg6]                                               \n\t"
+        "mult   $ac1,         %[t1],        %[temp_reg2]                   \n\t"
+        "lw     %[temp_reg4], 20*4(%[win])                                 \n\t"
+        "lw     %[temp_reg1], 0(%[buf])                                    \n\t"
+        "mult   $ac2,         %[t0],        %[temp_reg3]                   \n\t"
+        "mult   %[t0],        %[temp_reg4]                                 \n\t"
+        "mfhi   %[temp_reg2], $ac1                                         \n\t"
+        "lw     %[t0],        4*4(%[tmp])                                  \n\t"
+        "add    %[temp_reg5], %[temp_reg5], %[temp_reg6]                   \n\t"
+        "mfhi   %[temp_reg3], $ac2                                         \n\t"
+        "mfhi   %[temp_reg4]                                               \n\t"
+        "sw     %[temp_reg5], 17*32*4(%[out])                              \n\t"
+        "lw     %[t1],        6*4(%[tmp])                                  \n\t"
+        "add    %[temp_reg1], %[temp_reg1], %[temp_reg2]                   \n\t"
+        "lw     %[t2],        5*4(%[tmp])                                  \n\t"
+        "sw     %[temp_reg1], 0*32*4(%[out])                               \n\t"
+        "addu   %[s0],        %[t1],        %[t0]                          \n\t"
+        "sw     %[temp_reg3], 4*17*4(%[buf])                               \n\t"
+        "lw     %[t3],        7*4(%[tmp])                                  \n\t"
+        "sub    %[s2],        %[t1],        %[t0]                          \n\t"
+        "sw     %[temp_reg4], 0(%[buf])                                    \n\t"
+        "addu   %[temp_reg5], %[t3],        %[t2]                          \n\t"
+        "li     %[temp_reg6], 0x8483EE0C                                   \n\t"
+        "move   %[s1],        $0                                           \n\t"
+        "multu  %[temp_reg6], %[temp_reg5]                                 \n\t"
+        "sub    %[temp_reg1], %[t3],        %[t2]                          \n\t"
+        "li     %[temp_reg2], 0xf746ea                                     \n\t"
+        "sra    %[temp_reg5], %[temp_reg5], 31                             \n\t"
+        "mult   $ac1,         %[temp_reg2], %[temp_reg1]                   \n\t"
+        "movn   %[s1],        %[temp_reg6], %[temp_reg5]                   \n\t"
+        "mfhi   %[temp_reg5]                                               \n\t"
+        "lw     %[temp_reg3], 10*4(%[win])                                 \n\t"
+        "lw     %[temp_reg4], 4*10*4(%[buf])                               \n\t"
+        "extr.w %[s3],        $ac1,         23                             \n\t"
+        "lw     %[temp_reg1], 4*7*4(%[buf])                                \n\t"
+        "lw     %[temp_reg2], 7*4(%[win])                                  \n\t"
+        "lw     %[temp_reg6], 30*4(%[win])                                 \n\t"
+        "subu   %[s1],        %[temp_reg5], %[s1]                          \n\t"
+        "sub    %[t1],        %[s0],        %[s1]                          \n\t"
+        "add    %[t0],        %[s0],        %[s1]                          \n\t"
+        "mult   $ac2,         %[t1],        %[temp_reg3]                   \n\t"
+        "mult   $ac3,         %[t1],        %[temp_reg2]                   \n\t"
+        "mult   %[t0],        %[temp_reg6]                                 \n\t"
+        "lw     %[temp_reg5], 27*4(%[win])                                 \n\t"
+        "mult   $ac1,         %[t0],        %[temp_reg5]                   \n\t"
+        "mfhi   %[temp_reg3], $ac2                                         \n\t"
+        "mfhi   %[temp_reg2], $ac3                                         \n\t"
+        "mfhi   %[temp_reg6]                                               \n\t"
+        "add    %[t0],        %[s2],        %[s3]                          \n\t"
+        "sub    %[t1],        %[s2],        %[s3]                          \n\t"
+        "add    %[temp_reg3], %[temp_reg3], %[temp_reg4]                   \n\t"
+        "lw     %[temp_reg4], 16*4(%[win])                                 \n\t"
+        "mfhi   %[temp_reg5], $ac1                                         \n\t"
+        "sw     %[temp_reg3], 32*10*4(%[out])                              \n\t"
+        "add    %[temp_reg1], %[temp_reg1], %[temp_reg2]                   \n\t"
+        "lw     %[temp_reg3], 4*16*4(%[buf])                               \n\t"
+        "sw     %[temp_reg6], 4*10*4(%[buf])                               \n\t"
+        "sw     %[temp_reg1], 7*32*4(%[out])                               \n\t"
+        "mult   $ac2,         %[t1],        %[temp_reg4]                   \n\t"
+        "sw     %[temp_reg5], 4*7*4(%[buf])                                \n\t"
+        "lw     %[temp_reg6], 1*4(%[win])                                  \n\t"
+        "lw     %[temp_reg5], 4*1*4(%[buf])                                \n\t"
+        "lw     %[temp_reg1], 36*4(%[win])                                 \n\t"
+        "mult   $ac3,         %[t1],        %[temp_reg6]                   \n\t"
+        "lw     %[temp_reg2], 21*4(%[win])                                 \n\t"
+        "mfhi   %[temp_reg4], $ac2                                         \n\t"
+        "mult   %[t0],        %[temp_reg1]                                 \n\t"
+        "mult   $ac1,         %[t0],%[temp_reg2]                           \n\t"
+        "lw     %[t0],        8*4(%[tmp])                                  \n\t"
+        "mfhi   %[temp_reg6], $ac3                                         \n\t"
+        "lw     %[t1],        10*4(%[tmp])                                 \n\t"
+        "lw     %[t3],        11*4(%[tmp])                                 \n\t"
+        "mfhi   %[temp_reg1]                                               \n\t"
+        "add    %[temp_reg3], %[temp_reg3], %[temp_reg4]                   \n\t"
+        "lw     %[t2],        9*4(%[tmp])                                  \n\t"
+        "mfhi   %[temp_reg2], $ac1                                         \n\t"
+        "add    %[temp_reg5], %[temp_reg5], %[temp_reg6]                   \n\t"
+        "sw     %[temp_reg3], 16*32*4(%[out])                              \n\t"
+        "sw     %[temp_reg5], 1*32*4(%[out])                               \n\t"
+        "sw     %[temp_reg1], 4*16*4(%[buf])                               \n\t"
+        "addu   %[temp_reg3], %[t3],        %[t2]                          \n\t"
+        "li     %[temp_reg4], 0x8D3B7CD6                                   \n\t"
+        "sw     %[temp_reg2], 4*1*4(%[buf])                                \n\t"
+        "multu  %[temp_reg4],%[temp_reg3]                                  \n\t"
+        "sra    %[temp_reg3], %[temp_reg3], 31                             \n\t"
+        "move   %[s1],        $0                                           \n\t"
+        "movn   %[s1],        %[temp_reg4], %[temp_reg3]                   \n\t"
+        "addu   %[s0],        %[t1],        %[t0]                          \n\t"
+        "mfhi   %[temp_reg3]                                               \n\t"
+        "sub    %[s2],        %[t1],        %[t0]                          \n\t"
+        "sub    %[temp_reg5], %[t3],        %[t2]                          \n\t"
+        "li     %[temp_reg6], 0x976fd9                                     \n\t"
+        "lw     %[temp_reg2], 11*4(%[win])                                 \n\t"
+        "lw     %[temp_reg1], 4*11*4(%[buf])                               \n\t"
+        "mult   $ac1,         %[temp_reg6], %[temp_reg5]                   \n\t"
+        "subu   %[s1],        %[temp_reg3], %[s1]                          \n\t"
+        "lw     %[temp_reg5], 31*4(%[win])                                 \n\t"
+        "sub    %[t1],        %[s0],        %[s1]                          \n\t"
+        "add    %[t0],        %[s0],        %[s1]                          \n\t"
+        "mult   $ac2,         %[t1],        %[temp_reg2]                   \n\t"
+        "mult   %[t0],        %[temp_reg5]                                 \n\t"
+        "lw     %[temp_reg4], 6*4(%[win])                                  \n\t"
+        "extr.w %[s3],        $ac1,         23                             \n\t"
+        "lw     %[temp_reg3], 4*6*4(%[buf])                                \n\t"
+        "mfhi   %[temp_reg2], $ac2                                         \n\t"
+        "lw     %[temp_reg6], 26*4(%[win])                                 \n\t"
+        "mfhi   %[temp_reg5]                                               \n\t"
+        "mult   $ac3,         %[t1],        %[temp_reg4]                   \n\t"
+        "mult   $ac1,         %[t0],        %[temp_reg6]                   \n\t"
+        "add    %[t0],        %[s2],        %[s3]                          \n\t"
+        "sub    %[t1],        %[s2],        %[s3]                          \n\t"
+        "add    %[temp_reg2], %[temp_reg2], %[temp_reg1]                   \n\t"
+        "mfhi   %[temp_reg4], $ac3                                         \n\t"
+        "mfhi   %[temp_reg6], $ac1                                         \n\t"
+        "sw     %[temp_reg5], 4*11*4(%[buf])                               \n\t"
+        "sw     %[temp_reg2], 32*11*4(%[out])                              \n\t"
+        "lw     %[temp_reg1], 4*15*4(%[buf])                               \n\t"
+        "add    %[temp_reg3], %[temp_reg3], %[temp_reg4]                   \n\t"
+        "lw     %[temp_reg2], 15*4(%[win])                                 \n\t"
+        "sw     %[temp_reg3], 6*32*4(%[out])                               \n\t"
+        "sw     %[temp_reg6], 4*6*4(%[buf])                                \n\t"
+        "mult   %[t1],        %[temp_reg2]                                 \n\t"
+        "lw     %[temp_reg3], 2*4(%[win])                                  \n\t"
+        "lw     %[temp_reg4], 4*2*4(%[buf])                                \n\t"
+        "lw     %[temp_reg5], 35*4(%[win])                                 \n\t"
+        "mult   $ac1,         %[t1],        %[temp_reg3]                   \n\t"
+        "mfhi   %[temp_reg2]                                               \n\t"
+        "lw     %[temp_reg6], 22*4(%[win])                                 \n\t"
+        "mult   $ac2,         %[t0],        %[temp_reg5]                   \n\t"
+        "lw     %[t1],        14*4(%[tmp])                                 \n\t"
+        "mult   $ac3,         %[t0],        %[temp_reg6]                   \n\t"
+        "lw     %[t0],        12*4(%[tmp])                                 \n\t"
+        "mfhi   %[temp_reg3], $ac1                                         \n\t"
+        "add    %[temp_reg1], %[temp_reg1], %[temp_reg2]                   \n\t"
+        "mfhi   %[temp_reg5], $ac2                                         \n\t"
+        "sw     %[temp_reg1], 15*32*4(%[out])                              \n\t"
+        "mfhi   %[temp_reg6], $ac3                                         \n\t"
+        "lw     %[t2],        13*4(%[tmp])                                 \n\t"
+        "lw     %[t3],        15*4(%[tmp])                                 \n\t"
+        "add    %[temp_reg4], %[temp_reg4], %[temp_reg3]                   \n\t"
+        "sw     %[temp_reg5], 4*15*4(%[buf])                               \n\t"
+        "addu   %[temp_reg1], %[t3],        %[t2]                          \n\t"
+        "li     %[temp_reg2], 0x9C42577C                                   \n\t"
+        "move   %[s1],        $0                                           \n\t"
+        "multu  %[temp_reg2], %[temp_reg1]                                 \n\t"
+        "sw     %[temp_reg4], 2*32*4(%[out])                               \n\t"
+        "sra    %[temp_reg1], %[temp_reg1], 31                             \n\t"
+        "movn   %[s1],        %[temp_reg2], %[temp_reg1]                   \n\t"
+        "sub    %[temp_reg3], %[t3],        %[t2]                          \n\t"
+        "li     %[temp_reg4], 0x6f94a2                                     \n\t"
+        "mfhi   %[temp_reg1]                                               \n\t"
+        "addu   %[s0],        %[t1],        %[t0]                          \n\t"
+        "sw     %[temp_reg6], 4*2*4(%[buf])                                \n\t"
+        "mult   $ac1,         %[temp_reg4], %[temp_reg3]                   \n\t"
+        "sub    %[s2],        %[t1],        %[t0]                          \n\t"
+        "lw     %[temp_reg5], 12*4(%[win])                                 \n\t"
+        "lw     %[temp_reg6], 4*12*4(%[buf])                               \n\t"
+        "subu   %[s1],        %[temp_reg1], %[s1]                          \n\t"
+        "sub    %[t1],        %[s0],        %[s1]                          \n\t"
+        "lw     %[temp_reg3], 32*4(%[win])                                 \n\t"
+        "mult   $ac2,         %[t1],        %[temp_reg5]                   \n\t"
+        "add    %[t0],        %[s0],        %[s1]                          \n\t"
+        "extr.w %[s3],        $ac1,         23                             \n\t"
+        "lw     %[temp_reg2], 5*4(%[win])                                  \n\t"
+        "mult   %[t0],        %[temp_reg3]                                 \n\t"
+        "mfhi   %[temp_reg5], $ac2                                         \n\t"
+        "lw     %[temp_reg4], 25*4(%[win])                                 \n\t"
+        "lw     %[temp_reg1], 4*5*4(%[buf])                                \n\t"
+        "mult   $ac3,         %[t1],        %[temp_reg2]                   \n\t"
+        "mult   $ac1,         %[t0],        %[temp_reg4]                   \n\t"
+        "mfhi   %[temp_reg3]                                               \n\t"
+        "add    %[t0],        %[s2],        %[s3]                          \n\t"
+        "add    %[temp_reg5], %[temp_reg5], %[temp_reg6]                   \n\t"
+        "mfhi   %[temp_reg2], $ac3                                         \n\t"
+        "mfhi   %[temp_reg4], $ac1                                         \n\t"
+        "sub    %[t1],        %[s2],        %[s3]                          \n\t"
+        "sw     %[temp_reg5], 32*12*4(%[out])                              \n\t"
+        "sw     %[temp_reg3], 4*12*4(%[buf])                               \n\t"
+        "lw     %[temp_reg6], 14*4(%[win])                                 \n\t"
+        "lw     %[temp_reg5], 4*14*4(%[buf])                               \n\t"
+        "add    %[temp_reg1], %[temp_reg1], %[temp_reg2]                   \n\t"
+        "sw     %[temp_reg4], 4*5*4(%[buf])                                \n\t"
+        "sw     %[temp_reg1], 5*32*4(%[out])                               \n\t"
+        "mult   %[t1],        %[temp_reg6]                                 \n\t"
+        "lw     %[temp_reg4], 34*4(%[win])                                 \n\t"
+        "lw     %[temp_reg2], 3*4(%[win])                                  \n\t"
+        "lw     %[temp_reg1], 4*3*4(%[buf])                                \n\t"
+        "mult   $ac2,         %[t0],        %[temp_reg4]                   \n\t"
+        "mfhi   %[temp_reg6]                                               \n\t"
+        "mult   $ac1,         %[t1],        %[temp_reg2]                   \n\t"
+        "lw     %[temp_reg3], 23*4(%[win])                                 \n\t"
+        "lw     %[s0],        16*4(%[tmp])                                 \n\t"
+        "mfhi   %[temp_reg4], $ac2                                         \n\t"
+        "lw     %[t1],        17*4(%[tmp])                                 \n\t"
+        "mult   $ac3,         %[t0],        %[temp_reg3]                   \n\t"
+        "move   %[s1],        $0                                           \n\t"
+        "add    %[temp_reg5], %[temp_reg5], %[temp_reg6]                   \n\t"
+        "mfhi   %[temp_reg2], $ac1                                         \n\t"
+        "sw     %[temp_reg5], 14*32*4(%[out])                              \n\t"
+        "sw     %[temp_reg4], 4*14*4(%[buf])                               \n\t"
+        "mfhi   %[temp_reg3], $ac3                                         \n\t"
+        "li     %[temp_reg5], 0xB504F334                                   \n\t"
+        "add    %[temp_reg1], %[temp_reg1], %[temp_reg2]                   \n\t"
+        "multu  %[temp_reg5], %[t1]                                        \n\t"
+        "lw     %[temp_reg2], 4*13*4(%[buf])                               \n\t"
+        "sw     %[temp_reg1], 3*32*4(%[out])                               \n\t"
+        "sra    %[t1],        %[t1],        31                             \n\t"
+        "mfhi   %[temp_reg6]                                               \n\t"
+        "movn   %[s1],        %[temp_reg5], %[t1]                          \n\t"
+        "sw     %[temp_reg3], 4*3*4(%[buf])                                \n\t"
+        "lw     %[temp_reg1], 13*4(%[win])                                 \n\t"
+        "lw     %[temp_reg4], 4*4*4(%[buf])                                \n\t"
+        "lw     %[temp_reg3], 4*4(%[win])                                  \n\t"
+        "lw     %[temp_reg5], 33*4(%[win])                                 \n\t"
+        "subu   %[s1],        %[temp_reg6], %[s1]                          \n\t"
+        "lw     %[temp_reg6], 24*4(%[win])                                 \n\t"
+        "sub    %[t1],        %[s0],        %[s1]                          \n\t"
+        "add    %[t0],        %[s0],        %[s1]                          \n\t"
+        "mult   $ac1,         %[t1],        %[temp_reg1]                   \n\t"
+        "mult   $ac2,         %[t1],        %[temp_reg3]                   \n\t"
+        "mult   $ac3,         %[t0],        %[temp_reg5]                   \n\t"
+        "mult   %[t0],        %[temp_reg6]                                 \n\t"
+        "mfhi   %[temp_reg1], $ac1                                         \n\t"
+        "mfhi   %[temp_reg3], $ac2                                         \n\t"
+        "mfhi   %[temp_reg5], $ac3                                         \n\t"
+        "mfhi   %[temp_reg6]                                               \n\t"
+        "add    %[temp_reg2], %[temp_reg2], %[temp_reg1]                   \n\t"
+        "add    %[temp_reg4], %[temp_reg4], %[temp_reg3]                   \n\t"
+        "sw     %[temp_reg2], 13*32*4(%[out])                              \n\t"
+        "sw     %[temp_reg4], 4*32*4(%[out])                               \n\t"
+        "sw     %[temp_reg5], 4*13*4(%[buf])                               \n\t"
+        "sw     %[temp_reg6], 4*4*4(%[buf])                                \n\t"
+
+        : [t0] "=&r" (t0), [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3),
+          [s0] "=&r" (s0), [s2] "=&r" (s2), [temp_reg1] "=&r" (temp_reg1),
+          [temp_reg2] "=&r" (temp_reg2), [s1] "=&r" (s1), [s3] "=&r" (s3),
+          [temp_reg3] "=&r" (temp_reg3), [temp_reg4] "=&r" (temp_reg4),
+          [temp_reg5] "=&r" (temp_reg5), [temp_reg6] "=&r" (temp_reg6),
+          [out] "+r" (out)
+        : [tmp] "r" (tmp), [win] "r" (win), [buf] "r" (buf)
+        : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo",
+          "$ac3hi", "$ac3lo"
+    );
+}
+
+static void ff_imdct36_blocks_mips_fixed(int *out, int *buf, int *in,
+                               int count, int switch_point, int block_type)
+{
+    int j;
+    /* one imdct36_mips_fixed() call per block; in, out and buf advance after it */
+    for (j = 0; j < count; j++) {
+        /* window row: 0 for the first two blocks when switch_point is set,
+         * block_type otherwise; odd blocks use the row 4 entries further on */
+        int row  = (switch_point && j < 2) ? 0 : block_type;
+        int *win = ff_mdct_win_fixed[(j & 1) ? row + 4 : row];
+
+        imdct36_mips_fixed(out, buf, in, win);
+
+        in  += 18;
+        out += 1;
+        buf += ((j & 3) == 3) ? 72 - 3 : 1;
+    }
+}
+
+void ff_mpadsp_init_mipsdspr1(MPADSPContext *s)
+{   /* point the fixed-point IMDCT36 and windowing hooks of s at this file */
+    s->imdct36_blocks_fixed = ff_imdct36_blocks_mips_fixed;
+    s->apply_window_fixed   = ff_mpadsp_apply_window_mips_fixed;
+}
diff --git a/libavcodec/mips/mpegaudiodsp_mips_float.c b/libavcodec/mips/mpegaudiodsp_mips_float.c
new file mode 100644
index 0000000..beb8e78
--- /dev/null
+++ b/libavcodec/mips/mpegaudiodsp_mips_float.c
@@ -0,0 +1,1250 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Bojan Zivkovic (bojan@mips.com)
+ *
+ * MPEG Audio decoder optimized for MIPS floating-point architecture
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/mpegaudiodsp_template.c
+ *            libavcodec/dct32.c
+ */
+
+#include <string.h>
+
+#include "libavcodec/mpegaudiodsp.h"
+
+static void ff_mpadsp_apply_window_mips_float(float *synth_buf, float *window,
+                               int *dither_state, float *samples, int incr)
+{
+    register const float *w, *w2, *p;
+    int j;
+    float *samples2;
+    float sum, sum2;
+    /* scratch values used only by the asm statement below */
+    int incr1 = incr << 2; /* incr as a byte offset (4 bytes per sample) */
+    int t_sample;          /* 31 * incr1: byte offset of samples2 from samples */
+    float in1, in2, in3, in4, in5, in6, in7, in8;
+    float *p2;
+
+    /* mirror the first 32 entries after entry 511 so reads need not wrap */
+    memcpy(synth_buf + 512, synth_buf, 32 * sizeof(*synth_buf));
+
+    /**
+    * Applies window[] to synth_buf[] and stores the results through samples.
+    * Instructions are scheduled to minimize pipeline stalls, and the
+    * round_sample() call of the original code is replaced by assembly.
+    */
+    /* sum is seeded with the raw word at *dither_state, which is then zeroed */
+    __asm__ volatile (
+        "lwc1    %[sum],      0(%[dither_state])                            \t\n"
+        "sll     %[t_sample], %[incr1],     5                               \t\n"
+        "sub     %[t_sample], %[t_sample],  %[incr1]                        \n\t"
+        "li      %[j],        4                                             \t\n"
+        "lwc1    %[in1],      0(%[window])                                  \t\n"
+        "lwc1    %[in2],      16*4(%[synth_buf])                            \t\n"
+        "sw      $zero,       0(%[dither_state])                            \t\n"
+        "lwc1    %[in3],      64*4(%[window])                               \t\n"
+        "lwc1    %[in4],      80*4(%[synth_buf])                            \t\n"
+        "addu    %[samples2], %[samples],   %[t_sample]                     \t\n"
+        "madd.s  %[sum],      %[sum],       %[in1], %[in2]                  \t\n"
+        "lwc1    %[in5],      128*4(%[window])                              \t\n"
+        "lwc1    %[in6],      144*4(%[synth_buf])                           \t\n"
+        "lwc1    %[in7],      192*4(%[window])                              \t\n"
+        "madd.s  %[sum],      %[sum],       %[in3], %[in4]                  \t\n"
+        "lwc1    %[in8],      208*4(%[synth_buf])                           \t\n"
+        "lwc1    %[in1],      256*4(%[window])                              \t\n"
+        "lwc1    %[in2],      272*4(%[synth_buf])                           \t\n"
+        "madd.s  %[sum],      %[sum],       %[in5], %[in6]                  \t\n"
+        "lwc1    %[in3],      320*4(%[window])                              \t\n"
+        "lwc1    %[in4],      336*4(%[synth_buf])                           \t\n"
+        "lwc1    %[in5],      384*4(%[window])                              \t\n"
+        "madd.s  %[sum],      %[sum],       %[in7], %[in8]                  \t\n"
+        "lwc1    %[in6],      400*4(%[synth_buf])                           \t\n"
+        "lwc1    %[in7],      448*4(%[window])                              \t\n"
+        "lwc1    %[in8],      464*4(%[synth_buf])                           \t\n"
+        "madd.s  %[sum],      %[sum],       %[in1], %[in2]                  \t\n"
+        "lwc1    %[in1],      32*4(%[window])                               \t\n"
+        "lwc1    %[in2],      48*4(%[synth_buf])                            \t\n"
+        "madd.s  %[sum],      %[sum],       %[in3], %[in4]                  \t\n"
+        "lwc1    %[in3],      96*4(%[window])                               \t\n"
+        "lwc1    %[in4],      112*4(%[synth_buf])                           \t\n"
+        "madd.s  %[sum],      %[sum],       %[in5], %[in6]                  \t\n"
+        "lwc1    %[in5],      160*4(%[window])                              \t\n"
+        "lwc1    %[in6],      176*4(%[synth_buf])                           \t\n"
+        "madd.s  %[sum],      %[sum],       %[in7], %[in8]                  \t\n"
+        "lwc1    %[in7],      224*4(%[window])                              \t\n"
+        "lwc1    %[in8],      240*4(%[synth_buf])                           \t\n"
+        "nmsub.s %[sum],      %[sum],       %[in1], %[in2]                  \t\n"
+        "lwc1    %[in1],      288*4(%[window])                              \t\n"
+        "lwc1    %[in2],      304*4(%[synth_buf])                           \t\n"
+        "nmsub.s %[sum],      %[sum],       %[in3], %[in4]                  \t\n"
+        "lwc1    %[in3],      352*4(%[window])                              \t\n"
+        "lwc1    %[in4],      368*4(%[synth_buf])                           \t\n"
+        "nmsub.s %[sum],      %[sum],       %[in5], %[in6]                  \t\n"
+        "lwc1    %[in5],      416*4(%[window])                              \t\n"
+        "lwc1    %[in6],      432*4(%[synth_buf])                           \t\n"
+        "nmsub.s %[sum],      %[sum],       %[in7], %[in8]                  \t\n"
+        "lwc1    %[in7],      480*4(%[window])                              \t\n"
+        "lwc1    %[in8],      496*4(%[synth_buf])                           \t\n"
+        "nmsub.s %[sum],      %[sum],       %[in1], %[in2]                  \t\n"
+        "addu    %[w],        %[window],    4                               \t\n"
+        "nmsub.s %[sum],      %[sum],       %[in3], %[in4]                  \t\n"
+        "addu    %[w2],       %[window],    124                             \t\n"
+        "addiu   %[p],        %[synth_buf], 68                              \t\n"
+        "addiu   %[p2],       %[synth_buf], 188                             \t\n"
+        "nmsub.s %[sum],      %[sum],       %[in5], %[in6]                  \t\n"
+        "nmsub.s %[sum],      %[sum],       %[in7], %[in8]                  \t\n"
+        "swc1    %[sum],      0(%[samples])                                 \t\n"
+        "addu    %[samples],  %[samples],   %[incr1]                        \t\n"
+        /* loop pointers: w=window+1, w2=window+31, p=synth_buf+17, p2=synth_buf+47 */
+        /* each iteration produces two samples (stored through samples and
+           samples2), so every synth_buf value loaded is used for both */
+        /* 15 passes (j = 4, 8, ..., 60); w, p, samples step up, w2, p2, samples2 down */
+        "ff_mpadsp_apply_window_loop%=:                                     \t\n"
+        "lwc1    %[in1],      0(%[w])                                       \t\n"
+        "lwc1    %[in2],      0(%[p])                                       \t\n"
+        "lwc1    %[in3],      0(%[w2])                                      \t\n"
+        "lwc1    %[in4],      64*4(%[w])                                    \t\n"
+        "lwc1    %[in5],      64*4(%[p])                                    \t\n"
+        "lwc1    %[in6],      64*4(%[w2])                                   \t\n"
+        "mul.s   %[sum],      %[in1],       %[in2]                          \t\n"
+        "mul.s   %[sum2],     %[in2],       %[in3]                          \t\n"
+        "lwc1    %[in1],      128*4(%[w])                                   \t\n"
+        "lwc1    %[in2],      128*4(%[p])                                   \t\n"
+        "madd.s  %[sum],      %[sum],       %[in4], %[in5]                  \t\n"
+        "nmadd.s %[sum2],     %[sum2],      %[in5], %[in6]                  \t\n"
+        "lwc1    %[in3],      128*4(%[w2])                                  \t\n"
+        "lwc1    %[in4],      192*4(%[w])                                   \t\n"
+        "madd.s  %[sum],      %[sum],       %[in1], %[in2]                  \t\n"
+        "lwc1    %[in5],      192*4(%[p])                                   \t\n"
+        "lwc1    %[in6],      192*4(%[w2])                                  \t\n"
+        "nmsub.s %[sum2],     %[sum2],      %[in2], %[in3]                  \t\n"
+        "lwc1    %[in1],      256*4(%[w])                                   \t\n"
+        "lwc1    %[in2],      256*4(%[p])                                   \t\n"
+        "madd.s  %[sum],      %[sum],       %[in4], %[in5]                  \t\n"
+        "nmsub.s %[sum2],     %[sum2],      %[in5], %[in6]                  \t\n"
+        "lwc1    %[in3],      256*4(%[w2])                                  \t\n"
+        "lwc1    %[in4],      320*4(%[w])                                   \t\n"
+        "madd.s  %[sum],      %[sum],       %[in1], %[in2]                  \t\n"
+        "lwc1    %[in5],      320*4(%[p])                                   \t\n"
+        "lwc1    %[in6],      320*4(%[w2])                                  \t\n"
+        "nmsub.s %[sum2],     %[sum2],      %[in2], %[in3]                  \t\n"
+        "lwc1    %[in1],      384*4(%[w])                                   \t\n"
+        "lwc1    %[in2],      384*4(%[p])                                   \t\n"
+        "madd.s  %[sum],      %[sum],       %[in4], %[in5]                  \t\n"
+        "nmsub.s %[sum2],     %[sum2],      %[in5], %[in6]                  \t\n"
+        "lwc1    %[in3],      384*4(%[w2])                                  \t\n"
+        "lwc1    %[in4],      448*4(%[w])                                   \t\n"
+        "madd.s  %[sum],      %[sum],       %[in1], %[in2]                  \t\n"
+        "lwc1    %[in5],      448*4(%[p])                                   \t\n"
+        "lwc1    %[in6],      448*4(%[w2])                                  \t\n"
+        "nmsub.s %[sum2],     %[sum2],      %[in2], %[in3]                  \t\n"
+        "madd.s  %[sum],      %[sum],       %[in4], %[in5]                  \t\n"
+        "lwc1    %[in1],      32*4(%[w])                                    \t\n"
+        "lwc1    %[in2],      0(%[p2])                                      \t\n"
+        "nmsub.s %[sum2],     %[sum2],      %[in5], %[in6]                  \t\n"
+        "lwc1    %[in3],      32*4(%[w2])                                   \t\n"
+        "lwc1    %[in4],      96*4(%[w])                                    \t\n"
+        "lwc1    %[in5],      64*4(%[p2])                                   \t\n"
+        "nmsub.s %[sum],      %[sum],       %[in1], %[in2]                  \t\n"
+        "lwc1    %[in6],      96*4(%[w2])                                   \t\n"
+        "nmsub.s %[sum2],     %[sum2],      %[in2], %[in3]                  \t\n"
+        "lwc1    %[in1],      160*4(%[w])                                   \t\n"
+        "nmsub.s %[sum],      %[sum],       %[in4], %[in5]                  \t\n"
+        "lwc1    %[in2],      128*4(%[p2])                                  \t\n"
+        "nmsub.s %[sum2],     %[sum2],      %[in5], %[in6]                  \t\n"
+        "lwc1    %[in3],      160*4(%[w2])                                  \t\n"
+        "lwc1    %[in4],      224*4(%[w])                                   \t\n"
+        "lwc1    %[in5],      192*4(%[p2])                                  \t\n"
+        "nmsub.s %[sum],      %[sum],       %[in1], %[in2]                  \t\n"
+        "lwc1    %[in6],      224*4(%[w2])                                  \t\n"
+        "nmsub.s %[sum2],     %[sum2],      %[in2], %[in3]                  \t\n"
+        "lwc1    %[in1],      288*4(%[w])                                   \t\n"
+        "nmsub.s %[sum],      %[sum],       %[in4], %[in5]                  \t\n"
+        "lwc1    %[in2],      256*4(%[p2])                                  \t\n"
+        "nmsub.s %[sum2],     %[sum2],      %[in5], %[in6]                  \t\n"
+        "lwc1    %[in3],      288*4(%[w2])                                  \t\n"
+        "lwc1    %[in4],      352*4(%[w])                                   \t\n"
+        "lwc1    %[in5],      320*4(%[p2])                                  \t\n"
+        "nmsub.s %[sum],      %[sum],       %[in1], %[in2]                  \t\n"
+        "lwc1    %[in6],      352*4(%[w2])                                  \t\n"
+        "nmsub.s %[sum2],     %[sum2],      %[in2], %[in3]                  \t\n"
+        "lwc1    %[in1],      416*4(%[w])                                   \t\n"
+        "nmsub.s %[sum],      %[sum],       %[in4], %[in5]                  \t\n"
+        "lwc1    %[in2],      384*4(%[p2])                                  \t\n"
+        "nmsub.s %[sum2],     %[sum2],      %[in5], %[in6]                  \t\n"
+        "lwc1    %[in3],      416*4(%[w2])                                  \t\n"
+        "lwc1    %[in4],      480*4(%[w])                                   \t\n"
+        "lwc1    %[in5],      448*4(%[p2])                                  \t\n"
+        "nmsub.s %[sum],      %[sum],       %[in1], %[in2]                  \t\n"
+        "lwc1    %[in6],      480*4(%[w2])                                  \t\n"
+        "nmsub.s %[sum2],     %[sum2],      %[in2], %[in3]                  \t\n"
+        "addiu   %[w],        %[w],         4                               \t\n"
+        "nmsub.s %[sum],      %[sum],       %[in4], %[in5]                  \t\n"
+        "addiu   %[w2],       %[w2],        -4                              \t\n"
+        "nmsub.s %[sum2],     %[sum2],      %[in5], %[in6]                  \t\n"
+        "addu    %[j],        %[j],         4                               \t\n"
+        "addiu   %[p],        4                                             \t\n"
+        "swc1    %[sum],      0(%[samples])                                 \t\n"
+        "addiu   %[p2],       -4                                            \t\n"
+        "swc1    %[sum2],     0(%[samples2])                                \t\n"
+        "addu    %[samples],  %[samples],   %[incr1]                        \t\n"
+        "subu    %[samples2], %[samples2],  %[incr1]                        \t\n"
+        "bne     %[j],        64,           ff_mpadsp_apply_window_loop%=   \t\n"
+        /* last sample: negated sum of window[48+64k] * synth_buf[32+64k], k = 0..7 */
+        "lwc1    %[in1],      48*4(%[window])                               \t\n"
+        "lwc1    %[in2],      32*4(%[synth_buf])                            \t\n"
+        "lwc1    %[in3],      112*4(%[window])                              \t\n"
+        "lwc1    %[in4],      96*4(%[synth_buf])                            \t\n"
+        "lwc1    %[in5],      176*4(%[window])                              \t\n"
+        "lwc1    %[in6],      160*4(%[synth_buf])                           \t\n"
+        "mul.s   %[sum],      %[in1],       %[in2]                          \t\n"
+        "lwc1    %[in7],      240*4(%[window])                              \t\n"
+        "lwc1    %[in8],      224*4(%[synth_buf])                           \t\n"
+        "lwc1    %[in1],      304*4(%[window])                              \t\n"
+        "nmadd.s %[sum],      %[sum],       %[in3], %[in4]                  \t\n"
+        "lwc1    %[in2],      288*4(%[synth_buf])                           \t\n"
+        "lwc1    %[in3],      368*4(%[window])                              \t\n"
+        "lwc1    %[in4],      352*4(%[synth_buf])                           \t\n"
+        "nmsub.s %[sum],      %[sum],       %[in5], %[in6]                  \t\n"
+        "nmsub.s %[sum],      %[sum],       %[in7], %[in8]                  \t\n"
+        "lwc1    %[in5],      432*4(%[window])                              \t\n"
+        "lwc1    %[in6],      416*4(%[synth_buf])                           \t\n"
+        "nmsub.s %[sum],      %[sum],       %[in1], %[in2]                  \t\n"
+        "lwc1    %[in7],      496*4(%[window])                              \t\n"
+        "lwc1    %[in8],      480*4(%[synth_buf])                           \t\n"
+        "nmsub.s %[sum],      %[sum],       %[in3], %[in4]                  \t\n"
+        "nmsub.s %[sum],      %[sum],       %[in5], %[in6]                  \t\n"
+        "nmsub.s %[sum],      %[sum],       %[in7], %[in8]                  \t\n"
+        "swc1    %[sum],      0(%[samples])                                 \t\n"
+
+        : [sum] "=&f" (sum), [sum2] "=&f" (sum2),
+          [w2] "=&r" (w2),   [w] "=&r" (w),
+          [p] "=&r" (p), [p2] "=&r" (p2), [j] "=&r" (j),
+          [samples] "+r" (samples), [samples2] "=&r" (samples2),
+          [in1] "=&f" (in1), [in2] "=&f" (in2),
+          [in3] "=&f" (in3), [in4] "=&f" (in4),
+          [in5] "=&f" (in5), [in6] "=&f" (in6),
+          [in7] "=&f" (in7), [in8] "=&f" (in8),
+          [t_sample] "=&r" (t_sample)
+        : [synth_buf] "r" (synth_buf), [window] "r" (window),
+          [dither_state] "r" (dither_state), [incr1] "r" (incr1)
+        : "memory"
+    );
+}
+
+static void ff_dct32_mips_float(float *out, const float *tab)
+{
+    float val0 , val1 , val2 , val3 , val4 , val5 , val6 , val7,
+          val8 , val9 , val10, val11, val12, val13, val14, val15,
+          val16, val17, val18, val19, val20, val21, val22, val23,
+          val24, val25, val26, val27, val28, val29, val30, val31;
+    float fTmp1, fTmp2, fTmp3, fTmp4, fTmp5, fTmp6, fTmp7, fTmp8,
+          fTmp9, fTmp10, fTmp11;
+
+    /**
+    * instructions are scheduled to minimize pipeline stall.
+    */
+    __asm__ volatile (
+        "lwc1       %[fTmp1],       0*4(%[tab])                             \n\t"
+        "lwc1       %[fTmp2],       31*4(%[tab])                            \n\t"
+        "lwc1       %[fTmp3],       15*4(%[tab])                            \n\t"
+        "lwc1       %[fTmp4],       16*4(%[tab])                            \n\t"
+        "li.s       %[fTmp7],       0.50241928618815570551                  \n\t"
+        "add.s      %[fTmp5],       %[fTmp1],       %[fTmp2]                \n\t"
+        "sub.s      %[fTmp8],       %[fTmp1],       %[fTmp2]                \n\t"
+        "add.s      %[fTmp6],       %[fTmp3],       %[fTmp4]                \n\t"
+        "sub.s      %[fTmp9],       %[fTmp3],       %[fTmp4]                \n\t"
+        "li.s       %[fTmp10],      0.50060299823519630134                  \n\t"
+        "li.s       %[fTmp11],      10.19000812354805681150                 \n\t"
+        "mul.s      %[fTmp8],       %[fTmp8],       %[fTmp10]               \n\t"
+        "add.s      %[val0],        %[fTmp5],       %[fTmp6]                \n\t"
+        "sub.s      %[val15],       %[fTmp5],       %[fTmp6]                \n\t"
+        "lwc1       %[fTmp1],       7*4(%[tab])                             \n\t"
+        "lwc1       %[fTmp2],       24*4(%[tab])                            \n\t"
+        "madd.s     %[val16],       %[fTmp8],       %[fTmp9],   %[fTmp11]   \n\t"
+        "nmsub.s    %[val31],       %[fTmp8],       %[fTmp9],   %[fTmp11]   \n\t"
+        "mul.s      %[val15],       %[val15],       %[fTmp7]                \n\t"
+        "lwc1       %[fTmp3],       8*4(%[tab])                             \n\t"
+        "lwc1       %[fTmp4],       23*4(%[tab])                            \n\t"
+        "add.s      %[fTmp5],       %[fTmp1],       %[fTmp2]                \n\t"
+        "mul.s      %[val31],       %[val31],       %[fTmp7]                \n\t"
+        "sub.s      %[fTmp8],       %[fTmp1],       %[fTmp2]                \n\t"
+        "add.s      %[fTmp6],       %[fTmp3],       %[fTmp4]                \n\t"
+        "sub.s      %[fTmp9],       %[fTmp3],       %[fTmp4]                \n\t"
+        "li.s       %[fTmp7],       5.10114861868916385802                  \n\t"
+        "li.s       %[fTmp10],      0.67480834145500574602                  \n\t"
+        "li.s       %[fTmp11],      0.74453627100229844977                  \n\t"
+        "add.s      %[val7],        %[fTmp5],       %[fTmp6]                \n\t"
+        "sub.s      %[val8],        %[fTmp5],       %[fTmp6]                \n\t"
+        "mul.s      %[fTmp8],       %[fTmp8],       %[fTmp10]               \n\t"
+        "li.s       %[fTmp1],       0.50979557910415916894                  \n\t"
+        "sub.s      %[fTmp2],       %[val0],        %[val7]                 \n\t"
+        "mul.s      %[val8],        %[val8],        %[fTmp7]                \n\t"
+        "madd.s     %[val23],       %[fTmp8],       %[fTmp9],   %[fTmp11]   \n\t"
+        "nmsub.s    %[val24],       %[fTmp8],       %[fTmp9],   %[fTmp11]   \n\t"
+        "add.s      %[val0],        %[val0],        %[val7]                 \n\t"
+        "mul.s      %[val7],        %[fTmp1],       %[fTmp2]                \n\t"
+        "sub.s      %[fTmp2],       %[val15],       %[val8]                 \n\t"
+        "add.s      %[val8],        %[val15],       %[val8]                 \n\t"
+        "mul.s      %[val24],       %[val24],       %[fTmp7]                \n\t"
+        "sub.s      %[fTmp3],       %[val16],       %[val23]                \n\t"
+        "add.s      %[val16],       %[val16],       %[val23]                \n\t"
+        "mul.s      %[val15],       %[fTmp1],       %[fTmp2]                \n\t"
+        "sub.s      %[fTmp4],       %[val31],       %[val24]                \n\t"
+        "mul.s      %[val23],       %[fTmp1],       %[fTmp3]                \n\t"
+        "add.s      %[val24],       %[val31],       %[val24]                \n\t"
+        "mul.s      %[val31],       %[fTmp1],       %[fTmp4]                \n\t"
+
+        : [fTmp1]  "=&f" (fTmp1),  [fTmp2] "=&f" (fTmp2), [fTmp3] "=&f" (fTmp3),
+          [fTmp4]  "=&f" (fTmp4),  [fTmp5] "=&f" (fTmp5), [fTmp6] "=&f" (fTmp6),
+          [fTmp7]  "=&f" (fTmp7),  [fTmp8] "=&f" (fTmp8), [fTmp9] "=&f" (fTmp9),
+          [fTmp10] "=&f" (fTmp10), [fTmp11] "=&f" (fTmp11),
+          [val0]  "=f" (val0),  [val7]  "=f" (val7),
+          [val8]  "=f" (val8),  [val15] "=f" (val15),
+          [val16] "=f" (val16), [val23] "=f" (val23),
+          [val24] "=f" (val24), [val31] "=f" (val31)
+        : [tab] "r" (tab)
+        : "memory"
+    );
+
+    __asm__ volatile (
+        "lwc1       %[fTmp1],       3*4(%[tab])                             \n\t"
+        "lwc1       %[fTmp2],       28*4(%[tab])                            \n\t"
+        "lwc1       %[fTmp3],       12*4(%[tab])                            \n\t"
+        "lwc1       %[fTmp4],       19*4(%[tab])                            \n\t"
+        "li.s       %[fTmp7],       0.64682178335999012954                  \n\t"
+        "add.s      %[fTmp5],       %[fTmp1],       %[fTmp2]                \n\t"
+        "sub.s      %[fTmp8],       %[fTmp1],       %[fTmp2]                \n\t"
+        "add.s      %[fTmp6],       %[fTmp3],       %[fTmp4]                \n\t"
+        "sub.s      %[fTmp9],       %[fTmp3],       %[fTmp4]                \n\t"
+        "li.s       %[fTmp10],      0.53104259108978417447                  \n\t"
+        "li.s       %[fTmp11],      1.48416461631416627724                  \n\t"
+        "mul.s      %[fTmp8],       %[fTmp8],       %[fTmp10]               \n\t"
+        "add.s      %[val3],        %[fTmp5],       %[fTmp6]                \n\t"
+        "sub.s      %[val12],       %[fTmp5],       %[fTmp6]                \n\t"
+        "lwc1       %[fTmp1],       4*4(%[tab])                             \n\t"
+        "lwc1       %[fTmp2],       27*4(%[tab])                            \n\t"
+        "madd.s     %[val19],       %[fTmp8],       %[fTmp9],   %[fTmp11]   \n\t"
+        "nmsub.s    %[val28],       %[fTmp8],       %[fTmp9],   %[fTmp11]   \n\t"
+        "mul.s      %[val12],       %[val12],       %[fTmp7]                \n\t"
+        "lwc1       %[fTmp3],       11*4(%[tab])                            \n\t"
+        "lwc1       %[fTmp4],       20*4(%[tab])                            \n\t"
+        "add.s      %[fTmp5],       %[fTmp1],       %[fTmp2]                \n\t"
+        "mul.s      %[val28],       %[val28],       %[fTmp7]                \n\t"
+        "sub.s      %[fTmp8],       %[fTmp1],       %[fTmp2]                \n\t"
+        "li.s       %[fTmp7],       0.78815462345125022473                  \n\t"
+        "add.s      %[fTmp6],       %[fTmp3],       %[fTmp4]                \n\t"
+        "sub.s      %[fTmp9],       %[fTmp3],       %[fTmp4]                \n\t"
+        "li.s       %[fTmp10],      0.55310389603444452782                  \n\t"
+        "li.s       %[fTmp11],      1.16943993343288495515                  \n\t"
+        "mul.s      %[fTmp8],       %[fTmp8],       %[fTmp10]               \n\t"
+        "add.s      %[val4],        %[fTmp5],       %[fTmp6]                \n\t"
+        "sub.s      %[val11],       %[fTmp5],       %[fTmp6]                \n\t"
+        "li.s       %[fTmp1],       2.56291544774150617881                  \n\t"
+        "madd.s     %[val20],       %[fTmp8],       %[fTmp9],   %[fTmp11]   \n\t"
+        "nmsub.s    %[val27],       %[fTmp8],       %[fTmp9],   %[fTmp11]   \n\t"
+        "mul.s      %[val11],       %[val11],       %[fTmp7]                \n\t"
+        "sub.s      %[fTmp2],       %[val3],        %[val4]                 \n\t"
+        "add.s      %[val3],        %[val3],        %[val4]                 \n\t"
+        "sub.s      %[fTmp4],       %[val19],       %[val20]                \n\t"
+        "mul.s      %[val27],       %[val27],       %[fTmp7]                \n\t"
+        "sub.s      %[fTmp3],       %[val12],       %[val11]                \n\t"
+        "mul.s      %[val4],        %[fTmp1],       %[fTmp2]                \n\t"
+        "add.s      %[val11],       %[val12],       %[val11]                \n\t"
+        "add.s      %[val19],       %[val19],       %[val20]                \n\t"
+        "mul.s      %[val20],       %[fTmp1],       %[fTmp4]                \n\t"
+        "mul.s      %[val12],       %[fTmp1],       %[fTmp3]                \n\t"
+        "sub.s      %[fTmp2],       %[val28],       %[val27]                \n\t"
+        "add.s      %[val27],       %[val28],       %[val27]                \n\t"
+        "mul.s      %[val28],       %[fTmp1],       %[fTmp2]                \n\t"
+
+        : [fTmp1]  "=&f" (fTmp1),  [fTmp2]  "=&f" (fTmp2), [fTmp3] "=&f" (fTmp3),
+          [fTmp4]  "=&f" (fTmp4),  [fTmp5]  "=&f" (fTmp5), [fTmp6] "=&f" (fTmp6),
+          [fTmp7]  "=&f" (fTmp7),  [fTmp8]  "=&f" (fTmp8), [fTmp9] "=&f" (fTmp9),
+          [fTmp10] "=&f" (fTmp10), [fTmp11] "=&f" (fTmp11),
+          [val3]  "=f" (val3),  [val4]  "=f" (val4),
+          [val11] "=f" (val11), [val12] "=f" (val12),
+          [val19] "=f" (val19), [val20] "=f" (val20),
+          [val27] "=f" (val27), [val28] "=f" (val28)
+        : [tab] "r" (tab)
+        : "memory"
+    );
+
+    __asm__ volatile (
+        "li.s       %[fTmp1],       0.54119610014619698439                  \n\t"
+        "sub.s      %[fTmp2],       %[val0],        %[val3]                 \n\t"
+        "add.s      %[val0],        %[val0],        %[val3]                 \n\t"
+        "sub.s      %[fTmp3],       %[val7],        %[val4]                 \n\t"
+        "add.s      %[val4],        %[val7],        %[val4]                 \n\t"
+        "sub.s      %[fTmp4],       %[val8],        %[val11]                \n\t"
+        "mul.s      %[val3],        %[fTmp1],       %[fTmp2]                \n\t"
+        "add.s      %[val8],        %[val8],        %[val11]                \n\t"
+        "mul.s      %[val7],        %[fTmp1],       %[fTmp3]                \n\t"
+        "sub.s      %[fTmp2],       %[val15],       %[val12]                \n\t"
+        "mul.s      %[val11],       %[fTmp1],       %[fTmp4]                \n\t"
+        "add.s      %[val12],       %[val15],       %[val12]                \n\t"
+        "mul.s      %[val15],       %[fTmp1],       %[fTmp2]                \n\t"
+
+        : [val0]  "+f" (val0),   [val3] "+f" (val3),
+          [val4]  "+f" (val4),   [val7] "+f" (val7),
+          [val8]  "+f" (val8),   [val11] "+f" (val11),
+          [val12] "+f" (val12),  [val15] "+f" (val15),
+          [fTmp1] "=f"  (fTmp1), [fTmp2] "=&f" (fTmp2),
+          [fTmp3] "=&f" (fTmp3), [fTmp4] "=&f" (fTmp4)
+        :
+    );
+
+    __asm__ volatile (
+        "sub.s      %[fTmp2],       %[val16],       %[val19]                \n\t"
+        "add.s      %[val16],       %[val16],       %[val19]                \n\t"
+        "sub.s      %[fTmp3],       %[val23],       %[val20]                \n\t"
+        "add.s      %[val20],       %[val23],       %[val20]                \n\t"
+        "sub.s      %[fTmp4],       %[val24],       %[val27]                \n\t"
+        "mul.s      %[val19],       %[fTmp1],       %[fTmp2]                \n\t"
+        "add.s      %[val24],       %[val24],       %[val27]                \n\t"
+        "mul.s      %[val23],       %[fTmp1],       %[fTmp3]                \n\t"
+        "sub.s      %[fTmp2],       %[val31],       %[val28]                \n\t"
+        "mul.s      %[val27],       %[fTmp1],       %[fTmp4]                \n\t"
+        "add.s      %[val28],       %[val31],       %[val28]                \n\t"
+        "mul.s      %[val31],       %[fTmp1],       %[fTmp2]                \n\t"
+
+        : [fTmp2] "=&f" (fTmp2), [fTmp3] "=&f" (fTmp3), [fTmp4] "=&f" (fTmp4),
+          [val16] "+f" (val16), [val19] "+f" (val19), [val20] "+f" (val20),
+          [val23] "+f" (val23), [val24] "+f" (val24), [val27] "+f" (val27),
+          [val28] "+f" (val28), [val31] "+f" (val31)
+        : [fTmp1] "f" (fTmp1)
+    );
+
+    __asm__ volatile (
+        "lwc1       %[fTmp1],       1*4(%[tab])                             \n\t"
+        "lwc1       %[fTmp2],       30*4(%[tab])                            \n\t"
+        "lwc1       %[fTmp3],       14*4(%[tab])                            \n\t"
+        "lwc1       %[fTmp4],       17*4(%[tab])                            \n\t"
+        "li.s       %[fTmp7],       0.52249861493968888062                  \n\t"
+        "add.s      %[fTmp5],       %[fTmp1],       %[fTmp2]                \n\t"
+        "sub.s      %[fTmp8],       %[fTmp1],       %[fTmp2]                \n\t"
+        "add.s      %[fTmp6],       %[fTmp3],       %[fTmp4]                \n\t"
+        "sub.s      %[fTmp9],       %[fTmp3],       %[fTmp4]                \n\t"
+        "li.s       %[fTmp10],      0.50547095989754365998                  \n\t"
+        "li.s       %[fTmp11],      3.40760841846871878570                  \n\t"
+        "mul.s      %[fTmp8],       %[fTmp8],       %[fTmp10]               \n\t"
+        "add.s      %[val1],        %[fTmp5],       %[fTmp6]                \n\t"
+        "sub.s      %[val14],       %[fTmp5],       %[fTmp6]                \n\t"
+        "lwc1       %[fTmp1],       6*4(%[tab])                             \n\t"
+        "lwc1       %[fTmp2],       25*4(%[tab])                            \n\t"
+        "madd.s     %[val17],       %[fTmp8],       %[fTmp9],   %[fTmp11]   \n\t"
+        "nmsub.s    %[val30],       %[fTmp8],       %[fTmp9],   %[fTmp11]   \n\t"
+        "mul.s      %[val14],       %[val14],       %[fTmp7]                \n\t"
+        "lwc1       %[fTmp3],       9*4(%[tab])                             \n\t"
+        "lwc1       %[fTmp4],       22*4(%[tab])                            \n\t"
+        "add.s      %[fTmp5],       %[fTmp1],       %[fTmp2]                \n\t"
+        "mul.s      %[val30],       %[val30],       %[fTmp7]                \n\t"
+        "sub.s      %[fTmp8],       %[fTmp1],       %[fTmp2]                \n\t"
+        "add.s      %[fTmp6],       %[fTmp3],       %[fTmp4]                \n\t"
+        "sub.s      %[fTmp9],       %[fTmp3],       %[fTmp4]                \n\t"
+        "li.s       %[fTmp7],       1.72244709823833392782                  \n\t"
+        "li.s       %[fTmp10],      0.62250412303566481615                  \n\t"
+        "li.s       %[fTmp11],      0.83934964541552703873                  \n\t"
+        "add.s      %[val6],        %[fTmp5],       %[fTmp6]                \n\t"
+        "sub.s      %[val9],        %[fTmp5],       %[fTmp6]                \n\t"
+        "mul.s      %[fTmp8],       %[fTmp8],       %[fTmp10]               \n\t"
+        "li.s       %[fTmp1],       0.60134488693504528054                  \n\t"
+        "sub.s      %[fTmp2],       %[val1],        %[val6]                 \n\t"
+        "add.s      %[val1],        %[val1],        %[val6]                 \n\t"
+        "mul.s      %[val9],        %[val9],        %[fTmp7]                \n\t"
+        "madd.s     %[val22],       %[fTmp8],       %[fTmp9],   %[fTmp11]   \n\t"
+        "nmsub.s    %[val25],       %[fTmp8],       %[fTmp9],   %[fTmp11]   \n\t"
+        "mul.s      %[val6],        %[fTmp1],       %[fTmp2]                \n\t"
+        "sub.s      %[fTmp2],       %[val14],       %[val9]                 \n\t"
+        "add.s      %[val9],        %[val14],       %[val9]                 \n\t"
+        "mul.s      %[val25],       %[val25],       %[fTmp7]                \n\t"
+        "sub.s      %[fTmp3],       %[val17],       %[val22]                \n\t"
+        "add.s      %[val17],       %[val17],       %[val22]                \n\t"
+        "mul.s      %[val14],       %[fTmp1],       %[fTmp2]                \n\t"
+        "sub.s      %[fTmp2],       %[val30],       %[val25]                \n\t"
+        "mul.s      %[val22],       %[fTmp1],       %[fTmp3]                \n\t"
+        "add.s      %[val25],       %[val30],       %[val25]                \n\t"
+        "mul.s      %[val30],       %[fTmp1],       %[fTmp2]                \n\t"
+
+        : [fTmp1]  "=&f" (fTmp1),  [fTmp2]  "=&f" (fTmp2), [fTmp3] "=&f" (fTmp3),
+          [fTmp4]  "=&f" (fTmp4),  [fTmp5]  "=&f" (fTmp5), [fTmp6] "=&f" (fTmp6),
+          [fTmp7]  "=&f" (fTmp7),  [fTmp8]  "=&f" (fTmp8), [fTmp9] "=&f" (fTmp9),
+          [fTmp10] "=&f" (fTmp10), [fTmp11] "=&f" (fTmp11),
+          [val1]  "=f" (val1),  [val6]  "=f" (val6),
+          [val9]  "=f" (val9),  [val14] "=f" (val14),
+          [val17] "=f" (val17), [val22] "=f" (val22),
+          [val25] "=f" (val25), [val30] "=f" (val30)
+        : [tab] "r" (tab)
+        : "memory"
+    );
+
+    __asm__ volatile (
+        "lwc1       %[fTmp1],       2*4(%[tab])                             \n\t"
+        "lwc1       %[fTmp2],       29*4(%[tab])                            \n\t"
+        "lwc1       %[fTmp3],       13*4(%[tab])                            \n\t"
+        "lwc1       %[fTmp4],       18*4(%[tab])                            \n\t"
+        "li.s       %[fTmp7],       0.56694403481635770368                  \n\t"
+        "add.s      %[fTmp5],       %[fTmp1],       %[fTmp2]                \n\t"
+        "sub.s      %[fTmp8],       %[fTmp1],       %[fTmp2]                \n\t"
+        "add.s      %[fTmp6],       %[fTmp3],       %[fTmp4]                \n\t"
+        "sub.s      %[fTmp9],       %[fTmp3],       %[fTmp4]                \n\t"
+        "li.s       %[fTmp10],      0.51544730992262454697                  \n\t"
+        "li.s       %[fTmp11],      2.05778100995341155085                  \n\t"
+        "mul.s      %[fTmp8],       %[fTmp8],       %[fTmp10]               \n\t"
+        "add.s      %[val2],        %[fTmp5],       %[fTmp6]                \n\t"
+        "sub.s      %[val13],       %[fTmp5],       %[fTmp6]                \n\t"
+        "lwc1       %[fTmp1],       5*4(%[tab])                             \n\t"
+        "lwc1       %[fTmp2],       26*4(%[tab])                            \n\t"
+        "madd.s     %[val18],       %[fTmp8],       %[fTmp9],   %[fTmp11]   \n\t"
+        "nmsub.s    %[val29],       %[fTmp8],       %[fTmp9],   %[fTmp11]   \n\t"
+        "mul.s      %[val13],       %[val13],       %[fTmp7]                \n\t"
+        "lwc1       %[fTmp3],       10*4(%[tab])                            \n\t"
+        "lwc1       %[fTmp4],       21*4(%[tab])                            \n\t"
+        "mul.s      %[val29],       %[val29],       %[fTmp7]                \n\t"
+        "add.s      %[fTmp5],       %[fTmp1],       %[fTmp2]                \n\t"
+        "sub.s      %[fTmp8],       %[fTmp1],       %[fTmp2]                \n\t"
+        "add.s      %[fTmp6],       %[fTmp3],       %[fTmp4]                \n\t"
+        "sub.s      %[fTmp9],       %[fTmp3],       %[fTmp4]                \n\t"
+        "li.s       %[fTmp7],       1.06067768599034747134                  \n\t"
+        "li.s       %[fTmp10],      0.58293496820613387367                  \n\t"
+        "li.s       %[fTmp11],      0.97256823786196069369                  \n\t"
+        "add.s      %[val5],        %[fTmp5],       %[fTmp6]                \n\t"
+        "sub.s      %[val10],       %[fTmp5],       %[fTmp6]                \n\t"
+        "mul.s      %[fTmp8],       %[fTmp8],       %[fTmp10]               \n\t"
+        "li.s       %[fTmp1],       0.89997622313641570463                  \n\t"
+        "sub.s      %[fTmp2],       %[val2],        %[val5]                 \n\t"
+        "mul.s      %[val10],       %[val10],       %[fTmp7]                \n\t"
+        "madd.s     %[val21],       %[fTmp8],       %[fTmp9],   %[fTmp11]   \n\t"
+        "nmsub.s    %[val26],       %[fTmp8],       %[fTmp9],   %[fTmp11]   \n\t"
+        "add.s      %[val2],        %[val2],        %[val5]                 \n\t"
+        "mul.s      %[val5],        %[fTmp1],       %[fTmp2]                \n\t"
+        "sub.s      %[fTmp3],       %[val13],       %[val10]                \n\t"
+        "add.s      %[val10],       %[val13],       %[val10]                \n\t"
+        "mul.s      %[val26],       %[val26],       %[fTmp7]                \n\t"
+        "sub.s      %[fTmp4],       %[val18],       %[val21]                \n\t"
+        "add.s      %[val18],       %[val18],       %[val21]                \n\t"
+        "mul.s      %[val13],       %[fTmp1],       %[fTmp3]                \n\t"
+        "sub.s      %[fTmp2],       %[val29],       %[val26]                \n\t"
+        "add.s      %[val26],       %[val29],       %[val26]                \n\t"
+        "mul.s      %[val21],       %[fTmp1],       %[fTmp4]                \n\t"
+        "mul.s      %[val29],       %[fTmp1],       %[fTmp2]                \n\t"
+
+        : [fTmp1]  "=&f" (fTmp1),  [fTmp2]  "=&f" (fTmp2), [fTmp3] "=&f" (fTmp3),
+          [fTmp4]  "=&f" (fTmp4),  [fTmp5]  "=&f" (fTmp5), [fTmp6] "=&f" (fTmp6),
+          [fTmp7]  "=&f" (fTmp7),  [fTmp8]  "=&f" (fTmp8), [fTmp9] "=&f" (fTmp9),
+          [fTmp10] "=&f" (fTmp10), [fTmp11] "=&f" (fTmp11),
+          [val2]  "=f" (val2),  [val5]  "=f" (val5),
+          [val10] "=f" (val10), [val13] "=f" (val13),
+          [val18] "=f" (val18), [val21] "=f" (val21),
+          [val26] "=f" (val26), [val29] "=f" (val29)
+        : [tab] "r" (tab)
+        : "memory"
+    );
+
+    __asm__ volatile (
+        "li.s       %[fTmp1],       1.30656296487637652785                  \n\t"
+        "sub.s      %[fTmp2],       %[val1],        %[val2]                 \n\t"
+        "add.s      %[val1],        %[val1],        %[val2]                 \n\t"
+        "sub.s      %[fTmp3],       %[val6],        %[val5]                 \n\t"
+        "add.s      %[val5],        %[val6],        %[val5]                 \n\t"
+        "sub.s      %[fTmp4],       %[val9],        %[val10]                \n\t"
+        "mul.s      %[val2],        %[fTmp1],       %[fTmp2]                \n\t"
+        "add.s      %[val9],        %[val9],        %[val10]                \n\t"
+        "mul.s      %[val6],        %[fTmp1],       %[fTmp3]                \n\t"
+        "sub.s      %[fTmp2],       %[val14],       %[val13]                \n\t"
+        "mul.s      %[val10],       %[fTmp1],       %[fTmp4]                \n\t"
+        "add.s      %[val13],       %[val14],       %[val13]                \n\t"
+        "mul.s      %[val14],       %[fTmp1],       %[fTmp2]                \n\t"
+
+        : [fTmp1] "=f"  (fTmp1), [fTmp2] "=&f" (fTmp2),
+          [fTmp3] "=&f" (fTmp3), [fTmp4] "=&f" (fTmp4),
+          [val1]  "+f" (val1),  [val2]  "+f" (val2),
+          [val5]  "+f" (val5),  [val6]  "+f" (val6),
+          [val9]  "+f" (val9),  [val10] "+f" (val10),
+          [val13] "+f" (val13), [val14] "+f" (val14)
+        :
+    );
+
+    __asm__ volatile (
+        "sub.s      %[fTmp2],       %[val17],       %[val18]                \n\t"
+        "add.s      %[val17],       %[val17],       %[val18]                \n\t"
+        "sub.s      %[fTmp3],       %[val22],       %[val21]                \n\t"
+        "add.s      %[val21],       %[val22],       %[val21]                \n\t"
+        "sub.s      %[fTmp4],       %[val25],       %[val26]                \n\t"
+        "mul.s      %[val18],       %[fTmp1],       %[fTmp2]                \n\t"
+        "add.s      %[val25],       %[val25],       %[val26]                \n\t"
+        "mul.s      %[val22],       %[fTmp1],       %[fTmp3]                \n\t"
+        "sub.s      %[fTmp2],       %[val30],       %[val29]                \n\t"
+        "mul.s      %[val26],       %[fTmp1],       %[fTmp4]                \n\t"
+        "add.s      %[val29],       %[val30],       %[val29]                \n\t"
+        "mul.s      %[val30],       %[fTmp1],       %[fTmp2]                \n\t"
+
+        : [fTmp2] "=&f" (fTmp2), [fTmp3] "=&f" (fTmp3), [fTmp4] "=&f" (fTmp4),
+          [val17] "+f" (val17), [val18] "+f" (val18), [val21] "+f" (val21),
+          [val22] "+f" (val22), [val25] "+f" (val25), [val26] "+f" (val26),
+          [val29] "+f" (val29), [val30] "+f" (val30)
+        : [fTmp1] "f" (fTmp1)
+    );
+
+    __asm__ volatile (
+        "li.s       %[fTmp1],       0.70710678118654752439                  \n\t"
+        "sub.s      %[fTmp2],       %[val0],        %[val1]                 \n\t"
+        "add.s      %[val0],        %[val0],        %[val1]                 \n\t"
+        "sub.s      %[fTmp3],       %[val3],        %[val2]                 \n\t"
+        "add.s      %[val2],        %[val3],        %[val2]                 \n\t"
+        "sub.s      %[fTmp4],       %[val4],        %[val5]                 \n\t"
+        "mul.s      %[val1],        %[fTmp1],       %[fTmp2]                \n\t"
+        "swc1       %[val0],        0(%[out])                               \n\t"
+        "mul.s      %[val3],        %[fTmp3],       %[fTmp1]                \n\t"
+        "add.s      %[val4],        %[val4],        %[val5]                 \n\t"
+        "mul.s      %[val5],        %[fTmp1],       %[fTmp4]                \n\t"
+        "swc1       %[val1],        16*4(%[out])                            \n\t"
+        "sub.s      %[fTmp2],       %[val7],        %[val6]                 \n\t"
+        "add.s      %[val2],        %[val2],        %[val3]                 \n\t"
+        "swc1       %[val3],        24*4(%[out])                            \n\t"
+        "add.s      %[val6],        %[val7],        %[val6]                 \n\t"
+        "mul.s      %[val7],        %[fTmp1],       %[fTmp2]                \n\t"
+        "swc1       %[val2],        8*4(%[out])                             \n\t"
+        "add.s      %[val6],        %[val6],        %[val7]                 \n\t"
+        "swc1       %[val7],        28*4(%[out])                            \n\t"
+        "add.s      %[val4],        %[val4],        %[val6]                 \n\t"
+        "add.s      %[val6],        %[val6],        %[val5]                 \n\t"
+        "add.s      %[val5],        %[val5],        %[val7]                 \n\t"
+        "swc1       %[val4],        4*4(%[out])                             \n\t"
+        "swc1       %[val5],        20*4(%[out])                            \n\t"
+        "swc1       %[val6],        12*4(%[out])                            \n\t"
+
+        : [fTmp1] "=f"  (fTmp1), [fTmp2] "=&f" (fTmp2),
+          [fTmp3] "=&f" (fTmp3), [fTmp4] "=&f" (fTmp4),
+          [val0] "+f" (val0), [val1] "+f" (val1),
+          [val2] "+f" (val2), [val3] "+f" (val3),
+          [val4] "+f" (val4), [val5] "+f" (val5),
+          [val6] "+f" (val6), [val7] "+f" (val7)
+        : [out] "r" (out)
+    );
+
+    __asm__ volatile (
+        "sub.s      %[fTmp2],       %[val8],        %[val9]                 \n\t"
+        "add.s      %[val8],        %[val8],        %[val9]                 \n\t"
+        "sub.s      %[fTmp3],       %[val11],       %[val10]                \n\t"
+        "add.s      %[val10],       %[val11],       %[val10]                \n\t"
+        "sub.s      %[fTmp4],       %[val12],       %[val13]                \n\t"
+        "mul.s      %[val9],        %[fTmp1],       %[fTmp2]                \n\t"
+        "add.s      %[val12],       %[val12],       %[val13]                \n\t"
+        "mul.s      %[val11],       %[fTmp1],       %[fTmp3]                \n\t"
+        "sub.s      %[fTmp2],       %[val15],       %[val14]                \n\t"
+        "mul.s      %[val13],       %[fTmp1],       %[fTmp4]                \n\t"
+        "add.s      %[val14],       %[val15],       %[val14]                \n\t"
+        "add.s      %[val10],       %[val10],       %[val11]                \n\t"
+        "mul.s      %[val15],       %[fTmp1],       %[fTmp2]                \n\t"
+        "add.s      %[val14],       %[val14],       %[val15]                \n\t"
+        "add.s      %[val12],       %[val12],       %[val14]                \n\t"
+        "add.s      %[val14],       %[val14],       %[val13]                \n\t"
+        "add.s      %[val13],       %[val13],       %[val15]                \n\t"
+        "add.s      %[val8],        %[val8],        %[val12]                \n\t"
+        "add.s      %[val12],       %[val12],       %[val10]                \n\t"
+        "add.s      %[val10],       %[val10],       %[val14]                \n\t"
+        "add.s      %[val14],       %[val14],       %[val9]                 \n\t"
+        "add.s      %[val9],        %[val9],        %[val13]                \n\t"
+        "add.s      %[val13],       %[val13],       %[val11]                \n\t"
+        "add.s      %[val11],       %[val11],       %[val15]                \n\t"
+        "swc1       %[val8],         2*4(%[out])                            \n\t"
+        "swc1       %[val9],        18*4(%[out])                            \n\t"
+        "swc1       %[val10],       10*4(%[out])                            \n\t"
+        "swc1       %[val11],       26*4(%[out])                            \n\t"
+        "swc1       %[val12],        6*4(%[out])                            \n\t"
+        "swc1       %[val13],       22*4(%[out])                            \n\t"
+        "swc1       %[val14],       14*4(%[out])                            \n\t"
+        "swc1       %[val15],       30*4(%[out])                            \n\t"
+
+        : [fTmp2] "=&f" (fTmp2), [fTmp3] "=&f" (fTmp3), [fTmp4] "=&f" (fTmp4),
+          [val8]  "+f" (val8),  [val9]  "+f" (val9),  [val10] "+f" (val10),
+          [val11] "+f" (val11), [val12] "+f" (val12), [val13] "+f" (val13),
+          [val14] "+f" (val14), [val15] "+f" (val15)
+        : [fTmp1] "f" (fTmp1), [out] "r" (out)
+    );
+
+    __asm__ volatile (
+        "sub.s      %[fTmp2],       %[val16],       %[val17]                \n\t"
+        "add.s      %[val16],       %[val16],       %[val17]                \n\t"
+        "sub.s      %[fTmp3],       %[val19],       %[val18]                \n\t"
+        "add.s      %[val18],       %[val19],       %[val18]                \n\t"
+        "sub.s      %[fTmp4],       %[val20],       %[val21]                \n\t"
+        "mul.s      %[val17],       %[fTmp1],       %[fTmp2]                \n\t"
+        "add.s      %[val20],       %[val20],       %[val21]                \n\t"
+        "mul.s      %[val19],       %[fTmp1],       %[fTmp3]                \n\t"
+        "sub.s      %[fTmp2],       %[val23],       %[val22]                \n\t"
+        "mul.s      %[val21],       %[fTmp1],       %[fTmp4]                \n\t"
+        "add.s      %[val22],       %[val23],       %[val22]                \n\t"
+        "add.s      %[val18],       %[val18],       %[val19]                \n\t"
+        "mul.s      %[val23],       %[fTmp1],       %[fTmp2]                \n\t"
+        "add.s      %[val22],       %[val22],       %[val23]                \n\t"
+        "add.s      %[val20],       %[val20],       %[val22]                \n\t"
+        "add.s      %[val22],       %[val22],       %[val21]                \n\t"
+        "add.s      %[val21],       %[val21],       %[val23]                \n\t"
+
+        : [fTmp2] "=&f" (fTmp2), [fTmp3] "=&f" (fTmp3), [fTmp4] "=&f" (fTmp4),
+          [val16] "+f" (val16), [val17] "+f" (val17), [val18] "+f" (val18),
+          [val19] "+f" (val19), [val20] "+f" (val20), [val21] "+f" (val21),
+          [val22] "+f" (val22), [val23] "+f" (val23)
+        : [fTmp1] "f" (fTmp1)
+    );
+
+    __asm__ volatile (
+        "sub.s      %[fTmp2],       %[val24],       %[val25]                \n\t"
+        "add.s      %[val24],       %[val24],       %[val25]                \n\t"
+        "sub.s      %[fTmp3],       %[val27],       %[val26]                \n\t"
+        "add.s      %[val26],       %[val27],       %[val26]                \n\t"
+        "sub.s      %[fTmp4],       %[val28],       %[val29]                \n\t"
+        "mul.s      %[val25],       %[fTmp1],       %[fTmp2]                \n\t"
+        "add.s      %[val28],       %[val28],       %[val29]                \n\t"
+        "mul.s      %[val27],       %[fTmp1],       %[fTmp3]                \n\t"
+        "sub.s      %[fTmp2],       %[val31],       %[val30]                \n\t"
+        "mul.s      %[val29],       %[fTmp1],       %[fTmp4]                \n\t"
+        "add.s      %[val30],       %[val31],       %[val30]                \n\t"
+        "add.s      %[val26],       %[val26],       %[val27]                \n\t"
+        "mul.s      %[val31],       %[fTmp1],       %[fTmp2]                \n\t"
+        "add.s      %[val30],       %[val30],       %[val31]                \n\t"
+        "add.s      %[val28],       %[val28],       %[val30]                \n\t"
+        "add.s      %[val30],       %[val30],       %[val29]                \n\t"
+        "add.s      %[val29],       %[val29],       %[val31]                \n\t"
+        "add.s      %[val24],       %[val24],       %[val28]                \n\t"
+        "add.s      %[val28],       %[val28],       %[val26]                \n\t"
+        "add.s      %[val26],       %[val26],       %[val30]                \n\t"
+        "add.s      %[val30],       %[val30],       %[val25]                \n\t"
+        "add.s      %[val25],       %[val25],       %[val29]                \n\t"
+        "add.s      %[val29],       %[val29],       %[val27]                \n\t"
+        "add.s      %[val27],       %[val27],       %[val31]                \n\t"
+
+        : [fTmp2] "=&f" (fTmp2), [fTmp3] "=&f" (fTmp3), [fTmp4] "=&f" (fTmp4),
+          [val24] "+f" (val24), [val25] "+f" (val25), [val26] "+f" (val26),
+          [val27] "+f" (val27), [val28] "+f" (val28), [val29] "+f" (val29),
+          [val30] "+f" (val30), [val31] "+f" (val31)
+        : [fTmp1] "f" (fTmp1)
+    );
+
+    out[ 1] = val16 + val24;
+    out[17] = val17 + val25;
+    out[ 9] = val18 + val26;
+    out[25] = val19 + val27;
+    out[ 5] = val20 + val28;
+    out[21] = val21 + val29;
+    out[13] = val22 + val30;
+    out[29] = val23 + val31;
+    out[ 3] = val24 + val20;
+    out[19] = val25 + val21;
+    out[11] = val26 + val22;
+    out[27] = val27 + val23;
+    out[ 7] = val28 + val18;
+    out[23] = val29 + val19;
+    out[15] = val30 + val17;
+    out[31] = val31;
+}
+
+static void imdct36_mips_float(float *out, float *buf, float *in, float *win)
+{
+    float t0, t1, t2, t3, s0, s1, s2, s3;
+    float tmp[18];
+    /* temporary variables */
+    float in1, in2, in3, in4, in5, in6;
+    float out1, out2, out3, out4, out5;
+    float c1, c2, c3, c4, c5, c6, c7, c8, c9;
+
+    /**
+    * All loops are fully unrolled, and the instructions are scheduled to
+    * minimize pipeline stalls. The instructions of the first two loops are
+    * reorganized in order to eliminate unnecessary reads from and writes
+    * to the array. Values defined in macros and tables are not referenced
+    * here - they are loaded directly into the appropriate variables.
+    */
+
+    /* loop 1 and 2 */
+    __asm__ volatile (
+        "lwc1   %[in1],  17*4(%[in])                                    \t\n"
+        "lwc1   %[in2],  16*4(%[in])                                    \t\n"
+        "lwc1   %[in3],  15*4(%[in])                                    \t\n"
+        "lwc1   %[in4],  14*4(%[in])                                    \t\n"
+        "lwc1   %[in5],  13*4(%[in])                                    \t\n"
+        "lwc1   %[in6],  12*4(%[in])                                    \t\n"
+        "add.s  %[out1], %[in1],  %[in2]                                \t\n"
+        "add.s  %[out2], %[in2],  %[in3]                                \t\n"
+        "add.s  %[out3], %[in3],  %[in4]                                \t\n"
+        "add.s  %[out4], %[in4],  %[in5]                                \t\n"
+        "add.s  %[out5], %[in5],  %[in6]                                \t\n"
+        "lwc1   %[in1],  11*4(%[in])                                    \t\n"
+        "swc1   %[out2], 16*4(%[in])                                    \t\n"
+        "add.s  %[out1], %[out1], %[out3]                               \t\n"
+        "swc1   %[out4], 14*4(%[in])                                    \t\n"
+        "add.s  %[out3], %[out3], %[out5]                               \t\n"
+        "lwc1   %[in2],  10*4(%[in])                                    \t\n"
+        "lwc1   %[in3],  9*4(%[in])                                     \t\n"
+        "swc1   %[out1], 17*4(%[in])                                    \t\n"
+        "lwc1   %[in4],  8*4(%[in])                                     \t\n"
+        "swc1   %[out3], 15*4(%[in])                                    \t\n"
+        "add.s  %[out1], %[in6],  %[in1]                                \t\n"
+        "add.s  %[out2], %[in1],  %[in2]                                \t\n"
+        "add.s  %[out3], %[in2],  %[in3]                                \t\n"
+        "add.s  %[out4], %[in3],  %[in4]                                \t\n"
+        "lwc1   %[in5],  7*4(%[in])                                     \t\n"
+        "swc1   %[out1], 12*4(%[in])                                    \t\n"
+        "add.s  %[out5], %[out5], %[out2]                               \t\n"
+        "swc1   %[out3], 10*4(%[in])                                    \t\n"
+        "add.s  %[out2], %[out2], %[out4]                               \t\n"
+        "lwc1   %[in6],  6*4(%[in])                                     \t\n"
+        "lwc1   %[in1],  5*4(%[in])                                     \t\n"
+        "swc1   %[out5], 13*4(%[in])                                    \t\n"
+        "lwc1   %[in2],  4*4(%[in])                                     \t\n"
+        "swc1   %[out2], 11*4(%[in])                                    \t\n"
+        "add.s  %[out5], %[in4],  %[in5]                                \t\n"
+        "add.s  %[out1], %[in5],  %[in6]                                \t\n"
+        "add.s  %[out2], %[in6],  %[in1]                                \t\n"
+        "add.s  %[out3], %[in1],  %[in2]                                \t\n"
+        "lwc1   %[in3],  3*4(%[in])                                     \t\n"
+        "swc1   %[out5], 8*4(%[in])                                     \t\n"
+        "add.s  %[out4], %[out4], %[out1]                               \t\n"
+        "swc1   %[out2], 6*4(%[in])                                     \t\n"
+        "add.s  %[out1], %[out1], %[out3]                               \t\n"
+        "lwc1   %[in4],  2*4(%[in])                                     \t\n"
+        "lwc1   %[in5],  1*4(%[in])                                     \t\n"
+        "swc1   %[out4], 9*4(%[in])                                     \t\n"
+        "lwc1   %[in6],  0(%[in])                                       \t\n"
+        "swc1   %[out1], 7*4(%[in])                                     \t\n"
+        "add.s  %[out4], %[in2],  %[in3]                                \t\n"
+        "add.s  %[out5], %[in3],  %[in4]                                \t\n"
+        "add.s  %[out1], %[in4],  %[in5]                                \t\n"
+        "add.s  %[out2], %[in5],  %[in6]                                \t\n"
+        "swc1   %[out4], 4*4(%[in])                                     \t\n"
+        "add.s  %[out3], %[out3], %[out5]                               \t\n"
+        "swc1   %[out1], 2*4(%[in])                                     \t\n"
+        "add.s  %[out5], %[out5], %[out2]                               \t\n"
+        "swc1   %[out2], 1*4(%[in])                                     \t\n"
+        "swc1   %[out3], 5*4(%[in])                                     \t\n"
+        "swc1   %[out5], 3*4(%[in])                                     \t\n"
+
+        : [in1] "=&f" (in1), [in2] "=&f" (in2),
+          [in3] "=&f" (in3), [in4] "=&f" (in4),
+          [in5] "=&f" (in5), [in6] "=&f" (in6),
+          [out1] "=&f" (out1), [out2] "=&f" (out2),
+          [out3] "=&f" (out3), [out4] "=&f" (out4),
+          [out5] "=&f" (out5)
+        : [in] "r" (in)
+        : "memory"
+    );
+
+    /* loop 3 */
+    __asm__ volatile (
+        "li.s    %[c1],   0.5                                           \t\n"
+        "lwc1    %[in1],  8*4(%[in])                                    \t\n"
+        "lwc1    %[in2],  16*4(%[in])                                   \t\n"
+        "lwc1    %[in3],  4*4(%[in])                                    \t\n"
+        "lwc1    %[in4],  0(%[in])                                      \t\n"
+        "lwc1    %[in5],  12*4(%[in])                                   \t\n"
+        "li.s    %[c2],   0.93969262078590838405                        \t\n"
+        "add.s   %[t2],   %[in1],  %[in2]                               \t\n"
+        "add.s   %[t0],   %[in1],  %[in3]                               \t\n"
+        "li.s    %[c3],   -0.76604444311897803520                       \t\n"
+        "madd.s  %[t3],   %[in4],  %[in5], %[c1]                        \t\n"
+        "sub.s   %[t1],   %[in4],  %[in5]                               \t\n"
+        "sub.s   %[t2],   %[t2],   %[in3]                               \t\n"
+        "mul.s   %[t0],   %[t0],   %[c2]                                \t\n"
+        "li.s    %[c4],   -0.17364817766693034885                       \t\n"
+        "li.s    %[c5],   -0.86602540378443864676                       \t\n"
+        "li.s    %[c6],   0.98480775301220805936                        \t\n"
+        "nmsub.s %[out1], %[t1],   %[t2],  %[c1]                        \t\n"
+        "add.s   %[out2], %[t1],   %[t2]                                \t\n"
+        "add.s   %[t2],   %[in2],  %[in3]                               \t\n"
+        "sub.s   %[t1],   %[in1],  %[in2]                               \t\n"
+        "sub.s   %[out3], %[t3],   %[t0]                                \t\n"
+        "swc1    %[out1], 6*4(%[tmp])                                   \t\n"
+        "swc1    %[out2], 16*4(%[tmp])                                  \t\n"
+        "mul.s   %[t2],   %[t2],   %[c3]                                \t\n"
+        "mul.s   %[t1],   %[t1],   %[c4]                                \t\n"
+        "add.s   %[out1], %[t3],   %[t0]                                \t\n"
+        "lwc1    %[in1],  10*4(%[in])                                   \t\n"
+        "lwc1    %[in2],  14*4(%[in])                                   \t\n"
+        "sub.s   %[out3], %[out3], %[t2]                                \t\n"
+        "add.s   %[out2], %[t3],   %[t2]                                \t\n"
+        "add.s   %[out1], %[out1], %[t1]                                \t\n"
+        "lwc1    %[in3],  2*4(%[in])                                    \t\n"
+        "lwc1    %[in4],  6*4(%[in])                                    \t\n"
+        "swc1    %[out3], 10*4(%[tmp])                                  \t\n"
+        "sub.s   %[out2], %[out2], %[t1]                                \t\n"
+        "swc1    %[out1], 2*4(%[tmp])                                   \t\n"
+        "add.s   %[out1], %[in1],  %[in2]                               \t\n"
+        "add.s   %[t2],   %[in1],  %[in3]                               \t\n"
+        "sub.s   %[t3],   %[in1],  %[in2]                               \t\n"
+        "swc1    %[out2], 14*4(%[tmp])                                  \t\n"
+        "li.s    %[c7],   -0.34202014332566873304                       \t\n"
+        "sub.s   %[out1], %[out1], %[in3]                               \t\n"
+        "mul.s   %[t2],   %[t2],   %[c6]                                \t\n"
+        "mul.s   %[t3],   %[t3],   %[c7]                                \t\n"
+        "li.s    %[c8],   0.86602540378443864676                        \t\n"
+        "mul.s   %[t0],   %[in4],  %[c8]                                \t\n"
+        "mul.s   %[out1], %[out1], %[c5]                                \t\n"
+        "add.s   %[t1],   %[in2],  %[in3]                               \t\n"
+        "li.s    %[c9],   -0.64278760968653932632                       \t\n"
+        "add.s   %[out2], %[t2],   %[t3]                                \t\n"
+        "lwc1    %[in1],  9*4(%[in])                                    \t\n"
+        "swc1    %[out1], 4*4(%[tmp])                                   \t\n"
+        "mul.s   %[t1],   %[t1],   %[c9]                                \t\n"
+        "lwc1    %[in2],  17*4(%[in])                                   \t\n"
+        "add.s   %[out2], %[out2], %[t0]                                \t\n"
+        "lwc1    %[in3],  5*4(%[in])                                    \t\n"
+        "lwc1    %[in4],  1*4(%[in])                                    \t\n"
+        "add.s   %[out3], %[t2],   %[t1]                                \t\n"
+        "sub.s   %[out1], %[t3],   %[t1]                                \t\n"
+        "swc1    %[out2], 0(%[tmp])                                     \t\n"
+        "lwc1    %[in5],  13*4(%[in])                                   \t\n"
+        "add.s   %[t2],   %[in1],  %[in2]                               \t\n"
+        "sub.s   %[out3], %[out3], %[t0]                                \t\n"
+        "sub.s   %[out1], %[out1], %[t0]                                \t\n"
+        "add.s   %[t0],   %[in1],  %[in3]                               \t\n"
+        "madd.s  %[t3],   %[in4],  %[in5], %[c1]                        \t\n"
+        "sub.s   %[t2],   %[t2],   %[in3]                               \t\n"
+        "swc1    %[out3], 12*4(%[tmp])                                  \t\n"
+        "swc1    %[out1], 8*4(%[tmp])                                   \t\n"
+        "sub.s   %[t1],   %[in4],  %[in5]                               \t\n"
+        "mul.s   %[t0],   %[t0],   %[c2]                                \t\n"
+        "nmsub.s %[out1], %[t1],   %[t2],  %[c1]                        \t\n"
+        "add.s   %[out2], %[t1],   %[t2]                                \t\n"
+        "add.s   %[t2],   %[in2],  %[in3]                               \t\n"
+        "sub.s   %[t1],   %[in1],  %[in2]                               \t\n"
+        "sub.s   %[out3], %[t3],   %[t0]                                \t\n"
+        "swc1    %[out1], 7*4(%[tmp])                                   \t\n"
+        "swc1    %[out2], 17*4(%[tmp])                                  \t\n"
+        "mul.s   %[t2],   %[t2],   %[c3]                                \t\n"
+        "mul.s   %[t1],   %[t1],   %[c4]                                \t\n"
+        "add.s   %[out1], %[t3],   %[t0]                                \t\n"
+        "lwc1    %[in1],  11*4(%[in])                                   \t\n"
+        "lwc1    %[in2],  15*4(%[in])                                   \t\n"
+        "sub.s   %[out3], %[out3], %[t2]                                \t\n"
+        "add.s   %[out2], %[t3],   %[t2]                                \t\n"
+        "add.s   %[out1], %[out1], %[t1]                                \t\n"
+        "lwc1    %[in3],  3*4(%[in])                                    \t\n"
+        "lwc1    %[in4],  7*4(%[in])                                    \t\n"
+        "swc1    %[out3], 11*4(%[tmp])                                  \t\n"
+        "sub.s   %[out2], %[out2], %[t1]                                \t\n"
+        "swc1    %[out1], 3*4(%[tmp])                                   \t\n"
+        "add.s   %[out3], %[in1],  %[in2]                               \t\n"
+        "add.s   %[t2],   %[in1],  %[in3]                               \t\n"
+        "sub.s   %[t3],   %[in1],  %[in2]                               \t\n"
+        "swc1    %[out2], 15*4(%[tmp])                                  \t\n"
+        "mul.s   %[t0],   %[in4],  %[c8]                                \t\n"
+        "sub.s   %[out3], %[out3], %[in3]                               \t\n"
+        "mul.s   %[t2],   %[t2],   %[c6]                                \t\n"
+        "mul.s   %[t3],   %[t3],   %[c7]                                \t\n"
+        "add.s   %[t1],   %[in2],  %[in3]                               \t\n"
+        "mul.s   %[out3], %[out3], %[c5]                                \t\n"
+        "add.s   %[out1], %[t2],   %[t3]                                \t\n"
+        "mul.s   %[t1],   %[t1],   %[c9]                                \t\n"
+        "swc1    %[out3], 5*4(%[tmp])                                   \t\n"
+        "add.s   %[out1], %[out1], %[t0]                                \t\n"
+        "add.s   %[out2], %[t2],   %[t1]                                \t\n"
+        "sub.s   %[out3], %[t3],   %[t1]                                \t\n"
+        "swc1    %[out1], 1*4(%[tmp])                                   \t\n"
+        "sub.s   %[out2], %[out2], %[t0]                                \t\n"
+        "sub.s   %[out3], %[out3], %[t0]                                \t\n"
+        "swc1    %[out2], 13*4(%[tmp])                                  \t\n"
+        "swc1    %[out3], 9*4(%[tmp])                                   \t\n"
+
+        : [t0] "=&f" (t0), [t1] "=&f" (t1),
+          [t2] "=&f" (t2), [t3] "=&f" (t3),
+          [in1] "=&f" (in1), [in2] "=&f" (in2),
+          [in3] "=&f" (in3), [in4] "=&f" (in4),
+          [in5] "=&f" (in5),
+          [out1] "=&f" (out1), [out2] "=&f" (out2),
+          [out3] "=&f" (out3),
+          [c1] "=&f" (c1), [c2] "=&f" (c2),
+          [c3] "=&f" (c3), [c4] "=&f" (c4),
+          [c5] "=&f" (c5), [c6] "=&f" (c6),
+          [c7] "=&f" (c7), [c8] "=&f" (c8),
+          [c9] "=&f" (c9)
+        : [in] "r" (in), [tmp] "r" (tmp)
+        : "memory"
+    );
+
+    /* loop 4 */
+    __asm__ volatile (
+        "lwc1   %[in1],  2*4(%[tmp])                                    \t\n"
+        "lwc1   %[in2],  0(%[tmp])                                      \t\n"
+        "lwc1   %[in3],  3*4(%[tmp])                                    \t\n"
+        "lwc1   %[in4],  1*4(%[tmp])                                    \t\n"
+        "li.s   %[c1],   0.50190991877167369479                         \t\n"
+        "li.s   %[c2],   5.73685662283492756461                         \t\n"
+        "add.s  %[s0],   %[in1], %[in2]                                 \t\n"
+        "sub.s  %[s2],   %[in1], %[in2]                                 \t\n"
+        "add.s  %[s1],   %[in3], %[in4]                                 \t\n"
+        "sub.s  %[s3],   %[in3], %[in4]                                 \t\n"
+        "lwc1   %[in1],  9*4(%[win])                                    \t\n"
+        "lwc1   %[in2],  4*9*4(%[buf])                                  \t\n"
+        "lwc1   %[in3],  8*4(%[win])                                    \t\n"
+        "mul.s  %[s1],   %[s1],  %[c1]                                  \t\n"
+        "mul.s  %[s3],   %[s3],  %[c2]                                  \t\n"
+        "lwc1   %[in4],  4*8*4(%[buf])                                  \t\n"
+        "lwc1   %[in5],  29*4(%[win])                                   \t\n"
+        "lwc1   %[in6],  28*4(%[win])                                   \t\n"
+        "add.s  %[t0],   %[s0],  %[s1]                                  \t\n"
+        "sub.s  %[t1],   %[s0],  %[s1]                                  \t\n"
+        "li.s   %[c1],   0.51763809020504152469                         \t\n"
+        "li.s   %[c2],   1.93185165257813657349                         \t\n"
+        "mul.s  %[out3], %[in5], %[t0]                                  \t\n"
+        "madd.s %[out1], %[in2], %[in1], %[t1]                          \t\n"
+        "madd.s %[out2], %[in4], %[in3], %[t1]                          \t\n"
+        "mul.s  %[out4], %[in6], %[t0]                                  \t\n"
+        "add.s  %[t0],   %[s2],  %[s3]                                  \t\n"
+        "swc1   %[out3], 4*9*4(%[buf])                                  \t\n"
+        "swc1   %[out1], 288*4(%[out])                                  \t\n"
+        "swc1   %[out2], 256*4(%[out])                                  \t\n"
+        "swc1   %[out4], 4*8*4(%[buf])                                  \t\n"
+        "sub.s  %[t1],   %[s2],  %[s3]                                  \t\n"
+        "lwc1   %[in1],  17*4(%[win])                                   \t\n"
+        "lwc1   %[in2],  4*17*4(%[buf])                                 \t\n"
+        "lwc1   %[in3],  0(%[win])                                      \t\n"
+        "lwc1   %[in4],  0(%[buf])                                      \t\n"
+        "lwc1   %[in5],  37*4(%[win])                                   \t\n"
+        "lwc1   %[in6],  20*4(%[win])                                   \t\n"
+        "madd.s %[out1], %[in2], %[in1], %[t1]                          \t\n"
+        "lwc1   %[in1],  6*4(%[tmp])                                    \t\n"
+        "madd.s %[out2], %[in4], %[in3], %[t1]                          \t\n"
+        "mul.s  %[out3], %[t0],  %[in5]                                 \t\n"
+        "mul.s  %[out4], %[t0],  %[in6]                                 \t\n"
+        "swc1   %[out1], 544*4(%[out])                                  \t\n"
+        "lwc1   %[in2],  4*4(%[tmp])                                    \t\n"
+        "swc1   %[out2], 0(%[out])                                      \t\n"
+        "swc1   %[out3], 4*17*4(%[buf])                                 \t\n"
+        "swc1   %[out4], 0(%[buf])                                      \t\n"
+        "lwc1   %[in3],  7*4(%[tmp])                                    \t\n"
+        "add.s  %[s0],   %[in1], %[in2]                                 \t\n"
+        "sub.s  %[s2],   %[in1], %[in2]                                 \t\n"
+        "lwc1   %[in4],  5*4(%[tmp])                                    \t\n"
+        "add.s  %[s1],   %[in3], %[in4]                                 \t\n"
+        "sub.s  %[s3],   %[in3], %[in4]                                 \t\n"
+        "lwc1   %[in1],  10*4(%[win])                                   \t\n"
+        "lwc1   %[in2],  4*10*4(%[buf])                                 \t\n"
+        "lwc1   %[in3],  7*4(%[win])                                    \t\n"
+        "mul.s  %[s1],   %[s1],  %[c1]                                  \t\n"
+        "mul.s  %[s3],   %[s3],  %[c2]                                  \t\n"
+        "add.s  %[t0],   %[s0],  %[s1]                                  \t\n"
+        "sub.s  %[t1],   %[s0],  %[s1]                                  \t\n"
+        "lwc1   %[in4],  4*7*4(%[buf])                                  \t\n"
+        "lwc1   %[in5],  30*4(%[win])                                   \t\n"
+        "lwc1   %[in6],  27*4(%[win])                                   \t\n"
+        "li.s   %[c1],   0.55168895948124587824                         \t\n"
+        "madd.s %[out1], %[in2], %[in1], %[t1]                          \t\n"
+        "madd.s %[out2], %[in4], %[in3], %[t1]                          \t\n"
+        "mul.s  %[out3], %[t0],  %[in5]                                 \t\n"
+        "mul.s  %[out4], %[t0],  %[in6]                                 \t\n"
+        "add.s  %[t0],   %[s2],  %[s3]                                  \t\n"
+        "swc1   %[out1], 320*4(%[out])                                  \t\n"
+        "swc1   %[out2], 224*4(%[out])                                  \t\n"
+        "swc1   %[out3], 4*10*4(%[buf])                                 \t\n"
+        "swc1   %[out4], 4*7*4(%[buf])                                  \t\n"
+        "sub.s  %[t1],   %[s2],  %[s3]                                  \t\n"
+        "lwc1   %[in1],  16*4(%[win])                                   \t\n"
+        "lwc1   %[in2],  4*16*4(%[buf])                                 \t\n"
+        "lwc1   %[in3],  1*4(%[win])                                    \t\n"
+        "lwc1   %[in4],  4*1*4(%[buf])                                  \t\n"
+        "lwc1   %[in5],  36*4(%[win])                                   \t\n"
+        "lwc1   %[in6],  21*4(%[win])                                   \t\n"
+        "madd.s %[out1], %[in2], %[in1], %[t1]                          \t\n"
+        "lwc1   %[in1],  10*4(%[tmp])                                   \t\n"
+        "madd.s %[out2], %[in4], %[in3], %[t1]                          \t\n"
+        "mul.s  %[out3], %[in5], %[t0]                                  \t\n"
+        "mul.s  %[out4], %[in6], %[t0]                                  \t\n"
+        "swc1   %[out1], 512*4(%[out])                                  \t\n"
+        "lwc1   %[in2],  8*4(%[tmp])                                    \t\n"
+        "swc1   %[out2], 32*4(%[out])                                   \t\n"
+        "swc1   %[out3], 4*16*4(%[buf])                                 \t\n"
+        "swc1   %[out4], 4*1*4(%[buf])                                  \t\n"
+        "li.s   %[c2],   1.18310079157624925896                         \t\n"
+        "add.s  %[s0],   %[in1], %[in2]                                 \t\n"
+        "sub.s  %[s2],   %[in1], %[in2]                                 \t\n"
+        "lwc1   %[in3],  11*4(%[tmp])                                   \t\n"
+        "lwc1   %[in4],  9*4(%[tmp])                                    \t\n"
+        "add.s  %[s1],   %[in3], %[in4]                                 \t\n"
+        "sub.s  %[s3],   %[in3], %[in4]                                 \t\n"
+        "lwc1   %[in1],  11*4(%[win])                                   \t\n"
+        "lwc1   %[in2],  4*11*4(%[buf])                                 \t\n"
+        "lwc1   %[in3],  6*4(%[win])                                    \t\n"
+        "mul.s  %[s1],   %[s1],  %[c1]                                  \t\n"
+        "mul.s  %[s3],   %[s3],  %[c2]                                  \t\n"
+        "lwc1   %[in4],  4*6*4(%[buf])                                  \t\n"
+        "lwc1   %[in5],  31*4(%[win])                                   \t\n"
+        "lwc1   %[in6],  26*4(%[win])                                   \t\n"
+        "add.s  %[t0],   %[s0],  %[s1]                                  \t\n"
+        "sub.s  %[t1],   %[s0],  %[s1]                                  \t\n"
+        "mul.s  %[out3], %[t0],  %[in5]                                 \t\n"
+        "mul.s  %[out4], %[t0],  %[in6]                                 \t\n"
+        "add.s  %[t0],   %[s2],  %[s3]                                  \t\n"
+        "madd.s %[out1], %[in2], %[in1], %[t1]                          \t\n"
+        "madd.s %[out2], %[in4], %[in3], %[t1]                          \t\n"
+        "swc1   %[out3], 4*11*4(%[buf])                                 \t\n"
+        "swc1   %[out4], 4*6*4(%[buf])                                  \t\n"
+        "sub.s  %[t1],   %[s2],  %[s3]                                  \t\n"
+        "swc1   %[out1], 352*4(%[out])                                  \t\n"
+        "swc1   %[out2], 192*4(%[out])                                  \t\n"
+        "lwc1   %[in1],  15*4(%[win])                                   \t\n"
+        "lwc1   %[in2],  4*15*4(%[buf])                                 \t\n"
+        "lwc1   %[in3],  2*4(%[win])                                    \t\n"
+        "lwc1   %[in4],  4*2*4(%[buf])                                  \t\n"
+        "lwc1   %[in5],  35*4(%[win])                                   \t\n"
+        "lwc1   %[in6],  22*4(%[win])                                   \t\n"
+        "madd.s %[out1], %[in2], %[in1], %[t1]                          \t\n"
+        "lwc1   %[in1],  14*4(%[tmp])                                   \t\n"
+        "madd.s %[out2], %[in4], %[in3], %[t1]                          \t\n"
+        "mul.s  %[out3], %[t0],  %[in5]                                 \t\n"
+        "mul.s  %[out4], %[t0],  %[in6]                                 \t\n"
+        "swc1   %[out1], 480*4(%[out])                                  \t\n"
+        "lwc1   %[in2],  12*4(%[tmp])                                   \t\n"
+        "swc1   %[out2], 64*4(%[out])                                   \t\n"
+        "swc1   %[out3], 4*15*4(%[buf])                                 \t\n"
+        "swc1   %[out4], 4*2*4(%[buf])                                  \t\n"
+        "lwc1   %[in3],  15*4(%[tmp])                                   \t\n"
+        "add.s  %[s0],   %[in1], %[in2]                                 \t\n"
+        "sub.s  %[s2],   %[in1], %[in2]                                 \t\n"
+        "lwc1   %[in4],  13*4(%[tmp])                                   \t\n"
+        "li.s   %[c1],   0.61038729438072803416                         \t\n"
+        "li.s   %[c2],   0.87172339781054900991                         \t\n"
+        "add.s  %[s1],   %[in3], %[in4]                                 \t\n"
+        "sub.s  %[s3],   %[in3], %[in4]                                 \t\n"
+        "lwc1   %[in1],  12*4(%[win])                                   \t\n"
+        "lwc1   %[in2],  4*12*4(%[buf])                                 \t\n"
+        "lwc1   %[in3],  5*4(%[win])                                    \t\n"
+        "mul.s  %[s1],   %[s1],  %[c1]                                  \t\n"
+        "mul.s  %[s3],   %[s3],  %[c2]                                  \t\n"
+        "lwc1   %[in4],  4*5*4(%[buf])                                  \t\n"
+        "lwc1   %[in5],  32*4(%[win])                                   \t\n"
+        "lwc1   %[in6],  25*4(%[win])                                   \t\n"
+        "add.s  %[t0],   %[s0],  %[s1]                                  \t\n"
+        "sub.s  %[t1],   %[s0],  %[s1]                                  \t\n"
+        "lwc1   %[s0],   16*4(%[tmp])                                   \t\n"
+        "lwc1   %[s1],   17*4(%[tmp])                                   \t\n"
+        "li.s   %[c1],   0.70710678118654752439                         \t\n"
+        "mul.s  %[out3], %[t0],  %[in5]                                 \t\n"
+        "madd.s %[out1], %[in2], %[in1], %[t1]                          \t\n"
+        "madd.s %[out2], %[in4], %[in3], %[t1]                          \t\n"
+        "mul.s  %[out4], %[t0],  %[in6]                                 \t\n"
+        "add.s  %[t0],   %[s2],  %[s3]                                  \t\n"
+        "swc1   %[out3], 4*12*4(%[buf])                                 \t\n"
+        "swc1   %[out1], 384*4(%[out])                                  \t\n"
+        "swc1   %[out2], 160*4(%[out])                                  \t\n"
+        "swc1   %[out4], 4*5*4(%[buf])                                  \t\n"
+        "sub.s  %[t1],   %[s2],  %[s3]                                  \t\n"
+        "lwc1   %[in1],  14*4(%[win])                                   \t\n"
+        "lwc1   %[in2],  4*14*4(%[buf])                                 \t\n"
+        "lwc1   %[in3],  3*4(%[win])                                    \t\n"
+        "lwc1   %[in4],  4*3*4(%[buf])                                  \t\n"
+        "lwc1   %[in5],  34*4(%[win])                                   \t\n"
+        "lwc1   %[in6],  23*4(%[win])                                   \t\n"
+        "madd.s %[out1], %[in2], %[in1], %[t1]                          \t\n"
+        "mul.s  %[s1],   %[s1],  %[c1]                                  \t\n"
+        "madd.s %[out2], %[in4], %[in3], %[t1]                          \t\n"
+        "mul.s  %[out3], %[in5], %[t0]                                  \t\n"
+        "mul.s  %[out4], %[in6], %[t0]                                  \t\n"
+        "swc1   %[out1], 448*4(%[out])                                  \t\n"
+        "add.s  %[t0],   %[s0],  %[s1]                                  \t\n"
+        "swc1   %[out2], 96*4(%[out])                                   \t\n"
+        "swc1   %[out3], 4*14*4(%[buf])                                 \t\n"
+        "swc1   %[out4], 4*3*4(%[buf])                                  \t\n"
+        "sub.s  %[t1],   %[s0],  %[s1]                                  \t\n"
+        "lwc1   %[in1],  13*4(%[win])                                   \t\n"
+        "lwc1   %[in2],  4*13*4(%[buf])                                 \t\n"
+        "lwc1   %[in3],  4*4(%[win])                                    \t\n"
+        "lwc1   %[in4],  4*4*4(%[buf])                                  \t\n"
+        "lwc1   %[in5],  33*4(%[win])                                   \t\n"
+        "lwc1   %[in6],  24*4(%[win])                                   \t\n"
+        "madd.s %[out1], %[in2], %[in1], %[t1]                          \t\n"
+        "madd.s %[out2], %[in4], %[in3], %[t1]                          \t\n"
+        "mul.s  %[out3], %[t0],  %[in5]                                 \t\n"
+        "mul.s  %[out4], %[t0],  %[in6]                                 \t\n"
+        "swc1   %[out1], 416*4(%[out])                                  \t\n"
+        "swc1   %[out2], 128*4(%[out])                                  \t\n"
+        "swc1   %[out3], 4*13*4(%[buf])                                 \t\n"
+        "swc1   %[out4], 4*4*4(%[buf])                                  \t\n"
+
+        : [c1] "=&f" (c1), [c2] "=&f" (c2),
+          [in1] "=&f" (in1), [in2] "=&f" (in2),
+          [in3] "=&f" (in3), [in4] "=&f" (in4),
+          [in5] "=&f" (in5), [in6] "=&f" (in6),
+          [out1] "=&f" (out1), [out2] "=&f" (out2),
+          [out3] "=&f" (out3), [out4] "=&f" (out4),
+          [t0] "=&f" (t0), [t1] "=&f" (t1),
+          [t2] "=&f" (t2), [t3] "=&f" (t3),
+          [s0] "=&f" (s0), [s1] "=&f" (s1),
+          [s2] "=&f" (s2), [s3] "=&f" (s3)
+        : [tmp] "r" (tmp), [win] "r" (win),
+          [buf] "r" (buf), [out] "r" (out)
+        : "memory"
+    );
+}
+
+static void ff_imdct36_blocks_mips_float(float *out, float *buf, float *in,
+                               int count, int switch_point, int block_type)
+{
+    int blk;
+    /* one imdct36_mips_float() call per block, count blocks in total */
+    for (blk = 0; blk < count; blk++) {
+        /* the first two blocks use type 0 when switch_point is set */
+        int type   = (switch_point && blk < 2) ? 0 : block_type;
+        /* odd-numbered blocks index four entries further into the table */
+        float *win = ff_mdct_win_float[type + ((blk & 1) ? 4 : 0)];
+
+        imdct36_mips_float(out, buf, in, win);
+        out += 1;
+        in  += 18;
+        /* buf advances by 1, or by 72 - 3 after every fourth block */
+        buf += ((blk & 3) == 3) ? (72 - 3) : 1;
+    }
+}
+
+void ff_mpadsp_init_mipsfpu(MPADSPContext *s)
+{   /* point the three float hooks of s at the *_mips_float routines */
+    s->dct32_float          = ff_dct32_mips_float;
+    s->imdct36_blocks_float = ff_imdct36_blocks_mips_float;
+    s->apply_window_float   = ff_mpadsp_apply_window_mips_float;
+}
diff --git a/libavcodec/mips/sbrdsp_mips.c b/libavcodec/mips/sbrdsp_mips.c
new file mode 100644
index 0000000..d4460ba
--- /dev/null
+++ b/libavcodec/mips/sbrdsp_mips.c
@@ -0,0 +1,940 @@
+/*
+ * Copyright (c) 2012
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Authors:  Darko Laus      (darko@mips.com)
+ *           Djordje Pesut   (djordje@mips.com)
+ *           Mirjana Vulin   (mvulin@mips.com)
+ *
+ * AAC Spectral Band Replication decoding functions optimized for MIPS
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Reference: libavcodec/sbrdsp.c
+ */
+
+#include "config.h"
+#include "libavcodec/sbrdsp.h"
+
+#if HAVE_INLINE_ASM
+static void sbr_neg_odd_64_mips(float *x)
+{   /* Negates x[1], x[3], ..., x[63] in place by XOR-ing the sign bit of
+     * each 32-bit word with integer instructions; even-indexed elements
+     * are not touched. */
+    int Temp1, Temp2, Temp3, Temp4, Temp5;
+    float *x1    = &x[1];       /* first odd-indexed element */
+    float *x_end = x1 + 64;     /* loop stops when x1 reaches &x[65] */
+
+    /* loop unrolled 4 times: each pass handles x1[0], x1[2], x1[4], x1[6]
+     * (byte offsets 0, 8, 16, 24) and then advances x1 by 32 bytes,
+     * so 8 passes cover the 32 odd-indexed elements */
+    __asm__ volatile (
+        "lui    %[Temp5],   0x8000                  \n\t" /* Temp5 = 0x80000000, the sign-bit mask */
+    "1:                                             \n\t"
+        "lw     %[Temp1],   0(%[x1])                \n\t"
+        "lw     %[Temp2],   8(%[x1])                \n\t"
+        "lw     %[Temp3],   16(%[x1])               \n\t"
+        "lw     %[Temp4],   24(%[x1])               \n\t"
+        "xor    %[Temp1],   %[Temp1],   %[Temp5]    \n\t"
+        "xor    %[Temp2],   %[Temp2],   %[Temp5]    \n\t"
+        "xor    %[Temp3],   %[Temp3],   %[Temp5]    \n\t"
+        "xor    %[Temp4],   %[Temp4],   %[Temp5]    \n\t"
+        "sw     %[Temp1],   0(%[x1])                \n\t"
+        "sw     %[Temp2],   8(%[x1])                \n\t"
+        "sw     %[Temp3],   16(%[x1])               \n\t"
+        "sw     %[Temp4],   24(%[x1])               \n\t"
+        "addiu  %[x1],      %[x1],      32          \n\t"
+        "bne    %[x1],      %[x_end],   1b          \n\t"
+
+        : [Temp1]"=&r"(Temp1), [Temp2]"=&r"(Temp2),
+          [Temp3]"=&r"(Temp3), [Temp4]"=&r"(Temp4),
+          [Temp5]"=&r"(Temp5), [x1]"+r"(x1)
+        : [x_end]"r"(x_end)
+        : "memory" /* x[] is modified through x1 */
+    );
+}
+
+static void sbr_qmf_pre_shuffle_mips(float *z)
+{   /* Fills z[64..127] from z[0..63] using 32-bit integer loads/stores;
+     * negation is done by XOR-ing the sign bit:
+     *   z[64]       =  z[0]
+     *   z[65]       =  z[1]
+     *   z[64+2*k]   = -z[64-k]    for k = 1..31
+     *   z[64+2*k+1] =  z[k+1]     for k = 1..31
+     */
+    int Temp1, Temp2, Temp3, Temp4, Temp5, Temp6;
+    float *z1 = &z[66];     /* write cursor, starts at the k = 1 output pair */
+    float *z2 = &z[59];     /* read cursor for the negated inputs, moves downwards */
+    float *z3 = &z[2];      /* read cursor for the copied inputs, moves upwards */
+    float *z4 = z1 + 60;    /* loop terminates when z1 reaches &z[126] */
+
+    /* loop unrolled 5 times: 6 passes produce the outputs for k = 1..30;
+     * the instructions after the branch handle k = 31 and copy z[0], z[1]
+     * to z[64], z[65] */
+    __asm__ volatile (
+        "lui    %[Temp6],   0x8000                  \n\t" /* Temp6 = 0x80000000, the sign-bit mask */
+    "1:                                             \n\t"
+        "lw     %[Temp1],   0(%[z2])                \n\t"
+        "lw     %[Temp2],   4(%[z2])                \n\t"
+        "lw     %[Temp3],   8(%[z2])                \n\t"
+        "lw     %[Temp4],   12(%[z2])               \n\t"
+        "lw     %[Temp5],   16(%[z2])               \n\t"
+        "xor    %[Temp1],   %[Temp1],   %[Temp6]    \n\t"
+        "xor    %[Temp2],   %[Temp2],   %[Temp6]    \n\t"
+        "xor    %[Temp3],   %[Temp3],   %[Temp6]    \n\t"
+        "xor    %[Temp4],   %[Temp4],   %[Temp6]    \n\t"
+        "xor    %[Temp5],   %[Temp5],   %[Temp6]    \n\t"
+        "addiu  %[z2],      %[z2],      -20         \n\t"
+        "sw     %[Temp1],   32(%[z1])               \n\t" /* negated values land on even slots, in reverse order */
+        "sw     %[Temp2],   24(%[z1])               \n\t"
+        "sw     %[Temp3],   16(%[z1])               \n\t"
+        "sw     %[Temp4],   8(%[z1])                \n\t"
+        "sw     %[Temp5],   0(%[z1])                \n\t"
+        "lw     %[Temp1],   0(%[z3])                \n\t"
+        "lw     %[Temp2],   4(%[z3])                \n\t"
+        "lw     %[Temp3],   8(%[z3])                \n\t"
+        "lw     %[Temp4],   12(%[z3])               \n\t"
+        "lw     %[Temp5],   16(%[z3])               \n\t"
+        "sw     %[Temp1],   4(%[z1])                \n\t" /* plain copies land on odd slots, in order */
+        "sw     %[Temp2],   12(%[z1])               \n\t"
+        "sw     %[Temp3],   20(%[z1])               \n\t"
+        "sw     %[Temp4],   28(%[z1])               \n\t"
+        "sw     %[Temp5],   36(%[z1])               \n\t"
+        "addiu  %[z3],      %[z3],      20          \n\t"
+        "addiu  %[z1],      %[z1],      40          \n\t"
+        "bne    %[z1],      %[z4],      1b          \n\t"
+        "lw     %[Temp1],   132(%[z])               \n\t" /* z[33] */
+        "lw     %[Temp2],   128(%[z])               \n\t" /* z[32] */
+        "lw     %[Temp3],   0(%[z])                 \n\t" /* z[0]  */
+        "lw     %[Temp4],   4(%[z])                 \n\t" /* z[1]  */
+        "xor    %[Temp1],   %[Temp1],   %[Temp6]    \n\t"
+        "sw     %[Temp1],   504(%[z])               \n\t" /* z[126] = -z[33] */
+        "sw     %[Temp2],   508(%[z])               \n\t" /* z[127] =  z[32] */
+        "sw     %[Temp3],   256(%[z])               \n\t" /* z[64]  =  z[0]  */
+        "sw     %[Temp4],   260(%[z])               \n\t" /* z[65]  =  z[1]  */
+
+        : [Temp1]"=&r"(Temp1), [Temp2]"=&r"(Temp2),
+          [Temp3]"=&r"(Temp3), [Temp4]"=&r"(Temp4),
+          [Temp5]"=&r"(Temp5), [Temp6]"=&r"(Temp6),
+          [z1]"+r"(z1), [z2]"+r"(z2), [z3]"+r"(z3)
+        : [z4]"r"(z4), [z]"r"(z)
+        : "memory"
+    );
+}
+
+static void sbr_qmf_post_shuffle_mips(float W[32][2], const float *z)
+{   /* Fills W from z[0..63] using 32-bit integer loads/stores:
+     *   W[k][0] = -z[63-k]
+     *   W[k][1] =  z[k]         for k = 0..31
+     * The negation is an XOR of the sign bit. z is only read; the casts
+     * below merely drop const so the pointers can be asm operands. */
+    int Temp1, Temp2, Temp3, Temp4, Temp5;
+    float *W_ptr = (float *)W;          /* write cursor, 8 floats per pass */
+    float *z1    = (float *)z;          /* ascending read cursor (W[k][1]) */
+    float *z2    = (float *)&z[60];     /* descending read cursor (W[k][0]) */
+    float *z_end = z1 + 32;             /* loop terminates when z1 reaches &z[32] */
+
+     /* loop unrolled 4 times: each pass writes four W[k] pairs, 8 passes in total */
+    __asm__ volatile (
+        "lui    %[Temp5],   0x8000                  \n\t" /* Temp5 = 0x80000000, the sign-bit mask */
+    "1:                                             \n\t"
+        "lw     %[Temp1],   0(%[z2])                \n\t"
+        "lw     %[Temp2],   4(%[z2])                \n\t"
+        "lw     %[Temp3],   8(%[z2])                \n\t"
+        "lw     %[Temp4],   12(%[z2])               \n\t"
+        "xor    %[Temp1],   %[Temp1],   %[Temp5]    \n\t"
+        "xor    %[Temp2],   %[Temp2],   %[Temp5]    \n\t"
+        "xor    %[Temp3],   %[Temp3],   %[Temp5]    \n\t"
+        "xor    %[Temp4],   %[Temp4],   %[Temp5]    \n\t"
+        "addiu  %[z2],      %[z2],      -16         \n\t"
+        "sw     %[Temp1],   24(%[W_ptr])            \n\t" /* negated values fill the [0] slots in reverse order */
+        "sw     %[Temp2],   16(%[W_ptr])            \n\t"
+        "sw     %[Temp3],   8(%[W_ptr])             \n\t"
+        "sw     %[Temp4],   0(%[W_ptr])             \n\t"
+        "lw     %[Temp1],   0(%[z1])                \n\t"
+        "lw     %[Temp2],   4(%[z1])                \n\t"
+        "lw     %[Temp3],   8(%[z1])                \n\t"
+        "lw     %[Temp4],   12(%[z1])               \n\t"
+        "sw     %[Temp1],   4(%[W_ptr])             \n\t" /* plain copies fill the [1] slots in order */
+        "sw     %[Temp2],   12(%[W_ptr])            \n\t"
+        "sw     %[Temp3],   20(%[W_ptr])            \n\t"
+        "sw     %[Temp4],   28(%[W_ptr])            \n\t"
+        "addiu  %[z1],      %[z1],      16          \n\t"
+        "addiu  %[W_ptr],   %[W_ptr],   32          \n\t"
+        "bne    %[z1],      %[z_end],   1b          \n\t"
+
+        : [Temp1]"=&r"(Temp1), [Temp2]"=&r"(Temp2),
+          [Temp3]"=&r"(Temp3), [Temp4]"=&r"(Temp4),
+          [Temp5]"=&r"(Temp5), [z1]"+r"(z1),
+          [z2]"+r"(z2), [W_ptr]"+r"(W_ptr)
+        : [z_end]"r"(z_end)
+        : "memory"
+    );
+}
+
+#if HAVE_MIPSFPU
+static void sbr_sum64x5_mips(float *z)
+{   /* For k = 0..63 computes, in place,
+     *   z[k] = z[k] + z[k+64] + z[k+128] + z[k+192] + z[k+256]
+     * so z must provide at least 320 readable floats. */
+    int k;
+    float *z1;
+    float f1, f2, f3, f4, f5, f6, f7, f8;
+    for (k = 0; k < 64; k += 8) {
+
+        z1 = &z[k];
+
+         /* loop unrolled 8 times: one asm block sums 8 consecutive k values.
+          * Byte offsets 256/512/768/1024 are the +64/+128/+192/+256 float
+          * strides. $f0-$f15 are used as scratch registers and are declared
+          * as clobbers; f1..f8 hold the eight running sums. */
+        __asm__ volatile (
+            "lwc1   $f0,    0(%[z1])        \n\t"
+            "lwc1   $f1,    256(%[z1])      \n\t"
+            "lwc1   $f2,    4(%[z1])        \n\t"
+            "lwc1   $f3,    260(%[z1])      \n\t"
+            "lwc1   $f4,    8(%[z1])        \n\t"
+            "add.s  %[f1],  $f0,    $f1     \n\t"
+            "lwc1   $f5,    264(%[z1])      \n\t"
+            "add.s  %[f2],  $f2,    $f3     \n\t"
+            "lwc1   $f6,    12(%[z1])       \n\t"
+            "lwc1   $f7,    268(%[z1])      \n\t"
+            "add.s  %[f3],  $f4,    $f5     \n\t"
+            "lwc1   $f8,    16(%[z1])       \n\t"
+            "lwc1   $f9,    272(%[z1])      \n\t"
+            "add.s  %[f4],  $f6,    $f7     \n\t"
+            "lwc1   $f10,   20(%[z1])       \n\t"
+            "lwc1   $f11,   276(%[z1])      \n\t"
+            "add.s  %[f5],  $f8,    $f9     \n\t"
+            "lwc1   $f12,   24(%[z1])       \n\t"
+            "lwc1   $f13,   280(%[z1])      \n\t"
+            "add.s  %[f6],  $f10,   $f11    \n\t"
+            "lwc1   $f14,   28(%[z1])       \n\t"
+            "lwc1   $f15,   284(%[z1])      \n\t"
+            "add.s  %[f7],  $f12,   $f13    \n\t"
+            "lwc1   $f0,    512(%[z1])      \n\t"
+            "lwc1   $f1,    516(%[z1])      \n\t"
+            "add.s  %[f8],  $f14,   $f15    \n\t"
+            "lwc1   $f2,    520(%[z1])      \n\t"
+            "add.s  %[f1],  %[f1],  $f0     \n\t"
+            "add.s  %[f2],  %[f2],  $f1     \n\t"
+            "lwc1   $f3,    524(%[z1])      \n\t"
+            "add.s  %[f3],  %[f3],  $f2     \n\t"
+            "lwc1   $f4,    528(%[z1])      \n\t"
+            "lwc1   $f5,    532(%[z1])      \n\t"
+            "add.s  %[f4],  %[f4],  $f3     \n\t"
+            "lwc1   $f6,    536(%[z1])      \n\t"
+            "add.s  %[f5],  %[f5],  $f4     \n\t"
+            "add.s  %[f6],  %[f6],  $f5     \n\t"
+            "lwc1   $f7,    540(%[z1])      \n\t"
+            "add.s  %[f7],  %[f7],  $f6     \n\t"
+            "lwc1   $f0,    768(%[z1])      \n\t"
+            "lwc1   $f1,    772(%[z1])      \n\t"
+            "add.s  %[f8],  %[f8],  $f7     \n\t"
+            "lwc1   $f2,    776(%[z1])      \n\t"
+            "add.s  %[f1],  %[f1],  $f0     \n\t"
+            "add.s  %[f2],  %[f2],  $f1     \n\t"
+            "lwc1   $f3,    780(%[z1])      \n\t"
+            "add.s  %[f3],  %[f3],  $f2     \n\t"
+            "lwc1   $f4,    784(%[z1])      \n\t"
+            "lwc1   $f5,    788(%[z1])      \n\t"
+            "add.s  %[f4],  %[f4],  $f3     \n\t"
+            "lwc1   $f6,    792(%[z1])      \n\t"
+            "add.s  %[f5],  %[f5],  $f4     \n\t"
+            "add.s  %[f6],  %[f6],  $f5     \n\t"
+            "lwc1   $f7,    796(%[z1])      \n\t"
+            "add.s  %[f7],  %[f7],  $f6     \n\t"
+            "lwc1   $f0,    1024(%[z1])     \n\t"
+            "lwc1   $f1,    1028(%[z1])     \n\t"
+            "add.s  %[f8],  %[f8],  $f7     \n\t"
+            "lwc1   $f2,    1032(%[z1])     \n\t"
+            "add.s  %[f1],  %[f1],  $f0     \n\t"
+            "add.s  %[f2],  %[f2],  $f1     \n\t"
+            "lwc1   $f3,    1036(%[z1])     \n\t"
+            "add.s  %[f3],  %[f3],  $f2     \n\t"
+            "lwc1   $f4,    1040(%[z1])     \n\t"
+            "lwc1   $f5,    1044(%[z1])     \n\t"
+            "add.s  %[f4],  %[f4],  $f3     \n\t"
+            "lwc1   $f6,    1048(%[z1])     \n\t"
+            "add.s  %[f5],  %[f5],  $f4     \n\t"
+            "add.s  %[f6],  %[f6],  $f5     \n\t"
+            "lwc1   $f7,    1052(%[z1])     \n\t"
+            "add.s  %[f7],  %[f7],  $f6     \n\t"
+            "swc1   %[f1],  0(%[z1])        \n\t"
+            "swc1   %[f2],  4(%[z1])        \n\t"
+            "add.s  %[f8],  %[f8],  $f7     \n\t"
+            "swc1   %[f3],  8(%[z1])        \n\t"
+            "swc1   %[f4],  12(%[z1])       \n\t"
+            "swc1   %[f5],  16(%[z1])       \n\t"
+            "swc1   %[f6],  20(%[z1])       \n\t"
+            "swc1   %[f7],  24(%[z1])       \n\t"
+            "swc1   %[f8],  28(%[z1])       \n\t"
+
+            : [f1]"=&f"(f1), [f2]"=&f"(f2), [f3]"=&f"(f3),
+              [f4]"=&f"(f4), [f5]"=&f"(f5), [f6]"=&f"(f6),
+              [f7]"=&f"(f7), [f8]"=&f"(f8)
+            : [z1]"r"(z1)
+            : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5",
+              "$f6", "$f7", "$f8", "$f9", "$f10", "$f11",
+              "$f12", "$f13", "$f14", "$f15",
+              "memory"
+        );
+    }
+}
+
+/**
+ * Sum of squares of the first n complex samples of x.
+ *
+ * Samples are consumed two at a time ((n >> 1) groups of four floats), so an
+ * odd n is effectively rounded down to the next even value. Real parts are
+ * accumulated in one partial sum and imaginary parts in another; the two are
+ * added at the end.
+ *
+ * @param x  array of complex samples, x[i][0] = real, x[i][1] = imaginary
+ * @param n  number of complex samples to sum
+ * @return   sum over the processed samples of re*re + im*im
+ */
+static float sbr_sum_square_mips(float (*x)[2], int n)
+{
+    float sum0 = 0.0f, sum1 = 0.0f;
+    float *p_x;
+    float temp0, temp1, temp2, temp3;
+    float *loop_end;
+
+    /* The assembly below is software-pipelined: it preloads one group of four
+     * floats, always executes the loop body once (loading the next group) and
+     * then drains the last group after the loop. It therefore needs at least
+     * two groups, i.e. n >= 4. For smaller n, p_x would step past loop_end on
+     * the first pass and the bne would never terminate, so handle those cases
+     * here in plain C. */
+    if (n < 4) {
+        if (n >= 2) {
+            sum0 += x[0][0] * x[0][0];
+            sum1 += x[0][1] * x[0][1];
+            sum0 += x[1][0] * x[1][0];
+            sum1 += x[1][1] * x[1][1];
+        }
+        return sum0 + sum1;
+    }
+
+    p_x = &x[0][0];
+    /* address of the last group; the loop exits when p_x reaches it */
+    loop_end = p_x + (n >> 1)*4 - 4;
+
+    __asm__ volatile (
+        ".set      push                                             \n\t"
+        ".set      noreorder                                        \n\t"
+        "lwc1      %[temp0],   0(%[p_x])                            \n\t"
+        "lwc1      %[temp1],   4(%[p_x])                            \n\t"
+        "lwc1      %[temp2],   8(%[p_x])                            \n\t"
+        "lwc1      %[temp3],   12(%[p_x])                           \n\t"
+    "1:                                                             \n\t"
+        "addiu     %[p_x],     %[p_x],       16                     \n\t"
+        "madd.s    %[sum0],    %[sum0],      %[temp0],   %[temp0]   \n\t"
+        "lwc1      %[temp0],   0(%[p_x])                            \n\t"
+        "madd.s    %[sum1],    %[sum1],      %[temp1],   %[temp1]   \n\t"
+        "lwc1      %[temp1],   4(%[p_x])                            \n\t"
+        "madd.s    %[sum0],    %[sum0],      %[temp2],   %[temp2]   \n\t"
+        "lwc1      %[temp2],   8(%[p_x])                            \n\t"
+        "madd.s    %[sum1],    %[sum1],      %[temp3],   %[temp3]   \n\t"
+        "bne       %[p_x],     %[loop_end],  1b                     \n\t"
+        " lwc1     %[temp3],   12(%[p_x])                           \n\t" /* branch delay slot */
+        "madd.s    %[sum0],    %[sum0],      %[temp0],   %[temp0]   \n\t"
+        "madd.s    %[sum1],    %[sum1],      %[temp1],   %[temp1]   \n\t"
+        "madd.s    %[sum0],    %[sum0],      %[temp2],   %[temp2]   \n\t"
+        "madd.s    %[sum1],    %[sum1],      %[temp3],   %[temp3]   \n\t"
+        ".set      pop                                              \n\t"
+
+        : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
+          [temp3]"=&f"(temp3), [sum0]"+f"(sum0), [sum1]"+f"(sum1),
+          [p_x]"+r"(p_x)
+        : [loop_end]"r"(loop_end)
+        : "memory"
+    );
+    return sum0 + sum1;
+}
+
+static void sbr_qmf_deint_bfly_mips(float *v, const float *src0, const float *src1)
+{   /* For i = 0..63 writes
+     *   v[i]       = src0[i] - src1[63 - i]
+     *   v[127 - i] = src0[i] + src1[63 - i]
+     * src0 and src1 are only read; the casts below drop const so the
+     * pointers can be used as read-write asm operands. */
+    int i;
+    float temp0, temp1, temp2, temp3, temp4, temp5;
+    float temp6, temp7, temp8, temp9, temp10, temp11;
+    float *v0 = v;                          /* ascending cursor: differences */
+    float *v1 = &v[127];                    /* descending cursor: sums */
+    float *psrc0 = (float*)src0;            /* ascending read cursor */
+    float *psrc1 = (float*)&src1[63];       /* descending read cursor */
+
+    for (i = 0; i < 4; i++) {
+
+         /* loop unrolled 16 times: each asm block handles 16 values of the
+          * index in four groups of four, then moves all four pointers by
+          * 64 bytes (16 floats); four passes cover all 64 indices */
+        __asm__ volatile(
+            "lwc1       %[temp0],   0(%[src0])             \n\t"
+            "lwc1       %[temp1],   0(%[src1])             \n\t"
+            "lwc1       %[temp3],   4(%[src0])             \n\t"
+            "lwc1       %[temp4],   -4(%[src1])            \n\t"
+            "lwc1       %[temp6],   8(%[src0])             \n\t"
+            "lwc1       %[temp7],   -8(%[src1])            \n\t"
+            "lwc1       %[temp9],   12(%[src0])            \n\t"
+            "lwc1       %[temp10],  -12(%[src1])           \n\t"
+            "add.s      %[temp2],   %[temp0],   %[temp1]   \n\t"
+            "add.s      %[temp5],   %[temp3],   %[temp4]   \n\t"
+            "add.s      %[temp8],   %[temp6],   %[temp7]   \n\t"
+            "add.s      %[temp11],  %[temp9],   %[temp10]  \n\t"
+            "sub.s      %[temp0],   %[temp0],   %[temp1]   \n\t"
+            "sub.s      %[temp3],   %[temp3],   %[temp4]   \n\t"
+            "sub.s      %[temp6],   %[temp6],   %[temp7]   \n\t"
+            "sub.s      %[temp9],   %[temp9],   %[temp10]  \n\t"
+            "swc1       %[temp2],   0(%[v1])               \n\t"
+            "swc1       %[temp0],   0(%[v0])               \n\t"
+            "swc1       %[temp5],   -4(%[v1])              \n\t"
+            "swc1       %[temp3],   4(%[v0])               \n\t"
+            "swc1       %[temp8],   -8(%[v1])              \n\t"
+            "swc1       %[temp6],   8(%[v0])               \n\t"
+            "swc1       %[temp11],  -12(%[v1])             \n\t"
+            "swc1       %[temp9],   12(%[v0])              \n\t"
+            "lwc1       %[temp0],   16(%[src0])            \n\t"
+            "lwc1       %[temp1],   -16(%[src1])           \n\t"
+            "lwc1       %[temp3],   20(%[src0])            \n\t"
+            "lwc1       %[temp4],   -20(%[src1])           \n\t"
+            "lwc1       %[temp6],   24(%[src0])            \n\t"
+            "lwc1       %[temp7],   -24(%[src1])           \n\t"
+            "lwc1       %[temp9],   28(%[src0])            \n\t"
+            "lwc1       %[temp10],  -28(%[src1])           \n\t"
+            "add.s      %[temp2],   %[temp0],   %[temp1]   \n\t"
+            "add.s      %[temp5],   %[temp3],   %[temp4]   \n\t"
+            "add.s      %[temp8],   %[temp6],   %[temp7]   \n\t"
+            "add.s      %[temp11],  %[temp9],   %[temp10]  \n\t"
+            "sub.s      %[temp0],   %[temp0],   %[temp1]   \n\t"
+            "sub.s      %[temp3],   %[temp3],   %[temp4]   \n\t"
+            "sub.s      %[temp6],   %[temp6],   %[temp7]   \n\t"
+            "sub.s      %[temp9],   %[temp9],   %[temp10]  \n\t"
+            "swc1       %[temp2],   -16(%[v1])             \n\t"
+            "swc1       %[temp0],   16(%[v0])              \n\t"
+            "swc1       %[temp5],   -20(%[v1])             \n\t"
+            "swc1       %[temp3],   20(%[v0])              \n\t"
+            "swc1       %[temp8],   -24(%[v1])             \n\t"
+            "swc1       %[temp6],   24(%[v0])              \n\t"
+            "swc1       %[temp11],  -28(%[v1])             \n\t"
+            "swc1       %[temp9],   28(%[v0])              \n\t"
+            "lwc1       %[temp0],   32(%[src0])            \n\t"
+            "lwc1       %[temp1],   -32(%[src1])           \n\t"
+            "lwc1       %[temp3],   36(%[src0])            \n\t"
+            "lwc1       %[temp4],   -36(%[src1])           \n\t"
+            "lwc1       %[temp6],   40(%[src0])            \n\t"
+            "lwc1       %[temp7],   -40(%[src1])           \n\t"
+            "lwc1       %[temp9],   44(%[src0])            \n\t"
+            "lwc1       %[temp10],  -44(%[src1])           \n\t"
+            "add.s      %[temp2],   %[temp0],   %[temp1]   \n\t"
+            "add.s      %[temp5],   %[temp3],   %[temp4]   \n\t"
+            "add.s      %[temp8],   %[temp6],   %[temp7]   \n\t"
+            "add.s      %[temp11],  %[temp9],   %[temp10]  \n\t"
+            "sub.s      %[temp0],   %[temp0],   %[temp1]   \n\t"
+            "sub.s      %[temp3],   %[temp3],   %[temp4]   \n\t"
+            "sub.s      %[temp6],   %[temp6],   %[temp7]   \n\t"
+            "sub.s      %[temp9],   %[temp9],   %[temp10]  \n\t"
+            "swc1       %[temp2],   -32(%[v1])             \n\t"
+            "swc1       %[temp0],   32(%[v0])              \n\t"
+            "swc1       %[temp5],   -36(%[v1])             \n\t"
+            "swc1       %[temp3],   36(%[v0])              \n\t"
+            "swc1       %[temp8],   -40(%[v1])             \n\t"
+            "swc1       %[temp6],   40(%[v0])              \n\t"
+            "swc1       %[temp11],  -44(%[v1])             \n\t"
+            "swc1       %[temp9],   44(%[v0])              \n\t"
+            "lwc1       %[temp0],   48(%[src0])            \n\t"
+            "lwc1       %[temp1],   -48(%[src1])           \n\t"
+            "lwc1       %[temp3],   52(%[src0])            \n\t"
+            "lwc1       %[temp4],   -52(%[src1])           \n\t"
+            "lwc1       %[temp6],   56(%[src0])            \n\t"
+            "lwc1       %[temp7],   -56(%[src1])           \n\t"
+            "lwc1       %[temp9],   60(%[src0])            \n\t"
+            "lwc1       %[temp10],  -60(%[src1])           \n\t"
+            "add.s      %[temp2],   %[temp0],   %[temp1]   \n\t"
+            "add.s      %[temp5],   %[temp3],   %[temp4]   \n\t"
+            "add.s      %[temp8],   %[temp6],   %[temp7]   \n\t"
+            "add.s      %[temp11],  %[temp9],   %[temp10]  \n\t"
+            "sub.s      %[temp0],   %[temp0],   %[temp1]   \n\t"
+            "sub.s      %[temp3],   %[temp3],   %[temp4]   \n\t"
+            "sub.s      %[temp6],   %[temp6],   %[temp7]   \n\t"
+            "sub.s      %[temp9],   %[temp9],   %[temp10]  \n\t"
+            "swc1       %[temp2],   -48(%[v1])             \n\t"
+            "swc1       %[temp0],   48(%[v0])              \n\t"
+            "swc1       %[temp5],   -52(%[v1])             \n\t"
+            "swc1       %[temp3],   52(%[v0])              \n\t"
+            "swc1       %[temp8],   -56(%[v1])             \n\t"
+            "swc1       %[temp6],   56(%[v0])              \n\t"
+            "swc1       %[temp11],  -60(%[v1])             \n\t"
+            "swc1       %[temp9],   60(%[v0])              \n\t"
+            "addiu      %[src0],    %[src0],    64         \n\t"
+            "addiu      %[src1],    %[src1],    -64        \n\t"
+            "addiu      %[v0],      %[v0],      64         \n\t"
+            "addiu      %[v1],      %[v1],      -64        \n\t"
+
+            : [v0]"+r"(v0), [v1]"+r"(v1), [src0]"+r"(psrc0), [src1]"+r"(psrc1),
+              [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
+              [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
+              [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), [temp8]"=&f"(temp8),
+              [temp9]"=&f"(temp9), [temp10]"=&f"(temp10), [temp11]"=&f"(temp11)
+            :
+            :"memory"
+        );
+    }
+}
+
+static void sbr_autocorrelate_mips(const float x[40][2], float phi[3][2][2])
+{
+    int i;
+    float real_sum_0 = 0.0f;
+    float real_sum_1 = 0.0f;
+    float real_sum_2 = 0.0f;
+    float imag_sum_1 = 0.0f;
+    float imag_sum_2 = 0.0f;
+    float *p_x, *p_phi;
+    float temp0, temp1, temp2, temp3, temp4, temp5, temp6;
+    float temp7, temp_r, temp_r1, temp_r2, temp_r3, temp_r4;
+    p_x = (float*)&x[0][0];
+    p_phi = &phi[0][0][0];
+
+    __asm__ volatile (
+        "lwc1    %[temp0],      8(%[p_x])                           \n\t"
+        "lwc1    %[temp1],      12(%[p_x])                          \n\t"
+        "lwc1    %[temp2],      16(%[p_x])                          \n\t"
+        "lwc1    %[temp3],      20(%[p_x])                          \n\t"
+        "lwc1    %[temp4],      24(%[p_x])                          \n\t"
+        "lwc1    %[temp5],      28(%[p_x])                          \n\t"
+        "mul.s   %[temp_r],     %[temp1],      %[temp1]             \n\t"
+        "mul.s   %[temp_r1],    %[temp1],      %[temp3]             \n\t"
+        "mul.s   %[temp_r2],    %[temp1],      %[temp2]             \n\t"
+        "mul.s   %[temp_r3],    %[temp1],      %[temp5]             \n\t"
+        "mul.s   %[temp_r4],    %[temp1],      %[temp4]             \n\t"
+        "madd.s  %[temp_r],     %[temp_r],     %[temp0],  %[temp0]  \n\t"
+        "madd.s  %[temp_r1],    %[temp_r1],    %[temp0],  %[temp2]  \n\t"
+        "msub.s  %[temp_r2],    %[temp_r2],    %[temp0],  %[temp3]  \n\t"
+        "madd.s  %[temp_r3],    %[temp_r3],    %[temp0],  %[temp4]  \n\t"
+        "msub.s  %[temp_r4],    %[temp_r4],    %[temp0],  %[temp5]  \n\t"
+        "add.s   %[real_sum_0], %[real_sum_0], %[temp_r]            \n\t"
+        "add.s   %[real_sum_1], %[real_sum_1], %[temp_r1]           \n\t"
+        "add.s   %[imag_sum_1], %[imag_sum_1], %[temp_r2]           \n\t"
+        "add.s   %[real_sum_2], %[real_sum_2], %[temp_r3]           \n\t"
+        "add.s   %[imag_sum_2], %[imag_sum_2], %[temp_r4]           \n\t"
+        "addiu   %[p_x],        %[p_x],        8                    \n\t"
+
+        : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
+          [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
+          [real_sum_0]"+f"(real_sum_0), [real_sum_1]"+f"(real_sum_1),
+          [imag_sum_1]"+f"(imag_sum_1), [real_sum_2]"+f"(real_sum_2),
+          [temp_r]"=&f"(temp_r), [temp_r1]"=&f"(temp_r1), [temp_r2]"=&f"(temp_r2),
+          [temp_r3]"=&f"(temp_r3), [temp_r4]"=&f"(temp_r4),
+          [p_x]"+r"(p_x), [imag_sum_2]"+f"(imag_sum_2)
+        :
+        : "memory"
+    );
+
+    for (i = 0; i < 12; i++) {
+        __asm__ volatile (
+            "lwc1    %[temp0],      8(%[p_x])                           \n\t"
+            "lwc1    %[temp1],      12(%[p_x])                          \n\t"
+            "lwc1    %[temp2],      16(%[p_x])                          \n\t"
+            "lwc1    %[temp3],      20(%[p_x])                          \n\t"
+            "lwc1    %[temp4],      24(%[p_x])                          \n\t"
+            "lwc1    %[temp5],      28(%[p_x])                          \n\t"
+            "mul.s   %[temp_r],     %[temp1],      %[temp1]             \n\t"
+            "mul.s   %[temp_r1],    %[temp1],      %[temp3]             \n\t"
+            "mul.s   %[temp_r2],    %[temp1],      %[temp2]             \n\t"
+            "mul.s   %[temp_r3],    %[temp1],      %[temp5]             \n\t"
+            "mul.s   %[temp_r4],    %[temp1],      %[temp4]             \n\t"
+            "madd.s  %[temp_r],     %[temp_r],     %[temp0],  %[temp0]  \n\t"
+            "madd.s  %[temp_r1],    %[temp_r1],    %[temp0],  %[temp2]  \n\t"
+            "msub.s  %[temp_r2],    %[temp_r2],    %[temp0],  %[temp3]  \n\t"
+            "madd.s  %[temp_r3],    %[temp_r3],    %[temp0],  %[temp4]  \n\t"
+            "msub.s  %[temp_r4],    %[temp_r4],    %[temp0],  %[temp5]  \n\t"
+            "add.s   %[real_sum_0], %[real_sum_0], %[temp_r]            \n\t"
+            "add.s   %[real_sum_1], %[real_sum_1], %[temp_r1]           \n\t"
+            "add.s   %[imag_sum_1], %[imag_sum_1], %[temp_r2]           \n\t"
+            "add.s   %[real_sum_2], %[real_sum_2], %[temp_r3]           \n\t"
+            "add.s   %[imag_sum_2], %[imag_sum_2], %[temp_r4]           \n\t"
+            "lwc1    %[temp0],      32(%[p_x])                          \n\t"
+            "lwc1    %[temp1],      36(%[p_x])                          \n\t"
+            "mul.s   %[temp_r],     %[temp3],      %[temp3]             \n\t"
+            "mul.s   %[temp_r1],    %[temp3],      %[temp5]             \n\t"
+            "mul.s   %[temp_r2],    %[temp3],      %[temp4]             \n\t"
+            "mul.s   %[temp_r3],    %[temp3],      %[temp1]             \n\t"
+            "mul.s   %[temp_r4],    %[temp3],      %[temp0]             \n\t"
+            "madd.s  %[temp_r],     %[temp_r],     %[temp2],  %[temp2]  \n\t"
+            "madd.s  %[temp_r1],    %[temp_r1],    %[temp2],  %[temp4]  \n\t"
+            "msub.s  %[temp_r2],    %[temp_r2],    %[temp2],  %[temp5]  \n\t"
+            "madd.s  %[temp_r3],    %[temp_r3],    %[temp2],  %[temp0]  \n\t"
+            "msub.s  %[temp_r4],    %[temp_r4],    %[temp2],  %[temp1]  \n\t"
+            "add.s   %[real_sum_0], %[real_sum_0], %[temp_r]            \n\t"
+            "add.s   %[real_sum_1], %[real_sum_1], %[temp_r1]           \n\t"
+            "add.s   %[imag_sum_1], %[imag_sum_1], %[temp_r2]           \n\t"
+            "add.s   %[real_sum_2], %[real_sum_2], %[temp_r3]           \n\t"
+            "add.s   %[imag_sum_2], %[imag_sum_2], %[temp_r4]           \n\t"
+            "lwc1    %[temp2],      40(%[p_x])                          \n\t"
+            "lwc1    %[temp3],      44(%[p_x])                          \n\t"
+            "mul.s   %[temp_r],     %[temp5],      %[temp5]             \n\t"
+            "mul.s   %[temp_r1],    %[temp5],      %[temp1]             \n\t"
+            "mul.s   %[temp_r2],    %[temp5],      %[temp0]             \n\t"
+            "mul.s   %[temp_r3],    %[temp5],      %[temp3]             \n\t"
+            "mul.s   %[temp_r4],    %[temp5],      %[temp2]             \n\t"
+            "madd.s  %[temp_r],     %[temp_r],     %[temp4],  %[temp4]  \n\t"
+            "madd.s  %[temp_r1],    %[temp_r1],    %[temp4],  %[temp0]  \n\t"
+            "msub.s  %[temp_r2],    %[temp_r2],    %[temp4],  %[temp1]  \n\t"
+            "madd.s  %[temp_r3],    %[temp_r3],    %[temp4],  %[temp2]  \n\t"
+            "msub.s  %[temp_r4],    %[temp_r4],    %[temp4],  %[temp3]  \n\t"
+            "add.s   %[real_sum_0], %[real_sum_0], %[temp_r]            \n\t"
+            "add.s   %[real_sum_1], %[real_sum_1], %[temp_r1]           \n\t"
+            "add.s   %[imag_sum_1], %[imag_sum_1], %[temp_r2]           \n\t"
+            "add.s   %[real_sum_2], %[real_sum_2], %[temp_r3]           \n\t"
+            "add.s   %[imag_sum_2], %[imag_sum_2], %[temp_r4]           \n\t"
+            "addiu   %[p_x],        %[p_x],        24                   \n\t"
+
+            : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
+              [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
+              [real_sum_0]"+f"(real_sum_0), [real_sum_1]"+f"(real_sum_1),
+              [imag_sum_1]"+f"(imag_sum_1), [real_sum_2]"+f"(real_sum_2),
+              [temp_r]"=&f"(temp_r), [temp_r1]"=&f"(temp_r1),
+              [temp_r2]"=&f"(temp_r2), [temp_r3]"=&f"(temp_r3),
+              [temp_r4]"=&f"(temp_r4), [p_x]"+r"(p_x),
+              [imag_sum_2]"+f"(imag_sum_2)
+            :
+            : "memory"
+        );
+    }
+    __asm__ volatile (
+        "lwc1    %[temp0],    -296(%[p_x])                        \n\t"
+        "lwc1    %[temp1],    -292(%[p_x])                        \n\t"
+        "lwc1    %[temp2],    8(%[p_x])                           \n\t"
+        "lwc1    %[temp3],    12(%[p_x])                          \n\t"
+        "lwc1    %[temp4],    -288(%[p_x])                        \n\t"
+        "lwc1    %[temp5],    -284(%[p_x])                        \n\t"
+        "lwc1    %[temp6],    -280(%[p_x])                        \n\t"
+        "lwc1    %[temp7],    -276(%[p_x])                        \n\t"
+        "madd.s  %[temp_r],   %[real_sum_0], %[temp0],  %[temp0]  \n\t"
+        "madd.s  %[temp_r1],  %[real_sum_0], %[temp2],  %[temp2]  \n\t"
+        "madd.s  %[temp_r2],  %[real_sum_1], %[temp0],  %[temp4]  \n\t"
+        "madd.s  %[temp_r3],  %[imag_sum_1], %[temp0],  %[temp5]  \n\t"
+        "madd.s  %[temp_r],   %[temp_r],     %[temp1],  %[temp1]  \n\t"
+        "madd.s  %[temp_r1],  %[temp_r1],    %[temp3],  %[temp3]  \n\t"
+        "madd.s  %[temp_r2],  %[temp_r2],    %[temp1],  %[temp5]  \n\t"
+        "nmsub.s  %[temp_r3], %[temp_r3],    %[temp1],  %[temp4]  \n\t"
+        "lwc1    %[temp4],    16(%[p_x])                          \n\t"
+        "lwc1    %[temp5],    20(%[p_x])                          \n\t"
+        "swc1    %[temp_r],   40(%[p_phi])                        \n\t"
+        "swc1    %[temp_r1],  16(%[p_phi])                        \n\t"
+        "swc1    %[temp_r2],  24(%[p_phi])                        \n\t"
+        "swc1    %[temp_r3],  28(%[p_phi])                        \n\t"
+        "madd.s  %[temp_r],   %[real_sum_1], %[temp2],  %[temp4]  \n\t"
+        "madd.s  %[temp_r1],  %[imag_sum_1], %[temp2],  %[temp5]  \n\t"
+        "madd.s  %[temp_r2],  %[real_sum_2], %[temp0],  %[temp6]  \n\t"
+        "madd.s  %[temp_r3],  %[imag_sum_2], %[temp0],  %[temp7]  \n\t"
+        "madd.s  %[temp_r],   %[temp_r],     %[temp3],  %[temp5]  \n\t"
+        "nmsub.s %[temp_r1],  %[temp_r1],    %[temp3],  %[temp4]  \n\t"
+        "madd.s  %[temp_r2],  %[temp_r2],    %[temp1],  %[temp7]  \n\t"
+        "nmsub.s %[temp_r3],  %[temp_r3],    %[temp1],  %[temp6]  \n\t"
+        "swc1    %[temp_r],   0(%[p_phi])                         \n\t"
+        "swc1    %[temp_r1],  4(%[p_phi])                         \n\t"
+        "swc1    %[temp_r2],  8(%[p_phi])                         \n\t"
+        "swc1    %[temp_r3],  12(%[p_phi])                        \n\t"
+
+        : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
+          [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
+          [temp6]"=&f"(temp6), [temp7]"=&f"(temp7), [temp_r]"=&f"(temp_r),
+          [real_sum_0]"+f"(real_sum_0), [real_sum_1]"+f"(real_sum_1),
+          [real_sum_2]"+f"(real_sum_2), [imag_sum_1]"+f"(imag_sum_1),
+          [temp_r2]"=&f"(temp_r2), [temp_r3]"=&f"(temp_r3),
+          [temp_r1]"=&f"(temp_r1), [p_phi]"+r"(p_phi),
+          [imag_sum_2]"+f"(imag_sum_2)
+        : [p_x]"r"(p_x)
+        : "memory"
+    );
+}
+
+/* Complex filter for i in [start, end):
+ * X_high[i] = X_low[i] + alpha0*bw * X_low[i-1] + alpha1*bw*bw * X_low[i-2] */
+static void sbr_hf_gen_mips(float (*X_high)[2], const float (*X_low)[2],
+                         const float alpha0[2], const float alpha1[2],
+                         float bw, int start, int end)
+{
+    float alpha[4];
+    int i;
+    float *p_x_low = (float*)&X_low[0][0] + 2*start;
+    float *p_x_high = &X_high[0][0] + 2*start;
+    float temp0, temp1, temp2, temp3, temp5, temp6;
+    float temp7, temp8, temp9, temp10, temp11, temp12;
+
+    alpha[0] = alpha1[0] * bw * bw;
+    alpha[1] = alpha1[1] * bw * bw;
+    alpha[2] = alpha0[0] * bw;
+    alpha[3] = alpha0[1] * bw;
+    for (i = start; i < end; i++) {
+        __asm__ volatile (
+            "lwc1    %[temp0],    -16(%[p_x_low])                        \n\t"
+            "lwc1    %[temp1],    -12(%[p_x_low])                        \n\t"
+            "lwc1    %[temp2],    -8(%[p_x_low])                         \n\t"
+            "lwc1    %[temp3],    -4(%[p_x_low])                         \n\t"
+            "lwc1    %[temp5],    0(%[p_x_low])                          \n\t"
+            "lwc1    %[temp6],    4(%[p_x_low])                          \n\t"
+            "lwc1    %[temp7],    0(%[alpha])                            \n\t"
+            "lwc1    %[temp8],    4(%[alpha])                            \n\t"
+            "lwc1    %[temp9],    8(%[alpha])                            \n\t"
+            "lwc1    %[temp10],   12(%[alpha])                           \n\t"
+            "addiu   %[p_x_high], %[p_x_high],     8                     \n\t"
+            "addiu   %[p_x_low],  %[p_x_low],      8                     \n\t"
+            "mul.s   %[temp11],   %[temp1],        %[temp8]              \n\t"
+            "msub.s  %[temp11],   %[temp11],       %[temp0],  %[temp7]   \n\t"
+            "madd.s  %[temp11],   %[temp11],       %[temp2],  %[temp9]   \n\t"
+            "nmsub.s %[temp11],   %[temp11],       %[temp3],  %[temp10]  \n\t"
+            "add.s   %[temp11],   %[temp11],       %[temp5]              \n\t"
+            "swc1    %[temp11],   -8(%[p_x_high])                        \n\t"
+            "mul.s   %[temp12],   %[temp1],        %[temp7]              \n\t"
+            "madd.s  %[temp12],   %[temp12],       %[temp0],  %[temp8]   \n\t"
+            "madd.s  %[temp12],   %[temp12],       %[temp3],  %[temp9]   \n\t"
+            "madd.s  %[temp12],   %[temp12],       %[temp2],  %[temp10]  \n\t"
+            "add.s   %[temp12],   %[temp12],       %[temp6]              \n\t"
+            "swc1    %[temp12],   -4(%[p_x_high])                        \n\t"
+            : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
+              [temp3]"=&f"(temp3), [temp5]"=&f"(temp5), [temp6]"=&f"(temp6),
+              [temp7]"=&f"(temp7), [temp8]"=&f"(temp8), [temp9]"=&f"(temp9),
+              [temp10]"=&f"(temp10), [temp11]"=&f"(temp11),
+              [temp12]"=&f"(temp12), [p_x_high]"+r"(p_x_high),
+              [p_x_low]"+r"(p_x_low)
+            : [alpha]"r"(alpha)
+            : "memory"
+        );
+    }
+}
+
+/* Y[m] = X_high[m][ixh] * g_filt[m] for m in [0, m_max). The asm loop tests its
+ * end condition after the first pass, so an empty range must return early. */
+static void sbr_hf_g_filt_mips(float (*Y)[2], const float (*X_high)[40][2],
+                            const float *g_filt, int m_max, intptr_t ixh)
+{
+    float *p_y, *p_x, *p_g, *loop_end;
+    float temp0, temp1, temp2;
+
+    if (m_max <= 0)
+        return;
+    p_g = (float*)&g_filt[0];
+    p_y = &Y[0][0];
+    p_x = (float*)&X_high[0][ixh][0];
+    loop_end = p_g + m_max; /* stays a pointer: no pointer-to-int truncation */
+    __asm__ volatile(
+        ".set    push                                \n\t"
+        ".set    noreorder                           \n\t"
+    "1:                                              \n\t"
+        "lwc1    %[temp0],   0(%[p_g])               \n\t"
+        "lwc1    %[temp1],   0(%[p_x])               \n\t"
+        "lwc1    %[temp2],   4(%[p_x])               \n\t"
+        "mul.s   %[temp1],   %[temp1],     %[temp0]  \n\t"
+        "mul.s   %[temp2],   %[temp2],     %[temp0]  \n\t"
+        "addiu   %[p_g],     %[p_g],       4         \n\t"
+        "addiu   %[p_x],     %[p_x],       320       \n\t"
+        "swc1    %[temp1],   0(%[p_y])               \n\t"
+        "swc1    %[temp2],   4(%[p_y])               \n\t"
+        "bne     %[p_g],     %[loop_end],  1b        \n\t"
+        " addiu  %[p_y],     %[p_y],       8         \n\t"
+        ".set    pop                                 \n\t"
+        : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
+          [p_x]"+r"(p_x), [p_y]"+r"(p_y), [p_g]"+r"(p_g)
+        : [loop_end]"r"(loop_end)
+        : "memory"
+    );
+}
+
+/* Y[m][0] += s_m[m]; where s_m[m] is all-bits-zero, q_filt[m] times the
+ * ff_sbr_noise_table entry at noise (stepped mod 512 per m) is added to Y[m]. */
+static void sbr_hf_apply_noise_0_mips(float (*Y)[2], const float *s_m,
+                                 const float *q_filt, int noise,
+                                 int kx, int m_max)
+{
+    int m;
+
+    for (m = 0; m < m_max; m++){
+        float *Y1=&Y[m][0];
+        float *ff_table;
+        float y0,y1, temp1, temp2, temp4, temp5;
+        int temp0, temp3;
+        const float *s_m1 = &s_m[m], *q_filt1 = &q_filt[m];
+
+        __asm__ volatile(
+            "lwc1    %[y0],       0(%[Y1])                                    \n\t"
+            "lwc1    %[temp1],    0(%[s_m1])                                  \n\t"
+            "addiu   %[noise],    %[noise],              1                    \n\t"
+            "andi    %[noise],    %[noise],              0x1ff                \n\t"
+            "sll     %[temp0],    %[noise], 3                                 \n\t"
+            "addu    %[ff_table], %[ff_sbr_noise_table], %[temp0]             \n\t"
+            "add.s   %[y0],       %[y0],                 %[temp1]             \n\t"
+            "mfc1    %[temp3],    %[temp1]                                    \n\t"
+            "bne     %[temp3],    $0,                    1f                   \n\t"
+            "lwc1    %[y1],       4(%[Y1])                                    \n\t"
+            "lwc1    %[temp2],    0(%[q_filt1])                               \n\t"
+            "lwc1    %[temp4],    0(%[ff_table])                              \n\t"
+            "lwc1    %[temp5],    4(%[ff_table])                              \n\t"
+            "madd.s  %[y0],       %[y0],                 %[temp2],  %[temp4]  \n\t"
+            "madd.s  %[y1],       %[y1],                 %[temp2],  %[temp5]  \n\t"
+            "swc1    %[y1],       4(%[Y1])                                    \n\t"
+        "1:                                                                   \n\t"
+            "swc1    %[y0],       0(%[Y1])                                    \n\t"
+            : [ff_table]"=&r"(ff_table), [y0]"=&f"(y0), [y1]"=&f"(y1),
+              [temp0]"=&r"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
+              [temp3]"=&r"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
+              [noise]"+&r"(noise) /* written by the asm: must be read-write */
+            : [ff_sbr_noise_table]"r"(ff_sbr_noise_table),
+              [Y1]"r"(Y1), [s_m1]"r"(s_m1), [q_filt1]"r"(q_filt1)
+            : "memory"
+        );
+    }
+}
+
+/* Y[m][1] += s_m[m] * phi_sign, phi_sign flipping per m; where s_m[m] is
+ * all-bits-zero, q_filt[m] * ff_sbr_noise_table[noise] is also added to Y[m]. */
+static void sbr_hf_apply_noise_1_mips(float (*Y)[2], const float *s_m,
+                                 const float *q_filt, int noise,
+                                 int kx, int m_max)
+{
+    float y0,y1,temp1, temp2, temp4, temp5;
+    int temp0, temp3, m;
+    float phi_sign = 1 - 2 * (kx & 1);
+
+    for (m = 0; m < m_max; m++) {
+        float *ff_table;
+        float *Y1=&Y[m][0];
+        const float *s_m1 = &s_m[m], *q_filt1 = &q_filt[m];
+
+        __asm__ volatile(
+            "lwc1   %[y1],       4(%[Y1])                                     \n\t"
+            "lwc1   %[temp1],    0(%[s_m1])                                   \n\t"
+            "lw     %[temp3],    0(%[s_m1])                                   \n\t"
+            "addiu  %[noise],    %[noise],               1                    \n\t"
+            "andi   %[noise],    %[noise],               0x1ff                \n\t"
+            "sll    %[temp0],    %[noise],               3                    \n\t"
+            "addu   %[ff_table], %[ff_sbr_noise_table], %[temp0]              \n\t"
+            "madd.s %[y1],       %[y1],                 %[temp1], %[phi_sign] \n\t"
+            "bne    %[temp3],    $0,                    1f                    \n\t"
+            "lwc1   %[y0],       0(%[Y1])                                     \n\t"
+            "lwc1   %[temp2],    0(%[q_filt1])                                \n\t"
+            "lwc1   %[temp4],    0(%[ff_table])                               \n\t"
+            "lwc1   %[temp5],    4(%[ff_table])                               \n\t"
+            "madd.s %[y0],       %[y0],                 %[temp2], %[temp4]    \n\t"
+            "madd.s %[y1],       %[y1],                 %[temp2], %[temp5]    \n\t"
+            "swc1   %[y0],       0(%[Y1])                                     \n\t"
+        "1:                                                                   \n\t"
+            "swc1   %[y1],       4(%[Y1])                                     \n\t"
+            : [ff_table] "=&r" (ff_table), [y0] "=&f" (y0), [y1] "=&f" (y1),
+              [temp0] "=&r" (temp0), [temp1] "=&f" (temp1), [temp2] "=&f" (temp2),
+              [temp3] "=&r" (temp3), [temp4] "=&f" (temp4), [temp5] "=&f" (temp5),
+              [noise] "+&r" (noise) /* written by the asm: must be read-write */
+            : [ff_sbr_noise_table] "r" (ff_sbr_noise_table),
+              [Y1] "r" (Y1), [s_m1] "r" (s_m1), [q_filt1] "r" (q_filt1),
+              [phi_sign] "f" (phi_sign)
+            : "memory"
+        );
+        phi_sign = -phi_sign;
+    }
+}
+
+/* Y[m][0] -= s_m[m]; where s_m[m] is all-bits-zero, q_filt[m] times the
+ * ff_sbr_noise_table entry at noise (stepped mod 512 per m) is added to Y[m]. */
+static void sbr_hf_apply_noise_2_mips(float (*Y)[2], const float *s_m,
+                                 const float *q_filt, int noise,
+                                 int kx, int m_max)
+{
+    int m;
+
+    for (m = 0; m < m_max; m++) {
+        float *Y1 = &Y[m][0], *ff_table;
+        float y0,y1, temp1, temp2, temp4, temp5;
+        int temp0, temp3; /* integer scratch: only bound to "r" operands */
+        const float *s_m1 = &s_m[m], *q_filt1 = &q_filt[m];
+
+        __asm__ volatile(
+            "lwc1   %[y0],       0(%[Y1])                                  \n\t"
+            "lwc1   %[temp1],    0(%[s_m1])                                \n\t"
+            "addiu  %[noise],    %[noise],              1                  \n\t"
+            "andi   %[noise],    %[noise],              0x1ff              \n\t"
+            "sll    %[temp0],    %[noise],              3                  \n\t"
+            "addu   %[ff_table], %[ff_sbr_noise_table], %[temp0]           \n\t"
+            "sub.s  %[y0],       %[y0],                 %[temp1]           \n\t"
+            "mfc1   %[temp3],    %[temp1]                                  \n\t"
+            "bne    %[temp3],    $0,                    1f                 \n\t"
+            "lwc1   %[y1],       4(%[Y1])                                  \n\t"
+            "lwc1   %[temp2],    0(%[q_filt1])                             \n\t"
+            "lwc1   %[temp4],    0(%[ff_table])                            \n\t"
+            "lwc1   %[temp5],    4(%[ff_table])                            \n\t"
+            "madd.s %[y0],       %[y0],                 %[temp2], %[temp4] \n\t"
+            "madd.s %[y1],       %[y1],                 %[temp2], %[temp5] \n\t"
+            "swc1   %[y1],       4(%[Y1])                                  \n\t"
+        "1:                                                                \n\t"
+            "swc1   %[y0],       0(%[Y1])                                  \n\t"
+            : [temp0]"=&r"(temp0), [ff_table]"=&r"(ff_table), [y0]"=&f"(y0),
+              [y1]"=&f"(y1), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
+              [temp3]"=&r"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
+              [noise]"+&r"(noise) /* written by the asm: must be read-write */
+            : [ff_sbr_noise_table]"r"(ff_sbr_noise_table),
+              [Y1]"r"(Y1), [s_m1]"r"(s_m1), [q_filt1]"r"(q_filt1)
+            : "memory"
+        );
+    }
+}
+
+/* Y[m][1] -= s_m[m] * phi_sign, phi_sign flipping per m; where s_m[m] is
+ * all-bits-zero, q_filt[m] * ff_sbr_noise_table[noise] is also added to Y[m]. */
+static void sbr_hf_apply_noise_3_mips(float (*Y)[2], const float *s_m,
+                                 const float *q_filt, int noise,
+                                 int kx, int m_max)
+{
+    float phi_sign = 1 - 2 * (kx & 1);
+    int m;
+
+    for (m = 0; m < m_max; m++) {
+        float *Y1=&Y[m][0];
+        float *ff_table;
+        float y0,y1, temp1, temp2, temp4, temp5;
+        int temp0, temp3;
+        const float *s_m1 = &s_m[m], *q_filt1 = &q_filt[m];
+
+        __asm__ volatile(
+            "lwc1    %[y1],       4(%[Y1])                                     \n\t"
+            "lwc1    %[temp1],    0(%[s_m1])                                   \n\t"
+            "addiu   %[noise],    %[noise],              1                     \n\t"
+            "andi    %[noise],    %[noise],              0x1ff                 \n\t"
+            "sll     %[temp0],    %[noise],              3                     \n\t"
+            "addu    %[ff_table], %[ff_sbr_noise_table], %[temp0]              \n\t"
+            "nmsub.s %[y1],       %[y1],                 %[temp1], %[phi_sign] \n\t"
+            "mfc1    %[temp3],    %[temp1]                                     \n\t"
+            "bne     %[temp3],    $0,                    1f                    \n\t"
+            "lwc1    %[y0],       0(%[Y1])                                     \n\t"
+            "lwc1    %[temp2],    0(%[q_filt1])                                \n\t"
+            "lwc1    %[temp4],    0(%[ff_table])                               \n\t"
+            "lwc1    %[temp5],    4(%[ff_table])                               \n\t"
+            "madd.s  %[y0],       %[y0],                 %[temp2], %[temp4]    \n\t"
+            "madd.s  %[y1],       %[y1],                 %[temp2], %[temp5]    \n\t"
+            "swc1    %[y0],       0(%[Y1])                                     \n\t"
+            "1:                                                                \n\t"
+            "swc1    %[y1],       4(%[Y1])                                     \n\t"
+            : [ff_table]"=&r"(ff_table), [y0]"=&f"(y0), [y1]"=&f"(y1),
+              [temp0]"=&r"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
+              [temp3]"=&r"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
+              [noise]"+&r"(noise) /* written by the asm: must be read-write */
+            : [ff_sbr_noise_table]"r"(ff_sbr_noise_table),
+              [Y1]"r"(Y1), [s_m1]"r"(s_m1), [q_filt1]"r"(q_filt1),
+              [phi_sign]"f"(phi_sign)
+            : "memory"
+        );
+        phi_sign = -phi_sign;
+    }
+}
+#endif /* HAVE_MIPSFPU */
+#endif /* HAVE_INLINE_ASM */
+
+/* Hook up the MIPS inline-asm SBR routines; most also need HAVE_MIPSFPU. */
+void ff_sbrdsp_init_mips(SBRDSPContext *s)
+{
+#if HAVE_INLINE_ASM
+    s->qmf_post_shuffle  = sbr_qmf_post_shuffle_mips;
+    s->qmf_pre_shuffle   = sbr_qmf_pre_shuffle_mips;
+    s->neg_odd_64        = sbr_neg_odd_64_mips;
+#if HAVE_MIPSFPU
+    s->hf_apply_noise[3] = sbr_hf_apply_noise_3_mips;
+    s->hf_apply_noise[2] = sbr_hf_apply_noise_2_mips;
+    s->hf_apply_noise[1] = sbr_hf_apply_noise_1_mips;
+    s->hf_apply_noise[0] = sbr_hf_apply_noise_0_mips;
+    s->hf_g_filt         = sbr_hf_g_filt_mips;
+    s->hf_gen            = sbr_hf_gen_mips;
+    s->autocorrelate     = sbr_autocorrelate_mips;
+    s->qmf_deint_bfly    = sbr_qmf_deint_bfly_mips;
+    s->sum_square        = sbr_sum_square_mips;
+    s->sum64x5           = sbr_sum64x5_mips;
+#endif /* HAVE_MIPSFPU */
+#endif /* HAVE_INLINE_ASM */
+}
diff --git a/libavcodec/mjpeg.c b/libavcodec/mjpeg.c
index a353851..bdd7b19 100644
--- a/libavcodec/mjpeg.c
+++ b/libavcodec/mjpeg.c
@@ -8,20 +8,20 @@
  * aspecting, new decode_frame mechanism and apple mjpeg-b support
  *                                  by Alex Beregszaszi
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -38,7 +38,7 @@
  * The spec says that the values given produce "good" quality, and
  * when divided by 2, "very good" quality.
  */
-const unsigned char std_luminance_quant_tbl[64] = {
+static const unsigned char std_luminance_quant_tbl[64] = {
     16,  11,  10,  16,  24,  40,  51,  61,
     12,  12,  14,  19,  26,  58,  60,  55,
     14,  13,  16,  24,  40,  57,  69,  56,
@@ -48,7 +48,7 @@ const unsigned char std_luminance_quant_tbl[64] = {
     49,  64,  78,  87, 103, 121, 120, 101,
     72,  92,  95,  98, 112, 100, 103,  99
 };
-const unsigned char std_chrominance_quant_tbl[64] = {
+static const unsigned char std_chrominance_quant_tbl[64] = {
     17,  18,  24,  47,  99,  99,  99,  99,
     18,  21,  26,  66,  99,  99,  99,  99,
     24,  26,  56,  99,  99,  99,  99,  99,
diff --git a/libavcodec/mjpeg.h b/libavcodec/mjpeg.h
index bd3b1e8..73db1dd 100644
--- a/libavcodec/mjpeg.h
+++ b/libavcodec/mjpeg.h
@@ -8,20 +8,20 @@
  * aspecting, new decode_frame mechanism and apple mjpeg-b support
  *                                  by Alex Beregszaszi
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -128,6 +128,7 @@ static inline void put_marker(PutBitContext *p, int code)
 
 #define PREDICT(ret, topleft, top, left, predictor)\
     switch(predictor){\
+        case 0: ret= 0; break;\
         case 1: ret= left; break;\
         case 2: ret= top; break;\
         case 3: ret= topleft; break;\
diff --git a/libavcodec/mjpeg2jpeg_bsf.c b/libavcodec/mjpeg2jpeg_bsf.c
index 59734c9..6adeaf0 100644
--- a/libavcodec/mjpeg2jpeg_bsf.c
+++ b/libavcodec/mjpeg2jpeg_bsf.c
@@ -2,20 +2,20 @@
  * MJPEG/AVI1 to JPEG/JFIF bitstream format filter
  * Copyright (c) 2010 Adrian Daerr and Nicolas George
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mjpeg_parser.c b/libavcodec/mjpeg_parser.c
index ab65461..e548b00 100644
--- a/libavcodec/mjpeg_parser.c
+++ b/libavcodec/mjpeg_parser.c
@@ -4,20 +4,20 @@
  * Copyright (c) 2003 Alex Beregszaszi
  * Copyright (c) 2003-2004 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -28,27 +28,44 @@
 
 #include "parser.h"
 
+typedef struct MJPEGParserContext{
+    ParseContext pc;
+    int size;
+}MJPEGParserContext;
 
 /**
  * Find the end of the current frame in the bitstream.
  * @return the position of the first byte of the next frame, or -1
  */
-static int find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size){
+static int find_frame_end(MJPEGParserContext *m, const uint8_t *buf, int buf_size){
+    ParseContext *pc= &m->pc;
     int vop_found, i;
-    uint16_t state;
+    uint32_t state;
 
     vop_found= pc->frame_start_found;
     state= pc->state;
 
     i=0;
     if(!vop_found){
-        for(i=0; i<buf_size; i++){
+        for(i=0; i<buf_size;){
             state= (state<<8) | buf[i];
-            if(state == 0xFFD8){
-                i++;
-                vop_found=1;
-                break;
+            if(state>=0xFFC00000 && state<=0xFFFEFFFF){
+                if(state>=0xFFD80000 && state<=0xFFD8FFFF){
+                    i++;
+                    vop_found=1;
+                    break;
+                }else if(state<0xFFD00000 || state>0xFFD9FFFF){
+                    m->size= (state&0xFFFF)-1;
+                }
             }
+            if(m->size>0){
+                int size= FFMIN(buf_size-i, m->size);
+                i+=size;
+                m->size-=size;
+                state=0;
+                continue;
+            }else
+                i++;
         }
     }
 
@@ -56,13 +73,25 @@ static int find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size){
         /* EOF considered as end of frame */
         if (buf_size == 0)
             return 0;
-        for(; i<buf_size; i++){
+        for(; i<buf_size;){
             state= (state<<8) | buf[i];
-            if(state == 0xFFD8){
-                pc->frame_start_found=0;
-                pc->state=0;
-                return i-1;
+            if(state>=0xFFC00000 && state<=0xFFFEFFFF){
+                if(state>=0xFFD80000 && state<=0xFFD8FFFF){
+                    pc->frame_start_found=0;
+                    pc->state=0;
+                    return i-3;
+                } else if(state<0xFFD00000 || state>0xFFD9FFFF){
+                    m->size= (state&0xFFFF)-1;
+                }
             }
+            if(m->size>0){
+                int size= FFMIN(buf_size-i, m->size);
+                i+=size;
+                m->size-=size;
+                state=0;
+                continue;
+            }else
+                i++;
         }
     }
     pc->frame_start_found= vop_found;
@@ -75,13 +104,14 @@ static int jpeg_parse(AVCodecParserContext *s,
                       const uint8_t **poutbuf, int *poutbuf_size,
                       const uint8_t *buf, int buf_size)
 {
-    ParseContext *pc = s->priv_data;
+    MJPEGParserContext *m = s->priv_data;
+    ParseContext *pc = &m->pc;
     int next;
 
     if(s->flags & PARSER_FLAG_COMPLETE_FRAMES){
         next= buf_size;
     }else{
-        next= find_frame_end(pc, buf, buf_size);
+        next= find_frame_end(m, buf, buf_size);
 
         if (ff_combine_frame(pc, next, &buf, &buf_size) < 0) {
             *poutbuf = NULL;
@@ -98,7 +128,7 @@ static int jpeg_parse(AVCodecParserContext *s,
 
 AVCodecParser ff_mjpeg_parser = {
     .codec_ids      = { AV_CODEC_ID_MJPEG },
-    .priv_data_size = sizeof(ParseContext),
+    .priv_data_size = sizeof(MJPEGParserContext),
     .parser_parse   = jpeg_parse,
     .parser_close   = ff_parse_close,
 };
diff --git a/libavcodec/mjpega_dump_header_bsf.c b/libavcodec/mjpega_dump_header_bsf.c
index ed32d5a..3947c82 100644
--- a/libavcodec/mjpega_dump_header_bsf.c
+++ b/libavcodec/mjpega_dump_header_bsf.c
@@ -2,20 +2,20 @@
  * MJPEG A dump header bitstream filter
  * Copyright (c) 2006 Baptiste Coudurier
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -88,7 +88,6 @@ static int mjpega_dump_header(AVBitStreamFilterContext *bsfc, AVCodecContext *av
 }
 
 AVBitStreamFilter ff_mjpega_dump_header_bsf = {
-    "mjpegadump",
-    0,
-    mjpega_dump_header,
+    .name   = "mjpegadump",
+    .filter = mjpega_dump_header,
 };
diff --git a/libavcodec/mjpegbdec.c b/libavcodec/mjpegbdec.c
index 66cf2d4..f6ee705 100644
--- a/libavcodec/mjpegbdec.c
+++ b/libavcodec/mjpegbdec.c
@@ -2,20 +2,20 @@
  * Apple MJPEG-B decoder
  * Copyright (c) 2002 Alex Beregszaszi
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -54,6 +54,7 @@ static int mjpegb_decode_frame(AVCodecContext *avctx,
 
     buf_ptr = buf;
     buf_end = buf + buf_size;
+    s->got_picture = 0;
 
 read_header:
     /* reset on every SOI */
@@ -121,7 +122,7 @@ read_header:
                       8 * FFMIN(field_size, buf_end - buf_ptr - sos_offs));
         s->mjpb_skiptosod = (sod_offs - sos_offs - show_bits(&s->gb, 16));
         s->start_code = SOS;
-        if (ff_mjpeg_decode_sos(s, NULL, NULL) < 0 &&
+        if (ff_mjpeg_decode_sos(s, NULL, 0, NULL) < 0 &&
             (avctx->err_recognition & AV_EF_EXPLODE))
           return AVERROR_INVALIDDATA;
     }
@@ -132,13 +133,17 @@ read_header:
         if (s->bottom_field != s->interlace_polarity && second_field_offs)
         {
             buf_ptr = buf + second_field_offs;
-            second_field_offs = 0;
             goto read_header;
             }
     }
 
     //XXX FIXME factorize, this looks very similar to the EOI code
 
+    if(!s->got_picture) {
+        av_log(avctx, AV_LOG_WARNING, "no picture\n");
+        return buf_size;
+    }
+
     if ((ret = av_frame_ref(data, s->picture_ptr)) < 0)
         return ret;
     *got_frame = 1;
@@ -161,4 +166,5 @@ AVCodec ff_mjpegb_decoder = {
     .close          = ff_mjpeg_decode_end,
     .decode         = mjpegb_decode_frame,
     .capabilities   = CODEC_CAP_DR1,
+    .max_lowres     = 3,
 };
diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c
index d9a73d8..ea84d9a 100644
--- a/libavcodec/mjpegdec.c
+++ b/libavcodec/mjpegdec.c
@@ -8,20 +8,20 @@
  * aspecting, new decode_frame mechanism and apple mjpeg-b support
  *                                  by Alex Beregszaszi
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -30,17 +30,20 @@
  * MJPEG decoder.
  */
 
-#include <assert.h>
-
 #include "libavutil/imgutils.h"
+#include "libavutil/avassert.h"
 #include "libavutil/opt.h"
 #include "avcodec.h"
 #include "blockdsp.h"
+#include "copy_block.h"
 #include "idctdsp.h"
 #include "internal.h"
 #include "mjpeg.h"
 #include "mjpegdec.h"
 #include "jpeglsdec.h"
+#include "tiff.h"
+#include "exif.h"
+#include "bytestream.h"
 
 
 static int build_vlc(VLC *vlc, const uint8_t *bits_table,
@@ -52,7 +55,7 @@ static int build_vlc(VLC *vlc, const uint8_t *bits_table,
     uint16_t huff_sym[256];
     int i;
 
-    assert(nb_codes <= 256);
+    av_assert0(nb_codes <= 256);
 
     ff_mjpeg_build_huffman_codes(huff_size, huff_code, bits_table, val_table);
 
@@ -82,6 +85,17 @@ static void build_basic_mjpeg_vlc(MJpegDecodeContext *s)
               avpriv_mjpeg_val_ac_chrominance, 251, 0, 0);
 }
 
+/* Set the buggy_avid flag and, when len > 14, derive field polarity from buf[12]. */
+static void parse_avid(MJpegDecodeContext *s, uint8_t *buf, int len)
+{
+    const int video_std = len > 14 ? buf[12] : -1; /* -1 when len <= 14 */
+    s->buggy_avid = 1;
+    if (video_std == 1 || video_std == 2) /* 1 - NTSC, 2 - PAL */
+        s->interlace_polarity = (video_std == 1);
+    if (s->avctx->debug & FF_DEBUG_PICT_INFO)
+        av_log(s->avctx, AV_LOG_INFO, "AVID: len:%d %d\n", len, video_std);
+}
+
 av_cold int ff_mjpeg_decode_init(AVCodecContext *avctx)
 {
     MJpegDecodeContext *s = avctx->priv_data;
@@ -103,6 +117,7 @@ av_cold int ff_mjpeg_decode_init(AVCodecContext *avctx)
     s->buffer        = NULL;
     s->start_code    = -1;
     s->first_picture = 1;
+    s->got_picture   = 0;
     s->org_height    = avctx->coded_height;
     avctx->chroma_sample_location = AVCHROMA_LOC_CENTER;
     avctx->colorspace = AVCOL_SPC_BT470BG;
@@ -110,19 +125,28 @@ av_cold int ff_mjpeg_decode_init(AVCodecContext *avctx)
     build_basic_mjpeg_vlc(s);
 
     if (s->extern_huff) {
-        int ret;
-        av_log(avctx, AV_LOG_INFO, "mjpeg: using external huffman table\n");
+        av_log(avctx, AV_LOG_INFO, "using external huffman table\n");
         init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size * 8);
-        if ((ret = ff_mjpeg_decode_dht(s))) {
+        if (ff_mjpeg_decode_dht(s)) {
             av_log(avctx, AV_LOG_ERROR,
-                   "mjpeg: error using external huffman table\n");
-            return ret;
+                   "error using external huffman table, switching back to internal\n");
+            build_basic_mjpeg_vlc(s);
         }
     }
     if (avctx->field_order == AV_FIELD_BB) { /* quicktime icefloe 019 */
         s->interlace_polarity = 1;           /* bottom field first */
-        av_log(avctx, AV_LOG_DEBUG, "mjpeg bottom field first\n");
+        av_log(avctx, AV_LOG_DEBUG, "bottom field first\n");
+    } else if (avctx->field_order == AV_FIELD_UNKNOWN) {
+        if (avctx->codec_tag == AV_RL32("MJPG"))
+            s->interlace_polarity = 1;
+    }
+
+    if (   avctx->extradata_size > 8
+        && AV_RL32(avctx->extradata) == 0x2C
+        && AV_RL32(avctx->extradata+4) == 0x18) {
+        parse_avid(s, avctx->extradata, avctx->extradata_size);
     }
+
     if (avctx->codec->id == AV_CODEC_ID_AMV)
         s->flipped = 1;
 
@@ -138,10 +162,10 @@ int ff_mjpeg_decode_dqt(MJpegDecodeContext *s)
     len = get_bits(&s->gb, 16) - 2;
 
     while (len >= 65) {
-        /* only 8 bit precision handled */
-        if (get_bits(&s->gb, 4) != 0) {
-            av_log(s->avctx, AV_LOG_ERROR, "dqt: 16bit precision\n");
-            return -1;
+        int pr = get_bits(&s->gb, 4);
+        if (pr > 1) {
+            av_log(s->avctx, AV_LOG_ERROR, "dqt: invalid precision\n");
+            return AVERROR_INVALIDDATA;
         }
         index = get_bits(&s->gb, 4);
         if (index >= 4)
@@ -150,7 +174,7 @@ int ff_mjpeg_decode_dqt(MJpegDecodeContext *s)
         /* read quant table */
         for (i = 0; i < 64; i++) {
             j = s->scantable.permutated[i];
-            s->quant_matrixes[index][j] = get_bits(&s->gb, 8);
+            s->quant_matrixes[index][j] = get_bits(&s->gb, pr ? 16 : 8);
         }
 
         // XXX FIXME finetune, and perhaps add dc too
@@ -221,9 +245,15 @@ int ff_mjpeg_decode_dht(MJpegDecodeContext *s)
 int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
 {
     int len, nb_components, i, width, height, pix_fmt_id, ret;
+    int h_count[MAX_COMPONENTS];
+    int v_count[MAX_COMPONENTS];
+
+    s->cur_scan = 0;
+    s->upscale_h = s->upscale_v = 0;
 
     /* XXX: verify len field validity */
     len     = get_bits(&s->gb, 16);
+    s->avctx->bits_per_raw_sample =
     s->bits = get_bits(&s->gb, 8);
 
     if (s->pegasus_rct)
@@ -231,14 +261,17 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
     if (s->bits == 9 && !s->pegasus_rct)
         s->rct  = 1;    // FIXME ugly
 
-    if (s->bits != 8 && !s->lossless) {
-        av_log(s->avctx, AV_LOG_ERROR, "only 8 bits/component accepted\n");
+    if(s->lossless && s->avctx->lowres){
+        av_log(s->avctx, AV_LOG_ERROR, "lowres is not possible with lossless jpeg\n");
         return -1;
     }
 
     height = get_bits(&s->gb, 16);
     width  = get_bits(&s->gb, 16);
 
+    if (s->avctx->codec_id == AV_CODEC_ID_AMV && (height&15))
+        avpriv_request_sample(s->avctx, "non mod 16 height AMV\n");
+
     // HACK for odd_height.mov
     if (s->interlaced && s->width == width && s->height == height + 1)
         height= s->height;
@@ -267,28 +300,32 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
     s->nb_components = nb_components;
     s->h_max         = 1;
     s->v_max         = 1;
+    memset(h_count, 0, sizeof(h_count));
+    memset(v_count, 0, sizeof(v_count));
     for (i = 0; i < nb_components; i++) {
         /* component id */
         s->component_id[i] = get_bits(&s->gb, 8) - 1;
-        s->h_count[i]      = get_bits(&s->gb, 4);
-        s->v_count[i]      = get_bits(&s->gb, 4);
+        h_count[i]         = get_bits(&s->gb, 4);
+        v_count[i]         = get_bits(&s->gb, 4);
         /* compute hmax and vmax (only used in interleaved case) */
-        if (s->h_count[i] > s->h_max)
-            s->h_max = s->h_count[i];
-        if (s->v_count[i] > s->v_max)
-            s->v_max = s->v_count[i];
+        if (h_count[i] > s->h_max)
+            s->h_max = h_count[i];
+        if (v_count[i] > s->v_max)
+            s->v_max = v_count[i];
         s->quant_index[i] = get_bits(&s->gb, 8);
-        if (s->quant_index[i] >= 4)
+        if (s->quant_index[i] >= 4) {
+            av_log(s->avctx, AV_LOG_ERROR, "quant_index is invalid\n");
             return AVERROR_INVALIDDATA;
-        if (!s->h_count[i] || !s->v_count[i]) {
+        }
+        if (!h_count[i] || !v_count[i]) {
             av_log(s->avctx, AV_LOG_ERROR,
                    "Invalid sampling factor in component %d %d:%d\n",
-                   i, s->h_count[i], s->v_count[i]);
+                   i, h_count[i], v_count[i]);
             return AVERROR_INVALIDDATA;
         }
 
         av_log(s->avctx, AV_LOG_DEBUG, "component %d %d:%d id: %d quant:%d\n",
-               i, s->h_count[i], s->v_count[i],
+               i, h_count[i], v_count[i],
                s->component_id[i], s->quant_index[i]);
     }
 
@@ -297,15 +334,18 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
         return AVERROR_PATCHWELCOME;
     }
 
-    if (s->v_max == 1 && s->h_max == 1 && s->lossless == 1)
-        s->rgb = 1;
 
     /* if different size, realloc/alloc picture */
-    /* XXX: also check h_count and v_count */
-    if (width != s->width || height != s->height) {
+    if (   width != s->width || height != s->height
+        || memcmp(s->h_count, h_count, sizeof(h_count))
+        || memcmp(s->v_count, v_count, sizeof(v_count))) {
+
         s->width      = width;
         s->height     = height;
+        memcpy(s->h_count, h_count, sizeof(h_count));
+        memcpy(s->v_count, v_count, sizeof(v_count));
         s->interlaced = 0;
+        s->got_picture = 0;
 
         /* test interlaced mode */
         if (s->first_picture   &&
@@ -325,7 +365,16 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
         s->first_picture = 0;
     }
 
-    if (!(s->interlaced && (s->bottom_field == !s->interlace_polarity))) {
+    if (s->got_picture && s->interlaced && (s->bottom_field == !s->interlace_polarity)) {
+        if (s->progressive) {
+            avpriv_request_sample(s->avctx, "progressively coded interlaced picture");
+            return AVERROR_INVALIDDATA;
+        }
+    } else{
+        if (s->v_max == 1 && s->h_max == 1 && s->lossless==1 && (nb_components==3 || nb_components==4))
+            s->rgb = 1;
+        else if (!s->lossless)
+            s->rgb = 0;
     /* XXX: not complete test ! */
     pix_fmt_id = (s->h_count[0] << 28) | (s->v_count[0] << 24) |
                  (s->h_count[1] << 20) | (s->v_count[1] << 16) |
@@ -339,38 +388,156 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
     if (!(pix_fmt_id & 0x0D0D0D0D))
         pix_fmt_id -= (pix_fmt_id & 0x0F0F0F0F) >> 1;
 
+    for (i = 0; i < 8; i++) {
+        int j = 6 + (i&1) - (i&6);
+        int is = (pix_fmt_id >> (4*i)) & 0xF;
+        int js = (pix_fmt_id >> (4*j)) & 0xF;
+
+        if (is == 1 && js != 2 && (i < 2 || i > 5))
+            js = (pix_fmt_id >> ( 8 + 4*(i&1))) & 0xF;
+        if (is == 1 && js != 2 && (i < 2 || i > 5))
+            js = (pix_fmt_id >> (16 + 4*(i&1))) & 0xF;
+
+        if (is == 1 && js == 2) {
+            if (i & 1) s->upscale_h |= 1 << (j/2);
+            else       s->upscale_v |= 1 << (j/2);
+        }
+    }
+
     switch (pix_fmt_id) {
     case 0x11111100:
         if (s->rgb)
-            s->avctx->pix_fmt = AV_PIX_FMT_BGRA;
+            s->avctx->pix_fmt = s->bits <= 9 ? AV_PIX_FMT_BGR24 : AV_PIX_FMT_BGR48;
+        else {
+            if (s->component_id[0] == 'Q' && s->component_id[1] == 'F' && s->component_id[2] == 'A') {
+                s->avctx->pix_fmt = s->bits <= 8 ? AV_PIX_FMT_GBRP : AV_PIX_FMT_GBRP16;
+            } else {
+                if (s->bits <= 8) s->avctx->pix_fmt = s->cs_itu601 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_YUVJ444P;
+                else              s->avctx->pix_fmt = AV_PIX_FMT_YUV444P16;
+            s->avctx->color_range = s->cs_itu601 ? AVCOL_RANGE_MPEG : AVCOL_RANGE_JPEG;
+            }
+        }
+        av_assert0(s->nb_components == 3);
+        break;
+    case 0x11111111:
+        if (s->rgb)
+            s->avctx->pix_fmt = s->bits <= 9 ? AV_PIX_FMT_ABGR : AV_PIX_FMT_RGBA64;
         else {
-            s->avctx->pix_fmt = s->cs_itu601 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_YUVJ444P;
+            if (s->adobe_transform == 0 && s->bits <= 8) {
+                s->avctx->pix_fmt = AV_PIX_FMT_GBRAP;
+            } else {
+                s->avctx->pix_fmt = s->bits <= 8 ? AV_PIX_FMT_YUVA444P : AV_PIX_FMT_YUVA444P16;
+                s->avctx->color_range = s->cs_itu601 ? AVCOL_RANGE_MPEG : AVCOL_RANGE_JPEG;
+            }
+        }
+        av_assert0(s->nb_components == 4);
+        break;
+    case 0x22111122:
+        if (s->adobe_transform == 0 && s->bits <= 8) {
+            s->avctx->pix_fmt = AV_PIX_FMT_GBRAP;
+            s->upscale_v = 6;
+            s->upscale_h = 6;
+            s->chroma_height = s->height;
+        } else if (s->adobe_transform == 2 && s->bits <= 8) {
+            s->avctx->pix_fmt = AV_PIX_FMT_YUVA444P;
+            s->upscale_v = 6;
+            s->upscale_h = 6;
+            s->chroma_height = s->height;
+            s->avctx->color_range = s->cs_itu601 ? AVCOL_RANGE_MPEG : AVCOL_RANGE_JPEG;
+        } else {
+            if (s->bits <= 8) s->avctx->pix_fmt = AV_PIX_FMT_YUVA420P;
+            else              s->avctx->pix_fmt = AV_PIX_FMT_YUVA420P16;
             s->avctx->color_range = s->cs_itu601 ? AVCOL_RANGE_MPEG : AVCOL_RANGE_JPEG;
         }
-        assert(s->nb_components == 3);
+        av_assert0(s->nb_components == 4);
+        break;
+    case 0x12121100:
+    case 0x22122100:
+    case 0x21211100:
+    case 0x22211200:
+        if (s->bits <= 8) s->avctx->pix_fmt = s->cs_itu601 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_YUVJ444P;
+        else
+            goto unk_pixfmt;
+        s->avctx->color_range = s->cs_itu601 ? AVCOL_RANGE_MPEG : AVCOL_RANGE_JPEG;
+        s->chroma_height = s->height;
+        break;
+    case 0x22221100:
+    case 0x22112200:
+    case 0x11222200:
+        if (s->bits <= 8) s->avctx->pix_fmt = s->cs_itu601 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_YUVJ444P;
+        else
+            goto unk_pixfmt;
+        s->avctx->color_range = s->cs_itu601 ? AVCOL_RANGE_MPEG : AVCOL_RANGE_JPEG;
+        s->chroma_height = s->height / 2;
         break;
     case 0x11000000:
-        s->avctx->pix_fmt = AV_PIX_FMT_GRAY8;
+    case 0x13000000:
+    case 0x14000000:
+    case 0x31000000:
+    case 0x33000000:
+    case 0x34000000:
+    case 0x41000000:
+    case 0x43000000:
+    case 0x44000000:
+        if(s->bits <= 8)
+            s->avctx->pix_fmt = AV_PIX_FMT_GRAY8;
+        else
+            s->avctx->pix_fmt = AV_PIX_FMT_GRAY16;
         break;
     case 0x12111100:
-        s->avctx->pix_fmt = s->cs_itu601 ? AV_PIX_FMT_YUV440P : AV_PIX_FMT_YUVJ440P;
+    case 0x14121200:
+    case 0x22211100:
+    case 0x22112100:
+        if (s->bits <= 8) s->avctx->pix_fmt = s->cs_itu601 ? AV_PIX_FMT_YUV440P : AV_PIX_FMT_YUVJ440P;
+        else
+            goto unk_pixfmt;
         s->avctx->color_range = s->cs_itu601 ? AVCOL_RANGE_MPEG : AVCOL_RANGE_JPEG;
+        s->chroma_height = s->height / 2;
         break;
     case 0x21111100:
-        s->avctx->pix_fmt = s->cs_itu601 ? AV_PIX_FMT_YUV422P : AV_PIX_FMT_YUVJ422P;
+        if (s->bits <= 8) s->avctx->pix_fmt = s->cs_itu601 ? AV_PIX_FMT_YUV422P : AV_PIX_FMT_YUVJ422P;
+        else              s->avctx->pix_fmt = AV_PIX_FMT_YUV422P16;
+        s->avctx->color_range = s->cs_itu601 ? AVCOL_RANGE_MPEG : AVCOL_RANGE_JPEG;
+        break;
+    case 0x22121100:
+    case 0x22111200:
+        if (s->bits <= 8) s->avctx->pix_fmt = s->cs_itu601 ? AV_PIX_FMT_YUV422P : AV_PIX_FMT_YUVJ422P;
+        else
+            goto unk_pixfmt;
         s->avctx->color_range = s->cs_itu601 ? AVCOL_RANGE_MPEG : AVCOL_RANGE_JPEG;
         break;
     case 0x22111100:
-        s->avctx->pix_fmt = s->cs_itu601 ? AV_PIX_FMT_YUV420P : AV_PIX_FMT_YUVJ420P;
+    case 0x42111100:
+        if (s->bits <= 8) s->avctx->pix_fmt = s->cs_itu601 ? AV_PIX_FMT_YUV420P : AV_PIX_FMT_YUVJ420P;
+        else              s->avctx->pix_fmt = AV_PIX_FMT_YUV420P16;
+        s->avctx->color_range = s->cs_itu601 ? AVCOL_RANGE_MPEG : AVCOL_RANGE_JPEG;
+        if (pix_fmt_id == 0x42111100) {
+            s->upscale_h = 6;
+            s->chroma_height = s->height / 2;
+        }
+        break;
+    case 0x41111100:
+        if (s->bits <= 8) s->avctx->pix_fmt = s->cs_itu601 ? AV_PIX_FMT_YUV411P : AV_PIX_FMT_YUVJ411P;
+        else
+            goto unk_pixfmt;
         s->avctx->color_range = s->cs_itu601 ? AVCOL_RANGE_MPEG : AVCOL_RANGE_JPEG;
         break;
     default:
+unk_pixfmt:
         av_log(s->avctx, AV_LOG_ERROR, "Unhandled pixel format 0x%x\n", pix_fmt_id);
+        s->upscale_h = s->upscale_v = 0;
+        return AVERROR_PATCHWELCOME;
+    }
+    if ((s->upscale_h || s->upscale_v) && s->avctx->lowres) {
+        av_log(s->avctx, AV_LOG_ERROR, "lowres not supported for weird subsampling\n");
         return AVERROR_PATCHWELCOME;
     }
     if (s->ls) {
+        s->upscale_h = s->upscale_v = 0;
         if (s->nb_components > 1)
             s->avctx->pix_fmt = AV_PIX_FMT_RGB24;
+        else if (s->palette_index && s->bits <= 8)
+            s->avctx->pix_fmt = AV_PIX_FMT_PAL8;
         else if (s->bits <= 8)
             s->avctx->pix_fmt = AV_PIX_FMT_GRAY8;
         else
@@ -384,15 +551,13 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
     }
 
     av_frame_unref(s->picture_ptr);
-    if (ff_get_buffer(s->avctx, s->picture_ptr, AV_GET_BUFFER_FLAG_REF) < 0) {
-        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if (ff_get_buffer(s->avctx, s->picture_ptr, AV_GET_BUFFER_FLAG_REF) < 0)
         return -1;
-    }
     s->picture_ptr->pict_type = AV_PICTURE_TYPE_I;
     s->picture_ptr->key_frame = 1;
     s->got_picture            = 1;
 
-    for (i = 0; i < 3; i++)
+    for (i = 0; i < 4; i++)
         s->linesize[i] = s->picture_ptr->linesize[i] << s->interlaced;
 
     av_dlog(s->avctx, "%d %d %d %d %d %d\n",
@@ -403,6 +568,11 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
         av_log(s->avctx, AV_LOG_DEBUG, "decode_sof0: error, len(%d) mismatch\n", len);
     }
 
+    if (s->rgb && !s->lossless && !s->ls) {
+        av_log(s->avctx, AV_LOG_ERROR, "Unsupported coding and pixel format combination\n");
+        return AVERROR_PATCHWELCOME;
+    }
+
     /* totally blank picture as progressive JPEG will only add details to it */
     if (s->progressive) {
         int bw = (width  + s->h_max * 8 - 1) / (s->h_max * 8);
@@ -411,8 +581,10 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
             int size = bw * bh * s->h_count[i] * s->v_count[i];
             av_freep(&s->blocks[i]);
             av_freep(&s->last_nnz[i]);
-            s->blocks[i]       = av_malloc(size * sizeof(**s->blocks));
-            s->last_nnz[i]     = av_mallocz(size * sizeof(**s->last_nnz));
+            s->blocks[i]       = av_mallocz_array(size, sizeof(**s->blocks));
+            s->last_nnz[i]     = av_mallocz_array(size, sizeof(**s->last_nnz));
+            if (!s->blocks[i] || !s->last_nnz[i])
+                return AVERROR(ENOMEM);
             s->block_stride[i] = bw * s->h_count[i];
         }
         memset(s->coefs_finished, 0, sizeof(s->coefs_finished));
@@ -424,11 +596,11 @@ static inline int mjpeg_decode_dc(MJpegDecodeContext *s, int dc_index)
 {
     int code;
     code = get_vlc2(&s->gb, s->vlcs[0][dc_index].table, 9, 2);
-    if (code < 0) {
+    if (code < 0 || code > 16) {
         av_log(s->avctx, AV_LOG_WARNING,
                "mjpeg_decode_dc: bad vlc: %d:%d (%p)\n",
                0, dc_index, &s->vlcs[0][dc_index]);
-        return 0xffff;
+        return 0xfffff;
     }
 
     if (code)
@@ -445,7 +617,7 @@ static int decode_block(MJpegDecodeContext *s, int16_t *block, int component,
 
     /* DC coef */
     val = mjpeg_decode_dc(s, dc_index);
-    if (val == 0xffff) {
+    if (val == 0xfffff) {
         av_log(s->avctx, AV_LOG_ERROR, "error dc\n");
         return AVERROR_INVALIDDATA;
     }
@@ -493,7 +665,7 @@ static int decode_dc_progressive(MJpegDecodeContext *s, int16_t *block,
     int val;
     s->bdsp.clear_block(block);
     val = mjpeg_decode_dc(s, dc_index);
-    if (val == 0xffff) {
+    if (val == 0xfffff) {
         av_log(s->avctx, AV_LOG_ERROR, "error dc\n");
         return AVERROR_INVALIDDATA;
     }
@@ -667,46 +839,101 @@ static int decode_block_refinement(MJpegDecodeContext *s, int16_t *block,
 #undef REFINE_BIT
 #undef ZERO_RUN
 
-static int ljpeg_decode_rgb_scan(MJpegDecodeContext *s, int predictor,
-                                 int point_transform)
+/* Decrement the restart counter; once it hits zero, try to consume an RSTn marker.
+ * Returns 1 if a marker was consumed (last_dc is reset to 4 << bits), otherwise 0. */
+static int handle_rstn(MJpegDecodeContext *s, int nb_components)
+{
+    int n, probe_len, saved_pos;
+
+    if (!s->restart_interval)
+        return 0;
+    if (--s->restart_count != 0)
+        return 0;
+    if (s->avctx->codec_id == AV_CODEC_ID_THP) {
+        align_get_bits(&s->gb);
+        for (n = 0; n < nb_components; n++) /* reset dc */
+            s->last_dc[n] = (4 << s->bits);
+    }
+    /* look at the bits up to the next byte boundary plus one full byte */
+    probe_len = 8 + ((-get_bits_count(&s->gb)) & 7);
+    if (show_bits(&s->gb, probe_len) != (1 << probe_len) - 1 &&
+        show_bits(&s->gb, probe_len) != 0xFF)
+        return 0;
+    saved_pos = get_bits_count(&s->gb);
+    align_get_bits(&s->gb);
+    while (get_bits_left(&s->gb) >= 8 && show_bits(&s->gb, 8) == 0xFF)
+        skip_bits(&s->gb, 8);
+    if (get_bits_left(&s->gb) < 8 || (get_bits(&s->gb, 8) & 0xF8) != 0xD0) {
+        /* no RSTn here: move the reader back to the saved position */
+        skip_bits_long(&s->gb, saved_pos - get_bits_count(&s->gb));
+        return 0;
+    }
+    for (n = 0; n < nb_components; n++) /* reset dc */
+        s->last_dc[n] = (4 << s->bits);
+    return 1;
+}
+
+static int ljpeg_decode_rgb_scan(MJpegDecodeContext *s, int nb_components, int predictor, int point_transform)
 {
     int i, mb_x, mb_y;
     uint16_t (*buffer)[4];
-    int left[3], top[3], topleft[3];
+    int left[4], top[4], topleft[4];
     const int linesize = s->linesize[0];
-    const int mask     = (1 << s->bits) - 1;
+    const int mask     = ((1 << s->bits) - 1) << point_transform;
+    int resync_mb_y = 0;
+    int resync_mb_x = 0;
+
+    if (s->nb_components != 3 && s->nb_components != 4)
+        return AVERROR_INVALIDDATA;
+    if (s->v_max != 1 || s->h_max != 1 || !s->lossless)
+        return AVERROR_INVALIDDATA;
+
+
+    s->restart_count = s->restart_interval;
 
     av_fast_malloc(&s->ljpeg_buffer, &s->ljpeg_buffer_size,
                    (unsigned)s->mb_width * 4 * sizeof(s->ljpeg_buffer[0][0]));
     buffer = s->ljpeg_buffer;
 
-    for (i = 0; i < 3; i++)
-        buffer[0][i] = 1 << (s->bits + point_transform - 1);
+    for (i = 0; i < 4; i++)
+        buffer[0][i] = 1 << (s->bits - 1);
 
     for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
-        const int modified_predictor = mb_y ? predictor : 1;
         uint8_t *ptr = s->picture_ptr->data[0] + (linesize * mb_y);
 
         if (s->interlaced && s->bottom_field)
             ptr += linesize >> 1;
 
-        for (i = 0; i < 3; i++)
+        for (i = 0; i < 4; i++)
             top[i] = left[i] = topleft[i] = buffer[0][i];
 
         for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
-            if (s->restart_interval && !s->restart_count)
+            int modified_predictor = predictor;
+
+            if (s->restart_interval && !s->restart_count){
                 s->restart_count = s->restart_interval;
+                resync_mb_x = mb_x;
+                resync_mb_y = mb_y;
+                for(i=0; i<4; i++)
+                    top[i] = left[i]= topleft[i]= 1 << (s->bits - 1);
+            }
+            if (mb_y == resync_mb_y || mb_y == resync_mb_y+1 && mb_x < resync_mb_x || !mb_x)
+                modified_predictor = 1;
 
-            for (i = 0; i < 3; i++) {
-                int pred;
+            for (i=0;i<nb_components;i++) {
+                int pred, dc;
 
                 topleft[i] = top[i];
                 top[i]     = buffer[mb_x][i];
 
                 PREDICT(pred, topleft[i], top[i], left[i], modified_predictor);
 
+                dc = mjpeg_decode_dc(s, s->dc_index[i]);
+                if(dc == 0xFFFFF)
+                    return -1;
+
                 left[i] = buffer[mb_x][i] =
-                    mask & (pred + (mjpeg_decode_dc(s, s->dc_index[i]) << point_transform));
+                    mask & (pred + (dc << point_transform));
             }
 
             if (s->restart_interval && !--s->restart_count) {
@@ -714,24 +941,47 @@ static int ljpeg_decode_rgb_scan(MJpegDecodeContext *s, int predictor,
                 skip_bits(&s->gb, 16); /* skip RSTn */
             }
         }
-
-        if (s->rct) {
+        if (s->nb_components == 4) {
+            for(i=0; i<nb_components; i++) {
+                int c= s->comp_index[i];
+                if (s->bits <= 8) {
+                    for(mb_x = 0; mb_x < s->mb_width; mb_x++) {
+                        ptr[4*mb_x+3-c] = buffer[mb_x][i];
+                    }
+                } else if(s->bits == 9) {
+                    return AVERROR_PATCHWELCOME;
+                } else {
+                    for(mb_x = 0; mb_x < s->mb_width; mb_x++) {
+                        ((uint16_t*)ptr)[4*mb_x+c] = buffer[mb_x][i];
+                    }
+                }
+            }
+        } else if (s->rct) {
             for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
-                ptr[4 * mb_x + 1] = buffer[mb_x][0] - ((buffer[mb_x][1] + buffer[mb_x][2] - 0x200) >> 2);
-                ptr[4 * mb_x + 0] = buffer[mb_x][1] + ptr[4 * mb_x + 1];
-                ptr[4 * mb_x + 2] = buffer[mb_x][2] + ptr[4 * mb_x + 1];
+                ptr[3*mb_x + 1] = buffer[mb_x][0] - ((buffer[mb_x][1] + buffer[mb_x][2] - 0x200) >> 2);
+                ptr[3*mb_x + 0] = buffer[mb_x][1] + ptr[3*mb_x + 1];
+                ptr[3*mb_x + 2] = buffer[mb_x][2] + ptr[3*mb_x + 1];
             }
         } else if (s->pegasus_rct) {
             for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
-                ptr[4 * mb_x + 1] = buffer[mb_x][0] - ((buffer[mb_x][1] + buffer[mb_x][2]) >> 2);
-                ptr[4 * mb_x + 0] = buffer[mb_x][1] + ptr[4 * mb_x + 1];
-                ptr[4 * mb_x + 2] = buffer[mb_x][2] + ptr[4 * mb_x + 1];
+                ptr[3*mb_x + 1] = buffer[mb_x][0] - ((buffer[mb_x][1] + buffer[mb_x][2]) >> 2);
+                ptr[3*mb_x + 0] = buffer[mb_x][1] + ptr[3*mb_x + 1];
+                ptr[3*mb_x + 2] = buffer[mb_x][2] + ptr[3*mb_x + 1];
             }
         } else {
-            for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
-                ptr[4 * mb_x + 0] = buffer[mb_x][2];
-                ptr[4 * mb_x + 1] = buffer[mb_x][1];
-                ptr[4 * mb_x + 2] = buffer[mb_x][0];
+            for(i=0; i<nb_components; i++) {
+                int c= s->comp_index[i];
+                if (s->bits <= 8) {
+                    for(mb_x = 0; mb_x < s->mb_width; mb_x++) {
+                        ptr[3*mb_x+2-c] = buffer[mb_x][i];
+                    }
+                } else if(s->bits == 9) {
+                    return AVERROR_PATCHWELCOME;
+                } else {
+                    for(mb_x = 0; mb_x < s->mb_width; mb_x++) {
+                        ((uint16_t*)ptr)[3*mb_x+2-c] = buffer[mb_x][i];
+                    }
+                }
             }
         }
     }
@@ -741,48 +991,88 @@ static int ljpeg_decode_rgb_scan(MJpegDecodeContext *s, int predictor,
 static int ljpeg_decode_yuv_scan(MJpegDecodeContext *s, int predictor,
                                  int point_transform, int nb_components)
 {
-    int i, mb_x, mb_y;
+    int i, mb_x, mb_y, mask;
+    int bits= (s->bits+7)&~7;
+    int resync_mb_y = 0;
+    int resync_mb_x = 0;
+
+    point_transform += bits - s->bits;
+    mask = ((1 << s->bits) - 1) << point_transform;
+
+    av_assert0(nb_components>=1 && nb_components<=4);
 
     for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
         for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
-            if (s->restart_interval && !s->restart_count)
+            if (s->restart_interval && !s->restart_count){
                 s->restart_count = s->restart_interval;
+                resync_mb_x = mb_x;
+                resync_mb_y = mb_y;
+            }
 
-            if (mb_x == 0 || mb_y == 0 || s->interlaced) {
+            if(!mb_x || mb_y == resync_mb_y || mb_y == resync_mb_y+1 && mb_x < resync_mb_x || s->interlaced){
+                int toprow  = mb_y == resync_mb_y || mb_y == resync_mb_y+1 && mb_x < resync_mb_x;
+                int leftcol = !mb_x || mb_y == resync_mb_y && mb_x == resync_mb_x;
                 for (i = 0; i < nb_components; i++) {
                     uint8_t *ptr;
+                    uint16_t *ptr16;
                     int n, h, v, x, y, c, j, linesize;
-                    n        = s->nb_blocks[i];
-                    c        = s->comp_index[i];
-                    h        = s->h_scount[i];
-                    v        = s->v_scount[i];
-                    x        = 0;
-                    y        = 0;
-                    linesize = s->linesize[c];
-
-                    for (j = 0; j < n; j++) {
-                        int pred;
-                        // FIXME optimize this crap
-                        ptr = s->picture_ptr->data[c] +
-                              (linesize * (v * mb_y + y)) +
-                              (h * mb_x + x);
-                        if (y == 0 && mb_y == 0) {
-                            if (x == 0 && mb_x == 0)
-                                pred = 128 << point_transform;
-                            else
-                                pred = ptr[-1];
-                        } else {
-                            if (x == 0 && mb_x == 0)
-                                pred = ptr[-linesize];
-                            else
-                                PREDICT(pred, ptr[-linesize - 1],
-                                        ptr[-linesize], ptr[-1], predictor);
-                       }
+                    n = s->nb_blocks[i];
+                    c = s->comp_index[i];
+                    h = s->h_scount[i];
+                    v = s->v_scount[i];
+                    x = 0;
+                    y = 0;
+                    linesize= s->linesize[c];
+
+                    if(bits>8) linesize /= 2;
+
+                    for(j=0; j<n; j++) {
+                        int pred, dc;
+
+                        dc = mjpeg_decode_dc(s, s->dc_index[i]);
+                        if(dc == 0xFFFFF)
+                            return -1;
+                        if(bits<=8){
+                        ptr = s->picture_ptr->data[c] + (linesize * (v * mb_y + y)) + (h * mb_x + x); //FIXME optimize this crap
+                        if(y==0 && toprow){
+                            if(x==0 && leftcol){
+                                pred= 1 << (bits - 1);
+                            }else{
+                                pred= ptr[-1];
+                            }
+                        }else{
+                            if(x==0 && leftcol){
+                                pred= ptr[-linesize];
+                            }else{
+                                PREDICT(pred, ptr[-linesize-1], ptr[-linesize], ptr[-1], predictor);
+                            }
+                        }
 
                         if (s->interlaced && s->bottom_field)
                             ptr += linesize >> 1;
-                        *ptr = pred + (mjpeg_decode_dc(s, s->dc_index[i]) << point_transform);
+                        pred &= mask;
+                        *ptr= pred + (dc << point_transform);
+                        }else{
+                            ptr16 = (uint16_t*)(s->picture_ptr->data[c] + 2*(linesize * (v * mb_y + y)) + 2*(h * mb_x + x)); //FIXME optimize this crap
+                            if(y==0 && toprow){
+                                if(x==0 && leftcol){
+                                    pred= 1 << (bits - 1);
+                                }else{
+                                    pred= ptr16[-1];
+                                }
+                            }else{
+                                if(x==0 && leftcol){
+                                    pred= ptr16[-linesize];
+                                }else{
+                                    PREDICT(pred, ptr16[-linesize-1], ptr16[-linesize], ptr16[-1], predictor);
+                                }
+                            }
 
+                            if (s->interlaced && s->bottom_field)
+                                ptr16 += linesize >> 1;
+                            pred &= mask;
+                            *ptr16= pred + (dc << point_transform);
+                        }
                         if (++x == h) {
                             x = 0;
                             y++;
@@ -792,7 +1082,8 @@ static int ljpeg_decode_yuv_scan(MJpegDecodeContext *s, int predictor,
             } else {
                 for (i = 0; i < nb_components; i++) {
                     uint8_t *ptr;
-                    int n, h, v, x, y, c, j, linesize;
+                    uint16_t *ptr16;
+                    int n, h, v, x, y, c, j, linesize, dc;
                     n        = s->nb_blocks[i];
                     c        = s->comp_index[i];
                     h        = s->h_scount[i];
@@ -801,16 +1092,30 @@ static int ljpeg_decode_yuv_scan(MJpegDecodeContext *s, int predictor,
                     y        = 0;
                     linesize = s->linesize[c];
 
+                    if(bits>8) linesize /= 2;
+
                     for (j = 0; j < n; j++) {
                         int pred;
 
-                        // FIXME optimize this crap
-                        ptr = s->picture_ptr->data[c] +
+                        dc = mjpeg_decode_dc(s, s->dc_index[i]);
+                        if(dc == 0xFFFFF)
+                            return -1;
+                        if(bits<=8){
+                            ptr = s->picture_ptr->data[c] +
                               (linesize * (v * mb_y + y)) +
-                              (h * mb_x + x);
-                        PREDICT(pred, ptr[-linesize - 1],
-                                ptr[-linesize], ptr[-1], predictor);
-                        *ptr = pred + (mjpeg_decode_dc(s, s->dc_index[i]) << point_transform);
+                              (h * mb_x + x); //FIXME optimize this crap
+                            PREDICT(pred, ptr[-linesize-1], ptr[-linesize], ptr[-1], predictor);
+
+                            pred &= mask;
+                            *ptr = pred + (dc << point_transform);
+                        }else{
+                            ptr16 = (uint16_t*)(s->picture_ptr->data[c] + 2*(linesize * (v * mb_y + y)) + 2*(h * mb_x + x)); //FIXME optimize this crap
+                            PREDICT(pred, ptr16[-linesize-1], ptr16[-linesize], ptr16[-1], predictor);
+
+                            pred &= mask;
+                            *ptr16= pred + (dc << point_transform);
+                        }
+
                         if (++x == h) {
                             x = 0;
                             y++;
@@ -827,18 +1132,58 @@ static int ljpeg_decode_yuv_scan(MJpegDecodeContext *s, int predictor,
     return 0;
 }
 
+static av_always_inline void mjpeg_copy_block(MJpegDecodeContext *s,
+                                              uint8_t *dst, const uint8_t *src,
+                                              int linesize, int lowres)
+{ /* Copy one block from src to dst; the copy routine is selected by lowres (0..3). */
+    switch (lowres) { /* no default: a lowres value outside 0..3 copies nothing */
+    case 0: s->hdsp.put_pixels_tab[1][0](dst, src, linesize, 8); /* presumably a full 8-row block -- TODO confirm against hpeldsp */
+        break;
+    case 1: copy_block4(dst, src, linesize, linesize, 4); /* linesize is passed twice, presumably as dst and src stride */
+        break;
+    case 2: copy_block2(dst, src, linesize, linesize, 2); /* same argument pattern as case 1, with 2 */
+        break;
+    case 3: *dst = *src; /* exactly one byte is copied */
+        break;
+    }
+}
+
+static void shift_output(MJpegDecodeContext *s, uint8_t *ptr, int linesize)
+{ /* Left-shift, in place, every sample of the size x size block starting at ptr. */
+    int block_x, block_y;
+    int size = 8 >> s->avctx->lowres; /* block edge: 8 halved once per lowres level */
+    if (s->bits > 8) { /* samples are accessed as uint16_t and shifted left by 16 - s->bits */
+        for (block_y=0; block_y<size; block_y++)
+            for (block_x=0; block_x<size; block_x++)
+                *(uint16_t*)(ptr + 2*block_x + block_y*linesize) <<= 16 - s->bits; /* offsets are in bytes: x is scaled by 2, linesize is not */
+    } else { /* samples are accessed as single bytes and shifted left by 8 - s->bits */
+        for (block_y=0; block_y<size; block_y++)
+            for (block_x=0; block_x<size; block_x++)
+                *(ptr + block_x + block_y*linesize) <<= 8 - s->bits; /* shift count is 0 when s->bits == 8; visible callers only call when s->bits & 7 */
+    }
+}
+
 static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah,
                              int Al, const uint8_t *mb_bitmask,
+                             int mb_bitmask_size,
                              const AVFrame *reference)
 {
     int i, mb_x, mb_y;
     uint8_t *data[MAX_COMPONENTS];
     const uint8_t *reference_data[MAX_COMPONENTS];
     int linesize[MAX_COMPONENTS];
-    GetBitContext mb_bitmask_gb;
+    GetBitContext mb_bitmask_gb = {0}; // initialize to silence gcc warning
+    int bytes_per_pixel = 1 + (s->bits > 8);
 
-    if (mb_bitmask)
+    if (mb_bitmask) {
+        if (mb_bitmask_size != (s->mb_width * s->mb_height + 7)>>3) {
+            av_log(s->avctx, AV_LOG_ERROR, "mb_bitmask_size mismatches\n");
+            return AVERROR_INVALIDDATA;
+        }
         init_get_bits(&mb_bitmask_gb, mb_bitmask, s->mb_width * s->mb_height);
+    }
+
+    s->restart_count = 0;
 
     for (i = 0; i < nb_components; i++) {
         int c   = s->comp_index[i];
@@ -871,27 +1216,29 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah,
                 x = 0;
                 y = 0;
                 for (j = 0; j < n; j++) {
-                    block_offset = ((linesize[c] * (v * mb_y + y) * 8) +
-                                    (h * mb_x + x) * 8);
+                    block_offset = (((linesize[c] * (v * mb_y + y) * 8) +
+                                     (h * mb_x + x) * 8 * bytes_per_pixel) >> s->avctx->lowres);
 
                     if (s->interlaced && s->bottom_field)
                         block_offset += linesize[c] >> 1;
                     ptr = data[c] + block_offset;
                     if (!s->progressive) {
                         if (copy_mb)
-                            s->hdsp.put_pixels_tab[1][0](ptr,
-                                reference_data[c] + block_offset,
-                                linesize[c], 8);
+                            mjpeg_copy_block(s, ptr, reference_data[c] + block_offset,
+                                             linesize[c], s->avctx->lowres);
+
                         else {
                             s->bdsp.clear_block(s->block);
                             if (decode_block(s, s->block, i,
                                              s->dc_index[i], s->ac_index[i],
-                                             s->quant_matrixes[s->quant_index[c]]) < 0) {
+                                             s->quant_matrixes[s->quant_sindex[i]]) < 0) {
                                 av_log(s->avctx, AV_LOG_ERROR,
                                        "error y=%d x=%d\n", mb_y, mb_x);
                                 return AVERROR_INVALIDDATA;
                             }
                             s->idsp.idct_put(ptr, linesize[c], s->block);
+                            if (s->bits & 7)
+                                shift_output(s, ptr, linesize[c]);
                         }
                     } else {
                         int block_idx  = s->block_stride[c] * (v * mb_y + y) +
@@ -899,9 +1246,9 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah,
                         int16_t *block = s->blocks[c][block_idx];
                         if (Ah)
                             block[0] += get_bits1(&s->gb) *
-                                        s->quant_matrixes[s->quant_index[c]][0] << Al;
+                                        s->quant_matrixes[s->quant_sindex[i]][0] << Al;
                         else if (decode_dc_progressive(s, block, i, s->dc_index[i],
-                                                       s->quant_matrixes[s->quant_index[c]],
+                                                       s->quant_matrixes[s->quant_sindex[i]],
                                                        Al) < 0) {
                             av_log(s->avctx, AV_LOG_ERROR,
                                    "error y=%d x=%d\n", mb_y, mb_x);
@@ -919,72 +1266,50 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah,
                 }
             }
 
-            if (s->restart_interval) {
-                s->restart_count--;
-                i = 8 + ((-get_bits_count(&s->gb)) & 7);
-                /* skip RSTn */
-                if (show_bits(&s->gb, i) == (1 << i) - 1) {
-                    int pos = get_bits_count(&s->gb);
-                    align_get_bits(&s->gb);
-                    while (get_bits_left(&s->gb) >= 8 && show_bits(&s->gb, 8) == 0xFF)
-                        skip_bits(&s->gb, 8);
-                    if ((get_bits(&s->gb, 8) & 0xF8) == 0xD0) {
-                        for (i = 0; i < nb_components; i++) /* reset dc */
-                            s->last_dc[i] = 1024;
-                    } else
-                        skip_bits_long(&s->gb, pos - get_bits_count(&s->gb));
-                }
-            }
+            handle_rstn(s, nb_components);
         }
     }
     return 0;
 }
 
 static int mjpeg_decode_scan_progressive_ac(MJpegDecodeContext *s, int ss,
-                                            int se, int Ah, int Al,
-                                            const uint8_t *mb_bitmask,
-                                            const AVFrame *reference)
+                                            int se, int Ah, int Al)
 {
     int mb_x, mb_y;
     int EOBRUN = 0;
     int c = s->comp_index[0];
     uint8_t *data = s->picture_ptr->data[c];
-    const uint8_t *reference_data = reference ? reference->data[c] : NULL;
     int linesize  = s->linesize[c];
     int last_scan = 0;
-    int16_t *quant_matrix = s->quant_matrixes[s->quant_index[c]];
-    GetBitContext mb_bitmask_gb;
+    int16_t *quant_matrix = s->quant_matrixes[s->quant_sindex[0]];
+    int bytes_per_pixel = 1 + (s->bits > 8);
 
-    if (ss < 0  || ss >= 64 ||
-        se < ss || se >= 64 ||
-        Ah < 0  || Al < 0)
+    av_assert0(ss>=0 && Ah>=0 && Al>=0);
+    if (se < ss || se > 63) {
+        av_log(s->avctx, AV_LOG_ERROR, "SS/SE %d/%d is invalid\n", ss, se);
         return AVERROR_INVALIDDATA;
-
-    if (mb_bitmask)
-        init_get_bits(&mb_bitmask_gb, mb_bitmask, s->mb_width * s->mb_height);
+    }
 
     if (!Al) {
-        s->coefs_finished[c] |= (1LL << (se + 1)) - (1LL << ss);
+        s->coefs_finished[c] |= (2ULL << se) - (1ULL << ss);
         last_scan = !~s->coefs_finished[c];
     }
 
-    if (s->interlaced && s->bottom_field) {
-        int offset      = linesize >> 1;
-        data           += offset;
-        reference_data += offset;
-    }
+    if (s->interlaced && s->bottom_field)
+        data += linesize >> 1;
+
+    s->restart_count = 0;
 
     for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
-        int block_offset = mb_y * linesize * 8;
-        uint8_t *ptr     = data + block_offset;
+        uint8_t *ptr     = data + (mb_y * linesize * 8 >> s->avctx->lowres);
         int block_idx    = mb_y * s->block_stride[c];
         int16_t (*block)[64] = &s->blocks[c][block_idx];
         uint8_t *last_nnz    = &s->last_nnz[c][block_idx];
         for (mb_x = 0; mb_x < s->mb_width; mb_x++, block++, last_nnz++) {
-            const int copy_mb = mb_bitmask && !get_bits1(&mb_bitmask_gb);
-
-            if (!copy_mb) {
                 int ret;
+                if (s->restart_interval && !s->restart_count)
+                    s->restart_count = s->restart_interval;
+
                 if (Ah)
                     ret = decode_block_refinement(s, *block, last_nnz, s->ac_index[0],
                                                   quant_matrix, ss, se, Al, &EOBRUN);
@@ -996,31 +1321,35 @@ static int mjpeg_decode_scan_progressive_ac(MJpegDecodeContext *s, int ss,
                            "error y=%d x=%d\n", mb_y, mb_x);
                     return AVERROR_INVALIDDATA;
                 }
-            }
 
             if (last_scan) {
-                if (copy_mb) {
-                    s->hdsp.put_pixels_tab[1][0](ptr,
-                                                 reference_data + block_offset,
-                                                 linesize, 8);
-                } else {
                     s->idsp.idct_put(ptr, linesize, *block);
-                    ptr += 8;
-                }
+                    if (s->bits & 7)
+                        shift_output(s, ptr, linesize);
+                    ptr += bytes_per_pixel*8 >> s->avctx->lowres;
             }
+            if (handle_rstn(s, 0))
+                EOBRUN = 0;
         }
     }
     return 0;
 }
 
 int ff_mjpeg_decode_sos(MJpegDecodeContext *s, const uint8_t *mb_bitmask,
-                        const AVFrame *reference)
+                        int mb_bitmask_size, const AVFrame *reference)
 {
     int len, nb_components, i, h, v, predictor, point_transform;
     int index, id, ret;
     const int block_size = s->lossless ? 1 : 8;
     int ilv, prev_shift;
 
+    if (!s->got_picture) {
+        av_log(s->avctx, AV_LOG_WARNING,
+                "Can not process SOS before SOF, skipping\n");
+        return -1;
+    }
+
+    av_assert0(s->picture_ptr->data[0]);
     /* XXX: verify len field validity */
     len = get_bits(&s->gb, 16);
     nb_components = get_bits(&s->gb, 8);
@@ -1050,27 +1379,35 @@ int ff_mjpeg_decode_sos(MJpegDecodeContext *s, const uint8_t *mb_bitmask,
             && nb_components == 3 && s->nb_components == 3 && i)
             index = 3 - i;
 
-        s->comp_index[i] = index;
-
+        s->quant_sindex[i] = s->quant_index[index];
         s->nb_blocks[i] = s->h_count[index] * s->v_count[index];
         s->h_scount[i]  = s->h_count[index];
         s->v_scount[i]  = s->v_count[index];
 
+        if(nb_components == 3 && s->nb_components == 3 && s->avctx->pix_fmt == AV_PIX_FMT_GBR24P)
+            index = (i+2)%3;
+        if(nb_components == 1 && s->nb_components == 3 && s->avctx->pix_fmt == AV_PIX_FMT_GBR24P)
+            index = (index+2)%3;
+
+        s->comp_index[i] = index;
+
         s->dc_index[i] = get_bits(&s->gb, 4);
         s->ac_index[i] = get_bits(&s->gb, 4);
 
         if (s->dc_index[i] <  0 || s->ac_index[i] < 0 ||
             s->dc_index[i] >= 4 || s->ac_index[i] >= 4)
             goto out_of_range;
-        if (!s->vlcs[0][s->dc_index[i]].table ||
-            !s->vlcs[1][s->ac_index[i]].table)
+        if (!s->vlcs[0][s->dc_index[i]].table || !(s->progressive ? s->vlcs[2][s->ac_index[0]].table : s->vlcs[1][s->ac_index[i]].table))
             goto out_of_range;
     }
 
     predictor = get_bits(&s->gb, 8);       /* JPEG Ss / lossless JPEG predictor /JPEG-LS NEAR */
     ilv = get_bits(&s->gb, 8);             /* JPEG Se / JPEG-LS ILV */
-    prev_shift      = get_bits(&s->gb, 4); /* Ah */
-    point_transform = get_bits(&s->gb, 4); /* Al */
+    if(s->avctx->codec_tag != AV_RL32("CJPG")){
+        prev_shift      = get_bits(&s->gb, 4); /* Ah */
+        point_transform = get_bits(&s->gb, 4); /* Al */
+    }else
+        prev_shift = point_transform = 0;
 
     if (nb_components > 1) {
         /* interleaved stream */
@@ -1087,10 +1424,10 @@ int ff_mjpeg_decode_sos(MJpegDecodeContext *s, const uint8_t *mb_bitmask,
     }
 
     if (s->avctx->debug & FF_DEBUG_PICT_INFO)
-        av_log(s->avctx, AV_LOG_DEBUG, "%s %s p:%d >>:%d ilv:%d bits:%d %s\n",
+        av_log(s->avctx, AV_LOG_DEBUG, "%s %s p:%d >>:%d ilv:%d bits:%d skip:%d %s comp:%d\n",
                s->lossless ? "lossless" : "sequential DCT", s->rgb ? "RGB" : "",
-               predictor, point_transform, ilv, s->bits,
-               s->pegasus_rct ? "PRCT" : (s->rct ? "RCT" : ""));
+               predictor, point_transform, ilv, s->bits, s->mjpb_skiptosod,
+               s->pegasus_rct ? "PRCT" : (s->rct ? "RCT" : ""), nb_components);
 
 
     /* mjpeg-b can have padding bytes between sos and image data, skip them */
@@ -1099,9 +1436,10 @@ int ff_mjpeg_decode_sos(MJpegDecodeContext *s, const uint8_t *mb_bitmask,
 
 next_field:
     for (i = 0; i < nb_components; i++)
-        s->last_dc[i] = 1024;
+        s->last_dc[i] = (4 << s->bits);
 
     if (s->lossless) {
+        av_assert0(s->picture_ptr == s->picture);
         if (CONFIG_JPEGLS_DECODER && s->ls) {
 //            for () {
 //            reset_ls_coding_parameters(s, 0);
@@ -1111,8 +1449,7 @@ next_field:
                 return ret;
         } else {
             if (s->rgb) {
-                if ((ret = ljpeg_decode_rgb_scan(s, predictor,
-                                                 point_transform)) < 0)
+                if ((ret = ljpeg_decode_rgb_scan(s, nb_components, predictor, point_transform)) < 0)
                     return ret;
             } else {
                 if ((ret = ljpeg_decode_yuv_scan(s, predictor,
@@ -1123,16 +1460,15 @@ next_field:
         }
     } else {
         if (s->progressive && predictor) {
+            av_assert0(s->picture_ptr == s->picture);
             if ((ret = mjpeg_decode_scan_progressive_ac(s, predictor,
                                                         ilv, prev_shift,
-                                                        point_transform,
-                                                        mb_bitmask,
-                                                        reference)) < 0)
+                                                        point_transform)) < 0)
                 return ret;
         } else {
             if ((ret = mjpeg_decode_scan(s, nb_components,
                                          prev_shift, point_transform,
-                                         mb_bitmask, reference)) < 0)
+                                         mb_bitmask, mb_bitmask_size, reference)) < 0)
                 return ret;
         }
     }
@@ -1143,7 +1479,7 @@ next_field:
         GetBitContext bak = s->gb;
         align_get_bits(&bak);
         if (show_bits(&bak, 16) == 0xFFD1) {
-            av_dlog(s->avctx, "AVRn interlaced picture marker found\n");
+            av_log(s->avctx, AV_LOG_DEBUG, "AVRn interlaced picture marker found\n");
             s->gb = bak;
             skip_bits(&s->gb, 16);
             s->bottom_field ^= 1;
@@ -1176,22 +1512,24 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
     int len, id, i;
 
     len = get_bits(&s->gb, 16);
-    if (len < 5)
+    if (len < 6)
         return AVERROR_INVALIDDATA;
     if (8 * len > get_bits_left(&s->gb))
         return AVERROR_INVALIDDATA;
 
     id   = get_bits_long(&s->gb, 32);
-    id   = av_be2ne32(id);
     len -= 6;
 
-    if (s->avctx->debug & FF_DEBUG_STARTCODE)
-        av_log(s->avctx, AV_LOG_DEBUG, "APPx %8X\n", id);
+    if (s->avctx->debug & FF_DEBUG_STARTCODE) {
+        char id_str[32];
+        av_get_codec_tag_string(id_str, sizeof(id_str), av_bswap32(id));
+        av_log(s->avctx, AV_LOG_DEBUG, "APPx (%s / %8X) len=%d\n", id_str, id, len);
+    }
 
     /* Buggy AVID, it puts EOI only at every 10th frame. */
     /* Also, this fourcc is used by non-avid files too, it holds some
        information, but it's always present in AVID-created files. */
-    if (id == AV_RL32("AVI1")) {
+    if (id == AV_RB32("AVI1")) {
         /* structure:
             4bytes      AVI1
             1bytes      polarity
@@ -1199,12 +1537,9 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
             4bytes      field_size
             4bytes      field_size_less_padding
         */
-        s->buggy_avid = 1;
-        i = get_bits(&s->gb, 8);
-        if (i == 2)
-            s->bottom_field = 1;
-        else if (i == 1)
-            s->bottom_field = 0;
+            s->buggy_avid = 1;
+        i = get_bits(&s->gb, 8); len--;
+        av_log(s->avctx, AV_LOG_DEBUG, "polarity %d\n", i);
 #if 0
         skip_bits(&s->gb, 8);
         skip_bits(&s->gb, 32);
@@ -1216,7 +1551,7 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
 
 //    len -= 2;
 
-    if (id == AV_RL32("JFIF")) {
+    if (id == AV_RB32("JFIF")) {
         int t_w, t_h, v1, v2;
         skip_bits(&s->gb, 8); /* the trailing zero-byte */
         v1 = get_bits(&s->gb, 8);
@@ -1245,26 +1580,26 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
         goto out;
     }
 
-    if (id == AV_RL32("Adob") && (get_bits(&s->gb, 8) == 'e')) {
-        if (s->avctx->debug & FF_DEBUG_PICT_INFO)
-            av_log(s->avctx, AV_LOG_INFO, "mjpeg: Adobe header found\n");
+    if (id == AV_RB32("Adob") && (get_bits(&s->gb, 8) == 'e')) {
         skip_bits(&s->gb, 16); /* version */
         skip_bits(&s->gb, 16); /* flags0 */
         skip_bits(&s->gb, 16); /* flags1 */
-        skip_bits(&s->gb,  8); /* transform */
+        s->adobe_transform = get_bits(&s->gb,  8);
+        if (s->avctx->debug & FF_DEBUG_PICT_INFO)
+            av_log(s->avctx, AV_LOG_INFO, "mjpeg: Adobe header found, transform=%d\n", s->adobe_transform);
         len -= 7;
         goto out;
     }
 
-    if (id == AV_RL32("LJIF")) {
+    if (id == AV_RB32("LJIF")) {
         if (s->avctx->debug & FF_DEBUG_PICT_INFO)
             av_log(s->avctx, AV_LOG_INFO,
                    "Pegasus lossless jpeg header found\n");
         skip_bits(&s->gb, 16); /* version ? */
-        skip_bits(&s->gb, 16); /* unknwon always 0? */
-        skip_bits(&s->gb, 16); /* unknwon always 0? */
-        skip_bits(&s->gb, 16); /* unknwon always 0? */
-        switch (get_bits(&s->gb, 8)) {
+        skip_bits(&s->gb, 16); /* unknown always 0? */
+        skip_bits(&s->gb, 16); /* unknown always 0? */
+        skip_bits(&s->gb, 16); /* unknown always 0? */
+        switch (i=get_bits(&s->gb, 8)) {
         case 1:
             s->rgb         = 1;
             s->pegasus_rct = 0;
@@ -1274,19 +1609,106 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
             s->pegasus_rct = 1;
             break;
         default:
-            av_log(s->avctx, AV_LOG_ERROR, "unknown colorspace\n");
+            av_log(s->avctx, AV_LOG_ERROR, "unknown colorspace %d\n", i);
         }
         len -= 9;
         goto out;
     }
+    if (id == AV_RL32("colr") && len > 0) {
+        s->colr = get_bits(&s->gb, 8);
+        if (s->avctx->debug & FF_DEBUG_PICT_INFO)
+            av_log(s->avctx, AV_LOG_INFO, "COLR %d\n", s->colr);
+        len --;
+        goto out;
+    }
+    if (id == AV_RL32("xfrm") && len > 0) {
+        s->xfrm = get_bits(&s->gb, 8);
+        if (s->avctx->debug & FF_DEBUG_PICT_INFO)
+            av_log(s->avctx, AV_LOG_INFO, "XFRM %d\n", s->xfrm);
+        len --;
+        goto out;
+    }
+
+    /* JPS extension by VRex */
+    if (s->start_code == APP3 && id == AV_RB32("_JPS") && len >= 10) {
+        int flags, layout, type;
+        if (s->avctx->debug & FF_DEBUG_PICT_INFO)
+            av_log(s->avctx, AV_LOG_INFO, "_JPSJPS_\n");
+
+        skip_bits(&s->gb, 32); len -= 4;  /* JPS_ */
+        skip_bits(&s->gb, 16); len -= 2;  /* block length */
+        skip_bits(&s->gb, 8);             /* reserved */
+        flags  = get_bits(&s->gb, 8);
+        layout = get_bits(&s->gb, 8);
+        type   = get_bits(&s->gb, 8);
+        len -= 4;
+
+        s->stereo3d = av_stereo3d_alloc();
+        if (!s->stereo3d) {
+            goto out;
+        }
+        if (type == 0) {
+            s->stereo3d->type = AV_STEREO3D_2D;
+        } else if (type == 1) {
+            switch (layout) {
+            case 0x01:
+                s->stereo3d->type = AV_STEREO3D_LINES;
+                break;
+            case 0x02:
+                s->stereo3d->type = AV_STEREO3D_SIDEBYSIDE;
+                break;
+            case 0x03:
+                s->stereo3d->type = AV_STEREO3D_TOPBOTTOM;
+                break;
+            }
+            if (!(flags & 0x04)) {
+                s->stereo3d->flags = AV_STEREO3D_FLAG_INVERT;
+            }
+        }
+        goto out;
+    }
+
+    /* EXIF metadata */
+    if (s->start_code == APP1 && id == AV_RB32("Exif") && len >= 2) {
+        GetByteContext gbytes;
+        int ret, le, ifd_offset, bytes_read;
+        const uint8_t *aligned;
+
+        skip_bits(&s->gb, 16); // skip padding
+        len -= 2;
+
+        // init byte wise reading
+        aligned = align_get_bits(&s->gb);
+        bytestream2_init(&gbytes, aligned, len);
+
+        // read TIFF header
+        ret = ff_tdecode_header(&gbytes, &le, &ifd_offset);
+        if (ret) {
+            av_log(s->avctx, AV_LOG_ERROR, "mjpeg: invalid TIFF header in EXIF data\n");
+        } else {
+            bytestream2_seek(&gbytes, ifd_offset, SEEK_SET);
+
+            // read 0th IFD and store the metadata
+            // (return values > 0 indicate the presence of subimage metadata)
+            ret = avpriv_exif_decode_ifd(s->avctx, &gbytes, le, 0, &s->exif_metadata);
+            if (ret < 0) {
+                av_log(s->avctx, AV_LOG_ERROR, "mjpeg: error decoding EXIF data\n");
+            }
+        }
+
+        bytes_read = bytestream2_tell(&gbytes);
+        skip_bits(&s->gb, bytes_read << 3);
+        len -= bytes_read;
+
+        goto out;
+    }
 
     /* Apple MJPEG-A */
     if ((s->start_code == APP1) && (len > (0x28 - 8))) {
         id   = get_bits_long(&s->gb, 32);
-        id   = av_be2ne32(id);
         len -= 4;
         /* Apple MJPEG-A */
-        if (id == AV_RL32("mjpg")) {
+        if (id == AV_RB32("mjpg")) {
 #if 0
             skip_bits(&s->gb, 32); /* field size */
             skip_bits(&s->gb, 32); /* pad field size */
@@ -1328,15 +1750,15 @@ static int mjpeg_decode_com(MJpegDecodeContext *s)
                 cbuf[i] = 0;
 
             if (s->avctx->debug & FF_DEBUG_PICT_INFO)
-                av_log(s->avctx, AV_LOG_INFO, "mjpeg comment: '%s'\n", cbuf);
+                av_log(s->avctx, AV_LOG_INFO, "comment: '%s'\n", cbuf);
 
             /* buggy avid, it puts EOI only at every 10th frame */
-            if (!strcmp(cbuf, "AVID")) {
-                s->buggy_avid = 1;
+            if (!strncmp(cbuf, "AVID", 4)) {
+                parse_avid(s, cbuf, len);
             } else if (!strcmp(cbuf, "CS=ITU601"))
                 s->cs_itu601 = 1;
-            else if ((len > 20 && !strncmp(cbuf, "Intel(R) JPEG Library", 21)) ||
-                     (len > 19 && !strncmp(cbuf, "Metasoft MJPEG Codec", 20)))
+            else if ((!strncmp(cbuf, "Intel(R) JPEG Library, version 1", 32) && s->avctx->codec_tag) ||
+                     (!strncmp(cbuf, "Metasoft MJPEG Codec", 20)))
                 s->flipped = 1;
 
             av_free(cbuf);
@@ -1353,22 +1775,19 @@ static int find_marker(const uint8_t **pbuf_ptr, const uint8_t *buf_end)
     const uint8_t *buf_ptr;
     unsigned int v, v2;
     int val;
-#ifdef DEBUG
     int skipped = 0;
-#endif
 
     buf_ptr = *pbuf_ptr;
-    while (buf_ptr < buf_end) {
+    while (buf_end - buf_ptr > 1) {
         v  = *buf_ptr++;
         v2 = *buf_ptr;
         if ((v == 0xff) && (v2 >= 0xc0) && (v2 <= 0xfe) && buf_ptr < buf_end) {
             val = *buf_ptr++;
             goto found;
         }
-#ifdef DEBUG
         skipped++;
-#endif
     }
+    buf_ptr = buf_end;
     val = -1;
 found:
     av_dlog(NULL, "find_marker skipped %d bytes\n", skipped);
@@ -1414,7 +1833,7 @@ int ff_mjpeg_find_marker(MJpegDecodeContext *s,
         memset(s->buffer + *unescaped_buf_size, 0,
                FF_INPUT_BUFFER_PADDING_SIZE);
 
-        av_log(s->avctx, AV_LOG_DEBUG, "escaping removed %td bytes\n",
+        av_log(s->avctx, AV_LOG_DEBUG, "escaping removed %"PTRDIFF_SPECIFIER" bytes\n",
                (buf_end - *buf_ptr) - (dst - s->buffer));
     } else if (start_code == SOS && s->ls) {
         const uint8_t *src = *buf_ptr;
@@ -1423,8 +1842,6 @@ int ff_mjpeg_find_marker(MJpegDecodeContext *s,
         int t = 0, b = 0;
         PutBitContext pb;
 
-        s->cur_scan++;
-
         /* find marker */
         while (src + t < buf_end) {
             uint8_t x = src[t++];
@@ -1432,7 +1849,7 @@ int ff_mjpeg_find_marker(MJpegDecodeContext *s,
                 while ((src + t < buf_end) && x == 0xff)
                     x = src[t++];
                 if (x & 0x80) {
-                    t -= 2;
+                    t -= FFMIN(2, t);
                     break;
                 }
             }
@@ -1473,11 +1890,16 @@ int ff_mjpeg_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     MJpegDecodeContext *s = avctx->priv_data;
     const uint8_t *buf_end, *buf_ptr;
     const uint8_t *unescaped_buf_ptr;
+    int hshift, vshift;
     int unescaped_buf_size;
     int start_code;
+    int i, index;
     int ret = 0;
 
-    s->got_picture = 0; // picture from previous image can not be reused
+    av_dict_free(&s->exif_metadata);
+    av_freep(&s->stereo3d);
+    s->adobe_transform = -1;
+
     buf_ptr = buf;
     buf_end = buf + buf_size;
     while (buf_ptr < buf_end) {
@@ -1487,21 +1909,22 @@ int ff_mjpeg_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                                           &unescaped_buf_size);
         /* EOF */
         if (start_code < 0) {
-            goto the_end;
+            break;
         } else if (unescaped_buf_size > INT_MAX / 8) {
             av_log(avctx, AV_LOG_ERROR,
                    "MJPEG packet 0x%x too big (%d/%d), corrupt data?\n",
                    start_code, unescaped_buf_size, buf_size);
             return AVERROR_INVALIDDATA;
         }
-
-        av_log(avctx, AV_LOG_DEBUG, "marker=%x avail_size_in_buf=%td\n",
+        av_log(avctx, AV_LOG_DEBUG, "marker=%x avail_size_in_buf=%"PTRDIFF_SPECIFIER"\n",
                start_code, buf_end - buf_ptr);
 
-        ret = init_get_bits(&s->gb, unescaped_buf_ptr,
-                            unescaped_buf_size * 8);
-        if (ret < 0)
-            return ret;
+        ret = init_get_bits8(&s->gb, unescaped_buf_ptr, unescaped_buf_size);
+
+        if (ret < 0) {
+            av_log(avctx, AV_LOG_ERROR, "invalid buffer\n");
+            goto fail;
+        }
 
         s->start_code = start_code;
         if (s->avctx->debug & FF_DEBUG_STARTCODE)
@@ -1518,6 +1941,8 @@ int ff_mjpeg_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         else if (start_code == COM)
             mjpeg_decode_com(s);
 
+        ret = -1;
+
         if (!CONFIG_JPEGLS_DECODER &&
             (start_code == SOF48 || start_code == LSE)) {
             av_log(avctx, AV_LOG_ERROR, "JPEG-LS support not enabled.\n");
@@ -1536,7 +1961,7 @@ int ff_mjpeg_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         case DHT:
             if ((ret = ff_mjpeg_decode_dht(s)) < 0) {
                 av_log(avctx, AV_LOG_ERROR, "huffman table decode error\n");
-                return ret;
+                goto fail;
             }
             break;
         case SOF0:
@@ -1545,39 +1970,37 @@ int ff_mjpeg_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             s->ls          = 0;
             s->progressive = 0;
             if ((ret = ff_mjpeg_decode_sof(s)) < 0)
-                return ret;
+                goto fail;
             break;
         case SOF2:
             s->lossless    = 0;
             s->ls          = 0;
             s->progressive = 1;
             if ((ret = ff_mjpeg_decode_sof(s)) < 0)
-                return ret;
+                goto fail;
             break;
         case SOF3:
             s->lossless    = 1;
             s->ls          = 0;
             s->progressive = 0;
             if ((ret = ff_mjpeg_decode_sof(s)) < 0)
-                return ret;
+                goto fail;
             break;
         case SOF48:
             s->lossless    = 1;
             s->ls          = 1;
             s->progressive = 0;
             if ((ret = ff_mjpeg_decode_sof(s)) < 0)
-                return ret;
+                goto fail;
             break;
         case LSE:
             if (!CONFIG_JPEGLS_DECODER ||
                 (ret = ff_jpegls_decode_lse(s)) < 0)
-                return ret;
+                goto fail;
             break;
         case EOI:
-            s->cur_scan = 0;
-            if ((s->buggy_avid && !s->interlaced) || s->restart_interval)
-                break;
 eoi_parser:
+            s->cur_scan = 0;
             if (!s->got_picture) {
                 av_log(avctx, AV_LOG_WARNING,
                        "Found EOI before any SOF, ignoring\n");
@@ -1587,43 +2010,34 @@ eoi_parser:
                 s->bottom_field ^= 1;
                 /* if not bottom field, do not output image yet */
                 if (s->bottom_field == !s->interlace_polarity)
-                    goto not_the_end;
+                    break;
             }
             if ((ret = av_frame_ref(frame, s->picture_ptr)) < 0)
                 return ret;
-            if (s->flipped) {
-                int i;
-                for (i = 0; frame->data[i]; i++) {
-                    int h = frame->height >> ((i == 1 || i == 2) ?
-                                              s->pix_desc->log2_chroma_h : 0);
-                    frame->data[i] += frame->linesize[i] * (h - 1);
-                    frame->linesize[i] *= -1;
-                }
-            }
             *got_frame = 1;
+            s->got_picture = 0;
+
+            if (!s->lossless) {
+                int qp = FFMAX3(s->qscale[0],
+                                s->qscale[1],
+                                s->qscale[2]);
+                int qpw = (s->width + 15) / 16;
+                AVBufferRef *qp_table_buf = av_buffer_alloc(qpw);
+                if (qp_table_buf) {
+                    memset(qp_table_buf->data, qp, qpw);
+                    av_frame_set_qp_table(data, qp_table_buf, 0, FF_QSCALE_TYPE_MPEG1);
+                }
 
-            if (!s->lossless &&
-                avctx->debug & FF_DEBUG_QP) {
-                av_log(avctx, AV_LOG_DEBUG,
-                       "QP: %d\n", FFMAX3(s->qscale[0],
-                                          s->qscale[1],
-                                          s->qscale[2]));
+                if(avctx->debug & FF_DEBUG_QP)
+                    av_log(avctx, AV_LOG_DEBUG, "QP: %d\n", qp);
             }
 
             goto the_end;
         case SOS:
-            if (!s->got_picture) {
-                av_log(avctx, AV_LOG_WARNING,
-                       "Can not process SOS before SOF, skipping\n");
-                break;
-                }
-            if ((ret = ff_mjpeg_decode_sos(s, NULL, NULL)) < 0 &&
+            s->cur_scan++;
+            if ((ret = ff_mjpeg_decode_sos(s, NULL, 0, NULL)) < 0 &&
                 (avctx->err_recognition & AV_EF_EXPLODE))
-                return ret;
-            /* buggy avid puts EOI every 10-20th frame */
-            /* if restart period is over process EOI */
-            if ((s->buggy_avid && !s->interlaced) || s->restart_interval)
-                goto eoi_parser;
+                goto fail;
             break;
         case DRI:
             mjpeg_decode_dri(s);
@@ -1643,21 +2057,158 @@ eoi_parser:
             break;
         }
 
-not_the_end:
         /* eof process start code */
         buf_ptr += (get_bits_count(&s->gb) + 7) / 8;
         av_log(avctx, AV_LOG_DEBUG,
                "marker parser used %d bytes (%d bits)\n",
                (get_bits_count(&s->gb) + 7) / 8, get_bits_count(&s->gb));
     }
-    if (s->got_picture) {
+    if (s->got_picture && s->cur_scan) {
         av_log(avctx, AV_LOG_WARNING, "EOI missing, emulating\n");
         goto eoi_parser;
     }
     av_log(avctx, AV_LOG_FATAL, "No JPEG data found in image\n");
     return AVERROR_INVALIDDATA;
+fail:
+    s->got_picture = 0;
+    return ret;
 the_end:
-    av_log(avctx, AV_LOG_DEBUG, "mjpeg decode frame unused %td bytes\n",
+    if (s->upscale_h) {
+        int p;
+        av_assert0(avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
+                   avctx->pix_fmt == AV_PIX_FMT_YUV444P  ||
+                   avctx->pix_fmt == AV_PIX_FMT_YUVJ440P ||
+                   avctx->pix_fmt == AV_PIX_FMT_YUV440P  ||
+                   avctx->pix_fmt == AV_PIX_FMT_YUVA444P ||
+                   avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
+                   avctx->pix_fmt == AV_PIX_FMT_YUV420P  ||
+                   avctx->pix_fmt == AV_PIX_FMT_GBRAP
+                  );
+        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &hshift, &vshift);
+        for (p = 0; p<4; p++) {
+            uint8_t *line = s->picture_ptr->data[p];
+            int w = s->width;
+            if (!(s->upscale_h & (1<<p)))
+                continue;
+            if (p==1 || p==2)
+                w >>= hshift;
+            for (i = 0; i < s->chroma_height; i++) {
+                for (index = w - 1; index; index--)
+                    line[index] = (line[index / 2] + line[(index + 1) / 2]) >> 1;
+                line += s->linesize[p];
+            }
+        }
+    }
+    if (s->upscale_v) {
+        int p;
+        av_assert0(avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
+                   avctx->pix_fmt == AV_PIX_FMT_YUV444P  ||
+                   avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
+                   avctx->pix_fmt == AV_PIX_FMT_YUV422P  ||
+                   avctx->pix_fmt == AV_PIX_FMT_YUVA444P ||
+                   avctx->pix_fmt == AV_PIX_FMT_GBRAP
+                   );
+        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &hshift, &vshift);
+        for (p = 0; p < 4; p++) {
+            uint8_t *dst = &((uint8_t *)s->picture_ptr->data[p])[(s->height - 1) * s->linesize[p]];
+            int w = s->width;
+            if (!(s->upscale_v & (1<<p)))
+                continue;
+            if (p==1 || p==2)
+                w >>= hshift;
+            for (i = s->height - 1; i; i--) {
+                uint8_t *src1 = &((uint8_t *)s->picture_ptr->data[p])[i / 2 * s->linesize[p]];
+                uint8_t *src2 = &((uint8_t *)s->picture_ptr->data[p])[(i + 1) / 2 * s->linesize[p]];
+                if (src1 == src2) {
+                    memcpy(dst, src1, w);
+                } else {
+                    for (index = 0; index < w; index++)
+                        dst[index] = (src1[index] + src2[index]) >> 1;
+                }
+                dst -= s->linesize[p];
+            }
+        }
+    }
+    if (s->flipped) {
+        int j;
+        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &hshift, &vshift);
+        for (index=0; index<4; index++) {
+            uint8_t *dst = s->picture_ptr->data[index];
+            int w = s->picture_ptr->width;
+            int h = s->picture_ptr->height;
+            if(index && index<3){
+                w = FF_CEIL_RSHIFT(w, hshift);
+                h = FF_CEIL_RSHIFT(h, vshift);
+            }
+            if(dst){
+                uint8_t *dst2 = dst + s->picture_ptr->linesize[index]*(h-1);
+                for (i=0; i<h/2; i++) {
+                    for (j=0; j<w; j++)
+                        FFSWAP(int, dst[j], dst2[j]);
+                    dst  += s->picture_ptr->linesize[index];
+                    dst2 -= s->picture_ptr->linesize[index];
+                }
+            }
+        }
+    }
+    if (s->adobe_transform == 0 && s->avctx->pix_fmt == AV_PIX_FMT_GBRAP) {
+        int w = s->picture_ptr->width;
+        int h = s->picture_ptr->height;
+        for (i=0; i<h; i++) {
+            int j;
+            uint8_t *dst[4];
+            for (index=0; index<4; index++) {
+                dst[index] =   s->picture_ptr->data[index]
+                             + s->picture_ptr->linesize[index]*i;
+            }
+            for (j=0; j<w; j++) {
+                int k = dst[3][j];
+                int r = dst[0][j] * k;
+                int g = dst[1][j] * k;
+                int b = dst[2][j] * k;
+                dst[0][j] = g*257 >> 16;
+                dst[1][j] = b*257 >> 16;
+                dst[2][j] = r*257 >> 16;
+                dst[3][j] = 255;
+            }
+        }
+    }
+    if (s->adobe_transform == 2 && s->avctx->pix_fmt == AV_PIX_FMT_YUVA444P) {
+        int w = s->picture_ptr->width;
+        int h = s->picture_ptr->height;
+        for (i=0; i<h; i++) {
+            int j;
+            uint8_t *dst[4];
+            for (index=0; index<4; index++) {
+                dst[index] =   s->picture_ptr->data[index]
+                             + s->picture_ptr->linesize[index]*i;
+            }
+            for (j=0; j<w; j++) {
+                int k = dst[3][j];
+                int r = (255 - dst[0][j]) * k;
+                int g = (128 - dst[1][j]) * k;
+                int b = (128 - dst[2][j]) * k;
+                dst[0][j] = r*257 >> 16;
+                dst[1][j] = (g*257 >> 16) + 128;
+                dst[2][j] = (b*257 >> 16) + 128;
+                dst[3][j] = 255;
+            }
+        }
+    }
+
+    if (s->stereo3d) {
+        AVStereo3D *stereo = av_stereo3d_create_side_data(data);
+        if (stereo) {
+            stereo->type  = s->stereo3d->type;
+            stereo->flags = s->stereo3d->flags;
+        }
+        av_freep(&s->stereo3d);
+    }
+
+    av_dict_copy(avpriv_frame_get_metadatap(data), s->exif_metadata, 0);
+    av_dict_free(&s->exif_metadata);
+
+    av_log(avctx, AV_LOG_DEBUG, "decode frame unused %"PTRDIFF_SPECIFIER" bytes\n",
            buf_end - buf_ptr);
 //  return buf_end - buf_ptr;
     return buf_ptr - buf;
@@ -1668,13 +2219,18 @@ av_cold int ff_mjpeg_decode_end(AVCodecContext *avctx)
     MJpegDecodeContext *s = avctx->priv_data;
     int i, j;
 
+    if (s->interlaced && s->bottom_field == !s->interlace_polarity && s->got_picture && !avctx->frame_number) {
+        av_log(avctx, AV_LOG_INFO, "Single field\n");
+    }
+
     if (s->picture) {
         av_frame_free(&s->picture);
         s->picture_ptr = NULL;
     } else if (s->picture_ptr)
         av_frame_unref(s->picture_ptr);
 
-    av_free(s->buffer);
+    av_freep(&s->buffer);
+    av_freep(&s->stereo3d);
     av_freep(&s->ljpeg_buffer);
     s->ljpeg_buffer_size = 0;
 
@@ -1686,9 +2242,17 @@ av_cold int ff_mjpeg_decode_end(AVCodecContext *avctx)
         av_freep(&s->blocks[i]);
         av_freep(&s->last_nnz[i]);
     }
+    av_dict_free(&s->exif_metadata);
     return 0;
 }
 
+static void decode_flush(AVCodecContext *avctx)
+{
+    /* Flush callback: drop the "picture in progress" flag. */
+    ((MJpegDecodeContext *)avctx->priv_data)->got_picture = 0;
+}
+
+#if CONFIG_MJPEG_DECODER
 #define OFFSET(x) offsetof(MJpegDecodeContext, x)
 #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
 static const AVOption options[] = {
@@ -1713,10 +2277,13 @@ AVCodec ff_mjpeg_decoder = {
     .init           = ff_mjpeg_decode_init,
     .close          = ff_mjpeg_decode_end,
     .decode         = ff_mjpeg_decode_frame,
+    .flush          = decode_flush,
     .capabilities   = CODEC_CAP_DR1,
+    .max_lowres     = 3,
     .priv_class     = &mjpegdec_class,
 };
-
+#endif
+#if CONFIG_THP_DECODER
 AVCodec ff_thp_decoder = {
     .name           = "thp",
     .long_name      = NULL_IF_CONFIG_SMALL("Nintendo Gamecube THP video"),
@@ -1726,5 +2293,8 @@ AVCodec ff_thp_decoder = {
     .init           = ff_mjpeg_decode_init,
     .close          = ff_mjpeg_decode_end,
     .decode         = ff_mjpeg_decode_frame,
+    .flush          = decode_flush,
     .capabilities   = CODEC_CAP_DR1,
+    .max_lowres     = 3,
 };
+#endif
diff --git a/libavcodec/mjpegdec.h b/libavcodec/mjpegdec.h
index aa4703a..53a412c 100644
--- a/libavcodec/mjpegdec.h
+++ b/libavcodec/mjpegdec.h
@@ -4,20 +4,20 @@
  * Copyright (c) 2003 Alex Beregszaszi
  * Copyright (c) 2003-2004 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -31,6 +31,7 @@
 
 #include "libavutil/log.h"
 #include "libavutil/pixdesc.h"
+#include "libavutil/stereo3d.h"
 
 #include "avcodec.h"
 #include "blockdsp.h"
@@ -61,14 +62,20 @@ typedef struct MJpegDecodeContext {
     int ls;
     int progressive;
     int rgb;
+    int upscale_h;
+    int chroma_height;
+    int upscale_v;
     int rct;            /* standard rct */
     int pegasus_rct;    /* pegasus reversible colorspace transform */
     int bits;           /* bits per component */
+    int colr;
+    int xfrm;
+    int adobe_transform;
 
     int maxval;
     int near;         ///< near lossless bound (si 0 for lossless)
     int t1,t2,t3;
-    int reset;        ///< context halfing intervall ?rename
+    int reset;        ///< context halfing interval ?rename
 
     int width, height;
     int mb_width, mb_height;
@@ -83,6 +90,7 @@ typedef struct MJpegDecodeContext {
     int nb_blocks[MAX_COMPONENTS];
     int h_scount[MAX_COMPONENTS];
     int v_scount[MAX_COMPONENTS];
+    int quant_sindex[MAX_COMPONENTS];
     int h_max, v_max; /* maximum h and v counts */
     int quant_index[4];   /* quant table index for each component */
     int last_dc[MAX_COMPONENTS]; /* last DEQUANTIZED dc (XXX: am I right to do that ?) */
@@ -95,6 +103,7 @@ typedef struct MJpegDecodeContext {
     int16_t (*blocks[MAX_COMPONENTS])[64]; ///< intermediate sums (progressive mode)
     uint8_t *last_nnz[MAX_COMPONENTS];
     uint64_t coefs_finished[MAX_COMPONENTS]; ///< bitmask of which coefs have been completely decoded (progressive mode)
+    int palette_index;
     ScanTable scantable;
     BlockDSPContext bdsp;
     HpelDSPContext hdsp;
@@ -116,6 +125,9 @@ typedef struct MJpegDecodeContext {
     unsigned int ljpeg_buffer_size;
 
     int extern_huff;
+    AVDictionary *exif_metadata;
+
+    AVStereo3D *stereo3d; ///< stereoscopic information (cached, since it is read before frame allocation)
 
     const AVPixFmtDescriptor *pix_desc;
 } MJpegDecodeContext;
@@ -129,7 +141,8 @@ int ff_mjpeg_decode_dqt(MJpegDecodeContext *s);
 int ff_mjpeg_decode_dht(MJpegDecodeContext *s);
 int ff_mjpeg_decode_sof(MJpegDecodeContext *s);
 int ff_mjpeg_decode_sos(MJpegDecodeContext *s,
-                        const uint8_t *mb_bitmask, const AVFrame *reference);
+                        const uint8_t *mb_bitmask,int mb_bitmask_size,
+                        const AVFrame *reference);
 int ff_mjpeg_find_marker(MJpegDecodeContext *s,
                          const uint8_t **buf_ptr, const uint8_t *buf_end,
                          const uint8_t **unescaped_buf_ptr, int *unescaped_buf_size);
diff --git a/libavcodec/mjpegenc.c b/libavcodec/mjpegenc.c
index fdb882e..ce230ad 100644
--- a/libavcodec/mjpegenc.c
+++ b/libavcodec/mjpegenc.c
@@ -8,20 +8,20 @@
  * aspecting, new decode_frame mechanism and apple mjpeg-b support
  *                                  by Alex Beregszaszi
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -30,8 +30,6 @@
  * MJPEG encoder.
  */
 
-#include <assert.h>
-
 #include "libavutil/pixdesc.h"
 
 #include "avcodec.h"
@@ -44,6 +42,11 @@ av_cold int ff_mjpeg_encode_init(MpegEncContext *s)
 {
     MJpegContext *m;
 
+    if (s->width > 65500 || s->height > 65500) {
+        av_log(s, AV_LOG_ERROR, "JPEG does not support resolutions above 65500x65500\n");
+        return AVERROR(EINVAL);
+    }
+
     m = av_malloc(sizeof(MJpegContext));
     if (!m)
         return AVERROR(ENOMEM);
@@ -121,7 +124,7 @@ static void encode_block(MpegEncContext *s, int16_t *block, int n)
                 mant--;
             }
 
-            nbits= av_log2(val) + 1;
+            nbits= av_log2_16bit(val) + 1;
             code = (run << 4) | nbits;
 
             put_bits(&s->pb, huff_size_ac[code], huff_code_ac[code]);
@@ -136,23 +139,82 @@ static void encode_block(MpegEncContext *s, int16_t *block, int n)
         put_bits(&s->pb, huff_size_ac[0], huff_code_ac[0]);
 }
 
-void ff_mjpeg_encode_mb(MpegEncContext *s, int16_t block[6][64])
+void ff_mjpeg_encode_mb(MpegEncContext *s, int16_t block[12][64])
 {
     int i;
-    for(i=0;i<5;i++) {
-        encode_block(s, block[i], i);
-    }
-    if (s->chroma_format == CHROMA_420) {
+    if (s->chroma_format == CHROMA_444) {
+        encode_block(s, block[0], 0);
+        encode_block(s, block[2], 2);
+        encode_block(s, block[4], 4);
+        encode_block(s, block[8], 8);
         encode_block(s, block[5], 5);
+        encode_block(s, block[9], 9);
+
+        if (16*s->mb_x+8 < s->width) {
+            encode_block(s, block[1], 1);
+            encode_block(s, block[3], 3);
+            encode_block(s, block[6], 6);
+            encode_block(s, block[10], 10);
+            encode_block(s, block[7], 7);
+            encode_block(s, block[11], 11);
+        }
     } else {
-        encode_block(s, block[6], 6);
-        encode_block(s, block[5], 5);
-        encode_block(s, block[7], 7);
+        for(i=0;i<5;i++) {
+            encode_block(s, block[i], i);
+        }
+        if (s->chroma_format == CHROMA_420) {
+            encode_block(s, block[5], 5);
+        } else {
+            encode_block(s, block[6], 6);
+            encode_block(s, block[5], 5);
+            encode_block(s, block[7], 7);
+        }
     }
 
     s->i_tex_bits += get_bits_diff(s);
 }
 
+// maximum over s->mjpeg_vsample[i]
+#define V_MAX 2
+/* Encode one AMV frame. The input is cloned and passed to
+ * ff_MPV_encode_picture() upside-down (planes on the last row, negated line sizes). */
+static int amv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
+                              const AVFrame *pic_arg, int *got_packet)
+{
+    MpegEncContext *s = avctx->priv_data;
+    AVFrame *flipped;
+    int plane, ret, hshift, vshift;
+
+    av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &hshift, &vshift);
+
+    /* CODEC_FLAG_EMU_EDGE has to be cleared */
+    if (s->avctx->flags & CODEC_FLAG_EMU_EDGE)
+        return AVERROR(EINVAL);
+
+    if (avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL && (avctx->height & 15)) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Heights which are not a multiple of 16 might fail with some decoders, "
+               "use vstrict=-1 / -strict -1 to use %d anyway.\n", avctx->height);
+        av_log(avctx, AV_LOG_WARNING, "If you have a device that plays AMV videos, please test if videos "
+               "with such heights work with it and report your findings to ffmpeg-devel@ffmpeg.org\n");
+        return AVERROR_EXPERIMENTAL;
+    }
+
+    flipped = av_frame_clone(pic_arg);
+    if (!flipped)
+        return AVERROR(ENOMEM);
+    /* move every plane pointer to its bottom row and walk upwards */
+    for (plane = 0; plane < 3; plane++) {
+        int vsample = plane ? 2 >> vshift : 2;
+        flipped->data[plane] += flipped->linesize[plane] * (vsample * s->height / V_MAX - 1);
+        flipped->linesize[plane] = -flipped->linesize[plane];
+    }
+    ret = ff_MPV_encode_picture(avctx, pkt, flipped, got_packet);
+    av_frame_free(&flipped);
+    return ret;
+}
+
+#if CONFIG_MJPEG_ENCODER
 AVCodec ff_mjpeg_encoder = {
     .name           = "mjpeg",
     .long_name      = NULL_IF_CONFIG_SMALL("MJPEG (Motion JPEG)"),
@@ -162,7 +224,24 @@ AVCodec ff_mjpeg_encoder = {
     .init           = ff_MPV_encode_init,
     .encode2        = ff_MPV_encode_picture,
     .close          = ff_MPV_encode_end,
+    .capabilities   = CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
+    .pix_fmts       = (const enum AVPixelFormat[]){
+        AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_NONE
+    },
+};
+#endif
+#if CONFIG_AMV_ENCODER
+AVCodec ff_amv_encoder = {
+    .name           = "amv",
+    .long_name      = NULL_IF_CONFIG_SMALL("AMV Video"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_AMV,
+    .priv_data_size = sizeof(MpegEncContext),
+    .init           = ff_MPV_encode_init,
+    .encode2        = amv_encode_picture,
+    .close          = ff_MPV_encode_end,
     .pix_fmts       = (const enum AVPixelFormat[]){
         AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_NONE
     },
 };
+#endif
diff --git a/libavcodec/mjpegenc.h b/libavcodec/mjpegenc.h
index 6054db1..069d43c 100644
--- a/libavcodec/mjpegenc.h
+++ b/libavcodec/mjpegenc.h
@@ -8,20 +8,20 @@
  * aspecting, new decode_frame mechanism and apple mjpeg-b support
  *                                  by Alex Beregszaszi
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -51,6 +51,6 @@ typedef struct MJpegContext {
 
 int  ff_mjpeg_encode_init(MpegEncContext *s);
 void ff_mjpeg_encode_close(MpegEncContext *s);
-void ff_mjpeg_encode_mb(MpegEncContext *s, int16_t block[6][64]);
+void ff_mjpeg_encode_mb(MpegEncContext *s, int16_t block[12][64]);
 
 #endif /* AVCODEC_MJPEGENC_H */
diff --git a/libavcodec/mjpegenc_common.c b/libavcodec/mjpegenc_common.c
index adb335e..8a5b99f 100644
--- a/libavcodec/mjpegenc_common.c
+++ b/libavcodec/mjpegenc_common.c
@@ -1,20 +1,22 @@
 /*
  * lossless JPEG shared bits
+ * Copyright (c) 2000, 2001 Fabrice Bellard
+ * Copyright (c) 2003 Alex Beregszaszi
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -52,20 +54,43 @@ static int put_huffman_table(PutBitContext *p, int table_class, int table_id,
     return n + 17;
 }
 
-static void jpeg_table_header(PutBitContext *p, ScanTable *intra_scantable,
-                              uint16_t intra_matrix[64])
+static void jpeg_table_header(AVCodecContext *avctx, PutBitContext *p,
+                              ScanTable *intra_scantable,
+                              uint16_t luma_intra_matrix[64],
+                              uint16_t chroma_intra_matrix[64],
+                              int hsample[3])
 {
     int i, j, size;
     uint8_t *ptr;
 
+    if (avctx->codec_id != AV_CODEC_ID_LJPEG) {
+        int matrix_count = 1 + !!memcmp(luma_intra_matrix,
+                                        chroma_intra_matrix,
+                                        sizeof(luma_intra_matrix[0]) * 64);
     /* quant matrixes */
     put_marker(p, DQT);
-    put_bits(p, 16, 2 + 1 * (1 + 64));
+    put_bits(p, 16, 2 + matrix_count * (1 + 64));
     put_bits(p, 4, 0); /* 8 bit precision */
     put_bits(p, 4, 0); /* table 0 */
     for(i=0;i<64;i++) {
         j = intra_scantable->permutated[i];
-        put_bits(p, 8, intra_matrix[j]);
+        put_bits(p, 8, luma_intra_matrix[j]);
+    }
+
+        if (matrix_count > 1) {
+            put_bits(p, 4, 0); /* 8 bit precision */
+            put_bits(p, 4, 1); /* table 1 */
+            for(i=0;i<64;i++) {
+                j = intra_scantable->permutated[i];
+                put_bits(p, 8, chroma_intra_matrix[j]);
+            }
+        }
+    }
+
+    if(avctx->active_thread_type & FF_THREAD_SLICE){
+        put_marker(p, DRI);
+        put_bits(p, 16, 4);
+        put_bits(p, 16, (avctx->width-1)/(8*hsample[0]) + 1);
     }
 
     /* huffman table */
@@ -96,7 +121,7 @@ static void jpeg_put_comments(AVCodecContext *avctx, PutBitContext *p)
         put_marker(p, APP0);
         put_bits(p, 16, 16);
         avpriv_put_string(p, "JFIF", 1); /* this puts the trailing zero-byte too */
-        put_bits(p, 16, 0x0201);         /* v 1.02 */
+        put_bits(p, 16, 0x0102);         /* v 1.02 */
         put_bits(p,  8, 0);              /* units type: 0 - aspect ratio */
         put_bits(p, 16, avctx->sample_aspect_ratio.num);
         put_bits(p, 16, avctx->sample_aspect_ratio.den);
@@ -128,22 +153,22 @@ static void jpeg_put_comments(AVCodecContext *avctx, PutBitContext *p)
     }
 }
 
-void ff_mjpeg_encode_picture_header(AVCodecContext *avctx, PutBitContext *pb,
-                                    ScanTable *intra_scantable,
-                                    uint16_t intra_matrix[64])
+void ff_mjpeg_init_hvsample(AVCodecContext *avctx, int hsample[3], int vsample[3])
 {
     int chroma_h_shift, chroma_v_shift;
-    const int lossless = avctx->codec_id != AV_CODEC_ID_MJPEG;
-    int hsample[3], vsample[3];
 
     av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
                                      &chroma_v_shift);
-
     if (avctx->codec->id == AV_CODEC_ID_LJPEG &&
-        avctx->pix_fmt   == AV_PIX_FMT_BGR24) {
+        (   avctx->pix_fmt == AV_PIX_FMT_BGR0
+         || avctx->pix_fmt == AV_PIX_FMT_BGRA
+         || avctx->pix_fmt == AV_PIX_FMT_BGR24)) {
         vsample[0] = hsample[0] =
         vsample[1] = hsample[1] =
         vsample[2] = hsample[2] = 1;
+    } else if (avctx->pix_fmt == AV_PIX_FMT_YUV444P || avctx->pix_fmt == AV_PIX_FMT_YUVJ444P) {
+        vsample[0] = vsample[1] = vsample[2] = 2;
+        hsample[0] = hsample[1] = hsample[2] = 1;
     } else {
         vsample[0] = 2;
         vsample[1] = 2 >> chroma_v_shift;
@@ -152,21 +177,41 @@ void ff_mjpeg_encode_picture_header(AVCodecContext *avctx, PutBitContext *pb,
         hsample[1] = 2 >> chroma_h_shift;
         hsample[2] = 2 >> chroma_h_shift;
     }
+}
+
+void ff_mjpeg_encode_picture_header(AVCodecContext *avctx, PutBitContext *pb,
+                                    ScanTable *intra_scantable,
+                                    uint16_t luma_intra_matrix[64],
+                                    uint16_t chroma_intra_matrix[64])
+{
+    const int lossless = avctx->codec_id != AV_CODEC_ID_MJPEG && avctx->codec_id != AV_CODEC_ID_AMV;
+    int hsample[3], vsample[3];
+    int i;
+    int chroma_matrix = !!memcmp(luma_intra_matrix,
+                                 chroma_intra_matrix,
+                                 sizeof(luma_intra_matrix[0])*64);
+
+    ff_mjpeg_init_hvsample(avctx, hsample, vsample);
 
     put_marker(pb, SOI);
 
+    // hack for AMV mjpeg format
+    if(avctx->codec_id == AV_CODEC_ID_AMV) goto end;
+
     jpeg_put_comments(avctx, pb);
 
-    jpeg_table_header(pb, intra_scantable, intra_matrix);
+    jpeg_table_header(avctx, pb, intra_scantable, luma_intra_matrix, chroma_intra_matrix, hsample);
 
     switch (avctx->codec_id) {
     case AV_CODEC_ID_MJPEG:  put_marker(pb, SOF0 ); break;
     case AV_CODEC_ID_LJPEG:  put_marker(pb, SOF3 ); break;
-    default: assert(0);
+    default: av_assert0(0);
     }
 
     put_bits(pb, 16, 17);
-    if (lossless && avctx->pix_fmt == AV_PIX_FMT_BGR24)
+    if (lossless && (  avctx->pix_fmt == AV_PIX_FMT_BGR0
+                    || avctx->pix_fmt == AV_PIX_FMT_BGRA
+                    || avctx->pix_fmt == AV_PIX_FMT_BGR24))
         put_bits(pb, 8, 9); /* 9 bits/component RCT */
     else
         put_bits(pb, 8, 8); /* 8 bits/component */
@@ -184,13 +229,13 @@ void ff_mjpeg_encode_picture_header(AVCodecContext *avctx, PutBitContext *pb,
     put_bits(pb, 8, 2); /* component number */
     put_bits(pb, 4, hsample[1]); /* H factor */
     put_bits(pb, 4, vsample[1]); /* V factor */
-    put_bits(pb, 8, 0); /* select matrix */
+    put_bits(pb, 8, lossless ? 0 : chroma_matrix); /* select matrix */
 
     /* Cr component */
     put_bits(pb, 8, 3); /* component number */
     put_bits(pb, 4, hsample[2]); /* H factor */
     put_bits(pb, 4, vsample[2]); /* V factor */
-    put_bits(pb, 8, 0); /* select matrix */
+    put_bits(pb, 8, lossless ? 0 : chroma_matrix); /* select matrix */
 
     /* scan header */
     put_marker(pb, SOS);
@@ -217,20 +262,37 @@ void ff_mjpeg_encode_picture_header(AVCodecContext *avctx, PutBitContext *pb,
     switch (avctx->codec_id) {
     case AV_CODEC_ID_MJPEG:  put_bits(pb, 8, 63); break; /* Se (not used) */
     case AV_CODEC_ID_LJPEG:  put_bits(pb, 8,  0); break; /* not used */
-    default: assert(0);
+    default: av_assert0(0);
     }
 
     put_bits(pb, 8, 0); /* Ah/Al (not used) */
+
+end:
+    if (!lossless) {
+        MpegEncContext *s = avctx->priv_data;
+        av_assert0(avctx->codec->priv_data_size == sizeof(MpegEncContext));
+
+        s->esc_pos = put_bits_count(pb) >> 3;
+        for(i=1; i<s->slice_context_count; i++)
+            s->thread_context[i]->esc_pos = 0;
+    }
 }
 
-static void escape_FF(PutBitContext *pb, int start)
+void ff_mjpeg_escape_FF(PutBitContext *pb, int start)
 {
-    int size = put_bits_count(pb) - start * 8;
+    int size;
     int i, ff_count;
     uint8_t *buf = pb->buf + start;
     int align= (-(size_t)(buf))&3;
+    int pad = (-put_bits_count(pb))&7;
+
+    if (pad)
+        put_bits(pb, pad, (1<<pad)-1);
 
-    assert((size&7) == 0);
+    flush_put_bits(pb);
+    size = put_bits_count(pb) - start * 8;
+
+    av_assert1((size&7) == 0);
     size >>= 3;
 
     ff_count=0;
@@ -275,21 +337,25 @@ static void escape_FF(PutBitContext *pb, int start)
     }
 }
 
-void ff_mjpeg_encode_stuffing(PutBitContext * pbc)
+void ff_mjpeg_encode_stuffing(MpegEncContext *s)
 {
-    int length;
-    length= (-put_bits_count(pbc))&7;
-    if(length) put_bits(pbc, length, (1<<length)-1);
+    int i;
+    PutBitContext *pbc = &s->pb;
+    int mb_y = s->mb_y - !s->mb_x;
+
+    ff_mjpeg_escape_FF(pbc, s->esc_pos);
+
+    if((s->avctx->active_thread_type & FF_THREAD_SLICE) && mb_y < s->mb_height)
+        put_marker(pbc, RST0 + (mb_y&7));
+    s->esc_pos = put_bits_count(pbc) >> 3;
+
+    for(i=0; i<3; i++)
+        s->last_dc[i] = 128 << s->intra_dc_precision;
 }
 
 void ff_mjpeg_encode_picture_trailer(PutBitContext *pb, int header_bits)
 {
-    ff_mjpeg_encode_stuffing(pb);
-    flush_put_bits(pb);
-
-    assert((header_bits & 7) == 0);
-
-    escape_FF(pb, header_bits >> 3);
+    av_assert1((header_bits & 7) == 0);
 
     put_marker(pb, EOI);
 }
diff --git a/libavcodec/mjpegenc_common.h b/libavcodec/mjpegenc_common.h
index b48911e..38b9b3f 100644
--- a/libavcodec/mjpegenc_common.h
+++ b/libavcodec/mjpegenc_common.h
@@ -1,20 +1,20 @@
 /*
  * lossless JPEG shared bits
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,13 +25,18 @@
 
 #include "avcodec.h"
 #include "idctdsp.h"
+#include "mpegvideo.h"
 #include "put_bits.h"
 
 void ff_mjpeg_encode_picture_header(AVCodecContext *avctx, PutBitContext *pb,
                                     ScanTable *intra_scantable,
-                                    uint16_t intra_matrix[64]);
+                                    uint16_t luma_intra_matrix[64],
+                                    uint16_t chroma_intra_matrix[64]);
 void ff_mjpeg_encode_picture_trailer(PutBitContext *pb, int header_bits);
-void ff_mjpeg_encode_stuffing(PutBitContext *pbc);
+void ff_mjpeg_escape_FF(PutBitContext *pb, int start);
+void ff_mjpeg_encode_stuffing(MpegEncContext *s);
+void ff_mjpeg_init_hvsample(AVCodecContext *avctx, int hsample[3], int vsample[3]);
+
 void ff_mjpeg_encode_dc(PutBitContext *pb, int val,
                         uint8_t *huff_size, uint16_t *huff_code);
 
diff --git a/libavcodec/mlp.c b/libavcodec/mlp.c
index 9615b66..87f7c77 100644
--- a/libavcodec/mlp.c
+++ b/libavcodec/mlp.c
@@ -2,20 +2,20 @@
  * MLP codec common code
  * Copyright (c) 2007-2008 Ian Caulfield
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mlp.h b/libavcodec/mlp.h
index 5a4ee5f..bb9ca26 100644
--- a/libavcodec/mlp.h
+++ b/libavcodec/mlp.h
@@ -2,20 +2,20 @@
  * MLP codec common header file
  * Copyright (c) 2007-2008 Ian Caulfield
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mlp_parser.c b/libavcodec/mlp_parser.c
index 075227f..4bb82ee 100644
--- a/libavcodec/mlp_parser.c
+++ b/libavcodec/mlp_parser.c
@@ -2,20 +2,20 @@
  * MLP parser
  * Copyright (c) 2007 Ian Caulfield
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -44,28 +44,28 @@ static const uint8_t mlp_channels[32] = {
     5, 6, 5, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 };
 
-static const uint64_t mlp_layout[32] = {
+const uint64_t ff_mlp_layout[32] = {
     AV_CH_LAYOUT_MONO,
     AV_CH_LAYOUT_STEREO,
     AV_CH_LAYOUT_2_1,
-    AV_CH_LAYOUT_2_2,
+    AV_CH_LAYOUT_QUAD,
     AV_CH_LAYOUT_STEREO|AV_CH_LOW_FREQUENCY,
     AV_CH_LAYOUT_2_1|AV_CH_LOW_FREQUENCY,
-    AV_CH_LAYOUT_2_2|AV_CH_LOW_FREQUENCY,
+    AV_CH_LAYOUT_QUAD|AV_CH_LOW_FREQUENCY,
     AV_CH_LAYOUT_SURROUND,
     AV_CH_LAYOUT_4POINT0,
-    AV_CH_LAYOUT_5POINT0,
+    AV_CH_LAYOUT_5POINT0_BACK,
     AV_CH_LAYOUT_SURROUND|AV_CH_LOW_FREQUENCY,
     AV_CH_LAYOUT_4POINT0|AV_CH_LOW_FREQUENCY,
-    AV_CH_LAYOUT_5POINT1,
+    AV_CH_LAYOUT_5POINT1_BACK,
     AV_CH_LAYOUT_4POINT0,
-    AV_CH_LAYOUT_5POINT0,
+    AV_CH_LAYOUT_5POINT0_BACK,
     AV_CH_LAYOUT_SURROUND|AV_CH_LOW_FREQUENCY,
     AV_CH_LAYOUT_4POINT0|AV_CH_LOW_FREQUENCY,
-    AV_CH_LAYOUT_5POINT1,
-    AV_CH_LAYOUT_2_2|AV_CH_LOW_FREQUENCY,
-    AV_CH_LAYOUT_5POINT0,
-    AV_CH_LAYOUT_5POINT1,
+    AV_CH_LAYOUT_5POINT1_BACK,
+    AV_CH_LAYOUT_QUAD|AV_CH_LOW_FREQUENCY,
+    AV_CH_LAYOUT_5POINT0_BACK,
+    AV_CH_LAYOUT_5POINT1_BACK,
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 };
 
@@ -108,7 +108,7 @@ static int truehd_channels(int chanmap)
     return channels;
 }
 
-static uint64_t truehd_layout(int chanmap)
+uint64_t ff_truehd_layout(int chanmap)
 {
     int i;
     uint64_t layout = 0;
@@ -130,7 +130,7 @@ int ff_mlp_read_major_sync(void *log, MLPHeaderInfo *mh, GetBitContext *gb)
     int ratebits, channel_arrangement;
     uint16_t checksum;
 
-    assert(get_bits_count(gb) == 0);
+    av_assert1(get_bits_count(gb) == 0);
 
     if (gb->size_in_bits < 28 << 3) {
         av_log(log, AV_LOG_ERROR, "packet too short, unable to read major sync\n");
@@ -158,9 +158,10 @@ int ff_mlp_read_major_sync(void *log, MLPHeaderInfo *mh, GetBitContext *gb)
 
         skip_bits(gb, 11);
 
+        mh->channel_arrangement=
         channel_arrangement    = get_bits(gb, 5);
         mh->channels_mlp       = mlp_channels[channel_arrangement];
-        mh->channel_layout_mlp = mlp_layout[channel_arrangement];
+        mh->channel_layout_mlp = ff_mlp_layout[channel_arrangement];
     } else if (mh->stream_type == 0xba) {
         mh->group1_bits = 24; // TODO: Is this information actually conveyed anywhere?
         mh->group2_bits = 0;
@@ -174,15 +175,16 @@ int ff_mlp_read_major_sync(void *log, MLPHeaderInfo *mh, GetBitContext *gb)
         mh->channel_modifier_thd_stream0 = get_bits(gb, 2);
         mh->channel_modifier_thd_stream1 = get_bits(gb, 2);
 
+        mh->channel_arrangement=
         channel_arrangement            = get_bits(gb, 5);
         mh->channels_thd_stream1       = truehd_channels(channel_arrangement);
-        mh->channel_layout_thd_stream1 = truehd_layout(channel_arrangement);
+        mh->channel_layout_thd_stream1 = ff_truehd_layout(channel_arrangement);
 
         mh->channel_modifier_thd_stream2 = get_bits(gb, 2);
 
         channel_arrangement            = get_bits(gb, 13);
         mh->channels_thd_stream2       = truehd_channels(channel_arrangement);
-        mh->channel_layout_thd_stream2 = truehd_layout(channel_arrangement);
+        mh->channel_layout_thd_stream2 = ff_truehd_layout(channel_arrangement);
     } else
         return AVERROR_INVALIDDATA;
 
@@ -228,6 +230,7 @@ static int mlp_parse(AVCodecParserContext *s,
     int sync_present;
     uint8_t parity_bits;
     int next;
+    int ret;
     int i, p = 0;
 
     *poutbuf_size = 0;
@@ -249,11 +252,15 @@ static int mlp_parse(AVCodecParserContext *s,
         }
 
         if (!mp->in_sync) {
-            ff_combine_frame(&mp->pc, END_NOT_FOUND, &buf, &buf_size);
+            if (ff_combine_frame(&mp->pc, END_NOT_FOUND, &buf, &buf_size) != -1)
+                av_log(avctx, AV_LOG_WARNING, "ff_combine_frame failed\n");
             return buf_size;
         }
 
-        ff_combine_frame(&mp->pc, i - 7, &buf, &buf_size);
+        if ((ret = ff_combine_frame(&mp->pc, i - 7, &buf, &buf_size)) < 0) {
+            av_log(avctx, AV_LOG_WARNING, "ff_combine_frame failed\n");
+            return ret;
+        }
 
         return i - 7;
     }
@@ -267,13 +274,17 @@ static int mlp_parse(AVCodecParserContext *s,
         }
 
         if (mp->pc.index + buf_size < 2) {
-            ff_combine_frame(&mp->pc, END_NOT_FOUND, &buf, &buf_size);
+            if (ff_combine_frame(&mp->pc, END_NOT_FOUND, &buf, &buf_size) != -1)
+                av_log(avctx, AV_LOG_WARNING, "ff_combine_frame failed\n");
             return buf_size;
         }
 
         mp->bytes_left = ((mp->pc.index > 0 ? mp->pc.buffer[0] : buf[0]) << 8)
                        |  (mp->pc.index > 1 ? mp->pc.buffer[1] : buf[1-mp->pc.index]);
         mp->bytes_left = (mp->bytes_left & 0xfff) * 2;
+        if (mp->bytes_left <= 0) { // prevent infinite loop
+            goto lost_sync;
+        }
         mp->bytes_left -= mp->pc.index;
     }
 
@@ -324,6 +335,7 @@ static int mlp_parse(AVCodecParserContext *s,
         avctx->sample_rate = mh.group1_samplerate;
         s->duration = mh.access_unit_size;
 
+        if(!avctx->channels || !avctx->channel_layout) {
         if (mh.stream_type == 0xbb) {
             /* MLP stream */
 #if FF_API_REQUEST_CHANNELS
@@ -332,8 +344,8 @@ FF_DISABLE_DEPRECATION_WARNINGS
                 mh.num_substreams > 1) {
                 avctx->channels       = 2;
                 avctx->channel_layout = AV_CH_LAYOUT_STEREO;
-            } else
 FF_ENABLE_DEPRECATION_WARNINGS
+            } else
 #endif
             if (avctx->request_channel_layout &&
                 (avctx->request_channel_layout & AV_CH_LAYOUT_STEREO) ==
@@ -357,8 +369,8 @@ FF_DISABLE_DEPRECATION_WARNINGS
                        avctx->request_channels <= mh.channels_thd_stream1) {
                 avctx->channels       = mh.channels_thd_stream1;
                 avctx->channel_layout = mh.channel_layout_thd_stream1;
-            } else
 FF_ENABLE_DEPRECATION_WARNINGS
+            } else
 #endif
                 if (avctx->request_channel_layout &&
                     (avctx->request_channel_layout & AV_CH_LAYOUT_STEREO) ==
@@ -377,6 +389,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
                 avctx->channel_layout = mh.channel_layout_thd_stream2;
             }
         }
+        }
 
         if (!mh.is_vbr) /* Stream is CBR */
             avctx->bit_rate = mh.peak_bitrate;
diff --git a/libavcodec/mlp_parser.h b/libavcodec/mlp_parser.h
index 7530fac..5d1d2e7 100644
--- a/libavcodec/mlp_parser.h
+++ b/libavcodec/mlp_parser.h
@@ -2,20 +2,20 @@
  * MLP parser prototypes
  * Copyright (c) 2007 Ian Caulfield
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -39,6 +39,8 @@ typedef struct MLPHeaderInfo
     int group1_samplerate;                  ///< Sample rate of first substream
     int group2_samplerate;                  ///< Sample rate of second substream (MLP only)
 
+    int channel_arrangement;
+
     int channel_modifier_thd_stream0;       ///< Channel modifier for substream 0 of TrueHD sreams ("2-channel presentation")
     int channel_modifier_thd_stream1;       ///< Channel modifier for substream 1 of TrueHD sreams ("6-channel presentation")
     int channel_modifier_thd_stream2;       ///< Channel modifier for substream 2 of TrueHD sreams ("8-channel presentation")
@@ -61,5 +63,8 @@ typedef struct MLPHeaderInfo
 
 
 int ff_mlp_read_major_sync(void *log, MLPHeaderInfo *mh, GetBitContext *gb);
+uint64_t ff_truehd_layout(int chanmap);
+
+extern const uint64_t ff_mlp_layout[32];
 
 #endif /* AVCODEC_MLP_PARSER_H */
diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c
index 6baf4c1..ed6a7fb 100644
--- a/libavcodec/mlpdec.c
+++ b/libavcodec/mlpdec.c
@@ -2,20 +2,20 @@
  * MLP decoder
  * Copyright (c) 2007-2008 Ian Caulfield
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -141,6 +141,9 @@ typedef struct MLPDecodeContext {
     /// Index of the last substream to decode - further substreams are skipped.
     uint8_t     max_decoded_substream;
 
+    /// Stream needs channel reordering to comply with FFmpeg's channel order
+    uint8_t     needs_reordering;
+
     /// number of PCM samples contained in each frame
     int         access_unit_size;
     /// next power of two above the number of samples in each frame
@@ -373,10 +376,22 @@ static int read_major_sync(MLPDecodeContext *m, GetBitContext *gb)
      * substream is Stereo. Subsequent substreams' layouts are indicated in the
      * major sync. */
     if (m->avctx->codec_id == AV_CODEC_ID_MLP) {
+        if (mh.stream_type != 0xbb) {
+            avpriv_request_sample(m->avctx,
+                        "unexpected stream_type %X in MLP",
+                        mh.stream_type);
+            return AVERROR_PATCHWELCOME;
+        }
         if ((substr = (mh.num_substreams > 1)))
             m->substream[0].ch_layout = AV_CH_LAYOUT_STEREO;
         m->substream[substr].ch_layout = mh.channel_layout_mlp;
     } else {
+        if (mh.stream_type != 0xba) {
+            avpriv_request_sample(m->avctx,
+                        "unexpected stream_type %X in !MLP",
+                        mh.stream_type);
+            return AVERROR_PATCHWELCOME;
+        }
         if ((substr = (mh.num_substreams > 1)))
             m->substream[0].ch_layout = AV_CH_LAYOUT_STEREO;
         if (mh.num_substreams > 2)
@@ -385,8 +400,17 @@ static int read_major_sync(MLPDecodeContext *m, GetBitContext *gb)
             else
                 m->substream[2].ch_layout = mh.channel_layout_thd_stream1;
         m->substream[substr].ch_layout = mh.channel_layout_thd_stream1;
+
+        if (m->avctx->channels<=2 && m->substream[substr].ch_layout == AV_CH_LAYOUT_MONO && m->max_decoded_substream == 1) {
+            av_log(m->avctx, AV_LOG_DEBUG, "Mono stream with 2 substreams, ignoring 2nd\n");
+            m->max_decoded_substream = 0;
+            if (m->avctx->channels==2)
+                m->avctx->channel_layout = AV_CH_LAYOUT_STEREO;
+        }
     }
 
+    m->needs_reordering = mh.channel_arrangement >= 18 && mh.channel_arrangement <= 20;
+
     /* Parse the TrueHD decoder channel modifiers and set each substream's
      * AVMatrixEncoding accordingly.
      *
@@ -472,7 +496,7 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
     if (max_matrix_channel > std_max_matrix_channel) {
         av_log(m->avctx, AV_LOG_ERROR,
                "Max matrix channel cannot be greater than %d.\n",
-               max_matrix_channel);
+               std_max_matrix_channel);
         return AVERROR_INVALIDDATA;
     }
 
@@ -484,11 +508,11 @@ static int read_restart_header(MLPDecodeContext *m, GetBitContext *gbp,
 
     /* This should happen for TrueHD streams with >6 channels and MLP's noise
      * type. It is not yet known if this is allowed. */
-    if (s->max_channel > MAX_MATRIX_CHANNEL_MLP && !s->noise_type) {
+    if (max_channel > MAX_MATRIX_CHANNEL_MLP && !s->noise_type) {
         avpriv_request_sample(m->avctx,
                               "%d channels (more than the "
                               "maximum supported by the decoder)",
-                              s->max_channel + 2);
+                              max_channel + 2);
         return AVERROR_PATCHWELCOME;
     }
 
@@ -512,8 +536,8 @@ FF_DISABLE_DEPRECATION_WARNINGS
                "Further substreams will be skipped.\n",
                s->max_channel + 1, substr);
         m->max_decoded_substream = substr;
-    } else
 FF_ENABLE_DEPRECATION_WARNINGS
+    } else
 #endif
     if (m->avctx->request_channel_layout && (s->ch_layout & m->avctx->request_channel_layout) ==
         m->avctx->request_channel_layout && m->max_decoded_substream > substr) {
@@ -552,7 +576,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
             ch_assign = av_get_channel_layout_channel_index(s->ch_layout,
                                                             channel);
         }
-        if (ch_assign > s->max_matrix_channel) {
+        if ((unsigned)ch_assign > s->max_matrix_channel) {
             avpriv_request_sample(m->avctx,
                                   "Assignment of matrix channel %d to invalid output channel %d",
                                   ch, ch_assign);
@@ -596,6 +620,20 @@ FF_ENABLE_DEPRECATION_WARNINGS
                                                                s->output_shift,
                                                                s->max_matrix_channel,
                                                                m->avctx->sample_fmt == AV_SAMPLE_FMT_S32);
+
+        if (m->avctx->codec_id == AV_CODEC_ID_MLP && m->needs_reordering) {
+            if (m->avctx->channel_layout == (AV_CH_LAYOUT_QUAD|AV_CH_LOW_FREQUENCY) ||
+                m->avctx->channel_layout == AV_CH_LAYOUT_5POINT0_BACK) {
+                int i = s->ch_assign[4];
+                s->ch_assign[4] = s->ch_assign[3];
+                s->ch_assign[3] = s->ch_assign[2];
+                s->ch_assign[2] = i;
+            } else if (m->avctx->channel_layout == AV_CH_LAYOUT_5POINT1_BACK) {
+                FFSWAP(int, s->ch_assign[2], s->ch_assign[4]);
+                FFSWAP(int, s->ch_assign[3], s->ch_assign[5]);
+            }
+        }
+
     }
 
     return 0;
@@ -614,7 +652,7 @@ static int read_filter_params(MLPDecodeContext *m, GetBitContext *gbp,
     int i, order;
 
     // Filter is 0 for FIR, 1 for IIR.
-    assert(filter < 2);
+    av_assert0(filter < 2);
 
     if (m->filter_changed[channel][filter]++ > 1) {
         av_log(m->avctx, AV_LOG_ERROR, "Filters may change only once per access unit.\n");
@@ -669,7 +707,7 @@ static int read_filter_params(MLPDecodeContext *m, GetBitContext *gbp,
             /* TODO: Check validity of state data. */
 
             for (i = 0; i < order; i++)
-                fp->state[i] = get_sbits(gbp, state_bits) << state_shift;
+                fp->state[i] = state_bits ? get_sbits(gbp, state_bits) << state_shift : 0;
         }
     }
 
@@ -788,6 +826,7 @@ static int read_channel_params(MLPDecodeContext *m, unsigned int substr,
 
     if (cp->huff_lsbs > 24) {
         av_log(m->avctx, AV_LOG_ERROR, "Invalid huff_lsbs.\n");
+        cp->huff_lsbs = 0;
         return AVERROR_INVALIDDATA;
     }
 
@@ -814,7 +853,7 @@ static int read_decoding_params(MLPDecodeContext *m, GetBitContext *gbp,
         if (get_bits1(gbp)) {
             s->blocksize = get_bits(gbp, 9);
             if (s->blocksize < 8 || s->blocksize > m->access_unit_size) {
-                av_log(m->avctx, AV_LOG_ERROR, "Invalid blocksize.");
+                av_log(m->avctx, AV_LOG_ERROR, "Invalid blocksize.\n");
                 s->blocksize = 0;
                 return AVERROR_INVALIDDATA;
             }
@@ -854,7 +893,7 @@ static int read_decoding_params(MLPDecodeContext *m, GetBitContext *gbp,
     return 0;
 }
 
-#define MSB_MASK(bits)  (-1u << bits)
+#define MSB_MASK(bits)  (-1u << (bits))
 
 /** Generate PCM samples using the prediction filters and residual values
  *  read from the data stream, and update the filter state. */
@@ -1048,10 +1087,8 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
 
     /* get output buffer */
     frame->nb_samples = s->blockpos;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     s->lossless_check_data = m->dsp.mlp_pack_output(s->lossless_check_data,
                                                     s->blockpos,
                                                     m->sample_buffer,
@@ -1091,7 +1128,7 @@ static int read_access_unit(AVCodecContext *avctx, void* data,
     int ret;
 
     if (buf_size < 4)
-        return 0;
+        return AVERROR_INVALIDDATA;
 
     length = (AV_RB16(buf) & 0xfff) * 2;
 
@@ -1270,6 +1307,7 @@ error:
     return AVERROR_INVALIDDATA;
 }
 
+#if CONFIG_MLP_DECODER
 AVCodec ff_mlp_decoder = {
     .name           = "mlp",
     .long_name      = NULL_IF_CONFIG_SMALL("MLP (Meridian Lossless Packing)"),
@@ -1280,7 +1318,7 @@ AVCodec ff_mlp_decoder = {
     .decode         = read_access_unit,
     .capabilities   = CODEC_CAP_DR1,
 };
-
+#endif
 #if CONFIG_TRUEHD_DECODER
 AVCodec ff_truehd_decoder = {
     .name           = "truehd",
diff --git a/libavcodec/mlpdsp.c b/libavcodec/mlpdsp.c
index aded554..3ae8c37 100644
--- a/libavcodec/mlpdsp.c
+++ b/libavcodec/mlpdsp.c
@@ -2,20 +2,20 @@
  * Copyright (c) 2007-2008 Ian Caulfield
  *               2009 Ramiro Polla
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mlpdsp.h b/libavcodec/mlpdsp.h
index acd48fc..a0edeb7 100644
--- a/libavcodec/mlpdsp.h
+++ b/libavcodec/mlpdsp.h
@@ -2,20 +2,20 @@
  * MLP codec common header file
  * Copyright (c) 2007-2008 Ian Caulfield
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mmvideo.c b/libavcodec/mmvideo.c
index abec2e8..026d463 100644
--- a/libavcodec/mmvideo.c
+++ b/libavcodec/mmvideo.c
@@ -2,20 +2,20 @@
  * American Laser Games MM Video Decoder
  * Copyright (c) 2006,2008 Peter Ross
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -68,17 +68,15 @@ static av_cold int mm_decode_init(AVCodecContext *avctx)
     return 0;
 }
 
-static int mm_decode_pal(MmContext *s)
+static void mm_decode_pal(MmContext *s)
 {
     int i;
 
     bytestream2_skip(&s->gb, 4);
     for (i = 0; i < 128; i++) {
-        s->palette[i] = bytestream2_get_be24(&s->gb);
+        s->palette[i] = 0xFFU << 24 | bytestream2_get_be24(&s->gb);
         s->palette[i+128] = s->palette[i]<<2;
     }
-
-    return 0;
 }
 
 /**
@@ -106,6 +104,9 @@ static int mm_decode_intra(MmContext * s, int half_horiz, int half_vert)
         if (half_horiz)
             run_length *=2;
 
+        if (run_length > s->avctx->width - x)
+            return AVERROR_INVALIDDATA;
+
         if (color) {
             memset(s->frame->data[0] + y*s->frame->linesize[0] + x, color, run_length);
             if (half_vert)
@@ -122,7 +123,7 @@ static int mm_decode_intra(MmContext * s, int half_horiz, int half_vert)
     return 0;
 }
 
-/*
+/**
  * @param half_horiz Half horizontal resolution (0 or 1)
  * @param half_vert Half vertical resolution (0 or 1)
  */
@@ -154,6 +155,8 @@ static int mm_decode_inter(MmContext * s, int half_horiz, int half_vert)
             int replace_array = bytestream2_get_byte(&s->gb);
             for(j=0; j<8; j++) {
                 int replace = (replace_array >> (7-j)) & 1;
+                if (x + half_horiz >= s->avctx->width)
+                    return AVERROR_INVALIDDATA;
                 if (replace) {
                     int color = bytestream2_get_byte(&data_ptr);
                     s->frame->data[0][y*s->frame->linesize[0] + x] = color;
@@ -191,13 +194,11 @@ static int mm_decode_frame(AVCodecContext *avctx,
     buf_size -= MM_PREAMBLE_SIZE;
     bytestream2_init(&s->gb, buf, buf_size);
 
-    if ((res = ff_reget_buffer(avctx, s->frame)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+    if ((res = ff_reget_buffer(avctx, s->frame)) < 0)
         return res;
-    }
 
     switch(type) {
-    case MM_TYPE_PALETTE   : res = mm_decode_pal(s); return buf_size;
+    case MM_TYPE_PALETTE   : mm_decode_pal(s); return avpkt->size;
     case MM_TYPE_INTRA     : res = mm_decode_intra(s, 0, 0); break;
     case MM_TYPE_INTRA_HH  : res = mm_decode_intra(s, 1, 0); break;
     case MM_TYPE_INTRA_HHV : res = mm_decode_intra(s, 1, 1); break;
@@ -218,7 +219,7 @@ static int mm_decode_frame(AVCodecContext *avctx,
 
     *got_frame      = 1;
 
-    return buf_size;
+    return avpkt->size;
 }
 
 static av_cold int mm_decode_end(AVCodecContext *avctx)
diff --git a/libavcodec/motion-test.c b/libavcodec/motion-test.c
new file mode 100644
index 0000000..7cfe41c
--- /dev/null
+++ b/libavcodec/motion-test.c
@@ -0,0 +1,197 @@
+/*
+ * (c) 2001 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * motion test.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "config.h"
+#include "me_cmp.h"
+#include "libavutil/internal.h"
+#include "libavutil/lfg.h"
+#include "libavutil/mem.h"
+#include "libavutil/time.h"
+
+#undef printf
+
+#define WIDTH 64
+#define HEIGHT 64
+
+static uint8_t img1[WIDTH * HEIGHT];
+static uint8_t img2[WIDTH * HEIGHT];
+
+/**
+ * Fill a buffer with pseudo-random bytes.
+ *
+ * The generator is seeded once, with a fixed seed so that runs stay
+ * reproducible, and keeps its state across calls. Seeding it again on
+ * every call would produce the same byte sequence each time, leaving
+ * img1 and img2 identical in every iteration of the correctness test.
+ *
+ * @param tab  buffer to fill
+ * @param size number of bytes to write
+ */
+static void fill_random(uint8_t *tab, int size)
+{
+    static AVLFG prng;
+    static int prng_seeded;
+    int i;
+
+    if (!prng_seeded) {
+        av_lfg_init(&prng, 1);
+        prng_seeded = 1;
+    }
+    for (i = 0; i < size; i++)
+        tab[i] = av_lfg_get(&prng) % 256;
+}
+
+/** Print the command line usage. */
+static void help(void)
+{
+    printf("motion-test [-h]\n"
+           "test motion implementations\n");
+}
+
+/* number of passes over the image in the speed test */
+#define NB_ITS 500
+
+/* receives the speed test total so the timed calls have a visible effect */
+int dummy;
+
+/**
+ * Check one comparison function against its reference, then time it.
+ *
+ * Both functions are called on every block position visited by the
+ * loops below and any differing result is printed. The function under
+ * test is then timed over NB_ITS passes and its throughput is printed.
+ *
+ * @param name      label printed in the report
+ * @param test_func implementation under test
+ * @param ref_func  reference implementation
+ */
+static void test_motion(const char *name,
+                 me_cmp_func test_func, me_cmp_func ref_func)
+{
+    int x, y, d1, d2, it;
+    unsigned sum;
+    uint8_t *ptr;
+    int64_t ti;
+    printf("testing '%s'\n", name);
+
+    /* test correctness */
+    for(it=0;it<20;it++) {
+
+        fill_random(img1, WIDTH * HEIGHT);
+        fill_random(img2, WIDTH * HEIGHT);
+
+        for(y=0;y<HEIGHT-17;y++) {
+            for(x=0;x<WIDTH-17;x++) {
+                ptr = img2 + y * WIDTH + x;
+                d1 = test_func(NULL, img1, ptr, WIDTH, 8);
+                d2 = ref_func(NULL, img1, ptr, WIDTH, 8);
+                if (d1 != d2) {
+                    printf("error: mmx=%d c=%d\n", d1, d2);
+                }
+            }
+        }
+    }
+    emms_c();
+
+    /* speed test */
+    ti = av_gettime_relative();
+    /* unsigned accumulator: the total over all timed calls may not fit in
+     * an int, and signed overflow would be undefined behavior */
+    sum = 0;
+    for(it=0;it<NB_ITS;it++) {
+        for(y=0;y<HEIGHT-17;y++) {
+            for(x=0;x<WIDTH-17;x++) {
+                ptr = img2 + y * WIDTH + x;
+                sum += test_func(NULL, img1, ptr, WIDTH, 8);
+            }
+        }
+    }
+    emms_c();
+    dummy = sum; /* avoid optimization */
+    ti = av_gettime_relative() - ti;
+
+    /* count exactly the positions visited by the timed loops above */
+    printf("  %0.0f kop/s\n",
+           (double)NB_ITS * (WIDTH - 17) * (HEIGHT - 17) /
+           (double)(ti / 1000.0));
+}
+
+
+/**
+ * Run the comparison test once per forced CPU flag set.
+ *
+ * The reference context is initialized with the CPU flags forced to 0;
+ * the context under test is initialized again for each entry of flags[].
+ *
+ * @return 0 when the run completed, 1 on a usage error or when the codec
+ *         context could not be allocated
+ */
+int main(int argc, char **argv)
+{
+    AVCodecContext *ctx;
+    int c;
+    MECmpContext cctx, mmxctx;
+    int flags[2] = { AV_CPU_FLAG_MMX, AV_CPU_FLAG_MMXEXT };
+    int flags_size = HAVE_MMXEXT ? 2 : 1;
+
+    if (argc > 1) {
+        help();
+        return 1;
+    }
+
+    printf("ffmpeg motion test\n");
+
+    ctx = avcodec_alloc_context3(NULL);
+    if (!ctx) {
+        printf("could not allocate codec context\n");
+        return 1;
+    }
+    ctx->flags |= CODEC_FLAG_BITEXACT;
+    av_force_cpu_flags(0);
+    memset(&cctx, 0, sizeof(cctx));
+    ff_me_cmp_init(&cctx, ctx);
+    for (c = 0; c < flags_size; c++) {
+        int x;
+        av_force_cpu_flags(flags[c]);
+        memset(&mmxctx, 0, sizeof(mmxctx));
+        ff_me_cmp_init(&mmxctx, ctx);
+
+        for (x = 0; x < 2; x++) {
+            printf("%s for %dx%d pixels\n", c ? "mmx2" : "mmx",
+                   x ? 8 : 16, x ? 8 : 16);
+            test_motion("mmx",     mmxctx.pix_abs[x][0], cctx.pix_abs[x][0]);
+            test_motion("mmx_x2",  mmxctx.pix_abs[x][1], cctx.pix_abs[x][1]);
+            test_motion("mmx_y2",  mmxctx.pix_abs[x][2], cctx.pix_abs[x][2]);
+            test_motion("mmx_xy2", mmxctx.pix_abs[x][3], cctx.pix_abs[x][3]);
+        }
+    }
+    av_free(ctx);
+
+    return 0;
+}
diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c
index 26cde77..0eeb2aa 100644
--- a/libavcodec/motion_est.c
+++ b/libavcodec/motion_est.c
@@ -5,20 +5,20 @@
  *
  * new motion estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -116,7 +116,7 @@ static av_always_inline int cmp_direct_inline(MpegEncContext *s, const int x, co
     uint8_t * const * const src= c->src[src_index];
     int d;
     //FIXME check chroma 4mv, (no crashes ...)
-        assert(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1));
+        av_assert2(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1));
         if(x >= c->xmin && hx <= c->xmax<<(qpel+1) && y >= c->ymin && hy <= c->ymax<<(qpel+1)){
             const int time_pp= s->pp_time;
             const int time_pb= s->pb_time;
@@ -158,14 +158,14 @@ static av_always_inline int cmp_direct_inline(MpegEncContext *s, const int x, co
                     c->qpel_avg[1][bxy](c->temp     + 8*stride, ref[8] + (bx>>2) + (by>>2)*stride     + 8*stride, stride);
                     c->qpel_avg[1][bxy](c->temp + 8 + 8*stride, ref[8] + (bx>>2) + (by>>2)*stride + 8 + 8*stride, stride);
                 }else{
-                    assert((fx>>1) + 16*s->mb_x >= -16);
-                    assert((fy>>1) + 16*s->mb_y >= -16);
-                    assert((fx>>1) + 16*s->mb_x <= s->width);
-                    assert((fy>>1) + 16*s->mb_y <= s->height);
-                    assert((bx>>1) + 16*s->mb_x >= -16);
-                    assert((by>>1) + 16*s->mb_y >= -16);
-                    assert((bx>>1) + 16*s->mb_x <= s->width);
-                    assert((by>>1) + 16*s->mb_y <= s->height);
+                    av_assert2((fx>>1) + 16*s->mb_x >= -16);
+                    av_assert2((fy>>1) + 16*s->mb_y >= -16);
+                    av_assert2((fx>>1) + 16*s->mb_x <= s->width);
+                    av_assert2((fy>>1) + 16*s->mb_y <= s->height);
+                    av_assert2((bx>>1) + 16*s->mb_x >= -16);
+                    av_assert2((by>>1) + 16*s->mb_y >= -16);
+                    av_assert2((bx>>1) + 16*s->mb_x <= s->width);
+                    av_assert2((by>>1) + 16*s->mb_y <= s->height);
 
                     c->hpel_put[0][fxy](c->temp, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 16);
                     c->hpel_avg[0][bxy](c->temp, ref[8] + (bx>>1) + (by>>1)*stride, stride, 16);
@@ -303,13 +303,12 @@ int ff_init_me(MpegEncContext *s){
     int cache_size= FFMIN(ME_MAP_SIZE>>ME_MAP_SHIFT, 1<<ME_MAP_SHIFT);
     int dia_size= FFMAX(FFABS(s->avctx->dia_size)&255, FFABS(s->avctx->pre_dia_size)&255);
 
-    if(FFMIN(s->avctx->dia_size, s->avctx->pre_dia_size) < -ME_MAP_SIZE){
+    if(FFMIN(s->avctx->dia_size, s->avctx->pre_dia_size) < -FFMIN(ME_MAP_SIZE, MAX_SAB_SIZE)){
         av_log(s->avctx, AV_LOG_ERROR, "ME_MAP size is too small for SAB diamond\n");
         return -1;
     }
-    if (s->me_method != ME_ZERO &&
-        s->me_method != ME_EPZS &&
-        s->me_method != ME_X1) {
+    //special case of snow is needed because snow uses its own iterative ME code
+    if(s->me_method!=ME_ZERO && s->me_method!=ME_EPZS && s->me_method!=ME_X1 && s->avctx->codec_id != AV_CODEC_ID_SNOW){
         av_log(s->avctx, AV_LOG_ERROR, "me_method is only allowed to be set to zero and epzs; for hex,umh,full and others see dia_size\n");
         return -1;
     }
@@ -364,12 +363,14 @@ int ff_init_me(MpegEncContext *s){
     /* 8x8 fullpel search would need a 4x4 chroma compare, which we do
      * not have yet, and even if we had, the motion estimation code
      * does not expect it. */
-    if ((c->avctx->me_cmp & FF_CMP_CHROMA) /* && !s->mecc.me_cmp[2] */)
-        s->mecc.me_cmp[2] = zero_cmp;
-    if ((c->avctx->me_sub_cmp & FF_CMP_CHROMA) && !s->mecc.me_sub_cmp[2])
-        s->mecc.me_sub_cmp[2] = zero_cmp;
-    c->hpel_put[2][0]= c->hpel_put[2][1]=
-    c->hpel_put[2][2]= c->hpel_put[2][3]= zero_hpel;
+    if (s->codec_id != AV_CODEC_ID_SNOW) {
+        if ((c->avctx->me_cmp & FF_CMP_CHROMA) /* && !s->mecc.me_cmp[2] */)
+            s->mecc.me_cmp[2] = zero_cmp;
+        if ((c->avctx->me_sub_cmp & FF_CMP_CHROMA) && !s->mecc.me_sub_cmp[2])
+            s->mecc.me_sub_cmp[2] = zero_cmp;
+        c->hpel_put[2][0]= c->hpel_put[2][1]=
+        c->hpel_put[2][2]= c->hpel_put[2][3]= zero_hpel;
+    }
 
     if(s->codec_id == AV_CODEC_ID_H261){
         c->sub_motion_search= no_sub_motion_search;
@@ -395,10 +396,9 @@ static int sad_hpel_motion_search(MpegEncContext * s,
     int mx, my, dminh;
     uint8_t *pix, *ptr;
     int stride= c->stride;
-    const int flags= c->sub_flags;
     LOAD_COMMON
 
-    assert(flags == 0);
+    av_assert2(c->sub_flags == 0);
 
     if(c->skip){
         *mx_ptr = 0;
@@ -522,6 +522,7 @@ static inline void get_limits(MpegEncContext *s, int x, int y)
 {
     MotionEstContext * const c= &s->me;
     int range= c->avctx->me_range >> (1 + !!(c->flags&FLAG_QPEL));
+    int max_range = MAX_MV >> (1 + !!(c->flags&FLAG_QPEL));
 /*
     if(c->avctx->me_range) c->range= c->avctx->me_range >> 1;
     else                   c->range= 16;
@@ -529,8 +530,8 @@ static inline void get_limits(MpegEncContext *s, int x, int y)
     if (s->unrestricted_mv) {
         c->xmin = - x - 16;
         c->ymin = - y - 16;
-        c->xmax = - x + s->mb_width *16;
-        c->ymax = - y + s->mb_height*16;
+        c->xmax = - x + s->width;
+        c->ymax = - y + s->height;
     } else if (s->out_format == FMT_H261){
         // Search range of H261 is different from other codec standards
         c->xmin = (x > 15) ? - 15 : 0;
@@ -543,6 +544,8 @@ static inline void get_limits(MpegEncContext *s, int x, int y)
         c->xmax = - x + s->mb_width *16 - 16;
         c->ymax = - y + s->mb_height*16 - 16;
     }
+    if(!range || range > max_range)
+        range = max_range;
     if(range){
         c->xmin = FFMAX(c->xmin,-range);
         c->xmax = FFMIN(c->xmax, range);
@@ -569,10 +572,11 @@ static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
     const int h=8;
     int block;
     int P[10][2];
-    int dmin_sum=0, mx4_sum=0, my4_sum=0;
+    int dmin_sum=0, mx4_sum=0, my4_sum=0, i;
     int same=1;
     const int stride= c->stride;
     uint8_t *mv_penalty= c->current_mv_penalty;
+    int saftey_cliping= s->unrestricted_mv && (s->width&15) && (s->height&15);
 
     init_mv4_ref(c);
 
@@ -584,6 +588,11 @@ static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
         const int mot_stride = s->b8_stride;
         const int mot_xy = s->block_index[block];
 
+        if(saftey_cliping){
+            c->xmax = - 16*s->mb_x + s->width  - 8*(block &1);
+            c->ymax = - 16*s->mb_y + s->height - 8*(block>>1);
+        }
+
         P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0];
         P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1];
 
@@ -611,6 +620,15 @@ static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
         }
         P_MV1[0]= mx;
         P_MV1[1]= my;
+        if(saftey_cliping)
+            for(i=1; i<10; i++){
+                if (s->first_slice_line && block<2 && i>1 && i<9)
+                    continue;
+                if (i>4 && i<9)
+                    continue;
+                if(P[i][0] > (c->xmax<<shift)) P[i][0]= (c->xmax<<shift);
+                if(P[i][1] > (c->ymax<<shift)) P[i][1]= (c->ymax<<shift);
+            }
 
         dmin4 = epzs_motion_search4(s, &mx4, &my4, P, block, block, s->p_mv_table, (1<<16)>>shift);
 
@@ -745,8 +763,8 @@ static int interlaced_search(MpegEncContext *s, int ref_index,
             int16_t (*mv_table)[2]= mv_tables[block][field_select];
 
             if(user_field_select){
-                assert(field_select==0 || field_select==1);
-                assert(field_select_tables[block][xy]==0 || field_select_tables[block][xy]==1);
+                av_assert1(field_select==0 || field_select==1);
+                av_assert1(field_select_tables[block][xy]==0 || field_select_tables[block][xy]==1);
                 if(field_select_tables[block][xy] != field_select)
                     continue;
             }
@@ -843,6 +861,10 @@ static inline int get_penalty_factor(int lambda, int lambda2, int type){
         return lambda>>FF_LAMBDA_SHIFT;
     case FF_CMP_DCT:
         return (3*lambda)>>(FF_LAMBDA_SHIFT+1);
+    case FF_CMP_W53:
+        return (4*lambda)>>(FF_LAMBDA_SHIFT);
+    case FF_CMP_W97:
+        return (2*lambda)>>(FF_LAMBDA_SHIFT);
     case FF_CMP_SATD:
     case FF_CMP_DCT264:
         return (2*lambda)>>FF_LAMBDA_SHIFT;
@@ -871,9 +893,9 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
 
     init_ref(c, s->new_picture.f->data, s->last_picture.f->data, NULL, 16*mb_x, 16*mb_y, 0);
 
-    assert(s->quarter_sample==0 || s->quarter_sample==1);
-    assert(s->linesize == c->stride);
-    assert(s->uvlinesize == c->uvstride);
+    av_assert0(s->quarter_sample==0 || s->quarter_sample==1);
+    av_assert0(s->linesize == c->stride);
+    av_assert0(s->uvlinesize == c->uvstride);
 
     c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
     c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
@@ -1071,7 +1093,7 @@ int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
     const int xy= mb_x + mb_y*s->mb_stride;
     init_ref(c, s->new_picture.f->data, s->last_picture.f->data, NULL, 16*mb_x, 16*mb_y, 0);
 
-    assert(s->quarter_sample==0 || s->quarter_sample==1);
+    av_assert0(s->quarter_sample==0 || s->quarter_sample==1);
 
     c->pre_penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_pre_cmp);
     c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;
@@ -1449,7 +1471,7 @@ static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y)
         if(s->mv_type == MV_TYPE_16X16) break;
     }
 
-    assert(xmax <= 15 && ymax <= 15 && xmin >= -16 && ymin >= -16);
+    av_assert2(xmax <= 15 && ymax <= 15 && xmin >= -16 && ymin >= -16);
 
     if(xmax < 0 || xmin >0 || ymax < 0 || ymin > 0){
         s->b_direct_mv_table[mot_xy][0]= 0;
@@ -1666,12 +1688,12 @@ void ff_fix_long_p_mvs(MpegEncContext * s)
     MotionEstContext * const c= &s->me;
     const int f_code= s->f_code;
     int y, range;
-    assert(s->pict_type==AV_PICTURE_TYPE_P);
+    av_assert0(s->pict_type==AV_PICTURE_TYPE_P);
 
     range = (((s->out_format == FMT_MPEG1 || s->msmpeg4_version) ? 8 : 16) << f_code);
 
-    assert(range <= 16 || !s->msmpeg4_version);
-    assert(range <=256 || !(s->codec_id == AV_CODEC_ID_MPEG2VIDEO && s->avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL));
+    av_assert0(range <= 16 || !s->msmpeg4_version);
+    av_assert0(range <=256 || !(s->codec_id == AV_CODEC_ID_MPEG2VIDEO && s->avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL));
 
     if(c->avctx->me_range && range > c->avctx->me_range) range= c->avctx->me_range;
 
diff --git a/libavcodec/motion_est_template.c b/libavcodec/motion_est_template.c
index 01936c6..ae2cbde 100644
--- a/libavcodec/motion_est_template.c
+++ b/libavcodec/motion_est_template.c
@@ -2,20 +2,20 @@
  * Motion estimation
  * Copyright (c) 2002-2004 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -91,19 +91,18 @@ static int hpel_motion_search(MpegEncContext * s,
         const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
                      + (mv_penalty[bx   - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
 
+#if defined(ASSERT_LEVEL) && ASSERT_LEVEL > 1
         unsigned key;
         unsigned map_generation= c->map_generation;
-#ifndef NDEBUG
-        uint32_t *map= c->map;
-#endif
         key= ((my-1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
-        assert(map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
+        av_assert2(c->map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
         key= ((my+1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
-        assert(map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
+        av_assert2(c->map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
         key= ((my)<<ME_MAP_MV_BITS) + (mx+1) + map_generation;
-        assert(map[(index+1)&(ME_MAP_SIZE-1)] == key);
+        av_assert2(c->map[(index+1)&(ME_MAP_SIZE-1)] == key);
         key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
-        assert(map[(index-1)&(ME_MAP_SIZE-1)] == key);
+        av_assert2(c->map[(index-1)&(ME_MAP_SIZE-1)] == key);
+#endif
         if(t<=b){
             CHECK_HALF_MV(0, 1, mx  ,my-1)
             if(l<=r){
@@ -143,7 +142,7 @@ static int hpel_motion_search(MpegEncContext * s,
             }
             CHECK_HALF_MV(0, 1, mx  , my)
         }
-        assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
+        av_assert2(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
     }
 
     *mx_ptr = bx;
@@ -181,9 +180,6 @@ static inline int get_mb_score(MpegEncContext *s, int mx, int my,
     cmp_sub        = s->mecc.mb_cmp[size];
     chroma_cmp_sub = s->mecc.mb_cmp[size + 1];
 
-//    assert(!c->skip);
-//    assert(c->avctx->me_sub_cmp != c->avctx->mb_cmp);
-
     d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
     //FIXME check cbp before adding penalty for (0,0) vector
     if(add_rate && (mx || my || size>0))
@@ -310,11 +306,11 @@ static int qpel_motion_search(MpegEncContext * s,
 
             cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
 
-            assert(16*cx2 + 4*cx + 32*c == 32*r);
-            assert(16*cx2 - 4*cx + 32*c == 32*l);
-            assert(16*cy2 + 4*cy + 32*c == 32*b);
-            assert(16*cy2 - 4*cy + 32*c == 32*t);
-            assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
+            av_assert2(16*cx2 + 4*cx + 32*c == 32*r);
+            av_assert2(16*cx2 - 4*cx + 32*c == 32*l);
+            av_assert2(16*cy2 + 4*cy + 32*c == 32*b);
+            av_assert2(16*cy2 - 4*cy + 32*c == 32*t);
+            av_assert2(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
 
             for(ny= -3; ny <= 3; ny++){
                 for(nx= -3; nx <= 3; nx++){
@@ -347,7 +343,7 @@ static int qpel_motion_search(MpegEncContext * s,
             CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
         }
 
-        assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
+        av_assert2(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
 
         *mx_ptr = bx;
         *my_ptr = by;
@@ -364,10 +360,10 @@ static int qpel_motion_search(MpegEncContext * s,
 {\
     const unsigned key = ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
     const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
-    assert((x) >= xmin);\
-    assert((x) <= xmax);\
-    assert((y) >= ymin);\
-    assert((y) <= ymax);\
+    av_assert2((x) >= xmin);\
+    av_assert2((x) <= xmax);\
+    av_assert2((y) >= ymin);\
+    av_assert2((y) <= ymax);\
     if(map[index]!=key){\
         d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
         map[index]= key;\
@@ -405,10 +401,10 @@ static int qpel_motion_search(MpegEncContext * s,
 }
 
 #define check(x,y,S,v)\
-if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
-if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
-if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
-if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
+if( (x)<(xmin<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
+if( (x)>(xmax<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
+if( (y)<(ymin<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
+if( (y)>(ymax<<(S)) ) av_log(NULL, AV_LOG_ERROR, "%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
 
 #define LOAD_COMMON2\
     uint32_t *map= c->map;\
@@ -693,6 +689,8 @@ static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
     LOAD_COMMON2
     unsigned map_generation = c->map_generation;
 
+    av_assert1(minima_count <= MAX_SAB_SIZE);
+
     cmpf        = s->mecc.me_cmp[size];
     chroma_cmpf = s->mecc.me_cmp[size + 1];
 
@@ -889,7 +887,7 @@ static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int
 
     map_generation= update_map_generation(c);
 
-    assert(cmpf);
+    av_assert2(cmpf);
     dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
     map[0]= map_generation;
     score_map[0]= dmin;
diff --git a/libavcodec/motionpixels.c b/libavcodec/motionpixels.c
index da2727f..19da10a 100644
--- a/libavcodec/motionpixels.c
+++ b/libavcodec/motionpixels.c
@@ -2,20 +2,20 @@
  * Motion Pixels Video Decoder
  * Copyright (c) 2008 Gregory Montoir (cyx@users.sourceforge.net)
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -69,13 +69,24 @@ static av_cold int mp_decode_init(AVCodecContext *avctx)
     int w4 = (avctx->width  + 3) & ~3;
     int h4 = (avctx->height + 3) & ~3;
 
+    if(avctx->extradata_size < 2){
+        av_log(avctx, AV_LOG_ERROR, "extradata too small\n");
+        return AVERROR_INVALIDDATA;
+    }
+
     motionpixels_tableinit();
     mp->avctx = avctx;
     ff_bswapdsp_init(&mp->bdsp);
-    mp->changes_map = av_mallocz(avctx->width * h4);
+    mp->changes_map = av_mallocz_array(avctx->width, h4);
     mp->offset_bits_len = av_log2(avctx->width * avctx->height) + 1;
-    mp->vpt = av_mallocz(avctx->height * sizeof(YuvPixel));
-    mp->hpt = av_mallocz(h4 * w4 / 16 * sizeof(YuvPixel));
+    mp->vpt = av_mallocz_array(avctx->height, sizeof(YuvPixel));
+    mp->hpt = av_mallocz_array(h4 / 4, w4 / 4 * sizeof(YuvPixel));
+    if (!mp->changes_map || !mp->vpt || !mp->hpt) {
+        av_freep(&mp->changes_map);
+        av_freep(&mp->vpt);
+        av_freep(&mp->hpt);
+        return AVERROR(ENOMEM);
+    }
     avctx->pix_fmt = AV_PIX_FMT_RGB555;
 
     mp->frame = av_frame_alloc();
@@ -116,38 +127,48 @@ static void mp_read_changes_map(MotionPixelsContext *mp, GetBitContext *gb, int
     }
 }
 
-static void mp_get_code(MotionPixelsContext *mp, GetBitContext *gb, int size, int code)
+static int mp_get_code(MotionPixelsContext *mp, GetBitContext *gb, int size, int code)
 {
     while (get_bits1(gb)) {
         ++size;
         if (size > mp->max_codes_bits) {
             av_log(mp->avctx, AV_LOG_ERROR, "invalid code size %d/%d\n", size, mp->max_codes_bits);
-            return;
+            return AVERROR_INVALIDDATA;
         }
         code <<= 1;
-        mp_get_code(mp, gb, size, code + 1);
+        if (mp_get_code(mp, gb, size, code + 1) < 0)
+            return AVERROR_INVALIDDATA;
     }
     if (mp->current_codes_count >= MAX_HUFF_CODES) {
         av_log(mp->avctx, AV_LOG_ERROR, "too many codes\n");
-        return;
+        return AVERROR_INVALIDDATA;
     }
+
     mp->codes[mp->current_codes_count  ].code = code;
     mp->codes[mp->current_codes_count++].size = size;
+    return 0;
 }
 
-static void mp_read_codes_table(MotionPixelsContext *mp, GetBitContext *gb)
+static int mp_read_codes_table(MotionPixelsContext *mp, GetBitContext *gb)
 {
     if (mp->codes_count == 1) {
         mp->codes[0].delta = get_bits(gb, 4);
     } else {
         int i;
+        int ret;
 
         mp->max_codes_bits = get_bits(gb, 4);
         for (i = 0; i < mp->codes_count; ++i)
             mp->codes[i].delta = get_bits(gb, 4);
         mp->current_codes_count = 0;
-        mp_get_code(mp, gb, 0, 0);
+        if ((ret = mp_get_code(mp, gb, 0, 0)) < 0)
+            return ret;
+        if (mp->current_codes_count < mp->codes_count) {
+            av_log(mp->avctx, AV_LOG_ERROR, "too few codes\n");
+            return AVERROR_INVALIDDATA;
+        }
    }
+   return 0;
 }
 
 static int mp_gradient(MotionPixelsContext *mp, int component, int v)
@@ -180,7 +201,6 @@ static int mp_get_vlc(MotionPixelsContext *mp, GetBitContext *gb)
     int i;
 
     i = (mp->codes_count == 1) ? 0 : get_vlc2(gb, mp->vlc.table, mp->max_codes_bits, 1);
-    i = FFMIN(i, FF_ARRAY_ELEMS(mp->codes) - 1);
     return mp->codes[i].delta;
 }
 
@@ -236,6 +256,8 @@ static void mp_decode_frame_helper(MotionPixelsContext *mp, GetBitContext *gb)
     YuvPixel p;
     int y, y0;
 
+    av_assert1(mp->changes_map[0]);
+
     for (y = 0; y < mp->avctx->height; ++y) {
         if (mp->changes_map[y * mp->avctx->width] != 0) {
             memset(mp->gradient_scale, 1, sizeof(mp->gradient_scale));
@@ -268,20 +290,17 @@ static int mp_decode_frame(AVCodecContext *avctx,
     GetBitContext gb;
     int i, count1, count2, sz, ret;
 
-    if ((ret = ff_reget_buffer(avctx, mp->frame)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, mp->frame)) < 0)
         return ret;
-    }
 
     /* le32 bitstream msb first */
-    av_fast_malloc(&mp->bswapbuf, &mp->bswapbuf_size, buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    av_fast_padded_malloc(&mp->bswapbuf, &mp->bswapbuf_size, buf_size);
     if (!mp->bswapbuf)
         return AVERROR(ENOMEM);
     mp->bdsp.bswap_buf((uint32_t *) mp->bswapbuf, (const uint32_t *) buf,
                        buf_size / 4);
     if (buf_size & 3)
         memcpy(mp->bswapbuf + (buf_size & ~3), buf + (buf_size & ~3), buf_size & 3);
-    memset(mp->bswapbuf + buf_size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
     init_get_bits(&gb, mp->bswapbuf, buf_size * 8);
 
     memset(mp->changes_map, 0, avctx->width * avctx->height);
@@ -300,7 +319,8 @@ static int mp_decode_frame(AVCodecContext *avctx,
         *(uint16_t *)mp->frame->data[0] = get_bits(&gb, 15);
         mp->changes_map[0] = 1;
     }
-    mp_read_codes_table(mp, &gb);
+    if (mp_read_codes_table(mp, &gb) < 0)
+        goto end;
 
     sz = get_bits(&gb, 18);
     if (avctx->extradata[0] != 5)
diff --git a/libavcodec/motionpixels_tablegen.c b/libavcodec/motionpixels_tablegen.c
index 410b76f..14b7b9b 100644
--- a/libavcodec/motionpixels_tablegen.c
+++ b/libavcodec/motionpixels_tablegen.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/motionpixels_tablegen.h b/libavcodec/motionpixels_tablegen.h
index e6c32c7..8fb840f 100644
--- a/libavcodec/motionpixels_tablegen.h
+++ b/libavcodec/motionpixels_tablegen.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -30,7 +30,7 @@ typedef struct YuvPixel {
 } YuvPixel;
 
 static int mp_yuv_to_rgb(int y, int v, int u, int clip_rgb) {
-    static const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
+    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
     int r, g, b;
 
     r = (1000 * y + 701 * v) / 1000;
diff --git a/libavcodec/movsub_bsf.c b/libavcodec/movsub_bsf.c
index 506750f..123c7a5 100644
--- a/libavcodec/movsub_bsf.c
+++ b/libavcodec/movsub_bsf.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2008 Reimar Döffinger
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -35,9 +35,8 @@ static int text2movsub(AVBitStreamFilterContext *bsfc, AVCodecContext *avctx, co
 }
 
 AVBitStreamFilter ff_text2movsub_bsf={
-    "text2movsub",
-    0,
-    text2movsub,
+    .name   = "text2movsub",
+    .filter = text2movsub,
 };
 
 static int mov2textsub(AVBitStreamFilterContext *bsfc, AVCodecContext *avctx, const char *args,
@@ -51,7 +50,6 @@ static int mov2textsub(AVBitStreamFilterContext *bsfc, AVCodecContext *avctx, co
 }
 
 AVBitStreamFilter ff_mov2textsub_bsf={
-    "mov2textsub",
-    0,
-    mov2textsub,
+    .name   = "mov2textsub",
+    .filter = mov2textsub,
 };
diff --git a/libavcodec/movtextdec.c b/libavcodec/movtextdec.c
new file mode 100644
index 0000000..05ff53a
--- /dev/null
+++ b/libavcodec/movtextdec.c
@@ -0,0 +1,116 @@
+/*
+ * 3GPP TS 26.245 Timed Text decoder
+ * Copyright (c) 2012  Philip Langdale <philipl@overt.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "ass.h"
+#include "libavutil/avstring.h"
+#include "libavutil/common.h"
+#include "libavutil/bprint.h"
+#include "libavutil/intreadwrite.h"
+
+/**
+ * Append the text in [text, text_end) to buf as one ASS line: '\r' is
+ * dropped, '\n' becomes the ASS "\N" line break, every other byte is copied,
+ * and "\r\n" is appended at the end. Always returns 0.
+ */
+static int text_to_ass(AVBPrint *buf, const char *text, const char *text_end)
+{
+    const char *cur;
+
+    for (cur = text; cur < text_end; cur++) {
+        if (*cur == '\n')
+            av_bprintf(buf, "\\N");
+        else if (*cur != '\r')
+            av_bprint_chars(buf, *cur, 1);
+    }
+
+    av_bprintf(buf, "\r\n");
+    return 0;
+}
+
+static int mov_text_init(AVCodecContext *avctx) {
+    /*
+     * Decoder init: delegates to ff_ass_subtitle_header_default() and
+     * returns its result. TODO: Handle the default text style.
+     * NB: Most players ignore styles completely, so files with a broken default
+     * style are very common and respecting it is worse than ignoring it.
+     */
+    return ff_ass_subtitle_header_default(avctx);
+}
+
+/**
+ * Decode one 3GPP Timed Text packet into a single ASS subtitle rectangle.
+ * Returns avpkt->size once a rectangle was added, 0 for an empty end-marker
+ * packet, or a negative AVERROR code.
+ */
+static int mov_text_decode_frame(AVCodecContext *avctx,
+                            void *data, int *got_sub_ptr, AVPacket *avpkt)
+{
+    AVSubtitle *sub = data;
+    int ts_start, ts_end, ret;
+    AVBPrint buf;
+    const char *ptr = avpkt->data, *end;
+
+    if (!ptr || avpkt->size < 2)
+        return AVERROR_INVALIDDATA;
+
+    /* A packet of size two with value zero is an empty subtitle used to mark
+     * the end of the previous non-empty subtitle. We can just drop them here
+     * as we have duration information already. If the value is non-zero,
+     * then it's technically a bad packet. */
+    if (avpkt->size == 2)
+        return AV_RB16(ptr) == 0 ? 0 : AVERROR_INVALIDDATA;
+
+    /* The first two bytes of the packet are the length of the text string.
+     * In complex cases, there are style descriptors appended to the string
+     * so we can't just assume the packet size is the string size. */
+    end = ptr + FFMIN(2 + AV_RB16(ptr), avpkt->size);
+    ptr += 2;
+
+    ts_start = av_rescale_q(avpkt->pts,
+                            avctx->time_base,
+                            (AVRational){1,100});
+    ts_end   = av_rescale_q(avpkt->pts + avpkt->duration,
+                            avctx->time_base,
+                            (AVRational){1,100});
+
+    // Note that the spec recommends lines be no longer than 2048 characters.
+    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
+    text_to_ass(&buf, ptr, end);
+
+    /* Finalize buf on every path: an incomplete AVBPrint can still own memory. */
+    ret = !av_bprint_is_complete(&buf) ? AVERROR(ENOMEM) :
+          ff_ass_add_rect(sub, buf.str, ts_start, ts_end-ts_start, 0);
+    av_bprint_finalize(&buf, NULL);
+    if (ret < 0)
+        return ret;
+    *got_sub_ptr = sub->num_rects > 0;
+    return avpkt->size;
+}
+
+AVCodec ff_movtext_decoder = {           /* decoder descriptor for AV_CODEC_ID_MOV_TEXT */
+    .name         = "mov_text",
+    .long_name    = NULL_IF_CONFIG_SMALL("3GPP Timed Text subtitle"),
+    .type         = AVMEDIA_TYPE_SUBTITLE,
+    .id           = AV_CODEC_ID_MOV_TEXT,
+    .init         = mov_text_init,         /* calls ff_ass_subtitle_header_default() */
+    .decode       = mov_text_decode_frame, /* converts one packet into one ASS rectangle */
+};
diff --git a/libavcodec/movtextenc.c b/libavcodec/movtextenc.c
new file mode 100644
index 0000000..5d91e94
--- /dev/null
+++ b/libavcodec/movtextenc.c
@@ -0,0 +1,165 @@
+/*
+ * 3GPP TS 26.245 Timed Text encoder
+ * Copyright (c) 2012  Philip Langdale <philipl@overt.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdarg.h>
+#include "avcodec.h"
+#include "libavutil/avassert.h"
+#include "libavutil/avstring.h"
+#include "libavutil/intreadwrite.h"
+#include "ass_split.h"
+#include "ass.h"
+
+typedef struct {
+    ASSSplitContext *ass_ctx; /* from ff_ass_split() in init, released by ff_ass_split_free() in close */
+    char buffer[2048];        /* text collected for the subtitle currently being encoded */
+    char *ptr;                /* current write position inside buffer */
+    char *end;                /* buffer + sizeof(buffer), i.e. one past the last byte */
+} MovTextContext;
+
+
+/** Encoder init: publish a fixed default text sample entry as extradata and create the ASS splitter. */
+static av_cold int mov_text_encode_init(AVCodecContext *avctx)
+{
+    /* For now, we'll use a fixed default style. When we add styling
+     * support, this will be generated from the ASS style. */
+    static const uint8_t text_sample_entry[] = {
+        0x00, 0x00, 0x00, 0x00, // uint32_t displayFlags
+        0x01,                   // int8_t horizontal-justification
+        0xFF,                   // int8_t vertical-justification
+        0x00, 0x00, 0x00, 0x00, // uint8_t background-color-rgba[4]
+        // BoxRecord {
+        0x00, 0x00,             // int16_t top
+        0x00, 0x00,             // int16_t left
+        0x00, 0x00,             // int16_t bottom
+        0x00, 0x00,             // int16_t right
+        // };
+        // StyleRecord {
+        0x00, 0x00,             // uint16_t startChar
+        0x00, 0x00,             // uint16_t endChar
+        0x00, 0x01,             // uint16_t font-ID
+        0x00,                   // uint8_t face-style-flags
+        0x12,                   // uint8_t font-size
+        0xFF, 0xFF, 0xFF, 0xFF, // uint8_t text-color-rgba[4]
+        // };
+        // FontTableBox {
+        0x00, 0x00, 0x00, 0x12, // uint32_t size
+        'f', 't', 'a', 'b',     // uint8_t name[4]
+        0x00, 0x01,             // uint16_t entry-count
+        // FontRecord {
+        0x00, 0x01,             // uint16_t font-ID
+        0x05,                   // uint8_t font-name-length
+        'S', 'e', 'r', 'i', 'f',// uint8_t font[font-name-length]
+        // };
+        // };
+    };
+    MovTextContext *s = avctx->priv_data;
+
+    avctx->extradata = av_mallocz(sizeof text_sample_entry + FF_INPUT_BUFFER_PADDING_SIZE);
+    if (!avctx->extradata)
+        return AVERROR(ENOMEM);
+    avctx->extradata_size = sizeof text_sample_entry; /* only advertise a size once the buffer exists */
+    memcpy(avctx->extradata, text_sample_entry, avctx->extradata_size);
+    s->ass_ctx = ff_ass_split(avctx->subtitle_header);
+    if (s->ass_ctx)
+        return 0;
+    av_freep(&avctx->extradata); /* don't leave extradata behind when init fails */
+    avctx->extradata_size = 0;
+    return AVERROR_INVALIDDATA;
+}
+
+static void mov_text_text_cb(void *priv, const char *text, int len)
+{
+    MovTextContext *s = priv; /* priv is the context passed to ff_ass_split_override_codes() */
+    av_assert0(s->end >= s->ptr);
+    av_strlcpy(s->ptr, text, FFMIN(s->end - s->ptr, len + 1)); /* len + 1: av_strlcpy's size includes the NUL */
+    s->ptr += FFMIN(s->end - s->ptr, len); /* advance by len, but never past s->end */
+}
+
+static void mov_text_new_line_cb(void *priv, int forced)
+{
+    MovTextContext *ctx = priv;
+    av_assert0(ctx->end >= ctx->ptr);
+    /* Emit "\n" (forced is ignored); with one byte left only the NUL fits. */
+    av_strlcpy(ctx->ptr, "\n", FFMIN(ctx->end - ctx->ptr, 2));
+    ctx->ptr += ctx->end > ctx->ptr;
+}
+
+static const ASSCodesCallbacks mov_text_callbacks = {
+    .text     = mov_text_text_cb,     /* appends plain text to MovTextContext.buffer */
+    .new_line = mov_text_new_line_cb, /* appends "\n" to MovTextContext.buffer */
+};
+
+/**
+ * Encode the text of every ASS rectangle in sub as one text sample: a 16-bit
+ * big-endian length followed by the text. Returns 2 + text length
+ * (0 when there is no text) or a negative AVERROR code.
+ */
+static int mov_text_encode_frame(AVCodecContext *avctx, unsigned char *buf,
+                                 int bufsize, const AVSubtitle *sub)
+{
+    MovTextContext *s = avctx->priv_data;
+    ASSDialog *dialog;
+    int i, len, num;
+
+    s->ptr = s->buffer;
+    s->end = s->ptr + sizeof(s->buffer);
+
+    for (i = 0; i < sub->num_rects; i++) {
+        if (sub->rects[i]->type != SUBTITLE_ASS) {
+            av_log(avctx, AV_LOG_ERROR, "Only SUBTITLE_ASS type supported.\n");
+            return AVERROR(ENOSYS);
+        }
+        dialog = ff_ass_split_dialog(s->ass_ctx, sub->rects[i]->ass, 0, &num);
+        for (; dialog && num--; dialog++)
+            ff_ass_split_override_codes(&mov_text_callbacks, s, dialog->text);
+    }
+
+    if (s->ptr == s->buffer)
+        return 0;
+
+    /* Check the room (2-byte length + text + NUL) before writing anything to buf. */
+    len = strlen(s->buffer);
+    if (len > bufsize - 3) {
+        av_log(avctx, AV_LOG_ERROR, "Buffer too small for ASS event.\n");
+        return AVERROR(EINVAL);
+    }
+    AV_WB16(buf, len);
+    av_strlcpy(buf + 2, s->buffer, bufsize - 2);
+    return len + 2;
+}
+
+static int mov_text_encode_close(AVCodecContext *avctx)
+{
+    MovTextContext *s = avctx->priv_data;
+    ff_ass_split_free(s->ass_ctx); /* releases the splitter created in mov_text_encode_init() */
+    return 0;                      /* always reports success */
+}
+
+AVCodec ff_movtext_encoder = {             /* encoder descriptor for AV_CODEC_ID_MOV_TEXT */
+    .name           = "mov_text",
+    .long_name      = NULL_IF_CONFIG_SMALL("3GPP Timed Text subtitle"),
+    .type           = AVMEDIA_TYPE_SUBTITLE,
+    .id             = AV_CODEC_ID_MOV_TEXT,
+    .priv_data_size = sizeof(MovTextContext),
+    .init           = mov_text_encode_init,  /* fills extradata, creates the ASS splitter */
+    .encode_sub     = mov_text_encode_frame, /* writes 16-bit length + text into the output buffer */
+    .close          = mov_text_encode_close, /* frees the ASS splitter */
+};
diff --git a/libavcodec/mp3_header_decompress_bsf.c b/libavcodec/mp3_header_decompress_bsf.c
new file mode 100644
index 0000000..df45532
--- /dev/null
+++ b/libavcodec/mp3_header_decompress_bsf.c
@@ -0,0 +1,97 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "avcodec.h"
+#include "mpegaudiodecheader.h"
+#include "mpegaudiodata.h"
+
+
+/**
+ * Pass packets that already start with a valid MPEG audio header through (returns 0);
+ * else prepend a header rebuilt from extradata and packet size in a new buffer (returns 1, <0 on error).
+ */
+static int mp3_header_decompress(AVBitStreamFilterContext *bsfc, AVCodecContext *avctx, const char *args,
+                     uint8_t **poutbuf, int *poutbuf_size,
+                     const uint8_t *buf, int buf_size, int keyframe){
+    uint32_t header;
+    int sample_rate= avctx->sample_rate;
+    int sample_rate_index, lsf, mpeg25, bitrate_index, frame_size;
+
+    header = AV_RB32(buf);
+    if(ff_mpa_check_header(header) >= 0){
+        *poutbuf= (uint8_t *) buf;
+        *poutbuf_size= buf_size;
+        return 0;
+    }
+
+    if(avctx->extradata_size != 15 || strcmp(avctx->extradata, "FFCMP3 0.0")){
+        av_log(avctx, AV_LOG_ERROR, "Extradata invalid %d\n", avctx->extradata_size);
+        return -1;
+    }
+
+    header= AV_RB32(avctx->extradata+11) & MP3_MASK;
+    lsf     = sample_rate < (24000+32000)/2;
+    mpeg25  = sample_rate < (12000+16000)/2;
+    sample_rate_index= (header>>10)&3;
+    sample_rate= avpriv_mpa_freq_tab[sample_rate_index] >> (lsf + mpeg25); //in case sample rate is a little off
+
+    for(bitrate_index=2; bitrate_index<30; bitrate_index++){
+        frame_size = avpriv_mpa_bitrate_tab[lsf][2][bitrate_index>>1];
+        frame_size = (frame_size * 144000) / (sample_rate << lsf) + (bitrate_index&1);
+        if(frame_size == buf_size + 4 || frame_size == buf_size + 6)
+            break;
+    }
+    if(bitrate_index == 30){
+        av_log(avctx, AV_LOG_ERROR, "Could not find bitrate_index.\n");
+        return -1;
+    }
+
+    header |= (bitrate_index&1)<<9;
+    header |= (bitrate_index>>1)<<12;
+    header |= (frame_size == buf_size + 4)<<16; //FIXME actually set a correct crc instead of 0
+
+    *poutbuf= av_malloc(frame_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    if(!*poutbuf)
+        return AVERROR(ENOMEM); // the memcpy below must not see a NULL buffer
+    *poutbuf_size= frame_size;
+    memcpy(*poutbuf + frame_size - buf_size, buf, buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
+
+    if(avctx->channels==2){
+        uint8_t *p= *poutbuf + frame_size - buf_size;
+        if(lsf){
+            FFSWAP(int, p[1], p[2]);
+            header |= (p[1] & 0xC0)>>2;
+            p[1] &= 0x3F;
+        }else{
+            header |= p[1] & 0x30;
+            p[1] &= 0xCF;
+        }
+    }
+
+    AV_WB32(*poutbuf, header);
+    return 1;
+}
+
+AVBitStreamFilter ff_mp3_header_decompress_bsf={ /* bitstream filter descriptor */
+    .name   = "mp3decomp",            /* name under which the filter is exposed */
+    .filter = mp3_header_decompress,  /* per-packet callback defined above */
+};
diff --git a/libavcodec/mpc.c b/libavcodec/mpc.c
index 763ea2c..7af30bd 100644
--- a/libavcodec/mpc.c
+++ b/libavcodec/mpc.c
@@ -2,20 +2,20 @@
  * Musepack decoder core
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -74,13 +74,13 @@ void ff_mpc_dequantize_and_synth(MPCContext * c, int maxband, int16_t **out,
         for(ch = 0; ch < 2; ch++){
             if(bands[i].res[ch]){
                 j = 0;
-                mul = mpc_CC[bands[i].res[ch] + 1] * mpc_SCF[bands[i].scf_idx[ch][0]+6];
+                mul = (mpc_CC+1)[bands[i].res[ch]] * mpc_SCF[bands[i].scf_idx[ch][0] & 0xFF];
                 for(; j < 12; j++)
                     c->sb_samples[ch][j][i] = mul * c->Q[ch][j + off];
-                mul = mpc_CC[bands[i].res[ch] + 1] * mpc_SCF[bands[i].scf_idx[ch][1]+6];
+                mul = (mpc_CC+1)[bands[i].res[ch]] * mpc_SCF[bands[i].scf_idx[ch][1] & 0xFF];
                 for(; j < 24; j++)
                     c->sb_samples[ch][j][i] = mul * c->Q[ch][j + off];
-                mul = mpc_CC[bands[i].res[ch] + 1] * mpc_SCF[bands[i].scf_idx[ch][2]+6];
+                mul = (mpc_CC+1)[bands[i].res[ch]] * mpc_SCF[bands[i].scf_idx[ch][2] & 0xFF];
                 for(; j < 36; j++)
                     c->sb_samples[ch][j][i] = mul * c->Q[ch][j + off];
             }
diff --git a/libavcodec/mpc.h b/libavcodec/mpc.h
index cdf49c1..4cb85748 100644
--- a/libavcodec/mpc.h
+++ b/libavcodec/mpc.h
@@ -2,20 +2,20 @@
  * Musepack decoder
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mpc7.c b/libavcodec/mpc7.c
index 2185aec..0f1e34a 100644
--- a/libavcodec/mpc7.c
+++ b/libavcodec/mpc7.c
@@ -2,20 +2,20 @@
  * Musepack SV7 decoder
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -36,10 +36,6 @@
 #include "mpc.h"
 #include "mpc7data.h"
 
-#define BANDS            32
-#define SAMPLES_PER_BAND 36
-#define MPC_FRAME_SIZE   (BANDS * SAMPLES_PER_BAND)
-
 static VLC scfi_vlc, dscf_vlc, hdr_vlc, quant_vlc[MPC7_QUANT_VLC_TABLES][2];
 
 static const uint16_t quant_offsets[MPC7_QUANT_VLC_TABLES*2 + 1] =
@@ -190,7 +186,7 @@ static int get_scale_idx(GetBitContext *gb, int ref)
     int t = get_vlc2(gb, dscf_vlc.table, MPC7_DSCF_BITS, 1) - 7;
     if (t == 8)
         return get_bits(gb, 6);
-    return av_clip_uintp2(ref + t, 7);
+    return ref + t;
 }
 
 static int mpc7_decode_frame(AVCodecContext * avctx, void *data,
@@ -226,11 +222,9 @@ static int mpc7_decode_frame(AVCodecContext * avctx, void *data,
     buf_size  -= 4;
 
     /* get output buffer */
-    frame->nb_samples = last_frame ? c->lastframelen : MPC_FRAME_SIZE;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    frame->nb_samples = MPC_FRAME_SIZE;
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     av_fast_padded_malloc(&c->bits, &c->buf_size, buf_size);
     if (!c->bits)
@@ -246,7 +240,11 @@ static int mpc7_decode_frame(AVCodecContext * avctx, void *data,
             int t = 4;
             if(i) t = get_vlc2(&gb, hdr_vlc.table, MPC7_HDR_BITS, 1) - 5;
             if(t == 4) bands[i].res[ch] = get_bits(&gb, 4);
-            else bands[i].res[ch] = av_clip(bands[i-1].res[ch] + t, 0, 17);
+            else bands[i].res[ch] = bands[i-1].res[ch] + t;
+            if (bands[i].res[ch] < -1 || bands[i].res[ch] > 17) {
+                av_log(avctx, AV_LOG_ERROR, "subband index invalid\n");
+                return AVERROR_INVALIDDATA;
+            }
         }
 
         if(bands[i].res[0] || bands[i].res[1]){
@@ -293,6 +291,8 @@ static int mpc7_decode_frame(AVCodecContext * avctx, void *data,
             idx_to_quant(c, &gb, bands[i].res[ch], c->Q[ch] + off);
 
     ff_mpc_dequantize_and_synth(c, mb, (int16_t **)frame->extended_data, 2);
+    if(last_frame)
+        frame->nb_samples = c->lastframelen;
 
     bits_used = get_bits_count(&gb);
     bits_avail = buf_size * 8;
diff --git a/libavcodec/mpc7data.h b/libavcodec/mpc7data.h
index f205ffe..5609e8f 100644
--- a/libavcodec/mpc7data.h
+++ b/libavcodec/mpc7data.h
@@ -2,20 +2,20 @@
  * Musepack decoder
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mpc8.c b/libavcodec/mpc8.c
index ee05a93..29c65f9 100644
--- a/libavcodec/mpc8.c
+++ b/libavcodec/mpc8.c
@@ -2,20 +2,20 @@
  * Musepack SV8 decoder
  * Copyright (c) 2007 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -126,6 +126,10 @@ static av_cold int mpc8_decode_init(AVCodecContext * avctx)
 
     skip_bits(&gb, 3);//sample rate
     c->maxbands = get_bits(&gb, 5) + 1;
+    if (c->maxbands >= BANDS) {
+        av_log(avctx,AV_LOG_ERROR, "maxbands %d too high\n", c->maxbands);
+        return AVERROR_INVALIDDATA;
+    }
     channels = get_bits(&gb, 4) + 1;
     if (channels > 2) {
         avpriv_request_sample(avctx, "Multichannel MPC SV8");
@@ -135,7 +139,8 @@ static av_cold int mpc8_decode_init(AVCodecContext * avctx)
     c->frames = 1 << (get_bits(&gb, 3) * 2);
 
     avctx->sample_fmt = AV_SAMPLE_FMT_S16P;
-    avctx->channel_layout = (avctx->channels==2) ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO;
+    avctx->channel_layout = (channels==2) ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO;
+    avctx->channels = channels;
 
     if(vlc_initialized) return 0;
     av_log(avctx, AV_LOG_DEBUG, "Initing VLC\n");
@@ -247,10 +252,8 @@ static int mpc8_decode_frame(AVCodecContext * avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = MPC_FRAME_SIZE;
-    if ((res = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((res = ff_get_buffer(avctx, frame, 0)) < 0)
         return res;
-    }
 
     keyframe = c->cur_frame == 0;
 
@@ -267,8 +270,11 @@ static int mpc8_decode_frame(AVCodecContext * avctx, void *data,
         maxband = c->last_max_band + get_vlc2(gb, band_vlc.table, MPC8_BANDS_BITS, 2);
         if(maxband > 32) maxband -= 33;
     }
-    if(maxband > c->maxbands + 1)
+
+    if(maxband > c->maxbands + 1) {
+        av_log(avctx, AV_LOG_ERROR, "maxband %d too large\n",maxband);
         return AVERROR_INVALIDDATA;
+    }
     c->last_max_band = maxband;
 
     /* read subband indexes */
diff --git a/libavcodec/mpc8data.h b/libavcodec/mpc8data.h
index 2940b30..22c2be4 100644
--- a/libavcodec/mpc8data.h
+++ b/libavcodec/mpc8data.h
@@ -2,20 +2,20 @@
  * Musepack SV8 decoder
  * Copyright (c) 2007 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mpc8huff.h b/libavcodec/mpc8huff.h
index 6005e21..8491037 100644
--- a/libavcodec/mpc8huff.h
+++ b/libavcodec/mpc8huff.h
@@ -2,20 +2,20 @@
  * Musepack SV8 decoder
  * Copyright (c) 2007 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mpcdata.h b/libavcodec/mpcdata.h
index 15724f3..64fb4ab 100644
--- a/libavcodec/mpcdata.h
+++ b/libavcodec/mpcdata.h
@@ -2,20 +2,20 @@
  * Musepack decoder
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -30,9 +30,7 @@ static const float mpc_CC[18+1] = {
     4.0002, 2.0001, 1.0000
 };
 
-static const float mpc_SCF[128+6] = {
-    920.016296386718750000, 766.355773925781250000, 638.359558105468750000,
-    531.741149902343750000, 442.930114746093750000, 368.952209472656250000,
+static const float mpc_SCF[256] = {
     307.330047607421875000, 255.999984741210937500, 213.243041992187500000, 177.627334594726562500,
     147.960128784179687500, 123.247924804687500000, 102.663139343261718750, 85.516410827636718750,
     71.233520507812500000, 59.336143493652343750, 49.425861358642578125, 41.170787811279296875,
@@ -64,7 +62,39 @@ static const float mpc_SCF[128+6] = {
     0.000000396931966407, 0.000000330636652279, 0.000000275413924555, 0.000000229414467867,
     0.000000191097811353, 0.000000159180785886, 0.000000132594522029, 0.000000110448674207,
     0.000000092001613439, 0.000000076635565449, 0.000000063835940978, 0.000000053174105119,
-    0.000000044293003043, 0.000000036895215771, 0.000000030733001921, 0.000000025599996789
+    0.000000044293003043, 0.000000036895215771, 0.000000030733001921, 0.000000025599996789,
+    0.000000021324305018, 3689522167600.270019531250000000, 3073300627835.926757812500000000, 2560000000000.002929687500000000,
+    2132430501800.519042968750000000, 1776273376956.721923828125000000, 1479601378343.250244140625000000, 1232479339720.794189453125000000,
+    1026631459710.774291992187500000, 855164155779.391845703125000000, 712335206965.024780273437500000, 593361454233.829101562500000000,
+    494258618594.112609863281250000, 411707872682.763122558593750000, 342944697476.612365722656250000, 285666302081.983886718750000000,
+    237954506209.446411132812500000, 198211502766.368713378906250000, 165106349338.563323974609375000, 137530396629.095306396484375000,
+    114560161209.611633300781250000, 95426399240.062576293945312500, 79488345475.196502685546875000, 66212254855.064872741699218750,
+    55153528064.816276550292968750, 45941822471.611343383789062500, 38268649822.956413269042968750, 31877045369.216873168945312500,
+    26552962442.420688629150390625, 22118104306.789615631103515625, 18423953228.829509735107421875, 15346796808.164905548095703125,
+    12783585007.291271209716796875, 10648479137.463939666748046875, 8869977230.669750213623046875, 7388519530.061036109924316406,
+    6154493909.785535812377929688, 5126574428.270387649536132812, 4270337375.232155323028564453, 3557108465.595236301422119141,
+    2963002574.315670013427734375, 2468123854.056322574615478516, 2055899448.676229715347290039, 1712524489.450022459030151367,
+    1426499787.649837732315063477, 1188246741.404872417449951172, 989786560.561257958412170410, 824473067.192597866058349609,
+    686770123.591610312461853027, 572066234.090648531913757324, 476520111.962911486625671387, 396932039.637152194976806641,
+    330636714.243810534477233887, 275413990.026798009872436523, 229414528.498330980539321899, 191097866.455478429794311523,
+    159180827.835415601730346680, 132594551.788319095969200134, 110448697.892960876226425171, 92001629.793398514389991760,
+    76635578.744844585657119751, 63835955.327594503760337830, 53174116.504741288721561432, 44293010.914454914629459381,
+    36895221.676002673804759979, 30733006.278359245508909225, 25600000.000000011175870895, 21324305.018005173653364182,
+    17762733.769567202776670456, 14796013.783432489261031151, 12324793.397207930684089661, 10266314.597107734531164169,
+    8551641.557793911546468735, 7123352.069650243036448956, 5933614.542338287457823753, 4942586.185941123403608799,
+    4117078.726827629376202822, 3429446.974766122177243233, 2856663.020819837693125010, 2379545.062094463035464287,
+    1982115.027663686312735081, 1651063.493385632522404194, 1375303.966290952404960990, 1145601.612096115713939071,
+    954263.992400625254958868, 794883.454751964658498764, 662122.548550648498348892, 551535.280648162588477135,
+    459418.224716113239992410, 382686.498229563992936164, 318770.453692168579436839, 265529.624424206791445613,
+    221181.043067896069260314, 184239.532288295013131574, 153467.968081648985389620, 127835.850072912653558888,
+    106484.791374639346031472, 88699.772306697457679547, 73885.195300610314006917, 61544.939097855312866159,
+    51265.744282703839417081, 42703.373752321524079889, 35571.084655952341563534, 29630.025743156678800005,
+    24681.238540563208516687, 20558.994486762283486314, 17125.244894500214286381, 14264.997876498367986642,
+    11882.467414048716818797, 9897.865605612574654515, 8244.730671925974093028, 6867.701235916098994494,
+    5720.662340906482313585, 4765.201119629112326948, 3969.320396371519564127, 3306.367142438103201130,
+    2754.139900267978191550, 2294.145284983308101801, 1910.978664554782881169, 1591.808278354154936096,
+    1325.945517883190177599, 1104.486978929608085309, 920.016297933984674273, 766.355787448445425980,
+    638.359553275944676898, 531.741165047412550848, 442.930109144548907807, 368.952216760026544762,
 };
 
 #endif /* AVCODEC_MPCDATA_H */
diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c
index be5227f..27d680f 100644
--- a/libavcodec/mpeg12.c
+++ b/libavcodec/mpeg12.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2000, 2001 Fabrice Bellard
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,7 +25,12 @@
  * MPEG-1/2 decoder
  */
 
+#define UNCHECKED_BITSTREAM_READER 1
+
 #include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
+#include "libavutil/timecode.h"
+
 #include "internal.h"
 #include "avcodec.h"
 #include "mpegvideo.h"
@@ -33,6 +38,7 @@
 #include "mpeg12.h"
 #include "mpeg12data.h"
 #include "bytestream.h"
+#include "vdpau_internal.h"
 #include "thread.h"
 
 uint8_t ff_mpeg12_static_rl_table_store[2][2][2*MAX_RUN + MAX_LEVEL + 3];
@@ -194,7 +200,7 @@ int ff_mpeg1_find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size,
 */
 
     for (i = 0; i < buf_size; i++) {
-        assert(pc->frame_start_found >= 0 && pc->frame_start_found <= 4);
+        av_assert1(pc->frame_start_found >= 0 && pc->frame_start_found <= 4);
         if (pc->frame_start_found & 1) {
             if (state == EXT_START_CODE && (buf[i] & 0xF0) != 0x80)
                 pc->frame_start_found--;
@@ -235,3 +241,4 @@ int ff_mpeg1_find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size,
     pc->state = state;
     return END_NOT_FOUND;
 }
+
diff --git a/libavcodec/mpeg12.h b/libavcodec/mpeg12.h
index 9132dc3..b4ebd23 100644
--- a/libavcodec/mpeg12.h
+++ b/libavcodec/mpeg12.h
@@ -2,20 +2,20 @@
  * MPEG1/2 common code
  * Copyright (c) 2007 Aurelien Jacobs <aurel@gnuage.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mpeg12data.c b/libavcodec/mpeg12data.c
index ccc3d2d..e8c4a5d 100644
--- a/libavcodec/mpeg12data.c
+++ b/libavcodec/mpeg12data.c
@@ -3,20 +3,20 @@
  * copyright (c) 2000,2001 Fabrice Bellard
  * copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,7 +27,7 @@
 
 #include "mpeg12data.h"
 
-const uint16_t ff_mpeg1_default_intra_matrix[64] = {
+const uint16_t ff_mpeg1_default_intra_matrix[256] = {
         8, 16, 19, 22, 26, 27, 29, 34,
         16, 16, 22, 24, 27, 29, 34, 37,
         19, 22, 26, 27, 29, 34, 34, 38,
@@ -325,6 +325,72 @@ const AVRational ff_mpeg12_frame_rate_tab[16] = {
     {    0,    0},
 };
 
+const AVRational ff_mpeg2_frame_rate_tab[] = {
+    {      1,     1},
+    {      2,     1},
+    {      3,     1},
+    {      4,     1},
+    {      5,     1},
+    {      6,     1},
+    {      8,     1},
+    {      9,     1},
+    {     10,     1},
+    {     12,     1},
+    {     15,     1},
+    {     16,     1},
+    {     18,     1},
+    {     20,     1},
+    {     24,     1},
+    {     25,     1},
+    {     30,     1},
+    {     32,     1},
+    {     36,     1},
+    {     40,     1},
+    {     45,     1},
+    {     48,     1},
+    {     50,     1},
+    {     60,     1},
+    {     72,     1},
+    {     75,     1},
+    {     80,     1},
+    {     90,     1},
+    {     96,     1},
+    {    100,     1},
+    {    120,     1},
+    {    150,     1},
+    {    180,     1},
+    {    200,     1},
+    {    240,     1},
+    {    750,  1001},
+    {    800,  1001},
+    {    960,  1001},
+    {   1000,  1001},
+    {   1200,  1001},
+    {   1250,  1001},
+    {   1500,  1001},
+    {   1600,  1001},
+    {   1875,  1001},
+    {   2000,  1001},
+    {   2400,  1001},
+    {   2500,  1001},
+    {   3000,  1001},
+    {   3750,  1001},
+    {   4000,  1001},
+    {   4800,  1001},
+    {   5000,  1001},
+    {   6000,  1001},
+    {   7500,  1001},
+    {   8000,  1001},
+    {  10000,  1001},
+    {  12000,  1001},
+    {  15000,  1001},
+    {  20000,  1001},
+    {  24000,  1001},
+    {  30000,  1001},
+    {  60000,  1001},
+    {      0,     0},
+};
+
 const float ff_mpeg1_aspect[16]={
     0.0000,
     1.0000,
diff --git a/libavcodec/mpeg12data.h b/libavcodec/mpeg12data.h
index 633a291..65b9485 100644
--- a/libavcodec/mpeg12data.h
+++ b/libavcodec/mpeg12data.h
@@ -3,20 +3,20 @@
  * copyright (c) 2000,2001 Fabrice Bellard
  * copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -32,7 +32,7 @@
 #include "libavutil/rational.h"
 #include "rl.h"
 
-extern const uint16_t ff_mpeg1_default_intra_matrix[64];
+extern const uint16_t ff_mpeg1_default_intra_matrix[];
 extern const uint16_t ff_mpeg1_default_non_intra_matrix[64];
 
 extern const uint16_t ff_mpeg12_vlc_dc_lum_code[12];
@@ -49,6 +49,7 @@ extern const uint8_t ff_mpeg12_mbPatTable[64][2];
 extern const uint8_t ff_mpeg12_mbMotionVectorTable[17][2];
 
 extern const AVRational ff_mpeg12_frame_rate_tab[];
+extern const AVRational ff_mpeg2_frame_rate_tab[];
 
 extern const float ff_mpeg1_aspect[16];
 extern const AVRational ff_mpeg2_aspect[16];
diff --git a/libavcodec/mpeg12dec.c b/libavcodec/mpeg12dec.c
index a181fcc..85bb7b0 100644
--- a/libavcodec/mpeg12dec.c
+++ b/libavcodec/mpeg12dec.c
@@ -1,22 +1,22 @@
 /*
  * MPEG-1/2 decoder
  * Copyright (c) 2000, 2001 Fabrice Bellard
- * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2002-2013 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,6 +25,7 @@
  * MPEG-1/2 decoder
  */
 
+#define UNCHECKED_BITSTREAM_READER 1
 #include <inttypes.h>
 
 #include "libavutil/attributes.h"
@@ -43,6 +44,7 @@
 #include "mpegvideo.h"
 #include "thread.h"
 #include "version.h"
+#include "vdpau_internal.h"
 #include "xvmc_internal.h"
 
 typedef struct Mpeg1Context {
@@ -61,7 +63,7 @@ typedef struct Mpeg1Context {
     int save_width, save_height, save_progressive_seq;
     AVRational frame_rate_ext;  /* MPEG-2 specific framerate modificator */
     int sync;                   /* Did we reach a sync point like a GOP/SEQ/KEYFrame? */
-    int closed_gop;             /* GOP is closed */
+    int tmpgexs;
     int first_slice;
     int extradata_decoded;
 } Mpeg1Context;
@@ -158,15 +160,16 @@ static inline int mpeg1_decode_block_intra(MpegEncContext *s,
     i = 0;
     {
         OPEN_READER(re, &s->gb);
+        UPDATE_CACHE(re, &s->gb);
+        if (((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
+            goto end;
+
         /* now quantify & encode AC coefficients */
         for (;;) {
-            UPDATE_CACHE(re, &s->gb);
             GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0],
                        TEX_VLC_BITS, 2, 0);
 
-            if (level == 127) {
-                break;
-            } else if (level != 0) {
+            if (level != 0) {
                 i += run;
                 check_scantable_index(s, i);
                 j = scantable[i];
@@ -174,7 +177,7 @@ static inline int mpeg1_decode_block_intra(MpegEncContext *s,
                 level = (level - 1) | 1;
                 level = (level ^ SHOW_SBITS(re, &s->gb, 1)) -
                         SHOW_SBITS(re, &s->gb, 1);
-                LAST_SKIP_BITS(re, &s->gb, 1);
+                SKIP_BITS(re, &s->gb, 1);
             } else {
                 /* escape */
                 run = SHOW_UBITS(re, &s->gb, 6) + 1;
@@ -184,10 +187,10 @@ static inline int mpeg1_decode_block_intra(MpegEncContext *s,
                 SKIP_BITS(re, &s->gb, 8);
                 if (level == -128) {
                     level = SHOW_UBITS(re, &s->gb, 8) - 256;
-                    LAST_SKIP_BITS(re, &s->gb, 8);
+                    SKIP_BITS(re, &s->gb, 8);
                 } else if (level == 0) {
                     level = SHOW_UBITS(re, &s->gb, 8);
-                    LAST_SKIP_BITS(re, &s->gb, 8);
+                    SKIP_BITS(re, &s->gb, 8);
                 }
                 i += run;
                 check_scantable_index(s, i);
@@ -204,7 +207,13 @@ static inline int mpeg1_decode_block_intra(MpegEncContext *s,
             }
 
             block[j] = level;
+            if (((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
+               break;
+
+            UPDATE_CACHE(re, &s->gb);
         }
+end:
+        LAST_SKIP_BITS(re, &s->gb, 2);
         CLOSE_READER(re, &s->gb);
     }
     s->block_last_index[n] = i;
@@ -296,6 +305,11 @@ end:
     return 0;
 }
 
+/**
+ * Note: this function can read out of bounds and crash on corrupt streams.
+ * Adding bounds checks would cancel out the speed benefit it exists for.
+ * Do not use the "fast" flag if you need the code to be robust.
+ */
 static inline int mpeg1_fast_decode_block_inter(MpegEncContext *s,
                                                 int16_t *block, int n)
 {
@@ -458,6 +472,11 @@ end:
     return 0;
 }
 
+/**
+ * Note: this function can read out of bounds and crash on corrupt streams.
+ * Adding bounds checks would cancel out the speed benefit it exists for.
+ * Do not use the "fast" flag if you need the code to be robust.
+ */
 static inline int mpeg2_fast_decode_block_non_intra(MpegEncContext *s,
                                                     int16_t *block, int n)
 {
@@ -487,7 +506,6 @@ static inline int mpeg2_fast_decode_block_non_intra(MpegEncContext *s,
 
         if (level != 0) {
             i += run;
-            check_scantable_index(s, i);
             j = scantable[i];
             level = ((level * 2 + 1) * qscale) >> 1;
             level = (level ^ SHOW_SBITS(re, &s->gb, 1)) -
@@ -502,7 +520,6 @@ static inline int mpeg2_fast_decode_block_non_intra(MpegEncContext *s,
             SKIP_BITS(re, &s->gb, 12);
 
             i += run;
-            check_scantable_index(s, i);
             j = scantable[i];
             if (level < 0) {
                 level = ((-level * 2 + 1) * qscale) >> 1;
@@ -513,8 +530,9 @@ static inline int mpeg2_fast_decode_block_non_intra(MpegEncContext *s,
         }
 
         block[j] = level;
-        if (((int32_t) GET_CACHE(re, &s->gb)) <= (int32_t) 0xBFFFFFFF)
+        if (((int32_t) GET_CACHE(re, &s->gb)) <= (int32_t) 0xBFFFFFFF || i > 63)
             break;
+
         UPDATE_CACHE(re, &s->gb);
     }
 end:
@@ -605,6 +623,11 @@ static inline int mpeg2_decode_block_intra(MpegEncContext *s,
     return 0;
 }
 
+/**
+ * Note: this function can read out of bounds and crash on corrupt streams.
+ * Adding bounds checks would cancel out the speed benefit it exists for.
+ * Do not use the "fast" flag if you need the code to be robust.
+ */
 static inline int mpeg2_fast_decode_block_intra(MpegEncContext *s,
                                                 int16_t *block, int n)
 {
@@ -644,11 +667,10 @@ static inline int mpeg2_fast_decode_block_intra(MpegEncContext *s,
             GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0],
                        TEX_VLC_BITS, 2, 0);
 
-            if (level == 127) {
+            if (level >= 64 || i > 63) {
                 break;
             } else if (level != 0) {
                 i += run;
-                check_scantable_index(s, i);
                 j = scantable[i];
                 level = (level * qscale * quant_matrix[j]) >> 4;
                 level = (level ^ SHOW_SBITS(re, &s->gb, 1)) -
@@ -662,7 +684,6 @@ static inline int mpeg2_fast_decode_block_intra(MpegEncContext *s,
                 level = SHOW_SBITS(re, &s->gb, 12);
                 SKIP_BITS(re, &s->gb, 12);
                 i += run;
-                check_scantable_index(s, i);
                 j = scantable[i];
                 if (level < 0) {
                     level = (-level * qscale * quant_matrix[j]) >> 4;
@@ -701,6 +722,7 @@ static inline int get_qscale(MpegEncContext *s)
         return qscale << 1;
 }
 
+
 /* motion type (for MPEG-2) */
 #define MT_FIELD 1
 #define MT_FRAME 2
@@ -714,7 +736,7 @@ static int mpeg_decode_mb(MpegEncContext *s, int16_t block[12][64])
 
     av_dlog(s->avctx, "decode_mb: x=%d y=%d\n", s->mb_x, s->mb_y);
 
-    assert(s->mb_skipped == 0);
+    av_assert2(s->mb_skipped == 0);
 
     if (s->mb_skip_run-- != 0) {
         if (s->pict_type == AV_PICTURE_TYPE_P) {
@@ -729,11 +751,12 @@ static int mpeg_decode_mb(MpegEncContext *s, int16_t block[12][64])
             else
                 // FIXME not sure if this is allowed in MPEG at all
                 mb_type = s->current_picture.mb_type[s->mb_width + (s->mb_y - 1) * s->mb_stride - 1];
-            if (IS_INTRA(mb_type))
+            if (IS_INTRA(mb_type)) {
+                av_log(s->avctx, AV_LOG_ERROR, "skip with previntra\n");
                 return -1;
+            }
             s->current_picture.mb_type[s->mb_x + s->mb_y * s->mb_stride] =
                 mb_type | MB_TYPE_SKIP;
-//            assert(s->current_picture.mb_type[s->mb_x + s->mb_y * s->mb_stride - 1] & (MB_TYPE_16x16 | MB_TYPE_16x8));
 
             if ((s->mv[0][0][0] | s->mv[0][0][1] | s->mv[1][0][0] | s->mv[1][0][1]) == 0)
                 s->mb_skipped = 1;
@@ -813,13 +836,9 @@ static int mpeg_decode_mb(MpegEncContext *s, int16_t block[12][64])
             memset(s->last_mv, 0, sizeof(s->last_mv));
         }
         s->mb_intra = 1;
-#if FF_API_XVMC
-FF_DISABLE_DEPRECATION_WARNINGS
         // if 1, we memcpy blocks in xvmcvideo
-        if (CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration > 1)
+        if ((CONFIG_MPEG1_XVMC_HWACCEL || CONFIG_MPEG2_XVMC_HWACCEL) && s->pack_pblocks)
             ff_xvmc_pack_pblocks(s, -1); // inter are always full blocks
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif /* FF_API_XVMC */
 
         if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
             if (s->flags2 & CODEC_FLAG2_FAST) {
@@ -837,11 +856,12 @@ FF_ENABLE_DEPRECATION_WARNINGS
         }
     } else {
         if (mb_type & MB_TYPE_ZERO_MV) {
-            assert(mb_type & MB_TYPE_CBP);
+            av_assert2(mb_type & MB_TYPE_CBP);
 
             s->mv_dir = MV_DIR_FORWARD;
             if (s->picture_structure == PICT_FRAME) {
-                if (!s->frame_pred_frame_dct)
+                if (s->picture_structure == PICT_FRAME
+                    && !s->frame_pred_frame_dct)
                     s->interlaced_dct = get_bits1(&s->gb);
                 s->mv_type = MV_TYPE_16X16;
             } else {
@@ -860,10 +880,10 @@ FF_ENABLE_DEPRECATION_WARNINGS
             s->mv[0][0][0]      = 0;
             s->mv[0][0][1]      = 0;
         } else {
-            assert(mb_type & MB_TYPE_L0L1);
+            av_assert2(mb_type & MB_TYPE_L0L1);
             // FIXME decide if MBs in field pictures are MB_TYPE_INTERLACED
             /* get additional motion vector type */
-            if (s->frame_pred_frame_dct) {
+            if (s->picture_structure == PICT_FRAME && s->frame_pred_frame_dct) {
                 motion_type = MT_FRAME;
             } else {
                 motion_type = get_bits(&s->gb, 2);
@@ -943,6 +963,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
                         }
                     }
                 } else {
+                    av_assert0(!s->progressive_sequence);
                     mb_type |= MB_TYPE_16x16 | MB_TYPE_INTERLACED;
                     for (i = 0; i < 2; i++) {
                         if (USES_LIST(mb_type, i)) {
@@ -959,6 +980,10 @@ FF_ENABLE_DEPRECATION_WARNINGS
                 }
                 break;
             case MT_DMV:
+                if (s->progressive_sequence){
+                    av_log(s->avctx, AV_LOG_ERROR, "MT_DMV in progressive_sequence\n");
+                    return -1;
+                }
                 s->mv_type = MV_TYPE_DMV;
                 for (i = 0; i < 2; i++) {
                     if (USES_LIST(mb_type, i)) {
@@ -1027,17 +1052,13 @@ FF_ENABLE_DEPRECATION_WARNINGS
             }
             if (cbp <= 0) {
                 av_log(s->avctx, AV_LOG_ERROR,
-                       "invalid cbp at %d %d\n", s->mb_x, s->mb_y);
+                       "invalid cbp %d at %d %d\n", cbp, s->mb_x, s->mb_y);
                 return -1;
             }
 
-#if FF_API_XVMC
-FF_DISABLE_DEPRECATION_WARNINGS
             // if 1, we memcpy blocks in xvmcvideo
-            if (CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration > 1)
+            if ((CONFIG_MPEG1_XVMC_HWACCEL || CONFIG_MPEG2_XVMC_HWACCEL) && s->pack_pblocks)
                 ff_xvmc_pack_pblocks(s, cbp);
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif /* FF_API_XVMC */
 
             if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
                 if (s->flags2 & CODEC_FLAG2_FAST) {
@@ -1161,48 +1182,74 @@ static void quant_matrix_rebuild(uint16_t *matrix, const uint8_t *old_perm,
         matrix[new_perm[i]] = temp_matrix[old_perm[i]];
 }
 
-#if FF_API_XVMC
-static const enum AVPixelFormat pixfmt_xvmc_mpg2_420[] = {
-    AV_PIX_FMT_XVMC_MPEG2_IDCT,
-    AV_PIX_FMT_XVMC_MPEG2_MC,
+static const enum AVPixelFormat mpeg1_hwaccel_pixfmt_list_420[] = {
+#if CONFIG_MPEG1_XVMC_HWACCEL
+    AV_PIX_FMT_XVMC,
+#endif
+#if CONFIG_MPEG1_VDPAU_HWACCEL
+    AV_PIX_FMT_VDPAU_MPEG1,
+    AV_PIX_FMT_VDPAU,
+#endif
+    AV_PIX_FMT_YUV420P,
     AV_PIX_FMT_NONE
 };
-#endif /* FF_API_XVMC */
 
-static const enum AVPixelFormat mpeg12_hwaccel_pixfmt_list_420[] = {
+static const enum AVPixelFormat mpeg2_hwaccel_pixfmt_list_420[] = {
+#if CONFIG_MPEG2_XVMC_HWACCEL
+    AV_PIX_FMT_XVMC,
+#endif
+#if CONFIG_MPEG2_VDPAU_HWACCEL
+    AV_PIX_FMT_VDPAU_MPEG2,
+    AV_PIX_FMT_VDPAU,
+#endif
 #if CONFIG_MPEG2_DXVA2_HWACCEL
     AV_PIX_FMT_DXVA2_VLD,
 #endif
 #if CONFIG_MPEG2_VAAPI_HWACCEL
     AV_PIX_FMT_VAAPI_VLD,
 #endif
-#if CONFIG_MPEG1_VDPAU_HWACCEL | CONFIG_MPEG2_VDPAU_HWACCEL
-    AV_PIX_FMT_VDPAU,
-#endif
     AV_PIX_FMT_YUV420P,
     AV_PIX_FMT_NONE
 };
 
+static inline int uses_vdpau(AVCodecContext *avctx) {
+    return avctx->pix_fmt == AV_PIX_FMT_VDPAU_MPEG1 || avctx->pix_fmt == AV_PIX_FMT_VDPAU_MPEG2;
+}
+
 static enum AVPixelFormat mpeg_get_pixelformat(AVCodecContext *avctx)
 {
     Mpeg1Context *s1  = avctx->priv_data;
     MpegEncContext *s = &s1->mpeg_enc_ctx;
 
-#if FF_API_XVMC
-FF_DISABLE_DEPRECATION_WARNINGS
-    if (avctx->xvmc_acceleration)
-        return ff_get_format(avctx, pixfmt_xvmc_mpg2_420);
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif /* FF_API_XVMC */
-
     if (s->chroma_format < 2)
-        return ff_get_format(avctx, mpeg12_hwaccel_pixfmt_list_420);
+        return ff_thread_get_format(avctx,
+                                avctx->codec_id == AV_CODEC_ID_MPEG1VIDEO ?
+                                mpeg1_hwaccel_pixfmt_list_420 :
+                                mpeg2_hwaccel_pixfmt_list_420);
     else if (s->chroma_format == 2)
         return AV_PIX_FMT_YUV422P;
     else
         return AV_PIX_FMT_YUV444P;
 }
 
+static void setup_hwaccel_for_pixfmt(AVCodecContext *avctx)
+{
+    // until then pix_fmt may be changed right after codec init
+    if (avctx->hwaccel || uses_vdpau(avctx))
+        if (avctx->idct_algo == FF_IDCT_AUTO)
+            avctx->idct_algo = FF_IDCT_SIMPLE;
+
+    if (avctx->hwaccel && avctx->pix_fmt == AV_PIX_FMT_XVMC) {
+        Mpeg1Context *s1 = avctx->priv_data;
+        MpegEncContext *s = &s1->mpeg_enc_ctx;
+
+        s->pack_pblocks = 1;
+#if FF_API_XVMC
+        avctx->xvmc_acceleration = 2;
+#endif /* FF_API_XVMC */
+    }
+}
+
 /* Call this function when we know all parameters.
  * It may be called in different places for MPEG-1 and MPEG-2. */
 static int mpeg_decode_postinit(AVCodecContext *avctx)
@@ -1218,13 +1265,14 @@ static int mpeg_decode_postinit(AVCodecContext *avctx)
         s1->save_width           != s->width                ||
         s1->save_height          != s->height               ||
         s1->save_aspect_info     != s->aspect_ratio_info    ||
-        s1->save_progressive_seq != s->progressive_sequence ||
+        (s1->save_progressive_seq != s->progressive_sequence && FFALIGN(s->height, 16) != FFALIGN(s->height, 32)) ||
         0) {
         if (s1->mpeg_enc_ctx_allocated) {
             ParseContext pc = s->parse_context;
             s->parse_context.buffer = 0;
             ff_MPV_common_end(s);
             s->parse_context = pc;
+            s1->mpeg_enc_ctx_allocated = 0;
         }
 
         if ((s->width == 0) || (s->height == 0))
@@ -1234,7 +1282,12 @@ static int mpeg_decode_postinit(AVCodecContext *avctx)
         if (ret < 0)
             return ret;
 
-        avctx->bit_rate          = s->bit_rate;
+        if (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO && s->bit_rate) {
+            avctx->rc_max_rate = s->bit_rate;
+        } else if (avctx->codec_id == AV_CODEC_ID_MPEG1VIDEO && s->bit_rate &&
+                   (s->bit_rate != 0x3FFFF*400 || s->vbv_delay != 0xFFFF)) {
+            avctx->bit_rate = s->bit_rate;
+        }
         s1->save_aspect_info     = s->aspect_ratio_info;
         s1->save_width           = s->width;
         s1->save_height          = s->height;
@@ -1300,14 +1353,7 @@ static int mpeg_decode_postinit(AVCodecContext *avctx)
         ff_set_sar(s->avctx, s->avctx->sample_aspect_ratio);
 
         avctx->pix_fmt = mpeg_get_pixelformat(avctx);
-        // until then pix_fmt may be changed right after codec init
-#if FF_API_XVMC
-        if ((avctx->pix_fmt == AV_PIX_FMT_XVMC_MPEG2_IDCT ||
-             avctx->hwaccel) && avctx->idct_algo == FF_IDCT_AUTO)
-#else
-        if (avctx->hwaccel && avctx->idct_algo == FF_IDCT_AUTO)
-#endif /* FF_API_XVMC */
-            avctx->idct_algo = FF_IDCT_SIMPLE;
+        setup_hwaccel_for_pixfmt(avctx);
 
         /* Quantization matrices may need reordering
          * if DCT permutation is changed. */
@@ -1341,20 +1387,23 @@ static int mpeg1_decode_picture(AVCodecContext *avctx, const uint8_t *buf,
         return -1;
 
     vbv_delay = get_bits(&s->gb, 16);
+    s->vbv_delay = vbv_delay;
     if (s->pict_type == AV_PICTURE_TYPE_P ||
         s->pict_type == AV_PICTURE_TYPE_B) {
         s->full_pel[0] = get_bits1(&s->gb);
         f_code = get_bits(&s->gb, 3);
-        if (f_code == 0 && (avctx->err_recognition & AV_EF_BITSTREAM))
+        if (f_code == 0 && (avctx->err_recognition & (AV_EF_BITSTREAM|AV_EF_COMPLIANT)))
             return -1;
+        f_code += !f_code;
         s->mpeg_f_code[0][0] = f_code;
         s->mpeg_f_code[0][1] = f_code;
     }
     if (s->pict_type == AV_PICTURE_TYPE_B) {
         s->full_pel[1] = get_bits1(&s->gb);
         f_code = get_bits(&s->gb, 3);
-        if (f_code == 0 && (avctx->err_recognition & AV_EF_BITSTREAM))
+        if (f_code == 0 && (avctx->err_recognition & (AV_EF_BITSTREAM|AV_EF_COMPLIANT)))
             return -1;
+        f_code += !f_code;
         s->mpeg_f_code[1][0] = f_code;
         s->mpeg_f_code[1][1] = f_code;
     }
@@ -1402,8 +1451,8 @@ static void mpeg_decode_sequence_extension(Mpeg1Context *s1)
 
     if (s->avctx->debug & FF_DEBUG_PICT_INFO)
         av_log(s->avctx, AV_LOG_DEBUG,
-               "profile: %d, level: %d vbv buffer: %d, bitrate:%d\n",
-               s->avctx->profile, s->avctx->level,
+               "profile: %d, level: %d ps: %d cf:%d vbv buffer: %d, bitrate:%d\n",
+               s->avctx->profile, s->avctx->level, s->progressive_sequence, s->chroma_format,
                s->avctx->rc_buffer_size, s->bit_rate);
 }
 
@@ -1478,7 +1527,7 @@ static int load_matrix(MpegEncContext *s, uint16_t matrix0[64],
             return -1;
         }
         if (intra && i == 0 && v != 8) {
-            av_log(s->avctx, AV_LOG_ERROR, "intra matrix invalid, ignoring\n");
+            av_log(s->avctx, AV_LOG_DEBUG, "intra matrix specifies invalid DC quantizer %d, ignoring\n", v);
             v = 8; // needed by pink.mpg / issue1046
         }
         matrix0[j] = v;
@@ -1524,6 +1573,11 @@ static void mpeg_decode_picture_coding_extension(Mpeg1Context *s1)
         s->current_picture.f->pict_type = s->pict_type;
         s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
     }
+    s->mpeg_f_code[0][0] += !s->mpeg_f_code[0][0];
+    s->mpeg_f_code[0][1] += !s->mpeg_f_code[0][1];
+    s->mpeg_f_code[1][0] += !s->mpeg_f_code[1][0];
+    s->mpeg_f_code[1][1] += !s->mpeg_f_code[1][1];
+
     s->intra_dc_precision         = get_bits(&s->gb, 2);
     s->picture_structure          = get_bits(&s->gb, 2);
     s->top_field_first            = get_bits1(&s->gb);
@@ -1536,32 +1590,6 @@ static void mpeg_decode_picture_coding_extension(Mpeg1Context *s1)
     s->chroma_420_type            = get_bits1(&s->gb);
     s->progressive_frame          = get_bits1(&s->gb);
 
-    if (s->progressive_sequence && !s->progressive_frame) {
-        s->progressive_frame = 1;
-        av_log(s->avctx, AV_LOG_ERROR,
-               "interlaced frame in progressive sequence, ignoring\n");
-    }
-
-    if (s->picture_structure == 0 ||
-        (s->progressive_frame && s->picture_structure != PICT_FRAME)) {
-        av_log(s->avctx, AV_LOG_ERROR,
-               "picture_structure %d invalid, ignoring\n",
-               s->picture_structure);
-        s->picture_structure = PICT_FRAME;
-    }
-
-    if (s->progressive_sequence && !s->frame_pred_frame_dct)
-        av_log(s->avctx, AV_LOG_WARNING, "invalid frame_pred_frame_dct\n");
-
-    if (s->picture_structure == PICT_FRAME) {
-        s->first_field = 0;
-        s->v_edge_pos  = 16 * s->mb_height;
-    } else {
-        s->first_field ^= 1;
-        s->v_edge_pos   = 8 * s->mb_height;
-        memset(s->mbskip_table, 0, s->mb_stride * s->mb_height);
-    }
-
     if (s->alternate_scan) {
         ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable, ff_alternate_vertical_scan);
         ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable, ff_alternate_vertical_scan);
@@ -1675,16 +1703,6 @@ static int mpeg_field_start(MpegEncContext *s, const uint8_t *buf, int buf_size)
             return -1;
     }
 
-#if FF_API_XVMC
-FF_DISABLE_DEPRECATION_WARNINGS
-// MPV_frame_start will call this function too,
-// but we need to call it on every field
-    if (CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration)
-        if (ff_xvmc_field_start(s, avctx) < 0)
-            return -1;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif /* FF_API_XVMC */
-
     return 0;
 }
 
@@ -1701,14 +1719,17 @@ static int mpeg_decode_slice(MpegEncContext *s, int mb_y,
                              const uint8_t **buf, int buf_size)
 {
     AVCodecContext *avctx = s->avctx;
+    const int lowres      = s->avctx->lowres;
     const int field_pic   = s->picture_structure != PICT_FRAME;
 
     s->resync_mb_x =
     s->resync_mb_y = -1;
 
-    assert(mb_y < s->mb_height);
+    av_assert0(mb_y < s->mb_height);
 
     init_get_bits(&s->gb, *buf, buf_size * 8);
+    if (s->codec_id != AV_CODEC_ID_MPEG1VIDEO && s->mb_height > 2800/16)
+        skip_bits(&s->gb, 3);
 
     ff_mpeg1_clean_buffers(s);
     s->interlaced_dct = 0;
@@ -1721,8 +1742,8 @@ static int mpeg_decode_slice(MpegEncContext *s, int mb_y,
     }
 
     /* extra slice info */
-    while (get_bits1(&s->gb) != 0)
-        skip_bits(&s->gb, 8);
+    if (skip_1stop_8data_bits(&s->gb) < 0)
+        return AVERROR_INVALIDDATA;
 
     s->mb_x = 0;
 
@@ -1752,7 +1773,7 @@ static int mpeg_decode_slice(MpegEncContext *s, int mb_y,
         return -1;
     }
 
-    if (avctx->hwaccel) {
+    if (avctx->hwaccel && avctx->hwaccel->decode_slice) {
         const uint8_t *buf_end, *buf_start = *buf - 4; /* include start_code */
         int start_code = -1;
         buf_end = avpriv_find_start_code(buf_start + 2, *buf + buf_size, &start_code);
@@ -1792,13 +1813,9 @@ static int mpeg_decode_slice(MpegEncContext *s, int mb_y,
     }
 
     for (;;) {
-#if FF_API_XVMC
-FF_DISABLE_DEPRECATION_WARNINGS
         // If 1, we memcpy blocks in xvmcvideo.
-        if (CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration > 1)
+        if ((CONFIG_MPEG1_XVMC_HWACCEL || CONFIG_MPEG2_XVMC_HWACCEL) && s->pack_pblocks)
             ff_xvmc_init_block(s); // set s->block
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif /* FF_API_XVMC */
 
         if (mpeg_decode_mb(s, s->block) < 0)
             return -1;
@@ -1830,22 +1847,22 @@ FF_ENABLE_DEPRECATION_WARNINGS
                     s->current_picture.motion_val[dir][xy + 1][1] = motion_y;
                     s->current_picture.ref_index [dir][b8_xy]     =
                     s->current_picture.ref_index [dir][b8_xy + 1] = s->field_select[dir][i];
-                    assert(s->field_select[dir][i] == 0 ||
-                           s->field_select[dir][i] == 1);
+                    av_assert2(s->field_select[dir][i] == 0 ||
+                               s->field_select[dir][i] == 1);
                 }
                 xy    += wrap;
                 b8_xy += 2;
             }
         }
 
-        s->dest[0] += 16;
-        s->dest[1] += 16 >> s->chroma_x_shift;
-        s->dest[2] += 16 >> s->chroma_x_shift;
+        s->dest[0] += 16 >> lowres;
+        s->dest[1] +=(16 >> lowres) >> s->chroma_x_shift;
+        s->dest[2] +=(16 >> lowres) >> s->chroma_x_shift;
 
         ff_MPV_decode_mb(s, s->block);
 
         if (++s->mb_x >= s->mb_width) {
-            const int mb_size = 16;
+            const int mb_size = 16 >> s->avctx->lowres;
 
             ff_mpeg_draw_horiz_band(s, mb_size * (s->mb_y >> field_pic), mb_size);
             ff_MPV_report_decode_progress(s);
@@ -1863,15 +1880,36 @@ FF_ENABLE_DEPRECATION_WARNINGS
                              s->progressive_frame == 0
                              /* vbv_delay == 0xBBB || 0xE10 */;
 
+                if (left >= 32 && !is_d10) {
+                    GetBitContext gb = s->gb;
+                    align_get_bits(&gb);
+                    if (show_bits(&gb, 24) == 0x060E2B) {
+                        av_log(avctx, AV_LOG_DEBUG, "Invalid MXF data found in video stream\n");
+                        is_d10 = 1;
+                    }
+                }
+
                 if (left < 0 ||
                     (left && show_bits(&s->gb, FFMIN(left, 23)) && !is_d10) ||
-                    ((avctx->err_recognition & AV_EF_BUFFER) && left > 8)) {
+                    ((avctx->err_recognition & (AV_EF_BITSTREAM | AV_EF_AGGRESSIVE)) && left > 8)) {
                     av_log(avctx, AV_LOG_ERROR, "end mismatch left=%d %0X\n",
                            left, show_bits(&s->gb, FFMIN(left, 23)));
                     return -1;
                 } else
                     goto eos;
             }
+            // Some files are missing the last slice when that slice lies
+            // completely outside the visible area. Detect that case here
+            // instead of running into the end of the data while still
+            // expecting more.
+            if (s->mb_y >= ((s->height + 15) >> 4) &&
+                s->progressive_frame &&
+                !s->progressive_sequence &&
+                get_bits_left(&s->gb) <= 8 &&
+                get_bits_left(&s->gb) >= 0 &&
+                s->mb_skip_run == -1 &&
+                show_bits(&s->gb, 8) == 0)
+                goto eos;
 
             ff_init_block_index(s);
         }
@@ -1937,6 +1975,10 @@ FF_ENABLE_DEPRECATION_WARNINGS
         }
     }
 eos: // end of slice
+    if (get_bits_left(&s->gb) < 0) {
+        av_log(s, AV_LOG_ERROR, "overread %d\n", -get_bits_left(&s->gb));
+        return AVERROR_INVALIDDATA;
+    }
     *buf += (get_bits_count(&s->gb) - 1) / 8;
     av_dlog(s, "y %d %d %d %d\n", s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y);
     return 0;
@@ -1978,7 +2020,10 @@ static int slice_decode_thread(AVCodecContext *c, void *arg)
 
         start_code = -1;
         buf        = avpriv_find_start_code(buf, s->gb.buffer_end, &start_code);
-        mb_y       = (start_code - SLICE_MIN_START_CODE) << field_pic;
+        mb_y       = start_code - SLICE_MIN_START_CODE;
+        if (s->codec_id != AV_CODEC_ID_MPEG1VIDEO && s->mb_height > 2800/16)
+            mb_y += (*buf&0xE0)<<2;
+        mb_y <<= field_pic;
         if (s->picture_structure == PICT_BOTTOM_FIELD)
             mb_y++;
         if (mb_y < 0 || mb_y >= s->end_mb_y)
@@ -2004,15 +2049,8 @@ static int slice_end(AVCodecContext *avctx, AVFrame *pict)
                    "hardware accelerator failed to decode picture\n");
     }
 
-#if FF_API_XVMC
-FF_DISABLE_DEPRECATION_WARNINGS
-    if (CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration)
-        ff_xvmc_field_end(s);
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif /* FF_API_XVMC */
-
     /* end of slice reached */
-    if (/* s->mb_y << field_pic == s->mb_height && */ !s->first_field) {
+    if (/* s->mb_y << field_pic == s->mb_height && */ !s->first_field && !s1->first_slice) {
         /* end of image */
 
         ff_er_frame_end(&s->er);
@@ -2023,7 +2061,8 @@ FF_ENABLE_DEPRECATION_WARNINGS
             int ret = av_frame_ref(pict, s->current_picture_ptr->f);
             if (ret < 0)
                 return ret;
-            ff_print_debug_info(s, s->current_picture_ptr);
+            ff_print_debug_info(s, s->current_picture_ptr, pict);
+            ff_mpv_export_qp_table(s, pict, s->current_picture_ptr, FF_QSCALE_TYPE_MPEG2);
         } else {
             if (avctx->active_thread_type & FF_THREAD_FRAME)
                 s->picture_number++;
@@ -2033,7 +2072,8 @@ FF_ENABLE_DEPRECATION_WARNINGS
                 int ret = av_frame_ref(pict, s->last_picture_ptr->f);
                 if (ret < 0)
                     return ret;
-                ff_print_debug_info(s, s->last_picture_ptr);
+                ff_print_debug_info(s, s->last_picture_ptr, pict);
+                ff_mpv_export_qp_table(s, pict, s->last_picture_ptr, FF_QSCALE_TYPE_MPEG2);
             }
         }
 
@@ -2058,13 +2098,13 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
     if (width == 0 || height == 0) {
         av_log(avctx, AV_LOG_WARNING,
                "Invalid horizontal or vertical size value.\n");
-        if (avctx->err_recognition & AV_EF_BITSTREAM)
+        if (avctx->err_recognition & (AV_EF_BITSTREAM | AV_EF_COMPLIANT))
             return AVERROR_INVALIDDATA;
     }
     s->aspect_ratio_info = get_bits(&s->gb, 4);
     if (s->aspect_ratio_info == 0) {
         av_log(avctx, AV_LOG_ERROR, "aspect ratio has forbidden 0 value\n");
-        if (avctx->err_recognition & AV_EF_BITSTREAM)
+        if (avctx->err_recognition & (AV_EF_BITSTREAM | AV_EF_COMPLIANT))
             return -1;
     }
     s->frame_rate_index = get_bits(&s->gb, 4);
@@ -2110,11 +2150,13 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
     s->progressive_sequence = 1;
     s->progressive_frame    = 1;
     s->picture_structure    = PICT_FRAME;
+    s->first_field          = 0;
     s->frame_pred_frame_dct = 1;
     s->chroma_format        = 1;
     s->codec_id             =
     s->avctx->codec_id      = AV_CODEC_ID_MPEG1VIDEO;
     s->out_format           = FMT_MPEG1;
+    s->swap_uv              = 0; // AFAIK VCR2 does not have SEQ_HEADER
     if (s->flags & CODEC_FLAG_LOW_DELAY)
         s->low_delay = 1;
 
@@ -2135,6 +2177,7 @@ static int vcr2_init_sequence(AVCodecContext *avctx)
     s->out_format = FMT_MPEG1;
     if (s1->mpeg_enc_ctx_allocated) {
         ff_MPV_common_end(s);
+        s1->mpeg_enc_ctx_allocated = 0;
     }
     s->width            = avctx->coded_width;
     s->height           = avctx->coded_height;
@@ -2142,14 +2185,7 @@ static int vcr2_init_sequence(AVCodecContext *avctx)
     s->low_delay        = 1;
 
     avctx->pix_fmt = mpeg_get_pixelformat(avctx);
-
-#if FF_API_XVMC
-    if ((avctx->pix_fmt == AV_PIX_FMT_XVMC_MPEG2_IDCT || avctx->hwaccel) &&
-        avctx->idct_algo == FF_IDCT_AUTO)
-#else
-    if (avctx->hwaccel && avctx->idct_algo == FF_IDCT_AUTO)
-#endif /* FF_API_XVMC */
-        avctx->idct_algo = FF_IDCT_SIMPLE;
+    setup_hwaccel_for_pixfmt(avctx);
 
     if (ff_MPV_common_init(s) < 0)
         return -1;
@@ -2169,9 +2205,15 @@ static int vcr2_init_sequence(AVCodecContext *avctx)
     s->progressive_sequence  = 1;
     s->progressive_frame     = 1;
     s->picture_structure     = PICT_FRAME;
+    s->first_field           = 0;
     s->frame_pred_frame_dct  = 1;
     s->chroma_format         = 1;
-    s->codec_id              = s->avctx->codec_id = AV_CODEC_ID_MPEG2VIDEO;
+    if (s->codec_tag == AV_RL32("BW10")) {
+        s->codec_id              = s->avctx->codec_id = AV_CODEC_ID_MPEG1VIDEO;
+    } else {
+        s->swap_uv = 1; // in case of xvmc we need to swap uv for each MB
+        s->codec_id              = s->avctx->codec_id = AV_CODEC_ID_MPEG2VIDEO;
+    }
     s1->save_width           = s->width;
     s1->save_height          = s->height;
     s1->save_progressive_seq = s->progressive_sequence;
@@ -2234,9 +2276,23 @@ static int mpeg_decode_a53_cc(AVCodecContext *avctx,
 static void mpeg_decode_user_data(AVCodecContext *avctx,
                                   const uint8_t *p, int buf_size)
 {
+    Mpeg1Context *s = avctx->priv_data;
     const uint8_t *buf_end = p + buf_size;
     Mpeg1Context *s1 = avctx->priv_data;
 
+    if (buf_size > 29){
+        int i;
+        for(i=0; i<20; i++)
+            if (!memcmp(p+i, "\0TMPGEXS\0", 9)){
+                s->tmpgexs= 1;
+            }
+
+/*        for(i=0; !(!p[i-2] && !p[i-1] && p[i]==1) && i<buf_size; i++){
+            av_log(avctx, AV_LOG_ERROR, "%c", p[i]);
+        }
+            av_log(avctx, AV_LOG_ERROR, "\n");*/
+    }
+
     /* we parse the DTG active format information */
     if (buf_end - p >= 5 &&
         p[0] == 'D' && p[1] == 'T' && p[2] == 'G' && p[3] == '1') {
@@ -2295,32 +2351,26 @@ static void mpeg_decode_gop(AVCodecContext *avctx,
 {
     Mpeg1Context *s1  = avctx->priv_data;
     MpegEncContext *s = &s1->mpeg_enc_ctx;
-
-    int time_code_hours, time_code_minutes;
-    int time_code_seconds, time_code_pictures;
     int broken_link;
+    int64_t tc;
 
     init_get_bits(&s->gb, buf, buf_size * 8);
 
-    skip_bits1(&s->gb); /* drop_frame_flag */
-
-    time_code_hours   = get_bits(&s->gb, 5);
-    time_code_minutes = get_bits(&s->gb, 6);
-    skip_bits1(&s->gb); // marker bit
-    time_code_seconds  = get_bits(&s->gb, 6);
-    time_code_pictures = get_bits(&s->gb, 6);
+    tc = avctx->timecode_frame_start = get_bits(&s->gb, 25);
 
-    s1->closed_gop = get_bits1(&s->gb);
+    s->closed_gop = get_bits1(&s->gb);
     /* broken_link indicate that after editing the
      * reference frames of the first B-Frames after GOP I-Frame
      * are missing (open gop) */
     broken_link = get_bits1(&s->gb);
 
-    if (s->avctx->debug & FF_DEBUG_PICT_INFO)
+    if (s->avctx->debug & FF_DEBUG_PICT_INFO) {
+        char tcbuf[AV_TIMECODE_STR_SIZE];
+        av_timecode_make_mpeg_tc_string(tcbuf, tc);
         av_log(s->avctx, AV_LOG_DEBUG,
-               "GOP (%2d:%02d:%02d.[%02d]) closed_gop=%d broken_link=%d\n",
-               time_code_hours, time_code_minutes, time_code_seconds,
-               time_code_pictures, s1->closed_gop, broken_link);
+               "GOP (%s) closed_gop=%d broken_link=%d\n",
+               tcbuf, s->closed_gop, broken_link);
+    }
 }
 
 static int decode_chunks(AVCodecContext *avctx, AVFrame *picture,
@@ -2332,6 +2382,7 @@ static int decode_chunks(AVCodecContext *avctx, AVFrame *picture,
     const uint8_t *buf_end = buf + buf_size;
     int ret, input_size;
     int last_code = 0, skip_frame = 0;
+    int picture_start_code_seen = 0;
 
     for (;;) {
         /* find next start code */
@@ -2343,6 +2394,7 @@ static int decode_chunks(AVCodecContext *avctx, AVFrame *picture,
                     (avctx->active_thread_type & FF_THREAD_SLICE) &&
                     !avctx->hwaccel) {
                     int i;
+                    av_assert0(avctx->thread_count > 1);
 
                     avctx->execute(avctx, slice_decode_thread,
                                    &s2->thread_context[0], NULL,
@@ -2351,6 +2403,10 @@ static int decode_chunks(AVCodecContext *avctx, AVFrame *picture,
                         s2->er.error_count += s2->thread_context[i]->er.error_count;
                 }
 
+                if ((CONFIG_MPEG_VDPAU_DECODER || CONFIG_MPEG1_VDPAU_DECODER)
+                    && uses_vdpau(avctx))
+                    ff_vdpau_mpeg_picture_complete(s2, buf, buf_size, s->slice_count);
+
                 ret = slice_end(avctx, picture);
                 if (ret < 0)
                     return ret;
@@ -2361,13 +2417,17 @@ static int decode_chunks(AVCodecContext *avctx, AVFrame *picture,
                 }
             }
             s2->pict_type = 0;
+
+            if (avctx->err_recognition & AV_EF_EXPLODE && s2->er.error_count)
+                return AVERROR_INVALIDDATA;
+
             return FFMAX(0, buf_ptr - buf - s2->parse_context.last_index);
         }
 
         input_size = buf_end - buf_ptr;
 
         if (avctx->debug & FF_DEBUG_STARTCODE)
-            av_log(avctx, AV_LOG_DEBUG, "%3"PRIX32" at %td left %d\n",
+            av_log(avctx, AV_LOG_DEBUG, "%3"PRIX32" at %"PTRDIFF_SPECIFIER" left %d\n",
                    start_code, buf_ptr - buf, input_size);
 
         /* prepare data for next start code */
@@ -2375,7 +2435,8 @@ static int decode_chunks(AVCodecContext *avctx, AVFrame *picture,
         case SEQ_START_CODE:
             if (last_code == 0) {
                 mpeg1_decode_sequence(avctx, buf_ptr, input_size);
-                s->sync = 1;
+                if (buf != avctx->extradata)
+                    s->sync = 1;
             } else {
                 av_log(avctx, AV_LOG_ERROR,
                        "ignoring SEQ_START_CODE after %X\n", last_code);
@@ -2385,12 +2446,24 @@ static int decode_chunks(AVCodecContext *avctx, AVFrame *picture,
             break;
 
         case PICTURE_START_CODE:
+            if (picture_start_code_seen && s2->picture_structure == PICT_FRAME) {
+               /* If it's a frame picture, there can't be more than one picture header.
+                  Yet, it does happen and we need to handle it. */
+               av_log(avctx, AV_LOG_WARNING, "ignoring extra picture following a frame-picture\n");
+               break;
+            }
+            picture_start_code_seen = 1;
+
             if (s2->width <= 0 || s2->height <= 0) {
                 av_log(avctx, AV_LOG_ERROR, "Invalid frame dimensions %dx%d.\n",
                        s2->width, s2->height);
                 return AVERROR_INVALIDDATA;
             }
 
+            if (s->tmpgexs){
+                s2->intra_dc_precision= 3;
+                s2->intra_matrix[0]= 1;
+            }
             if (HAVE_THREADS && (avctx->active_thread_type & FF_THREAD_SLICE) &&
                 !avctx->hwaccel && s->slice_count) {
                 int i;
@@ -2474,14 +2547,50 @@ static int decode_chunks(AVCodecContext *avctx, AVFrame *picture,
             break;
         default:
             if (start_code >= SLICE_MIN_START_CODE &&
+                start_code <= SLICE_MAX_START_CODE && last_code == PICTURE_START_CODE) {
+                if (s2->progressive_sequence && !s2->progressive_frame) {
+                    s2->progressive_frame = 1;
+                    av_log(s2->avctx, AV_LOG_ERROR,
+                           "interlaced frame in progressive sequence, ignoring\n");
+                }
+
+                if (s2->picture_structure == 0 ||
+                    (s2->progressive_frame && s2->picture_structure != PICT_FRAME)) {
+                    av_log(s2->avctx, AV_LOG_ERROR,
+                           "picture_structure %d invalid, ignoring\n",
+                           s2->picture_structure);
+                    s2->picture_structure = PICT_FRAME;
+                }
+
+                if (s2->progressive_sequence && !s2->frame_pred_frame_dct)
+                    av_log(s2->avctx, AV_LOG_WARNING, "invalid frame_pred_frame_dct\n");
+
+                if (s2->picture_structure == PICT_FRAME) {
+                    s2->first_field = 0;
+                    s2->v_edge_pos  = 16 * s2->mb_height;
+                } else {
+                    s2->first_field ^= 1;
+                    s2->v_edge_pos   = 8 * s2->mb_height;
+                    memset(s2->mbskip_table, 0, s2->mb_stride * s2->mb_height);
+                }
+            }
+            if (start_code >= SLICE_MIN_START_CODE &&
                 start_code <= SLICE_MAX_START_CODE && last_code != 0) {
                 const int field_pic = s2->picture_structure != PICT_FRAME;
-                int mb_y = (start_code - SLICE_MIN_START_CODE) << field_pic;
+                int mb_y = start_code - SLICE_MIN_START_CODE;
                 last_code = SLICE_MIN_START_CODE;
+                if (s2->codec_id != AV_CODEC_ID_MPEG1VIDEO && s2->mb_height > 2800/16)
+                    mb_y += (*buf_ptr&0xE0)<<2;
 
+                mb_y <<= field_pic;
                 if (s2->picture_structure == PICT_BOTTOM_FIELD)
                     mb_y++;
 
+                if (buf_end - buf_ptr < 2) {
+                    av_log(s2->avctx, AV_LOG_ERROR, "slice too small\n");
+                    return AVERROR_INVALIDDATA;
+                }
+
                 if (mb_y >= s2->mb_height) {
                     av_log(s2->avctx, AV_LOG_ERROR,
                            "slice below image (%d >= %d)\n", mb_y, s2->mb_height);
@@ -2492,13 +2601,13 @@ static int decode_chunks(AVCodecContext *avctx, AVFrame *picture,
                     /* Skip B-frames if we do not have reference frames and
                      * GOP is not closed. */
                     if (s2->pict_type == AV_PICTURE_TYPE_B) {
-                        if (!s->closed_gop) {
+                        if (!s2->closed_gop) {
                             skip_frame = 1;
                             break;
                         }
                     }
                 }
-                if (s2->pict_type == AV_PICTURE_TYPE_I)
+                if (s2->pict_type == AV_PICTURE_TYPE_I || (s2->flags2 & CODEC_FLAG2_SHOW_ALL))
                     s->sync = 1;
                 if (s2->next_picture_ptr == NULL) {
                     /* Skip P-frames if we do not have a reference frame or
@@ -2545,12 +2654,18 @@ static int decode_chunks(AVCodecContext *avctx, AVFrame *picture,
                     return AVERROR_INVALIDDATA;
                 }
 
+                if (uses_vdpau(avctx)) {
+                    s->slice_count++;
+                    break;
+                }
+
                 if (HAVE_THREADS &&
                     (avctx->active_thread_type & FF_THREAD_SLICE) &&
                     !avctx->hwaccel) {
                     int threshold = (s2->mb_height * s->slice_count +
                                      s2->slice_context_count / 2) /
                                     s2->slice_context_count;
+                    av_assert0(avctx->thread_count > 1);
                     if (threshold <= mb_y) {
                         MpegEncContext *thread_context = s2->thread_context[s->slice_count];
 
@@ -2593,11 +2708,11 @@ static int mpeg_decode_frame(AVCodecContext *avctx, void *data,
                              int *got_output, AVPacket *avpkt)
 {
     const uint8_t *buf = avpkt->data;
+    int ret;
     int buf_size = avpkt->size;
     Mpeg1Context *s = avctx->priv_data;
     AVFrame *picture = data;
     MpegEncContext *s2 = &s->mpeg_enc_ctx;
-    av_dlog(avctx, "fill_buffer\n");
 
     if (buf_size == 0 || (buf_size == 4 && AV_RB32(buf) == SEQ_END_CODE)) {
         /* special case for last picture */
@@ -2622,20 +2737,33 @@ static int mpeg_decode_frame(AVCodecContext *avctx, void *data,
             return buf_size;
     }
 
-    if (s->mpeg_enc_ctx_allocated == 0 && avctx->codec_tag == AV_RL32("VCR2"))
+    s2->codec_tag = avpriv_toupper4(avctx->codec_tag);
+    if (s->mpeg_enc_ctx_allocated == 0 && (   s2->codec_tag == AV_RL32("VCR2")
+                                           || s2->codec_tag == AV_RL32("BW10")
+                                          ))
         vcr2_init_sequence(avctx);
 
     s->slice_count = 0;
 
     if (avctx->extradata && !s->extradata_decoded) {
-        int ret = decode_chunks(avctx, picture, got_output,
-                                avctx->extradata, avctx->extradata_size);
+        ret = decode_chunks(avctx, picture, got_output,
+                            avctx->extradata, avctx->extradata_size);
+        if (*got_output) {
+            av_log(avctx, AV_LOG_ERROR, "picture in extradata\n");
+            *got_output = 0;
+        }
         s->extradata_decoded = 1;
-        if (ret < 0 && (avctx->err_recognition & AV_EF_EXPLODE))
+        if (ret < 0 && (avctx->err_recognition & AV_EF_EXPLODE)) {
+            s2->current_picture_ptr = NULL;
             return ret;
+        }
     }
 
-    return decode_chunks(avctx, picture, got_output, buf, buf_size);
+    ret = decode_chunks(avctx, picture, got_output, buf, buf_size);
+    if (ret<0 || *got_output)
+        s2->current_picture_ptr = NULL;
+
+    return ret;
 }
 
 static void flush(AVCodecContext *avctx)
@@ -2643,7 +2771,6 @@ static void flush(AVCodecContext *avctx)
     Mpeg1Context *s = avctx->priv_data;
 
     s->sync       = 0;
-    s->closed_gop = 0;
 
     ff_mpeg_flush(avctx);
 }
@@ -2683,6 +2810,7 @@ AVCodec ff_mpeg1video_decoder = {
                              CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY |
                              CODEC_CAP_SLICE_THREADS,
     .flush                 = flush,
+    .max_lowres            = 3,
     .update_thread_context = ONLY_IF_THREADS_ENABLED(mpeg_decode_update_thread_context)
 };
 
@@ -2699,9 +2827,25 @@ AVCodec ff_mpeg2video_decoder = {
                       CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY |
                       CODEC_CAP_SLICE_THREADS,
     .flush          = flush,
+    .max_lowres     = 3,
     .profiles       = NULL_IF_CONFIG_SMALL(mpeg2_video_profiles),
 };
 
+// Legacy "mpegvideo" decoder table: registered under AV_CODEC_ID_MPEG2VIDEO and wired to the mpeg_decode_* callbacks and flush().
+AVCodec ff_mpegvideo_decoder = {
+    .name           = "mpegvideo",
+    .long_name      = NULL_IF_CONFIG_SMALL("MPEG-1 video"), /* NOTE(review): label says MPEG-1 but .id below is MPEG2VIDEO -- confirm intended */
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_MPEG2VIDEO,
+    .priv_data_size = sizeof(Mpeg1Context),
+    .init           = mpeg_decode_init,
+    .close          = mpeg_decode_end,
+    .decode         = mpeg_decode_frame,
+    .capabilities   = CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY | CODEC_CAP_SLICE_THREADS,
+    .flush          = flush,
+    .max_lowres     = 3, /* presumably the largest lowres value accepted -- see AVCodec.max_lowres docs */
+};
+
 #if FF_API_XVMC
 #if CONFIG_MPEG_XVMC_DECODER
 static av_cold int mpeg_mc_decode_init(AVCodecContext *avctx)
@@ -2737,3 +2881,35 @@ AVCodec ff_mpeg_xvmc_decoder = {
 
 #endif
 #endif /* FF_API_XVMC */
+
+#if CONFIG_MPEG_VDPAU_DECODER
+AVCodec ff_mpeg_vdpau_decoder = { /* "mpegvideo_vdpau" table; only built when CONFIG_MPEG_VDPAU_DECODER is set */
+    .name           = "mpegvideo_vdpau",
+    .long_name      = NULL_IF_CONFIG_SMALL("MPEG-1/2 video (VDPAU acceleration)"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_MPEG2VIDEO,
+    .priv_data_size = sizeof(Mpeg1Context),
+    .init           = mpeg_decode_init, /* same init/close/decode/flush callbacks as ff_mpegvideo_decoder */
+    .close          = mpeg_decode_end,
+    .decode         = mpeg_decode_frame,
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED |
+                      CODEC_CAP_HWACCEL_VDPAU | CODEC_CAP_DELAY, /* unlike ff_mpegvideo_decoder: no DRAW_HORIZ_BAND or SLICE_THREADS */
+    .flush          = flush,
+};
+#endif /* CONFIG_MPEG_VDPAU_DECODER */
+
+#if CONFIG_MPEG1_VDPAU_DECODER
+AVCodec ff_mpeg1_vdpau_decoder = { /* "mpeg1video_vdpau" table; only built when CONFIG_MPEG1_VDPAU_DECODER is set */
+    .name           = "mpeg1video_vdpau",
+    .long_name      = NULL_IF_CONFIG_SMALL("MPEG-1 video (VDPAU acceleration)"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_MPEG1VIDEO,
+    .priv_data_size = sizeof(Mpeg1Context),
+    .init           = mpeg_decode_init, /* same init/close/decode/flush callbacks as ff_mpegvideo_decoder */
+    .close          = mpeg_decode_end,
+    .decode         = mpeg_decode_frame,
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED |
+                      CODEC_CAP_HWACCEL_VDPAU | CODEC_CAP_DELAY, /* unlike ff_mpegvideo_decoder: no DRAW_HORIZ_BAND or SLICE_THREADS */
+    .flush          = flush,
+};
+#endif /* CONFIG_MPEG1_VDPAU_DECODER */
diff --git a/libavcodec/mpeg12enc.c b/libavcodec/mpeg12enc.c
index f87286a..0dbcda8 100644
--- a/libavcodec/mpeg12enc.c
+++ b/libavcodec/mpeg12enc.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2000,2001 Fabrice Bellard
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -28,8 +28,10 @@
 #include <stdint.h>
 
 #include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
+#include "libavutil/timecode.h"
 #include "libavutil/stereo3d.h"
 
 #include "avcodec.h"
@@ -37,6 +39,7 @@
 #include "mathops.h"
 #include "mpeg12.h"
 #include "mpeg12data.h"
+#include "mpegutils.h"
 #include "mpegvideo.h"
 
 
@@ -85,7 +88,7 @@ static av_cold void init_uni_ac_vlc(RLTable *rl, uint8_t *uni_ac_vlc_len)
                 /* length of VLC and sign */
                 len = rl->table_vlc[code][1] + 1;
             } else {
-                len = rl->table_vlc[111][1] + 6;    /* rl->n */
+                len = rl->table_vlc[111 /* rl->n */][1] + 6;
 
                 if (alevel < 128)
                     len += 8;
@@ -101,26 +104,37 @@ static av_cold void init_uni_ac_vlc(RLTable *rl, uint8_t *uni_ac_vlc_len)
 static int find_frame_rate_index(MpegEncContext *s)
 {
     int i;
-    int64_t dmin = INT64_MAX;
-    int64_t d;
+    AVRational bestq = (AVRational) {0, 0};
+    AVRational ext;
+    AVRational target = av_inv_q(s->avctx->time_base);
 
     for (i = 1; i < 14; i++) {
-        int64_t n0 = 1001LL / ff_mpeg12_frame_rate_tab[i].den *
-                     ff_mpeg12_frame_rate_tab[i].num * s->avctx->time_base.num;
-        int64_t n1 = 1001LL * s->avctx->time_base.den;
-
         if (s->avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL &&
             i >= 9)
             break;
 
-        d = FFABS(n0 - n1);
-        if (d < dmin) {
-            dmin                = d;
-            s->frame_rate_index = i;
+        for (ext.num=1; ext.num <= 4; ext.num++) {
+            for (ext.den=1; ext.den <= 32; ext.den++) {
+                AVRational q = av_mul_q(ext, ff_mpeg12_frame_rate_tab[i]);
+
+                if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO && (ext.den!=1 || ext.num!=1))
+                    continue;
+                if (av_gcd(ext.den, ext.num) != 1)
+                    continue;
+
+                if (    bestq.num==0
+                    || av_nearer_q(target, bestq, q) < 0
+                    || ext.num==1 && ext.den==1 && av_nearer_q(target, bestq, q) == 0) {
+                    bestq               = q;
+                    s->frame_rate_index = i;
+                    s->mpeg2_frame_rate_ext.num = ext.num;
+                    s->mpeg2_frame_rate_ext.den = ext.den;
+                }
+            }
         }
     }
 
-    if (dmin)
+    if (av_cmp_q(target, bestq))
         return -1;
     else
         return 0;
@@ -130,6 +144,9 @@ static av_cold int encode_init(AVCodecContext *avctx)
 {
     MpegEncContext *s = avctx->priv_data;
 
+    if (avctx->codec_id == AV_CODEC_ID_MPEG1VIDEO && avctx->height > 2800)
+        avctx->thread_count = 1;
+
     if (ff_MPV_encode_init(avctx) < 0)
         return -1;
 
@@ -175,12 +192,38 @@ static av_cold int encode_init(AVCodecContext *avctx)
         }
     }
 
+    if ((avctx->width & 0xFFF) == 0 && (avctx->height & 0xFFF) == 1) {
+        av_log(avctx, AV_LOG_ERROR, "Width / Height is invalid for MPEG2\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (s->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL) {
+        if ((avctx->width & 0xFFF) == 0 || (avctx->height & 0xFFF) == 0) {
+            av_log(avctx, AV_LOG_ERROR, "Width or Height are not allowed to be multiplies of 4096\n"
+                                        "add '-strict %d' if you want to use them anyway.\n", FF_COMPLIANCE_UNOFFICIAL);
+            return AVERROR(EINVAL);
+        }
+    }
+
+    s->drop_frame_timecode = s->drop_frame_timecode || !!(avctx->flags2 & CODEC_FLAG2_DROP_FRAME_TIMECODE);
+    if (s->drop_frame_timecode)
+        s->tc.flags |= AV_TIMECODE_FLAG_DROPFRAME;
     if (s->drop_frame_timecode && s->frame_rate_index != 4) {
         av_log(avctx, AV_LOG_ERROR,
                "Drop frame time code only allowed with 1001/30000 fps\n");
         return -1;
     }
 
+    if (s->tc_opt_str) {
+        AVRational rate = ff_mpeg12_frame_rate_tab[s->frame_rate_index];
+        int ret = av_timecode_init_from_string(&s->tc, rate, s->tc_opt_str, s);
+        if (ret < 0)
+            return ret;
+        s->drop_frame_timecode = !!(s->tc.flags & AV_TIMECODE_FLAG_DROPFRAME);
+        s->avctx->timecode_frame_start = s->tc.start;
+    } else {
+        s->avctx->timecode_frame_start = 0; // default is -1
+    }
     return 0;
 }
 
@@ -197,11 +240,11 @@ static void mpeg1_encode_sequence_header(MpegEncContext *s)
     unsigned int vbv_buffer_size, fps, v;
     int i, constraint_parameter_flag;
     uint64_t time_code;
-    float best_aspect_error = 1E10;
-    float aspect_ratio      = av_q2d(s->avctx->sample_aspect_ratio);
+    int64_t best_aspect_error = INT64_MAX;
+    AVRational aspect_ratio = s->avctx->sample_aspect_ratio;
 
-    if (aspect_ratio == 0.0)
-        aspect_ratio = 1.0;             // pixel aspect 1.1 (VGA)
+    if (aspect_ratio.num == 0 || aspect_ratio.den == 0)
+        aspect_ratio = (AVRational){1,1};             // pixel aspect 1.1 (VGA)
 
     if (s->current_picture.f->key_frame) {
         AVRational framerate = ff_mpeg12_frame_rate_tab[s->frame_rate_index];
@@ -209,19 +252,19 @@ static void mpeg1_encode_sequence_header(MpegEncContext *s)
         /* mpeg1 header repeated every gop */
         put_header(s, SEQ_START_CODE);
 
-        put_sbits(&s->pb, 12, s->width);
-        put_sbits(&s->pb, 12, s->height);
+        put_sbits(&s->pb, 12, s->width  & 0xFFF);
+        put_sbits(&s->pb, 12, s->height & 0xFFF);
 
         for (i = 1; i < 15; i++) {
-            float error = aspect_ratio;
+            int64_t error = aspect_ratio.num * (1LL<<32) / aspect_ratio.den;
             if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || i <= 1)
-                error -= 1.0 / ff_mpeg1_aspect[i];
+                error -= (1LL<<32) / ff_mpeg1_aspect[i];
             else
-                error -= av_q2d(ff_mpeg2_aspect[i]) * s->height / s->width;
+                error -= (1LL<<32)*ff_mpeg2_aspect[i].num * s->height / s->width / ff_mpeg2_aspect[i].den;
 
             error = FFABS(error);
 
-            if (error < best_aspect_error) {
+            if (error - 2 <= best_aspect_error) {
                 best_aspect_error    = error;
                 s->aspect_ratio_info = i;
             }
@@ -268,6 +311,11 @@ static void mpeg1_encode_sequence_header(MpegEncContext *s)
         ff_write_quant_matrix(&s->pb, s->avctx->inter_matrix);
 
         if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
+            AVFrameSideData *side_data;
+            int width = s->width;
+            int height = s->height;
+            int use_seq_disp_ext;
+
             put_header(s, EXT_START_CODE);
             put_bits(&s->pb, 4, 1);                 // seq ext
 
@@ -284,20 +332,37 @@ static void mpeg1_encode_sequence_header(MpegEncContext *s)
             put_bits(&s->pb, 1, 1);                 // marker
             put_bits(&s->pb, 8, vbv_buffer_size >> 10); // vbv buffer ext
             put_bits(&s->pb, 1, s->low_delay);
-            put_bits(&s->pb, 2, 0);                 // frame_rate_ext_n
-            put_bits(&s->pb, 5, 0);                 // frame_rate_ext_d
+            put_bits(&s->pb, 2, s->mpeg2_frame_rate_ext.num-1); // frame_rate_ext_n
+            put_bits(&s->pb, 5, s->mpeg2_frame_rate_ext.den-1); // frame_rate_ext_d
+
+            side_data = av_frame_get_side_data(s->current_picture_ptr->f, AV_FRAME_DATA_PANSCAN);
+            if (side_data) {
+                AVPanScan *pan_scan = (AVPanScan *)side_data->data;
+                if (pan_scan->width && pan_scan->height) {
+                    width = pan_scan->width >> 4;
+                    height = pan_scan->height >> 4;
+                }
+            }
 
-            put_header(s, EXT_START_CODE);
-            put_bits(&s->pb, 4, 2);                         // sequence display extension
-            put_bits(&s->pb, 3, 0);                         // video_format: 0 is components
-            put_bits(&s->pb, 1, 1);                         // colour_description
-            put_bits(&s->pb, 8, s->avctx->color_primaries); // colour_primaries
-            put_bits(&s->pb, 8, s->avctx->color_trc);       // transfer_characteristics
-            put_bits(&s->pb, 8, s->avctx->colorspace);      // matrix_coefficients
-            put_bits(&s->pb, 14, s->width);                 // display_horizontal_size
-            put_bits(&s->pb, 1, 1);                         // marker_bit
-            put_bits(&s->pb, 14, s->height);                // display_vertical_size
-            put_bits(&s->pb, 3, 0);                         // remaining 3 bits are zero padding
+            use_seq_disp_ext = (width != s->width ||
+                                height != s->height ||
+                                s->avctx->color_primaries != AVCOL_PRI_UNSPECIFIED ||
+                                s->avctx->color_trc != AVCOL_TRC_UNSPECIFIED ||
+                                s->avctx->colorspace != AVCOL_SPC_UNSPECIFIED);
+
+            if (s->seq_disp_ext == 1 || (s->seq_disp_ext == -1 && use_seq_disp_ext)) {
+                put_header(s, EXT_START_CODE);
+                put_bits(&s->pb, 4, 2);                         // sequence display extension
+                put_bits(&s->pb, 3, 0);                         // video_format: 0 is components
+                put_bits(&s->pb, 1, 1);                         // colour_description
+                put_bits(&s->pb, 8, s->avctx->color_primaries); // colour_primaries
+                put_bits(&s->pb, 8, s->avctx->color_trc);       // transfer_characteristics
+                put_bits(&s->pb, 8, s->avctx->colorspace);      // matrix_coefficients
+                put_bits(&s->pb, 14, width);                    // display_horizontal_size
+                put_bits(&s->pb, 1, 1);                         // marker_bit
+                put_bits(&s->pb, 14, height);                   // display_vertical_size
+                put_bits(&s->pb, 3, 0);                         // remaining 3 bits are zero padding
+            }
         }
 
         put_header(s, GOP_START_CODE);
@@ -309,21 +374,17 @@ static void mpeg1_encode_sequence_header(MpegEncContext *s)
                     s->avctx->timecode_frame_start;
 
         s->gop_picture_number = s->current_picture_ptr->f->coded_picture_number;
-        if (s->drop_frame_timecode) {
-            /* only works for NTSC 29.97 */
-            int d = time_code / 17982;
-            int m = time_code % 17982;
-            /* not needed since -2,-1 / 1798 in C returns 0 */
-            // if (m < 2)
-            //     m += 2;
-            time_code += 18 * d + 2 * ((m - 2) / 1798);
-        }
+
+        av_assert0(s->drop_frame_timecode == !!(s->tc.flags & AV_TIMECODE_FLAG_DROPFRAME));
+        if (s->drop_frame_timecode)
+            time_code = av_timecode_adjust_ntsc_framenum2(time_code, fps);
+
         put_bits(&s->pb, 5, (uint32_t)((time_code / (fps * 3600)) % 24));
         put_bits(&s->pb, 6, (uint32_t)((time_code / (fps *   60)) % 60));
         put_bits(&s->pb, 1, 1);
         put_bits(&s->pb, 6, (uint32_t)((time_code / fps) % 60));
         put_bits(&s->pb, 6, (uint32_t)((time_code % fps)));
-        put_bits(&s->pb, 1, !!(s->flags & CODEC_FLAG_CLOSED_GOP));
+        put_bits(&s->pb, 1, !!(s->flags & CODEC_FLAG_CLOSED_GOP) || s->intra_only || !s->gop_picture_number);
         put_bits(&s->pb, 1, 0);                     // broken link
     }
 }
@@ -341,7 +402,7 @@ static inline void encode_mb_skip_run(MpegEncContext *s, int run)
 static av_always_inline void put_qscale(MpegEncContext *s)
 {
     if (s->q_scale_type) {
-        assert(s->qscale >= 1 && s->qscale <= 12);
+        av_assert2(s->qscale >= 1 && s->qscale <= 12);
         put_bits(&s->pb, 5, inv_non_linear_qscale[s->qscale]);
     } else {
         put_bits(&s->pb, 5, s->qscale);
@@ -350,7 +411,7 @@ static av_always_inline void put_qscale(MpegEncContext *s)
 
 void ff_mpeg1_encode_slice_header(MpegEncContext *s)
 {
-    if (s->height > 2800) {
+    if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO && s->height > 2800) {
         put_header(s, SLICE_MIN_START_CODE + (s->mb_y & 127));
         /* slice_vertical_position_extension */
         put_bits(&s->pb, 3, s->mb_y >> 7);
@@ -419,7 +480,7 @@ void ff_mpeg1_encode_picture_header(MpegEncContext *s, int picture_number)
         }
         put_bits(&s->pb, 2, s->intra_dc_precision);
 
-        assert(s->picture_structure == PICT_FRAME);
+        av_assert0(s->picture_structure == PICT_FRAME);
         put_bits(&s->pb, 2, s->picture_structure);
         if (s->progressive_sequence)
             put_bits(&s->pb, 1, 0);             /* no repeat */
@@ -531,7 +592,7 @@ static void mpeg1_encode_motion(MpegEncContext *s, int val, int f_or_b_code)
             sign = 1;
         }
 
-        assert(code > 0 && code <= 16);
+        av_assert2(code > 0 && code <= 16);
 
         put_bits(&s->pb,
                  ff_mpeg12_mbMotionVectorTable[code][1],
@@ -656,7 +717,7 @@ next_coef:
 }
 
 static av_always_inline void mpeg1_encode_mb_internal(MpegEncContext *s,
-                                                      int16_t block[6][64],
+                                                      int16_t block[8][64],
                                                       int motion_x, int motion_y,
                                                       int mb_block_count)
 {
@@ -673,7 +734,7 @@ static av_always_inline void mpeg1_encode_mb_internal(MpegEncContext *s,
 
     if (cbp == 0 && !first_mb && s->mv_type == MV_TYPE_16X16 &&
         (mb_x != s->mb_width - 1 ||
-         (mb_y != s->mb_height - 1 && s->codec_id == AV_CODEC_ID_MPEG1VIDEO)) &&
+         (mb_y != s->end_mb_y - 1 && s->codec_id == AV_CODEC_ID_MPEG1VIDEO)) &&
         ((s->pict_type == AV_PICTURE_TYPE_P && (motion_x | motion_y) == 0) ||
          (s->pict_type == AV_PICTURE_TYPE_B && s->mv_dir == s->last_mv_dir &&
           (((s->mv_dir & MV_DIR_FORWARD)
@@ -695,7 +756,7 @@ static av_always_inline void mpeg1_encode_mb_internal(MpegEncContext *s,
         }
     } else {
         if (first_mb) {
-            assert(s->mb_skip_run == 0);
+            av_assert0(s->mb_skip_run == 0);
             encode_mb_skip_run(s, s->mb_x);
         } else {
             encode_mb_skip_run(s, s->mb_skip_run);
@@ -774,7 +835,7 @@ static av_always_inline void mpeg1_encode_mb_internal(MpegEncContext *s,
                 s->last_mv[0][1][0] = s->last_mv[0][0][0] = motion_x;
                 s->last_mv[0][1][1] = s->last_mv[0][0][1] = motion_y;
             } else {
-                assert(!s->frame_pred_frame_dct && s->mv_type == MV_TYPE_FIELD);
+                av_assert2(!s->frame_pred_frame_dct && s->mv_type == MV_TYPE_FIELD);
 
                 if (cbp) {
                     if (s->dquant) {
@@ -861,8 +922,8 @@ static av_always_inline void mpeg1_encode_mb_internal(MpegEncContext *s,
                     s->b_count++;
                 }
             } else {
-                assert(s->mv_type == MV_TYPE_FIELD);
-                assert(!s->frame_pred_frame_dct);
+                av_assert2(s->mv_type == MV_TYPE_FIELD);
+                av_assert2(!s->frame_pred_frame_dct);
                 if (cbp) {                      // With coded bloc pattern
                     if (s->dquant) {
                         if (s->mv_dir == MV_DIR_FORWARD)
@@ -933,7 +994,7 @@ static av_always_inline void mpeg1_encode_mb_internal(MpegEncContext *s,
     }
 }
 
-void ff_mpeg1_encode_mb(MpegEncContext *s, int16_t block[6][64],
+void ff_mpeg1_encode_mb(MpegEncContext *s, int16_t block[8][64],
                         int motion_x, int motion_y)
 {
     if (s->chroma_format == CHROMA_420)
@@ -1043,6 +1104,8 @@ av_cold void ff_mpeg1_encode_init(MpegEncContext *s)
 #define OFFSET(x) offsetof(MpegEncContext, x)
 #define VE AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
 #define COMMON_OPTS                                                           \
+    { "gop_timecode",        "MPEG GOP Timecode in hh:mm:ss[:;.]ff format",   \
+      OFFSET(tc_opt_str), AV_OPT_TYPE_STRING, {.str=NULL}, CHAR_MIN, CHAR_MAX, VE },\
     { "intra_vlc",           "Use MPEG-2 intra VLC table.",                   \
       OFFSET(intra_vlc_format),    AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE }, \
     { "drop_frame_timecode", "Timecode is in drop frame format.",             \
@@ -1060,6 +1123,10 @@ static const AVOption mpeg2_options[] = {
     COMMON_OPTS
     { "non_linear_quant", "Use nonlinear quantizer.",    OFFSET(q_scale_type),   AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
     { "alternate_scan",   "Enable alternate scantable.", OFFSET(alternate_scan), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
+    { "seq_disp_ext",     "Write sequence_display_extension blocks.", OFFSET(seq_disp_ext), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE, "seq_disp_ext" },
+    {     "auto",   NULL, 0, AV_OPT_TYPE_CONST,  {.i64 = -1},  0, 0, VE, "seq_disp_ext" },
+    {     "never",  NULL, 0, AV_OPT_TYPE_CONST,  {.i64 = 0 },  0, 0, VE, "seq_disp_ext" },
+    {     "always", NULL, 0, AV_OPT_TYPE_CONST,  {.i64 = 1 },  0, 0, VE, "seq_disp_ext" },
     FF_MPV_COMMON_OPTS
     { NULL },
 };
@@ -1100,7 +1167,7 @@ AVCodec ff_mpeg2video_encoder = {
     .init                 = encode_init,
     .encode2              = ff_MPV_encode_picture,
     .close                = ff_MPV_encode_end,
-    .supported_framerates = ff_mpeg12_frame_rate_tab + 1,
+    .supported_framerates = ff_mpeg2_frame_rate_tab,
     .pix_fmts             = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV420P,
                                                            AV_PIX_FMT_YUV422P,
                                                            AV_PIX_FMT_NONE },
diff --git a/libavcodec/mpeg4audio.c b/libavcodec/mpeg4audio.c
index 0fb9b96..68448e6 100644
--- a/libavcodec/mpeg4audio.c
+++ b/libavcodec/mpeg4audio.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2008 Baptiste Coudurier <baptiste.coudurier@free.fr>
  * Copyright (c) 2009 Alex Converse <alex.converse@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -52,6 +52,8 @@ static int parse_config_ALS(GetBitContext *gb, MPEG4AudioConfig *c)
     return 0;
 }
 
+/* XXX: make sure to update the copies in the different encoders if you change
+ * this table */
 const int avpriv_mpeg4audio_sample_rates[16] = {
     96000, 88200, 64000, 48000, 44100, 32000,
     24000, 22050, 16000, 12000, 11025, 8000, 7350
@@ -82,7 +84,8 @@ int avpriv_mpeg4audio_get_config(MPEG4AudioConfig *c, const uint8_t *buf,
     GetBitContext gb;
     int specific_config_bitindex;
 
-    init_get_bits(&gb, buf, bit_size);
+    if (bit_size <= 0 || init_get_bits(&gb, buf, bit_size) < 0)
+        return AVERROR_INVALIDDATA;
     c->object_type = get_object_type(&gb);
     c->sample_rate = get_sample_rate(&gb, &c->sampling_index);
     c->chan_config = get_bits(&gb, 4);
@@ -123,8 +126,11 @@ int avpriv_mpeg4audio_get_config(MPEG4AudioConfig *c, const uint8_t *buf,
             if (show_bits(&gb, 11) == 0x2b7) { // sync extension
                 get_bits(&gb, 11);
                 c->ext_object_type = get_object_type(&gb);
-                if (c->ext_object_type == AOT_SBR && (c->sbr = get_bits1(&gb)) == 1)
+                if (c->ext_object_type == AOT_SBR && (c->sbr = get_bits1(&gb)) == 1) {
                     c->ext_sample_rate = get_sample_rate(&gb, &c->ext_sampling_index);
+                    if (c->ext_sample_rate == c->sample_rate)
+                        c->sbr = -1;
+                }
                 if (get_bits_left(&gb) > 11 && get_bits(&gb, 11) == 0x548)
                     c->ps = get_bits1(&gb);
                 break;
diff --git a/libavcodec/mpeg4audio.h b/libavcodec/mpeg4audio.h
index e71122d..0f41045 100644
--- a/libavcodec/mpeg4audio.h
+++ b/libavcodec/mpeg4audio.h
@@ -2,20 +2,20 @@
  * MPEG-4 Audio common header
  * Copyright (c) 2008 Baptiste Coudurier <baptiste.coudurier@free.fr>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mpeg4data.h b/libavcodec/mpeg4data.h
index 87bb539..1ac5840 100644
--- a/libavcodec/mpeg4data.h
+++ b/libavcodec/mpeg4data.h
@@ -3,20 +3,20 @@
  * H263+ support
  * copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mpeg4video.c b/libavcodec/mpeg4video.c
index 84fa26b..38ed5af 100644
--- a/libavcodec/mpeg4video.c
+++ b/libavcodec/mpeg4video.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2000,2001 Fabrice Bellard
  * Copyright (c) 2002-2010 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mpeg4video.h b/libavcodec/mpeg4video.h
index b092684..08b4cb5 100644
--- a/libavcodec/mpeg4video.h
+++ b/libavcodec/mpeg4video.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2000,2001 Fabrice Bellard
  * Copyright (c) 2002-2010 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -62,29 +62,30 @@
 typedef struct Mpeg4DecContext {
     MpegEncContext m;
 
-    ///< number of bits to represent the fractional part of time
+    /// number of bits to represent the fractional part of time
     int time_increment_bits;
     int shape;
     int vol_sprite_usage;
     int sprite_brightness_change;
     int num_sprite_warping_points;
-    ///< sprite trajectory points
+    /// sprite trajectory points
     uint16_t sprite_traj[4][2];
-    ///< sprite shift [isChroma]
+    /// sprite shift [isChroma]
     int sprite_shift[2];
 
     // reversible vlc
     int rvlc;
-    ///< could this stream contain resync markers
+    /// could this stream contain resync markers
     int resync_marker;
-    ///< time distance of first I -> B, used for interlaced b frames
+    /// time distance of first I -> B, used for interlaced b frames
     int t_frame;
 
     int new_pred;
     int enhancement_type;
     int scalability;
     int use_intra_dc_vlc;
-    ///< QP above whch the ac VLC should be used for intra dc
+
+    /// QP above which the ac VLC should be used for intra dc
     int intra_dc_threshold;
 
     /* bug workarounds */
@@ -92,7 +93,8 @@ typedef struct Mpeg4DecContext {
     int divx_build;
     int xvid_build;
     int lavc_build;
-    ///< flag for having shown the warning about divxs invalid b frames
+
+    /// flag for having shown the warning about DivX's invalid b frames
     int showed_packed_warning;
     int vol_control_parameters; /**< does the stream contain the low_delay flag,
                                  *   used to work around buggy encoders. */
@@ -148,6 +150,8 @@ int ff_mpeg4_decode_partitions(Mpeg4DecContext *ctx);
 int ff_mpeg4_get_video_packet_prefix_length(MpegEncContext *s);
 int ff_mpeg4_decode_video_packet_header(Mpeg4DecContext *ctx);
 void ff_mpeg4_init_direct_mv(MpegEncContext *s);
+void ff_mpeg4videodec_static_init(void);
+int ff_mpeg4_workaround_bugs(AVCodecContext *avctx);
 int ff_mpeg4_frame_end(AVCodecContext *avctx, const uint8_t *buf, int buf_size);
 
 /**
@@ -223,7 +227,7 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext *s, int n, int level,
     } else {
         level += pred;
         ret    = level;
-        if (s->err_recognition & AV_EF_BITSTREAM) {
+        if (s->err_recognition & (AV_EF_BITSTREAM | AV_EF_AGGRESSIVE)) {
             if (level < 0) {
                 av_log(s->avctx, AV_LOG_ERROR,
                        "dc<0 at %dx%d\n", s->mb_x, s->mb_y);
diff --git a/libavcodec/mpeg4video_parser.c b/libavcodec/mpeg4video_parser.c
index 246bb9c..b7718f6 100644
--- a/libavcodec/mpeg4video_parser.c
+++ b/libavcodec/mpeg4video_parser.c
@@ -3,23 +3,25 @@
  * Copyright (c) 2003 Fabrice Bellard
  * Copyright (c) 2003 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#define UNCHECKED_BITSTREAM_READER 1
+
 #include "internal.h"
 #include "parser.h"
 #include "mpegvideo.h"
@@ -86,6 +88,8 @@ static int mpeg4_decode_header(AVCodecParserContext *s1, AVCodecContext *avctx,
     if (avctx->extradata_size && pc->first_picture) {
         init_get_bits(gb, avctx->extradata, avctx->extradata_size * 8);
         ret = ff_mpeg4_decode_picture_header(dec_ctx, gb);
+        if (ret < 0)
+            av_log(avctx, AV_LOG_WARNING, "Failed to parse extradata\n");
     }
 
     init_get_bits(gb, buf, 8 * buf_size);
@@ -96,6 +100,13 @@ static int mpeg4_decode_header(AVCodecParserContext *s1, AVCodecContext *avctx,
         if (ret < 0)
             return ret;
     }
+    if((s1->flags & PARSER_FLAG_USE_CODEC_TS) && s->avctx->time_base.den>0 && ret>=0){
+        av_assert1(s1->pts == AV_NOPTS_VALUE);
+        av_assert1(s1->dts == AV_NOPTS_VALUE);
+
+        s1->pts = av_rescale_q(s->time, (AVRational){1, s->avctx->time_base.den}, (AVRational){1, 1200000});
+    }
+
     s1->pict_type     = s->pict_type;
     pc->first_picture = 0;
     return ret;
@@ -105,8 +116,12 @@ static av_cold int mpeg4video_parse_init(AVCodecParserContext *s)
 {
     struct Mp4vParseContext *pc = s->priv_data;
 
+    ff_mpeg4videodec_static_init();
+
     pc->first_picture           = 1;
+    pc->dec_ctx.m.quant_precision     = 5;
     pc->dec_ctx.m.slice_context_count = 1;
+    pc->dec_ctx.showed_packed_warning = 1;
     return 0;
 }
 
diff --git a/libavcodec/mpeg4video_parser.h b/libavcodec/mpeg4video_parser.h
index 0f56e7f..50f8b44 100644
--- a/libavcodec/mpeg4video_parser.h
+++ b/libavcodec/mpeg4video_parser.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2003 Fabrice Bellard
  * Copyright (c) 2003 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mpeg4videodec.c b/libavcodec/mpeg4videodec.c
index 863dd29..f5eec3f 100644
--- a/libavcodec/mpeg4videodec.c
+++ b/libavcodec/mpeg4videodec.c
@@ -3,23 +3,26 @@
  * Copyright (c) 2000,2001 Fabrice Bellard
  * Copyright (c) 2002-2010 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#define UNCHECKED_BITSTREAM_READER 1
+
+#include "libavutil/opt.h"
 #include "error_resilience.h"
 #include "idctdsp.h"
 #include "internal.h"
@@ -109,12 +112,13 @@ void ff_mpeg4_pred_ac(MpegEncContext *s, int16_t *block, int n, int dir)
  * check if the next stuff is a resync marker or the end.
  * @return 0 if not
  */
-static inline int mpeg4_is_resync(MpegEncContext *s)
+static inline int mpeg4_is_resync(Mpeg4DecContext *ctx)
 {
+    MpegEncContext *s = &ctx->m;
     int bits_count = get_bits_count(&s->gb);
     int v          = show_bits(&s->gb, 16);
 
-    if (s->workaround_bugs & FF_BUG_NO_PADDING)
+    if (s->workaround_bugs & FF_BUG_NO_PADDING && !ctx->resync_marker)
         return 0;
 
     while (v <= 0xFF) {
@@ -131,10 +135,11 @@ static inline int mpeg4_is_resync(MpegEncContext *s)
         v  |= 0x7F >> (7 - (bits_count & 7));
 
         if (v == 0x7F)
-            return 1;
+            return s->mb_num;
     } else {
         if (v == ff_mpeg4_resync_prefix[bits_count & 7]) {
-            int len;
+            int len, mb_num;
+            int mb_num_bits = av_log2(s->mb_num - 1) + 1;
             GetBitContext gb = s->gb;
 
             skip_bits(&s->gb, 1);
@@ -144,10 +149,14 @@ static inline int mpeg4_is_resync(MpegEncContext *s)
                 if (get_bits1(&s->gb))
                     break;
 
+            mb_num = get_bits(&s->gb, mb_num_bits);
+            if (!mb_num || mb_num > s->mb_num || get_bits_count(&s->gb)+6 > s->gb.size_in_bits)
+                mb_num= -1;
+
             s->gb = gb;
 
             if (len >= ff_mpeg4_get_video_packet_prefix_length(s))
-                return 1;
+                return mb_num;
         }
     }
     return 0;
@@ -368,6 +377,17 @@ static int mpeg4_decode_sprite_trajectory(Mpeg4DecContext *ctx, GetBitContext *g
     return 0;
 }
 
+static int decode_new_pred(Mpeg4DecContext *ctx, GetBitContext *gb) { /* reads past the new_pred fields without storing them; always returns 0 */
+    int len = FFMIN(ctx->time_increment_bits + 3, 15); /* field width in bits: time_increment_bits + 3, capped at 15 */
+
+    get_bits(gb, len); /* first len-bit field: read and discarded */
+    if (get_bits1(gb)) /* 1-bit flag: when set, a second len-bit field follows */
+        get_bits(gb, len); /* second len-bit field: also discarded */
+    check_marker(gb, "after new_pred"); /* NOTE(review): presumably consumes the trailing marker bit and reports when it is missing -- confirm against check_marker() */
+
+    return 0; /* there is no failure path in this function */
+}
+
 /**
  * Decode the next video packet.
  * @return <0 if something went wrong
@@ -403,19 +423,6 @@ int ff_mpeg4_decode_video_packet_header(Mpeg4DecContext *ctx)
                "illegal mb_num in video packet (%d %d) \n", mb_num, s->mb_num);
         return -1;
     }
-    if (s->pict_type == AV_PICTURE_TYPE_B) {
-        int mb_x = 0, mb_y = 0;
-
-        while (s->next_picture.mbskip_table[s->mb_index2xy[mb_num]]) {
-            if (!mb_x)
-                ff_thread_await_progress(&s->next_picture_ptr->tf, mb_y++, 0);
-            mb_num++;
-            if (++mb_x == s->mb_width)
-                mb_x = 0;
-        }
-        if (mb_num >= s->mb_num)
-            return -1;  // slice contains just skipped MBs (already decoded)
-    }
 
     s->mb_x = mb_num % s->mb_width;
     s->mb_y = mb_num / s->mb_width;
@@ -468,7 +475,8 @@ int ff_mpeg4_decode_video_packet_header(Mpeg4DecContext *ctx)
             }
         }
     }
-    // FIXME new-pred stuff
+    if (ctx->new_pred)
+        decode_new_pred(ctx, &s->gb);
 
     return 0;
 }
@@ -563,7 +571,7 @@ static inline int mpeg4_decode_dc(MpegEncContext *s, int n, int *dir_ptr)
 
         if (code > 8) {
             if (get_bits1(&s->gb) == 0) { /* marker */
-                if (s->err_recognition & AV_EF_BITSTREAM) {
+                if (s->err_recognition & (AV_EF_BITSTREAM|AV_EF_COMPLIANT)) {
                     av_log(s->avctx, AV_LOG_ERROR, "dc marker bit missing\n");
                     return -1;
                 }
@@ -608,7 +616,7 @@ static int mpeg4_decode_partition_a(Mpeg4DecContext *ctx)
                     cbpc = get_vlc2(&s->gb, ff_h263_intra_MCBPC_vlc.table, INTRA_MCBPC_VLC_BITS, 2);
                     if (cbpc < 0) {
                         av_log(s->avctx, AV_LOG_ERROR,
-                               "cbpc corrupted at %d %d\n", s->mb_x, s->mb_y);
+                               "mcbpc corrupted at %d %d\n", s->mb_x, s->mb_y);
                         return -1;
                     }
                 } while (cbpc == 8);
@@ -680,7 +688,7 @@ try_again:
                 cbpc = get_vlc2(&s->gb, ff_h263_inter_MCBPC_vlc.table, INTER_MCBPC_VLC_BITS, 2);
                 if (cbpc < 0) {
                     av_log(s->avctx, AV_LOG_ERROR,
-                           "cbpc corrupted at %d %d\n", s->mb_x, s->mb_y);
+                           "mcbpc corrupted at %d %d\n", s->mb_x, s->mb_y);
                     return -1;
                 }
                 if (cbpc == 20)
@@ -932,7 +940,8 @@ static inline int mpeg4_decode_block(Mpeg4DecContext *ctx, int16_t *block,
                                      int n, int coded, int intra, int rvlc)
 {
     MpegEncContext *s = &ctx->m;
-    int level, i, last, run, qmul, qadd, dc_pred_dir;
+    int level, i, last, run, qmul, qadd;
+    int av_uninit(dc_pred_dir);
     RLTable *rl;
     RL_VLC_ELEM *rl_vlc;
     const uint8_t *scan_table;
@@ -1077,7 +1086,8 @@ static inline int mpeg4_decode_block(Mpeg4DecContext *ctx, int16_t *block,
                                 if (SHOW_UBITS(re, &s->gb, 1) == 0) {
                                     av_log(s->avctx, AV_LOG_ERROR,
                                            "1. marker bit missing in 3. esc\n");
-                                    return -1;
+                                    if (!(s->err_recognition & AV_EF_IGNORE_ERR))
+                                        return -1;
                                 }
                                 SKIP_CACHE(re, &s->gb, 1);
 
@@ -1087,19 +1097,42 @@ static inline int mpeg4_decode_block(Mpeg4DecContext *ctx, int16_t *block,
                                 if (SHOW_UBITS(re, &s->gb, 1) == 0) {
                                     av_log(s->avctx, AV_LOG_ERROR,
                                            "2. marker bit missing in 3. esc\n");
-                                    return -1;
+                                    if (!(s->err_recognition & AV_EF_IGNORE_ERR))
+                                        return -1;
                                 }
 
                                 SKIP_COUNTER(re, &s->gb, 1 + 12 + 1);
                             }
 
+#if 0
+                            if (s->error_recognition >= FF_ER_COMPLIANT) {
+                                const int abs_level= FFABS(level);
+                                if (abs_level<=MAX_LEVEL && run<=MAX_RUN) {
+                                    const int run1= run - rl->max_run[last][abs_level] - 1;
+                                    if (abs_level <= rl->max_level[last][run]) {
+                                        av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, vlc encoding possible\n");
+                                        return -1;
+                                    }
+                                    if (s->error_recognition > FF_ER_COMPLIANT) {
+                                        if (abs_level <= rl->max_level[last][run]*2) {
+                                            av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, esc 1 encoding possible\n");
+                                            return -1;
+                                        }
+                                        if (run1 >= 0 && abs_level <= rl->max_level[last][run1]) {
+                                            av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, esc 2 encoding possible\n");
+                                            return -1;
+                                        }
+                                    }
+                                }
+                            }
+#endif
                             if (level > 0)
                                 level = level * qmul + qadd;
                             else
                                 level = level * qmul - qadd;
 
                             if ((unsigned)(level + 2048) > 4095) {
-                                if (s->err_recognition & AV_EF_BITSTREAM) {
+                                if (s->err_recognition & (AV_EF_BITSTREAM|AV_EF_AGGRESSIVE)) {
                                     if (level > 2560 || level < -2560) {
                                         av_log(s->avctx, AV_LOG_ERROR,
                                                "|level| overflow in 3. esc, qp=%d\n",
@@ -1136,6 +1169,7 @@ static inline int mpeg4_decode_block(Mpeg4DecContext *ctx, int16_t *block,
                 level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
                 LAST_SKIP_BITS(re, &s->gb, 1);
             }
+            tprintf(s->avctx, "dct[%d][%d] = %- 4d end?:%d\n", scan_table[i&63]&7, scan_table[i&63] >> 3, level, i>62);
             if (i > 62) {
                 i -= 192;
                 if (i & (~63)) {
@@ -1244,12 +1278,12 @@ static int mpeg4_decode_partitioned_mb(MpegEncContext *s, int16_t block[6][64])
 
     /* per-MB end of slice check */
     if (--s->mb_num_left <= 0) {
-        if (mpeg4_is_resync(s))
+        if (mpeg4_is_resync(ctx))
             return SLICE_END;
         else
             return SLICE_NOEND;
     } else {
-        if (mpeg4_is_resync(s)) {
+        if (mpeg4_is_resync(ctx)) {
             const int delta = s->mb_x + 1 == s->mb_width ? 2 : 1;
             if (s->cbp_table[xy + delta])
                 return SLICE_END;
@@ -1266,7 +1300,7 @@ static int mpeg4_decode_mb(MpegEncContext *s, int16_t block[6][64])
     static int8_t quant_tab[4] = { -1, -2, 1, 2 };
     const int xy = s->mb_x + s->mb_y * s->mb_stride;
 
-    assert(s->h263_pred);
+    av_assert2(s->h263_pred);
 
     if (s->pict_type == AV_PICTURE_TYPE_P ||
         s->pict_type == AV_PICTURE_TYPE_S) {
@@ -1302,7 +1336,7 @@ static int mpeg4_decode_mb(MpegEncContext *s, int16_t block[6][64])
             cbpc = get_vlc2(&s->gb, ff_h263_inter_MCBPC_vlc.table, INTER_MCBPC_VLC_BITS, 2);
             if (cbpc < 0) {
                 av_log(s->avctx, AV_LOG_ERROR,
-                       "cbpc damaged at %d %d\n", s->mb_x, s->mb_y);
+                       "mcbpc damaged at %d %d\n", s->mb_x, s->mb_y);
                 return -1;
             }
         } while (cbpc == 20);
@@ -1608,20 +1642,23 @@ intra:
 end:
     /* per-MB end of slice check */
     if (s->codec_id == AV_CODEC_ID_MPEG4) {
-        if (mpeg4_is_resync(s)) {
-            const int delta = s->mb_x + 1 == s->mb_width ? 2 : 1;
+        int next = mpeg4_is_resync(ctx);
+        if (next) {
+            if        (s->mb_x + s->mb_y*s->mb_width + 1 >  next && (s->avctx->err_recognition & AV_EF_AGGRESSIVE)) {
+                return -1;
+            } else if (s->mb_x + s->mb_y*s->mb_width + 1 >= next)
+                return SLICE_END;
 
-            if (s->pict_type == AV_PICTURE_TYPE_B &&
-                s->next_picture.mbskip_table[xy + delta]) {
+            if (s->pict_type == AV_PICTURE_TYPE_B) {
+                const int delta= s->mb_x + 1 == s->mb_width ? 2 : 1;
                 ff_thread_await_progress(&s->next_picture_ptr->tf,
                                          (s->mb_x + delta >= s->mb_width)
                                          ? FFMIN(s->mb_y + 1, s->mb_height - 1)
                                          : s->mb_y, 0);
+                if (s->next_picture.mbskip_table[xy + delta])
+                    return SLICE_OK;
             }
 
-            if (s->pict_type == AV_PICTURE_TYPE_B &&
-                s->next_picture.mbskip_table[xy + delta])
-                return SLICE_OK;
             return SLICE_END;
         }
     }
@@ -1632,29 +1669,30 @@ end:
 static int mpeg4_decode_gop_header(MpegEncContext *s, GetBitContext *gb)
 {
     int hours, minutes, seconds;
-    unsigned time_code = show_bits(gb, 18);
-
-    if (time_code & 0x40) {     /* marker_bit */
-        hours   = time_code >> 13;
-        minutes = time_code >> 7 & 0x3f;
-        seconds = time_code & 0x3f;
-        s->time_base = seconds + 60 * (minutes + 60 * hours);
-        skip_bits(gb, 20);      /* time_code, closed_gov, broken_link */
-    } else {
-        av_log(s->avctx, AV_LOG_WARNING, "GOP header missing marker_bit\n");
+
+    if (!show_bits(gb, 23)) { /* the next 23 bits are all zero: warn and fail before reading any field */
+        av_log(s->avctx, AV_LOG_WARNING, "GOP header invalid\n");
+        return -1;
     }
 
+    hours   = get_bits(gb, 5); /* time_code is 18 bits: 5-bit hours first */
+    minutes = get_bits(gb, 6); /* then 6-bit minutes */
+    skip_bits1(gb); /* marker_bit, skipped without being checked */
+    seconds = get_bits(gb, 6); /* then 6-bit seconds */
+
+    s->time_base = seconds + 60*(minutes + 60*hours); /* time code expressed in whole seconds */
+
+    skip_bits1(gb); /* closed_gov, ignored */
+    skip_bits1(gb); /* broken_link, ignored */
+
     return 0;
 }
 
 static int mpeg4_decode_profile_level(MpegEncContext *s, GetBitContext *gb)
 {
-    int profile_and_level_indication;
-
-    profile_and_level_indication = get_bits(gb, 8);
 
-    s->avctx->profile = (profile_and_level_indication & 0xf0) >> 4;
-    s->avctx->level   = (profile_and_level_indication & 0x0f);
+    s->avctx->profile = get_bits(gb, 4);
+    s->avctx->level   = get_bits(gb, 4);
 
     // for Simple profile, level 0
     if (s->avctx->profile == 0 && s->avctx->level == 8) {
@@ -1707,7 +1745,7 @@ static int decode_vol_header(Mpeg4DecContext *ctx, GetBitContext *gb)
         }
     } else {
         /* is setting low delay flag only once the smartest thing to do?
-         * low delay detection won't be overriden. */
+         * low delay detection won't be overridden. */
         if (s->picture_number == 0)
             s->low_delay = 0;
     }
@@ -1744,11 +1782,11 @@ static int decode_vol_header(Mpeg4DecContext *ctx, GetBitContext *gb)
 
     if (ctx->shape != BIN_ONLY_SHAPE) {
         if (ctx->shape == RECT_SHAPE) {
-            skip_bits1(gb);   /* marker */
+            check_marker(gb, "before width");
             width = get_bits(gb, 13);
-            skip_bits1(gb);   /* marker */
+            check_marker(gb, "before height");
             height = get_bits(gb, 13);
-            skip_bits1(gb);   /* marker */
+            check_marker(gb, "after height");
             if (width && height &&  /* they should be non zero but who knows */
                 !(s->width && s->codec_tag == AV_RL32("MP4S"))) {
                 if (s->width && s->height &&
@@ -1806,6 +1844,9 @@ static int decode_vol_header(Mpeg4DecContext *ctx, GetBitContext *gb)
             if (s->quant_precision != 5)
                 av_log(s->avctx, AV_LOG_ERROR,
                        "quant precision %d\n", s->quant_precision);
+            if (s->quant_precision<3 || s->quant_precision>9) {
+                s->quant_precision = 5;
+            }
         } else {
             s->quant_precision = 5;
         }
@@ -1881,6 +1922,11 @@ static int decode_vol_header(Mpeg4DecContext *ctx, GetBitContext *gb)
         else
             s->quarter_sample = 0;
 
+        if (get_bits_left(gb) < 4) {
+            av_log(s->avctx, AV_LOG_ERROR, "VOL Header truncated\n");
+            return AVERROR_INVALIDDATA;
+        }
+
         if (!get_bits1(gb)) {
             int pos               = get_bits_count(gb);
             int estimation_method = get_bits(gb, 2);
@@ -1988,6 +2034,17 @@ no_cplx_est:
         }
     }
 
+    if (s->avctx->debug&FF_DEBUG_PICT_INFO) {
+        av_log(s->avctx, AV_LOG_DEBUG, "tb %d/%d, tincrbits:%d, qp_prec:%d, ps:%d,  %s%s%s%s\n",
+               s->avctx->time_base.num, s->avctx->time_base.den,
+               ctx->time_increment_bits,
+               s->quant_precision,
+               s->progressive_sequence,
+               ctx->scalability ? "scalability " :"" , s->quarter_sample ? "qpel " : "",
+               s->data_partitioning ? "partition " : "", ctx->rvlc ? "rvlc " : ""
+        );
+    }
+
     return 0;
 }
 
@@ -2020,8 +2077,9 @@ static int decode_user_data(Mpeg4DecContext *ctx, GetBitContext *gb)
         ctx->divx_build   = build;
         s->divx_packed  = e == 3 && last == 'p';
         if (s->divx_packed && !ctx->showed_packed_warning) {
-            av_log(s->avctx, AV_LOG_WARNING,
-                   "Invalid and inefficient vfw-avi packed B frames detected\n");
+            av_log(s->avctx, AV_LOG_INFO, "Video uses a non-standard and "
+                   "wasteful way to store B-frames ('packed B-frames'). "
+                   "Consider using a tool like VirtualDub or avidemux to fix it.\n");
             ctx->showed_packed_warning = 1;
         }
     }
@@ -2047,6 +2105,14 @@ static int decode_user_data(Mpeg4DecContext *ctx, GetBitContext *gb)
     if (e == 1)
         ctx->xvid_build = build;
 
+    return 0;
+}
+
+int ff_mpeg4_workaround_bugs(AVCodecContext *avctx)
+{
+    Mpeg4DecContext *ctx = avctx->priv_data;
+    MpegEncContext *s = &ctx->m;
+
     if (ctx->xvid_build == -1 && ctx->divx_version == -1 && ctx->lavc_build == -1) {
         if (s->stream_codec_tag == AV_RL32("XVID") ||
             s->codec_tag        == AV_RL32("XVID") ||
@@ -2067,8 +2133,92 @@ static int decode_user_data(Mpeg4DecContext *ctx, GetBitContext *gb)
         ctx->divx_build   = -1;
     }
 
-    if (CONFIG_MPEG4_DECODER && ctx->xvid_build >= 0)
-        ff_xvididct_init(&s->idsp, s->avctx);
+    if (s->workaround_bugs & FF_BUG_AUTODETECT) {
+        if (s->codec_tag == AV_RL32("XVIX"))
+            s->workaround_bugs |= FF_BUG_XVID_ILACE;
+
+        if (s->codec_tag == AV_RL32("UMP4"))
+            s->workaround_bugs |= FF_BUG_UMP4;
+
+        if (ctx->divx_version >= 500 && ctx->divx_build < 1814)
+            s->workaround_bugs |= FF_BUG_QPEL_CHROMA;
+
+        if (ctx->divx_version > 502 && ctx->divx_build < 1814)
+            s->workaround_bugs |= FF_BUG_QPEL_CHROMA2;
+
+        if (ctx->xvid_build <= 3U)
+            s->padding_bug_score = 256 * 256 * 256 * 64;
+
+        if (ctx->xvid_build <= 1U)
+            s->workaround_bugs |= FF_BUG_QPEL_CHROMA;
+
+        if (ctx->xvid_build <= 12U)
+            s->workaround_bugs |= FF_BUG_EDGE;
+
+        if (ctx->xvid_build <= 32U)
+            s->workaround_bugs |= FF_BUG_DC_CLIP;
+
+#define SET_QPEL_FUNC(postfix1, postfix2)                           \
+    s->qdsp.put_        ## postfix1 = ff_put_        ## postfix2;   \
+    s->qdsp.put_no_rnd_ ## postfix1 = ff_put_no_rnd_ ## postfix2;   \
+    s->qdsp.avg_        ## postfix1 = ff_avg_        ## postfix2;
+
+        if (ctx->lavc_build < 4653U)
+            s->workaround_bugs |= FF_BUG_STD_QPEL;
+
+        if (ctx->lavc_build < 4655U)
+            s->workaround_bugs |= FF_BUG_DIRECT_BLOCKSIZE;
+
+        if (ctx->lavc_build < 4670U)
+            s->workaround_bugs |= FF_BUG_EDGE;
+
+        if (ctx->lavc_build <= 4712U)
+            s->workaround_bugs |= FF_BUG_DC_CLIP;
+
+        if (ctx->divx_version >= 0)
+            s->workaround_bugs |= FF_BUG_DIRECT_BLOCKSIZE;
+        if (ctx->divx_version == 501 && ctx->divx_build == 20020416)
+            s->padding_bug_score = 256 * 256 * 256 * 64;
+
+        if (ctx->divx_version < 500U)
+            s->workaround_bugs |= FF_BUG_EDGE;
+
+        if (ctx->divx_version >= 0)
+            s->workaround_bugs |= FF_BUG_HPEL_CHROMA;
+    }
+
+    if (s->workaround_bugs & FF_BUG_STD_QPEL) {
+        SET_QPEL_FUNC(qpel_pixels_tab[0][5], qpel16_mc11_old_c)
+        SET_QPEL_FUNC(qpel_pixels_tab[0][7], qpel16_mc31_old_c)
+        SET_QPEL_FUNC(qpel_pixels_tab[0][9], qpel16_mc12_old_c)
+        SET_QPEL_FUNC(qpel_pixels_tab[0][11], qpel16_mc32_old_c)
+        SET_QPEL_FUNC(qpel_pixels_tab[0][13], qpel16_mc13_old_c)
+        SET_QPEL_FUNC(qpel_pixels_tab[0][15], qpel16_mc33_old_c)
+
+        SET_QPEL_FUNC(qpel_pixels_tab[1][5], qpel8_mc11_old_c)
+        SET_QPEL_FUNC(qpel_pixels_tab[1][7], qpel8_mc31_old_c)
+        SET_QPEL_FUNC(qpel_pixels_tab[1][9], qpel8_mc12_old_c)
+        SET_QPEL_FUNC(qpel_pixels_tab[1][11], qpel8_mc32_old_c)
+        SET_QPEL_FUNC(qpel_pixels_tab[1][13], qpel8_mc13_old_c)
+        SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_old_c)
+    }
+
+    if (avctx->debug & FF_DEBUG_BUGS)
+        av_log(s->avctx, AV_LOG_DEBUG,
+               "bugs: %X lavc_build:%d xvid_build:%d divx_version:%d divx_build:%d %s\n",
+               s->workaround_bugs, ctx->lavc_build, ctx->xvid_build,
+               ctx->divx_version, ctx->divx_build, s->divx_packed ? "p" : "");
+
+#if HAVE_MMX
+    if (CONFIG_MPEG4_DECODER && ctx->xvid_build >= 0 &&
+        s->codec_id == AV_CODEC_ID_MPEG4 &&
+        avctx->idct_algo == FF_IDCT_AUTO &&
+        (av_get_cpu_flags() & AV_CPU_FLAG_MMX)) {
+        avctx->idct_algo = FF_IDCT_XVIDMMX;
+        ff_dct_common_init(s);
+        return 1;
+    }
+#endif
 
     return 0;
 }
@@ -2077,6 +2227,7 @@ static int decode_vop_header(Mpeg4DecContext *ctx, GetBitContext *gb)
 {
     MpegEncContext *s = &ctx->m;
     int time_incr, time_increment;
+    int64_t pts;
 
     s->pict_type = get_bits(gb, 2) + AV_PICTURE_TYPE_I;        /* pict type: I = 0 , P = 1 */
     if (s->pict_type == AV_PICTURE_TYPE_B && s->low_delay &&
@@ -2116,6 +2267,9 @@ static int decode_vop_header(Mpeg4DecContext *ctx, GetBitContext *gb)
 
         av_log(s->avctx, AV_LOG_ERROR,
                "my guess is %d bits ;)\n", ctx->time_increment_bits);
+        if (s->avctx->time_base.den && 4*s->avctx->time_base.den < 1<<ctx->time_increment_bits) {
+            s->avctx->time_base.den = 1<<ctx->time_increment_bits;
+        }
     }
 
     if (IS_3IV1)
@@ -2156,12 +2310,22 @@ static int decode_vop_header(Mpeg4DecContext *ctx, GetBitContext *gb)
                             ROUNDED_DIV(s->last_non_b_time - s->pp_time, ctx->t_frame)) * 2;
         s->pb_field_time = (ROUNDED_DIV(s->time, ctx->t_frame) -
                             ROUNDED_DIV(s->last_non_b_time - s->pp_time, ctx->t_frame)) * 2;
-        if (!s->progressive_sequence) {
-            if (s->pp_field_time <= s->pb_field_time || s->pb_field_time <= 1)
+        if (s->pp_field_time <= s->pb_field_time || s->pb_field_time <= 1) {
+            s->pb_field_time = 2;
+            s->pp_field_time = 4;
+            if (!s->progressive_sequence)
                 return FRAME_SKIPPED;
         }
     }
 
+    if (s->avctx->time_base.num)
+        pts = ROUNDED_DIV(s->time, s->avctx->time_base.num);
+    else
+        pts = AV_NOPTS_VALUE;
+    if (s->avctx->debug&FF_DEBUG_PTS)
+        av_log(s->avctx, AV_LOG_DEBUG, "MPEG4 PTS: %"PRId64"\n",
+               pts);
+
     check_marker(gb, "before vop_coded");
 
     /* vop coded */
@@ -2170,6 +2334,9 @@ static int decode_vop_header(Mpeg4DecContext *ctx, GetBitContext *gb)
             av_log(s->avctx, AV_LOG_ERROR, "vop not coded\n");
         return FRAME_SKIPPED;
     }
+    if (ctx->new_pred)
+        decode_new_pred(ctx, gb);
+
     if (ctx->shape != BIN_ONLY_SHAPE &&
                     (s->pict_type == AV_PICTURE_TYPE_P ||
                      (s->pict_type == AV_PICTURE_TYPE_S &&
@@ -2206,6 +2373,10 @@ static int decode_vop_header(Mpeg4DecContext *ctx, GetBitContext *gb)
         if (s->pict_type == AV_PICTURE_TYPE_B)
             skip_bits_long(gb, ctx->cplx_estimation_trash_b);
 
+        if (get_bits_left(gb) < 3) {
+            av_log(s->avctx, AV_LOG_ERROR, "Header truncated\n");
+            return -1;
+        }
         ctx->intra_dc_threshold = ff_mpeg4_dc_threshold[get_bits(gb, 3)];
         if (!s->progressive_sequence) {
             s->top_field_first = get_bits1(gb);
@@ -2251,6 +2422,7 @@ static int decode_vop_header(Mpeg4DecContext *ctx, GetBitContext *gb)
             if (s->f_code == 0) {
                 av_log(s->avctx, AV_LOG_ERROR,
                        "Error, header damaged or not MPEG4 header (f_code=0)\n");
+                s->f_code = 1;
                 return -1;  // makes no sense to continue, as there is nothing left from the image then
             }
         } else
@@ -2258,22 +2430,31 @@ static int decode_vop_header(Mpeg4DecContext *ctx, GetBitContext *gb)
 
         if (s->pict_type == AV_PICTURE_TYPE_B) {
             s->b_code = get_bits(gb, 3);
+            if (s->b_code == 0) {
+                av_log(s->avctx, AV_LOG_ERROR,
+                       "Error, header damaged or not MPEG4 header (b_code=0)\n");
+                s->b_code=1;
+                return -1; // makes no sense to continue, as the MV decoding will break very quickly
+            }
         } else
             s->b_code = 1;
 
         if (s->avctx->debug & FF_DEBUG_PICT_INFO) {
             av_log(s->avctx, AV_LOG_DEBUG,
-                   "qp:%d fc:%d,%d %s size:%d pro:%d alt:%d top:%d %spel part:%d resync:%d w:%d a:%d rnd:%d vot:%d%s dc:%d ce:%d/%d/%d\n",
+                   "qp:%d fc:%d,%d %s size:%d pro:%d alt:%d top:%d %spel part:%d resync:%d w:%d a:%d rnd:%d vot:%d%s dc:%d ce:%d/%d/%d time:%"PRId64" tincr:%d\n",
                    s->qscale, s->f_code, s->b_code,
                    s->pict_type == AV_PICTURE_TYPE_I ? "I" : (s->pict_type == AV_PICTURE_TYPE_P ? "P" : (s->pict_type == AV_PICTURE_TYPE_B ? "B" : "S")),
-                   gb->size_in_bits, s->progressive_sequence, s->alternate_scan,
+                   gb->size_in_bits,s->progressive_sequence, s->alternate_scan,
                    s->top_field_first, s->quarter_sample ? "q" : "h",
                    s->data_partitioning, ctx->resync_marker,
                    ctx->num_sprite_warping_points, s->sprite_warping_accuracy,
                    1 - s->no_rounding, s->vo_type,
                    ctx->vol_control_parameters ? " VOLC" : " ", ctx->intra_dc_threshold,
                    ctx->cplx_estimation_trash_i, ctx->cplx_estimation_trash_p,
-                   ctx->cplx_estimation_trash_b);
+                   ctx->cplx_estimation_trash_b,
+                   s->time,
+                   time_increment
+                  );
         }
 
         if (!ctx->scalability) {
@@ -2336,8 +2517,8 @@ int ff_mpeg4_decode_picture_header(Mpeg4DecContext *ctx, GetBitContext *gb)
     for (;;) {
         if (get_bits_count(gb) >= gb->size_in_bits) {
             if (gb->size_in_bits == 8 &&
-                (ctx->divx_version >= 0 || ctx->xvid_build >= 0)) {
-                av_log(s->avctx, AV_LOG_WARNING, "frame skip %d\n", gb->size_in_bits);
+                (ctx->divx_version >= 0 || ctx->xvid_build >= 0) || s->codec_tag == AV_RL32("QMP4")) {
+                av_log(s->avctx, AV_LOG_VERBOSE, "frame skip %d\n", gb->size_in_bits);
                 return FRAME_SKIPPED;  // divx bug
             } else
                 return -1;  // end of stream
@@ -2431,64 +2612,33 @@ end:
         s->low_delay = 1;
     s->avctx->has_b_frames = !s->low_delay;
 
-    if (s->workaround_bugs & FF_BUG_AUTODETECT) {
-        if (s->codec_tag == AV_RL32("XVIX"))
-            s->workaround_bugs |= FF_BUG_XVID_ILACE;
-
-        if (s->codec_tag == AV_RL32("UMP4"))
-            s->workaround_bugs |= FF_BUG_UMP4;
-
-        if (ctx->divx_version >= 500 && ctx->divx_build < 1814)
-            s->workaround_bugs |= FF_BUG_QPEL_CHROMA;
-
-        if (ctx->divx_version > 502 && ctx->divx_build < 1814)
-            s->workaround_bugs |= FF_BUG_QPEL_CHROMA2;
-
-        if (ctx->xvid_build <= 3U)
-            s->padding_bug_score = 256 * 256 * 256 * 64;
-
-        if (ctx->xvid_build <= 1U)
-            s->workaround_bugs |= FF_BUG_QPEL_CHROMA;
-
-        if (ctx->xvid_build <= 12U)
-            s->workaround_bugs |= FF_BUG_EDGE;
-
-        if (ctx->xvid_build <= 32U)
-            s->workaround_bugs |= FF_BUG_DC_CLIP;
-
-        if (ctx->lavc_build < 4653U)
-            s->workaround_bugs |= FF_BUG_STD_QPEL;
-
-        if (ctx->lavc_build < 4655U)
-            s->workaround_bugs |= FF_BUG_DIRECT_BLOCKSIZE;
-
-        if (ctx->lavc_build < 4670U)
-            s->workaround_bugs |= FF_BUG_EDGE;
-
-        if (ctx->lavc_build <= 4712U)
-            s->workaround_bugs |= FF_BUG_DC_CLIP;
-
-        if (ctx->divx_version >= 0)
-            s->workaround_bugs |= FF_BUG_DIRECT_BLOCKSIZE;
-
-        if (ctx->divx_version == 501 && ctx->divx_build == 20020416)
-            s->padding_bug_score = 256 * 256 * 256 * 64;
+    return decode_vop_header(ctx, gb);
+}
 
-        if (ctx->divx_version < 500U)
-            s->workaround_bugs |= FF_BUG_EDGE;
+av_cold void ff_mpeg4videodec_static_init(void) { /* sets up the shared RL/VLC tables; only the first call does any work */
+    static int done = 0; /* stays 0 until the tables below have been set up once */
 
-        if (ctx->divx_version >= 0)
-            s->workaround_bugs |= FF_BUG_HPEL_CHROMA;
+    if (!done) { /* later calls skip the whole body */
+        ff_init_rl(&ff_mpeg4_rl_intra, ff_mpeg4_static_rl_table_store[0]); /* each RL table gets its own slot of the static store */
+        ff_init_rl(&ff_rvlc_rl_inter, ff_mpeg4_static_rl_table_store[1]);
+        ff_init_rl(&ff_rvlc_rl_intra, ff_mpeg4_static_rl_table_store[2]);
+        INIT_VLC_RL(ff_mpeg4_rl_intra, 554); /* NOTE(review): second argument is presumably the static table size -- confirm against INIT_VLC_RL */
+        INIT_VLC_RL(ff_rvlc_rl_inter, 1072);
+        INIT_VLC_RL(ff_rvlc_rl_intra, 1072);
+        INIT_VLC_STATIC(&dc_lum, DC_VLC_BITS, 10 /* 13 */,
+                        &ff_mpeg4_DCtab_lum[0][1], 2, 1,
+                        &ff_mpeg4_DCtab_lum[0][0], 2, 1, 512);
+        INIT_VLC_STATIC(&dc_chrom, DC_VLC_BITS, 10 /* 13 */,
+                        &ff_mpeg4_DCtab_chrom[0][1], 2, 1,
+                        &ff_mpeg4_DCtab_chrom[0][0], 2, 1, 512);
+        INIT_VLC_STATIC(&sprite_trajectory, SPRITE_TRAJ_VLC_BITS, 15,
+                        &ff_sprite_trajectory_tab[0][1], 4, 2,
+                        &ff_sprite_trajectory_tab[0][0], 4, 2, 128);
+        INIT_VLC_STATIC(&mb_type_b_vlc, MB_TYPE_B_VLC_BITS, 4,
+                        &ff_mb_type_b_tab[0][1], 2, 1,
+                        &ff_mb_type_b_tab[0][0], 2, 1, 16);
+        done = 1; /* set last, after every table above; NOTE(review): plain static int, no synchronisation is visible here */
     }
-
-
-    if (s->avctx->debug & FF_DEBUG_BUGS)
-        av_log(s->avctx, AV_LOG_DEBUG,
-               "bugs: %X lavc_build:%d xvid_build:%d divx_version:%d divx_build:%d %s\n",
-               s->workaround_bugs, ctx->lavc_build, ctx->xvid_build,
-               ctx->divx_version, ctx->divx_build, s->divx_packed ? "p" : "");
-
-    return decode_vop_header(ctx, gb);
 }
 
 int ff_mpeg4_frame_end(AVCodecContext *avctx, const uint8_t *buf, int buf_size)
@@ -2496,33 +2646,31 @@ int ff_mpeg4_frame_end(AVCodecContext *avctx, const uint8_t *buf, int buf_size)
     Mpeg4DecContext *ctx = avctx->priv_data;
     MpegEncContext    *s = &ctx->m;
 
-    /* divx 5.01+ bistream reorder stuff */
+    /* divx 5.01+ bitstream reorder stuff */
+    /* Since this clobbers the input buffer and hwaccel codecs still need the
+     * data during hwaccel->end_frame we should not do this any earlier */
     if (s->divx_packed) {
-        int current_pos     = get_bits_count(&s->gb) >> 3;
+        int current_pos     = s->gb.buffer == s->bitstream_buffer ? 0 : (get_bits_count(&s->gb) >> 3);
         int startcode_found = 0;
 
-        if (buf_size - current_pos > 5) {
+        if (buf_size - current_pos > 7) {
+
             int i;
-            for (i = current_pos; i < buf_size - 3; i++)
+            for (i = current_pos; i < buf_size - 4; i++)
+
                 if (buf[i]     == 0 &&
                     buf[i + 1] == 0 &&
                     buf[i + 2] == 1 &&
                     buf[i + 3] == 0xB6) {
-                    startcode_found = 1;
+                    startcode_found = !(buf[i + 4] & 0x40);
                     break;
                 }
         }
-        if (s->gb.buffer == s->bitstream_buffer && buf_size > 7 &&
-            ctx->xvid_build >= 0) {       // xvid style
-            startcode_found = 1;
-            current_pos     = 0;
-        }
 
         if (startcode_found) {
-            av_fast_malloc(&s->bitstream_buffer,
+            av_fast_padded_malloc(&s->bitstream_buffer,
                            &s->allocated_bitstream_buffer_size,
-                           buf_size - current_pos +
-                           FF_INPUT_BUFFER_PADDING_SIZE);
+                           buf_size - current_pos);
             if (!s->bitstream_buffer)
                 return AVERROR(ENOMEM);
             memcpy(s->bitstream_buffer, buf + current_pos,
@@ -2546,13 +2694,11 @@ static int mpeg4_update_thread_context(AVCodecContext *dst,
     if (ret < 0)
         return ret;
 
+    memcpy(((uint8_t*)s) + sizeof(MpegEncContext), ((uint8_t*)s1) + sizeof(MpegEncContext), sizeof(Mpeg4DecContext) - sizeof(MpegEncContext));
+
     if (CONFIG_MPEG4_DECODER && !init && s1->xvid_build >= 0)
         ff_xvididct_init(&s->m.idsp, dst);
 
-    s->shape               = s1->shape;
-    s->time_increment_bits = s1->time_increment_bits;
-    s->xvid_build          = s1->xvid_build;
-
     return 0;
 }
 
@@ -2561,7 +2707,6 @@ static av_cold int decode_init(AVCodecContext *avctx)
     Mpeg4DecContext *ctx = avctx->priv_data;
     MpegEncContext *s = &ctx->m;
     int ret;
-    static int done = 0;
 
     ctx->divx_version =
     ctx->divx_build   =
@@ -2571,31 +2716,10 @@ static av_cold int decode_init(AVCodecContext *avctx)
     if ((ret = ff_h263_decode_init(avctx)) < 0)
         return ret;
 
-    if (!done) {
-        done = 1;
-
-        ff_init_rl(&ff_mpeg4_rl_intra, ff_mpeg4_static_rl_table_store[0]);
-        ff_init_rl(&ff_rvlc_rl_inter, ff_mpeg4_static_rl_table_store[1]);
-        ff_init_rl(&ff_rvlc_rl_intra, ff_mpeg4_static_rl_table_store[2]);
-        INIT_VLC_RL(ff_mpeg4_rl_intra, 554);
-        INIT_VLC_RL(ff_rvlc_rl_inter, 1072);
-        INIT_VLC_RL(ff_rvlc_rl_intra, 1072);
-        INIT_VLC_STATIC(&dc_lum, DC_VLC_BITS, 10 /* 13 */,
-                        &ff_mpeg4_DCtab_lum[0][1], 2, 1,
-                        &ff_mpeg4_DCtab_lum[0][0], 2, 1, 512);
-        INIT_VLC_STATIC(&dc_chrom, DC_VLC_BITS, 10 /* 13 */,
-                        &ff_mpeg4_DCtab_chrom[0][1], 2, 1,
-                        &ff_mpeg4_DCtab_chrom[0][0], 2, 1, 512);
-        INIT_VLC_STATIC(&sprite_trajectory, SPRITE_TRAJ_VLC_BITS, 15,
-                        &ff_sprite_trajectory_tab[0][1], 4, 2,
-                        &ff_sprite_trajectory_tab[0][0], 4, 2, 128);
-        INIT_VLC_STATIC(&mb_type_b_vlc, MB_TYPE_B_VLC_BITS, 4,
-                        &ff_mb_type_b_tab[0][1], 2, 1,
-                        &ff_mb_type_b_tab[0][0], 2, 1, 16);
-    }
+    ff_mpeg4videodec_static_init();
 
     s->h263_pred = 1;
-    s->low_delay = 0; /* default, might be overriden in the vol header during header parsing */
+    s->low_delay = 0; /* default, might be overridden in the vol header during header parsing */
     s->decode_mb = mpeg4_decode_mb;
     ctx->time_increment_bits = 4; /* default value for broken headers */
 
@@ -2622,6 +2746,27 @@ static const AVProfile mpeg4_video_profiles[] = {
     { FF_PROFILE_MPEG4_ADVANCED_SCALABLE_TEXTURE, "Advanced Scalable Texture Profile" },
     { FF_PROFILE_MPEG4_SIMPLE_STUDIO,             "Simple Studio Profile" },
     { FF_PROFILE_MPEG4_ADVANCED_SIMPLE,           "Advanced Simple Profile" },
+    { FF_PROFILE_UNKNOWN },
+};
+
+static const AVOption mpeg4_options[] = { /* private decoder options; field offsets are taken within MpegEncContext */
+    {"quarter_sample", "1/4 subpel MC", offsetof(MpegEncContext, quarter_sample), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, 0},
+    {"divx_packed", "divx style packed b frames", offsetof(MpegEncContext, divx_packed), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, 0},
+    {NULL} /* end-of-table marker */
+};
+
+static const AVClass mpeg4_class = { /* referenced by ff_mpeg4_decoder.priv_class */
+    .class_name = "MPEG4 Video Decoder",
+    .item_name  = av_default_item_name,
+    .option     = mpeg4_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+static const AVClass mpeg4_vdpau_class = { /* referenced by ff_mpeg4_vdpau_decoder.priv_class */
+    .class_name = "MPEG4 Video VDPAU Decoder",
+    .item_name  = av_default_item_name,
+    .option     = mpeg4_options,
+    .version    = LIBAVUTIL_VERSION_INT,
 };
 
 AVCodec ff_mpeg4_decoder = {
@@ -2637,7 +2782,28 @@ AVCodec ff_mpeg4_decoder = {
                              CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY |
                              CODEC_CAP_FRAME_THREADS,
     .flush                 = ff_mpeg_flush,
+    .max_lowres            = 3,
     .pix_fmts              = ff_h263_hwaccel_pixfmt_list_420,
     .profiles              = NULL_IF_CONFIG_SMALL(mpeg4_video_profiles),
     .update_thread_context = ONLY_IF_THREADS_ENABLED(mpeg4_update_thread_context),
+    .priv_class = &mpeg4_class,
+};
+
+
+#if CONFIG_MPEG4_VDPAU_DECODER
+AVCodec ff_mpeg4_vdpau_decoder = { /* VDPAU variant of the MPEG-4 part 2 decoder; shares decode_init() with ff_mpeg4_decoder */
+    .name           = "mpeg4_vdpau",
+    .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 (VDPAU)"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_MPEG4,
+    .priv_data_size = sizeof(Mpeg4DecContext), /* decode_init() writes Mpeg4DecContext fields through priv_data */
+    .init           = decode_init,
+    .close          = ff_h263_decode_end,
+    .decode         = ff_h263_decode_frame,
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY |
+                      CODEC_CAP_HWACCEL_VDPAU,
+    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_VDPAU_MPEG4,
+                                                  AV_PIX_FMT_NONE },
+    .priv_class     = &mpeg4_vdpau_class,
 };
+#endif
diff --git a/libavcodec/mpeg4videoenc.c b/libavcodec/mpeg4videoenc.c
index 6b87ec7..6a3d7e7 100644
--- a/libavcodec/mpeg4videoenc.c
+++ b/libavcodec/mpeg4videoenc.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2000,2001 Fabrice Bellard
  * Copyright (c) 2002-2010 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -208,7 +208,7 @@ static inline int decide_ac_pred(MpegEncContext *s, int16_t block[6][64],
 }
 
 /**
- * modify mb_type & qscale so that encoding is acually possible in mpeg4
+ * modify mb_type & qscale so that encoding is actually possible in mpeg4
  */
 void ff_clean_mpeg4_qscales(MpegEncContext *s)
 {
@@ -278,19 +278,19 @@ static inline void mpeg4_encode_dc(PutBitContext *s, int level, int n)
 
     if (n < 4) {
         /* luminance */
-        put_bits(&s->pb, ff_mpeg4_DCtab_lum[size][1], ff_mpeg4_DCtab_lum[size][0]);
+        put_bits(s, ff_mpeg4_DCtab_lum[size][1], ff_mpeg4_DCtab_lum[size][0]);
     } else {
         /* chrominance */
-        put_bits(&s->pb, ff_mpeg4_DCtab_chrom[size][1], ff_mpeg4_DCtab_chrom[size][0]);
+        put_bits(s, ff_mpeg4_DCtab_chrom[size][1], ff_mpeg4_DCtab_chrom[size][0]);
     }
 
     /* encode remaining bits */
     if (size > 0) {
         if (level < 0)
             level = (-level) ^ ((1 << size) - 1);
-        put_bits(&s->pb, size, level);
+        put_bits(s, size, level);
         if (size > 8)
-            put_bits(&s->pb, 1, 1);
+            put_bits(s, 1, 1);
     }
 #endif
 }
@@ -525,9 +525,9 @@ void ff_mpeg4_encode_mb(MpegEncContext *s, int16_t block[6][64],
                     s->last_mv[i][1][1] = 0;
             }
 
-            assert(s->dquant >= -2 && s->dquant <= 2);
-            assert((s->dquant & 1) == 0);
-            assert(mb_type >= 0);
+            av_assert2(s->dquant >= -2 && s->dquant <= 2);
+            av_assert2((s->dquant & 1) == 0);
+            av_assert2(mb_type >= 0);
 
             /* nothing to do if this MB was skipped in the next P Frame */
             if (s->next_picture.mbskip_table[s->mb_y * s->mb_stride + s->mb_x]) {  // FIXME avoid DCT & ...
@@ -547,7 +547,7 @@ void ff_mpeg4_encode_mb(MpegEncContext *s, int16_t block[6][64],
 
             if ((cbp | motion_x | motion_y | mb_type) == 0) {
                 /* direct MB with MV={0,0} */
-                assert(s->dquant == 0);
+                av_assert2(s->dquant == 0);
 
                 put_bits(&s->pb, 1, 1); /* mb not coded modb1=1 */
 
@@ -584,12 +584,12 @@ void ff_mpeg4_encode_mb(MpegEncContext *s, int16_t block[6][64],
                 s->misc_bits += get_bits_diff(s);
 
             if (!mb_type) {
-                assert(s->mv_dir & MV_DIRECT);
+                av_assert2(s->mv_dir & MV_DIRECT);
                 ff_h263_encode_motion_vector(s, motion_x, motion_y, 1);
                 s->b_count++;
                 s->f_count++;
             } else {
-                assert(mb_type > 0 && mb_type < 4);
+                av_assert2(mb_type > 0 && mb_type < 4);
                 if (s->mv_type != MV_TYPE_FIELD) {
                     if (s->mv_dir & MV_DIR_FORWARD) {
                         ff_h263_encode_motion_vector(s,
@@ -669,10 +669,6 @@ void ff_mpeg4_encode_mb(MpegEncContext *s, int16_t block[6][64],
 
                     x = s->mb_x * 16;
                     y = s->mb_y * 16;
-                    if (x + 16 > s->width)
-                        x = s->width - 16;
-                    if (y + 16 > s->height)
-                        y = s->height - 16;
 
                     offset = x + y * s->linesize;
                     p_pic  = s->new_picture.f->data[0] + offset;
@@ -689,7 +685,21 @@ void ff_mpeg4_encode_mb(MpegEncContext *s, int16_t block[6][64],
                         b_pic = pic->f->data[0] + offset;
                         if (!pic->shared)
                             b_pic += INPLACE_OFFSET;
-                        diff = s->mecc.sad[0](NULL, p_pic, b_pic, s->linesize, 16);
+
+                        if (x + 16 > s->width || y + 16 > s->height) {
+                            int x1, y1;
+                            int xe = FFMIN(16, s->width - x);
+                            int ye = FFMIN(16, s->height - y);
+                            diff = 0;
+                            for (y1 = 0; y1 < ye; y1++) {
+                                for (x1 = 0; x1 < xe; x1++) {
+                                    diff += FFABS(p_pic[x1 + y1 * s->linesize] - b_pic[x1 + y1 * s->linesize]);
+                                }
+                            }
+                            diff = diff * 256 / (xe * ye);
+                        } else {
+                            diff = s->mecc.sad[0](NULL, p_pic, b_pic, s->linesize, 16);
+                        }
                         if (diff > s->qscale * 70) {  // FIXME check that 70 is optimal
                             s->mb_skipped = 0;
                             break;
@@ -754,7 +764,7 @@ void ff_mpeg4_encode_mb(MpegEncContext *s, int16_t block[6][64],
                 if (s->dquant)
                     put_bits(pb2, 2, dquant_code[s->dquant + 2]);
 
-                assert(!s->progressive_sequence);
+                av_assert2(!s->progressive_sequence);
                 if (cbp)
                     put_bits(pb2, 1, s->interlaced_dct);
                 put_bits(pb2, 1, 1);
@@ -778,7 +788,7 @@ void ff_mpeg4_encode_mb(MpegEncContext *s, int16_t block[6][64],
                                              s->mv[0][1][1] - pred_y,
                                              s->f_code);
             } else {
-                assert(s->mv_type == MV_TYPE_8X8);
+                av_assert2(s->mv_type == MV_TYPE_8X8);
                 put_bits(&s->pb,
                          ff_h263_inter_MCBPC_bits[cbpc + 16],
                          ff_h263_inter_MCBPC_code[cbpc + 16]);
@@ -894,7 +904,7 @@ void ff_set_mpeg4_time(MpegEncContext *s)
         ff_mpeg4_init_direct_mv(s);
     } else {
         s->last_time_base = s->time_base;
-        s->time_base      = s->time / s->avctx->time_base.den;
+        s->time_base      = FFUDIV(s->time, s->avctx->time_base.den);
     }
 }
 
@@ -910,13 +920,12 @@ static void mpeg4_encode_gop_header(MpegEncContext *s)
     if (s->reordered_input_picture[1])
         time = FFMIN(time, s->reordered_input_picture[1]->f->pts);
     time = time * s->avctx->time_base.num;
+    s->last_time_base = FFUDIV(time, s->avctx->time_base.den);
 
-    seconds  = time / s->avctx->time_base.den;
-    minutes  = seconds / 60;
-    seconds %= 60;
-    hours    = minutes / 60;
-    minutes %= 60;
-    hours   %= 24;
+    seconds = FFUDIV(time, s->avctx->time_base.den);
+    minutes = FFUDIV(seconds, 60); seconds = FFUMOD(seconds, 60);
+    hours   = FFUDIV(minutes, 60); minutes = FFUMOD(minutes, 60);
+    hours   = FFUMOD(hours  , 24);
 
     put_bits(&s->pb, 5, hours);
     put_bits(&s->pb, 6, minutes);
@@ -926,8 +935,6 @@ static void mpeg4_encode_gop_header(MpegEncContext *s)
     put_bits(&s->pb, 1, !!(s->flags & CODEC_FLAG_CLOSED_GOP));
     put_bits(&s->pb, 1, 0);  // broken link == NO
 
-    s->last_time_base = time / s->avctx->time_base.den;
-
     ff_mpeg4_stuffing(&s->pb);
 }
 
@@ -1011,6 +1018,8 @@ static void mpeg4_encode_vol_header(MpegEncContext *s,
 
     put_bits(&s->pb, 4, s->aspect_ratio_info); /* aspect ratio info */
     if (s->aspect_ratio_info == FF_ASPECT_EXTENDED) {
+        av_reduce(&s->avctx->sample_aspect_ratio.num, &s->avctx->sample_aspect_ratio.den,
+                   s->avctx->sample_aspect_ratio.num,  s->avctx->sample_aspect_ratio.den, 255);
         put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.num);
         put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.den);
     }
@@ -1099,11 +1108,10 @@ void ff_mpeg4_encode_picture_header(MpegEncContext *s, int picture_number)
     put_bits(&s->pb, 16, VOP_STARTCODE);    /* vop header */
     put_bits(&s->pb, 2, s->pict_type - 1);  /* pict type: I = 0 , P = 1 */
 
-    assert(s->time >= 0);
-    time_div  = s->time / s->avctx->time_base.den;
-    time_mod  = s->time % s->avctx->time_base.den;
+    time_div  = FFUDIV(s->time, s->avctx->time_base.den);
+    time_mod  = FFUMOD(s->time, s->avctx->time_base.den);
     time_incr = time_div - s->last_time_base;
-    assert(time_incr >= 0);
+    av_assert0(time_incr >= 0);
     while (time_incr--)
         put_bits(&s->pb, 1, 1);
 
@@ -1191,8 +1199,8 @@ static av_cold void init_uni_mpeg4_rl_tab(RLTable *rl, uint32_t *bits_tab,
 {
     int slevel, run, last;
 
-    assert(MAX_LEVEL >= 64);
-    assert(MAX_RUN >= 63);
+    av_assert0(MAX_LEVEL >= 64);
+    av_assert0(MAX_RUN >= 63);
 
     for (slevel = -64; slevel < 64; slevel++) {
         if (slevel == 0)
@@ -1287,6 +1295,11 @@ static av_cold int encode_init(AVCodecContext *avctx)
     int ret;
     static int done = 0;
 
+    if (avctx->width >= (1<<13) || avctx->height >= (1<<13)) {
+        av_log(avctx, AV_LOG_ERROR, "dimensions too large for MPEG-4\n");
+        return AVERROR(EINVAL);
+    }
+
     if ((ret = ff_MPV_encode_init(avctx)) < 0)
         return ret;
 
diff --git a/libavcodec/mpeg_er.c b/libavcodec/mpeg_er.c
index 7a1b6b2..3d90582 100644
--- a/libavcodec/mpeg_er.c
+++ b/libavcodec/mpeg_er.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -24,6 +24,7 @@ static void set_erpic(ERPicture *dst, Picture *src)
 {
     int i;
 
+    memset(dst, 0, sizeof(*dst));
     if (!src) {
         dst->f  = NULL;
         dst->tf = NULL;
diff --git a/libavcodec/mpeg_er.h b/libavcodec/mpeg_er.h
index 1968184..bd74fbb 100644
--- a/libavcodec/mpeg_er.h
+++ b/libavcodec/mpeg_er.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mpegaudio.c b/libavcodec/mpegaudio.c
index 1a83635..cba5299 100644
--- a/libavcodec/mpegaudio.c
+++ b/libavcodec/mpegaudio.c
@@ -2,20 +2,20 @@
  * MPEG Audio common code
  * Copyright (c) 2001, 2002 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mpegaudio.h b/libavcodec/mpegaudio.h
index b556801..1591a17 100644
--- a/libavcodec/mpegaudio.h
+++ b/libavcodec/mpegaudio.h
@@ -1,20 +1,20 @@
 /*
  * copyright (c) 2001 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,8 +26,8 @@
 #ifndef AVCODEC_MPEGAUDIO_H
 #define AVCODEC_MPEGAUDIO_H
 
-#ifndef CONFIG_FLOAT
-#   define CONFIG_FLOAT 0
+#ifndef USE_FLOATS
+#   define USE_FLOATS 0
 #endif
 
 #include <stdint.h>
@@ -52,11 +52,13 @@
 #define WFRAC_BITS  16   /* fractional bits for window */
 #endif
 
+#define IMDCT_SCALAR 1.759
+
 #define FRAC_ONE    (1 << FRAC_BITS)
 
 #define FIX(a)   ((int)((a) * FRAC_ONE))
 
-#if CONFIG_FLOAT
+#if USE_FLOATS
 #   define INTFLOAT float
 typedef float MPA_INT;
 typedef float OUT_INT;
diff --git a/libavcodec/mpegaudio_parser.c b/libavcodec/mpegaudio_parser.c
index 58ea452..3d9e946 100644
--- a/libavcodec/mpegaudio_parser.c
+++ b/libavcodec/mpegaudio_parser.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2003 Fabrice Bellard
  * Copyright (c) 2003 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -55,6 +55,7 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
             int inc= FFMIN(buf_size - i, s->frame_size);
             i += inc;
             s->frame_size -= inc;
+            state = 0;
 
             if(!s->frame_size){
                 next= i;
@@ -63,10 +64,11 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
         }else{
             while(i<buf_size){
                 int ret, sr, channels, bit_rate, frame_size;
+                enum AVCodecID codec_id;
 
                 state= (state<<8) + buf[i++];
 
-                ret = avpriv_mpa_decode_header(avctx, state, &sr, &channels, &frame_size, &bit_rate);
+                ret = avpriv_mpa_decode_header2(state, &sr, &channels, &frame_size, &bit_rate, &codec_id);
                 if (ret < 4) {
                     if (i > 4)
                         s->header_count = -2;
@@ -77,10 +79,11 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
                     s->header_count++;
                     s->frame_size = ret-4;
 
-                    if (s->header_count > 0) {
+                    if (s->header_count > 0 + (avctx->codec_id != AV_CODEC_ID_NONE && avctx->codec_id != codec_id)) {
                         avctx->sample_rate= sr;
                         avctx->channels   = channels;
                         s1->duration      = frame_size;
+                        avctx->codec_id   = codec_id;
                         if (s->no_bitrate || !avctx->bit_rate) {
                             s->no_bitrate = 1;
                             avctx->bit_rate += (bit_rate - avctx->bit_rate) / s->header_count;
diff --git a/libavcodec/mpegaudio_tablegen.c b/libavcodec/mpegaudio_tablegen.c
index b4c240b..90c9de4 100644
--- a/libavcodec/mpegaudio_tablegen.c
+++ b/libavcodec/mpegaudio_tablegen.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mpegaudio_tablegen.h b/libavcodec/mpegaudio_tablegen.h
index 8a3e51a..f9557c9 100644
--- a/libavcodec/mpegaudio_tablegen.h
+++ b/libavcodec/mpegaudio_tablegen.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -39,6 +39,7 @@ static float exp_table_float[512];
 static float expval_table_float[512][16];
 
 #define FRAC_BITS 23
+#define IMDCT_SCALAR 1.759
 
 static void mpegaudio_tableinit(void)
 {
@@ -48,7 +49,7 @@ static void mpegaudio_tableinit(void)
         double f, fm;
         int e, m;
         /* cbrtf() isn't available on all systems, so we use powf(). */
-        f  = value * powf(value, 1.0 / 3.0) * pow(2, (i & 3) * 0.25);
+        f  = value / IMDCT_SCALAR * pow(value, 1.0 / 3.0) * pow(2, (i & 3) * 0.25);
         fm = frexp(f, &e);
         m  = (uint32_t)(fm * (1LL << 31) + 0.5);
         e += FRAC_BITS - 31 + 5 - 100;
@@ -60,9 +61,9 @@ static void mpegaudio_tableinit(void)
     for (exponent = 0; exponent < 512; exponent++) {
         for (value = 0; value < 16; value++) {
             /* cbrtf() isn't available on all systems, so we use powf(). */
-            double f = (double)value * powf(value, 1.0 / 3.0) * pow(2, (exponent - 400) * 0.25 + FRAC_BITS + 5);
+            double f = (double)value * pow(value, 1.0 / 3.0) * pow(2, (exponent - 400) * 0.25 + FRAC_BITS + 5) / IMDCT_SCALAR;
             /* llrint() isn't always available, so round and cast manually. */
-            expval_table_fixed[exponent][value] = (long long int) (f >= 0 ? floor(f + 0.5) : ceil(f - 0.5));
+            expval_table_fixed[exponent][value] = (long long int) (f < 0xFFFFFFFF ? floor(f + 0.5) : 0xFFFFFFFF);
             expval_table_float[exponent][value] = f;
         }
         exp_table_fixed[exponent] = expval_table_fixed[exponent][1];
diff --git a/libavcodec/mpegaudiodata.c b/libavcodec/mpegaudiodata.c
index 009a02a..0569281 100644
--- a/libavcodec/mpegaudiodata.c
+++ b/libavcodec/mpegaudiodata.c
@@ -2,20 +2,20 @@
  * MPEG Audio common tables
  * copyright (c) 2002 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -29,11 +29,11 @@
 
 const uint16_t avpriv_mpa_bitrate_tab[2][3][15] = {
     { {0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448 },
-      {0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384 },
-      {0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320 } },
-    { {0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256},
-      {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160},
-      {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160}
+      {0, 32, 48, 56,  64,  80,  96, 112, 128, 160, 192, 224, 256, 320, 384 },
+      {0, 32, 40, 48,  56,  64,  80,  96, 112, 128, 160, 192, 224, 256, 320 } },
+    { {0, 32, 48, 56,  64,  80,  96, 112, 128, 144, 160, 176, 192, 224, 256},
+      {0,  8, 16, 24,  32,  40,  48,  56,  64,  80,  96, 112, 128, 144, 160},
+      {0,  8, 16, 24,  32,  40,  48,  56,  64,  80,  96, 112, 128, 144, 160}
     }
 };
 
diff --git a/libavcodec/mpegaudiodata.h b/libavcodec/mpegaudiodata.h
index 2b8ff65..29a2658 100644
--- a/libavcodec/mpegaudiodata.h
+++ b/libavcodec/mpegaudiodata.h
@@ -2,20 +2,20 @@
  * MPEG Audio common tables
  * copyright (c) 2002 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mpegaudiodec_fixed.c b/libavcodec/mpegaudiodec_fixed.c
index 6f21b0c..904c885 100644
--- a/libavcodec/mpegaudiodec_fixed.c
+++ b/libavcodec/mpegaudiodec_fixed.c
@@ -1,27 +1,27 @@
 /*
  * Fixed-point MPEG audio decoder
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "config.h"
 #include "libavutil/samplefmt.h"
 
-#define CONFIG_FLOAT 0
+#define USE_FLOATS 0
 
 #include "mpegaudio.h"
 
diff --git a/libavcodec/mpegaudiodec_float.c b/libavcodec/mpegaudiodec_float.c
index 3a76055..35f07fa 100644
--- a/libavcodec/mpegaudiodec_float.c
+++ b/libavcodec/mpegaudiodec_float.c
@@ -2,27 +2,27 @@
  * Float MPEG Audio decoder
  * Copyright (c) 2010 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "config.h"
 #include "libavutil/samplefmt.h"
 
-#define CONFIG_FLOAT 1
+#define USE_FLOATS 1
 
 #include "mpegaudio.h"
 
diff --git a/libavcodec/mpegaudiodec_template.c b/libavcodec/mpegaudiodec_template.c
index 9ce03ef..05237070 100644
--- a/libavcodec/mpegaudiodec_template.c
+++ b/libavcodec/mpegaudiodec_template.c
@@ -2,20 +2,20 @@
  * MPEG Audio decoder
  * Copyright (c) 2001, 2002 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -28,6 +28,7 @@
 #include "libavutil/avassert.h"
 #include "libavutil/channel_layout.h"
 #include "libavutil/float_dsp.h"
+#include "libavutil/libm.h"
 #include "avcodec.h"
 #include "get_bits.h"
 #include "internal.h"
@@ -184,6 +185,8 @@ static void compute_band_indexes(MPADecodeContext *s, GranuleDef *g)
 {
     if (g->block_type == 2) {
         if (g->switch_point) {
+            if(s->sample_rate_index == 8)
+                avpriv_request_sample(s->avctx, "switch point in 8khz");
             /* if switched mode, we handle the 36 first samples as
                 long blocks.  For 8000Hz, we handle the 72 first
                 exponents as long blocks */
@@ -213,7 +216,7 @@ static inline int l1_unscale(int n, int mant, int scale_factor)
     shift   = scale_factor_modshift[scale_factor];
     mod     = shift & 3;
     shift >>= 2;
-    val     = MUL64(mant + (-1 << n) + 1, scale_factor_mult[n-1][mod]);
+    val     = MUL64((int)(mant + (-1U << n) + 1), scale_factor_mult[n-1][mod]);
     shift  += n;
     /* NOTE: at this point, 1 <= shift >= 21 + 15 */
     return (int)((val + (1LL << (shift - 1))) >> shift);
@@ -243,7 +246,10 @@ static inline int l3_unscale(int value, int exponent)
     e  = table_4_3_exp  [4 * value + (exponent & 3)];
     m  = table_4_3_value[4 * value + (exponent & 3)];
     e -= exponent >> 2;
-    assert(e >= 1);
+#ifdef DEBUG
+    if(e < 1)
+        av_log(NULL, AV_LOG_WARNING, "l3_unscale: e is %d\n", e);
+#endif
     if (e > 31)
         return 0;
     m = (m + (1 << (e - 1))) >> e;
@@ -307,7 +313,7 @@ static av_cold void decode_init_static(void)
                  INIT_VLC_USE_NEW_STATIC);
         offset += huff_vlc_tables_sizes[i];
     }
-    assert(offset == FF_ARRAY_ELEMS(huff_vlc_tables));
+    av_assert0(offset == FF_ARRAY_ELEMS(huff_vlc_tables));
 
     offset = 0;
     for (i = 0; i < 2; i++) {
@@ -318,7 +324,7 @@ static av_cold void decode_init_static(void)
                  INIT_VLC_USE_NEW_STATIC);
         offset += huff_quad_vlc_tables_sizes[i];
     }
-    assert(offset == FF_ARRAY_ELEMS(huff_quad_vlc_tables));
+    av_assert0(offset == FF_ARRAY_ELEMS(huff_quad_vlc_tables));
 
     for (i = 0; i < 9; i++) {
         k = 0;
@@ -371,7 +377,7 @@ static av_cold void decode_init_static(void)
 
         for (j = 0; j < 2; j++) {
             e = -(j + 1) * ((i + 1) >> 1);
-            f = pow(2.0, e / 4.0);
+            f = exp2(e / 4.0);
             k = i & 1;
             is_table_lsf[j][k ^ 1][i] = FIXR(f);
             is_table_lsf[j][k    ][i] = FIXR(1.0);
@@ -386,7 +392,7 @@ static av_cold void decode_init_static(void)
         ci = ci_table[i];
         cs = 1.0 / sqrt(1.0 + ci * ci);
         ca = cs * ci;
-#if !CONFIG_FLOAT
+#if !USE_FLOATS
         csa_table[i][0] = FIXHR(cs/4);
         csa_table[i][1] = FIXHR(ca/4);
         csa_table[i][2] = FIXHR(ca/4) + FIXHR(cs/4);
@@ -808,7 +814,7 @@ static void switch_buffer(MPADecodeContext *s, int *pos, int *end_pos,
     if (s->in_gb.buffer && *pos >= s->gb.size_in_bits) {
         s->gb           = s->in_gb;
         s->in_gb.buffer = NULL;
-        assert((get_bits_count(&s->gb) & 7) == 0);
+        av_assert2((get_bits_count(&s->gb) & 7) == 0);
         skip_bits_long(&s->gb, *pos - *end_pos);
         *end_pos2 =
         *end_pos  = *end_pos2 + get_bits_count(&s->gb) - *pos;
@@ -822,7 +828,7 @@ static void switch_buffer(MPADecodeContext *s, int *pos, int *end_pos,
                 v = -v;
             *dst = v;
 */
-#if CONFIG_FLOAT
+#if USE_FLOATS
 #define READ_FLIP_SIGN(dst,src)                     \
     v = AV_RN32A(src) ^ (get_bits1(&s->gb) << 31);  \
     AV_WN32A(dst, v);
@@ -937,7 +943,7 @@ static int huffman_decode(MPADecodeContext *s, GranuleDef *g,
                 s_index -= 4;
                 skip_bits_long(&s->gb, last_pos - pos);
                 av_log(s->avctx, AV_LOG_INFO, "overread, skip %d enddists: %d %d\n", last_pos - pos, end_pos-pos, end_pos2-pos);
-                if(s->err_recognition & AV_EF_BITSTREAM)
+                if(s->err_recognition & (AV_EF_BITSTREAM|AV_EF_COMPLIANT))
                     s_index=0;
                 break;
             }
@@ -964,10 +970,10 @@ static int huffman_decode(MPADecodeContext *s, GranuleDef *g,
     }
     /* skip extension bits */
     bits_left = end_pos2 - get_bits_count(&s->gb);
-    if (bits_left < 0 && (s->err_recognition & AV_EF_BUFFER)) {
+    if (bits_left < 0 && (s->err_recognition & (AV_EF_BUFFER|AV_EF_COMPLIANT))) {
         av_log(s->avctx, AV_LOG_ERROR, "bits_left=%d\n", bits_left);
         s_index=0;
-    } else if (bits_left > 0 && (s->err_recognition & AV_EF_BUFFER)) {
+    } else if (bits_left > 0 && (s->err_recognition & (AV_EF_BUFFER|AV_EF_AGGRESSIVE))) {
         av_log(s->avctx, AV_LOG_ERROR, "bits_left=%d\n", bits_left);
         s_index = 0;
     }
@@ -1131,7 +1137,7 @@ found2:
         /* ms stereo ONLY */
         /* NOTE: the 1/sqrt(2) normalization factor is included in the
            global gain */
-#if CONFIG_FLOAT
+#if USE_FLOATS
        s->fdsp.butterflies_float(g0->sb_hybrid, g1->sb_hybrid, 576);
 #else
         tab0 = g0->sb_hybrid;
@@ -1146,7 +1152,18 @@ found2:
     }
 }
 
-#if CONFIG_FLOAT
+#if USE_FLOATS
+#if HAVE_MIPSFPU
+#   include "mips/compute_antialias_float.h"
+#endif /* HAVE_MIPSFPU */
+#else
+#if HAVE_MIPSDSPR1
+#   include "mips/compute_antialias_fixed.h"
+#endif /* HAVE_MIPSDSPR1 */
+#endif /* USE_FLOATS */
+
+#ifndef compute_antialias
+#if USE_FLOATS
 #define AA(j) do {                                                      \
         float tmp0 = ptr[-1-j];                                         \
         float tmp1 = ptr[   j];                                         \
@@ -1192,6 +1209,7 @@ static void compute_antialias(MPADecodeContext *s, GranuleDef *g)
         ptr += 18;
     }
 }
+#endif /* compute_antialias */
 
 static void compute_imdct(MPADecodeContext *s, GranuleDef *g,
                           INTFLOAT *sb_samples, INTFLOAT *mdct_buf)
@@ -1361,9 +1379,8 @@ static int mp_decode_layer3(MPADecodeContext *s)
     if (!s->adu_mode) {
         int skip;
         const uint8_t *ptr = s->gb.buffer + (get_bits_count(&s->gb)>>3);
-        int extrasize = av_clip(get_bits_left(&s->gb) >> 3, 0,
-                                FFMAX(0, LAST_BUF_SIZE - s->last_buf_size));
-        assert((get_bits_count(&s->gb) & 7) == 0);
+        int extrasize = av_clip(get_bits_left(&s->gb) >> 3, 0, EXTRABYTES);
+        av_assert1((get_bits_count(&s->gb) & 7) == 0);
         /* now we get bits from the main_data_begin offset */
         av_dlog(s->avctx, "seekback:%d, lastbuf:%d\n",
                 main_data_begin, s->last_buf_size);
@@ -1372,7 +1389,7 @@ static int mp_decode_layer3(MPADecodeContext *s)
         s->in_gb = s->gb;
         init_get_bits(&s->gb, s->last_buf, s->last_buf_size*8);
 #if !UNCHECKED_BITSTREAM_READER
-        s->gb.size_in_bits_plus8 += extrasize * 8;
+        s->gb.size_in_bits_plus8 += FFMAX(extrasize, LAST_BUF_SIZE - s->last_buf_size) * 8;
 #endif
         s->last_buf_size <<= 3;
         for (gr = 0; gr < nb_granules && (s->last_buf_size >> 3) < main_data_begin; gr++) {
@@ -1554,9 +1571,6 @@ static int mp_decode_frame(MPADecodeContext *s, OUT_INT **samples,
     default:
         nb_frames = mp_decode_layer3(s);
 
-        if (nb_frames < 0)
-            return nb_frames;
-
         s->last_buf_size=0;
         if (s->in_gb.buffer) {
             align_get_bits(&s->gb);
@@ -1571,7 +1585,7 @@ static int mp_decode_frame(MPADecodeContext *s, OUT_INT **samples,
         }
 
         align_get_bits(&s->gb);
-        assert((get_bits_count(&s->gb) & 7) == 0);
+        av_assert1((get_bits_count(&s->gb) & 7) == 0);
         i = get_bits_left(&s->gb) >> 3;
 
         if (i < 0 || i > BACKSTEP_SIZE || nb_frames < 0) {
@@ -1579,19 +1593,20 @@ static int mp_decode_frame(MPADecodeContext *s, OUT_INT **samples,
                 av_log(s->avctx, AV_LOG_ERROR, "invalid new backstep %d\n", i);
             i = FFMIN(BACKSTEP_SIZE, buf_size - HEADER_SIZE);
         }
-        assert(i <= buf_size - HEADER_SIZE && i >= 0);
+        av_assert1(i <= buf_size - HEADER_SIZE && i >= 0);
         memcpy(s->last_buf + s->last_buf_size, s->gb.buffer + buf_size - HEADER_SIZE - i, i);
         s->last_buf_size += i;
     }
 
+    if(nb_frames < 0)
+        return nb_frames;
+
     /* get output buffer */
     if (!samples) {
         av_assert0(s->frame != NULL);
         s->frame->nb_samples = s->avctx->frame_size;
-        if ((ret = ff_get_buffer(s->avctx, s->frame, 0)) < 0) {
-            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        if ((ret = ff_get_buffer(s->avctx, s->frame, 0)) < 0)
             return ret;
-        }
         samples = (OUT_INT **)s->frame->extended_data;
     }
 
@@ -1627,10 +1642,19 @@ static int decode_frame(AVCodecContext * avctx, void *data, int *got_frame_ptr,
     uint32_t header;
     int ret;
 
+    while(buf_size && !*buf){
+        buf++;
+        buf_size--;
+    }
+
     if (buf_size < HEADER_SIZE)
         return AVERROR_INVALIDDATA;
 
     header = AV_RB32(buf);
+    if (header>>8 == AV_RB32("TAG")>>8) {
+        av_log(avctx, AV_LOG_DEBUG, "discarding ID3 tag\n");
+        return buf_size;
+    }
     if (ff_mpa_check_header(header) < 0) {
         av_log(avctx, AV_LOG_ERROR, "Header missing\n");
         return AVERROR_INVALIDDATA;
@@ -1651,6 +1675,7 @@ static int decode_frame(AVCodecContext * avctx, void *data, int *got_frame_ptr,
         av_log(avctx, AV_LOG_ERROR, "incomplete frame\n");
         return AVERROR_INVALIDDATA;
     } else if (s->frame_size < buf_size) {
+        av_log(avctx, AV_LOG_DEBUG, "incorrect frame size - multiple frames in buffer?\n");
         buf_size= s->frame_size;
     }
 
@@ -1680,7 +1705,9 @@ static int decode_frame(AVCodecContext * avctx, void *data, int *got_frame_ptr,
 static void mp_flush(MPADecodeContext *ctx)
 {
     memset(ctx->synth_buf, 0, sizeof(ctx->synth_buf));
+    memset(ctx->mdct_buf, 0, sizeof(ctx->mdct_buf)); /* zero mdct_buf as well, alongside synth_buf */
     ctx->last_buf_size = 0;
+    ctx->dither_state = 0; /* reset dither_state together with the buffers */
 }
 
 static void flush(AVCodecContext *avctx)
@@ -1697,6 +1724,7 @@ static int decode_frame_adu(AVCodecContext *avctx, void *data,
     MPADecodeContext *s = avctx->priv_data;
     uint32_t header;
     int len, ret;
+    int av_unused out_size;
 
     len = buf_size;
 
@@ -1885,10 +1913,8 @@ static int decode_frame_mp3on4(AVCodecContext *avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = MPA_FRAME_SIZE;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     out_samples = (OUT_INT **)frame->extended_data;
 
     // Discard too short frames
@@ -1902,7 +1928,7 @@ static int decode_frame_mp3on4(AVCodecContext *avctx, void *data,
         fsize = AV_RB16(buf) >> 4;
         fsize = FFMIN3(fsize, len, MPA_MAX_CODED_FRAME_SIZE);
         m     = s->mp3decctx[fr];
-        assert(m != NULL);
+        av_assert1(m);
 
         if (fsize < HEADER_SIZE) {
             av_log(avctx, AV_LOG_ERROR, "Frame size smaller than header size\n");
@@ -1910,8 +1936,10 @@ static int decode_frame_mp3on4(AVCodecContext *avctx, void *data,
         }
         header = (AV_RB32(buf) & 0x000fffff) | s->syncword; // patch header
 
-        if (ff_mpa_check_header(header) < 0) // Bad header, discard block
-            break;
+        if (ff_mpa_check_header(header) < 0) {
+            av_log(avctx, AV_LOG_ERROR, "Bad header, discard block\n");
+            return AVERROR_INVALIDDATA;
+        }
 
         avpriv_mpegaudio_decode_header((MPADecodeHeader *)m, header);
 
@@ -1927,8 +1955,13 @@ static int decode_frame_mp3on4(AVCodecContext *avctx, void *data,
         if (m->nb_channels > 1)
             outptr[1] = out_samples[s->coff[fr] + 1];
 
-        if ((ret = mp_decode_frame(m, outptr, buf, fsize)) < 0)
-            return ret;
+        if ((ret = mp_decode_frame(m, outptr, buf, fsize)) < 0) {
+            av_log(avctx, AV_LOG_ERROR, "failed to decode channel %d\n", ch);
+            memset(outptr[0], 0, MPA_FRAME_SIZE*sizeof(OUT_INT));
+            if (m->nb_channels > 1)
+                memset(outptr[1], 0, MPA_FRAME_SIZE*sizeof(OUT_INT));
+            ret = m->nb_channels * MPA_FRAME_SIZE*sizeof(OUT_INT);
+        }
 
         out_size += ret;
         buf      += fsize;
@@ -1936,6 +1969,10 @@ static int decode_frame_mp3on4(AVCodecContext *avctx, void *data,
 
         avctx->bit_rate += m->bit_rate;
     }
+    if (ch != avctx->channels) {
+        av_log(avctx, AV_LOG_ERROR, "failed to decode all channels\n");
+        return AVERROR_INVALIDDATA;
+    }
 
     /* update codec info */
     avctx->sample_rate = s->mp3decctx[0]->sample_rate;
diff --git a/libavcodec/mpegaudiodecheader.c b/libavcodec/mpegaudiodecheader.c
index 25e7319..5db1957 100644
--- a/libavcodec/mpegaudiodecheader.c
+++ b/libavcodec/mpegaudiodecheader.c
@@ -2,20 +2,20 @@
  * MPEG Audio header decoder
  * Copyright (c) 2001, 2002 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -112,7 +112,7 @@ int avpriv_mpegaudio_decode_header(MPADecodeHeader *s, uint32_t header)
     return 0;
 }
 
-int avpriv_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bit_rate)
+int avpriv_mpa_decode_header2(uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bit_rate, enum AVCodecID *codec_id)
 {
     MPADecodeHeader s1, *s = &s1;
 
@@ -125,16 +125,16 @@ int avpriv_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_r
 
     switch(s->layer) {
     case 1:
-        avctx->codec_id = AV_CODEC_ID_MP1;
+        *codec_id = AV_CODEC_ID_MP1;
         *frame_size = 384;
         break;
     case 2:
-        avctx->codec_id = AV_CODEC_ID_MP2;
+        *codec_id = AV_CODEC_ID_MP2;
         *frame_size = 1152;
         break;
     default:
     case 3:
-        avctx->codec_id = AV_CODEC_ID_MP3;
+        *codec_id = AV_CODEC_ID_MP3;
         if (s->lsf)
             *frame_size = 576;
         else
@@ -147,3 +147,8 @@ int avpriv_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_r
     *bit_rate = s->bit_rate;
     return s->frame_size;
 }
+
+int avpriv_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bit_rate)
+{ /* wrapper that keeps the AVCodecContext-based signature and forwards to avpriv_mpa_decode_header2() */
+    return avpriv_mpa_decode_header2(head, sample_rate, channels, frame_size, bit_rate, &avctx->codec_id); /* the codec ID is written straight into avctx->codec_id */
+}
diff --git a/libavcodec/mpegaudiodecheader.h b/libavcodec/mpegaudiodecheader.h
index 764e8ab..444b85f 100644
--- a/libavcodec/mpegaudiodecheader.h
+++ b/libavcodec/mpegaudiodecheader.h
@@ -2,20 +2,20 @@
  * MPEG Audio header decoder
  * Copyright (c) 2001, 2002 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -56,6 +56,8 @@ int avpriv_mpegaudio_decode_header(MPADecodeHeader *s, uint32_t header);
    header, otherwise the coded frame size in bytes */
 int avpriv_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bitrate);
 
+int avpriv_mpa_decode_header2(uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bitrate, enum AVCodecID *codec_id); /* variant of avpriv_mpa_decode_header() that reports the codec ID through *codec_id instead of an AVCodecContext */
+
 /* fast header check for resync */
 static inline int ff_mpa_check_header(uint32_t header){
     /* header */
diff --git a/libavcodec/mpegaudiodectab.h b/libavcodec/mpegaudiodectab.h
index 1221657..accd12b 100644
--- a/libavcodec/mpegaudiodectab.h
+++ b/libavcodec/mpegaudiodectab.h
@@ -2,20 +2,20 @@
  * MPEG Audio decoder
  * copyright (c) 2002 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mpegaudiodsp.c b/libavcodec/mpegaudiodsp.c
index 58ea1d1..5fe3444 100644
--- a/libavcodec/mpegaudiodsp.c
+++ b/libavcodec/mpegaudiodsp.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2011 Mans Rullgard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -45,4 +45,6 @@ av_cold void ff_mpadsp_init(MPADSPContext *s)
     if (ARCH_ARM)     ff_mpadsp_init_arm(s);
     if (ARCH_PPC)     ff_mpadsp_init_ppc(s);
     if (ARCH_X86)     ff_mpadsp_init_x86(s);
+    if (HAVE_MIPSFPU)   ff_mpadsp_init_mipsfpu(s);
+    if (HAVE_MIPSDSPR1) ff_mpadsp_init_mipsdspr1(s);
 }
diff --git a/libavcodec/mpegaudiodsp.h b/libavcodec/mpegaudiodsp.h
index 909c652..a722a2f 100644
--- a/libavcodec/mpegaudiodsp.h
+++ b/libavcodec/mpegaudiodsp.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -29,6 +29,7 @@ typedef struct MPADSPContext {
                                int *dither_state, int16_t *samples, int incr);
     void (*dct32_float)(float *dst, const float *src);
     void (*dct32_fixed)(int *dst, const int *src);
+
     void (*imdct36_blocks_float)(float *out, float *buf, float *in,
                                  int count, int switch_point, int block_type);
     void (*imdct36_blocks_fixed)(int *out, int *buf, int *in,
@@ -58,6 +59,8 @@ void ff_mpadsp_init_aarch64(MPADSPContext *s);
 void ff_mpadsp_init_arm(MPADSPContext *s);
 void ff_mpadsp_init_ppc(MPADSPContext *s);
 void ff_mpadsp_init_x86(MPADSPContext *s);
+void ff_mpadsp_init_mipsfpu(MPADSPContext *s);
+void ff_mpadsp_init_mipsdspr1(MPADSPContext *s);
 
 void ff_mpa_synth_init_float(float *window);
 void ff_mpa_synth_init_fixed(int32_t *window);
diff --git a/libavcodec/mpegaudiodsp_data.c b/libavcodec/mpegaudiodsp_data.c
index 5cf86b8..4550de9 100644
--- a/libavcodec/mpegaudiodsp_data.c
+++ b/libavcodec/mpegaudiodsp_data.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mpegaudiodsp_fixed.c b/libavcodec/mpegaudiodsp_fixed.c
index 3c49a56..83c9d66 100644
--- a/libavcodec/mpegaudiodsp_fixed.c
+++ b/libavcodec/mpegaudiodsp_fixed.c
@@ -1,20 +1,20 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#define CONFIG_FLOAT 0
+#define USE_FLOATS 0
 #include "mpegaudiodsp_template.c"
diff --git a/libavcodec/mpegaudiodsp_float.c b/libavcodec/mpegaudiodsp_float.c
index 2d8d53e..c45b136 100644
--- a/libavcodec/mpegaudiodsp_float.c
+++ b/libavcodec/mpegaudiodsp_float.c
@@ -1,20 +1,20 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#define CONFIG_FLOAT 1
+#define USE_FLOATS 1
 #include "mpegaudiodsp_template.c"
diff --git a/libavcodec/mpegaudiodsp_template.c b/libavcodec/mpegaudiodsp_template.c
index 621bbd4..62454ca 100644
--- a/libavcodec/mpegaudiodsp_template.c
+++ b/libavcodec/mpegaudiodsp_template.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2001, 2002 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,7 +27,7 @@
 #include "mpegaudiodsp.h"
 #include "mpegaudio.h"
 
-#if CONFIG_FLOAT
+#if USE_FLOATS
 #define RENAME(n) n##_float
 
 static inline float round_sample(float *sum)
@@ -125,7 +125,7 @@ void RENAME(ff_mpadsp_apply_window)(MPA_INT *synth_buf, MPA_INT *window,
     register const MPA_INT *w, *w2, *p;
     int j;
     OUT_INT *samples2;
-#if CONFIG_FLOAT
+#if USE_FLOATS
     float sum, sum2;
 #else
     int64_t sum, sum2;
@@ -200,7 +200,7 @@ av_cold void RENAME(ff_mpa_synth_init)(MPA_INT *window)
     for(i=0;i<257;i++) {
         INTFLOAT v;
         v = ff_mpa_enwindow[i];
-#if CONFIG_FLOAT
+#if USE_FLOATS
         v *= 1.0 / (1LL<<(16 + FRAC_BITS));
 #endif
         window[i] = v;
@@ -243,7 +243,7 @@ av_cold void RENAME(ff_init_mpadsp_tabs)(void)
                 else if (i <  18) d = 1;
             }
             //merge last stage of imdct into the window coefficients
-            d *= 0.5 / cos(M_PI * (2 * i + 19) / 72);
+            d *= 0.5 * IMDCT_SCALAR / cos(M_PI * (2 * i + 19) / 72);
 
             if (j == 2)
                 RENAME(ff_mdct_win)[j][i/3] = FIXHR((d / (1<<5)));
@@ -398,3 +398,4 @@ void RENAME(ff_imdct36_blocks)(INTFLOAT *out, INTFLOAT *buf, INTFLOAT *in,
         out++;
     }
 }
+
diff --git a/libavcodec/mpegaudioenc_fixed.c b/libavcodec/mpegaudioenc_fixed.c
new file mode 100644
index 0000000..022b6fe
--- /dev/null
+++ b/libavcodec/mpegaudioenc_fixed.c
@@ -0,0 +1,49 @@
+/*
+ * The simplest mpeg audio layer 2 encoder
+ * Copyright (c) 2000, 2001 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define USE_FLOATS 0
+#include "mpegaudioenc_template.c"
+
+/*
+ * Codec registration for the fixed-point MP2 encoder ("mp2fixed").
+ * USE_FLOATS is defined to 0 explicitly, as in mpegaudiodec_fixed.c and
+ * mpegaudiodsp_fixed.c, rather than relying on the macro being undefined.
+ * The context type, callbacks and defaults referenced below are not declared
+ * in this file; they are expected to come from mpegaudioenc_template.c.
+ */
+AVCodec ff_mp2fixed_encoder = {
+    .name                  = "mp2fixed",
+    .long_name             = NULL_IF_CONFIG_SMALL("MP2 fixed point (MPEG audio layer 2)"),
+    .type                  = AVMEDIA_TYPE_AUDIO,
+    .id                    = AV_CODEC_ID_MP2,
+    .priv_data_size        = sizeof(MpegAudioContext),
+    .init                  = MPA_encode_init,
+    .encode2               = MPA_encode_frame,
+    .sample_fmts           = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
+                                                            AV_SAMPLE_FMT_NONE },
+    .supported_samplerates = (const int[]){
+        44100, 48000,  32000, 22050, 24000, 16000, 0
+    },
+    .channel_layouts       = (const uint64_t[]){ AV_CH_LAYOUT_MONO,
+                                                 AV_CH_LAYOUT_STEREO,
+                                                 0 },
+    .defaults              = mp2_defaults,
+};
diff --git a/libavcodec/mpegaudioenc_float.c b/libavcodec/mpegaudioenc_float.c
new file mode 100644
index 0000000..4d4ab2d
--- /dev/null
+++ b/libavcodec/mpegaudioenc_float.c
@@ -0,0 +1,42 @@
+/*
+ * The simplest mpeg audio layer 2 encoder
+ * Copyright (c) 2000, 2001 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define USE_FLOATS 1
+#include "mpegaudioenc_template.c"
+
+AVCodec ff_mp2_encoder = { /* codec table entry "mp2"; this file defines USE_FLOATS before including the template */
+    .name                  = "mp2",
+    .long_name             = NULL_IF_CONFIG_SMALL("MP2 (MPEG audio layer 2)"),
+    .type                  = AVMEDIA_TYPE_AUDIO,
+    .id                    = AV_CODEC_ID_MP2,
+    .priv_data_size        = sizeof(MpegAudioContext),
+    .init                  = MPA_encode_init, /* comes from the included mpegaudioenc_template.c */
+    .encode2               = MPA_encode_frame, /* comes from the included mpegaudioenc_template.c */
+    .sample_fmts           = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
+                                                            AV_SAMPLE_FMT_NONE }, /* list ends with AV_SAMPLE_FMT_NONE */
+    .supported_samplerates = (const int[]){
+        44100, 48000,  32000, 22050, 24000, 16000, 0
+    }, /* list ends with 0 */
+    .channel_layouts       = (const uint64_t[]){ AV_CH_LAYOUT_MONO,
+                                                 AV_CH_LAYOUT_STEREO,
+                                                 0 }, /* list ends with 0 */
+    .defaults              = mp2_defaults, /* template table; sets "b" to "0" so MPA_encode_init selects a bitrate */
+};
diff --git a/libavcodec/mpegaudioenc.c b/libavcodec/mpegaudioenc_template.c
index 51a6f5b..e9571d8 100644
--- a/libavcodec/mpegaudioenc.c
+++ b/libavcodec/mpegaudioenc_template.c
@@ -2,20 +2,20 @@
  * The simplest mpeg audio layer 2 encoder
  * Copyright (c) 2000, 2001 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -64,7 +64,12 @@ typedef struct MpegAudioContext {
     int16_t filter_bank[512];
     int scale_factor_table[64];
     unsigned char scale_diff_table[128];
+#if USE_FLOATS
     float scale_factor_inv_table[64];
+#else
+    int8_t scale_factor_shift[64];
+    unsigned short scale_factor_mult[64];
+#endif
     unsigned short total_quant_bits[17]; /* total number of bits per allocation group */
 } MpegAudioContext;
 
@@ -103,10 +108,15 @@ static av_cold int MPA_encode_init(AVCodecContext *avctx)
     s->freq_index = i;
 
     /* encoding bitrate & frequency */
-    for(i=0;i<15;i++) {
+    for(i=1;i<15;i++) {
         if (avpriv_mpa_bitrate_tab[s->lsf][1][i] == bitrate)
             break;
     }
+    if (i == 15 && !avctx->bit_rate) {
+        i = 14;
+        bitrate = avpriv_mpa_bitrate_tab[s->lsf][1][i];
+        avctx->bit_rate = bitrate * 1000;
+    }
     if (i == 15){
         av_log(avctx, AV_LOG_ERROR, "bitrate %d is not allowed in mp2\n", bitrate);
         return AVERROR(EINVAL);
@@ -149,11 +159,17 @@ static av_cold int MPA_encode_init(AVCodecContext *avctx)
     }
 
     for(i=0;i<64;i++) {
-        v = (int)(pow(2.0, (3 - i) / 3.0) * (1 << 20));
+        v = (int)(exp2((3 - i) / 3.0) * (1 << 20));
         if (v <= 0)
             v = 1;
         s->scale_factor_table[i] = v;
-        s->scale_factor_inv_table[i] = pow(2.0, -(3 - i) / 3.0) / (float)(1 << 20);
+#if USE_FLOATS
+        s->scale_factor_inv_table[i] = exp2(-(3 - i) / 3.0) / (float)(1 << 20);
+#else
+#define P 15
+        s->scale_factor_shift[i] = 21 - P - (i / 3);
+        s->scale_factor_mult[i] = (1 << P) * exp2((i % 3) / 3.0);
+#endif
     }
     for(i=0;i<128;i++) {
         v = i - 64;
@@ -397,7 +413,7 @@ static void compute_scale_factors(MpegAudioContext *s,
             av_dlog(NULL, "%2d:%d in=%x %x %d\n",
                     j, i, vmax, s->scale_factor_table[index], index);
             /* store the scale factor */
-            assert(index >=0 && index <= 63);
+            av_assert2(index >=0 && index <= 63);
             sf[i] = index;
         }
 
@@ -459,7 +475,7 @@ static void compute_scale_factors(MpegAudioContext *s,
             sf[1] = sf[2] = sf[0];
             break;
         default:
-            assert(0); //cannot happen
+            av_assert2(0); //cannot happen
             code = 0;           /* kill warning */
         }
 
@@ -579,7 +595,7 @@ static void compute_bit_allocation(MpegAudioContext *s,
         }
     }
     *padding = max_frame_size - current_frame_size;
-    assert(*padding >= 0);
+    av_assert0(*padding >= 0);
 }
 
 /*
@@ -668,14 +684,36 @@ static void encode_frame(MpegAudioContext *s,
                         qindex = s->alloc_table[j+b];
                         steps = ff_mpa_quant_steps[qindex];
                         for(m=0;m<3;m++) {
-                            float a;
                             sample = s->sb_samples[ch][k][l + m][i];
                             /* divide by scale factor */
-                            a = (float)sample * s->scale_factor_inv_table[s->scale_factors[ch][i][k]];
-                            q[m] = (int)((a + 1.0) * steps * 0.5);
+#if USE_FLOATS
+                            {
+                                float a;
+                                a = (float)sample * s->scale_factor_inv_table[s->scale_factors[ch][i][k]];
+                                q[m] = (int)((a + 1.0) * steps * 0.5);
+                            }
+#else
+                            {
+                                int q1, e, shift, mult;
+                                e = s->scale_factors[ch][i][k];
+                                shift = s->scale_factor_shift[e];
+                                mult = s->scale_factor_mult[e];
+
+                                /* normalize to P bits */
+                                if (shift < 0)
+                                    q1 = sample << (-shift);
+                                else
+                                    q1 = sample >> shift;
+                                q1 = (q1 * mult) >> P;
+                                q1 += 1 << P;
+                                if (q1 < 0)
+                                    q1 = 0;
+                                q[m] = (q1 * (unsigned)steps) >> (P + 1);
+                            }
+#endif
                             if (q[m] >= steps)
                                 q[m] = steps - 1;
-                            assert(q[m] >= 0 && q[m] < steps);
+                            av_assert2(q[m] >= 0 && q[m] < steps);
                         }
                         bits = ff_mpa_quant_bits[qindex];
                         if (bits < 0) {
@@ -725,10 +763,8 @@ static int MPA_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     }
     compute_bit_allocation(s, smr, bit_alloc, &padding);
 
-    if ((ret = ff_alloc_packet(avpkt, MPA_MAX_CODED_FRAME_SIZE))) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+    if ((ret = ff_alloc_packet2(avctx, avpkt, MPA_MAX_CODED_FRAME_SIZE)) < 0)
         return ret;
-    }
 
     init_put_bits(&s->pb, avpkt->data, avpkt->size);
 
@@ -743,25 +779,7 @@ static int MPA_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 }
 
 static const AVCodecDefault mp2_defaults[] = {
-    { "b", "384000" },
+    { "b", "0" },
     { NULL },
 };
 
-AVCodec ff_mp2_encoder = {
-    .name                  = "mp2",
-    .long_name             = NULL_IF_CONFIG_SMALL("MP2 (MPEG audio layer 2)"),
-    .type                  = AVMEDIA_TYPE_AUDIO,
-    .id                    = AV_CODEC_ID_MP2,
-    .priv_data_size        = sizeof(MpegAudioContext),
-    .init                  = MPA_encode_init,
-    .encode2               = MPA_encode_frame,
-    .sample_fmts           = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
-                                                            AV_SAMPLE_FMT_NONE },
-    .supported_samplerates = (const int[]){
-        44100, 48000,  32000, 22050, 24000, 16000, 0
-    },
-    .channel_layouts       = (const uint64_t[]){ AV_CH_LAYOUT_MONO,
-                                                 AV_CH_LAYOUT_STEREO,
-                                                 0 },
-    .defaults              = mp2_defaults,
-};
diff --git a/libavcodec/mpegaudiotab.h b/libavcodec/mpegaudiotab.h
index d30ef1b..42d42d8 100644
--- a/libavcodec/mpegaudiotab.h
+++ b/libavcodec/mpegaudiotab.h
@@ -4,20 +4,20 @@
  *
  * Copyright (c) 2000, 2001 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mpegutils.c b/libavcodec/mpegutils.c
index bc430f0..62cc36a 100644
--- a/libavcodec/mpegutils.c
+++ b/libavcodec/mpegutils.c
@@ -1,20 +1,20 @@
 /*
  * Mpeg video formats-related defines and utility functions
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mpegutils.h b/libavcodec/mpegutils.h
index 9446724..6d59c22 100644
--- a/libavcodec/mpegutils.h
+++ b/libavcodec/mpegutils.h
@@ -1,20 +1,20 @@
 /*
  * Mpeg video formats-related defines and utility functions
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index da42541..4672359 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -5,20 +5,20 @@
  *
  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -34,6 +34,7 @@
 #include "libavutil/timer.h"
 #include "avcodec.h"
 #include "blockdsp.h"
+#include "h264chroma.h"
 #include "idctdsp.h"
 #include "internal.h"
 #include "mathops.h"
@@ -42,7 +43,6 @@
 #include "mjpegenc.h"
 #include "msmpeg4.h"
 #include "qpeldsp.h"
-#include "xvmc_internal.h"
 #include "thread.h"
 #include <limits.h>
 
@@ -137,10 +137,7 @@ static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
 
     nCoeffs= s->block_last_index[n];
 
-    if (n < 4)
-        block[0] = block[0] * s->y_dc_scale;
-    else
-        block[0] = block[0] * s->c_dc_scale;
+    block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale;
     /* XXX: only mpeg1 */
     quant_matrix = s->intra_matrix;
     for(i=1;i<=nCoeffs;i++) {
@@ -199,10 +196,7 @@ static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
     if(s->alternate_scan) nCoeffs= 63;
     else nCoeffs= s->block_last_index[n];
 
-    if (n < 4)
-        block[0] = block[0] * s->y_dc_scale;
-    else
-        block[0] = block[0] * s->c_dc_scale;
+    block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale;
     quant_matrix = s->intra_matrix;
     for(i=1;i<=nCoeffs;i++) {
         int j= s->intra_scantable.permutated[i];
@@ -230,10 +224,8 @@ static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
     if(s->alternate_scan) nCoeffs= 63;
     else nCoeffs= s->block_last_index[n];
 
-    if (n < 4)
-        block[0] = block[0] * s->y_dc_scale;
-    else
-        block[0] = block[0] * s->c_dc_scale;
+    block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale;
+    sum += block[0];
     quant_matrix = s->intra_matrix;
     for(i=1;i<=nCoeffs;i++) {
         int j= s->intra_scantable.permutated[i];
@@ -290,15 +282,12 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s,
     int i, level, qmul, qadd;
     int nCoeffs;
 
-    assert(s->block_last_index[n]>=0);
+    av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
 
     qmul = qscale << 1;
 
     if (!s->h263_aic) {
-        if (n < 4)
-            block[0] = block[0] * s->y_dc_scale;
-        else
-            block[0] = block[0] * s->c_dc_scale;
+        block[0] *= n < 4 ? s->y_dc_scale : s->c_dc_scale;
         qadd = (qscale - 1) | 1;
     }else{
         qadd = 0;
@@ -327,7 +316,7 @@ static void dct_unquantize_h263_inter_c(MpegEncContext *s,
     int i, level, qmul, qadd;
     int nCoeffs;
 
-    assert(s->block_last_index[n]>=0);
+    av_assert2(s->block_last_index[n]>=0);
 
     qadd = (qscale - 1) | 1;
     qmul = qscale << 1;
@@ -370,20 +359,47 @@ static void mpeg_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type,
     s->dest[1] = s->current_picture.f->data[1] + (s->mb_y * (16 >> s->chroma_y_shift) * s->uvlinesize) + s->mb_x * (16 >> s->chroma_x_shift);
     s->dest[2] = s->current_picture.f->data[2] + (s->mb_y * (16 >> s->chroma_y_shift) * s->uvlinesize) + s->mb_x * (16 >> s->chroma_x_shift);
 
-    assert(ref == 0);
+    if (ref)
+        av_log(s->avctx, AV_LOG_DEBUG, "Interlaced error concealment is not fully implemented\n");
     ff_MPV_decode_mb(s, s->block);
 }
 
+static void gray16(uint8_t *dst, const uint8_t *src, ptrdiff_t linesize, int h)
+{ /* Fill h rows of 16 bytes at dst with the value 128; rows are linesize apart and src is never read. */
+    while(h--) /* post-decrement: the body sees h-1 down to 0, so the last row is written first */
+        memset(dst + h*linesize, 128, 16);
+} /* installed into the hdsp [0] pixel tables by ff_dct_common_init when FF_DEBUG_NOMC is set */
+
+static void gray8(uint8_t *dst, const uint8_t *src, ptrdiff_t linesize, int h)
+{ /* Fill h rows of 8 bytes at dst with the value 128; rows are linesize apart and src is never read. */
+    while(h--) /* post-decrement: the body sees h-1 down to 0, so the last row is written first */
+        memset(dst + h*linesize, 128, 8);
+} /* installed into the hdsp [1] pixel tables by ff_dct_common_init when FF_DEBUG_NOMC is set */
+
 /* init common dct for both encoder and decoder */
 av_cold int ff_dct_common_init(MpegEncContext *s)
 {
     ff_blockdsp_init(&s->bdsp, s->avctx);
+    ff_h264chroma_init(&s->h264chroma, 8); //for lowres
     ff_hpeldsp_init(&s->hdsp, s->avctx->flags);
     ff_idctdsp_init(&s->idsp, s->avctx);
     ff_me_cmp_init(&s->mecc, s->avctx);
     ff_mpegvideodsp_init(&s->mdsp);
     ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample);
 
+    if (s->avctx->debug & FF_DEBUG_NOMC) {
+        int i;
+        for (i=0; i<4; i++) {
+            s->hdsp.avg_pixels_tab[0][i] = gray16;
+            s->hdsp.put_pixels_tab[0][i] = gray16;
+            s->hdsp.put_no_rnd_pixels_tab[0][i] = gray16;
+
+            s->hdsp.avg_pixels_tab[1][i] = gray8;
+            s->hdsp.put_pixels_tab[1][i] = gray8;
+            s->hdsp.put_no_rnd_pixels_tab[1][i] = gray8;
+        }
+    }
+
     s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
     s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
     s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
@@ -396,6 +412,8 @@ av_cold int ff_dct_common_init(MpegEncContext *s)
     if (HAVE_INTRINSICS_NEON)
         ff_MPV_common_init_neon(s);
 
+    if (ARCH_ALPHA)
+        ff_MPV_common_init_axp(s);
     if (ARCH_ARM)
         ff_MPV_common_init_arm(s);
     if (ARCH_PPC)
@@ -421,17 +439,26 @@ av_cold int ff_dct_common_init(MpegEncContext *s)
 
 static int frame_size_alloc(MpegEncContext *s, int linesize)
 {
-    int alloc_size = FFALIGN(FFABS(linesize) + 32, 32);
+    int alloc_size = FFALIGN(FFABS(linesize) + 64, 32);
+
+    if (s->avctx->hwaccel || s->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
+        return 0;
+
+    if (linesize < 24) {
+        av_log(s->avctx, AV_LOG_ERROR, "Image too small, temporary buffers cannot function\n");
+        return AVERROR_PATCHWELCOME;
+    }
 
     // edge emu needs blocksize + filter length - 1
     // (= 17x17 for  halfpel / 21x21 for  h264)
     // VC1 computes luma and chroma simultaneously and needs 19X19 + 9x9
     // at uvlinesize. It supports only YUV420 so 24x24 is enough
     // linesize * interlaced * MBsize
-    FF_ALLOCZ_OR_GOTO(s->avctx, s->edge_emu_buffer, alloc_size * 2 * 24,
+    // we also use this buffer for encoding in encode_mb_internal() needing an additional 32 lines
+    FF_ALLOCZ_OR_GOTO(s->avctx, s->edge_emu_buffer, alloc_size * 4 * 68,
                       fail);
 
-    FF_ALLOCZ_OR_GOTO(s->avctx, s->me.scratchpad, alloc_size * 2 * 16 * 3,
+    FF_ALLOCZ_OR_GOTO(s->avctx, s->me.scratchpad, alloc_size * 4 * 16 * 2,
                       fail)
     s->me.temp         = s->me.scratchpad;
     s->rd_scratchpad   = s->me.scratchpad;
@@ -530,6 +557,9 @@ void ff_free_picture_tables(Picture *pic)
 {
     int i;
 
+    pic->alloc_mb_width  =
+    pic->alloc_mb_height = 0;
+
     av_buffer_unref(&pic->mb_var_buf);
     av_buffer_unref(&pic->mc_mb_var_buf);
     av_buffer_unref(&pic->mb_mean_buf);
@@ -566,7 +596,7 @@ static int alloc_picture_tables(MpegEncContext *s, Picture *pic)
             return AVERROR(ENOMEM);
     }
 
-    if (s->out_format == FMT_H263 || s->encoding) {
+    if (s->out_format == FMT_H263 || s->encoding || s->avctx->debug_mv) {
         int mv_size        = 2 * (b8_array_size + 4) * sizeof(int16_t);
         int ref_index_size = 4 * mb_array_size;
 
@@ -578,6 +608,9 @@ static int alloc_picture_tables(MpegEncContext *s, Picture *pic)
         }
     }
 
+    pic->alloc_mb_width  = s->mb_width;
+    pic->alloc_mb_height = s->mb_height;
+
     return 0;
 }
 
@@ -614,11 +647,16 @@ int ff_alloc_picture(MpegEncContext *s, Picture *pic, int shared)
 {
     int i, ret;
 
+    if (pic->qscale_table_buf)
+        if (   pic->alloc_mb_width  != s->mb_width
+            || pic->alloc_mb_height != s->mb_height)
+            ff_free_picture_tables(pic);
+
     if (shared) {
-        assert(pic->f->data[0]);
+        av_assert0(pic->f->data[0]);
         pic->shared = 1;
     } else {
-        assert(!pic->f->buf[0]);
+        av_assert0(!pic->f->buf[0]);
 
         if (alloc_frame_buffer(s, pic) < 0)
             return -1;
@@ -723,6 +761,9 @@ do {\
         dst->ref_index[i]  = src->ref_index[i];
     }
 
+    dst->alloc_mb_width  = src->alloc_mb_width;
+    dst->alloc_mb_height = src->alloc_mb_height;
+
     return 0;
 }
 
@@ -780,6 +821,9 @@ static int init_duplicate_context(MpegEncContext *s)
     int yc_size = y_size + 2 * c_size;
     int i;
 
+    if (s->mb_height & 1)
+        yc_size += 2*s->b8_stride + 2*s->mb_stride;
+
     s->edge_emu_buffer =
     s->me.scratchpad   =
     s->me.temp         =
@@ -898,9 +942,11 @@ int ff_mpeg_update_thread_context(AVCodecContext *dst,
     int i, ret;
     MpegEncContext *s = dst->priv_data, *s1 = src->priv_data;
 
-    if (dst == src || !s1->context_initialized)
+    if (dst == src)
         return 0;
 
+    av_assert0(s != s1);
+
     // FIXME can parameters change on I-frames?
     // in that case dst may need a reinit
     if (!s->context_initialized) {
@@ -910,16 +956,23 @@ int ff_mpeg_update_thread_context(AVCodecContext *dst,
         s->bitstream_buffer      = NULL;
         s->bitstream_buffer_size = s->allocated_bitstream_buffer_size = 0;
 
-        ff_MPV_common_init(s);
+        if (s1->context_initialized){
+//             s->picture_range_start  += MAX_PICTURE_COUNT;
+//             s->picture_range_end    += MAX_PICTURE_COUNT;
+            if((ret = ff_MPV_common_init(s)) < 0){
+                memset(s, 0, sizeof(MpegEncContext));
+                s->avctx = dst;
+                return ret;
+            }
+        }
     }
 
     if (s->height != s1->height || s->width != s1->width || s->context_reinit) {
-        int err;
         s->context_reinit = 0;
         s->height = s1->height;
         s->width  = s1->width;
-        if ((err = ff_MPV_common_frame_size_change(s)) < 0)
-            return err;
+        if ((ret = ff_MPV_common_frame_size_change(s)) < 0)
+            return ret;
     }
 
     s->avctx->coded_height  = s1->avctx->coded_height;
@@ -930,6 +983,8 @@ int ff_mpeg_update_thread_context(AVCodecContext *dst,
     s->coded_picture_number = s1->coded_picture_number;
     s->picture_number       = s1->picture_number;
 
+    av_assert0(!s->picture || s->picture != s1->picture);
+    if(s->picture)
     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
         ff_mpeg_unref_picture(s, &s->picture[i]);
         if (s1->picture[i].f->buf[0] &&
@@ -940,7 +995,7 @@ int ff_mpeg_update_thread_context(AVCodecContext *dst,
 #define UPDATE_PICTURE(pic)\
 do {\
     ff_mpeg_unref_picture(s, &s->pic);\
-    if (s1->pic.f->buf[0])\
+    if (s1->pic.f && s1->pic.f->buf[0])\
         ret = ff_mpeg_ref_picture(s, &s->pic, &s1->pic);\
     else\
         ret = update_picture_tables(&s->pic, &s1->pic);\
@@ -959,6 +1014,7 @@ do {\
     // Error/bug resilience
     s->next_p_frame_damaged = s1->next_p_frame_damaged;
     s->workaround_bugs      = s1->workaround_bugs;
+    s->padding_bug_score    = s1->padding_bug_score;
 
     // MPEG4 timing info
     memcpy(&s->last_time_base, &s1->last_time_base,
@@ -997,7 +1053,6 @@ do {\
         } else {
             av_log(s->avctx, AV_LOG_ERROR, "Context scratch buffers could not "
                    "be allocated due to unknown size.\n");
-            return AVERROR_BUG;
         }
 
     // MPEG2/interlacing info
@@ -1097,7 +1152,7 @@ static int init_context_frame(MpegEncContext *s)
     mb_array_size = s->mb_height * s->mb_stride;
     mv_table_size = (s->mb_height + 2) * s->mb_stride + 1;
 
-    /* set default edge pos, will be overriden
+    /* set default edge pos, will be overridden
      * in decode_header if needed */
     s->h_edge_pos = s->mb_width * 16;
     s->v_edge_pos = s->mb_height * 16;
@@ -1115,44 +1170,35 @@ static int init_context_frame(MpegEncContext *s)
     c_size  = s->mb_stride * (s->mb_height + 1);
     yc_size = y_size + 2   * c_size;
 
-    FF_ALLOCZ_OR_GOTO(s->avctx, s->mb_index2xy, (s->mb_num + 1) * sizeof(int),
-                      fail); // error ressilience code looks cleaner with this
+    if (s->mb_height & 1)
+        yc_size += 2*s->b8_stride + 2*s->mb_stride;
+
+    FF_ALLOCZ_OR_GOTO(s->avctx, s->mb_index2xy, (s->mb_num + 1) * sizeof(int), fail); // error ressilience code looks cleaner with this
     for (y = 0; y < s->mb_height; y++)
         for (x = 0; x < s->mb_width; x++)
             s->mb_index2xy[x + y * s->mb_width] = x + y * s->mb_stride;
 
-    s->mb_index2xy[s->mb_height * s->mb_width] =
-        (s->mb_height - 1) * s->mb_stride + s->mb_width; // FIXME really needed?
+    s->mb_index2xy[s->mb_height * s->mb_width] = (s->mb_height - 1) * s->mb_stride + s->mb_width; // FIXME really needed?
 
     if (s->encoding) {
         /* Allocate MV tables */
-        FF_ALLOCZ_OR_GOTO(s->avctx, s->p_mv_table_base,
-                          mv_table_size * 2 * sizeof(int16_t), fail);
-        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_forw_mv_table_base,
-                          mv_table_size * 2 * sizeof(int16_t), fail);
-        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_back_mv_table_base,
-                          mv_table_size * 2 * sizeof(int16_t), fail);
-        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_bidir_forw_mv_table_base,
-                          mv_table_size * 2 * sizeof(int16_t), fail);
-        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_bidir_back_mv_table_base,
-                          mv_table_size * 2 * sizeof(int16_t), fail);
-        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_direct_mv_table_base,
-                          mv_table_size * 2 * sizeof(int16_t), fail);
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->p_mv_table_base,                 mv_table_size * 2 * sizeof(int16_t), fail)
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_forw_mv_table_base,            mv_table_size * 2 * sizeof(int16_t), fail)
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_back_mv_table_base,            mv_table_size * 2 * sizeof(int16_t), fail)
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_bidir_forw_mv_table_base,      mv_table_size * 2 * sizeof(int16_t), fail)
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_bidir_back_mv_table_base,      mv_table_size * 2 * sizeof(int16_t), fail)
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->b_direct_mv_table_base,          mv_table_size * 2 * sizeof(int16_t), fail)
         s->p_mv_table            = s->p_mv_table_base + s->mb_stride + 1;
         s->b_forw_mv_table       = s->b_forw_mv_table_base + s->mb_stride + 1;
         s->b_back_mv_table       = s->b_back_mv_table_base + s->mb_stride + 1;
-        s->b_bidir_forw_mv_table = s->b_bidir_forw_mv_table_base +
-                                   s->mb_stride + 1;
-        s->b_bidir_back_mv_table = s->b_bidir_back_mv_table_base +
-                                   s->mb_stride + 1;
+        s->b_bidir_forw_mv_table = s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
+        s->b_bidir_back_mv_table = s->b_bidir_back_mv_table_base + s->mb_stride + 1;
         s->b_direct_mv_table     = s->b_direct_mv_table_base + s->mb_stride + 1;
 
         /* Allocate MB type table */
-        FF_ALLOCZ_OR_GOTO(s->avctx, s->mb_type, mb_array_size *
-                          sizeof(uint16_t), fail); // needed for encoding
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->mb_type, mb_array_size * sizeof(uint16_t), fail) // needed for encoding
 
-        FF_ALLOCZ_OR_GOTO(s->avctx, s->lambda_table, mb_array_size *
-                          sizeof(int), fail);
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->lambda_table, mb_array_size * sizeof(int), fail)
 
         FF_ALLOC_OR_GOTO(s->avctx, s->cplx_tab,
                          mb_array_size * sizeof(float), fail);
@@ -1175,34 +1221,27 @@ static int init_context_frame(MpegEncContext *s)
                     s->b_field_mv_table[i][j][k] = s->b_field_mv_table_base[i][j][k] +
                                                    s->mb_stride + 1;
                 }
-                FF_ALLOCZ_OR_GOTO(s->avctx, s->b_field_select_table [i][j],
-                                  mb_array_size * 2 * sizeof(uint8_t), fail);
-                FF_ALLOCZ_OR_GOTO(s->avctx, s->p_field_mv_table_base[i][j],
-                                  mv_table_size * 2 * sizeof(int16_t), fail);
-                s->p_field_mv_table[i][j] = s->p_field_mv_table_base[i][j]
-                                            + s->mb_stride + 1;
+                FF_ALLOCZ_OR_GOTO(s->avctx, s->b_field_select_table [i][j], mb_array_size * 2 * sizeof(uint8_t), fail)
+                FF_ALLOCZ_OR_GOTO(s->avctx, s->p_field_mv_table_base[i][j], mv_table_size * 2 * sizeof(int16_t), fail)
+                s->p_field_mv_table[i][j] = s->p_field_mv_table_base[i][j] + s->mb_stride + 1;
             }
-            FF_ALLOCZ_OR_GOTO(s->avctx, s->p_field_select_table[i],
-                              mb_array_size * 2 * sizeof(uint8_t), fail);
+            FF_ALLOCZ_OR_GOTO(s->avctx, s->p_field_select_table[i], mb_array_size * 2 * sizeof(uint8_t), fail)
         }
     }
     if (s->out_format == FMT_H263) {
         /* cbp values */
-        FF_ALLOCZ_OR_GOTO(s->avctx, s->coded_block_base, y_size, fail);
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->coded_block_base, y_size + (s->mb_height&1)*2*s->b8_stride, fail);
         s->coded_block = s->coded_block_base + s->b8_stride + 1;
 
         /* cbp, ac_pred, pred_dir */
-        FF_ALLOCZ_OR_GOTO(s->avctx, s->cbp_table,
-                          mb_array_size * sizeof(uint8_t), fail);
-        FF_ALLOCZ_OR_GOTO(s->avctx, s->pred_dir_table,
-                          mb_array_size * sizeof(uint8_t), fail);
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->cbp_table     , mb_array_size * sizeof(uint8_t), fail);
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->pred_dir_table, mb_array_size * sizeof(uint8_t), fail);
     }
 
     if (s->h263_pred || s->h263_plus || !s->encoding) {
         /* dc values */
         // MN: we need these for  error resilience of intra-frames
-        FF_ALLOCZ_OR_GOTO(s->avctx, s->dc_val_base,
-                          yc_size * sizeof(int16_t), fail);
+        FF_ALLOCZ_OR_GOTO(s->avctx, s->dc_val_base, yc_size * sizeof(int16_t), fail);
         s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
         s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
         s->dc_val[2] = s->dc_val[1] + c_size;
@@ -1269,9 +1308,9 @@ av_cold int ff_MPV_common_init(MpegEncContext *s)
     s->flags2 = s->avctx->flags2;
 
     /* set chroma shifts */
-    av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
-                                     &s->chroma_x_shift,
-                                     &s->chroma_y_shift);
+    avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,
+                                  &s->chroma_x_shift,
+                                  &s->chroma_y_shift);
 
     /* convert fourcc to upper case */
     s->codec_tag          = avpriv_toupper4(s->avctx->codec_tag);
@@ -1302,17 +1341,15 @@ av_cold int ff_MPV_common_init(MpegEncContext *s)
     if (!s->new_picture.f)
         goto fail;
 
-    if (s->width && s->height) {
         if (init_context_frame(s))
             goto fail;
 
         s->parse_context.state = -1;
-    }
 
-    s->context_initialized = 1;
-    s->thread_context[0]   = s;
+        s->context_initialized = 1;
+        s->thread_context[0]   = s;
 
-    if (s->width && s->height) {
+//     if (s->width && s->height) {
         if (nb_slices > 1) {
             for (i = 1; i < nb_slices; i++) {
                 s->thread_context[i] = av_malloc(sizeof(MpegEncContext));
@@ -1334,7 +1371,7 @@ av_cold int ff_MPV_common_init(MpegEncContext *s)
             s->end_mb_y   = s->mb_height;
         }
         s->slice_context_count = nb_slices;
-    }
+//     }
 
     return 0;
  fail:
@@ -1389,6 +1426,7 @@ static int free_context_frame(MpegEncContext *s)
     av_freep(&s->er.er_temp_buffer);
     av_freep(&s->mb_index2xy);
     av_freep(&s->lambda_table);
+
     av_freep(&s->cplx_tab);
     av_freep(&s->bits_tab);
 
@@ -1455,7 +1493,8 @@ int ff_MPV_common_frame_size_change(MpegEncContext *s)
                         (s->mb_height * (i + 1) + nb_slices / 2) / nb_slices;
             }
         } else {
-            if (init_duplicate_context(s) < 0)
+            err = init_duplicate_context(s);
+            if (err < 0)
                 goto fail;
             s->start_mb_y = 0;
             s->end_mb_y   = s->mb_height;
@@ -1625,6 +1664,8 @@ static void release_unused_pictures(MpegEncContext *s)
 
 static inline int pic_is_unused(MpegEncContext *s, Picture *pic)
 {
+    if (pic == s->last_picture_ptr)
+        return 0;
     if (pic->f->buf[0] == NULL)
         return 1;
     if (pic->needs_realloc && !(pic->reference & DELAYED_PIC_REF))
@@ -1638,7 +1679,7 @@ static int find_unused_picture(MpegEncContext *s, int shared)
 
     if (shared) {
         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
-            if (s->picture[i].f->buf[0] == NULL)
+            if (s->picture[i].f->buf[0] == NULL && &s->picture[i] != s->last_picture_ptr)
                 return i;
         }
     } else {
@@ -1648,7 +1689,21 @@ static int find_unused_picture(MpegEncContext *s, int shared)
         }
     }
 
-    return AVERROR_INVALIDDATA;
+    av_log(s->avctx, AV_LOG_FATAL,
+           "Internal error, picture buffer overflow\n");
+    /* We could return -1, but the codec would crash trying to draw into a
+     * non-existing frame anyway. This is safer than waiting for a random crash.
+     * Also the return of this is never useful, an encoder must only allocate
+     * as much as allowed in the specification. This has no relationship to how
+     * much libavcodec could allocate (and MAX_PICTURE_COUNT is always large
+     * enough for such valid streams).
+     * Plus, a decoder has to check stream validity and remove frames if too
+     * many reference frames are around. Waiting for "OOM" is not correct at
+     * all. Similarly, missing reference frames have to be replaced by
+     * interpolated/MC frames, anything else is a bug in the codec ...
+     */
+    abort();
+    return -1;
 }
 
 int ff_find_unused_picture(MpegEncContext *s, int shared)
@@ -1665,6 +1720,22 @@ int ff_find_unused_picture(MpegEncContext *s, int shared)
     return ret;
 }
 
+static void gray_frame(AVFrame *frame) /* fill rows of planes 0, 1 and 2 of frame with the byte 0x80 */
+{
+    int i, h_chroma_shift, v_chroma_shift;
+
+    av_pix_fmt_get_chroma_sub_sample(frame->format, &h_chroma_shift, &v_chroma_shift);
+
+    for(i=0; i<frame->height; i++) /* plane 0: frame->width bytes per row, rows addressed through linesize[0] */
+        memset(frame->data[0] + frame->linesize[0]*i, 0x80, frame->width);
+    for(i=0; i<FF_CEIL_RSHIFT(frame->height, v_chroma_shift); i++) { /* planes 1 and 2: row count and row length reduced by the chroma shifts */
+        memset(frame->data[1] + frame->linesize[1]*i,
+               0x80, FF_CEIL_RSHIFT(frame->width, h_chroma_shift));
+        memset(frame->data[2] + frame->linesize[2]*i,
+               0x80, FF_CEIL_RSHIFT(frame->width, h_chroma_shift));
+    }
+}
+
 /**
  * generic function called after decoding
  * the header and before a frame is decoded.
@@ -1675,6 +1746,11 @@ int ff_MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
     Picture *pic;
     s->mb_skipped = 0;
 
+    if (!ff_thread_can_start_frame(avctx)) {
+        av_log(avctx, AV_LOG_ERROR, "Attempt to start a frame outside SETUP state\n");
+        return -1;
+    }
+
     /* mark & release old frames */
     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
         s->last_picture_ptr != s->next_picture_ptr &&
@@ -1765,11 +1841,14 @@ int ff_MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
         int h_chroma_shift, v_chroma_shift;
         av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
                                          &h_chroma_shift, &v_chroma_shift);
-        if (s->pict_type != AV_PICTURE_TYPE_I)
+        if (s->pict_type == AV_PICTURE_TYPE_B && s->next_picture_ptr && s->next_picture_ptr->f->buf[0])
+            av_log(avctx, AV_LOG_DEBUG,
+                   "allocating dummy last picture for B frame\n");
+        else if (s->pict_type != AV_PICTURE_TYPE_I)
             av_log(avctx, AV_LOG_ERROR,
                    "warning: first frame is no keyframe\n");
         else if (s->picture_structure != PICT_FRAME)
-            av_log(avctx, AV_LOG_INFO,
+            av_log(avctx, AV_LOG_DEBUG,
                    "allocate dummy last picture for field based first keyframe\n");
 
         /* Allocate a dummy frame */
@@ -1781,21 +1860,30 @@ int ff_MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
         s->last_picture_ptr = &s->picture[i];
 
         s->last_picture_ptr->reference   = 3;
-        s->last_picture_ptr->f->pict_type = AV_PICTURE_TYPE_I;
+        s->last_picture_ptr->f->key_frame = 0;
+        s->last_picture_ptr->f->pict_type = AV_PICTURE_TYPE_P;
 
         if (ff_alloc_picture(s, s->last_picture_ptr, 0) < 0) {
             s->last_picture_ptr = NULL;
             return -1;
         }
 
-        memset(s->last_picture_ptr->f->data[0], 0,
-               avctx->height * s->last_picture_ptr->f->linesize[0]);
-        memset(s->last_picture_ptr->f->data[1], 0x80,
-               (avctx->height >> v_chroma_shift) *
-               s->last_picture_ptr->f->linesize[1]);
-        memset(s->last_picture_ptr->f->data[2], 0x80,
-               (avctx->height >> v_chroma_shift) *
-               s->last_picture_ptr->f->linesize[2]);
+        if (!avctx->hwaccel && !(avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)) {
+            for(i=0; i<avctx->height; i++)
+                memset(s->last_picture_ptr->f->data[0] + s->last_picture_ptr->f->linesize[0]*i,
+                       0x80, avctx->width);
+            for(i=0; i<FF_CEIL_RSHIFT(avctx->height, v_chroma_shift); i++) {
+                memset(s->last_picture_ptr->f->data[1] + s->last_picture_ptr->f->linesize[1]*i,
+                       0x80, FF_CEIL_RSHIFT(avctx->width, h_chroma_shift));
+                memset(s->last_picture_ptr->f->data[2] + s->last_picture_ptr->f->linesize[2]*i,
+                       0x80, FF_CEIL_RSHIFT(avctx->width, h_chroma_shift));
+            }
+
+            if(s->codec_id == AV_CODEC_ID_FLV1 || s->codec_id == AV_CODEC_ID_H263){
+                for(i=0; i<avctx->height; i++)
+                memset(s->last_picture_ptr->f->data[0] + s->last_picture_ptr->f->linesize[0]*i, 16, avctx->width);
+            }
+        }
 
         ff_thread_report_progress(&s->last_picture_ptr->tf, INT_MAX, 0);
         ff_thread_report_progress(&s->last_picture_ptr->tf, INT_MAX, 1);
@@ -1812,7 +1900,8 @@ int ff_MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
         s->next_picture_ptr = &s->picture[i];
 
         s->next_picture_ptr->reference   = 3;
-        s->next_picture_ptr->f->pict_type = AV_PICTURE_TYPE_I;
+        s->next_picture_ptr->f->key_frame = 0;
+        s->next_picture_ptr->f->pict_type = AV_PICTURE_TYPE_P;
 
         if (ff_alloc_picture(s, s->next_picture_ptr, 0) < 0) {
             s->next_picture_ptr = NULL;
@@ -1822,6 +1911,10 @@ int ff_MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
         ff_thread_report_progress(&s->next_picture_ptr->tf, INT_MAX, 1);
     }
 
+#if 0 // BUFREF-FIXME
+    memset(s->last_picture.f->data, 0, sizeof(s->last_picture.f->data));
+    memset(s->next_picture.f->data, 0, sizeof(s->next_picture.f->data));
+#endif
     if (s->last_picture_ptr) {
         ff_mpeg_unref_picture(s, &s->last_picture);
         if (s->last_picture_ptr->f->buf[0] &&
@@ -1837,12 +1930,8 @@ int ff_MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
             return ret;
     }
 
-    if (s->pict_type != AV_PICTURE_TYPE_I &&
-        !(s->last_picture_ptr && s->last_picture_ptr->f->buf[0])) {
-        av_log(s, AV_LOG_ERROR,
-               "Non-reference picture received and no reference available\n");
-        return AVERROR_INVALIDDATA;
-    }
+    av_assert0(s->pict_type == AV_PICTURE_TYPE_I || (s->last_picture_ptr &&
+                                                 s->last_picture_ptr->f->buf[0]));
 
     if (s->picture_structure!= PICT_FRAME) {
         int i;
@@ -1873,12 +1962,9 @@ int ff_MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
     }
 
-#if FF_API_XVMC
-FF_DISABLE_DEPRECATION_WARNINGS
-    if (CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration)
-        return ff_xvmc_field_start(s, avctx);
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif /* FF_API_XVMC */
+    if (s->avctx->debug & FF_DEBUG_NOMC) {
+        gray_frame(s->current_picture_ptr->f);
+    }
 
     return 0;
 }
@@ -1886,119 +1972,827 @@ FF_ENABLE_DEPRECATION_WARNINGS
 /* called after a frame has been decoded. */
 void ff_MPV_frame_end(MpegEncContext *s)
 {
-#if FF_API_XVMC
-FF_DISABLE_DEPRECATION_WARNINGS
-    /* redraw edges for the frame if decoding didn't complete */
-    // just to make sure that all data is rendered.
-    if (CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration) {
-        ff_xvmc_field_end(s);
-    } else
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif /* FF_API_XVMC */
-
     emms_c();
 
     if (s->current_picture.reference)
         ff_thread_report_progress(&s->current_picture_ptr->tf, INT_MAX, 0);
 }
 
+
+static int clip_line(int *sx, int *sy, int *ex, int *ey, int maxx) /* clip a segment to 0 <= x <= maxx in place; returns 1 if it lies entirely outside, else 0 */
+{
+    if(*sx > *ex) /* re-enter with the end points exchanged so that *sx <= *ex below */
+        return clip_line(ex, ey, sx, sy, maxx);
+
+    if (*sx < 0) {
+        if (*ex < 0)
+            return 1; /* both end points are left of x = 0 */
+        *sy = *ey + (*sy - *ey) * (int64_t)*ex / (*ex - *sx); /* y at x = 0 by linear interpolation; product computed in 64 bits */
+        *sx = 0;
+    }
+
+    if (*ex > maxx) {
+        if (*sx > maxx)
+            return 1; /* both end points are right of x = maxx */
+        *ey = *sy + (*ey - *sy) * (int64_t)(maxx - *sx) / (*ex - *sx); /* y at x = maxx by linear interpolation */
+        *ex = maxx;
+    }
+    return 0;
+}
+
+
+/**
+ * Draw a line from (ex, ey) -> (sx, sy), adding color to the samples it covers.
+ * @param w width of the image
+ * @param h height of the image
+ * @param stride stride/linesize of the image
+ * @param color color of the line
+ */
+static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey,
+                      int w, int h, int stride, int color)
+{
+    int x, y, fr, f;
+
+    if (clip_line(&sx, &sy, &ex, &ey, w - 1)) /* clip against 0..w-1 in x; nothing to draw if fully outside */
+        return;
+    if (clip_line(&sy, &sx, &ey, &ex, h - 1)) /* same helper with x and y exchanged: clip against 0..h-1 in y */
+        return;
+
+    sx = av_clip(sx, 0, w - 1);
+    sy = av_clip(sy, 0, h - 1);
+    ex = av_clip(ex, 0, w - 1);
+    ey = av_clip(ey, 0, h - 1);
+
+    buf[sy * stride + sx] += color;
+
+    if (FFABS(ex - sx) > FFABS(ey - sy)) { /* x-major line: one step per column */
+        if (sx > ex) {
+            FFSWAP(int, sx, ex);
+            FFSWAP(int, sy, ey);
+        }
+        buf += sx + sy * stride;
+        ex  -= sx; /* ex > 0 here because |ex - sx| > |ey - sy| >= 0 */
+        f    = ((ey - sy) * (1 << 16)) / ex; /* 16.16 slope; multiply, since left-shifting a negative value is undefined */
+        for (x = 0; x <= ex; x++) {
+            y  = (x * f) >> 16;
+            fr = (x * f) & 0xFFFF; /* fractional part: splits color between rows y and y + 1 */
+            buf[y * stride + x]       += (color * (0x10000 - fr)) >> 16;
+            if(fr) buf[(y + 1) * stride + x] += (color *            fr ) >> 16;
+        }
+    } else { /* y-major line: one step per row */
+        if (sy > ey) {
+            FFSWAP(int, sx, ex);
+            FFSWAP(int, sy, ey);
+        }
+        buf += sx + sy * stride;
+        ey  -= sy;
+        if (ey)
+            f = ((ex - sx) * (1 << 16)) / ey; /* 16.16 slope; multiply, since left-shifting a negative value is undefined */
+        else
+            f = 0; /* start and end coincide; avoids dividing by zero */
+        for(y= 0; y <= ey; y++){
+            x  = (y*f) >> 16;
+            fr = (y*f) & 0xFFFF; /* fractional part: splits color between columns x and x + 1 */
+            buf[y * stride + x]     += (color * (0x10000 - fr)) >> 16;
+            if(fr) buf[y * stride + x + 1] += (color *            fr ) >> 16;
+        }
+    }
+}
+
+/**
+ * Draw an arrow from (ex, ey) -> (sx, sy).
+ * @param w width of the image
+ * @param h height of the image
+ * @param stride stride/linesize of the image
+ * @param color color of the arrow
+ */
+static void draw_arrow(uint8_t *buf, int sx, int sy, int ex,
+                       int ey, int w, int h, int stride, int color, int tail, int direction)
+{
+    int dx,dy;
+
+    if (direction) { /* nonzero direction: exchange the two end points first */
+        FFSWAP(int, sx, ex);
+        FFSWAP(int, sy, ey);
+    }
+
+    sx = av_clip(sx, -100, w + 100); /* bound the coordinates so the products below stay small */
+    sy = av_clip(sy, -100, h + 100);
+    ex = av_clip(ex, -100, w + 100);
+    ey = av_clip(ey, -100, h + 100);
+
+    dx = ex - sx;
+    dy = ey - sy;
+
+    if (dx * dx + dy * dy > 3 * 3) { /* head strokes are drawn only when the shaft is longer than 3 samples */
+        int rx =  dx + dy;
+        int ry = -dx + dy;
+        int length = ff_sqrt((rx * rx + ry * ry) << 8);
+
+        // FIXME subpixel accuracy
+        rx = ROUNDED_DIV(rx * (3 << 4), length); /* rx may be negative: shift the constant, not the product, to keep the shift defined */
+        ry = ROUNDED_DIV(ry * (3 << 4), length); /* same value as ry * 3 << 4 for non-negative ry */
+
+        if (tail) { /* nonzero tail: negate both head strokes */
+            rx = -rx;
+            ry = -ry;
+        }
+
+        draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
+        draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
+    }
+    draw_line(buf, sx, sy, ex, ey, w, h, stride, color); /* the shaft itself */
+}
+
 /**
  * Print debugging info for the given picture.
  */
-void ff_print_debug_info(MpegEncContext *s, Picture *p)
+void ff_print_debug_info2(AVCodecContext *avctx, AVFrame *pict, uint8_t *mbskip_table,
+                         uint32_t *mbtype_table, int8_t *qscale_table, int16_t (*motion_val[2])[2],
+                         int *low_delay,
+                         int mb_width, int mb_height, int mb_stride, int quarter_sample)
 {
-    AVFrame *pict;
-    if (s->avctx->hwaccel || !p || !p->mb_type)
+    if (avctx->hwaccel || !mbtype_table
+        || (avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU))
         return;
-    pict = p->f;
 
-    if (s->avctx->debug & (FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)) {
+
+    if (avctx->debug & (FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)) {
         int x,y;
 
-        av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
-        switch (pict->pict_type) {
-        case AV_PICTURE_TYPE_I:
-            av_log(s->avctx,AV_LOG_DEBUG,"I\n");
-            break;
-        case AV_PICTURE_TYPE_P:
-            av_log(s->avctx,AV_LOG_DEBUG,"P\n");
-            break;
-        case AV_PICTURE_TYPE_B:
-            av_log(s->avctx,AV_LOG_DEBUG,"B\n");
-            break;
-        case AV_PICTURE_TYPE_S:
-            av_log(s->avctx,AV_LOG_DEBUG,"S\n");
-            break;
-        case AV_PICTURE_TYPE_SI:
-            av_log(s->avctx,AV_LOG_DEBUG,"SI\n");
-            break;
-        case AV_PICTURE_TYPE_SP:
-            av_log(s->avctx,AV_LOG_DEBUG,"SP\n");
-            break;
-        }
-        for (y = 0; y < s->mb_height; y++) {
-            for (x = 0; x < s->mb_width; x++) {
-                if (s->avctx->debug & FF_DEBUG_SKIP) {
-                    int count = s->mbskip_table[x + y * s->mb_stride];
+        av_log(avctx, AV_LOG_DEBUG, "New frame, type: %c\n",
+               av_get_picture_type_char(pict->pict_type));
+        for (y = 0; y < mb_height; y++) {
+            for (x = 0; x < mb_width; x++) {
+                if (avctx->debug & FF_DEBUG_SKIP) {
+                    int count = mbskip_table[x + y * mb_stride];
                     if (count > 9)
                         count = 9;
-                    av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
+                    av_log(avctx, AV_LOG_DEBUG, "%1d", count);
                 }
-                if (s->avctx->debug & FF_DEBUG_QP) {
-                    av_log(s->avctx, AV_LOG_DEBUG, "%2d",
-                           p->qscale_table[x + y * s->mb_stride]);
+                if (avctx->debug & FF_DEBUG_QP) {
+                    av_log(avctx, AV_LOG_DEBUG, "%2d",
+                           qscale_table[x + y * mb_stride]);
                 }
-                if (s->avctx->debug & FF_DEBUG_MB_TYPE) {
-                    int mb_type = p->mb_type[x + y * s->mb_stride];
+                if (avctx->debug & FF_DEBUG_MB_TYPE) {
+                    int mb_type = mbtype_table[x + y * mb_stride];
                     // Type & MV direction
                     if (IS_PCM(mb_type))
-                        av_log(s->avctx, AV_LOG_DEBUG, "P");
+                        av_log(avctx, AV_LOG_DEBUG, "P");
                     else if (IS_INTRA(mb_type) && IS_ACPRED(mb_type))
-                        av_log(s->avctx, AV_LOG_DEBUG, "A");
+                        av_log(avctx, AV_LOG_DEBUG, "A");
                     else if (IS_INTRA4x4(mb_type))
-                        av_log(s->avctx, AV_LOG_DEBUG, "i");
+                        av_log(avctx, AV_LOG_DEBUG, "i");
                     else if (IS_INTRA16x16(mb_type))
-                        av_log(s->avctx, AV_LOG_DEBUG, "I");
+                        av_log(avctx, AV_LOG_DEBUG, "I");
                     else if (IS_DIRECT(mb_type) && IS_SKIP(mb_type))
-                        av_log(s->avctx, AV_LOG_DEBUG, "d");
+                        av_log(avctx, AV_LOG_DEBUG, "d");
                     else if (IS_DIRECT(mb_type))
-                        av_log(s->avctx, AV_LOG_DEBUG, "D");
+                        av_log(avctx, AV_LOG_DEBUG, "D");
                     else if (IS_GMC(mb_type) && IS_SKIP(mb_type))
-                        av_log(s->avctx, AV_LOG_DEBUG, "g");
+                        av_log(avctx, AV_LOG_DEBUG, "g");
                     else if (IS_GMC(mb_type))
-                        av_log(s->avctx, AV_LOG_DEBUG, "G");
+                        av_log(avctx, AV_LOG_DEBUG, "G");
                     else if (IS_SKIP(mb_type))
-                        av_log(s->avctx, AV_LOG_DEBUG, "S");
+                        av_log(avctx, AV_LOG_DEBUG, "S");
                     else if (!USES_LIST(mb_type, 1))
-                        av_log(s->avctx, AV_LOG_DEBUG, ">");
+                        av_log(avctx, AV_LOG_DEBUG, ">");
                     else if (!USES_LIST(mb_type, 0))
-                        av_log(s->avctx, AV_LOG_DEBUG, "<");
+                        av_log(avctx, AV_LOG_DEBUG, "<");
                     else {
-                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
-                        av_log(s->avctx, AV_LOG_DEBUG, "X");
+                        av_assert2(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
+                        av_log(avctx, AV_LOG_DEBUG, "X");
                     }
 
                     // segmentation
                     if (IS_8X8(mb_type))
-                        av_log(s->avctx, AV_LOG_DEBUG, "+");
+                        av_log(avctx, AV_LOG_DEBUG, "+");
                     else if (IS_16X8(mb_type))
-                        av_log(s->avctx, AV_LOG_DEBUG, "-");
+                        av_log(avctx, AV_LOG_DEBUG, "-");
                     else if (IS_8X16(mb_type))
-                        av_log(s->avctx, AV_LOG_DEBUG, "|");
+                        av_log(avctx, AV_LOG_DEBUG, "|");
                     else if (IS_INTRA(mb_type) || IS_16X16(mb_type))
-                        av_log(s->avctx, AV_LOG_DEBUG, " ");
+                        av_log(avctx, AV_LOG_DEBUG, " ");
                     else
-                        av_log(s->avctx, AV_LOG_DEBUG, "?");
+                        av_log(avctx, AV_LOG_DEBUG, "?");
 
 
                     if (IS_INTERLACED(mb_type))
-                        av_log(s->avctx, AV_LOG_DEBUG, "=");
+                        av_log(avctx, AV_LOG_DEBUG, "=");
                     else
-                        av_log(s->avctx, AV_LOG_DEBUG, " ");
+                        av_log(avctx, AV_LOG_DEBUG, " ");
+                }
+            }
+            av_log(avctx, AV_LOG_DEBUG, "\n");
+        }
+    }
+
+    if ((avctx->debug & (FF_DEBUG_VIS_QP | FF_DEBUG_VIS_MB_TYPE)) ||
+        (avctx->debug_mv)) {
+        const int shift = 1 + quarter_sample;
+        int mb_y;
+        uint8_t *ptr;
+        int i;
+        int h_chroma_shift, v_chroma_shift, block_height;
+        const int width          = avctx->width;
+        const int height         = avctx->height;
+        const int mv_sample_log2 = avctx->codec_id == AV_CODEC_ID_H264 || avctx->codec_id == AV_CODEC_ID_SVQ3 ? 2 : 1;
+        const int mv_stride      = (mb_width << mv_sample_log2) +
+                                   (avctx->codec->id == AV_CODEC_ID_H264 ? 0 : 1);
+
+        *low_delay = 0; // needed to see the vectors without trashing the buffers
+
+        avcodec_get_chroma_sub_sample(avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
+
+        av_frame_make_writable(pict);
+
+        pict->opaque = NULL;
+        ptr          = pict->data[0];
+        block_height = 16 >> v_chroma_shift;
+
+        for (mb_y = 0; mb_y < mb_height; mb_y++) {
+            int mb_x;
+            for (mb_x = 0; mb_x < mb_width; mb_x++) {
+                const int mb_index = mb_x + mb_y * mb_stride;
+                if ((avctx->debug_mv) && motion_val[0]) {
+                    int type;
+                    for (type = 0; type < 3; type++) {
+                        int direction = 0;
+                        switch (type) {
+                        case 0:
+                            if ((!(avctx->debug_mv & FF_DEBUG_VIS_MV_P_FOR)) ||
+                                (pict->pict_type!= AV_PICTURE_TYPE_P))
+                                continue;
+                            direction = 0;
+                            break;
+                        case 1:
+                            if ((!(avctx->debug_mv & FF_DEBUG_VIS_MV_B_FOR)) ||
+                                (pict->pict_type!= AV_PICTURE_TYPE_B))
+                                continue;
+                            direction = 0;
+                            break;
+                        case 2:
+                            if ((!(avctx->debug_mv & FF_DEBUG_VIS_MV_B_BACK)) ||
+                                (pict->pict_type!= AV_PICTURE_TYPE_B))
+                                continue;
+                            direction = 1;
+                            break;
+                        }
+                        if (!USES_LIST(mbtype_table[mb_index], direction))
+                            continue;
+
+                        if (IS_8X8(mbtype_table[mb_index])) {
+                            int i;
+                            for (i = 0; i < 4; i++) {
+                                int sx = mb_x * 16 + 4 + 8 * (i & 1);
+                                int sy = mb_y * 16 + 4 + 8 * (i >> 1);
+                                int xy = (mb_x * 2 + (i & 1) +
+                                          (mb_y * 2 + (i >> 1)) * mv_stride) << (mv_sample_log2 - 1);
+                                int mx = (motion_val[direction][xy][0] >> shift) + sx;
+                                int my = (motion_val[direction][xy][1] >> shift) + sy;
+                                draw_arrow(ptr, sx, sy, mx, my, width,
+                                           height, pict->linesize[0], 100, 0, direction);
+                            }
+                        } else if (IS_16X8(mbtype_table[mb_index])) {
+                            int i;
+                            for (i = 0; i < 2; i++) {
+                                int sx = mb_x * 16 + 8;
+                                int sy = mb_y * 16 + 4 + 8 * i;
+                                int xy = (mb_x * 2 + (mb_y * 2 + i) * mv_stride) << (mv_sample_log2 - 1);
+                                int mx = (motion_val[direction][xy][0] >> shift);
+                                int my = (motion_val[direction][xy][1] >> shift);
+
+                                if (IS_INTERLACED(mbtype_table[mb_index]))
+                                    my *= 2;
+
+                                draw_arrow(ptr, sx, sy, mx + sx, my + sy, width,
+                                           height, pict->linesize[0], 100, 0, direction);
+                            }
+                        } else if (IS_8X16(mbtype_table[mb_index])) {
+                            int i;
+                            for (i = 0; i < 2; i++) {
+                                int sx = mb_x * 16 + 4 + 8 * i;
+                                int sy = mb_y * 16 + 8;
+                                int xy = (mb_x * 2 + i + mb_y * 2 * mv_stride) << (mv_sample_log2 - 1);
+                                int mx = motion_val[direction][xy][0] >> shift;
+                                int my = motion_val[direction][xy][1] >> shift;
+
+                                if (IS_INTERLACED(mbtype_table[mb_index]))
+                                    my *= 2;
+
+                                draw_arrow(ptr, sx, sy, mx + sx, my + sy, width,
+                                           height, pict->linesize[0], 100, 0, direction);
+                            }
+                        } else {
+                              int sx= mb_x * 16 + 8;
+                              int sy= mb_y * 16 + 8;
+                              int xy= (mb_x + mb_y * mv_stride) << mv_sample_log2;
+                              int mx= (motion_val[direction][xy][0]>>shift) + sx;
+                              int my= (motion_val[direction][xy][1]>>shift) + sy;
+                              draw_arrow(ptr, sx, sy, mx, my, width, height, pict->linesize[0], 100, 0, direction);
+                        }
+                    }
+                }
+                if ((avctx->debug & FF_DEBUG_VIS_QP)) {
+                    uint64_t c = (qscale_table[mb_index] * 128 / 31) *
+                                 0x0101010101010101ULL;
+                    int y;
+                    for (y = 0; y < block_height; y++) {
+                        *(uint64_t *)(pict->data[1] + 8 * mb_x +
+                                      (block_height * mb_y + y) *
+                                      pict->linesize[1]) = c;
+                        *(uint64_t *)(pict->data[2] + 8 * mb_x +
+                                      (block_height * mb_y + y) *
+                                      pict->linesize[2]) = c;
+                    }
+                }
+                if ((avctx->debug & FF_DEBUG_VIS_MB_TYPE) &&
+                    motion_val[0]) {
+                    int mb_type = mbtype_table[mb_index];
+                    uint64_t u,v;
+                    int y;
+#define COLOR(theta, r) \
+    u = (int)(128 + r * cos(theta * 3.141592 / 180)); \
+    v = (int)(128 + r * sin(theta * 3.141592 / 180));
+
+
+                    u = v = 128;
+                    if (IS_PCM(mb_type)) {
+                        COLOR(120, 48)
+                    } else if ((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) ||
+                               IS_INTRA16x16(mb_type)) {
+                        COLOR(30, 48)
+                    } else if (IS_INTRA4x4(mb_type)) {
+                        COLOR(90, 48)
+                    } else if (IS_DIRECT(mb_type) && IS_SKIP(mb_type)) {
+                        // COLOR(120, 48)
+                    } else if (IS_DIRECT(mb_type)) {
+                        COLOR(150, 48)
+                    } else if (IS_GMC(mb_type) && IS_SKIP(mb_type)) {
+                        COLOR(170, 48)
+                    } else if (IS_GMC(mb_type)) {
+                        COLOR(190, 48)
+                    } else if (IS_SKIP(mb_type)) {
+                        // COLOR(180, 48)
+                    } else if (!USES_LIST(mb_type, 1)) {
+                        COLOR(240, 48)
+                    } else if (!USES_LIST(mb_type, 0)) {
+                        COLOR(0, 48)
+                    } else {
+                        av_assert2(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
+                        COLOR(300,48)
+                    }
+
+                    u *= 0x0101010101010101ULL;
+                    v *= 0x0101010101010101ULL;
+                    for (y = 0; y < block_height; y++) {
+                        *(uint64_t *)(pict->data[1] + 8 * mb_x +
+                                      (block_height * mb_y + y) * pict->linesize[1]) = u;
+                        *(uint64_t *)(pict->data[2] + 8 * mb_x +
+                                      (block_height * mb_y + y) * pict->linesize[2]) = v;
+                    }
+
+                    // segmentation
+                    if (IS_8X8(mb_type) || IS_16X8(mb_type)) {
+                        *(uint64_t *)(pict->data[0] + 16 * mb_x + 0 +
+                                      (16 * mb_y + 8) * pict->linesize[0]) ^= 0x8080808080808080ULL;
+                        *(uint64_t *)(pict->data[0] + 16 * mb_x + 8 +
+                                      (16 * mb_y + 8) * pict->linesize[0]) ^= 0x8080808080808080ULL;
+                    }
+                    if (IS_8X8(mb_type) || IS_8X16(mb_type)) {
+                        for (y = 0; y < 16; y++)
+                            pict->data[0][16 * mb_x + 8 + (16 * mb_y + y) *
+                                          pict->linesize[0]] ^= 0x80;
+                    }
+                    if (IS_8X8(mb_type) && mv_sample_log2 >= 2) {
+                        int dm = 1 << (mv_sample_log2 - 2);
+                        for (i = 0; i < 4; i++) {
+                            int sx = mb_x * 16 + 8 * (i & 1);
+                            int sy = mb_y * 16 + 8 * (i >> 1);
+                            int xy = (mb_x * 2 + (i & 1) +
+                                     (mb_y * 2 + (i >> 1)) * mv_stride) << (mv_sample_log2 - 1);
+                            // FIXME bidir
+                            int32_t *mv = (int32_t *) &motion_val[0][xy];
+                            if (mv[0] != mv[dm] ||
+                                mv[dm * mv_stride] != mv[dm * (mv_stride + 1)])
+                                for (y = 0; y < 8; y++)
+                                    pict->data[0][sx + 4 + (sy + y) * pict->linesize[0]] ^= 0x80;
+                            if (mv[0] != mv[dm * mv_stride] || mv[dm] != mv[dm * (mv_stride + 1)])
+                                *(uint64_t *)(pict->data[0] + sx + (sy + 4) *
+                                              pict->linesize[0]) ^= 0x8080808080808080ULL;
+                        }
+                    }
+
+                    if (IS_INTERLACED(mb_type) &&
+                        avctx->codec->id == AV_CODEC_ID_H264) {
+                        // hmm
+                    }
+                }
+                mbskip_table[mb_index] = 0;
+            }
+        }
+    }
+}
+
+void ff_print_debug_info(MpegEncContext *s, Picture *p, AVFrame *pict) /* MpegEncContext front end for ff_print_debug_info2() */
+{
+    ff_print_debug_info2(s->avctx, pict, s->mbskip_table, p->mb_type,
+                         p->qscale_table, p->motion_val, &s->low_delay, /* low_delay goes by address: the callee clears it when drawing overlays */
+                         s->mb_width, s->mb_height, s->mb_stride, s->quarter_sample);
+}
+
+int ff_mpv_export_qp_table(MpegEncContext *s, AVFrame *f, Picture *p, int qp_type) /* hand a new reference to p's qscale table buffer to frame f */
+{
+    AVBufferRef *ref = av_buffer_ref(p->qscale_table_buf);
+    int offset = 2*s->mb_stride + 1; /* leading part of the buffer that is skipped before export */
+    if(!ref)
+        return AVERROR(ENOMEM); /* taking the reference failed */
+    av_assert0(ref->size >= offset + s->mb_stride * ((f->height+15)/16)); /* past the offset there must be mb_stride entries per 16 lines of f */
+    ref->size -= offset; /* narrow the new reference so it starts at the offset */
+    ref->data += offset;
+    return av_frame_set_qp_table(f, ref, s->mb_stride, qp_type);
+}
+
+static inline int hpel_motion_lowres(MpegEncContext *s, // lowres MC of one w x h block of a single plane; returns 1 if edge emulation was used, else 0
+                                     uint8_t *dest, uint8_t *src,
+                                     int field_based, int field_select,
+                                     int src_x, int src_y,
+                                     int width, int height, ptrdiff_t stride, // width and height are not referenced in the body
+                                     int h_edge_pos, int v_edge_pos,
+                                     int w, int h, h264_chroma_mc_func *pix_op,
+                                     int motion_x, int motion_y)
+{
+    const int lowres   = s->avctx->lowres;
+    const int op_index = FFMIN(lowres, 3); // index into pix_op, capped at 3
+    const int s_mask   = (2 << lowres) - 1; // selects the low (lowres + 1) bits: the sub-sample part of a vector
+    int emu = 0;
+    int sx, sy;
+
+    if (s->quarter_sample) { // quarter-sample vectors are halved (integer division, truncates toward zero)
+        motion_x /= 2;
+        motion_y /= 2;
+    }
+
+    sx = motion_x & s_mask; // fractional part of the vector at this lowres level
+    sy = motion_y & s_mask;
+    src_x += motion_x >> lowres + 1; // '+' binds tighter than '>>': this is motion_x >> (lowres + 1), the integer part
+    src_y += motion_y >> lowres + 1;
+
+    src   += src_y * stride + src_x;
+
+    if ((unsigned)src_x > FFMAX( h_edge_pos - (!!sx) - w,                 0) || // unsigned cast makes negative coordinates fail the test too
+        (unsigned)src_y > FFMAX((v_edge_pos >> field_based) - (!!sy) - h, 0)) { // one extra column/row is needed when the fraction is nonzero
+        s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
+                                 s->linesize, s->linesize, // uses s->linesize here, not the stride parameter
+                                 w + 1, (h + 1) << field_based,
+                                 src_x, src_y   << field_based,
+                                 h_edge_pos, v_edge_pos);
+        src = s->edge_emu_buffer; // read from the scratch copy instead of the reference plane
+        emu = 1;
+    }
+
+    sx = (sx << 2) >> lowres; // rescale the (lowres + 1)-bit fraction into the range 0..7
+    sy = (sy << 2) >> lowres;
+    if (field_select)
+        src += s->linesize; // start one line further down when field_select is set
+    pix_op[op_index](dest, src, stride, h, sx, sy);
+    return emu;
+}
+
+/* apply one mpeg motion vector to the three components (lowres variant: luma and chroma both go through the pix_op table) */
+static av_always_inline void mpeg_motion_lowres(MpegEncContext *s,
+                                                uint8_t *dest_y,
+                                                uint8_t *dest_cb,
+                                                uint8_t *dest_cr,
+                                                int field_based,
+                                                int bottom_field,
+                                                int field_select,
+                                                uint8_t **ref_picture,
+                                                h264_chroma_mc_func *pix_op,
+                                                int motion_x, int motion_y,
+                                                int h, int mb_y)
+{
+    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
+    int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, sx, sy, uvsx, uvsy;
+    ptrdiff_t uvlinesize, linesize;
+    const int lowres     = s->avctx->lowres;
+    const int op_index   = FFMIN(lowres-1+s->chroma_x_shift, 3); // pix_op index used for the chroma planes, capped at 3
+    const int block_s    = 8>>lowres; // edge length of an 8x8 block after lowres scaling
+    const int s_mask     = (2 << lowres) - 1; // selects the low (lowres + 1) bits: the sub-sample part of a vector
+    const int h_edge_pos = s->h_edge_pos >> lowres;
+    const int v_edge_pos = s->v_edge_pos >> lowres;
+    linesize   = s->current_picture.f->linesize[0] << field_based; // strides are doubled when field_based is 1
+    uvlinesize = s->current_picture.f->linesize[1] << field_based;
+
+    // FIXME obviously not perfect but qpel will not work in lowres anyway
+    if (s->quarter_sample) {
+        motion_x /= 2;
+        motion_y /= 2;
+    }
+
+    if(field_based){
+        motion_y += (bottom_field - field_select)*((1 << lowres)-1); // vertical correction when the destination and reference field parities differ
+    }
+
+    sx = motion_x & s_mask; // fractional part of the luma vector
+    sy = motion_y & s_mask;
+    src_x = s->mb_x * 2 * block_s + (motion_x >> lowres + 1); // '+' binds tighter than '>>': shift count is (lowres + 1)
+    src_y = (mb_y * 2 * block_s >> field_based) + (motion_y >> lowres + 1);
+
+    if (s->out_format == FMT_H263) {
+        uvsx    = ((motion_x >> 1) & s_mask) | (sx & 1); // halved luma vector, with the lowest luma fraction bit OR'ed back in
+        uvsy    = ((motion_y >> 1) & s_mask) | (sy & 1);
+        uvsrc_x = src_x >> 1;
+        uvsrc_y = src_y >> 1;
+    } else if (s->out_format == FMT_H261) {
+        // even chroma mv's are full pel in H261
+        mx      = motion_x / 4;
+        my      = motion_y / 4;
+        uvsx    = (2 * mx) & s_mask;
+        uvsy    = (2 * my) & s_mask;
+        uvsrc_x = s->mb_x * block_s + (mx >> lowres);
+        uvsrc_y =    mb_y * block_s + (my >> lowres);
+    } else {
+        if(s->chroma_y_shift){ // chroma_y_shift set: both vector components are halved for chroma
+            mx      = motion_x / 2;
+            my      = motion_y / 2;
+            uvsx    = mx & s_mask;
+            uvsy    = my & s_mask;
+            uvsrc_x = s->mb_x * block_s                 + (mx >> lowres + 1);
+            uvsrc_y =   (mb_y * block_s >> field_based) + (my >> lowres + 1);
+        } else {
+            if(s->chroma_x_shift){
+            //Chroma422
+                mx = motion_x / 2; // only the horizontal component is halved; vertical reuses the luma values
+                uvsx = mx & s_mask;
+                uvsy = motion_y & s_mask;
+                uvsrc_y = src_y;
+                uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
+            } else {
+            //Chroma444
+                uvsx = motion_x & s_mask; // chroma uses the luma vector and position unchanged
+                uvsy = motion_y & s_mask;
+                uvsrc_x = src_x;
+                uvsrc_y = src_y;
+            }
+        }
+    }
+
+    ptr_y  = ref_picture[0] + src_y   * linesize   + src_x;
+    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
+    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
+
+    if ((unsigned) src_x > FFMAX( h_edge_pos - (!!sx) - 2 * block_s,       0) || uvsrc_y<0 || // unsigned casts make negative luma coordinates fail the test too
+        (unsigned) src_y > FFMAX((v_edge_pos >> field_based) - (!!sy) - h, 0)) {
+        s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ptr_y,
+                                 linesize >> field_based, linesize >> field_based,
+                                 17, 17 + field_based, // fixed-size luma fetch, independent of h
+                                src_x, src_y << field_based, h_edge_pos,
+                                v_edge_pos);
+        ptr_y = s->edge_emu_buffer;
+        if (!CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
+            uint8_t *ubuf = s->edge_emu_buffer + 18 * s->linesize; // chroma scratch areas live in the same buffer, after 18 luma lines
+            uint8_t *vbuf =ubuf + 9 * s->uvlinesize; // Cr scratch follows 9 chroma lines after the Cb scratch
+            s->vdsp.emulated_edge_mc(ubuf,  ptr_cb,
+                                     uvlinesize >> field_based, uvlinesize >> field_based,
+                                     9, 9 + field_based,
+                                    uvsrc_x, uvsrc_y << field_based,
+                                    h_edge_pos >> 1, v_edge_pos >> 1); // edge positions are halved here regardless of chroma_x_shift/chroma_y_shift
+            s->vdsp.emulated_edge_mc(vbuf,  ptr_cr,
+                                     uvlinesize >> field_based,uvlinesize >> field_based,
+                                     9, 9 + field_based,
+                                    uvsrc_x, uvsrc_y << field_based,
+                                    h_edge_pos >> 1, v_edge_pos >> 1);
+            ptr_cb = ubuf;
+            ptr_cr = vbuf;
+        }
+    }
+
+    // FIXME use this for field pix too instead of the obnoxious hack which changes picture.f->data
+    if (bottom_field) { // write to the odd lines of the destination
+        dest_y  += s->linesize;
+        dest_cb += s->uvlinesize;
+        dest_cr += s->uvlinesize;
+    }
+
+    if (field_select) { // read from one line further down in the source
+        ptr_y   += s->linesize;
+        ptr_cb  += s->uvlinesize;
+        ptr_cr  += s->uvlinesize;
+    }
+
+    sx = (sx << 2) >> lowres; // rescale the (lowres + 1)-bit fraction into the range 0..7
+    sy = (sy << 2) >> lowres;
+    pix_op[lowres - 1](dest_y, ptr_y, linesize, h, sx, sy); // luma uses table entry lowres - 1 (assumes lowres >= 1 -- TODO confirm every caller guarantees this)
+
+    if (!CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
+        int hc = s->chroma_y_shift ? (h+1-bottom_field)>>1 : h; // number of chroma rows to process
+        uvsx = (uvsx << 2) >> lowres;
+        uvsy = (uvsy << 2) >> lowres;
+        if (hc) { // hc can be 0 (e.g. h == 1 with bottom_field set); then chroma is skipped
+            pix_op[op_index](dest_cb, ptr_cb, uvlinesize, hc, uvsx, uvsy);
+            pix_op[op_index](dest_cr, ptr_cr, uvlinesize, hc, uvsx, uvsy);
+        }
+    }
+    // FIXME h261 lowres loop filter
+}
+
+static inline void chroma_4mv_motion_lowres(MpegEncContext *s, // lowres chroma MC for a macroblock coded with four luma vectors; mx/my are the summed vectors
+                                            uint8_t *dest_cb, uint8_t *dest_cr,
+                                            uint8_t **ref_picture,
+                                            h264_chroma_mc_func * pix_op,
+                                            int mx, int my)
+{
+    const int lowres     = s->avctx->lowres;
+    const int op_index   = FFMIN(lowres, 3); // index into pix_op, capped at 3
+    const int block_s    = 8 >> lowres; // edge length of an 8x8 block after lowres scaling
+    const int s_mask     = (2 << lowres) - 1; // selects the low (lowres + 1) bits: the sub-sample part of a vector
+    const int h_edge_pos = s->h_edge_pos >> lowres + 1; // '+' binds tighter than '>>': shift count is (lowres + 1)
+    const int v_edge_pos = s->v_edge_pos >> lowres + 1;
+    int emu = 0, src_x, src_y, sx, sy;
+    ptrdiff_t offset;
+    uint8_t *ptr;
+
+    if (s->quarter_sample) { // quarter-sample vectors are halved (integer division, truncates toward zero)
+        mx /= 2;
+        my /= 2;
+    }
+
+    /* In case of 8X8, we construct a single chroma motion vector
+       with a special rounding */
+    mx = ff_h263_round_chroma(mx);
+    my = ff_h263_round_chroma(my);
+
+    sx = mx & s_mask; // fractional part of the chroma vector
+    sy = my & s_mask;
+    src_x = s->mb_x * block_s + (mx >> lowres + 1); // integer part; shift count is (lowres + 1)
+    src_y = s->mb_y * block_s + (my >> lowres + 1);
+
+    offset = src_y * s->uvlinesize + src_x; // same offset is reused for the Cb and Cr planes
+    ptr = ref_picture[1] + offset;
+    if ((unsigned) src_x > FFMAX(h_edge_pos - (!!sx) - block_s, 0) || // unsigned casts make negative coordinates fail the test too
+        (unsigned) src_y > FFMAX(v_edge_pos - (!!sy) - block_s, 0)) {
+        s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ptr,
+                                 s->uvlinesize, s->uvlinesize,
+                                 9, 9,
+                                 src_x, src_y, h_edge_pos, v_edge_pos);
+        ptr = s->edge_emu_buffer;
+        emu = 1; // remember the decision so Cr takes the same path
+    }
+    sx = (sx << 2) >> lowres; // rescale the (lowres + 1)-bit fraction into the range 0..7
+    sy = (sy << 2) >> lowres;
+    pix_op[op_index](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);
+
+    ptr = ref_picture[2] + offset;
+    if (emu) { // the scratch buffer is reused: Cb has already been consumed above
+        s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ptr,
+                                 s->uvlinesize, s->uvlinesize,
+                                 9, 9,
+                                 src_x, src_y, h_edge_pos, v_edge_pos);
+        ptr = s->edge_emu_buffer;
+    }
+    pix_op[op_index](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
+}
+
+/**
+ * Lowres motion compensation of a single macroblock.
+ * @param s context
+ * @param dest_y luma destination pointer
+ * @param dest_cb chroma cb/u destination pointer
+ * @param dest_cr chroma cr/v destination pointer
+ * @param dir direction (0->forward, 1->backward)
+ * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
+ * @param pix_op table of h264 chroma MC functions used for all planes (put or avg)
+ * The motion vectors are taken from s->mv[dir] and the MV type from s->mv_type.
+ */
+static inline void MPV_motion_lowres(MpegEncContext *s,
+                                     uint8_t *dest_y, uint8_t *dest_cb,
+                                     uint8_t *dest_cr,
+                                     int dir, uint8_t **ref_picture,
+                                     h264_chroma_mc_func *pix_op)
+{
+    int mx, my;
+    int mb_x, mb_y, i;
+    const int lowres  = s->avctx->lowres;
+    const int block_s = 8 >>lowres; // edge length of an 8x8 block after lowres scaling
+
+    mb_x = s->mb_x;
+    mb_y = s->mb_y;
+
+    switch (s->mv_type) {
+    case MV_TYPE_16X16: // one vector for the whole macroblock
+        mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
+                           0, 0, 0,
+                           ref_picture, pix_op,
+                           s->mv[dir][0][0], s->mv[dir][0][1],
+                           2 * block_s, mb_y);
+        break;
+    case MV_TYPE_8X8: // four luma blocks, each with its own vector
+        mx = 0;
+        my = 0;
+        for (i = 0; i < 4; i++) {
+            hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * // return value (edge-emulation flag) is ignored here
+                               s->linesize) * block_s,
+                               ref_picture[0], 0, 0,
+                               (2 * mb_x + (i & 1)) * block_s,
+                               (2 * mb_y + (i >> 1)) * block_s,
+                               s->width, s->height, s->linesize,
+                               s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
+                               block_s, block_s, pix_op,
+                               s->mv[dir][i][0], s->mv[dir][i][1]);
+
+            mx += s->mv[dir][i][0]; // accumulate the four vectors for the single chroma vector
+            my += s->mv[dir][i][1];
+        }
+
+        if (!CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY))
+            chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture,
+                                     pix_op, mx, my);
+        break;
+    case MV_TYPE_FIELD:
+        if (s->picture_structure == PICT_FRAME) { // frame picture: the two fields are predicted separately
+            /* top field */
+            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
+                               1, 0, s->field_select[dir][0],
+                               ref_picture, pix_op,
+                               s->mv[dir][0][0], s->mv[dir][0][1],
+                               block_s, mb_y);
+            /* bottom field */
+            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
+                               1, 1, s->field_select[dir][1],
+                               ref_picture, pix_op,
+                               s->mv[dir][1][0], s->mv[dir][1][1],
+                               block_s, mb_y);
+        } else {
+            if (s->picture_structure != s->field_select[dir][0] + 1 && // selected field differs from the current picture structure,
+                s->pict_type != AV_PICTURE_TYPE_B && !s->first_field) { // not a B picture and not the first field: reference the current picture
+                ref_picture = s->current_picture_ptr->f->data;
+
+            }
+            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
+                               0, 0, s->field_select[dir][0],
+                               ref_picture, pix_op,
+                               s->mv[dir][0][0],
+                               s->mv[dir][0][1], 2 * block_s, mb_y >> 1);
+            }
+        break;
+    case MV_TYPE_16X8: // two halves, each with its own vector and field selection
+        for (i = 0; i < 2; i++) {
+            uint8_t **ref2picture;
+
+            if (s->picture_structure == s->field_select[dir][i] + 1 ||
+                s->pict_type == AV_PICTURE_TYPE_B || s->first_field) {
+                ref2picture = ref_picture;
+            } else {
+                ref2picture = s->current_picture_ptr->f->data; // otherwise the reference is the current picture
+            }
+
+            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
+                               0, 0, s->field_select[dir][i],
+                               ref2picture, pix_op,
+                               s->mv[dir][i][0], s->mv[dir][i][1] +
+                               2 * block_s * i, block_s, mb_y >> 1);
+
+            dest_y  +=  2 * block_s *  s->linesize; // advance the destinations for the second iteration
+            dest_cb += (2 * block_s >> s->chroma_y_shift) * s->uvlinesize;
+            dest_cr += (2 * block_s >> s->chroma_y_shift) * s->uvlinesize;
+        }
+        break;
+    case MV_TYPE_DMV:
+        if (s->picture_structure == PICT_FRAME) {
+            for (i = 0; i < 2; i++) {
+                int j;
+                for (j = 0; j < 2; j++) { // j is the destination field (bottom_field); j ^ i is the field_select argument
+                    mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
+                                       1, j, j ^ i,
+                                       ref_picture, pix_op,
+                                       s->mv[dir][2 * i + j][0],
+                                       s->mv[dir][2 * i + j][1],
+                                       block_s, mb_y);
+                }
+                pix_op = s->h264chroma.avg_h264_chroma_pixels_tab; // second pass uses the avg table instead of the caller's table
+            }
+        } else {
+            for (i = 0; i < 2; i++) {
+                mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
+                                   0, 0, s->picture_structure != i + 1,
+                                   ref_picture, pix_op,
+                                   s->mv[dir][2 * i][0],s->mv[dir][2 * i][1],
+                                   2 * block_s, mb_y >> 1);
+
+                // after put we make avg of the same block
+                pix_op = s->h264chroma.avg_h264_chroma_pixels_tab;
+
+                // opposite parity is always in the same
+                // frame if this is second field
+                if (!s->first_field) {
+                    ref_picture = s->current_picture_ptr->f->data;
                 }
             }
-            av_log(s->avctx, AV_LOG_DEBUG, "\n");
         }
+        break;
+    default: // no other mv_type is handled
+        av_assert2(0);
     }
 }
 
@@ -2112,18 +2906,15 @@ void ff_clean_intra_table_entries(MpegEncContext *s)
  */
 static av_always_inline
 void MPV_decode_mb_internal(MpegEncContext *s, int16_t block[12][64],
-                            int is_mpeg12)
+                            int lowres_flag, int is_mpeg12)
 {
     const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
 
-#if FF_API_XVMC
-FF_DISABLE_DEPRECATION_WARNINGS
-    if(CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration){
-        ff_xvmc_decode_mb(s);//xvmc uses pblocks
+    if (CONFIG_XVMC &&
+        s->avctx->hwaccel && s->avctx->hwaccel->decode_mb) {
+        s->avctx->hwaccel->decode_mb(s);//xvmc uses pblocks
         return;
     }
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif /* FF_API_XVMC */
 
     if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
        /* print DCT coefficients */
@@ -2154,15 +2945,17 @@ FF_ENABLE_DEPRECATION_WARNINGS
     else if (!is_mpeg12 && (s->h263_pred || s->h263_aic))
         s->mbintra_table[mb_xy]=1;
 
-    if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==AV_PICTURE_TYPE_B) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc
+    if (   (s->flags&CODEC_FLAG_PSNR)
+        || s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor
+        || !(s->encoding && (s->intra_only || s->pict_type==AV_PICTURE_TYPE_B) && s->avctx->mb_decision != FF_MB_DECISION_RD)) { //FIXME precalc
         uint8_t *dest_y, *dest_cb, *dest_cr;
         int dct_linesize, dct_offset;
         op_pixels_func (*op_pix)[4];
         qpel_mc_func (*op_qpix)[16];
         const int linesize   = s->current_picture.f->linesize[0]; //not s->linesize as this would be wrong for field pics
         const int uvlinesize = s->current_picture.f->linesize[1];
-        const int readable= s->pict_type != AV_PICTURE_TYPE_B || s->encoding || s->avctx->draw_horiz_band;
-        const int block_size = 8;
+        const int readable= s->pict_type != AV_PICTURE_TYPE_B || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
+        const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;
 
         /* avoid copy if macroblock skipped in last frame too */
         /* skip only during decoding as we might trash the buffers during encoding a bit */
@@ -2171,7 +2964,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
 
             if (s->mb_skipped) {
                 s->mb_skipped= 0;
-                assert(s->pict_type!=AV_PICTURE_TYPE_I);
+                av_assert2(s->pict_type!=AV_PICTURE_TYPE_I);
                 *mbskip_ptr = 1;
             } else if(!s->current_picture.reference) {
                 *mbskip_ptr = 1;
@@ -2211,19 +3004,31 @@ FF_ENABLE_DEPRECATION_WARNINGS
                     }
                 }
 
-                op_qpix= s->me.qpel_put;
-                if ((!s->no_rounding) || s->pict_type==AV_PICTURE_TYPE_B){
-                    op_pix = s->hdsp.put_pixels_tab;
+                if(lowres_flag){
+                    h264_chroma_mc_func *op_pix = s->h264chroma.put_h264_chroma_pixels_tab;
+
+                    if (s->mv_dir & MV_DIR_FORWARD) {
+                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.f->data, op_pix);
+                        op_pix = s->h264chroma.avg_h264_chroma_pixels_tab;
+                    }
+                    if (s->mv_dir & MV_DIR_BACKWARD) {
+                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.f->data, op_pix);
+                    }
                 }else{
-                    op_pix = s->hdsp.put_no_rnd_pixels_tab;
-                }
-                if (s->mv_dir & MV_DIR_FORWARD) {
-                    ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.f->data, op_pix, op_qpix);
-                    op_pix = s->hdsp.avg_pixels_tab;
-                    op_qpix= s->me.qpel_avg;
-                }
-                if (s->mv_dir & MV_DIR_BACKWARD) {
-                    ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.f->data, op_pix, op_qpix);
+                    op_qpix = s->me.qpel_put;
+                    if ((!s->no_rounding) || s->pict_type==AV_PICTURE_TYPE_B){
+                        op_pix = s->hdsp.put_pixels_tab;
+                    }else{
+                        op_pix = s->hdsp.put_no_rnd_pixels_tab;
+                    }
+                    if (s->mv_dir & MV_DIR_FORWARD) {
+                        ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.f->data, op_pix, op_qpix);
+                        op_pix = s->hdsp.avg_pixels_tab;
+                        op_qpix= s->me.qpel_avg;
+                    }
+                    if (s->mv_dir & MV_DIR_BACKWARD) {
+                        ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.f->data, op_pix, op_qpix);
+                    }
                 }
             }
 
@@ -2269,17 +3074,17 @@ FF_ENABLE_DEPRECATION_WARNINGS
                     }else{
                         //chroma422
                         dct_linesize = uvlinesize << s->interlaced_dct;
-                        dct_offset   = s->interlaced_dct ? uvlinesize : uvlinesize * 8;
+                        dct_offset   = s->interlaced_dct ? uvlinesize : uvlinesize*block_size;
 
                         add_dct(s, block[4], 4, dest_cb, dct_linesize);
                         add_dct(s, block[5], 5, dest_cr, dct_linesize);
                         add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
                         add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
                         if(!s->chroma_x_shift){//Chroma444
-                            add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
-                            add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
-                            add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
-                            add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
+                            add_dct(s, block[8], 8, dest_cb+block_size, dct_linesize);
+                            add_dct(s, block[9], 9, dest_cr+block_size, dct_linesize);
+                            add_dct(s, block[10], 10, dest_cb+block_size+dct_offset, dct_linesize);
+                            add_dct(s, block[11], 11, dest_cr+block_size+dct_offset, dct_linesize);
                         }
                     }
                 }//fi gray
@@ -2321,17 +3126,17 @@ FF_ENABLE_DEPRECATION_WARNINGS
                     }else{
 
                         dct_linesize = uvlinesize << s->interlaced_dct;
-                        dct_offset   = s->interlaced_dct ? uvlinesize : uvlinesize * 8;
+                        dct_offset   = s->interlaced_dct ? uvlinesize : uvlinesize*block_size;
 
                         s->idsp.idct_put(dest_cb,              dct_linesize, block[4]);
                         s->idsp.idct_put(dest_cr,              dct_linesize, block[5]);
                         s->idsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
                         s->idsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
                         if(!s->chroma_x_shift){//Chroma444
-                            s->idsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
-                            s->idsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
-                            s->idsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
-                            s->idsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
+                            s->idsp.idct_put(dest_cb + block_size,              dct_linesize, block[8]);
+                            s->idsp.idct_put(dest_cr + block_size,              dct_linesize, block[9]);
+                            s->idsp.idct_put(dest_cb + block_size + dct_offset, dct_linesize, block[10]);
+                            s->idsp.idct_put(dest_cr + block_size + dct_offset, dct_linesize, block[11]);
                         }
                     }
                 }//gray
@@ -2349,23 +3154,25 @@ skip_idct:
 void ff_MPV_decode_mb(MpegEncContext *s, int16_t block[12][64]){
 #if !CONFIG_SMALL
     if(s->out_format == FMT_MPEG1) {
-        MPV_decode_mb_internal(s, block, 1);
+        if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1, 1);
+        else                 MPV_decode_mb_internal(s, block, 0, 1);
     } else
 #endif
-        MPV_decode_mb_internal(s, block, 0);
+    if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1, 0);
+    else                  MPV_decode_mb_internal(s, block, 0, 0);
 }
 
 void ff_mpeg_draw_horiz_band(MpegEncContext *s, int y, int h)
 {
-    ff_draw_horiz_band(s->avctx, s->current_picture.f,
-                       s->last_picture.f, y, h, s->picture_structure,
+    ff_draw_horiz_band(s->avctx, s->current_picture_ptr->f,
+                       s->last_picture_ptr ? s->last_picture_ptr->f : NULL, y, h, s->picture_structure,
                        s->first_field, s->low_delay);
 }
 
 void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
     const int linesize   = s->current_picture.f->linesize[0]; //not s->linesize as this would be wrong for field pics
     const int uvlinesize = s->current_picture.f->linesize[1];
-    const int mb_size= 4;
+    const int mb_size= 4 - s->avctx->lowres;
 
     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
@@ -2389,7 +3196,7 @@ void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
             s->dest[0] += (s->mb_y>>1) *   linesize << mb_size;
             s->dest[1] += (s->mb_y>>1) * uvlinesize << (mb_size - s->chroma_y_shift);
             s->dest[2] += (s->mb_y>>1) * uvlinesize << (mb_size - s->chroma_y_shift);
-            assert((s->mb_y&1) == (s->picture_structure == PICT_BOTTOM_FIELD));
+            av_assert1((s->mb_y&1) == (s->picture_structure == PICT_BOTTOM_FIELD));
         }
     }
 }
@@ -2439,6 +3246,7 @@ void ff_mpeg_flush(AVCodecContext *avctx){
     ff_mpeg_unref_picture(s, &s->next_picture);
 
     s->mb_x= s->mb_y= 0;
+    s->closed_gop= 0;
 
     s->parse_context.state= -1;
     s->parse_context.frame_start_found= 0;
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index 1333d44..2e819c0 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
  * Copyright (c) 2002-2004 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -33,6 +33,7 @@
 #include "error_resilience.h"
 #include "fdctdsp.h"
 #include "get_bits.h"
+#include "h264chroma.h"
 #include "h263dsp.h"
 #include "hpeldsp.h"
 #include "idctdsp.h"
@@ -50,6 +51,7 @@
 #include "videodsp.h"
 
 #include "libavutil/opt.h"
+#include "libavutil/timecode.h"
 
 #define FRAME_SKIPPED 100 ///< return value for header parsers if frame is not coded
 
@@ -61,11 +63,10 @@ enum OutputFormat {
 };
 
 #define MAX_FCODE 7
-#define MAX_MV 2048
+#define MAX_MV 4096
 
-#define MAX_THREADS 16
-
-#define MAX_PICTURE_COUNT 32
+#define MAX_THREADS 32
+#define MAX_PICTURE_COUNT 36
 
 #define MAX_B_FRAMES 16
 
@@ -121,6 +122,9 @@ typedef struct Picture{
     AVBufferRef *mc_mb_var_buf;
     uint16_t *mc_mb_var;        ///< Table for motion compensated MB variances
 
+    int alloc_mb_width;         ///< mb_width used to allocate tables
+    int alloc_mb_height;        ///< mb_height used to allocate tables
+
     AVBufferRef *mb_mean_buf;
     uint8_t *mb_mean;           ///< Table for MB luminance
 
@@ -132,14 +136,16 @@ typedef struct Picture{
 
     int field_picture;          ///< whether or not the picture was encoded in separate fields
 
-    int mb_var_sum;             ///< sum of MB variance for current frame
-    int mc_mb_var_sum;          ///< motion compensated MB variance for current frame
+    int64_t mb_var_sum;         ///< sum of MB variance for current frame
+    int64_t mc_mb_var_sum;      ///< motion compensated MB variance for current frame
 
-    int b_frame_score;          /* */
+    int b_frame_score;
     int needs_realloc;          ///< Picture needs to be reallocated (eg due to a frame size change)
 
     int reference;
     int shared;
+
+    uint64_t error[AV_NUM_DATA_POINTERS];
 } Picture;
 
 /**
@@ -182,8 +188,8 @@ typedef struct MotionEstContext{
     int stride;
     int uvstride;
     /* temp variables for picture complexity calculation */
-    int mc_mb_var_sum_temp;
-    int mb_var_sum_temp;
+    int64_t mc_mb_var_sum_temp;
+    int64_t mb_var_sum_temp;
     int scene_change_score;
 /*    cmp, chroma_cmp;*/
     op_pixels_func (*hpel_put)[4];
@@ -317,7 +323,7 @@ typedef struct MpegEncContext {
     uint8_t *coded_block_base;
     uint8_t *coded_block;          ///< used for coded block pattern prediction (msmpeg4v3, wmv1)
     int16_t (*ac_val_base)[16];
-    int16_t (*ac_val[3])[16];      ///< used for for mpeg4 AC prediction, all 3 arrays must be continuous
+    int16_t (*ac_val[3])[16];      ///< used for mpeg4 AC prediction, all 3 arrays must be continuous
     int mb_skipped;                ///< MUST BE SET only during DECODING
     uint8_t *mbskip_table;        /**< used to avoid copy if macroblock skipped (for black regions for example)
                                    and used for b-frame encoding & decoding (contains skip table of next P Frame) */
@@ -336,11 +342,14 @@ typedef struct MpegEncContext {
     int *lambda_table;
     int adaptive_quant;         ///< use adaptive quantization
     int dquant;                 ///< qscale difference to prev qscale
+    int closed_gop;             ///< MPEG1/2 GOP is closed
     int pict_type;              ///< AV_PICTURE_TYPE_I, AV_PICTURE_TYPE_P, AV_PICTURE_TYPE_B, ...
+    int vbv_delay;
     int last_pict_type; //FIXME removes
     int last_non_b_pict_type;   ///< used for mpeg4 gmc b-frames & ratecontrol
     int droppable;
     int frame_rate_index;
+    AVRational mpeg2_frame_rate_ext;
     int last_lambda_for[5];     ///< last lambda for a specific pict type
     int skipdct;                ///< skip dct and code zero residual
 
@@ -350,6 +359,7 @@ typedef struct MpegEncContext {
 
     BlockDSPContext bdsp;
     FDCTDSPContext fdsp;
+    H264ChromaContext h264chroma;
     HpelDSPContext hdsp;
     IDCTDSPContext idsp;
     MECmpContext mecc;
@@ -436,13 +446,15 @@ typedef struct MpegEncContext {
     uint8_t *luma_dc_vlc_length;
 #define UNI_AC_ENC_INDEX(run,level) ((run)*128 + (level))
 
-    int coded_score[8];
+    int coded_score[12];
 
     /** precomputed matrix (combine qscale and DCT renorm) */
     int (*q_intra_matrix)[64];
+    int (*q_chroma_intra_matrix)[64];
     int (*q_inter_matrix)[64];
     /** identical to the above but for MMX & these are not permutated, second 64 entries are bias*/
     uint16_t (*q_intra_matrix16)[2][64];
+    uint16_t (*q_chroma_intra_matrix16)[2][64];
     uint16_t (*q_inter_matrix16)[2][64];
 
     /* noise reduction */
@@ -453,6 +465,7 @@ typedef struct MpegEncContext {
     /* bit rate control */
     int64_t total_bits;
     int frame_bits;                ///< bits used for the current frame
+    int stuffing_bits;             ///< bits used for stuffing
     int next_lambda;               ///< next lambda used for retrying to encode a frame
     RateControlContext rc_context; ///< contains stuff only accessed in ratecontrol.c
 
@@ -485,6 +498,7 @@ typedef struct MpegEncContext {
     int prev_mb_info, last_mb_info;
     uint8_t *mb_info_ptr;
     int mb_info_size;
+    int ehc_mode;
 
     /* H.263+ specific */
     int umvplus;                    ///< == H263+ && unrestricted_mv
@@ -535,6 +549,7 @@ typedef struct MpegEncContext {
 
     /* MJPEG specific */
     struct MJpegContext *mjpeg_ctx;
+    int esc_pos;
 
     /* MSMPEG4 specific */
     int mv_table_index;
@@ -576,11 +591,13 @@ typedef struct MpegEncContext {
     int q_scale_type;
     int intra_vlc_format;
     int alternate_scan;
+    int seq_disp_ext;
     int repeat_first_field;
     int chroma_420_type;
     int chroma_format;
 #define CHROMA_420 1
 #define CHROMA_422 2
+#define CHROMA_444 3
     int chroma_x_shift;//depend on pix_format, that depend on chroma_format
     int chroma_y_shift;
 
@@ -594,11 +611,16 @@ typedef struct MpegEncContext {
     /* RTP specific */
     int rtp_mode;
 
+    char *tc_opt_str;        ///< timecode option string
+    AVTimecode tc;           ///< timecode context
+
     uint8_t *ptr_lastgob;
+    int swap_uv;             //vcr2 codec is an MPEG-2 variant with U and V swapped
+    int pack_pblocks;        //xvmc needs to keep blocks without gaps.
     int16_t (*pblocks[12])[64];
 
     int16_t (*block)[64]; ///< points to one of the following blocks
-    int16_t (*blocks)[8][64]; // for HQ mode we need to keep the best block
+    int16_t (*blocks)[12][64]; // for HQ mode we need to keep the best block
     int (*decode_mb)(struct MpegEncContext *s, int16_t block[6][64]); // used by some codecs to avoid a switch()
 #define SLICE_OK         0
 #define SLICE_ERROR     -1
@@ -702,15 +724,24 @@ int ff_MPV_encode_init(AVCodecContext *avctx);
 int ff_MPV_encode_end(AVCodecContext *avctx);
 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
                           const AVFrame *frame, int *got_packet);
-void ff_MPV_encode_init_x86(MpegEncContext *s);
+void ff_dct_encode_init_x86(MpegEncContext *s);
 void ff_MPV_common_init_x86(MpegEncContext *s);
+void ff_MPV_common_init_axp(MpegEncContext *s);
 void ff_MPV_common_init_arm(MpegEncContext *s);
 void ff_MPV_common_init_neon(MpegEncContext *s);
 void ff_MPV_common_init_ppc(MpegEncContext *s);
 void ff_clean_intra_table_entries(MpegEncContext *s);
 void ff_mpeg_draw_horiz_band(MpegEncContext *s, int y, int h);
 void ff_mpeg_flush(AVCodecContext *avctx);
-void ff_print_debug_info(MpegEncContext *s, Picture *p);
+
+void ff_print_debug_info(MpegEncContext *s, Picture *p, AVFrame *pict);
+void ff_print_debug_info2(AVCodecContext *avctx, AVFrame *pict, uint8_t *mbskip_table,
+                         uint32_t *mbtype_table, int8_t *qscale_table, int16_t (*motion_val[2])[2],
+                         int *low_delay,
+                         int mb_width, int mb_height, int mb_stride, int quarter_sample);
+
+int ff_mpv_export_qp_table(MpegEncContext *s, AVFrame *f, Picture *p, int qp_type);
+
 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix);
 int ff_find_unused_picture(MpegEncContext *s, int shared);
 void ff_denoise_dct(MpegEncContext *s, int16_t *block);
@@ -721,6 +752,7 @@ int ff_mpeg_update_thread_context(AVCodecContext *dst, const AVCodecContext *src
 void ff_set_qscale(MpegEncContext * s, int qscale);
 
 int ff_dct_common_init(MpegEncContext *s);
+int ff_dct_encode_init(MpegEncContext *s);
 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[2][64],
                        const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra);
 int ff_dct_quantize_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
@@ -747,7 +779,7 @@ int ff_alloc_picture(MpegEncContext *s, Picture *pic, int shared);
 void ff_block_permute(int16_t *block, uint8_t *permutation, const uint8_t *scantable, int last);
 
 static inline void ff_update_block_index(MpegEncContext *s){
-    const int block_size = 8;
+    const int block_size= 8 >> s->avctx->lowres;
 
     s->block_index[0]+=2;
     s->block_index[1]+=2;
@@ -840,4 +872,5 @@ int ff_mpeg_ref_picture(MpegEncContext *s, Picture *dst, Picture *src);
 void ff_mpeg_unref_picture(MpegEncContext *s, Picture *picture);
 void ff_free_picture_tables(Picture *pic);
 
+
 #endif /* AVCODEC_MPEGVIDEO_H */
diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index 2d0cd83..eefde7a 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -5,20 +5,20 @@
  *
  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -57,11 +57,12 @@
 #include "internal.h"
 #include "bytestream.h"
 #include <limits.h>
+#include "sp5x.h"
 
 #define QUANT_BIAS_SHIFT 8
 
 #define QMAT_SHIFT_MMX 16
-#define QMAT_SHIFT 22
+#define QMAT_SHIFT 21
 
 static int encode_picture(MpegEncContext *s, int picture_number);
 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
@@ -112,8 +113,7 @@ void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
                  *           3444240 >= (1 << 36) / (x) >= 275 */
 
                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
-                                        (ff_aanscales[i] * qscale *
-                                         quant_matrix[j]));
+                                        (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
             }
         } else {
             for (i = 0; i < 64; i++) {
@@ -233,6 +233,23 @@ static void MPV_encode_defaults(MpegEncContext *s)
     s->picture_in_gop_number = 0;
 }
 
+av_cold int ff_dct_encode_init(MpegEncContext *s) { /* select the encoder's quantize/denoise function pointers in s */
+    if (ARCH_X86)
+        ff_dct_encode_init_x86(s); /* NOTE(review): presumably installs optimized dct_quantize/denoise_dct - confirm in x86 code */
+
+    if (CONFIG_H263_ENCODER)
+        ff_h263dsp_init(&s->h263dsp);
+    if (!s->dct_quantize) /* only fall back to the C quantizer when none is set yet */
+        s->dct_quantize = ff_dct_quantize_c;
+    if (!s->denoise_dct) /* same fallback rule for the denoiser */
+        s->denoise_dct  = denoise_dct_c;
+    s->fast_dct_quantize = s->dct_quantize; /* captured before the trellis override below, so it stays non-trellis */
+    if (s->avctx->trellis)
+        s->dct_quantize  = dct_quantize_trellis_c;
+
+    return 0; /* no failure path in this function */
+}
+
 /* init video encoder */
 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
 {
@@ -251,18 +268,22 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
         }
         break;
     case AV_CODEC_ID_MJPEG:
+    case AV_CODEC_ID_AMV:
         format_supported = 0;
         /* JPEG color space */
         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
+            avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
             (avctx->color_range == AVCOL_RANGE_JPEG &&
              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
-              avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
+              avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
+              avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
             format_supported = 1;
         /* MPEG color space */
         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
-                  avctx->pix_fmt == AV_PIX_FMT_YUV422P))
+                  avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
+                  avctx->pix_fmt == AV_PIX_FMT_YUV444P))
             format_supported = 1;
 
         if (!format_supported) {
@@ -278,6 +299,10 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
     }
 
     switch (avctx->pix_fmt) {
+    case AV_PIX_FMT_YUVJ444P:
+    case AV_PIX_FMT_YUV444P:
+        s->chroma_format = CHROMA_444;
+        break;
     case AV_PIX_FMT_YUVJ422P:
     case AV_PIX_FMT_YUV422P:
         s->chroma_format = CHROMA_422;
@@ -294,8 +319,9 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
     s->height   = avctx->height;
     if (avctx->gop_size > 600 &&
         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
-        av_log(avctx, AV_LOG_ERROR,
-               "Warning keyframe interval too large! reducing it ...\n");
+        av_log(avctx, AV_LOG_WARNING,
+               "keyframe interval too large!, reducing it from %d to %d\n",
+               avctx->gop_size, 600);
         avctx->gop_size = 600;
     }
     s->gop_size     = avctx->gop_size;
@@ -305,6 +331,7 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
     if (avctx->max_b_frames > MAX_B_FRAMES) {
         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
                "is %d.\n", MAX_B_FRAMES);
+        avctx->max_b_frames = MAX_B_FRAMES;
     }
     s->max_b_frames = avctx->max_b_frames;
     s->codec_id     = avctx->codec->id;
@@ -313,6 +340,24 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
     s->mpeg_quant         = avctx->mpeg_quant;
     s->rtp_mode           = !!avctx->rtp_payload_size;
     s->intra_dc_precision = avctx->intra_dc_precision;
+
+    // work around some differences between how applications specify dc precision
+    if (s->intra_dc_precision < 0) {
+        s->intra_dc_precision += 8;
+    } else if (s->intra_dc_precision >= 8)
+        s->intra_dc_precision -= 8;
+
+    if (s->intra_dc_precision < 0) {
+        av_log(avctx, AV_LOG_ERROR,
+                "intra dc precision must be positive, note some applications use"
+                " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
+        av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
+        return AVERROR(EINVAL);
+    }
     s->user_specified_pts = AV_NOPTS_VALUE;
 
     if (s->gop_size <= 1) {
@@ -339,9 +384,33 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
 
     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
-        av_log(avctx, AV_LOG_ERROR,
-               "a vbv buffer size is needed, "
-               "for encoding with a maximum bitrate\n");
+        switch(avctx->codec_id) { /* derive a default VBV buffer size from the max rate; codecs not listed keep 0 */
+        case AV_CODEC_ID_MPEG1VIDEO:
+        case AV_CODEC_ID_MPEG2VIDEO:
+            avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384; /* LL: long may be 32 bits, rate * 112 would overflow it */
+            break;
+        case AV_CODEC_ID_MPEG4:
+        case AV_CODEC_ID_MSMPEG4V1:
+        case AV_CODEC_ID_MSMPEG4V2:
+        case AV_CODEC_ID_MSMPEG4V3:
+            if       (avctx->rc_max_rate >= 15000000) { /* piecewise-linear interpolation between fixed (rate, size) points */
+                avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
+            } else if(avctx->rc_max_rate >=  2000000) {
+                avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
+            } else if(avctx->rc_max_rate >=   384000) {
+                avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
+            } else
+                avctx->rc_buffer_size = 40;
+            avctx->rc_buffer_size *= 16384; /* the sizes above are in units of 16384 bits */
+            break;
+        }
+        if (avctx->rc_buffer_size) {
+            av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
+        }
+    }
+
+    if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
+        av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
         return -1;
     }
 
@@ -356,7 +425,7 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
     }
 
     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
-        av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
+        av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
         return -1;
     }
 
@@ -377,9 +446,9 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
     if (!s->fixed_qscale &&
         avctx->bit_rate * av_q2d(avctx->time_base) >
             avctx->bit_rate_tolerance) {
-        av_log(avctx, AV_LOG_ERROR,
-               "bitrate tolerance too small for bitrate\n");
-        return -1;
+        av_log(avctx, AV_LOG_WARNING,
+               "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
+        avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
     }
 
     if (s->avctx->rc_max_rate &&
@@ -418,18 +487,74 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
         return -1;
     }
+    if (s->max_b_frames < 0) {
+        av_log(avctx, AV_LOG_ERROR,
+               "max b frames must be 0 or positive for mpegvideo based encoders\n");
+        return -1;
+    }
 
     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
          s->codec_id == AV_CODEC_ID_H263  ||
          s->codec_id == AV_CODEC_ID_H263P) &&
         (avctx->sample_aspect_ratio.num > 255 ||
          avctx->sample_aspect_ratio.den > 255)) {
-        av_log(avctx, AV_LOG_ERROR,
-               "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
+        av_log(avctx, AV_LOG_WARNING,
+               "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
+        av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
+                   avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
+    }
+
+    if ((s->codec_id == AV_CODEC_ID_H263  ||
+         s->codec_id == AV_CODEC_ID_H263P) &&
+        (avctx->width  > 2048 ||
+         avctx->height > 1152 )) {
+        av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
+        return -1;
+    }
+    if ((s->codec_id == AV_CODEC_ID_H263  ||
+         s->codec_id == AV_CODEC_ID_H263P) &&
+        ((avctx->width &3) ||
+         (avctx->height&3) )) {
+        av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
+        return -1;
+    }
+
+    if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
+        (avctx->width  > 4095 ||
+         avctx->height > 4095 )) {
+        av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
+        return -1;
+    }
+
+    if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
+        (avctx->width  > 16383 ||
+         avctx->height > 16383 )) {
+        av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
         return -1;
     }
 
+    if (s->codec_id == AV_CODEC_ID_RV10 &&
+        (avctx->width &15 ||
+         avctx->height&15 )) {
+        av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (s->codec_id == AV_CODEC_ID_RV20 &&
+        (avctx->width &3 ||
+         avctx->height&3 )) {
+        av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
+        return AVERROR(EINVAL);
+    }
+
+    if ((s->codec_id == AV_CODEC_ID_WMV1 ||
+         s->codec_id == AV_CODEC_ID_WMV2) &&
+         avctx->width & 1) {
+         av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
+         return -1;
+    }
+
     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
@@ -437,7 +562,8 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
     }
 
     // FIXME mpeg2 uses that too
-    if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
+    if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
+                          && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
         av_log(avctx, AV_LOG_ERROR,
                "mpeg2 style quantization not supported by codec\n");
         return -1;
@@ -487,6 +613,7 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
         s->codec_id != AV_CODEC_ID_MPEG4      &&
         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
+        s->codec_id != AV_CODEC_ID_MJPEG      &&
         (s->codec_id != AV_CODEC_ID_H263P)) {
         av_log(avctx, AV_LOG_ERROR,
                "multi threaded encoding not supported by codec\n");
@@ -495,14 +622,17 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
 
     if (s->avctx->thread_count < 1) {
         av_log(avctx, AV_LOG_ERROR,
-               "automatic thread number detection not supported by codec,"
+               "automatic thread number detection not supported by codec, "
                "patch welcome\n");
         return -1;
     }
 
-    if (s->avctx->thread_count > 1)
+    if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
         s->rtp_mode = 1;
 
+    if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
+        s->h263_slice_structured = 1;
+
     if (!avctx->time_base.den || !avctx->time_base.num) {
         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
         return -1;
@@ -529,8 +659,7 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
         //return -1;
     }
 
-    if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
-        s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
+    if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
         // (a + x * 3 / 8) / x
         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
         s->inter_quant_bias = 0;
@@ -540,11 +669,18 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
     }
 
+    if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
+        av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
+        return AVERROR(EINVAL);
+    }
+
     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
         s->intra_quant_bias = avctx->intra_quant_bias;
     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
         s->inter_quant_bias = avctx->inter_quant_bias;
 
+    av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
+
     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
         s->avctx->time_base.den > (1 << 16) - 1) {
         av_log(avctx, AV_LOG_ERROR,
@@ -569,6 +705,7 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
         s->rtp_mode   = 1;
         break;
     case AV_CODEC_ID_MJPEG:
+    case AV_CODEC_ID_AMV:
         s->out_format = FMT_MJPEG;
         s->intra_only = 1; /* force intra only for jpeg */
         if (!CONFIG_MJPEG_ENCODER ||
@@ -593,13 +730,13 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
         break;
     case AV_CODEC_ID_H263:
         if (!CONFIG_H263_ENCODER)
-        return -1;
+            return -1;
         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
                              s->width, s->height) == 8) {
-            av_log(avctx, AV_LOG_INFO,
+            av_log(avctx, AV_LOG_ERROR,
                    "The specified picture size of %dx%d is not valid for "
                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
-                   "352x288, 704x576, and 1408x1152."
+                   "352x288, 704x576, and 1408x1152. "
                    "Try H.263+.\n", s->width, s->height);
             return -1;
         }
@@ -703,9 +840,6 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
     if (ff_MPV_common_init(s) < 0)
         return -1;
 
-    if (ARCH_X86)
-        ff_MPV_encode_init_x86(s);
-
     ff_fdctdsp_init(&s->fdsp, avctx);
     ff_me_cmp_init(&s->mecc, avctx);
     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
@@ -722,8 +856,10 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 
     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
+    FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
+    FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
@@ -735,15 +871,7 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
                           2 * 64 * sizeof(uint16_t), fail);
     }
 
-    if (CONFIG_H263_ENCODER)
-        ff_h263dsp_init(&s->h263dsp);
-    if (!s->dct_quantize)
-        s->dct_quantize = ff_dct_quantize_c;
-    if (!s->denoise_dct)
-        s->denoise_dct  = denoise_dct_c;
-    s->fast_dct_quantize = s->dct_quantize;
-    if (avctx->trellis)
-        s->dct_quantize  = dct_quantize_trellis_c;
+    ff_dct_encode_init(s);
 
     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
@@ -864,6 +992,10 @@ av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
     av_freep(&s->avctx->stats_out);
     av_freep(&s->ac_stats);
 
+    if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
+    if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
+    s->q_chroma_intra_matrix=   NULL;
+    s->q_chroma_intra_matrix16= NULL;
     av_freep(&s->q_intra_matrix);
     av_freep(&s->q_inter_matrix);
     av_freep(&s->q_intra_matrix16);
@@ -928,18 +1060,17 @@ static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
 
         if (pts != AV_NOPTS_VALUE) {
             if (s->user_specified_pts != AV_NOPTS_VALUE) {
-                int64_t time = pts;
                 int64_t last = s->user_specified_pts;
 
-                if (time <= last) {
+                if (pts <= last) {
                     av_log(s->avctx, AV_LOG_ERROR,
-                           "Error, Invalid timestamp=%"PRId64", "
-                           "last=%"PRId64"\n", pts, s->user_specified_pts);
-                    return -1;
+                           "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
+                           pts, last);
+                    return AVERROR(EINVAL);
                 }
 
                 if (!s->low_delay && display_picture_number == 1)
-                    s->dts_delta = time - last;
+                    s->dts_delta = pts - last;
             }
             s->user_specified_pts = pts;
         } else {
@@ -956,7 +1087,7 @@ static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
     }
 
     if (pic_arg) {
-        if (!pic_arg->buf[0]);
+        if (!pic_arg->buf[0])
             direct = 0;
         if (pic_arg->linesize[0] != s->linesize)
             direct = 0;
@@ -964,8 +1095,14 @@ static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
             direct = 0;
         if (pic_arg->linesize[2] != s->uvlinesize)
             direct = 0;
+        if ((s->width & 15) || (s->height & 15))
+            direct = 0;
+        if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
+            direct = 0;
+        if (s->linesize & (STRIDE_ALIGN-1))
+            direct = 0;
 
-        av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
+        av_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
 
         if (direct) {
@@ -1012,6 +1149,12 @@ static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
                     int h = s->height >> v_shift;
                     uint8_t *src = pic_arg->data[i];
                     uint8_t *dst = pic->f->data[i];
+                    int vpad = 16;
+
+                    if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
+                        && !s->progressive_sequence
+                        && FFALIGN(s->height, 32) - s->height > 16)
+                        vpad = 32;
 
                     if (!s->avctx->rc_buffer_size)
                         dst += INPLACE_OFFSET;
@@ -1019,12 +1162,21 @@ static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
                     if (src_stride == dst_stride)
                         memcpy(dst, src, src_stride * h);
                     else {
-                        while (h--) {
-                            memcpy(dst, src, w);
-                            dst += dst_stride;
+                        int h2 = h;
+                        uint8_t *dst2 = dst;
+                        while (h2--) {
+                            memcpy(dst2, src, w);
+                            dst2 += dst_stride;
                             src += src_stride;
                         }
                     }
+                    if ((s->width & 15) || (s->height & (vpad-1))) {
+                        s->mpvencdsp.draw_edges(dst, dst_stride,
+                                                w, h,
+                                                16>>h_shift,
+                                                vpad>>v_shift,
+                                                EDGE_BOTTOM);
+                    }
                 }
             }
         }
@@ -1061,19 +1213,23 @@ static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
 
-                switch (s->avctx->frame_skip_exp) {
+                switch (FFABS(s->avctx->frame_skip_exp)) {
                 case 0: score    =  FFMAX(score, v);          break;
                 case 1: score   += FFABS(v);                  break;
-                case 2: score   += v * v;                     break;
-                case 3: score64 += FFABS(v * v * (int64_t)v); break;
-                case 4: score64 += v * v * (int64_t)(v * v);  break;
+                case 2: score64 += v * (int64_t)v;                       break;
+                case 3: score64 += FFABS(v * (int64_t)v * v);            break;
+                case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
                 }
             }
         }
     }
+    emms_c();
 
     if (score)
         score64 = score;
+    if (s->avctx->frame_skip_exp < 0)
+        score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
+                      -1.0/s->avctx->frame_skip_exp);
 
     if (score64 < s->avctx->frame_skip_threshold)
         return 1;
@@ -1106,7 +1262,7 @@ static int estimate_best_b_count(MpegEncContext *s)
     int64_t best_rd  = INT64_MAX;
     int best_b_count = -1;
 
-    assert(scale >= 0 && scale <= 3);
+    av_assert0(scale >= 0 && scale <= 3);
 
     //emms_c();
     //s->next_picture_ptr->quality;
@@ -1136,29 +1292,31 @@ static int estimate_best_b_count(MpegEncContext *s)
     for (i = 0; i < s->max_b_frames + 2; i++) {
         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
                                                 s->next_picture_ptr;
+        uint8_t *data[4];
 
         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
             pre_input = *pre_input_ptr;
+            memcpy(data, pre_input_ptr->f->data, sizeof(data));
 
             if (!pre_input.shared && i) {
-                pre_input.f->data[0] += INPLACE_OFFSET;
-                pre_input.f->data[1] += INPLACE_OFFSET;
-                pre_input.f->data[2] += INPLACE_OFFSET;
+                data[0] += INPLACE_OFFSET;
+                data[1] += INPLACE_OFFSET;
+                data[2] += INPLACE_OFFSET;
             }
 
             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
                                        s->tmp_frames[i]->linesize[0],
-                                       pre_input.f->data[0],
+                                       data[0],
                                        pre_input.f->linesize[0],
                                        c->width, c->height);
             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
                                        s->tmp_frames[i]->linesize[1],
-                                       pre_input.f->data[1],
+                                       data[1],
                                        pre_input.f->linesize[1],
                                        c->width >> 1, c->height >> 1);
             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
                                        s->tmp_frames[i]->linesize[2],
-                                       pre_input.f->data[2],
+                                       data[2],
                                        pre_input.f->linesize[2],
                                        c->width >> 1, c->height >> 1);
         }
@@ -1221,6 +1379,19 @@ static int select_input_picture(MpegEncContext *s)
 
     /* set next picture type & ordering */
     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
+        if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
+            if (s->picture_in_gop_number < s->gop_size &&
+                s->next_picture_ptr &&
+                skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
+                // FIXME check that the gop check above is +-1 correct
+                av_frame_unref(s->input_picture[0]->f);
+
+                ff_vbv_update(s, 0);
+
+                goto no_output_pic;
+            }
+        }
+
         if (/*s->picture_in_gop_number >= s->gop_size ||*/
             s->next_picture_ptr == NULL || s->intra_only) {
             s->reordered_input_picture[0] = s->input_picture[0];
@@ -1230,19 +1401,6 @@ static int select_input_picture(MpegEncContext *s)
         } else {
             int b_frames;
 
-            if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
-                if (s->picture_in_gop_number < s->gop_size &&
-                    skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
-                    // FIXME check that te gop check above is +-1 correct
-                    av_frame_unref(s->input_picture[0]->f);
-
-                    emms_c();
-                    ff_vbv_update(s, 0);
-
-                    goto no_output_pic;
-                }
-            }
-
             if (s->flags & CODEC_FLAG_PASS2) {
                 for (i = 0; i < s->max_b_frames + 1; i++) {
                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
@@ -1391,25 +1549,26 @@ no_output_pic:
 
 static void frame_end(MpegEncContext *s)
 {
-    int i;
-
     if (s->unrestricted_mv &&
         s->current_picture.reference &&
         !s->intra_only) {
         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
         int hshift = desc->log2_chroma_w;
         int vshift = desc->log2_chroma_h;
-        s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
+        s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
+                                s->current_picture.f->linesize[0],
                                 s->h_edge_pos, s->v_edge_pos,
                                 EDGE_WIDTH, EDGE_WIDTH,
                                 EDGE_TOP | EDGE_BOTTOM);
-        s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
+        s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
+                                s->current_picture.f->linesize[1],
                                 s->h_edge_pos >> hshift,
                                 s->v_edge_pos >> vshift,
                                 EDGE_WIDTH >> hshift,
                                 EDGE_WIDTH >> vshift,
                                 EDGE_TOP | EDGE_BOTTOM);
-        s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
+        s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
+                                s->current_picture.f->linesize[2],
                                 s->h_edge_pos >> hshift,
                                 s->v_edge_pos >> vshift,
                                 EDGE_WIDTH >> hshift,
@@ -1424,14 +1583,6 @@ static void frame_end(MpegEncContext *s)
     if (s->pict_type!= AV_PICTURE_TYPE_B)
         s->last_non_b_pict_type = s->pict_type;
 
-    if (s->encoding) {
-        /* release non-reference frames */
-        for (i = 0; i < MAX_PICTURE_COUNT; i++) {
-            if (!s->picture[i].reference)
-                ff_mpeg_unref_picture(s, &s->picture[i]);
-        }
-    }
-
     s->avctx->coded_frame = s->current_picture_ptr->f;
 
 }
@@ -1522,7 +1673,7 @@ static int frame_start(MpegEncContext *s)
     }
 
     if (s->dct_error_sum) {
-        assert(s->avctx->noise_reduction && s->encoding);
+        av_assert2(s->avctx->noise_reduction && s->encoding);
         update_noise_reduction(s);
     }
 
@@ -1547,8 +1698,7 @@ int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
 
     /* output? */
     if (s->new_picture.f->data[0]) {
-        if (!pkt->data &&
-            (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
+        if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
             return ret;
         if (s->mb_info) {
             s->mb_info_ptr = av_packet_new_side_data(pkt,
@@ -1626,14 +1776,16 @@ vbv_retry:
                 goto vbv_retry;
             }
 
-            assert(s->avctx->rc_max_rate);
+            av_assert0(s->avctx->rc_max_rate);
         }
 
         if (s->flags & CODEC_FLAG_PASS1)
             ff_write_pass1_stats(s);
 
         for (i = 0; i < 4; i++) {
-            s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
+            s->current_picture_ptr->f->error[i] =
+            s->current_picture.f->error[i] =
+                s->current_picture.error[i];
             avctx->error[i] += s->current_picture_ptr->f->error[i];
         }
 
@@ -1645,6 +1797,7 @@ vbv_retry:
         s->frame_bits  = put_bits_count(&s->pb);
 
         stuffing_count = ff_vbv_update(s, s->frame_bits);
+        s->stuffing_bits = 8*stuffing_count;
         if (stuffing_count) {
             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
                     stuffing_count + 50) {
@@ -1699,7 +1852,7 @@ vbv_retry:
 
             vbv_delay = FFMAX(vbv_delay, min_delay);
 
-            assert(vbv_delay < 0xFFFF);
+            av_assert0(vbv_delay < 0xFFFF);
 
             s->vbv_delay_ptr[0] &= 0xF8;
             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
@@ -1727,7 +1880,14 @@ vbv_retry:
     } else {
         s->frame_bits = 0;
     }
-    assert((s->frame_bits & 7) == 0);
+
+    /* release non-reference frames */
+    for (i = 0; i < MAX_PICTURE_COUNT; i++) {
+        if (!s->picture[i].reference)
+            ff_mpeg_unref_picture(s, &s->picture[i]);
+    }
+
+    av_assert1((s->frame_bits & 7) == 0);
 
     pkt->size = s->frame_bits / 8;
     *got_packet = !!pkt->size;
@@ -1851,15 +2011,17 @@ static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
 static av_always_inline void encode_mb_internal(MpegEncContext *s,
                                                 int motion_x, int motion_y,
                                                 int mb_block_height,
+                                                int mb_block_width,
                                                 int mb_block_count)
 {
-    int16_t weight[8][64];
-    int16_t orig[8][64];
+    int16_t weight[12][64];
+    int16_t orig[12][64];
     const int mb_x = s->mb_x;
     const int mb_y = s->mb_y;
     int i;
-    int skip_dct[8];
+    int skip_dct[12];
     int dct_offset = s->linesize * 8; // default for progressive frames
+    int uv_dct_offset = s->uvlinesize * 8;
     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
     ptrdiff_t wrap_y, wrap_c;
 
@@ -1901,27 +2063,31 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
     ptr_y  = s->new_picture.f->data[0] +
              (mb_y * 16 * wrap_y)              + mb_x * 16;
     ptr_cb = s->new_picture.f->data[1] +
-             (mb_y * mb_block_height * wrap_c) + mb_x * 8;
+             (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
     ptr_cr = s->new_picture.f->data[2] +
-             (mb_y * mb_block_height * wrap_c) + mb_x * 8;
+             (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
 
-    if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
-        uint8_t *ebuf = s->edge_emu_buffer + 32;
+    if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
+        uint8_t *ebuf = s->edge_emu_buffer + 36 * wrap_y;
+        int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
+        int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
                                  wrap_y, wrap_y,
                                  16, 16, mb_x * 16, mb_y * 16,
                                  s->width, s->height);
         ptr_y = ebuf;
-        s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
+        s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
                                  wrap_c, wrap_c,
-                                 8, mb_block_height, mb_x * 8, mb_y * 8,
-                                 s->width >> 1, s->height >> 1);
-        ptr_cb = ebuf + 18 * wrap_y;
-        s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
+                                 mb_block_width, mb_block_height,
+                                 mb_x * mb_block_width, mb_y * mb_block_height,
+                                 cw, ch);
+        ptr_cb = ebuf + 16 * wrap_y;
+        s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
                                  wrap_c, wrap_c,
-                                 8, mb_block_height, mb_x * 8, mb_y * 8,
-                                 s->width >> 1, s->height >> 1);
-        ptr_cr = ebuf + 18 * wrap_y + 8;
+                                 mb_block_width, mb_block_height,
+                                 mb_x * mb_block_width, mb_y * mb_block_height,
+                                 cw, ch);
+        ptr_cr = ebuf + 16 * wrap_y + 16;
     }
 
     if (s->mb_intra) {
@@ -1942,8 +2108,10 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
                     s->interlaced_dct = 1;
 
                     dct_offset = wrap_y;
+                    uv_dct_offset = wrap_c;
                     wrap_y <<= 1;
-                    if (s->chroma_format == CHROMA_422)
+                    if (s->chroma_format == CHROMA_422 ||
+                        s->chroma_format == CHROMA_444)
                         wrap_c <<= 1;
                 }
             }
@@ -1960,11 +2128,16 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
         } else {
             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
-            if (!s->chroma_y_shift) { /* 422 */
-                s->pdsp.get_pixels(s->block[6],
-                                   ptr_cb + (dct_offset >> 1), wrap_c);
-                s->pdsp.get_pixels(s->block[7],
-                                   ptr_cr + (dct_offset >> 1), wrap_c);
+            if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
+                s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
+                s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
+            } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
+                s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
+                s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
+                s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
+                s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
+                s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
+                s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
             }
         }
     } else {
@@ -2020,6 +2193,7 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
                     s->interlaced_dct = 1;
 
                     dct_offset = wrap_y;
+                    uv_dct_offset = wrap_c;
                     wrap_y <<= 1;
                     if (s->chroma_format == CHROMA_422)
                         wrap_c <<= 1;
@@ -2041,10 +2215,10 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
             if (!s->chroma_y_shift) { /* 422 */
-                s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
-                                    dest_cb + (dct_offset >> 1), wrap_c);
-                s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
-                                    dest_cr + (dct_offset >> 1), wrap_c);
+                s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
+                                    dest_cb + uv_dct_offset, wrap_c);
+                s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
+                                    dest_cr + uv_dct_offset, wrap_c);
             }
         }
         /* pre quantization */
@@ -2066,12 +2240,12 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
                 skip_dct[5] = 1;
             if (!s->chroma_y_shift) { /* 422 */
-                if (s->mecc.sad[1](NULL, ptr_cb + (dct_offset >> 1),
-                                   dest_cb + (dct_offset >> 1),
+                if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
+                                   dest_cb + uv_dct_offset,
                                    wrap_c, 8) < 20 * s->qscale)
                     skip_dct[6] = 1;
-                if (s->mecc.sad[1](NULL, ptr_cr + (dct_offset >> 1),
-                                   dest_cr + (dct_offset >> 1),
+                if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
+                                   dest_cr + uv_dct_offset,
                                    wrap_c, 8) < 20 * s->qscale)
                     skip_dct[7] = 1;
             }
@@ -2093,17 +2267,17 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
             get_visual_weight(weight[5], ptr_cr                , wrap_c);
         if (!s->chroma_y_shift) { /* 422 */
             if (!skip_dct[6])
-                get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
+                get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
                                   wrap_c);
             if (!skip_dct[7])
-                get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
+                get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
                                   wrap_c);
         }
         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
     }
 
     /* DCT & quantize */
-    assert(s->out_format != FMT_MJPEG || s->qscale == 8);
+    av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
     {
         for (i = 0; i < mb_block_count; i++) {
             if (!skip_dct[i]) {
@@ -2149,6 +2323,12 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
         s->block_last_index[5] = 0;
         s->block[4][0] =
         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
+        if (!s->chroma_y_shift) { /* 422 / 444 */
+            for (i=6; i<12; i++) {
+                s->block_last_index[i] = 0;
+                s->block[i][0] = s->block[4][0];
+            }
+        }
     }
 
     // non c quantize code returns incorrect block_last_index FIXME
@@ -2199,18 +2379,20 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
         break;
     case AV_CODEC_ID_MJPEG:
+    case AV_CODEC_ID_AMV:
         if (CONFIG_MJPEG_ENCODER)
             ff_mjpeg_encode_mb(s, s->block);
         break;
     default:
-        assert(0);
+        av_assert1(0);
     }
 }
 
 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
 {
-    if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
-    else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
+    if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
+    else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
+    else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
 }
 
 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
@@ -2301,7 +2483,7 @@ static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegE
         s->dest[0] = s->rd_scratchpad;
         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
-        assert(s->linesize >= 32); //FIXME
+        av_assert0(s->linesize >= 32); //FIXME
     }
 
     encode_mb(s, motion_x, motion_y);
@@ -2347,7 +2529,7 @@ static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, in
         }
     }
 
-    assert(acc>=0);
+    av_assert2(acc>=0);
 
     return acc;
 }
@@ -2397,6 +2579,8 @@ static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
 static int estimate_motion_thread(AVCodecContext *c, void *arg){
     MpegEncContext *s= *(void**)arg;
 
+    ff_check_alignment();
+
     s->me.dia_size= s->avctx->dia_size;
     s->first_slice_line=1;
     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
@@ -2423,6 +2607,8 @@ static int mb_var_thread(AVCodecContext *c, void *arg){
     MpegEncContext *s= *(void**)arg;
     int mb_x, mb_y;
 
+    ff_check_alignment();
+
     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
             int xx = mb_x * 16;
@@ -2450,7 +2636,7 @@ static void write_slice_end(MpegEncContext *s){
 
         ff_mpeg4_stuffing(&s->pb);
     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
-        ff_mjpeg_encode_stuffing(&s->pb);
+        ff_mjpeg_encode_stuffing(s);
     }
 
     avpriv_align_put_bits(&s->pb);
@@ -2514,6 +2700,8 @@ static int encode_thread(AVCodecContext *c, void *arg){
     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
     PutBitContext pb[2], pb2[2], tex_pb[2];
 
+    ff_check_alignment();
+
     for(i=0; i<2; i++){
         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
@@ -2535,7 +2723,12 @@ static int encode_thread(AVCodecContext *c, void *arg){
         /* note: quant matrix value (8) is implied here */
         s->last_dc[i] = 128 << s->intra_dc_precision;
 
-        s->current_picture.f->error[i] = 0;
+        s->current_picture.error[i] = 0;
+    }
+    if(s->codec_id==AV_CODEC_ID_AMV){
+        s->last_dc[0] = 128*8/13;
+        s->last_dc[1] = 128*8/14;
+        s->last_dc[2] = 128*8/14;
     }
     s->mb_skip_run = 0;
     memset(s->last_mv, 0, sizeof(s->last_mv));
@@ -2580,7 +2773,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
             if(s->data_partitioning){
                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
-                    av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
+                    av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
                     return -1;
                 }
             }
@@ -2616,6 +2809,9 @@ static int encode_thread(AVCodecContext *c, void *arg){
                 case AV_CODEC_ID_MPEG1VIDEO:
                     if(s->mb_skip_run) is_gob_start=0;
                     break;
+                case AV_CODEC_ID_MJPEG:
+                    if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
+                    break;
                 }
 
                 if(is_gob_start){
@@ -2627,7 +2823,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
                         }
                     }
 
-                    assert((put_bits_count(&s->pb)&7) == 0);
+                    av_assert2((put_bits_count(&s->pb)&7) == 0);
                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
 
                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
@@ -2834,8 +3030,9 @@ static int encode_thread(AVCodecContext *c, void *arg){
                         int16_t ac[6][16];
                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
                         static const int dquant_tab[4]={-1,1,-2,2};
+                        int storecoefs = s->mb_intra && s->dc_val[0];
 
-                        assert(backup_s.dquant == 0);
+                        av_assert2(backup_s.dquant == 0);
 
                         //FIXME intra
                         s->mv_dir= best_s.mv_dir;
@@ -2853,7 +3050,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
                                 continue;
                             backup_s.dquant= dquant;
-                            if(s->mb_intra && s->dc_val[0]){
+                            if(storecoefs){
                                 for(i=0; i<6; i++){
                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
@@ -2863,7 +3060,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
                             if(best_s.qscale != qp){
-                                if(s->mb_intra && s->dc_val[0]){
+                                if(storecoefs){
                                     for(i=0; i<6; i++){
                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
@@ -3093,13 +3290,13 @@ static int encode_thread(AVCodecContext *c, void *arg){
                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
 
-                s->current_picture.f->error[0] += sse(
+                s->current_picture.error[0] += sse(
                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
                     s->dest[0], w, h, s->linesize);
-                s->current_picture.f->error[1] += sse(
+                s->current_picture.error[1] += sse(
                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
-                s->current_picture.f->error[2] += sse(
+                s->current_picture.error[2] += sse(
                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
             }
@@ -3152,9 +3349,9 @@ static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src)
     MERGE(misc_bits);
     MERGE(er.error_count);
     MERGE(padding_bug_score);
-    MERGE(current_picture.f->error[0]);
-    MERGE(current_picture.f->error[1]);
-    MERGE(current_picture.f->error[2]);
+    MERGE(current_picture.error[0]);
+    MERGE(current_picture.error[1]);
+    MERGE(current_picture.error[2]);
 
     if(dst->avctx->noise_reduction){
         for(i=0; i<64; i++){
@@ -3207,7 +3404,7 @@ static int estimate_qp(MpegEncContext *s, int dry_run){
 
 /* must be called before writing the header */
 static void set_frame_distances(MpegEncContext * s){
-    assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
+    av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
 
     if(s->pict_type==AV_PICTURE_TYPE_B){
@@ -3263,6 +3460,13 @@ static int encode_picture(MpegEncContext *s, int picture_number)
         update_qscale(s);
     }
 
+    if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
+        if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
+        if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
+        s->q_chroma_intra_matrix   = s->q_intra_matrix;
+        s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
+    }
+
     s->mb_intra=0; //for the rate distortion & bit compare functions
     for(i=1; i<context_count; i++){
         ret = ff_update_duplicate_context(s->thread_context[i], s);
@@ -3305,7 +3509,9 @@ static int encode_picture(MpegEncContext *s, int picture_number)
         s->pict_type= AV_PICTURE_TYPE_I;
         for(i=0; i<s->mb_stride*s->mb_height; i++)
             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
-        av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
+        if(s->msmpeg4_version >= 3)
+            s->no_rounding=1;
+        av_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
     }
 
@@ -3370,17 +3576,50 @@ static int encode_picture(MpegEncContext *s, int picture_number)
         s->qscale= 3; //reduce clipping problems
 
     if (s->out_format == FMT_MJPEG) {
+        const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
+        const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
+
+        if (s->avctx->intra_matrix) {
+            chroma_matrix =
+            luma_matrix = s->avctx->intra_matrix;
+        }
+        if (s->avctx->chroma_intra_matrix)
+            chroma_matrix = s->avctx->chroma_intra_matrix;
+
         /* for mjpeg, we do include qscale in the matrix */
         for(i=1;i<64;i++){
             int j = s->idsp.idct_permutation[i];
 
-            s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
+            s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
+            s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
         }
         s->y_dc_scale_table=
         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
+        s->chroma_intra_matrix[0] =
         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
+        ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
+                       s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
+        s->qscale= 8;
+    }
+    if(s->codec_id == AV_CODEC_ID_AMV){
+        static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
+        static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
+        for(i=1;i<64;i++){
+            int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
+
+            s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
+            s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
+        }
+        s->y_dc_scale_table= y;
+        s->c_dc_scale_table= c;
+        s->intra_matrix[0] = 13;
+        s->chroma_intra_matrix[0] = 14;
+        ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
+                       s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
+        ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
+                       s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
         s->qscale= 8;
     }
 
@@ -3393,12 +3632,13 @@ static int encode_picture(MpegEncContext *s, int picture_number)
     if (s->current_picture.f->key_frame)
         s->picture_in_gop_number=0;
 
+    s->mb_x = s->mb_y = 0;
     s->last_bits= put_bits_count(&s->pb);
     switch(s->out_format) {
     case FMT_MJPEG:
         if (CONFIG_MJPEG_ENCODER)
             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
-                                           s->intra_matrix);
+                                           s->intra_matrix, s->chroma_intra_matrix);
         break;
     case FMT_H261:
         if (CONFIG_H261_ENCODER)
@@ -3425,7 +3665,7 @@ static int encode_picture(MpegEncContext *s, int picture_number)
             ff_mpeg1_encode_picture_header(s, picture_number);
         break;
     default:
-        assert(0);
+        av_assert0(0);
     }
     bits= put_bits_count(&s->pb);
     s->header_bits= bits - s->last_bits;
@@ -3516,7 +3756,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
         block[0] = (block[0] + (q >> 1)) / q;
         start_i = 1;
         last_non_zero = 0;
-        qmat = s->q_intra_matrix[qscale];
+        qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
             bias= 1<<(QMAT_SHIFT-1);
         length     = s->intra_ac_vlc_length;
@@ -3562,7 +3802,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
 //                coeff[2][k]= -level+2;
             }
             coeff_count[i]= FFMIN(level, 2);
-            assert(coeff_count[i]);
+            av_assert2(coeff_count[i]);
             max |=level;
         }else{
             coeff[0][i]= (level>>31)|1;
@@ -3596,9 +3836,9 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
             const int alevel= FFABS(level);
             int unquant_coeff;
 
-            assert(level);
+            av_assert2(level);
 
-            if(s->out_format == FMT_H263){
+            if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
                 unquant_coeff= alevel*qmul + qadd;
             }else{ //MPEG1
                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
@@ -3627,7 +3867,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
                     }
                 }
 
-                if(s->out_format == FMT_H263){
+                if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
                     for(j=survivor_count-1; j>=0; j--){
                         int run= i - survivor[j];
                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
@@ -3653,7 +3893,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
                     }
                 }
 
-                if(s->out_format == FMT_H263){
+                if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
                   for(j=survivor_count-1; j>=0; j--){
                         int run= i - survivor[j];
                         int score= distortion + score_tab[i-run];
@@ -3686,7 +3926,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
         survivor[ survivor_count++ ]= i+1;
     }
 
-    if(s->out_format != FMT_H263){
+    if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
         last_score= 256*256*256*120;
         for(i= survivor[0]; i<=last_non_zero + 1; i++){
             int score= score_tab[i];
@@ -3719,7 +3959,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
             int alevel= FFABS(level);
             int unquant_coeff, score, distortion;
 
-            if(s->out_format == FMT_H263){
+            if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
                     unquant_coeff= (alevel*qmul + qadd)>>3;
             }else{ //MPEG1
                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
@@ -3745,7 +3985,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
     }
 
     i= last_i;
-    assert(last_level);
+    av_assert2(last_level);
 
     block[ perm_scantable[last_non_zero] ]= last_level;
     i -= last_run + 1;
@@ -3861,8 +4101,8 @@ STOP_TIMER("memset rem[]")}
         weight[i] = w;
 //        w=weight[i] = (63*qns + (w/2)) / w;
 
-        assert(w>0);
-        assert(w<(1<<6));
+        av_assert2(w>0);
+        av_assert2(w<(1<<6));
         sum += w*w;
     }
     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
@@ -3928,7 +4168,7 @@ STOP_TIMER("dct")}
             const int level= block[0];
             int change, old_coeff;
 
-            assert(s->mb_intra);
+            av_assert2(s->mb_intra);
 
             old_coeff= q*level;
 
@@ -3972,7 +4212,7 @@ STOP_TIMER("dct")}
             }else{
                 old_coeff=0;
                 run2--;
-                assert(run2>=0 || i >= last_non_zero );
+                av_assert2(run2>=0 || i >= last_non_zero );
             }
 
             for(change=-1; change<=1; change+=2){
@@ -4000,7 +4240,7 @@ STOP_TIMER("dct")}
                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
                         }
                     }else{
-                        assert(FFABS(new_level)==1);
+                        av_assert2(FFABS(new_level)==1);
 
                         if(analyze_gradient){
                             int g= d1[ scantable[i] ];
@@ -4033,7 +4273,7 @@ STOP_TIMER("dct")}
                     }
                 }else{
                     new_coeff=0;
-                    assert(FFABS(level)==1);
+                    av_assert2(FFABS(level)==1);
 
                     if(i < last_non_zero){
                         int next_i= i + run2 + 1;
@@ -4062,7 +4302,7 @@ STOP_TIMER("dct")}
                 score *= lambda;
 
                 unquant_change= new_coeff - old_coeff;
-                assert((score < 100*lambda && score > -100*lambda) || lambda==0);
+                av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
 
                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
                                                    unquant_change);
@@ -4094,7 +4334,7 @@ STOP_TIMER("iterative step")}
 
             if(best_coeff > last_non_zero){
                 last_non_zero= best_coeff;
-                assert(block[j]);
+                av_assert2(block[j]);
 #ifdef REFINE_STATS
 after_last++;
 #endif
@@ -4122,7 +4362,7 @@ if(block[j]){
 #ifdef REFINE_STATS
 count++;
 if(256*256*256*64 % count == 0){
-    printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
+    av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
 }
 #endif
             run=0;
@@ -4185,7 +4425,7 @@ int ff_dct_quantize_c(MpegEncContext *s,
         block[0] = (block[0] + (q >> 1)) / q;
         start_i = 1;
         last_non_zero = 0;
-        qmat = s->q_intra_matrix[qscale];
+        qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
     } else {
         start_i = 0;
diff --git a/libavcodec/mpegvideo_motion.c b/libavcodec/mpegvideo_motion.c
index b399db8..0d3ba08 100644
--- a/libavcodec/mpegvideo_motion.c
+++ b/libavcodec/mpegvideo_motion.c
@@ -4,25 +4,26 @@
  *
  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include <string.h>
 
+#include "libavutil/avassert.h"
 #include "libavutil/internal.h"
 #include "avcodec.h"
 #include "h261.h"
@@ -209,18 +210,16 @@ static inline int hpel_motion(MpegEncContext *s,
         dxy |= (motion_y & 1) << 1;
     src += src_y * s->linesize + src_x;
 
-    if (s->unrestricted_mv) {
         if ((unsigned)src_x > FFMAX(s->h_edge_pos - (motion_x & 1) - 8, 0) ||
             (unsigned)src_y > FFMAX(s->v_edge_pos - (motion_y & 1) - 8, 0)) {
             s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
                                      s->linesize, s->linesize,
                                      9, 9,
-                                     src_x, src_y, s->h_edge_pos,
-                                     s->v_edge_pos);
+                                     src_x, src_y,
+                                     s->h_edge_pos, s->v_edge_pos);
             src = s->edge_emu_buffer;
             emu = 1;
         }
-    }
     pix_op[dxy](dest, src, s->linesize, 8);
     return emu;
 }
@@ -308,7 +307,7 @@ void mpeg_motion_internal(MpegEncContext *s,
     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
 
     if ((unsigned)src_x > FFMAX(s->h_edge_pos - (motion_x & 1) - 16, 0) ||
-        (unsigned)src_y > FFMAX(v_edge_pos - (motion_y & 1) - h, 0)) {
+        (unsigned)src_y > FFMAX(   v_edge_pos - (motion_y & 1) - h , 0)) {
         if (is_mpeg12 ||
             s->codec_id == AV_CODEC_ID_MPEG2VIDEO ||
             s->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
@@ -324,19 +323,20 @@ void mpeg_motion_internal(MpegEncContext *s,
                                  s->h_edge_pos, s->v_edge_pos);
         ptr_y = s->edge_emu_buffer;
         if (!CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
-            uint8_t *uvbuf = s->edge_emu_buffer + 18 * s->linesize;
-            s->vdsp.emulated_edge_mc(uvbuf, ptr_cb,
+            uint8_t *ubuf = s->edge_emu_buffer + 18 * s->linesize;
+            uint8_t *vbuf = ubuf + 9 * s->uvlinesize;
+            s->vdsp.emulated_edge_mc(ubuf, ptr_cb,
                                      s->uvlinesize, s->uvlinesize,
                                      9, 9 + field_based,
                                      uvsrc_x, uvsrc_y << field_based,
                                      s->h_edge_pos >> 1, s->v_edge_pos >> 1);
-            s->vdsp.emulated_edge_mc(uvbuf + 16, ptr_cr,
+            s->vdsp.emulated_edge_mc(vbuf, ptr_cr,
                                      s->uvlinesize, s->uvlinesize,
                                      9, 9 + field_based,
                                      uvsrc_x, uvsrc_y << field_based,
                                      s->h_edge_pos >> 1, s->v_edge_pos >> 1);
-            ptr_cb = uvbuf;
-            ptr_cr = uvbuf + 16;
+            ptr_cb = ubuf;
+            ptr_cr = vbuf;
         }
     }
 
@@ -394,7 +394,7 @@ static void mpeg_motion_field(MpegEncContext *s, uint8_t *dest_y,
                               int motion_x, int motion_y, int h, int mb_y)
 {
 #if !CONFIG_SMALL
-    if(s->out_format == FMT_MPEG1)
+    if (s->out_format == FMT_MPEG1)
         mpeg_motion_internal(s, dest_y, dest_cb, dest_cr, 1,
                              bottom_field, field_select, ref_picture, pix_op,
                              motion_x, motion_y, h, 1, mb_y);
@@ -469,7 +469,7 @@ static inline void obmc_motion(MpegEncContext *s,
     int i;
     uint8_t *ptr[5];
 
-    assert(s->quarter_sample == 0);
+    av_assert2(s->quarter_sample == 0);
 
     for (i = 0; i < 5; i++) {
         if (i && mv[i][0] == mv[MID][0] && mv[i][1] == mv[MID][1]) {
@@ -537,7 +537,7 @@ static inline void qpel_motion(MpegEncContext *s,
     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
 
     if ((unsigned)src_x > FFMAX(s->h_edge_pos - (motion_x & 3) - 16, 0) ||
-        (unsigned)src_y > FFMAX(v_edge_pos - (motion_y & 3) - h, 0)) {
+        (unsigned)src_y > FFMAX(   v_edge_pos - (motion_y & 3) - h, 0)) {
         s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ptr_y,
                                  s->linesize, s->linesize,
                                  17, 17 + field_based,
@@ -545,19 +545,20 @@ static inline void qpel_motion(MpegEncContext *s,
                                  s->h_edge_pos, s->v_edge_pos);
         ptr_y = s->edge_emu_buffer;
         if (!CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
-            uint8_t *uvbuf = s->edge_emu_buffer + 18 * s->linesize;
-            s->vdsp.emulated_edge_mc(uvbuf, ptr_cb,
+            uint8_t *ubuf = s->edge_emu_buffer + 18 * s->linesize;
+            uint8_t *vbuf = ubuf + 9 * s->uvlinesize;
+            s->vdsp.emulated_edge_mc(ubuf, ptr_cb,
                                      s->uvlinesize, s->uvlinesize,
                                      9, 9 + field_based,
                                      uvsrc_x, uvsrc_y << field_based,
                                      s->h_edge_pos >> 1, s->v_edge_pos >> 1);
-            s->vdsp.emulated_edge_mc(uvbuf + 16, ptr_cr,
+            s->vdsp.emulated_edge_mc(vbuf, ptr_cr,
                                      s->uvlinesize, s->uvlinesize,
                                      9, 9 + field_based,
                                      uvsrc_x, uvsrc_y << field_based,
                                      s->h_edge_pos >> 1, s->v_edge_pos >> 1);
-            ptr_cb = uvbuf;
-            ptr_cr = uvbuf + 16;
+            ptr_cb = ubuf;
+            ptr_cr = vbuf;
         }
     }
 
@@ -671,7 +672,7 @@ static inline void apply_obmc(MpegEncContext *s,
     const int mot_xy     = mb_x * 2 + mb_y * 2 * mot_stride;
     int mx, my, i;
 
-    assert(!s->mb_skipped);
+    av_assert2(!s->mb_skipped);
 
     AV_COPY32(mv_cache[1][1], cur_frame->motion_val[0][mot_xy]);
     AV_COPY32(mv_cache[1][2], cur_frame->motion_val[0][mot_xy + 1]);
@@ -898,8 +899,8 @@ static av_always_inline void MPV_motion_internal(MpegEncContext *s,
                                   s->mv[dir][1][0], s->mv[dir][1][1], 8, mb_y);
             }
         } else {
-            if (s->picture_structure != s->field_select[dir][0] + 1 &&
-                s->pict_type != AV_PICTURE_TYPE_B && !s->first_field) {
+            if (   s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != AV_PICTURE_TYPE_B && !s->first_field
+                || !ref_picture[0]) {
                 ref_picture = s->current_picture_ptr->f->data;
             }
 
@@ -913,8 +914,8 @@ static av_always_inline void MPV_motion_internal(MpegEncContext *s,
         for (i = 0; i < 2; i++) {
             uint8_t **ref2picture;
 
-            if (s->picture_structure == s->field_select[dir][i] + 1
-                || s->pict_type == AV_PICTURE_TYPE_B || s->first_field) {
+            if ((s->picture_structure == s->field_select[dir][i] + 1
+                || s->pict_type == AV_PICTURE_TYPE_B || s->first_field) && ref_picture[0]) {
                 ref2picture = ref_picture;
             } else {
                 ref2picture = s->current_picture_ptr->f->data;
@@ -943,6 +944,9 @@ static av_always_inline void MPV_motion_internal(MpegEncContext *s,
                 pix_op = s->hdsp.avg_pixels_tab;
             }
         } else {
+            if (!ref_picture[0]) {
+                ref_picture = s->current_picture_ptr->f->data;
+            }
             for (i = 0; i < 2; i++) {
                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
                             s->picture_structure != i + 1,
@@ -961,7 +965,7 @@ static av_always_inline void MPV_motion_internal(MpegEncContext *s,
             }
         }
         break;
-    default: assert(0);
+    default: av_assert2(0);
     }
 }
 
diff --git a/libavcodec/mpegvideo_parser.c b/libavcodec/mpegvideo_parser.c
index bec1b36..7aa3660 100644
--- a/libavcodec/mpegvideo_parser.c
+++ b/libavcodec/mpegvideo_parser.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2000,2001 Fabrice Bellard
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -44,6 +44,8 @@ static void mpegvideo_extract_headers(AVCodecParserContext *s,
     int top_field_first, repeat_first_field, progressive_frame;
     int horiz_size_ext, vert_size_ext, bit_rate_ext;
     int did_set_size=0;
+    int bit_rate = 0;
+    int vbv_delay = 0;
 //FIXME replace the crap with get_bits()
     s->repeat_pict = 0;
 
@@ -55,6 +57,8 @@ static void mpegvideo_extract_headers(AVCodecParserContext *s,
         case PICTURE_START_CODE:
             if (bytes_left >= 2) {
                 s->pict_type = (buf[1] >> 3) & 7;
+                if (bytes_left >= 4)
+                vbv_delay = ((buf[1] & 0x07) << 13) | (buf[2] << 5) | (buf[3]  >> 3);
             }
             break;
         case SEQ_START_CODE:
@@ -68,7 +72,7 @@ static void mpegvideo_extract_headers(AVCodecParserContext *s,
                 frame_rate_index = buf[3] & 0xf;
                 pc->frame_rate.den = avctx->time_base.den = ff_mpeg12_frame_rate_tab[frame_rate_index].num;
                 pc->frame_rate.num = avctx->time_base.num = ff_mpeg12_frame_rate_tab[frame_rate_index].den;
-                avctx->bit_rate = ((buf[4]<<10) | (buf[5]<<2) | (buf[6]>>6))*400;
+                bit_rate = (buf[4]<<10) | (buf[5]<<2) | (buf[6]>>6);
                 avctx->codec_id = AV_CODEC_ID_MPEG1VIDEO;
             }
             break;
@@ -88,7 +92,7 @@ static void mpegvideo_extract_headers(AVCodecParserContext *s,
 
                         pc->width  |=(horiz_size_ext << 12);
                         pc->height |=( vert_size_ext << 12);
-                        avctx->bit_rate += (bit_rate_ext << 18) * 400;
+                        bit_rate = (bit_rate&0x3FFFF) | (bit_rate_ext << 18);
                         if(did_set_size)
                             ff_set_dimensions(avctx, pc->width, pc->height);
                         avctx->time_base.den = pc->frame_rate.den * (frame_rate_ext_n + 1) * 2;
@@ -139,6 +143,12 @@ static void mpegvideo_extract_headers(AVCodecParserContext *s,
         }
     }
  the_end: ;
+    if (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO && bit_rate) {
+        avctx->rc_max_rate = 400*bit_rate;
+    } else if (avctx->codec_id == AV_CODEC_ID_MPEG1VIDEO && bit_rate &&
+               (bit_rate != 0x3FFFF || vbv_delay != 0xFFFF)) {
+        avctx->bit_rate = 400*bit_rate;
+    }
 }
 
 static int mpegvideo_parse(AVCodecParserContext *s,
@@ -179,18 +189,28 @@ static int mpegvideo_split(AVCodecContext *avctx,
 {
     int i;
     uint32_t state= -1;
+    int found=0;
 
     for(i=0; i<buf_size; i++){
         state= (state<<8) | buf[i];
-        if(state != 0x1B3 && state != 0x1B5 && state < 0x200 && state >= 0x100)
+        if(state == 0x1B3){
+            found=1;
+        }else if(found && state != 0x1B5 && state < 0x200 && state >= 0x100)
             return i-3;
     }
     return 0;
 }
 
+static int mpegvideo_parse_init(AVCodecParserContext *s)
+{
+    s->pict_type = AV_PICTURE_TYPE_NONE; // no picture type known yet; the first frame might be partial
+    return 0; // always reports success
+}
+
 AVCodecParser ff_mpegvideo_parser = {
     .codec_ids      = { AV_CODEC_ID_MPEG1VIDEO, AV_CODEC_ID_MPEG2VIDEO },
     .priv_data_size = sizeof(struct MpvParseContext),
+    .parser_init    = mpegvideo_parse_init,
     .parser_parse   = mpegvideo_parse,
     .parser_close   = ff_parse_close,
     .split          = mpegvideo_split,
diff --git a/libavcodec/mpegvideo_xvmc.c b/libavcodec/mpegvideo_xvmc.c
index a8e068b..7d7f7ec 100644
--- a/libavcodec/mpegvideo_xvmc.c
+++ b/libavcodec/mpegvideo_xvmc.c
@@ -2,20 +2,20 @@
  * XVideo Motion Compensation
  * Copyright (c) 2003 Ivan Kalvachev
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -33,8 +33,6 @@
 #include "xvmc_internal.h"
 #include "version.h"
 
-#if FF_API_XVMC
-
 /**
  * Initialize the block field of the MpegEncContext pointer passed as
  * parameter after making sure that the data is not corrupted.
@@ -50,6 +48,15 @@ void ff_xvmc_init_block(MpegEncContext *s)
     s->block = (int16_t (*)[64])(render->data_blocks + render->next_free_data_block_num * 64);
 }
 
+/* Swap the block pointers stored in s->pblocks[4] and s->pblocks[5]. */
+static void exchange_uv(MpegEncContext *s)
+{
+    int16_t (*const saved)[64] = s->pblocks[4];
+
+    s->pblocks[4] = s->pblocks[5];
+    s->pblocks[5] = saved;
+}
+
 /**
  * Fill individual block pointers, so there are no gaps in the data_block array
  * in case not all blocks in the macroblock are coded.
@@ -67,6 +74,9 @@ void ff_xvmc_pack_pblocks(MpegEncContext *s, int cbp)
             s->pblocks[i] = NULL;
         cbp += cbp;
     }
+    if (s->swap_uv) {
+        exchange_uv(s);
+    }
 }
 
 /**
@@ -74,8 +84,9 @@ void ff_xvmc_pack_pblocks(MpegEncContext *s, int cbp)
  * This function should be called for every new field and/or frame.
  * It should be safe to call the function a few times for the same field.
  */
-int ff_xvmc_field_start(MpegEncContext *s, AVCodecContext *avctx)
+static int ff_xvmc_field_start(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size)
 {
+    struct MpegEncContext *s = avctx->priv_data;
     struct xvmc_pix_fmt *last, *next, *render = (struct xvmc_pix_fmt*)s->current_picture.f->data[2];
     const int mb_block_count = 4 + (1 << s->chroma_format);
 
@@ -142,20 +153,22 @@ return -1;
  * some leftover blocks, for example from error_resilience(), may remain.
  * It should be safe to call the function a few times for the same field.
  */
-void ff_xvmc_field_end(MpegEncContext *s)
+static int ff_xvmc_field_end(AVCodecContext *avctx) /* installed as .end_frame in the AVHWAccel tables of this file */
 {
+    struct MpegEncContext *s = avctx->priv_data; /* the codec private data is used as the MpegEncContext */
     struct xvmc_pix_fmt *render = (struct xvmc_pix_fmt*)s->current_picture.f->data[2];
     assert(render);
 
     if (render->filled_mv_blocks_num > 0)
         ff_mpeg_draw_horiz_band(s, 0, 0);
+    return 0; /* no failure path; 0 is returned unconditionally */
 }
 
 /**
  * Synthesize the data needed by XvMC to render one macroblock of data.
  * Fill all relevant fields, if necessary do IDCT.
  */
-void ff_xvmc_decode_mb(MpegEncContext *s)
+static void ff_xvmc_decode_mb(struct MpegEncContext *s)
 {
     XvMCMacroBlock *mv_block;
     struct xvmc_pix_fmt *render;
@@ -314,7 +327,7 @@ void ff_xvmc_decode_mb(MpegEncContext *s)
                  * slowdown. */
             }
             // copy blocks only if the codec doesn't support pblocks reordering
-            if (s->avctx->xvmc_acceleration == 1) {
+            if (!s->pack_pblocks) {
                 memcpy(&render->data_blocks[render->next_free_data_block_num*64],
                        s->pblocks[i], sizeof(*s->pblocks[i]));
             }
@@ -334,4 +347,30 @@ void ff_xvmc_decode_mb(MpegEncContext *s)
         ff_mpeg_draw_horiz_band(s, 0, 0);
 }
 
-#endif /* FF_API_XVMC */
+#if CONFIG_MPEG1_XVMC_HWACCEL
+AVHWAccel ff_mpeg1_xvmc_hwaccel = { /* XvMC entry for AV_CODEC_ID_MPEG1VIDEO */
+    .name           = "mpeg1_xvmc",
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_MPEG1VIDEO,
+    .pix_fmt        = AV_PIX_FMT_XVMC,
+    .start_frame    = ff_xvmc_field_start,
+    .end_frame      = ff_xvmc_field_end,
+    .decode_slice   = NULL, /* no slice-level callback; this entry supplies .decode_mb instead */
+    .decode_mb      = ff_xvmc_decode_mb,
+    .priv_data_size = 0,
+};
+#endif /* CONFIG_MPEG1_XVMC_HWACCEL */
+
+#if CONFIG_MPEG2_XVMC_HWACCEL
+AVHWAccel ff_mpeg2_xvmc_hwaccel = { /* XvMC entry for AV_CODEC_ID_MPEG2VIDEO */
+    .name           = "mpeg2_xvmc",
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_MPEG2VIDEO,
+    .pix_fmt        = AV_PIX_FMT_XVMC,
+    .start_frame    = ff_xvmc_field_start,
+    .end_frame      = ff_xvmc_field_end,
+    .decode_slice   = NULL, /* no slice-level callback; this entry supplies .decode_mb instead */
+    .decode_mb      = ff_xvmc_decode_mb,
+    .priv_data_size = 0,
+};
+#endif /* CONFIG_MPEG2_XVMC_HWACCEL */
diff --git a/libavcodec/mpegvideodsp.c b/libavcodec/mpegvideodsp.c
index 915a844..a58e45a 100644
--- a/libavcodec/mpegvideodsp.c
+++ b/libavcodec/mpegvideodsp.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mpegvideodsp.h b/libavcodec/mpegvideodsp.h
index b0f45db..293e254 100644
--- a/libavcodec/mpegvideodsp.h
+++ b/libavcodec/mpegvideodsp.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mpegvideoencdsp.c b/libavcodec/mpegvideoencdsp.c
index 109bbe5..860c2d8 100644
--- a/libavcodec/mpegvideoencdsp.c
+++ b/libavcodec/mpegvideoencdsp.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -21,6 +21,7 @@
 #include <string.h>
 
 #include "config.h"
+#include "libavutil/avassert.h"
 #include "libavutil/attributes.h"
 #include "libavutil/imgutils.h"
 #include "avcodec.h"
@@ -40,7 +41,7 @@ static int try_8x8basis_c(int16_t rem[64], int16_t weight[64],
                           (BASIS_SHIFT - RECON_SHIFT));
         int w = weight[i];
         b >>= RECON_SHIFT;
-        assert(-512 < b && b < 512);
+        av_assert2(-512 < b && b < 512);
 
         sum += (w * b) * (w * b) >> 4;
     }
diff --git a/libavcodec/mpegvideoencdsp.h b/libavcodec/mpegvideoencdsp.h
index 91a292a..e12f4c6 100644
--- a/libavcodec/mpegvideoencdsp.h
+++ b/libavcodec/mpegvideoencdsp.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mpl2dec.c b/libavcodec/mpl2dec.c
new file mode 100644
index 0000000..a777c7c
--- /dev/null
+++ b/libavcodec/mpl2dec.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2012 Clément Bœsch
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * MPL2 subtitles decoder
+ *
+ * @see http://web.archive.org/web/20090328040233/http://napisy.ussbrowarek.org/mpl2-eng.html
+ */
+
+#include "avcodec.h"
+#include "ass.h"
+#include "libavutil/bprint.h"
+
+/* Append the MPL2 event at p to buf as one ASS text line ending in "\r\n". */
+static int mpl2_event_to_ass(AVBPrint *buf, const char *p)
+{
+    p += *p == ' ';                 /* skip one leading space, if any */
+
+    while (*p) {
+        int styled = 0;
+
+        /* each leading '/', '\' or '_' emits {\i1}, {\b1} or {\u1} */
+        for (;; p++, styled = 1) {
+            if      (*p == '/')  av_bprintf(buf, "{\\i1}");
+            else if (*p == '\\') av_bprintf(buf, "{\\b1}");
+            else if (*p == '_')  av_bprintf(buf, "{\\u1}");
+            else                 break;
+        }
+
+        /* copy the text up to the next '|', dropping CR and LF */
+        for (; *p && *p != '|'; p++)
+            if (*p != '\r' && *p != '\n')
+                av_bprint_chars(buf, *p, 1);
+        if (*p != '|')
+            break;                  /* end of the event string */
+
+        /* '|' becomes \N, preceded by {\r} if this segment emitted a tag */
+        if (styled)
+            av_bprintf(buf, "{\\r}");
+        av_bprintf(buf, "\\N");
+        p++;
+    }
+
+    av_bprintf(buf, "\r\n");
+    return 0;
+}
+
+/* Decode one MPL2 packet: returns avpkt->size, or a negative AVERROR code. */
+static int mpl2_decode_frame(AVCodecContext *avctx, void *data,
+                             int *got_sub_ptr, AVPacket *avpkt)
+{
+    AVBPrint buf;
+    AVSubtitle *sub = data;
+    const char *ptr = (const char *)avpkt->data;
+    const int ts_start     = av_rescale_q(avpkt->pts,      avctx->time_base, (AVRational){1,100});
+    const int ts_duration  = avpkt->duration != -1 ?
+                             av_rescale_q(avpkt->duration, avctx->time_base, (AVRational){1,100}) : -1;
+    int ret = 0;
+
+    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
+    if (ptr && avpkt->size > 0 && *ptr && !mpl2_event_to_ass(&buf, ptr))
+        ret = av_bprint_is_complete(&buf) ?  /* a truncated buffer means out of memory */
+              ff_ass_add_rect(sub, buf.str, ts_start, ts_duration, 0) : AVERROR(ENOMEM);
+    av_bprint_finalize(&buf, NULL);         /* single cleanup point for every path */
+    if (ret < 0)                            /* propagate ENOMEM and ff_ass_add_rect() failures */
+        return ret;
+    *got_sub_ptr = sub->num_rects > 0;
+    return avpkt->size;
+}
+
+AVCodec ff_mpl2_decoder = { /* codec table entry for the MPL2 subtitle decoder */
+    .name           = "mpl2",
+    .long_name      = NULL_IF_CONFIG_SMALL("MPL2 subtitle"),
+    .type           = AVMEDIA_TYPE_SUBTITLE,
+    .id             = AV_CODEC_ID_MPL2,
+    .decode         = mpl2_decode_frame, /* adds at most one ASS rect per packet */
+    .init           = ff_ass_subtitle_header_default, /* declared outside this file; presumably sets a default ASS header - confirm */
+};
diff --git a/libavcodec/mqc.c b/libavcodec/mqc.c
index 0144581..f2d1e3b 100644
--- a/libavcodec/mqc.c
+++ b/libavcodec/mqc.c
@@ -2,20 +2,20 @@
  * MQ-coder encoder and decoder common functions
  * Copyright (c) 2007 Kamil Nowosad
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mqc.h b/libavcodec/mqc.h
index a65433e..c0827bd 100644
--- a/libavcodec/mqc.h
+++ b/libavcodec/mqc.h
@@ -2,20 +2,20 @@
  * MQ-coder: structures, common and decoder functions
  * Copyright (c) 2007 Kamil Nowosad
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -45,6 +45,20 @@ typedef struct MqcState {
     uint8_t cx_states[19];
 } MqcState;
 
+/* encoder */
+
+/** initialize the encoder to write at bp; bp[-1] is read (and may be modified), so it must be valid memory */
+void ff_mqc_initenc(MqcState *mqc, uint8_t *bp);
+
+/** code bit d with the context state pointed to by cxstate */
+void ff_mqc_encode(MqcState *mqc, uint8_t *cxstate, int d);
+
+/** number of encoded bytes */
+int ff_mqc_length(MqcState *mqc);
+
+/** flush the encoder [returns number of bytes encoded] */
+int ff_mqc_flush(MqcState *mqc);
+
 /* decoder */
 
 /**
diff --git a/libavcodec/mqcdec.c b/libavcodec/mqcdec.c
index 889763a..3625069 100644
--- a/libavcodec/mqcdec.c
+++ b/libavcodec/mqcdec.c
@@ -2,20 +2,20 @@
  * MQ-coder decoder
  * Copyright (c) 2007 Kamil Nowosad
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mqcenc.c b/libavcodec/mqcenc.c
new file mode 100644
index 0000000..97d352b
--- /dev/null
+++ b/libavcodec/mqcenc.c
@@ -0,0 +1,119 @@
+/*
+ * MQ-coder encoder
+ * Copyright (c) 2007 Kamil Nowosad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * MQ-coder encoder
+ * @file
+ * @author Kamil Nowosad
+ */
+
+#include "mqc.h"
+
+static void byteout(MqcState *mqc) /* move part of c into the output byte stream at bp and reload ct */
+{
+retry:
+    if (*mqc->bp == 0xff){ /* last written byte is 0xff: the next byte is taken from c >> 20 and ct restarts at 7 */
+        mqc->bp++;
+        *mqc->bp = mqc->c >> 20;
+        mqc->c &= 0xfffff;
+        mqc->ct = 7;
+    } else if ((mqc->c & 0x8000000)){ /* bit 27 of c is set: add 1 to the last written byte and clear that bit */
+        (*mqc->bp)++;
+        mqc->c &= 0x7ffffff;
+        goto retry; /* the increment may have produced 0xff, which must go through the first branch */
+    } else{ /* otherwise the next byte is taken from c >> 19 and ct restarts at 8 */
+        mqc->bp++;
+        *mqc->bp = mqc->c >> 19;
+        mqc->c &= 0x7ffff;
+        mqc->ct = 8;
+    }
+}
+
+static void renorme(MqcState *mqc) /* double a and c (at least once) until bit 15 of a is set */
+{
+    do{
+        mqc->a += mqc->a;
+        mqc->c += mqc->c;
+        if (!--mqc->ct) /* ct counted down to zero: byteout() stores a byte and sets ct to 7 or 8 */
+            byteout(mqc);
+    } while (!(mqc->a & 0x8000));
+}
+
+static void setbits(MqcState *mqc) /* called first by ff_mqc_flush(); fills the low 16 bits of c with ones */
+{
+    int tmp = mqc->c + mqc->a; /* c + a as they were on entry */
+    mqc->c |= 0xffff;
+    if (mqc->c >= tmp) /* presumably keeps c below the entry value of c + a - confirm against the MQ-coder spec */
+        mqc->c -= 0x8000;
+}
+
+void ff_mqc_initenc(MqcState *mqc, uint8_t *bp) /* reset contexts and coder registers; bp becomes bpstart */
+{
+    ff_mqc_init_contexts(mqc);
+    mqc->a = 0x8000;
+    mqc->c = 0;
+    mqc->bp = bp-1; /* byteout() advances bp before storing a byte, so start one byte before bp */
+    mqc->bpstart = bp;
+    mqc->ct = 12 + (*mqc->bp == 0xff); /* NOTE(review): this reads bp[-1]; the caller must make that byte accessible */
+}
+
+void ff_mqc_encode(MqcState *mqc, uint8_t *cxstate, int d) /* code decision d and update the state at cxstate */
+{
+    const int qe = ff_mqc_qe[*cxstate]; /* table value for the current state, taken off a first */
+
+    mqc->a -= qe;
+    if ((*cxstate & 1) != d){ /* d differs from the low bit of the state: nlps transition, always renormalize */
+        if (mqc->a < qe)
+            mqc->c += qe;
+        else
+            mqc->a = qe;
+        *cxstate = ff_mqc_nlps[*cxstate];
+        renorme(mqc);
+        return;
+    }
+    if (mqc->a & 0x8000){ /* d matches and bit 15 of a is still set: state and ct stay untouched */
+        mqc->c += qe;
+        return;
+    }
+    if (mqc->a < qe) /* d matches but a dropped below 0x8000: nmps transition and renormalize */
+        mqc->a = qe;
+    else
+        mqc->c += qe;
+    *cxstate = ff_mqc_nmps[*cxstate];
+    renorme(mqc);
+}
+
+int ff_mqc_length(MqcState *mqc) /* distance of the current byte pointer bp from bpstart */
+{
+    return mqc->bp - mqc->bpstart;
+}
+
+int ff_mqc_flush(MqcState *mqc) /* write out what is left in c; returns bp - bpstart afterwards */
+{
+    setbits(mqc);
+    mqc->c = mqc->c << mqc->ct; /* shift c left by the current ct, then store a byte */
+    byteout(mqc);
+    mqc->c = mqc->c << mqc->ct; /* byteout() reloaded ct; shift and store a second byte */
+    byteout(mqc);
+    if (*mqc->bp != 0xff) /* step past the last byte unless it is 0xff (presumably so a trailing 0xff is not counted - confirm) */
+        mqc->bp++;
+    return mqc->bp - mqc->bpstart;
+}
diff --git a/libavcodec/msgsmdec.c b/libavcodec/msgsmdec.c
index be5062a..4c4ddb4 100644
--- a/libavcodec/msgsmdec.c
+++ b/libavcodec/msgsmdec.c
@@ -2,20 +2,20 @@
  * gsm 06.10 decoder, Microsoft variant
  * Copyright (c) 2010 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/msgsmdec.h b/libavcodec/msgsmdec.h
index adbda9a..b2a1a62 100644
--- a/libavcodec/msgsmdec.h
+++ b/libavcodec/msgsmdec.h
@@ -2,20 +2,20 @@
  * gsm 06.10 decoder, Microsoft variant
  * Copyright (c) 2010 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/msmpeg4.c b/libavcodec/msmpeg4.c
index 95b5c93..7300af3 100644
--- a/libavcodec/msmpeg4.c
+++ b/libavcodec/msmpeg4.c
@@ -5,20 +5,20 @@
  *
  * msmpeg4v1 & v2 stuff by Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -36,6 +36,7 @@
 #include "mpeg4video.h"
 #include "msmpeg4data.h"
 #include "vc1data.h"
+#include "libavutil/imgutils.h"
 
 /*
  * You can also call this codec : MPEG4 with a twist !
@@ -51,6 +52,9 @@ static av_cold void init_h263_dc_for_msmpeg4(void)
 {
         int level, uni_code, uni_len;
 
+        if(ff_v2_dc_chroma_table[255 + 256][1])
+            return;
+
         for(level=-256; level<256; level++){
             int size, v, l;
             /* find number of bits */
@@ -103,8 +107,6 @@ static av_cold void init_h263_dc_for_msmpeg4(void)
 
 av_cold void ff_msmpeg4_common_init(MpegEncContext *s)
 {
-    static int initialized=0;
-
     switch(s->msmpeg4_version){
     case 1:
     case 2:
@@ -143,11 +145,7 @@ av_cold void ff_msmpeg4_common_init(MpegEncContext *s)
     }
     //Note the default tables are set in common_init in mpegvideo.c
 
-    if(!initialized){
-        initialized=1;
-
-        init_h263_dc_for_msmpeg4();
-    }
+    init_h263_dc_for_msmpeg4();
 }
 
 /* predict coded block */
@@ -177,13 +175,13 @@ int ff_msmpeg4_coded_block_pred(MpegEncContext * s, int n, uint8_t **coded_block
     return pred;
 }
 
-static int get_dc(uint8_t *src, int stride, int scale)
+static int get_dc(uint8_t *src, int stride, int scale, int block_size)
 {
     int y;
     int sum=0;
-    for(y=0; y<8; y++){
+    for(y=0; y<block_size; y++){
         int x;
-        for(x=0; x<8; x++){
+        for(x=0; x<block_size; x++){
             sum+=src[x + y*stride];
         }
     }
@@ -229,13 +227,13 @@ int ff_msmpeg4_pred_dc(MpegEncContext *s, int n,
         "addl %%eax, %2         \n\t"
         "addl %%eax, %1         \n\t"
         "addl %0, %%eax         \n\t"
-        "mull %4                \n\t"
+        "imull %4               \n\t"
         "movl %%edx, %0         \n\t"
         "movl %1, %%eax         \n\t"
-        "mull %4                \n\t"
+        "imull %4               \n\t"
         "movl %%edx, %1         \n\t"
         "movl %2, %%eax         \n\t"
-        "mull %4                \n\t"
+        "imull %4               \n\t"
         "movl %%edx, %2         \n\t"
         : "+b" (a), "+c" (b), "+D" (c)
         : "g" (scale), "S" (ff_inverse[scale])
@@ -275,17 +273,18 @@ int ff_msmpeg4_pred_dc(MpegEncContext *s, int n,
                     *dir_ptr = 0;
                 }
             }else{
+                int bs = 8 >> s->avctx->lowres;
                 if(n<4){
                     wrap= s->linesize;
-                    dest= s->current_picture.f->data[0] + (((n >> 1) + 2*s->mb_y) * 8*  wrap ) + ((n & 1) + 2*s->mb_x) * 8;
+                    dest= s->current_picture.f->data[0] + (((n >> 1) + 2*s->mb_y) * bs*  wrap ) + ((n & 1) + 2*s->mb_x) * bs;
                 }else{
                     wrap= s->uvlinesize;
-                    dest= s->current_picture.f->data[n - 3] + (s->mb_y * 8 * wrap) + s->mb_x * 8;
+                    dest= s->current_picture.f->data[n - 3] + (s->mb_y * bs * wrap) + s->mb_x * bs;
                 }
                 if(s->mb_x==0) a= (1024 + (scale>>1))/scale;
-                else           a= get_dc(dest-8, wrap, scale*8);
+                else           a= get_dc(dest-bs, wrap, scale*8>>(2*s->avctx->lowres), bs);
                 if(s->mb_y==0) c= (1024 + (scale>>1))/scale;
-                else           c= get_dc(dest-8*wrap, wrap, scale*8);
+                else           c= get_dc(dest-bs*wrap, wrap, scale*8>>(2*s->avctx->lowres), bs);
 
                 if (s->h263_aic_dir==0) {
                     pred= a;
diff --git a/libavcodec/msmpeg4.h b/libavcodec/msmpeg4.h
index 0a8ecd9..db77ce1 100644
--- a/libavcodec/msmpeg4.h
+++ b/libavcodec/msmpeg4.h
@@ -2,20 +2,20 @@
  * MSMPEG4 backend for encoder and decoder
  * copyright (c) 2007 Aurelien Jacobs <aurel@gnuage.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -59,10 +59,12 @@ int ff_wmv2_decode_mb(MpegEncContext *s, int16_t block[6][64]);
 #define CONFIG_MSMPEG4_DECODER (CONFIG_MSMPEG4V1_DECODER || \
                                 CONFIG_MSMPEG4V2_DECODER || \
                                 CONFIG_MSMPEG4V3_DECODER || \
+                                CONFIG_WMV1_DECODER      || \
                                 CONFIG_WMV2_DECODER      || \
                                 CONFIG_VC1_DECODER)
 #define CONFIG_MSMPEG4_ENCODER (CONFIG_MSMPEG4V2_ENCODER || \
                                 CONFIG_MSMPEG4V3_ENCODER || \
+                                CONFIG_WMV1_ENCODER      || \
                                 CONFIG_WMV2_ENCODER)
 
 #endif /* AVCODEC_MSMPEG4_H */
diff --git a/libavcodec/msmpeg4data.c b/libavcodec/msmpeg4data.c
index cf291af..8eb07e9 100644
--- a/libavcodec/msmpeg4data.c
+++ b/libavcodec/msmpeg4data.c
@@ -5,20 +5,20 @@
  *
  * msmpeg4v1 & v2 stuff by Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/msmpeg4data.h b/libavcodec/msmpeg4data.h
index ca2dac1..24a10d9 100644
--- a/libavcodec/msmpeg4data.h
+++ b/libavcodec/msmpeg4data.h
@@ -5,20 +5,20 @@
  *
  * msmpeg4v1 & v2 stuff by Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/msmpeg4dec.c b/libavcodec/msmpeg4dec.c
index 191f81a..94ea3c2 100644
--- a/libavcodec/msmpeg4dec.c
+++ b/libavcodec/msmpeg4dec.c
@@ -1,24 +1,24 @@
 /*
  * MSMPEG4 backend for encoder and decoder
  * Copyright (c) 2001 Fabrice Bellard
- * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2002-2013 Michael Niedermayer <michaelni@gmx.at>
  *
  * msmpeg4v1 & v2 stuff by Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,6 +26,7 @@
 #include "mpegutils.h"
 #include "mpegvideo.h"
 #include "msmpeg4.h"
+#include "libavutil/imgutils.h"
 #include "libavutil/x86/asm.h"
 #include "h263.h"
 #include "mpeg4video.h"
@@ -102,6 +103,7 @@ static int msmpeg4v2_decode_motion(MpegEncContext * s, int pred, int f_code)
 static int msmpeg4v12_decode_mb(MpegEncContext *s, int16_t block[6][64])
 {
     int cbp, code, i;
+    uint32_t * const mb_type_ptr = &s->current_picture.mb_type[s->mb_x + s->mb_y*s->mb_stride];
 
     if (s->pict_type == AV_PICTURE_TYPE_P) {
         if (s->use_skip_mb_code) {
@@ -115,6 +117,7 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, int16_t block[6][64])
                 s->mv[0][0][0] = 0;
                 s->mv[0][0][1] = 0;
                 s->mb_skipped = 1;
+                *mb_type_ptr = MB_TYPE_SKIP | MB_TYPE_L0 | MB_TYPE_16x16;
                 return 0;
             }
         }
@@ -163,6 +166,7 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, int16_t block[6][64])
         s->mv_type = MV_TYPE_16X16;
         s->mv[0][0][0] = mx;
         s->mv[0][0][1] = my;
+        *mb_type_ptr = MB_TYPE_L0 | MB_TYPE_16x16;
     } else {
         if(s->msmpeg4_version==2){
             s->ac_pred = get_bits1(&s->gb);
@@ -172,6 +176,7 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, int16_t block[6][64])
             cbp|= get_vlc2(&s->gb, ff_h263_cbpy_vlc.table, CBPY_VLC_BITS, 1)<<2; //FIXME check errors
             if(s->pict_type==AV_PICTURE_TYPE_P) cbp^=0x3C;
         }
+        *mb_type_ptr = MB_TYPE_INTRA;
     }
 
     s->bdsp.clear_blocks(s->block[0]);
@@ -281,18 +286,19 @@ static int msmpeg4v34_decode_mb(MpegEncContext *s, int16_t block[6][64])
 av_cold int ff_msmpeg4_decode_init(AVCodecContext *avctx)
 {
     MpegEncContext *s = avctx->priv_data;
-    static int done = 0;
-    int i;
+    static volatile int done = 0;
+    int i, ret;
     MVTable *mv;
 
+    if ((ret = av_image_check_size(avctx->width, avctx->height, 0, avctx)) < 0)
+        return ret;
+
     if (ff_h263_decode_init(avctx) < 0)
         return -1;
 
     ff_msmpeg4_common_init(s);
 
     if (!done) {
-        done = 1;
-
         for(i=0;i<NB_RL_TABLES;i++) {
             ff_init_rl(&ff_rl_table[i], ff_static_rl_table_store[i]);
         }
@@ -362,6 +368,7 @@ av_cold int ff_msmpeg4_decode_init(AVCodecContext *avctx)
         INIT_VLC_STATIC(&ff_inter_intra_vlc, INTER_INTRA_VLC_BITS, 4,
                  &ff_table_inter_intra[0][1], 2, 1,
                  &ff_table_inter_intra[0][0], 2, 1, 8);
+        done = 1;
     }
 
     switch(s->msmpeg4_version){
@@ -579,8 +586,11 @@ static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr)
         } else {
             level = get_vlc2(&s->gb, v2_dc_chroma_vlc.table, DC_VLC_BITS, 3);
         }
-        if (level < 0)
+        if (level < 0) {
+            av_log(s->avctx, AV_LOG_ERROR, "illegal dc vlc\n");
+            *dir_ptr = 0;
             return -1;
+        }
         level-=256;
     }else{  //FIXME optimize use unified tables & index
         if (n < 4) {
@@ -590,6 +600,7 @@ static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr)
         }
         if (level < 0){
             av_log(s->avctx, AV_LOG_ERROR, "illegal dc vlc\n");
+            *dir_ptr = 0;
             return -1;
         }
 
@@ -646,7 +657,6 @@ int ff_msmpeg4_decode_block(MpegEncContext * s, int16_t * block,
         if (level < 0){
             av_log(s->avctx, AV_LOG_ERROR, "dc overflow- block: %d qscale: %d//\n", n, s->qscale);
             if(s->inter_intra_pred) level=0;
-            else                    return -1;
         }
         if (n < 4) {
             rl = &ff_rl_table[s->rl_table_index];
@@ -832,8 +842,9 @@ int ff_msmpeg4_decode_block(MpegEncContext * s, int16_t * block,
             i-= 192;
             if(i&(~63)){
                 const int left= get_bits_left(&s->gb);
-                if(((i+192 == 64 && level/qmul==-1) || !(s->err_recognition&AV_EF_BITSTREAM)) && left>=0){
+                if(((i+192 == 64 && level/qmul==-1) || !(s->err_recognition&(AV_EF_BITSTREAM|AV_EF_COMPLIANT))) && left>=0){
                     av_log(s->avctx, AV_LOG_ERROR, "ignoring overflow at %d %d\n", s->mb_x, s->mb_y);
+                    i = 63;
                     break;
                 }else{
                     av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
@@ -910,6 +921,7 @@ AVCodec ff_msmpeg4v1_decoder = {
     .close          = ff_h263_decode_end,
     .decode         = ff_h263_decode_frame,
     .capabilities   = CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
+    .max_lowres     = 3,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_YUV420P,
         AV_PIX_FMT_NONE
@@ -926,6 +938,7 @@ AVCodec ff_msmpeg4v2_decoder = {
     .close          = ff_h263_decode_end,
     .decode         = ff_h263_decode_frame,
     .capabilities   = CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
+    .max_lowres     = 3,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_YUV420P,
         AV_PIX_FMT_NONE
@@ -942,6 +955,7 @@ AVCodec ff_msmpeg4v3_decoder = {
     .close          = ff_h263_decode_end,
     .decode         = ff_h263_decode_frame,
     .capabilities   = CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
+    .max_lowres     = 3,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_YUV420P,
         AV_PIX_FMT_NONE
@@ -958,6 +972,7 @@ AVCodec ff_wmv1_decoder = {
     .close          = ff_h263_decode_end,
     .decode         = ff_h263_decode_frame,
     .capabilities   = CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
+    .max_lowres     = 3,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_YUV420P,
         AV_PIX_FMT_NONE
diff --git a/libavcodec/msmpeg4enc.c b/libavcodec/msmpeg4enc.c
index 45ef208..e7f51db 100644
--- a/libavcodec/msmpeg4enc.c
+++ b/libavcodec/msmpeg4enc.c
@@ -5,20 +5,20 @@
  *
  * msmpeg4v1 & v2 stuff by Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -34,7 +34,6 @@
 #include "libavutil/avutil.h"
 #include "libavutil/mem.h"
 #include "mpegvideo.h"
-#include "msmpeg4.h"
 #include "h263.h"
 #include "mpeg4video.h"
 #include "msmpeg4.h"
@@ -150,8 +149,8 @@ av_cold void ff_msmpeg4_encode_init(MpegEncContext *s)
 static void find_best_tables(MpegEncContext * s)
 {
     int i;
-    int best       =-1, best_size       =9999999;
-    int chroma_best=-1, best_chroma_size=9999999;
+    int best        = 0, best_size        = INT_MAX;
+    int chroma_best = 0, best_chroma_size = INT_MAX;
 
     for(i=0; i<3; i++){
         int level;
@@ -274,14 +273,15 @@ void ff_msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
 
 void ff_msmpeg4_encode_ext_header(MpegEncContext * s)
 {
-        put_bits(&s->pb, 5, s->avctx->time_base.den / s->avctx->time_base.num); //yes 29.97 -> 29
+        unsigned fps = s->avctx->time_base.den / s->avctx->time_base.num / FFMAX(s->avctx->ticks_per_frame, 1);
+        put_bits(&s->pb, 5, FFMIN(fps, 31)); //yes 29.97 -> 29
 
         put_bits(&s->pb, 11, FFMIN(s->bit_rate/1024, 2047));
 
         if(s->msmpeg4_version>=3)
             put_bits(&s->pb, 1, s->flipflop_rounding);
         else
-            assert(s->flipflop_rounding==0);
+            av_assert0(s->flipflop_rounding==0);
 }
 
 void ff_msmpeg4_encode_motion(MpegEncContext * s,
@@ -494,7 +494,7 @@ void ff_msmpeg4_encode_mb(MpegEncContext * s,
 static void msmpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr)
 {
     int sign, code;
-    int pred, extquant;
+    int pred, av_uninit(extquant);
     int extrabits = 0;
 
     int16_t *dc_val;
diff --git a/libavcodec/msrle.c b/libavcodec/msrle.c
index 4b39c92..9f0cac6 100644
--- a/libavcodec/msrle.c
+++ b/libavcodec/msrle.c
@@ -2,20 +2,20 @@
  * Microsoft RLE video decoder
  * Copyright (C) 2003 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -35,6 +35,7 @@
 #include "avcodec.h"
 #include "internal.h"
 #include "msrledec.h"
+#include "libavutil/imgutils.h"
 
 typedef struct MsrleContext {
     AVCodecContext *avctx;
@@ -50,10 +51,14 @@ typedef struct MsrleContext {
 static av_cold int msrle_decode_init(AVCodecContext *avctx)
 {
     MsrleContext *s = avctx->priv_data;
+    int i;
 
     s->avctx = avctx;
 
     switch (avctx->bits_per_coded_sample) {
+    case 1:
+        avctx->pix_fmt = AV_PIX_FMT_MONOWHITE;
+        break;
     case 4:
     case 8:
         avctx->pix_fmt = AV_PIX_FMT_PAL8;
@@ -70,6 +75,10 @@ static av_cold int msrle_decode_init(AVCodecContext *avctx)
     if (!s->frame)
         return AVERROR(ENOMEM);
 
+    if (avctx->extradata_size >= 4)
+        for (i = 0; i < FFMIN(avctx->extradata_size, AVPALETTE_SIZE)/4; i++)
+            s->pal[i] = 0xFFU<<24 | AV_RL32(avctx->extradata+4*i);
+
     return 0;
 }
 
@@ -86,30 +95,30 @@ static int msrle_decode_frame(AVCodecContext *avctx,
     s->buf = buf;
     s->size = buf_size;
 
-    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0)
         return ret;
-    }
 
-    if (avctx->bits_per_coded_sample <= 8) {
+    if (avctx->bits_per_coded_sample > 1 && avctx->bits_per_coded_sample <= 8) {
         const uint8_t *pal = av_packet_get_side_data(avpkt, AV_PKT_DATA_PALETTE, NULL);
 
         if (pal) {
             s->frame->palette_has_changed = 1;
             memcpy(s->pal, pal, AVPALETTE_SIZE);
         }
-
         /* make the palette available */
         memcpy(s->frame->data[1], s->pal, AVPALETTE_SIZE);
     }
 
     /* FIXME how to correctly detect RLE ??? */
     if (avctx->height * istride == avpkt->size) { /* assume uncompressed */
-        int linesize = avctx->width * avctx->bits_per_coded_sample / 8;
+        int linesize = av_image_get_linesize(avctx->pix_fmt, avctx->width, 0);
         uint8_t *ptr = s->frame->data[0];
         uint8_t *buf = avpkt->data + (avctx->height-1)*istride;
         int i, j;
 
+        if (linesize < 0)
+            return linesize;
+
         for (i = 0; i < avctx->height; i++) {
             if (avctx->bits_per_coded_sample == 4) {
                 for (j = 0; j < avctx->width - 1; j += 2) {
diff --git a/libavcodec/msrledec.c b/libavcodec/msrledec.c
index af2a247..4d3da5b 100644
--- a/libavcodec/msrledec.c
+++ b/libavcodec/msrledec.c
@@ -2,20 +2,20 @@
  * Microsoft RLE decoder
  * Copyright (C) 2008 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -138,7 +138,8 @@ static int msrle_decode_8_16_24_32(AVCodecContext *avctx, AVPicture *pic,
     unsigned int width= FFABS(pic->linesize[0]) / (depth >> 3);
 
     output     = pic->data[0] + (avctx->height - 1) * pic->linesize[0];
-    output_end = pic->data[0] +  avctx->height      * pic->linesize[0];
+    output_end = output + FFABS(pic->linesize[0]);
+
     while (bytestream2_get_bytes_left(gb) > 0) {
         p1 = bytestream2_get_byteu(gb);
         if(p1 == 0) { //Escape code
@@ -155,6 +156,7 @@ static int msrle_decode_8_16_24_32(AVCodecContext *avctx, AVPicture *pic,
                     }
                 }
                 output = pic->data[0] + line * pic->linesize[0];
+                output_end = output + FFABS(pic->linesize[0]);
                 pos = 0;
                 continue;
             } else if(p2 == 1) { //End-of-picture
@@ -169,11 +171,11 @@ static int msrle_decode_8_16_24_32(AVCodecContext *avctx, AVPicture *pic,
                     return -1;
                 }
                 output = pic->data[0] + line * pic->linesize[0] + pos * (depth >> 3);
+                output_end = pic->data[0] + line * pic->linesize[0] + FFABS(pic->linesize[0]);
                 continue;
             }
             // Copy data
-            if ((pic->linesize[0] > 0 && output + p2 * (depth >> 3) > output_end) ||
-                (pic->linesize[0] < 0 && output + p2 * (depth >> 3) < output_end)) {
+            if (output + p2 * (depth >> 3) > output_end) {
                 bytestream2_skip(gb, 2 * (depth >> 3));
                 continue;
             } else if (bytestream2_get_bytes_left(gb) < p2 * (depth >> 3)) {
@@ -182,9 +184,9 @@ static int msrle_decode_8_16_24_32(AVCodecContext *avctx, AVPicture *pic,
             }
 
             if ((depth == 8) || (depth == 24)) {
-                for(i = 0; i < p2 * (depth >> 3); i++) {
-                    *output++ = bytestream2_get_byteu(gb);
-                }
+                bytestream2_get_bufferu(gb, output, p2 * (depth >> 3));
+                output += p2 * (depth >> 3);
+
                 // RLE8 copy is actually padded - and runs are not!
                 if(depth == 8 && (p2 & 1)) {
                     bytestream2_skip(gb, 1);
@@ -203,36 +205,39 @@ static int msrle_decode_8_16_24_32(AVCodecContext *avctx, AVPicture *pic,
             pos += p2;
         } else { //run of pixels
             uint8_t pix[3]; //original pixel
-            switch(depth){
-            case  8: pix[0] = bytestream2_get_byte(gb);
-                     break;
-            case 16: pix16  = bytestream2_get_le16(gb);
-                     break;
-            case 24: pix[0] = bytestream2_get_byte(gb);
-                     pix[1] = bytestream2_get_byte(gb);
-                     pix[2] = bytestream2_get_byte(gb);
-                     break;
-            case 32: pix32  = bytestream2_get_le32(gb);
-                     break;
-            }
-            if ((pic->linesize[0] > 0 && output + p1 * (depth >> 3) > output_end) ||
-                (pic->linesize[0] < 0 && output + p1 * (depth >> 3) < output_end))
+            if (output + p1 * (depth >> 3) > output_end)
                 continue;
-            for(i = 0; i < p1; i++) {
-                switch(depth){
-                case  8: *output++ = pix[0];
-                         break;
-                case 16: *(uint16_t*)output = pix16;
-                         output += 2;
-                         break;
-                case 24: *output++ = pix[0];
-                         *output++ = pix[1];
-                         *output++ = pix[2];
-                         break;
-                case 32: *(uint32_t*)output = pix32;
-                         output += 4;
-                         break;
+
+            switch(depth){
+            case  8:
+                pix[0] = bytestream2_get_byte(gb);
+                memset(output, pix[0], p1);
+                output += p1;
+                break;
+            case 16:
+                pix16  = bytestream2_get_le16(gb);
+                for(i = 0; i < p1; i++) {
+                        *(uint16_t*)output = pix16;
+                        output += 2;
+                }
+                break;
+            case 24:
+                pix[0] = bytestream2_get_byte(gb);
+                pix[1] = bytestream2_get_byte(gb);
+                pix[2] = bytestream2_get_byte(gb);
+                for(i = 0; i < p1; i++) {
+                        *output++ = pix[0];
+                        *output++ = pix[1];
+                        *output++ = pix[2];
+                }
+                break;
+            case 32:
+                pix32  = bytestream2_get_le32(gb);
+                for(i = 0; i < p1; i++) {
+                        *(uint32_t*)output = pix32;
+                        output += 4;
                 }
+                break;
             }
             pos += p1;
         }
diff --git a/libavcodec/msrledec.h b/libavcodec/msrledec.h
index a594de3..3f66636 100644
--- a/libavcodec/msrledec.h
+++ b/libavcodec/msrledec.h
@@ -2,20 +2,20 @@
  * Microsoft RLE decoder
  * Copyright (C) 2008 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mss1.c b/libavcodec/mss1.c
index a67a942..6bb524b 100644
--- a/libavcodec/mss1.c
+++ b/libavcodec/mss1.c
@@ -2,20 +2,20 @@
  * Microsoft Screen 1 (aka Windows Media Video V7 Screen) decoder
  * Copyright (c) 2012 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -60,7 +60,7 @@ static void arith_normalise(ArithCoder *c)
     }
 }
 
-ARITH_GET_BIT()
+ARITH_GET_BIT(arith)
 
 static int arith_get_bits(ArithCoder *c, int bits)
 {
@@ -105,7 +105,7 @@ static int arith_get_prob(ArithCoder *c, int16_t *probs)
     return sym;
 }
 
-ARITH_GET_MODEL_SYM()
+ARITH_GET_MODEL_SYM(arith)
 
 static void arith_init(ArithCoder *c, GetBitContext *gb)
 {
@@ -130,7 +130,7 @@ static int decode_pal(MSS12Context *ctx, ArithCoder *acoder)
         r = arith_get_bits(acoder, 8);
         g = arith_get_bits(acoder, 8);
         b = arith_get_bits(acoder, 8);
-        *pal++ = (r << 16) | (g << 8) | b;
+        *pal++ = (0xFFU << 24) | (r << 16) | (g << 8) | b;
     }
 
     return !!ncol;
@@ -139,8 +139,6 @@ static int decode_pal(MSS12Context *ctx, ArithCoder *acoder)
 static int mss1_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                              AVPacket *avpkt)
 {
-    const uint8_t *buf = avpkt->data;
-    int buf_size = avpkt->size;
     MSS1Context *ctx = avctx->priv_data;
     MSS12Context *c = &ctx->ctx;
     GetBitContext gb;
@@ -148,13 +146,13 @@ static int mss1_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     int pal_changed = 0;
     int ret;
 
-    init_get_bits(&gb, buf, buf_size * 8);
+    if ((ret = init_get_bits8(&gb, avpkt->data, avpkt->size)) < 0)
+        return ret;
+
     arith_init(&acoder, &gb);
 
-    if ((ret = ff_reget_buffer(avctx, ctx->pic)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, ctx->pic)) < 0)
         return ret;
-    }
 
     c->pal_pic    =  ctx->pic->data[0] + ctx->pic->linesize[0] * (avctx->height - 1);
     c->pal_stride = -ctx->pic->linesize[0];
@@ -184,7 +182,7 @@ static int mss1_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     *got_frame      = 1;
 
     /* always report that the buffer was completely consumed */
-    return buf_size;
+    return avpkt->size;
 }
 
 static av_cold int mss1_decode_init(AVCodecContext *avctx)
diff --git a/libavcodec/mss12.c b/libavcodec/mss12.c
index d4b621f..7d54d29 100644
--- a/libavcodec/mss12.c
+++ b/libavcodec/mss12.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2012 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -450,7 +450,7 @@ static int decode_pivot(SliceContext *sc, ArithCoder *acoder, int base)
         val = acoder->get_number(acoder, (base + 1) / 2 - 2) + 3;
     }
 
-    if (val >= base)
+    if ((unsigned)val >= base)
         return -1;
 
     return inv ? base - val : val;
@@ -588,6 +588,11 @@ av_cold int ff_mss12_decode_init(MSS12Context *c, int version,
                avctx->coded_width, avctx->coded_height);
         return AVERROR_INVALIDDATA;
     }
+    if (avctx->coded_width < 1 || avctx->coded_height < 1) {
+        av_log(avctx, AV_LOG_ERROR, "Frame dimensions %dx%d too small\n",
+               avctx->coded_width, avctx->coded_height);
+        return AVERROR_INVALIDDATA;
+    }
 
     av_log(avctx, AV_LOG_DEBUG, "Encoder version %"PRIu32".%"PRIu32"\n",
            AV_RB32(avctx->extradata + 4), AV_RB32(avctx->extradata + 8));
@@ -647,7 +652,7 @@ av_cold int ff_mss12_decode_init(MSS12Context *c, int version,
     }
 
     for (i = 0; i < 256; i++)
-        c->pal[i] = AV_RB24(avctx->extradata + 52 +
+        c->pal[i] = 0xFFU << 24 | AV_RB24(avctx->extradata + 52 +
                             (version ? 8 : 0) + i * 3);
 
     c->mask_stride = FFALIGN(avctx->width, 16);
diff --git a/libavcodec/mss12.h b/libavcodec/mss12.h
index 5b1fee8..f953167 100644
--- a/libavcodec/mss12.h
+++ b/libavcodec/mss12.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2012 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -99,8 +99,8 @@ int ff_mss12_decode_init(MSS12Context *c, int version,
                          SliceContext *sc1, SliceContext *sc2);
 int ff_mss12_decode_end(MSS12Context *ctx);
 
-#define ARITH_GET_BIT(VERSION)                                          \
-static int arith ## VERSION ## _get_bit(ArithCoder *c)                  \
+#define ARITH_GET_BIT(prefix)                                           \
+static int prefix ## _get_bit(ArithCoder *c)                            \
 {                                                                       \
     int range = c->high - c->low + 1;                                   \
     int bit   = 2 * c->value - c->low >= c->high;                       \
@@ -110,22 +110,22 @@ static int arith ## VERSION ## _get_bit(ArithCoder *c)                  \
     else                                                                \
         c->high = c->low + (range >> 1) - 1;                            \
                                                                         \
-    arith ## VERSION ## _normalise(c);                                  \
+    prefix ## _normalise(c);                                            \
                                                                         \
     return bit;                                                         \
 }
 
-#define ARITH_GET_MODEL_SYM(VERSION)                                    \
-static int arith ## VERSION ## _get_model_sym(ArithCoder *c, Model *m)  \
+#define ARITH_GET_MODEL_SYM(prefix)                                     \
+static int prefix ## _get_model_sym(ArithCoder *c, Model *m)            \
 {                                                                       \
     int idx, val;                                                       \
                                                                         \
-    idx = arith ## VERSION ## _get_prob(c, m->cum_prob);                \
+    idx = prefix ## _get_prob(c, m->cum_prob);                          \
                                                                         \
     val = m->idx2sym[idx];                                              \
     ff_mss12_model_update(m, idx);                                      \
                                                                         \
-    arith ## VERSION ## _normalise(c);                                  \
+    prefix ## _normalise(c);                                            \
                                                                         \
     return val;                                                         \
 }
diff --git a/libavcodec/mss2.c b/libavcodec/mss2.c
index ffbba6d..35c5d27 100644
--- a/libavcodec/mss2.c
+++ b/libavcodec/mss2.c
@@ -1,20 +1,20 @@
 /*
  * Microsoft Screen 2 (aka Windows Media Video V9 Screen) decoder
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -57,7 +57,7 @@ static void arith2_normalise(ArithCoder *c)
     }
 }
 
-ARITH_GET_BIT(2)
+ARITH_GET_BIT(arith2)
 
 /* L. Stuiver and A. Moffat: "Piecewise Integer Mapping for Arithmetic Coding."
  * In Proc. 8th Data Compression Conference (DCC '98), pp. 3-12, Mar. 1998 */
@@ -130,7 +130,7 @@ static int arith2_get_prob(ArithCoder *c, int16_t *probs)
     return i;
 }
 
-ARITH_GET_MODEL_SYM(2)
+ARITH_GET_MODEL_SYM(arith2)
 
 static int arith2_get_consumed_bytes(ArithCoder *c)
 {
@@ -380,7 +380,8 @@ static int decode_wmv9(AVCodecContext *avctx, const uint8_t *buf, int buf_size,
 
     ff_mpeg_flush(avctx);
 
-    init_get_bits(&s->gb, buf, buf_size * 8);
+    if ((ret = init_get_bits8(&s->gb, buf, buf_size)) < 0)
+        return ret;
 
     s->loop_filter = avctx->skip_loop_filter < AVDISCARD_ALL;
 
@@ -423,8 +424,8 @@ static int decode_wmv9(AVCodecContext *avctx, const uint8_t *buf, int buf_size,
 
     if (v->respic == 3) {
         ctx->dsp.upsample_plane(f->data[0], f->linesize[0], w,      h);
-        ctx->dsp.upsample_plane(f->data[1], f->linesize[1], w >> 1, h >> 1);
-        ctx->dsp.upsample_plane(f->data[2], f->linesize[2], w >> 1, h >> 1);
+        ctx->dsp.upsample_plane(f->data[1], f->linesize[1], w+1 >> 1, h+1 >> 1);
+        ctx->dsp.upsample_plane(f->data[2], f->linesize[2], w+1 >> 1, h+1 >> 1);
     } else if (v->respic)
         avpriv_request_sample(v->s.avctx,
                               "Asymmetric WMV9 rectangle subsampling");
@@ -478,7 +479,8 @@ static int mss2_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     av_assert0(FF_INPUT_BUFFER_PADDING_SIZE >=
                ARITH2_PADDING + (MIN_CACHE_BITS + 7) / 8);
 
-    init_get_bits(&gb, buf, buf_size * 8);
+    if ((ret = init_get_bits8(&gb, buf, buf_size)) < 0)
+        return ret;
 
     if (keyframe = get_bits1(&gb))
         skip_bits(&gb, 7);
@@ -594,10 +596,8 @@ static int mss2_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     if (c->mvX < 0 || c->mvY < 0) {
         FFSWAP(uint8_t *, c->pal_pic, c->last_pal_pic);
 
-        if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
             return ret;
-        }
 
         if (ctx->last_pic->data[0]) {
             av_assert0(frame->linesize[0] == ctx->last_pic->linesize[0]);
@@ -608,10 +608,8 @@ static int mss2_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             return AVERROR_INVALIDDATA;
         }
     } else {
-        if ((ret = ff_reget_buffer(avctx, ctx->last_pic)) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+        if ((ret = ff_reget_buffer(avctx, ctx->last_pic)) < 0)
             return ret;
-        }
         if ((ret = av_frame_ref(frame, ctx->last_pic)) < 0)
             return ret;
 
@@ -640,7 +638,8 @@ static int mss2_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                 ff_mss12_slicecontext_reset(&ctx->sc[1]);
         }
         if (is_rle) {
-            init_get_bits(&gb, buf, buf_size * 8);
+            if ((ret = init_get_bits8(&gb, buf, buf_size)) < 0)
+                return ret;
             if (ret = decode_rle(&gb, c->pal_pic, c->pal_stride,
                                  c->rgb_pic, c->rgb_stride, c->pal, keyframe,
                                  ctx->split_position, 0,
@@ -818,10 +817,11 @@ static av_cold int mss2_decode_init(AVCodecContext *avctx)
     c->avctx = avctx;
     if (ret = ff_mss12_decode_init(c, 1, &ctx->sc[0], &ctx->sc[1]))
         return ret;
+    ctx->last_pic   = av_frame_alloc();
     c->pal_stride   = c->mask_stride;
     c->pal_pic      = av_mallocz(c->pal_stride * avctx->height);
     c->last_pal_pic = av_mallocz(c->pal_stride * avctx->height);
-    if (!c->pal_pic || !c->last_pal_pic) {
+    if (!c->pal_pic || !c->last_pal_pic || !ctx->last_pic) {
         mss2_decode_end(avctx);
         return AVERROR(ENOMEM);
     }
@@ -835,11 +835,6 @@ static av_cold int mss2_decode_init(AVCodecContext *avctx)
     avctx->pix_fmt = c->free_colours == 127 ? AV_PIX_FMT_RGB555
                                             : AV_PIX_FMT_RGB24;
 
-    ctx->last_pic = av_frame_alloc();
-    if (!ctx->last_pic) {
-        mss2_decode_end(avctx);
-        return AVERROR(ENOMEM);
-    }
 
     return 0;
 }
diff --git a/libavcodec/mss2dsp.c b/libavcodec/mss2dsp.c
index aa13577..c5fc1f8 100644
--- a/libavcodec/mss2dsp.c
+++ b/libavcodec/mss2dsp.c
@@ -1,20 +1,20 @@
 /*
  * Microsoft Screen 2 (aka Windows Media Video V9 Screen) decoder
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -106,6 +106,9 @@ static void upsample_plane_c(uint8_t *plane, int plane_stride, int w, int h)
     uint8_t *src1, *src2, *dst1, *dst2, *p, a, b;
     int i, j;
 
+    if(!w || !h)
+        return;
+
     w += (w & 1);
     h += (h & 1);
 
diff --git a/libavcodec/mss2dsp.h b/libavcodec/mss2dsp.h
index 61c3a04..7368abb 100644
--- a/libavcodec/mss2dsp.h
+++ b/libavcodec/mss2dsp.h
@@ -1,20 +1,20 @@
 /*
  * Microsoft Screen 2 (aka Windows Media Video V9 Screen) decoder
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mss3.c b/libavcodec/mss3.c
index 9a0b1fb..075685b 100644
--- a/libavcodec/mss3.c
+++ b/libavcodec/mss3.c
@@ -2,20 +2,20 @@
  * Microsoft Screen 3 (aka Microsoft ATC Screen) decoder
  * Copyright (c) 2012 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -296,7 +296,7 @@ static void rac_normalise(RangeCoder *c)
             c->low |= *c->src++;
         } else if (!c->low) {
             c->got_error = 1;
-            return;
+            c->low = 1;
         }
         if (c->range >= RAC_BOTTOM)
             return;
@@ -731,10 +731,8 @@ static int mss3_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         return buf_size;
     c->got_error = 0;
 
-    if ((ret = ff_reget_buffer(avctx, c->pic)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, c->pic)) < 0)
         return ret;
-    }
     c->pic->key_frame = keyframe;
     c->pic->pict_type = keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
     if (!bytestream2_get_bytes_left(&gb)) {
@@ -840,6 +838,7 @@ static av_cold int mss3_decode_init(AVCodecContext *avctx)
                                             b_width * b_height);
         if (!c->dct_coder[i].prev_dc) {
             av_log(avctx, AV_LOG_ERROR, "Cannot allocate buffer\n");
+            av_frame_free(&c->pic);
             while (i >= 0) {
                 av_freep(&c->dct_coder[i].prev_dc);
                 i--;
diff --git a/libavcodec/mss34dsp.c b/libavcodec/mss34dsp.c
index 11abb2d..0397add 100644
--- a/libavcodec/mss34dsp.c
+++ b/libavcodec/mss34dsp.c
@@ -2,20 +2,20 @@
  * Common stuff for some Microsoft Screen codecs
  * Copyright (C) 2012 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -84,8 +84,8 @@ void ff_mss34_gen_quant_mat(uint16_t *qmat, int quality, int luma)
     blk[6 * step] = (-(t3 + t7) + t8 + tA) >> shift;                \
     blk[7 * step] = (-(t1 + t6) + t9 + tB) >> shift;                \
 
-#define SOP_ROW(a) ((a) << 16) + 0x2000
-#define SOP_COL(a) ((a + 32) << 16)
+#define SOP_ROW(a) (((a) << 16) + 0x2000)
+#define SOP_COL(a) (((a) + 32) << 16)
 
 void ff_mss34_dct_put(uint8_t *dst, int stride, int *block)
 {
diff --git a/libavcodec/mss34dsp.h b/libavcodec/mss34dsp.h
index b2cc550..2f9827d 100644
--- a/libavcodec/mss34dsp.h
+++ b/libavcodec/mss34dsp.h
@@ -2,20 +2,20 @@
  * Common stuff for some Microsoft Screen codecs
  * Copyright (C) 2012 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mss4.c b/libavcodec/mss4.c
index 9198acc..c5b8e07 100644
--- a/libavcodec/mss4.c
+++ b/libavcodec/mss4.c
@@ -2,20 +2,20 @@
  * Microsoft Screen 4 (aka Microsoft Expression Encoder Screen) decoder
  * Copyright (c) 2012 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -125,7 +125,7 @@ static const uint8_t mss4_vec_entry_vlc_syms[2][9] = {
 #define MAX_ENTRIES  162
 
 typedef struct MSS4Context {
-    AVFrame   *pic;
+    AVFrame    *pic;
 
     VLC        dc_vlc[2], ac_vlc[2];
     VLC        vec_entry_vlc[2];
@@ -363,7 +363,7 @@ static int get_value_cached(GetBitContext *gb, int vec_pos, uint8_t *vec,
     return prev[component];
 }
 
-#define MKVAL(vals)  (vals[0] | (vals[1] << 3) | (vals[2] << 6))
+#define MKVAL(vals)  ((vals)[0] | ((vals)[1] << 3) | ((vals)[2] << 6))
 
 /* Image mode - the hardest to comprehend MSS4 coding mode.
  *
@@ -553,10 +553,8 @@ static int mss4_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         return AVERROR_INVALIDDATA;
     }
 
-    if ((ret = ff_reget_buffer(avctx, c->pic)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, c->pic)) < 0)
         return ret;
-    }
     c->pic->key_frame = (frame_type == INTRA_FRAME);
     c->pic->pict_type = (frame_type == INTRA_FRAME) ? AV_PICTURE_TYPE_I
                                                    : AV_PICTURE_TYPE_P;
@@ -574,7 +572,7 @@ static int mss4_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             ff_mss34_gen_quant_mat(c->quant_mat[i], quality, !i);
     }
 
-    init_get_bits(&gb, buf + HEADER_SIZE, (buf_size - HEADER_SIZE) * 8);
+    init_get_bits8(&gb, buf + HEADER_SIZE, (buf_size - HEADER_SIZE));
 
     mb_width  = FFALIGN(width,  16) >> 4;
     mb_height = FFALIGN(height, 16) >> 4;
diff --git a/libavcodec/msvideo1.c b/libavcodec/msvideo1.c
index 7fd8633..e60abee 100644
--- a/libavcodec/msvideo1.c
+++ b/libavcodec/msvideo1.c
@@ -2,20 +2,20 @@
  * Microsoft Video-1 Decoder
  * Copyright (C) 2003 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -67,6 +67,8 @@ static av_cold int msvideo1_decode_init(AVCodecContext *avctx)
     if (s->avctx->bits_per_coded_sample == 8) {
         s->mode_8bit = 1;
         avctx->pix_fmt = AV_PIX_FMT_PAL8;
+        if (avctx->extradata_size >= AVPALETTE_SIZE)
+            memcpy(s->pal, avctx->extradata, AVPALETTE_SIZE);
     } else {
         s->mode_8bit = 0;
         avctx->pix_fmt = AV_PIX_FMT_RGB555;
@@ -300,10 +302,8 @@ static int msvideo1_decode_frame(AVCodecContext *avctx,
     s->buf = buf;
     s->size = buf_size;
 
-    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0) {
-        av_log(s->avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0)
         return ret;
-    }
 
     if (s->mode_8bit) {
         const uint8_t *pal = av_packet_get_side_data(avpkt, AV_PKT_DATA_PALETTE, NULL);
diff --git a/libavcodec/msvideo1enc.c b/libavcodec/msvideo1enc.c
new file mode 100644
index 0000000..6852c98
--- /dev/null
+++ b/libavcodec/msvideo1enc.c
@@ -0,0 +1,305 @@
+/*
+ * Microsoft Video-1 Encoder
+ * Copyright (c) 2009 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Microsoft Video-1 encoder
+ */
+
+#include "avcodec.h"
+#include "internal.h"
+#include "bytestream.h"
+#include "libavutil/lfg.h"
+#include "elbg.h"
+#include "libavutil/imgutils.h"
+/**
+ * Encoder context: per-stream state plus scratch buffers for one 4x4 block
+ */
+typedef struct Msvideo1EncContext {
+    AVCodecContext *avctx; ///< owning codec context, stored by encode_init()
+    AVLFG rnd;             ///< random generator handed to the ELBG calls, seeded with 1
+    uint8_t *prev;         ///< previous reconstructed frame, one byte per colour component
+
+    int block[16*3];       ///< current block: 16 pixels x 3 components, raster order
+    int block2[16*3];      ///< same pixels reordered through remap[] for the 2x2 sub-block search
+    int codebook[8*3];     ///< colours found for the 2-colour mode
+    int codebook2[8*3];    ///< colours found for the 8-colour mode: 4 sub-blocks x 2 colours
+    int output[16*3];      ///< per-pixel codebook index from the whole-block ELBG runs
+    int output2[16*3];     ///< per-pixel codebook index from the sub-block ELBG runs (block2 order)
+    int avg[3];            ///< single colour found for the fill mode
+    int bestpos;           ///< not referenced anywhere else in this file
+    int keyint;            ///< counter compared against keyint_min to force a keyframe
+} Msvideo1EncContext;
+
+enum MSV1Mode{
+    MODE_SKIP = 0, ///< block not coded; counted into a skip run
+    MODE_FILL,     ///< whole block painted with one colour (one 16-bit word)
+    MODE_2COL,     ///< 16-bit pixel mask followed by two colours
+    MODE_8COL,     ///< 16-bit pixel mask followed by two colours per 2x2 sub-block
+};
+
+#define SKIP_PREFIX 0x8400 /* ORed with the run length to form a 16-bit skip code */
+#define SKIPS_MAX 0x03FF /* a skip run is flushed once it reaches this length */
+#define MKRGB555(in, off) (((in)[off] << 10) | ((in)[(off) + 1] << 5) | ((in)[(off) + 2])) /* pack in[off..off+2] as 5:5:5 */
+/* raster index -> index in block2/output2, where every 2x2 sub-block occupies 4 consecutive slots */
+static const int remap[16] = { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15 };
+
+/**
+ * Encode one RGB555 picture as a Microsoft Video-1 frame. Every 4x4 block is
+ * coded as skip, 1-colour fill, 2-colour or 8-colour, whichever scores lowest.
+ * @return 0 with *got_packet set to 1, or a negative error code on failure
+ */
+static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
+                               const AVFrame *pict, int *got_packet)
+{
+    Msvideo1EncContext * const c = avctx->priv_data;
+    const AVFrame *p = pict;
+    uint16_t *src;
+    uint8_t *prevptr, *dst, *buf;
+    int keyframe = 0, no_skips = 1, skips = 0;
+    int i, j, k, x, y, ret;
+    int quality = 24; // divisor applied to every squared-error sum below
+
+    if ((ret = ff_alloc_packet2(avctx, pkt, avctx->width*avctx->height*9 + FF_MIN_BUFFER_SIZE)) < 0)
+        return ret;
+    dst= buf= pkt->data;
+
+    if (!c->prev && !(c->prev = av_malloc(avctx->width * 3 * (avctx->height + 3))))
+        return AVERROR(ENOMEM); // the previous-frame buffer is written below, so it must exist
+    prevptr = c->prev + avctx->width * 3 * (FFALIGN(avctx->height, 4) - 1);
+    src = (uint16_t*)(p->data[0] + p->linesize[0]*(FFALIGN(avctx->height, 4) - 1));
+    if(c->keyint >= avctx->keyint_min)
+        keyframe = 1;
+
+    for(y = 0; y < avctx->height; y += 4){ // src/prevptr start on the last row and move upwards
+        for(x = 0; x < avctx->width; x += 4){
+            int bestmode = MODE_SKIP;
+            int bestscore = INT_MAX; // skip is only scored on non-keyframes
+            int flags = 0;
+            int score;
+
+            for(j = 0; j < 4; j++){
+                for(i = 0; i < 4; i++){
+                    uint16_t val = src[x + i - j*p->linesize[0]/2];
+                    for(k = 0; k < 3; k++){
+                        c->block[(i + j*4)*3 + k] =
+                        c->block2[remap[i + j*4]*3 + k] = (val >> (10-k*5)) & 0x1F;
+                    }
+                }
+            }
+            if(!keyframe){
+                bestscore = 0;
+                for(j = 0; j < 4; j++){
+                    for(i = 0; i < 4*3; i++){
+                        int t = prevptr[x*3 + i - j*3*avctx->width] - c->block[i + j*4*3];
+                        bestscore += t*t;
+                    }
+                }
+                bestscore /= quality;
+            }
+            // try to find optimal value to fill whole 4x4 block
+            score = 0;
+            avpriv_init_elbg(c->block, 3, 16, c->avg, 1, 1, c->output, &c->rnd);
+            avpriv_do_elbg  (c->block, 3, 16, c->avg, 1, 1, c->output, &c->rnd);
+            if(c->avg[0] == 1) // red component = 1 will be written as skip code
+                c->avg[0] = 0;
+            for(j = 0; j < 4; j++){
+                for(i = 0; i < 4; i++){
+                    for(k = 0; k < 3; k++){
+                        int t = c->avg[k] - c->block[(i+j*4)*3+k];
+                        score += t*t;
+                    }
+                }
+            }
+            score /= quality;
+            score += 2; // a fill block is written as 2 bytes
+            if(score < bestscore){
+                bestscore = score;
+                bestmode = MODE_FILL;
+            }
+            // search for optimal filling of 2-color block
+            score = 0;
+            avpriv_init_elbg(c->block, 3, 16, c->codebook, 2, 1, c->output, &c->rnd);
+            avpriv_do_elbg  (c->block, 3, 16, c->codebook, 2, 1, c->output, &c->rnd);
+            // last output value should be always 1, swap codebooks if needed
+            if(!c->output[15]){
+                for(i = 0; i < 3; i++)
+                    FFSWAP(int, c->codebook[i], c->codebook[i+3]); // codebook entries are int
+                for(i = 0; i < 16; i++)
+                    c->output[i] ^= 1;
+            }
+            for(j = 0; j < 4; j++){
+                for(i = 0; i < 4; i++){
+                    for(k = 0; k < 3; k++){
+                        int t = c->codebook[c->output[i+j*4]*3 + k] - c->block[i*3+k+j*4*3];
+                        score += t*t;
+                    }
+                }
+            }
+            score /= quality;
+            score += 6; // a 2-colour block is written as 6 bytes
+            if(score < bestscore){
+                bestscore = score;
+                bestmode = MODE_2COL;
+            }
+            // search for optimal filling of 2-color 2x2 subblocks
+            score = 0;
+            for(i = 0; i < 4; i++){
+                avpriv_init_elbg(c->block2 + i*4*3, 3, 4, c->codebook2 + i*2*3, 2, 1, c->output2 + i*4, &c->rnd);
+                avpriv_do_elbg  (c->block2 + i*4*3, 3, 4, c->codebook2 + i*2*3, 2, 1, c->output2 + i*4, &c->rnd);
+            }
+            // last value should be always 1, swap codebooks if needed
+            if(!c->output2[15]){
+                for(i = 0; i < 3; i++)
+                    FFSWAP(int, c->codebook2[i+18], c->codebook2[i+21]); // codebook entries are int
+                for(i = 12; i < 16; i++)
+                    c->output2[i] ^= 1;
+            }
+            for(j = 0; j < 4; j++){
+                for(i = 0; i < 4; i++){
+                    for(k = 0; k < 3; k++){
+                        int t = c->codebook2[(c->output2[remap[i+j*4]] + (i&2) + (j&2)*2)*3+k] - c->block[i*3+k + j*4*3];
+                        score += t*t;
+                    }
+                }
+            }
+            score /= quality;
+            score += 18; // an 8-colour block is written as 18 bytes
+            if(score < bestscore){
+                bestscore = score;
+                bestmode = MODE_8COL;
+            }
+
+            if(bestmode == MODE_SKIP){
+                skips++;
+                no_skips = 0;
+            }
+            if((bestmode != MODE_SKIP && skips) || skips == SKIPS_MAX){
+                bytestream_put_le16(&dst, skips | SKIP_PREFIX);
+                skips = 0;
+            }
+
+            switch(bestmode){
+            case MODE_FILL:
+                bytestream_put_le16(&dst, MKRGB555(c->avg,0) | 0x8000);
+                for(j = 0; j < 4; j++)
+                    for(i = 0; i < 4; i++)
+                        for(k = 0; k < 3; k++)
+                            prevptr[x*3 + i*3 + k - j*3*avctx->width] = c->avg[k];
+                break;
+            case MODE_2COL:
+                for(j = 0; j < 4; j++){
+                    for(i = 0; i < 4; i++){
+                        flags |= (c->output[i + j*4]^1) << (i + j*4);
+                        for(k = 0; k < 3; k++)
+                            prevptr[x*3 + i*3 + k - j*3*avctx->width] = c->codebook[c->output[i + j*4]*3 + k];
+                    }
+                }
+                bytestream_put_le16(&dst, flags);
+                bytestream_put_le16(&dst, MKRGB555(c->codebook, 0));
+                bytestream_put_le16(&dst, MKRGB555(c->codebook, 3));
+                break;
+            case MODE_8COL:
+                for(j = 0; j < 4; j++){
+                    for(i = 0; i < 4; i++){
+                        flags |= (c->output2[remap[i + j*4]]^1) << (i + j*4);
+                        for(k = 0; k < 3; k++)
+                            prevptr[x*3 + i*3 + k - j*3*avctx->width] = c->codebook2[(c->output2[remap[i+j*4]] + (i&2) + (j&2)*2)*3 + k];
+                    }
+                }
+                bytestream_put_le16(&dst, flags);
+                bytestream_put_le16(&dst, MKRGB555(c->codebook2, 0) | 0x8000);
+                for(i = 3; i < 24; i += 3)
+                    bytestream_put_le16(&dst, MKRGB555(c->codebook2, i));
+                break;
+            }
+        }
+        src     -= p->linesize[0] << 1;
+        prevptr -= avctx->width * 3 * 4;
+    }
+    if(skips)
+        bytestream_put_le16(&dst, skips | SKIP_PREFIX);
+    bytestream_put_byte(&dst, 0); // EOF: the frame ends with two zero bytes
+    bytestream_put_byte(&dst, 0);
+
+    if(no_skips)
+        keyframe = 1;
+    if(keyframe)
+        c->keyint = 0;
+    else
+        c->keyint++;
+    if (keyframe) pkt->flags |= AV_PKT_FLAG_KEY;
+    pkt->size = dst - buf;
+    *got_packet = 1;
+
+    return 0;
+}
+
+
+/**
+ * Validate the frame size and set up the encoder state.
+ * @return 0 on success, a negative AVERROR code if the dimensions are rejected
+ */
+static av_cold int encode_init(AVCodecContext *avctx)
+{
+    Msvideo1EncContext * const c = avctx->priv_data;
+    int ret;
+
+    c->avctx = avctx;
+    if ((ret = av_image_check_size(avctx->width, avctx->height, 0, avctx)) < 0)
+        return ret;
+    if((avctx->width&3) || (avctx->height&3)){
+        av_log(avctx, AV_LOG_ERROR, "width and height must be multiples of 4\n");
+        return AVERROR(EINVAL);
+    }
+
+    avctx->bits_per_coded_sample = 16;
+    // start at keyint_min so that the first encoded frame is forced to be a keyframe
+    c->keyint = avctx->keyint_min;
+    av_lfg_init(&c->rnd, 1);
+    return 0;
+}
+
+
+
+/**
+ * Close the encoder and release its private buffers
+ */
+static av_cold int encode_end(AVCodecContext *avctx)
+{
+    Msvideo1EncContext * const ctx = avctx->priv_data;
+
+    /* the previous-frame buffer is the only allocation made by encode_frame() */
+    av_freep(&ctx->prev);
+    return 0;
+}
+
+AVCodec ff_msvideo1_encoder = {
+    .name           = "msvideo1",
+    .long_name      = NULL_IF_CONFIG_SMALL("Microsoft Video-1"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_MSVIDEO1,
+    .priv_data_size = sizeof(Msvideo1EncContext),
+    .init           = encode_init,
+    .encode2        = encode_frame,
+    .close          = encode_end,
+    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_RGB555, AV_PIX_FMT_NONE },
+};
diff --git a/libavcodec/mvcdec.c b/libavcodec/mvcdec.c
index 7cc0329..69f0ee2 100644
--- a/libavcodec/mvcdec.c
+++ b/libavcodec/mvcdec.c
@@ -2,20 +2,20 @@
  * Silicon Graphics Motion Video Compressor 1 & 2 decoder
  * Copyright (c) 2012 Peter Ross
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/mxpegdec.c b/libavcodec/mxpegdec.c
index bfaae34..fbd4767 100644
--- a/libavcodec/mxpegdec.c
+++ b/libavcodec/mxpegdec.c
@@ -2,20 +2,20 @@
  * MxPEG decoder
  * Copyright (c) 2011 Anatoly Nenashev
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -54,6 +54,7 @@ static av_cold int mxpeg_decode_end(AVCodecContext *avctx)
     for (i = 0; i < 2; ++i)
         av_frame_free(&s->picture[i]);
 
+    s->bitmask_size = 0;
     av_freep(&s->mxm_bitmask);
     av_freep(&s->completion_bitmask);
 
@@ -105,6 +106,7 @@ static int mxpeg_decode_mxm(MXpegDecodeContext *s,
     }
 
     if (s->bitmask_size != bitmask_size) {
+        s->bitmask_size = 0;
         av_freep(&s->mxm_bitmask);
         s->mxm_bitmask = av_malloc(bitmask_size);
         if (!s->mxm_bitmask) {
@@ -272,11 +274,9 @@ static int mxpeg_decode_frame(AVCodecContext *avctx,
                     }
                     /* use stored SOF data to allocate current picture */
                     av_frame_unref(jpg->picture_ptr);
-                    if (ff_get_buffer(avctx, jpg->picture_ptr,
-                                      AV_GET_BUFFER_FLAG_REF) < 0) {
-                        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
-                        return AVERROR(ENOMEM);
-                    }
+                    if ((ret = ff_get_buffer(avctx, jpg->picture_ptr,
+                                             AV_GET_BUFFER_FLAG_REF)) < 0)
+                        return ret;
                     jpg->picture_ptr->pict_type = AV_PICTURE_TYPE_P;
                     jpg->picture_ptr->key_frame = 0;
                     jpg->got_picture = 1;
@@ -292,17 +292,15 @@ static int mxpeg_decode_frame(AVCodecContext *avctx,
 
                     /* allocate dummy reference picture if needed */
                     if (!reference_ptr->data[0] &&
-                        ff_get_buffer(avctx, reference_ptr,
-                                      AV_GET_BUFFER_FLAG_REF) < 0) {
-                        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
-                        return AVERROR(ENOMEM);
-                    }
+                        (ret = ff_get_buffer(avctx, reference_ptr,
+                                             AV_GET_BUFFER_FLAG_REF)) < 0)
+                        return ret;
 
-                    ret = ff_mjpeg_decode_sos(jpg, s->mxm_bitmask, reference_ptr);
+                    ret = ff_mjpeg_decode_sos(jpg, s->mxm_bitmask, s->bitmask_size, reference_ptr);
                     if (ret < 0 && (avctx->err_recognition & AV_EF_EXPLODE))
                         return ret;
                 } else {
-                    ret = ff_mjpeg_decode_sos(jpg, NULL, NULL);
+                    ret = ff_mjpeg_decode_sos(jpg, NULL, 0, NULL);
                     if (ret < 0 && (avctx->err_recognition & AV_EF_EXPLODE))
                         return ret;
                 }
@@ -346,4 +344,5 @@ AVCodec ff_mxpeg_decoder = {
     .close          = mxpeg_decode_end,
     .decode         = mxpeg_decode_frame,
     .capabilities   = CODEC_CAP_DR1,
+    .max_lowres     = 3,
 };
diff --git a/libavcodec/nellymoserdec.c b/libavcodec/nellymoserdec.c
index 3b9b77c..ef16fd6 100644
--- a/libavcodec/nellymoserdec.c
+++ b/libavcodec/nellymoserdec.c
@@ -142,16 +142,19 @@ static int decode_tag(AVCodecContext *avctx, void *data,
 {
     AVFrame *frame     = data;
     const uint8_t *buf = avpkt->data;
+    const uint8_t *side=av_packet_get_side_data(avpkt, 'F', NULL);
     int buf_size = avpkt->size;
     NellyMoserDecodeContext *s = avctx->priv_data;
     int blocks, i, ret;
     float   *samples_flt;
 
     blocks     = buf_size / NELLY_BLOCK_LEN;
+
     if (blocks <= 0) {
         av_log(avctx, AV_LOG_ERROR, "Packet is too small\n");
         return AVERROR_INVALIDDATA;
     }
+
     if (buf_size % NELLY_BLOCK_LEN) {
         av_log(avctx, AV_LOG_WARNING, "Leftover bytes: %d.\n",
                buf_size % NELLY_BLOCK_LEN);
@@ -163,13 +166,13 @@ static int decode_tag(AVCodecContext *avctx, void *data,
      * 22050 Hz - 4
      * 44100 Hz - 8
      */
+    if(side && blocks>1 && avctx->sample_rate%11025==0 && (1<<((side[0]>>2)&3)) == blocks)
+        avctx->sample_rate= 11025*(blocks/2);
 
     /* get output buffer */
     frame->nb_samples = NELLY_SAMPLES * blocks;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     samples_flt = (float *)frame->data[0];
 
     for (i=0 ; i<blocks ; i++) {
diff --git a/libavcodec/nellymoserenc.c b/libavcodec/nellymoserenc.c
index 5732163..98e33f0 100644
--- a/libavcodec/nellymoserenc.c
+++ b/libavcodec/nellymoserenc.c
@@ -4,20 +4,20 @@
  *
  * Copyright (c) 2008 Bartlomiej Wolowiec
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -28,7 +28,7 @@
  *
  * Generic codec information: libavcodec/nellymoserdec.c
  *
- * Some information also from: http://samples.libav.org/A-codecs/Nelly_Moser/ASAO/ASAO.zip
+ * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
  *                             (Copyright Joseph Artsimovich and UAB "DKD")
  *
  * for more information about nellymoser format, visit:
@@ -62,8 +62,8 @@ typedef struct NellyMoserEncodeContext {
     DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
     DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
     DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN];     ///< sample buffer
-    float           (*opt )[NELLY_BANDS];
-    uint8_t         (*path)[NELLY_BANDS];
+    float           (*opt )[OPT_SIZE];
+    uint8_t         (*path)[OPT_SIZE];
 } NellyMoserEncodeContext;
 
 static float pow_table[POW_TABLE_SIZE];     ///< -pow(2, -i / 2048.0 - 3.0);
@@ -173,7 +173,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
     avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
 
     /* Generate overlap window */
-    ff_sine_window_init(ff_sine_128, 128);
+    ff_init_ff_sine_windows(7);
     for (i = 0; i < POW_TABLE_SIZE; i++)
         pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);
 
@@ -228,8 +228,8 @@ static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *i
     int i, j, band, best_idx;
     float power_candidate, best_val;
 
-    float  (*opt )[NELLY_BANDS] = s->opt ;
-    uint8_t(*path)[NELLY_BANDS] = s->path;
+    float  (*opt )[OPT_SIZE] = s->opt ;
+    uint8_t(*path)[OPT_SIZE] = s->path;
 
     for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
         opt[0][i] = INFINITY;
@@ -392,10 +392,8 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
         s->last_frame = 1;
     }
 
-    if ((ret = ff_alloc_packet(avpkt, NELLY_BLOCK_LEN))) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+    if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN)) < 0)
         return ret;
-    }
     encode_block(s, avpkt->data, avpkt->size);
 
     /* Get the next frame pts/duration */
diff --git a/libavcodec/neon/mpegvideo.c b/libavcodec/neon/mpegvideo.c
index cb9bd66..f569725 100644
--- a/libavcodec/neon/mpegvideo.c
+++ b/libavcodec/neon/mpegvideo.c
@@ -2,20 +2,20 @@
  * Copyright (c) 2010 Mans Rullgard
  * Copyright (c) 2014 James Yu <james.yu@linaro.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/noise_bsf.c b/libavcodec/noise_bsf.c
index 3e552e2..4f609de 100644
--- a/libavcodec/noise_bsf.c
+++ b/libavcodec/noise_bsf.c
@@ -1,20 +1,20 @@
 /*
  * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -32,7 +32,12 @@ static int noise(AVBitStreamFilterContext *bsfc, AVCodecContext *avctx, const ch
     int amount= args ? atoi(args) : (*state % 10001+1);
     int i;
 
+    if(amount <= 0)
+        return AVERROR(EINVAL);
+
     *poutbuf= av_malloc(buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    if (!*poutbuf)
+        return AVERROR(ENOMEM);
 
     memcpy(*poutbuf, buf, buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
     for(i=0; i<buf_size; i++){
@@ -44,7 +49,7 @@ static int noise(AVBitStreamFilterContext *bsfc, AVCodecContext *avctx, const ch
 }
 
 AVBitStreamFilter ff_noise_bsf={
-    "noise",
-    sizeof(int),
-    noise,
+    .name           = "noise",
+    .priv_data_size = sizeof(int),
+    .filter         = noise,
 };
diff --git a/libavcodec/nuv.c b/libavcodec/nuv.c
index c31ff11..048ad6f 100644
--- a/libavcodec/nuv.c
+++ b/libavcodec/nuv.c
@@ -2,25 +2,26 @@
  * NuppelVideo decoder
  * Copyright (c) 2006 Reimar Doeffinger
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <limits.h>
 
 #include "libavutil/bswap.h"
 #include "libavutil/common.h"
@@ -121,23 +122,26 @@ static int codec_reinit(AVCodecContext *avctx, int width, int height,
     if (quality >= 0)
         get_quant_quality(c, quality);
     if (width != c->width || height != c->height) {
-        void *ptr;
+        // also reserve space for a possible additional header
+        int buf_size = height * width * 3 / 2
+                     + FFMAX(AV_LZO_OUTPUT_PADDING, FF_INPUT_BUFFER_PADDING_SIZE)
+                     + RTJPEG_HEADER_SIZE;
+        if (buf_size > INT_MAX/8)
+            return -1;
         if ((ret = av_image_check_size(height, width, 0, avctx)) < 0)
             return ret;
         avctx->width  = c->width  = width;
         avctx->height = c->height = height;
-        ptr = av_fast_realloc(c->decomp_buf, &c->decomp_size,
-                              c->height * c->width * 3 / 2 +
-                              FF_INPUT_BUFFER_PADDING_SIZE +
-                              RTJPEG_HEADER_SIZE);
-        if (!ptr) {
+        av_fast_malloc(&c->decomp_buf, &c->decomp_size,
+                       buf_size);
+        if (!c->decomp_buf) {
             av_log(avctx, AV_LOG_ERROR,
                    "Can't allocate decompression buffer.\n");
             return AVERROR(ENOMEM);
-        } else
-            c->decomp_buf = ptr;
+        }
         ff_rtjpeg_decode_init(&c->rtj, c->width, c->height, c->lq, c->cq);
         av_frame_unref(c->pic);
+        return 1;
     } else if (quality != c->quality)
         ff_rtjpeg_decode_init(&c->rtj, c->width, c->height, c->lq, c->cq);
 
@@ -153,6 +157,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     AVFrame *picture   = data;
     int orig_size      = buf_size;
     int keyframe, ret;
+    int size_change = 0;
     int result, init_frame = !avctx->frame_number;
     enum {
         NUV_UNCOMPRESSED  = '0',
@@ -181,7 +186,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         return orig_size;
     }
 
-    if (buf[0] != 'V' || buf_size < 12) {
+    if (buf_size < 12 || buf[0] != 'V') {
         av_log(avctx, AV_LOG_ERROR, "not a nuv video frame\n");
         return AVERROR_INVALIDDATA;
     }
@@ -198,24 +203,32 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         keyframe = 1;
         break;
     }
+retry:
     // skip rest of the frameheader.
     buf       = &buf[12];
     buf_size -= 12;
     if (comptype == NUV_RTJPEG_IN_LZO || comptype == NUV_LZO) {
-        int outlen = c->decomp_size - FF_INPUT_BUFFER_PADDING_SIZE;
+        int outlen = c->decomp_size - FFMAX(FF_INPUT_BUFFER_PADDING_SIZE, AV_LZO_OUTPUT_PADDING);
         int inlen  = buf_size;
         if (av_lzo1x_decode(c->decomp_buf, &outlen, buf, &inlen)) {
             av_log(avctx, AV_LOG_ERROR, "error during lzo decompression\n");
             return AVERROR_INVALIDDATA;
         }
         buf      = c->decomp_buf;
-        buf_size = outlen;
+        buf_size = c->decomp_size - FFMAX(FF_INPUT_BUFFER_PADDING_SIZE, AV_LZO_OUTPUT_PADDING) - outlen;
+        memset(c->decomp_buf + buf_size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
     }
     if (c->codec_frameheader) {
         int w, h, q;
-        if (buf_size < RTJPEG_HEADER_SIZE || buf[4] != RTJPEG_HEADER_SIZE ||
-            buf[5] != RTJPEG_FILE_VERSION) {
-            av_log(avctx, AV_LOG_ERROR, "invalid nuv video frame\n");
+        if (buf_size < RTJPEG_HEADER_SIZE) {
+            av_log(avctx, AV_LOG_ERROR, "Too small NUV video frame\n");
+            return AVERROR_INVALIDDATA;
+        }
+        // There seem to exist two variants of this header: one starts with 'V'
+        // and 5 bytes unknown, the other matches current MythTV and is 4 bytes size,
+        // 1 byte header size (== 12), 1 byte version (== 0)
+        if (buf[0] != 'V' && AV_RL16(&buf[4]) != 0x000c) {
+            av_log(avctx, AV_LOG_ERROR, "Unknown secondary frame header (wrong codec_tag?)\n");
             return AVERROR_INVALIDDATA;
         }
         w = AV_RL16(&buf[6]);
@@ -223,22 +236,23 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         q = buf[10];
         if ((result = codec_reinit(avctx, w, h, q)) < 0)
             return result;
-        if (comptype == NUV_RTJPEG_IN_LZO || comptype == NUV_LZO)
-            buf = c->decomp_buf;
+        if (result) {
+            buf = avpkt->data;
+            buf_size = avpkt->size;
+            size_change = 1;
+            goto retry;
+        }
         buf       = &buf[RTJPEG_HEADER_SIZE];
         buf_size -= RTJPEG_HEADER_SIZE;
     }
 
-    if (keyframe) {
+    if (size_change || keyframe) {
         av_frame_unref(c->pic);
         init_frame = 1;
     }
 
-    result = ff_reget_buffer(avctx, c->pic);
-    if (result < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((result = ff_reget_buffer(avctx, c->pic)) < 0)
         return result;
-    }
     if (init_frame) {
         memset(c->pic->data[0], 0,    avctx->height * c->pic->linesize[0]);
         memset(c->pic->data[1], 0x80, avctx->height * c->pic->linesize[1] / 2);
@@ -256,7 +270,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             av_log(avctx, AV_LOG_ERROR, "uncompressed frame too short\n");
             height = buf_size / c->width / 3 * 2;
         }
-        copy_frame(c->pic, buf, c->width, height);
+        if(height > 0)
+            copy_frame(c->pic, buf, c->width, height);
         break;
     }
     case NUV_RTJPEG_IN_LZO:
diff --git a/libavcodec/old_codec_ids.h b/libavcodec/old_codec_ids.h
new file mode 100644
index 0000000..c7aa0e0
--- /dev/null
+++ b/libavcodec/old_codec_ids.h
@@ -0,0 +1,397 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_OLD_CODEC_IDS_H
+#define AVCODEC_OLD_CODEC_IDS_H
+
+/*
+ * This header exists to prevent new codec IDs from being accidentally added to
+ * the deprecated list.
+ * Do not include it directly. It will be removed on the next major bump.
+ *
+ * Do not add new items to this list. Use the AVCodecID enum instead.
+ */
+
+    CODEC_ID_NONE = AV_CODEC_ID_NONE,
+
+    /* video codecs */
+    CODEC_ID_MPEG1VIDEO,
+    CODEC_ID_MPEG2VIDEO, ///< preferred ID for MPEG-1/2 video decoding
+#if FF_API_XVMC
+    CODEC_ID_MPEG2VIDEO_XVMC,
+#endif
+    CODEC_ID_H261,
+    CODEC_ID_H263,
+    CODEC_ID_RV10,
+    CODEC_ID_RV20,
+    CODEC_ID_MJPEG,
+    CODEC_ID_MJPEGB,
+    CODEC_ID_LJPEG,
+    CODEC_ID_SP5X,
+    CODEC_ID_JPEGLS,
+    CODEC_ID_MPEG4,
+    CODEC_ID_RAWVIDEO,
+    CODEC_ID_MSMPEG4V1,
+    CODEC_ID_MSMPEG4V2,
+    CODEC_ID_MSMPEG4V3,
+    CODEC_ID_WMV1,
+    CODEC_ID_WMV2,
+    CODEC_ID_H263P,
+    CODEC_ID_H263I,
+    CODEC_ID_FLV1,
+    CODEC_ID_SVQ1,
+    CODEC_ID_SVQ3,
+    CODEC_ID_DVVIDEO,
+    CODEC_ID_HUFFYUV,
+    CODEC_ID_CYUV,
+    CODEC_ID_H264,
+    CODEC_ID_INDEO3,
+    CODEC_ID_VP3,
+    CODEC_ID_THEORA,
+    CODEC_ID_ASV1,
+    CODEC_ID_ASV2,
+    CODEC_ID_FFV1,
+    CODEC_ID_4XM,
+    CODEC_ID_VCR1,
+    CODEC_ID_CLJR,
+    CODEC_ID_MDEC,
+    CODEC_ID_ROQ,
+    CODEC_ID_INTERPLAY_VIDEO,
+    CODEC_ID_XAN_WC3,
+    CODEC_ID_XAN_WC4,
+    CODEC_ID_RPZA,
+    CODEC_ID_CINEPAK,
+    CODEC_ID_WS_VQA,
+    CODEC_ID_MSRLE,
+    CODEC_ID_MSVIDEO1,
+    CODEC_ID_IDCIN,
+    CODEC_ID_8BPS,
+    CODEC_ID_SMC,
+    CODEC_ID_FLIC,
+    CODEC_ID_TRUEMOTION1,
+    CODEC_ID_VMDVIDEO,
+    CODEC_ID_MSZH,
+    CODEC_ID_ZLIB,
+    CODEC_ID_QTRLE,
+    CODEC_ID_TSCC,
+    CODEC_ID_ULTI,
+    CODEC_ID_QDRAW,
+    CODEC_ID_VIXL,
+    CODEC_ID_QPEG,
+    CODEC_ID_PNG,
+    CODEC_ID_PPM,
+    CODEC_ID_PBM,
+    CODEC_ID_PGM,
+    CODEC_ID_PGMYUV,
+    CODEC_ID_PAM,
+    CODEC_ID_FFVHUFF,
+    CODEC_ID_RV30,
+    CODEC_ID_RV40,
+    CODEC_ID_VC1,
+    CODEC_ID_WMV3,
+    CODEC_ID_LOCO,
+    CODEC_ID_WNV1,
+    CODEC_ID_AASC,
+    CODEC_ID_INDEO2,
+    CODEC_ID_FRAPS,
+    CODEC_ID_TRUEMOTION2,
+    CODEC_ID_BMP,
+    CODEC_ID_CSCD,
+    CODEC_ID_MMVIDEO,
+    CODEC_ID_ZMBV,
+    CODEC_ID_AVS,
+    CODEC_ID_SMACKVIDEO,
+    CODEC_ID_NUV,
+    CODEC_ID_KMVC,
+    CODEC_ID_FLASHSV,
+    CODEC_ID_CAVS,
+    CODEC_ID_JPEG2000,
+    CODEC_ID_VMNC,
+    CODEC_ID_VP5,
+    CODEC_ID_VP6,
+    CODEC_ID_VP6F,
+    CODEC_ID_TARGA,
+    CODEC_ID_DSICINVIDEO,
+    CODEC_ID_TIERTEXSEQVIDEO,
+    CODEC_ID_TIFF,
+    CODEC_ID_GIF,
+    CODEC_ID_DXA,
+    CODEC_ID_DNXHD,
+    CODEC_ID_THP,
+    CODEC_ID_SGI,
+    CODEC_ID_C93,
+    CODEC_ID_BETHSOFTVID,
+    CODEC_ID_PTX,
+    CODEC_ID_TXD,
+    CODEC_ID_VP6A,
+    CODEC_ID_AMV,
+    CODEC_ID_VB,
+    CODEC_ID_PCX,
+    CODEC_ID_SUNRAST,
+    CODEC_ID_INDEO4,
+    CODEC_ID_INDEO5,
+    CODEC_ID_MIMIC,
+    CODEC_ID_RL2,
+    CODEC_ID_ESCAPE124,
+    CODEC_ID_DIRAC,
+    CODEC_ID_BFI,
+    CODEC_ID_CMV,
+    CODEC_ID_MOTIONPIXELS,
+    CODEC_ID_TGV,
+    CODEC_ID_TGQ,
+    CODEC_ID_TQI,
+    CODEC_ID_AURA,
+    CODEC_ID_AURA2,
+    CODEC_ID_V210X,
+    CODEC_ID_TMV,
+    CODEC_ID_V210,
+    CODEC_ID_DPX,
+    CODEC_ID_MAD,
+    CODEC_ID_FRWU,
+    CODEC_ID_FLASHSV2,
+    CODEC_ID_CDGRAPHICS,
+    CODEC_ID_R210,
+    CODEC_ID_ANM,
+    CODEC_ID_BINKVIDEO,
+    CODEC_ID_IFF_ILBM,
+    CODEC_ID_IFF_BYTERUN1,
+    CODEC_ID_KGV1,
+    CODEC_ID_YOP,
+    CODEC_ID_VP8,
+    CODEC_ID_PICTOR,
+    CODEC_ID_ANSI,
+    CODEC_ID_A64_MULTI,
+    CODEC_ID_A64_MULTI5,
+    CODEC_ID_R10K,
+    CODEC_ID_MXPEG,
+    CODEC_ID_LAGARITH,
+    CODEC_ID_PRORES,
+    CODEC_ID_JV,
+    CODEC_ID_DFA,
+    CODEC_ID_WMV3IMAGE,
+    CODEC_ID_VC1IMAGE,
+    CODEC_ID_UTVIDEO,
+    CODEC_ID_BMV_VIDEO,
+    CODEC_ID_VBLE,
+    CODEC_ID_DXTORY,
+    CODEC_ID_V410,
+    CODEC_ID_XWD,
+    CODEC_ID_CDXL,
+    CODEC_ID_XBM,
+    CODEC_ID_ZEROCODEC,
+    CODEC_ID_MSS1,
+    CODEC_ID_MSA1,
+    CODEC_ID_TSCC2,
+    CODEC_ID_MTS2,
+    CODEC_ID_CLLC,
+    CODEC_ID_Y41P       = MKBETAG('Y','4','1','P'),
+    CODEC_ID_ESCAPE130  = MKBETAG('E','1','3','0'),
+    CODEC_ID_EXR        = MKBETAG('0','E','X','R'),
+    CODEC_ID_AVRP       = MKBETAG('A','V','R','P'),
+
+    CODEC_ID_G2M        = MKBETAG( 0 ,'G','2','M'),
+    CODEC_ID_AVUI       = MKBETAG('A','V','U','I'),
+    CODEC_ID_AYUV       = MKBETAG('A','Y','U','V'),
+    CODEC_ID_V308       = MKBETAG('V','3','0','8'),
+    CODEC_ID_V408       = MKBETAG('V','4','0','8'),
+    CODEC_ID_YUV4       = MKBETAG('Y','U','V','4'),
+    CODEC_ID_SANM       = MKBETAG('S','A','N','M'),
+    CODEC_ID_PAF_VIDEO  = MKBETAG('P','A','F','V'),
+    CODEC_ID_SNOW       = AV_CODEC_ID_SNOW,
+
+    /* various PCM "codecs" */
+    CODEC_ID_FIRST_AUDIO = 0x10000,     ///< A dummy id pointing at the start of audio codecs
+    CODEC_ID_PCM_S16LE = 0x10000,
+    CODEC_ID_PCM_S16BE,
+    CODEC_ID_PCM_U16LE,
+    CODEC_ID_PCM_U16BE,
+    CODEC_ID_PCM_S8,
+    CODEC_ID_PCM_U8,
+    CODEC_ID_PCM_MULAW,
+    CODEC_ID_PCM_ALAW,
+    CODEC_ID_PCM_S32LE,
+    CODEC_ID_PCM_S32BE,
+    CODEC_ID_PCM_U32LE,
+    CODEC_ID_PCM_U32BE,
+    CODEC_ID_PCM_S24LE,
+    CODEC_ID_PCM_S24BE,
+    CODEC_ID_PCM_U24LE,
+    CODEC_ID_PCM_U24BE,
+    CODEC_ID_PCM_S24DAUD,
+    CODEC_ID_PCM_ZORK,
+    CODEC_ID_PCM_S16LE_PLANAR,
+    CODEC_ID_PCM_DVD,
+    CODEC_ID_PCM_F32BE,
+    CODEC_ID_PCM_F32LE,
+    CODEC_ID_PCM_F64BE,
+    CODEC_ID_PCM_F64LE,
+    CODEC_ID_PCM_BLURAY,
+    CODEC_ID_PCM_LXF,
+    CODEC_ID_S302M,
+    CODEC_ID_PCM_S8_PLANAR,
+
+    /* various ADPCM codecs */
+    CODEC_ID_ADPCM_IMA_QT = 0x11000,
+    CODEC_ID_ADPCM_IMA_WAV,
+    CODEC_ID_ADPCM_IMA_DK3,
+    CODEC_ID_ADPCM_IMA_DK4,
+    CODEC_ID_ADPCM_IMA_WS,
+    CODEC_ID_ADPCM_IMA_SMJPEG,
+    CODEC_ID_ADPCM_MS,
+    CODEC_ID_ADPCM_4XM,
+    CODEC_ID_ADPCM_XA,
+    CODEC_ID_ADPCM_ADX,
+    CODEC_ID_ADPCM_EA,
+    CODEC_ID_ADPCM_G726,
+    CODEC_ID_ADPCM_CT,
+    CODEC_ID_ADPCM_SWF,
+    CODEC_ID_ADPCM_YAMAHA,
+    CODEC_ID_ADPCM_SBPRO_4,
+    CODEC_ID_ADPCM_SBPRO_3,
+    CODEC_ID_ADPCM_SBPRO_2,
+    CODEC_ID_ADPCM_THP,
+    CODEC_ID_ADPCM_IMA_AMV,
+    CODEC_ID_ADPCM_EA_R1,
+    CODEC_ID_ADPCM_EA_R3,
+    CODEC_ID_ADPCM_EA_R2,
+    CODEC_ID_ADPCM_IMA_EA_SEAD,
+    CODEC_ID_ADPCM_IMA_EA_EACS,
+    CODEC_ID_ADPCM_EA_XAS,
+    CODEC_ID_ADPCM_EA_MAXIS_XA,
+    CODEC_ID_ADPCM_IMA_ISS,
+    CODEC_ID_ADPCM_G722,
+    CODEC_ID_ADPCM_IMA_APC,
+    CODEC_ID_VIMA       = MKBETAG('V','I','M','A'),
+
+    /* AMR */
+    CODEC_ID_AMR_NB = 0x12000,
+    CODEC_ID_AMR_WB,
+
+    /* RealAudio codecs */
+    CODEC_ID_RA_144 = 0x13000,
+    CODEC_ID_RA_288,
+
+    /* various DPCM codecs */
+    CODEC_ID_ROQ_DPCM = 0x14000,
+    CODEC_ID_INTERPLAY_DPCM,
+    CODEC_ID_XAN_DPCM,
+    CODEC_ID_SOL_DPCM,
+
+    /* audio codecs */
+    CODEC_ID_MP2 = 0x15000,
+    CODEC_ID_MP3, ///< preferred ID for decoding MPEG audio layer 1, 2 or 3
+    CODEC_ID_AAC,
+    CODEC_ID_AC3,
+    CODEC_ID_DTS,
+    CODEC_ID_VORBIS,
+    CODEC_ID_DVAUDIO,
+    CODEC_ID_WMAV1,
+    CODEC_ID_WMAV2,
+    CODEC_ID_MACE3,
+    CODEC_ID_MACE6,
+    CODEC_ID_VMDAUDIO,
+    CODEC_ID_FLAC,
+    CODEC_ID_MP3ADU,
+    CODEC_ID_MP3ON4,
+    CODEC_ID_SHORTEN,
+    CODEC_ID_ALAC,
+    CODEC_ID_WESTWOOD_SND1,
+    CODEC_ID_GSM, ///< as in Berlin toast format
+    CODEC_ID_QDM2,
+    CODEC_ID_COOK,
+    CODEC_ID_TRUESPEECH,
+    CODEC_ID_TTA,
+    CODEC_ID_SMACKAUDIO,
+    CODEC_ID_QCELP,
+    CODEC_ID_WAVPACK,
+    CODEC_ID_DSICINAUDIO,
+    CODEC_ID_IMC,
+    CODEC_ID_MUSEPACK7,
+    CODEC_ID_MLP,
+    CODEC_ID_GSM_MS, /* as found in WAV */
+    CODEC_ID_ATRAC3,
+    CODEC_ID_VOXWARE,
+    CODEC_ID_APE,
+    CODEC_ID_NELLYMOSER,
+    CODEC_ID_MUSEPACK8,
+    CODEC_ID_SPEEX,
+    CODEC_ID_WMAVOICE,
+    CODEC_ID_WMAPRO,
+    CODEC_ID_WMALOSSLESS,
+    CODEC_ID_ATRAC3P,
+    CODEC_ID_EAC3,
+    CODEC_ID_SIPR,
+    CODEC_ID_MP1,
+    CODEC_ID_TWINVQ,
+    CODEC_ID_TRUEHD,
+    CODEC_ID_MP4ALS,
+    CODEC_ID_ATRAC1,
+    CODEC_ID_BINKAUDIO_RDFT,
+    CODEC_ID_BINKAUDIO_DCT,
+    CODEC_ID_AAC_LATM,
+    CODEC_ID_QDMC,
+    CODEC_ID_CELT,
+    CODEC_ID_G723_1,
+    CODEC_ID_G729,
+    CODEC_ID_8SVX_EXP,
+    CODEC_ID_8SVX_FIB,
+    CODEC_ID_BMV_AUDIO,
+    CODEC_ID_RALF,
+    CODEC_ID_IAC,
+    CODEC_ID_ILBC,
+    CODEC_ID_FFWAVESYNTH = MKBETAG('F','F','W','S'),
+    CODEC_ID_SONIC       = MKBETAG('S','O','N','C'),
+    CODEC_ID_SONIC_LS    = MKBETAG('S','O','N','L'),
+    CODEC_ID_PAF_AUDIO   = MKBETAG('P','A','F','A'),
+    CODEC_ID_OPUS        = MKBETAG('O','P','U','S'),
+
+    /* subtitle codecs */
+    CODEC_ID_FIRST_SUBTITLE = 0x17000,          ///< A dummy ID pointing at the start of subtitle codecs.
+    CODEC_ID_DVD_SUBTITLE = 0x17000,
+    CODEC_ID_DVB_SUBTITLE,
+    CODEC_ID_TEXT,  ///< raw UTF-8 text
+    CODEC_ID_XSUB,
+    CODEC_ID_SSA,
+    CODEC_ID_MOV_TEXT,
+    CODEC_ID_HDMV_PGS_SUBTITLE,
+    CODEC_ID_DVB_TELETEXT,
+    CODEC_ID_SRT,
+    CODEC_ID_MICRODVD   = MKBETAG('m','D','V','D'),
+    CODEC_ID_EIA_608    = MKBETAG('c','6','0','8'),
+    CODEC_ID_JACOSUB    = MKBETAG('J','S','U','B'),
+    CODEC_ID_SAMI       = MKBETAG('S','A','M','I'),
+    CODEC_ID_REALTEXT   = MKBETAG('R','T','X','T'),
+    CODEC_ID_SUBVIEWER  = MKBETAG('S','u','b','V'),
+
+    /* other specific kind of codecs (generally used for attachments) */
+    CODEC_ID_FIRST_UNKNOWN = 0x18000,           ///< A dummy ID pointing at the start of various fake codecs.
+    CODEC_ID_TTF = 0x18000,
+    CODEC_ID_BINTEXT    = MKBETAG('B','T','X','T'),
+    CODEC_ID_XBIN       = MKBETAG('X','B','I','N'),
+    CODEC_ID_IDF        = MKBETAG( 0 ,'I','D','F'),
+    CODEC_ID_OTF        = MKBETAG( 0 ,'O','T','F'),
+
+    CODEC_ID_PROBE = 0x19000, ///< codec_id is not known (like CODEC_ID_NONE) but lavf should attempt to identify it
+
+    CODEC_ID_MPEG2TS = 0x20000, /**< _FAKE_ codec to indicate a raw MPEG-2 TS
+                                * stream (only used by libavformat) */
+    CODEC_ID_MPEG4SYSTEMS = 0x20001, /**< _FAKE_ codec to indicate a MPEG-4 Systems
+                                * stream (only used by libavformat) */
+    CODEC_ID_FFMETADATA = 0x21000,   ///< Dummy codec for streams containing only metadata information.
+
+#endif /* AVCODEC_OLD_CODEC_IDS_H */
diff --git a/libavcodec/on2avc.c b/libavcodec/on2avc.c
index 60f451c..ab6048b 100644
--- a/libavcodec/on2avc.c
+++ b/libavcodec/on2avc.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2013 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -172,7 +172,7 @@ static int on2avc_decode_band_scales(On2AVCContext *c, GetBitContext *gb)
             } else {
                 scale += get_vlc2(gb, c->scale_diff.table, 9, 3) - 60;
             }
-            if (scale < 0 || scale > 128) {
+            if (scale < 0 || scale > 127) {
                 av_log(c->avctx, AV_LOG_ERROR, "Invalid scale value %d\n",
                        scale);
                 return AVERROR_INVALIDDATA;
@@ -313,7 +313,7 @@ static void zero_head_and_tail(float *src, int len, int order0, int order1)
 }
 
 static void pretwiddle(float *src, float *dst, int dst_len, int tab_step,
-                       int step, int order0, int order1, const double **tabs)
+                       int step, int order0, int order1, const double * const *tabs)
 {
     float *src2, *out;
     const double *tab;
@@ -341,7 +341,7 @@ static void pretwiddle(float *src, float *dst, int dst_len, int tab_step,
 
 static void twiddle(float *src1, float *src2, int src2_len,
                     const double *tab, int tab_len, int step,
-                    int order0, int order1, const double **tabs)
+                    int order0, int order1, const double * const *tabs)
 {
     int steps;
     int mask;
diff --git a/libavcodec/on2avcdata.c b/libavcodec/on2avcdata.c
index d039f23..abe5983 100644
--- a/libavcodec/on2avcdata.c
+++ b/libavcodec/on2avcdata.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2013 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -7641,11 +7641,11 @@ static const double tabs_4_10[4 * 2][10] = {
      -0.099339873,     -0.041293536,    0.31028851,     0.17727433,    -0.92756648 }
 };
 
-const double *ff_on2avc_tabs_4_10_1[4] = {
+const double * const ff_on2avc_tabs_4_10_1[4] = {
     tabs_4_10[0], tabs_4_10[1], tabs_4_10[2], tabs_4_10[3]
 };
 
-const double *ff_on2avc_tabs_4_10_2[4] = {
+const double * const ff_on2avc_tabs_4_10_2[4] = {
     tabs_4_10[4], tabs_4_10[5], tabs_4_10[6], tabs_4_10[7]
 };
 
@@ -7724,12 +7724,12 @@ static const double tabs_9_20[9 * 2][20] = {
       0.22783, 0.058894795, -0.61350902, 0.69559873, -0.27013783, }
 };
 
-const double* ff_on2avc_tabs_9_20_1[9] = {
+const double* const ff_on2avc_tabs_9_20_1[9] = {
     tabs_9_20[0], tabs_9_20[1], tabs_9_20[2], tabs_9_20[3], tabs_9_20[4],
     tabs_9_20[5], tabs_9_20[6], tabs_9_20[7], tabs_9_20[8]
 };
 
-const double* ff_on2avc_tabs_9_20_2[9] = {
+const double* const ff_on2avc_tabs_9_20_2[9] = {
     tabs_9_20[ 9], tabs_9_20[10], tabs_9_20[11], tabs_9_20[12], tabs_9_20[13],
     tabs_9_20[14], tabs_9_20[15], tabs_9_20[16], tabs_9_20[17]
 };
@@ -7927,7 +7927,7 @@ static const double tabs_19_40[19 * 2][40] = {
       0.019871848, -0.11989559, 0.036659135, 0.26632201, -0.3057397, -0.23220335, 0.68741352, -0.54024027, }
 };
 
-const double* ff_on2avc_tabs_19_40_1[19] = {
+const double* const ff_on2avc_tabs_19_40_1[19] = {
     tabs_19_40[ 0], tabs_19_40[ 1], tabs_19_40[ 2], tabs_19_40[ 3],
     tabs_19_40[ 4], tabs_19_40[ 5], tabs_19_40[ 6], tabs_19_40[ 7],
     tabs_19_40[ 8], tabs_19_40[ 9], tabs_19_40[10], tabs_19_40[11],
@@ -7935,7 +7935,7 @@ const double* ff_on2avc_tabs_19_40_1[19] = {
     tabs_19_40[16], tabs_19_40[17], tabs_19_40[18],
 };
 
-const double* ff_on2avc_tabs_19_40_2[19] = {
+const double* const ff_on2avc_tabs_19_40_2[19] = {
     tabs_19_40[19], tabs_19_40[20], tabs_19_40[21], tabs_19_40[22],
     tabs_19_40[23], tabs_19_40[24], tabs_19_40[25], tabs_19_40[26],
     tabs_19_40[27], tabs_19_40[28], tabs_19_40[29], tabs_19_40[30],
@@ -8826,7 +8826,7 @@ static const double tabs_20_84[20 * 4][84] = {
       0.51434408, -0.41486443, 0.27672635, -0.10432054, },
 };
 
-const double* ff_on2avc_tabs_20_84_1[20] = {
+const double* const ff_on2avc_tabs_20_84_1[20] = {
     tabs_20_84[ 0], tabs_20_84[ 1], tabs_20_84[ 2], tabs_20_84[ 3],
     tabs_20_84[ 4], tabs_20_84[ 5], tabs_20_84[ 6], tabs_20_84[ 7],
     tabs_20_84[ 8], tabs_20_84[ 9], tabs_20_84[10], tabs_20_84[11],
@@ -8834,7 +8834,7 @@ const double* ff_on2avc_tabs_20_84_1[20] = {
     tabs_20_84[16], tabs_20_84[17], tabs_20_84[18], tabs_20_84[19]
 };
 
-const double* ff_on2avc_tabs_20_84_2[20] = {
+const double* const ff_on2avc_tabs_20_84_2[20] = {
     tabs_20_84[20], tabs_20_84[21], tabs_20_84[22], tabs_20_84[23],
     tabs_20_84[24], tabs_20_84[25], tabs_20_84[26], tabs_20_84[27],
     tabs_20_84[28], tabs_20_84[29], tabs_20_84[30], tabs_20_84[31],
@@ -8842,7 +8842,7 @@ const double* ff_on2avc_tabs_20_84_2[20] = {
     tabs_20_84[36], tabs_20_84[37], tabs_20_84[38], tabs_20_84[39]
 };
 
-const double* ff_on2avc_tabs_20_84_3[20] = {
+const double* const ff_on2avc_tabs_20_84_3[20] = {
     tabs_20_84[40], tabs_20_84[41], tabs_20_84[42], tabs_20_84[43],
     tabs_20_84[44], tabs_20_84[45], tabs_20_84[46], tabs_20_84[47],
     tabs_20_84[48], tabs_20_84[49], tabs_20_84[50], tabs_20_84[51],
@@ -8850,7 +8850,7 @@ const double* ff_on2avc_tabs_20_84_3[20] = {
     tabs_20_84[56], tabs_20_84[57], tabs_20_84[58], tabs_20_84[59]
 };
 
-const double* ff_on2avc_tabs_20_84_4[20] = {
+const double* const ff_on2avc_tabs_20_84_4[20] = {
     tabs_20_84[60], tabs_20_84[61], tabs_20_84[62], tabs_20_84[63],
     tabs_20_84[64], tabs_20_84[65], tabs_20_84[66], tabs_20_84[67],
     tabs_20_84[68], tabs_20_84[69], tabs_20_84[70], tabs_20_84[71],
diff --git a/libavcodec/on2avcdata.h b/libavcodec/on2avcdata.h
index 39d2911..7f498e5 100644
--- a/libavcodec/on2avcdata.h
+++ b/libavcodec/on2avcdata.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2013 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -64,16 +64,16 @@ extern const double ff_on2avc_tab_84_1[];
 extern const double ff_on2avc_tab_84_2[];
 extern const double ff_on2avc_tab_84_3[];
 extern const double ff_on2avc_tab_84_4[];
-extern const double* ff_on2avc_tabs_4_10_1[4];
-extern const double* ff_on2avc_tabs_4_10_2[4];
-extern const double* ff_on2avc_tabs_9_20_1[9];
-extern const double* ff_on2avc_tabs_9_20_2[9];
-extern const double* ff_on2avc_tabs_19_40_1[19];
-extern const double* ff_on2avc_tabs_19_40_2[19];
-extern const double* ff_on2avc_tabs_20_84_1[20];
-extern const double* ff_on2avc_tabs_20_84_2[20];
-extern const double* ff_on2avc_tabs_20_84_3[20];
-extern const double* ff_on2avc_tabs_20_84_4[20];
+extern const double* const ff_on2avc_tabs_4_10_1[4];
+extern const double* const ff_on2avc_tabs_4_10_2[4];
+extern const double* const ff_on2avc_tabs_9_20_1[9];
+extern const double* const ff_on2avc_tabs_9_20_2[9];
+extern const double* const ff_on2avc_tabs_19_40_1[19];
+extern const double* const ff_on2avc_tabs_19_40_2[19];
+extern const double* const ff_on2avc_tabs_20_84_1[20];
+extern const double* const ff_on2avc_tabs_20_84_2[20];
+extern const double* const ff_on2avc_tabs_20_84_3[20];
+extern const double* const ff_on2avc_tabs_20_84_4[20];
 extern const float ff_on2avc_ctab_1[2048];
 extern const float ff_on2avc_ctab_2[2048];
 extern const float ff_on2avc_ctab_3[2048];
diff --git a/libavcodec/options.c b/libavcodec/options.c
index e3ded73..64b27e5 100644
--- a/libavcodec/options.c
+++ b/libavcodec/options.c
@@ -2,20 +2,20 @@
  * Copyright (c) 2001 Fabrice Bellard
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -67,6 +67,13 @@ static const AVClass *codec_child_class_next(const AVClass *prev)
     return NULL;
 }
 
+static AVClassCategory get_category(void *ptr)
+{
+    const AVCodecContext *ctx = ptr; /* ptr is the AVCodecContext to classify */
+    int is_decoder            = ctx->codec && ctx->codec->decode;
+    return is_decoder ? AV_CLASS_CATEGORY_DECODER : AV_CLASS_CATEGORY_ENCODER; /* no codec or no decode callback => encoder */
+}
+
 static const AVClass av_codec_context_class = {
     .class_name              = "AVCodecContext",
     .item_name               = context_to_name,
@@ -75,17 +82,28 @@ static const AVClass av_codec_context_class = {
     .log_level_offset_offset = offsetof(AVCodecContext, log_level_offset),
     .child_next              = codec_child_next,
     .child_class_next        = codec_child_class_next,
+    .category                = AV_CLASS_CATEGORY_ENCODER,
+    .get_category            = get_category,
 };
 
 int avcodec_get_context_defaults3(AVCodecContext *s, const AVCodec *codec)
 {
+    int flags=0;
     memset(s, 0, sizeof(AVCodecContext));
 
     s->av_class = &av_codec_context_class;
 
     s->codec_type = codec ? codec->type : AVMEDIA_TYPE_UNKNOWN;
-    s->codec      = codec;
-    av_opt_set_defaults(s);
+    if (codec)
+        s->codec_id = codec->id;
+
+    if(s->codec_type == AVMEDIA_TYPE_AUDIO)
+        flags= AV_OPT_FLAG_AUDIO_PARAM;
+    else if(s->codec_type == AVMEDIA_TYPE_VIDEO)
+        flags= AV_OPT_FLAG_VIDEO_PARAM;
+    else if(s->codec_type == AVMEDIA_TYPE_SUBTITLE)
+        flags= AV_OPT_FLAG_SUBTITLE_PARAM;
+    av_opt_set_defaults2(s, flags, flags);
 
     s->time_base           = (AVRational){0,1};
     s->get_buffer2         = avcodec_default_get_buffer2;
@@ -95,6 +113,7 @@ int avcodec_get_context_defaults3(AVCodecContext *s, const AVCodec *codec)
     s->sample_aspect_ratio = (AVRational){0,1};
     s->pix_fmt             = AV_PIX_FMT_NONE;
     s->sample_fmt          = AV_SAMPLE_FMT_NONE;
+    s->timecode_frame_start = -1;
 
     s->reordered_opaque    = AV_NOPTS_VALUE;
     if(codec && codec->priv_data_size){
@@ -161,9 +180,16 @@ int avcodec_copy_context(AVCodecContext *dest, const AVCodecContext *src)
                src, dest);
         return AVERROR(EINVAL);
     }
+
+    av_opt_free(dest);
+
     memcpy(dest, src, sizeof(*dest));
 
     dest->priv_data       = orig_priv_data;
+
+    if (orig_priv_data)
+        av_opt_copy(orig_priv_data, src->priv_data);
+
     dest->codec           = orig_codec;
 
     /* set values specific to opened codecs back to their default state */
@@ -198,8 +224,8 @@ int avcodec_copy_context(AVCodecContext *dest, const AVCodecContext *src)
     alloc_and_copy_or_fail(intra_matrix, 64 * sizeof(int16_t), 0);
     alloc_and_copy_or_fail(inter_matrix, 64 * sizeof(int16_t), 0);
     alloc_and_copy_or_fail(rc_override,  src->rc_override_count * sizeof(*src->rc_override), 0);
-    alloc_and_copy_or_fail(subtitle_header, src->subtitle_header_size, 0);
-    dest->subtitle_header_size = src->subtitle_header_size;
+    alloc_and_copy_or_fail(subtitle_header, src->subtitle_header_size, 1);
+    av_assert0(dest->subtitle_header_size == src->subtitle_header_size);
 #undef alloc_and_copy_or_fail
 
     return 0;
@@ -217,3 +243,55 @@ const AVClass *avcodec_get_class(void)
 {
     return &av_codec_context_class;
 }
+
+#define FOFFSET(x) offsetof(AVFrame,x)
+/* Named options for AVFrame members; each entry locates its member via FOFFSET and has an empty help string. */
+static const AVOption frame_options[]={
+{"best_effort_timestamp", "", FOFFSET(best_effort_timestamp), AV_OPT_TYPE_INT64, {.i64 = AV_NOPTS_VALUE }, INT64_MIN, INT64_MAX, 0},
+{"pkt_pos", "", FOFFSET(pkt_pos), AV_OPT_TYPE_INT64, {.i64 = -1 }, INT64_MIN, INT64_MAX, 0},
+{"pkt_size", "", FOFFSET(pkt_size), AV_OPT_TYPE_INT64, {.i64 = -1 }, INT64_MIN, INT64_MAX, 0}, /* NOTE(review): typed as 64-bit here; confirm AVFrame.pkt_size really is int64_t */
+{"sample_aspect_ratio", "", FOFFSET(sample_aspect_ratio), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, 0, INT_MAX, 0},
+{"width", "", FOFFSET(width), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0},
+{"height", "", FOFFSET(height), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0},
+{"format", "", FOFFSET(format), AV_OPT_TYPE_INT, {.i64 = -1 }, 0, INT_MAX, 0}, /* NOTE(review): default -1 lies below the declared minimum of 0 */
+{"channel_layout", "", FOFFSET(channel_layout), AV_OPT_TYPE_INT64, {.i64 = 0 }, 0, INT64_MAX, 0},
+{"sample_rate", "", FOFFSET(sample_rate), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0},
+{NULL}, /* terminating entry */
+};
+/* AVClass that publishes frame_options under the class name "AVFrame"; no item_name callback is set. */
+static const AVClass av_frame_class = {
+    .class_name              = "AVFrame",
+    .item_name               = NULL,
+    .option                  = frame_options,
+    .version                 = LIBAVUTIL_VERSION_INT,
+};
+/* Return a pointer to the file-local AVClass (av_frame_class) that carries the AVFrame option table. */
+const AVClass *avcodec_get_frame_class(void)
+{
+    return &av_frame_class;
+}
+
+#define SROFFSET(x) offsetof(AVSubtitleRect,x)
+/* Named options for AVSubtitleRect members; each entry locates its member via SROFFSET and has an empty help string. */
+static const AVOption subtitle_rect_options[]={
+{"x", "", SROFFSET(x), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0},
+{"y", "", SROFFSET(y), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0},
+{"w", "", SROFFSET(w), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0},
+{"h", "", SROFFSET(h), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0},
+{"type", "", SROFFSET(type), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, 0},
+{"flags", "", SROFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64 = 0}, 0, 1, 0, "flags"},
+{"forced", "", SROFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64 = 0}, 0, 1, 0}, /* NOTE(review): same offset as "flags" and a second FLAGS option rather than a named constant; confirm intended */
+{NULL}, /* terminating entry */
+};
+/* AVClass that publishes subtitle_rect_options under the class name "AVSubtitleRect"; no item_name callback is set. */
+static const AVClass av_subtitle_rect_class = {
+    .class_name             = "AVSubtitleRect",
+    .item_name              = NULL,
+    .option                 = subtitle_rect_options,
+    .version                = LIBAVUTIL_VERSION_INT,
+};
+/* Return a pointer to the file-local AVClass (av_subtitle_rect_class) that carries the AVSubtitleRect option table. */
+const AVClass *avcodec_get_subtitle_rect_class(void)
+{
+    return &av_subtitle_rect_class;
+}
diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h
index 61cde0f..37382a5 100644
--- a/libavcodec/options_table.h
+++ b/libavcodec/options_table.h
@@ -1,19 +1,21 @@
 /*
+ * Copyright (c) 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,7 +29,6 @@
 #include "libavutil/opt.h"
 #include "avcodec.h"
 #include "version.h"
-#include "config.h"
 
 #define OFFSET(x) offsetof(AVCodecContext,x)
 #define DEFAULT 0 //should be NAN but it does not work as it is not a constant in glibc as required by ANSI/ISO C
@@ -41,12 +42,13 @@
 #define AV_CODEC_DEFAULT_BITRATE 200*1000
 
 static const AVOption avcodec_options[] = {
-{"b", "set bitrate (in bits/s)", OFFSET(bit_rate), AV_OPT_TYPE_INT, {.i64 = AV_CODEC_DEFAULT_BITRATE }, INT_MIN, INT_MAX, V|A|E},
+{"b", "set bitrate (in bits/s)", OFFSET(bit_rate), AV_OPT_TYPE_INT, {.i64 = AV_CODEC_DEFAULT_BITRATE }, 0, INT_MAX, A|V|E},
+{"ab", "set bitrate (in bits/s)", OFFSET(bit_rate), AV_OPT_TYPE_INT, {.i64 = 128*1000 }, 0, INT_MAX, A|E},
 {"bt", "Set video bitrate tolerance (in bits/s). In 1-pass mode, bitrate tolerance specifies how far "
        "ratecontrol is willing to deviate from the target average bitrate value. This is not related "
        "to minimum/maximum bitrate. Lowering tolerance too much has an adverse effect on quality.",
        OFFSET(bit_rate_tolerance), AV_OPT_TYPE_INT, {.i64 = AV_CODEC_DEFAULT_BITRATE*20 }, 1, INT_MAX, V|E},
-{"flags", NULL, OFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64 = DEFAULT }, 0, UINT_MAX, V|A|E|D, "flags"},
+{"flags", NULL, OFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64 = DEFAULT }, 0, UINT_MAX, V|A|S|E|D, "flags"},
 {"unaligned", "allow decoders to produce unaligned output", 0, AV_OPT_TYPE_CONST, { .i64 = CODEC_FLAG_UNALIGNED }, INT_MIN, INT_MAX, V | D, "flags" },
 {"mv4", "use four motion vectors per macroblock (MPEG-4)", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_4MV }, INT_MIN, INT_MAX, V|E, "flags"},
 {"qpel", "use 1/4-pel motion compensation", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_QPEL }, INT_MIN, INT_MAX, V|E, "flags"},
@@ -82,8 +84,10 @@ static const AVOption avcodec_options[] = {
 {"output_corrupt", "Output even potentially corrupted frames", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG_OUTPUT_CORRUPT }, INT_MIN, INT_MAX, V|D, "flags"},
 {"fast", "allow non-spec-compliant speedup tricks", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_FAST }, INT_MIN, INT_MAX, V|E, "flags2"},
 {"noout", "skip bitstream encoding", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_NO_OUTPUT }, INT_MIN, INT_MAX, V|E, "flags2"},
-{"ignorecrop", "ignore cropping information from sps", 1, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_IGNORE_CROP }, INT_MIN, INT_MAX, V|D, "flags2"},
+{"ignorecrop", "ignore cropping information from sps", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_IGNORE_CROP }, INT_MIN, INT_MAX, V|D, "flags2"},
 {"local_header", "place global headers at every keyframe instead of in extradata", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_LOCAL_HEADER }, INT_MIN, INT_MAX, V|E, "flags2"},
+{"chunks", "Frame data might be split into multiple chunks", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_CHUNKS }, INT_MIN, INT_MAX, V|D, "flags2"},
+{"showall", "Show all frames before the first keyframe", 0, AV_OPT_TYPE_CONST, {.i64 = CODEC_FLAG2_SHOW_ALL }, INT_MIN, INT_MAX, V|D, "flags2"},
 {"me_method", "set motion estimation method", OFFSET(me_method), AV_OPT_TYPE_INT, {.i64 = ME_EPZS }, INT_MIN, INT_MAX, V|E, "me_method"},
 {"zero", "zero motion estimation (fastest)", 0, AV_OPT_TYPE_CONST, {.i64 = ME_ZERO }, INT_MIN, INT_MAX, V|E, "me_method" },
 {"full", "full motion estimation (slowest)", 0, AV_OPT_TYPE_CONST, {.i64 = ME_FULL }, INT_MIN, INT_MAX, V|E, "me_method" },
@@ -96,6 +100,7 @@ static const AVOption avcodec_options[] = {
 {"x1", "X1 motion estimation", 0, AV_OPT_TYPE_CONST, {.i64 = ME_X1 }, INT_MIN, INT_MAX, V|E, "me_method" },
 {"hex", "hex motion estimation", 0, AV_OPT_TYPE_CONST, {.i64 = ME_HEX }, INT_MIN, INT_MAX, V|E, "me_method" },
 {"umh", "umh motion estimation", 0, AV_OPT_TYPE_CONST, {.i64 = ME_UMH }, INT_MIN, INT_MAX, V|E, "me_method" },
+{"iter", "iter motion estimation", 0, AV_OPT_TYPE_CONST, {.i64 = ME_ITER }, INT_MIN, INT_MAX, V|E, "me_method" },
 {"extradata_size", NULL, OFFSET(extradata_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
 {"time_base", NULL, OFFSET(time_base), AV_OPT_TYPE_RATIONAL, {.dbl = 0}, INT_MIN, INT_MAX},
 {"g", "set the group of picture (GOP) size", OFFSET(gop_size), AV_OPT_TYPE_INT, {.i64 = 12 }, INT_MIN, INT_MAX, V|E},
@@ -110,9 +115,9 @@ static const AVOption avcodec_options[] = {
           OFFSET(qcompress), AV_OPT_TYPE_FLOAT, {.dbl = 0.5 }, -FLT_MAX, FLT_MAX, V|E},
 {"qblur", "video quantizer scale blur (VBR)", OFFSET(qblur), AV_OPT_TYPE_FLOAT, {.dbl = 0.5 }, -1, FLT_MAX, V|E},
 {"qmin", "minimum video quantizer scale (VBR)", OFFSET(qmin), AV_OPT_TYPE_INT, {.i64 = 2 }, -1, 69, V|E},
-{"qmax", "maximum video quantizer scale (VBR)", OFFSET(qmax), AV_OPT_TYPE_INT, {.i64 = 31 }, -1, 69, V|E},
+{"qmax", "maximum video quantizer scale (VBR)", OFFSET(qmax), AV_OPT_TYPE_INT, {.i64 = 31 }, -1, 1024, V|E},
 {"qdiff", "maximum difference between the quantizer scales (VBR)", OFFSET(max_qdiff), AV_OPT_TYPE_INT, {.i64 = 3 }, INT_MIN, INT_MAX, V|E},
-{"bf", "use 'frames' B frames", OFFSET(max_b_frames), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, -1, INT_MAX, V|E},
+{"bf", "set maximum number of B frames between non-B-frames", OFFSET(max_b_frames), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, -1, INT_MAX, V|E},
 {"b_qfactor", "QP factor between P- and B-frames", OFFSET(b_quant_factor), AV_OPT_TYPE_FLOAT, {.dbl = 1.25 }, -FLT_MAX, FLT_MAX, V|E},
 {"rc_strategy", "ratecontrol method", OFFSET(rc_strategy), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 {"b_strategy", "strategy to choose between I/P/B-frames", OFFSET(b_frame_strategy), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, V|E},
@@ -155,11 +160,15 @@ static const AVOption avcodec_options[] = {
 {"unofficial", "allow unofficial extensions", 0, AV_OPT_TYPE_CONST, {.i64 = FF_COMPLIANCE_UNOFFICIAL }, INT_MIN, INT_MAX, V|D|E, "strict"},
 {"experimental", "allow non-standardized experimental things", 0, AV_OPT_TYPE_CONST, {.i64 = FF_COMPLIANCE_EXPERIMENTAL }, INT_MIN, INT_MAX, V|D|E, "strict"},
 {"b_qoffset", "QP offset between P- and B-frames", OFFSET(b_quant_offset), AV_OPT_TYPE_FLOAT, {.dbl = 1.25 }, -FLT_MAX, FLT_MAX, V|E},
-{"err_detect", "set error detection flags", OFFSET(err_recognition), AV_OPT_TYPE_FLAGS, {.i64 = 0}, INT_MIN, INT_MAX, A|V|D, "err_detect"},
-{"crccheck", "verify embedded CRCs", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_CRCCHECK }, INT_MIN, INT_MAX, V|D, "err_detect"},
-{"bitstream", "detect bitstream specification deviations", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_BITSTREAM }, INT_MIN, INT_MAX, V|D, "err_detect"},
-{"buffer", "detect improper bitstream length", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_BUFFER }, INT_MIN, INT_MAX, V|D, "err_detect"},
-{"explode", "abort decoding on minor error detection", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_EXPLODE }, INT_MIN, INT_MAX, V|D, "err_detect"},
+{"err_detect", "set error detection flags", OFFSET(err_recognition), AV_OPT_TYPE_FLAGS, {.i64 = 0 }, INT_MIN, INT_MAX, A|V|D, "err_detect"},
+{"crccheck", "verify embedded CRCs", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_CRCCHECK }, INT_MIN, INT_MAX, A|V|D, "err_detect"},
+{"bitstream", "detect bitstream specification deviations", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_BITSTREAM }, INT_MIN, INT_MAX, A|V|D, "err_detect"},
+{"buffer", "detect improper bitstream length", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_BUFFER }, INT_MIN, INT_MAX, A|V|D, "err_detect"},
+{"explode", "abort decoding on minor error detection", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_EXPLODE }, INT_MIN, INT_MAX, A|V|D, "err_detect"},
+{"ignore_err", "ignore errors", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_IGNORE_ERR }, INT_MIN, INT_MAX, A|V|D, "err_detect"},
+{"careful",    "consider things that violate the spec, are fast to check and have not been seen in the wild as errors", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_CAREFUL }, INT_MIN, INT_MAX, A|V|D, "err_detect"},
+{"compliant",  "consider all spec non compliancies as errors", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_COMPLIANT }, INT_MIN, INT_MAX, A|V|D, "err_detect"},
+{"aggressive", "consider things that a sane encoder should not do as an error", 0, AV_OPT_TYPE_CONST, {.i64 = AV_EF_AGGRESSIVE }, INT_MIN, INT_MAX, A|V|D, "err_detect"},
 {"has_b_frames", NULL, OFFSET(has_b_frames), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
 {"block_align", NULL, OFFSET(block_align), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
 {"mpeg_quant", "use MPEG quantizers instead of H.263", OFFSET(mpeg_quant), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
@@ -172,8 +181,8 @@ static const AVOption avcodec_options[] = {
           "bits2qp(bits), qp2bits(qp). Also the following constants are available: iTex pTex tex mv "
           "fCode iCount mcVar var isI isP isB avgQP qComp avgIITex avgPITex avgPPTex avgBPTex avgTex.",
           OFFSET(rc_eq), AV_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN, CHAR_MAX, V|E},
-{"maxrate", "Set maximum bitrate tolerance (in bits/s). Requires bufsize to be set.", OFFSET(rc_max_rate), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|A|E},
-{"minrate", "Set minimum bitrate tolerance (in bits/s). Most useful in setting up a CBR encode. It is of little use otherwise.",
+{"maxrate", "maximum bitrate (in bits/s). Used for VBV together with bufsize.", OFFSET(rc_max_rate), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, V|A|E},
+{"minrate", "minimum bitrate (in bits/s). Most useful in setting up a CBR encode. It is of little use otherwise.",
             OFFSET(rc_min_rate), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|A|E},
 {"bufsize", "set ratecontrol buffer size (in bits)", OFFSET(rc_buffer_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, A|V|E},
 {"rc_buf_aggressivity", "currently useless", OFFSET(rc_buffer_aggressivity), AV_OPT_TYPE_FLOAT, {.dbl = 1.0 }, -FLT_MAX, FLT_MAX, V|E},
@@ -216,10 +225,12 @@ static const AVOption avcodec_options[] = {
 #endif /* FF_API_UNUSED_MEMBERS */
 {"xvidmmx", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_XVIDMMX }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"faani", "floating point AAN IDCT", 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_FAAN }, INT_MIN, INT_MAX, V|D|E, "idct"},
+{"simpleauto", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_IDCT_SIMPLEAUTO }, INT_MIN, INT_MAX, V|E|D, "idct"},
 {"slice_count", NULL, OFFSET(slice_count), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
 {"ec", "set error concealment strategy", OFFSET(error_concealment), AV_OPT_TYPE_FLAGS, {.i64 = 3 }, INT_MIN, INT_MAX, V|D, "ec"},
 {"guess_mvs", "iterative motion vector (MV) search (slow)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_EC_GUESS_MVS }, INT_MIN, INT_MAX, V|D, "ec"},
 {"deblock", "use strong deblock filter for damaged MBs", 0, AV_OPT_TYPE_CONST, {.i64 = FF_EC_DEBLOCK }, INT_MIN, INT_MAX, V|D, "ec"},
+{"favor_inter", "favor predicting from the previous frame", 0, AV_OPT_TYPE_CONST, {.i64 = FF_EC_FAVOR_INTER }, INT_MIN, INT_MAX, V|D, "ec"},
 {"bits_per_coded_sample", NULL, OFFSET(bits_per_coded_sample), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
 {"pred", "prediction method", OFFSET(prediction_method), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "pred"},
 {"left", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PRED_LEFT }, INT_MIN, INT_MAX, V|E, "pred"},
@@ -244,18 +255,15 @@ static const AVOption avcodec_options[] = {
 {"er", "error recognition", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_ER }, INT_MIN, INT_MAX, V|D, "debug"},
 {"mmco", "memory management control operations (H.264)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_MMCO }, INT_MIN, INT_MAX, V|D, "debug"},
 {"bugs", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_BUGS }, INT_MIN, INT_MAX, V|D, "debug"},
-#if FF_API_DEBUG_MV
 {"vis_qp", "visualize quantization parameter (QP), lower QP are tinted greener", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_VIS_QP }, INT_MIN, INT_MAX, V|D, "debug"},
 {"vis_mb_type", "visualize block types", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_VIS_MB_TYPE }, INT_MIN, INT_MAX, V|D, "debug"},
-#endif
 {"buffers", "picture buffer allocations", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_BUFFERS }, INT_MIN, INT_MAX, V|D, "debug"},
-{"thread_ops", "threading operations", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_THREADS }, INT_MIN, INT_MAX, V|D, "debug"},
-#if FF_API_DEBUG_MV
-{"vismv", "visualize motion vectors (MVs)", OFFSET(debug_mv), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, INT_MAX, V|D, "debug_mv"},
+{"thread_ops", "threading operations", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_THREADS }, INT_MIN, INT_MAX, V|A|D, "debug"},
+{"nomc", "skip motion compensation", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_NOMC }, INT_MIN, INT_MAX, V|A|D, "debug"},
+{"vismv", "visualize motion vectors (MVs)", OFFSET(debug_mv), AV_OPT_TYPE_FLAGS, {.i64 = DEFAULT }, 0, INT_MAX, V|D, "debug_mv"},
 {"pf", "forward predicted MVs of P-frames", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_VIS_MV_P_FOR }, INT_MIN, INT_MAX, V|D, "debug_mv"},
 {"bf", "forward predicted MVs of B-frames", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_VIS_MV_B_FOR }, INT_MIN, INT_MAX, V|D, "debug_mv"},
 {"bb", "backward predicted MVs of B-frames", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DEBUG_VIS_MV_B_BACK }, INT_MIN, INT_MAX, V|D, "debug_mv"},
-#endif
 {"cmp", "full-pel ME compare function", OFFSET(me_cmp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
 {"subcmp", "sub-pel ME compare function", OFFSET(me_sub_cmp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
 {"mbcmp", "macroblock compare function", OFFSET(mb_cmp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "cmp_func"},
@@ -275,6 +283,10 @@ static const AVOption avcodec_options[] = {
 {"vsad", "sum of absolute vertical differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_VSAD }, INT_MIN, INT_MAX, V|E, "cmp_func"},
 {"vsse", "sum of squared vertical differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_VSSE }, INT_MIN, INT_MAX, V|E, "cmp_func"},
 {"nsse", "noise preserving sum of squared differences", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_NSSE }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+#if CONFIG_SNOW_ENCODER
+{"w53", "5/3 wavelet, only used in snow", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_W53 }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"w97", "9/7 wavelet, only used in snow", 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_W97 }, INT_MIN, INT_MAX, V|E, "cmp_func"},
+#endif
 {"dctmax", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_DCTMAX }, INT_MIN, INT_MAX, V|E, "cmp_func"},
 {"chroma", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_CMP_CHROMA }, INT_MIN, INT_MAX, V|E, "cmp_func"},
 {"pre_dia_size", "diamond type & size for motion estimation pre-pass", OFFSET(pre_dia_size), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
@@ -297,7 +309,7 @@ static const AVOption avcodec_options[] = {
 #if FF_API_XVMC
 {"xvmc_acceleration", NULL, OFFSET(xvmc_acceleration), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
 #endif /* FF_API_XVMC */
-{"mbd", "macroblock decision algorithm (high quality mode)", OFFSET(mb_decision), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E, "mbd"},
+{"mbd", "macroblock decision algorithm (high quality mode)", OFFSET(mb_decision), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, 2, V|E, "mbd"},
 {"simple", "use mbcmp (default)", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MB_DECISION_SIMPLE }, INT_MIN, INT_MAX, V|E, "mbd"},
 {"bits", "use fewest bits", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MB_DECISION_BITS }, INT_MIN, INT_MAX, V|E, "mbd"},
 {"rd", "use best rate distortion", 0, AV_OPT_TYPE_CONST, {.i64 = FF_MB_DECISION_RD }, INT_MIN, INT_MAX, V|E, "mbd"},
@@ -311,11 +323,11 @@ static const AVOption avcodec_options[] = {
 #if FF_API_ERROR_RATE
 {"error", NULL, OFFSET(error_rate), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 #endif
-{"threads", NULL, OFFSET(thread_count), AV_OPT_TYPE_INT, {.i64 = 1 }, 0, INT_MAX, V|E|D, "threads"},
+{"threads", NULL, OFFSET(thread_count), AV_OPT_TYPE_INT, {.i64 = 1 }, 0, INT_MAX, V|A|E|D, "threads"},
 {"auto", "autodetect a suitable number of threads to use", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, V|E|D, "threads"},
 {"me_threshold", "motion estimation threshold", OFFSET(me_threshold), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 {"mb_threshold", "macroblock threshold", OFFSET(mb_threshold), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
-{"dc", "intra_dc_precision", OFFSET(intra_dc_precision), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, V|E},
+{"dc", "intra_dc_precision", OFFSET(intra_dc_precision), AV_OPT_TYPE_INT, {.i64 = 0 }, -8, 16, V|E},
 {"nssew", "nsse weight", OFFSET(nsse_weight), AV_OPT_TYPE_INT, {.i64 = 8 }, INT_MIN, INT_MAX, V|E},
 {"skip_top", "number of macroblock rows at the top which are skipped", OFFSET(skip_top), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|D},
 {"skip_bottom", "number of macroblock rows at the bottom which are skipped", OFFSET(skip_bottom), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|D},
@@ -338,6 +350,7 @@ static const AVOption avcodec_options[] = {
 {"dts_hd_ma", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_PROFILE_DTS_HD_MA }, INT_MIN, INT_MAX, A|E, "profile"},
 {"level", NULL, OFFSET(level), AV_OPT_TYPE_INT, {.i64 = FF_LEVEL_UNKNOWN }, INT_MIN, INT_MAX, V|A|E, "level"},
 {"unknown", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_LEVEL_UNKNOWN }, INT_MIN, INT_MAX, V|A|E, "level"},
+{"lowres", "decode at 1= 1/2, 2=1/4, 3=1/8 resolutions", OFFSET(lowres), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, V|A|D},
 {"skip_threshold", "frame skip threshold", OFFSET(frame_skip_threshold), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 {"skip_factor", "frame skip factor", OFFSET(frame_skip_factor), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 {"skip_exp", "frame skip exponent", OFFSET(frame_skip_exp), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
@@ -346,18 +359,19 @@ static const AVOption avcodec_options[] = {
 {"mblmin", "minimum macroblock Lagrange factor (VBR)", OFFSET(mb_lmin), AV_OPT_TYPE_INT, {.i64 = FF_QP2LAMBDA * 2 }, 1, FF_LAMBDA_MAX, V|E},
 {"mblmax", "maximum macroblock Lagrange factor (VBR)", OFFSET(mb_lmax), AV_OPT_TYPE_INT, {.i64 = FF_QP2LAMBDA * 31 }, 1, FF_LAMBDA_MAX, V|E},
 {"mepc", "motion estimation bitrate penalty compensation (1.0 = 256)", OFFSET(me_penalty_compensation), AV_OPT_TYPE_INT, {.i64 = 256 }, INT_MIN, INT_MAX, V|E},
-{"skip_loop_filter", NULL, OFFSET(skip_loop_filter), AV_OPT_TYPE_INT, {.i64 = AVDISCARD_DEFAULT }, INT_MIN, INT_MAX, V|D, "avdiscard"},
-{"skip_idct"       , NULL, OFFSET(skip_idct)       , AV_OPT_TYPE_INT, {.i64 = AVDISCARD_DEFAULT }, INT_MIN, INT_MAX, V|D, "avdiscard"},
-{"skip_frame"      , NULL, OFFSET(skip_frame)      , AV_OPT_TYPE_INT, {.i64 = AVDISCARD_DEFAULT }, INT_MIN, INT_MAX, V|D, "avdiscard"},
-{"none"            , NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_NONE    }, INT_MIN, INT_MAX, V|D, "avdiscard"},
-{"default"         , NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_DEFAULT }, INT_MIN, INT_MAX, V|D, "avdiscard"},
-{"noref"           , NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_NONREF  }, INT_MIN, INT_MAX, V|D, "avdiscard"},
-{"bidir"           , NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_BIDIR   }, INT_MIN, INT_MAX, V|D, "avdiscard"},
-{"nokey"           , NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_NONKEY  }, INT_MIN, INT_MAX, V|D, "avdiscard"},
-{"all"             , NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_ALL     }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"skip_loop_filter", "skip loop filtering process for the selected frames", OFFSET(skip_loop_filter), AV_OPT_TYPE_INT, {.i64 = AVDISCARD_DEFAULT }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"skip_idct"       , "skip IDCT/dequantization for the selected frames",    OFFSET(skip_idct),        AV_OPT_TYPE_INT, {.i64 = AVDISCARD_DEFAULT }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"skip_frame"      , "skip decoding for the selected frames",               OFFSET(skip_frame),       AV_OPT_TYPE_INT, {.i64 = AVDISCARD_DEFAULT }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"none"            , "discard no frame",                    0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_NONE    }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"default"         , "discard useless frames",              0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_DEFAULT }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"noref"           , "discard all non-reference frames",    0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_NONREF  }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"bidir"           , "discard all bidirectional frames",    0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_BIDIR   }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"nokey"           , "discard all frames except keyframes", 0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_NONKEY  }, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"nointra"         , "discard all frames except I frames",  0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_NONINTRA}, INT_MIN, INT_MAX, V|D, "avdiscard"},
+{"all"             , "discard all frames",                  0, AV_OPT_TYPE_CONST, {.i64 = AVDISCARD_ALL     }, INT_MIN, INT_MAX, V|D, "avdiscard"},
 {"bidir_refine", "refine the two motion vectors used in bidirectional macroblocks", OFFSET(bidir_refine), AV_OPT_TYPE_INT, {.i64 = 1 }, 0, 4, V|E},
 {"brd_scale", "downscale frames for dynamic B-frame decision", OFFSET(brd_scale), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, 0, 10, V|E},
-{"keyint_min", "minimum interval between IDR-frames (x264)", OFFSET(keyint_min), AV_OPT_TYPE_INT, {.i64 = 25 }, INT_MIN, INT_MAX, V|E},
+{"keyint_min", "minimum interval between IDR-frames", OFFSET(keyint_min), AV_OPT_TYPE_INT, {.i64 = 25 }, INT_MIN, INT_MAX, V|E},
 {"refs", "reference frames to consider for motion compensation", OFFSET(refs), AV_OPT_TYPE_INT, {.i64 = 1 }, INT_MIN, INT_MAX, V|E},
 {"chromaoffset", "chroma QP offset from luma", OFFSET(chromaoffset), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|E},
 {"trellis", "rate-distortion optimal quantization", OFFSET(trellis), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX, V|A|E},
@@ -376,7 +390,7 @@ static const AVOption avcodec_options[] = {
 {"bits_per_raw_sample", NULL, OFFSET(bits_per_raw_sample), AV_OPT_TYPE_INT, {.i64 = DEFAULT }, INT_MIN, INT_MAX},
 {"channel_layout", NULL, OFFSET(channel_layout), AV_OPT_TYPE_INT64, {.i64 = DEFAULT }, 0, INT64_MAX, A|E|D, "channel_layout"},
 {"request_channel_layout", NULL, OFFSET(request_channel_layout), AV_OPT_TYPE_INT64, {.i64 = DEFAULT }, 0, INT64_MAX, A|D, "request_channel_layout"},
-{"rc_max_vbv_use", NULL, OFFSET(rc_max_available_vbv_use), AV_OPT_TYPE_FLOAT, {.dbl = 1.0/3 }, 0.0, FLT_MAX, V|E},
+{"rc_max_vbv_use", NULL, OFFSET(rc_max_available_vbv_use), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, 0.0, FLT_MAX, V|E},
 {"rc_min_vbv_use", NULL, OFFSET(rc_min_vbv_overflow_use),  AV_OPT_TYPE_FLOAT, {.dbl = 3 },     0.0, FLT_MAX, V|E},
 {"ticks_per_frame", NULL, OFFSET(ticks_per_frame), AV_OPT_TYPE_INT, {.i64 = 1 }, 1, INT_MAX, A|V|E|D},
 {"color_primaries", "color primaries", OFFSET(color_primaries), AV_OPT_TYPE_INT, {.i64 = AVCOL_PRI_UNSPECIFIED }, 1, AVCOL_PRI_NB-1, V|E|D, "color_primaries_type"},
@@ -421,7 +435,7 @@ static const AVOption avcodec_options[] = {
 {"chroma_sample_location", NULL, OFFSET(chroma_sample_location), AV_OPT_TYPE_INT, {.i64 = AVCHROMA_LOC_UNSPECIFIED }, 0, AVCHROMA_LOC_NB-1, V|E|D},
 {"log_level_offset", "set the log level offset", OFFSET(log_level_offset), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX },
 {"slices", "number of slices, used in parallelized encoding", OFFSET(slices), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, V|E},
-{"thread_type", "select multithreading type", OFFSET(thread_type), AV_OPT_TYPE_FLAGS, {.i64 = FF_THREAD_SLICE|FF_THREAD_FRAME }, 0, INT_MAX, V|E|D, "thread_type"},
+{"thread_type", "select multithreading type", OFFSET(thread_type), AV_OPT_TYPE_FLAGS, {.i64 = FF_THREAD_SLICE|FF_THREAD_FRAME }, 0, INT_MAX, V|A|E|D, "thread_type"},
 {"slice", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_THREAD_SLICE }, INT_MIN, INT_MAX, V|E|D, "thread_type"},
 {"frame", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_THREAD_FRAME }, INT_MIN, INT_MAX, V|E|D, "thread_type"},
 {"audio_service_type", "audio service type", OFFSET(audio_service_type), AV_OPT_TYPE_INT, {.i64 = AV_AUDIO_SERVICE_TYPE_MAIN }, 0, AV_AUDIO_SERVICE_TYPE_NB-1, A|E, "audio_service_type"},
@@ -434,19 +448,22 @@ static const AVOption avcodec_options[] = {
 {"em", "Emergency",          0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_EMERGENCY },         INT_MIN, INT_MAX, A|E, "audio_service_type"},
 {"vo", "Voice Over",         0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_VOICE_OVER },        INT_MIN, INT_MAX, A|E, "audio_service_type"},
 {"ka", "Karaoke",            0, AV_OPT_TYPE_CONST, {.i64 = AV_AUDIO_SERVICE_TYPE_KARAOKE },           INT_MIN, INT_MAX, A|E, "audio_service_type"},
-{"request_sample_fmt", NULL, OFFSET(request_sample_fmt), AV_OPT_TYPE_INT, {.i64 = AV_SAMPLE_FMT_NONE }, AV_SAMPLE_FMT_NONE, AV_SAMPLE_FMT_NB-1, A|D, "request_sample_fmt"},
-{"u8" , "8-bit unsigned integer", 0, AV_OPT_TYPE_CONST, {.i64 = AV_SAMPLE_FMT_U8  }, INT_MIN, INT_MAX, A|D, "request_sample_fmt"},
-{"s16", "16-bit signed integer",  0, AV_OPT_TYPE_CONST, {.i64 = AV_SAMPLE_FMT_S16 }, INT_MIN, INT_MAX, A|D, "request_sample_fmt"},
-{"s32", "32-bit signed integer",  0, AV_OPT_TYPE_CONST, {.i64 = AV_SAMPLE_FMT_S32 }, INT_MIN, INT_MAX, A|D, "request_sample_fmt"},
-{"flt", "32-bit float",           0, AV_OPT_TYPE_CONST, {.i64 = AV_SAMPLE_FMT_FLT }, INT_MIN, INT_MAX, A|D, "request_sample_fmt"},
-{"dbl", "64-bit double",          0, AV_OPT_TYPE_CONST, {.i64 = AV_SAMPLE_FMT_DBL }, INT_MIN, INT_MAX, A|D, "request_sample_fmt"},
-{"u8p" , "8-bit unsigned integer planar", 0, AV_OPT_TYPE_CONST, {.i64 = AV_SAMPLE_FMT_U8P  }, INT_MIN, INT_MAX, A|D, "request_sample_fmt"},
-{"s16p", "16-bit signed integer planar",  0, AV_OPT_TYPE_CONST, {.i64 = AV_SAMPLE_FMT_S16P }, INT_MIN, INT_MAX, A|D, "request_sample_fmt"},
-{"s32p", "32-bit signed integer planar",  0, AV_OPT_TYPE_CONST, {.i64 = AV_SAMPLE_FMT_S32P }, INT_MIN, INT_MAX, A|D, "request_sample_fmt"},
-{"fltp", "32-bit float planar",           0, AV_OPT_TYPE_CONST, {.i64 = AV_SAMPLE_FMT_FLTP }, INT_MIN, INT_MAX, A|D, "request_sample_fmt"},
-{"dblp", "64-bit double planar",          0, AV_OPT_TYPE_CONST, {.i64 = AV_SAMPLE_FMT_DBLP }, INT_MIN, INT_MAX, A|D, "request_sample_fmt"},
+{"request_sample_fmt", "sample format audio decoders should prefer", OFFSET(request_sample_fmt), AV_OPT_TYPE_SAMPLE_FMT, {.i64=AV_SAMPLE_FMT_NONE}, -1, INT_MAX, A|D, "request_sample_fmt"},
+{"pkt_timebase", NULL, OFFSET(pkt_timebase), AV_OPT_TYPE_RATIONAL, {.dbl = 0 }, 0, INT_MAX, 0},
+{"sub_charenc", "set input text subtitles character encoding", OFFSET(sub_charenc), AV_OPT_TYPE_STRING, {.str = NULL}, CHAR_MIN, CHAR_MAX, S|D},
+{"sub_charenc_mode", "set input text subtitles character encoding mode", OFFSET(sub_charenc_mode), AV_OPT_TYPE_FLAGS, {.i64 = FF_SUB_CHARENC_MODE_AUTOMATIC}, -1, INT_MAX, S|D, "sub_charenc_mode"},
+{"do_nothing",  NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_DO_NOTHING},  INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
+{"auto",        NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_AUTOMATIC},   INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
+{"pre_decoder", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_PRE_DECODER}, INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
 {"refcounted_frames", NULL, OFFSET(refcounted_frames), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, A|V|D },
 {"side_data_only_packets", NULL, OFFSET(side_data_only_packets), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, A|V|E },
+{"skip_alpha", "Skip processing alpha", OFFSET(skip_alpha), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 1, V|D },
+{"field_order", "Field order", OFFSET(field_order), AV_OPT_TYPE_INT, {.i64 = AV_FIELD_UNKNOWN }, 0, 5, V|D|E, "field_order" },
+{"progressive", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AV_FIELD_PROGRESSIVE }, 0, 0, V|D|E, "field_order" },
+{"tt", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AV_FIELD_TT }, 0, 0, V|D|E, "field_order" },
+{"bb", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AV_FIELD_BB }, 0, 0, V|D|E, "field_order" },
+{"tb", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AV_FIELD_TB }, 0, 0, V|D|E, "field_order" },
+{"bt", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = AV_FIELD_BT }, 0, 0, V|D|E, "field_order" },
 {NULL},
 };
 
diff --git a/libavcodec/opus.c b/libavcodec/opus.c
index 91021ce..e76c510 100644
--- a/libavcodec/opus.c
+++ b/libavcodec/opus.c
@@ -2,20 +2,20 @@
  * Copyright (c) 2012 Andrew D'Addesio
  * Copyright (c) 2013-2014 Mozilla Corporation
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/opus.h b/libavcodec/opus.h
index c2fac06..543d90c 100644
--- a/libavcodec/opus.h
+++ b/libavcodec/opus.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2012 Andrew D'Addesio
  * Copyright (c) 2013-2014 Mozilla Corporation
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -29,7 +29,7 @@
 #include "libavutil/float_dsp.h"
 #include "libavutil/frame.h"
 
-#include "libavresample/avresample.h"
+#include "libswresample/swresample.h"
 
 #include "avcodec.h"
 #include "get_bits.h"
@@ -57,7 +57,7 @@
 #define SILK_HISTORY                 322
 #define SILK_MAX_LPC                 16
 
-#define ROUND_MULL(a,b,s) (((MUL64(a, b) >> (s - 1)) + 1) >> 1)
+#define ROUND_MULL(a,b,s) (((MUL64(a, b) >> ((s) - 1)) + 1) >> 1)
 #define ROUND_MUL16(a,b)  ((MUL16(a, b) + 16384) >> 15)
 #define opus_ilog(i) (av_log2(i) + !!(i))
 
@@ -95,19 +95,19 @@ typedef struct SilkContext SilkContext;
 typedef struct CeltContext CeltContext;
 
 typedef struct OpusPacket {
-    int packet_size;                /** packet size */
-    int data_size;                  /** size of the useful data -- packet size - padding */
-    int code;                       /** packet code: specifies the frame layout */
-    int stereo;                     /** whether this packet is mono or stereo */
-    int vbr;                        /** vbr flag */
-    int config;                     /** configuration: tells the audio mode,
+    int packet_size;                /**< total packet size, padding included */
+    int data_size;                  /**< size of the useful data, i.e. packet_size minus padding */
+    int code;                       /**< packet code: specifies the frame layout */
+    int stereo;                     /**< whether this packet is mono or stereo */
+    int vbr;                        /**< VBR flag */
+    int config;                     /**< configuration: tells the audio mode,
                                      **                bandwidth, and frame duration */
-    int frame_count;                /** frame count */
-    int frame_offset[MAX_FRAMES];   /** frame offsets */
-    int frame_size[MAX_FRAMES];     /** frame sizes */
-    int frame_duration;             /** frame duration, in samples @ 48kHz */
-    enum OpusMode mode;             /** mode */
-    enum OpusBandwidth bandwidth;   /** bandwidth */
+    int frame_count;                /**< frame count for this packet */
+    int frame_offset[MAX_FRAMES];   /**< per-frame offsets (at most MAX_FRAMES entries) */
+    int frame_size[MAX_FRAMES];     /**< per-frame sizes (at most MAX_FRAMES entries) */
+    int frame_duration;             /**< frame duration, in samples @ 48kHz */
+    enum OpusMode mode;             /**< mode, as an OpusMode value */
+    enum OpusBandwidth bandwidth;   /**< bandwidth, as an OpusBandwidth value */
 } OpusPacket;
 
 typedef struct OpusStreamContext {
@@ -135,7 +135,7 @@ typedef struct OpusStreamContext {
     float *out_dummy;
     int    out_dummy_allocated_size;
 
-    AVAudioResampleContext *avr;
+    SwrContext *swr;
     AVAudioFifo *celt_delay;
     int silk_samplerate;
     /* number of samples we still want to get from the resampler */
diff --git a/libavcodec/opus_celt.c b/libavcodec/opus_celt.c
index e77ca6f..fad471b 100644
--- a/libavcodec/opus_celt.c
+++ b/libavcodec/opus_celt.c
@@ -2,20 +2,20 @@
  * Copyright (c) 2012 Andrew D'Addesio
  * Copyright (c) 2013-2014 Mozilla Corporation
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -991,7 +991,7 @@ static inline int celt_pulses2bits(const uint8_t *cache, int pulses)
    return (pulses == 0) ? 0 : cache[pulses] + 1;
 }
 
-static inline void celt_normalize_residual(const int * restrict iy, float * restrict X,
+static inline void celt_normalize_residual(const int * av_restrict iy, float * av_restrict X,
                                            int N, float g)
 {
     int i;
@@ -1295,7 +1295,7 @@ static inline float celt_decode_pulses(OpusRangeCoder *rc, int *y, unsigned int
 {
     unsigned int idx;
 #define CELT_PVQ_U(n, k) (celt_pvq_u_row[FFMIN(n, k)][FFMAX(n, k)])
-#define CELT_PVQ_V(n, k) (CELT_PVQ_U(n, k) + CELT_PVQ_U(n, k + 1))
+#define CELT_PVQ_V(n, k) (CELT_PVQ_U(n, k) + CELT_PVQ_U(n, (k) + 1))
     idx = opus_rc_unimodel(rc, CELT_PVQ_V(N, K));
     return celt_cwrsi(N, K, idx, y);
 }
diff --git a/libavcodec/opus_imdct.c b/libavcodec/opus_imdct.c
index 38674ed..0a6fca9 100644
--- a/libavcodec/opus_imdct.c
+++ b/libavcodec/opus_imdct.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2013-2014 Mozilla Corporation
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/opus_imdct.h b/libavcodec/opus_imdct.h
index d4bff9a..0ca4d97 100644
--- a/libavcodec/opus_imdct.h
+++ b/libavcodec/opus_imdct.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/opus_parser.c b/libavcodec/opus_parser.c
index 8a2bc22..7eb72f9 100644
--- a/libavcodec/opus_parser.c
+++ b/libavcodec/opus_parser.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2013-2014 Mozilla Corporation
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/opus_silk.c b/libavcodec/opus_silk.c
index 3552484..7a89479 100644
--- a/libavcodec/opus_silk.c
+++ b/libavcodec/opus_silk.c
@@ -2,20 +2,20 @@
  * Copyright (c) 2012 Andrew D'Addesio
  * Copyright (c) 2013-2014 Mozilla Corporation
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/opusdec.c b/libavcodec/opusdec.c
index bf3a54b..b28edfb 100644
--- a/libavcodec/opusdec.c
+++ b/libavcodec/opusdec.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2012 Andrew D'Addesio
  * Copyright (c) 2013-2014 Mozilla Corporation
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -40,7 +40,7 @@
 #include "libavutil/channel_layout.h"
 #include "libavutil/opt.h"
 
-#include "libavresample/avresample.h"
+#include "libswresample/swresample.h"
 
 #include "avcodec.h"
 #include "celp_filters.h"
@@ -114,9 +114,9 @@ static int opus_flush_resample(OpusStreamContext *s, int nb_samples)
 {
     int celt_size = av_audio_fifo_size(s->celt_delay);
     int ret, i;
-
-    ret = avresample_convert(s->avr, (uint8_t**)s->out, s->out_size, nb_samples,
-                             NULL, 0, 0);
+    ret = swr_convert(s->swr,
+                      (uint8_t**)s->out, nb_samples,
+                      NULL, 0);
     if (ret < 0)
         return ret;
     else if (ret != nb_samples) {
@@ -155,19 +155,20 @@ static int opus_flush_resample(OpusStreamContext *s, int nb_samples)
 
 static int opus_init_resample(OpusStreamContext *s)
 {
-    float delay[16] = { 0.0 };
-    uint8_t *delayptr[2] = { (uint8_t*)delay, (uint8_t*)delay };
+    static const float delay[16] = { 0.0 };
+    const uint8_t *delayptr[2] = { (uint8_t*)delay, (uint8_t*)delay };
     int ret;
 
-    av_opt_set_int(s->avr, "in_sample_rate", s->silk_samplerate, 0);
-    ret = avresample_open(s->avr);
+    av_opt_set_int(s->swr, "in_sample_rate", s->silk_samplerate, 0);
+    ret = swr_init(s->swr);
     if (ret < 0) {
         av_log(s->avctx, AV_LOG_ERROR, "Error opening the resampler.\n");
         return ret;
     }
 
-    ret = avresample_convert(s->avr, NULL, 0, 0, delayptr, sizeof(delay),
-                             silk_resample_delay[s->packet.bandwidth]);
+    ret = swr_convert(s->swr,
+                      NULL, 0,
+                      delayptr, silk_resample_delay[s->packet.bandwidth]);
     if (ret < 0) {
         av_log(s->avctx, AV_LOG_ERROR,
                "Error feeding initial silence to the resampler.\n");
@@ -218,7 +219,7 @@ static int opus_decode_frame(OpusStreamContext *s, const uint8_t *data, int size
 
     /* decode the silk frame */
     if (s->packet.mode == OPUS_MODE_SILK || s->packet.mode == OPUS_MODE_HYBRID) {
-        if (!avresample_is_open(s->avr)) {
+        if (!swr_is_initialized(s->swr)) {
             ret = opus_init_resample(s);
             if (ret < 0)
                 return ret;
@@ -232,16 +233,14 @@ static int opus_decode_frame(OpusStreamContext *s, const uint8_t *data, int size
             av_log(s->avctx, AV_LOG_ERROR, "Error decoding a SILK frame.\n");
             return samples;
         }
-
-        samples = avresample_convert(s->avr, (uint8_t**)s->out, s->out_size,
-                                     s->packet.frame_duration,
-                                     (uint8_t**)s->silk_output,
-                                     sizeof(s->silk_buf[0]),
-                                     samples);
+        samples = swr_convert(s->swr,
+                              (uint8_t**)s->out, s->packet.frame_duration,
+                              (const uint8_t**)s->silk_output, samples);
         if (samples < 0) {
             av_log(s->avctx, AV_LOG_ERROR, "Error resampling SILK data.\n");
             return samples;
         }
+        av_assert2((samples & 7) == 0);
         s->delayed_samples += s->packet.frame_duration - samples;
     } else
         ff_silk_flush(s->silk);
@@ -374,10 +373,10 @@ static int opus_decode_subpacket(OpusStreamContext *s,
     int i, j, ret;
 
     /* check if we need to flush the resampler */
-    if (avresample_is_open(s->avr)) {
+    if (swr_is_initialized(s->swr)) {
         if (buf) {
             int64_t cur_samplerate;
-            av_opt_get_int(s->avr, "in_sample_rate", 0, &cur_samplerate);
+            av_opt_get_int(s->swr, "in_sample_rate", 0, &cur_samplerate);
             flush_needed = (s->packet.mode == OPUS_MODE_CELT) || (cur_samplerate != s->silk_samplerate);
         } else {
             flush_needed = !!s->delayed_samples;
@@ -406,7 +405,7 @@ static int opus_decode_subpacket(OpusStreamContext *s,
             av_log(s->avctx, AV_LOG_ERROR, "Error flushing the resampler.\n");
             return ret;
         }
-        avresample_close(s->avr);
+        swr_close(s->swr);
         output_samples += s->delayed_samples;
         s->delayed_samples = 0;
 
@@ -555,7 +554,7 @@ static av_cold void opus_decode_flush(AVCodecContext *ctx)
 
         if (s->celt_delay)
             av_audio_fifo_drain(s->celt_delay, av_audio_fifo_size(s->celt_delay));
-        avresample_close(s->avr);
+        swr_close(s->swr);
 
         ff_silk_flush(s->silk);
         ff_celt_flush(s->celt);
@@ -577,7 +576,7 @@ static av_cold int opus_decode_close(AVCodecContext *avctx)
         s->out_dummy_allocated_size = 0;
 
         av_audio_fifo_free(s->celt_delay);
-        avresample_free(&s->avr);
+        swr_free(&s->swr);
     }
 
     av_freep(&c->streams);
@@ -627,16 +626,17 @@ static av_cold int opus_decode_init(AVCodecContext *avctx)
 
         s->fdsp = &c->fdsp;
 
-        s->avr = avresample_alloc_context();
-        if (!s->avr)
+        s->swr =swr_alloc();
+        if (!s->swr)
             goto fail;
 
         layout = (s->output_channels == 1) ? AV_CH_LAYOUT_MONO : AV_CH_LAYOUT_STEREO;
-        av_opt_set_int(s->avr, "in_sample_fmt",      avctx->sample_fmt,  0);
-        av_opt_set_int(s->avr, "out_sample_fmt",     avctx->sample_fmt,  0);
-        av_opt_set_int(s->avr, "in_channel_layout",  layout,             0);
-        av_opt_set_int(s->avr, "out_channel_layout", layout,             0);
-        av_opt_set_int(s->avr, "out_sample_rate",    avctx->sample_rate, 0);
+        av_opt_set_int(s->swr, "in_sample_fmt",      avctx->sample_fmt,  0);
+        av_opt_set_int(s->swr, "out_sample_fmt",     avctx->sample_fmt,  0);
+        av_opt_set_int(s->swr, "in_channel_layout",  layout,             0);
+        av_opt_set_int(s->swr, "out_channel_layout", layout,             0);
+        av_opt_set_int(s->swr, "out_sample_rate",    avctx->sample_rate, 0);
+        av_opt_set_int(s->swr, "filter_size",        16,                 0);
 
         ret = ff_silk_init(avctx, &s->silk, s->output_channels);
         if (ret < 0)
diff --git a/libavcodec/paf.h b/libavcodec/paf.h
new file mode 100644
index 0000000..ce8245f
--- /dev/null
+++ b/libavcodec/paf.h
@@ -0,0 +1,28 @@
+/*
+ * Packed Animation File decoder/demuxer common code
+ * Copyright (c) 2012 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_PAF_H
+#define AVCODEC_PAF_H
+
+#define PAF_SOUND_SAMPLES     2205
+#define PAF_SOUND_FRAME_SIZE  ((256 + PAF_SOUND_SAMPLES) * 2)
+
+#endif /* AVCODEC_PAF_H */
diff --git a/libavcodec/pafaudio.c b/libavcodec/pafaudio.c
index 52aaabf..9b48533 100644
--- a/libavcodec/pafaudio.c
+++ b/libavcodec/pafaudio.c
@@ -2,20 +2,20 @@
  * Packed Animation File audio decoder
  * Copyright (c) 2012 Paul B Mahol
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/pafvideo.c b/libavcodec/pafvideo.c
index 650d036..0e92989 100644
--- a/libavcodec/pafvideo.c
+++ b/libavcodec/pafvideo.c
@@ -2,39 +2,49 @@
  * Packed Animation File video decoder
  * Copyright (c) 2012 Paul B Mahol
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "libavutil/imgutils.h"
 
+#include "libavcodec/paf.h"
 #include "avcodec.h"
 #include "bytestream.h"
 #include "copy_block.h"
 #include "internal.h"
 
+
 static const uint8_t block_sequences[16][8] = {
-    { 0, 0, 0, 0, 0, 0, 0, 0 }, { 2, 0, 0, 0, 0, 0, 0, 0 },
-    { 5, 7, 0, 0, 0, 0, 0, 0 }, { 5, 0, 0, 0, 0, 0, 0, 0 },
-    { 6, 0, 0, 0, 0, 0, 0, 0 }, { 5, 7, 5, 7, 0, 0, 0, 0 },
-    { 5, 7, 5, 0, 0, 0, 0, 0 }, { 5, 7, 6, 0, 0, 0, 0, 0 },
-    { 5, 5, 0, 0, 0, 0, 0, 0 }, { 3, 0, 0, 0, 0, 0, 0, 0 },
-    { 6, 6, 0, 0, 0, 0, 0, 0 }, { 2, 4, 0, 0, 0, 0, 0, 0 },
-    { 2, 4, 5, 7, 0, 0, 0, 0 }, { 2, 4, 5, 0, 0, 0, 0, 0 },
-    { 2, 4, 6, 0, 0, 0, 0, 0 }, { 2, 4, 5, 7, 5, 7, 0, 0 },
+    { 0, 0, 0, 0, 0, 0, 0, 0 },
+    { 2, 0, 0, 0, 0, 0, 0, 0 },
+    { 5, 7, 0, 0, 0, 0, 0, 0 },
+    { 5, 0, 0, 0, 0, 0, 0, 0 },
+    { 6, 0, 0, 0, 0, 0, 0, 0 },
+    { 5, 7, 5, 7, 0, 0, 0, 0 },
+    { 5, 7, 5, 0, 0, 0, 0, 0 },
+    { 5, 7, 6, 0, 0, 0, 0, 0 },
+    { 5, 5, 0, 0, 0, 0, 0, 0 },
+    { 3, 0, 0, 0, 0, 0, 0, 0 },
+    { 6, 6, 0, 0, 0, 0, 0, 0 },
+    { 2, 4, 0, 0, 0, 0, 0, 0 },
+    { 2, 4, 5, 7, 0, 0, 0, 0 },
+    { 2, 4, 5, 0, 0, 0, 0, 0 },
+    { 2, 4, 6, 0, 0, 0, 0, 0 },
+    { 2, 4, 5, 7, 5, 7, 0, 0 },
 };
 
 typedef struct PAFVideoDecContext {
@@ -156,9 +166,11 @@ static int decode_0(PAFVideoDecContext *c, uint8_t *pkt, uint8_t code)
     i = bytestream2_get_byte(&c->gb);
     if (i) {
         if (code & 0x10) {
-            int pos = bytestream2_tell(&c->gb) & 3;
-            if (pos)
-                bytestream2_skip(&c->gb, 4 - pos);
+            int align;
+
+            align = bytestream2_tell(&c->gb) & 3;
+            if (align)
+                bytestream2_skip(&c->gb, 4 - align);
         }
         do {
             int page, val, x, y;
diff --git a/libavcodec/pamenc.c b/libavcodec/pamenc.c
index 8535d3d..64ab2b5 100644
--- a/libavcodec/pamenc.c
+++ b/libavcodec/pamenc.c
@@ -2,52 +2,39 @@
  * PAM image format
  * Copyright (c) 2002, 2003 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "avcodec.h"
-#include "bytestream.h"
 #include "internal.h"
 
 static int pam_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
-                            const AVFrame *pict, int *got_packet)
+                            const AVFrame *p, int *got_packet)
 {
     uint8_t *bytestream_start, *bytestream, *bytestream_end;
-    const AVFrame * const p = pict;
     int i, h, w, n, linesize, depth, maxval, ret;
     const char *tuple_type;
     uint8_t *ptr;
 
-    if ((ret = ff_alloc_packet(pkt, avpicture_get_size(avctx->pix_fmt,
-                                                       avctx->width,
-                                                       avctx->height) + 200)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "encoded frame too large\n");
-        return ret;
-    }
-
-    bytestream_start =
-    bytestream       = pkt->data;
-    bytestream_end   = pkt->data + pkt->size;
-
     h = avctx->height;
     w = avctx->width;
     switch (avctx->pix_fmt) {
-    case AV_PIX_FMT_MONOWHITE:
-        n          = (w + 7) >> 3;
+    case AV_PIX_FMT_MONOBLACK:
+        n          = w;
         depth      = 1;
         maxval     = 1;
         tuple_type = "BLACKANDWHITE";
@@ -58,21 +45,53 @@ static int pam_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         maxval     = 255;
         tuple_type = "GRAYSCALE";
         break;
+    case AV_PIX_FMT_GRAY16BE:
+        n          = w * 2;
+        depth      = 1;
+        maxval     = 0xFFFF;
+        tuple_type = "GRAYSCALE";
+        break;
+    case AV_PIX_FMT_GRAY8A:
+        n          = w * 2;
+        depth      = 2;
+        maxval     = 255;
+        tuple_type = "GRAYSCALE_ALPHA";
+        break;
     case AV_PIX_FMT_RGB24:
         n          = w * 3;
         depth      = 3;
         maxval     = 255;
         tuple_type = "RGB";
         break;
-    case AV_PIX_FMT_RGB32:
+    case AV_PIX_FMT_RGBA:
         n          = w * 4;
         depth      = 4;
         maxval     = 255;
         tuple_type = "RGB_ALPHA";
         break;
+    case AV_PIX_FMT_RGB48BE:
+        n          = w * 6;
+        depth      = 3;
+        maxval     = 0xFFFF;
+        tuple_type = "RGB";
+        break;
+    case AV_PIX_FMT_RGBA64BE:
+        n          = w * 8;
+        depth      = 4;
+        maxval     = 0xFFFF;
+        tuple_type = "RGB_ALPHA";
+        break;
     default:
         return -1;
     }
+
+    if ((ret = ff_alloc_packet2(avctx, pkt, n*h + 200)) < 0)
+        return ret;
+
+    bytestream_start =
+    bytestream       = pkt->data;
+    bytestream_end   = pkt->data + pkt->size;
+
     snprintf(bytestream, bytestream_end - bytestream,
              "P7\nWIDTH %d\nHEIGHT %d\nDEPTH %d\nMAXVAL %d\nTUPLTYPE %s\nENDHDR\n",
              w, h, depth, maxval, tuple_type);
@@ -81,16 +100,11 @@ static int pam_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     ptr      = p->data[0];
     linesize = p->linesize[0];
 
-    if (avctx->pix_fmt == AV_PIX_FMT_RGB32) {
+    if (avctx->pix_fmt == AV_PIX_FMT_MONOBLACK){
         int j;
-        unsigned int v;
-
         for (i = 0; i < h; i++) {
-            for (j = 0; j < w; j++) {
-                v = ((uint32_t *)ptr)[j];
-                bytestream_put_be24(&bytestream, v);
-                *bytestream++ = v >> 24;
-            }
+            for (j = 0; j < w; j++)
+                *bytestream++ = ptr[j >> 3] >> (7 - j & 7) & 1;
             ptr += linesize;
         }
     } else {
@@ -134,7 +148,6 @@ AVCodec ff_pam_encoder = {
     .close          = pam_encode_close,
     .encode2        = pam_encode_frame,
     .pix_fmts       = (const enum AVPixelFormat[]){
-        AV_PIX_FMT_RGB24, AV_PIX_FMT_RGB32, AV_PIX_FMT_GRAY8, AV_PIX_FMT_MONOWHITE,
-        AV_PIX_FMT_NONE
+        AV_PIX_FMT_RGB24, AV_PIX_FMT_RGBA, AV_PIX_FMT_RGB48BE, AV_PIX_FMT_RGBA64BE, AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY8A, AV_PIX_FMT_GRAY16BE, AV_PIX_FMT_MONOBLACK, AV_PIX_FMT_NONE
     },
 };
diff --git a/libavcodec/parser.c b/libavcodec/parser.c
index 6d20516..71449ca 100644
--- a/libavcodec/parser.c
+++ b/libavcodec/parser.c
@@ -3,26 +3,27 @@
  * Copyright (c) 2003 Fabrice Bellard
  * Copyright (c) 2003 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include <stdint.h>
 #include <string.h>
 
+#include "libavutil/atomic.h"
 #include "libavutil/mem.h"
 
 #include "parser.h"
@@ -39,13 +40,14 @@ AVCodecParser *av_parser_next(const AVCodecParser *p)
 
 void av_register_codec_parser(AVCodecParser *parser)
 {
-    parser->next = av_first_parser;
-    av_first_parser = parser;
+    do {
+        parser->next = av_first_parser;
+    } while (parser->next != avpriv_atomic_ptr_cas((void * volatile *)&av_first_parser, parser->next, parser));
 }
 
 AVCodecParserContext *av_parser_init(int codec_id)
 {
-    AVCodecParserContext *s;
+    AVCodecParserContext *s = NULL;
     AVCodecParser *parser;
     int ret;
 
@@ -65,31 +67,30 @@ AVCodecParserContext *av_parser_init(int codec_id)
 found:
     s = av_mallocz(sizeof(AVCodecParserContext));
     if (!s)
-        return NULL;
+        goto err_out;
     s->parser = parser;
-    if (parser->priv_data_size) {
-        s->priv_data = av_mallocz(parser->priv_data_size);
-        if (!s->priv_data) {
-            av_free(s);
-            return NULL;
-        }
-    }
+    s->priv_data = av_mallocz(parser->priv_data_size);
+    if (!s->priv_data)
+        goto err_out;
+    s->fetch_timestamp=1;
+    s->pict_type = AV_PICTURE_TYPE_I;
     if (parser->parser_init) {
         ret = parser->parser_init(s);
-        if (ret != 0) {
-            av_free(s->priv_data);
-            av_free(s);
-            return NULL;
-        }
+        if (ret != 0)
+            goto err_out;
     }
-    s->fetch_timestamp      = 1;
-    s->pict_type            = AV_PICTURE_TYPE_I;
     s->key_frame            = -1;
     s->convergence_duration = 0;
     s->dts_sync_point       = INT_MIN;
     s->dts_ref_dts_delta    = INT_MIN;
     s->pts_dts_delta        = INT_MIN;
     return s;
+
+err_out:
+    if (s)
+        av_freep(&s->priv_data);
+    av_free(s);
+    return NULL;
 }
 
 void ff_fetch_timestamp(AVCodecParserContext *s, int off, int remove)
@@ -103,8 +104,10 @@ void ff_fetch_timestamp(AVCodecParserContext *s, int off, int remove)
     for (i = 0; i < AV_PARSER_PTS_NB; i++) {
         if (s->cur_offset + off >= s->cur_frame_offset[i] &&
             (s->frame_offset < s->cur_frame_offset[i] ||
-             (!s->frame_offset && !s->next_frame_offset)) &&
-            s->cur_frame_end[i]) {
+             (!s->frame_offset && !s->next_frame_offset)) && // first field/frame
+            // check disabled since MPEG-TS does not send complete PES packets
+            /*s->next_frame_offset + off <*/  s->cur_frame_end[i]){
+
             s->dts    = s->cur_frame_dts[i];
             s->pts    = s->cur_frame_pts[i];
             s->pos    = s->cur_frame_pos[i];
@@ -240,8 +243,10 @@ int ff_combine_frame(ParseContext *pc, int next,
                                            *buf_size + pc->index +
                                            FF_INPUT_BUFFER_PADDING_SIZE);
 
-        if (!new_buffer)
+        if (!new_buffer) {
+            pc->index = 0;
             return AVERROR(ENOMEM);
+        }
         pc->buffer = new_buffer;
         memcpy(&pc->buffer[pc->index], *buf, *buf_size);
         pc->index += *buf_size;
@@ -256,9 +261,11 @@ int ff_combine_frame(ParseContext *pc, int next,
         void *new_buffer = av_fast_realloc(pc->buffer, &pc->buffer_size,
                                            next + pc->index +
                                            FF_INPUT_BUFFER_PADDING_SIZE);
-
-        if (!new_buffer)
+        if (!new_buffer) {
+            pc->overread_index =
+            pc->index = 0;
             return AVERROR(ENOMEM);
+        }
         pc->buffer = new_buffer;
         if (next > -FF_INPUT_BUFFER_PADDING_SIZE)
             memcpy(&pc->buffer[pc->index], *buf,
diff --git a/libavcodec/parser.h b/libavcodec/parser.h
index ea1cae2..7fe0e11 100644
--- a/libavcodec/parser.h
+++ b/libavcodec/parser.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2003 Fabrice Bellard
  * Copyright (c) 2003 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/pcm-bluray.c b/libavcodec/pcm-bluray.c
index 7e4dcf8..695bc31 100644
--- a/libavcodec/pcm-bluray.c
+++ b/libavcodec/pcm-bluray.c
@@ -2,20 +2,20 @@
  * LPCM codecs for PCM format found in Blu-ray PCM streams
  * Copyright (c) 2009, 2013 Christian Schmidt
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -71,13 +71,14 @@ static int pcm_bluray_parse_header(AVCodecContext *avctx,
 
     /* get the sample depth and derive the sample format from it */
     avctx->bits_per_coded_sample = bits_per_samples[header[3] >> 6];
-    if (!avctx->bits_per_coded_sample) {
-        av_log(avctx, AV_LOG_ERROR, "reserved sample depth (0)\n");
+    if (!(avctx->bits_per_coded_sample == 16 || avctx->bits_per_coded_sample == 24)) {
+        av_log(avctx, AV_LOG_ERROR, "unsupported sample depth (%d)\n", avctx->bits_per_coded_sample);
         return AVERROR_INVALIDDATA;
     }
     avctx->sample_fmt = avctx->bits_per_coded_sample == 16 ? AV_SAMPLE_FMT_S16
                                                            : AV_SAMPLE_FMT_S32;
-    avctx->bits_per_raw_sample = avctx->bits_per_coded_sample;
+    if (avctx->sample_fmt == AV_SAMPLE_FMT_S32)
+        avctx->bits_per_raw_sample = avctx->bits_per_coded_sample;
 
     /* get the sample rate. Not all values are used. */
     switch (header[2] & 0x0f) {
@@ -154,10 +155,8 @@ static int pcm_bluray_decode_frame(AVCodecContext *avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = samples;
-    if ((retval = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((retval = ff_get_buffer(avctx, frame, 0)) < 0)
         return retval;
-    }
     dst16 = (int16_t *)frame->data[0];
     dst32 = (int32_t *)frame->data[0];
 
diff --git a/libavcodec/pcm-dvd.c b/libavcodec/pcm-dvd.c
index 0872d29..9b4c40e 100644
--- a/libavcodec/pcm-dvd.c
+++ b/libavcodec/pcm-dvd.c
@@ -2,20 +2,20 @@
  * LPCM codecs for PCM formats found in Video DVD streams
  * Copyright (c) 2013 Christian Schmidt
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -31,6 +31,7 @@
 typedef struct PCMDVDContext {
     uint32_t last_header;    // Cached header to see if parsing is needed
     int block_size;          // Size of a block of samples in bytes
+    int last_block_size;     // Size of the last block of samples in bytes
     int samples_per_block;   // Number of samples per channel per block
     int groups_per_block;    // Number of 20/24bit sample groups per block
     uint8_t *extra_samples;  // Pointer to leftover samples from a frame
@@ -69,6 +70,7 @@ static int pcm_dvd_parse_header(AVCodecContext *avctx, const uint8_t *header)
     /* early exit if the header didn't change apart from the frame number */
     if (s->last_header == header_int)
         return 0;
+    s->last_header = -1;
 
     if (avctx->debug & FF_DEBUG_PICT_INFO)
         av_dlog(avctx, "pcm_dvd_parse_header: header = %02x%02x%02x\n",
@@ -85,7 +87,9 @@ static int pcm_dvd_parse_header(AVCodecContext *avctx, const uint8_t *header)
     /* get the sample depth and derive the sample format from it */
     avctx->bits_per_coded_sample = 16 + (header[1] >> 6 & 3) * 4;
     if (avctx->bits_per_coded_sample == 28) {
-        av_log(avctx, AV_LOG_ERROR, "PCM DVD unsupported sample depth\n");
+        av_log(avctx, AV_LOG_ERROR,
+               "PCM DVD unsupported sample depth %i\n",
+               avctx->bits_per_coded_sample);
         return AVERROR_INVALIDDATA;
     }
     avctx->sample_fmt = avctx->bits_per_coded_sample == 16 ? AV_SAMPLE_FMT_S16
@@ -170,6 +174,17 @@ static void *pcm_dvd_decode_samples(AVCodecContext *avctx, const uint8_t *src,
 #endif
         return dst16;
     case 20:
+        if (avctx->channels == 1) {
+            do {
+                for (i = 2; i; i--) {
+                    dst32[0] = bytestream2_get_be16u(&gb) << 16;
+                    dst32[1] = bytestream2_get_be16u(&gb) << 16;
+                    t = bytestream2_get_byteu(&gb);
+                    *dst32++ += (t & 0xf0) << 8;
+                    *dst32++ += (t & 0x0f) << 12;
+                }
+            } while (--blocks);
+        } else {
         do {
             for (i = s->groups_per_block; i; i--) {
                 dst32[0] = bytestream2_get_be16u(&gb) << 16;
@@ -184,8 +199,19 @@ static void *pcm_dvd_decode_samples(AVCodecContext *avctx, const uint8_t *src,
                 *dst32++ += (t & 0x0f) << 12;
             }
         } while (--blocks);
+        }
         return dst32;
     case 24:
+        if (avctx->channels == 1) {
+            do {
+                for (i = 2; i; i--) {
+                    dst32[0] = bytestream2_get_be16u(&gb) << 16;
+                    dst32[1] = bytestream2_get_be16u(&gb) << 16;
+                    *dst32++ += bytestream2_get_byteu(&gb) << 8;
+                    *dst32++ += bytestream2_get_byteu(&gb) << 8;
+                }
+            } while (--blocks);
+        } else {
         do {
             for (i = s->groups_per_block; i; i--) {
                 dst32[0] = bytestream2_get_be16u(&gb) << 16;
@@ -198,6 +224,7 @@ static void *pcm_dvd_decode_samples(AVCodecContext *avctx, const uint8_t *src,
                 *dst32++ += bytestream2_get_byteu(&gb) << 8;
             }
         } while (--blocks);
+        }
         return dst32;
     default:
         return NULL;
@@ -222,6 +249,11 @@ static int pcm_dvd_decode_frame(AVCodecContext *avctx, void *data,
 
     if ((retval = pcm_dvd_parse_header(avctx, src)))
         return retval;
+    if (s->last_block_size && s->last_block_size != s->block_size) {
+        av_log(avctx, AV_LOG_WARNING, "block_size has changed %d != %d\n", s->last_block_size, s->block_size);
+        s->extra_sample_count = 0;
+    }
+    s->last_block_size = s->block_size;
     src      += 3;
     buf_size -= 3;
 
@@ -229,10 +261,8 @@ static int pcm_dvd_decode_frame(AVCodecContext *avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = blocks * s->samples_per_block;
-    if ((retval = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((retval = ff_get_buffer(avctx, frame, 0)) < 0)
         return retval;
-    }
     dst = frame->data[0];
 
     /* consume leftover samples from last packet */
diff --git a/libavcodec/pcm.c b/libavcodec/pcm.c
index 9fadcb7..0a4ad0b 100644
--- a/libavcodec/pcm.c
+++ b/libavcodec/pcm.c
@@ -2,20 +2,20 @@
  * PCM codecs
  * Copyright (c) 2001 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -48,16 +48,6 @@ static av_cold int pcm_encode_init(AVCodecContext *avctx)
     avctx->bits_per_coded_sample = av_get_bits_per_sample(avctx->codec->id);
     avctx->block_align           = avctx->channels * avctx->bits_per_coded_sample / 8;
     avctx->bit_rate              = avctx->block_align * avctx->sample_rate * 8;
-    avctx->coded_frame           = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-
-    return 0;
-}
-
-static av_cold int pcm_encode_close(AVCodecContext *avctx)
-{
-    av_freep(&avctx->coded_frame);
 
     return 0;
 }
@@ -79,13 +69,24 @@ static av_cold int pcm_encode_close(AVCodecContext *avctx)
         bytestream_put_ ## endian(&dst, v);                             \
     }
 
+#define ENCODE_PLANAR(type, endian, dst, n, shift, offset)              \
+    n /= avctx->channels;                                               \
+    for (c = 0; c < avctx->channels; c++) {                             \
+        int i;                                                          \
+        samples_ ## type = (const type *) frame->extended_data[c];      \
+        for (i = n; i > 0; i--) {                                       \
+            register type v = (*samples_ ## type++ >> shift) + offset;  \
+            bytestream_put_ ## endian(&dst, v);                         \
+        }                                                               \
+    }
+
 static int pcm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                             const AVFrame *frame, int *got_packet_ptr)
 {
-    int n, sample_size, v, ret;
+    int n, c, sample_size, v, ret;
     const short *samples;
     unsigned char *dst;
-    const uint8_t *srcu8;
+    const uint8_t *samples_uint8_t;
     const int16_t *samples_int16_t;
     const int32_t *samples_int32_t;
     const int64_t *samples_int64_t;
@@ -96,10 +97,8 @@ static int pcm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     n           = frame->nb_samples * avctx->channels;
     samples     = (const short *)frame->data[0];
 
-    if ((ret = ff_alloc_packet(avpkt, n * sample_size))) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+    if ((ret = ff_alloc_packet2(avctx, avpkt, n * sample_size)) < 0)
         return ret;
-    }
     dst = avpkt->data;
 
     switch (avctx->codec->id) {
@@ -112,6 +111,9 @@ static int pcm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     case AV_CODEC_ID_PCM_S24LE:
         ENCODE(int32_t, le24, samples, dst, n, 8, 0)
         break;
+    case AV_CODEC_ID_PCM_S24LE_PLANAR:
+        ENCODE_PLANAR(int32_t, le24, dst, n, 8, 0)
+        break;
     case AV_CODEC_ID_PCM_S24BE:
         ENCODE(int32_t, be24, samples, dst, n, 8, 0)
         break;
@@ -137,11 +139,10 @@ static int pcm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
         ENCODE(uint16_t, be16, samples, dst, n, 0, 0x8000)
         break;
     case AV_CODEC_ID_PCM_S8:
-        srcu8 = frame->data[0];
-        for (; n > 0; n--) {
-            v      = *srcu8++;
-            *dst++ = v - 128;
-        }
+        ENCODE(uint8_t, byte, samples, dst, n, 0, -128)
+        break;
+    case AV_CODEC_ID_PCM_S8_PLANAR:
+        ENCODE_PLANAR(uint8_t, byte, dst, n, 0, -128)
         break;
 #if HAVE_BIGENDIAN
     case AV_CODEC_ID_PCM_F64LE:
@@ -151,9 +152,15 @@ static int pcm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     case AV_CODEC_ID_PCM_F32LE:
         ENCODE(int32_t, le32, samples, dst, n, 0, 0)
         break;
+    case AV_CODEC_ID_PCM_S32LE_PLANAR:
+        ENCODE_PLANAR(int32_t, le32, dst, n, 0, 0)
+        break;
     case AV_CODEC_ID_PCM_S16LE:
         ENCODE(int16_t, le16, samples, dst, n, 0, 0)
         break;
+    case AV_CODEC_ID_PCM_S16LE_PLANAR:
+        ENCODE_PLANAR(int16_t, le16, dst, n, 0, 0)
+        break;
     case AV_CODEC_ID_PCM_F64BE:
     case AV_CODEC_ID_PCM_F32BE:
     case AV_CODEC_ID_PCM_S32BE:
@@ -169,6 +176,9 @@ static int pcm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     case AV_CODEC_ID_PCM_S16BE:
         ENCODE(int16_t, be16, samples, dst, n, 0, 0)
         break;
+    case AV_CODEC_ID_PCM_S16BE_PLANAR:
+        ENCODE_PLANAR(int16_t, be16, dst, n, 0, 0)
+        break;
     case AV_CODEC_ID_PCM_F64LE:
     case AV_CODEC_ID_PCM_F32LE:
     case AV_CODEC_ID_PCM_S32LE:
@@ -176,7 +186,18 @@ static int pcm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 #endif /* HAVE_BIGENDIAN */
     case AV_CODEC_ID_PCM_U8:
         memcpy(dst, samples, n * sample_size);
-        dst += n * sample_size;
+        break;
+#if HAVE_BIGENDIAN
+    case AV_CODEC_ID_PCM_S16BE_PLANAR:
+#else
+    case AV_CODEC_ID_PCM_S16LE_PLANAR:
+    case AV_CODEC_ID_PCM_S32LE_PLANAR:
+#endif /* HAVE_BIGENDIAN */
+        n /= avctx->channels;
+        for (c = 0; c < avctx->channels; c++) {
+            const uint8_t *src = frame->extended_data[c];
+            bytestream_put_buffer(&dst, src, n * sample_size);
+        }
         break;
     case AV_CODEC_ID_PCM_ALAW:
         for (; n > 0; n--) {
@@ -212,7 +233,7 @@ static av_cold int pcm_decode_init(AVCodecContext *avctx)
         return AVERROR(EINVAL);
     }
 
-    switch (avctx->codec->id) {
+    switch (avctx->codec_id) {
     case AV_CODEC_ID_PCM_ALAW:
         for (i = 0; i < 256; i++)
             s->table[i] = alaw2linear(i);
@@ -228,7 +249,7 @@ static av_cold int pcm_decode_init(AVCodecContext *avctx)
     avctx->sample_fmt = avctx->codec->sample_fmts[0];
 
     if (avctx->sample_fmt == AV_SAMPLE_FMT_S32)
-        avctx->bits_per_raw_sample = av_get_bits_per_sample(avctx->codec->id);
+        avctx->bits_per_raw_sample = av_get_bits_per_sample(avctx->codec_id);
 
     return 0;
 }
@@ -250,29 +271,17 @@ static av_cold int pcm_decode_init(AVCodecContext *avctx)
         dst += size / 8;                                                \
     }
 
-#if HAVE_BIGENDIAN
-#define DECODE_PLANAR(size, endian, src, dst, n, shift, offset)         \
-    {                                                                   \
-        int av_unused n2;                                               \
-        n /= avctx->channels;                                           \
-        for (c = 0; c < avctx->channels; c++) {                         \
-            samples = frame->extended_data[c];                          \
-            n2 = n;                                                     \
-            DECODE(size, endian, src, samples, n2, 0, 0)                \
-        }                                                               \
-    }
-#else
 #define DECODE_PLANAR(size, endian, src, dst, n, shift, offset)         \
-    {                                                                   \
-        int av_unused n2;                                               \
-        n /= avctx->channels;                                           \
-        for (c = 0; c < avctx->channels; c++) {                         \
-            samples = frame->extended_data[c];                          \
-            memcpy(samples, src, n * size / 8);                         \
-            src += n * size / 8;                                        \
+    n /= avctx->channels;                                               \
+    for (c = 0; c < avctx->channels; c++) {                             \
+        int i;                                                          \
+        dst = frame->extended_data[c];                                \
+        for (i = n; i > 0; i--) {                                       \
+            uint ## size ## _t v = bytestream_get_ ## endian(&src);     \
+            AV_WN ## size ## A(dst, (v - offset) << shift);             \
+            dst += size / 8;                                            \
         }                                                               \
     }
-#endif /* HAVE_BIGENDIAN */
 
 static int pcm_decode_frame(AVCodecContext *avctx, void *data,
                             int *got_frame_ptr, AVPacket *avpkt)
@@ -300,12 +309,24 @@ static int pcm_decode_frame(AVCodecContext *avctx, void *data,
         return AVERROR(EINVAL);
     }
 
+    if (avctx->channels == 0) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid number of channels\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (avctx->codec_id != avctx->codec->id) {
+        av_log(avctx, AV_LOG_ERROR, "codec ids mismatch\n");
+        return AVERROR(EINVAL);
+    }
+
     n = avctx->channels * sample_size;
 
     if (n && buf_size % n) {
         if (buf_size < n) {
-            av_log(avctx, AV_LOG_ERROR, "invalid PCM packet\n");
-            return -1;
+            av_log(avctx, AV_LOG_ERROR,
+                   "Invalid PCM packet, data has size %d but at least a size of %d was expected\n",
+                   buf_size, n);
+            return AVERROR_INVALIDDATA;
         } else
             buf_size -= buf_size % n;
     }
@@ -314,13 +335,11 @@ static int pcm_decode_frame(AVCodecContext *avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = n * samples_per_block / avctx->channels;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     samples = frame->data[0];
 
-    switch (avctx->codec->id) {
+    switch (avctx->codec_id) {
     case AV_CODEC_ID_PCM_U32LE:
         DECODE(32, le32, src, samples, n, 0, 0x80000000)
         break;
@@ -330,6 +349,9 @@ static int pcm_decode_frame(AVCodecContext *avctx, void *data,
     case AV_CODEC_ID_PCM_S24LE:
         DECODE(32, le24, src, samples, n, 8, 0)
         break;
+    case AV_CODEC_ID_PCM_S24LE_PLANAR:
+        DECODE_PLANAR(32, le24, src, samples, n, 8, 0);
+        break;
     case AV_CODEC_ID_PCM_S24BE:
         DECODE(32, be24, src, samples, n, 8, 0)
         break;
@@ -348,15 +370,6 @@ static int pcm_decode_frame(AVCodecContext *avctx, void *data,
             samples += 2;
         }
         break;
-    case AV_CODEC_ID_PCM_S16LE_PLANAR:
-        DECODE_PLANAR(16, le16, src, samples, n, 0, 0);
-        break;
-    case AV_CODEC_ID_PCM_S24LE_PLANAR:
-        DECODE_PLANAR(32, le24, src, samples, n, 8, 0);
-        break;
-    case AV_CODEC_ID_PCM_S32LE_PLANAR:
-        DECODE_PLANAR(32, le32, src, samples, n, 0, 0);
-        break;
     case AV_CODEC_ID_PCM_U16LE:
         DECODE(16, le16, src, samples, n, 0, 0x8000)
         break;
@@ -367,6 +380,15 @@ static int pcm_decode_frame(AVCodecContext *avctx, void *data,
         for (; n > 0; n--)
             *samples++ = *src++ + 128;
         break;
+    case AV_CODEC_ID_PCM_S8_PLANAR:
+        n /= avctx->channels;
+        for (c = 0; c < avctx->channels; c++) {
+            int i;
+            samples = frame->extended_data[c];
+            for (i = n; i > 0; i--)
+                *samples++ = *src++ + 128;
+        }
+        break;
 #if HAVE_BIGENDIAN
     case AV_CODEC_ID_PCM_F64LE:
         DECODE(64, le64, src, samples, n, 0, 0)
@@ -375,9 +397,15 @@ static int pcm_decode_frame(AVCodecContext *avctx, void *data,
     case AV_CODEC_ID_PCM_F32LE:
         DECODE(32, le32, src, samples, n, 0, 0)
         break;
+    case AV_CODEC_ID_PCM_S32LE_PLANAR:
+        DECODE_PLANAR(32, le32, src, samples, n, 0, 0);
+        break;
     case AV_CODEC_ID_PCM_S16LE:
         DECODE(16, le16, src, samples, n, 0, 0)
         break;
+    case AV_CODEC_ID_PCM_S16LE_PLANAR:
+        DECODE_PLANAR(16, le16, src, samples, n, 0, 0);
+        break;
     case AV_CODEC_ID_PCM_F64BE:
     case AV_CODEC_ID_PCM_F32BE:
     case AV_CODEC_ID_PCM_S32BE:
@@ -393,6 +421,9 @@ static int pcm_decode_frame(AVCodecContext *avctx, void *data,
     case AV_CODEC_ID_PCM_S16BE:
         DECODE(16, be16, src, samples, n, 0, 0)
         break;
+    case AV_CODEC_ID_PCM_S16BE_PLANAR:
+        DECODE_PLANAR(16, be16, src, samples, n, 0, 0);
+        break;
     case AV_CODEC_ID_PCM_F64LE:
     case AV_CODEC_ID_PCM_F32LE:
     case AV_CODEC_ID_PCM_S32LE:
@@ -401,6 +432,18 @@ static int pcm_decode_frame(AVCodecContext *avctx, void *data,
     case AV_CODEC_ID_PCM_U8:
         memcpy(samples, src, n * sample_size);
         break;
+#if HAVE_BIGENDIAN
+    case AV_CODEC_ID_PCM_S16BE_PLANAR:
+#else
+    case AV_CODEC_ID_PCM_S16LE_PLANAR:
+    case AV_CODEC_ID_PCM_S32LE_PLANAR:
+#endif /* HAVE_BIGENDIAN */
+        n /= avctx->channels;
+        for (c = 0; c < avctx->channels; c++) {
+            samples = frame->extended_data[c];
+            bytestream_get_buffer(&src, samples, n * sample_size);
+        }
+        break;
     case AV_CODEC_ID_PCM_ZORK:
         for (; n > 0; n--) {
             int v = *src++;
@@ -458,7 +501,6 @@ AVCodec ff_ ## name_ ## _encoder = {                                        \
     .id           = AV_CODEC_ID_ ## id_,                                    \
     .init         = pcm_encode_init,                                        \
     .encode2      = pcm_encode_frame,                                       \
-    .close        = pcm_encode_close,                                       \
     .capabilities = CODEC_CAP_VARIABLE_FRAME_SIZE,                          \
     .sample_fmts  = (const enum AVSampleFormat[]){ sample_fmt_,             \
                                                    AV_SAMPLE_FMT_NONE },    \
@@ -498,24 +540,26 @@ AVCodec ff_ ## name_ ## _decoder = {                                        \
     PCM_DECODER(id, sample_fmt_, name, long_name_)
 
 /* Note: Do not forget to add new entries to the Makefile as well. */
-PCM_CODEC  (PCM_ALAW,         AV_SAMPLE_FMT_S16, pcm_alaw,         "PCM A-law");
+PCM_CODEC  (PCM_ALAW,         AV_SAMPLE_FMT_S16, pcm_alaw,         "PCM A-law / G.711 A-law");
 PCM_CODEC  (PCM_F32BE,        AV_SAMPLE_FMT_FLT, pcm_f32be,        "PCM 32-bit floating point big-endian");
 PCM_CODEC  (PCM_F32LE,        AV_SAMPLE_FMT_FLT, pcm_f32le,        "PCM 32-bit floating point little-endian");
 PCM_CODEC  (PCM_F64BE,        AV_SAMPLE_FMT_DBL, pcm_f64be,        "PCM 64-bit floating point big-endian");
 PCM_CODEC  (PCM_F64LE,        AV_SAMPLE_FMT_DBL, pcm_f64le,        "PCM 64-bit floating point little-endian");
-PCM_DECODER(PCM_LXF,          AV_SAMPLE_FMT_S32P, pcm_lxf,          "PCM signed 20-bit little-endian planar");
-PCM_CODEC  (PCM_MULAW,        AV_SAMPLE_FMT_S16, pcm_mulaw,        "PCM mu-law");
+PCM_DECODER(PCM_LXF,          AV_SAMPLE_FMT_S32P,pcm_lxf,          "PCM signed 20-bit little-endian planar");
+PCM_CODEC  (PCM_MULAW,        AV_SAMPLE_FMT_S16, pcm_mulaw,        "PCM mu-law / G.711 mu-law");
 PCM_CODEC  (PCM_S8,           AV_SAMPLE_FMT_U8,  pcm_s8,           "PCM signed 8-bit");
+PCM_CODEC  (PCM_S8_PLANAR,    AV_SAMPLE_FMT_U8P, pcm_s8_planar,    "PCM signed 8-bit planar");
 PCM_CODEC  (PCM_S16BE,        AV_SAMPLE_FMT_S16, pcm_s16be,        "PCM signed 16-bit big-endian");
+PCM_CODEC  (PCM_S16BE_PLANAR, AV_SAMPLE_FMT_S16P,pcm_s16be_planar, "PCM signed 16-bit big-endian planar");
 PCM_CODEC  (PCM_S16LE,        AV_SAMPLE_FMT_S16, pcm_s16le,        "PCM signed 16-bit little-endian");
-PCM_DECODER(PCM_S16LE_PLANAR, AV_SAMPLE_FMT_S16P, pcm_s16le_planar, "PCM 16-bit little-endian planar");
+PCM_CODEC  (PCM_S16LE_PLANAR, AV_SAMPLE_FMT_S16P,pcm_s16le_planar, "PCM signed 16-bit little-endian planar");
 PCM_CODEC  (PCM_S24BE,        AV_SAMPLE_FMT_S32, pcm_s24be,        "PCM signed 24-bit big-endian");
 PCM_CODEC  (PCM_S24DAUD,      AV_SAMPLE_FMT_S16, pcm_s24daud,      "PCM D-Cinema audio signed 24-bit");
 PCM_CODEC  (PCM_S24LE,        AV_SAMPLE_FMT_S32, pcm_s24le,        "PCM signed 24-bit little-endian");
-PCM_DECODER(PCM_S24LE_PLANAR, AV_SAMPLE_FMT_S32P,pcm_s24le_planar, "PCM signed 24-bit little-endian planar");
+PCM_CODEC  (PCM_S24LE_PLANAR, AV_SAMPLE_FMT_S32P,pcm_s24le_planar, "PCM signed 24-bit little-endian planar");
 PCM_CODEC  (PCM_S32BE,        AV_SAMPLE_FMT_S32, pcm_s32be,        "PCM signed 32-bit big-endian");
 PCM_CODEC  (PCM_S32LE,        AV_SAMPLE_FMT_S32, pcm_s32le,        "PCM signed 32-bit little-endian");
-PCM_DECODER(PCM_S32LE_PLANAR, AV_SAMPLE_FMT_S32P,pcm_s32le_planar, "PCM signed 32-bit little-endian planar");
+PCM_CODEC  (PCM_S32LE_PLANAR, AV_SAMPLE_FMT_S32P,pcm_s32le_planar, "PCM signed 32-bit little-endian planar");
 PCM_CODEC  (PCM_U8,           AV_SAMPLE_FMT_U8,  pcm_u8,           "PCM unsigned 8-bit");
 PCM_CODEC  (PCM_U16BE,        AV_SAMPLE_FMT_S16, pcm_u16be,        "PCM unsigned 16-bit big-endian");
 PCM_CODEC  (PCM_U16LE,        AV_SAMPLE_FMT_S16, pcm_u16le,        "PCM unsigned 16-bit little-endian");
@@ -524,3 +568,4 @@ PCM_CODEC  (PCM_U24LE,        AV_SAMPLE_FMT_S32, pcm_u24le,        "PCM unsigned
 PCM_CODEC  (PCM_U32BE,        AV_SAMPLE_FMT_S32, pcm_u32be,        "PCM unsigned 32-bit big-endian");
 PCM_CODEC  (PCM_U32LE,        AV_SAMPLE_FMT_S32, pcm_u32le,        "PCM unsigned 32-bit little-endian");
 PCM_DECODER(PCM_ZORK,         AV_SAMPLE_FMT_U8,  pcm_zork,         "PCM Zork");
+
diff --git a/libavcodec/pcm_tablegen.c b/libavcodec/pcm_tablegen.c
index 7b4bc8c..bf8e7fb 100644
--- a/libavcodec/pcm_tablegen.c
+++ b/libavcodec/pcm_tablegen.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2010 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/pcm_tablegen.h b/libavcodec/pcm_tablegen.h
index 79d6561..1387210 100644
--- a/libavcodec/pcm_tablegen.h
+++ b/libavcodec/pcm_tablegen.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2010 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/pcx.c b/libavcodec/pcx.c
index 61c971e..6487aa5 100644
--- a/libavcodec/pcx.c
+++ b/libavcodec/pcx.c
@@ -5,20 +5,20 @@
  * This decoder does not support CGA palettes. I am unable to find samples
  * and Netpbm cannot generate them.
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -28,44 +28,37 @@
 #include "get_bits.h"
 #include "internal.h"
 
-/**
- * @return advanced src pointer
- */
-static const uint8_t *pcx_rle_decode(const uint8_t *src,
-                                     const uint8_t *end,
-                                     uint8_t *dst,
-                                     unsigned int bytes_per_scanline,
-                                     int compressed)
+static void pcx_rle_decode(GetByteContext *gb,
+                           uint8_t *dst,
+                           unsigned int bytes_per_scanline,
+                           int compressed)
 {
     unsigned int i = 0;
     unsigned char run, value;
 
     if (compressed) {
-        while (i < bytes_per_scanline && src < end) {
+        while (i < bytes_per_scanline && bytestream2_get_bytes_left(gb)>0) {
             run   = 1;
-            value = *src++;
-            if (value >= 0xc0 && src < end) {
+            value = bytestream2_get_byte(gb);
+            if (value >= 0xc0 && bytestream2_get_bytes_left(gb)>0) {
                 run   = value & 0x3f;
-                value = *src++;
+                value = bytestream2_get_byte(gb);
             }
             while (i < bytes_per_scanline && run--)
                 dst[i++] = value;
         }
     } else {
-        memcpy(dst, src, bytes_per_scanline);
-        src += bytes_per_scanline;
+        bytestream2_get_buffer(gb, dst, bytes_per_scanline);
     }
-
-    return src;
 }
 
-static void pcx_palette(const uint8_t **src, uint32_t *dst,
-                        unsigned int pallen)
+static void pcx_palette(GetByteContext *gb, uint32_t *dst, int pallen)
 {
-    unsigned int i;
+    int i;
 
+    pallen = FFMIN(pallen, bytestream2_get_bytes_left(gb) / 3);
     for (i = 0; i < pallen; i++)
-        *dst++ = bytestream_get_be24(src);
+        *dst++ = 0xFF000000 | bytestream2_get_be24u(gb);
     if (pallen < 256)
         memset(dst, 0, (256 - pallen) * sizeof(*dst));
 }
@@ -73,28 +66,32 @@ static void pcx_palette(const uint8_t **src, uint32_t *dst,
 static int pcx_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                             AVPacket *avpkt)
 {
-    const uint8_t *buf = avpkt->data;
-    int buf_size       = avpkt->size;
-    AVFrame *const p   = data;
+    GetByteContext gb;
+    AVFrame * const p  = data;
     int compressed, xmin, ymin, xmax, ymax;
+    int ret;
     unsigned int w, h, bits_per_pixel, bytes_per_line, nplanes, stride, y, x,
                  bytes_per_scanline;
-    uint8_t *ptr;
-    const uint8_t *buf_end = buf + buf_size;
-    uint8_t const *bufstart = buf;
-    uint8_t *scanline;
-    int ret = -1;
+    uint8_t *ptr, *scanline;
+
+    if (avpkt->size < 128)
+        return AVERROR_INVALIDDATA;
+
+    bytestream2_init(&gb, avpkt->data, avpkt->size);
 
-    if (buf[0] != 0x0a || buf[1] > 5) {
+    if (bytestream2_get_byteu(&gb) != 0x0a || bytestream2_get_byteu(&gb) > 5) {
         av_log(avctx, AV_LOG_ERROR, "this is not PCX encoded data\n");
         return AVERROR_INVALIDDATA;
     }
 
-    compressed = buf[2];
-    xmin       = AV_RL16(buf + 4);
-    ymin       = AV_RL16(buf + 6);
-    xmax       = AV_RL16(buf + 8);
-    ymax       = AV_RL16(buf + 10);
+    compressed                     = bytestream2_get_byteu(&gb);
+    bits_per_pixel                 = bytestream2_get_byteu(&gb);
+    xmin                           = bytestream2_get_le16u(&gb);
+    ymin                           = bytestream2_get_le16u(&gb);
+    xmax                           = bytestream2_get_le16u(&gb);
+    ymax                           = bytestream2_get_le16u(&gb);
+    avctx->sample_aspect_ratio.num = bytestream2_get_le16u(&gb);
+    avctx->sample_aspect_ratio.den = bytestream2_get_le16u(&gb);
 
     if (xmax < xmin || ymax < ymin) {
         av_log(avctx, AV_LOG_ERROR, "invalid image dimensions\n");
@@ -104,13 +101,13 @@ static int pcx_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     w = xmax - xmin + 1;
     h = ymax - ymin + 1;
 
-    bits_per_pixel     = buf[3];
-    bytes_per_line     = AV_RL16(buf + 66);
-    nplanes            = buf[65];
+    bytestream2_skipu(&gb, 49);
+    nplanes            = bytestream2_get_byteu(&gb);
+    bytes_per_line     = bytestream2_get_le16u(&gb);
     bytes_per_scanline = nplanes * bytes_per_line;
 
     if (bytes_per_scanline < (w * bits_per_pixel * nplanes + 7) / 8 ||
-        (!compressed && bytes_per_scanline > buf_size / h)) {
+        (!compressed && bytes_per_scanline > bytestream2_get_bytes_left(&gb) / h)) {
         av_log(avctx, AV_LOG_ERROR, "PCX data is corrupted\n");
         return AVERROR_INVALIDDATA;
     }
@@ -133,29 +130,26 @@ static int pcx_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         return AVERROR_INVALIDDATA;
     }
 
-    buf += 128;
+    bytestream2_skipu(&gb, 60);
 
     if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
         return ret;
 
-    if ((ret = ff_get_buffer(avctx, p, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
         return ret;
-    }
 
     p->pict_type = AV_PICTURE_TYPE_I;
 
     ptr    = p->data[0];
     stride = p->linesize[0];
 
-    scanline = av_malloc(bytes_per_scanline);
+    scanline = av_malloc(bytes_per_scanline + FF_INPUT_BUFFER_PADDING_SIZE);
     if (!scanline)
         return AVERROR(ENOMEM);
 
     if (nplanes == 3 && bits_per_pixel == 8) {
         for (y = 0; y < h; y++) {
-            buf = pcx_rle_decode(buf, buf_end,
-                                 scanline, bytes_per_scanline, compressed);
+            pcx_rle_decode(&gb, scanline, bytes_per_scanline, compressed);
 
             for (x = 0; x < w; x++) {
                 ptr[3 * x]     = scanline[x];
@@ -166,39 +160,37 @@ static int pcx_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             ptr += stride;
         }
     } else if (nplanes == 1 && bits_per_pixel == 8) {
-        const uint8_t *palstart = bufstart + buf_size - 769;
+        int palstart = avpkt->size - 769;
 
-        if (buf_size < 769) {
+        if (avpkt->size < 769) {
             av_log(avctx, AV_LOG_ERROR, "File is too short\n");
             ret = avctx->err_recognition & AV_EF_EXPLODE ?
-                  AVERROR_INVALIDDATA : buf_size;
+                  AVERROR_INVALIDDATA : avpkt->size;
             goto end;
         }
 
         for (y = 0; y < h; y++, ptr += stride) {
-            buf = pcx_rle_decode(buf, buf_end,
-                                 scanline, bytes_per_scanline, compressed);
+            pcx_rle_decode(&gb, scanline, bytes_per_scanline, compressed);
             memcpy(ptr, scanline, w);
         }
 
-        if (buf != palstart) {
+        if (bytestream2_tell(&gb) != palstart) {
             av_log(avctx, AV_LOG_WARNING, "image data possibly corrupted\n");
-            buf = palstart;
+            bytestream2_seek(&gb, palstart, SEEK_SET);
         }
-        if (*buf++ != 12) {
+        if (bytestream2_get_byte(&gb) != 12) {
             av_log(avctx, AV_LOG_ERROR, "expected palette after image data\n");
             ret = avctx->err_recognition & AV_EF_EXPLODE ?
-                  AVERROR_INVALIDDATA : buf_size;
+                  AVERROR_INVALIDDATA : avpkt->size;
             goto end;
         }
     } else if (nplanes == 1) {   /* all packed formats, max. 16 colors */
         GetBitContext s;
 
         for (y = 0; y < h; y++) {
-            init_get_bits(&s, scanline, bytes_per_scanline << 3);
+            init_get_bits8(&s, scanline, bytes_per_scanline);
 
-            buf = pcx_rle_decode(buf, buf_end,
-                                 scanline, bytes_per_scanline, compressed);
+            pcx_rle_decode(&gb, scanline, bytes_per_scanline, compressed);
 
             for (x = 0; x < w; x++)
                 ptr[x] = get_bits(&s, bits_per_pixel);
@@ -208,8 +200,7 @@ static int pcx_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         int i;
 
         for (y = 0; y < h; y++) {
-            buf = pcx_rle_decode(buf, buf_end,
-                                 scanline, bytes_per_scanline, compressed);
+            pcx_rle_decode(&gb, scanline, bytes_per_scanline, compressed);
 
             for (x = 0; x < w; x++) {
                 int m = 0x80 >> (x & 7), v = 0;
@@ -223,16 +214,20 @@ static int pcx_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         }
     }
 
+    ret = bytestream2_tell(&gb);
     if (nplanes == 1 && bits_per_pixel == 8) {
-        pcx_palette(&buf, (uint32_t *)p->data[1], 256);
+        pcx_palette(&gb, (uint32_t *)p->data[1], 256);
+        ret += 256 * 3;
+    } else if (bits_per_pixel * nplanes == 1) {
+        AV_WN32A(p->data[1]  , 0xFF000000);
+        AV_WN32A(p->data[1]+4, 0xFFFFFFFF);
     } else if (bits_per_pixel < 8) {
-        const uint8_t *palette = bufstart + 16;
-        pcx_palette(&palette, (uint32_t *)p->data[1], 16);
+        bytestream2_seek(&gb, 16, SEEK_SET);
+        pcx_palette(&gb, (uint32_t *)p->data[1], 16);
     }
 
     *got_frame = 1;
 
-    ret = buf - bufstart;
 end:
     av_free(scanline);
     return ret;
diff --git a/libavcodec/pcxenc.c b/libavcodec/pcxenc.c
index 4bf7377..f48063b 100644
--- a/libavcodec/pcxenc.c
+++ b/libavcodec/pcxenc.c
@@ -2,20 +2,20 @@
  * PC Paintbrush PCX (.pcx) image encoder
  * Copyright (c) 2009 Daniel Verkamp <daniel at drv.nu>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -28,6 +28,7 @@
 
 #include "avcodec.h"
 #include "bytestream.h"
+#include "libavutil/imgutils.h"
 #include "internal.h"
 
 static const uint32_t monoblack_pal[16] = { 0x000000, 0xFFFFFF };
@@ -106,8 +107,9 @@ static int pcx_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     const uint8_t *buf_end;
     uint8_t *buf;
 
-    int bpp, nplanes, i, y, line_bytes, written, ret, max_pkt_size;
+    int bpp, nplanes, i, y, line_bytes, written, ret, max_pkt_size, sw, sh;
     const uint32_t *pal = NULL;
+    uint32_t palette256[256];
     const uint8_t *src;
 
     if (avctx->width > 65535 || avctx->height > 65535) {
@@ -125,6 +127,11 @@ static int pcx_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     case AV_PIX_FMT_RGB4_BYTE:
     case AV_PIX_FMT_BGR4_BYTE:
     case AV_PIX_FMT_GRAY8:
+        bpp = 8;
+        nplanes = 1;
+        avpriv_set_systematic_pal2(palette256, avctx->pix_fmt);
+        pal = palette256;
+        break;
     case AV_PIX_FMT_PAL8:
         bpp = 8;
         nplanes = 1;
@@ -144,13 +151,16 @@ static int pcx_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     line_bytes = (line_bytes + 1) & ~1;
 
     max_pkt_size = 128 + avctx->height * 2 * line_bytes * nplanes + (pal ? 256*3 + 1 : 0);
-    if ((ret = ff_alloc_packet(pkt, max_pkt_size)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet of size %d.\n", max_pkt_size);
+    if ((ret = ff_alloc_packet2(avctx, pkt, max_pkt_size)) < 0)
         return ret;
-    }
     buf     = pkt->data;
     buf_end = pkt->data + pkt->size;
 
+    sw = avctx->sample_aspect_ratio.num;
+    sh = avctx->sample_aspect_ratio.den;
+    if (sw > 0xFFFFu || sh > 0xFFFFu)
+        av_reduce(&sw, &sh, sw, sh, 0xFFFFu);
+
     bytestream_put_byte(&buf, 10);                  // manufacturer
     bytestream_put_byte(&buf, 5);                   // version
     bytestream_put_byte(&buf, 1);                   // encoding
@@ -159,8 +169,8 @@ static int pcx_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     bytestream_put_le16(&buf, 0);                   // y min
     bytestream_put_le16(&buf, avctx->width - 1);    // x max
     bytestream_put_le16(&buf, avctx->height - 1);   // y max
-    bytestream_put_le16(&buf, 0);                   // horizontal DPI
-    bytestream_put_le16(&buf, 0);                   // vertical DPI
+    bytestream_put_le16(&buf, sw);                  // horizontal DPI
+    bytestream_put_le16(&buf, sh);                  // vertical DPI
     for (i = 0; i < 16; i++)
         bytestream_put_be24(&buf, pal ? pal[i] : 0);// palette (<= 16 color only)
     bytestream_put_byte(&buf, 0);                   // reserved
diff --git a/libavcodec/pel_template.c b/libavcodec/pel_template.c
index b832ae7..6da7a56 100644
--- a/libavcodec/pel_template.c
+++ b/libavcodec/pel_template.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/pgssubdec.c b/libavcodec/pgssubdec.c
index 6217c40..36f1f82 100644
--- a/libavcodec/pgssubdec.c
+++ b/libavcodec/pgssubdec.c
@@ -2,20 +2,20 @@
  * PGS subtitle decoder
  * Copyright (c) 2009 Stephen Backway
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -31,6 +31,7 @@
 
 #include "libavutil/colorspace.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
 
 #define RGBA(r,g,b,a) (((a) << 24) | ((r) << 16) | ((g) << 8) | (b))
 #define MAX_EPOCH_PALETTES 8   // Max 8 allowed per PGS epoch
@@ -90,9 +91,11 @@ typedef struct PGSSubPalettes {
 } PGSSubPalettes;
 
 typedef struct PGSSubContext {
+    AVClass *class; /* class handle for the decoder's private options; see pgsdec_class */
     PGSSubPresentation presentation;
     PGSSubPalettes     palettes;
     PGSSubObjects      objects;
+    int forced_subs_only; /* "forced_subs_only" option (0 or 1); when set, a rect's palette is copied only if its composition_flag has bit 0x40 */
 } PGSSubContext;
 
 static void flush_cache(AVCodecContext *avctx)
@@ -133,7 +136,7 @@ static PGSSubPalette * find_palette(int id, PGSSubPalettes *palettes)
 
 static av_cold int init_decoder(AVCodecContext *avctx)
 {
-    avctx->pix_fmt = AV_PIX_FMT_PAL8;
+    avctx->pix_fmt     = AV_PIX_FMT_PAL8;
 
     return 0;
 }
@@ -148,7 +151,7 @@ static av_cold int close_decoder(AVCodecContext *avctx)
 /**
  * Decode the RLE data.
  *
- * The subtitle is stored as an Run Length Encoded image.
+ * The subtitle is stored as a Run Length Encoded image.
  *
  * @param avctx contains the current codec context
  * @param sub pointer to the processed subtitle data
@@ -295,7 +298,7 @@ static int parse_object_segment(AVCodecContext *avctx,
     object->w = width;
     object->h = height;
 
-    av_fast_malloc(&object->rle, &object->rle_buffer_size, rle_bitmap_len);
+    av_fast_padded_malloc(&object->rle, &object->rle_buffer_size, rle_bitmap_len);
 
     if (!object->rle)
         return AVERROR(ENOMEM);
@@ -378,8 +381,8 @@ static int parse_presentation_segment(AVCodecContext *avctx,
                                       int64_t pts)
 {
     PGSSubContext *ctx = avctx->priv_data;
-
     int i, state, ret;
+    const uint8_t *buf_end = buf + buf_size;
 
     // Video descriptor
     int w = bytestream_get_be16(&buf);
@@ -428,8 +431,16 @@ static int parse_presentation_segment(AVCodecContext *avctx,
         }
     }
 
+
     for (i = 0; i < ctx->presentation.object_count; i++)
     {
+
+        if (buf_end - buf < 8) {
+            av_log(avctx, AV_LOG_ERROR, "Insufficent space for object\n");
+            ctx->presentation.object_count = i;
+            return AVERROR_INVALIDDATA;
+        }
+
         ctx->presentation.objects[i].id = bytestream_get_be16(&buf);
         ctx->presentation.objects[i].window_id = bytestream_get_byte(&buf);
         ctx->presentation.objects[i].composition_flag = bytestream_get_byte(&buf);
@@ -480,11 +491,14 @@ static int display_end_segment(AVCodecContext *avctx, void *data,
 {
     AVSubtitle    *sub = data;
     PGSSubContext *ctx = avctx->priv_data;
+    int64_t pts;
     PGSSubPalette *palette;
     int i, ret;
 
+    pts = ctx->presentation.pts != AV_NOPTS_VALUE ? ctx->presentation.pts : sub->pts;
     memset(sub, 0, sizeof(*sub));
-    sub->pts = ctx->presentation.pts;
+    sub->pts = pts;
+    ctx->presentation.pts = AV_NOPTS_VALUE;
     sub->start_display_time = 0;
     // There is no explicit end time for PGS subtitles.  The end time
     // is defined by the start of the next sub which may contain no
@@ -570,6 +584,7 @@ static int display_end_segment(AVCodecContext *avctx, void *data,
             return AVERROR(ENOMEM);
         }
 
+        if (!ctx->forced_subs_only || ctx->presentation.objects[i].composition_flag & 0x40)
         memcpy(sub->rects[i]->pict.data[1], palette->clut, sub->rects[i]->nb_colors * sizeof(uint32_t));
 
     }
@@ -625,7 +640,7 @@ static int decode(AVCodecContext *avctx, void *data, int *data_size,
             ret = parse_object_segment(avctx, buf, segment_length);
             break;
         case PRESENTATION_SEGMENT:
-            ret = parse_presentation_segment(avctx, buf, segment_length, avpkt->pts);
+            ret = parse_presentation_segment(avctx, buf, segment_length, ((AVSubtitle*)(data))->pts);
             break;
         case WINDOW_SEGMENT:
             /*
@@ -657,6 +672,20 @@ static int decode(AVCodecContext *avctx, void *data, int *data_size,
     return buf_size;
 }
 
+#define OFFSET(x) offsetof(PGSSubContext, x) /* byte offset of field x inside the private context */
+#define SD (AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_DECODING_PARAM) /* flags common to all options; parenthesized so the macro expands as a single expression */
+static const AVOption options[] = { /* private options of the PGS subtitle decoder */
+    {"forced_subs_only", "Only show forced subtitles", OFFSET(forced_subs_only), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, SD}, /* integer toggle: default 0, range 0..1 */
+    { NULL }, /* end-of-table marker */
+};
+
+static const AVClass pgsdec_class = { /* ties options[] to the decoder; installed through ff_pgssub_decoder.priv_class */
+    .class_name = "PGS subtitle decoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVCodec ff_pgssub_decoder = {
     .name           = "pgssub",
     .long_name      = NULL_IF_CONFIG_SMALL("HDMV Presentation Graphic Stream subtitles"),
@@ -666,4 +695,5 @@ AVCodec ff_pgssub_decoder = {
     .init           = init_decoder,
     .close          = close_decoder,
     .decode         = decode,
+    .priv_class     = &pgsdec_class,
 };
diff --git a/libavcodec/pictordec.c b/libavcodec/pictordec.c
index 33c4545..1bc51bc 100644
--- a/libavcodec/pictordec.c
+++ b/libavcodec/pictordec.c
@@ -2,20 +2,20 @@
  * Pictor/PC Paint decoder
  * Copyright (c) 2010 Peter Ross <pross@xvid.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -105,7 +105,7 @@ static int decode_frame(AVCodecContext *avctx,
     AVFrame *frame = data;
     uint32_t *palette;
     int bits_per_plane, bpp, etype, esize, npal, pos_after_pal;
-    int i, x, y, plane, tmp, ret;
+    int i, x, y, plane, tmp, ret, val;
 
     bytestream2_init(&s->g, avpkt->data, avpkt->size);
 
@@ -127,7 +127,7 @@ static int decode_frame(AVCodecContext *avctx,
         return AVERROR_PATCHWELCOME;
     }
 
-    if (bytestream2_peek_byte(&s->g) == 0xFF) {
+    if (bytestream2_peek_byte(&s->g) == 0xFF || bpp == 1 || bpp == 4 || bpp == 8) {
         bytestream2_skip(&s->g, 2);
         etype = bytestream2_get_le16(&s->g);
         esize = bytestream2_get_le16(&s->g);
@@ -140,16 +140,16 @@ static int decode_frame(AVCodecContext *avctx,
 
     avctx->pix_fmt = AV_PIX_FMT_PAL8;
 
+    if (av_image_check_size(s->width, s->height, 0, avctx) < 0)
+        return -1;
     if (s->width != avctx->width && s->height != avctx->height) {
         ret = ff_set_dimensions(avctx, s->width, s->height);
         if (ret < 0)
             return ret;
     }
 
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     memset(frame->data[0], 0, s->height * frame->linesize[0]);
     frame->pict_type           = AV_PICTURE_TYPE_I;
     frame->palette_has_changed = 1;
@@ -165,7 +165,7 @@ static int decode_frame(AVCodecContext *avctx,
         npal = FFMIN(esize, 16);
         for (i = 0; i < npal; i++) {
             int pal_idx = bytestream2_get_byte(&s->g);
-            palette[i]  = ff_cga_palette[FFMIN(pal_idx, 16)];
+            palette[i]  = ff_cga_palette[FFMIN(pal_idx, 15)];
         }
     } else if (etype == 3) {
         npal = FFMIN(esize, 16);
@@ -175,13 +175,15 @@ static int decode_frame(AVCodecContext *avctx,
         }
     } else if (etype == 4 || etype == 5) {
         npal = FFMIN(esize / 3, 256);
-        for (i = 0; i < npal; i++)
+        for (i = 0; i < npal; i++) {
             palette[i] = bytestream2_get_be24(&s->g) << 2;
+            palette[i] |= 0xFFU << 24 | palette[i] >> 6 & 0x30303;
+        }
     } else {
         if (bpp == 1) {
             npal = 2;
-            palette[0] = 0x000000;
-            palette[1] = 0xFFFFFF;
+            palette[0] = 0xFF000000;
+            palette[1] = 0xFFFFFFFF;
         } else if (bpp == 2) {
             npal = 4;
             for (i = 0; i < npal; i++)
@@ -196,10 +198,11 @@ static int decode_frame(AVCodecContext *avctx,
     // skip remaining palette bytes
     bytestream2_seek(&s->g, pos_after_pal, SEEK_SET);
 
-    x = 0;
+    val = 0;
     y = s->height - 1;
-    plane = 0;
     if (bytestream2_get_le16(&s->g)) {
+        x = 0;
+        plane = 0;
         while (bytestream2_get_bytes_left(&s->g) >= 6) {
             int stop_size, marker, t1, t2;
 
@@ -213,7 +216,7 @@ static int decode_frame(AVCodecContext *avctx,
             while (plane < s->nb_planes &&
                    bytestream2_get_bytes_left(&s->g) > stop_size) {
                 int run = 1;
-                int val = bytestream2_get_byte(&s->g);
+                val = bytestream2_get_byte(&s->g);
                 if (val == marker) {
                     run = bytestream2_get_byte(&s->g);
                     if (run == 0)
@@ -232,9 +235,20 @@ static int decode_frame(AVCodecContext *avctx,
                 }
             }
         }
+
+        if (x < avctx->width) {
+            int run = (y + 1) * avctx->width - x;
+            if (bits_per_plane == 8)
+                picmemset_8bpp(s, frame, val, run, &x, &y);
+            else
+                picmemset(s, frame, val, run / (8 / bits_per_plane), &x, &y, &plane, bits_per_plane);
+        }
     } else {
-        avpriv_request_sample(avctx, "Uncompressed image");
-        return avpkt->size;
+        while (y >= 0 && bytestream2_get_bytes_left(&s->g) > 0) {
+            memcpy(frame->data[0] + y * frame->linesize[0], s->g.buffer, FFMIN(avctx->width, bytestream2_get_bytes_left(&s->g)));
+            bytestream2_skip(&s->g, avctx->width);
+            y--;
+        }
     }
 finish:
 
diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c
index 71423f9..ebde68b 100644
--- a/libavcodec/pixblockdsp.c
+++ b/libavcodec/pixblockdsp.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -30,7 +30,7 @@
 #define BIT_DEPTH 8
 #include "pixblockdsp_template.c"
 
-static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1,
+static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1,
                           const uint8_t *s2, int stride)
 {
     int i;
@@ -60,13 +60,19 @@ av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx)
     switch (avctx->bits_per_raw_sample) {
     case 9:
     case 10:
+    case 12:
+    case 14:
         c->get_pixels = get_pixels_16_c;
         break;
     default:
-        c->get_pixels = get_pixels_8_c;
+        if (avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) {
+            c->get_pixels = get_pixels_8_c;
+        }
         break;
     }
 
+    if (ARCH_ALPHA)
+        ff_pixblockdsp_init_alpha(c, avctx, high_bit_depth);
     if (ARCH_ARM)
         ff_pixblockdsp_init_arm(c, avctx, high_bit_depth);
     if (ARCH_PPC)
diff --git a/libavcodec/pixblockdsp.h b/libavcodec/pixblockdsp.h
index 8094d14..2f94f8b 100644
--- a/libavcodec/pixblockdsp.h
+++ b/libavcodec/pixblockdsp.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -34,6 +34,8 @@ typedef struct PixblockDSPContext {
 } PixblockDSPContext;
 
 void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx);
+void ff_pixblockdsp_init_alpha(PixblockDSPContext *c, AVCodecContext *avctx,
+                               unsigned high_bit_depth);
 void ff_pixblockdsp_init_arm(PixblockDSPContext *c, AVCodecContext *avctx,
                              unsigned high_bit_depth);
 void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx,
diff --git a/libavcodec/pixblockdsp_template.c b/libavcodec/pixblockdsp_template.c
index 71d3cf1..3aeddf5 100644
--- a/libavcodec/pixblockdsp_template.c
+++ b/libavcodec/pixblockdsp_template.c
@@ -1,24 +1,24 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "bit_depth_template.c"
 
-static void FUNCC(get_pixels)(int16_t *restrict block, const uint8_t *_pixels,
+static void FUNCC(get_pixels)(int16_t *av_restrict block, const uint8_t *_pixels,
                               int line_size)
 {
     const pixel *pixels = (const pixel *) _pixels;
diff --git a/libavcodec/pixels.h b/libavcodec/pixels.h
index d9d2fde..98eacd4 100644
--- a/libavcodec/pixels.h
+++ b/libavcodec/pixels.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/png.c b/libavcodec/png.c
index cd75dc1..ef52b51 100644
--- a/libavcodec/png.c
+++ b/libavcodec/png.c
@@ -2,29 +2,25 @@
  * PNG image format
  * Copyright (c) 2003 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include "avcodec.h"
-#include "bytestream.h"
 #include "png.h"
 
-const uint8_t ff_pngsig[8] = { 137, 80, 78, 71, 13, 10, 26, 10 };
-const uint8_t ff_mngsig[8] = { 138, 77, 78, 71, 13, 10, 26, 10 };
-
 /* Mask to determine which y pixels are valid in a pass */
 const uint8_t ff_png_pass_ymask[NB_PASSES] = {
     0x80, 0x80, 0x08, 0x88, 0x22, 0xaa, 0x55,
@@ -40,11 +36,6 @@ static const uint8_t ff_png_pass_xshift[NB_PASSES] = {
     3, 3, 2, 2, 1, 1, 0
 };
 
-/* Mask to determine which pixels are valid in a pass */
-const uint8_t ff_png_pass_mask[NB_PASSES] = {
-    0x80, 0x08, 0x88, 0x22, 0xaa, 0x55, 0xff
-};
-
 void *ff_png_zalloc(void *opaque, unsigned int items, unsigned int size)
 {
     return av_mallocz_array(items, size);
diff --git a/libavcodec/png.h b/libavcodec/png.h
index b8c72ee..948c2f7 100644
--- a/libavcodec/png.h
+++ b/libavcodec/png.h
@@ -2,20 +2,20 @@
  * PNG image format
  * Copyright (c) 2003 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -49,15 +49,12 @@
 
 #define NB_PASSES 7
 
-extern const uint8_t ff_pngsig[8];
-extern const uint8_t ff_mngsig[8];
+#define PNGSIG 0x89504e470d0a1a0a /* PNG signature bytes 137 'P' 'N' 'G' 13 10 26 10 packed into 64 bits, first byte most significant */
+#define MNGSIG 0x8a4d4e470d0a1a0a /* MNG signature bytes 138 'M' 'N' 'G' 13 10 26 10 packed into 64 bits, first byte most significant */
 
 /* Mask to determine which y pixels are valid in a pass */
 extern const uint8_t ff_png_pass_ymask[NB_PASSES];
 
-/* Mask to determine which pixels are valid in a pass */
-extern const uint8_t ff_png_pass_mask[NB_PASSES];
-
 void *ff_png_zalloc(void *opaque, unsigned int items, unsigned int size);
 
 void ff_png_zfree(void *opaque, void *ptr);
diff --git a/libavcodec/png_parser.c b/libavcodec/png_parser.c
index d07f288..530d5a0 100644
--- a/libavcodec/png_parser.c
+++ b/libavcodec/png_parser.c
@@ -2,20 +2,20 @@
  * PNG parser
  * Copyright (c) 2009 Peter Holik
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -24,20 +24,14 @@
  * PNG parser
  */
 
-#include "libavutil/intreadwrite.h"
-#include "libavutil/common.h"
-
 #include "parser.h"
-
-#define PNG_SIGNATURE UINT64_C(0x89504e470d0a1a0a)
-#define MNG_SIGNATURE UINT64_C(0x8a4d4e470d0a1a0a)
+#include "png.h"
 
 typedef struct PNGParseContext {
     ParseContext pc;
-
-    int chunk_pos;          ///< position inside current chunk
-    int chunk_length;       ///< length of the current chunk
-    int remaining_size;     ///< remaining size of the current chunk
+    uint32_t chunk_pos;           ///< position inside current chunk
+    uint32_t chunk_length;        ///< length of the current chunk
+    uint32_t remaining_size;      ///< remaining size of the current chunk
 } PNGParseContext;
 
 static int png_parse(AVCodecParserContext *s, AVCodecContext *avctx,
@@ -48,6 +42,8 @@ static int png_parse(AVCodecParserContext *s, AVCodecContext *avctx,
     int next = END_NOT_FOUND;
     int i = 0;
 
+    s->pict_type = AV_PICTURE_TYPE_NONE;
+
     *poutbuf_size = 0;
     if (buf_size == 0)
         return 0;
@@ -56,8 +52,7 @@ static int png_parse(AVCodecParserContext *s, AVCodecContext *avctx,
         uint64_t state64 = ppc->pc.state64;
         for (; i < buf_size; i++) {
             state64 = (state64 << 8) | buf[i];
-            if (state64 == PNG_SIGNATURE ||
-                state64 == MNG_SIGNATURE) {
+            if (state64 == PNGSIG || state64 == MNGSIG) {
                 i++;
                 ppc->pc.frame_start_found = 1;
                 break;
diff --git a/libavcodec/pngdec.c b/libavcodec/pngdec.c
index fa7f7cc..92dc25a 100644
--- a/libavcodec/pngdec.c
+++ b/libavcodec/pngdec.c
@@ -2,40 +2,43 @@
  * PNG image format
  * Copyright (c) 2003 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
+
+//#define DEBUG
+
+#include "libavutil/bprint.h"
 #include "libavutil/imgutils.h"
 #include "avcodec.h"
 #include "bytestream.h"
 #include "internal.h"
 #include "png.h"
 #include "pngdsp.h"
-
-/* TODO:
- * - add 2, 4 and 16 bit depth support
- */
+#include "thread.h"
 
 #include <zlib.h>
 
 typedef struct PNGDecContext {
     PNGDSPContext dsp;
+    AVCodecContext *avctx;
 
     GetByteContext gb;
-    AVFrame *prev;
+    ThreadFrame last_picture;
+    ThreadFrame picture;
 
     int state;
     int width, height;
@@ -53,7 +56,11 @@ typedef struct PNGDecContext {
     uint32_t palette[256];
     uint8_t *crow_buf;
     uint8_t *last_row;
+    unsigned int last_row_size;
     uint8_t *tmp_row;
+    unsigned int tmp_row_size;
+    uint8_t *buffer;
+    int buffer_size;
     int pass;
     int crow_size; /* compressed row size (include filter type) */
     int row_size; /* decompressed row size */
@@ -62,9 +69,14 @@ typedef struct PNGDecContext {
     z_stream zstream;
 } PNGDecContext;
 
+/* Mask to determine which pixels are valid in a pass */
+static const uint8_t png_pass_mask[NB_PASSES] = {
+    0x01, 0x01, 0x11, 0x11, 0x55, 0x55, 0xff,
+};
+
 /* Mask to determine which y pixels can be written in a pass */
 static const uint8_t png_pass_dsp_ymask[NB_PASSES] = {
-    0xff, 0xff, 0x0f, 0xcc, 0x33, 0xff, 0x55,
+    0xff, 0xff, 0x0f, 0xff, 0x33, 0xff, 0x55,
 };
 
 /* Mask to determine which pixels to overwrite while displaying */
@@ -83,40 +95,55 @@ static void png_put_interlaced_row(uint8_t *dst, int width,
     uint8_t *d;
     const uint8_t *s;
 
-    mask     = ff_png_pass_mask[pass];
+    mask     = png_pass_mask[pass];
     dsp_mask = png_pass_dsp_mask[pass];
 
     switch (bits_per_pixel) {
     case 1:
-        /* we must initialize the line to zero before writing to it */
-        if (pass == 0)
-            memset(dst, 0, (width + 7) >> 3);
         src_x = 0;
         for (x = 0; x < width; x++) {
             j = (x & 7);
             if ((dsp_mask << j) & 0x80) {
                 b = (src[src_x >> 3] >> (7 - (src_x & 7))) & 1;
+                dst[x >> 3] &= 0xFF7F>>j;
                 dst[x >> 3] |= b << (7 - j);
             }
             if ((mask << j) & 0x80)
                 src_x++;
         }
         break;
+    case 2:
+        src_x = 0;
+        for (x = 0; x < width; x++) {
+            int j2 = 2 * (x & 3);
+            j = (x & 7);
+            if ((dsp_mask << j) & 0x80) {
+                b = (src[src_x >> 2] >> (6 - 2*(src_x & 3))) & 3;
+                dst[x >> 2] &= 0xFF3F>>j2;
+                dst[x >> 2] |= b << (6 - j2);
+            }
+            if ((mask << j) & 0x80)
+                src_x++;
+        }
+        break;
+    case 4:
+        src_x = 0;
+        for (x = 0; x < width; x++) {
+            int j2 = 4*(x&1);
+            j = (x & 7);
+            if ((dsp_mask << j) & 0x80) {
+                b = (src[src_x >> 1] >> (4 - 4*(src_x & 1))) & 15;
+                dst[x >> 1] &= 0xFF0F>>j2;
+                dst[x >> 1] |= b << (4 - j2);
+            }
+            if ((mask << j) & 0x80)
+                src_x++;
+        }
+        break;
     default:
         bpp = bits_per_pixel >> 3;
         d   = dst;
         s   = src;
-        if (color_type == PNG_COLOR_TYPE_RGB_ALPHA) {
-            for (x = 0; x < width; x++) {
-                j = x & 7;
-                if ((dsp_mask << j) & 0x80) {
-                    *(uint32_t *)d = (s[3] << 24) | (s[0] << 16) | (s[1] << 8) | s[2];
-                }
-                d += bpp;
-                if ((mask << j) & 0x80)
-                    s += bpp;
-            }
-        } else {
             for (x = 0; x < width; x++) {
                 j = x & 7;
                 if ((dsp_mask << j) & 0x80) {
@@ -126,7 +153,6 @@ static void png_put_interlaced_row(uint8_t *dst, int width,
                 if ((mask << j) & 0x80)
                     s += bpp;
             }
-        }
         break;
     }
 }
@@ -168,7 +194,7 @@ void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top,
             b = dst[2];                                                       \
         if (bpp >= 4)                                                         \
             a = dst[3];                                                       \
-        for (; i < size; i += bpp) {                                          \
+        for (; i <= size - bpp; i += bpp) {                                   \
             dst[i + 0] = r = op(r, src[i + 0], last[i + 0]);                  \
             if (bpp == 1)                                                     \
                 continue;                                                     \
@@ -191,12 +217,9 @@ void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top,
         UNROLL1(3, op)                                                        \
     } else if (bpp == 4) {                                                    \
         UNROLL1(4, op)                                                        \
-    } else {                                                                  \
-        for (; i < size; i += bpp) {                                          \
-            int j;                                                            \
-            for (j = 0; j < bpp; j++)                                         \
-                dst[i + j] = op(dst[i + j - bpp], src[i + j], last[i + j]);   \
-        }                                                                     \
+    }                                                                         \
+    for (; i < size; i++) {                                                   \
+        dst[i] = op(dst[i - bpp], src[i], last[i]);                           \
     }
 
 /* NOTE: 'dst' can be equal to 'last' */
@@ -215,12 +238,12 @@ static void png_filter_row(PNGDSPContext *dsp, uint8_t *dst, int filter_type,
         if (bpp == 4) {
             p = *(int *)dst;
             for (; i < size; i += bpp) {
-                int s = *(int *)(src + i);
+                unsigned s = *(int *)(src + i);
                 p = ((s & 0x7f7f7f7f) + (p & 0x7f7f7f7f)) ^ ((s ^ p) & 0x80808080);
                 *(int *)(dst + i) = p;
             }
         } else {
-#define OP_SUB(x, s, l) x + s
+#define OP_SUB(x, s, l) ((x) + (s))
             UNROLL_FILTER(OP_SUB);
         }
         break;
@@ -232,7 +255,7 @@ static void png_filter_row(PNGDSPContext *dsp, uint8_t *dst, int filter_type,
             p      = (last[i] >> 1);
             dst[i] = p + src[i];
         }
-#define OP_AVG(x, s, l) (((x + l) >> 1) + s) & 0xff
+#define OP_AVG(x, s, l) (((((x) + (l)) >> 1) + (s)) & 0xff)
         UNROLL_FILTER(OP_AVG);
         break;
     case PNG_FILTER_VALUE_PAETH:
@@ -252,46 +275,21 @@ static void png_filter_row(PNGDSPContext *dsp, uint8_t *dst, int filter_type,
     }
 }
 
-static av_always_inline void convert_to_rgb32_loco(uint8_t *dst,
-                                                   const uint8_t *src,
-                                                   int width, int loco)
-{
-    int j;
-    unsigned int r, g, b, a;
-
-    for (j = 0; j < width; j++) {
-        r = src[0];
-        g = src[1];
-        b = src[2];
-        a = src[3];
-        if (loco) {
-            r = (r + g) & 0xff;
-            b = (b + g) & 0xff;
-        }
-        *(uint32_t *) dst = (a << 24) | (r << 16) | (g << 8) | b;
-        dst += 4;
-        src += 4;
-    }
+/* This used to be called "deloco" in FFmpeg
+ * and is actually an inverse reversible colorspace transformation */
+#define YUV2RGB(NAME, TYPE) \
+static void deloco_ ## NAME(TYPE *dst, int size, int alpha) \
+{ \
+    int i; \
+    for (i = 0; i < size; i += 3 + alpha) { \
+        int g = dst [i + 1]; \
+        dst[i + 0] += g; \
+        dst[i + 2] += g; \
+    } \
 }
 
-static void convert_to_rgb32(uint8_t *dst, const uint8_t *src,
-                             int width, int loco)
-{
-    if (loco)
-        convert_to_rgb32_loco(dst, src, width, 1);
-    else
-        convert_to_rgb32_loco(dst, src, width, 0);
-}
-
-static void deloco_rgb24(uint8_t *dst, int size)
-{
-    int i;
-    for (i = 0; i < size; i += 3) {
-        int g = dst[i + 1];
-        dst[i + 0] += g;
-        dst[i + 2] += g;
-    }
-}
+YUV2RGB(rgb8, uint8_t)
+YUV2RGB(rgb16, uint16_t)
 
 /* process exactly one decompressed row */
 static void png_handle_row(PNGDecContext *s)
@@ -301,15 +299,6 @@ static void png_handle_row(PNGDecContext *s)
 
     if (!s->interlace_type) {
         ptr = s->image_buf + s->image_linesize * s->y;
-        /* need to swap bytes correctly for RGB_ALPHA */
-        if (s->color_type == PNG_COLOR_TYPE_RGB_ALPHA) {
-            png_filter_row(&s->dsp, s->tmp_row, s->crow_buf[0], s->crow_buf + 1,
-                           s->last_row, s->row_size, s->bpp);
-            convert_to_rgb32(ptr, s->tmp_row, s->width,
-                             s->filter_type == PNG_FILTER_TYPE_LOCO);
-            FFSWAP(uint8_t *, s->last_row, s->tmp_row);
-        } else {
-            /* in normal case, we avoid one copy */
             if (s->y == 0)
                 last_row = s->last_row;
             else
@@ -317,17 +306,28 @@ static void png_handle_row(PNGDecContext *s)
 
             png_filter_row(&s->dsp, ptr, s->crow_buf[0], s->crow_buf + 1,
                            last_row, s->row_size, s->bpp);
-        }
         /* loco lags by 1 row so that it doesn't interfere with top prediction */
-        if (s->filter_type == PNG_FILTER_TYPE_LOCO &&
-            s->color_type == PNG_COLOR_TYPE_RGB && s->y > 0)
-            deloco_rgb24(ptr - s->image_linesize, s->row_size);
+        if (s->filter_type == PNG_FILTER_TYPE_LOCO && s->y > 0) {
+            if (s->bit_depth == 16) {
+                deloco_rgb16((uint16_t *)(ptr - s->image_linesize), s->row_size / 2,
+                             s->color_type == PNG_COLOR_TYPE_RGB_ALPHA);
+            } else {
+                deloco_rgb8(ptr - s->image_linesize, s->row_size,
+                            s->color_type == PNG_COLOR_TYPE_RGB_ALPHA);
+            }
+        }
         s->y++;
         if (s->y == s->height) {
             s->state |= PNG_ALLIMAGE;
-            if (s->filter_type == PNG_FILTER_TYPE_LOCO &&
-                s->color_type == PNG_COLOR_TYPE_RGB)
-                deloco_rgb24(ptr, s->row_size);
+            if (s->filter_type == PNG_FILTER_TYPE_LOCO) {
+                if (s->bit_depth == 16) {
+                    deloco_rgb16((uint16_t *)ptr, s->row_size / 2,
+                                 s->color_type == PNG_COLOR_TYPE_RGB_ALPHA);
+                } else {
+                    deloco_rgb8(ptr, s->row_size,
+                                s->color_type == PNG_COLOR_TYPE_RGB_ALPHA);
+                }
+            }
         }
     } else {
         got_line = 0;
@@ -341,15 +341,16 @@ static void png_handle_row(PNGDecContext *s)
                 png_filter_row(&s->dsp, s->tmp_row, s->crow_buf[0], s->crow_buf + 1,
                                s->last_row, s->pass_row_size, s->bpp);
                 FFSWAP(uint8_t *, s->last_row, s->tmp_row);
+                FFSWAP(unsigned int, s->last_row_size, s->tmp_row_size);
                 got_line = 1;
             }
             if ((png_pass_dsp_ymask[s->pass] << (s->y & 7)) & 0x80) {
-                /* NOTE: RGB32 is handled directly in png_put_interlaced_row */
                 png_put_interlaced_row(ptr, s->width, s->bits_per_pixel, s->pass,
                                        s->color_type, s->last_row);
             }
             s->y++;
             if (s->y == s->height) {
+                memset(s->last_row, 0, s->row_size);
                 for (;;) {
                     if (s->pass == NB_PASSES - 1) {
                         s->state |= PNG_ALLIMAGE;
@@ -376,14 +377,15 @@ static int png_decode_idat(PNGDecContext *s, int length)
 {
     int ret;
     s->zstream.avail_in = FFMIN(length, bytestream2_get_bytes_left(&s->gb));
-    s->zstream.next_in  = s->gb.buffer;
+    s->zstream.next_in  = (unsigned char *)s->gb.buffer;
     bytestream2_skip(&s->gb, length);
 
     /* decode one line if possible */
     while (s->zstream.avail_in > 0) {
         ret = inflate(&s->zstream, Z_PARTIAL_FLUSH);
         if (ret != Z_OK && ret != Z_STREAM_END) {
-            return -1;
+            av_log(s->avctx, AV_LOG_ERROR, "inflate returned error %d\n", ret);
+            return AVERROR_EXTERNAL;
         }
         if (s->zstream.avail_out == 0) {
             if (!(s->state & PNG_ALLIMAGE)) {
@@ -401,6 +403,122 @@ static int png_decode_idat(PNGDecContext *s, int length)
     return 0;
 }
 
+/* Inflate zlib data [data, data_end) into bp (NUL-terminated); < 0 on error. */
+static int decode_zbuf(AVBPrint *bp, const uint8_t *data, const uint8_t *data_end)
+{
+    z_stream zstream;
+    unsigned char *buf;
+    unsigned buf_size;
+    int ret;
+
+    zstream.zalloc = ff_png_zalloc;
+    zstream.zfree  = ff_png_zfree;
+    zstream.opaque = NULL;
+    if (inflateInit(&zstream) != Z_OK)
+        return AVERROR_EXTERNAL;
+    zstream.next_in  = (unsigned char *)data;
+    zstream.avail_in = data_end - data;
+    av_bprint_init(bp, 0, -1);
+    /* Ask for 2 bytes but hand zlib one less, so the final NUL always fits. */
+    while (zstream.avail_in > 0) {
+        av_bprint_get_buffer(bp, 2, &buf, &buf_size);
+        if (buf_size < 2) {
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+        zstream.next_out  = buf;
+        zstream.avail_out = buf_size - 1;
+        ret = inflate(&zstream, Z_PARTIAL_FLUSH);
+        if (ret != Z_OK && ret != Z_STREAM_END) {
+            ret = AVERROR_EXTERNAL;
+            goto fail;
+        }
+        bp->len += zstream.next_out - buf; /* account for the bytes zlib wrote */
+        if (ret == Z_STREAM_END)
+            break;
+    }
+    inflateEnd(&zstream);
+    bp->str[bp->len] = 0;
+    return 0;
+    /* Error path: release both the inflate state and bp's buffer. */
+fail:
+    inflateEnd(&zstream);
+    av_bprint_finalize(bp, NULL);
+    return ret;
+}
+
+static uint8_t *iso88591_to_utf8(const uint8_t *in, size_t size_in)
+{
+    size_t extra = 0, i;
+    uint8_t *out, *q;
+    /* Returns a newly allocated, NUL-terminated UTF-8 copy of in; NULL on failure. */
+    for (i = 0; i < size_in; i++)
+        extra += in[i] >= 0x80; /* each byte >= 0x80 expands to two bytes */
+    if (size_in == SIZE_MAX || extra > SIZE_MAX - size_in - 1) /* would overflow */
+        return NULL;
+    q = out = av_malloc(size_in + extra + 1);
+    if (!out)
+        return NULL;
+    for (i = 0; i < size_in; i++) {
+        if (in[i] >= 0x80) {
+            *(q++) = 0xC0 | (in[i] >> 6);   /* lead byte: top two bits of the input */
+            *(q++) = 0x80 | (in[i] & 0x3F); /* continuation byte: low six bits */
+        } else {
+            *(q++) = in[i];
+        }
+    }
+    *(q++) = 0;
+    return out;
+}
+
+static int decode_text_chunk(PNGDecContext *s, uint32_t length, int compressed,
+                             AVDictionary **dict)
+{
+    int ret, method;
+    const uint8_t *data        = s->gb.buffer;
+    const uint8_t *data_end    = data + length;
+    const uint8_t *keyword     = data;
+    const uint8_t *keyword_end = memchr(keyword, 0, data_end - keyword);
+    uint8_t *kw_utf8 = NULL, *text, *txt_utf8 = NULL;
+    unsigned text_len;
+    AVBPrint bp;
+    /* Parse "keyword\0text" from the chunk at s->gb.buffer and store it in dict. */
+    if (!keyword_end)
+        return AVERROR_INVALIDDATA;
+    data = keyword_end + 1;
+    /* compressed payload: one method byte (only 0 is accepted), then zlib data */
+    if (compressed) {
+        if (data == data_end)
+            return AVERROR_INVALIDDATA;
+        method = *(data++);
+        if (method)
+            return AVERROR_INVALIDDATA;
+        if ((ret = decode_zbuf(&bp, data, data_end)) < 0)
+            return ret;
+        text_len = bp.len;
+        av_bprint_finalize(&bp, (char **)&text);
+        if (!text)
+            return AVERROR(ENOMEM);
+    } else {
+        text = (uint8_t *)data;
+        text_len = data_end - text;
+    }
+    /* keyword and text are both converted from ISO-8859-1 to UTF-8 */
+    kw_utf8  = iso88591_to_utf8(keyword, keyword_end - keyword);
+    txt_utf8 = iso88591_to_utf8(text, text_len);
+    if (text != data) /* text aliases the input unless it was inflated above */
+        av_free(text);
+    if (!(kw_utf8 && txt_utf8)) {
+        av_free(kw_utf8);
+        av_free(txt_utf8);
+        return AVERROR(ENOMEM);
+    }
+    /* DONT_STRDUP flags: the converted strings are handed to the dictionary as-is */
+    av_dict_set(dict, kw_utf8, txt_utf8,
+                AV_DICT_DONT_STRDUP_KEY | AV_DICT_DONT_STRDUP_VAL);
+    return 0;
+}
+
 static int decode_frame(AVCodecContext *avctx,
                         void *data, int *got_frame,
                         AVPacket *avpkt)
@@ -408,18 +526,26 @@ static int decode_frame(AVCodecContext *avctx,
     PNGDecContext *const s = avctx->priv_data;
     const uint8_t *buf     = avpkt->data;
     int buf_size           = avpkt->size;
-    AVFrame *p             = data;
-    uint8_t *crow_buf_base = NULL;
+    AVFrame *p;
+    AVDictionary *metadata  = NULL;
     uint32_t tag, length;
+    int64_t sig;
     int ret;
 
+    ff_thread_release_buffer(avctx, &s->last_picture);
+    FFSWAP(ThreadFrame, s->picture, s->last_picture);
+    p = s->picture.f;
+
+    bytestream2_init(&s->gb, buf, buf_size);
+
     /* check signature */
-    if (buf_size < 8 ||
-        memcmp(buf, ff_pngsig, 8) != 0 &&
-        memcmp(buf, ff_mngsig, 8) != 0)
-        return -1;
+    sig = bytestream2_get_be64(&s->gb);
+    if (sig != PNGSIG &&
+        sig != MNGSIG) {
+        av_log(avctx, AV_LOG_ERROR, "Missing png signature\n");
+        return AVERROR_INVALIDDATA;
+    }
 
-    bytestream2_init(&s->gb, buf + 8, buf_size - 8);
     s->y = s->state = 0;
 
     /* init the zlib */
@@ -427,16 +553,27 @@ static int decode_frame(AVCodecContext *avctx,
     s->zstream.zfree  = ff_png_zfree;
     s->zstream.opaque = NULL;
     ret = inflateInit(&s->zstream);
-    if (ret != Z_OK)
-        return -1;
+    if (ret != Z_OK) {
+        av_log(avctx, AV_LOG_ERROR, "inflateInit returned error %d\n", ret);
+        return AVERROR_EXTERNAL;
+    }
     for (;;) {
-        if (bytestream2_get_bytes_left(&s->gb) <= 0)
+        if (bytestream2_get_bytes_left(&s->gb) <= 0) {
+            av_log(avctx, AV_LOG_ERROR, "No bytes left\n");
+            if (   s->state & PNG_ALLIMAGE
+                && avctx->strict_std_compliance <= FF_COMPLIANCE_NORMAL)
+                goto exit_loop;
             goto fail;
+        }
+
         length = bytestream2_get_be32(&s->gb);
-        if (length > 0x7fffffff)
+        if (length > 0x7fffffff || length > bytestream2_get_bytes_left(&s->gb))  {
+            av_log(avctx, AV_LOG_ERROR, "chunk too big\n");
             goto fail;
+        }
         tag = bytestream2_get_le32(&s->gb);
-        av_dlog(avctx, "png: tag=%c%c%c%c length=%u\n",
+        if (avctx->debug & FF_DEBUG_STARTCODE)
+            av_log(avctx, AV_LOG_DEBUG, "png: tag=%c%c%c%c length=%u\n",
                 (tag & 0xff),
                 ((tag >> 8) & 0xff),
                 ((tag >> 16) & 0xff),
@@ -449,6 +586,7 @@ static int decode_frame(AVCodecContext *avctx,
             s->height = bytestream2_get_be32(&s->gb);
             if (av_image_check_size(s->width, s->height, 0, avctx)) {
                 s->width = s->height = 0;
+                av_log(avctx, AV_LOG_ERROR, "Invalid image size\n");
                 goto fail;
             }
             s->bit_depth        = bytestream2_get_byte(&s->gb);
@@ -458,14 +596,29 @@ static int decode_frame(AVCodecContext *avctx,
             s->interlace_type   = bytestream2_get_byte(&s->gb);
             bytestream2_skip(&s->gb, 4); /* crc */
             s->state |= PNG_IHDR;
-            av_dlog(avctx, "width=%d height=%d depth=%d color_type=%d "
+            if (avctx->debug & FF_DEBUG_PICT_INFO)
+                av_log(avctx, AV_LOG_DEBUG, "width=%d height=%d depth=%d color_type=%d "
                            "compression_type=%d filter_type=%d interlace_type=%d\n",
                     s->width, s->height, s->bit_depth, s->color_type,
                     s->compression_type, s->filter_type, s->interlace_type);
             break;
+        case MKTAG('p', 'H', 'Y', 's'):
+            if (s->state & PNG_IDAT) {
+                av_log(avctx, AV_LOG_ERROR, "pHYs after IDAT\n");
+                goto fail;
+            }
+            avctx->sample_aspect_ratio.num = bytestream2_get_be32(&s->gb);
+            avctx->sample_aspect_ratio.den = bytestream2_get_be32(&s->gb);
+            if (avctx->sample_aspect_ratio.num < 0 || avctx->sample_aspect_ratio.den < 0)
+                avctx->sample_aspect_ratio = (AVRational){ 0, 1 };
+            bytestream2_skip(&s->gb, 1); /* unit specifier */
+            bytestream2_skip(&s->gb, 4); /* crc */
+            break;
         case MKTAG('I', 'D', 'A', 'T'):
-            if (!(s->state & PNG_IHDR))
+            if (!(s->state & PNG_IHDR)) {
+                av_log(avctx, AV_LOG_ERROR, "IDAT without IHDR\n");
                 goto fail;
+            }
             if (!(s->state & PNG_IDAT)) {
                 /* init image info */
                 avctx->width  = s->width;
@@ -476,13 +629,13 @@ static int decode_frame(AVCodecContext *avctx,
                 s->bpp            = (s->bits_per_pixel + 7) >> 3;
                 s->row_size       = (avctx->width * s->bits_per_pixel + 7) >> 3;
 
-                if (s->bit_depth == 8 &&
+                if ((s->bit_depth == 2 || s->bit_depth == 4 || s->bit_depth == 8) &&
                     s->color_type == PNG_COLOR_TYPE_RGB) {
                     avctx->pix_fmt = AV_PIX_FMT_RGB24;
-                } else if (s->bit_depth == 8 &&
+                } else if ((s->bit_depth == 2 || s->bit_depth == 4 || s->bit_depth == 8) &&
                            s->color_type == PNG_COLOR_TYPE_RGB_ALPHA) {
-                    avctx->pix_fmt = AV_PIX_FMT_RGB32;
-                } else if (s->bit_depth == 8 &&
+                    avctx->pix_fmt = AV_PIX_FMT_RGBA;
+                } else if ((s->bit_depth == 2 || s->bit_depth == 4 || s->bit_depth == 8) &&
                            s->color_type == PNG_COLOR_TYPE_GRAY) {
                     avctx->pix_fmt = AV_PIX_FMT_GRAY8;
                 } else if (s->bit_depth == 16 &&
@@ -491,12 +644,14 @@ static int decode_frame(AVCodecContext *avctx,
                 } else if (s->bit_depth == 16 &&
                            s->color_type == PNG_COLOR_TYPE_RGB) {
                     avctx->pix_fmt = AV_PIX_FMT_RGB48BE;
-                } else if (s->bit_depth == 1 &&
-                           s->color_type == PNG_COLOR_TYPE_GRAY) {
-                    avctx->pix_fmt = AV_PIX_FMT_MONOBLACK;
-                } else if (s->bit_depth == 8 &&
+                } else if (s->bit_depth == 16 &&
+                           s->color_type == PNG_COLOR_TYPE_RGB_ALPHA) {
+                    avctx->pix_fmt = AV_PIX_FMT_RGBA64BE;
+                } else if ((s->bits_per_pixel == 1 || s->bits_per_pixel == 2 || s->bits_per_pixel == 4 || s->bits_per_pixel == 8) &&
                            s->color_type == PNG_COLOR_TYPE_PALETTE) {
                     avctx->pix_fmt = AV_PIX_FMT_PAL8;
+                } else if (s->bit_depth == 1) {
+                    avctx->pix_fmt = AV_PIX_FMT_MONOBLACK;
                 } else if (s->bit_depth == 8 &&
                            s->color_type == PNG_COLOR_TYPE_GRAY_ALPHA) {
                     avctx->pix_fmt = AV_PIX_FMT_YA8;
@@ -504,13 +659,16 @@ static int decode_frame(AVCodecContext *avctx,
                            s->color_type == PNG_COLOR_TYPE_GRAY_ALPHA) {
                     avctx->pix_fmt = AV_PIX_FMT_YA16BE;
                 } else {
+                    av_log(avctx, AV_LOG_ERROR, "unsupported bit depth %d "
+                                                "and color type %d\n",
+                                                 s->bit_depth, s->color_type);
                     goto fail;
                 }
 
-                if (ff_get_buffer(avctx, p, AV_GET_BUFFER_FLAG_REF) < 0) {
-                    av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+                if (ff_thread_get_buffer(avctx, &s->picture, AV_GET_BUFFER_FLAG_REF) < 0)
                     goto fail;
-                }
+                ff_thread_finish_setup(avctx);
+
                 p->pict_type        = AV_PICTURE_TYPE_I;
                 p->key_frame        = 1;
                 p->interlaced_frame = !!s->interlace_type;
@@ -530,25 +688,25 @@ static int decode_frame(AVCodecContext *avctx,
                 s->image_buf      = p->data[0];
                 s->image_linesize = p->linesize[0];
                 /* copy the palette if needed */
-                if (s->color_type == PNG_COLOR_TYPE_PALETTE)
+                if (avctx->pix_fmt == AV_PIX_FMT_PAL8)
                     memcpy(p->data[1], s->palette, 256 * sizeof(uint32_t));
                 /* empty row is used if differencing to the first row */
-                s->last_row = av_mallocz(s->row_size);
+                av_fast_padded_mallocz(&s->last_row, &s->last_row_size, s->row_size);
                 if (!s->last_row)
                     goto fail;
                 if (s->interlace_type ||
                     s->color_type == PNG_COLOR_TYPE_RGB_ALPHA) {
-                    s->tmp_row = av_malloc(s->row_size);
+                    av_fast_padded_malloc(&s->tmp_row, &s->tmp_row_size, s->row_size);
                     if (!s->tmp_row)
                         goto fail;
                 }
                 /* compressed row */
-                crow_buf_base = av_malloc(s->row_size + 16);
-                if (!crow_buf_base)
+                av_fast_padded_malloc(&s->buffer, &s->buffer_size, s->row_size + 16);
+                if (!s->buffer)
                     goto fail;
 
                 /* we want crow_buf+1 to be 16-byte aligned */
-                s->crow_buf          = crow_buf_base + 15;
+                s->crow_buf          = s->buffer + 15;
                 s->zstream.avail_out = s->crow_size;
                 s->zstream.next_out  = s->crow_buf;
             }
@@ -569,10 +727,10 @@ static int decode_frame(AVCodecContext *avctx,
                 r = bytestream2_get_byte(&s->gb);
                 g = bytestream2_get_byte(&s->gb);
                 b = bytestream2_get_byte(&s->gb);
-                s->palette[i] = (0xff << 24) | (r << 16) | (g << 8) | b;
+                s->palette[i] = (0xFFU << 24) | (r << 16) | (g << 8) | b;
             }
             for (; i < 256; i++)
-                s->palette[i] = (0xff << 24);
+                s->palette[i] = (0xFFU << 24);
             s->state |= PNG_PLTE;
             bytestream2_skip(&s->gb, 4);     /* crc */
         }
@@ -593,9 +751,22 @@ static int decode_frame(AVCodecContext *avctx,
             bytestream2_skip(&s->gb, 4);     /* crc */
         }
         break;
+        case MKTAG('t', 'E', 'X', 't'):
+            if (decode_text_chunk(s, length, 0, &metadata) < 0)
+                av_log(avctx, AV_LOG_WARNING, "Broken tEXt chunk\n");
+            bytestream2_skip(&s->gb, length + 4);
+            break;
+        case MKTAG('z', 'T', 'X', 't'):
+            if (decode_text_chunk(s, length, 1, &metadata) < 0)
+                av_log(avctx, AV_LOG_WARNING, "Broken zTXt chunk\n");
+            bytestream2_skip(&s->gb, length + 4);
+            break;
         case MKTAG('I', 'E', 'N', 'D'):
             if (!(s->state & PNG_ALLIMAGE))
+                av_log(avctx, AV_LOG_ERROR, "IEND without all image\n");
+            if (!(s->state & (PNG_ALLIMAGE|PNG_IDAT))) {
                 goto fail;
+            }
             bytestream2_skip(&s->gb, 4); /* crc */
             goto exit_loop;
         default:
@@ -606,13 +777,91 @@ skip_tag:
         }
     }
 exit_loop:
+
+    if (s->bits_per_pixel == 1 && s->color_type == PNG_COLOR_TYPE_PALETTE){
+        int i, j, k;
+        uint8_t *pd = p->data[0];
+        for (j = 0; j < s->height; j++) {
+            i = s->width / 8;
+            for (k = 7; k >= 1; k--)
+                if ((s->width&7) >= k)
+                    pd[8*i + k - 1] = (pd[i]>>8-k) & 1;
+            for (i--; i >= 0; i--) {
+                pd[8*i + 7]=  pd[i]     & 1;
+                pd[8*i + 6]= (pd[i]>>1) & 1;
+                pd[8*i + 5]= (pd[i]>>2) & 1;
+                pd[8*i + 4]= (pd[i]>>3) & 1;
+                pd[8*i + 3]= (pd[i]>>4) & 1;
+                pd[8*i + 2]= (pd[i]>>5) & 1;
+                pd[8*i + 1]= (pd[i]>>6) & 1;
+                pd[8*i + 0]=  pd[i]>>7;
+            }
+            pd += s->image_linesize;
+        }
+    }
+    if (s->bits_per_pixel == 2){
+        int i, j;
+        uint8_t *pd = p->data[0];
+        for (j = 0; j < s->height; j++) {
+            i = s->width / 4;
+            if (s->color_type == PNG_COLOR_TYPE_PALETTE){
+                if ((s->width&3) >= 3) pd[4*i + 2]= (pd[i] >> 2) & 3;
+                if ((s->width&3) >= 2) pd[4*i + 1]= (pd[i] >> 4) & 3;
+                if ((s->width&3) >= 1) pd[4*i + 0]=  pd[i] >> 6;
+                for (i--; i >= 0; i--) {
+                    pd[4*i + 3]=  pd[i]     & 3;
+                    pd[4*i + 2]= (pd[i]>>2) & 3;
+                    pd[4*i + 1]= (pd[i]>>4) & 3;
+                    pd[4*i + 0]=  pd[i]>>6;
+                }
+            } else {
+                if ((s->width&3) >= 3) pd[4*i + 2]= ((pd[i]>>2) & 3)*0x55;
+                if ((s->width&3) >= 2) pd[4*i + 1]= ((pd[i]>>4) & 3)*0x55;
+                if ((s->width&3) >= 1) pd[4*i + 0]= ( pd[i]>>6     )*0x55;
+                for (i--; i >= 0; i--) {
+                    pd[4*i + 3]= ( pd[i]     & 3)*0x55;
+                    pd[4*i + 2]= ((pd[i]>>2) & 3)*0x55;
+                    pd[4*i + 1]= ((pd[i]>>4) & 3)*0x55;
+                    pd[4*i + 0]= ( pd[i]>>6     )*0x55;
+                }
+            }
+            pd += s->image_linesize;
+        }
+    }
+    if (s->bits_per_pixel == 4){
+        int i, j;
+        uint8_t *pd = p->data[0];
+        for (j = 0; j < s->height; j++) {
+            i = s->width/2;
+            if (s->color_type == PNG_COLOR_TYPE_PALETTE){
+                if (s->width&1) pd[2*i+0]= pd[i]>>4;
+                for (i--; i >= 0; i--) {
+                pd[2*i + 1] = pd[i] & 15;
+                pd[2*i + 0] = pd[i] >> 4;
+            }
+            } else {
+                if (s->width & 1) pd[2*i + 0]= (pd[i] >> 4) * 0x11;
+                for (i--; i >= 0; i--) {
+                    pd[2*i + 1] = (pd[i] & 15) * 0x11;
+                    pd[2*i + 0] = (pd[i] >> 4) * 0x11;
+                }
+            }
+            pd += s->image_linesize;
+        }
+    }
+
     /* handle p-frames only if a predecessor frame is available */
-    if (s->prev->data[0]) {
-        if (!(avpkt->flags & AV_PKT_FLAG_KEY)) {
+    if (s->last_picture.f->data[0]) {
+        if (   !(avpkt->flags & AV_PKT_FLAG_KEY) && avctx->codec_tag != AV_RL32("MPNG")
+            && s->last_picture.f->width == p->width
+            && s->last_picture.f->height== p->height
+            && s->last_picture.f->format== p->format
+         ) {
             int i, j;
             uint8_t *pd      = p->data[0];
-            uint8_t *pd_last = s->prev->data[0];
+            uint8_t *pd_last = s->last_picture.f->data[0];
 
+            ff_thread_await_progress(&s->last_picture, INT_MAX, 0);
             for (j = 0; j < s->height; j++) {
                 for (i = 0; i < s->width * s->bpp; i++)
                     pd[i] += pd_last[i];
@@ -621,35 +870,57 @@ exit_loop:
             }
         }
     }
+    ff_thread_report_progress(&s->picture, INT_MAX, 0);
+
+    av_frame_set_metadata(p, metadata);
+    metadata   = NULL;
 
-    av_frame_unref(s->prev);
-    if ((ret = av_frame_ref(s->prev, p)) < 0)
-        goto fail;
+    if ((ret = av_frame_ref(data, s->picture.f)) < 0)
+        return ret;
 
     *got_frame = 1;
 
     ret = bytestream2_tell(&s->gb);
 the_end:
     inflateEnd(&s->zstream);
-    av_free(crow_buf_base);
     s->crow_buf = NULL;
-    av_freep(&s->last_row);
-    av_freep(&s->tmp_row);
     return ret;
 fail:
-    ret = -1;
+    av_dict_free(&metadata);
+    ff_thread_report_progress(&s->picture, INT_MAX, 0);
+    ret = AVERROR_INVALIDDATA;
     goto the_end;
 }
 
+static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
+{
+    PNGDecContext *psrc = src->priv_data;
+    PNGDecContext *pdst = dst->priv_data;
+    /* Thread-context update hook: make dst's picture reference src's picture. */
+    if (dst == src)
+        return 0;
+    /* Release whatever dst held, then take a new reference if src has data. */
+    ff_thread_release_buffer(dst, &pdst->picture);
+    if (psrc->picture.f->data[0])
+        return ff_thread_ref_frame(&pdst->picture, &psrc->picture);
+    /* src holds no picture data, so dst is simply left released. */
+    return 0;
+}
+
 static av_cold int png_dec_init(AVCodecContext *avctx)
 {
     PNGDecContext *s = avctx->priv_data;
 
-    s->prev = av_frame_alloc();
-    if (!s->prev)
+    s->avctx = avctx;
+    s->last_picture.f = av_frame_alloc(); /* previous frame, added onto non-key frames by decode_frame */
+    s->picture.f = av_frame_alloc(); /* frame that decode_frame references into its output */
+    if (!s->last_picture.f || !s->picture.f)
         return AVERROR(ENOMEM);
 
-    ff_pngdsp_init(&s->dsp);
+    if (!avctx->internal->is_copy) { /* also registered as init_thread_copy; copies skip this part */
+        avctx->internal->allocate_progress = 1;
+        ff_pngdsp_init(&s->dsp);
+    }
 
     return 0;
 }
@@ -658,7 +929,16 @@ static av_cold int png_dec_end(AVCodecContext *avctx)
 {
     PNGDecContext *s = avctx->priv_data;
 
-    av_frame_free(&s->prev);
+    ff_thread_release_buffer(avctx, &s->last_picture);
+    av_frame_free(&s->last_picture.f);
+    ff_thread_release_buffer(avctx, &s->picture);
+    av_frame_free(&s->picture.f);
+    av_freep(&s->buffer);
+    s->buffer_size = 0;
+    av_freep(&s->last_row);
+    s->last_row_size = 0;
+    av_freep(&s->tmp_row);
+    s->tmp_row_size = 0;
 
     return 0;
 }
@@ -672,5 +952,7 @@ AVCodec ff_png_decoder = {
     .init           = png_dec_init,
     .close          = png_dec_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
+    .init_thread_copy = ONLY_IF_THREADS_ENABLED(png_dec_init),
+    .update_thread_context = ONLY_IF_THREADS_ENABLED(update_thread_context),
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
 };
diff --git a/libavcodec/pngdsp.c b/libavcodec/pngdsp.c
index c0e9402..d275316 100644
--- a/libavcodec/pngdsp.c
+++ b/libavcodec/pngdsp.c
@@ -2,20 +2,20 @@
  * PNG image format
  * Copyright (c) 2008 Loren Merrit <lorenm@u.washington.edu>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/pngdsp.h b/libavcodec/pngdsp.h
index 98d29a8..1475b0c 100644
--- a/libavcodec/pngdsp.h
+++ b/libavcodec/pngdsp.h
@@ -2,20 +2,20 @@
  * PNG image format
  * Copyright (c) 2008 Loren Merrit <lorenm@u.washington.edu>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/pngenc.c b/libavcodec/pngenc.c
index 3d11f37..401b1a2 100644
--- a/libavcodec/pngenc.c
+++ b/libavcodec/pngenc.c
@@ -2,37 +2,38 @@
  * PNG image format
  * Copyright (c) 2003 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "avcodec.h"
+#include "internal.h"
 #include "bytestream.h"
 #include "huffyuvencdsp.h"
 #include "png.h"
 
-/* TODO:
- * - add 2, 4 and 16 bit depth support
- */
+#include "libavutil/avassert.h"
+#include "libavutil/opt.h"
 
 #include <zlib.h>
 
 #define IOBUF_SIZE 4096
 
 typedef struct PNGEncContext {
+    AVClass *class;
     HuffYUVEncDSPContext hdsp;
 
     uint8_t *bytestream;
@@ -43,6 +44,8 @@ typedef struct PNGEncContext {
 
     z_stream zstream;
     uint8_t buf[IOBUF_SIZE];
+    int dpi;                     ///< Physical pixel density, in dots per inch, if set
+    int dpm;                     ///< Physical pixel density, in dots per meter, if set
 } PNGEncContext;
 
 static void png_get_interlaced_row(uint8_t *dst, int row_size,
@@ -52,8 +55,9 @@ static void png_get_interlaced_row(uint8_t *dst, int row_size,
     int x, mask, dst_x, j, b, bpp;
     uint8_t *d;
     const uint8_t *s;
+    static const int masks[] = {0x80, 0x08, 0x88, 0x22, 0xaa, 0x55, 0xff};
 
-    mask = ff_png_pass_mask[pass];
+    mask = masks[pass];
     switch (bits_per_pixel) {
     case 1:
         memset(dst, 0, row_size);
@@ -111,6 +115,22 @@ static void sub_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top,
     }
 }
 
+static void sub_left_prediction(PNGEncContext *c, uint8_t *dst, const uint8_t *src, int bpp, int size)
+{
+    const uint8_t *src1 = src + bpp; /* current byte */
+    const uint8_t *src2 = src;       /* byte located bpp positions to its left */
+    int x, unaligned_w;
+    /* Sub filter: dst[i] = src[i] - src[i - bpp]; the first bpp bytes are copied as-is. */
+    memcpy(dst, src, bpp);
+    dst += bpp;
+    size -= bpp;
+    unaligned_w = FFMIN(32 - bpp, size); /* scalar head: at most 32 bytes are done before diff_bytes */
+    for (x = 0; x < unaligned_w; x++)
+        *dst++ = *src1++ - *src2++;
+    size -= unaligned_w;
+    c->hdsp.diff_bytes(dst, src1, src2, size); /* rest via the DSP routine; NOTE(review): head presumably exists for alignment - confirm */
+}
+
 static void png_filter_row(PNGEncContext *c, uint8_t *dst, int filter_type,
                            uint8_t *src, uint8_t *top, int size, int bpp)
 {
@@ -121,8 +141,7 @@ static void png_filter_row(PNGEncContext *c, uint8_t *dst, int filter_type,
         memcpy(dst, src, size);
         break;
     case PNG_FILTER_VALUE_SUB:
-        c->hdsp.diff_bytes(dst, src, src - bpp, size);
-        memcpy(dst, src, bpp);
+        sub_left_prediction(c, dst, src, bpp, size);
         break;
     case PNG_FILTER_VALUE_UP:
         c->hdsp.diff_bytes(dst, src, top, size);
@@ -145,7 +164,7 @@ static uint8_t *png_choose_filter(PNGEncContext *s, uint8_t *dst,
                                   uint8_t *src, uint8_t *top, int size, int bpp)
 {
     int pred = s->filter_type;
-    assert(bpp || !pred);
+    av_assert0(bpp || !pred);
     if (!top && pred)
         pred = PNG_FILTER_VALUE_SUB;
     if (pred == PNG_FILTER_VALUE_MIXED) {
@@ -171,23 +190,6 @@ static uint8_t *png_choose_filter(PNGEncContext *s, uint8_t *dst,
     }
 }
 
-static void convert_from_rgb32(uint8_t *dst, const uint8_t *src, int width)
-{
-    uint8_t *d;
-    int j;
-    unsigned int v;
-
-    d = dst;
-    for (j = 0; j < width; j++) {
-        v    = ((const uint32_t *) src)[j];
-        d[0] = v >> 16;
-        d[1] = v >> 8;
-        d[2] = v;
-        d[3] = v >> 24;
-        d   += 4;
-    }
-}
-
 static void png_write_chunk(uint8_t **f, uint32_t tag,
                             const uint8_t *buf, int length)
 {
@@ -235,12 +237,12 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     PNGEncContext *s       = avctx->priv_data;
     const AVFrame *const p = pict;
     int bit_depth, color_type, y, len, row_size, ret, is_progressive;
-    int bits_per_pixel, pass_row_size, enc_row_size, max_packet_size;
+    int bits_per_pixel, pass_row_size, enc_row_size;
+    int64_t max_packet_size;
     int compression_level;
     uint8_t *ptr, *top, *crow_buf, *crow;
     uint8_t *crow_base       = NULL;
     uint8_t *progressive_buf = NULL;
-    uint8_t *rgba_buf        = NULL;
     uint8_t *top_buf         = NULL;
 
     is_progressive = !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
@@ -253,7 +255,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         bit_depth = 16;
         color_type = PNG_COLOR_TYPE_RGB;
         break;
-    case AV_PIX_FMT_RGB32:
+    case AV_PIX_FMT_RGBA:
         bit_depth  = 8;
         color_type = PNG_COLOR_TYPE_RGB_ALPHA;
         break;
@@ -269,6 +271,10 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         bit_depth  = 8;
         color_type = PNG_COLOR_TYPE_GRAY;
         break;
+    case AV_PIX_FMT_GRAY8A:
+        bit_depth = 8;
+        color_type = PNG_COLOR_TYPE_GRAY_ALPHA;
+        break;
     case AV_PIX_FMT_MONOBLACK:
         bit_depth  = 1;
         color_type = PNG_COLOR_TYPE_GRAY;
@@ -295,15 +301,13 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         return -1;
 
     enc_row_size    = deflateBound(&s->zstream, row_size);
-    max_packet_size = avctx->height * (enc_row_size +
+    max_packet_size = avctx->height * (int64_t)(enc_row_size +
                                        ((enc_row_size + IOBUF_SIZE - 1) / IOBUF_SIZE) * 12)
                       + FF_MIN_BUFFER_SIZE;
-    if (!pkt->data &&
-        (ret = av_new_packet(pkt, max_packet_size)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Could not allocate output packet of size %d.\n",
-               max_packet_size);
+    if (max_packet_size > INT_MAX)
+        return AVERROR(ENOMEM);
+    if ((ret = ff_alloc_packet2(avctx, pkt, max_packet_size)) < 0)
         return ret;
-    }
 
     s->bytestream_start =
     s->bytestream       = pkt->data;
@@ -319,19 +323,14 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         if (!progressive_buf)
             goto fail;
     }
-    if (color_type == PNG_COLOR_TYPE_RGB_ALPHA) {
-        rgba_buf = av_malloc(row_size + 1);
-        if (!rgba_buf)
-            goto fail;
-    }
-    if (is_progressive || color_type == PNG_COLOR_TYPE_RGB_ALPHA) {
+    if (is_progressive) {
         top_buf = av_malloc(row_size + 1);
         if (!top_buf)
             goto fail;
     }
 
     /* write png header */
-    memcpy(s->bytestream, ff_pngsig, 8);
+    AV_WB64(s->bytestream, PNGSIG);
     s->bytestream += 8;
 
     AV_WB32(s->buf, avctx->width);
@@ -344,6 +343,17 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
     png_write_chunk(&s->bytestream, MKTAG('I', 'H', 'D', 'R'), s->buf, 13);
 
+    if (s->dpm) {
+      AV_WB32(s->buf, s->dpm);
+      AV_WB32(s->buf + 4, s->dpm);
+      s->buf[8] = 1; /* unit specifier is meter */
+    } else {
+      AV_WB32(s->buf, avctx->sample_aspect_ratio.num);
+      AV_WB32(s->buf + 4, avctx->sample_aspect_ratio.den);
+      s->buf[8] = 0; /* unit specifier is unknown */
+    }
+    png_write_chunk(&s->bytestream, MKTAG('p', 'H', 'Y', 's'), s->buf, 9);
+
     /* put the palette if needed */
     if (color_type == PNG_COLOR_TYPE_PALETTE) {
         int has_alpha, alpha, i;
@@ -358,7 +368,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         for (i = 0; i < 256; i++) {
             v     = palette[i];
             alpha = v >> 24;
-            if (alpha && alpha != 0xff)
+            if (alpha != 0xff)
                 has_alpha = 1;
             *alpha_ptr++ = alpha;
             bytestream_put_be24(&ptr, v);
@@ -387,10 +397,6 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                     if ((ff_png_pass_ymask[pass] << (y & 7)) & 0x80) {
                         ptr = p->data[0] + y * p->linesize[0];
                         FFSWAP(uint8_t *, progressive_buf, top_buf);
-                        if (color_type == PNG_COLOR_TYPE_RGB_ALPHA) {
-                            convert_from_rgb32(rgba_buf, ptr, avctx->width);
-                            ptr = rgba_buf;
-                        }
                         png_get_interlaced_row(progressive_buf, pass_row_size,
                                                bits_per_pixel, pass,
                                                ptr, avctx->width);
@@ -405,11 +411,6 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         top = NULL;
         for (y = 0; y < avctx->height; y++) {
             ptr = p->data[0] + y * p->linesize[0];
-            if (color_type == PNG_COLOR_TYPE_RGB_ALPHA) {
-                FFSWAP(uint8_t *, rgba_buf, top_buf);
-                convert_from_rgb32(rgba_buf, ptr, avctx->width);
-                ptr = rgba_buf;
-            }
             crow = png_choose_filter(s, crow_buf, ptr, top,
                                      row_size, bits_per_pixel >> 3);
             png_write_row(s, crow, row_size + 1);
@@ -442,7 +443,6 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 the_end:
     av_free(crow_base);
     av_free(progressive_buf);
-    av_free(rgba_buf);
     av_free(top_buf);
     deflateEnd(&s->zstream);
     return ret;
@@ -455,6 +455,23 @@ static av_cold int png_enc_init(AVCodecContext *avctx)
 {
     PNGEncContext *s = avctx->priv_data;
 
+    switch (avctx->pix_fmt) {
+    case AV_PIX_FMT_RGBA:
+        avctx->bits_per_coded_sample = 32;
+        break;
+    case AV_PIX_FMT_RGB24:
+        avctx->bits_per_coded_sample = 24;
+        break;
+    case AV_PIX_FMT_GRAY8:
+        avctx->bits_per_coded_sample = 0x28;
+        break;
+    case AV_PIX_FMT_MONOBLACK:
+        avctx->bits_per_coded_sample = 1;
+        break;
+    case AV_PIX_FMT_PAL8:
+        avctx->bits_per_coded_sample = 8;
+    }
+
     avctx->coded_frame = av_frame_alloc();
     if (!avctx->coded_frame)
         return AVERROR(ENOMEM);
@@ -470,6 +487,13 @@ static av_cold int png_enc_init(AVCodecContext *avctx)
     if (avctx->pix_fmt == AV_PIX_FMT_MONOBLACK)
         s->filter_type = PNG_FILTER_VALUE_NONE;
 
+    if (s->dpi && s->dpm) {
+      av_log(avctx, AV_LOG_ERROR, "Only one of 'dpi' or 'dpm' options should be set\n");
+      return AVERROR(EINVAL);
+    } else if (s->dpi) {
+      s->dpm = s->dpi * 10000 / 254;
+    }
+
     return 0;
 }
 
@@ -479,6 +503,21 @@ static av_cold int png_enc_close(AVCodecContext *avctx)
     return 0;
 }
 
+#define OFFSET(x) offsetof(PNGEncContext, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = { /* private options; png_enc_init() rejects dpi and dpm both set and derives dpm from dpi */
+    {"dpi", "Set image resolution (in dots per inch)",  OFFSET(dpi), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 0x10000, VE},
+    {"dpm", "Set image resolution (in dots per meter)", OFFSET(dpm), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 0x10000, VE},
+    { NULL } /* table terminator */
+};
+
+static const AVClass pngenc_class = { /* option class for the encoder; installed via .priv_class of ff_png_encoder */
+    .class_name = "PNG encoder",
+    .item_name  = av_default_item_name,
+    .option     = options, /* the dpi/dpm table defined above */
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVCodec ff_png_encoder = {
     .name           = "png",
     .long_name      = NULL_IF_CONFIG_SMALL("PNG (Portable Network Graphics) image"),
@@ -488,9 +527,14 @@ AVCodec ff_png_encoder = {
     .init           = png_enc_init,
     .close          = png_enc_close,
     .encode2        = encode_frame,
+    .capabilities   = CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
     .pix_fmts       = (const enum AVPixelFormat[]) {
-        AV_PIX_FMT_RGB24, AV_PIX_FMT_RGB32, AV_PIX_FMT_PAL8, AV_PIX_FMT_GRAY8,
-        AV_PIX_FMT_RGBA64BE, AV_PIX_FMT_RGB48BE, AV_PIX_FMT_GRAY16BE,
+        AV_PIX_FMT_RGB24, AV_PIX_FMT_RGBA,
+        AV_PIX_FMT_RGB48BE, AV_PIX_FMT_RGBA64BE,
+        AV_PIX_FMT_PAL8,
+        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY8A,
+        AV_PIX_FMT_GRAY16BE,
         AV_PIX_FMT_MONOBLACK, AV_PIX_FMT_NONE
     },
+    .priv_class     = &pngenc_class,
 };
diff --git a/libavcodec/pnm.c b/libavcodec/pnm.c
index 1c380b0..502e550 100644
--- a/libavcodec/pnm.c
+++ b/libavcodec/pnm.c
@@ -2,20 +2,20 @@
  * PNM image format
  * Copyright (c) 2002, 2003 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -37,12 +37,12 @@ static void pnm_get(PNMContext *sc, char *str, int buf_size)
     int c;
 
     /* skip spaces and comments */
-    for (;;) {
+    while (sc->bytestream < sc->bytestream_end) {
         c = *sc->bytestream++;
         if (c == '#')  {
-            do {
+            while (c != '\n' && sc->bytestream < sc->bytestream_end) {
                 c = *sc->bytestream++;
-            } while (c != '\n' && sc->bytestream < sc->bytestream_end);
+            }
         } else if (!pnm_space(c)) {
             break;
         }
@@ -63,9 +63,9 @@ int ff_pnm_decode_header(AVCodecContext *avctx, PNMContext * const s)
     int h, w, depth, maxval;
 
     pnm_get(s, buf1, sizeof(buf1));
-    s->type= buf1[1]-'0';
     if(buf1[0] != 'P')
         return AVERROR_INVALIDDATA;
+    s->type= buf1[1]-'0';
 
     if (s->type==1 || s->type==4) {
         avctx->pix_fmt = AV_PIX_FMT_MONOWHITE;
@@ -107,26 +107,35 @@ int ff_pnm_decode_header(AVCodecContext *avctx, PNMContext * const s)
             }
         }
         /* check that all tags are present */
-        if (w <= 0 || h <= 0 || maxval <= 0 || depth <= 0 || tuple_type[0] == '\0' || av_image_check_size(w, h, 0, avctx))
+        if (w <= 0 || h <= 0 || maxval <= 0 || depth <= 0 || tuple_type[0] == '\0' || av_image_check_size(w, h, 0, avctx) || s->bytestream >= s->bytestream_end)
             return AVERROR_INVALIDDATA;
 
         avctx->width  = w;
         avctx->height = h;
+        s->maxval     = maxval;
         if (depth == 1) {
-            if (maxval == 1)
-                avctx->pix_fmt = AV_PIX_FMT_MONOWHITE;
-            else
+            if (maxval == 1) {
+                avctx->pix_fmt = AV_PIX_FMT_MONOBLACK;
+            } else if (maxval < 256) {
                 avctx->pix_fmt = AV_PIX_FMT_GRAY8;
+            } else {
+                avctx->pix_fmt = AV_PIX_FMT_GRAY16;
+            }
+        } else if (depth == 2) {
+            if (maxval == 255)
+                avctx->pix_fmt = AV_PIX_FMT_GRAY8A;
         } else if (depth == 3) {
             if (maxval < 256) {
-            avctx->pix_fmt = AV_PIX_FMT_RGB24;
+                avctx->pix_fmt = AV_PIX_FMT_RGB24;
             } else {
-                av_log(avctx, AV_LOG_ERROR, "16-bit components are only supported for grayscale\n");
-                avctx->pix_fmt = AV_PIX_FMT_NONE;
-                return AVERROR_INVALIDDATA;
+                avctx->pix_fmt = AV_PIX_FMT_RGB48;
             }
         } else if (depth == 4) {
-            avctx->pix_fmt = AV_PIX_FMT_RGB32;
+            if (maxval < 256) {
+                avctx->pix_fmt = AV_PIX_FMT_RGBA;
+            } else {
+                avctx->pix_fmt = AV_PIX_FMT_RGBA64;
+            }
         } else {
             return AVERROR_INVALIDDATA;
         }
@@ -135,14 +144,16 @@ int ff_pnm_decode_header(AVCodecContext *avctx, PNMContext * const s)
         return AVERROR_INVALIDDATA;
     }
     pnm_get(s, buf1, sizeof(buf1));
-    avctx->width = atoi(buf1);
-    if (avctx->width <= 0)
-        return AVERROR_INVALIDDATA;
+    w = atoi(buf1);
     pnm_get(s, buf1, sizeof(buf1));
-    avctx->height = atoi(buf1);
-    if(av_image_check_size(avctx->width, avctx->height, 0, avctx))
+    h = atoi(buf1);
+    if(w <= 0 || h <= 0 || av_image_check_size(w, h, 0, avctx) || s->bytestream >= s->bytestream_end)
         return AVERROR_INVALIDDATA;
-    if (avctx->pix_fmt != AV_PIX_FMT_MONOWHITE) {
+
+    avctx->width  = w;
+    avctx->height = h;
+
+    if (avctx->pix_fmt != AV_PIX_FMT_MONOWHITE && avctx->pix_fmt != AV_PIX_FMT_MONOBLACK) {
         pnm_get(s, buf1, sizeof(buf1));
         s->maxval = atoi(buf1);
         if (s->maxval <= 0) {
@@ -151,17 +162,14 @@ int ff_pnm_decode_header(AVCodecContext *avctx, PNMContext * const s)
         }
         if (s->maxval >= 256) {
             if (avctx->pix_fmt == AV_PIX_FMT_GRAY8) {
-                avctx->pix_fmt = AV_PIX_FMT_GRAY16BE;
-                if (s->maxval != 65535)
-                    avctx->pix_fmt = AV_PIX_FMT_GRAY16;
+                avctx->pix_fmt = AV_PIX_FMT_GRAY16;
             } else if (avctx->pix_fmt == AV_PIX_FMT_RGB24) {
-                if (s->maxval > 255)
-                    avctx->pix_fmt = AV_PIX_FMT_RGB48BE;
+                avctx->pix_fmt = AV_PIX_FMT_RGB48;
             } else if (avctx->pix_fmt == AV_PIX_FMT_YUV420P && s->maxval < 65536) {
                 if (s->maxval < 512)
-                    avctx->pix_fmt = AV_PIX_FMT_YUV420P9BE;
+                    avctx->pix_fmt = AV_PIX_FMT_YUV420P9;
                 else if (s->maxval < 1024)
-                    avctx->pix_fmt = AV_PIX_FMT_YUV420P10BE;
+                    avctx->pix_fmt = AV_PIX_FMT_YUV420P10;
                 else
                     avctx->pix_fmt = AV_PIX_FMT_YUV420P16;
             } else {
diff --git a/libavcodec/pnm.h b/libavcodec/pnm.h
index 5fc6513..5bc0aad 100644
--- a/libavcodec/pnm.h
+++ b/libavcodec/pnm.h
@@ -2,20 +2,20 @@
  * PNM image format
  * Copyright (c) 2002, 2003 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/pnm_parser.c b/libavcodec/pnm_parser.c
index 2e00c0a..2a9e3e1 100644
--- a/libavcodec/pnm_parser.c
+++ b/libavcodec/pnm_parser.c
@@ -2,20 +2,20 @@
  * PNM image parser
  * Copyright (c) 2002, 2003 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/pnmdec.c b/libavcodec/pnmdec.c
index 9074d9b..c84b6eb 100644
--- a/libavcodec/pnmdec.c
+++ b/libavcodec/pnmdec.c
@@ -2,29 +2,39 @@
  * PNM image format
  * Copyright (c) 2002, 2003 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "avcodec.h"
-#include "bytestream.h"
 #include "internal.h"
 #include "put_bits.h"
 #include "pnm.h"
 
+static void samplecpy(uint8_t *dst, const uint8_t *src, int n, int maxval)
+{ /* Copy n bytes of sample data from src to dst; maxval selects the sample width. */
+    if (maxval <= 255) { /* one byte per sample: plain copy */
+        memcpy(dst, src, n);
+    } else { /* two bytes per sample: read via AV_RB16, store as native uint16_t */
+        int i;
+        for (i=0; i<n/2; i++) { /* n is a byte count, so n/2 samples; an odd trailing byte is not copied */
+            ((uint16_t *)dst)[i] = AV_RB16(src+2*i);
+        }
+    }
+}
 
 static int pnm_decode_frame(AVCodecContext *avctx, void *data,
                             int *got_frame, AVPacket *avpkt)
@@ -33,36 +43,51 @@ static int pnm_decode_frame(AVCodecContext *avctx, void *data,
     int buf_size         = avpkt->size;
     PNMContext * const s = avctx->priv_data;
     AVFrame * const p    = data;
-    int i, j, n, linesize, h, upgrade = 0;
+    int i, j, n, linesize, h, upgrade = 0, is_mono = 0;
     unsigned char *ptr;
     int components, sample_len, ret;
 
     s->bytestream_start =
-    s->bytestream       = buf;
-    s->bytestream_end   = buf + buf_size;
+    s->bytestream       = (uint8_t *)buf;
+    s->bytestream_end   = (uint8_t *)buf + buf_size;
 
     if ((ret = ff_pnm_decode_header(avctx, s)) < 0)
         return ret;
 
-    if ((ret = ff_get_buffer(avctx, p, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
         return ret;
-    }
     p->pict_type = AV_PICTURE_TYPE_I;
     p->key_frame = 1;
+    avctx->bits_per_raw_sample = av_log2(s->maxval) + 1;
 
     switch (avctx->pix_fmt) {
     default:
         return AVERROR(EINVAL);
-    case AV_PIX_FMT_RGB48BE:
+    case AV_PIX_FMT_RGBA64:
+        n = avctx->width * 8;
+        components=4;
+        sample_len=16;
+        if (s->maxval < 65535)
+            upgrade = 2;
+        goto do_read;
+    case AV_PIX_FMT_RGB48:
         n = avctx->width * 6;
         components=3;
         sample_len=16;
+        if (s->maxval < 65535)
+            upgrade = 2;
+        goto do_read;
+    case AV_PIX_FMT_RGBA:
+        n = avctx->width * 4;
+        components=4;
+        sample_len=8;
         goto do_read;
     case AV_PIX_FMT_RGB24:
         n = avctx->width * 3;
         components=3;
         sample_len=8;
+        if (s->maxval < 255)
+            upgrade = 1;
         goto do_read;
     case AV_PIX_FMT_GRAY8:
         n = avctx->width;
@@ -71,8 +96,12 @@ static int pnm_decode_frame(AVCodecContext *avctx, void *data,
         if (s->maxval < 255)
             upgrade = 1;
         goto do_read;
-    case AV_PIX_FMT_GRAY16BE:
-    case AV_PIX_FMT_GRAY16LE:
+    case AV_PIX_FMT_GRAY8A:
+        n = avctx->width * 2;
+        components=2;
+        sample_len=8;
+        goto do_read;
+    case AV_PIX_FMT_GRAY16:
         n = avctx->width * 2;
         components=1;
         sample_len=16;
@@ -84,35 +113,47 @@ static int pnm_decode_frame(AVCodecContext *avctx, void *data,
         n = (avctx->width + 7) >> 3;
         components=1;
         sample_len=1;
+        is_mono = 1;
     do_read:
         ptr      = p->data[0];
         linesize = p->linesize[0];
         if (s->bytestream + n * avctx->height > s->bytestream_end)
             return AVERROR_INVALIDDATA;
-        if(s->type < 4){
+        if(s->type < 4 || (is_mono && s->type==7)){
             for (i=0; i<avctx->height; i++) {
                 PutBitContext pb;
                 init_put_bits(&pb, ptr, linesize);
                 for(j=0; j<avctx->width * components; j++){
                     unsigned int c=0;
                     int v=0;
+                    if(s->type < 4)
                     while(s->bytestream < s->bytestream_end && (*s->bytestream < '0' || *s->bytestream > '9' ))
                         s->bytestream++;
                     if(s->bytestream >= s->bytestream_end)
                         return AVERROR_INVALIDDATA;
-                    do{
-                        v= 10*v + c;
-                        c= (*s->bytestream++) - '0';
-                    }while(c <= 9);
-                    put_bits(&pb, sample_len, (((1<<sample_len)-1)*v + (s->maxval>>1))/s->maxval);
+                    if (is_mono) {
+                        /* read a single digit */
+                        v = (*s->bytestream++)&1;
+                    } else {
+                        /* read a sequence of digits */
+                        do {
+                            v = 10*v + c;
+                            c = (*s->bytestream++) - '0';
+                        } while (c <= 9);
+                    }
+                    if (sample_len == 16) {
+                        ((uint16_t*)ptr)[j] = (((1<<sample_len)-1)*v + (s->maxval>>1))/s->maxval;
+                    } else
+                        put_bits(&pb, sample_len, (((1<<sample_len)-1)*v + (s->maxval>>1))/s->maxval);
                 }
-                flush_put_bits(&pb);
+                if (sample_len != 16)
+                    flush_put_bits(&pb);
                 ptr+= linesize;
             }
         }else{
         for (i = 0; i < avctx->height; i++) {
             if (!upgrade)
-                memcpy(ptr, s->bytestream, n);
+                samplecpy(ptr, s->bytestream, n, s->maxval);
             else if (upgrade == 1) {
                 unsigned int j, f = (255 * 128 + s->maxval / 2) / s->maxval;
                 for (j = 0; j < n; j++)
@@ -130,8 +171,8 @@ static int pnm_decode_frame(AVCodecContext *avctx, void *data,
         }
         break;
     case AV_PIX_FMT_YUV420P:
-    case AV_PIX_FMT_YUV420P9BE:
-    case AV_PIX_FMT_YUV420P10BE:
+    case AV_PIX_FMT_YUV420P9:
+    case AV_PIX_FMT_YUV420P10:
         {
             unsigned char *ptr1, *ptr2;
 
@@ -143,7 +184,7 @@ static int pnm_decode_frame(AVCodecContext *avctx, void *data,
             if (s->bytestream + n * avctx->height * 3 / 2 > s->bytestream_end)
                 return AVERROR_INVALIDDATA;
             for (i = 0; i < avctx->height; i++) {
-                memcpy(ptr, s->bytestream, n);
+                samplecpy(ptr, s->bytestream, n, s->maxval);
                 s->bytestream += n;
                 ptr           += linesize;
             }
@@ -152,9 +193,9 @@ static int pnm_decode_frame(AVCodecContext *avctx, void *data,
             n >>= 1;
             h = avctx->height >> 1;
             for (i = 0; i < h; i++) {
-                memcpy(ptr1, s->bytestream, n);
+                samplecpy(ptr1, s->bytestream, n, s->maxval);
                 s->bytestream += n;
-                memcpy(ptr2, s->bytestream, n);
+                samplecpy(ptr2, s->bytestream, n, s->maxval);
                 s->bytestream += n;
                 ptr1 += p->linesize[1];
                 ptr2 += p->linesize[2];
@@ -202,24 +243,6 @@ static int pnm_decode_frame(AVCodecContext *avctx, void *data,
             }
         }
         break;
-    case AV_PIX_FMT_RGB32:
-        ptr      = p->data[0];
-        linesize = p->linesize[0];
-        if (s->bytestream + avctx->width * avctx->height * 4 > s->bytestream_end)
-            return AVERROR_INVALIDDATA;
-        for (i = 0; i < avctx->height; i++) {
-            int j, r, g, b, a;
-
-            for (j = 0; j < avctx->width; j++) {
-                r = *s->bytestream++;
-                g = *s->bytestream++;
-                b = *s->bytestream++;
-                a = *s->bytestream++;
-                ((uint32_t *)ptr)[j] = (a << 24) | (r << 16) | (g << 8) | b;
-            }
-            ptr += linesize;
-        }
-        break;
     }
     *got_frame = 1;
 
diff --git a/libavcodec/pnmenc.c b/libavcodec/pnmenc.c
index 7513552..e6c3635 100644
--- a/libavcodec/pnmenc.c
+++ b/libavcodec/pnmenc.c
@@ -2,42 +2,38 @@
  * PNM image format
  * Copyright (c) 2002, 2003 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "libavutil/pixdesc.h"
 #include "avcodec.h"
-#include "bytestream.h"
 #include "internal.h"
 
 static int pnm_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
-                            const AVFrame *pict, int *got_packet)
+                            const AVFrame *p, int *got_packet)
 {
     uint8_t *bytestream, *bytestream_start, *bytestream_end;
-    const AVFrame * const p = pict;
     int i, h, h1, c, n, linesize, ret;
     uint8_t *ptr, *ptr1, *ptr2;
 
-    if ((ret = ff_alloc_packet(pkt, avpicture_get_size(avctx->pix_fmt,
+    if ((ret = ff_alloc_packet2(avctx, pkt, avpicture_get_size(avctx->pix_fmt,
                                                        avctx->width,
-                                                       avctx->height) + 200)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "encoded frame too large\n");
+                                                       avctx->height) + 200)) < 0)
         return ret;
-    }
 
     bytestream_start =
     bytestream       = pkt->data;
@@ -67,6 +63,10 @@ static int pnm_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         n  = avctx->width * 6;
         break;
     case AV_PIX_FMT_YUV420P:
+        if (avctx->width & 1 || avctx->height & 1) {
+            av_log(avctx, AV_LOG_ERROR, "pgmyuv needs even width and height\n");
+            return AVERROR(EINVAL);
+        }
         c  = '5';
         n  = avctx->width;
         h1 = (h * 3) / 2;
diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile
index 29c8415..de7a7c7 100644
--- a/libavcodec/ppc/Makefile
+++ b/libavcodec/ppc/Makefile
@@ -3,9 +3,10 @@ OBJS                                   += ppc/fmtconvert_altivec.o      \
 OBJS-$(CONFIG_AUDIODSP)                += ppc/audiodsp.o
 OBJS-$(CONFIG_BLOCKDSP)                += ppc/blockdsp.o
 OBJS-$(CONFIG_FFT)                     += ppc/fft_init.o                \
-                                          ppc/fft_altivec.o
+                                          ppc/fft_altivec.o             \
+                                          ppc/fft_vsx.o
 OBJS-$(CONFIG_H264CHROMA)              += ppc/h264chroma_init.o
-OBJS-$(CONFIG_H264DSP)                 += ppc/h264dsp.o
+OBJS-$(CONFIG_H264DSP)                 += ppc/h264dsp.o ppc/hpeldsp_altivec.o
 OBJS-$(CONFIG_H264QPEL)                += ppc/h264qpel.o
 OBJS-$(CONFIG_HPELDSP)                 += ppc/hpeldsp_altivec.o
 OBJS-$(CONFIG_HUFFYUVDSP)              += ppc/huffyuvdsp_altivec.o
@@ -20,7 +21,7 @@ OBJS-$(CONFIG_PIXBLOCKDSP)             += ppc/pixblockdsp.o
 OBJS-$(CONFIG_VIDEODSP)                += ppc/videodsp_ppc.o
 OBJS-$(CONFIG_VP3DSP)                  += ppc/vp3dsp_altivec.o
 
-OBJS-$(CONFIG_APE_DECODER)             += ppc/apedsp_altivec.o
+OBJS-$(CONFIG_LLAUDDSP)                += ppc/lossless_audiodsp_altivec.o
 OBJS-$(CONFIG_SVQ1_ENCODER)            += ppc/svq1enc_altivec.o
 OBJS-$(CONFIG_VC1_DECODER)             += ppc/vc1dsp_altivec.o
 OBJS-$(CONFIG_VORBIS_DECODER)          += ppc/vorbisdsp_altivec.o
diff --git a/libavcodec/ppc/asm.S b/libavcodec/ppc/asm.S
index 141dee9..a3edeed 100644
--- a/libavcodec/ppc/asm.S
+++ b/libavcodec/ppc/asm.S
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2009 Loren Merritt
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/audiodsp.c b/libavcodec/ppc/audiodsp.c
index 36506ce..c88c3d9 100644
--- a/libavcodec/ppc/audiodsp.c
+++ b/libavcodec/ppc/audiodsp.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2007 Luca Barbato <lu_zero@gentoo.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/blockdsp.c b/libavcodec/ppc/blockdsp.c
index 679bc04..0059b3b 100644
--- a/libavcodec/ppc/blockdsp.c
+++ b/libavcodec/ppc/blockdsp.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2002 Dieter Shirley
  * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/dct-test.c b/libavcodec/ppc/dct-test.c
index 37fd8bb..2328516 100644
--- a/libavcodec/ppc/dct-test.c
+++ b/libavcodec/ppc/dct-test.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/fdct.h b/libavcodec/ppc/fdct.h
index 7471035..437f815 100644
--- a/libavcodec/ppc/fdct.h
+++ b/libavcodec/ppc/fdct.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/fdctdsp.c b/libavcodec/ppc/fdctdsp.c
index 51417a5..c3ebe0d 100644
--- a/libavcodec/ppc/fdctdsp.c
+++ b/libavcodec/ppc/fdctdsp.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2003  James Klicman <james@klicman.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/fft_altivec.S b/libavcodec/ppc/fft_altivec.S
index c92b30b..aab669e 100644
--- a/libavcodec/ppc/fft_altivec.S
+++ b/libavcodec/ppc/fft_altivec.S
@@ -5,20 +5,20 @@
  * This algorithm (though not any of the implementation details) is
  * based on libdjbfft by D. J. Bernstein.
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/fft_init.c b/libavcodec/ppc/fft_init.c
index 8fcc033..675fa33 100644
--- a/libavcodec/ppc/fft_init.c
+++ b/libavcodec/ppc/fft_init.c
@@ -3,20 +3,20 @@
  * AltiVec-enabled
  * Copyright (c) 2009 Loren Merritt
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -36,8 +36,12 @@
  * It also assumes all FFTComplex are 8 bytes-aligned pairs of floats.
  */
 
+#if HAVE_VSX
+#include "fft_vsx.h"
+#else
 void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z);
 void ff_fft_calc_interleave_altivec(FFTContext *s, FFTComplex *z);
+#endif
 
 #if HAVE_GNU_AS && HAVE_ALTIVEC
 static void imdct_half_altivec(FFTContext *s, FFTSample *output, const FFTSample *input)
@@ -94,7 +98,11 @@ static void imdct_half_altivec(FFTContext *s, FFTSample *output, const FFTSample
         k--;
     } while(k >= 0);
 
+#if HAVE_VSX
+    ff_fft_calc_vsx(s, (FFTComplex*)output);
+#else
     ff_fft_calc_altivec(s, (FFTComplex*)output);
+#endif
 
     /* post rotation + reordering */
     j = -n32;
@@ -147,7 +155,11 @@ av_cold void ff_fft_init_ppc(FFTContext *s)
     if (!PPC_ALTIVEC(av_get_cpu_flags()))
         return;
 
+#if HAVE_VSX
+    s->fft_calc = ff_fft_calc_interleave_vsx;
+#else
     s->fft_calc   = ff_fft_calc_interleave_altivec;
+#endif
     if (s->mdct_bits >= 5) {
         s->imdct_calc = imdct_calc_altivec;
         s->imdct_half = imdct_half_altivec;
diff --git a/libavcodec/ppc/fft_vsx.c b/libavcodec/ppc/fft_vsx.c
new file mode 100644
index 0000000..e92975f
--- /dev/null
+++ b/libavcodec/ppc/fft_vsx.c
@@ -0,0 +1,227 @@
+/*
+ * FFT transform, optimized with VSX built-in functions
+ * Copyright (c) 2014 Rong Yan
+ *
+ * This algorithm (though not any of the implementation details) is
+ * based on libdjbfft by D. J. Bernstein.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#include "config.h"
+#include "libavutil/cpu.h"
+#include "libavutil/ppc/types_altivec.h"
+#include "libavutil/ppc/util_altivec.h"
+#include "libavcodec/fft.h"
+#include "libavcodec/fft-internal.h"
+#include "fft_vsx.h"
+
+#if HAVE_VSX
+
+static void fft32_vsx_interleave(FFTComplex *z)
+{
+    fft16_vsx_interleave(&z[0]);            /* leading half, z[0] onwards */
+    fft8_vsx_interleave(&z[16]);            /* third quarter */
+    fft8_vsx_interleave(&z[24]);            /* fourth quarter */
+    pass_vsx_interleave(z, ff_cos_32, 4);   /* pass over all four quarters, ff_cos_32 table */
+}
+
+static void fft64_vsx_interleave(FFTComplex *z)
+{
+    fft32_vsx_interleave(&z[0]);            /* leading half, z[0] onwards */
+    fft16_vsx_interleave(&z[32]);           /* third quarter */
+    fft16_vsx_interleave(&z[48]);           /* fourth quarter */
+    pass_vsx_interleave(z, ff_cos_64, 8);   /* pass over all four quarters, ff_cos_64 table */
+}
+static void fft128_vsx_interleave(FFTComplex *z)
+{
+    fft64_vsx_interleave(&z[0]);            /* leading half, z[0] onwards */
+    fft32_vsx_interleave(&z[64]);           /* third quarter */
+    fft32_vsx_interleave(&z[96]);           /* fourth quarter */
+    pass_vsx_interleave(z, ff_cos_128, 16); /* pass over all four quarters, ff_cos_128 table */
+}
+static void fft256_vsx_interleave(FFTComplex *z)
+{
+    fft128_vsx_interleave(&z[0]);           /* leading half, z[0] onwards */
+    fft64_vsx_interleave(&z[128]);          /* third quarter */
+    fft64_vsx_interleave(&z[192]);          /* fourth quarter */
+    pass_vsx_interleave(z, ff_cos_256, 32); /* pass over all four quarters, ff_cos_256 table */
+}
+static void fft512_vsx_interleave(FFTComplex *z)
+{
+    fft256_vsx_interleave(&z[0]);           /* leading half, z[0] onwards */
+    fft128_vsx_interleave(&z[256]);         /* third quarter */
+    fft128_vsx_interleave(&z[384]);         /* fourth quarter */
+    pass_vsx_interleave(z, ff_cos_512, 64); /* pass over all four quarters, ff_cos_512 table */
+}
+static void fft1024_vsx_interleave(FFTComplex *z)
+{
+    /* half-size transform, two quarter-size transforms, then the pass over all four quarters */
+    fft512_vsx_interleave(&z[0]);
+    fft256_vsx_interleave(&z[512]);
+    fft256_vsx_interleave(&z[768]);
+    pass_vsx_interleave(z, ff_cos_1024, 128);
+}
+static void fft2048_vsx_interleave(FFTComplex *z)
+{
+    fft1024_vsx_interleave(&z[0]);            /* leading half, z[0] onwards */
+    fft512_vsx_interleave(&z[1024]);          /* third quarter */
+    fft512_vsx_interleave(&z[1536]);          /* fourth quarter */
+    pass_vsx_interleave(z, ff_cos_2048, 256); /* pass over all four quarters, ff_cos_2048 table */
+}
+static void fft4096_vsx_interleave(FFTComplex *z)
+{
+    fft2048_vsx_interleave(&z[0]);            /* leading half, z[0] onwards */
+    fft1024_vsx_interleave(&z[2048]);         /* third quarter */
+    fft1024_vsx_interleave(&z[3072]);         /* fourth quarter */
+    pass_vsx_interleave(z, ff_cos_4096, 512); /* pass over all four quarters, ff_cos_4096 table */
+}
+static void fft8192_vsx_interleave(FFTComplex *z)
+{
+    fft4096_vsx_interleave(&z[0]);             /* leading half, z[0] onwards */
+    fft2048_vsx_interleave(&z[4096]);          /* third quarter */
+    fft2048_vsx_interleave(&z[6144]);          /* fourth quarter */
+    pass_vsx_interleave(z, ff_cos_8192, 1024); /* pass over all four quarters, ff_cos_8192 table */
+}
+static void fft16384_vsx_interleave(FFTComplex *z)
+{
+    fft8192_vsx_interleave(&z[0]);              /* leading half, z[0] onwards */
+    fft4096_vsx_interleave(&z[8192]);           /* third quarter */
+    fft4096_vsx_interleave(&z[12288]);          /* fourth quarter */
+    pass_vsx_interleave(z, ff_cos_16384, 2048); /* pass over all four quarters, ff_cos_16384 table */
+}
+static void fft32768_vsx_interleave(FFTComplex *z)
+{
+    fft16384_vsx_interleave(&z[0]);             /* leading half, z[0] onwards */
+    fft8192_vsx_interleave(&z[16384]);          /* third quarter */
+    fft8192_vsx_interleave(&z[24576]);          /* fourth quarter */
+    pass_vsx_interleave(z, ff_cos_32768, 4096); /* pass over all four quarters, ff_cos_32768 table */
+}
+static void fft65536_vsx_interleave(FFTComplex *z)
+{
+    fft32768_vsx_interleave(&z[0]);             /* leading half, z[0] onwards */
+    fft16384_vsx_interleave(&z[32768]);         /* third quarter */
+    fft16384_vsx_interleave(&z[49152]);         /* fourth quarter */
+    pass_vsx_interleave(z, ff_cos_65536, 8192); /* pass over all four quarters, ff_cos_65536 table */
+}
+
+static void fft32_vsx(FFTComplex *z)
+{
+    fft16_vsx(&z[0]);            /* sub-transform starting at z[0] */
+    fft8_vsx(&z[16]);            /* sub-transform starting at z[16] */
+    fft8_vsx(&z[24]);            /* sub-transform starting at z[24] */
+    pass_vsx(z, ff_cos_32, 4);   /* pass with the ff_cos_32 table, n = 4 */
+}
+
+static void fft64_vsx(FFTComplex *z)
+{
+    fft32_vsx(&z[0]);            /* sub-transform starting at z[0] */
+    fft16_vsx(&z[32]);           /* sub-transform starting at z[32] */
+    fft16_vsx(&z[48]);           /* sub-transform starting at z[48] */
+    pass_vsx(z, ff_cos_64, 8);   /* pass with the ff_cos_64 table, n = 8 */
+}
+static void fft128_vsx(FFTComplex *z)
+{
+    fft64_vsx(&z[0]);            /* sub-transform starting at z[0] */
+    fft32_vsx(&z[64]);           /* sub-transform starting at z[64] */
+    fft32_vsx(&z[96]);           /* sub-transform starting at z[96] */
+    pass_vsx(z, ff_cos_128, 16); /* pass with the ff_cos_128 table, n = 16 */
+}
+static void fft256_vsx(FFTComplex *z)
+{
+    fft128_vsx(&z[0]);           /* sub-transform starting at z[0] */
+    fft64_vsx(&z[128]);          /* sub-transform starting at z[128] */
+    fft64_vsx(&z[192]);          /* sub-transform starting at z[192] */
+    pass_vsx(z, ff_cos_256, 32); /* pass with the ff_cos_256 table, n = 32 */
+}
+static void fft512_vsx(FFTComplex *z)
+{
+    fft256_vsx(&z[0]);           /* sub-transform starting at z[0] */
+    fft128_vsx(&z[256]);         /* sub-transform starting at z[256] */
+    fft128_vsx(&z[384]);         /* sub-transform starting at z[384] */
+    pass_vsx(z, ff_cos_512, 64); /* pass with the ff_cos_512 table, n = 64 */
+}
+static void fft1024_vsx(FFTComplex *z)
+{
+    /* three sub-transforms at z[0], z[512] and z[768], then the pass with ff_cos_1024 */
+    fft512_vsx(&z[0]);
+    fft256_vsx(&z[512]);
+    fft256_vsx(&z[768]);
+    pass_vsx(z, ff_cos_1024, 128);
+}
+static void fft2048_vsx(FFTComplex *z)
+{
+    fft1024_vsx(&z[0]);            /* sub-transform starting at z[0] */
+    fft512_vsx(&z[1024]);          /* sub-transform starting at z[1024] */
+    fft512_vsx(&z[1536]);          /* sub-transform starting at z[1536] */
+    pass_vsx(z, ff_cos_2048, 256); /* pass with the ff_cos_2048 table, n = 256 */
+}
+static void fft4096_vsx(FFTComplex *z)
+{
+    fft2048_vsx(&z[0]);            /* sub-transform starting at z[0] */
+    fft1024_vsx(&z[2048]);         /* sub-transform starting at z[2048] */
+    fft1024_vsx(&z[3072]);         /* sub-transform starting at z[3072] */
+    pass_vsx(z, ff_cos_4096, 512); /* pass with the ff_cos_4096 table, n = 512 */
+}
+static void fft8192_vsx(FFTComplex *z)
+{
+    fft4096_vsx(&z[0]);             /* sub-transform starting at z[0] */
+    fft2048_vsx(&z[4096]);          /* sub-transform starting at z[4096] */
+    fft2048_vsx(&z[6144]);          /* sub-transform starting at z[6144] */
+    pass_vsx(z, ff_cos_8192, 1024); /* pass with the ff_cos_8192 table, n = 1024 */
+}
+static void fft16384_vsx(FFTComplex *z)
+{
+    fft8192_vsx(&z[0]);              /* sub-transform starting at z[0] */
+    fft4096_vsx(&z[8192]);           /* sub-transform starting at z[8192] */
+    fft4096_vsx(&z[12288]);          /* sub-transform starting at z[12288] */
+    pass_vsx(z, ff_cos_16384, 2048); /* pass with the ff_cos_16384 table, n = 2048 */
+}
+static void fft32768_vsx(FFTComplex *z)
+{
+    fft16384_vsx(&z[0]);             /* sub-transform starting at z[0] */
+    fft8192_vsx(&z[16384]);          /* sub-transform starting at z[16384] */
+    fft8192_vsx(&z[24576]);          /* sub-transform starting at z[24576] */
+    pass_vsx(z, ff_cos_32768, 4096); /* pass with the ff_cos_32768 table, n = 4096 */
+}
+static void fft65536_vsx(FFTComplex *z)
+{
+    fft32768_vsx(&z[0]);             /* sub-transform starting at z[0] */
+    fft16384_vsx(&z[32768]);         /* sub-transform starting at z[32768] */
+    fft16384_vsx(&z[49152]);         /* sub-transform starting at z[49152] */
+    pass_vsx(z, ff_cos_65536, 8192); /* pass with the ff_cos_65536 table, n = 8192 */
+}
+
+static void (* const fft_dispatch_vsx[])(FFTComplex*) = { /* callers index this with nbits - 2 */
+    fft4_vsx,    fft8_vsx,    fft16_vsx,   fft32_vsx,   fft64_vsx,    fft128_vsx,   fft256_vsx,   fft512_vsx,
+    fft1024_vsx, fft2048_vsx, fft4096_vsx, fft8192_vsx, fft16384_vsx, fft32768_vsx, fft65536_vsx
+};
+static void (* const fft_dispatch_vsx_interleave[])(FFTComplex*) = { /* callers index this with nbits - 2 */
+    fft4_vsx_interleave,     fft8_vsx_interleave,     fft16_vsx_interleave,   fft32_vsx_interleave,   fft64_vsx_interleave,
+    fft128_vsx_interleave,   fft256_vsx_interleave,   fft512_vsx_interleave,  fft1024_vsx_interleave, fft2048_vsx_interleave,
+    fft4096_vsx_interleave,  fft8192_vsx_interleave,  fft16384_vsx_interleave, fft32768_vsx_interleave, fft65536_vsx_interleave
+};
+void ff_fft_calc_interleave_vsx(FFTContext *s, FFTComplex *z)
+{   /* table slot 0 is fft4_vsx_interleave, hence the nbits - 2 offset */
+    (*fft_dispatch_vsx_interleave[s->nbits - 2])(z);
+}
+void ff_fft_calc_vsx(FFTContext *s, FFTComplex *z)
+{   /* table slot 0 is fft4_vsx, hence the nbits - 2 offset */
+    (*fft_dispatch_vsx[s->nbits - 2])(z);
+}
+#endif /* HAVE_VSX */
diff --git a/libavcodec/ppc/fft_vsx.h b/libavcodec/ppc/fft_vsx.h
new file mode 100644
index 0000000..a85475d
--- /dev/null
+++ b/libavcodec/ppc/fft_vsx.h
@@ -0,0 +1,830 @@
+#ifndef AVCODEC_PPC_FFT_VSX_H
+#define AVCODEC_PPC_FFT_VSX_H
+/*
+ * FFT transform, optimized with VSX built-in functions
+ * Copyright (c) 2014 Rong Yan  Copyright (c) 2009 Loren Merritt
+ *
+ * This algorithm (though not any of the implementation details) is
+ * based on libdjbfft by D. J. Bernstein, and fft_altivec.S.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#include "config.h"
+#include "libavutil/cpu.h"
+#include "libavutil/ppc/types_altivec.h"
+#include "libavutil/ppc/util_altivec.h"
+#include "libavcodec/fft.h"
+#include "libavcodec/fft-internal.h"
+
+#if HAVE_VSX
+
+void ff_fft_calc_interleave_vsx(FFTContext *s, FFTComplex *z); /* defined in fft_vsx.c */
+void ff_fft_calc_vsx(FFTContext *s, FFTComplex *z);            /* defined in fft_vsx.c */
+/* Sizes in bytes of 2, 4, ... 14 FFTComplex elements; used below as byte offsets
+ * for vec_ld()/vec_st() relative to the start of the FFTComplex array. */
+#define byte_2complex (2*sizeof(FFTComplex))
+#define byte_4complex (4*sizeof(FFTComplex))
+#define byte_6complex (6*sizeof(FFTComplex))
+#define byte_8complex (8*sizeof(FFTComplex))
+#define byte_10complex (10*sizeof(FFTComplex))
+#define byte_12complex (12*sizeof(FFTComplex))
+#define byte_14complex (14*sizeof(FFTComplex))
+
+inline static void pass_vsx_interleave(FFTComplex *z, const FFTSample *wre, unsigned int n)
+{   /* updates, in place, the four blocks of z that start at complex offsets 0, 2n, 4n and 6n */
+    int o1 = n<<1;      /* 2n: offset of the second block, in FFTComplex elements */
+    int o2 = n<<2;      /* 4n: offset of the third block */
+    int o3 = o1+o2;     /* 6n: offset of the fourth block */
+    int i1, i2, i3;     /* o1, o2, o3 converted to byte offsets for vec_ld()/vec_st() */
+    FFTSample* out = (FFTSample*)z;
+    const FFTSample *wim = wre+o1;  /* second twiddle pointer; it is walked backwards below */
+    vec_f vz0, vzo1, vzo2, vzo3;
+    vec_f x0, x1, x2, x3;
+    vec_f x4, x5, x6, x7;
+    vec_f x8, x9, x10, x11;
+    vec_f x12, x13, x14, x15;
+    vec_f x16, x17, x18, x19;
+    vec_f x20, x21, x22, x23;
+    vec_f vz0plus1, vzo1plus1, vzo2plus1, vzo3plus1;
+    vec_f y0, y1, y2, y3;
+    vec_f y4, y5, y8, y9;
+    vec_f y10, y13, y14, y15;
+    vec_f y16, y17, y18, y19;
+    vec_f y20, y21, y22, y23;
+    vec_f wr1, wi1, wr0, wi0;
+    vec_f wr2, wi2, wr3, wi3;
+    vec_f xmulwi0, xmulwi1, ymulwi2, ymulwi3;
+    /* The first group of four complex values per block is done here; the loop below does the other (n-2)/2 groups. */
+    n = n-2;
+    i1 = o1*sizeof(FFTComplex);
+    i2 = o2*sizeof(FFTComplex);
+    i3 = o3*sizeof(FFTComplex);
+    vzo2 = vec_ld(i2, &(out[0]));  // zo2.r  zo2.i  z(o2+1).r  z(o2+1).i
+    vzo2plus1 = vec_ld(i2+16, &(out[0]));
+    vzo3 = vec_ld(i3, &(out[0]));  // zo3.r  zo3.i  z(o3+1).r  z(o3+1).i
+    vzo3plus1 = vec_ld(i3+16, &(out[0]));
+    vz0 = vec_ld(0, &(out[0]));    // z0.r  z0.i  z1.r  z1.i
+    vz0plus1 = vec_ld(16, &(out[0]));
+    vzo1 = vec_ld(i1, &(out[0]));  // zo1.r  zo1.i  z(o1+1).r  z(o1+1).i
+    vzo1plus1 = vec_ld(i1+16, &(out[0]));
+    /* sums and differences of the third and fourth blocks */
+    x0 = vec_add(vzo2, vzo3);
+    x1 = vec_sub(vzo2, vzo3);
+    y0 = vec_add(vzo2plus1, vzo3plus1);
+    y1 = vec_sub(vzo2plus1, vzo3plus1);
+    /* only wre[1..3] and wim[-1..-3] are read here: element 0 enters x9/x10 straight from x0/x1, unmultiplied */
+    wr1 = vec_splats(wre[1]);
+    wi1 = vec_splats(wim[-1]);
+    wi2 = vec_splats(wim[-2]);
+    wi3 = vec_splats(wim[-3]);
+    wr2 = vec_splats(wre[2]);
+    wr3 = vec_splats(wre[3]);
+
+    x2 = vec_perm(x0, x1, vcprm(2,s2,3,s3));
+    x3 = vec_perm(x0, x1, vcprm(s3,3,s2,2));
+
+    y4 = vec_perm(y0, y1, vcprm(s1,1,s0,0));
+    y5 = vec_perm(y0, y1, vcprm(s3,3,s2,2));
+    y2 = vec_perm(y0, y1, vcprm(0,s0,1,s1));
+    y3 = vec_perm(y0, y1, vcprm(2,s2,3,s3));
+
+    ymulwi2 = vec_mul(y4, wi2);
+    ymulwi3 = vec_mul(y5, wi3);
+    x4 = vec_mul(x2, wr1);
+    x5 = vec_mul(x3, wi1);
+    y8 = vec_madd(y2, wr2, ymulwi2);
+    y9 = vec_msub(y2, wr2, ymulwi2);
+    x6 = vec_add(x4, x5);
+    x7 = vec_sub(x4, x5);
+    y13 = vec_madd(y3, wr3, ymulwi3);
+    y14 = vec_msub(y3, wr3, ymulwi3);
+
+    x8 = vec_perm(x6, x7, vcprm(0,1,s2,s3));
+    y10 = vec_perm(y8, y9, vcprm(0,1,s2,s3));
+    y15 = vec_perm(y13, y14, vcprm(0,1,s2,s3));
+
+    x9 = vec_perm(x0, x8, vcprm(0,1,s0,s2));
+    x10 = vec_perm(x1, x8, vcprm(1,0,s3,s1));
+
+    y16 = vec_perm(y10, y15, vcprm(0,2,s0,s2));
+    y17 = vec_perm(y10, y15, vcprm(3,1,s3,s1));
+    /* add to / subtract from the first and second blocks */
+    x11 = vec_add(vz0, x9);
+    x12 = vec_sub(vz0, x9);
+    x13 = vec_add(vzo1, x10);
+    x14 = vec_sub(vzo1, x10);
+
+    y18 = vec_add(vz0plus1, y16);
+    y19 = vec_sub(vz0plus1, y16);
+    y20 = vec_add(vzo1plus1, y17);
+    y21 = vec_sub(vzo1plus1, y17);
+
+    x15 = vec_perm(x13, x14, vcprm(0,s1,2,s3));
+    x16 = vec_perm(x13, x14, vcprm(s0,1,s2,3));
+    y22 = vec_perm(y20, y21, vcprm(0,s1,2,s3));
+    y23 = vec_perm(y20, y21, vcprm(s0,1,s2,3));
+
+    /* store the first group back to the same addresses it was loaded from */
+    vec_st(x11, 0, &(out[0]));
+    vec_st(y18, 16, &(out[0]));
+    vec_st(x15, i1, &(out[0]));
+    vec_st(y22, i1+16, &(out[0]));
+    vec_st(x12, i2, &(out[0]));
+    vec_st(y19, i2+16, &(out[0]));
+    vec_st(x16, i3, &(out[0]));
+    vec_st(y23, i3+16, &(out[0]));
+    /* remaining groups: each iteration advances 8 floats (4 complex values) and 4 twiddle entries */
+    do {
+        out += 8;
+        wre += 4;
+        wim -= 4;
+        wr0 = vec_splats(wre[0]);
+        wr1 = vec_splats(wre[1]);
+        wi0 = vec_splats(wim[0]);
+        wi1 = vec_splats(wim[-1]);
+
+        wr2 = vec_splats(wre[2]);
+        wr3 = vec_splats(wre[3]);
+        wi2 = vec_splats(wim[-2]);
+        wi3 = vec_splats(wim[-3]);
+
+        vzo2 = vec_ld(i2, &(out[0]));  // zo2.r  zo2.i  z(o2+1).r  z(o2+1).i
+        vzo2plus1 = vec_ld(i2+16, &(out[0]));
+        vzo3 = vec_ld(i3, &(out[0]));  // zo3.r  zo3.i  z(o3+1).r  z(o3+1).i
+        vzo3plus1 = vec_ld(i3+16, &(out[0]));
+        vz0 = vec_ld(0, &(out[0]));    // z0.r  z0.i  z1.r  z1.i
+        vz0plus1 = vec_ld(16, &(out[0]));
+        vzo1 = vec_ld(i1, &(out[0])); // zo1.r  zo1.i  z(o1+1).r  z(o1+1).i
+        vzo1plus1 = vec_ld(i1+16, &(out[0]));
+
+        x0 = vec_add(vzo2, vzo3);
+        x1 = vec_sub(vzo2, vzo3);
+
+        y0 = vec_add(vzo2plus1, vzo3plus1);
+        y1 = vec_sub(vzo2plus1, vzo3plus1);
+        /* unlike the first group, all four values are multiplied by twiddles here (wr0/wi0 included) */
+        x4 = vec_perm(x0, x1, vcprm(s1,1,s0,0));
+        x5 = vec_perm(x0, x1, vcprm(s3,3,s2,2));
+        x2 = vec_perm(x0, x1, vcprm(0,s0,1,s1));
+        x3 = vec_perm(x0, x1, vcprm(2,s2,3,s3));
+
+        y2 = vec_perm(y0, y1, vcprm(0,s0,1,s1));
+        y3 = vec_perm(y0, y1, vcprm(2,s2,3,s3));
+        xmulwi0 = vec_mul(x4, wi0);
+        xmulwi1 = vec_mul(x5, wi1);
+
+        y4 = vec_perm(y0, y1, vcprm(s1,1,s0,0));
+        y5 = vec_perm(y0, y1, vcprm(s3,3,s2,2));
+
+        x8 = vec_madd(x2, wr0, xmulwi0);
+        x9 = vec_msub(x2, wr0, xmulwi0);
+        ymulwi2 = vec_mul(y4, wi2);
+        ymulwi3 = vec_mul(y5, wi3);
+
+        x13 = vec_madd(x3, wr1, xmulwi1);
+        x14 = vec_msub(x3, wr1, xmulwi1);
+
+        y8 = vec_madd(y2, wr2, ymulwi2);
+        y9 = vec_msub(y2, wr2, ymulwi2);
+        y13 = vec_madd(y3, wr3, ymulwi3);
+        y14 = vec_msub(y3, wr3, ymulwi3);
+
+        x10 = vec_perm(x8, x9, vcprm(0,1,s2,s3));
+        x15 = vec_perm(x13, x14, vcprm(0,1,s2,s3));
+
+        y10 = vec_perm(y8, y9, vcprm(0,1,s2,s3));
+        y15 = vec_perm(y13, y14, vcprm(0,1,s2,s3));
+
+        x16 = vec_perm(x10, x15, vcprm(0,2,s0,s2));
+        x17 = vec_perm(x10, x15, vcprm(3,1,s3,s1));
+
+        y16 = vec_perm(y10, y15, vcprm(0,2,s0,s2));
+        y17 = vec_perm(y10, y15, vcprm(3,1,s3,s1));
+
+        x18 = vec_add(vz0, x16);
+        x19 = vec_sub(vz0, x16);
+        x20 = vec_add(vzo1, x17);
+        x21 = vec_sub(vzo1, x17);
+
+        y18 = vec_add(vz0plus1, y16);
+        y19 = vec_sub(vz0plus1, y16);
+        y20 = vec_add(vzo1plus1, y17);
+        y21 = vec_sub(vzo1plus1, y17);
+
+        x22 = vec_perm(x20, x21, vcprm(0,s1,2,s3));
+        x23 = vec_perm(x20, x21, vcprm(s0,1,s2,3));
+
+        y22 = vec_perm(y20, y21, vcprm(0,s1,2,s3));
+        y23 = vec_perm(y20, y21, vcprm(s0,1,s2,3));
+        /* store this group back to the four blocks */
+        vec_st(x18, 0, &(out[0]));
+        vec_st(y18, 16, &(out[0]));
+        vec_st(x22, i1, &(out[0]));
+        vec_st(y22, i1+16, &(out[0]));
+        vec_st(x19, i2, &(out[0]));
+        vec_st(y19, i2+16, &(out[0]));
+        vec_st(x23, i3, &(out[0]));
+        vec_st(y23, i3+16, &(out[0]));
+    } while (n-=2);  /* n is unsigned and must hit exactly 0: the caller's n has to be even and >= 4 */
+}
+
+inline static void fft2_vsx_interleave(FFTComplex *z)
+{
+    /* In place: z[0] becomes z[0] + z[1], z[1] becomes (old z[0]) - z[1]. */
+    const FFTSample re0 = z[0].re;
+    const FFTSample im0 = z[0].im;
+    const FFTSample re1 = z[1].re;
+    const FFTSample im1 = z[1].im;
+    z[0].re = re0 + re1;
+    z[0].im = im0 + im1;
+    z[1].re = re0 - re1;
+    z[1].im = im0 - im1;
+}
+
+inline static void fft4_vsx_interleave(FFTComplex *z)
+{ // in-place: two vectors are loaded from z, combined in two stages, and stored back
+    vec_f a, b, c, d;
+    float* out=  (float*)z; // z viewed as a flat float array
+    a = vec_ld(0, &(out[0])); // NOTE(review): AltiVec vec_ld/vec_st ignore the low 4 address bits - confirm z is 16-byte aligned
+    b = vec_ld(byte_2complex, &(out[0])); // byte_2complex is defined elsewhere; presumably the byte size of 2 FFTComplex
+    // stage 1: shuffle lanes of a/b into two operands, then take their sum and difference
+    c = vec_perm(a, b, vcprm(0,1,s2,s1));
+    d = vec_perm(a, b, vcprm(2,3,s0,s3));
+    a = vec_add(c, d);
+    b = vec_sub(c, d);
+    // stage 2: same pattern applied to the stage-1 sum (a) and difference (b)
+    c = vec_perm(a, b, vcprm(0,1,s0,s1));
+    d = vec_perm(a, b, vcprm(2,3,s3,s2));
+
+    a = vec_add(c, d);
+    b = vec_sub(c, d);
+    vec_st(a, 0, &(out[0])); // results overwrite the input at the same two offsets
+    vec_st(b, byte_2complex, &(out[0]));
+}
+
+inline static void fft8_vsx_interleave(FFTComplex *z)
+{ // in-place: four vectors are loaded from z, combined, and stored back to the same offsets
+    vec_f vz0, vz1, vz2, vz3;
+    vec_f x0, x1, x2, x3;
+    vec_f x4, x5, x6, x7;
+    vec_f x8, x9, x10, x11;
+    vec_f x12, x13, x14, x15;
+    vec_f x16, x17, x18, x19;
+    vec_f x20, x21, x22, x23;
+    vec_f x24, x25, x26, x27;
+    vec_f x28, x29, x30, x31;
+    vec_f x32, x33, x34;
+
+    float* out=  (float*)z;
+    vec_f vc1 = {sqrthalf, sqrthalf, sqrthalf, sqrthalf}; // same factor in every lane; only used to scale x26
+    // load the whole input as four vectors
+    vz0 = vec_ld(0, &(out[0]));
+    vz1 = vec_ld(byte_2complex, &(out[0]));
+    vz2 = vec_ld(byte_4complex, &(out[0]));
+    vz3 = vec_ld(byte_6complex, &(out[0]));
+    // shuffle lanes so that each add/sub pair below combines the right elements
+    x0 = vec_perm(vz0, vz1, vcprm(0,1,s2,s1));
+    x1 = vec_perm(vz0, vz1, vcprm(2,3,s0,s3));
+    x2 = vec_perm(vz2, vz3, vcprm(2,1,s0,s1));
+    x3 = vec_perm(vz2, vz3, vcprm(0,3,s2,s3));
+    // first sum/difference stage
+    x4 = vec_add(x0, x1);
+    x5 = vec_sub(x0, x1);
+    x6 = vec_add(x2, x3);
+    x7 = vec_sub(x2, x3);
+
+    x8 = vec_perm(x4, x5, vcprm(0,1,s0,s1));
+    x9 = vec_perm(x4, x5, vcprm(2,3,s3,s2));
+    x10 = vec_perm(x6, x7, vcprm(2,1,s2,s1));
+    x11 = vec_perm(x6, x7, vcprm(0,3,s0,s3));
+    // second sum/difference stage
+    x12 = vec_add(x8, x9);
+    x13 = vec_sub(x8, x9);
+    x14 = vec_add(x10, x11);
+    x15 = vec_sub(x10, x11);
+    x16 = vec_perm(x12, x13, vcprm(0,s0,1,s1));
+    x17 = vec_perm(x14, x15, vcprm(0,s0,1,s1));
+    x18 = vec_perm(x16, x17, vcprm(s0,s3,s2,s1));
+    x19 = vec_add(x16, x18); // z0.r  z2.r  z0.i  z2.i
+    x20 = vec_sub(x16, x18); // z4.r  z6.r  z4.i  z6.i
+
+    x21 = vec_perm(x12, x13, vcprm(2,s2,3,s3));
+    x22 = vec_perm(x14, x15, vcprm(2,3,s2,s3));
+    x23 = vec_perm(x14, x15, vcprm(3,2,s3,s2));
+    x24 = vec_add(x22, x23);
+    x25 = vec_sub(x22, x23);
+    x26 = vec_mul( vec_perm(x24, x25, vcprm(2,s2,0,s0)), vc1); // the only multiply: scale by sqrthalf
+
+    x27 = vec_add(x21, x26); // z1.r  z7.r z1.i z3.i
+    x28 = vec_sub(x21, x26); //z5.r  z3.r z5.i z7.i
+    // regroup the lanes into (re, im) pairs in output order z0..z7
+    x29 = vec_perm(x19, x27, vcprm(0,2,s0,s2)); // z0.r  z0.i  z1.r  z1.i
+    x30 = vec_perm(x19, x27, vcprm(1,3,s1,s3)); // z2.r  z2.i  z7.r  z3.i
+    x31 = vec_perm(x20, x28, vcprm(0,2,s0,s2)); // z4.r  z4.i  z5.r  z5.i
+    x32 = vec_perm(x20, x28, vcprm(1,3,s1,s3)); // z6.r  z6.i  z3.r  z7.i
+    x33 = vec_perm(x30, x32, vcprm(0,1,s2,3));  // z2.r  z2.i  z3.r  z3.i
+    x34 = vec_perm(x30, x32, vcprm(s0,s1,2,s3)); // z6.r  z6.i  z7.r  z7.i
+
+    vec_st(x29, 0, &(out[0])); // z0, z1
+    vec_st(x33, byte_2complex, &(out[0])); // z2, z3
+    vec_st(x31, byte_4complex, &(out[0])); // z4, z5
+    vec_st(x34, byte_6complex, &(out[0])); // z6, z7
+}
+
+inline static void fft16_vsx_interleave(FFTComplex *z)
+{ // in-place: eight vectors are loaded from z, combined, and stored back to the same offsets
+    float* out=  (float*)z;
+    vec_f vc0 = {sqrthalf, sqrthalf, sqrthalf, sqrthalf}; // scales x42/x43
+    vec_f vc1 = {ff_cos_16[1], ff_cos_16[1], ff_cos_16[1], ff_cos_16[1]}; // table entry 1 in every lane (scales x50/x51)
+    vec_f vc2 = {ff_cos_16[3], ff_cos_16[3], ff_cos_16[3], ff_cos_16[3]}; // table entry 3 in every lane (scales x50/x51)
+    vec_f vz0, vz1, vz2, vz3;
+    vec_f vz4, vz5, vz6, vz7;
+    vec_f x0, x1, x2, x3;
+    vec_f x4, x5, x6, x7;
+    vec_f x8, x9, x10, x11;
+    vec_f x12, x13, x14, x15;
+    vec_f x16, x17, x18, x19;
+    vec_f x20, x21, x22, x23;
+    vec_f x24, x25, x26, x27;
+    vec_f x28, x29, x30, x31;
+    vec_f x32, x33, x34, x35;
+    vec_f x36, x37, x38, x39;
+    vec_f x40, x41, x42, x43;
+    vec_f x44, x45, x46, x47;
+    vec_f x48, x49, x50, x51;
+    vec_f x52, x53, x54, x55;
+    vec_f x56, x57, x58, x59;
+    vec_f x60, x61, x62, x63;
+    vec_f x64, x65, x66, x67;
+    vec_f x68, x69, x70, x71;
+    vec_f x72, x73, x74, x75;
+    vec_f x76, x77, x78, x79;
+    vec_f x80, x81, x82, x83;
+    vec_f x84, x85, x86;
+    // load the whole input as eight vectors
+    vz0 = vec_ld(0, &(out[0]));
+    vz1 = vec_ld(byte_2complex, &(out[0]));
+    vz2 = vec_ld(byte_4complex, &(out[0]));
+    vz3 = vec_ld(byte_6complex, &(out[0]));
+    vz4 = vec_ld(byte_8complex, &(out[0]));
+    vz5 = vec_ld(byte_10complex, &(out[0]));
+    vz6 = vec_ld(byte_12complex, &(out[0]));
+    vz7 = vec_ld(byte_14complex, &(out[0]));
+    // shuffle lanes so that each add/sub pair below combines the right elements
+    x0 = vec_perm(vz0, vz1, vcprm(0,1,s2,s1));
+    x1 = vec_perm(vz0, vz1, vcprm(2,3,s0,s3));
+    x2 = vec_perm(vz2, vz3, vcprm(0,1,s0,s1));
+    x3 = vec_perm(vz2, vz3, vcprm(2,3,s2,s3));
+
+    x4 = vec_perm(vz4, vz5, vcprm(0,1,s2,s1));
+    x5 = vec_perm(vz4, vz5, vcprm(2,3,s0,s3));
+    x6 = vec_perm(vz6, vz7, vcprm(0,1,s2,s1));
+    x7 = vec_perm(vz6, vz7, vcprm(2,3,s0,s3));
+    // first sum/difference stage
+    x8 = vec_add(x0, x1);
+    x9 = vec_sub(x0, x1);
+    x10 = vec_add(x2, x3);
+    x11 = vec_sub(x2, x3);
+
+    x12 = vec_add(x4, x5);
+    x13 = vec_sub(x4, x5);
+    x14 = vec_add(x6, x7);
+    x15 = vec_sub(x6, x7);
+
+    x16 = vec_perm(x8, x9, vcprm(0,1,s0,s1));
+    x17 = vec_perm(x8, x9, vcprm(2,3,s3,s2));
+    x18 = vec_perm(x10, x11, vcprm(2,1,s1,s2));
+    x19 = vec_perm(x10, x11, vcprm(0,3,s0,s3));
+    x20 = vec_perm(x12, x14, vcprm(0,1,s0, s1));
+    x21 = vec_perm(x12, x14, vcprm(2,3,s2,s3));
+    x22 = vec_perm(x13, x15, vcprm(0,1,s0,s1));
+    x23 = vec_perm(x13, x15, vcprm(3,2,s3,s2));
+    // second sum/difference stage
+    x24 = vec_add(x16, x17);
+    x25 = vec_sub(x16, x17);
+    x26 = vec_add(x18, x19);
+    x27 = vec_sub(x18, x19);
+    x28 = vec_add(x20, x21);
+    x29 = vec_sub(x20, x21);
+    x30 = vec_add(x22, x23);
+    x31 = vec_sub(x22, x23);
+
+    x32 = vec_add(x24, x26);
+    x33 = vec_sub(x24, x26);
+    x34 = vec_perm(x32, x33, vcprm(0,1,s0,s1));
+
+    x35 = vec_perm(x28, x29, vcprm(2,1,s1,s2));
+    x36 = vec_perm(x28, x29, vcprm(0,3,s0,s3));
+    x37 = vec_add(x35, x36);
+    x38 = vec_sub(x35, x36);
+    x39 = vec_perm(x37, x38, vcprm(0,1,s1,s0));
+
+    x40 = vec_perm(x27, x38, vcprm(3,2,s2,s3));
+    x41 = vec_perm(x26,  x37, vcprm(2,3,s3,s2));
+    x42 = vec_add(x40, x41);
+    x43 = vec_sub(x40, x41);
+    x44 = vec_mul(x42, vc0); // scaled by sqrthalf
+    x45 = vec_mul(x43, vc0); // scaled by sqrthalf
+
+    x46 = vec_add(x34, x39);  // z0.r  z0.i  z4.r  z4.i
+    x47 = vec_sub(x34, x39);  // z8.r  z8.i  z12.r  z12.i
+
+    x48 = vec_perm(x30, x31, vcprm(2,1,s1,s2));
+    x49 = vec_perm(x30, x31, vcprm(0,3,s3,s0));
+    x50 = vec_add(x48, x49);
+    x51 = vec_sub(x48, x49);
+    x52 = vec_mul(x50, vc1); // x50 * ff_cos_16[1]
+    x53 = vec_mul(x50, vc2); // x50 * ff_cos_16[3]
+    x54 = vec_mul(x51, vc1); // x51 * ff_cos_16[1]
+    x55 = vec_mul(x51, vc2); // x51 * ff_cos_16[3]
+
+    x56 = vec_perm(x24, x25, vcprm(2,3,s2,s3));
+    x57 = vec_perm(x44, x45, vcprm(0,1,s1,s0));
+    x58 = vec_add(x56, x57);
+    x59 = vec_sub(x56, x57);
+
+    x60 = vec_perm(x54, x55, vcprm(1,0,3,2));
+    x61 = vec_perm(x54, x55, vcprm(s1,s0,s3,s2));
+    x62 = vec_add(x52, x61);
+    x63 = vec_sub(x52, x61);
+    x64 = vec_add(x60, x53);
+    x65 = vec_sub(x60, x53);
+    x66 = vec_perm(x62, x64, vcprm(0,1,s3,s2));
+    x67 = vec_perm(x63, x65, vcprm(s0,s1,3,2));
+
+    x68 = vec_add(x58, x66); // z1.r    z1.i  z3.r    z3.i
+    x69 = vec_sub(x58, x66); // z9.r    z9.i  z11.r  z11.i
+    x70 = vec_add(x59, x67); // z5.r    z5.i  z15.r  z15.i
+    x71 = vec_sub(x59, x67); // z13.r  z13.i z7.r   z7.i
+
+    x72 = vec_perm(x25, x27, vcprm(s1,s0,s2,s3));
+    x73 = vec_add(x25, x72);
+    x74 = vec_sub(x25, x72);
+    x75 = vec_perm(x73, x74, vcprm(0,1,s0,s1));
+    x76 = vec_perm(x44, x45, vcprm(3,2,s2,s3));
+    x77 = vec_add(x75, x76); // z2.r   z2.i    z6.r    z6.i
+    x78 = vec_sub(x75, x76); // z10.r  z10.i  z14.r  z14.i
+    // regroup into output order and store z0..z7, then z8..z15
+    x79 = vec_perm(x46, x68, vcprm(0,1,s0,s1)); // z0.r  z0.i  z1.r  z1.i
+    x80 = vec_perm(x77, x68, vcprm(0,1,s2,s3)); // z2.r  z2.i  z3.r  z3.i
+    x81 = vec_perm(x46, x70, vcprm(2,3,s0,s1)); // z4.r  z4.i  z5.r  z5.i
+    x82 = vec_perm(x71, x77, vcprm(s2,s3,2,3)); // z6.r  z6.i  z7.r  z7.i
+    vec_st(x79, 0, &(out[0]));
+    vec_st(x80, byte_2complex, &(out[0]));
+    vec_st(x81, byte_4complex, &(out[0]));
+    vec_st(x82, byte_6complex, &(out[0]));
+    x83 = vec_perm(x47, x69, vcprm(0,1,s0,s1)); // z8.r  z8.i  z9.r  z9.i
+    x84 = vec_perm(x78, x69, vcprm(0,1,s2,s3)); // z10.r  z10.i  z11.r  z11.i
+    x85 = vec_perm(x47, x71, vcprm(2,3,s0,s1)); // z12.r  z12.i  z13.r  z13.i
+    x86 = vec_perm(x70, x78, vcprm(s2,s3,2,3)); // z14.r  z14.i  z15.r  z15.i
+    vec_st(x83, byte_8complex, &(out[0]));
+    vec_st(x84, byte_10complex, &(out[0]));
+    vec_st(x85, byte_12complex, &(out[0]));
+    vec_st(x86, byte_14complex, &(out[0]));
+}
+
+inline static void fft4_vsx(FFTComplex *z)
+{ // in-place: two vectors are loaded from z, combined in two stages, regrouped, and stored back
+    vec_f a, b, c, d;
+    float* out=  (float*)z; // z viewed as a flat float array
+    a = vec_ld(0, &(out[0]));
+    b = vec_ld(byte_2complex, &(out[0])); // byte_2complex is defined elsewhere; presumably the byte size of 2 FFTComplex
+    // stage 1: shuffle lanes of a/b into two operands, then take their sum and difference
+    c = vec_perm(a, b, vcprm(0,1,s2,s1));
+    d = vec_perm(a, b, vcprm(2,3,s0,s3));
+    a = vec_add(c, d);
+    b = vec_sub(c, d);
+    // stage 2: same pattern on the stage-1 results, with a different lane selection
+    c = vec_perm(a,b, vcprm(0,s0,1,s1));
+    d = vec_perm(a, b, vcprm(2,s3,3,s2));
+
+    a = vec_add(c, d);
+    b = vec_sub(c, d);
+    // final regroup: c takes lanes 0-1 of a and of b, d takes lanes 2-3 (assuming the s-prefixed indices select from the second operand - confirm against vcprm)
+    c = vec_perm(a, b, vcprm(0,1,s0,s1));
+    d = vec_perm(a, b, vcprm(2,3,s2,s3));
+
+    vec_st(c, 0, &(out[0])); // results overwrite the input at the same two offsets
+    vec_st(d, byte_2complex, &(out[0]));
+    return;
+}
+
+inline static void fft8_vsx(FFTComplex *z)
+{ // in-place: four vectors are loaded from z, combined, and stored back to the same offsets
+    vec_f vz0, vz1, vz2, vz3;
+    vec_f vz4, vz5, vz6, vz7, vz8;
+
+    float* out=  (float*)z;
+    vec_f vc0 = {0.0, 0.0, 0.0, 0.0}; // zero addend, turns the first vec_madd below into a plain multiply
+    vec_f vc1 = {-sqrthalf, sqrthalf, sqrthalf, -sqrthalf}; // per-lane signed factor for vz3
+    vec_f vc2 = {sqrthalf, sqrthalf, sqrthalf, sqrthalf}; // factor for the half-swapped copy vz8
+    // load the whole input as four vectors
+    vz0 = vec_ld(0, &(out[0]));
+    vz1 = vec_ld(byte_2complex, &(out[0]));
+    vz2 = vec_ld(byte_4complex, &(out[0]));
+    vz3 = vec_ld(byte_6complex, &(out[0]));
+    // shuffle lanes so that each add/sub pair below combines the right elements
+    vz6 = vec_perm(vz2, vz3, vcprm(0,s0,1,s1));
+    vz7 = vec_perm(vz2, vz3, vcprm(2,s2,3,s3));
+    vz4 = vec_perm(vz0, vz1, vcprm(0,1,s2,s1));
+    vz5 = vec_perm(vz0, vz1, vcprm(2,3,s0,s3));
+
+    vz2 = vec_add(vz6, vz7);
+    vz3 = vec_sub(vz6, vz7);
+    vz8 = vec_perm(vz3, vz3, vcprm(2,3,0,1)); // vz3 with its two halves swapped
+
+    vz0 = vec_add(vz4, vz5);
+    vz1 = vec_sub(vz4, vz5);
+
+    vz3 = vec_madd(vz3, vc1, vc0); // vz3 * vc1
+    vz3 = vec_madd(vz8, vc2, vz3); // + vz8 * sqrthalf
+
+    vz4 = vec_perm(vz0, vz1, vcprm(0,s0,1,s1));
+    vz5 = vec_perm(vz0, vz1, vcprm(2,s3,3,s2));
+    vz6 = vec_perm(vz2, vz3, vcprm(1,2,s3,s0));
+    vz7 = vec_perm(vz2, vz3, vcprm(0,3,s2,s1));
+    // second sum/difference stage
+    vz0 = vec_add(vz4, vz5);
+    vz1 = vec_sub(vz4, vz5);
+    vz2 = vec_add(vz6, vz7);
+    vz3 = vec_sub(vz6, vz7);
+
+    vz4 = vec_perm(vz0, vz1, vcprm(0,1,s0,s1));
+    vz5 = vec_perm(vz0, vz1, vcprm(2,3,s2,s3));
+    vz6 = vec_perm(vz2, vz3, vcprm(0,2,s1,s3));
+    vz7 = vec_perm(vz2, vz3, vcprm(1,3,s0,s2));
+
+    // final stage: sums go to the first two output vectors, differences to the last two
+    vz2 = vec_sub(vz4, vz6);
+    vz3 = vec_sub(vz5, vz7);
+
+    vz0 = vec_add(vz4, vz6);
+    vz1 = vec_add(vz5, vz7);
+
+    vec_st(vz0, 0, &(out[0]));
+    vec_st(vz1, byte_2complex, &(out[0]));
+    vec_st(vz2, byte_4complex, &(out[0]));
+    vec_st(vz3, byte_6complex, &(out[0]));
+    return;
+}
+
+inline static void fft16_vsx(FFTComplex *z)
+{ // in-place: eight vectors are loaded from z, combined, and stored back to the same offsets
+    float* out=  (float*)z;
+    vec_f vc0 = {0.0, 0.0, 0.0, 0.0}; // zero addend, turns vec_madd into a plain multiply
+    vec_f vc1 = {-sqrthalf, sqrthalf, sqrthalf, -sqrthalf};
+    vec_f vc2 = {sqrthalf, sqrthalf, sqrthalf, sqrthalf};
+    vec_f vc3 = {1.0, 0.92387953, sqrthalf, 0.38268343}; // cos(k*pi/8), k = 0..3 (assuming sqrthalf is sqrt(1/2))
+    vec_f vc4 = {0.0, 0.38268343, sqrthalf, 0.92387953}; // sin(k*pi/8), k = 0..3
+    vec_f vc5 = {-0.0, -0.38268343, -sqrthalf, -0.92387953}; // -sin(k*pi/8), k = 0..3
+
+    vec_f vz0, vz1, vz2, vz3;
+    vec_f vz4, vz5, vz6, vz7;
+    vec_f vz8, vz9, vz10, vz11;
+    vec_f vz12, vz13;
+    // upper part of the input first: the four vectors at byte_8complex..byte_14complex
+    vz0 = vec_ld(byte_8complex, &(out[0]));
+    vz1 = vec_ld(byte_10complex, &(out[0]));
+    vz2 = vec_ld(byte_12complex, &(out[0]));
+    vz3 = vec_ld(byte_14complex, &(out[0]));
+    // each pair (vz0,vz1) and (vz2,vz3) goes through the same perm/add/sub sequence as fft4_vsx
+    vz4 = vec_perm(vz0, vz1, vcprm(0,1,s2,s1));
+    vz5 = vec_perm(vz0, vz1, vcprm(2,3,s0,s3));
+    vz6 = vec_perm(vz2, vz3, vcprm(0,1,s2,s1));
+    vz7 = vec_perm(vz2, vz3, vcprm(2,3,s0,s3));
+
+    vz0 = vec_add(vz4, vz5);
+    vz1= vec_sub(vz4, vz5);
+    vz2 = vec_add(vz6, vz7);
+    vz3 = vec_sub(vz6, vz7);
+
+    vz4 = vec_perm(vz0, vz1, vcprm(0,s0,1,s1));
+    vz5 = vec_perm(vz0, vz1, vcprm(2,s3,3,s2));
+    vz6 = vec_perm(vz2, vz3, vcprm(0,s0,1,s1));
+    vz7 = vec_perm(vz2, vz3, vcprm(2,s3,3,s2));
+
+    vz0 = vec_add(vz4, vz5);
+    vz1 = vec_sub(vz4, vz5);
+    vz2 = vec_add(vz6, vz7);
+    vz3 = vec_sub(vz6, vz7);
+
+    vz4 = vec_perm(vz0, vz1, vcprm(0,1,s0,s1));
+    vz5 = vec_perm(vz0, vz1, vcprm(2,3,s2,s3));
+
+    vz6 = vec_perm(vz2, vz3, vcprm(0,1,s0,s1));
+    vz7 = vec_perm(vz2, vz3, vcprm(2,3,s2,s3));
+    // lower part: the four vectors at 0..byte_6complex, same sequence as fft8_vsx; vz4..vz7 stay live
+    vz0 = vec_ld(0, &(out[0]));
+    vz1 = vec_ld(byte_2complex, &(out[0]));
+    vz2 = vec_ld(byte_4complex, &(out[0]));
+    vz3 = vec_ld(byte_6complex, &(out[0]));
+    vz10 = vec_perm(vz2, vz3, vcprm(0,s0,1,s1));
+    vz11 = vec_perm(vz2, vz3, vcprm(2,s2,3,s3));
+    vz8 = vec_perm(vz0, vz1, vcprm(0,1,s2,s1));
+    vz9 = vec_perm(vz0, vz1, vcprm(2,3,s0,s3));
+
+    vz2 = vec_add(vz10, vz11);
+    vz3 = vec_sub(vz10, vz11);
+    vz12 = vec_perm(vz3, vz3, vcprm(2,3,0,1)); // vz3 with its two halves swapped
+    vz0 = vec_add(vz8, vz9);
+    vz1 = vec_sub(vz8, vz9);
+
+    vz3 = vec_madd(vz3, vc1, vc0); // vz3 * vc1
+    vz3 = vec_madd(vz12, vc2, vz3); // + vz12 * sqrthalf
+    vz8 = vec_perm(vz0, vz1, vcprm(0,s0,1,s1));
+    vz9 = vec_perm(vz0, vz1, vcprm(2,s3,3,s2));
+    vz10 = vec_perm(vz2, vz3, vcprm(1,2,s3,s0));
+    vz11 = vec_perm(vz2, vz3, vcprm(0,3,s2,s1));
+
+    vz0 = vec_add(vz8, vz9);
+    vz1 = vec_sub(vz8, vz9);
+    vz2 = vec_add(vz10, vz11);
+    vz3 = vec_sub(vz10, vz11);
+
+    vz8 = vec_perm(vz0, vz1, vcprm(0,1,s0,s1));
+    vz9 = vec_perm(vz0, vz1, vcprm(2,3,s2,s3));
+    vz10 = vec_perm(vz2, vz3, vcprm(0,2,s1,s3));
+    vz11 = vec_perm(vz2, vz3, vcprm(1,3,s0,s2));
+
+    vz2 = vec_sub(vz8, vz10);
+    vz3 = vec_sub(vz9, vz11);
+    vz0 = vec_add(vz8, vz10);
+    vz1 = vec_add(vz9, vz11);
+    // weight the upper-part results vz4..vz7 with the cos/sin tables vc3/vc4/vc5
+    vz8 = vec_madd(vz4, vc3, vc0); // vz4*cos
+    vz9 = vec_madd(vz5, vc3, vc0); // vz5*cos
+    vz10 = vec_madd(vz6, vc3, vc0); // vz6*cos
+    vz11 = vec_madd(vz7, vc3, vc0); // vz7*cos
+
+    vz8 = vec_madd(vz5, vc4, vz8); // vz4*cos + vz5*sin
+    vz9 = vec_madd(vz4, vc5, vz9); // vz5*cos - vz4*sin
+    vz10 = vec_madd(vz7, vc5, vz10); // vz6*cos - vz7*sin
+    vz11 = vec_madd(vz6, vc4, vz11); // vz7*cos + vz6*sin
+
+    vz12 = vec_sub(vz10, vz8);
+    vz10 = vec_add(vz10, vz8);
+
+    vz13 = vec_sub(vz9, vz11);
+    vz11 = vec_add(vz9, vz11);
+    // final stage: sums land in output vectors 0..3 (vz0..vz3), differences in 4..7 (vz4..vz7)
+    vz4 = vec_sub(vz0, vz10);
+    vz0 = vec_add(vz0, vz10);
+
+    vz7= vec_sub(vz3, vz12);
+    vz3= vec_add(vz3, vz12);
+
+    vz5 = vec_sub(vz1, vz11);
+    vz1 = vec_add(vz1, vz11);
+
+    vz6 = vec_sub(vz2, vz13);
+    vz2 = vec_add(vz2, vz13);
+
+    vec_st(vz0, 0, &(out[0]));
+    vec_st(vz1, byte_2complex, &(out[0]));
+    vec_st(vz2, byte_4complex, &(out[0]));
+    vec_st(vz3, byte_6complex, &(out[0]));
+    vec_st(vz4, byte_8complex, &(out[0]));
+    vec_st(vz5, byte_10complex, &(out[0]));
+    vec_st(vz6, byte_12complex, &(out[0]));
+    vec_st(vz7, byte_14complex, &(out[0]));
+    return;
+
+}
+inline static void pass_vsx(FFTComplex * z, const FFTSample * wre, unsigned int n)
+{ // in-place combine of four regions of z at element offsets 0, 2n, 4n, 6n, weighted by the table at wre
+    int o1 = n<<1; // 2n, in FFTComplex units (scaled to bytes below)
+    int o2 = n<<2; // 4n
+    int o3 = o1+o2; // 6n
+    int i1, i2, i3;
+    FFTSample* out = (FFTSample*)z;
+    const FFTSample *wim = wre+o1; // second table pointer starts 2n samples after wre and is walked backwards
+    vec_f v0, v1, v2, v3;
+    vec_f v4, v5, v6, v7;
+    vec_f v8, v9, v10, v11;
+    vec_f v12, v13;
+
+    n = n-2; // first step is peeled below; the loop ends when n-=2 hits 0, so n must be even and >= 4 on entry (unsigned wrap otherwise)
+    i1 = o1*sizeof(FFTComplex); // byte offsets of regions 1..3 relative to out
+    i2 = o2*sizeof(FFTComplex);
+    i3 = o3*sizeof(FFTComplex);
+    // peeled first step: same body as the loop below, without the pointer advance
+    v8 = vec_ld(0, &(wre[0])); // wre[0..3]
+    v10 = vec_ld(0, &(wim[0]));
+    v9 = vec_ld(0, &(wim[-4]));
+    v9 = vec_perm(v9, v10, vcprm(s0,3,2,1)); // presumably {wim[0], wim[-1], wim[-2], wim[-3]} - confirm vcprm lane naming
+
+    v4 = vec_ld(i2, &(out[0])); // r2
+    v5 = vec_ld(i2+16, &(out[0])); // i2
+    v6 = vec_ld(i3, &(out[0])); // r3
+    v7 = vec_ld(i3+16, &(out[0])); // i3
+    v10 = vec_mul(v4, v8); // r2*wre
+    v11 = vec_mul(v5, v8); // i2*wre
+    v12 = vec_mul(v6, v8); // r3*wre
+    v13 = vec_mul(v7, v8); // i3*wre
+
+    v0 = vec_ld(0, &(out[0])); // r0
+    v3 = vec_ld(i1+16, &(out[0])); // i1
+    v10 = vec_madd(v5, v9, v10); // r2*wre + i2*wim
+    v11 = vec_nmsub(v4, v9, v11); // i2*wre - r2*wim
+    v12 = vec_nmsub(v7, v9, v12); // r3*wre - i3*wim
+    v13 = vec_madd(v6, v9, v13); // i3*wre + r3*wim
+
+    v1 = vec_ld(16, &(out[0])); // i0
+    v2 = vec_ld(i1, &(out[0])); // r1
+    v8 = vec_sub(v12, v10);
+    v12 = vec_add(v12, v10);
+    v9 = vec_sub(v11, v13);
+    v13 = vec_add(v11, v13);
+    v4 = vec_sub(v0, v12);
+    v0 = vec_add(v0, v12);
+    v7 = vec_sub(v3, v8);
+    v3 = vec_add(v3, v8);
+
+    vec_st(v0, 0, &(out[0])); // r0
+    vec_st(v3, i1+16, &(out[0])); // i1
+    vec_st(v4, i2, &(out[0])); // r2
+    vec_st(v7, i3+16, &(out[0]));// i3
+
+    v5 = vec_sub(v1, v13);
+    v1 = vec_add(v1, v13);
+    v6 = vec_sub(v2, v9);
+    v2 = vec_add(v2, v9);
+
+    vec_st(v1, 16, &(out[0])); // i0
+    vec_st(v2, i1, &(out[0])); // r1
+    vec_st(v5, i2+16, &(out[0])); // i2
+    vec_st(v6, i3, &(out[0])); // r3
+
+    do {
+        out += 8; // next two vectors (8 samples) in every region
+        wre += 4; // forward through the first table
+        wim -= 4; // backward through the second table
+
+        v8 = vec_ld(0, &(wre[0]));
+        v10 = vec_ld(0, &(wim[0]));
+        v9 = vec_ld(0, &(wim[-4]));
+        v9 = vec_perm(v9, v10, vcprm(s0,3,2,1));
+
+        v4 = vec_ld(i2, &(out[0])); // r2
+        v5 = vec_ld(i2+16, &(out[0])); // i2
+        v6 = vec_ld(i3, &(out[0])); // r3
+        v7 = vec_ld(i3+16, &(out[0]));// i3
+        v10 = vec_mul(v4, v8); // r2*wre
+        v11 = vec_mul(v5, v8); // i2*wre
+        v12 = vec_mul(v6, v8); // r3*wre
+        v13 = vec_mul(v7, v8); // i3*wre
+
+        v0 = vec_ld(0, &(out[0])); // r0
+        v3 = vec_ld(i1+16, &(out[0])); // i1
+        v10 = vec_madd(v5, v9, v10); // r2*wre + i2*wim
+        v11 = vec_nmsub(v4, v9, v11); // i2*wre - r2*wim
+        v12 = vec_nmsub(v7, v9, v12); // r3*wre - i3*wim
+        v13 = vec_madd(v6, v9, v13); // i3*wre + r3*wim
+
+        v1 = vec_ld(16, &(out[0])); // i0
+        v2 = vec_ld(i1, &(out[0])); // r1
+        v8 = vec_sub(v12, v10);
+        v12 = vec_add(v12, v10);
+        v9 = vec_sub(v11, v13);
+        v13 = vec_add(v11, v13);
+        v4 = vec_sub(v0, v12);
+        v0 = vec_add(v0, v12);
+        v7 = vec_sub(v3, v8);
+        v3 = vec_add(v3, v8);
+
+        vec_st(v0, 0, &(out[0])); // r0
+        vec_st(v3, i1+16, &(out[0])); // i1
+        vec_st(v4, i2, &(out[0])); // r2
+        vec_st(v7, i3+16, &(out[0])); // i3
+
+        v5 = vec_sub(v1, v13);
+        v1 = vec_add(v1, v13);
+        v6 = vec_sub(v2, v9);
+        v2 = vec_add(v2, v9);
+
+        vec_st(v1, 16, &(out[0])); // i0
+        vec_st(v2, i1, &(out[0])); // r1
+        vec_st(v5, i2+16, &(out[0])); // i2
+        vec_st(v6, i3, &(out[0])); // r3
+    } while (n-=2);
+}
+
+#endif
+
+#endif /* AVCODEC_PPC_FFT_VSX_H */
diff --git a/libavcodec/ppc/fmtconvert_altivec.c b/libavcodec/ppc/fmtconvert_altivec.c
index 14aaf6b..10a7169 100644
--- a/libavcodec/ppc/fmtconvert_altivec.c
+++ b/libavcodec/ppc/fmtconvert_altivec.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/h264chroma_init.c b/libavcodec/ppc/h264chroma_init.c
index 6d656d4..876efec 100644
--- a/libavcodec/ppc/h264chroma_init.c
+++ b/libavcodec/ppc/h264chroma_init.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/h264chroma_template.c b/libavcodec/ppc/h264chroma_template.c
index 293fef5..7436e11 100644
--- a/libavcodec/ppc/h264chroma_template.c
+++ b/libavcodec/ppc/h264chroma_template.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/h264dsp.c b/libavcodec/ppc/h264dsp.c
index 31dc141..7fc7e0b 100644
--- a/libavcodec/ppc/h264dsp.c
+++ b/libavcodec/ppc/h264dsp.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/h264qpel.c b/libavcodec/ppc/h264qpel.c
index f840277..4a01f17 100644
--- a/libavcodec/ppc/h264qpel.c
+++ b/libavcodec/ppc/h264qpel.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/h264qpel_template.c b/libavcodec/ppc/h264qpel_template.c
index fe83146..360e9e3 100644
--- a/libavcodec/ppc/h264qpel_template.c
+++ b/libavcodec/ppc/h264qpel_template.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/hpeldsp_altivec.c b/libavcodec/ppc/hpeldsp_altivec.c
index fd6ae73..7c3b5a1 100644
--- a/libavcodec/ppc/hpeldsp_altivec.c
+++ b/libavcodec/ppc/hpeldsp_altivec.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2002 Dieter Shirley
  * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/hpeldsp_altivec.h b/libavcodec/ppc/hpeldsp_altivec.h
index 98dd80e..590809f 100644
--- a/libavcodec/ppc/hpeldsp_altivec.h
+++ b/libavcodec/ppc/hpeldsp_altivec.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2002 Dieter Shirley
  * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/huffyuvdsp_altivec.c b/libavcodec/ppc/huffyuvdsp_altivec.c
index 7c34a67..6701524 100644
--- a/libavcodec/ppc/huffyuvdsp_altivec.c
+++ b/libavcodec/ppc/huffyuvdsp_altivec.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2002 Dieter Shirley
  * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -33,7 +33,7 @@
 #include "libavcodec/huffyuvdsp.h"
 
 #if HAVE_ALTIVEC
-static void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w)
+static void add_bytes_altivec(uint8_t *dst, uint8_t *src, intptr_t w)
 {
     register int i;
     register vector unsigned char vdst, vsrc;
diff --git a/libavcodec/ppc/idctdsp.c b/libavcodec/ppc/idctdsp.c
index 17f7dbb..5ef514b 100644
--- a/libavcodec/ppc/idctdsp.c
+++ b/libavcodec/ppc/idctdsp.c
@@ -1,28 +1,28 @@
 /*
  * Copyright (c) 2001 Michel Lespinasse
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /* NOTE: This code is based on GPL code from the libmpeg2 project.  The
  * author, Michel Lespinasses, has given explicit permission to release
- * under LGPL as part of Libav.
+ * under LGPL as part of FFmpeg.
  *
- * Libav integration by Dieter Shirley
+ * FFmpeg integration by Dieter Shirley
  *
  * This file is a direct copy of the AltiVec IDCT module from the libmpeg2
  * project.  I've deleted all of the libmpeg2-specific code, renamed the
@@ -153,6 +153,22 @@ static const vec_s16 constants[5] = {
     { 19266, 26722, 25172, 22654,  19266,  22654, 25172, 26722 }
 };
 
+static void idct_altivec(int16_t *blk)
+{
+    vec_s16 *block = (vec_s16 *) blk; /* access blk as vec_s16 vectors; block[0..7] are written below */
+
+    IDCT; /* macro defined outside this hunk; presumably computes vx0..vx7 from block - confirm */
+
+    block[0] = vx0; /* store the eight result vectors back into blk (no separate destination buffer) */
+    block[1] = vx1;
+    block[2] = vx2;
+    block[3] = vx3;
+    block[4] = vx4;
+    block[5] = vx5;
+    block[6] = vx6;
+    block[7] = vx7;
+}
+
 static void idct_put_altivec(uint8_t *dest, int stride, int16_t *blk)
 {
     vec_s16 *block = (vec_s16 *) blk;
@@ -234,9 +250,10 @@ av_cold void ff_idctdsp_init_ppc(IDCTDSPContext *c, AVCodecContext *avctx,
     if (!PPC_ALTIVEC(av_get_cpu_flags()))
         return;
 
-    if (!high_bit_depth) {
-        if ((avctx->idct_algo == FF_IDCT_AUTO) ||
+    if (!high_bit_depth && avctx->lowres == 0) {
+        if ((avctx->idct_algo == FF_IDCT_AUTO && !(avctx->flags & CODEC_FLAG_BITEXACT)) ||
             (avctx->idct_algo == FF_IDCT_ALTIVEC)) {
+            c->idct      = idct_altivec;
             c->idct_add  = idct_add_altivec;
             c->idct_put  = idct_put_altivec;
             c->perm_type = FF_IDCT_PERM_TRANSPOSE;
diff --git a/libavcodec/ppc/apedsp_altivec.c b/libavcodec/ppc/lossless_audiodsp_altivec.c
index d8bf4bd..1ebb0f4 100644
--- a/libavcodec/ppc/apedsp_altivec.c
+++ b/libavcodec/ppc/lossless_audiodsp_altivec.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2007 Luca Barbato <lu_zero@gentoo.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,7 +27,7 @@
 #include "libavutil/cpu.h"
 #include "libavutil/ppc/cpu.h"
 #include "libavutil/ppc/types_altivec.h"
-#include "libavcodec/apedsp.h"
+#include "libavcodec/lossless_audiodsp.h"
 
 #if HAVE_ALTIVEC
 static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1,
@@ -71,7 +71,7 @@ static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1,
 }
 #endif /* HAVE_ALTIVEC */
 
-av_cold void ff_apedsp_init_ppc(APEDSPContext *c)
+av_cold void ff_llauddsp_init_ppc(LLAudDSPContext *c)
 {
 #if HAVE_ALTIVEC
     if (!PPC_ALTIVEC(av_get_cpu_flags()))
diff --git a/libavcodec/ppc/mathops.h b/libavcodec/ppc/mathops.h
index 34ddb11..dbd714f 100644
--- a/libavcodec/ppc/mathops.h
+++ b/libavcodec/ppc/mathops.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2001, 2002 Fabrice Bellard
  * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/me_cmp.c b/libavcodec/ppc/me_cmp.c
index 88c7fea..18f2c6e 100644
--- a/libavcodec/ppc/me_cmp.c
+++ b/libavcodec/ppc/me_cmp.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2002 Dieter Shirley
  * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/mpegaudiodsp_altivec.c b/libavcodec/ppc/mpegaudiodsp_altivec.c
index c37f8ec..ddfe5dc 100644
--- a/libavcodec/ppc/mpegaudiodsp_altivec.c
+++ b/libavcodec/ppc/mpegaudiodsp_altivec.c
@@ -2,20 +2,20 @@
  * Altivec optimized MP3 decoding functions
  * Copyright (c) 2010 Vitor Sessak
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/mpegvideo_altivec.c b/libavcodec/ppc/mpegvideo_altivec.c
index 98ef143..06eed2f 100644
--- a/libavcodec/ppc/mpegvideo_altivec.c
+++ b/libavcodec/ppc/mpegvideo_altivec.c
@@ -4,20 +4,20 @@
  * dct_unquantize_h263_altivec:
  * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/mpegvideodsp.c b/libavcodec/ppc/mpegvideodsp.c
index 2bdf909..7696954 100644
--- a/libavcodec/ppc/mpegvideodsp.c
+++ b/libavcodec/ppc/mpegvideodsp.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -66,7 +66,7 @@ static void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */,
                                                    vec_lvsl(0, src));
 
     if (src_really_odd != 0x0000000F)
-        /* If src & 0xF == 0xF, then (src + 1) is properly aligned
+        /* If (src & 0xF) == 0xF, then (src + 1) is properly aligned
          * on the second vector. */
         srcvB = vec_perm(src_0, src_1, vec_lvsl(1, src));
     else
@@ -88,7 +88,7 @@ static void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */,
         srcvC = vec_perm(src_0, src_1, vec_lvsl(stride + 0, src));
 
         if (src_really_odd != 0x0000000F)
-            /* If src & 0xF == 0xF, then (src + 1) is properly aligned
+            /* If (src & 0xF) == 0xF, then (src + 1) is properly aligned
              * on the second vector. */
             srcvD = vec_perm(src_0, src_1, vec_lvsl(stride + 1, src));
         else
diff --git a/libavcodec/ppc/mpegvideoencdsp.c b/libavcodec/ppc/mpegvideoencdsp.c
index b5348e6..00ae2a6 100644
--- a/libavcodec/ppc/mpegvideoencdsp.c
+++ b/libavcodec/ppc/mpegvideoencdsp.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/pixblockdsp.c b/libavcodec/ppc/pixblockdsp.c
index 698d655..42c5be8 100644
--- a/libavcodec/ppc/pixblockdsp.c
+++ b/libavcodec/ppc/pixblockdsp.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2002 Dieter Shirley
  * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/svq1enc_altivec.c b/libavcodec/ppc/svq1enc_altivec.c
index 564f129..4e25e25 100644
--- a/libavcodec/ppc/svq1enc_altivec.c
+++ b/libavcodec/ppc/svq1enc_altivec.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2007 Luca Barbato <lu_zero@gentoo.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/vc1dsp_altivec.c b/libavcodec/ppc/vc1dsp_altivec.c
index 90c3d27..2128b56 100644
--- a/libavcodec/ppc/vc1dsp_altivec.c
+++ b/libavcodec/ppc/vc1dsp_altivec.c
@@ -2,20 +2,20 @@
  * VC-1 and WMV3 decoder - DSP functions AltiVec-optimized
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/videodsp_ppc.c b/libavcodec/ppc/videodsp_ppc.c
index b9e003b..9157022 100644
--- a/libavcodec/ppc/videodsp_ppc.c
+++ b/libavcodec/ppc/videodsp_ppc.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2003-2004 Romain Dolbeau
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/vorbisdsp_altivec.c b/libavcodec/ppc/vorbisdsp_altivec.c
index 43f4d03..d7557c8 100644
--- a/libavcodec/ppc/vorbisdsp_altivec.c
+++ b/libavcodec/ppc/vorbisdsp_altivec.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/vp3dsp_altivec.c b/libavcodec/ppc/vp3dsp_altivec.c
index bce49e3..9d81b3f 100644
--- a/libavcodec/ppc/vp3dsp_altivec.c
+++ b/libavcodec/ppc/vp3dsp_altivec.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2009 David Conrad
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ppc/vp8dsp_altivec.c b/libavcodec/ppc/vp8dsp_altivec.c
index e010dee..91ff8cc 100644
--- a/libavcodec/ppc/vp8dsp_altivec.c
+++ b/libavcodec/ppc/vp8dsp_altivec.c
@@ -3,20 +3,20 @@
  *
  * Copyright (C) 2010 David Conrad
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/proresdata.c b/libavcodec/proresdata.c
index fcaf32a..9849b5c 100644
--- a/libavcodec/proresdata.c
+++ b/libavcodec/proresdata.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2010-2011 Maxim Poliakovski
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/proresdata.h b/libavcodec/proresdata.h
index 1e5d05e..ee8278d 100644
--- a/libavcodec/proresdata.h
+++ b/libavcodec/proresdata.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2010-2011 Maxim Poliakovski
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/proresdec.h b/libavcodec/proresdec.h
new file mode 100644
index 0000000..14ede5d
--- /dev/null
+++ b/libavcodec/proresdec.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2010-2011 Maxim Poliakovski
+ * Copyright (c) 2010-2011 Elvis Presley
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_PRORESDEC_H
+#define AVCODEC_PRORESDEC_H
+
+#include "blockdsp.h"
+#include "proresdsp.h"
+
+typedef struct {
+    const uint8_t *data;         ///< start of this slice's data inside the picture buffer
+    unsigned mb_x;               ///< horizontal position of the slice's first macroblock
+    unsigned mb_y;               ///< macroblock row the slice starts in
+    unsigned mb_count;           ///< number of macroblocks in the slice
+    unsigned data_size;          ///< size of the slice data in bytes
+    int ret;                     ///< presumably the per-slice decoding status; its use is not visible here
+} SliceContext;
+
+typedef struct {
+    BlockDSPContext bdsp;        ///< set up by ff_blockdsp_init() in decode_init()
+    ProresDSPContext prodsp;     ///< set up by ff_proresdsp_init(); also supplies the IDCT permutation
+    AVFrame *frame;              ///< presumably the frame being decoded; assigned outside this header
+    int frame_type;              ///< 0 = progressive, 1 = tff, 2 = bff
+    uint8_t qmat_luma[64];       ///< luma quantisation matrix, stored in IDCT-permuted order
+    uint8_t qmat_chroma[64];     ///< chroma quantisation matrix, stored in IDCT-permuted order
+    SliceContext *slices;        ///< array of slice_count slice descriptors
+    int slice_count;             ///< number of slices in the current picture
+    unsigned mb_width;           ///< width of the current picture in mb
+    unsigned mb_height;          ///< height of the current picture in mb
+    uint8_t progressive_scan[64];///< progressive scan table after IDCT permutation
+    uint8_t interlaced_scan[64]; ///< interlaced scan table after IDCT permutation
+    const uint8_t *scan;         ///< scan table in use: progressive_scan or interlaced_scan
+    int first_field;             ///< NOTE(review): presumably marks the first field of an interlaced frame - confirm
+    int alpha_info;              ///< alpha mode from the frame header (0..2); 0 means no alpha
+} ProresContext;
+
+#endif /* AVCODEC_PRORESDEC_H */
diff --git a/libavcodec/proresdec2.c b/libavcodec/proresdec2.c
new file mode 100644
index 0000000..4d04a0a
--- /dev/null
+++ b/libavcodec/proresdec2.c
@@ -0,0 +1,689 @@
+/*
+ * Copyright (c) 2010-2011 Maxim Poliakovski
+ * Copyright (c) 2010-2011 Elvis Presley
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Known FOURCCs: 'apch' (HQ), 'apcn' (SD), 'apcs' (LT), 'apco' (Proxy), 'ap4h' (4444)
+ */
+
+//#define DEBUG
+
+#define LONG_BITSTREAM_READER
+
+#include "avcodec.h"
+#include "get_bits.h"
+#include "idctdsp.h"
+#include "internal.h"
+#include "simple_idct.h"
+#include "proresdec.h"
+#include "proresdata.h"
+
+static void permute(uint8_t *dst, const uint8_t *src, const uint8_t permutation[64])
+{
+    int i; /* permute(): remap each of the 64 entries of src through the permutation table into dst */
+    for (i = 0; i < 64; i++)
+        dst[i] = permutation[src[i]]; /* src[i] is used as an index, so it is expected to be below 64 */
+}
+
+static av_cold int decode_init(AVCodecContext *avctx)
+{
+    ProresContext *ctx = avctx->priv_data;
+    uint8_t idct_permutation[64];
+    /* Decoder setup: initialise the DSP contexts and build both permuted scan tables; always returns 0. */
+    avctx->bits_per_raw_sample = 10;
+
+    ff_blockdsp_init(&ctx->bdsp, avctx);
+    ff_proresdsp_init(&ctx->prodsp, avctx);
+
+    ff_init_scantable_permutation(idct_permutation, /* fill the local table for prodsp's permutation type */
+                                  ctx->prodsp.idct_permutation_type);
+
+    permute(ctx->progressive_scan, ff_prores_progressive_scan, idct_permutation); /* scan[i] = idct_permutation[base_scan[i]] */
+    permute(ctx->interlaced_scan, ff_prores_interlaced_scan, idct_permutation);
+
+    return 0;
+}
+
+static int decode_frame_header(ProresContext *ctx, const uint8_t *buf,
+                               const int data_size, AVCodecContext *avctx)
+{
+    int hdr_size, width, height, flags;
+    int version;
+    const uint8_t *ptr;
+    /* Parse the frame header at buf; returns its size (hdr_size) or a negative AVERROR code. */
+    hdr_size = AV_RB16(buf); /* 16-bit big-endian header size at offset 0 */
+    av_dlog(avctx, "header size %d\n", hdr_size);
+    if (hdr_size > data_size) {
+        av_log(avctx, AV_LOG_ERROR, "error, wrong header size\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    version = AV_RB16(buf + 2); /* version field at offset 2; values above 1 are rejected */
+    av_dlog(avctx, "%.4s version %d\n", buf+4, version);
+    if (version > 1) {
+        av_log(avctx, AV_LOG_ERROR, "unsupported version: %d\n", version);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    width  = AV_RB16(buf + 8);  /* coded dimensions at offsets 8 and 10 */
+    height = AV_RB16(buf + 10);
+    if (width != avctx->width || height != avctx->height) { /* a size differing from the context is not supported */
+        av_log(avctx, AV_LOG_ERROR, "picture resolution change: %dx%d -> %dx%d\n",
+               avctx->width, avctx->height, width, height);
+        return AVERROR_PATCHWELCOME;
+    }
+
+    ctx->frame_type = (buf[12] >> 2) & 3; /* bits 2-3 of byte 12 */
+    ctx->alpha_info = buf[17] & 0xf;      /* low nibble of byte 17 */
+
+    if (ctx->alpha_info > 2) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid alpha mode %d\n", ctx->alpha_info);
+        return AVERROR_INVALIDDATA;
+    }
+    if (avctx->skip_alpha) ctx->alpha_info = 0; /* skip_alpha set: treat the stream as having no alpha */
+
+    av_dlog(avctx, "frame type %d\n", ctx->frame_type);
+
+    if (ctx->frame_type == 0) {
+        ctx->scan = ctx->progressive_scan; // permuted
+    } else {
+        ctx->scan = ctx->interlaced_scan; // permuted
+        ctx->frame->interlaced_frame = 1; /* NOTE(review): not reset in the progressive branch; presumably the frame starts cleared - confirm */
+        ctx->frame->top_field_first = ctx->frame_type == 1;
+    }
+
+    if (ctx->alpha_info) { /* top two bits of byte 12 both set selects 4:4:4, otherwise 4:2:2 */
+        avctx->pix_fmt = (buf[12] & 0xC0) == 0xC0 ? AV_PIX_FMT_YUVA444P10 : AV_PIX_FMT_YUVA422P10;
+    } else {
+        avctx->pix_fmt = (buf[12] & 0xC0) == 0xC0 ? AV_PIX_FMT_YUV444P10 : AV_PIX_FMT_YUV422P10;
+    }
+    /* NOTE(review): bytes up to buf[19] are read without a length check here; presumably the caller guarantees a minimum size - confirm. */
+    ptr   = buf + 20; /* optional quantisation matrices start at offset 20 */
+    flags = buf[19];  /* bit 1: luma matrix present, bit 0: chroma matrix present */
+    av_dlog(avctx, "flags %x\n", flags);
+
+    if (flags & 2) {
+        if(buf + data_size - ptr < 64) {
+            av_log(avctx, AV_LOG_ERROR, "Header truncated\n");
+            return AVERROR_INVALIDDATA;
+        }
+        permute(ctx->qmat_luma, ctx->prodsp.idct_permutation, ptr); /* qmat_luma[i] = ptr[idct_permutation[i]] */
+        ptr += 64;
+    } else {
+        memset(ctx->qmat_luma, 4, 64); /* no custom matrix: every entry is set to 4 */
+    }
+
+    if (flags & 1) {
+        if(buf + data_size - ptr < 64) {
+            av_log(avctx, AV_LOG_ERROR, "Header truncated\n");
+            return AVERROR_INVALIDDATA;
+        }
+        permute(ctx->qmat_chroma, ctx->prodsp.idct_permutation, ptr); /* qmat_chroma[i] = ptr[idct_permutation[i]] */
+    } else {
+        memset(ctx->qmat_chroma, 4, 64); /* no custom matrix: every entry is set to 4 */
+    }
+
+    return hdr_size;
+}
+
+static int decode_picture_header(AVCodecContext *avctx, const uint8_t *buf, const int buf_size)
+{
+    ProresContext *ctx = avctx->priv_data;
+    int i, hdr_size, slice_count;
+    unsigned pic_data_size;
+    int log2_slice_mb_width, log2_slice_mb_height;
+    int slice_mb_count, mb_x, mb_y;
+    const uint8_t *data_ptr, *index_ptr;
+    /* Validate the picture header and fill ctx->slices; returns pic_data_size or a negative AVERROR code. */
+    hdr_size = buf[0] >> 3; /* upper 5 bits of byte 0: picture header size in bytes */
+    if (hdr_size < 8 || hdr_size > buf_size) {
+        av_log(avctx, AV_LOG_ERROR, "error, wrong picture header size\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    pic_data_size = AV_RB32(buf + 1); /* 32-bit big-endian picture data size at offset 1 */
+    if (pic_data_size > buf_size) {
+        av_log(avctx, AV_LOG_ERROR, "error, wrong picture data size\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    log2_slice_mb_width  = buf[7] >> 4;  /* byte 7 high nibble: log2 of the slice width in macroblocks */
+    log2_slice_mb_height = buf[7] & 0xF; /* byte 7 low nibble: log2 of the slice height in macroblocks */
+    if (log2_slice_mb_width > 3 || log2_slice_mb_height) { /* only slices up to 8 wide and exactly 1 high are accepted */
+        av_log(avctx, AV_LOG_ERROR, "unsupported slice resolution: %dx%d\n",
+               1 << log2_slice_mb_width, 1 << log2_slice_mb_height);
+        return AVERROR_INVALIDDATA;
+    }
+
+    ctx->mb_width  = (avctx->width  + 15) >> 4; /* round up to whole 16-pixel macroblocks */
+    if (ctx->frame_type)
+        ctx->mb_height = (avctx->height + 31) >> 5; /* interlaced: rows per 32 lines, presumably one field's worth */
+    else
+        ctx->mb_height = (avctx->height + 15) >> 4;
+
+    slice_count = AV_RB16(buf + 5); /* number of slices, 16-bit big-endian at offset 5 */
+
+    if (ctx->slice_count != slice_count || !ctx->slices) { /* reallocate only when the count changed or nothing is allocated */
+        av_freep(&ctx->slices);
+        ctx->slices = av_mallocz_array(slice_count, sizeof(*ctx->slices));
+        if (!ctx->slices)
+            return AVERROR(ENOMEM);
+        ctx->slice_count = slice_count;
+    }
+
+    if (!slice_count) /* NOTE(review): a zero count is rejected only after the allocation above */
+        return AVERROR(EINVAL);
+
+    if (hdr_size + slice_count*2 > buf_size) { /* the index table takes 2 bytes per slice right after the header */
+        av_log(avctx, AV_LOG_ERROR, "error, wrong slice count\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    // parse slice information
+    index_ptr = buf + hdr_size;
+    data_ptr  = index_ptr + slice_count*2; /* slice payloads follow the index table */
+
+    slice_mb_count = 1 << log2_slice_mb_width; /* nominal slice width in macroblocks */
+    mb_x = 0;
+    mb_y = 0;
+
+    for (i = 0; i < slice_count; i++) {
+        SliceContext *slice = &ctx->slices[i];
+
+        slice->data = data_ptr;
+        data_ptr += AV_RB16(index_ptr + i*2); /* advance by this slice's size from the index table */
+
+        while (ctx->mb_width - mb_x < slice_mb_count) /* halve the width until it fits in the rest of the row */
+            slice_mb_count >>= 1;
+
+        slice->mb_x = mb_x;
+        slice->mb_y = mb_y;
+        slice->mb_count = slice_mb_count;
+        slice->data_size = data_ptr - slice->data;
+
+        if (slice->data_size < 6) { /* anything shorter than 6 bytes is rejected */
+            av_log(avctx, AV_LOG_ERROR, "error, wrong slice data size\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        mb_x += slice_mb_count;
+        if (mb_x == ctx->mb_width) { /* row complete: restart with the nominal width on the next row */
+            slice_mb_count = 1 << log2_slice_mb_width;
+            mb_x = 0;
+            mb_y++;
+        }
+        if (data_ptr > buf + buf_size) { /* the slice must end inside the buffer */
+            av_log(avctx, AV_LOG_ERROR, "error, slice out of bounds\n");
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    if (mb_x || mb_y != ctx->mb_height) { /* the slices must end exactly at the last macroblock row */
+        av_log(avctx, AV_LOG_ERROR, "error wrong mb count y %d h %d\n",
+               mb_y, ctx->mb_height);
+        return AVERROR_INVALIDDATA;
+    }
+
+    return pic_data_size;
+}
+
+#define DECODE_CODEWORD(val, codebook)                                  \
+    do {                                                                \
+        unsigned int rice_order, exp_order, switch_bits;                \
+        unsigned int q, buf, bits;                                      \
+        /* relies on the reader "re" and "gb" being in scope where the macro is expanded */ \
+        UPDATE_CACHE(re, gb);                                           \
+        buf = GET_CACHE(re, gb);                                        \
+        /* codebook bits: 0-1 switch_bits, 2-4 exp_order, 5 and up rice_order */ \
+        /* number of bits to switch between rice and exp golomb */      \
+        switch_bits =  codebook & 3;                                    \
+        rice_order  =  codebook >> 5;                                   \
+        exp_order   = (codebook >> 2) & 7;                              \
+        /* NOTE(review): q looks like the leading-zero count of buf -- confirm av_log2() */ \
+        q = 31 - av_log2(buf);                                          \
+                                                                        \
+        if (q > switch_bits) { /* exp golomb */                         \
+            bits = exp_order - switch_bits + (q<<1);                    \
+            val = SHOW_UBITS(re, gb, bits) - (1 << exp_order) +         \
+                ((switch_bits + 1) << rice_order);                      \
+            SKIP_BITS(re, gb, bits);                                    \
+        } else if (rice_order) { /* rice: q scaled by rice_order plus rice_order extra bits */ \
+            SKIP_BITS(re, gb, q+1);                                     \
+            val = (q << rice_order) + SHOW_UBITS(re, gb, rice_order);   \
+            SKIP_BITS(re, gb, rice_order);                              \
+        } else { /* rice_order is 0: the value is q itself */           \
+            val = q;                                                    \
+            SKIP_BITS(re, gb, q+1);                                     \
+        }                                                               \
+    } while (0)
+
+#define TOSIGNED(x) (((x) >> 1) ^ (-((x) & 1))) /* even x -> x / 2, odd x -> -(x + 1) / 2 */
+/* codebook passed to DECODE_CODEWORD for the first DC coefficient of a slice */
+#define FIRST_DC_CB 0xB8
+/* codebooks for the following DC codes, indexed by the previous code capped at 6 */
+static const uint8_t dc_codebook[7] = { 0x04, 0x28, 0x28, 0x4D, 0x4D, 0x70, 0x70};
+
+static av_always_inline void decode_dc_coeffs(GetBitContext *gb, int16_t *out,
+                                              int blocks_per_slice)
+{
+    int16_t prev_dc;
+    int code, i, sign;
+    /* Read blocks_per_slice DC values from gb; the k-th one is written to out[64 * k]. */
+    OPEN_READER(re, gb);
+    /* the first DC has its own codebook and is stored as TOSIGNED(code) */
+    DECODE_CODEWORD(code, FIRST_DC_CB);
+    prev_dc = TOSIGNED(code);
+    out[0] = prev_dc;
+
+    out += 64; // dc coeff for the next block
+    /* later DCs are deltas to the previous DC; codebook selection starts as if code were 5 */
+    code = 5;
+    sign = 0;
+    for (i = 1; i < blocks_per_slice; i++, out += 64) {
+        DECODE_CODEWORD(code, dc_codebook[FFMIN(code, 6U)]);
+        if(code) sign ^= -(code & 1); /* an odd code toggles sign between 0 and -1 */
+        else     sign  = 0;           /* a zero code resets it */
+        prev_dc += (((code + 1) >> 1) ^ sign) - sign; /* add (code + 1) / 2, negated when sign is -1 */
+        out[0] = prev_dc;
+    }
+    CLOSE_READER(re, gb);
+}
+
+// adaptive codebook switching lut according to previous run/level values
+static const uint8_t run_to_cb[16] = { 0x06, 0x06, 0x05, 0x05, 0x04, 0x29, 0x29, 0x29, 0x29, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x4C };
+static const uint8_t lev_to_cb[10] = { 0x04, 0x0A, 0x05, 0x06, 0x04, 0x28, 0x28, 0x28, 0x28, 0x4C };
+
+static av_always_inline int decode_ac_coeffs(AVCodecContext *avctx, GetBitContext *gb,
+                                             int16_t *out, int blocks_per_slice)
+{
+    /* Decode the run/level coded AC coefficients of a slice into out (64 entries per
+     * block). Returns 0, or AVERROR_INVALIDDATA when a run points past the last one. */
+    ProresContext *ctx = avctx->priv_data;
+    int block_mask, sign;
+    unsigned pos, run, level;
+    int max_coeffs, i, bits_left;
+    int log2_block_count = av_log2(blocks_per_slice);
+
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    run   = 4; /* seeds for the first run_to_cb / lev_to_cb lookups */
+    level = 2;
+    max_coeffs = 64 << log2_block_count;
+    block_mask = blocks_per_slice - 1;
+
+    /* (pos & block_mask) picks the block, pos >> log2_block_count the scan index */
+    for (pos = block_mask;;) {
+        bits_left = gb->size_in_bits - re_index;
+        if (!bits_left || (bits_left < 32 && !SHOW_UBITS(re, gb, bits_left)))
+            break; /* no bits left, or fewer than 32 and all of them zero */
+
+        DECODE_CODEWORD(run, run_to_cb[FFMIN(run,  15)]);
+        pos += run + 1;
+        if (pos >= max_coeffs) {
+            av_log(avctx, AV_LOG_ERROR, "ac tex damaged %u, %d\n", pos, max_coeffs);
+            return AVERROR_INVALIDDATA;
+        }
+
+        DECODE_CODEWORD(level, lev_to_cb[FFMIN(level, 9)]);
+        level += 1;
+        i = pos >> log2_block_count;
+        sign = SHOW_SBITS(re, gb, 1);
+        SKIP_BITS(re, gb, 1);
+        out[((pos & block_mask) << 6) + ctx->scan[i]] = ((level ^ sign) - sign);
+    }
+
+    CLOSE_READER(re, gb);
+    return 0;
+}
+
+static int decode_slice_luma(AVCodecContext *avctx, SliceContext *slice,
+                             uint16_t *dst, int dst_stride,
+                             const uint8_t *buf, unsigned buf_size,
+                             const int16_t *qmat)
+{
+    /* Decode the luma data of one slice: four 64-coefficient blocks per macroblock. */
+    ProresContext *ctx = avctx->priv_data;
+    LOCAL_ALIGNED_16(int16_t, blocks, [8*4*64]);
+    const int blocks_per_slice = slice->mb_count << 2;
+    const int second_pair_off  = 4 * dst_stride; /* dst offset of blocks 2 and 3 */
+    GetBitContext gb;
+    int16_t *block = blocks;
+    int n, mb, ret;
+
+    for (n = 0; n < blocks_per_slice; n++)
+        ctx->bdsp.clear_block(&blocks[n * 64]);
+
+    init_get_bits(&gb, buf, buf_size << 3);
+    decode_dc_coeffs(&gb, blocks, blocks_per_slice);
+    ret = decode_ac_coeffs(avctx, &gb, blocks, blocks_per_slice);
+    if (ret < 0)
+        return ret;
+
+    /* each macroblock consumes four blocks and advances dst by 16 samples */
+    for (mb = 0; mb < slice->mb_count; mb++, block += 4 * 64, dst += 16) {
+        ctx->prodsp.idct_put(dst,                       dst_stride, block,       qmat);
+        ctx->prodsp.idct_put(dst + 8,                   dst_stride, block +  64, qmat);
+        ctx->prodsp.idct_put(dst + second_pair_off,     dst_stride, block + 128, qmat);
+        ctx->prodsp.idct_put(dst + second_pair_off + 8, dst_stride, block + 192, qmat);
+    }
+    return 0;
+}
+
+static int decode_slice_chroma(AVCodecContext *avctx, SliceContext *slice,
+                               uint16_t *dst, int dst_stride,
+                               const uint8_t *buf, unsigned buf_size,
+                               const int16_t *qmat, int log2_blocks_per_mb)
+{
+    /* Decode one chroma plane of a slice; every inner pass emits a pair of blocks. */
+    ProresContext *ctx = avctx->priv_data;
+    LOCAL_ALIGNED_16(int16_t, blocks, [8*4*64]);
+    const int blocks_per_slice = slice->mb_count << log2_blocks_per_mb;
+    GetBitContext gb;
+    int16_t *block;
+    int n, mb, pair, ret;
+
+    for (n = 0; n < blocks_per_slice; n++)
+        ctx->bdsp.clear_block(&blocks[n * 64]);
+
+    init_get_bits(&gb, buf, buf_size << 3);
+    decode_dc_coeffs(&gb, blocks, blocks_per_slice);
+    ret = decode_ac_coeffs(avctx, &gb, blocks, blocks_per_slice);
+    if (ret < 0)
+        return ret;
+
+    block = blocks;
+    for (mb = 0; mb < slice->mb_count; mb++) {
+        /* log2_blocks_per_mb is also used as the number of block pairs per macroblock */
+        for (pair = 0; pair < log2_blocks_per_mb; pair++, block += 2 * 64, dst += 8) {
+            ctx->prodsp.idct_put(dst,                  dst_stride, block,      qmat);
+            ctx->prodsp.idct_put(dst + 4 * dst_stride, dst_stride, block + 64, qmat);
+        }
+    }
+    return 0;
+}
+
+static void unpack_alpha(GetBitContext *gb, uint16_t *dst, int num_coeffs,
+                         const int num_bits)
+{
+    const int mask = (1 << num_bits) - 1;
+    int i, idx, val, alpha_val;
+    /* Fill dst with num_coeffs alpha samples read from gb; values are kept modulo 2^num_bits. */
+    idx       = 0;
+    alpha_val = mask; /* the running value starts at the maximum */
+    do {
+        do {
+            if (get_bits1(gb)) {
+                val = get_bits(gb, num_bits); /* full-width difference */
+            } else {
+                int sign;
+                val  = get_bits(gb, num_bits == 16 ? 7 : 4); /* short difference, lowest bit is the sign */
+                sign = val & 1;
+                val  = (val + 2) >> 1;
+                if (sign)
+                    val = -val;
+            }
+            alpha_val = (alpha_val + val) & mask;
+            if (num_bits == 16) {
+                dst[idx++] = alpha_val >> 6; /* drop the 6 low bits of the 16-bit value */
+            } else {
+                dst[idx++] = (alpha_val << 2) | (alpha_val >> 6); /* widen the 8-bit value by 2 bits */
+            }
+            if (idx >= num_coeffs)
+                break;
+        } while (get_bits_left(gb)>0 && get_bits1(gb));
+        val = get_bits(gb, 4); /* repeat count for alpha_val: 4 bits, or 11 bits when those are 0 */
+        if (!val)
+            val = get_bits(gb, 11);
+        if (idx + val > num_coeffs) /* clamp the repeat count to the room left in dst */
+            val = num_coeffs - idx;
+        if (num_bits == 16) {
+            for (i = 0; i < val; i++)
+                dst[idx++] = alpha_val >> 6;
+        } else {
+            for (i = 0; i < val; i++)
+                dst[idx++] = (alpha_val << 2) | (alpha_val >> 6);
+
+        }
+    } while (idx < num_coeffs);
+}
+
+/**
+ * Decode alpha slice plane.
+ */
+static void decode_slice_alpha(ProresContext *ctx,
+                               uint16_t *dst, int dst_stride,
+                               const uint8_t *buf, int buf_size,
+                               int blocks_per_slice)
+{
+    GetBitContext gb;
+    int i;
+    LOCAL_ALIGNED_16(int16_t, blocks, [8*4*64]);
+    int16_t *block;
+    /* the caller passes a macroblock count in blocks_per_slice: 4 blocks of 64 samples each */
+    for (i = 0; i < blocks_per_slice<<2; i++)
+        ctx->bdsp.clear_block(blocks+(i<<6));
+
+    init_get_bits(&gb, buf, buf_size << 3);
+
+    /* alpha_info == 2 selects 16-bit coded samples, any other value 8-bit ones;
+     * unpack_alpha() takes uint16_t *, so the int16_t scratch buffer is cast explicitly */
+    unpack_alpha(&gb, (uint16_t *)blocks, blocks_per_slice * 4 * 64,
+                 ctx->alpha_info == 2 ? 16 : 8);
+
+    /* copy 16 rows of 16 * blocks_per_slice samples; presumably dst_stride is in bytes */
+    block = blocks;
+    for (i = 0; i < 16; i++) {
+        memcpy(dst, block, 16 * blocks_per_slice * sizeof(*dst));
+        dst   += dst_stride >> 1;
+        block += 16 * blocks_per_slice;
+    }
+}
+
+static int decode_slice_thread(AVCodecContext *avctx, void *arg, int jobnr, int threadnr)
+{
+    ProresContext *ctx = avctx->priv_data;
+    SliceContext *slice = &ctx->slices[jobnr];
+    const uint8_t *buf = slice->data;
+    AVFrame *pic = ctx->frame;
+    int i, hdr_size, qscale, log2_chroma_blocks_per_mb;
+    int luma_stride, chroma_stride;
+    int y_data_size, u_data_size, v_data_size, a_data_size;
+    uint8_t *dest_y, *dest_u, *dest_v, *dest_a;
+    int16_t qmat_luma_scaled[64];
+    int16_t qmat_chroma_scaled[64];
+    int mb_x_shift;
+    int ret;
+
+    /* Slice job: decodes slice number jobnr into ctx->frame. The status (0 or a negative
+     * error code) is returned and also stored in slice->ret, which decode_picture() reads. */
+    slice->ret = -1;
+
+    // slice header
+    hdr_size = buf[0] >> 3;
+    qscale = av_clip(buf[1], 1, 224);
+    qscale = qscale > 128 ? (qscale - 96) << 2 : qscale;
+    y_data_size = AV_RB16(buf + 2);
+    u_data_size = AV_RB16(buf + 4);
+    v_data_size = slice->data_size - y_data_size - u_data_size - hdr_size;
+    if (hdr_size > 7 && hdr_size <= slice->data_size) v_data_size = AV_RB16(buf + 6); /* only read bytes inside the slice */
+    a_data_size = slice->data_size - y_data_size - u_data_size -
+                  v_data_size - hdr_size;
+
+    if (y_data_size < 0 || u_data_size < 0 || v_data_size < 0
+        || hdr_size+y_data_size+u_data_size+v_data_size > slice->data_size){
+        av_log(avctx, AV_LOG_ERROR, "invalid plane data size\n");
+        return slice->ret = AVERROR_INVALIDDATA;
+    }
+
+    buf += hdr_size;
+
+    for (i = 0; i < 64; i++) {
+        qmat_luma_scaled  [i] = ctx->qmat_luma  [i] * qscale;
+        qmat_chroma_scaled[i] = ctx->qmat_chroma[i] * qscale;
+    }
+
+    if (ctx->frame_type == 0) {
+        luma_stride   = pic->linesize[0];
+        chroma_stride = pic->linesize[1];
+    } else {
+        luma_stride   = pic->linesize[0] << 1;
+        chroma_stride = pic->linesize[1] << 1;
+    }
+
+    if (avctx->pix_fmt == AV_PIX_FMT_YUV444P10 || avctx->pix_fmt == AV_PIX_FMT_YUVA444P10) {
+        mb_x_shift = 5;
+        log2_chroma_blocks_per_mb = 2;
+    } else {
+        mb_x_shift = 4;
+        log2_chroma_blocks_per_mb = 1;
+    }
+
+    dest_y = pic->data[0] + (slice->mb_y << 4) * luma_stride + (slice->mb_x << 5);
+    dest_u = pic->data[1] + (slice->mb_y << 4) * chroma_stride + (slice->mb_x << mb_x_shift);
+    dest_v = pic->data[2] + (slice->mb_y << 4) * chroma_stride + (slice->mb_x << mb_x_shift);
+    dest_a = pic->data[3] + (slice->mb_y << 4) * luma_stride + (slice->mb_x << 5);
+
+    if (ctx->frame_type && ctx->first_field ^ ctx->frame->top_field_first) {
+        dest_y += pic->linesize[0];
+        dest_u += pic->linesize[1];
+        dest_v += pic->linesize[2];
+        dest_a += pic->linesize[3];
+    }
+
+    ret = decode_slice_luma(avctx, slice, (uint16_t*)dest_y, luma_stride,
+                            buf, y_data_size, qmat_luma_scaled);
+    if (ret < 0)
+        return slice->ret = ret;
+
+    if (!(avctx->flags & CODEC_FLAG_GRAY)) {
+        ret = decode_slice_chroma(avctx, slice, (uint16_t*)dest_u, chroma_stride,
+                                  buf + y_data_size, u_data_size,
+                                  qmat_chroma_scaled, log2_chroma_blocks_per_mb);
+        if (ret < 0)
+            return slice->ret = ret;
+
+        ret = decode_slice_chroma(avctx, slice, (uint16_t*)dest_v, chroma_stride,
+                                  buf + y_data_size + u_data_size, v_data_size,
+                                  qmat_chroma_scaled, log2_chroma_blocks_per_mb);
+        if (ret < 0)
+            return slice->ret = ret;
+    }
+    /* decode alpha plane if available */
+    if (ctx->alpha_info && pic->data[3] && a_data_size)
+        decode_slice_alpha(ctx, (uint16_t*)dest_a, luma_stride,
+                           buf + y_data_size + u_data_size + v_data_size,
+                           a_data_size, slice->mb_count);
+
+    slice->ret = 0;
+    return 0;
+}
+
+static int decode_picture(AVCodecContext *avctx)
+{
+    /* Run all slice jobs of the current picture, then report the first failed slice. */
+    ProresContext *ctx = avctx->priv_data;
+    int n = 0;
+
+    avctx->execute2(avctx, decode_slice_thread, NULL, NULL, ctx->slice_count);
+
+    while (n < ctx->slice_count && ctx->slices[n].ret >= 0)
+        n++;
+
+    return n < ctx->slice_count ? ctx->slices[n].ret : 0;
+}
+
+static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
+                        AVPacket *avpkt)
+{
+    /* Decode one packet: the frame header, then one picture or, when
+     * ctx->frame_type is set and data is left over, a second one. */
+    ProresContext *ctx = avctx->priv_data;
+    AVFrame *frame     = data;
+    const uint8_t *buf = avpkt->data;
+    int buf_size       = avpkt->size;
+    int frame_hdr_size, pic_size, ret, again;
+
+    if (buf_size < 28 || AV_RL32(buf + 4) != AV_RL32("icpf")) {
+        av_log(avctx, AV_LOG_ERROR, "invalid frame header\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    ctx->frame       = frame;
+    ctx->first_field = 1;
+    frame->pict_type = AV_PICTURE_TYPE_I;
+    frame->key_frame = 1;
+
+    /* skip the 8 bytes in front of the frame header, then the header itself */
+    buf      += 8;
+    buf_size -= 8;
+    frame_hdr_size = decode_frame_header(ctx, buf, buf_size, avctx);
+    if (frame_hdr_size < 0)
+        return frame_hdr_size;
+    buf      += frame_hdr_size;
+    buf_size -= frame_hdr_size;
+
+    ret = ff_get_buffer(avctx, frame, 0);
+    if (ret < 0)
+        return ret;
+
+    do {
+        pic_size = decode_picture_header(avctx, buf, buf_size);
+        if (pic_size < 0) {
+            av_log(avctx, AV_LOG_ERROR, "error decoding picture header\n");
+            return pic_size;
+        }
+
+        ret = decode_picture(avctx);
+        if (ret < 0) {
+            av_log(avctx, AV_LOG_ERROR, "error decoding picture\n");
+            return ret;
+        }
+
+        buf      += pic_size;
+        buf_size -= pic_size;
+        again     = ctx->frame_type && buf_size > 0 && ctx->first_field;
+        if (again)
+            ctx->first_field = 0;
+    } while (again);
+    *got_frame = 1;
+    return avpkt->size;
+}
+
+static av_cold int decode_close(AVCodecContext *avctx)
+{
+    /* free the slice array; av_freep() is handed the address of the pointer */
+    ProresContext *priv = avctx->priv_data;
+    av_freep(&priv->slices);
+
+    return 0;
+}
+
+AVCodec ff_prores_decoder = {
+    .name           = "prores",
+    .long_name      = NULL_IF_CONFIG_SMALL("ProRes"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_PRORES,
+    .priv_data_size = sizeof(ProresContext),
+    .init           = decode_init,
+    .close          = decode_close,
+    .decode         = decode_frame,
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_SLICE_THREADS,
+};
diff --git a/libavcodec/proresdec.c b/libavcodec/proresdec_lgpl.c
index 03f63d9..f2e24ec 100644
--- a/libavcodec/proresdec.c
+++ b/libavcodec/proresdec_lgpl.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2010-2011 Maxim Poliakovski
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -86,7 +86,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
     ctx->slice_data       = NULL;
 
     avctx->bits_per_raw_sample = PRORES_BITS_PER_SAMPLE;
-    ff_proresdsp_init(&ctx->dsp);
+    ff_proresdsp_init(&ctx->dsp, avctx);
 
     ctx->scantable_type = -1;   // set scantable type to uninitialized
     memset(ctx->qmat_luma, 4, 64);
@@ -140,6 +140,7 @@ static int decode_frame_header(ProresContext *ctx, const uint8_t *buf,
         av_log(avctx, AV_LOG_ERROR, "Invalid alpha mode %d\n", ctx->alpha_info);
         return AVERROR_INVALIDDATA;
     }
+    if (avctx->skip_alpha) ctx->alpha_info = 0;
 
     switch (ctx->chroma_factor) {
     case 2:
@@ -505,8 +506,9 @@ static void unpack_alpha(GetBitContext *gb, uint16_t *dst, int num_coeffs,
                 dst[idx++] = alpha_val >> 6;
             else
                 dst[idx++] = (alpha_val << 2) | (alpha_val >> 6);
-            if (idx >= num_coeffs - 1)
+            if (idx >= num_coeffs) {
                 break;
+            }
         } while (get_bits1(gb));
         val = get_bits(gb, 4);
         if (!val)
@@ -618,7 +620,7 @@ static int decode_slice(AVCodecContext *avctx, void *tdata)
     coff[2]     = coff[1] + u_data_size;
     v_data_size = hdr_size > 7 ? AV_RB16(buf + 6) : slice_data_size - coff[2];
     coff[3]     = coff[2] + v_data_size;
-    a_data_size = slice_data_size - coff[3];
+    a_data_size = ctx->alpha_info ? slice_data_size - coff[3] : 0;
 
     /* if V or alpha component size is negative that means that previous
        component sizes are too large */
@@ -768,8 +770,8 @@ static av_cold int decode_close(AVCodecContext *avctx)
 }
 
 
-AVCodec ff_prores_decoder = {
-    .name           = "prores",
+AVCodec ff_prores_lgpl_decoder = {
+    .name           = "prores_lgpl",
     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_PRORES,
diff --git a/libavcodec/proresdsp.c b/libavcodec/proresdsp.c
index 3af2f0b..82d6009 100644
--- a/libavcodec/proresdsp.c
+++ b/libavcodec/proresdsp.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2010-2011 Maxim Poliakovski
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -31,7 +31,7 @@
 #define CLIP_MIN (1 << (PRORES_BITS_PER_SAMPLE - 8))           ///< minimum value for clipping resulting pixels
 #define CLIP_MAX (1 << PRORES_BITS_PER_SAMPLE) - CLIP_MIN - 1  ///< maximum value for clipping resulting pixels
 
-#define CLIP_AND_BIAS(x) (av_clip((x) + BIAS, CLIP_MIN, CLIP_MAX))
+#define CLIP(x) (av_clip((x), CLIP_MIN, CLIP_MAX))
 
 /**
  * Add bias value, clamp and output pixels of a slice
@@ -44,7 +44,7 @@ static void put_pixels(uint16_t *dst, int stride, const int16_t *in)
         for (x = 0; x < 8; x++) {
             src_offset = (y << 3) + x;
 
-            dst[dst_offset + x] = CLIP_AND_BIAS(in[src_offset]);
+            dst[dst_offset + x] = CLIP(in[src_offset]);
         }
     }
 }
@@ -55,13 +55,13 @@ static void prores_idct_put_c(uint16_t *out, int linesize, int16_t *block, const
     put_pixels(out, linesize >> 1, block);
 }
 
-av_cold void ff_proresdsp_init(ProresDSPContext *dsp)
+av_cold void ff_proresdsp_init(ProresDSPContext *dsp, AVCodecContext *avctx)
 {
     dsp->idct_put = prores_idct_put_c;
     dsp->idct_permutation_type = FF_IDCT_PERM_NONE;
 
     if (ARCH_X86)
-        ff_proresdsp_init_x86(dsp);
+        ff_proresdsp_init_x86(dsp, avctx);
 
     ff_init_scantable_permutation(dsp->idct_permutation,
                                   dsp->idct_permutation_type);
diff --git a/libavcodec/proresdsp.h b/libavcodec/proresdsp.h
index e8a3ea9..159862e 100644
--- a/libavcodec/proresdsp.h
+++ b/libavcodec/proresdsp.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2010-2011 Maxim Poliakovski
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -24,6 +24,7 @@
 #define AVCODEC_PRORESDSP_H
 
 #include <stdint.h>
+#include "avcodec.h"
 
 #define PRORES_BITS_PER_SAMPLE 10 ///< output precision of prores decoder
 
@@ -33,8 +34,8 @@ typedef struct ProresDSPContext {
     void (* idct_put) (uint16_t *out, int linesize, int16_t *block, const int16_t *qmat);
 } ProresDSPContext;
 
-void ff_proresdsp_init(ProresDSPContext *dsp);
+void ff_proresdsp_init(ProresDSPContext *dsp, AVCodecContext *avctx);
 
-void ff_proresdsp_init_x86(ProresDSPContext *dsp);
+void ff_proresdsp_init_x86(ProresDSPContext *dsp, AVCodecContext *avctx);
 
 #endif /* AVCODEC_PRORESDSP_H */
diff --git a/libavcodec/proresenc_anatoliy.c b/libavcodec/proresenc_anatoliy.c
new file mode 100644
index 0000000..f471f49
--- /dev/null
+++ b/libavcodec/proresenc_anatoliy.c
@@ -0,0 +1,636 @@
+/*
+ * Apple ProRes encoder
+ *
+ * Copyright (c) 2011 Anatoliy Wasserman
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Apple ProRes encoder (Anatoliy Wasserman version)
+ * Known FOURCCs: 'apch' (HQ), 'apcn' (SD), 'apcs' (LT), 'apco' (Proxy)
+ */
+
+#include "avcodec.h"
+#include "dct.h"
+#include "internal.h"
+#include "put_bits.h"
+#include "bytestream.h"
+#include "fdctdsp.h"
+
+#define DEFAULT_SLICE_MB_WIDTH 8
+
+#define FF_PROFILE_PRORES_PROXY     0
+#define FF_PROFILE_PRORES_LT        1
+#define FF_PROFILE_PRORES_STANDARD  2
+#define FF_PROFILE_PRORES_HQ        3
+
+static const AVProfile profiles[] = {
+    { FF_PROFILE_PRORES_PROXY,    "apco"},
+    { FF_PROFILE_PRORES_LT,       "apcs"},
+    { FF_PROFILE_PRORES_STANDARD, "apcn"},
+    { FF_PROFILE_PRORES_HQ,       "apch"},
+    { FF_PROFILE_UNKNOWN }
+};
+
+static const int qp_start_table[4] = { 4, 1, 1, 1 };
+static const int qp_end_table[4]   = { 8, 9, 6, 6 };
+static const int bitrate_table[5]  = { 1000, 2100, 3500, 5400 };
+
+static const uint8_t progressive_scan[64] = {
+     0,  1,  8,  9,  2,  3, 10, 11,
+    16, 17, 24, 25, 18, 19, 26, 27,
+     4,  5, 12, 20, 13,  6,  7, 14,
+    21, 28, 29, 22, 15, 23, 30, 31,
+    32, 33, 40, 48, 41, 34, 35, 42,
+    49, 56, 57, 50, 43, 36, 37, 44,
+    51, 58, 59, 52, 45, 38, 39, 46,
+    53, 60, 61, 54, 47, 55, 62, 63
+};
+
+static const uint8_t QMAT_LUMA[4][64] = {
+    {
+         4,  7,  9, 11, 13, 14, 15, 63,
+         7,  7, 11, 12, 14, 15, 63, 63,
+         9, 11, 13, 14, 15, 63, 63, 63,
+        11, 11, 13, 14, 63, 63, 63, 63,
+        11, 13, 14, 63, 63, 63, 63, 63,
+        13, 14, 63, 63, 63, 63, 63, 63,
+        13, 63, 63, 63, 63, 63, 63, 63,
+        63, 63, 63, 63, 63, 63, 63, 63
+    }, {
+         4,  5,  6,  7,  9, 11, 13, 15,
+         5,  5,  7,  8, 11, 13, 15, 17,
+         6,  7,  9, 11, 13, 15, 15, 17,
+         7,  7,  9, 11, 13, 15, 17, 19,
+         7,  9, 11, 13, 14, 16, 19, 23,
+         9, 11, 13, 14, 16, 19, 23, 29,
+         9, 11, 13, 15, 17, 21, 28, 35,
+        11, 13, 16, 17, 21, 28, 35, 41
+    }, {
+         4,  4,  5,  5,  6,  7,  7,  9,
+         4,  4,  5,  6,  7,  7,  9,  9,
+         5,  5,  6,  7,  7,  9,  9, 10,
+         5,  5,  6,  7,  7,  9,  9, 10,
+         5,  6,  7,  7,  8,  9, 10, 12,
+         6,  7,  7,  8,  9, 10, 12, 15,
+         6,  7,  7,  9, 10, 11, 14, 17,
+         7,  7,  9, 10, 11, 14, 17, 21
+    }, {
+         4,  4,  4,  4,  4,  4,  4,  4,
+         4,  4,  4,  4,  4,  4,  4,  4,
+         4,  4,  4,  4,  4,  4,  4,  4,
+         4,  4,  4,  4,  4,  4,  4,  5,
+         4,  4,  4,  4,  4,  4,  5,  5,
+         4,  4,  4,  4,  4,  5,  5,  6,
+         4,  4,  4,  4,  5,  5,  6,  7,
+         4,  4,  4,  4,  5,  6,  7,  7
+    }
+};
+
+static const uint8_t QMAT_CHROMA[4][64] = {
+    {
+         4,  7,  9, 11, 13, 14, 63, 63,
+         7,  7, 11, 12, 14, 63, 63, 63,
+         9, 11, 13, 14, 63, 63, 63, 63,
+        11, 11, 13, 14, 63, 63, 63, 63,
+        11, 13, 14, 63, 63, 63, 63, 63,
+        13, 14, 63, 63, 63, 63, 63, 63,
+        13, 63, 63, 63, 63, 63, 63, 63,
+        63, 63, 63, 63, 63, 63, 63, 63
+    }, {
+         4,  5,  6,  7,  9, 11, 13, 15,
+         5,  5,  7,  8, 11, 13, 15, 17,
+         6,  7,  9, 11, 13, 15, 15, 17,
+         7,  7,  9, 11, 13, 15, 17, 19,
+         7,  9, 11, 13, 14, 16, 19, 23,
+         9, 11, 13, 14, 16, 19, 23, 29,
+         9, 11, 13, 15, 17, 21, 28, 35,
+        11, 13, 16, 17, 21, 28, 35, 41
+    }, {
+         4,  4,  5,  5,  6,  7,  7,  9,
+         4,  4,  5,  6,  7,  7,  9,  9,
+         5,  5,  6,  7,  7,  9,  9, 10,
+         5,  5,  6,  7,  7,  9,  9, 10,
+         5,  6,  7,  7,  8,  9, 10, 12,
+         6,  7,  7,  8,  9, 10, 12, 15,
+         6,  7,  7,  9, 10, 11, 14, 17,
+         7,  7,  9, 10, 11, 14, 17, 21
+    }, {
+         4,  4,  4,  4,  4,  4,  4,  4,
+         4,  4,  4,  4,  4,  4,  4,  4,
+         4,  4,  4,  4,  4,  4,  4,  4,
+         4,  4,  4,  4,  4,  4,  4,  5,
+         4,  4,  4,  4,  4,  4,  5,  5,
+         4,  4,  4,  4,  4,  5,  5,  6,
+         4,  4,  4,  4,  5,  5,  6,  7,
+         4,  4,  4,  4,  5,  6,  7,  7
+    }
+};
+
+
+typedef struct {
+    FDCTDSPContext fdsp;
+    uint8_t* fill_y;
+    uint8_t* fill_u;
+    uint8_t* fill_v;
+
+    int qmat_luma[16][64];
+    int qmat_chroma[16][64];
+} ProresContext;
+
+static void encode_codeword(PutBitContext *pb, int val, int codebook)
+{
+    /* Emit val with the combined rice / exp golomb code selected by codebook:
+     * bits 0-1 give switch_bits, bits 2-4 exp_order, bits 5 and up rice_order. */
+    const unsigned int switch_bits = codebook & 3;
+    const unsigned int exp_order   = (codebook >> 2) & 7;
+    const unsigned int rice_order  = codebook >> 5;
+    /* values from first_exp upwards take the exp golomb branch */
+    const unsigned int first_exp   = (switch_bits + 1) << rice_order;
+
+    if (val < first_exp) {
+        /* a run of (val >> rice_order) zero bits, a 1 bit,
+         * then the rice_order low bits of val when rice_order is non-zero */
+        put_bits(pb, val >> rice_order, 0);
+        put_bits(pb, 1, 1);
+        if (rice_order)
+            put_bits(pb, rice_order, val & ((1 << rice_order) - 1));
+    } else {
+        unsigned int exp;
+
+        val -= first_exp;
+        val += (1 << exp_order);
+        exp  = av_log2(val);
+
+        /* zero prefix first, then val itself in exp + 1 bits */
+        put_bits(pb, exp - exp_order + switch_bits + 1, 0);
+        put_bits(pb, exp + 1, val);
+    }
+}
+
+#define QSCALE(qmat,ind,val) ((val) / ((qmat)[ind])) /* val divided by entry ind of qmat */
+#define TO_GOLOMB(val) (((val) << 1) ^ ((val) >> 31)) /* 0,-1,1,-2,... -> 0,1,2,3,... assuming 32-bit int and arithmetic >> */
+#define DIFF_SIGN(val, sign) (((val) >> 31) ^ (sign)) /* sign mask of val XORed with sign */
+#define IS_NEGATIVE(val) ((((val) >> 31) ^ -1) + 1) /* 1 for a negative val, 0 otherwise (same assumption) */
+#define TO_GOLOMB2(val,sign) ((val)==0 ? 0 : ((val) << 1) + (sign)) /* 0 stays 0, otherwise 2 * val + sign */
+
+static av_always_inline int get_level(int val)
+{
+    const int mask = val >> 31;  /* shifted-down sign of val */
+    return (val ^ mask) - mask;  /* the same expression the original used for the level */
+}
+
+#define FIRST_DC_CB 0xB8
+
+static const uint8_t dc_codebook[7] = { 0x04, 0x28, 0x28, 0x4D, 0x4D, 0x70, 0x70};
+
+static void encode_dc_coeffs(PutBitContext *pb, int16_t *in,
+        int blocks_per_slice, int *qmat)
+{
+    /* Write the DC value of every block: the first one directly, the rest as deltas. */
+    const int16_t *dc = in;
+    int last_dc, last_code = 5, last_sign = 0, blk;
+
+    last_dc = QSCALE(qmat, 0, dc[0] - 16384);
+    encode_codeword(pb, TO_GOLOMB(last_dc), FIRST_DC_CB);
+
+    for (blk = 1; blk < blocks_per_slice; blk++) {
+        int cur_dc, delta, cur_code;
+
+        dc      += 64; /* the DC sits at the start of each 64-coefficient block */
+        cur_dc   = QSCALE(qmat, 0, dc[0] - 16384);
+        delta    = cur_dc - last_dc;
+        cur_code = TO_GOLOMB2(get_level(delta), DIFF_SIGN(delta, last_sign));
+
+        /* the codebook is picked from the previous code, capped at 6 */
+        encode_codeword(pb, cur_code, dc_codebook[FFMIN(last_code, 6)]);
+        last_code = cur_code;
+        last_sign = delta >> 31;
+        last_dc   = cur_dc;
+    }
+}
+
+static const uint8_t run_to_cb[16] = { 0x06, 0x06, 0x05, 0x05, 0x04, 0x29,
+        0x29, 0x29, 0x29, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x4C };
+static const uint8_t lev_to_cb[10] = { 0x04, 0x0A, 0x05, 0x06, 0x04, 0x28,
+        0x28, 0x28, 0x28, 0x4C };
+
+static void encode_ac_coeffs(AVCodecContext *avctx, PutBitContext *pb,
+        int16_t *in, int blocks_per_slice, int *qmat)
+{
+    /* Write the non-zero AC coefficients as (zero run, level - 1, sign bit) triples. */
+    int last_run = 4, last_level = 2, zero_run = 0;
+    int scan_pos, blk;
+
+    /* scan positions 1..63 form the outer loop, the blocks of the slice the inner one */
+    for (scan_pos = 1; scan_pos < 64; scan_pos++) {
+        const int coef_idx = progressive_scan[scan_pos];
+
+        for (blk = 0; blk < blocks_per_slice; blk++) {
+            const int val = QSCALE(qmat, coef_idx, in[(blk << 6) + coef_idx]);
+            int level;
+
+            if (!val) {
+                zero_run++;
+                continue;
+            }
+
+            level = get_level(val);
+            encode_codeword(pb, zero_run, run_to_cb[FFMIN(last_run, 15)]);
+            encode_codeword(pb, level - 1, lev_to_cb[FFMIN(last_level, 9)]);
+            put_bits(pb, 1, IS_NEGATIVE(val));
+            last_run   = zero_run;
+            last_level = level;
+            zero_run   = 0;
+        }
+    }
+}
+/* Copy an 8x8 block of 16-bit samples into block: every row is 16 bytes read from pixels, and source rows are stride bytes apart. */
+static void get(uint8_t *pixels, int stride, int16_t* block)
+{
+    int i;
+
+    for (i = 0; i < 8; i++) {
+        AV_WN64(block, AV_RN64(pixels)); /* samples 0..3 of the row */
+        AV_WN64(block+4, AV_RN64(pixels+8)); /* samples 4..7 of the row */
+        pixels += stride;
+        block += 8;
+    }
+}
+/* Load one 8x8 block with get() and transform it in place with fdsp->fdct(). */
+static void fdct_get(FDCTDSPContext *fdsp, uint8_t *pixels, int stride, int16_t* block)
+{
+    get(pixels, stride, block);
+    fdsp->fdct(block);
+}
+/* Transform and entropy-code one plane of a slice into buf (buf_size bytes available); chroma is 0 for luma, 1 for chroma. Returns the bytes written. */
+static int encode_slice_plane(AVCodecContext *avctx, int mb_count,
+        uint8_t *src, int src_stride, uint8_t *buf, unsigned buf_size,
+        int *qmat, int chroma)
+{
+    ProresContext* ctx = avctx->priv_data;
+    FDCTDSPContext *fdsp = &ctx->fdsp;
+    DECLARE_ALIGNED(16, int16_t, blocks)[DEFAULT_SLICE_MB_WIDTH << 8], *block;
+    int i, blocks_per_slice;
+    PutBitContext pb;
+
+    block = blocks;
+    for (i = 0; i < mb_count; i++) {
+        fdct_get(fdsp, src,                  src_stride, block + (0 << 6)); /* top (left) block */
+        fdct_get(fdsp, src + 8 * src_stride, src_stride, block + ((2 - chroma) << 6)); /* block 8 rows below: slot 2 for luma, slot 1 for chroma */
+        if (!chroma) {
+            fdct_get(fdsp, src + 16,                  src_stride, block + (1 << 6)); /* luma top right, 16 bytes = 8 samples further */
+            fdct_get(fdsp, src + 16 + 8 * src_stride, src_stride, block + (3 << 6)); /* luma bottom right */
+        }
+
+        block += (256 >> chroma); /* 4 luma or 2 chroma blocks of 64 coefficients */
+        src   += (32  >> chroma); /* 32 or 16 bytes to the next macroblock */
+    }
+
+    blocks_per_slice = mb_count << (2 - chroma);
+    init_put_bits(&pb, buf, buf_size); /* size is in bytes; the former buf_size << 3 overstated the buffer eightfold */
+
+    encode_dc_coeffs(&pb, blocks, blocks_per_slice, qmat);
+    encode_ac_coeffs(avctx, &pb, blocks, blocks_per_slice, qmat);
+
+    flush_put_bits(&pb);
+    return put_bits_ptr(&pb) - pb.buf;
+}
+/* Encode the Y plane and, unless CODEC_FLAG_GRAY is set, the U and V planes of a slice back to back into buf; returns the sum of the three sizes. */
+static av_always_inline unsigned encode_slice_data(AVCodecContext *avctx,
+        uint8_t *dest_y, uint8_t *dest_u, uint8_t *dest_v, int luma_stride,
+        int chroma_stride, unsigned mb_count, uint8_t *buf, unsigned data_size,
+        unsigned* y_data_size, unsigned* u_data_size, unsigned* v_data_size,
+        int qp)
+{
+    ProresContext* ctx = avctx->priv_data;
+
+    *y_data_size = encode_slice_plane(avctx, mb_count, dest_y, luma_stride,
+            buf, data_size, ctx->qmat_luma[qp - 1], 0); /* matrices are stored at index qp - 1 */
+
+    if (!(avctx->flags & CODEC_FLAG_GRAY)) { /* in gray mode the U/V sizes keep whatever the caller stored in them */
+        *u_data_size = encode_slice_plane(avctx, mb_count, dest_u,
+                chroma_stride, buf + *y_data_size, data_size - *y_data_size,
+                ctx->qmat_chroma[qp - 1], 1);
+
+        *v_data_size = encode_slice_plane(avctx, mb_count, dest_v,
+                chroma_stride, buf + *y_data_size + *u_data_size,
+                data_size - *y_data_size - *u_data_size,
+                ctx->qmat_chroma[qp - 1], 1);
+    }
+
+    return *y_data_size + *u_data_size + *v_data_size;
+}
+/* Copy a dst_width x dst_height window at sample position (x, y) of a 16-bit plane into dst, repeating the last column/row past width/height. stride is in bytes. */
+static void subimage_with_fill(uint16_t *src, unsigned x, unsigned y,
+        unsigned stride, unsigned width, unsigned height, uint16_t *dst,
+        unsigned dst_width, unsigned dst_height)
+{
+    /* NOTE(review): assumes x < width and y < height so the copied box is at least 1x1; dst[j - 1] below would otherwise read before the row -- confirm with callers */
+    int box_width = FFMIN(width - x, dst_width);
+    int box_height = FFMIN(height - y, dst_height);
+    int i, j, src_stride = stride >> 1; /* bytes -> 16-bit samples */
+    uint16_t last_pix, *last_line;
+
+    src += y * src_stride + x;
+    for (i = 0; i < box_height; ++i) {
+        for (j = 0; j < box_width; ++j) {
+            dst[j] = src[j];
+        }
+        last_pix = dst[j - 1]; /* rightmost copied sample of this row */
+        for (; j < dst_width; j++)
+            dst[j] = last_pix;
+        src += src_stride;
+        dst += dst_width;
+    }
+    last_line = dst - dst_width; /* last row that was filled from the source */
+    for (; i < dst_height; i++) {
+        for (j = 0; j < dst_width; ++j) {
+            dst[j] = last_line[j];
+        }
+        dst += dst_width;
+    }
+}
+/* Encode one slice of mb_count macroblocks at (mb_x, mb_y) into buf, adapting *qp to the target size on the regular path; returns the slice size including its 6-byte header. */
+static int encode_slice(AVCodecContext *avctx, const AVFrame *pic, int mb_x,
+        int mb_y, unsigned mb_count, uint8_t *buf, unsigned data_size,
+        int unsafe, int *qp)
+{
+    int luma_stride, chroma_stride;
+    int hdr_size = 6, slice_size;
+    uint8_t *dest_y, *dest_u, *dest_v;
+    unsigned y_data_size = 0, u_data_size = 0, v_data_size = 0;
+    ProresContext* ctx = avctx->priv_data;
+    int tgt_bits   = (mb_count * bitrate_table[avctx->profile]) >> 2;
+    int low_bytes  = (tgt_bits - (tgt_bits >> 3)) >> 3; // 12% bitrate fluctuation
+    int high_bytes = (tgt_bits + (tgt_bits >> 3)) >> 3;
+
+    luma_stride   = pic->linesize[0];
+    chroma_stride = pic->linesize[1];
+
+    dest_y = pic->data[0] + (mb_y << 4) * luma_stride   + (mb_x << 5); /* 16 rows and 32 bytes per luma macroblock */
+    dest_u = pic->data[1] + (mb_y << 4) * chroma_stride + (mb_x << 4); /* 16 rows and 16 bytes per chroma macroblock */
+    dest_v = pic->data[2] + (mb_y << 4) * chroma_stride + (mb_x << 4);
+
+    if (unsafe) {
+        /* Slice reaches past the picture: encode a padded copy from the fill buffers, once at the current *qp (no size adaptation on this path). */
+        subimage_with_fill((uint16_t *) pic->data[0], mb_x << 4, mb_y << 4,
+                luma_stride, avctx->width, avctx->height,
+                (uint16_t *) ctx->fill_y, mb_count << 4, 16);
+        subimage_with_fill((uint16_t *) pic->data[1], mb_x << 3, mb_y << 4,
+                chroma_stride, avctx->width >> 1, avctx->height,
+                (uint16_t *) ctx->fill_u, mb_count << 3, 16);
+        subimage_with_fill((uint16_t *) pic->data[2], mb_x << 3, mb_y << 4,
+                chroma_stride, avctx->width >> 1, avctx->height,
+                (uint16_t *) ctx->fill_v, mb_count << 3, 16);
+
+        encode_slice_data(avctx, ctx->fill_y, ctx->fill_u, ctx->fill_v,
+                mb_count << 5, mb_count << 4, mb_count, buf + hdr_size,
+                data_size - hdr_size, &y_data_size, &u_data_size, &v_data_size,
+                *qp);
+    } else {
+        slice_size = encode_slice_data(avctx, dest_y, dest_u, dest_v,
+                luma_stride, chroma_stride, mb_count, buf + hdr_size,
+                data_size - hdr_size, &y_data_size, &u_data_size, &v_data_size,
+                *qp);
+
+        if (slice_size > high_bytes && *qp < qp_end_table[avctx->profile]) { /* too large: re-encode with a coarser quantizer */
+            do {
+                *qp += 1;
+                slice_size = encode_slice_data(avctx, dest_y, dest_u, dest_v,
+                        luma_stride, chroma_stride, mb_count, buf + hdr_size,
+                        data_size - hdr_size, &y_data_size, &u_data_size,
+                        &v_data_size, *qp);
+            } while (slice_size > high_bytes && *qp < qp_end_table[avctx->profile]);
+        } else if (slice_size < low_bytes && *qp
+                > qp_start_table[avctx->profile]) { /* too small: re-encode with a finer quantizer */
+            do {
+                *qp -= 1;
+                slice_size = encode_slice_data(avctx, dest_y, dest_u, dest_v,
+                        luma_stride, chroma_stride, mb_count, buf + hdr_size,
+                        data_size - hdr_size, &y_data_size, &u_data_size,
+                        &v_data_size, *qp);
+            } while (slice_size < low_bytes && *qp > qp_start_table[avctx->profile]);
+        }
+    }
+
+    buf[0] = hdr_size << 3; /* slice header size in bits */
+    buf[1] = *qp;
+    AV_WB16(buf + 2, y_data_size);
+    AV_WB16(buf + 4, u_data_size); /* the V size is not stored in the header */
+
+    return hdr_size + y_data_size + u_data_size + v_data_size;
+}
+/* Encode every slice of pic into buf: an 8-byte picture header, a table of 16-bit slice sizes, then the slice data. Returns the total bytes written. */
+static int prores_encode_picture(AVCodecContext *avctx, const AVFrame *pic,
+        uint8_t *buf, const int buf_size)
+{
+    int mb_width = (avctx->width + 15) >> 4;
+    int mb_height = (avctx->height + 15) >> 4;
+    int hdr_size, sl_size, i;
+    int mb_y, sl_data_size, qp;
+    int unsafe_bot, unsafe_right;
+    uint8_t *sl_data, *sl_data_sizes;
+    int slice_per_line = 0, rem = mb_width;
+
+    for (i = av_log2(DEFAULT_SLICE_MB_WIDTH); i >= 0; --i) { /* count the power-of-two-wide slices that cover one macroblock row */
+        slice_per_line += rem >> i;
+        rem &= (1 << i) - 1;
+    }
+
+    qp = qp_start_table[avctx->profile]; /* qp then carries over from slice to slice */
+    hdr_size = 8; sl_data_size = buf_size - hdr_size;
+    sl_data_sizes = buf + hdr_size;
+    sl_data = sl_data_sizes + (slice_per_line * mb_height * 2); /* slice data starts after the 2-byte-per-slice size table */
+    for (mb_y = 0; mb_y < mb_height; mb_y++) {
+        int mb_x = 0;
+        int slice_mb_count = DEFAULT_SLICE_MB_WIDTH;
+        while (mb_x < mb_width) {
+            while (mb_width - mb_x < slice_mb_count) /* halve the slice width until it fits the rest of the row */
+                slice_mb_count >>= 1;
+
+            unsafe_bot = (avctx->height & 0xf) && (mb_y == mb_height - 1); /* last row of a height that is not a multiple of 16 */
+            unsafe_right = (avctx->width & 0xf) && (mb_x + slice_mb_count == mb_width); /* last slice of a width that is not a multiple of 16 */
+
+            sl_size = encode_slice(avctx, pic, mb_x, mb_y, slice_mb_count,
+                    sl_data, sl_data_size, unsafe_bot || unsafe_right, &qp);
+
+            bytestream_put_be16(&sl_data_sizes, sl_size);
+            sl_data           += sl_size;
+            sl_data_size      -= sl_size;
+            mb_x              += slice_mb_count;
+        }
+    }
+
+    buf[0] = hdr_size << 3; /* picture header size in bits */
+    AV_WB32(buf + 1, sl_data - buf); /* total picture size in bytes */
+    AV_WB16(buf + 5, slice_per_line * mb_height); /* number of slices */
+    buf[7] = av_log2(DEFAULT_SLICE_MB_WIDTH) << 4; /* log2 of the default slice width in the high nibble */
+
+    return sl_data - buf;
+}
+/* encode2 callback: allocate the packet, encode the picture behind the frame header, then write the 8-byte frame prefix and the 148-byte header in front of it. */
+static int prores_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
+                               const AVFrame *pict, int *got_packet)
+{
+    int header_size = 148;
+    uint8_t *buf;
+    int pic_size, ret;
+    int frame_size = FFALIGN(avctx->width, 16) * FFALIGN(avctx->height, 16)*16 + 500 + FF_MIN_BUFFER_SIZE; //FIXME choose tighter limit
+    /* NOTE(review): the product above is computed in int; at the largest dimensions init accepts (65534x65535) it is 2^36 and exceeds INT_MAX -- confirm/guard */
+
+    if ((ret = ff_alloc_packet2(avctx, pkt, frame_size + FF_MIN_BUFFER_SIZE)) < 0)
+        return ret;
+
+    buf = pkt->data;
+    pic_size = prores_encode_picture(avctx, pict, buf + header_size + 8,
+            pkt->size - header_size - 8); /* picture data goes after the prefix and header written below */
+
+    bytestream_put_be32(&buf, pic_size + 8 + header_size); /* total frame size */
+    bytestream_put_buffer(&buf, "icpf", 4);
+
+    bytestream_put_be16(&buf, header_size);
+    bytestream_put_be16(&buf, 0);
+    bytestream_put_buffer(&buf, "fmpg", 4);
+    bytestream_put_be16(&buf, avctx->width);
+    bytestream_put_be16(&buf, avctx->height);
+    *buf++ = 0x83; // {10}(422){00}{00}(frame){11}
+    *buf++ = 0;
+    *buf++ = 2;
+    *buf++ = 2;
+    *buf++ = 6;
+    *buf++ = 32;
+    *buf++ = 0;
+    *buf++ = 3;
+
+    bytestream_put_buffer(&buf, QMAT_LUMA[avctx->profile],   64); /* unscaled quantization matrices, 64 bytes each */
+    bytestream_put_buffer(&buf, QMAT_CHROMA[avctx->profile], 64);
+
+    pkt->flags |= AV_PKT_FLAG_KEY; /* every packet is flagged as a key frame */
+    pkt->size = pic_size + 8 + header_size; /* shrink to the bytes actually written */
+    *got_packet = 1;
+
+    return 0;
+}
+/* Fill dst with the 64 entries of src, each multiplied by scale. */
+static void scale_mat(const uint8_t* src, int* dst, int scale)
+{
+    int i;
+    for (i = 0; i < 64; i++)
+        dst[i] = src[i] * scale;
+}
+/* Validate pixel format, dimensions and profile, allocate the edge-fill buffer and coded_frame, and precompute the quantization matrices for qp 1..16. Returns 0 or a negative AVERROR. */
+static av_cold int prores_encode_init(AVCodecContext *avctx)
+{
+    int i;
+    ProresContext* ctx = avctx->priv_data;
+
+    if (avctx->pix_fmt != AV_PIX_FMT_YUV422P10) {
+        av_log(avctx, AV_LOG_ERROR, "need YUV422P10\n");
+        return AVERROR(EINVAL);
+    }
+    avctx->bits_per_raw_sample = 10;
+
+    if (avctx->width & 0x1) {
+        av_log(avctx, AV_LOG_ERROR,
+                "frame width needs to be multiple of 2\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (avctx->width > 65534 || avctx->height > 65535) {
+        av_log(avctx, AV_LOG_ERROR,
+                "The maximum dimensions are 65534x65535\n");
+        return AVERROR(EINVAL);
+    }
+
+    /* Resolve and validate the profile before anything is allocated, so this error path has nothing to release. */
+    if (avctx->profile == FF_PROFILE_UNKNOWN) {
+        avctx->profile = FF_PROFILE_PRORES_STANDARD;
+        av_log(avctx, AV_LOG_INFO,
+                "encoding with ProRes standard (apcn) profile\n");
+    } else if (avctx->profile < FF_PROFILE_PRORES_PROXY
+            || avctx->profile > FF_PROFILE_PRORES_HQ) {
+        av_log(avctx, AV_LOG_ERROR,
+                "unknown profile %d, use [0 - apco, 1 - apcs, 2 - apcn (default), 3 - apch]\n",
+                avctx->profile);
+        return AVERROR(EINVAL);
+    }
+
+    if ((avctx->height & 0xf) || (avctx->width & 0xf)) { /* edge slices need padding: one allocation holds the Y, U and V fill buffers */
+        ctx->fill_y = av_malloc(4 * (DEFAULT_SLICE_MB_WIDTH << 8));
+        if (!ctx->fill_y)
+            return AVERROR(ENOMEM);
+        ctx->fill_u = ctx->fill_y + (DEFAULT_SLICE_MB_WIDTH << 9);
+        ctx->fill_v = ctx->fill_u + (DEFAULT_SLICE_MB_WIDTH << 8);
+    }
+
+    avctx->coded_frame = av_frame_alloc();
+    if (!avctx->coded_frame) { /* do not dereference a failed allocation; release the fill buffer taken above */
+        av_freep(&ctx->fill_y);
+        return AVERROR(ENOMEM);
+    }
+    avctx->coded_frame->key_frame = 1;
+    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+    ff_fdctdsp_init(&ctx->fdsp, avctx);
+    avctx->codec_tag = AV_RL32((const uint8_t*)profiles[avctx->profile].name);
+
+    for (i = 1; i <= 16; i++) { /* matrix for quantizer i is stored at index i - 1 */
+        scale_mat(QMAT_LUMA[avctx->profile]  , ctx->qmat_luma[i - 1]  , i);
+        scale_mat(QMAT_CHROMA[avctx->profile], ctx->qmat_chroma[i - 1], i);
+    }
+
+    return 0;
+}
+/* Release what prores_encode_init() allocated: the coded frame and the edge-fill buffer. Always returns 0. */
+static av_cold int prores_encode_close(AVCodecContext *avctx)
+{
+    ProresContext* ctx = avctx->priv_data;
+    av_frame_free(&avctx->coded_frame); /* counterpart of av_frame_alloc(); a NULL frame is accepted */
+    av_freep(&ctx->fill_y); /* fill_u and fill_v point into this same allocation */
+
+    return 0;
+}
+/* Encoder registration under the name "prores_aw". */
+AVCodec ff_prores_aw_encoder = {
+    .name           = "prores_aw",
+    .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_PRORES,
+    .priv_data_size = sizeof(ProresContext),
+    .init           = prores_encode_init,
+    .close          = prores_encode_close,
+    .encode2        = prores_encode_frame,
+    .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUV422P10, AV_PIX_FMT_NONE}, /* the only accepted input format */
+    .capabilities   = CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
+    .profiles       = profiles
+};
+/* The same encoder registered under the name "prores"; it differs from ff_prores_aw_encoder only in .name. */
+AVCodec ff_prores_encoder = {
+    .name           = "prores",
+    .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_PRORES,
+    .priv_data_size = sizeof(ProresContext),
+    .init           = prores_encode_init,
+    .close          = prores_encode_close,
+    .encode2        = prores_encode_frame,
+    .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_YUV422P10, AV_PIX_FMT_NONE}, /* the only accepted input format */
+    .capabilities   = CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
+    .profiles       = profiles
+};
diff --git a/libavcodec/proresenc.c b/libavcodec/proresenc_kostya.c
index bdb826c..93bcde7 100644
--- a/libavcodec/proresenc.c
+++ b/libavcodec/proresenc_kostya.c
@@ -3,20 +3,23 @@
  *
  * Copyright (c) 2012 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This encoder appears to be based on Anatoliy Wasserman's encoder,
+ * considering the similarities in the bugs.
  *
- * Libav is free software; you can redistribute it and/or
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -940,10 +943,8 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
     pkt_size = ctx->frame_size_upper_bound + FF_MIN_BUFFER_SIZE;
 
-    if ((ret = ff_alloc_packet(pkt, pkt_size)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
+    if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
         return ret;
-    }
 
     orig_buf = pkt->data;
 
@@ -1032,10 +1033,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
             }
         }
 
-        if (ctx->pictures_per_frame == 1)
-            picture_size = buf - picture_size_pos - 6;
-        else
-            picture_size = buf - picture_size_pos + 1;
+        picture_size = buf - (picture_size_pos - 1);
         bytestream_put_be32(&picture_size_pos, picture_size);
     }
 
@@ -1275,8 +1273,8 @@ static const AVClass proresenc_class = {
     .version    = LIBAVUTIL_VERSION_INT,
 };
 
-AVCodec ff_prores_encoder = {
-    .name           = "prores",
+AVCodec ff_prores_ks_encoder = {
+    .name           = "prores_ks",
     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_PRORES,
diff --git a/libavcodec/psymodel.c b/libavcodec/psymodel.c
index a2af611..22d2497 100644
--- a/libavcodec/psymodel.c
+++ b/libavcodec/psymodel.c
@@ -2,20 +2,20 @@
  * audio encoder psychoacoustic model
  * Copyright (C) 2008 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -35,10 +35,10 @@ av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens,
     int i, j, k = 0;
 
     ctx->avctx = avctx;
-    ctx->ch        = av_mallocz(sizeof(ctx->ch[0]) * avctx->channels * 2);
-    ctx->group     = av_mallocz(sizeof(ctx->group[0]) * num_groups);
-    ctx->bands     = av_malloc (sizeof(ctx->bands[0])     * num_lens);
-    ctx->num_bands = av_malloc (sizeof(ctx->num_bands[0]) * num_lens);
+    ctx->ch        = av_mallocz_array(sizeof(ctx->ch[0]), avctx->channels * 2);
+    ctx->group     = av_mallocz_array(sizeof(ctx->group[0]), num_groups);
+    ctx->bands     = av_malloc_array (sizeof(ctx->bands[0]),      num_lens);
+    ctx->num_bands = av_malloc_array (sizeof(ctx->num_bands[0]),  num_lens);
     memcpy(ctx->bands,     bands,     sizeof(ctx->bands[0])     *  num_lens);
     memcpy(ctx->num_bands, num_bands, sizeof(ctx->num_bands[0]) *  num_lens);
 
@@ -75,7 +75,7 @@ FFPsyChannelGroup *ff_psy_find_group(FFPsyContext *ctx, int channel)
 
 av_cold void ff_psy_end(FFPsyContext *ctx)
 {
-    if (ctx->model->end)
+    if (ctx->model && ctx->model->end)
         ctx->model->end(ctx);
     av_freep(&ctx->bands);
     av_freep(&ctx->num_bands);
@@ -88,6 +88,7 @@ typedef struct FFPsyPreprocessContext{
     float stereo_att;
     struct FFIIRFilterCoeffs *fcoeffs;
     struct FFIIRFilterState **fstate;
+    struct FFIIRFilterContext fiir;
 }FFPsyPreprocessContext;
 
 #define FILT_ORDER 4
@@ -103,15 +104,21 @@ av_cold struct FFPsyPreprocessContext* ff_psy_preprocess_init(AVCodecContext *av
     if (avctx->cutoff > 0)
         cutoff_coeff = 2.0 * avctx->cutoff / avctx->sample_rate;
 
-    if (cutoff_coeff)
+    if (!cutoff_coeff && avctx->codec_id == AV_CODEC_ID_AAC)
+        cutoff_coeff = 2.0 * AAC_CUTOFF(avctx) / avctx->sample_rate;
+
+    if (cutoff_coeff && cutoff_coeff < 0.98)
     ctx->fcoeffs = ff_iir_filter_init_coeffs(avctx, FF_FILTER_TYPE_BUTTERWORTH,
                                              FF_FILTER_MODE_LOWPASS, FILT_ORDER,
                                              cutoff_coeff, 0.0, 0.0);
     if (ctx->fcoeffs) {
-        ctx->fstate = av_mallocz(sizeof(ctx->fstate[0]) * avctx->channels);
+        ctx->fstate = av_mallocz_array(sizeof(ctx->fstate[0]), avctx->channels);
         for (i = 0; i < avctx->channels; i++)
             ctx->fstate[i] = ff_iir_filter_init_state(FILT_ORDER);
     }
+
+    ff_iir_filter_init(&ctx->fiir);
+
     return ctx;
 }
 
@@ -119,11 +126,12 @@ void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, float **audio, int ch
 {
     int ch;
     int frame_size = ctx->avctx->frame_size;
+    FFIIRFilterContext *iir = &ctx->fiir;
 
     if (ctx->fstate) {
         for (ch = 0; ch < channels; ch++)
-            ff_iir_filter_flt(ctx->fcoeffs, ctx->fstate[ch], frame_size,
-                              &audio[ch][frame_size], 1, &audio[ch][frame_size], 1);
+            iir->filter_flt(ctx->fcoeffs, ctx->fstate[ch], frame_size,
+                            &audio[ch][frame_size], 1, &audio[ch][frame_size], 1);
     }
 }
 
diff --git a/libavcodec/psymodel.h b/libavcodec/psymodel.h
index 1cc3066..75261ba 100644
--- a/libavcodec/psymodel.h
+++ b/libavcodec/psymodel.h
@@ -2,20 +2,20 @@
  * audio encoder psychoacoustic model
  * Copyright (C) 2008 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -29,6 +29,8 @@
 /** maximum number of channels */
 #define PSY_MAX_CHANS 20
 
+#define AAC_CUTOFF(s) ((s)->bit_rate ? FFMIN3(4000 + (s)->bit_rate/8, 12000 + (s)->bit_rate/32, (s)->sample_rate / 2) : ((s)->sample_rate / 2))
+
 /**
  * single band psychoacoustic information
  */
diff --git a/libavcodec/pthread.c b/libavcodec/pthread.c
index 682fd05..407ca2e 100644
--- a/libavcodec/pthread.c
+++ b/libavcodec/pthread.c
@@ -6,20 +6,20 @@
  * to Michael Niedermayer <michaelni@gmx.at> for writing initial
  * implementation.
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index a658f3e..1db46fc 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -30,6 +30,8 @@
 #include <pthread.h>
 #elif HAVE_W32THREADS
 #include "compat/w32pthreads.h"
+#elif HAVE_OS2THREADS
+#include "compat/os2threads.h"
 #endif
 
 #include "avcodec.h"
@@ -77,6 +79,10 @@ typedef struct PerThreadContext {
                                      * Set when the codec calls get_buffer().
                                      * State is returned to STATE_SETTING_UP afterwards.
                                      */
+        STATE_GET_FORMAT,           /**<
+                                     * Set when the codec calls get_format().
+                                     * State is returned to STATE_SETTING_UP afterwards.
+                                     */
         STATE_SETUP_FINISHED        ///< Set after the codec has called ff_thread_finish_setup().
     } state;
 
@@ -90,6 +96,9 @@ typedef struct PerThreadContext {
 
     AVFrame *requested_frame;       ///< AVFrame the codec passed to get_buffer()
     int      requested_flags;       ///< flags passed to get_buffer() for requested_frame
+
+    const enum AVPixelFormat *available_formats; ///< Format array for get_format()
+    enum AVPixelFormat result_format;            ///< get_format() result
 } PerThreadContext;
 
 /**
@@ -112,6 +121,14 @@ typedef struct FrameThreadContext {
     int die;                       ///< Set when threads should exit.
 } FrameThreadContext;
 
+#if FF_API_GET_BUFFER
+#define THREAD_SAFE_CALLBACKS(avctx) \
+((avctx)->thread_safe_callbacks || (!(avctx)->get_buffer && (avctx)->get_buffer2 == avcodec_default_get_buffer2))
+#else
+#define THREAD_SAFE_CALLBACKS(avctx) \
+((avctx)->thread_safe_callbacks || (avctx)->get_buffer2 == avcodec_default_get_buffer2)
+#endif
+
 /**
  * Codec worker thread.
  *
@@ -126,20 +143,16 @@ static attribute_align_arg void *frame_worker_thread(void *arg)
     AVCodecContext *avctx = p->avctx;
     const AVCodec *codec = avctx->codec;
 
+    pthread_mutex_lock(&p->mutex);
     while (1) {
-        if (p->state == STATE_INPUT_READY && !fctx->die) {
-            pthread_mutex_lock(&p->mutex);
             while (p->state == STATE_INPUT_READY && !fctx->die)
                 pthread_cond_wait(&p->input_cond, &p->mutex);
-            pthread_mutex_unlock(&p->mutex);
-        }
 
         if (fctx->die) break;
 
-        if (!codec->update_thread_context && avctx->thread_safe_callbacks)
+        if (!codec->update_thread_context && THREAD_SAFE_CALLBACKS(avctx))
             ff_thread_finish_setup(avctx);
 
-        pthread_mutex_lock(&p->mutex);
         av_frame_unref(p->frame);
         p->got_frame = 0;
         p->result = codec->decode(avctx, p->frame, &p->got_frame, &p->avpkt);
@@ -153,14 +166,21 @@ static attribute_align_arg void *frame_worker_thread(void *arg)
 
         if (p->state == STATE_SETTING_UP) ff_thread_finish_setup(avctx);
 
+        pthread_mutex_lock(&p->progress_mutex);
+#if 0 //BUFREF-FIXME
+        for (i = 0; i < MAX_BUFFERS; i++)
+            if (p->progress_used[i] && (p->got_frame || p->result<0 || avctx->codec_id != AV_CODEC_ID_H264)) {
+                p->progress[i][0] = INT_MAX;
+                p->progress[i][1] = INT_MAX;
+            }
+#endif
         p->state = STATE_INPUT_READY;
 
-        pthread_mutex_lock(&p->progress_mutex);
+        pthread_cond_broadcast(&p->progress_cond);
         pthread_cond_signal(&p->output_cond);
         pthread_mutex_unlock(&p->progress_mutex);
-
-        pthread_mutex_unlock(&p->mutex);
     }
+    pthread_mutex_unlock(&p->mutex);
 
     return NULL;
 }
@@ -210,10 +230,16 @@ FF_ENABLE_DEPRECATION_WARNINGS
 
         dst->hwaccel = src->hwaccel;
         dst->hwaccel_context = src->hwaccel_context;
+
+        dst->channels       = src->channels;
+        dst->sample_rate    = src->sample_rate;
+        dst->sample_fmt     = src->sample_fmt;
+        dst->channel_layout = src->channel_layout;
         dst->internal->hwaccel_priv_data = src->internal->hwaccel_priv_data;
     }
 
     if (for_user) {
+        dst->delay       = src->thread_count - 1;
         dst->coded_frame = src->coded_frame;
     } else {
         if (dst->codec->update_thread_context)
@@ -246,6 +272,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
 
     dst->opaque   = src->opaque;
     dst->debug    = src->debug;
+    dst->debug_mv = src->debug_mv;
 
     dst->slice_flags = src->slice_flags;
     dst->flags2      = src->flags2;
@@ -254,6 +281,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
 
     dst->frame_number     = src->frame_number;
     dst->reordered_opaque = src->reordered_opaque;
+    dst->thread_safe_callbacks = src->thread_safe_callbacks;
 
     if (src->slice_count && src->slice_offset) {
         if (dst->slice_count < src->slice_count) {
@@ -284,7 +312,8 @@ static void release_delayed_buffers(PerThreadContext *p)
         pthread_mutex_lock(&fctx->buffer_mutex);
 
         // fix extended data in case the caller screwed it up
-        av_assert0(p->avctx->codec_type == AVMEDIA_TYPE_VIDEO);
+        av_assert0(p->avctx->codec_type == AVMEDIA_TYPE_VIDEO ||
+                   p->avctx->codec_type == AVMEDIA_TYPE_AUDIO);
         f = &p->released_buffers[--p->num_released_buffers];
         f->extended_data = f->data;
         av_frame_unref(f);
@@ -336,18 +365,30 @@ static int submit_packet(PerThreadContext *p, AVPacket *avpkt)
 
 FF_DISABLE_DEPRECATION_WARNINGS
     if (!p->avctx->thread_safe_callbacks && (
+         p->avctx->get_format != avcodec_default_get_format ||
 #if FF_API_GET_BUFFER
          p->avctx->get_buffer ||
 #endif
          p->avctx->get_buffer2 != avcodec_default_get_buffer2)) {
 FF_ENABLE_DEPRECATION_WARNINGS
         while (p->state != STATE_SETUP_FINISHED && p->state != STATE_INPUT_READY) {
+            int call_done = 1;
             pthread_mutex_lock(&p->progress_mutex);
             while (p->state == STATE_SETTING_UP)
                 pthread_cond_wait(&p->progress_cond, &p->progress_mutex);
 
-            if (p->state == STATE_GET_BUFFER) {
+            switch (p->state) {
+            case STATE_GET_BUFFER:
                 p->result = ff_get_buffer(p->avctx, p->requested_frame, p->requested_flags);
+                break;
+            case STATE_GET_FORMAT:
+                p->result_format = ff_get_format(p->avctx, p->available_formats);
+                break;
+            default:
+                call_done = 0;
+                break;
+            }
+            if (call_done) {
                 p->state  = STATE_SETTING_UP;
                 pthread_cond_signal(&p->progress_cond);
             }
@@ -384,9 +425,10 @@ int ff_thread_decode_frame(AVCodecContext *avctx,
      * If we're still receiving the initial packets, don't return a frame.
      */
 
-    if (fctx->delaying) {
-        if (fctx->next_decoding >= (avctx->thread_count-1)) fctx->delaying = 0;
+    if (fctx->next_decoding > (avctx->thread_count-1-(avctx->codec_id == AV_CODEC_ID_FFV1)))
+        fctx->delaying = 0;
 
+    if (fctx->delaying) {
         *got_picture_ptr=0;
         if (avpkt->size)
             return avpkt->size;
@@ -437,7 +479,7 @@ int ff_thread_decode_frame(AVCodecContext *avctx,
 void ff_thread_report_progress(ThreadFrame *f, int n, int field)
 {
     PerThreadContext *p;
-    int *progress = f->progress ? (int*)f->progress->data : NULL;
+    volatile int *progress = f->progress ? (int*)f->progress->data : NULL;
 
     if (!progress || progress[field] >= n) return;
 
@@ -455,7 +497,7 @@ void ff_thread_report_progress(ThreadFrame *f, int n, int field)
 void ff_thread_await_progress(ThreadFrame *f, int n, int field)
 {
     PerThreadContext *p;
-    int *progress = f->progress ? (int*)f->progress->data : NULL;
+    volatile int *progress = f->progress ? (int*)f->progress->data : NULL;
 
     if (!progress || progress[field] >= n) return;
 
@@ -475,6 +517,10 @@ void ff_thread_finish_setup(AVCodecContext *avctx) {
 
     if (!(avctx->active_thread_type&FF_THREAD_FRAME)) return;
 
+    if(p->state == STATE_SETUP_FINISHED){
+        av_log(avctx, AV_LOG_WARNING, "Multiple ff_thread_finish_setup() calls\n");
+    }
+
     pthread_mutex_lock(&p->progress_mutex);
     p->state = STATE_SETUP_FINISHED;
     pthread_cond_broadcast(&p->progress_cond);
@@ -495,6 +541,7 @@ static void park_frame_worker_threads(FrameThreadContext *fctx, int thread_count
                 pthread_cond_wait(&p->output_cond, &p->progress_mutex);
             pthread_mutex_unlock(&p->progress_mutex);
         }
+        p->got_frame = 0;
     }
 }
 
@@ -507,7 +554,11 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
     park_frame_worker_threads(fctx, thread_count);
 
     if (fctx->prev_thread && fctx->prev_thread != fctx->threads)
-        update_context_from_thread(fctx->threads->avctx, fctx->prev_thread->avctx, 0);
+        if (update_context_from_thread(fctx->threads->avctx, fctx->prev_thread->avctx, 0) < 0) {
+            av_log(avctx, AV_LOG_ERROR, "Final thread update failed\n");
+            fctx->prev_thread->avctx->internal->is_copy = fctx->threads->avctx->internal->is_copy;
+            fctx->threads->avctx->internal->is_copy = 1;
+        }
 
     fctx->die = 1;
 
@@ -520,6 +571,7 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
 
         if (p->thread_init)
             pthread_join(p->thread, NULL);
+        p->thread_init=0;
 
         if (codec->close)
             codec->close(p->avctx);
@@ -569,7 +621,8 @@ int ff_frame_thread_init(AVCodecContext *avctx)
 
     if (!thread_count) {
         int nb_cpus = av_cpu_count();
-        av_log(avctx, AV_LOG_DEBUG, "detected %d logical cores\n", nb_cpus);
+        if ((avctx->debug & (FF_DEBUG_VIS_QP | FF_DEBUG_VIS_MB_TYPE)) || avctx->debug_mv)
+            nb_cpus = 1;
         // use number of cores + 1 as thread count if there is more than one
         if (nb_cpus > 1)
             thread_count = avctx->thread_count = FFMIN(nb_cpus + 1, MAX_AUTO_THREADS);
@@ -584,7 +637,7 @@ int ff_frame_thread_init(AVCodecContext *avctx)
 
     avctx->internal->thread_ctx = fctx = av_mallocz(sizeof(FrameThreadContext));
 
-    fctx->threads = av_mallocz(sizeof(PerThreadContext) * thread_count);
+    fctx->threads = av_mallocz_array(thread_count, sizeof(PerThreadContext));
     pthread_mutex_init(&fctx->buffer_mutex, NULL);
     fctx->delaying = 1;
 
@@ -601,6 +654,7 @@ int ff_frame_thread_init(AVCodecContext *avctx)
         p->frame = av_frame_alloc();
         if (!p->frame) {
             err = AVERROR(ENOMEM);
+            av_freep(&copy);
             goto error;
         }
 
@@ -645,8 +699,10 @@ int ff_frame_thread_init(AVCodecContext *avctx)
 
         if (err) goto error;
 
-        if (!pthread_create(&p->thread, NULL, frame_worker_thread, p))
-            p->thread_init = 1;
+        err = AVERROR(pthread_create(&p->thread, NULL, frame_worker_thread, p));
+        p->thread_init= !err;
+        if(!p->thread_init)
+            goto error;
     }
 
     return 0;
@@ -686,18 +742,30 @@ void ff_thread_flush(AVCodecContext *avctx)
     }
 }
 
-int ff_thread_get_buffer(AVCodecContext *avctx, ThreadFrame *f, int flags)
+int ff_thread_can_start_frame(AVCodecContext *avctx)
+{
+    /* 1 when not frame-threaded or still in STATE_SETTING_UP; after setup, 0 if the
+     * codec has update_thread_context or THREAD_SAFE_CALLBACKS(avctx) is false. */
+    PerThreadContext *worker = avctx->internal->thread_ctx;
+    if (!(avctx->active_thread_type & FF_THREAD_FRAME) || worker->state == STATE_SETTING_UP)
+        return 1;
+    return !(avctx->codec->update_thread_context || !THREAD_SAFE_CALLBACKS(avctx));
+}
+
+static int thread_get_buffer_internal(AVCodecContext *avctx, ThreadFrame *f, int flags)
 {
     PerThreadContext *p = avctx->internal->thread_ctx;
     int err;
 
     f->owner = avctx;
 
+    ff_init_buffer_info(avctx, f->f);
+
     if (!(avctx->active_thread_type & FF_THREAD_FRAME))
         return ff_get_buffer(avctx, f->f, flags);
 
     if (p->state != STATE_SETTING_UP &&
-        (avctx->codec->update_thread_context || !avctx->thread_safe_callbacks)) {
+        (avctx->codec->update_thread_context || !THREAD_SAFE_CALLBACKS(avctx))) {
         av_log(avctx, AV_LOG_ERROR, "get_buffer() cannot be called after ff_thread_finish_setup()\n");
         return -1;
     }
@@ -723,11 +791,11 @@ FF_DISABLE_DEPRECATION_WARNINGS
 FF_ENABLE_DEPRECATION_WARNINGS
         err = ff_get_buffer(avctx, f->f, flags);
     } else {
+        pthread_mutex_lock(&p->progress_mutex);
         p->requested_frame = f->f;
         p->requested_flags = flags;
         p->state = STATE_GET_BUFFER;
-        pthread_mutex_lock(&p->progress_mutex);
-        pthread_cond_signal(&p->progress_cond);
+        pthread_cond_broadcast(&p->progress_cond);
 
         while (p->state != STATE_SETTING_UP)
             pthread_cond_wait(&p->progress_cond, &p->progress_mutex);
@@ -737,7 +805,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
         pthread_mutex_unlock(&p->progress_mutex);
 
     }
-    if (!avctx->thread_safe_callbacks && !avctx->codec->update_thread_context)
+    if (!THREAD_SAFE_CALLBACKS(avctx) && !avctx->codec->update_thread_context)
         ff_thread_finish_setup(avctx);
 
     if (err)
@@ -748,6 +816,40 @@ FF_ENABLE_DEPRECATION_WARNINGS
     return err;
 }
 
+enum AVPixelFormat ff_thread_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt)
+{
+    /* Pick a pixel format from fmt. A user get_format() that is not thread-safe is
+     * delegated: submit_packet() services STATE_GET_FORMAT and stores the result. */
+    PerThreadContext *p = avctx->internal->thread_ctx;
+    enum AVPixelFormat res;
+
+    if (!(avctx->active_thread_type & FF_THREAD_FRAME) || avctx->thread_safe_callbacks ||
+        avctx->get_format == avcodec_default_get_format)
+        return ff_get_format(avctx, fmt);
+    if (p->state != STATE_SETTING_UP) {
+        av_log(avctx, AV_LOG_ERROR, "get_format() cannot be called after ff_thread_finish_setup()\n");
+        return AV_PIX_FMT_NONE; /* typed spelling of -1: no format negotiated */
+    }
+    pthread_mutex_lock(&p->progress_mutex);
+    p->available_formats = fmt;
+    p->state = STATE_GET_FORMAT;
+    pthread_cond_broadcast(&p->progress_cond);
+    /* Sleep until the request was serviced and the state reset to SETTING_UP. */
+    while (p->state != STATE_SETTING_UP)
+        pthread_cond_wait(&p->progress_cond, &p->progress_mutex);
+    res = p->result_format;
+    pthread_mutex_unlock(&p->progress_mutex);
+    return res;
+}
+
+int ff_thread_get_buffer(AVCodecContext *avctx, ThreadFrame *f, int flags)
+{
+    /* Wrapper: logs any failure of thread_get_buffer_internal() and returns its result. */
+    const int status = thread_get_buffer_internal(avctx, f, flags);
+    if (status < 0) av_log(avctx, AV_LOG_ERROR, "thread_get_buffer() failed\n");
+    return status;
+}
+
 void ff_thread_release_buffer(AVCodecContext *avctx, ThreadFrame *f)
 {
     PerThreadContext *p = avctx->internal->thread_ctx;
diff --git a/libavcodec/pthread_internal.h b/libavcodec/pthread_internal.h
index 5dfd0b2..6a2f378 100644
--- a/libavcodec/pthread_internal.h
+++ b/libavcodec/pthread_internal.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/pthread_slice.c b/libavcodec/pthread_slice.c
index d7c73f0..fea989f 100644
--- a/libavcodec/pthread_slice.c
+++ b/libavcodec/pthread_slice.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -28,6 +28,8 @@
 #include <pthread.h>
 #elif HAVE_W32THREADS
 #include "compat/w32pthreads.h"
+#elif HAVE_OS2THREADS
+#include "compat/os2threads.h"
 #endif
 
 #include "avcodec.h"
@@ -58,6 +60,12 @@ typedef struct SliceThreadContext {
     unsigned current_execute;
     int current_job;
     int done;
+
+    int *entries;
+    int entries_count;
+    int thread_count;
+    pthread_cond_t *progress_cond;
+    pthread_mutex_t *progress_mutex;
 } SliceThreadContext;
 
 static void* attribute_align_arg worker(void *v)
@@ -175,7 +183,8 @@ int ff_slice_thread_init(AVCodecContext *avctx)
 
     if (!thread_count) {
         int nb_cpus = av_cpu_count();
-        av_log(avctx, AV_LOG_DEBUG, "detected %d logical cores\n", nb_cpus);
+        if  (avctx->height)
+            nb_cpus = FFMIN(nb_cpus, (avctx->height+15)/16);
         // use number of cores + 1 as thread count if there is more than one
         if (nb_cpus > 1)
             thread_count = avctx->thread_count = FFMIN(nb_cpus + 1, MAX_AUTO_THREADS);
@@ -192,7 +201,7 @@ int ff_slice_thread_init(AVCodecContext *avctx)
     if (!c)
         return -1;
 
-    c->workers = av_mallocz(sizeof(pthread_t)*thread_count);
+    c->workers = av_mallocz_array(thread_count, sizeof(pthread_t));
     if (!c->workers) {
         av_free(c);
         return -1;
@@ -222,3 +231,65 @@ int ff_slice_thread_init(AVCodecContext *avctx)
     avctx->execute2 = thread_execute2;
     return 0;
 }
+
+void ff_thread_report_progress2(AVCodecContext *avctx, int field, int thread, int n)
+{
+    /* Add n to entries[field] and wake one waiter on progress_cond[thread]. */
+    SliceThreadContext *p = avctx->internal->thread_ctx;
+    if (!p->entries) return; /* no table allocated; ff_thread_await_progress2() bails too */
+    pthread_mutex_lock(&p->progress_mutex[thread]);
+    p->entries[field] += n;
+    pthread_cond_signal(&p->progress_cond[thread]);
+    pthread_mutex_unlock(&p->progress_mutex[thread]);
+}
+
+void ff_thread_await_progress2(AVCodecContext *avctx, int field, int thread, int shift)
+{
+    /* Block until entries[field - 1] leads entries[field] by at least shift.
+     * Waits on the slot before thread (thread - 1, wrapping to the last one). */
+    SliceThreadContext *ctx = avctx->internal->thread_ctx;
+    int *progress = ctx->entries;
+    int slot;
+    if (!progress || !field)
+        return; /* no table, or field 0 which has no preceding entry */
+    slot = thread ? thread - 1 : ctx->thread_count - 1;
+    pthread_mutex_lock(&ctx->progress_mutex[slot]);
+    while (progress[field - 1] - progress[field] < shift)
+        pthread_cond_wait(&ctx->progress_cond[slot], &ctx->progress_mutex[slot]);
+    pthread_mutex_unlock(&ctx->progress_mutex[slot]);
+}
+
+int ff_alloc_entries(AVCodecContext *avctx, int count)
+{
+    /* Allocate the zeroed progress table (count ints) plus one mutex/condition
+     * pair per thread. Does nothing unless slice threading is active.
+     * Returns 0 on success, or AVERROR(ENOMEM) after freeing all three arrays. */
+    SliceThreadContext *ctx;
+    int t;
+
+    if (!(avctx->active_thread_type & FF_THREAD_SLICE))
+        return 0;
+    ctx                 = avctx->internal->thread_ctx;
+    ctx->thread_count   = avctx->thread_count;
+    ctx->entries        = av_mallocz_array(count, sizeof(int));
+    ctx->progress_mutex = av_malloc_array(ctx->thread_count, sizeof(pthread_mutex_t));
+    ctx->progress_cond  = av_malloc_array(ctx->thread_count, sizeof(pthread_cond_t));
+    if (!(ctx->entries && ctx->progress_mutex && ctx->progress_cond)) {
+        av_freep(&ctx->entries);
+        av_freep(&ctx->progress_mutex);
+        av_freep(&ctx->progress_cond);
+        return AVERROR(ENOMEM);
+    }
+    ctx->entries_count = count;
+    for (t = 0; t < ctx->thread_count; t++) {
+        pthread_mutex_init(&ctx->progress_mutex[t], NULL);
+        pthread_cond_init(&ctx->progress_cond[t], NULL);
+    }
+    return 0;
+}
+
+void ff_reset_entries(AVCodecContext *avctx)
+{
+    SliceThreadContext *ctx = avctx->internal->thread_ctx;
+    memset(ctx->entries, 0, sizeof(*ctx->entries) * ctx->entries_count); /* clear every counter */
+}
diff --git a/libavcodec/ptx.c b/libavcodec/ptx.c
index 76fff26..8c3abd7 100644
--- a/libavcodec/ptx.c
+++ b/libavcodec/ptx.c
@@ -2,20 +2,20 @@
  * V.Flash PTX (.ptx) image decoder
  * Copyright (c) 2007 Ivo van Poorten
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -46,7 +46,7 @@ static int ptx_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         return AVERROR_PATCHWELCOME;
     }
 
-    avctx->pix_fmt = AV_PIX_FMT_RGB555;
+    avctx->pix_fmt = AV_PIX_FMT_BGR555LE;
 
     if (buf_end - buf < offset)
         return AVERROR_INVALIDDATA;
@@ -58,10 +58,8 @@ static int ptx_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
         return ret;
 
-    if ((ret = ff_get_buffer(avctx, p, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
         return ret;
-    }
 
     p->pict_type = AV_PICTURE_TYPE_I;
 
@@ -69,13 +67,7 @@ static int ptx_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     stride = p->linesize[0];
 
     for (y = 0; y < h && buf_end - buf >= w * bytes_per_pixel; y++) {
-#if HAVE_BIGENDIAN
-        unsigned int x;
-        for (x=0; x<w*bytes_per_pixel; x+=bytes_per_pixel)
-            AV_WN16(ptr+x, AV_RL16(buf+x));
-#else
         memcpy(ptr, buf, w*bytes_per_pixel);
-#endif
         ptr += stride;
         buf += w*bytes_per_pixel;
     }
diff --git a/libavcodec/put_bits.h b/libavcodec/put_bits.h
index 17666fa..8081fb9 100644
--- a/libavcodec/put_bits.h
+++ b/libavcodec/put_bits.h
@@ -1,20 +1,20 @@
 /*
  * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -31,6 +31,7 @@
 #include <assert.h>
 
 #include "libavutil/intreadwrite.h"
+#include "libavutil/avassert.h"
 
 typedef struct PutBitContext {
     uint32_t bit_buf;
@@ -136,7 +137,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
     unsigned int bit_buf;
     int bit_left;
 
-    assert(n <= 31 && value < (1U << n));
+    av_assert2(n <= 31 && value < (1U << n));
 
     bit_buf  = s->bit_buf;
     bit_left = s->bit_left;
@@ -145,6 +146,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
 #ifdef BITSTREAM_WRITER_LE
     bit_buf |= value << (32 - bit_left);
     if (n >= bit_left) {
+        av_assert2(s->buf_ptr+3<s->buf_end);
         AV_WL32(s->buf_ptr, bit_buf);
         s->buf_ptr += 4;
         bit_buf     = (bit_left == 32) ? 0 : value >> bit_left;
@@ -158,6 +160,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
     } else {
         bit_buf   <<= bit_left;
         bit_buf    |= value >> (n - bit_left);
+        av_assert2(s->buf_ptr+3<s->buf_end);
         AV_WB32(s->buf_ptr, bit_buf);
         s->buf_ptr += 4;
         bit_left   += 32 - n;
@@ -171,7 +174,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
 
 static inline void put_sbits(PutBitContext *pb, int n, int32_t value)
 {
-    assert(n >= 0 && n <= 31);
+    av_assert2(n >= 0 && n <= 31);
 
     put_bits(pb, n, value & ((1 << n) - 1));
 }
@@ -207,8 +210,8 @@ static inline uint8_t *put_bits_ptr(PutBitContext *s)
  */
 static inline void skip_put_bytes(PutBitContext *s, int n)
 {
-    assert((put_bits_count(s) & 7) == 0);
-    assert(s->bit_left == 32);
+    av_assert2((put_bits_count(s) & 7) == 0);
+    av_assert2(s->bit_left == 32);
     s->buf_ptr += n;
 }
 
diff --git a/libavcodec/qcelpdata.h b/libavcodec/qcelpdata.h
index 319833e..931c990 100644
--- a/libavcodec/qcelpdata.h
+++ b/libavcodec/qcelpdata.h
@@ -2,20 +2,20 @@
  * QCELP decoder
  * Copyright (c) 2007 Reynaldo H. Verdejo Pinochet
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,7 +26,7 @@
  * @file
  * Data tables for the QCELP decoder
  * @author Reynaldo H. Verdejo Pinochet
- * @remark Libav merging spearheaded by Kenan Gillet
+ * @remark FFmpeg merging spearheaded by Kenan Gillet
  * @remark Development mentored by Benjamin Larson
  */
 
@@ -66,7 +66,7 @@ typedef struct QCELPFrame {
 } QCELPFrame;
 
 /**
- * pre-calculated table for hammsinc function
+ * Pre-calculated table for hammsinc function.
  * Only half of the table is needed because of symmetry.
  *
  * TIA/EIA/IS-733 2.4.5.2-2/3
@@ -82,7 +82,7 @@ typedef struct QCELPBitmap {
 #define QCELP_OF(variable, bit, len) {offsetof(QCELPFrame, variable), bit, len}
 
 /**
- * bitmap unpacking tables for RATE_FULL
+ * Bitmap unpacking tables for RATE_FULL
  *
  * TIA/EIA/IS-733 Table 2.4.7.1-1
  */
@@ -169,7 +169,7 @@ static const QCELPBitmap qcelp_rate_full_bitmap[] = {
 };
 
 /**
- * bitmap unpacking tables for RATE_HALF
+ * Bitmap unpacking tables for RATE_HALF
  *
  * TIA/EIA/IS-733 Table 2.4.7.2-1
  */
@@ -211,7 +211,7 @@ static const QCELPBitmap qcelp_rate_half_bitmap[] = {
 };
 
 /**
- * bitmap unpacking tables for RATE_QUARTER
+ * Bitmap unpacking tables for RATE_QUARTER
  *
  * TIA/EIA/IS-733 Table 2.4.7.3-1
  */
@@ -232,7 +232,7 @@ static const QCELPBitmap qcelp_rate_quarter_bitmap[] = {
 };
 
 /**
- * bitmap unpacking tables for RATE_OCTAVE
+ * Bitmap unpacking tables for RATE_OCTAVE
  *
  * trick: CBSEED is written into QCELPContext.cbsign[15],
  * which is not used for RATE_OCTAVE.
@@ -257,12 +257,12 @@ static const QCELPBitmap qcelp_rate_octave_bitmap[] = {
     QCELP_OF(lspv   [8], 0, 1), //  8
     QCELP_OF(cbsign[15], 0, 1), //  7
     QCELP_OF(lspv   [9], 0, 1), //  6
-    QCELP_OF(cbgain [0], 0, 2), //  7
+    QCELP_OF(cbgain [0], 0, 2), //  5
     QCELP_OF(reserved,   0, 4)  //  3
 };
 
 /**
- * position of the bitmapping data for each packet type in
+ * Bitmapping data position for each packet type in
  * the QCELPContext
  */
 static const QCELPBitmap * const qcelp_unpacking_bitmaps_per_rate[5] = {
@@ -420,12 +420,12 @@ static const qcelp_vector * const qcelp_lspvq[5] = {
 };
 
 /**
- * the final gain scalefactor before clipping into a usable output float
+ * The final gain scalefactor before clipping into a usable output float
  */
 #define QCELP_SCALE 8192.
 
 /**
- * table for computing Ga (decoded linear codebook gain magnitude)
+ * Table for computing Ga (decoded linear codebook gain magnitude)
  *
  * @note The table could fit in int16_t in x*8 form, but it seems
  *       to be slower on x86
@@ -452,7 +452,7 @@ static const float qcelp_g12ga[61] = {
  1000.000/QCELP_SCALE};
 
 /**
- * circular codebook for rate 1 frames in x*100 form
+ * Circular codebook for rate 1 frames in x*100 form
  *
  * TIA/EIA/IS-733 2.4.6.1-2
  */
@@ -477,7 +477,7 @@ static const int16_t qcelp_rate_full_codebook[128] = {
 #define QCELP_RATE_FULL_CODEBOOK_RATIO .01
 
 /**
- * circular codebook for rate 1/2 frames in x*2 form
+ * Circular codebook for rate 1/2 frames in x*2 form
  *
  * TIA/EIA/IS-733 2.4.6.1-1
  */
@@ -511,7 +511,7 @@ static const int8_t qcelp_rate_half_codebook[128] = {
 #define QCELP_SQRT1887 1.373681186
 
 /**
- * table for impulse response of BPF used to filter
+ * Table for impulse response of BPF used to filter
  * the white excitation for bitrate 1/4 synthesis
  *
  * Only half the tables are needed because of symmetry.
@@ -526,14 +526,14 @@ static const double qcelp_rnd_fir_coefs[11] = {
 
 /**
  * This spread factor is used, for bitrate 1/8 and I_F_Q,
- * to force the LSP frequencies to be at least 80 Hz apart.
+ * to force LSP frequencies to be at least 80 Hz apart.
  *
  * TIA/EIA/IS-733 2.4.3.3.2
  */
 #define QCELP_LSP_SPREAD_FACTOR 0.02
 
 /**
- * predictor coefficient for the conversion of LSP codes
+ * Predictor coefficient for the conversion of LSP codes
  * to LSP frequencies for 1/8 and I_F_Q
  *
  * TIA/EIA/IS-733 2.4.3.2.7-2
@@ -541,7 +541,7 @@ static const double qcelp_rnd_fir_coefs[11] = {
 #define QCELP_LSP_OCTAVE_PREDICTOR 29.0/32
 
 /**
- * initial coefficient to perform bandwidth expansion on LPC
+ * Initial coefficient to perform bandwidth expansion on LPC
  *
  * @note: 0.9883 looks like an approximation of 253/256.
  *
diff --git a/libavcodec/qcelpdec.c b/libavcodec/qcelpdec.c
index 7d9427c..6f08607 100644
--- a/libavcodec/qcelpdec.c
+++ b/libavcodec/qcelpdec.c
@@ -2,20 +2,20 @@
  * QCELP decoder
  * Copyright (c) 2007 Reynaldo H. Verdejo Pinochet
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -23,12 +23,13 @@
  * @file
  * QCELP decoder
  * @author Reynaldo H. Verdejo Pinochet
- * @remark Libav merging spearheaded by Kenan Gillet
+ * @remark FFmpeg merging spearheaded by Kenan Gillet
  * @remark Development mentored by Benjamin Larson
  */
 
 #include <stddef.h>
 
+#include "libavutil/avassert.h"
 #include "libavutil/channel_layout.h"
 #include "libavutil/float_dsp.h"
 #include "avcodec.h"
@@ -40,9 +41,6 @@
 #include "acelp_vectors.h"
 #include "lsp.h"
 
-#undef NDEBUG
-#include <assert.h>
-
 typedef enum {
     I_F_Q = -1,    /**< insufficient frame quality */
     SILENCE,
@@ -135,7 +133,7 @@ static int decode_lspf(QCELPContext *q, float *lspf)
         } else {
             erasure_coeff = QCELP_LSP_OCTAVE_PREDICTOR;
 
-            assert(q->bitrate == I_F_Q);
+            av_assert2(q->bitrate == I_F_Q);
 
             if (q->erasure_count > 1)
                 erasure_coeff *= q->erasure_count < 4 ? 0.9 : 0.7;
@@ -239,7 +237,7 @@ static void decode_gain_and_index(QCELPContext *q, float *gain)
                     av_clip((q->prev_g1[0] + q->prev_g1[1]) / 2 - 5, 0, 54);
             subframes_count = 8;
         } else {
-            assert(q->bitrate == I_F_Q);
+            av_assert2(q->bitrate == I_F_Q);
 
             g1[0] = q->prev_g1[1];
             switch (q->erasure_count) {
@@ -321,7 +319,8 @@ static void compute_svector(QCELPContext *q, const float *gain,
             tmp_gain = gain[i] * QCELP_RATE_FULL_CODEBOOK_RATIO;
             cindex   = -q->frame.cindex[i];
             for (j = 0; j < 10; j++)
-                *cdn_vector++ = tmp_gain * qcelp_rate_full_codebook[cindex++ & 127];
+                *cdn_vector++ = tmp_gain *
+                                qcelp_rate_full_codebook[cindex++ & 127];
         }
         break;
     case RATE_HALF:
@@ -329,7 +328,8 @@ static void compute_svector(QCELPContext *q, const float *gain,
             tmp_gain = gain[i] * QCELP_RATE_HALF_CODEBOOK_RATIO;
             cindex   = -q->frame.cindex[i];
             for (j = 0; j < 40; j++)
-                *cdn_vector++ = tmp_gain * qcelp_rate_half_codebook[cindex++ & 127];
+                *cdn_vector++ = tmp_gain *
+                                qcelp_rate_half_codebook[cindex++ & 127];
         }
         break;
     case RATE_QUARTER:
@@ -374,7 +374,8 @@ static void compute_svector(QCELPContext *q, const float *gain,
         for (i = 0; i < 4; i++) {
             tmp_gain = gain[i] * QCELP_RATE_FULL_CODEBOOK_RATIO;
             for (j = 0; j < 40; j++)
-                *cdn_vector++ = tmp_gain * qcelp_rate_full_codebook[cbseed++ & 127];
+                *cdn_vector++ = tmp_gain *
+                                qcelp_rate_full_codebook[cbseed++ & 127];
         }
         break;
     case SILENCE:
@@ -435,7 +436,8 @@ static const float *do_pitchfilter(float memory[303], const float v_in[160],
             for (v_len = v_in + 40; v_in < v_len; v_in++) {
                 if (pfrac[i]) { // If it is a fractional lag...
                     for (j = 0, *v_out = 0.0; j < 4; j++)
-                        *v_out += qcelp_hammsinc_table[j] * (v_lag[j - 4] + v_lag[3 - j]);
+                        *v_out += qcelp_hammsinc_table[j] *
+                                  (v_lag[j - 4] + v_lag[3 - j]);
                 } else
                     *v_out = *v_lag;
 
@@ -486,7 +488,7 @@ static void apply_pitch_filters(QCELPContext *q, float *cdn_vector)
                   else
                       max_pitch_gain = 0.0;
             } else {
-                assert(q->bitrate == SILENCE);
+                av_assert2(q->bitrate == SILENCE);
                 max_pitch_gain = 1.0;
             }
             for (i = 0; i < 4; i++)
@@ -511,7 +513,8 @@ static void apply_pitch_filters(QCELPContext *q, float *cdn_vector)
 
         apply_gain_ctrl(cdn_vector, v_synthesis_filtered, v_pre_filtered);
     } else {
-        memcpy(q->pitch_synthesis_filter_mem, cdn_vector + 17, 143 * sizeof(float));
+        memcpy(q->pitch_synthesis_filter_mem,
+               cdn_vector + 17, 143 * sizeof(float));
         memcpy(q->pitch_pre_filter_mem, cdn_vector + 17, 143 * sizeof(float));
         memset(q->pitch_gain, 0, sizeof(q->pitch_gain));
         memset(q->pitch_lag,  0, sizeof(q->pitch_lag));
@@ -630,7 +633,7 @@ static qcelp_packet_rate determine_bitrate(AVCodecContext *avctx,
         (*buf)++;
     } else if ((bitrate = buf_size2bitrate(buf_size + 1)) >= 0) {
         av_log(avctx, AV_LOG_WARNING,
-               "Bitrate byte is missing, guessing the bitrate from packet size.\n");
+               "Bitrate byte missing, guessing bitrate from packet size.\n");
     } else
         return I_F_Q;
 
@@ -695,14 +698,12 @@ static int qcelp_decode_frame(AVCodecContext *avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = 160;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     outbuffer = (float *)frame->data[0];
 
     if ((q->bitrate = determine_bitrate(avctx, buf_size, &buf)) == I_F_Q) {
-        warn_insufficient_frame_quality(avctx, "bitrate cannot be determined.");
+        warn_insufficient_frame_quality(avctx, "Bitrate cannot be determined.");
         goto erasure;
     }
 
@@ -770,7 +771,8 @@ erasure:
     formant_mem = q->formant_mem + 10;
     for (i = 0; i < 4; i++) {
         interpolate_lpc(q, quantized_lspf, lpc, i);
-        ff_celp_lp_synthesis_filterf(formant_mem, lpc, outbuffer + i * 40, 40, 10);
+        ff_celp_lp_synthesis_filterf(formant_mem, lpc,
+                                     outbuffer + i * 40, 40, 10);
         formant_mem += 40;
     }
 
diff --git a/libavcodec/qdm2.c b/libavcodec/qdm2.c
index 04e7def..bf830db 100644
--- a/libavcodec/qdm2.c
+++ b/libavcodec/qdm2.c
@@ -5,20 +5,20 @@
  * Copyright (c) 2005 Alex Beregszaszi
  * Copyright (c) 2005 Roberto Togni
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -168,7 +168,7 @@ typedef struct {
     /// I/O data
     const uint8_t *compressed_data;
     int compressed_size;
-    float output_buffer[QDM2_MAX_FRAME_SIZE * 2];
+    float output_buffer[QDM2_MAX_FRAME_SIZE * MPA_MAX_CHANNELS * 2];
 
     /// Synthesis filter
     MPADSPContext mpadsp;
@@ -375,7 +375,14 @@ static int qdm2_get_vlc(GetBitContext *gb, VLC *vlc, int flag, int depth)
 
     /* stage-3, optional */
     if (flag) {
-        int tmp = vlc_stage3_values[value];
+        int tmp;
+
+        if (value >= 60) {
+            av_log(NULL, AV_LOG_ERROR, "value %d in qdm2_get_vlc too large\n", value);
+            return 0;
+        }
+
+        tmp= vlc_stage3_values[value];
 
         if ((value & ~3) > 0)
             tmp += get_bits(gb, (value >> 2));
@@ -694,7 +701,8 @@ static void fill_coding_method_array(sb_int8_array tone_level_idx,
 
     if (!superblocktype_2_3) {
         /* This case is untested, no samples available */
-        SAMPLES_NEEDED
+        avpriv_request_sample(NULL, "!superblocktype_2_3");
+        return;
         for (ch = 0; ch < nb_channels; ch++)
             for (sb = 0; sb < 30; sb++) {
                 for (j = 1; j < 63; j++) {  // The loop only iterates to 63 so the code doesn't overflow the buffer
@@ -730,7 +738,7 @@ static void fill_coding_method_array(sb_int8_array tone_level_idx,
                     for (j = 0; j < 64; j++)
                         acc += tone_level_idx_temp[ch][sb][j];
 
-            multres = 0x66666667 * (acc * 10);
+            multres = 0x66666667LL * (acc * 10);
             esp_40 = (multres >> 32) / 8 + ((multres & 0xffffffff) >> 31);
             for (ch = 0;  ch < nb_channels; ch++)
                 for (sb = 0; sb < 30; sb++)
@@ -806,7 +814,7 @@ static void fill_coding_method_array(sb_int8_array tone_level_idx,
  * @param sb_min    lower subband processed (sb_min included)
  * @param sb_max    higher subband processed (sb_max excluded)
  */
-static void synthfilt_build_sb_samples(QDM2Context *q, GetBitContext *gb,
+static int synthfilt_build_sb_samples(QDM2Context *q, GetBitContext *gb,
                                        int length, int sb_min, int sb_max)
 {
     int sb, j, k, n, ch, run, channels;
@@ -814,14 +822,15 @@ static void synthfilt_build_sb_samples(QDM2Context *q, GetBitContext *gb,
     int type34_first;
     float type34_div = 0;
     float type34_predictor;
-    float samples[10], sign_bits[16];
+    float samples[10];
+    int sign_bits[16] = {0};
 
     if (length == 0) {
         // If no data use noise
         for (sb=sb_min; sb < sb_max; sb++)
             build_sb_samples_from_noise(q, sb);
 
-        return;
+        return 0;
     }
 
     for (sb = sb_min; sb < sb_max; sb++) {
@@ -845,6 +854,7 @@ static void synthfilt_build_sb_samples(QDM2Context *q, GetBitContext *gb,
 
             if (fix_coding_method_array(sb, q->nb_channels,
                                             q->coding_method)) {
+                av_log(NULL, AV_LOG_ERROR, "coding method invalid\n");
                 build_sb_samples_from_noise(q, sb);
                 continue;
             }
@@ -869,6 +879,11 @@ static void synthfilt_build_sb_samples(QDM2Context *q, GetBitContext *gb,
                                 }
                             } else {
                                 n = get_bits(gb, 8);
+                                if (n >= 243) {
+                                    av_log(NULL, AV_LOG_ERROR, "Invalid 8bit codeword\n");
+                                    return AVERROR_INVALIDDATA;
+                                }
+
                                 for (k = 0; k < 5; k++)
                                     samples[2 * k] = dequant_1bit[joined_stereo][random_dequant_index[n][k]];
                             }
@@ -905,6 +920,11 @@ static void synthfilt_build_sb_samples(QDM2Context *q, GetBitContext *gb,
                                 }
                             } else {
                                 n = get_bits (gb, 8);
+                                if (n >= 243) {
+                                    av_log(NULL, AV_LOG_ERROR, "Invalid 8bit codeword\n");
+                                    return AVERROR_INVALIDDATA;
+                                }
+
                                 for (k = 0; k < 5; k++)
                                     samples[k] = dequant_1bit[joined_stereo][random_dequant_index[n][k]];
                             }
@@ -918,6 +938,11 @@ static void synthfilt_build_sb_samples(QDM2Context *q, GetBitContext *gb,
                     case 24:
                         if (get_bits_left(gb) >= 7) {
                             n = get_bits(gb, 7);
+                            if (n >= 125) {
+                                av_log(NULL, AV_LOG_ERROR, "Invalid 7bit codeword\n");
+                                return AVERROR_INVALIDDATA;
+                            }
+
                             for (k = 0; k < 3; k++)
                                 samples[k] = (random_dequant_type24[n][k] - 2.0) * 0.5;
                         } else {
@@ -930,10 +955,11 @@ static void synthfilt_build_sb_samples(QDM2Context *q, GetBitContext *gb,
                     case 30:
                         if (get_bits_left(gb) >= 4) {
                             unsigned index = qdm2_get_vlc(gb, &vlc_tab_type30, 0, 1);
-                            if (index < FF_ARRAY_ELEMS(type30_dequant)) {
-                                samples[0] = type30_dequant[index];
-                            } else
-                                samples[0] = SB_DITHERING_NOISE(sb,q->noise_idx);
+                            if (index >= FF_ARRAY_ELEMS(type30_dequant)) {
+                                av_log(NULL, AV_LOG_ERROR, "index %d out of type30_dequant array\n", index);
+                                return AVERROR_INVALIDDATA;
+                            }
+                            samples[0] = type30_dequant[index];
                         } else
                             samples[0] = SB_DITHERING_NOISE(sb,q->noise_idx);
 
@@ -949,11 +975,12 @@ static void synthfilt_build_sb_samples(QDM2Context *q, GetBitContext *gb,
                                 type34_first = 0;
                             } else {
                                 unsigned index = qdm2_get_vlc(gb, &vlc_tab_type34, 0, 1);
-                                if (index < FF_ARRAY_ELEMS(type34_delta)) {
-                                    samples[0] = type34_delta[index] / type34_div + type34_predictor;
-                                    type34_predictor = samples[0];
-                                } else
-                                    samples[0] = SB_DITHERING_NOISE(sb,q->noise_idx);
+                                if (index >= FF_ARRAY_ELEMS(type34_delta)) {
+                                    av_log(NULL, AV_LOG_ERROR, "index %d out of type34_delta array\n", index);
+                                    return AVERROR_INVALIDDATA;
+                                }
+                                samples[0] = type34_delta[index] / type34_div + type34_predictor;
+                                type34_predictor = samples[0];
                             }
                         } else {
                             samples[0] = SB_DITHERING_NOISE(sb,q->noise_idx);
@@ -990,6 +1017,7 @@ static void synthfilt_build_sb_samples(QDM2Context *q, GetBitContext *gb,
             } // j loop
         } // channel loop
     } // subband loop
+    return 0;
 }
 
 /**
@@ -1002,24 +1030,27 @@ static void synthfilt_build_sb_samples(QDM2Context *q, GetBitContext *gb,
  * @param quantized_coeffs    pointer to quantized_coeffs[ch][0]
  * @param gb        bitreader context
  */
-static void init_quantized_coeffs_elem0(int8_t *quantized_coeffs,
+static int init_quantized_coeffs_elem0(int8_t *quantized_coeffs,
                                         GetBitContext *gb)
 {
     int i, k, run, level, diff;
 
     if (get_bits_left(gb) < 16)
-        return;
+        return -1;
     level = qdm2_get_vlc(gb, &vlc_tab_level, 0, 2);
 
     quantized_coeffs[0] = level;
 
     for (i = 0; i < 7; ) {
         if (get_bits_left(gb) < 16)
-            break;
+            return -1;
         run = qdm2_get_vlc(gb, &vlc_tab_run, 0, 1) + 1;
 
+        if (i + run >= 8)
+            return -1;
+
         if (get_bits_left(gb) < 16)
-            break;
+            return -1;
         diff = qdm2_get_se_vlc(&vlc_tab_diff, gb, 2);
 
         for (k = 1; k <= run; k++)
@@ -1028,6 +1059,7 @@ static void init_quantized_coeffs_elem0(int8_t *quantized_coeffs,
         level += diff;
         i += run;
     }
+    return 0;
 }
 
 /**
@@ -1102,7 +1134,7 @@ static void init_tone_level_dequantization(QDM2Context *q, GetBitContext *gb)
  * @param q       context
  * @param node    pointer to node with packet
  */
-static void process_subpacket_9(QDM2Context *q, QDM2SubPNode *node)
+static int process_subpacket_9(QDM2Context *q, QDM2SubPNode *node)
 {
     GetBitContext gb;
     int i, j, k, n, ch, run, level, diff;
@@ -1120,6 +1152,9 @@ static void process_subpacket_9(QDM2Context *q, QDM2SubPNode *node)
                 run  = qdm2_get_vlc(&gb, &vlc_tab_run, 0, 1) + 1;
                 diff = qdm2_get_se_vlc(&vlc_tab_diff, &gb, 2);
 
+                if (j + run >= 8)
+                    return -1;
+
                 for (k = 1; k <= run; k++)
                     q->quantized_coeffs[ch][i][j + k] = (level + ((k * diff) / run));
 
@@ -1131,6 +1166,8 @@ static void process_subpacket_9(QDM2Context *q, QDM2SubPNode *node)
     for (ch = 0; ch < q->nb_channels; ch++)
         for (i = 0; i < 8; i++)
             q->quantized_coeffs[ch][0][i] = 0;
+
+    return 0;
 }
 
 /**
@@ -1200,7 +1237,7 @@ static void process_subpacket_12(QDM2Context *q, QDM2SubPNode *node)
     synthfilt_build_sb_samples(q, &gb, length, 8, QDM2_SB_USED(q->sub_sampling));
 }
 
-/*
+/**
  * Process new subpackets for synthesis filter
  *
  * @param q       context
@@ -1233,7 +1270,7 @@ static void process_synthesis_subpackets(QDM2Context *q, QDM2SubPNode *list)
         process_subpacket_12(q, NULL);
 }
 
-/*
+/**
  * Decode superblock, fill packet lists.
  *
  * @param q    context
@@ -1393,9 +1430,14 @@ static void qdm2_fft_decode_tones(QDM2Context *q, int duration,
     local_int_10 = 1 << (q->group_order - duration - 1);
     offset       = 1;
 
-    while (1) {
+    while (get_bits_left(gb)>0) {
         if (q->superblocktype_2_3) {
             while ((n = qdm2_get_vlc(gb, &vlc_tab_fft_tone_offset[local_int_8], 1, 2)) < 2) {
+                if (get_bits_left(gb)<0) {
+                    if(local_int_4 < q->group_size)
+                        av_log(NULL, AV_LOG_ERROR, "overread in qdm2_fft_decode_tones()\n");
+                    return;
+                }
                 offset = 1;
                 if (n == 0) {
                     local_int_4  += local_int_10;
@@ -1708,12 +1750,19 @@ static void qdm2_synthesis_filter(QDM2Context *q, int index)
  *
  * @param q    context
  */
-static av_cold void qdm2_init_static_data(AVCodec *codec) {
+static av_cold void qdm2_init_static_data(void) { /* builds the shared VLC, synthesis-window, softclip, random and noise tables */
+    static int done; /* nonzero once the tables below have been built */
+
+    if(done) /* an earlier call already built the tables: nothing to do */
+        return;
+
     qdm2_init_vlc();
     ff_mpa_synth_init_float(ff_mpa_synth_window_float);
     softclip_table_init();
     rnd_table_init();
     init_noise_samples();
+
+    done = 1; /* NOTE(review): plain static flag with no locking visible here - confirm callers serialize codec init */
 }
 
 /**
@@ -1726,6 +1775,8 @@ static av_cold int qdm2_decode_init(AVCodecContext *avctx)
     int extradata_size;
     int tmp_val, tmp, size;
 
+    qdm2_init_static_data();
+
     /* extradata parsing
 
     Structure:
@@ -1814,8 +1865,10 @@ static av_cold int qdm2_decode_init(AVCodecContext *avctx)
 
     avctx->channels = s->nb_channels = s->channels = AV_RB32(extradata);
     extradata += 4;
-    if (s->channels <= 0 || s->channels > MPA_MAX_CHANNELS)
+    if (s->channels <= 0 || s->channels > MPA_MAX_CHANNELS) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid number of channels\n");
         return AVERROR_INVALIDDATA;
+    }
     avctx->channel_layout = avctx->channels == 2 ? AV_CH_LAYOUT_STEREO :
                                                    AV_CH_LAYOUT_MONO;
 
@@ -1842,6 +1895,7 @@ static av_cold int qdm2_decode_init(AVCodecContext *avctx)
     // something like max decodable tones
     s->group_order = av_log2(s->group_size) + 1;
     s->frame_size = s->group_size / 16; // 16 iterations per super block
+
     if (s->frame_size > QDM2_MAX_FRAME_SIZE)
         return AVERROR_INVALIDDATA;
 
@@ -1864,18 +1918,9 @@ static av_cold int qdm2_decode_init(AVCodecContext *avctx)
     if ((tmp * 2240) < avctx->bit_rate)  tmp_val = 4;
     s->cm_table_select = tmp_val;
 
-    if (s->sub_sampling == 0)
-        tmp = 7999;
-    else
-        tmp = ((-(s->sub_sampling -1)) & 8000) + 20000;
-    /*
-    0: 7999 -> 0
-    1: 20000 -> 2
-    2: 28000 -> 2
-    */
-    if (tmp < 8000)
+    if (avctx->bit_rate <= 8000)
         s->coeff_per_sb_select = 0;
-    else if (tmp <= 16000)
+    else if (avctx->bit_rate < 16000)
         s->coeff_per_sb_select = 1;
     else
         s->coeff_per_sb_select = 2;
@@ -1912,6 +1957,9 @@ static int qdm2_decode(QDM2Context *q, const uint8_t *in, int16_t *out)
     int ch, i;
     const int frame_size = (q->frame_size * q->channels);
 
+    if((unsigned)frame_size > FF_ARRAY_ELEMS(q->output_buffer)/2)
+        return -1;
+
     /* select input buffer */
     q->compressed_data = in;
     q->compressed_size = q->checksum_size;
@@ -1983,10 +2031,8 @@ static int qdm2_decode_frame(AVCodecContext *avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = 16 * s->frame_size;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     out = (int16_t *)frame->data[0];
 
     for (i = 0; i < 16; i++) {
@@ -2007,7 +2053,6 @@ AVCodec ff_qdm2_decoder = {
     .id               = AV_CODEC_ID_QDM2,
     .priv_data_size   = sizeof(QDM2Context),
     .init             = qdm2_decode_init,
-    .init_static_data = qdm2_init_static_data,
     .close            = qdm2_decode_close,
     .decode           = qdm2_decode_frame,
     .capabilities     = CODEC_CAP_DR1,
diff --git a/libavcodec/qdm2_tablegen.c b/libavcodec/qdm2_tablegen.c
index 59d82df..a7a9fb6 100644
--- a/libavcodec/qdm2_tablegen.c
+++ b/libavcodec/qdm2_tablegen.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2010 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/qdm2_tablegen.h b/libavcodec/qdm2_tablegen.h
index bb73d92..13ec9be 100644
--- a/libavcodec/qdm2_tablegen.h
+++ b/libavcodec/qdm2_tablegen.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2010 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -37,7 +37,7 @@
 #include "libavcodec/qdm2_tables.h"
 #else
 static uint16_t softclip_table[HARDCLIP_THRESHOLD - SOFTCLIP_THRESHOLD + 1];
-static float noise_table[4096];
+static float noise_table[4096 + 20];
 static uint8_t random_dequant_index[256][5];
 static uint8_t random_dequant_type24[128][3];
 static float noise_samples[128];
@@ -54,8 +54,7 @@ static av_cold void softclip_table_init(void) {
 // random generated table
 static av_cold void rnd_table_init(void) {
     int i,j;
-    uint32_t ldw,hdw;
-    uint64_t tmp64_1;
+    uint32_t ldw;
     uint64_t random_seed = 0;
     float delta = 1.0 / 16384.0;
     for(i = 0; i < 4096 ;i++) {
@@ -67,22 +66,18 @@ static av_cold void rnd_table_init(void) {
         random_seed = 81;
         ldw = i;
         for (j = 0; j < 5 ;j++) {
-            random_dequant_index[i][j] = (uint8_t)((ldw / random_seed) & 0xFF);
-            ldw = (uint32_t)ldw % (uint32_t)random_seed;
-            tmp64_1 = (random_seed * 0x55555556);
-            hdw = (uint32_t)(tmp64_1 >> 32);
-            random_seed = (uint64_t)(hdw + (ldw >> 31));
+            random_dequant_index[i][j] = ldw / random_seed;
+            ldw %= random_seed;
+            random_seed /= 3;
         }
     }
     for (i = 0; i < 128 ;i++) {
         random_seed = 25;
         ldw = i;
         for (j = 0; j < 3 ;j++) {
-            random_dequant_type24[i][j] = (uint8_t)((ldw / random_seed) & 0xFF);
-            ldw = (uint32_t)ldw % (uint32_t)random_seed;
-            tmp64_1 = (random_seed * 0x66666667);
-            hdw = (uint32_t)(tmp64_1 >> 33);
-            random_seed = hdw + (ldw >> 31);
+            random_dequant_type24[i][j] = ldw / random_seed;
+            ldw %= random_seed;
+            random_seed /= 5;
         }
     }
 }
diff --git a/libavcodec/qdm2data.h b/libavcodec/qdm2data.h
index ad6ea88..355d613 100644
--- a/libavcodec/qdm2data.h
+++ b/libavcodec/qdm2data.h
@@ -5,20 +5,20 @@
  * Copyright (c) 2005 Alex Beregszaszi
  * Copyright (c) 2005 Roberto Togni
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/qdrw.c b/libavcodec/qdrw.c
index c365bfd..3f82e5b 100644
--- a/libavcodec/qdrw.c
+++ b/libavcodec/qdrw.c
@@ -2,20 +2,20 @@
  * QuickDraw (qdrw) codec
  * Copyright (c) 2004 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -43,10 +43,8 @@ static int decode_frame(AVCodecContext *avctx,
     uint32_t *pal;
     int r, g, b;
 
-    if ((ret = ff_get_buffer(avctx, p, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
         return ret;
-    }
     p->pict_type = AV_PICTURE_TYPE_I;
     p->key_frame = 1;
 
@@ -82,7 +80,7 @@ static int decode_frame(AVCodecContext *avctx,
         buf++;
         b = *buf++;
         buf++;
-        pal[idx] = (r << 16) | (g << 8) | b;
+        pal[idx] = 0xFFU << 24 | r << 16 | g << 8 | b;
     }
     p->palette_has_changed = 1;
 
diff --git a/libavcodec/qpeg.c b/libavcodec/qpeg.c
index 4de1655..94cb5bd 100644
--- a/libavcodec/qpeg.c
+++ b/libavcodec/qpeg.c
@@ -2,20 +2,20 @@
  * QPEG codec
  * Copyright (c) 2004 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -30,8 +30,7 @@
 
 typedef struct QpegContext{
     AVCodecContext *avctx;
-    AVFrame *pic;
-    uint8_t *refdata;
+    AVFrame *pic, *ref; /* pic: frame being decoded; ref: the previous pic, whose data[0] decode_frame passes to qpeg_decode_inter as refdata */
     uint32_t pal[256];
     GetByteContext buffer;
 } QpegContext;
@@ -111,7 +110,7 @@ static const int qpeg_table_w[16] =
  { 0x00, 0x20, 0x18, 0x08, 0x18, 0x10, 0x20, 0x10, 0x08, 0x10, 0x20, 0x20, 0x08, 0x10, 0x18, 0x04};
 
 /* Decodes delta frames */
-static void qpeg_decode_inter(QpegContext *qctx, uint8_t *dst,
+static void av_noinline qpeg_decode_inter(QpegContext *qctx, uint8_t *dst,
                               int stride, int width, int height,
                               int delta, const uint8_t *ctable,
                               uint8_t *refdata)
@@ -121,9 +120,12 @@ static void qpeg_decode_inter(QpegContext *qctx, uint8_t *dst,
     int filled = 0;
     int orig_height;
 
+    if(!refdata)
+        refdata= dst;
+
     /* copy prev frame */
     for(i = 0; i < height; i++)
-        memcpy(refdata + (i * width), dst + (i * stride), width);
+        memcpy(dst + (i * stride), refdata + (i * stride), width);
 
     orig_height = height;
     height--;
@@ -134,7 +136,7 @@ static void qpeg_decode_inter(QpegContext *qctx, uint8_t *dst,
 
         if(delta) {
             /* motion compensation */
-            while((code & 0xF0) == 0xF0) {
+            while(bytestream2_get_bytes_left(&qctx->buffer) > 0 && (code & 0xF0) == 0xF0) {
                 if(delta == 1) {
                     int me_idx;
                     int me_w, me_h, me_x, me_y;
@@ -167,10 +169,10 @@ static void qpeg_decode_inter(QpegContext *qctx, uint8_t *dst,
                                me_x, me_y, me_w, me_h, filled, height);
                     else {
                         /* do motion compensation */
-                        me_plane = refdata + (filled + me_x) + (height - me_y) * width;
+                        me_plane = refdata + (filled + me_x) + (height - me_y) * stride;
                         for(j = 0; j < me_h; j++) {
                             for(i = 0; i < me_w; i++)
-                                dst[filled + i - (j * stride)] = me_plane[i - (j * width)];
+                                dst[filled + i - (j * stride)] = me_plane[i - (j * stride)];
                         }
                     }
                 }
@@ -198,6 +200,9 @@ static void qpeg_decode_inter(QpegContext *qctx, uint8_t *dst,
         } else if(code >= 0xC0) { /* copy code: 0xC0..0xDF */
             code &= 0x1F;
 
+            if(code + 1 > bytestream2_get_bytes_left(&qctx->buffer))
+                break;
+
             for(i = 0; i <= code; i++) {
                 dst[filled++] = bytestream2_get_byte(&qctx->buffer);
                 if(filled >= width) {
@@ -251,6 +256,7 @@ static int decode_frame(AVCodecContext *avctx,
     uint8_t ctable[128];
     QpegContext * const a = avctx->priv_data;
     AVFrame * const p = a->pic;
+    AVFrame * const ref = a->ref;
     uint8_t* outdata;
     int delta, ret;
     const uint8_t *pal = av_packet_get_side_data(avpkt, AV_PKT_DATA_PALETTE, NULL);
@@ -261,10 +267,12 @@ static int decode_frame(AVCodecContext *avctx,
     }
 
     bytestream2_init(&a->buffer, avpkt->data, avpkt->size);
-    if ((ret = ff_reget_buffer(avctx, p)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+
+    av_frame_unref(ref);
+    av_frame_move_ref(ref, p);
+
+    if ((ret = ff_get_buffer(avctx, p, AV_GET_BUFFER_FLAG_REF)) < 0)
         return ret;
-    }
     outdata = p->data[0];
     bytestream2_skip(&a->buffer, 4);
     bytestream2_get_buffer(&a->buffer, ctable, 128);
@@ -274,7 +282,7 @@ static int decode_frame(AVCodecContext *avctx,
     if(delta == 0x10) {
         qpeg_decode_intra(a, outdata, p->linesize[0], avctx->width, avctx->height);
     } else {
-        qpeg_decode_inter(a, outdata, p->linesize[0], avctx->width, avctx->height, delta, ctable, a->refdata);
+        qpeg_decode_inter(a, outdata, p->linesize[0], avctx->width, avctx->height, delta, ctable, ref->data[0]);
     }
 
     /* make the palette available on the way out */
@@ -292,13 +300,25 @@ static int decode_frame(AVCodecContext *avctx,
     return avpkt->size;
 }
 
+static void decode_flush(AVCodecContext *avctx){ /* (re)loads a->pal from the palette bytes at the end of extradata; also called from decode_init */
+    QpegContext * const a = avctx->priv_data;
+    int i, pal_size;
+    const uint8_t *pal_src;
+
+    pal_size = FFMIN(1024U, avctx->extradata_size); /* capped at 1024 bytes = 256 entries of 4 bytes, the size of a->pal */
+    pal_src = avctx->extradata + avctx->extradata_size - pal_size; /* start of the last pal_size bytes of extradata */
+
+    for (i=0; i<pal_size/4; i++) /* one iteration per complete 4-byte entry; a trailing partial entry is ignored */
+        a->pal[i] = 0xFFU<<24 | AV_RL32(pal_src+4*i); /* OR 0xFF into the top byte of each 32-bit entry (presumably alpha, i.e. opaque) */
+}
+
 static av_cold int decode_end(AVCodecContext *avctx)
 {
     QpegContext * const a = avctx->priv_data;
 
     av_frame_free(&a->pic);
+    av_frame_free(&a->ref); /* also release the reference frame that replaced the old refdata buffer */
 
-    av_free(a->refdata);
     return 0;
 }
 
@@ -307,10 +327,12 @@ static av_cold int decode_init(AVCodecContext *avctx){
 
     a->avctx = avctx;
     avctx->pix_fmt= AV_PIX_FMT_PAL8;
-    a->refdata = av_malloc(avctx->width * avctx->height);
+
+    decode_flush(avctx);
 
     a->pic = av_frame_alloc();
-    if (!a->pic) {
+    a->ref = av_frame_alloc();
+    if (!a->pic || !a->ref) {
         decode_end(avctx);
         return AVERROR(ENOMEM);
     }
@@ -327,5 +349,6 @@ AVCodec ff_qpeg_decoder = {
     .init           = decode_init,
     .close          = decode_end,
     .decode         = decode_frame,
+    .flush          = decode_flush,
     .capabilities   = CODEC_CAP_DR1,
 };
diff --git a/libavcodec/qpel_template.c b/libavcodec/qpel_template.c
index 2106160..e52a78c 100644
--- a/libavcodec/qpel_template.c
+++ b/libavcodec/qpel_template.c
@@ -1,20 +1,22 @@
 /*
  * quarterpel DSP function templates
+ * Copyright (c) 2000, 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/qpeldsp.c b/libavcodec/qpeldsp.c
index 1d0422a..1c0ec63 100644
--- a/libavcodec/qpeldsp.c
+++ b/libavcodec/qpeldsp.c
@@ -1,20 +1,22 @@
 /*
  * quarterpel DSP functions
+ * Copyright (c) 2000, 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -30,6 +32,7 @@
 #include "libavutil/attributes.h"
 #include "copy_block.h"
 #include "qpeldsp.h"
+#include "diracdsp.h"
 
 #define BIT_DEPTH 8
 #include "hpel_template.c"
@@ -732,6 +735,51 @@ void ff_put_pixels8_l2_8(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
 
 }
 
+#if CONFIG_DIRAC_DECODER /* compiled only when CONFIG_DIRAC_DECODER is nonzero */
+#define DIRAC_MC(OPNAME) /* emits nine ff_<OPNAME>_dirac_pixels{8,16,32}[_l2|_l4]_c wrappers */\
+void ff_ ## OPNAME ## _dirac_pixels8_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{ /* single-source variants: only src[0] is read */\
+     OPNAME ## _pixels8_8_c(dst, src[0], stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels16_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+    OPNAME ## _pixels16_8_c(dst, src[0], stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels32_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{ /* 32 wide is done as two 16-wide calls at byte offsets 0 and 16 */\
+    OPNAME ## _pixels16_8_c(dst   , src[0]   , stride, h);\
+    OPNAME ## _pixels16_8_c(dst+16, src[0]+16, stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels8_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{ /* _l2 variants: src[0] and src[1] go to the _l2 helper, one stride for dst and both sources */\
+    OPNAME ## _pixels8_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels16_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+    OPNAME ## _pixels16_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels32_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+    OPNAME ## _pixels16_l2_8(dst   , src[0]   , src[1]   , stride, stride, stride, h);\
+    OPNAME ## _pixels16_l2_8(dst+16, src[0]+16, src[1]+16, stride, stride, stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels8_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{ /* _l4 variants: src[0]..src[3] go to the _l4 helper; src[4] is never read here */\
+    OPNAME ## _pixels8_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels16_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+    OPNAME ## _pixels16_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels32_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+    OPNAME ## _pixels16_l4_8(dst   , src[0]   , src[1]   , src[2]   , src[3]   , stride, stride, stride, stride, stride, h);\
+    OPNAME ## _pixels16_l4_8(dst+16, src[0]+16, src[1]+16, src[2]+16, src[3]+16, stride, stride, stride, stride, stride, h);\
+}
+DIRAC_MC(put) /* instantiates the ff_put_dirac_pixels*_c family */
+DIRAC_MC(avg) /* instantiates the ff_avg_dirac_pixels*_c family */
+#endif
+
 av_cold void ff_qpeldsp_init(QpelDSPContext *c)
 {
 #define dspfunc(PFX, IDX, NUM)                              \
diff --git a/libavcodec/qpeldsp.h b/libavcodec/qpeldsp.h
index 4ad141d..b51420a 100644
--- a/libavcodec/qpeldsp.h
+++ b/libavcodec/qpeldsp.h
@@ -1,20 +1,20 @@
 /*
  * quarterpel DSP functions
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
index 28f1720..4eeeea4 100644
--- a/libavcodec/qtrle.c
+++ b/libavcodec/qtrle.c
@@ -2,20 +2,20 @@
  * Quicktime Animation (RLE) Video Decoder
  * Copyright (C) 2004 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -59,17 +59,25 @@ static void qtrle_decode_1bpp(QtrleContext *s, int row_ptr, int lines_to_change)
     int rle_code;
     int pixel_ptr;
     int row_inc = s->frame->linesize[0];
-    unsigned char pi0, pi1;  /* 2 8-pixel values */
-    unsigned char *rgb = s->frame->data[0];
+    uint8_t pi0, pi1;  /* 2 8-pixel values */
+    uint8_t *rgb = s->frame->data[0];
     int pixel_limit = s->frame->linesize[0] * s->avctx->height;
     int skip;
+    /* skip & 0x80 appears to mean 'start a new line', which can be interpreted
+     * as 'go to next line' during the decoding of a frame but is 'go to first
+     * line' at the beginning. Since we always interpret it as 'go to next line'
+     * in the decoding loop (which makes code simpler/faster), the first line
+     * would not be counted, so we count one more.
+     * See: https://trac.ffmpeg.org/ticket/226
+     * In the following decoding loop, row_ptr will be the position of the
+     * current row. */
 
     row_ptr  -= row_inc;
     pixel_ptr = row_ptr;
     lines_to_change++;
     while (lines_to_change) {
         skip     =              bytestream2_get_byte(&s->g);
-        rle_code = (signed char)bytestream2_get_byte(&s->g);
+        rle_code = (int8_t)bytestream2_get_byte(&s->g);
         if (rle_code == 0)
             break;
         if(skip & 0x80) {
@@ -80,6 +88,9 @@ static void qtrle_decode_1bpp(QtrleContext *s, int row_ptr, int lines_to_change)
             pixel_ptr += 2 * skip;
         CHECK_PIXEL_PTR(0);  /* make sure pixel_ptr is positive */
 
+        if(rle_code == -1)
+            continue;
+
         if (rle_code < 0) {
             /* decode the run length code */
             rle_code = -rle_code;
@@ -99,8 +110,8 @@ static void qtrle_decode_1bpp(QtrleContext *s, int row_ptr, int lines_to_change)
             rle_code *= 2;
             CHECK_PIXEL_PTR(rle_code);
 
-            while (rle_code--)
-                rgb[pixel_ptr++] = bytestream2_get_byte(&s->g);
+            bytestream2_get_buffer(&s->g, &rgb[pixel_ptr], rle_code);
+            pixel_ptr += rle_code;
         }
     }
 }
@@ -111,8 +122,8 @@ static inline void qtrle_decode_2n4bpp(QtrleContext *s, int row_ptr,
     int rle_code, i;
     int pixel_ptr;
     int row_inc = s->frame->linesize[0];
-    unsigned char pi[16];  /* 16 palette indices */
-    unsigned char *rgb = s->frame->data[0];
+    uint8_t pi[16];  /* 16 palette indices */
+    uint8_t *rgb = s->frame->data[0];
     int pixel_limit = s->frame->linesize[0] * s->avctx->height;
     int num_pixels = (bpp == 4) ? 8 : 16;
 
@@ -120,7 +131,7 @@ static inline void qtrle_decode_2n4bpp(QtrleContext *s, int row_ptr,
         pixel_ptr = row_ptr + (num_pixels * (bytestream2_get_byte(&s->g) - 1));
         CHECK_PIXEL_PTR(0);
 
-        while ((rle_code = (signed char)bytestream2_get_byte(&s->g)) != -1) {
+        while ((rle_code = (int8_t)bytestream2_get_byte(&s->g)) != -1) {
             if (rle_code == 0) {
                 /* there's another skip code in the stream */
                 pixel_ptr += (num_pixels * (bytestream2_get_byte(&s->g) - 1));
@@ -136,8 +147,8 @@ static inline void qtrle_decode_2n4bpp(QtrleContext *s, int row_ptr,
                 }
                 CHECK_PIXEL_PTR(rle_code * num_pixels);
                 while (rle_code--) {
-                    for (i = 0; i < num_pixels; i++)
-                        rgb[pixel_ptr++] = pi[i];
+                    memcpy(&rgb[pixel_ptr], &pi, num_pixels);
+                    pixel_ptr += num_pixels;
                 }
             } else {
                 /* copy the same pixel directly to output 4 times */
@@ -167,15 +178,15 @@ static void qtrle_decode_8bpp(QtrleContext *s, int row_ptr, int lines_to_change)
     int rle_code;
     int pixel_ptr;
     int row_inc = s->frame->linesize[0];
-    unsigned char pi1, pi2, pi3, pi4;  /* 4 palette indexes */
-    unsigned char *rgb = s->frame->data[0];
+    uint8_t pi1, pi2, pi3, pi4;  /* 4 palette indexes */
+    uint8_t *rgb = s->frame->data[0];
     int pixel_limit = s->frame->linesize[0] * s->avctx->height;
 
     while (lines_to_change--) {
         pixel_ptr = row_ptr + (4 * (bytestream2_get_byte(&s->g) - 1));
         CHECK_PIXEL_PTR(0);
 
-        while ((rle_code = (signed char)bytestream2_get_byte(&s->g)) != -1) {
+        while ((rle_code = (int8_t)bytestream2_get_byte(&s->g)) != -1) {
             if (rle_code == 0) {
                 /* there's another skip code in the stream */
                 pixel_ptr += (4 * (bytestream2_get_byte(&s->g) - 1));
@@ -203,9 +214,8 @@ static void qtrle_decode_8bpp(QtrleContext *s, int row_ptr, int lines_to_change)
                 rle_code *= 4;
                 CHECK_PIXEL_PTR(rle_code);
 
-                while (rle_code--) {
-                    rgb[pixel_ptr++] = bytestream2_get_byte(&s->g);
-                }
+                bytestream2_get_buffer(&s->g, &rgb[pixel_ptr], rle_code);
+                pixel_ptr += rle_code;
             }
         }
         row_ptr += row_inc;
@@ -217,15 +227,15 @@ static void qtrle_decode_16bpp(QtrleContext *s, int row_ptr, int lines_to_change
     int rle_code;
     int pixel_ptr;
     int row_inc = s->frame->linesize[0];
-    unsigned short rgb16;
-    unsigned char *rgb = s->frame->data[0];
+    uint16_t rgb16;
+    uint8_t *rgb = s->frame->data[0];
     int pixel_limit = s->frame->linesize[0] * s->avctx->height;
 
     while (lines_to_change--) {
         pixel_ptr = row_ptr + (bytestream2_get_byte(&s->g) - 1) * 2;
         CHECK_PIXEL_PTR(0);
 
-        while ((rle_code = (signed char)bytestream2_get_byte(&s->g)) != -1) {
+        while ((rle_code = (int8_t)bytestream2_get_byte(&s->g)) != -1) {
             if (rle_code == 0) {
                 /* there's another skip code in the stream */
                 pixel_ptr += (bytestream2_get_byte(&s->g) - 1) * 2;
@@ -238,7 +248,7 @@ static void qtrle_decode_16bpp(QtrleContext *s, int row_ptr, int lines_to_change
                 CHECK_PIXEL_PTR(rle_code * 2);
 
                 while (rle_code--) {
-                    *(unsigned short *)(&rgb[pixel_ptr]) = rgb16;
+                    *(uint16_t *)(&rgb[pixel_ptr]) = rgb16;
                     pixel_ptr += 2;
                 }
             } else {
@@ -247,7 +257,7 @@ static void qtrle_decode_16bpp(QtrleContext *s, int row_ptr, int lines_to_change
                 /* copy pixels directly to output */
                 while (rle_code--) {
                     rgb16 = bytestream2_get_be16(&s->g);
-                    *(unsigned short *)(&rgb[pixel_ptr]) = rgb16;
+                    *(uint16_t *)(&rgb[pixel_ptr]) = rgb16;
                     pixel_ptr += 2;
                 }
             }
@@ -261,15 +271,15 @@ static void qtrle_decode_24bpp(QtrleContext *s, int row_ptr, int lines_to_change
     int rle_code;
     int pixel_ptr;
     int row_inc = s->frame->linesize[0];
-    unsigned char r, g, b;
-    unsigned char *rgb = s->frame->data[0];
+    uint8_t r, g, b;
+    uint8_t *rgb = s->frame->data[0];
     int pixel_limit = s->frame->linesize[0] * s->avctx->height;
 
     while (lines_to_change--) {
         pixel_ptr = row_ptr + (bytestream2_get_byte(&s->g) - 1) * 3;
         CHECK_PIXEL_PTR(0);
 
-        while ((rle_code = (signed char)bytestream2_get_byte(&s->g)) != -1) {
+        while ((rle_code = (int8_t)bytestream2_get_byte(&s->g)) != -1) {
             if (rle_code == 0) {
                 /* there's another skip code in the stream */
                 pixel_ptr += (bytestream2_get_byte(&s->g) - 1) * 3;
@@ -309,14 +319,14 @@ static void qtrle_decode_32bpp(QtrleContext *s, int row_ptr, int lines_to_change
     int pixel_ptr;
     int row_inc = s->frame->linesize[0];
     unsigned int argb;
-    unsigned char *rgb = s->frame->data[0];
+    uint8_t *rgb = s->frame->data[0];
     int pixel_limit = s->frame->linesize[0] * s->avctx->height;
 
     while (lines_to_change--) {
         pixel_ptr = row_ptr + (bytestream2_get_byte(&s->g) - 1) * 4;
         CHECK_PIXEL_PTR(0);
 
-        while ((rle_code = (signed char)bytestream2_get_byte(&s->g)) != -1) {
+        while ((rle_code = (int8_t)bytestream2_get_byte(&s->g)) != -1) {
             if (rle_code == 0) {
                 /* there's another skip code in the stream */
                 pixel_ptr += (bytestream2_get_byte(&s->g) - 1) * 4;
@@ -403,10 +413,8 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
     int ret;
 
     bytestream2_init(&s->g, avpkt->data, avpkt->size);
-    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0) {
-        av_log (s->avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0)
         return ret;
-    }
 
     /* check if this frame is even supposed to change */
     if (avpkt->size < 8)
@@ -426,6 +434,8 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
         bytestream2_skip(&s->g, 2);
         height     = bytestream2_get_be16(&s->g);
         bytestream2_skip(&s->g, 2);
+        if (height > s->avctx->height - start_line)
+            goto done;
     } else {
         start_line = 0;
         height     = s->avctx->height;
diff --git a/libavcodec/qtrleenc.c b/libavcodec/qtrleenc.c
index 7c98bea..d723188 100644
--- a/libavcodec/qtrleenc.c
+++ b/libavcodec/qtrleenc.c
@@ -5,20 +5,20 @@
  *
  * This file is based on flashsvenc.c.
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -39,6 +39,7 @@ typedef struct QtrleEncContext {
     int pixel_size;
     AVPicture previous_frame;
     unsigned int max_buf_size;
+    int logical_width;
     /**
      * This array will contain at ith position the value of the best RLE code
      * if the line started at pixel i
@@ -75,13 +76,23 @@ static av_cold int qtrle_encode_end(AVCodecContext *avctx)
 static av_cold int qtrle_encode_init(AVCodecContext *avctx)
 {
     QtrleEncContext *s = avctx->priv_data;
+    int ret;
 
     if (av_image_check_size(avctx->width, avctx->height, 0, avctx) < 0) {
-        return -1;
+        return AVERROR(EINVAL);
     }
     s->avctx=avctx;
+    s->logical_width=avctx->width;
 
     switch (avctx->pix_fmt) {
+    case AV_PIX_FMT_GRAY8:
+        if (avctx->width % 4) {
+            av_log(avctx, AV_LOG_ERROR, "Width not being a multiple of 4 is not supported\n");
+            return AVERROR(EINVAL);
+        }
+        s->logical_width = avctx->width / 4;
+        s->pixel_size = 4;
+        break;
     case AV_PIX_FMT_RGB555BE:
         s->pixel_size = 2;
         break;
@@ -95,24 +106,24 @@ static av_cold int qtrle_encode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_ERROR, "Unsupported colorspace.\n");
         break;
     }
-    avctx->bits_per_coded_sample = s->pixel_size*8;
+    avctx->bits_per_coded_sample = avctx->pix_fmt == AV_PIX_FMT_GRAY8 ? 40 : s->pixel_size*8;
 
-    s->rlecode_table = av_mallocz(s->avctx->width);
-    s->skip_table    = av_mallocz(s->avctx->width);
-    s->length_table  = av_mallocz((s->avctx->width + 1)*sizeof(int));
+    s->rlecode_table = av_mallocz(s->logical_width);
+    s->skip_table    = av_mallocz(s->logical_width);
+    s->length_table  = av_mallocz_array(s->logical_width + 1, sizeof(int));
     if (!s->skip_table || !s->length_table || !s->rlecode_table) {
         av_log(avctx, AV_LOG_ERROR, "Error allocating memory.\n");
-        return -1;
+        return AVERROR(ENOMEM);
     }
-    if (avpicture_alloc(&s->previous_frame, avctx->pix_fmt, avctx->width, avctx->height) < 0) {
+    if ((ret = avpicture_alloc(&s->previous_frame, avctx->pix_fmt, avctx->width, avctx->height)) < 0) {
         av_log(avctx, AV_LOG_ERROR, "Error allocating picture\n");
-        return -1;
+        return ret;
     }
 
-    s->max_buf_size = s->avctx->width*s->avctx->height*s->pixel_size*2 /* image base material */
-                      + 15                                           /* header + footer */
-                      + s->avctx->height*2                           /* skip code+rle end */
-                      + s->avctx->width/MAX_RLE_BULK + 1             /* rle codes */;
+    s->max_buf_size = s->logical_width*s->avctx->height*s->pixel_size*2 /* image base material */
+                      + 15                                            /* header + footer */
+                      + s->avctx->height*2                            /* skip code+rle end */
+                      + s->logical_width/MAX_RLE_BULK + 1             /* rle codes */;
 
     avctx->coded_frame = av_frame_alloc();
     if (!avctx->coded_frame) {
@@ -128,26 +139,26 @@ static av_cold int qtrle_encode_init(AVCodecContext *avctx)
  */
 static void qtrle_encode_line(QtrleEncContext *s, const AVFrame *p, int line, uint8_t **buf)
 {
-    int width=s->avctx->width;
+    int width=s->logical_width;
     int i;
     signed char rlecode;
 
-    /* We will use it to compute the best bulk copy sequence */
-    unsigned int bulkcount;
     /* This will be the number of pixels equal to the preivous frame one's
      * starting from the ith pixel */
     unsigned int skipcount;
     /* This will be the number of consecutive equal pixels in the current
      * frame, starting from the ith one also */
-    unsigned int repeatcount;
+    unsigned int av_uninit(repeatcount);
 
     /* The cost of the three different possibilities */
-    int total_bulk_cost;
     int total_skip_cost;
     int total_repeat_cost;
 
-    int temp_cost;
-    int j;
+    int base_bulk_cost;
+    int lowest_bulk_cost;
+    int lowest_bulk_cost_index;
+    int sec_lowest_bulk_cost;
+    int sec_lowest_bulk_cost_index;
 
     uint8_t *this_line = p->               data[0] + line*p->               linesize[0] +
         (width - 1)*s->pixel_size;
@@ -157,8 +168,57 @@ static void qtrle_encode_line(QtrleEncContext *s, const AVFrame *p, int line, ui
     s->length_table[width] = 0;
     skipcount = 0;
 
+    /* Initial values */
+    lowest_bulk_cost = INT_MAX / 2;
+    lowest_bulk_cost_index = width;
+    sec_lowest_bulk_cost = INT_MAX / 2;
+    sec_lowest_bulk_cost_index = width;
+
+    base_bulk_cost = 1 + s->pixel_size;
+
     for (i = width - 1; i >= 0; i--) {
 
+        int prev_bulk_cost;
+
+        /* If our lowest bulk cost index is too far away, replace it
+         * with the next lowest bulk cost */
+        if (FFMIN(width, i + MAX_RLE_BULK) < lowest_bulk_cost_index) {
+            lowest_bulk_cost = sec_lowest_bulk_cost;
+            lowest_bulk_cost_index = sec_lowest_bulk_cost_index;
+
+            sec_lowest_bulk_cost = INT_MAX / 2;
+            sec_lowest_bulk_cost_index = width;
+        }
+
+        /* Deal with the first pixel's bulk cost */
+        if (!i) {
+            base_bulk_cost++;
+            lowest_bulk_cost++;
+            sec_lowest_bulk_cost++;
+        }
+
+        /* Look at the bulk cost of the previous loop and see if it is
+         * a new lower bulk cost */
+        prev_bulk_cost = s->length_table[i + 1] + base_bulk_cost;
+        if (prev_bulk_cost <= sec_lowest_bulk_cost) {
+            /* If it's lower than the 2nd lowest, then it may be lower
+             * than the lowest */
+            if (prev_bulk_cost <= lowest_bulk_cost) {
+
+                /* If we have found a new lowest bulk cost,
+                 * then the 2nd lowest bulk cost is now farther than the
+                 * lowest bulk cost, and will never be used */
+                sec_lowest_bulk_cost = INT_MAX / 2;
+
+                lowest_bulk_cost = prev_bulk_cost;
+                lowest_bulk_cost_index = i + 1;
+            } else {
+                /* Then it must be the 2nd lowest bulk cost */
+                sec_lowest_bulk_cost = prev_bulk_cost;
+                sec_lowest_bulk_cost_index = i + 1;
+            }
+        }
+
         if (!s->avctx->coded_frame->key_frame && !memcmp(this_line, prev_line, s->pixel_size))
             skipcount = FFMIN(skipcount + 1, MAX_RLE_SKIP);
         else
@@ -194,31 +254,22 @@ static void qtrle_encode_line(QtrleEncContext *s, const AVFrame *p, int line, ui
         }
         else {
             /* We cannot do neither skip nor repeat
-             * thus we search for the best bulk copy to do */
-
-            int limit = FFMIN(width - i, MAX_RLE_BULK);
+             * thus we use the best bulk copy  */
 
-            temp_cost = 1 + s->pixel_size + !i;
-            total_bulk_cost = INT_MAX;
+            s->length_table[i]  = lowest_bulk_cost;
+            s->rlecode_table[i] = lowest_bulk_cost_index - i;
 
-            for (j = 1; j <= limit; j++) {
-                if (s->length_table[i + j] + temp_cost < total_bulk_cost) {
-                    /* We have found a better bulk copy ... */
-                    total_bulk_cost = s->length_table[i + j] + temp_cost;
-                    bulkcount = j;
-                }
-                temp_cost += s->pixel_size;
-            }
-
-            s->length_table[i]  = total_bulk_cost;
-            s->rlecode_table[i] = bulkcount;
         }
 
+        /* These bulk costs increase every iteration */
+        lowest_bulk_cost += s->pixel_size;
+        sec_lowest_bulk_cost += s->pixel_size;
+
         this_line -= s->pixel_size;
         prev_line -= s->pixel_size;
     }
 
-    /* Good ! Now we have the best sequence for this line, let's ouput it */
+    /* Good ! Now we have the best sequence for this line, let's output it */
 
     /* We do a special case for the first pixel so that we avoid testing it in
      * the whole loop */
@@ -243,12 +294,28 @@ static void qtrle_encode_line(QtrleEncContext *s, const AVFrame *p, int line, ui
         }
         else if (rlecode > 0) {
             /* bulk copy */
-            bytestream_put_buffer(buf, this_line + i*s->pixel_size, rlecode*s->pixel_size);
+            if (s->avctx->pix_fmt == AV_PIX_FMT_GRAY8) {
+                int j;
+                // QT grayscale colorspace has 0=white and 255=black, we will
+                // ignore the palette that is included in the AVFrame because
+                // AV_PIX_FMT_GRAY8 has defined color mapping
+                for (j = 0; j < rlecode*s->pixel_size; ++j)
+                    bytestream_put_byte(buf, *(this_line + i*s->pixel_size + j) ^ 0xff);
+            } else {
+                bytestream_put_buffer(buf, this_line + i*s->pixel_size, rlecode*s->pixel_size);
+            }
             i += rlecode;
         }
         else {
             /* repeat the bits */
-            bytestream_put_buffer(buf, this_line + i*s->pixel_size, s->pixel_size);
+            if (s->avctx->pix_fmt == AV_PIX_FMT_GRAY8) {
+                int j;
+                // QT grayscale colorspace has 0=white and 255=black, ...
+                for (j = 0; j < s->pixel_size; ++j)
+                    bytestream_put_byte(buf, *(this_line + i*s->pixel_size + j) ^ 0xff);
+            } else {
+                bytestream_put_buffer(buf, this_line + i*s->pixel_size, s->pixel_size);
+            }
             i -= rlecode;
         }
     }
@@ -264,7 +331,7 @@ static int encode_frame(QtrleEncContext *s, const AVFrame *p, uint8_t *buf)
     uint8_t *orig_buf = buf;
 
     if (!s->avctx->coded_frame->key_frame) {
-        unsigned line_size = s->avctx->width * s->pixel_size;
+        unsigned line_size = s->logical_width * s->pixel_size;
         for (start_line = 0; start_line < s->avctx->height; start_line++)
             if (memcmp(p->data[0] + start_line*p->linesize[0],
                        s->previous_frame.data[0] + start_line*s->previous_frame.linesize[0],
@@ -304,11 +371,8 @@ static int qtrle_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     AVFrame * const p = avctx->coded_frame;
     int ret;
 
-    if ((ret = ff_alloc_packet(pkt, s->max_buf_size)) < 0) {
-        /* Upper bound check for compressed data */
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet of size %d.\n", s->max_buf_size);
+    if ((ret = ff_alloc_packet2(avctx, pkt, s->max_buf_size)) < 0)
         return ret;
-    }
 
     if (avctx->gop_size == 0 || (s->avctx->frame_number % avctx->gop_size) == 0) {
         /* I-Frame */
@@ -343,6 +407,6 @@ AVCodec ff_qtrle_encoder = {
     .encode2        = qtrle_encode_frame,
     .close          = qtrle_encode_end,
     .pix_fmts       = (const enum AVPixelFormat[]){
-        AV_PIX_FMT_RGB24, AV_PIX_FMT_RGB555BE, AV_PIX_FMT_ARGB, AV_PIX_FMT_NONE
+        AV_PIX_FMT_RGB24, AV_PIX_FMT_RGB555BE, AV_PIX_FMT_ARGB, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE
     },
 };
diff --git a/libavcodec/r210dec.c b/libavcodec/r210dec.c
index 6adaac0..cbebf7c 100644
--- a/libavcodec/r210dec.c
+++ b/libavcodec/r210dec.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2009 Reimar Doeffinger <Reimar.Doeffinger@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -39,7 +39,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     int h, w, ret;
     AVFrame *pic = data;
     const uint32_t *src = (const uint32_t *)avpkt->data;
-    int aligned_width = FFALIGN(avctx->width, 64);
+    int aligned_width = FFALIGN(avctx->width,
+                                avctx->codec_id == AV_CODEC_ID_R10K ? 1 : 64);
     uint8_t *dst_line;
 
     if (avpkt->size < 4 * aligned_width * avctx->height) {
@@ -57,8 +58,13 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     for (h = 0; h < avctx->height; h++) {
         uint16_t *dst = (uint16_t *)dst_line;
         for (w = 0; w < avctx->width; w++) {
-            uint32_t pixel = av_be2ne32(*src++);
+            uint32_t pixel;
             uint16_t r, g, b;
+            if (avctx->codec_id==AV_CODEC_ID_AVRP) {
+                pixel = av_le2ne32(*src++);
+            } else {
+                pixel = av_be2ne32(*src++);
+            }
             if (avctx->codec_id==AV_CODEC_ID_R210) {
                 b =  pixel <<  6;
                 g = (pixel >>  4) & 0xffc0;
@@ -103,3 +109,14 @@ AVCodec ff_r10k_decoder = {
     .capabilities   = CODEC_CAP_DR1,
 };
 #endif
+#if CONFIG_AVRP_DECODER
+AVCodec ff_avrp_decoder = { /* AVRP decoder entry, wired to the shared decode_init()/decode_frame() */
+    .name           = "avrp",
+    .long_name      = NULL_IF_CONFIG_SMALL("Avid 1:1 10-bit RGB Packer"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_AVRP, /* decode_frame() reads this codec's words as little-endian */
+    .init           = decode_init,
+    .decode         = decode_frame,
+    .capabilities   = CODEC_CAP_DR1,
+};
+#endif
diff --git a/libavcodec/r210enc.c b/libavcodec/r210enc.c
new file mode 100644
index 0000000..d61cd75
--- /dev/null
+++ b/libavcodec/r210enc.c
@@ -0,0 +1,123 @@
+/*
+ * R210 encoder
+ *
+ * Copyright (c) 2012 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "internal.h"
+#include "bytestream.h"
+
+/* Allocate avctx->coded_frame, which encode_frame() marks as an intra key
+ * frame. Returns 0 on success or AVERROR(ENOMEM) if allocation fails. */
+static av_cold int encode_init(AVCodecContext *avctx)
+{
+    AVFrame *frame = av_frame_alloc();
+
+    avctx->coded_frame = frame;
+    return frame ? 0 : AVERROR(ENOMEM);
+}
+
+/* Pack an RGB48 frame into one 32-bit word per pixel; each row is zero-padded
+ * to aligned_width. Returns 0, or the error from ff_alloc_packet2(). */
+static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
+                        const AVFrame *pic, int *got_packet)
+{
+    int i, j, ret;
+    int aligned_width = FFALIGN(avctx->width,
+                                avctx->codec_id == AV_CODEC_ID_R10K ? 1 : 64);
+    int pad = (aligned_width - avctx->width) * 4;
+    uint8_t *src_line, *dst;
+
+    if ((ret = ff_alloc_packet2(avctx, pkt, 4 * aligned_width * avctx->height)) < 0)
+        return ret;
+
+    avctx->coded_frame->key_frame = 1;
+    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+    src_line = pic->data[0];
+    dst = pkt->data;
+
+    for (i = 0; i < avctx->height; i++) {
+        uint16_t *src = (uint16_t *)src_line;
+        for (j = 0; j < avctx->width; j++, src += 3) {
+            /* 32-bit unsigned: as uint16_t, r << 22 promotes to int and overflows */
+            uint32_t r = src[0] >> 6, g = src[1] >> 6, b = src[2] >> 4;
+            uint32_t pixel;
+            if (avctx->codec_id == AV_CODEC_ID_R210)
+                pixel = (r << 20) | (g << 10) | (b >> 2);
+            else
+                pixel = (r << 22) | (g << 12) | b;
+            if (avctx->codec_id == AV_CODEC_ID_AVRP)
+                bytestream_put_le32(&dst, pixel);
+            else
+                bytestream_put_be32(&dst, pixel);
+        }
+        memset(dst, 0, pad);
+        dst += pad;
+        src_line += pic->linesize[0];
+    }
+
+    pkt->flags |= AV_PKT_FLAG_KEY;
+    *got_packet = 1;
+    return 0;
+}
+
+/* Free coded_frame; av_frame_alloc() memory must go through av_frame_free(). */
+static av_cold int encode_close(AVCodecContext *avctx)
+{
+    av_frame_free(&avctx->coded_frame);
+    return 0;
+}
+
+#if CONFIG_R210_ENCODER
+AVCodec ff_r210_encoder = { /* R210 entry; shares encode_init/encode_frame/encode_close with r10k and avrp */
+    .name           = "r210",
+    .long_name      = NULL_IF_CONFIG_SMALL("Uncompressed RGB 10-bit"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_R210, /* encode_frame(): r<<20 | g<<10 | b>>2, big-endian, rows padded to 64 pixels */
+    .init           = encode_init,
+    .encode2        = encode_frame,
+    .close          = encode_close,
+    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_RGB48, AV_PIX_FMT_NONE },
+};
+#endif
+#if CONFIG_R10K_ENCODER
+AVCodec ff_r10k_encoder = { /* R10K entry; shares encode_init/encode_frame/encode_close with r210 and avrp */
+    .name           = "r10k",
+    .long_name      = NULL_IF_CONFIG_SMALL("AJA Kona 10-bit RGB Codec"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_R10K, /* encode_frame(): r<<22 | g<<12 | b, big-endian, row width aligned to 1 (no padding) */
+    .init           = encode_init,
+    .encode2        = encode_frame,
+    .close          = encode_close,
+    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_RGB48, AV_PIX_FMT_NONE },
+};
+#endif
+#if CONFIG_AVRP_ENCODER
+AVCodec ff_avrp_encoder = { /* AVRP entry; shares encode_init/encode_frame/encode_close with r210 and r10k */
+    .name           = "avrp",
+    .long_name      = NULL_IF_CONFIG_SMALL("Avid 1:1 10-bit RGB Packer"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_AVRP, /* encode_frame(): r<<22 | g<<12 | b, little-endian, rows padded to 64 pixels */
+    .init           = encode_init,
+    .encode2        = encode_frame,
+    .close          = encode_close,
+    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_RGB48, AV_PIX_FMT_NONE },
+};
+#endif
diff --git a/libavcodec/ra144.c b/libavcodec/ra144.c
index 355badd..d5ad02f 100644
--- a/libavcodec/ra144.c
+++ b/libavcodec/ra144.c
@@ -2,20 +2,20 @@
  * Real Audio 1.0 (14.4K)
  * Copyright (c) 2003 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -1566,8 +1566,15 @@ int ff_eval_refl(int *refl, const int16_t *coefs, AVCodecContext *avctx)
         if (!b)
             b = -2;
 
-        for (j=0; j <= i; j++)
-            bp1[j] = ((bp2[j] - ((refl[i+1] * bp2[i-j]) >> 12)) * (0x1000000 / b)) >> 12;
+        b = 0x1000000 / b;
+        for (j=0; j <= i; j++) {
+#if CONFIG_FTRAPV
+            int a = bp2[j] - ((refl[i+1] * bp2[i-j]) >> 12);
+            if((int)(a*(unsigned)b) != a*(int64_t)b)
+                return 1;
+#endif
+            bp1[j] = ((bp2[j] - ((refl[i+1] * bp2[i-j]) >> 12)) * b) >> 12;
+        }
 
         if ((unsigned) bp1[i] + 0x1000 > 0x1fff)
             return 1;
@@ -1674,12 +1681,9 @@ unsigned int ff_rescale_rms(unsigned int rms, unsigned int energy)
 }
 
 /** inverse root mean square */
-int ff_irms(const int16_t *data)
+int ff_irms(AudioDSPContext *adsp, const int16_t *data)
 {
-    unsigned int i, sum = 0;
-
-    for (i=0; i < BLOCKSIZE; i++)
-        sum += data[i] * data[i];
+    unsigned int sum = adsp->scalarproduct_int16(data, data, BLOCKSIZE);
 
     if (sum == 0)
         return 0; /* OOPS - division by zero */
@@ -1687,18 +1691,17 @@ int ff_irms(const int16_t *data)
     return 0x20000000 / (ff_t_sqrt(sum) >> 8);
 }
 
-void ff_subblock_synthesis(RA144Context *ractx, const uint16_t *lpc_coefs,
+void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs,
                            int cba_idx, int cb1_idx, int cb2_idx,
                            int gval, int gain)
 {
-    uint16_t buffer_a[BLOCKSIZE];
-    uint16_t *block;
+    int16_t *block;
     int m[3];
 
     if (cba_idx) {
         cba_idx += BLOCKSIZE/2 - 1;
-        ff_copy_and_dup(buffer_a, ractx->adapt_cb, cba_idx);
-        m[0] = (ff_irms(buffer_a) * gval) >> 12;
+        ff_copy_and_dup(ractx->buffer_a, ractx->adapt_cb, cba_idx);
+        m[0] = (ff_irms(&ractx->adsp, ractx->buffer_a) * gval) >> 12;
     } else {
         m[0] = 0;
     }
@@ -1709,7 +1712,7 @@ void ff_subblock_synthesis(RA144Context *ractx, const uint16_t *lpc_coefs,
 
     block = ractx->adapt_cb + BUFFERSIZE - BLOCKSIZE;
 
-    add_wav(block, gain, cba_idx, m, cba_idx? buffer_a: NULL,
+    add_wav(block, gain, cba_idx, m, cba_idx? ractx->buffer_a: NULL,
             ff_cb1_vects[cb1_idx], ff_cb2_vects[cb2_idx]);
 
     memcpy(ractx->curr_sblock, ractx->curr_sblock + BLOCKSIZE,
diff --git a/libavcodec/ra144.h b/libavcodec/ra144.h
index 81d6964..c1ceb87 100644
--- a/libavcodec/ra144.h
+++ b/libavcodec/ra144.h
@@ -2,20 +2,20 @@
  * Real Audio 1.0 (14.4K)
  * Copyright (c) 2003 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,16 +25,18 @@
 #include <stdint.h>
 #include "lpc.h"
 #include "audio_frame_queue.h"
+#include "audiodsp.h"
 
 #define NBLOCKS         4       ///< number of subblocks within a block
 #define BLOCKSIZE       40      ///< subblock size in 16-bit words
 #define BUFFERSIZE      146     ///< the size of the adaptive codebook
 #define FIXED_CB_SIZE   128     ///< size of fixed codebooks
-#define FRAMESIZE       20      ///< size of encoded frame
+#define FRAME_SIZE      20      ///< size of encoded frame
 #define LPC_ORDER       10      ///< order of LPC filter
 
 typedef struct RA144Context {
     AVCodecContext *avctx;
+    AudioDSPContext adsp;
     LPCContext lpc_ctx;
     AudioFrameQueue afq;
     int last_frame;
@@ -56,7 +58,9 @@ typedef struct RA144Context {
 
     /** Adaptive codebook, its size is two units bigger to avoid a
      *  buffer overflow. */
-    uint16_t adapt_cb[146+2];
+    int16_t adapt_cb[146+2];
+
+    DECLARE_ALIGNED(16, int16_t, buffer_a)[FFALIGN(BLOCKSIZE,16)];
 } RA144Context;
 
 void ff_copy_and_dup(int16_t *target, const int16_t *source, int offset);
@@ -68,8 +72,8 @@ unsigned int ff_rms(const int *data);
 int ff_interp(RA144Context *ractx, int16_t *out, int a, int copyold,
               int energy);
 unsigned int ff_rescale_rms(unsigned int rms, unsigned int energy);
-int ff_irms(const int16_t *data);
-void ff_subblock_synthesis(RA144Context *ractx, const uint16_t *lpc_coefs,
+int ff_irms(AudioDSPContext *adsp, const int16_t *data/*align 16*/);
+void ff_subblock_synthesis(RA144Context *ractx, const int16_t *lpc_coefs,
                            int cba_idx, int cb1_idx, int cb2_idx,
                            int gval, int gain);
 
diff --git a/libavcodec/ra144dec.c b/libavcodec/ra144dec.c
index 3be3877..29c7822 100644
--- a/libavcodec/ra144dec.c
+++ b/libavcodec/ra144dec.c
@@ -5,20 +5,20 @@
  * Copyright (c) 2003 Nick Kurshev
  *     Based on public domain decoder at http://www.honeypot.net/audio
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -34,6 +34,7 @@ static av_cold int ra144_decode_init(AVCodecContext * avctx)
     RA144Context *ractx = avctx->priv_data;
 
     ractx->avctx = avctx;
+    ff_audiodsp_init(&ractx->adsp);
 
     ractx->lpc_coef[0] = ractx->lpc_tables[0];
     ractx->lpc_coef[1] = ractx->lpc_tables[1];
@@ -45,7 +46,7 @@ static av_cold int ra144_decode_init(AVCodecContext * avctx)
     return 0;
 }
 
-static void do_output_subblock(RA144Context *ractx, const uint16_t  *lpc_coefs,
+static void do_output_subblock(RA144Context *ractx, const int16_t  *lpc_coefs,
                                int gval, GetBitContext *gb)
 {
     int cba_idx = get_bits(gb, 7); // index of the adaptive CB, 0 if none
@@ -66,7 +67,7 @@ static int ra144_decode_frame(AVCodecContext * avctx, void *data,
     int buf_size = avpkt->size;
     static const uint8_t sizes[LPC_ORDER] = {6, 5, 5, 4, 4, 3, 3, 3, 3, 2};
     unsigned int refl_rms[NBLOCKS];           // RMS of the reflection coefficients
-    uint16_t block_coefs[NBLOCKS][LPC_ORDER]; // LPC coefficients of each sub-block
+    int16_t block_coefs[NBLOCKS][LPC_ORDER];  // LPC coefficients of each sub-block
     unsigned int lpc_refl[LPC_ORDER];         // LPC reflection coefficients of the frame
     int i, j;
     int ret;
@@ -76,7 +77,7 @@ static int ra144_decode_frame(AVCodecContext * avctx, void *data,
     RA144Context *ractx = avctx->priv_data;
     GetBitContext gb;
 
-    if (buf_size < FRAMESIZE) {
+    if (buf_size < FRAME_SIZE) {
         av_log(avctx, AV_LOG_ERROR,
                "Frame too small (%d bytes). Truncated file?\n", buf_size);
         *got_frame_ptr = 0;
@@ -85,13 +86,11 @@ static int ra144_decode_frame(AVCodecContext * avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = NBLOCKS * BLOCKSIZE;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     samples = (int16_t *)frame->data[0];
 
-    init_get_bits(&gb, buf, FRAMESIZE * 8);
+    init_get_bits8(&gb, buf, FRAME_SIZE);
 
     for (i = 0; i < LPC_ORDER; i++)
         lpc_refl[i] = ff_lpc_refl_cb[i][get_bits(&gb, sizes[i])];
@@ -124,7 +123,7 @@ static int ra144_decode_frame(AVCodecContext * avctx, void *data,
 
     *got_frame_ptr = 1;
 
-    return FRAMESIZE;
+    return FRAME_SIZE;
 }
 
 AVCodec ff_ra_144_decoder = {
diff --git a/libavcodec/ra144enc.c b/libavcodec/ra144enc.c
index 7627adc..499c41a 100644
--- a/libavcodec/ra144enc.c
+++ b/libavcodec/ra144enc.c
@@ -2,20 +2,20 @@
  * Real Audio 1.0 (14.4K) encoder
  * Copyright (c) 2010 Francesco Lavra <francescolavra@interfree.it>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -35,7 +35,6 @@
 #include "put_bits.h"
 #include "ra144.h"
 
-
 static av_cold int ra144_encode_close(AVCodecContext *avctx)
 {
     RA144Context *ractx = avctx->priv_data;
@@ -62,6 +61,7 @@ static av_cold int ra144_encode_init(AVCodecContext * avctx)
     ractx->lpc_coef[0] = ractx->lpc_tables[0];
     ractx->lpc_coef[1] = ractx->lpc_tables[1];
     ractx->avctx = avctx;
+    ff_audiodsp_init(&ractx->adsp);
     ret = ff_lpc_init(&ractx->lpc_ctx, avctx->frame_size, LPC_ORDER,
                       FF_LPC_TYPE_LEVINSON);
     if (ret < 0)
@@ -198,8 +198,8 @@ static void create_adapt_vect(float *vect, const int16_t *cb, int lag)
 static int adaptive_cb_search(const int16_t *adapt_cb, float *work,
                               const float *coefs, float *data)
 {
-    int i, best_vect;
-    float score, gain, best_score, best_gain;
+    int i, av_uninit(best_vect);
+    float score, gain, best_score, av_uninit(best_gain);
     float exc[BLOCKSIZE];
 
     gain = best_score = 0;
@@ -335,9 +335,9 @@ static void ra144_encode_subblock(RA144Context *ractx,
     float data[BLOCKSIZE] = { 0 }, work[LPC_ORDER + BLOCKSIZE];
     float coefs[LPC_ORDER];
     float zero[BLOCKSIZE], cba[BLOCKSIZE], cb1[BLOCKSIZE], cb2[BLOCKSIZE];
-    int16_t cba_vect[BLOCKSIZE];
     int cba_idx, cb1_idx, cb2_idx, gain;
-    int i, n, m[3];
+    int i, n;
+    unsigned m[3];
     float g[3];
     float error, best_error;
 
@@ -373,8 +373,8 @@ static void ra144_encode_subblock(RA144Context *ractx,
          */
         memcpy(cba, work + LPC_ORDER, sizeof(cba));
 
-        ff_copy_and_dup(cba_vect, ractx->adapt_cb, cba_idx + BLOCKSIZE / 2 - 1);
-        m[0] = (ff_irms(cba_vect) * rms) >> 12;
+        ff_copy_and_dup(ractx->buffer_a, ractx->adapt_cb, cba_idx + BLOCKSIZE / 2 - 1);
+        m[0] = (ff_irms(&ractx->adsp, ractx->buffer_a) * rms) >> 12;
     }
     fixed_cb_search(work + LPC_ORDER, coefs, data, cba_idx, &cb1_idx, &cb2_idx);
     for (i = 0; i < BLOCKSIZE; i++) {
@@ -447,10 +447,8 @@ static int ra144_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     if (ractx->last_frame)
         return 0;
 
-    if ((ret = ff_alloc_packet(avpkt, FRAMESIZE))) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+    if ((ret = ff_alloc_packet2(avctx, avpkt, FRAME_SIZE)) < 0)
         return ret;
-    }
 
     /**
      * Since the LPC coefficients are calculated on a frame centered over the
@@ -538,7 +536,7 @@ static int ra144_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     ff_af_queue_remove(&ractx->afq, avctx->frame_size, &avpkt->pts,
                        &avpkt->duration);
 
-    avpkt->size = FRAMESIZE;
+    avpkt->size = FRAME_SIZE;
     *got_packet_ptr = 1;
     return 0;
 }
@@ -556,4 +554,6 @@ AVCodec ff_ra_144_encoder = {
     .capabilities   = CODEC_CAP_DELAY | CODEC_CAP_SMALL_LAST_FRAME,
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                      AV_SAMPLE_FMT_NONE },
+    .supported_samplerates = (const int[]){ 8000, 0 },
+    .channel_layouts = (const uint64_t[]) { AV_CH_LAYOUT_MONO, 0 },
 };
diff --git a/libavcodec/ra288.c b/libavcodec/ra288.c
index 215786c..c1b9b6b 100644
--- a/libavcodec/ra288.c
+++ b/libavcodec/ra288.c
@@ -2,20 +2,20 @@
  * RealAudio 2.0 (28.8K)
  * Copyright (c) 2003 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -67,6 +67,11 @@ static av_cold int ra288_decode_init(AVCodecContext *avctx)
     avctx->channel_layout = AV_CH_LAYOUT_MONO;
     avctx->sample_fmt     = AV_SAMPLE_FMT_FLT;
 
+    if (avctx->block_align <= 0) {
+        av_log(avctx, AV_LOG_ERROR, "unsupported block align\n");
+        return AVERROR_PATCHWELCOME;
+    }
+
     avpriv_float_dsp_init(&ractx->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
 
     return 0;
@@ -104,14 +109,14 @@ static void decode(RA288Context *ractx, float gain, int cb_coef)
     for (i=0; i < 5; i++)
         buffer[i] = codetable[cb_coef][i] * sumsum;
 
-    sum = avpriv_scalarproduct_float_c(buffer, buffer, 5) * ((1 << 24) / 5.0);
+    sum = avpriv_scalarproduct_float_c(buffer, buffer, 5);
 
-    sum = FFMAX(sum, 1);
+    sum = FFMAX(sum, 5.0 / (1<<24));
 
     /* shift and store */
     memmove(gain_block, gain_block + 1, 9 * sizeof(*gain_block));
 
-    gain_block[9] = 10 * log10(sum) - 32;
+    gain_block[9] = 10 * log10(sum) + (10*log10(((1<<24)/5.)) - 32);
 
     ff_celp_lp_synthesis_filterf(block, ractx->sp_lpc, buffer, 5, 36);
 }
@@ -139,6 +144,8 @@ static void do_hybrid_window(RA288Context *ractx,
                                             MAX_BACKWARD_FILTER_LEN   +
                                             MAX_BACKWARD_FILTER_NONREC, 16)]);
 
+    av_assert2(order>=0);
+
     ractx->fdsp.vector_fmul(work, window, hist, FFALIGN(order + n + non_rec, 16));
 
     convolve(buffer1, work + order    , n      , order);
@@ -191,13 +198,11 @@ static int ra288_decode_frame(AVCodecContext * avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = RA288_BLOCK_SIZE * RA288_BLOCKS_PER_FRAME;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     out = (float *)frame->data[0];
 
-    init_get_bits(&gb, buf, avctx->block_align * 8);
+    init_get_bits8(&gb, buf, avctx->block_align);
 
     for (i=0; i < RA288_BLOCKS_PER_FRAME; i++) {
         float gain = amptable[get_bits(&gb, 3)];
diff --git a/libavcodec/ra288.h b/libavcodec/ra288.h
index 9f4beeb..7323eea 100644
--- a/libavcodec/ra288.h
+++ b/libavcodec/ra288.h
@@ -2,20 +2,20 @@
  * RealAudio 2.0 (28.8K)
  * Copyright (c) 2003 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/ralf.c b/libavcodec/ralf.c
index ebcdf6f..8a319ac 100644
--- a/libavcodec/ralf.c
+++ b/libavcodec/ralf.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2012 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -461,10 +461,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame_ptr,
     }
 
     frame->nb_samples = ctx->max_frame_size;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Me fail get_buffer()? That's unpossible!\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     samples0 = (int16_t *)frame->data[0];
     samples1 = (int16_t *)frame->data[1];
 
diff --git a/libavcodec/ralfdata.h b/libavcodec/ralfdata.h
index 83eb970..9a84e45 100644
--- a/libavcodec/ralfdata.h
+++ b/libavcodec/ralfdata.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2012 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/rangecoder.c b/libavcodec/rangecoder.c
index af0a8c0..69150a5 100644
--- a/libavcodec/rangecoder.c
+++ b/libavcodec/rangecoder.c
@@ -2,20 +2,20 @@
  * Range coder
  * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -34,6 +34,7 @@
 #include <string.h>
 
 #include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
 #include "avcodec.h"
 #include "rangecoder.h"
 #include "bytestream.h"
@@ -55,7 +56,7 @@ av_cold void ff_init_range_decoder(RangeCoder *c, const uint8_t *buf,
     /* cast to avoid compiler warning */
     ff_init_range_encoder(c, (uint8_t *)buf, buf_size);
 
-    c->low = bytestream_get_be16(&c->bytestream);
+    c->low = bytestream_get_be16((const uint8_t **)&c->bytestream);
 }
 
 void ff_build_rac_states(RangeCoder *c, int factor, int max_p)
@@ -107,8 +108,8 @@ int ff_rac_terminate(RangeCoder *c)
     c->range = 0xFF;
     renorm_encoder(c);
 
-    assert(c->low == 0);
-    assert(c->range >= 0x100);
+    av_assert1(c->low   == 0);
+    av_assert1(c->range >= 0x100);
 
     return c->bytestream - c->bytestream_start;
 }
diff --git a/libavcodec/rangecoder.h b/libavcodec/rangecoder.h
index ad9c81f..88e555f 100644
--- a/libavcodec/rangecoder.h
+++ b/libavcodec/rangecoder.h
@@ -2,20 +2,20 @@
  * Range coder
  * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -28,9 +28,9 @@
 #define AVCODEC_RANGECODER_H
 
 #include <stdint.h>
-#include <assert.h>
 
 #include "libavutil/common.h"
+#include "libavutil/avassert.h"
 
 typedef struct RangeCoder {
     int low;
@@ -86,9 +86,9 @@ static inline void put_rac(RangeCoder *c, uint8_t *const state, int bit)
 {
     int range1 = (c->range * (*state)) >> 8;
 
-    assert(*state);
-    assert(range1 < c->range);
-    assert(range1 > 0);
+    av_assert2(*state);
+    av_assert2(range1 < c->range);
+    av_assert2(range1 > 0);
     if (!bit) {
         c->range -= range1;
         *state    = c->zero_state[*state];
diff --git a/libavcodec/ratecontrol.c b/libavcodec/ratecontrol.c
index 70d9787..72b3f44 100644
--- a/libavcodec/ratecontrol.c
+++ b/libavcodec/ratecontrol.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -47,7 +47,7 @@ void ff_write_pass1_stats(MpegEncContext *s)
 {
     snprintf(s->avctx->stats_out, 256,
              "in:%d out:%d type:%d q:%d itex:%d ptex:%d mv:%d misc:%d "
-             "fcode:%d bcode:%d mc-var:%d var:%d icount:%d skipcount:%d hbits:%d;\n",
+             "fcode:%d bcode:%d mc-var:%"PRId64" var:%"PRId64" icount:%d skipcount:%d hbits:%d;\n",
              s->current_picture_ptr->f->display_picture_number,
              s->current_picture_ptr->f->coded_picture_number,
              s->pict_type,
@@ -64,6 +64,11 @@ void ff_write_pass1_stats(MpegEncContext *s)
              s->header_bits);
 }
 
+static double get_fps(AVCodecContext *avctx)
+{   /* 1 / time_base, further divided by ticks_per_frame (treated as at least 1) */
+    return 1.0 / av_q2d(avctx->time_base) / FFMAX(avctx->ticks_per_frame, 1);
+}
+
 static inline double qp2bits(RateControlEntry *rce, double qp)
 {
     if (qp <= 0.0) {
@@ -125,6 +130,13 @@ av_cold int ff_rate_control_init(MpegEncContext *s)
     };
     emms_c();
 
+    if (!s->avctx->rc_max_available_vbv_use && s->avctx->rc_buffer_size) {
+        if (s->avctx->rc_max_rate) {
+            s->avctx->rc_max_available_vbv_use = av_clipf(s->avctx->rc_max_rate/(s->avctx->rc_buffer_size*get_fps(s->avctx)), 1.0/3, 1.0);
+        } else
+            s->avctx->rc_max_available_vbv_use = 1.0;
+    }
+
     res = av_expr_parse(&rcc->rc_eq_eval,
                         s->avctx->rc_eq ? s->avctx->rc_eq : "tex^qComp",
                         const_names, func1_names, func1,
@@ -148,6 +160,8 @@ av_cold int ff_rate_control_init(MpegEncContext *s)
         rcc->last_qscale_for[i] = FF_QP2LAMBDA * 5;
     }
     rcc->buffer_index = s->avctx->rc_initial_buffer_occupancy;
+    if (!rcc->buffer_index)
+        rcc->buffer_index = s->avctx->rc_buffer_size * 3 / 4;
 
     if (s->flags & CODEC_FLAG_PASS2) {
         int i;
@@ -193,7 +207,7 @@ av_cold int ff_rate_control_init(MpegEncContext *s)
             assert(picture_number < rcc->num_entries);
             rce = &rcc->entry[picture_number];
 
-            e += sscanf(p, " in:%*d out:%*d type:%d q:%f itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%d var:%d icount:%d skipcount:%d hbits:%d",
+            e += sscanf(p, " in:%*d out:%*d type:%d q:%f itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%"SCNd64" var:%"SCNd64" icount:%d skipcount:%d hbits:%d",
                         &rce->pict_type, &rce->qscale, &rce->i_tex_bits, &rce->p_tex_bits,
                         &rce->mv_bits, &rce->misc_bits,
                         &rce->f_code, &rce->b_code,
@@ -276,7 +290,7 @@ av_cold int ff_rate_control_init(MpegEncContext *s)
                 get_qscale(s, &rce, rcc->pass1_wanted_bits / rcc->pass1_rc_eq_output_sum, i);
 
                 // FIXME misbehaves a little for variable fps
-                rcc->pass1_wanted_bits += s->bit_rate / (1 / av_q2d(s->avctx->time_base));
+                rcc->pass1_wanted_bits += s->bit_rate / get_fps(s->avctx);
             }
         }
     }
@@ -301,7 +315,7 @@ av_cold void ff_rate_control_uninit(MpegEncContext *s)
 int ff_vbv_update(MpegEncContext *s, int frame_size)
 {
     RateControlContext *rcc = &s->rc_context;
-    const double fps        = 1 / av_q2d(s->avctx->time_base);
+    const double fps        = get_fps(s->avctx);
     const int buffer_size   = s->avctx->rc_buffer_size;
     const double min_rate   = s->avctx->rc_min_rate / fps;
     const double max_rate   = s->avctx->rc_max_rate / fps;
@@ -315,6 +329,9 @@ int ff_vbv_update(MpegEncContext *s, int frame_size)
         rcc->buffer_index -= frame_size;
         if (rcc->buffer_index < 0) {
             av_log(s->avctx, AV_LOG_ERROR, "rc buffer underflow\n");
+            if (frame_size > max_rate && s->qscale == s->avctx->qmax) {
+                av_log(s->avctx, AV_LOG_ERROR, "max bitrate possibly too small or try trellis with large lmax or increase qmax\n");
+            }
             rcc->buffer_index = 0;
         }
 
@@ -491,7 +508,7 @@ static double modify_qscale(MpegEncContext *s, RateControlEntry *rce,
 {
     RateControlContext *rcc  = &s->rc_context;
     const double buffer_size = s->avctx->rc_buffer_size;
-    const double fps         = 1 / av_q2d(s->avctx->time_base);
+    const double fps         = get_fps(s->avctx);
     const double min_rate    = s->avctx->rc_min_rate / fps;
     const double max_rate    = s->avctx->rc_max_rate / fps;
     const int pict_type      = rce->new_pict_type;
@@ -737,7 +754,7 @@ float ff_rate_estimate_qscale(MpegEncContext *s, int dry_run)
     RateControlEntry local_rce, *rce;
     double bits;
     double rate_factor;
-    int var;
+    int64_t var;
     const int pict_type = s->pict_type;
     Picture * const pic = &s->current_picture;
     emms_c();
@@ -750,19 +767,25 @@ float ff_rate_estimate_qscale(MpegEncContext *s, int dry_run)
 
     get_qminmax(&qmin, &qmax, s, pict_type);
 
-    fps = 1 / av_q2d(s->avctx->time_base);
+    fps = get_fps(s->avctx);
     /* update predictors */
     if (picture_number > 2 && !dry_run) {
-        const int last_var = s->last_pict_type == AV_PICTURE_TYPE_I ? rcc->last_mb_var_sum
-                                                                    : rcc->last_mc_mb_var_sum;
+        const int64_t last_var =
+            s->last_pict_type == AV_PICTURE_TYPE_I ? rcc->last_mb_var_sum
+                                                   : rcc->last_mc_mb_var_sum;
+        av_assert1(s->frame_bits >= s->stuffing_bits);
         update_predictor(&rcc->pred[s->last_pict_type],
                          rcc->last_qscale,
-                         sqrt(last_var), s->frame_bits);
+                         sqrt(last_var),
+                         s->frame_bits - s->stuffing_bits);
     }
 
     if (s->flags & CODEC_FLAG_PASS2) {
         assert(picture_number >= 0);
-        assert(picture_number < rcc->num_entries);
+        if (picture_number >= rcc->num_entries) {
+            av_log(s, AV_LOG_ERROR, "Input is longer than 2-pass log file\n");
+            return -1;
+        }
         rce         = &rcc->entry[picture_number];
         wanted_bits = rce->expected_bits;
     } else {
@@ -796,7 +819,7 @@ float ff_rate_estimate_qscale(MpegEncContext *s, int dry_run)
             assert(pict_type == rce->new_pict_type);
 
         q = rce->new_qscale / br_compensation;
-        av_dlog(s, "%f %f %f last:%d var:%d type:%d//\n", q, rce->new_qscale,
+        av_dlog(s, "%f %f %f last:%d var:%"PRId64" type:%d//\n", q, rce->new_qscale,
                 br_compensation, s->frame_bits, var, pict_type);
     } else {
         rce->pict_type     =
@@ -825,7 +848,6 @@ float ff_rate_estimate_qscale(MpegEncContext *s, int dry_run)
         rcc->mv_bits_sum[pict_type] += rce->mv_bits;
         rcc->frame_count[pict_type]++;
 
-        bits        = rce->i_tex_bits + rce->p_tex_bits;
         rate_factor = rcc->pass1_wanted_bits /
                       rcc->pass1_rc_eq_output_sum * br_compensation;
 
@@ -858,7 +880,7 @@ float ff_rate_estimate_qscale(MpegEncContext *s, int dry_run)
     if (s->avctx->debug & FF_DEBUG_RC) {
         av_log(s->avctx, AV_LOG_DEBUG,
                "%c qp:%d<%2.1f<%d %d want:%d total:%d comp:%f st_q:%2.2f "
-               "size:%d var:%d/%d br:%d fps:%d\n",
+               "size:%d var:%"PRId64"/%"PRId64" br:%d fps:%d\n",
                av_get_picture_type_char(pict_type),
                qmin, q, qmax, picture_number,
                (int)wanted_bits / 1000, (int)s->total_bits / 1000,
@@ -893,7 +915,7 @@ static int init_pass2(MpegEncContext *s)
     RateControlContext *rcc = &s->rc_context;
     AVCodecContext *a       = s->avctx;
     int i, toobig;
-    double fps             = 1 / av_q2d(s->avctx->time_base);
+    double fps             = get_fps(s->avctx);
     double complexity[5]   = { 0 }; // approximate bits at quant=1
     uint64_t const_bits[5] = { 0 }; // quantizer independent bits
     uint64_t all_const_bits;
@@ -902,7 +924,7 @@ static int init_pass2(MpegEncContext *s)
     double rate_factor          = 0;
     double step;
     const int filter_size = (int)(a->qblur * 4) | 1;
-    double expected_bits;
+    double expected_bits = 0; // init to silence gcc warning
     double *qscale, *blurred_qscale, qscale_sum;
 
     /* find complexity & const_bits & decide the pict_types */
@@ -929,8 +951,8 @@ static int init_pass2(MpegEncContext *s)
         return -1;
     }
 
-    qscale         = av_malloc(sizeof(double) * rcc->num_entries);
-    blurred_qscale = av_malloc(sizeof(double) * rcc->num_entries);
+    qscale         = av_malloc_array(rcc->num_entries, sizeof(double));
+    blurred_qscale = av_malloc_array(rcc->num_entries, sizeof(double));
     toobig = 0;
 
     for (step = 256 * 256; step > 0.0000001; step *= 0.5) {
@@ -949,6 +971,12 @@ static int init_pass2(MpegEncContext *s)
         assert(filter_size % 2 == 1);
 
         /* fixed I/B QP relative to P mode */
+        for (i = FFMAX(0, rcc->num_entries - 300); i < rcc->num_entries; i++) {
+            RateControlEntry *rce = &rcc->entry[i];
+
+            qscale[i] = get_diff_limited_q(s, rce, qscale[i]);
+        }
+
         for (i = rcc->num_entries - 1; i >= 0; i--) {
             RateControlEntry *rce = &rcc->entry[i];
 
diff --git a/libavcodec/ratecontrol.h b/libavcodec/ratecontrol.h
index 63ebeb2..eeb4bb9 100644
--- a/libavcodec/ratecontrol.h
+++ b/libavcodec/ratecontrol.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
  * Copyright (c) 2002-2004 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -49,8 +49,8 @@ typedef struct RateControlEntry{
     uint64_t expected_bits;
     int new_pict_type;
     float new_qscale;
-    int mc_mb_var_sum;
-    int mb_var_sum;
+    int64_t mc_mb_var_sum;
+    int64_t mb_var_sum;
     int i_count;
     int skip_count;
     int f_code;
@@ -71,8 +71,8 @@ typedef struct RateControlContext{
     double pass1_wanted_bits;     ///< bits which should have been outputed by the pass1 code (including complexity init)
     double last_qscale;
     double last_qscale_for[5];    ///< last qscale for a specific pict type, used for max_diff & ipb factor stuff
-    int last_mc_mb_var_sum;
-    int last_mb_var_sum;
+    int64_t last_mc_mb_var_sum;
+    int64_t last_mb_var_sum;
     uint64_t i_cplx_sum[5];
     uint64_t p_cplx_sum[5];
     uint64_t mv_bits_sum[5];
diff --git a/libavcodec/raw.c b/libavcodec/raw.c
index 541ef7a..56c4a6b 100644
--- a/libavcodec/raw.c
+++ b/libavcodec/raw.c
@@ -2,20 +2,20 @@
  * Raw Video Codec
  * Copyright (c) 2001 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -65,6 +65,7 @@ const PixelFormatTag ff_raw_pix_fmt_tags[] = {
     { AV_PIX_FMT_UYVY422, MKTAG('A', 'V', 'u', 'p') },
     { AV_PIX_FMT_UYVY422, MKTAG('V', 'D', 'T', 'Z') }, /* SoftLab-NSK VideoTizer */
     { AV_PIX_FMT_UYVY422, MKTAG('a', 'u', 'v', '2') },
+    { AV_PIX_FMT_UYVY422, MKTAG('c', 'y', 'u', 'v') }, /* CYUV is also Creative YUV */
     { AV_PIX_FMT_UYYVYY411, MKTAG('Y', '4', '1', '1') },
     { AV_PIX_FMT_GRAY8,   MKTAG('G', 'R', 'E', 'Y') },
     { AV_PIX_FMT_NV12,    MKTAG('N', 'V', '1', '2') },
@@ -83,14 +84,18 @@ const PixelFormatTag ff_raw_pix_fmt_tags[] = {
     { AV_PIX_FMT_BGR444LE, MKTAG('B', 'G', 'R', 12) },
     { AV_PIX_FMT_RGB444BE, MKTAG(12 , 'B', 'G', 'R') },
     { AV_PIX_FMT_BGR444BE, MKTAG(12 , 'R', 'G', 'B') },
-    { AV_PIX_FMT_RGBA,     MKTAG('R', 'G', 'B', 'A') },
-    { AV_PIX_FMT_BGRA,     MKTAG('B', 'G', 'R', 'A') },
     { AV_PIX_FMT_RGBA64LE, MKTAG('R', 'B', 'A', 64 ) },
     { AV_PIX_FMT_BGRA64LE, MKTAG('B', 'R', 'A', 64 ) },
     { AV_PIX_FMT_RGBA64BE, MKTAG(64 , 'R', 'B', 'A') },
     { AV_PIX_FMT_BGRA64BE, MKTAG(64 , 'B', 'R', 'A') },
+    { AV_PIX_FMT_RGBA,     MKTAG('R', 'G', 'B', 'A') },
+    { AV_PIX_FMT_RGB0,     MKTAG('R', 'G', 'B',  0 ) },
+    { AV_PIX_FMT_BGRA,     MKTAG('B', 'G', 'R', 'A') },
+    { AV_PIX_FMT_BGR0,     MKTAG('B', 'G', 'R',  0 ) },
     { AV_PIX_FMT_ABGR,     MKTAG('A', 'B', 'G', 'R') },
+    { AV_PIX_FMT_0BGR,     MKTAG( 0 , 'B', 'G', 'R') },
     { AV_PIX_FMT_ARGB,     MKTAG('A', 'R', 'G', 'B') },
+    { AV_PIX_FMT_0RGB,     MKTAG( 0 , 'R', 'G', 'B') },
     { AV_PIX_FMT_RGB24,    MKTAG('R', 'G', 'B', 24 ) },
     { AV_PIX_FMT_BGR24,    MKTAG('B', 'G', 'R', 24 ) },
     { AV_PIX_FMT_YUV411P,  MKTAG('4', '1', '1', 'P') },
@@ -120,6 +125,18 @@ const PixelFormatTag ff_raw_pix_fmt_tags[] = {
     { AV_PIX_FMT_YUV422P10BE, MKTAG(10 , 10 , '3', 'Y') },
     { AV_PIX_FMT_YUV444P10LE, MKTAG('Y', '3',  0 , 10 ) },
     { AV_PIX_FMT_YUV444P10BE, MKTAG(10 ,  0 , '3', 'Y') },
+    { AV_PIX_FMT_YUV420P12LE, MKTAG('Y', '3', 11 , 12 ) },
+    { AV_PIX_FMT_YUV420P12BE, MKTAG(12 , 11 , '3', 'Y') },
+    { AV_PIX_FMT_YUV422P12LE, MKTAG('Y', '3', 10 , 12 ) },
+    { AV_PIX_FMT_YUV422P12BE, MKTAG(12 , 10 , '3', 'Y') },
+    { AV_PIX_FMT_YUV444P12LE, MKTAG('Y', '3',  0 , 12 ) },
+    { AV_PIX_FMT_YUV444P12BE, MKTAG(12 ,  0 , '3', 'Y') },
+    { AV_PIX_FMT_YUV420P14LE, MKTAG('Y', '3', 11 , 14 ) },
+    { AV_PIX_FMT_YUV420P14BE, MKTAG(14 , 11 , '3', 'Y') },
+    { AV_PIX_FMT_YUV422P14LE, MKTAG('Y', '3', 10 , 14 ) },
+    { AV_PIX_FMT_YUV422P14BE, MKTAG(14 , 10 , '3', 'Y') },
+    { AV_PIX_FMT_YUV444P14LE, MKTAG('Y', '3',  0 , 14 ) },
+    { AV_PIX_FMT_YUV444P14BE, MKTAG(14 ,  0 , '3', 'Y') },
     { AV_PIX_FMT_YUV420P16LE, MKTAG('Y', '3', 11 , 16 ) },
     { AV_PIX_FMT_YUV420P16BE, MKTAG(16 , 11 , '3', 'Y') },
     { AV_PIX_FMT_YUV422P16LE, MKTAG('Y', '3', 10 , 16 ) },
@@ -127,6 +144,8 @@ const PixelFormatTag ff_raw_pix_fmt_tags[] = {
     { AV_PIX_FMT_YUV444P16LE, MKTAG('Y', '3',  0 , 16 ) },
     { AV_PIX_FMT_YUV444P16BE, MKTAG(16 ,  0 , '3', 'Y') },
     { AV_PIX_FMT_YUVA420P,    MKTAG('Y', '4', 11 ,  8 ) },
+    { AV_PIX_FMT_YUVA422P,    MKTAG('Y', '4', 10 ,  8 ) },
+    { AV_PIX_FMT_YUVA444P,    MKTAG('Y', '4',  0 ,  8 ) },
     { AV_PIX_FMT_YA8,         MKTAG('Y', '2',  0 ,  8 ) },
 
     { AV_PIX_FMT_YUVA420P9LE,  MKTAG('Y', '4', 11 ,  9 ) },
@@ -148,10 +167,41 @@ const PixelFormatTag ff_raw_pix_fmt_tags[] = {
     { AV_PIX_FMT_YUVA444P16LE, MKTAG('Y', '4',  0 , 16 ) },
     { AV_PIX_FMT_YUVA444P16BE, MKTAG(16 ,  0 , '4', 'Y') },
 
+    { AV_PIX_FMT_GBRP,         MKTAG('G', '3', 00 ,  8 ) },
+    { AV_PIX_FMT_GBRP9LE,      MKTAG('G', '3', 00 ,  9 ) },
+    { AV_PIX_FMT_GBRP9BE,      MKTAG( 9 , 00 , '3', 'G') },
+    { AV_PIX_FMT_GBRP10LE,     MKTAG('G', '3', 00 , 10 ) },
+    { AV_PIX_FMT_GBRP10BE,     MKTAG(10 , 00 , '3', 'G') },
+    { AV_PIX_FMT_GBRP12LE,     MKTAG('G', '3', 00 , 12 ) },
+    { AV_PIX_FMT_GBRP12BE,     MKTAG(12 , 00 , '3', 'G') },
+    { AV_PIX_FMT_GBRP14LE,     MKTAG('G', '3', 00 , 14 ) },
+    { AV_PIX_FMT_GBRP14BE,     MKTAG(14 , 00 , '3', 'G') },
+    { AV_PIX_FMT_GBRP16LE,     MKTAG('G', '3', 00 , 16 ) },
+    { AV_PIX_FMT_GBRP16BE,     MKTAG(16 , 00 , '3', 'G') },
+
+    { AV_PIX_FMT_XYZ12LE,      MKTAG('X', 'Y', 'Z' , 36 ) },
+    { AV_PIX_FMT_XYZ12BE,      MKTAG(36 , 'Z' , 'Y', 'X') },
+
+    { AV_PIX_FMT_BAYER_BGGR8,    MKTAG(0xBA, 'B', 'G', 8   ) },
+    { AV_PIX_FMT_BAYER_BGGR16LE, MKTAG(0xBA, 'B', 'G', 16  ) },
+    { AV_PIX_FMT_BAYER_BGGR16BE, MKTAG(16  , 'G', 'B', 0xBA) },
+    { AV_PIX_FMT_BAYER_RGGB8,    MKTAG(0xBA, 'R', 'G', 8   ) },
+    { AV_PIX_FMT_BAYER_RGGB16LE, MKTAG(0xBA, 'R', 'G', 16  ) },
+    { AV_PIX_FMT_BAYER_RGGB16BE, MKTAG(16  , 'G', 'R', 0xBA) },
+    { AV_PIX_FMT_BAYER_GBRG8,    MKTAG(0xBA, 'G', 'B', 8   ) },
+    { AV_PIX_FMT_BAYER_GBRG16LE, MKTAG(0xBA, 'G', 'B', 16  ) },
+    { AV_PIX_FMT_BAYER_GBRG16BE, MKTAG(16,   'B', 'G', 0xBA) },
+    { AV_PIX_FMT_BAYER_GRBG8,    MKTAG(0xBA, 'G', 'R', 8   ) },
+    { AV_PIX_FMT_BAYER_GRBG16LE, MKTAG(0xBA, 'G', 'R', 16  ) },
+    { AV_PIX_FMT_BAYER_GRBG16BE, MKTAG(16,   'R', 'G', 0xBA) },
+
     /* quicktime */
+    { AV_PIX_FMT_YUV420P, MKTAG('R', '4', '2', '0') }, /* Radius DV YUV PAL */
+    { AV_PIX_FMT_YUV411P, MKTAG('R', '4', '1', '1') }, /* Radius DV YUV NTSC */
     { AV_PIX_FMT_UYVY422, MKTAG('2', 'v', 'u', 'y') },
     { AV_PIX_FMT_UYVY422, MKTAG('2', 'V', 'u', 'y') },
     { AV_PIX_FMT_UYVY422, MKTAG('A', 'V', 'U', 'I') }, /* FIXME merge both fields */
+    { AV_PIX_FMT_UYVY422, MKTAG('b', 'x', 'y', 'v') },
     { AV_PIX_FMT_YUYV422, MKTAG('y', 'u', 'v', '2') },
     { AV_PIX_FMT_YUYV422, MKTAG('y', 'u', 'v', 's') },
     { AV_PIX_FMT_YUYV422, MKTAG('D', 'V', 'O', 'O') }, /* Digital Voodoo SD 8 Bit */
@@ -159,8 +209,10 @@ const PixelFormatTag ff_raw_pix_fmt_tags[] = {
     { AV_PIX_FMT_RGB565LE,MKTAG('L', '5', '6', '5') },
     { AV_PIX_FMT_RGB565BE,MKTAG('B', '5', '6', '5') },
     { AV_PIX_FMT_BGR24,   MKTAG('2', '4', 'B', 'G') },
+    { AV_PIX_FMT_BGR24,   MKTAG('b', 'x', 'b', 'g') },
     { AV_PIX_FMT_BGRA,    MKTAG('B', 'G', 'R', 'A') },
     { AV_PIX_FMT_RGBA,    MKTAG('R', 'G', 'B', 'A') },
+    { AV_PIX_FMT_RGB24,   MKTAG('b', 'x', 'r', 'g') },
     { AV_PIX_FMT_ABGR,    MKTAG('A', 'B', 'G', 'R') },
     { AV_PIX_FMT_GRAY16BE,MKTAG('b', '1', '6', 'g') },
     { AV_PIX_FMT_RGB48BE, MKTAG('b', '4', '8', 'r') },
@@ -182,3 +234,28 @@ unsigned int avcodec_pix_fmt_to_codec_tag(enum AVPixelFormat fmt)
     }
     return 0;
 }
+
+const PixelFormatTag avpriv_pix_fmt_bps_avi[] = { /* keyed by bits per coded sample (stored in the fourcc field), not by a real fourcc */
+    { AV_PIX_FMT_MONOWHITE, 1 },
+    { AV_PIX_FMT_PAL8,    2 },
+    { AV_PIX_FMT_PAL8,    4 },
+    { AV_PIX_FMT_PAL8,    8 },
+    { AV_PIX_FMT_RGB444LE, 12 },
+    { AV_PIX_FMT_RGB555LE, 15 }, /* 15 and 16 both select RGB555LE */
+    { AV_PIX_FMT_RGB555LE, 16 },
+    { AV_PIX_FMT_BGR24,  24 },
+    { AV_PIX_FMT_BGRA,   32 },
+    { AV_PIX_FMT_NONE,    0 }, /* last entry; NOTE(review): presumably the stop marker for avpriv_find_pix_fmt() - its body is not shown here */
+};
+
+const PixelFormatTag avpriv_pix_fmt_bps_mov[] = { /* keyed by bits per coded sample (stored in the fourcc field), not by a real fourcc */
+    { AV_PIX_FMT_MONOWHITE, 1 },
+    { AV_PIX_FMT_PAL8,      2 },
+    { AV_PIX_FMT_PAL8,      4 },
+    { AV_PIX_FMT_PAL8,      8 },
+    { AV_PIX_FMT_RGB555BE, 16 },
+    { AV_PIX_FMT_RGB24,    24 },
+    { AV_PIX_FMT_ARGB,     32 },
+    { AV_PIX_FMT_MONOWHITE,33 }, /* NOTE(review): depth 33 maps to MONOWHITE like 1; presumably a QuickTime grayscale depth code - confirm */
+    { AV_PIX_FMT_NONE,      0 }, /* last entry; NOTE(review): presumably the stop marker for avpriv_find_pix_fmt() - its body is not shown here */
+};
diff --git a/libavcodec/raw.h b/libavcodec/raw.h
index bf66671..a417993 100644
--- a/libavcodec/raw.h
+++ b/libavcodec/raw.h
@@ -2,20 +2,20 @@
  * Raw Video Codec
  * Copyright (c) 2001 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -28,12 +28,20 @@
 #define AVCODEC_RAW_H
 
 #include "avcodec.h"
+#include "libavutil/internal.h"
 
 typedef struct PixelFormatTag {
     enum AVPixelFormat pix_fmt;
     unsigned int fourcc;
 } PixelFormatTag;
 
-extern const PixelFormatTag ff_raw_pix_fmt_tags[];
+extern av_export const PixelFormatTag ff_raw_pix_fmt_tags[];
+#if LIBAVCODEC_VERSION_MAJOR < 55
+enum AVPixelFormat ff_find_pix_fmt(const PixelFormatTag *tags, unsigned int fourcc);
+#endif
+enum AVPixelFormat avpriv_find_pix_fmt(const PixelFormatTag *tags, unsigned int fourcc);
+
+extern av_export const PixelFormatTag avpriv_pix_fmt_bps_avi[];
+extern av_export const PixelFormatTag avpriv_pix_fmt_bps_mov[];
 
 #endif /* AVCODEC_RAW_H */
diff --git a/libavcodec/rawdec.c b/libavcodec/rawdec.c
index 3b69e49..ee1f397 100644
--- a/libavcodec/rawdec.c
+++ b/libavcodec/rawdec.c
@@ -2,20 +2,20 @@
  * Raw Video Decoder
  * Copyright (c) 2001 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,72 +25,68 @@
  */
 
 #include "avcodec.h"
+#include "bswapdsp.h"
+#include "get_bits.h"
 #include "internal.h"
 #include "raw.h"
+#include "libavutil/avassert.h"
 #include "libavutil/buffer.h"
 #include "libavutil/common.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
 
 typedef struct RawVideoContext {
+    AVClass *av_class; // first member; NOTE(review): presumably required by the AVOption/priv_class machinery - confirm
     AVBufferRef *palette;
     int frame_size;  /* size of the frame in bytes */
     int flip;
     int is_2_4_bpp; // 2 or 4 bpp raw in avi/mov
     int is_yuv2;
+    int is_lt_16bpp; // 16bpp pixfmt and bits_per_coded_sample < 16
+    int tff; // "top" option: -1 (default) = no override; 0/1 = raw_decode() marks the frame interlaced with this top_field_first
+
+    BswapDSPContext bbdsp; // byte-swap routines, initialised by ff_bswapdsp_init() in raw_init_decoder()
+    void *bitstream_buf; // scratch buffer receiving the byte-swapped packet for packed input with a swap code
+    unsigned int bitstream_buf_size; // size bookkeeping for bitstream_buf, passed to av_fast_padded_malloc()
 } RawVideoContext;
 
-static const PixelFormatTag pix_fmt_bps_avi[] = {
-    { AV_PIX_FMT_PAL8,    4 },
-    { AV_PIX_FMT_PAL8,    8 },
-    { AV_PIX_FMT_RGB444, 12 },
-    { AV_PIX_FMT_RGB555, 15 },
-    { AV_PIX_FMT_RGB555, 16 },
-    { AV_PIX_FMT_BGR24,  24 },
-    { AV_PIX_FMT_RGB32,  32 },
-    { AV_PIX_FMT_NONE,    0 },
+static const AVOption options[]={ /* private decoder options; values land in RawVideoContext */
+{"top", "top field first", offsetof(RawVideoContext, tff), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 1, AV_OPT_FLAG_DECODING_PARAM|AV_OPT_FLAG_VIDEO_PARAM}, /* int in [-1, 1], default -1; raw_decode() only applies it when >= 0 */
+{NULL} /* NULL-named final entry ends the option list */
 };
 
-static const PixelFormatTag pix_fmt_bps_mov[] = {
-    { AV_PIX_FMT_MONOWHITE, 1 },
-    { AV_PIX_FMT_PAL8,      2 },
-    { AV_PIX_FMT_PAL8,      4 },
-    { AV_PIX_FMT_PAL8,      8 },
-    // FIXME swscale does not support 16 bit in .mov, sample 16bit.mov
-    // http://developer.apple.com/documentation/QuickTime/QTFF/QTFFChap3/qtff3.html
-    { AV_PIX_FMT_RGB555BE, 16 },
-    { AV_PIX_FMT_RGB24,    24 },
-    { AV_PIX_FMT_ARGB,     32 },
-    { AV_PIX_FMT_MONOWHITE,33 },
-    { AV_PIX_FMT_NONE,      0 },
+static const AVClass rawdec_class = { /* class descriptor tying `options` to the decoder; referenced as .priv_class of ff_rawvideo_decoder */
+    .class_name = "rawdec",
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
 };
 
-static enum AVPixelFormat find_pix_fmt(const PixelFormatTag *tags,
-                                       unsigned int fourcc)
+#if LIBAVCODEC_VERSION_MAJOR < 55 /* old ff_ name is only compiled for libavcodec major versions below 55 */
+enum AVPixelFormat ff_find_pix_fmt(const PixelFormatTag *tags, unsigned int fourcc)
 {
-    while (tags->pix_fmt >= 0) {
-        if (tags->fourcc == fourcc)
-            return tags->pix_fmt;
-        tags++;
-    }
-    return AV_PIX_FMT_YUV420P;
+    return avpriv_find_pix_fmt(tags, fourcc); /* pure forwarder: same arguments, same result as avpriv_find_pix_fmt() */
 }
+#endif
 
 static av_cold int raw_init_decoder(AVCodecContext *avctx)
 {
     RawVideoContext *context = avctx->priv_data;
     const AVPixFmtDescriptor *desc;
 
-    if (avctx->codec_tag == MKTAG('r', 'a', 'w', ' '))
-        avctx->pix_fmt = find_pix_fmt(pix_fmt_bps_mov,
+    ff_bswapdsp_init(&context->bbdsp);
+
+    if (   avctx->codec_tag == MKTAG('r','a','w',' ')
+        || avctx->codec_tag == MKTAG('N','O','1','6'))
+        avctx->pix_fmt = avpriv_find_pix_fmt(avpriv_pix_fmt_bps_mov,
                                       avctx->bits_per_coded_sample);
     else if (avctx->codec_tag == MKTAG('W', 'R', 'A', 'W'))
-        avctx->pix_fmt = find_pix_fmt(pix_fmt_bps_avi,
+        avctx->pix_fmt = avpriv_find_pix_fmt(avpriv_pix_fmt_bps_avi,
                                       avctx->bits_per_coded_sample);
-    else if (avctx->codec_tag)
-        avctx->pix_fmt = find_pix_fmt(ff_raw_pix_fmt_tags, avctx->codec_tag);
+    else if (avctx->codec_tag && (avctx->codec_tag & 0xFFFFFF) != MKTAG('B','I','T', 0))
+        avctx->pix_fmt = avpriv_find_pix_fmt(ff_raw_pix_fmt_tags, avctx->codec_tag);
     else if (avctx->pix_fmt == AV_PIX_FMT_NONE && avctx->bits_per_coded_sample)
-        avctx->pix_fmt = find_pix_fmt(pix_fmt_bps_avi,
+        avctx->pix_fmt = avpriv_find_pix_fmt(avpriv_pix_fmt_bps_avi,
                                       avctx->bits_per_coded_sample);
 
     desc = av_pix_fmt_desc_get(avctx->pix_fmt);
@@ -109,15 +105,22 @@ static av_cold int raw_init_decoder(AVCodecContext *avctx)
             memset(context->palette->data, 0, AVPALETTE_SIZE);
     }
 
-    context->frame_size = avpicture_get_size(avctx->pix_fmt, avctx->width,
-                                             avctx->height);
     if ((avctx->bits_per_coded_sample == 4 || avctx->bits_per_coded_sample == 2) &&
         avctx->pix_fmt == AV_PIX_FMT_PAL8 &&
-       (!avctx->codec_tag || avctx->codec_tag == MKTAG('r','a','w',' ')))
+       (!avctx->codec_tag || avctx->codec_tag == MKTAG('r','a','w',' '))) {
         context->is_2_4_bpp = 1;
+        context->frame_size = avpicture_get_size(avctx->pix_fmt,
+                                                 FFALIGN(avctx->width, 16),
+                                                 avctx->height);
+    } else {
+        context->is_lt_16bpp = av_get_bits_per_pixel(desc) == 16 && avctx->bits_per_coded_sample && avctx->bits_per_coded_sample < 16;
+        context->frame_size = avpicture_get_size(avctx->pix_fmt, avctx->width,
+                                                 avctx->height);
+    }
 
     if ((avctx->extradata_size >= 9 &&
          !memcmp(avctx->extradata + avctx->extradata_size - 9, "BottomUp", 9)) ||
+        avctx->codec_tag == MKTAG('c','y','u','v') ||
         avctx->codec_tag == MKTAG(3, 0, 0, 0) ||
         avctx->codec_tag == MKTAG('W','R','A','W'))
         context->flip = 1;
@@ -135,6 +138,34 @@ static void flip(AVCodecContext *avctx, AVPicture *picture)
     picture->linesize[0] *= -1;
 }
 
+/*
+ * Scale sample to 16-bit resolution: (x << (16 - bits)) moves the sample to the top bits and (x >> (2 * bits - 16)) copies its most significant bits into the vacated low bits.
+ * NOTE(review): the right-shift count is negative for bits < 8 and x is expanded twice, so pass a side-effect-free sample with 8 <= bits <= 16. */
+#define SCALE16(x, bits) (((x) << (16 - (bits))) | ((x) >> (2 * (bits) - 16)))
+
+/**
+ * Define static function name(avctx, dst, buf, buf_size, packed): expand samples of avctx->bits_per_coded_sample bits from buf into 16-bit words in dst (w16 writes output; r16 reads input words in the non-packed path only).
+ * NOTE(review): the packed loop is bounded by avctx->width * avctx->height, not by buf_size - confirm callers provide enough input and a large enough dst. */
+#define MKSCALE16(name, r16, w16) \
+static void name(AVCodecContext *avctx, uint8_t * dst, const uint8_t *buf, int buf_size, int packed) \
+{ \
+    int i; \
+    if (!packed) { /* one sample per 16-bit input word; a trailing odd byte is ignored */ \
+        for (i = 0; i + 1 < buf_size; i += 2) \
+            w16(dst + i, SCALE16(r16(buf + i), avctx->bits_per_coded_sample)); \
+    } else { /* samples stored back-to-back, bits_per_coded_sample bits each */ \
+        GetBitContext gb; \
+        init_get_bits(&gb, buf, buf_size * 8); \
+        for (i = 0; i < avctx->width * avctx->height; i++) { \
+            int sample = get_bits(&gb, avctx->bits_per_coded_sample); \
+            w16(dst + i*2, SCALE16(sample, avctx->bits_per_coded_sample)); /* output sample i occupies bytes 2*i and 2*i+1 */ \
+        } \
+   } \
+}
+
+MKSCALE16(scale16be, AV_RB16, AV_WB16) /* big-endian 16-bit read/write variant */
+MKSCALE16(scale16le, AV_RL16, AV_WL16) /* little-endian 16-bit read/write variant */
+
 static int raw_decode(AVCodecContext *avctx, void *data, int *got_frame,
                       AVPacket *avpkt)
 {
@@ -142,8 +173,9 @@ static int raw_decode(AVCodecContext *avctx, void *data, int *got_frame,
     RawVideoContext *context       = avctx->priv_data;
     const uint8_t *buf             = avpkt->data;
     int buf_size                   = avpkt->size;
-    int need_copy                  = !avpkt->buf || context->is_2_4_bpp || context->is_yuv2;
-    int res;
+    int linesize_align             = 4;
+    int res, len;
+    int need_copy                  = !avpkt->buf || context->is_2_4_bpp || context->is_yuv2 || context->is_lt_16bpp;
 
     AVFrame   *frame   = data;
     AVPicture *picture = data;
@@ -155,12 +187,19 @@ static int raw_decode(AVCodecContext *avctx, void *data, int *got_frame,
     if (res < 0)
         return res;
 
-    if (buf_size < context->frame_size - (avctx->pix_fmt == AV_PIX_FMT_PAL8 ?
-                                          AVPALETTE_SIZE : 0))
-        return -1;
+    av_frame_set_pkt_pos     (frame, avctx->internal->pkt->pos);
+    av_frame_set_pkt_duration(frame, avctx->internal->pkt->duration);
+
+    if (context->tff >= 0) {
+        frame->interlaced_frame = 1;
+        frame->top_field_first  = context->tff;
+    }
+
+    if ((res = av_image_check_size(avctx->width, avctx->height, 0, avctx)) < 0)
+        return res;
 
     if (need_copy)
-        frame->buf[0] = av_buffer_alloc(context->frame_size);
+        frame->buf[0] = av_buffer_alloc(FFMAX(context->frame_size, buf_size));
     else
         frame->buf[0] = av_buffer_ref(avpkt->buf);
     if (!frame->buf[0])
@@ -172,21 +211,48 @@ static int raw_decode(AVCodecContext *avctx, void *data, int *got_frame,
         uint8_t *dst = frame->buf[0]->data;
         buf_size = context->frame_size - AVPALETTE_SIZE;
         if (avctx->bits_per_coded_sample == 4) {
-            for (i = 0; 2 * i + 1 < buf_size; i++) {
+            for (i = 0; 2 * i + 1 < buf_size && i<avpkt->size; i++) {
                 dst[2 * i + 0] = buf[i] >> 4;
                 dst[2 * i + 1] = buf[i] & 15;
             }
+            linesize_align = 8;
         } else {
-            for (i = 0; 4 * i + 3 < buf_size; i++) {
+            av_assert0(avctx->bits_per_coded_sample == 2);
+            for (i = 0; 4 * i + 3 < buf_size && i<avpkt->size; i++) {
                 dst[4 * i + 0] = buf[i] >> 6;
                 dst[4 * i + 1] = buf[i] >> 4 & 3;
                 dst[4 * i + 2] = buf[i] >> 2 & 3;
                 dst[4 * i + 3] = buf[i]      & 3;
             }
+            linesize_align = 16;
         }
         buf = dst;
+    } else if (context->is_lt_16bpp) {
+        uint8_t *dst = frame->buf[0]->data;
+        int packed = (avctx->codec_tag & 0xFFFFFF) == MKTAG('B','I','T', 0);
+        int swap   =  avctx->codec_tag >> 24;
+
+        if (packed && swap) {
+            av_fast_padded_malloc(&context->bitstream_buf, &context->bitstream_buf_size, buf_size);
+            if (!context->bitstream_buf)
+                return AVERROR(ENOMEM);
+            if (swap == 16)
+                context->bbdsp.bswap16_buf(context->bitstream_buf, (const uint16_t*)buf, buf_size / 2);
+            else if (swap == 32)
+                context->bbdsp.bswap_buf(context->bitstream_buf, (const uint32_t*)buf, buf_size / 4);
+            else
+                return AVERROR_INVALIDDATA;
+            buf = context->bitstream_buf;
+        }
+
+        if (desc->flags & AV_PIX_FMT_FLAG_BE)
+            scale16be(avctx, dst, buf, buf_size, packed);
+        else
+            scale16le(avctx, dst, buf, buf_size, packed);
+
+        buf = dst;
     } else if (need_copy) {
-        memcpy(frame->buf[0]->data, buf, FFMIN(buf_size, context->frame_size));
+        memcpy(frame->buf[0]->data, buf, buf_size);
         buf = frame->buf[0]->data;
     }
 
@@ -194,9 +260,18 @@ static int raw_decode(AVCodecContext *avctx, void *data, int *got_frame,
         avctx->codec_tag == MKTAG('A', 'V', 'u', 'p'))
         buf += buf_size - context->frame_size;
 
+    len = context->frame_size - (avctx->pix_fmt==AV_PIX_FMT_PAL8 ? AVPALETTE_SIZE : 0);
+    if (buf_size < len && (avctx->codec_tag & 0xFFFFFF) != MKTAG('B','I','T', 0)) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid buffer size, packet size %d < expected frame_size %d\n", buf_size, len);
+        av_buffer_unref(&frame->buf[0]);
+        return AVERROR(EINVAL);
+    }
+
     if ((res = avpicture_fill(picture, buf, avctx->pix_fmt,
-                              avctx->width, avctx->height)) < 0)
+                              avctx->width, avctx->height)) < 0) {
+        av_buffer_unref(&frame->buf[0]);
         return res;
+    }
 
     if (avctx->pix_fmt == AV_PIX_FMT_PAL8) {
         const uint8_t *pal = av_packet_get_side_data(avpkt, AV_PKT_DATA_PALETTE,
@@ -205,20 +280,44 @@ static int raw_decode(AVCodecContext *avctx, void *data, int *got_frame,
         if (pal) {
             av_buffer_unref(&context->palette);
             context->palette = av_buffer_alloc(AVPALETTE_SIZE);
-            if (!context->palette)
+            if (!context->palette) {
+                av_buffer_unref(&frame->buf[0]);
                 return AVERROR(ENOMEM);
+            }
             memcpy(context->palette->data, pal, AVPALETTE_SIZE);
             frame->palette_has_changed = 1;
         }
     }
 
+    if ((avctx->pix_fmt==AV_PIX_FMT_BGR24    ||
+        avctx->pix_fmt==AV_PIX_FMT_GRAY8    ||
+        avctx->pix_fmt==AV_PIX_FMT_RGB555LE ||
+        avctx->pix_fmt==AV_PIX_FMT_RGB555BE ||
+        avctx->pix_fmt==AV_PIX_FMT_RGB565LE ||
+        avctx->pix_fmt==AV_PIX_FMT_MONOWHITE ||
+        avctx->pix_fmt==AV_PIX_FMT_PAL8) &&
+        FFALIGN(frame->linesize[0], linesize_align) * avctx->height <= buf_size)
+        frame->linesize[0] = FFALIGN(frame->linesize[0], linesize_align);
+
+    if (avctx->pix_fmt == AV_PIX_FMT_NV12 && avctx->codec_tag == MKTAG('N', 'V', '1', '2') &&
+        FFALIGN(frame->linesize[0], linesize_align) * avctx->height +
+        FFALIGN(frame->linesize[1], linesize_align) * ((avctx->height + 1) / 2) <= buf_size) {
+        int la0 = FFALIGN(frame->linesize[0], linesize_align);
+        frame->data[1] += (la0 - frame->linesize[0]) * avctx->height;
+        frame->linesize[0] = la0;
+        frame->linesize[1] = FFALIGN(frame->linesize[1], linesize_align);
+    }
+
     if ((avctx->pix_fmt == AV_PIX_FMT_PAL8 && buf_size < context->frame_size) ||
         (desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL)) {
         frame->buf[1]  = av_buffer_ref(context->palette);
-        if (!frame->buf[1])
+        if (!frame->buf[1]) {
+            av_buffer_unref(&frame->buf[0]);
             return AVERROR(ENOMEM);
+        }
         frame->data[1] = frame->buf[1]->data;
     }
+
     if (avctx->pix_fmt == AV_PIX_FMT_BGR24 &&
         ((frame->linesize[0] + 3) & ~3) * avctx->height <= buf_size)
         frame->linesize[0] = (frame->linesize[0] + 3) & ~3;
@@ -232,6 +331,11 @@ static int raw_decode(AVCodecContext *avctx, void *data, int *got_frame,
         avctx->codec_tag == MKTAG('Y', 'V', 'U', '9'))
         FFSWAP(uint8_t *, picture->data[1], picture->data[2]);
 
+    if (avctx->codec_tag == AV_RL32("I420") && (avctx->width+1)*(avctx->height+1) * 3/2 == buf_size) {
+        picture->data[1] = picture->data[1] +  (avctx->width+1)*(avctx->height+1) -avctx->width*avctx->height;
+        picture->data[2] = picture->data[2] + ((avctx->width+1)*(avctx->height+1) -avctx->width*avctx->height)*5/4;
+    }
+
     if (avctx->codec_tag == AV_RL32("yuv2") &&
         avctx->pix_fmt   == AV_PIX_FMT_YUYV422) {
         int x, y;
@@ -243,6 +347,12 @@ static int raw_decode(AVCodecContext *avctx, void *data, int *got_frame,
         }
     }
 
+    if (avctx->field_order > AV_FIELD_PROGRESSIVE) { /* we have interlaced material flagged in container */
+        frame->interlaced_frame = 1;
+        if (avctx->field_order == AV_FIELD_TT || avctx->field_order == AV_FIELD_TB)
+            frame->top_field_first = 1;
+    }
+
     *got_frame = 1;
     return buf_size;
 }
@@ -264,4 +374,5 @@ AVCodec ff_rawvideo_decoder = {
     .init           = raw_init_decoder,
     .close          = raw_close_decoder,
     .decode         = raw_decode,
+    .priv_class     = &rawdec_class,
 };
diff --git a/libavcodec/rawenc.c b/libavcodec/rawenc.c
index bca0d08..c5d3d2e 100644
--- a/libavcodec/rawenc.c
+++ b/libavcodec/rawenc.c
@@ -2,20 +2,20 @@
  * Raw Video Encoder
  * Copyright (c) 2001 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -36,8 +36,8 @@ static av_cold int raw_encode_init(AVCodecContext *avctx)
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
 
     avctx->coded_frame            = avctx->priv_data;
+    avcodec_get_frame_defaults(avctx->coded_frame);
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
-    avctx->coded_frame->key_frame = 1;
     avctx->bits_per_coded_sample = av_get_bits_per_pixel(desc);
     if(!avctx->codec_tag)
         avctx->codec_tag = avcodec_pix_fmt_to_codec_tag(avctx->pix_fmt);
@@ -52,7 +52,7 @@ static int raw_encode(AVCodecContext *avctx, AVPacket *pkt,
     if (ret < 0)
         return ret;
 
-    if ((ret = ff_alloc_packet(pkt, ret)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, pkt, ret)) < 0)
         return ret;
     if ((ret = avpicture_layout((const AVPicture *)frame, avctx->pix_fmt, avctx->width,
                                 avctx->height, pkt->data, pkt->size)) < 0)
diff --git a/libavcodec/rdft.c b/libavcodec/rdft.c
index 54cf14a..218dd4c 100644
--- a/libavcodec/rdft.c
+++ b/libavcodec/rdft.c
@@ -2,20 +2,20 @@
  * (I)RDFT transforms
  * Copyright (c) 2009 Alex Converse <alex dot converse at gmail dot com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include <stdlib.h>
diff --git a/libavcodec/rdft.h b/libavcodec/rdft.h
index 8ff620f..37c40e7 100644
--- a/libavcodec/rdft.h
+++ b/libavcodec/rdft.h
@@ -2,24 +2,24 @@
  * (I)RDFT transforms
  * Copyright (c) 2009 Alex Converse <alex dot converse at gmail dot com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVCODEC_RDFT_H
+#if !defined(AVCODEC_RDFT_H) && (!defined(FFT_FLOAT) || FFT_FLOAT)
 #define AVCODEC_RDFT_H
 
 #include "config.h"
diff --git a/libavcodec/realtextdec.c b/libavcodec/realtextdec.c
new file mode 100644
index 0000000..4578897
--- /dev/null
+++ b/libavcodec/realtextdec.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2012 Clément Bœsch
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * RealText subtitle decoder
+ * @see http://service.real.com/help/library/guides/ProductionGuide/prodguide/htmfiles/realtext.htm
+ */
+
+#include "avcodec.h"
+#include "ass.h"
+#include "libavutil/avstring.h"
+#include "libavutil/bprint.h"
+
+static int rt_event_to_ass(AVBPrint *buf, const char *p)
+{
+    /* Flatten one RealText event into an ASS line: tags are dropped except
+     * <br>/<br/> (written as "\N"), whitespace runs collapse to one space. */
+    int last_was_space = 1; /* starts set, so leading whitespace is dropped */
+
+    for (; *p; p++) {
+        const char *tag_end;
+        if (*p != '<') {
+            const int is_space = av_isspace(*p);
+            if (!is_space || !last_was_space)
+                av_bprint_chars(buf, is_space ? ' ' : *p, 1);
+            last_was_space = is_space;
+            continue;
+        }
+        tag_end = strchr(p, '>');
+        if (!tag_end)
+            break; /* unterminated tag: the rest of the event is discarded */
+        if (!av_strncasecmp(p, "<br>",  4) ||
+            !av_strncasecmp(p, "<br/>", 5))
+            av_bprintf(buf, "\\N");
+        p = tag_end; /* the loop increment then steps past '>' */
+    }
+    av_bprintf(buf, "\r\n");
+    return 0;
+}
+
+static int realtext_decode_frame(AVCodecContext *avctx,
+                                 void *data, int *got_sub_ptr, AVPacket *avpkt)
+{
+    AVBPrint line;
+    AVSubtitle *const subtitle = data;
+    const char *const text = avpkt->data;
+
+    av_bprint_init(&line, 0, 4096);
+    /* pts and duration are passed on unscaled: they are already in the same
+     * timebase as ASS (1/100) */
+    if (text && avpkt->size > 0 && !rt_event_to_ass(&line, text))
+        ff_ass_add_rect(subtitle, line.str, avpkt->pts, avpkt->duration, 0);
+    *got_sub_ptr = subtitle->num_rects > 0;
+    av_bprint_finalize(&line, NULL);
+    return avpkt->size;
+}
+
+AVCodec ff_realtext_decoder = { /* codec table entry for the RealText subtitle decoder */
+    .name           = "realtext",
+    .long_name      = NULL_IF_CONFIG_SMALL("RealText subtitle"),
+    .type           = AVMEDIA_TYPE_SUBTITLE,
+    .id             = AV_CODEC_ID_REALTEXT,
+    .decode         = realtext_decode_frame, /* one packet in, ASS text out */
+    .init           = ff_ass_subtitle_header_default, /* NOTE(review): presumably installs the default ASS header -- confirm in ass.h */
+};
diff --git a/libavcodec/rectangle.h b/libavcodec/rectangle.h
index 73e8b0a..594a760 100644
--- a/libavcodec/rectangle.h
+++ b/libavcodec/rectangle.h
@@ -2,20 +2,20 @@
  * rectangle filling function
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -28,9 +28,9 @@
 #ifndef AVCODEC_RECTANGLE_H
 #define AVCODEC_RECTANGLE_H
 
-#include <assert.h>
 #include "config.h"
 #include "libavutil/common.h"
+#include "libavutil/avassert.h"
 
 /**
  * fill a rectangle.
@@ -40,13 +40,14 @@
  */
 static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
     uint8_t *p= (uint8_t*)vp;
-    assert(size==1 || size==2 || size==4);
-    assert(w<=4);
+    av_assert2(size==1 || size==2 || size==4);
+    av_assert2(w<=4);
 
     w      *= size;
     stride *= size;
 
-    assert((stride&(w-1))==0);
+    av_assert2((((long)vp)&(FFMIN(w, 8<<(HAVE_NEON|ARCH_PPC|HAVE_MMX))-1)) == 0);
+    av_assert2((stride&(w-1))==0);
     if(w==2){
         const uint16_t v= size==4 ? val : val*0x0101;
         *(uint16_t*)(p + 0*stride)= v;
@@ -116,8 +117,8 @@ static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride,
         *(uint32_t*)(p +12+3*stride)= val;
 #endif
     }else
-        assert(0);
-    assert(h==4);
+        av_assert2(0);
+    av_assert2(h==4);
 }
 
 #endif /* AVCODEC_RECTANGLE_H */
diff --git a/libavcodec/remove_extradata_bsf.c b/libavcodec/remove_extradata_bsf.c
index 460482a..e880b95 100644
--- a/libavcodec/remove_extradata_bsf.c
+++ b/libavcodec/remove_extradata_bsf.c
@@ -1,20 +1,20 @@
 /*
  * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -49,7 +49,6 @@ static int remove_extradata(AVBitStreamFilterContext *bsfc, AVCodecContext *avct
 }
 
 AVBitStreamFilter ff_remove_extradata_bsf={
-    "remove_extra",
-    0,
-    remove_extradata,
+    .name   = "remove_extra",
+    .filter = remove_extradata,
 };
diff --git a/libavcodec/resample.c b/libavcodec/resample.c
new file mode 100644
index 0000000..c45aa16
--- /dev/null
+++ b/libavcodec/resample.c
@@ -0,0 +1,443 @@
+/*
+ * samplerate conversion for both audio and video
+ * Copyright (c) 2000 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * samplerate conversion for both audio and video
+ */
+
+#include <string.h>
+
+#include "avcodec.h"
+#include "audioconvert.h"
+#include "libavutil/opt.h"
+#include "libavutil/mem.h"
+#include "libavutil/samplefmt.h"
+
+#if FF_API_AVCODEC_RESAMPLE
+
+#define MAX_CHANNELS 8
+
+struct AVResampleContext;
+
+static const char *context_to_name(void *ptr) /* name callback used in audioresample_context_class */
+{
+    return "audioresample"; /* constant name; ptr is not consulted */
+}
+
+static const AVOption options[] = {{NULL}}; /* no options: the table holds a single {NULL} entry */
+static const AVClass audioresample_context_class = { /* stored into the resample context by av_audio_resample_init() */
+    "ReSampleContext", context_to_name, options, LIBAVUTIL_VERSION_INT
+};
+
+struct ReSampleContext {
+    struct AVResampleContext *resample_context; ///< per-channel resampler created by av_resample_init()
+    short *temp[MAX_CHANNELS];         ///< per channel: input samples not consumed by the previous call
+    int temp_len;                      ///< number of samples held in each temp[] entry
+    float ratio;                       ///< output_rate / input_rate
+    /* channel convert */
+    int input_channels, output_channels, filter_channels; ///< filter_channels = min(input, output)
+    AVAudioConvert *convert_ctx[2];    ///< [0]: input format to S16, [1]: S16 to output format; unset when that side is already S16
+    enum AVSampleFormat sample_fmt[2]; ///< input and output sample format
+    unsigned sample_size[2];           ///< size of one sample in sample_fmt
+    short *buffer[2];                  ///< buffers used for conversion to S16
+    unsigned buffer_size[2];           ///< sizes of allocated buffers
+};
+
+/**
+ * Downmix interleaved stereo to mono.
+ *
+ * Every output sample is (left + right) >> 1; the sum is formed in int
+ * before the result is narrowed back to short.
+ *
+ * @param output receives n1 mono samples
+ * @param input  2 * n1 interleaved samples, read as consecutive pairs
+ * @param n1     number of pairs to process; nothing is done for n1 <= 0
+ */
+static void stereo_to_mono(short *output, short *input, int n1)
+{
+    const short *pair = input;
+    short *mono = output;
+    int left;
+
+    /* 'left' counts the pairs that still have to be averaged */
+    for (left = n1; left > 0; left--) {
+        const int sum = pair[0] + pair[1];
+
+        *mono++ = sum >> 1;
+        pair += 2;
+    }
+}
+
+/**
+ * Upmix mono to interleaved stereo by writing every sample twice.
+ *
+ * Samples are handled strictly in order: one input sample is read, then
+ * both of its output slots are written.
+ *
+ * @param output receives 2 * n1 interleaved samples
+ * @param input  n1 mono samples
+ * @param n1     number of input samples; nothing is done for n1 <= 0
+ */
+static void mono_to_stereo(short *output, short *input, int n1)
+{
+    const short *mono = input;
+    short *pair = output;
+    int left;
+
+    /* 'left' counts the input samples that still have to be duplicated */
+    for (left = n1; left > 0; left--) {
+        const short sample = *mono++;
+
+        pair[0] = sample;
+        pair[1] = sample;
+        pair += 2;
+    }
+}
+
+/**
+ * Downmix interleaved 5.1 input, ordered [fl, fr, c, lfe, rl, rr], to stereo:
+ * - Left  = front_left  + rear_gain * rear_left  + center_gain * center
+ * - Right = front_right + rear_gain * rear_right + center_gain * center
+ * with rear_gain = 0.5 and center_gain = 0.7 (about -3 dB); lfe is not used.
+ *
+ * @param output   output[0] / output[1] are the left / right write cursors,
+ *                 each is advanced by one per converted frame
+ * @param input    interleaved source frames
+ * @param channels distance between frames in shorts; slots 0, 1, 2, 4, 5 are read
+ * @param samples  number of frames to convert
+ */
+static void surround_to_stereo(short **output, short *input, int channels, int samples)
+{
+    const short *frame = input;
+    int n;
+
+    for (n = samples; n > 0; n--, frame += channels) {
+        const int front_l = frame[0];
+        const int front_r = frame[1];
+        const int center  = frame[2];
+        const int rear_l  = frame[4], rear_r  = frame[5]; /* frame[3] is lfe */
+        short l, r;
+
+        /* the weighted sums are evaluated in double and handed to av_clip_int16() */
+        l = av_clip_int16(front_l + (0.5 * rear_l) + (0.7 * center));
+        r = av_clip_int16(front_r + (0.5 * rear_r) + (0.7 * center));
+
+        *output[0]++ = l;
+        *output[1]++ = r;
+    }
+}
+
+/* Split interleaved frames into per-channel planes, advancing each output[ch]. */
+static void deinterleave(short **output, short *input, int channels, int samples)
+{
+    const short *src = input;
+    int frame, ch;
+
+    for (frame = 0; frame < samples; frame++)
+        for (ch = 0; ch < channels; ch++)
+            *output[ch]++ = *src++;
+}
+
+/* Merge per-channel planes into interleaved frames, advancing each input[ch]. */
+static void interleave(short *output, short **input, int channels, int samples)
+{
+    short *dst = output;
+    int frame, ch;
+
+    for (frame = 0; frame < samples; frame++)
+        for (ch = 0; ch < channels; ch++)
+            *dst++ = *input[ch]++;
+}
+
+/* Expand two planar channels into six-channel interleaved frames; only the
+ * first three slots carry signal, the remaining three are written as zero. */
+static void ac3_5p1_mux(short *output, short *input1, short *input2, int n)
+{
+    short *frame = output;
+    int k;
+
+    for (k = 0; k < n; k++, frame += 6) {
+        const short l = input1[k], r = input2[k];
+
+        frame[0] = l;                  /* left */
+        frame[1] = (l / 2) + (r / 2);  /* center */
+        frame[2] = r;                  /* right */
+        frame[3] = frame[4] = frame[5] = 0; /* left surround, right surround, low freq */
+    }
+}
+
+#define SUPPORT_RESAMPLE(ch1, ch2, ch3, ch4, ch5, ch6, ch7, ch8) \
+    ((ch8)<<7 | (ch7)<<6 | (ch6)<<5 | (ch5)<<4 | (ch4)<<3 | (ch3)<<2 | (ch2)<<1 | (ch1)<<0)
+
+static const uint8_t supported_resampling[MAX_CHANNELS] = { /* row: input channels - 1; bit n-1 set: n output channels allowed */
+    // output ch:    1  2  3  4  5  6  7  8
+    SUPPORT_RESAMPLE(1, 1, 0, 0, 0, 0, 0, 0), // 1 input channel
+    SUPPORT_RESAMPLE(1, 1, 0, 0, 0, 1, 0, 0), // 2 input channels
+    SUPPORT_RESAMPLE(0, 0, 1, 0, 0, 0, 0, 0), // 3 input channels
+    SUPPORT_RESAMPLE(0, 0, 0, 1, 0, 0, 0, 0), // 4 input channels
+    SUPPORT_RESAMPLE(0, 0, 0, 0, 1, 0, 0, 0), // 5 input channels
+    SUPPORT_RESAMPLE(0, 1, 0, 0, 0, 1, 0, 0), // 6 input channels
+    SUPPORT_RESAMPLE(0, 0, 0, 0, 0, 0, 1, 0), // 7 input channels
+    SUPPORT_RESAMPLE(0, 0, 0, 0, 0, 0, 0, 1), // 8 input channels
+};
+
+ReSampleContext *av_audio_resample_init(int output_channels, int input_channels,
+                                        int output_rate, int input_rate,
+                                        enum AVSampleFormat sample_fmt_out,
+                                        enum AVSampleFormat sample_fmt_in,
+                                        int filter_length, int log2_phase_count,
+                                        int linear, double cutoff)
+{
+    ReSampleContext *s; /* returned on success; released on every failure path */
+
+    if (input_channels < 1 || input_channels > MAX_CHANNELS) { /* array index below */
+        av_log(NULL, AV_LOG_ERROR,
+               "Resampling with input channels greater than %d is unsupported.\n",
+               MAX_CHANNELS);
+        return NULL;
+    }
+    if (output_channels < 1 || output_channels > MAX_CHANNELS || /* shift count below */
+        !(supported_resampling[input_channels-1] & (1<<(output_channels-1)))) {
+        int i;
+        av_log(NULL, AV_LOG_ERROR, "Unsupported audio resampling. Allowed "
+               "output channels for %d input channel%s", input_channels,
+               input_channels > 1 ? "s:" : ":");
+        for (i = 0; i < MAX_CHANNELS; i++)
+            if (supported_resampling[input_channels-1] & (1<<i))
+                av_log(NULL, AV_LOG_ERROR, " %d", i + 1);
+        av_log(NULL, AV_LOG_ERROR, "\n");
+        return NULL;
+    }
+
+    s = av_mallocz(sizeof(ReSampleContext));
+    if (!s) {
+        av_log(NULL, AV_LOG_ERROR, "Can't allocate memory for resample context.\n");
+        return NULL;
+    }
+
+    s->ratio = (float)output_rate / (float)input_rate;
+    s->input_channels = input_channels;
+    s->output_channels = output_channels;
+    s->filter_channels = s->input_channels; /* becomes min(input, output) */
+    if (s->output_channels < s->filter_channels)
+        s->filter_channels = s->output_channels;
+
+    s->sample_fmt[0]  = sample_fmt_in;
+    s->sample_fmt[1]  = sample_fmt_out;
+    s->sample_size[0] = av_get_bytes_per_sample(s->sample_fmt[0]);
+    s->sample_size[1] = av_get_bytes_per_sample(s->sample_fmt[1]);
+
+    if (s->sample_fmt[0] != AV_SAMPLE_FMT_S16) { /* input side: fmt -> S16 */
+        if (!(s->convert_ctx[0] = av_audio_convert_alloc(AV_SAMPLE_FMT_S16, 1,
+                                                         s->sample_fmt[0], 1, NULL, 0))) {
+            av_log(s, AV_LOG_ERROR,
+                   "Cannot convert %s sample format to s16 sample format\n",
+                   av_get_sample_fmt_name(s->sample_fmt[0]));
+            goto fail;
+        }
+    }
+    if (s->sample_fmt[1] != AV_SAMPLE_FMT_S16) { /* output side: S16 -> fmt */
+        if (!(s->convert_ctx[1] = av_audio_convert_alloc(s->sample_fmt[1], 1,
+                                                         AV_SAMPLE_FMT_S16, 1, NULL, 0))) {
+            av_log(s, AV_LOG_ERROR,
+                   "Cannot convert s16 sample format to %s sample format\n",
+                   av_get_sample_fmt_name(s->sample_fmt[1]));
+            goto fail;
+        }
+    }
+
+    s->resample_context = av_resample_init(output_rate, input_rate,
+                                           filter_length, log2_phase_count,
+                                           linear, cutoff);
+    if (!s->resample_context) /* the class pointer is stored through it */
+        goto fail;
+    *(const AVClass**)s->resample_context = &audioresample_context_class;
+    return s;
+fail:
+    av_audio_convert_free(s->convert_ctx[0]);
+    av_audio_convert_free(s->convert_ctx[1]);
+    av_free(s);
+    return NULL;
+}
+
+/* Resample 'nb_samples' input samples (per channel) from 'input' into 'output'.
+ * Returns the number of output samples per channel, or 0 on failure. */
+int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples)
+{
+    int i, nb_samples1;
+    short *bufin[MAX_CHANNELS]  = { NULL }; /* NULL so 'fail' may free unused slots */
+    short *bufout[MAX_CHANNELS] = { NULL };
+    short *buftmp2[MAX_CHANNELS], *buftmp3[MAX_CHANNELS];
+    short *output_bak = NULL;
+    int lenout;
+
+    if (s->input_channels == s->output_channels && s->ratio == 1.0 && 0) {
+        /* nothing to do */
+        memcpy(output, input, nb_samples * s->input_channels * sizeof(short));
+        return nb_samples;
+    }
+
+    if (s->sample_fmt[0] != AV_SAMPLE_FMT_S16) { /* convert the input to S16 first */
+        int istride[1] = { s->sample_size[0] };
+        int ostride[1] = { 2 };
+        const void *ibuf[1] = { input };
+        void       *obuf[1];
+        unsigned input_size = nb_samples * s->input_channels * 2;
+        if (!s->buffer_size[0] || s->buffer_size[0] < input_size) {
+            av_free(s->buffer[0]);
+            s->buffer_size[0] = input_size;
+            s->buffer[0] = av_malloc(s->buffer_size[0]);
+            if (!s->buffer[0]) {
+                s->buffer_size[0] = 0; /* do not advertise a buffer that is gone */
+                av_log(s->resample_context, AV_LOG_ERROR, "Could not allocate buffer\n");
+                return 0;
+            }
+        }
+        obuf[0] = s->buffer[0];
+        if (av_audio_convert(s->convert_ctx[0], obuf, ostride,
+                             ibuf, istride, nb_samples * s->input_channels) < 0) {
+            av_log(s->resample_context, AV_LOG_ERROR,
+                   "Audio sample format conversion failed\n");
+            return 0;
+        }
+        input = s->buffer[0];
+    }
+
+    lenout= 2*s->output_channels*nb_samples * s->ratio + 16;
+    if (s->sample_fmt[1] != AV_SAMPLE_FMT_S16) { /* resample into a scratch S16 buffer */
+        int out_size = lenout * av_get_bytes_per_sample(s->sample_fmt[1]) *
+                       s->output_channels;
+        output_bak = output;
+        if (!s->buffer_size[1] || s->buffer_size[1] < out_size) {
+            av_free(s->buffer[1]);
+            s->buffer_size[1] = out_size;
+            s->buffer[1] = av_malloc(s->buffer_size[1]);
+            if (!s->buffer[1]) {
+                s->buffer_size[1] = 0; /* do not advertise a buffer that is gone */
+                av_log(s->resample_context, AV_LOG_ERROR, "Could not allocate buffer\n");
+                return 0;
+            }
+        }
+        output = s->buffer[1];
+    }
+
+    /* XXX: move those malloc to resample init code */
+    for (i = 0; i < s->filter_channels; i++) {
+        bufin[i] = av_malloc_array((nb_samples + s->temp_len), sizeof(short));
+        bufout[i] = av_malloc_array(lenout, sizeof(short));
+        if (!bufin[i] || !bufout[i]) {
+            av_log(s->resample_context, AV_LOG_ERROR, "Could not allocate buffer\n");
+            nb_samples1 = 0;
+            goto fail;
+        }
+        memcpy(bufin[i], s->temp[i], s->temp_len * sizeof(short));
+        buftmp2[i] = bufin[i] + s->temp_len;
+    }
+
+    if (s->input_channels == 2 && s->output_channels == 1) {
+        buftmp3[0] = output;
+        stereo_to_mono(buftmp2[0], input, nb_samples);
+    } else if (s->output_channels >= 2 && s->input_channels == 1) {
+        buftmp3[0] = bufout[0];
+        memcpy(buftmp2[0], input, nb_samples * sizeof(short));
+    } else if (s->input_channels == 6 && s->output_channels ==2) {
+        buftmp3[0] = bufout[0];
+        buftmp3[1] = bufout[1];
+        surround_to_stereo(buftmp2, input, s->input_channels, nb_samples);
+    } else if (s->output_channels >= s->input_channels && s->input_channels >= 2) {
+        for (i = 0; i < s->input_channels; i++) {
+            buftmp3[i] = bufout[i];
+        }
+        deinterleave(buftmp2, input, s->input_channels, nb_samples);
+    } else {
+        buftmp3[0] = output;
+        memcpy(buftmp2[0], input, nb_samples * sizeof(short));
+    }
+
+    nb_samples += s->temp_len;
+    /* resample each channel */
+    nb_samples1 = 0; /* avoid warning */
+    for (i = 0; i < s->filter_channels; i++) {
+        int consumed;
+        int is_last = i + 1 == s->filter_channels;
+        short *leftover;
+
+        nb_samples1 = av_resample(s->resample_context, buftmp3[i], bufin[i],
+                                  &consumed, nb_samples, lenout, is_last);
+        s->temp_len = nb_samples - consumed;
+        leftover = av_realloc_array(s->temp[i], s->temp_len, sizeof(short));
+        if (!leftover && s->temp_len) { /* old s->temp[i] is still owned by s */
+            av_log(s->resample_context, AV_LOG_ERROR, "Could not allocate buffer\n");
+            s->temp_len = 0; /* nothing is carried over to the next call */
+            nb_samples1 = 0;
+            goto fail;
+        }
+        s->temp[i] = leftover;
+        memcpy(s->temp[i], bufin[i] + consumed, s->temp_len * sizeof(short));
+    }
+
+    if (s->output_channels == 2 && s->input_channels == 1) {
+        mono_to_stereo(output, buftmp3[0], nb_samples1);
+    } else if (s->output_channels == 6 && s->input_channels == 2) {
+        ac3_5p1_mux(output, buftmp3[0], buftmp3[1], nb_samples1);
+    } else if ((s->output_channels == s->input_channels && s->input_channels >= 2) ||
+               (s->output_channels == 2 && s->input_channels == 6)) {
+        interleave(output, buftmp3, s->output_channels, nb_samples1);
+    }
+
+    if (s->sample_fmt[1] != AV_SAMPLE_FMT_S16) {
+        int istride[1] = { 2 };
+        int ostride[1] = { s->sample_size[1] };
+        const void *ibuf[1] = { output };
+        void       *obuf[1] = { output_bak };
+        if (av_audio_convert(s->convert_ctx[1], obuf, ostride,
+                             ibuf, istride, nb_samples1 * s->output_channels) < 0) {
+            av_log(s->resample_context, AV_LOG_ERROR,
+                   "Audio sample format conversion failed\n");
+            nb_samples1 = 0; /* leave through 'fail' so bufin/bufout are freed */
+            goto fail;
+        }
+    }
+
+fail:
+    for (i = 0; i < s->filter_channels; i++) {
+        av_free(bufin[i]);
+        av_free(bufout[i]);
+    }
+
+    return nb_samples1;
+}
+
+void audio_resample_close(ReSampleContext *s)
+{
+    int ch = s->filter_channels; /* audio_resample() only fills this many temp[] slots */
+    while (ch-- > 0)
+        av_freep(&s->temp[ch]);
+    av_freep(&s->buffer[1]);
+    av_freep(&s->buffer[0]);
+    av_audio_convert_free(s->convert_ctx[1]);
+    av_audio_convert_free(s->convert_ctx[0]);
+    av_resample_close(s->resample_context);
+    av_free(s);
+}
+
+#endif
diff --git a/libavcodec/resample2.c b/libavcodec/resample2.c
new file mode 100644
index 0000000..cd9fe1c
--- /dev/null
+++ b/libavcodec/resample2.c
@@ -0,0 +1,319 @@
+/*
+ * audio resampling
+ * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * audio resampling
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "libavutil/avassert.h"
+#include "avcodec.h"
+#include "libavutil/common.h"
+
+#if FF_API_AVCODEC_RESAMPLE
+
+#ifndef CONFIG_RESAMPLE_HP /* CONFIG_RESAMPLE_HP not defined: 16-bit fixed-point coefficients */
+#define FILTER_SHIFT 15 /* coefficients are built with scale 1<<FILTER_SHIFT; av_resample() shifts the sum back */
+
+#define FELEM int16_t /* type of one stored filter coefficient */
+#define FELEM2 int32_t /* accumulator type for the sample*coefficient sum */
+#define FELEML int64_t /* type used for the linear-interpolation product in av_resample() */
+#define FELEM_MAX INT16_MAX /* upper clipping bound for coefficients in build_filter() */
+#define FELEM_MIN INT16_MIN /* lower clipping bound for coefficients in build_filter() */
+#define WINDOW_TYPE 9 /* passed as the "type" argument of build_filter() */
+#elif !defined(CONFIG_RESAMPLE_AUDIOPHILE_KIDDY_MODE) /* CONFIG_RESAMPLE_HP only: 32-bit fixed-point coefficients */
+#define FILTER_SHIFT 30
+
+#define FELEM int32_t
+#define FELEM2 int64_t
+#define FELEML int64_t
+#define FELEM_MAX INT32_MAX
+#define FELEM_MIN INT32_MIN
+#define WINDOW_TYPE 12
+#else /* both CONFIG_RESAMPLE_HP and CONFIG_RESAMPLE_AUDIOPHILE_KIDDY_MODE: double coefficients, no scaling */
+#define FILTER_SHIFT 0
+
+#define FELEM double
+#define FELEM2 double
+#define FELEML double /* no FELEM_MIN/FELEM_MAX here: build_filter() does not clip in this mode */
+#define WINDOW_TYPE 24
+#endif
+
+
+typedef struct AVResampleContext{ /* state of one resampler created by av_resample_init() */
+    const AVClass *av_class;
+    FELEM *filter_bank; /* filter_length coefficients per phase, (1<<phase_shift)+1 phases */
+    int filter_length; /* number of taps of each phase filter */
+    int ideal_dst_incr; /* dst_incr as computed at init; restored once a compensation run ends */
+    int dst_incr; /* current index advance per output sample, in units of 1/src_incr */
+    int index; /* input position: sample number in the upper bits, phase in the low phase_shift bits */
+    int frac; /* remainder of the index advance, kept in [0, src_incr) by av_resample() */
+    int src_incr; /* denominator for dst_incr and frac */
+    int compensation_distance; /* output samples left before dst_incr reverts to ideal_dst_incr; 0 = none */
+    int phase_shift; /* log2 of the number of phases */
+    int phase_mask; /* (1<<phase_shift)-1, extracts the phase from index */
+    int linear; /* non-zero: interpolate linearly between adjacent phase filters */
+}AVResampleContext;
+
+/**
+ * 0th order modified bessel function of the first kind, summed as the series sum_i (x*x/4)^i / (i!)^2.
+ */
+static double bessel(double x){
+    double v=1; /* running sum, starts with the i=0 term */
+    double lastv=0;
+    double t=1; /* current series term */
+    int i;
+
+    x= x*x/4;
+    for(i=1; v != lastv; i++){ /* stop once adding a term no longer changes the sum */
+        lastv=v;
+        t *= x/(i*i); /* term_i = term_(i-1) * (x*x/4) / i^2 */
+        v += t;
+    }
+    return v;
+}
+
+/**
+ * Build a polyphase filterbank: phase_count filters of tap_count coefficients each, stored consecutively in filter.
+ * @param factor resampling factor; values above 1.0 are treated as 1.0
+ * @param scale wanted sum of coefficients for each filter
+ * @param type 0->cubic, 1->blackman nuttall windowed sinc, 2..16->kaiser windowed sinc beta=2..16
+ * @return 0 on success, negative on error (AVERROR(ENOMEM) if the temporary table cannot be allocated)
+ */
+static int build_filter(FELEM *filter, double factor, int tap_count, int phase_count, int scale, int type){
+    int ph, i;
+    double x, y, w;
+    double *tab = av_malloc_array(tap_count, sizeof(*tab)); /* unnormalized coefficients of the current phase */
+    const int center= (tap_count-1)/2;
+
+    if (!tab)
+        return AVERROR(ENOMEM);
+
+    /* if upsampling, only need to interpolate, no filter */
+    if (factor > 1.0)
+        factor = 1.0;
+
+    for(ph=0;ph<phase_count;ph++) {
+        double norm = 0; /* sum of this phase's unnormalized coefficients */
+        for(i=0;i<tap_count;i++) {
+            x = M_PI * ((double)(i - center) - (double)ph / phase_count) * factor; /* tap offset from center, shifted by the phase fraction */
+            if (x == 0) y = 1.0; /* value of sin(x)/x at x == 0 */
+            else        y = sin(x) / x;
+            switch(type){
+            case 0:{ /* cubic: replaces the sinc value computed above */
+                const float d= -0.5; //first order derivative = -0.5
+                x = fabs(((double)(i - center) - (double)ph / phase_count) * factor);
+                if(x<1.0) y= 1 - 3*x*x + 2*x*x*x + d*(            -x*x + x*x*x);
+                else      y=                       d*(-4 + 8*x - 5*x*x + x*x*x);
+                break;}
+            case 1: /* sinc multiplied by a 4-term cosine window */
+                w = 2.0*x / (factor*tap_count) + M_PI;
+                y *= 0.3635819 - 0.4891775 * cos(w) + 0.1365995 * cos(2*w) - 0.0106411 * cos(3*w);
+                break;
+            default: /* sinc multiplied by bessel(type*sqrt(1-w*w)); type is used as the window parameter */
+                w = 2.0*x / (factor*tap_count*M_PI);
+                y *= bessel(type*sqrt(FFMAX(1-w*w, 0)));
+                break;
+            }
+
+            tab[i] = y;
+            norm += y;
+        }
+
+        /* normalize so that the coefficients of each phase sum to scale (before rounding and clipping) */
+        for(i=0;i<tap_count;i++) {
+#ifdef CONFIG_RESAMPLE_AUDIOPHILE_KIDDY_MODE
+            filter[ph * tap_count + i] = tab[i] / norm;
+#else
+            filter[ph * tap_count + i] = av_clip(lrintf(tab[i] * scale / norm), FELEM_MIN, FELEM_MAX);
+#endif
+        }
+    }
+#if 0
+    {
+#define LEN 1024
+        int j,k;
+        double sine[LEN + tap_count];
+        double filtered[LEN];
+        double maxff=-2, minff=2, maxsf=-2, minsf=2;
+        for(i=0; i<LEN; i++){
+            double ss=0, sf=0, ff=0;
+            for(j=0; j<LEN+tap_count; j++)
+                sine[j]= cos(i*j*M_PI/LEN);
+            for(j=0; j<LEN; j++){
+                double sum=0;
+                ph=0;
+                for(k=0; k<tap_count; k++)
+                    sum += filter[ph * tap_count + k] * sine[k+j];
+                filtered[j]= sum / (1<<FILTER_SHIFT);
+                ss+= sine[j + center] * sine[j + center];
+                ff+= filtered[j] * filtered[j];
+                sf+= sine[j + center] * filtered[j];
+            }
+            ss= sqrt(2*ss/LEN);
+            ff= sqrt(2*ff/LEN);
+            sf= 2*sf/LEN;
+            maxff= FFMAX(maxff, ff);
+            minff= FFMIN(minff, ff);
+            maxsf= FFMAX(maxsf, sf);
+            minsf= FFMIN(minsf, sf);
+            if(i%11==0){
+                av_log(NULL, AV_LOG_ERROR, "i:%4d ss:%f ff:%13.6e-%13.6e sf:%13.6e-%13.6e\n", i, ss, maxff, minff, maxsf, minsf);
+                minff=minsf= 2;
+                maxff=maxsf= -2;
+            }
+        }
+    }
+#endif
+
+    av_free(tab);
+    return 0;
+}
+
+AVResampleContext *av_resample_init(int out_rate, int in_rate, int filter_size, int phase_shift, int linear, double cutoff){ /* allocate a context and build its filter bank; returns NULL on any failure */
+    AVResampleContext *c= av_mallocz(sizeof(AVResampleContext));
+    double factor= FFMIN(out_rate * cutoff / in_rate, 1.0); /* never above 1.0, so filter_length below is at least filter_size */
+    int phase_count= 1<<phase_shift;
+
+    if (!c)
+        return NULL;
+
+    c->phase_shift= phase_shift;
+    c->phase_mask= phase_count-1;
+    c->linear= linear;
+
+    c->filter_length= FFMAX((int)ceil(filter_size/factor), 1);
+    c->filter_bank= av_mallocz_array(c->filter_length, (phase_count+1)*sizeof(FELEM)); /* one phase more than build_filter() fills */
+    if (!c->filter_bank)
+        goto error;
+    if (build_filter(c->filter_bank, factor, c->filter_length, phase_count, 1<<FILTER_SHIFT, WINDOW_TYPE))
+        goto error;
+    memcpy(&c->filter_bank[c->filter_length*phase_count+1], c->filter_bank, (c->filter_length-1)*sizeof(FELEM)); /* extra phase = phase 0 moved up by one tap ... */
+    c->filter_bank[c->filter_length*phase_count]= c->filter_bank[c->filter_length - 1]; /* ... with phase 0's last tap placed first; read by the linear path of av_resample() */
+
+    if(!av_reduce(&c->src_incr, &c->dst_incr, out_rate, in_rate * (int64_t)phase_count, INT32_MAX/2)) /* src_incr/dst_incr = out_rate/(in_rate*phase_count); give up when av_reduce() returns 0 */
+        goto error;
+    c->ideal_dst_incr= c->dst_incr;
+
+    c->index= -phase_count*((c->filter_length-1)/2); /* start (filter_length-1)/2 samples before the first input sample */
+
+    return c;
+error:
+    av_free(c->filter_bank); /* may still be NULL here because c was zero-allocated */
+    av_free(c);
+    return NULL;
+}
+
+void av_resample_close(AVResampleContext *c){ /* free the filter bank and the context; c is dereferenced, so it must not be NULL */
+    av_freep(&c->filter_bank);
+    av_freep(&c); /* only clears the local copy of the pointer; the caller's pointer is left dangling */
+}
+
+void av_resample_compensate(AVResampleContext *c, int sample_delta, int compensation_distance){ /* temporarily change dst_incr for the next compensation_distance output samples */
+//    sample_delta += (c->ideal_dst_incr - c->dst_incr)*(int64_t)c->compensation_distance / c->ideal_dst_incr;
+    c->compensation_distance= compensation_distance; /* av_resample() counts this down and then restores ideal_dst_incr */
+    c->dst_incr = c->ideal_dst_incr - c->ideal_dst_incr * (int64_t)sample_delta / compensation_distance; /* NOTE(review): divides by compensation_distance, so 0 is not a valid argument here - confirm callers never pass it */
+}
+
+int av_resample(AVResampleContext *c, short *dst, short *src, int *consumed, int src_size, int dst_size, int update_ctx){ /* resample src into dst; returns the number of samples written to dst */
+    int dst_index, i;
+    int index= c->index;
+    int frac= c->frac;
+    int dst_incr_frac= c->dst_incr % c->src_incr; /* remainder part of the per-output-sample advance */
+    int dst_incr=      c->dst_incr / c->src_incr; /* whole index steps per output sample */
+    int compensation_distance= c->compensation_distance;
+
+  if(compensation_distance == 0 && c->filter_length == 1 && c->phase_shift==0){ /* single tap, single phase: copy input samples without filtering */
+        int64_t index2= ((int64_t)index)<<32; /* input position with 32 fractional bits */
+        int64_t incr= (1LL<<32) * c->dst_incr / c->src_incr;
+        dst_size= FFMIN(dst_size, (src_size-1-index) * (int64_t)c->src_incr / c->dst_incr); /* cap the output count by the input available after index */
+
+        for(dst_index=0; dst_index < dst_size; dst_index++){
+            dst[dst_index] = src[index2>>32];
+            index2 += incr;
+        }
+        index += dst_index * dst_incr; /* advance index/frac by dst_index output steps in one go */
+        index += (frac + dst_index * (int64_t)dst_incr_frac) / c->src_incr;
+        frac   = (frac + dst_index * (int64_t)dst_incr_frac) % c->src_incr;
+  }else{
+    for(dst_index=0; dst_index < dst_size; dst_index++){
+        FELEM *filter= c->filter_bank + c->filter_length*(index & c->phase_mask); /* coefficients of the current phase */
+        int sample_index= index >> c->phase_shift; /* first input sample covered by the filter */
+        FELEM2 val=0;
+
+        if(sample_index < 0){ /* window starts before src[0]: fold negative positions with FFABS and wrap modulo src_size */
+            for(i=0; i<c->filter_length; i++)
+                val += src[FFABS(sample_index + i) % src_size] * filter[i];
+        }else if(sample_index + c->filter_length > src_size){ /* not enough input left for a full window: stop */
+            break;
+        }else if(c->linear){
+            FELEM2 v2=0; /* same window filtered with the next phase's coefficients */
+            for(i=0; i<c->filter_length; i++){
+                val += src[sample_index + i] * (FELEM2)filter[i];
+                v2  += src[sample_index + i] * (FELEM2)filter[i + c->filter_length];
+            }
+            val+=(v2-val)*(FELEML)frac / c->src_incr; /* move from val towards v2 by frac/src_incr */
+        }else{
+            for(i=0; i<c->filter_length; i++){
+                val += src[sample_index + i] * (FELEM2)filter[i];
+            }
+        }
+
+#ifdef CONFIG_RESAMPLE_AUDIOPHILE_KIDDY_MODE
+        dst[dst_index] = av_clip_int16(lrintf(val));
+#else
+        val = (val + (1<<(FILTER_SHIFT-1)))>>FILTER_SHIFT; /* round, then remove the 1<<FILTER_SHIFT coefficient scale */
+        dst[dst_index] = (unsigned)(val + 32768) > 65535 ? (val>>31) ^ 32767 : val; /* values outside the int16 range are replaced using the sign bits of val */
+#endif
+
+        frac += dst_incr_frac;
+        index += dst_incr;
+        if(frac >= c->src_incr){ /* carry the accumulated remainder into index */
+            frac -= c->src_incr;
+            index++;
+        }
+
+        if(dst_index + 1 == compensation_distance){ /* compensation run finished: go back to the ideal increment */
+            compensation_distance= 0;
+            dst_incr_frac= c->ideal_dst_incr % c->src_incr;
+            dst_incr=      c->ideal_dst_incr / c->src_incr;
+        }
+    }
+  }
+    *consumed= FFMAX(index, 0) >> c->phase_shift; /* whole input samples the position has moved past */
+    if(index>=0) index &= c->phase_mask; /* keep only the phase; the sample part was reported through *consumed */
+
+    if(compensation_distance){
+        compensation_distance -= dst_index; /* output samples still to be compensated in later calls */
+        av_assert2(compensation_distance > 0);
+    }
+    if(update_ctx){ /* store the state back only when asked; otherwise c is left unchanged */
+        c->frac= frac;
+        c->index= index;
+        c->dst_incr= dst_incr_frac + c->src_incr*dst_incr;
+        c->compensation_distance= compensation_distance;
+    }
+
+    return dst_index;
+}
+
+#endif
diff --git a/libavcodec/rl.h b/libavcodec/rl.h
index 367cc98..c80283d 100644
--- a/libavcodec/rl.h
+++ b/libavcodec/rl.h
@@ -2,20 +2,20 @@
  * Copyright (c) 2000-2002 Fabrice Bellard
  * Copyright (c) 2002-2004 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/rl2.c b/libavcodec/rl2.c
index 54b3e6a..6e63ed1 100644
--- a/libavcodec/rl2.c
+++ b/libavcodec/rl2.c
@@ -2,20 +2,20 @@
  * RL2 Video Decoder
  * Copyright (C) 2008 Sascha Sommer (saschasommer@freenet.de)
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -53,7 +53,7 @@ typedef struct Rl2Context {
  * @param s rl2 context
  * @param in input buffer
  * @param size input buffer size
- * @param out ouput buffer
+ * @param out output buffer
  * @param stride stride of the output buffer
  * @param video_base offset of the rle data inside the frame
  */
@@ -155,7 +155,7 @@ static av_cold int rl2_decode_init(AVCodecContext *avctx)
 
     /** initialize palette */
     for (i = 0; i < AVPALETTE_COUNT; i++)
-        s->palette[i] = AV_RB24(&avctx->extradata[6 + i * 3]);
+        s->palette[i] = 0xFFU << 24 | AV_RB24(&avctx->extradata[6 + i * 3]);
 
     /** decode background frame if present */
     back_size = avctx->extradata_size - EXTRADATA1_SIZE;
@@ -181,10 +181,8 @@ static int rl2_decode_frame(AVCodecContext *avctx,
     int ret, buf_size  = avpkt->size;
     Rl2Context *s = avctx->priv_data;
 
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     /** run length decode */
     rl2_rle_decode(s, buf, buf_size, frame->data[0], frame->linesize[0],
diff --git a/libavcodec/rle.c b/libavcodec/rle.c
index cbbde93..d2ec68c 100644
--- a/libavcodec/rle.c
+++ b/libavcodec/rle.c
@@ -2,20 +2,20 @@
  * RLE encoder
  * Copyright (c) 2007 Bobby Bingham
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include "avcodec.h"
diff --git a/libavcodec/rle.h b/libavcodec/rle.h
index 00261d3..2485132 100644
--- a/libavcodec/rle.h
+++ b/libavcodec/rle.h
@@ -1,20 +1,20 @@
 /*
  * RLE encoder
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/rnd_avg.h b/libavcodec/rnd_avg.h
index 412cda5..344775e 100644
--- a/libavcodec/rnd_avg.h
+++ b/libavcodec/rnd_avg.h
@@ -1,18 +1,21 @@
 /*
- * This file is part of Libav.
+ * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp>
+ * Copyright (c) 2011 Oskar Arvidsson
  *
- * Libav is free software; you can redistribute it and/or
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/roqaudioenc.c b/libavcodec/roqaudioenc.c
index f97d5d6..b0b76d0 100644
--- a/libavcodec/roqaudioenc.c
+++ b/libavcodec/roqaudioenc.c
@@ -4,20 +4,20 @@
  * Copyright (c) 2005 Eric Lasota
  *    Based on RoQ specs (c)2001 Tim Ferguson
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -147,6 +147,8 @@ static int roq_dpcm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
             context->input_frames++;
             return 0;
         }
+    }
+    if (context->input_frames < 8) {
         in = context->frame_buffer;
     }
 
@@ -155,15 +157,13 @@ static int roq_dpcm_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
         context->lastSample[1] &= 0xFF00;
     }
 
-    if (context->input_frames == 7 || !in)
+    if (context->input_frames == 7)
         data_size = avctx->channels * context->buffered_samples;
     else
         data_size = avctx->channels * avctx->frame_size;
 
-    if ((ret = ff_alloc_packet(avpkt, ROQ_HEADER_SIZE + data_size))) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+    if ((ret = ff_alloc_packet2(avctx, avpkt, ROQ_HEADER_SIZE + data_size)) < 0)
         return ret;
-    }
     out = avpkt->data;
 
     bytestream_put_byte(&out, stereo ? 0x21 : 0x20);
diff --git a/libavcodec/roqvideo.c b/libavcodec/roqvideo.c
index 77df079..eb8fc25 100644
--- a/libavcodec/roqvideo.c
+++ b/libavcodec/roqvideo.c
@@ -2,20 +2,20 @@
  * Copyright (C) 2003 Mike Melanson
  * Copyright (C) 2003 Dr. Tim Ferguson
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/roqvideo.h b/libavcodec/roqvideo.h
index 3f00022..3da6eaa 100644
--- a/libavcodec/roqvideo.h
+++ b/libavcodec/roqvideo.h
@@ -2,20 +2,20 @@
  * Copyright (C) 2003 Mike Melanson
  * Copyright (C) 2003 Dr. Tim Ferguson
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -43,6 +43,7 @@ struct RoqTempData;
 
 typedef struct RoqContext {
 
+    const AVClass *class;
     AVCodecContext *avctx;
     AVFrame *last_frame;
     AVFrame *current_frame;
@@ -69,6 +70,9 @@ typedef struct RoqContext {
     const AVFrame *frame_to_enc;
     uint8_t *out_buf;
     struct RoqTempData *tmpData;
+
+    int quake3_compat; // Quake 3 compatibility option
+
 } RoqContext;
 
 #define RoQ_INFO              0x1001
diff --git a/libavcodec/roqvideodec.c b/libavcodec/roqvideodec.c
index ac7d4ba..8efa300 100644
--- a/libavcodec/roqvideodec.c
+++ b/libavcodec/roqvideodec.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2003 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,10 +25,7 @@
  *   http://www.csse.monash.edu.au/~timf/
  */
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
+#include "libavutil/avassert.h"
 #include "avcodec.h"
 #include "bytestream.h"
 #include "internal.h"
@@ -72,9 +69,19 @@ static void roqvideo_decode_frame(RoqContext *ri)
 
     chunk_start = bytestream2_tell(&ri->gb);
     xpos = ypos = 0;
+
+    if (chunk_size > bytestream2_get_bytes_left(&ri->gb)) {
+        av_log(ri->avctx, AV_LOG_ERROR, "Chunk does not fit in input buffer\n");
+        chunk_size = bytestream2_get_bytes_left(&ri->gb);
+    }
+
     while (bytestream2_tell(&ri->gb) < chunk_start + chunk_size) {
         for (yp = ypos; yp < ypos + 16; yp += 8)
             for (xp = xpos; xp < xpos + 16; xp += 8) {
+                if (bytestream2_tell(&ri->gb) >= chunk_start + chunk_size) {
+                    av_log(ri->avctx, AV_LOG_VERBOSE, "Chunk is too short\n");
+                    return;
+                }
                 if (vqflg_pos < 0) {
                     vqflg = bytestream2_get_le16(&ri->gb);
                     vqflg_pos = 7;
@@ -106,6 +113,10 @@ static void roqvideo_decode_frame(RoqContext *ri)
                         if(k & 0x01) x += 4;
                         if(k & 0x02) y += 4;
 
+                        if (bytestream2_tell(&ri->gb) >= chunk_start + chunk_size) {
+                            av_log(ri->avctx, AV_LOG_VERBOSE, "Chunk is too short\n");
+                            return;
+                        }
                         if (vqflg_pos < 0) {
                             vqflg = bytestream2_get_le16(&ri->gb);
                             vqflg_pos = 7;
@@ -140,7 +151,7 @@ static void roqvideo_decode_frame(RoqContext *ri)
                     }
                     break;
                 default:
-                    av_log(ri->avctx, AV_LOG_ERROR, "Unknown vq code: %d\n", vqid);
+                    av_assert2(0);
             }
         }
 
@@ -178,7 +189,8 @@ static av_cold int roq_decode_init(AVCodecContext *avctx)
         return AVERROR(ENOMEM);
     }
 
-    avctx->pix_fmt = AV_PIX_FMT_YUV444P;
+    avctx->pix_fmt = AV_PIX_FMT_YUVJ444P;
+    avctx->color_range = AVCOL_RANGE_JPEG;
 
     return 0;
 }
@@ -193,10 +205,8 @@ static int roq_decode_frame(AVCodecContext *avctx,
     int copy= !s->current_frame->data[0];
     int ret;
 
-    if ((ret = ff_reget_buffer(avctx, s->current_frame)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "  RoQ: get_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, s->current_frame)) < 0)
         return ret;
-    }
 
     if(copy)
         av_picture_copy((AVPicture*)s->current_frame, (AVPicture*)s->last_frame,
diff --git a/libavcodec/roqvideoenc.c b/libavcodec/roqvideoenc.c
index af0089f..1c5970f 100644
--- a/libavcodec/roqvideoenc.c
+++ b/libavcodec/roqvideoenc.c
@@ -5,27 +5,27 @@
  * Copyright (C) 2004-2007 Eric Lasota
  *    Based on RoQ specs (C) 2001 Tim Ferguson
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
  * @file
  * id RoQ encoder by Vitor. Based on the Switchblade3 library and the
- * Switchblade3 Libav glue by Eric Lasota.
+ * Switchblade3 FFmpeg glue by Eric Lasota.
  */
 
 /*
@@ -57,6 +57,7 @@
 #include <string.h>
 
 #include "libavutil/attributes.h"
+#include "libavutil/opt.h"
 #include "roqvideo.h"
 #include "bytestream.h"
 #include "elbg.h"
@@ -69,7 +70,7 @@
  * Maximum number of generated 4x4 codebooks. Can't be 256 to workaround a
  * Quake 3 bug.
  */
-#define MAX_CBS_4x4 255
+#define MAX_CBS_4x4 256
 
 #define MAX_CBS_2x2 256 ///< Maximum number of 2x2 codebooks.
 
@@ -248,7 +249,7 @@ static void create_cel_evals(RoqContext *enc, RoqTempdata *tempData)
 {
     int n=0, x, y, i;
 
-    tempData->cel_evals = av_malloc(enc->width*enc->height/64 * sizeof(CelEvaluation));
+    tempData->cel_evals = av_malloc_array(enc->width*enc->height/64, sizeof(CelEvaluation));
 
     /* Map to the ROQ quadtree order */
     for (y=0; y<enc->height; y+=16)
@@ -540,7 +541,7 @@ static void remap_codebooks(RoqContext *enc, RoqTempdata *tempData)
     int i, j, idx=0;
 
     /* Make remaps for the final codebook usage */
-    for (i=0; i<MAX_CBS_4x4; i++) {
+    for (i=0; i<(enc->quake3_compat ? MAX_CBS_4x4-1 : MAX_CBS_4x4); i++) {
         if (tempData->codebooks.usedCB4[i]) {
             tempData->i2f4[i] = idx;
             tempData->f2i4[idx] = i;
@@ -798,16 +799,16 @@ static void generate_codebook(RoqContext *enc, RoqTempdata *tempdata,
     int i, j, k;
     int c_size = size*size/4;
     int *buf;
-    int *codebook = av_malloc(6*c_size*cbsize*sizeof(int));
+    int *codebook = av_malloc_array(6*c_size, cbsize*sizeof(int));
     int *closest_cb;
 
     if (size == 4)
-        closest_cb = av_malloc(6*c_size*inputCount*sizeof(int));
+        closest_cb = av_malloc_array(6*c_size, inputCount*sizeof(int));
     else
         closest_cb = tempdata->closest_cb2;
 
-    ff_init_elbg(points, 6*c_size, inputCount, codebook, cbsize, 1, closest_cb, &enc->randctx);
-    ff_do_elbg(points, 6*c_size, inputCount, codebook, cbsize, 1, closest_cb, &enc->randctx);
+    avpriv_init_elbg(points, 6*c_size, inputCount, codebook, cbsize, 1, closest_cb, &enc->randctx);
+    avpriv_do_elbg(points, 6*c_size, inputCount, codebook, cbsize, 1, closest_cb, &enc->randctx);
 
     if (size == 4)
         av_free(closest_cb);
@@ -833,8 +834,8 @@ static void generate_new_codebooks(RoqContext *enc, RoqTempdata *tempData)
     int max = enc->width*enc->height/16;
     uint8_t mb2[3*4];
     roq_cell *results4 = av_malloc(sizeof(roq_cell)*MAX_CBS_4x4*4);
-    uint8_t *yuvClusters=av_malloc(sizeof(int)*max*6*4);
-    int *points = av_malloc(max*6*4*sizeof(int));
+    uint8_t *yuvClusters=av_malloc_array(max, sizeof(int)*6*4);
+    int *points = av_malloc_array(max, 6*4*sizeof(int));
     int bias;
 
     /* Subsample YUV data */
@@ -847,11 +848,11 @@ static void generate_new_codebooks(RoqContext *enc, RoqTempdata *tempData)
     }
 
     /* Create 4x4 codebooks */
-    generate_codebook(enc, tempData, points, max, results4, 4, MAX_CBS_4x4);
+    generate_codebook(enc, tempData, points, max, results4, 4, (enc->quake3_compat ? MAX_CBS_4x4-1 : MAX_CBS_4x4));
 
-    codebooks->numCB4 = MAX_CBS_4x4;
+    codebooks->numCB4 = (enc->quake3_compat ? MAX_CBS_4x4-1 : MAX_CBS_4x4);
 
-    tempData->closest_cb2 = av_malloc(max*4*sizeof(int));
+    tempData->closest_cb2 = av_malloc_array(max, 4*sizeof(int));
 
     /* Create 2x2 codebooks */
     generate_codebook(enc, tempData, points, max*4, enc->cb2x2, 2, MAX_CBS_2x2);
@@ -880,7 +881,7 @@ static void generate_new_codebooks(RoqContext *enc, RoqTempdata *tempData)
     av_free(results4);
 }
 
-static void roq_encode_video(RoqContext *enc)
+static int roq_encode_video(RoqContext *enc)
 {
     RoqTempdata *tempData = enc->tmpData;
     int i;
@@ -901,10 +902,14 @@ static void roq_encode_video(RoqContext *enc)
         gather_data_for_cel(tempData->cel_evals + i, enc, tempData);
 
     /* Quake 3 can't handle chunks bigger than 65535 bytes */
-    if (tempData->mainChunkSize/8 > 65535) {
+    if (tempData->mainChunkSize/8 > 65535 && enc->quake3_compat) {
+        if (enc->lambda > 100000) {
+            av_log(enc->avctx, AV_LOG_ERROR, "Cannot encode video in Quake compatible form\n");
+            return AVERROR(EINVAL);
+        }
         av_log(enc->avctx, AV_LOG_ERROR,
-               "Warning, generated a frame too big (%d > 65535), "
-               "try using a smaller qscale value.\n",
+               "Warning, generated a frame too big for Quake (%d > 65535), "
+               "now switching to a bigger qscale value.\n",
                tempData->mainChunkSize/8);
         enc->lambda *= 1.5;
         tempData->mainChunkSize = 0;
@@ -935,6 +940,8 @@ static void roq_encode_video(RoqContext *enc)
     av_free(tempData->closest_cb2);
 
     enc->framesSinceKeyframe++;
+
+    return 0;
 }
 
 static av_cold int roq_encode_end(AVCodecContext *avctx)
@@ -962,11 +969,16 @@ static av_cold int roq_encode_init(AVCodecContext *avctx)
     enc->framesSinceKeyframe = 0;
     if ((avctx->width & 0xf) || (avctx->height & 0xf)) {
         av_log(avctx, AV_LOG_ERROR, "Dimensions must be divisible by 16\n");
-        return -1;
+        return AVERROR(EINVAL);
+    }
+
+    if (avctx->width > 65535 || avctx->height > 65535) {
+        av_log(avctx, AV_LOG_ERROR, "Dimensions are max %d\n", enc->quake3_compat ? 32768 : 65535);
+        return AVERROR(EINVAL);
     }
 
     if (((avctx->width)&(avctx->width-1))||((avctx->height)&(avctx->height-1)))
-        av_log(avctx, AV_LOG_ERROR, "Warning: dimensions not power of two\n");
+        av_log(avctx, AV_LOG_ERROR, "Warning: dimensions not power of two, this is not supported by quake\n");
 
     enc->width = avctx->width;
     enc->height = avctx->height;
@@ -984,16 +996,16 @@ static av_cold int roq_encode_init(AVCodecContext *avctx)
     enc->tmpData      = av_malloc(sizeof(RoqTempdata));
 
     enc->this_motion4 =
-        av_mallocz((enc->width*enc->height/16)*sizeof(motion_vect));
+        av_mallocz_array((enc->width*enc->height/16), sizeof(motion_vect));
 
     enc->last_motion4 =
-        av_malloc ((enc->width*enc->height/16)*sizeof(motion_vect));
+        av_malloc_array ((enc->width*enc->height/16), sizeof(motion_vect));
 
     enc->this_motion8 =
-        av_mallocz((enc->width*enc->height/64)*sizeof(motion_vect));
+        av_mallocz_array((enc->width*enc->height/64), sizeof(motion_vect));
 
     enc->last_motion8 =
-        av_malloc ((enc->width*enc->height/64)*sizeof(motion_vect));
+        av_malloc_array ((enc->width*enc->height/64), sizeof(motion_vect));
 
     return 0;
 }
@@ -1041,10 +1053,8 @@ static int roq_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     /* 138 bits max per 8x8 block +
      *     256 codebooks*(6 bytes 2x2 + 4 bytes 4x4) + 8 bytes frame header */
     size = ((enc->width * enc->height / 64) * 138 + 7) / 8 + 256 * (6 + 4) + 8;
-    if ((ret = ff_alloc_packet(pkt, size)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet with size %d.\n", size);
+    if ((ret = ff_alloc_packet2(avctx, pkt, size)) < 0)
         return ret;
-    }
     enc->out_buf = pkt->data;
 
     /* Check for I frame */
@@ -1054,11 +1064,9 @@ static int roq_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     if (enc->first_frame) {
         /* Alloc memory for the reconstruction data (we must know the stride
          for that) */
-        if (ff_get_buffer(avctx, enc->current_frame, 0) ||
-            ff_get_buffer(avctx, enc->last_frame, 0)) {
-            av_log(avctx, AV_LOG_ERROR, "  RoQ: get_buffer() failed\n");
-            return -1;
-        }
+        if ((ret = ff_get_buffer(avctx, enc->current_frame, 0)) < 0 ||
+            (ret = ff_get_buffer(avctx, enc->last_frame,    0)) < 0)
+            return ret;
 
         /* Before the first video frame, write a "video info" chunk */
         roq_write_video_info_chunk(enc);
@@ -1067,7 +1075,8 @@ static int roq_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     }
 
     /* Encode the actual frame */
-    roq_encode_video(enc);
+    if ((ret = roq_encode_video(enc)) < 0)
+        return ret;
 
     pkt->size   = enc->out_buf - pkt->data;
     if (enc->framesSinceKeyframe == 1)
@@ -1077,6 +1086,20 @@ static int roq_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     return 0;
 }
 
+#define OFFSET(x) offsetof(RoqContext, x) /* byte offset of option field x inside RoqContext */
+#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM) /* parenthesized so the OR cannot rebind at a use site */
+static const AVOption options[] = { /* private options of the RoQ encoder */
+    { "quake3_compat", "Whether to respect known limitations in Quake 3 decoder", OFFSET(quake3_compat), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, VE }, /* int option: default 1, min 0, max 1 */
+    { NULL }, /* NULL-named entry ends the table */
+};
+
+static const AVClass roq_class = { /* AVClass that attaches the options[] table to the RoQ encoder */
+    .class_name = "RoQ",                    /* class name string stored in the AVClass */
+    .item_name  = av_default_item_name,
+    .option     = options,                  /* the option table defined just above */
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVCodec ff_roq_encoder = {
     .name                 = "roqvideo",
     .long_name            = NULL_IF_CONFIG_SMALL("id RoQ video"),
@@ -1086,7 +1109,7 @@ AVCodec ff_roq_encoder = {
     .init                 = roq_encode_init,
     .encode2              = roq_encode_frame,
     .close                = roq_encode_end,
-    .supported_framerates = (const AVRational[]){ {30,1}, {0,0} },
-    .pix_fmts             = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV444P,
+    .pix_fmts             = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUVJ444P,
                                                         AV_PIX_FMT_NONE },
+    .priv_class     = &roq_class,
 };
diff --git a/libavcodec/rpza.c b/libavcodec/rpza.c
index 83dde7a..aac437e 100644
--- a/libavcodec/rpza.c
+++ b/libavcodec/rpza.c
@@ -2,20 +2,20 @@
  * Quicktime Video (RPZA) Video Decoder
  * Copyright (C) 2003 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -80,7 +80,7 @@ static void rpza_decode_stream(RpzaContext *s)
     uint16_t *pixels = (uint16_t *)s->frame->data[0];
 
     int row_ptr = 0;
-    int pixel_ptr = 0;
+    int pixel_ptr = -4;
     int block_ptr;
     int pixel_x, pixel_y;
     int total_blocks;
@@ -134,6 +134,7 @@ static void rpza_decode_stream(RpzaContext *s)
         case 0xa0:
             colorA = bytestream2_get_be16(&s->gb);
             while (n_blocks--) {
+                ADVANCE_BLOCK()
                 block_ptr = row_ptr + pixel_ptr;
                 for (pixel_y = 0; pixel_y < 4; pixel_y++) {
                     for (pixel_x = 0; pixel_x < 4; pixel_x++){
@@ -142,7 +143,6 @@ static void rpza_decode_stream(RpzaContext *s)
                     }
                     block_ptr += row_inc;
                 }
-                ADVANCE_BLOCK();
             }
             break;
 
@@ -179,6 +179,7 @@ static void rpza_decode_stream(RpzaContext *s)
             if (bytestream2_get_bytes_left(&s->gb) < n_blocks * 4)
                 return;
             while (n_blocks--) {
+                ADVANCE_BLOCK();
                 block_ptr = row_ptr + pixel_ptr;
                 for (pixel_y = 0; pixel_y < 4; pixel_y++) {
                     uint8_t index = bytestream2_get_byteu(&s->gb);
@@ -189,7 +190,6 @@ static void rpza_decode_stream(RpzaContext *s)
                     }
                     block_ptr += row_inc;
                 }
-                ADVANCE_BLOCK();
             }
             break;
 
@@ -197,6 +197,7 @@ static void rpza_decode_stream(RpzaContext *s)
         case 0x00:
             if (bytestream2_get_bytes_left(&s->gb) < 30)
                 return;
+            ADVANCE_BLOCK();
             block_ptr = row_ptr + pixel_ptr;
             for (pixel_y = 0; pixel_y < 4; pixel_y++) {
                 for (pixel_x = 0; pixel_x < 4; pixel_x++){
@@ -208,7 +209,6 @@ static void rpza_decode_stream(RpzaContext *s)
                 }
                 block_ptr += row_inc;
             }
-            ADVANCE_BLOCK();
             break;
 
         /* Unknown opcode */
@@ -244,10 +244,8 @@ static int rpza_decode_frame(AVCodecContext *avctx,
 
     bytestream2_init(&s->gb, avpkt->data, avpkt->size);
 
-    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0)
         return ret;
-    }
 
     rpza_decode_stream(s);
 
diff --git a/libavcodec/rtjpeg.c b/libavcodec/rtjpeg.c
index 67eeff8..8e02bce 100644
--- a/libavcodec/rtjpeg.c
+++ b/libavcodec/rtjpeg.c
@@ -2,20 +2,20 @@
  * RTJpeg decoding functions
  * Copyright (c) 2006 Reimar Doeffinger
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include "libavutil/common.h"
diff --git a/libavcodec/rtjpeg.h b/libavcodec/rtjpeg.h
index cd30079..d22ff40 100644
--- a/libavcodec/rtjpeg.h
+++ b/libavcodec/rtjpeg.h
@@ -2,20 +2,20 @@
  * RTJpeg decoding functions
  * copyright (c) 2006 Reimar Doeffinger
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/rv10.c b/libavcodec/rv10.c
index 835a1aa..be6ca84 100644
--- a/libavcodec/rv10.c
+++ b/libavcodec/rv10.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2000,2001 Fabrice Bellard
  * Copyright (c) 2002-2004 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -305,7 +305,7 @@ static int rv20_decode_picture_header(RVDecContext *rv)
 {
     MpegEncContext *s = &rv->m;
     int seq, mb_pos, i, ret;
-    int rpr_bits;
+    int rpr_max;
 
     i = get_bits(&s->gb, 2);
     switch (i) {
@@ -326,6 +326,10 @@ static int rv20_decode_picture_header(RVDecContext *rv)
         return AVERROR_INVALIDDATA;
     }
 
+    if (s->low_delay && s->pict_type == AV_PICTURE_TYPE_B) {
+        av_log(s->avctx, AV_LOG_ERROR, "low delay B\n");
+        return -1;
+    }
     if (s->last_picture_ptr == NULL && s->pict_type == AV_PICTURE_TYPE_B) {
         av_log(s->avctx, AV_LOG_ERROR, "early B-frame\n");
         return AVERROR_INVALIDDATA;
@@ -343,17 +347,17 @@ static int rv20_decode_picture_header(RVDecContext *rv)
     }
 
     if (RV_GET_MINOR_VER(rv->sub_id) >= 2)
-        s->loop_filter = get_bits1(&s->gb);
+        s->loop_filter = get_bits1(&s->gb) && !s->avctx->lowres;
 
     if (RV_GET_MINOR_VER(rv->sub_id) <= 1)
         seq = get_bits(&s->gb, 8) << 7;
     else
         seq = get_bits(&s->gb, 13) << 2;
 
-    rpr_bits = s->avctx->extradata[1] & 7;
-    if (rpr_bits) {
+    rpr_max = s->avctx->extradata[1] & 7;
+    if (rpr_max) {
         int f, new_w, new_h;
-        rpr_bits = FFMIN((rpr_bits >> 1) + 1, 3);
+        int rpr_bits = av_log2(rpr_max) + 1;
 
         f = get_bits(&s->gb, rpr_bits);
 
@@ -370,10 +374,21 @@ static int rv20_decode_picture_header(RVDecContext *rv)
             new_h = rv->orig_height;
         }
         if (new_w != s->width || new_h != s->height) {
+            AVRational old_aspect = s->avctx->sample_aspect_ratio;
             av_log(s->avctx, AV_LOG_DEBUG,
                    "attempting to change resolution to %dx%d\n", new_w, new_h);
+            if (av_image_check_size(new_w, new_h, 0, s->avctx) < 0)
+                return AVERROR_INVALIDDATA;
             ff_MPV_common_end(s);
 
+            // attempt to keep aspect during typical resolution switches
+            if (!old_aspect.num)
+                old_aspect = (AVRational){1, 1};
+            if (2 * new_w * s->height == new_h * s->width)
+                s->avctx->sample_aspect_ratio = av_mul_q(old_aspect, (AVRational){2, 1});
+            if (new_w * s->height == 2 * new_h * s->width)
+                s->avctx->sample_aspect_ratio = av_mul_q(old_aspect, (AVRational){1, 2});
+
             ret = ff_set_dimensions(s->avctx, new_w, new_h);
             if (ret < 0)
                 return ret;
@@ -385,9 +400,10 @@ static int rv20_decode_picture_header(RVDecContext *rv)
         }
 
         if (s->avctx->debug & FF_DEBUG_PICT_INFO) {
-            av_log(s->avctx, AV_LOG_DEBUG, "F %d/%d\n", f, rpr_bits);
+            av_log(s->avctx, AV_LOG_DEBUG, "F %d/%d/%d\n", f, rpr_bits, rpr_max);
         }
-    } else if (av_image_check_size(s->width, s->height, 0, s->avctx) < 0)
+    }
+    if (av_image_check_size(s->width, s->height, 0, s->avctx) < 0)
         return AVERROR_INVALIDDATA;
 
     mb_pos = ff_h263_decode_mba(s);
@@ -406,15 +422,17 @@ static int rv20_decode_picture_header(RVDecContext *rv)
         } else {
             s->time    = seq;
             s->pb_time = s->pp_time - (s->last_non_b_time - s->time);
-            if (s->pp_time <= s->pb_time ||
-                s->pp_time <= s->pp_time - s->pb_time || s->pp_time <= 0) {
-                av_log(s->avctx, AV_LOG_DEBUG, "messed up order, possible "
-                       "from seeking? skipping current b frame\n");
-                return FRAME_SKIPPED;
-            }
-            ff_mpeg4_init_direct_mv(s);
         }
     }
+    if (s->pict_type == AV_PICTURE_TYPE_B) {
+        if (s->pp_time <=s->pb_time || s->pp_time <= s->pp_time - s->pb_time || s->pp_time<=0) {
+            av_log(s->avctx, AV_LOG_DEBUG,
+                   "messed up order, possible from seeking? skipping current b frame\n");
+#define ERROR_SKIP_FRAME (-123) /* returned when a B-frame is skipped; parenthesized so the unary minus stays bound in any expression */
+            return ERROR_SKIP_FRAME;
+        }
+        ff_mpeg4_init_direct_mv(s);
+    }
 
     s->no_rounding = get_bits1(&s->gb);
 
@@ -426,7 +444,8 @@ static int rv20_decode_picture_header(RVDecContext *rv)
     s->unrestricted_mv = 1;
     s->h263_aic        = s->pict_type == AV_PICTURE_TYPE_I;
     s->modified_quant  = 1;
-    s->loop_filter     = 1;
+    if (!s->avctx->lowres)
+        s->loop_filter = 1;
 
     if (s->avctx->debug & FF_DEBUG_PICT_INFO) {
         av_log(s->avctx, AV_LOG_INFO,
@@ -435,7 +454,7 @@ static int rv20_decode_picture_header(RVDecContext *rv)
                s->no_rounding);
     }
 
-    assert(s->pict_type != AV_PICTURE_TYPE_B || !s->low_delay);
+    av_assert0(s->pict_type != AV_PICTURE_TYPE_B || !s->low_delay);
 
     return s->mb_width * s->mb_height - mb_pos;
 }
@@ -492,8 +511,8 @@ static av_cold int rv10_decode_init(AVCodecContext *avctx)
     }
 
     if (avctx->debug & FF_DEBUG_PICT_INFO) {
-        av_log(avctx, AV_LOG_DEBUG, "ver:%X ver0:%X\n", rv->sub_id,
-               avctx->extradata_size >= 4 ? ((int *) avctx->extradata)[0] : -1);
+        av_log(avctx, AV_LOG_DEBUG, "ver:%X ver0:%"PRIX32"\n", rv->sub_id,
+               ((uint32_t *) avctx->extradata)[0]);
     }
 
     avctx->pix_fmt = AV_PIX_FMT_YUV420P;
@@ -540,7 +559,8 @@ static int rv10_decode_packet(AVCodecContext *avctx, const uint8_t *buf,
     else
         mb_count = rv20_decode_picture_header(rv);
     if (mb_count < 0) {
-        av_log(s->avctx, AV_LOG_ERROR, "HEADER ERROR\n");
+        if (mb_count != ERROR_SKIP_FRAME)
+            av_log(s->avctx, AV_LOG_ERROR, "HEADER ERROR\n");
         return AVERROR_INVALIDDATA;
     }
 
@@ -573,6 +593,7 @@ static int rv10_decode_packet(AVCodecContext *avctx, const uint8_t *buf,
         }
     }
 
+
     av_dlog(avctx, "qscale=%d\n", s->qscale);
 
     /* default quantization values */
@@ -686,6 +707,8 @@ static int rv10_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     const uint8_t *slices_hdr = NULL;
 
     av_dlog(avctx, "*****frame %d size=%d\n", avctx->frame_number, buf_size);
+    s->flags  = avctx->flags;
+    s->flags2 = avctx->flags2;
 
     /* no supplementary picture */
     if (buf_size == 0) {
@@ -743,11 +766,13 @@ static int rv10_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         if (s->pict_type == AV_PICTURE_TYPE_B || s->low_delay) {
             if ((ret = av_frame_ref(pict, s->current_picture_ptr->f)) < 0)
                 return ret;
-            ff_print_debug_info(s, s->current_picture_ptr);
+            ff_print_debug_info(s, s->current_picture_ptr, pict);
+            ff_mpv_export_qp_table(s, pict, s->current_picture_ptr, FF_QSCALE_TYPE_MPEG1);
         } else if (s->last_picture_ptr != NULL) {
             if ((ret = av_frame_ref(pict, s->last_picture_ptr->f)) < 0)
                 return ret;
-            ff_print_debug_info(s, s->last_picture_ptr);
+            ff_print_debug_info(s, s->last_picture_ptr, pict);
+            ff_mpv_export_qp_table(s, pict,s->last_picture_ptr, FF_QSCALE_TYPE_MPEG1);
         }
 
         if (s->last_picture_ptr || s->low_delay) {
@@ -771,6 +796,7 @@ AVCodec ff_rv10_decoder = {
     .close          = rv10_decode_end,
     .decode         = rv10_decode_frame,
     .capabilities   = CODEC_CAP_DR1,
+    .max_lowres     = 3,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_YUV420P,
         AV_PIX_FMT_NONE
@@ -788,6 +814,7 @@ AVCodec ff_rv20_decoder = {
     .decode         = rv10_decode_frame,
     .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY,
     .flush          = ff_mpeg_flush,
+    .max_lowres     = 3,
     .pix_fmts       = (const enum AVPixelFormat[]) {
         AV_PIX_FMT_YUV420P,
         AV_PIX_FMT_NONE
diff --git a/libavcodec/rv10enc.c b/libavcodec/rv10enc.c
index 9b23d7d..1f85743 100644
--- a/libavcodec/rv10enc.c
+++ b/libavcodec/rv10enc.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2000,2001 Fabrice Bellard
  * Copyright (c) 2002-2004 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/rv20enc.c b/libavcodec/rv20enc.c
index 67879e2..b943116 100644
--- a/libavcodec/rv20enc.c
+++ b/libavcodec/rv20enc.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2000,2001 Fabrice Bellard
  * Copyright (c) 2002-2004 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -40,12 +40,12 @@ void ff_rv20_encode_picture_header(MpegEncContext *s, int picture_number){
 
     put_bits(&s->pb, 1, s->no_rounding);
 
-    assert(s->f_code == 1);
-    assert(s->unrestricted_mv == 0);
-    assert(s->alt_inter_vlc == 0);
-    assert(s->umvplus == 0);
-    assert(s->modified_quant==1);
-    assert(s->loop_filter==1);
+    av_assert0(s->f_code == 1);
+    av_assert0(s->unrestricted_mv == 0);
+    av_assert0(s->alt_inter_vlc == 0);
+    av_assert0(s->umvplus == 0);
+    av_assert0(s->modified_quant==1);
+    av_assert0(s->loop_filter==1);
 
     s->h263_aic= s->pict_type == AV_PICTURE_TYPE_I;
     if(s->h263_aic){
diff --git a/libavcodec/rv30.c b/libavcodec/rv30.c
index 1675ea2..fd8fd4f 100644
--- a/libavcodec/rv30.c
+++ b/libavcodec/rv30.c
@@ -2,20 +2,20 @@
  * RV30 decoder
  * Copyright (c) 2007 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -51,8 +51,13 @@ static int rv30_parse_slice_header(RV34DecContext *r, GetBitContext *gb, SliceIn
     si->quant = get_bits(gb, 5);
     skip_bits1(gb);
     si->pts = get_bits(gb, 13);
-    rpr = get_bits(gb, r->rpr);
+    rpr = get_bits(gb, av_log2(r->max_rpr) + 1);
     if(rpr){
+        if (rpr > r->max_rpr) {
+            av_log(avctx, AV_LOG_ERROR, "rpr too large\n");
+            return AVERROR_INVALIDDATA;
+        }
+
         if (avctx->extradata_size < rpr * 2 + 8) {
             av_log(avctx, AV_LOG_ERROR,
                    "Insufficient extradata - need at least %d bytes, got %d\n",
@@ -82,7 +87,7 @@ static int rv30_decode_intra_types(RV34DecContext *r, GetBitContext *gb, int8_t
     for(i = 0; i < 4; i++, dst += r->intra_types_stride - 4){
         for(j = 0; j < 4; j+= 2){
             unsigned code = svq3_get_ue_golomb(gb) << 1;
-            if(code >= 81*2){
+            if (code > 80U*2U) {
                 av_log(r->s.avctx, AV_LOG_ERROR, "Incorrect intra prediction code\n");
                 return -1;
             }
@@ -261,8 +266,12 @@ static av_cold int rv30_decode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_ERROR, "Extradata is too small.\n");
         return -1;
     }
-    r->rpr = (avctx->extradata[1] & 7) >> 1;
-    r->rpr = FFMIN(r->rpr + 1, 3);
+
+    r->max_rpr = avctx->extradata[1] & 7;
+    if(avctx->extradata_size < 2*r->max_rpr + 8){
+        av_log(avctx, AV_LOG_WARNING, "Insufficient extradata - need at least %d bytes, got %d\n",
+               2*r->max_rpr + 8, avctx->extradata_size);
+    }
 
     r->parse_slice_header = rv30_parse_slice_header;
     r->decode_intra_types = rv30_decode_intra_types;
diff --git a/libavcodec/rv30data.h b/libavcodec/rv30data.h
index 5ee3048..5c4cb97 100644
--- a/libavcodec/rv30data.h
+++ b/libavcodec/rv30data.h
@@ -2,20 +2,20 @@
  * RealVideo 3 decoder
  * copyright (c) 2007 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -58,7 +58,7 @@ static const uint8_t rv30_itype_code[9*9*2] = {
  *
  * This is really a three-dimensional matrix with dimensions
  * [-1..9][-1..9][0..9]. The first and second coordinates are
- * detemined by the top and left neighbors (-1 if unavailable).
+ * determined by the top and left neighbors (-1 if unavailable).
  */
 static const uint8_t rv30_itype_from_context[900] = {
     0, 9, 9, 9, 9, 9, 9, 9, 9,
diff --git a/libavcodec/rv30dsp.c b/libavcodec/rv30dsp.c
index 36187a7..6cc27a2 100644
--- a/libavcodec/rv30dsp.c
+++ b/libavcodec/rv30dsp.c
@@ -2,20 +2,20 @@
  * RV30 decoder motion compensation functions
  * Copyright (c) 2007 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c
index 0c36348..d82b41e 100644
--- a/libavcodec/rv34.c
+++ b/libavcodec/rv34.c
@@ -2,20 +2,20 @@
  * RV30/40 decoder common data
  * Copyright (c) 2007 Mike Melanson, Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -24,6 +24,7 @@
  * RV30/40 decoder common data
  */
 
+#include "libavutil/imgutils.h"
 #include "libavutil/internal.h"
 
 #include "avcodec.h"
@@ -215,7 +216,7 @@ static int rv34_decode_cbp(GetBitContext *gb, RV34VLC *vlc, int table)
 }
 
 /**
- * Get one coefficient value from the bistream and store it.
+ * Get one coefficient value from the bitstream and store it.
  */
 static inline void decode_coeff(int16_t *dst, int coef, int esc, GetBitContext *gb, VLC* vlc, int q)
 {
@@ -510,7 +511,7 @@ static void rv34_pred_mv(RV34DecContext *r, int block_type, int subblock_no, int
     }
 }
 
-#define GET_PTS_DIFF(a, b) ((a - b + 8192) & 0x1FFF)
+#define GET_PTS_DIFF(a, b) (((a) - (b) + 8192) & 0x1FFF) /* (a - b + 8192) masked to its low 13 bits */
 
 /**
  * Calculate motion vector component that should be added for direct blocks.
@@ -672,6 +673,7 @@ static inline void rv34_mc(RV34DecContext *r, const int block_type,
     int dxy, mx, my, umx, umy, lx, ly, uvmx, uvmy, src_x, src_y, uvsrc_x, uvsrc_y;
     int mv_pos = s->mb_x * 2 + s->mb_y * 2 * s->b8_stride + mv_off;
     int is16x16 = 1;
+    int emu = 0;
 
     if(thirdpel){
         int chroma_mx, chroma_my;
@@ -723,24 +725,14 @@ static inline void rv34_mc(RV34DecContext *r, const int block_type,
     if(s->h_edge_pos - (width << 3) < 6 || s->v_edge_pos - (height << 3) < 6 ||
        (unsigned)(src_x - !!lx*2) > s->h_edge_pos - !!lx*2 - (width <<3) - 4 ||
        (unsigned)(src_y - !!ly*2) > s->v_edge_pos - !!ly*2 - (height<<3) - 4) {
-        uint8_t *uvbuf = s->edge_emu_buffer + 22 * s->linesize;
-
         srcY -= 2 + 2*s->linesize;
         s->vdsp.emulated_edge_mc(s->edge_emu_buffer, srcY,
                                  s->linesize, s->linesize,
                                  (width << 3) + 6, (height << 3) + 6,
-                            src_x - 2, src_y - 2, s->h_edge_pos, s->v_edge_pos);
+                                 src_x - 2, src_y - 2,
+                                 s->h_edge_pos, s->v_edge_pos);
         srcY = s->edge_emu_buffer + 2 + 2*s->linesize;
-        s->vdsp.emulated_edge_mc(uvbuf, srcU,
-                                 s->uvlinesize,s->uvlinesize,
-                                 (width << 2) + 1, (height << 2) + 1,
-                            uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, s->v_edge_pos >> 1);
-        s->vdsp.emulated_edge_mc(uvbuf + 16, srcV,
-                                 s->uvlinesize, s->uvlinesize,
-                                 (width << 2) + 1, (height << 2) + 1,
-                            uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, s->v_edge_pos >> 1);
-        srcU = uvbuf;
-        srcV = uvbuf + 16;
+        emu = 1;
     }
     if(!weighted){
         Y = s->dest[0] + xoff      + yoff     *s->linesize;
@@ -763,6 +755,24 @@ static inline void rv34_mc(RV34DecContext *r, const int block_type,
     }
     is16x16 = (block_type != RV34_MB_P_8x8) && (block_type != RV34_MB_P_16x8) && (block_type != RV34_MB_P_8x16);
     qpel_mc[!is16x16][dxy](Y, srcY, s->linesize);
+    if (emu) {
+        uint8_t *uvbuf = s->edge_emu_buffer;
+
+        s->vdsp.emulated_edge_mc(uvbuf, srcU,
+                                 s->uvlinesize, s->uvlinesize,
+                                 (width << 2) + 1, (height << 2) + 1,
+                                 uvsrc_x, uvsrc_y,
+                                 s->h_edge_pos >> 1, s->v_edge_pos >> 1);
+        srcU = uvbuf;
+        uvbuf += 9*s->uvlinesize;
+
+        s->vdsp.emulated_edge_mc(uvbuf, srcV,
+                                 s->uvlinesize, s->uvlinesize,
+                                 (width << 2) + 1, (height << 2) + 1,
+                                 uvsrc_x, uvsrc_y,
+                                 s->h_edge_pos >> 1, s->v_edge_pos >> 1);
+        srcV = uvbuf;
+    }
     chroma_mc[2-width]   (U, srcU, s->uvlinesize, height*4, uvmx, uvmy);
     chroma_mc[2-width]   (V, srcV, s->uvlinesize, height*4, uvmx, uvmy);
 }
@@ -1339,7 +1349,7 @@ static int check_slice_end(RV34DecContext *r, MpegEncContext *s)
     if(r->s.mb_skip_run > 1)
         return 0;
     bits = get_bits_left(&s->gb);
-    if(bits < 0 || (bits < 8 && !show_bits(&s->gb, bits)))
+    if(bits <= 0 || (bits < 8 && !show_bits(&s->gb, bits)))
         return 1;
     return 0;
 }
@@ -1361,11 +1371,11 @@ static int rv34_decoder_alloc(RV34DecContext *r)
 {
     r->intra_types_stride = r->s.mb_width * 4 + 4;
 
-    r->cbp_chroma       = av_malloc(r->s.mb_stride * r->s.mb_height *
+    r->cbp_chroma       = av_mallocz(r->s.mb_stride * r->s.mb_height *
                                     sizeof(*r->cbp_chroma));
-    r->cbp_luma         = av_malloc(r->s.mb_stride * r->s.mb_height *
+    r->cbp_luma         = av_mallocz(r->s.mb_stride * r->s.mb_height *
                                     sizeof(*r->cbp_luma));
-    r->deblock_coefs    = av_malloc(r->s.mb_stride * r->s.mb_height *
+    r->deblock_coefs    = av_mallocz(r->s.mb_stride * r->s.mb_height *
                                     sizeof(*r->deblock_coefs));
     r->intra_types_hist = av_malloc(r->intra_types_stride * 4 * 2 *
                                     sizeof(*r->intra_types_hist));
@@ -1410,6 +1420,10 @@ static int rv34_decode_slice(RV34DecContext *r, int end, const uint8_t* buf, int
         av_log(s->avctx, AV_LOG_ERROR, "Slice type mismatch\n");
         return AVERROR_INVALIDDATA;
     }
+    if (s->width != r->si.width || s->height != r->si.height) {
+        av_log(s->avctx, AV_LOG_ERROR, "Size mismatch\n");
+        return AVERROR_INVALIDDATA;
+    }
 
     r->si.end = end;
     s->qscale = r->si.quant;
@@ -1587,18 +1601,30 @@ static int finish_frame(AVCodecContext *avctx, AVFrame *pict)
     if (s->pict_type == AV_PICTURE_TYPE_B || s->low_delay) {
         if ((ret = av_frame_ref(pict, s->current_picture_ptr->f)) < 0)
             return ret;
-        ff_print_debug_info(s, s->current_picture_ptr);
+        ff_print_debug_info(s, s->current_picture_ptr, pict);
+        ff_mpv_export_qp_table(s, pict, s->current_picture_ptr, FF_QSCALE_TYPE_MPEG1);
         got_picture = 1;
     } else if (s->last_picture_ptr != NULL) {
         if ((ret = av_frame_ref(pict, s->last_picture_ptr->f)) < 0)
             return ret;
-        ff_print_debug_info(s, s->last_picture_ptr);
+        ff_print_debug_info(s, s->last_picture_ptr, pict);
+        ff_mpv_export_qp_table(s, pict, s->last_picture_ptr, FF_QSCALE_TYPE_MPEG1);
         got_picture = 1;
     }
 
     return got_picture;
 }
 
+/* Rescale the SAR for a coded-size change so the display aspect ratio is kept. */
+static AVRational update_sar(int old_w, int old_h, AVRational sar, int new_w, int new_h)
+{
+    if (!sar.num) // an unset SAR is treated as square pixels
+        sar = (AVRational){1, 1};
+
+    // scale in two steps: a single new_h * old_w product could overflow int
+    return av_mul_q(sar, av_mul_q((AVRational){new_h, new_w}, (AVRational){old_w, old_h}));
+}
+
 int ff_rv34_decode_frame(AVCodecContext *avctx,
                             void *data, int *got_picture_ptr,
                             AVPacket *avpkt)
@@ -1659,8 +1685,8 @@ int ff_rv34_decode_frame(AVCodecContext *avctx,
 
     /* first slice */
     if (si.start == 0) {
-        if (s->mb_num_left > 0) {
-            av_log(avctx, AV_LOG_ERROR, "New frame but still %d MB left.",
+        if (s->mb_num_left > 0 && s->current_picture_ptr) {
+            av_log(avctx, AV_LOG_ERROR, "New frame but still %d MB left.\n",
                    s->mb_num_left);
             ff_er_frame_end(&s->er);
             ff_MPV_frame_end(s);
@@ -1672,6 +1698,12 @@ int ff_rv34_decode_frame(AVCodecContext *avctx,
             av_log(s->avctx, AV_LOG_WARNING, "Changing dimensions to %dx%d\n",
                    si.width, si.height);
 
+            if (av_image_check_size(si.width, si.height, 0, s->avctx))
+                return AVERROR_INVALIDDATA;
+
+            s->avctx->sample_aspect_ratio = update_sar(
+                s->width, s->height, s->avctx->sample_aspect_ratio,
+                si.width, si.height);
             s->width  = si.width;
             s->height = si.height;
 
diff --git a/libavcodec/rv34.h b/libavcodec/rv34.h
index c32c089..870164c 100644
--- a/libavcodec/rv34.h
+++ b/libavcodec/rv34.h
@@ -2,20 +2,20 @@
  * RV30/40 decoder common data declarations
  * Copyright (c) 2007 Mike Melanson, Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -102,7 +102,7 @@ typedef struct RV34DecContext{
     int dmv[4][2];           ///< differential motion vectors for the current macroblock
 
     int rv30;                ///< indicates which RV variasnt is currently decoded
-    int rpr;                 ///< one field size in RV30 slice header
+    int max_rpr;
 
     int cur_pts, last_pts, next_pts;
     int scaled_weight;
diff --git a/libavcodec/rv34_parser.c b/libavcodec/rv34_parser.c
index 8af7443..6a07a5f 100644
--- a/libavcodec/rv34_parser.c
+++ b/libavcodec/rv34_parser.c
@@ -2,20 +2,20 @@
  * RV30/40 parser
  * Copyright (c) 2011 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/rv34data.h b/libavcodec/rv34data.h
index 3064124..4b2701f 100644
--- a/libavcodec/rv34data.h
+++ b/libavcodec/rv34data.h
@@ -2,20 +2,20 @@
  * RealVideo 4 decoder
  * copyright (c) 2007 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/rv34dsp.c b/libavcodec/rv34dsp.c
index 7234ee8..c3f245e 100644
--- a/libavcodec/rv34dsp.c
+++ b/libavcodec/rv34dsp.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2007 Mike Melanson, Konstantin Shishkov
  * Copyright (c) 2011 Janne Grunau
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/rv34dsp.h b/libavcodec/rv34dsp.h
index 1aa80cf..2e9ec4e 100644
--- a/libavcodec/rv34dsp.h
+++ b/libavcodec/rv34dsp.h
@@ -2,20 +2,20 @@
  * RV30/40 decoder motion compensation functions
  * Copyright (c) 2008 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/rv34vlc.h b/libavcodec/rv34vlc.h
index f4670c1..aa29357 100644
--- a/libavcodec/rv34vlc.h
+++ b/libavcodec/rv34vlc.h
@@ -2,20 +2,20 @@
  * RealVideo 3/4 decoder
  * Copyright (c) 2007 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/rv40.c b/libavcodec/rv40.c
index b7e113c..043fd72 100644
--- a/libavcodec/rv40.c
+++ b/libavcodec/rv40.c
@@ -2,20 +2,20 @@
  * RV40 decoder
  * Copyright (c) 2007 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -229,8 +229,11 @@ static int rv40_decode_mb_info(RV34DecContext *r)
     int prev_type = 0;
     int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
 
-    if(!r->s.mb_skip_run)
+    if(!r->s.mb_skip_run) {
         r->s.mb_skip_run = svq3_get_ue_golomb(gb) + 1;
+        if(r->s.mb_skip_run > (unsigned)s->mb_num)
+            return -1;
+    }
 
     if(--r->s.mb_skip_run)
          return RV34_MB_SKIP;
@@ -357,7 +360,7 @@ static void rv40_loop_filter(RV34DecContext *r, int row)
     int uvcbp[4][2];
     /**
      * This mask represents the pattern of luma subblocks that should be filtered
-     * in addition to the coded ones because because they lie at the edge of
+     * in addition to the coded ones because they lie at the edge of
      * 8x8 block with different enough motion vectors
      */
     unsigned mvmasks[4];
diff --git a/libavcodec/rv40data.h b/libavcodec/rv40data.h
index 42328af..36f9f91 100644
--- a/libavcodec/rv40data.h
+++ b/libavcodec/rv40data.h
@@ -2,20 +2,20 @@
  * RealVideo 4 decoder
  * copyright (c) 2007 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/rv40dsp.c b/libavcodec/rv40dsp.c
index d1643e0..6d08847 100644
--- a/libavcodec/rv40dsp.c
+++ b/libavcodec/rv40dsp.c
@@ -2,20 +2,20 @@
  * RV40 decoder motion compensation functions
  * Copyright (c) 2008 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -32,6 +32,7 @@
 #include "pixels.h"
 #include "rnd_avg.h"
 #include "rv34dsp.h"
+#include "libavutil/avassert.h"
 
 #define RV40_LOWPASS(OPNAME, OP) \
 static av_unused void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\
@@ -299,7 +300,7 @@ static void OPNAME ## rv40_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
     int i;\
     int bias = rv40_bias[y>>1][x>>1];\
     \
-    assert(x<8 && y<8 && x>=0 && y>=0);\
+    av_assert2(x<8 && y<8 && x>=0 && y>=0);\
 \
     if(D){\
         for(i = 0; i < h; i++){\
@@ -332,7 +333,7 @@ static void OPNAME ## rv40_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
     int i;\
     int bias = rv40_bias[y>>1][x>>1];\
     \
-    assert(x<8 && y<8 && x>=0 && y>=0);\
+    av_assert2(x<8 && y<8 && x>=0 && y>=0);\
 \
     if(D){\
         for(i = 0; i < h; i++){\
diff --git a/libavcodec/rv40vlc2.h b/libavcodec/rv40vlc2.h
index 2f63fc2..15119a1 100644
--- a/libavcodec/rv40vlc2.h
+++ b/libavcodec/rv40vlc2.h
@@ -2,20 +2,20 @@
  * RealVideo 4 decoder
  * copyright (c) 2007 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/s302m.c b/libavcodec/s302m.c
index 36384fb..7639a0f 100644
--- a/libavcodec/s302m.c
+++ b/libavcodec/s302m.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2008 Laurent Aimar <fenrir@videolan.org>
  * Copyright (c) 2009 Baptiste Coudurier <baptiste.coudurier@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -59,18 +59,31 @@ static int s302m_parse_frame_header(AVCodecContext *avctx, const uint8_t *buf,
     }
 
     /* Set output properties */
-    avctx->bits_per_coded_sample = bits;
+    avctx->bits_per_raw_sample = bits;
     if (bits > 16)
         avctx->sample_fmt = AV_SAMPLE_FMT_S32;
     else
         avctx->sample_fmt = AV_SAMPLE_FMT_S16;
 
     avctx->channels    = channels;
+    switch(channels) {
+        case 2:
+            avctx->channel_layout = AV_CH_LAYOUT_STEREO;
+            break;
+        case 4:
+            avctx->channel_layout = AV_CH_LAYOUT_QUAD;
+            break;
+        case 6:
+            avctx->channel_layout = AV_CH_LAYOUT_5POINT1_BACK;
+            break;
+        case 8:
+            avctx->channel_layout = AV_CH_LAYOUT_5POINT1_BACK | AV_CH_LAYOUT_STEREO_DOWNMIX;
+    }
     avctx->sample_rate = 48000;
-    avctx->bit_rate    = 48000 * avctx->channels * (avctx->bits_per_coded_sample + 4) +
+    avctx->bit_rate    = 48000 * avctx->channels * (avctx->bits_per_raw_sample + 4) +
                          32 * (48000 / (buf_size * 8 /
                                         (avctx->channels *
-                                         (avctx->bits_per_coded_sample + 4))));
+                                         (avctx->bits_per_raw_sample + 4))));
 
     return frame_size;
 }
@@ -91,16 +104,14 @@ static int s302m_decode_frame(AVCodecContext *avctx, void *data,
     buf      += AES3_HEADER_LEN;
 
     /* get output buffer */
-    block_size = (avctx->bits_per_coded_sample + 4) / 4;
+    block_size = (avctx->bits_per_raw_sample + 4) / 4;
     frame->nb_samples = 2 * (buf_size / block_size) / avctx->channels;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     buf_size = (frame->nb_samples * avctx->channels / 2) * block_size;
 
-    if (avctx->bits_per_coded_sample == 24) {
+    if (avctx->bits_per_raw_sample == 24) {
         uint32_t *o = (uint32_t *)frame->data[0];
         for (; buf_size > 6; buf_size -= 7) {
             *o++ = (ff_reverse[buf[2]]        << 24) |
@@ -112,7 +123,7 @@ static int s302m_decode_frame(AVCodecContext *avctx, void *data,
                    (ff_reverse[buf[3] & 0x0f] <<  4);
             buf += 7;
         }
-    } else if (avctx->bits_per_coded_sample == 20) {
+    } else if (avctx->bits_per_raw_sample == 20) {
         uint32_t *o = (uint32_t *)frame->data[0];
         for (; buf_size > 5; buf_size -= 6) {
             *o++ = (ff_reverse[buf[2] & 0xf0] << 28) |
diff --git a/libavcodec/s302menc.c b/libavcodec/s302menc.c
new file mode 100644
index 0000000..540ac29
--- /dev/null
+++ b/libavcodec/s302menc.c
@@ -0,0 +1,178 @@
+/*
+ * SMPTE 302M encoder
+ * Copyright (c) 2010 Google, Inc.
+ * Copyright (c) 2013 Darryl Wallace <wallacdj@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "internal.h"
+#include "mathops.h"
+#include "put_bits.h"
+
+#define AES3_HEADER_LEN 4 /* bytes taken by the header written at the start of every packet */
+
+typedef struct S302MEncContext {
+    uint8_t framing_index; /* Set for even channels on multiple of 192 samples; counts samples and wraps at 192 */
+} S302MEncContext;
+
+/* Validate the channel count and choose the coded sample depth (16, 20 or 24 bits). */
+static av_cold int s302m_encode_init(AVCodecContext *avctx)
+{
+    S302MEncContext *s = avctx->priv_data;
+
+    /* zero and negative even counts pass the parity test, so bound from below too */
+    if (avctx->channels < 2 || avctx->channels & 1 || avctx->channels > 8) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Encoding %d channel(s) is not allowed. Only 2, 4, 6 and 8 channels are supported.\n",
+               avctx->channels);
+        return AVERROR(EINVAL);
+    }
+
+    switch (avctx->sample_fmt) {
+    case AV_SAMPLE_FMT_S16:
+        avctx->bits_per_raw_sample = 16;
+        break;
+    case AV_SAMPLE_FMT_S32:
+        /* unset (0) or anything above 20 is coded as 24 bits, the rest as 20 */
+        if (avctx->bits_per_raw_sample > 24)
+            av_log(avctx, AV_LOG_WARNING, "encoding as 24 bits-per-sample\n");
+        if (!avctx->bits_per_raw_sample || avctx->bits_per_raw_sample > 20)
+            avctx->bits_per_raw_sample = 24;
+        else
+            avctx->bits_per_raw_sample = 20;
+        break;
+    }
+
+    avctx->frame_size = 0;
+    avctx->bit_rate   = 48000 * avctx->channels * (avctx->bits_per_raw_sample + 4);
+    s->framing_index  = 0;
+
+    return 0;
+}
+
+/*
+ * Pack one frame of interleaved PCM into a packet: a 4-byte header, then
+ * sample pairs mapped through ff_reverse. Returns 0 or a negative AVERROR.
+ */
+static int s302m_encode2_frame(AVCodecContext *avctx, AVPacket *avpkt,
+                               const AVFrame *frame, int *got_packet_ptr)
+{
+    S302MEncContext *s = avctx->priv_data;
+    const int buf_size = AES3_HEADER_LEN + (frame->nb_samples * avctx->channels *
+                                            (avctx->bits_per_raw_sample + 4)) / 8;
+    int ret, c, channels;
+    uint8_t *o;
+    PutBitContext pb;
+
+    /* the header stores the payload size in a 16-bit field */
+    if (buf_size - AES3_HEADER_LEN > UINT16_MAX) {
+        av_log(avctx, AV_LOG_ERROR, "number of samples in frame too big\n");
+        return AVERROR(EINVAL);
+    }
+    if ((ret = ff_alloc_packet2(avctx, avpkt, buf_size)) < 0)
+        return ret;
+
+    o = avpkt->data;
+    init_put_bits(&pb, o, buf_size * 8);
+    put_bits(&pb, 16, buf_size - AES3_HEADER_LEN);
+    put_bits(&pb, 2, (avctx->channels - 2) >> 1);   // number of channels
+    put_bits(&pb, 8, 0);                            // channel ID
+    put_bits(&pb, 2, (avctx->bits_per_raw_sample - 16) / 4); // bits per sample (0 = 16bit, 1 = 20bit, 2 = 24bit)
+    put_bits(&pb, 4, 0);                            // alignments
+    flush_put_bits(&pb);
+    o += AES3_HEADER_LEN;
+
+    if (avctx->bits_per_raw_sample == 24) {
+        const uint32_t *samples = (uint32_t *)frame->data[0];
+
+        for (c = 0; c < frame->nb_samples; c++) {
+            uint8_t vucf = s->framing_index == 0 ? 0x10: 0;
+
+            for (channels = 0; channels < avctx->channels; channels += 2) {
+                o[0] = ff_reverse[(samples[0] & 0x0000FF00) >> 8];
+                o[1] = ff_reverse[(samples[0] & 0x00FF0000) >> 16];
+                o[2] = ff_reverse[(samples[0] & 0xFF000000) >> 24];
+                o[3] = ff_reverse[(samples[1] & 0x00000F00) >> 4] | vucf;
+                o[4] = ff_reverse[(samples[1] & 0x000FF000) >> 12];
+                o[5] = ff_reverse[(samples[1] & 0x0FF00000) >> 20];
+                o[6] = ff_reverse[(samples[1] & 0xF0000000) >> 28];
+                o += 7;
+                samples += 2;
+            }
+            if (++s->framing_index >= 192) // the framing counter wraps every 192 samples
+                s->framing_index = 0;
+        }
+    } else if (avctx->bits_per_raw_sample == 20) {
+        const uint32_t *samples = (uint32_t *)frame->data[0];
+
+        for (c = 0; c < frame->nb_samples; c++) {
+            uint8_t vucf = s->framing_index == 0 ? 0x80: 0;
+
+            for (channels = 0; channels < avctx->channels; channels += 2) {
+                o[0] = ff_reverse[ (samples[0] & 0x000FF000) >> 12];
+                o[1] = ff_reverse[ (samples[0] & 0x0FF00000) >> 20];
+                o[2] = ff_reverse[((samples[0] & 0xF0000000) >> 28) | vucf];
+                o[3] = ff_reverse[ (samples[1] & 0x000FF000) >> 12];
+                o[4] = ff_reverse[ (samples[1] & 0x0FF00000) >> 20];
+                o[5] = ff_reverse[ (samples[1] & 0xF0000000) >> 28];
+                o += 6;
+                samples += 2;
+            }
+            if (++s->framing_index >= 192)
+                s->framing_index = 0;
+        }
+    } else if (avctx->bits_per_raw_sample == 16) {
+        const uint16_t *samples = (uint16_t *)frame->data[0];
+
+        for (c = 0; c < frame->nb_samples; c++) {
+            uint8_t vucf = s->framing_index == 0 ? 0x10 : 0;
+
+            for (channels = 0; channels < avctx->channels; channels += 2) {
+                o[0] = ff_reverse[ samples[0] & 0xFF];
+                o[1] = ff_reverse[(samples[0] & 0xFF00) >>  8];
+                o[2] = ff_reverse[(samples[1] & 0x0F)   <<  4] | vucf;
+                o[3] = ff_reverse[(samples[1] & 0x0FF0) >>  4];
+                o[4] = ff_reverse[(samples[1] & 0xF000) >> 12];
+                o += 5;
+                samples += 2;
+            }
+            if (++s->framing_index >= 192)
+                s->framing_index = 0;
+        }
+    }
+
+    *got_packet_ptr = 1;
+
+    return 0;
+}
+
+AVCodec ff_s302m_encoder = { /* codec table entry wiring up the SMPTE 302M encoder callbacks */
+    .name                  = "s302m",
+    .long_name             = NULL_IF_CONFIG_SMALL("SMPTE 302M"),
+    .type                  = AVMEDIA_TYPE_AUDIO,
+    .id                    = AV_CODEC_ID_S302M,
+    .priv_data_size        = sizeof(S302MEncContext),
+    .init                  = s302m_encode_init,
+    .encode2               = s302m_encode2_frame,
+    .sample_fmts           = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S32, /* the two formats s302m_encode_init switches on */
+                                                            AV_SAMPLE_FMT_S16,
+                                                            AV_SAMPLE_FMT_NONE },
+    .capabilities          = CODEC_CAP_VARIABLE_FRAME_SIZE | CODEC_CAP_EXPERIMENTAL,
+    .supported_samplerates = (const int[]) { 48000, 0 }, /* s302m_encode_init hard-codes 48000 in its bit-rate formula too */
+};
diff --git a/libavcodec/s3tc.c b/libavcodec/s3tc.c
index d35cf2a..4743d78 100644
--- a/libavcodec/s3tc.c
+++ b/libavcodec/s3tc.c
@@ -4,20 +4,20 @@
  *
  * see also: http://wiki.multimedia.cx/index.php?title=S3TC
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/s3tc.h b/libavcodec/s3tc.h
index 25237b9..2d77b3a 100644
--- a/libavcodec/s3tc.h
+++ b/libavcodec/s3tc.h
@@ -2,20 +2,20 @@
  * S3 Texture Compression (S3TC) decoding functions
  * Copyright (c) 2007 by Ivo van Poorten
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/samidec.c b/libavcodec/samidec.c
new file mode 100644
index 0000000..39ac608
--- /dev/null
+++ b/libavcodec/samidec.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2012 Clément Bœsch
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SAMI subtitle decoder
+ * @see http://msdn.microsoft.com/en-us/library/ms971327.aspx
+ */
+
+#include "ass.h"
+#include "libavutil/avstring.h"
+#include "libavutil/bprint.h"
+
+typedef struct { /* decoder private data: text buffers filled by sami_paragraph_to_ass() */
+    AVBPrint source;  // text of the most recent ID=Source paragraph; not cleared per event
+    AVBPrint content; // text of the other paragraphs; cleared at the start of every event
+    AVBPrint full;    // line handed to ff_ass_add_rect(): optional source part + content
+} SAMIContext;
+
+/* Convert the <P> paragraphs of src to ASS text in sami->full; the text of an
+ * ID=Source paragraph is kept in sami->source and emitted first as {\i1}..{\i0}.
+ * Returns 0 on success, -1 for an empty event, AVERROR(ENOMEM) on OOM. */
+static int sami_paragraph_to_ass(AVCodecContext *avctx, const char *src)
+{
+    SAMIContext *sami = avctx->priv_data;
+    int ret = 0;
+    char *tag, *dupsrc = av_strdup(src), *p = dupsrc; // av_strtok() modifies the copy
+
+    if (!dupsrc) // a failed copy must not reach the parsing below
+        return AVERROR(ENOMEM);
+    av_bprint_clear(&sami->content);
+    for (;;) {
+        char *saveptr = NULL;
+        int prev_chr_is_space = 0;
+        AVBPrint *dst = &sami->content;
+
+        /* parse & extract paragraph tag */
+        p = av_stristr(p, "<P");
+        if (!p)
+            break;
+        if (p[2] != '>' && !av_isspace(p[2])) { // avoid confusion with tags such as <PRE>
+            p++;
+            continue;
+        }
+        if (dst->len) // add a separator with the previous paragraph if there was one
+            av_bprintf(dst, "\\N");
+        tag = av_strtok(p, ">", &saveptr);
+        if (!tag || !saveptr)
+            break;
+        p = saveptr;
+
+        /* check if the current paragraph is the "source" (speaker name) */
+        if (av_stristr(tag, "ID=Source") || av_stristr(tag, "ID=\"Source\"")) {
+            dst = &sami->source;
+            av_bprint_clear(dst);
+        }
+
+        /* if empty event -> skip subtitle */
+        while (av_isspace(*p))
+            p++;
+        if (!strncmp(p, "&nbsp;", 6)) {
+            ret = -1;
+            goto end;
+        }
+
+        /* extract the text, stripping most of the tags */
+        while (*p) {
+            if (*p == '<') {
+                if (!av_strncasecmp(p, "<P", 2) && (p[2] == '>' || av_isspace(p[2])))
+                    break;
+                if (!av_strncasecmp(p, "<BR", 3))
+                    av_bprintf(dst, "\\N");
+                p += strcspn(p, ">"); // skip the tag up to '>' or the end of the string
+                if (*p)
+                    p++;
+                continue; // re-test: what follows may be another tag or the terminator
+            }
+            if (!av_isspace(*p))
+                av_bprint_chars(dst, *p, 1);
+            else if (!prev_chr_is_space)
+                av_bprint_chars(dst, ' ', 1);
+            prev_chr_is_space = av_isspace(*p);
+            p++;
+        }
+    }
+
+    av_bprint_clear(&sami->full);
+    if (sami->source.len)
+        av_bprintf(&sami->full, "{\\i1}%s{\\i0}\\N", sami->source.str);
+    av_bprintf(&sami->full, "%s\r\n", sami->content.str);
+
+end:
+    av_free(dupsrc);
+    return ret;
+}
+
+/* Decode one packet: convert its SAMI text and hand the result to ff_ass_add_rect(). */
+static int sami_decode_frame(AVCodecContext *avctx,
+                             void *data, int *got_sub_ptr, AVPacket *avpkt)
+{
+    const AVRational centisec = { 1, 100 }; // time base the packet timing is rescaled to
+    SAMIContext *ctx   = avctx->priv_data;
+    AVSubtitle *out    = data;
+    const char *markup = avpkt->data;
+    if (markup && avpkt->size > 0 && !sami_paragraph_to_ass(avctx, markup)) {
+        int start  = av_rescale_q(avpkt->pts, avctx->time_base, centisec);
+        int length = avpkt->duration == -1 ? -1 : av_rescale_q(avpkt->duration, avctx->time_base, centisec);
+        ff_ass_add_rect(out, ctx->full.str, start, length, 0);
+    }
+    *got_sub_ptr = out->num_rects > 0; // set when the subtitle holds at least one rectangle
+    return avpkt->size;
+}
+
+static av_cold int sami_init(AVCodecContext *avctx)
+{
+    SAMIContext *sami = avctx->priv_data; // .init callback of ff_sami_decoder
+    av_bprint_init(&sami->source,  0, 2048); // buffer for the ID=Source paragraph text
+    av_bprint_init(&sami->content, 0, 2048); // buffer for the other paragraphs' text
+    av_bprint_init(&sami->full,    0, 2048); // buffer for the assembled ASS line
+    return ff_ass_subtitle_header_default(avctx); // its result is returned as the init status
+}
+
+static av_cold int sami_close(AVCodecContext *avctx)
+{
+    SAMIContext *sami = avctx->priv_data; // .close callback of ff_sami_decoder
+    av_bprint_finalize(&sami->source,  NULL); // NULL: no string is requested back
+    av_bprint_finalize(&sami->content, NULL);
+    av_bprint_finalize(&sami->full,    NULL);
+    return 0; // always reports success
+}
+
+AVCodec ff_sami_decoder = { /* descriptor of the SAMI subtitle decoder */
+    .name           = "sami",
+    .long_name      = NULL_IF_CONFIG_SMALL("SAMI subtitle"),
+    .type           = AVMEDIA_TYPE_SUBTITLE,
+    .id             = AV_CODEC_ID_SAMI,
+    .priv_data_size = sizeof(SAMIContext), // three AVBPrint text buffers
+    .init           = sami_init,           // initializes the buffers, sets the default ASS header
+    .close          = sami_close,          // finalizes the buffers
+    .decode         = sami_decode_frame,   // one packet -> at most one ff_ass_add_rect() call
+};
diff --git a/libavcodec/sanm.c b/libavcodec/sanm.c
index 41c6551..9e5ec54 100644
--- a/libavcodec/sanm.c
+++ b/libavcodec/sanm.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2006 Cyril Zorin
  * Copyright (c) 2011 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -105,108 +105,159 @@ static const int8_t motion_vectors[256][2] = {
 };
 
 static const int8_t c37_mv[] = {
-    0,   0,   1,   0,   2,   0,   3,   0,   5,   0,   8,   0,  13,   0,  21,
-    0,  -1,   0,  -2,   0,  -3,   0,  -5,   0,  -8,   0, -13,   0, -17,   0,
-  -21,   0,   0,   1,   1,   1,   2,   1,   3,   1,   5,   1,   8,   1,  13,
-    1,  21,   1,  -1,   1,  -2,   1,  -3,   1,  -5,   1,  -8,   1, -13,   1,
-  -17,   1, -21,   1,   0,   2,   1,   2,   2,   2,   3,   2,   5,   2,   8,
-    2,  13,   2,  21,   2,  -1,   2,  -2,   2,  -3,   2,  -5,   2,  -8,   2,
-  -13,   2, -17,   2, -21,   2,   0,   3,   1,   3,   2,   3,   3,   3,   5,
-    3,   8,   3,  13,   3,  21,   3,  -1,   3,  -2,   3,  -3,   3,  -5,   3,
-   -8,   3, -13,   3, -17,   3, -21,   3,   0,   5,   1,   5,   2,   5,   3,
-    5,   5,   5,   8,   5,  13,   5,  21,   5,  -1,   5,  -2,   5,  -3,   5,
-   -5,   5,  -8,   5, -13,   5, -17,   5, -21,   5,   0,   8,   1,   8,   2,
-    8,   3,   8,   5,   8,   8,   8,  13,   8,  21,   8,  -1,   8,  -2,   8,
-   -3,   8,  -5,   8,  -8,   8, -13,   8, -17,   8, -21,   8,   0,  13,   1,
-   13,   2,  13,   3,  13,   5,  13,   8,  13,  13,  13,  21,  13,  -1,  13,
-   -2,  13,  -3,  13,  -5,  13,  -8,  13, -13,  13, -17,  13, -21,  13,   0,
-   21,   1,  21,   2,  21,   3,  21,   5,  21,   8,  21,  13,  21,  21,  21,
-   -1,  21,  -2,  21,  -3,  21,  -5,  21,  -8,  21, -13,  21, -17,  21, -21,
-   21,   0,  -1,   1,  -1,   2,  -1,   3,  -1,   5,  -1,   8,  -1,  13,  -1,
-   21,  -1,  -1,  -1,  -2,  -1,  -3,  -1,  -5,  -1,  -8,  -1, -13,  -1, -17,
-   -1, -21,  -1,   0,  -2,   1,  -2,   2,  -2,   3,  -2,   5,  -2,   8,  -2,
-   13,  -2,  21,  -2,  -1,  -2,  -2,  -2,  -3,  -2,  -5,  -2,  -8,  -2, -13,
-   -2, -17,  -2, -21,  -2,   0,  -3,   1,  -3,   2,  -3,   3,  -3,   5,  -3,
-    8,  -3,  13,  -3,  21,  -3,  -1,  -3,  -2,  -3,  -3,  -3,  -5,  -3,  -8,
-   -3, -13,  -3, -17,  -3, -21,  -3,   0,  -5,   1,  -5,   2,  -5,   3,  -5,
-    5,  -5,   8,  -5,  13,  -5,  21,  -5,  -1,  -5,  -2,  -5,  -3,  -5,  -5,
-   -5,  -8,  -5, -13,  -5, -17,  -5, -21,  -5,   0,  -8,   1,  -8,   2,  -8,
-    3,  -8,   5,  -8,   8,  -8,  13,  -8,  21,  -8,  -1,  -8,  -2,  -8,  -3,
-   -8,  -5,  -8,  -8,  -8, -13,  -8, -17,  -8, -21,  -8,   0, -13,   1, -13,
-    2, -13,   3, -13,   5, -13,   8, -13,  13, -13,  21, -13,  -1, -13,  -2,
-  -13,  -3, -13,  -5, -13,  -8, -13, -13, -13, -17, -13, -21, -13,   0, -17,
-    1, -17,   2, -17,   3, -17,   5, -17,   8, -17,  13, -17,  21, -17,  -1,
-  -17,  -2, -17,  -3, -17,  -5, -17,  -8, -17, -13, -17, -17, -17, -21, -17,
-    0, -21,   1, -21,   2, -21,   3, -21,   5, -21,   8, -21,  13, -21,  21,
-  -21,  -1, -21,  -2, -21,  -3, -21,  -5, -21,  -8, -21, -13, -21, -17, -21,
-    0,   0,  -8, -29,   8, -29, -18, -25,  17, -25,   0, -23,  -6, -22,   6,
-  -22, -13, -19,  12, -19,   0, -18,  25, -18, -25, -17,  -5, -17,   5, -17,
-  -10, -15,  10, -15,   0, -14,  -4, -13,   4, -13,  19, -13, -19, -12,  -8,
-  -11,  -2, -11,   0, -11,   2, -11,   8, -11, -15, -10,  -4, -10,   4, -10,
-   15, -10,  -6,  -9,  -1,  -9,   1,  -9,   6,  -9, -29,  -8, -11,  -8,  -8,
-   -8,  -3,  -8,   3,  -8,   8,  -8,  11,  -8,  29,  -8,  -5,  -7,  -2,  -7,
-    0,  -7,   2,  -7,   5,  -7, -22,  -6,  -9,  -6,  -6,  -6,  -3,  -6,  -1,
-   -6,   1,  -6,   3,  -6,   6,  -6,   9,  -6,  22,  -6, -17,  -5,  -7,  -5,
-   -4,  -5,  -2,  -5,   0,  -5,   2,  -5,   4,  -5,   7,  -5,  17,  -5, -13,
-   -4, -10,  -4,  -5,  -4,  -3,  -4,  -1,  -4,   0,  -4,   1,  -4,   3,  -4,
-    5,  -4,  10,  -4,  13,  -4,  -8,  -3,  -6,  -3,  -4,  -3,  -3,  -3,  -2,
-   -3,  -1,  -3,   0,  -3,   1,  -3,   2,  -3,   4,  -3,   6,  -3,   8,  -3,
-  -11,  -2,  -7,  -2,  -5,  -2,  -3,  -2,  -2,  -2,  -1,  -2,   0,  -2,   1,
-   -2,   2,  -2,   3,  -2,   5,  -2,   7,  -2,  11,  -2,  -9,  -1,  -6,  -1,
-   -4,  -1,  -3,  -1,  -2,  -1,  -1,  -1,   0,  -1,   1,  -1,   2,  -1,   3,
-   -1,   4,  -1,   6,  -1,   9,  -1, -31,   0, -23,   0, -18,   0, -14,   0,
-  -11,   0,  -7,   0,  -5,   0,  -4,   0,  -3,   0,  -2,   0,  -1,   0,   0,
-  -31,   1,   0,   2,   0,   3,   0,   4,   0,   5,   0,   7,   0,  11,   0,
-   14,   0,  18,   0,  23,   0,  31,   0,  -9,   1,  -6,   1,  -4,   1,  -3,
-    1,  -2,   1,  -1,   1,   0,   1,   1,   1,   2,   1,   3,   1,   4,   1,
-    6,   1,   9,   1, -11,   2,  -7,   2,  -5,   2,  -3,   2,  -2,   2,  -1,
-    2,   0,   2,   1,   2,   2,   2,   3,   2,   5,   2,   7,   2,  11,   2,
-   -8,   3,  -6,   3,  -4,   3,  -2,   3,  -1,   3,   0,   3,   1,   3,   2,
-    3,   3,   3,   4,   3,   6,   3,   8,   3, -13,   4, -10,   4,  -5,   4,
-   -3,   4,  -1,   4,   0,   4,   1,   4,   3,   4,   5,   4,  10,   4,  13,
-    4, -17,   5,  -7,   5,  -4,   5,  -2,   5,   0,   5,   2,   5,   4,   5,
-    7,   5,  17,   5, -22,   6,  -9,   6,  -6,   6,  -3,   6,  -1,   6,   1,
-    6,   3,   6,   6,   6,   9,   6,  22,   6,  -5,   7,  -2,   7,   0,   7,
-    2,   7,   5,   7, -29,   8, -11,   8,  -8,   8,  -3,   8,   3,   8,   8,
-    8,  11,   8,  29,   8,  -6,   9,  -1,   9,   1,   9,   6,   9, -15,  10,
-   -4,  10,   4,  10,  15,  10,  -8,  11,  -2,  11,   0,  11,   2,  11,   8,
-   11,  19,  12, -19,  13,  -4,  13,   4,  13,   0,  14, -10,  15,  10,  15,
-   -5,  17,   5,  17,  25,  17, -25,  18,   0,  18, -12,  19,  13,  19,  -6,
-   22,   6,  22,   0,  23, -17,  25,  18,  25,  -8,  29,   8,  29,   0,  31,
-    0,   0,  -6, -22,   6, -22, -13, -19,  12, -19,   0, -18,  -5, -17,   5,
-  -17, -10, -15,  10, -15,   0, -14,  -4, -13,   4, -13,  19, -13, -19, -12,
-   -8, -11,  -2, -11,   0, -11,   2, -11,   8, -11, -15, -10,  -4, -10,   4,
-  -10,  15, -10,  -6,  -9,  -1,  -9,   1,  -9,   6,  -9, -11,  -8,  -8,  -8,
-   -3,  -8,   0,  -8,   3,  -8,   8,  -8,  11,  -8,  -5,  -7,  -2,  -7,   0,
-   -7,   2,  -7,   5,  -7, -22,  -6,  -9,  -6,  -6,  -6,  -3,  -6,  -1,  -6,
-    1,  -6,   3,  -6,   6,  -6,   9,  -6,  22,  -6, -17,  -5,  -7,  -5,  -4,
-   -5,  -2,  -5,  -1,  -5,   0,  -5,   1,  -5,   2,  -5,   4,  -5,   7,  -5,
-   17,  -5, -13,  -4, -10,  -4,  -5,  -4,  -3,  -4,  -2,  -4,  -1,  -4,   0,
-   -4,   1,  -4,   2,  -4,   3,  -4,   5,  -4,  10,  -4,  13,  -4,  -8,  -3,
-   -6,  -3,  -4,  -3,  -3,  -3,  -2,  -3,  -1,  -3,   0,  -3,   1,  -3,   2,
-   -3,   3,  -3,   4,  -3,   6,  -3,   8,  -3, -11,  -2,  -7,  -2,  -5,  -2,
-   -4,  -2,  -3,  -2,  -2,  -2,  -1,  -2,   0,  -2,   1,  -2,   2,  -2,   3,
-   -2,   4,  -2,   5,  -2,   7,  -2,  11,  -2,  -9,  -1,  -6,  -1,  -5,  -1,
-   -4,  -1,  -3,  -1,  -2,  -1,  -1,  -1,   0,  -1,   1,  -1,   2,  -1,   3,
-   -1,   4,  -1,   5,  -1,   6,  -1,   9,  -1, -23,   0, -18,   0, -14,   0,
-  -11,   0,  -7,   0,  -5,   0,  -4,   0,  -3,   0,  -2,   0,  -1,   0,   0,
-  -23,   1,   0,   2,   0,   3,   0,   4,   0,   5,   0,   7,   0,  11,   0,
-   14,   0,  18,   0,  23,   0,  -9,   1,  -6,   1,  -5,   1,  -4,   1,  -3,
-    1,  -2,   1,  -1,   1,   0,   1,   1,   1,   2,   1,   3,   1,   4,   1,
-    5,   1,   6,   1,   9,   1, -11,   2,  -7,   2,  -5,   2,  -4,   2,  -3,
-    2,  -2,   2,  -1,   2,   0,   2,   1,   2,   2,   2,   3,   2,   4,   2,
-    5,   2,   7,   2,  11,   2,  -8,   3,  -6,   3,  -4,   3,  -3,   3,  -2,
-    3,  -1,   3,   0,   3,   1,   3,   2,   3,   3,   3,   4,   3,   6,   3,
-    8,   3, -13,   4, -10,   4,  -5,   4,  -3,   4,  -2,   4,  -1,   4,   0,
-    4,   1,   4,   2,   4,   3,   4,   5,   4,  10,   4,  13,   4, -17,   5,
-   -7,   5,  -4,   5,  -2,   5,  -1,   5,   0,   5,   1,   5,   2,   5,   4,
-    5,   7,   5,  17,   5, -22,   6,  -9,   6,  -6,   6,  -3,   6,  -1,   6,
-    1,   6,   3,   6,   6,   6,   9,   6,  22,   6,  -5,   7,  -2,   7,   0,
-    7,   2,   7,   5,   7, -11,   8,  -8,   8,  -3,   8,   0,   8,   3,   8,
-    8,   8,  11,   8,  -6,   9,  -1,   9,   1,   9,   6,   9, -15,  10,  -4,
-   10,   4,  10,  15,  10,  -8,  11,  -2,  11,   0,  11,   2,  11,   8,  11,
-   19,  12, -19,  13,  -4,  13,   4,  13,   0,  14, -10,  15,  10,  15,  -5,
-   17,   5,  17,   0,  18, -12,  19,  13,  19,  -6,  22,   6,  22,   0,  23,
+    0,   0,   1,   0,   2,   0,   3,   0,   5,   0,
+    8,   0,  13,   0,  21,   0,  -1,   0,  -2,   0,
+   -3,   0,  -5,   0,  -8,   0, -13,   0, -17,   0,
+  -21,   0,   0,   1,   1,   1,   2,   1,   3,   1,
+    5,   1,   8,   1,  13,   1,  21,   1,  -1,   1,
+   -2,   1,  -3,   1,  -5,   1,  -8,   1, -13,   1,
+  -17,   1, -21,   1,   0,   2,   1,   2,   2,   2,
+    3,   2,   5,   2,   8,   2,  13,   2,  21,   2,
+   -1,   2,  -2,   2,  -3,   2,  -5,   2,  -8,   2,
+  -13,   2, -17,   2, -21,   2,   0,   3,   1,   3,
+    2,   3,   3,   3,   5,   3,   8,   3,  13,   3,
+   21,   3,  -1,   3,  -2,   3,  -3,   3,  -5,   3,
+   -8,   3, -13,   3, -17,   3, -21,   3,   0,   5,
+    1,   5,   2,   5,   3,   5,   5,   5,   8,   5,
+   13,   5,  21,   5,  -1,   5,  -2,   5,  -3,   5,
+   -5,   5,  -8,   5, -13,   5, -17,   5, -21,   5,
+    0,   8,   1,   8,   2,   8,   3,   8,   5,   8,
+    8,   8,  13,   8,  21,   8,  -1,   8,  -2,   8,
+   -3,   8,  -5,   8,  -8,   8, -13,   8, -17,   8,
+  -21,   8,   0,  13,   1,  13,   2,  13,   3,  13,
+    5,  13,   8,  13,  13,  13,  21,  13,  -1,  13,
+   -2,  13,  -3,  13,  -5,  13,  -8,  13, -13,  13,
+  -17,  13, -21,  13,   0,  21,   1,  21,   2,  21,
+    3,  21,   5,  21,   8,  21,  13,  21,  21,  21,
+   -1,  21,  -2,  21,  -3,  21,  -5,  21,  -8,  21,
+  -13,  21, -17,  21, -21,  21,   0,  -1,   1,  -1,
+    2,  -1,   3,  -1,   5,  -1,   8,  -1,  13,  -1,
+   21,  -1,  -1,  -1,  -2,  -1,  -3,  -1,  -5,  -1,
+   -8,  -1, -13,  -1, -17,  -1, -21,  -1,   0,  -2,
+    1,  -2,   2,  -2,   3,  -2,   5,  -2,   8,  -2,
+   13,  -2,  21,  -2,  -1,  -2,  -2,  -2,  -3,  -2,
+   -5,  -2,  -8,  -2, -13,  -2, -17,  -2, -21,  -2,
+    0,  -3,   1,  -3,   2,  -3,   3,  -3,   5,  -3,
+    8,  -3,  13,  -3,  21,  -3,  -1,  -3,  -2,  -3,
+   -3,  -3,  -5,  -3,  -8,  -3, -13,  -3, -17,  -3,
+  -21,  -3,   0,  -5,   1,  -5,   2,  -5,   3,  -5,
+    5,  -5,   8,  -5,  13,  -5,  21,  -5,  -1,  -5,
+   -2,  -5,  -3,  -5,  -5,  -5,  -8,  -5, -13,  -5,
+  -17,  -5, -21,  -5,   0,  -8,   1,  -8,   2,  -8,
+    3,  -8,   5,  -8,   8,  -8,  13,  -8,  21,  -8,
+   -1,  -8,  -2,  -8,  -3,  -8,  -5,  -8,  -8,  -8,
+  -13,  -8, -17,  -8, -21,  -8,   0, -13,   1, -13,
+    2, -13,   3, -13,   5, -13,   8, -13,  13, -13,
+   21, -13,  -1, -13,  -2, -13,  -3, -13,  -5, -13,
+   -8, -13, -13, -13, -17, -13, -21, -13,   0, -17,
+    1, -17,   2, -17,   3, -17,   5, -17,   8, -17,
+   13, -17,  21, -17,  -1, -17,  -2, -17,  -3, -17,
+   -5, -17,  -8, -17, -13, -17, -17, -17, -21, -17,
+    0, -21,   1, -21,   2, -21,   3, -21,   5, -21,
+    8, -21,  13, -21,  21, -21,  -1, -21,  -2, -21,
+   -3, -21,  -5, -21,  -8, -21, -13, -21, -17, -21,
+    0,   0,  -8, -29,   8, -29, -18, -25,  17, -25,
+    0, -23,  -6, -22,   6, -22, -13, -19,  12, -19,
+    0, -18,  25, -18, -25, -17,  -5, -17,   5, -17,
+  -10, -15,  10, -15,   0, -14,  -4, -13,   4, -13,
+   19, -13, -19, -12,  -8, -11,  -2, -11,   0, -11,
+    2, -11,   8, -11, -15, -10,  -4, -10,   4, -10,
+   15, -10,  -6,  -9,  -1,  -9,   1,  -9,   6,  -9,
+  -29,  -8, -11,  -8,  -8,  -8,  -3,  -8,   3,  -8,
+    8,  -8,  11,  -8,  29,  -8,  -5,  -7,  -2,  -7,
+    0,  -7,   2,  -7,   5,  -7, -22,  -6,  -9,  -6,
+   -6,  -6,  -3,  -6,  -1,  -6,   1,  -6,   3,  -6,
+    6,  -6,   9,  -6,  22,  -6, -17,  -5,  -7,  -5,
+   -4,  -5,  -2,  -5,   0,  -5,   2,  -5,   4,  -5,
+    7,  -5,  17,  -5, -13,  -4, -10,  -4,  -5,  -4,
+   -3,  -4,  -1,  -4,   0,  -4,   1,  -4,   3,  -4,
+    5,  -4,  10,  -4,  13,  -4,  -8,  -3,  -6,  -3,
+   -4,  -3,  -3,  -3,  -2,  -3,  -1,  -3,   0,  -3,
+    1,  -3,   2,  -3,   4,  -3,   6,  -3,   8,  -3,
+  -11,  -2,  -7,  -2,  -5,  -2,  -3,  -2,  -2,  -2,
+   -1,  -2,   0,  -2,   1,  -2,   2,  -2,   3,  -2,
+    5,  -2,   7,  -2,  11,  -2,  -9,  -1,  -6,  -1,
+   -4,  -1,  -3,  -1,  -2,  -1,  -1,  -1,   0,  -1,
+    1,  -1,   2,  -1,   3,  -1,   4,  -1,   6,  -1,
+    9,  -1, -31,   0, -23,   0, -18,   0, -14,   0,
+  -11,   0,  -7,   0,  -5,   0,  -4,   0,  -3,   0,
+   -2,   0,  -1,   0,   0, -31,   1,   0,   2,   0,
+    3,   0,   4,   0,   5,   0,   7,   0,  11,   0,
+   14,   0,  18,   0,  23,   0,  31,   0,  -9,   1,
+   -6,   1,  -4,   1,  -3,   1,  -2,   1,  -1,   1,
+    0,   1,   1,   1,   2,   1,   3,   1,   4,   1,
+    6,   1,   9,   1, -11,   2,  -7,   2,  -5,   2,
+   -3,   2,  -2,   2,  -1,   2,   0,   2,   1,   2,
+    2,   2,   3,   2,   5,   2,   7,   2,  11,   2,
+   -8,   3,  -6,   3,  -4,   3,  -2,   3,  -1,   3,
+    0,   3,   1,   3,   2,   3,   3,   3,   4,   3,
+    6,   3,   8,   3, -13,   4, -10,   4,  -5,   4,
+   -3,   4,  -1,   4,   0,   4,   1,   4,   3,   4,
+    5,   4,  10,   4,  13,   4, -17,   5,  -7,   5,
+   -4,   5,  -2,   5,   0,   5,   2,   5,   4,   5,
+    7,   5,  17,   5, -22,   6,  -9,   6,  -6,   6,
+   -3,   6,  -1,   6,   1,   6,   3,   6,   6,   6,
+    9,   6,  22,   6,  -5,   7,  -2,   7,   0,   7,
+    2,   7,   5,   7, -29,   8, -11,   8,  -8,   8,
+   -3,   8,   3,   8,   8,   8,  11,   8,  29,   8,
+   -6,   9,  -1,   9,   1,   9,   6,   9, -15,  10,
+   -4,  10,   4,  10,  15,  10,  -8,  11,  -2,  11,
+    0,  11,   2,  11,   8,  11,  19,  12, -19,  13,
+   -4,  13,   4,  13,   0,  14, -10,  15,  10,  15,
+   -5,  17,   5,  17,  25,  17, -25,  18,   0,  18,
+  -12,  19,  13,  19,  -6,  22,   6,  22,   0,  23,
+  -17,  25,  18,  25,  -8,  29,   8,  29,   0,  31,
+    0,   0,  -6, -22,   6, -22, -13, -19,  12, -19,
+    0, -18,  -5, -17,   5, -17, -10, -15,  10, -15,
+    0, -14,  -4, -13,   4, -13,  19, -13, -19, -12,
+   -8, -11,  -2, -11,   0, -11,   2, -11,   8, -11,
+  -15, -10,  -4, -10,   4, -10,  15, -10,  -6,  -9,
+   -1,  -9,   1,  -9,   6,  -9, -11,  -8,  -8,  -8,
+   -3,  -8,   0,  -8,   3,  -8,   8,  -8,  11,  -8,
+   -5,  -7,  -2,  -7,   0,  -7,   2,  -7,   5,  -7,
+  -22,  -6,  -9,  -6,  -6,  -6,  -3,  -6,  -1,  -6,
+    1,  -6,   3,  -6,   6,  -6,   9,  -6,  22,  -6,
+  -17,  -5,  -7,  -5,  -4,  -5,  -2,  -5,  -1,  -5,
+    0,  -5,   1,  -5,   2,  -5,   4,  -5,   7,  -5,
+   17,  -5, -13,  -4, -10,  -4,  -5,  -4,  -3,  -4,
+   -2,  -4,  -1,  -4,   0,  -4,   1,  -4,   2,  -4,
+    3,  -4,   5,  -4,  10,  -4,  13,  -4,  -8,  -3,
+   -6,  -3,  -4,  -3,  -3,  -3,  -2,  -3,  -1,  -3,
+    0,  -3,   1,  -3,   2,  -3,   3,  -3,   4,  -3,
+    6,  -3,   8,  -3, -11,  -2,  -7,  -2,  -5,  -2,
+   -4,  -2,  -3,  -2,  -2,  -2,  -1,  -2,   0,  -2,
+    1,  -2,   2,  -2,   3,  -2,   4,  -2,   5,  -2,
+    7,  -2,  11,  -2,  -9,  -1,  -6,  -1,  -5,  -1,
+   -4,  -1,  -3,  -1,  -2,  -1,  -1,  -1,   0,  -1,
+    1,  -1,   2,  -1,   3,  -1,   4,  -1,   5,  -1,
+    6,  -1,   9,  -1, -23,   0, -18,   0, -14,   0,
+  -11,   0,  -7,   0,  -5,   0,  -4,   0,  -3,   0,
+   -2,   0,  -1,   0,   0, -23,   1,   0,   2,   0,
+    3,   0,   4,   0,   5,   0,   7,   0,  11,   0,
+   14,   0,  18,   0,  23,   0,  -9,   1,  -6,   1,
+   -5,   1,  -4,   1,  -3,   1,  -2,   1,  -1,   1,
+    0,   1,   1,   1,   2,   1,   3,   1,   4,   1,
+    5,   1,   6,   1,   9,   1, -11,   2,  -7,   2,
+   -5,   2,  -4,   2,  -3,   2,  -2,   2,  -1,   2,
+    0,   2,   1,   2,   2,   2,   3,   2,   4,   2,
+    5,   2,   7,   2,  11,   2,  -8,   3,  -6,   3,
+   -4,   3,  -3,   3,  -2,   3,  -1,   3,   0,   3,
+    1,   3,   2,   3,   3,   3,   4,   3,   6,   3,
+    8,   3, -13,   4, -10,   4,  -5,   4,  -3,   4,
+   -2,   4,  -1,   4,   0,   4,   1,   4,   2,   4,
+    3,   4,   5,   4,  10,   4,  13,   4, -17,   5,
+   -7,   5,  -4,   5,  -2,   5,  -1,   5,   0,   5,
+    1,   5,   2,   5,   4,   5,   7,   5,  17,   5,
+  -22,   6,  -9,   6,  -6,   6,  -3,   6,  -1,   6,
+    1,   6,   3,   6,   6,   6,   9,   6,  22,   6,
+   -5,   7,  -2,   7,   0,   7,   2,   7,   5,   7,
+  -11,   8,  -8,   8,  -3,   8,   0,   8,   3,   8,
+    8,   8,  11,   8,  -6,   9,  -1,   9,   1,   9,
+    6,   9, -15,  10,  -4,  10,   4,  10,  15,  10,
+   -8,  11,  -2,  11,   0,  11,   2,  11,   8,  11,
+   19,  12, -19,  13,  -4,  13,   4,  13,   0,  14,
+  -10,  15,  10,  15,  -5,  17,   5,  17,   0,  18,
+  -12,  19,  13,  19,  -6,  22,   6,  22,   0,  23,
 };
 
 typedef struct SANMVideoContext {
@@ -460,7 +511,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
         }
 
         ctx->subversion = AV_RL16(avctx->extradata);
-        for (i = 0; i < 256; i++)
+        for (i = 0; i < PALETTE_SIZE; i++)
             ctx->pal[i] = 0xFFU << 24 | AV_RL32(avctx->extradata + 2 + i * 4);
     }
 
@@ -1466,7 +1517,7 @@ static int decode_frame(AVCodecContext *avctx, void *data,
 
 AVCodec ff_sanm_decoder = {
     .name           = "sanm",
-    .long_name      = NULL_IF_CONFIG_SMALL("LucasArts SANM video"),
+    .long_name      = NULL_IF_CONFIG_SMALL("LucasArts SANM/Smush video"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_SANM,
     .priv_data_size = sizeof(SANMVideoContext),
diff --git a/libavcodec/sbr.h b/libavcodec/sbr.h
index a47ad6e..e28fccd 100644
--- a/libavcodec/sbr.h
+++ b/libavcodec/sbr.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2008-2009 Robert Swain ( rob opendot cl )
  * Copyright (c) 2010      Alex Converse <alex.converse@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -34,6 +34,8 @@
 #include "aacps.h"
 #include "sbrdsp.h"
 
+typedef struct AACContext AACContext;
+
 /**
  * Spectral Band Replication header - spectrum parameters that invoke a reset if they differ from the previous header.
  */
@@ -108,10 +110,31 @@ typedef struct SBRData {
     /** @} */
 } SBRData;
 
+typedef struct SpectralBandReplication SpectralBandReplication;
+
+/**
+ * aacsbr function pointers (struct SpectralBandReplication holds one as member "c")
+ */
+typedef struct AACSBRContext {
+    int (*sbr_lf_gen)(AACContext *ac, SpectralBandReplication *sbr,
+                      float X_low[32][40][2], const float W[2][32][32][2],
+                      int buf_idx);
+    void (*sbr_hf_assemble)(float Y1[38][64][2],
+                            const float X_high[64][40][2],
+                            SpectralBandReplication *sbr, SBRData *ch_data,
+                            const int e_a[2]);
+    int (*sbr_x_gen)(SpectralBandReplication *sbr, float X[2][38][64],
+                     const float Y0[38][64][2], const float Y1[38][64][2],
+                     const float X_low[32][40][2], int ch);
+    void (*sbr_hf_inverse_filter)(SBRDSPContext *dsp,
+                                  float (*alpha0)[2], float (*alpha1)[2],
+                                  const float X_low[32][40][2], int k0);
+} AACSBRContext; // NOTE(review): presumably filled in by the aacsbr init code - confirm
+
 /**
  * Spectral Band Replication
  */
-typedef struct SpectralBandReplication {
+struct SpectralBandReplication {
     int                sample_rate;
     int                start;
     int                reset;
@@ -153,7 +176,7 @@ typedef struct SpectralBandReplication {
     ///Frequency borders for noise floors
     uint16_t           f_tablenoise[6];
     ///Frequency borders for the limiter
-    uint16_t           f_tablelim[29];
+    uint16_t           f_tablelim[30];
     unsigned           num_patches;
     uint8_t            patch_num_subbands[6];
     uint8_t            patch_start_subband[6];
@@ -184,6 +207,7 @@ typedef struct SpectralBandReplication {
     FFTContext         mdct_ana;
     FFTContext         mdct;
     SBRDSPContext      dsp;
-} SpectralBandReplication;
+    AACSBRContext      c;
+};
 
 #endif /* AVCODEC_SBR_H */
diff --git a/libavcodec/sbrdsp.c b/libavcodec/sbrdsp.c
index b7917dc..e4f053b 100644
--- a/libavcodec/sbrdsp.c
+++ b/libavcodec/sbrdsp.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2008-2009 Robert Swain ( rob opendot cl )
  * Copyright (c) 2009-2010 Alex Converse <alex.converse@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -72,6 +72,7 @@ static void sbr_qmf_pre_shuffle_c(float *z)
         zi[64 + 2 * k + 2].i = zi[63 - k].i ^ (1U << 31);
         zi[64 + 2 * k + 3].i = zi[ k + 2].i;
     }
+
     zi[64 + 2 * 31 + 0].i = zi[64 - 31].i ^ (1U << 31);
     zi[64 + 2 * 31 + 1].i = zi[31 +  1].i;
 }
@@ -289,4 +290,6 @@ av_cold void ff_sbrdsp_init(SBRDSPContext *s)
         ff_sbrdsp_init_arm(s);
     if (ARCH_X86)
         ff_sbrdsp_init_x86(s);
+    if (ARCH_MIPS)
+        ff_sbrdsp_init_mips(s);
 }
diff --git a/libavcodec/sbrdsp.h b/libavcodec/sbrdsp.h
index 07235c6..1c1bcdf 100644
--- a/libavcodec/sbrdsp.h
+++ b/libavcodec/sbrdsp.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2012 Mans Rullgard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -47,5 +47,6 @@ extern const float ff_sbr_noise_table[][2];
 void ff_sbrdsp_init(SBRDSPContext *s);
 void ff_sbrdsp_init_arm(SBRDSPContext *s);
 void ff_sbrdsp_init_x86(SBRDSPContext *s);
+void ff_sbrdsp_init_mips(SBRDSPContext *s);
 
 #endif /* AVCODEC_SBRDSP_H */
diff --git a/libavcodec/sgi.h b/libavcodec/sgi.h
index 3c47d3a..5ec891e 100644
--- a/libavcodec/sgi.h
+++ b/libavcodec/sgi.h
@@ -2,20 +2,20 @@
  * SGI image encoder
  * Xiaohui Sun <tjnksxh@hotmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/sgidec.c b/libavcodec/sgidec.c
index ebca2e8..6f51ec3 100644
--- a/libavcodec/sgidec.c
+++ b/libavcodec/sgidec.c
@@ -2,24 +2,25 @@
  * SGI image decoder
  * Todd Kirby <doubleshot@pacbell.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "libavutil/imgutils.h"
+#include "libavutil/avassert.h"
 #include "avcodec.h"
 #include "bytestream.h"
 #include "internal.h"
@@ -41,7 +42,7 @@ typedef struct SgiState {
  * @param out_buf Points to one line after the output buffer.
  * @param len length of out_buf in bytes
  * @param pixelstride pixel stride of input buffer
- * @return size of output in bytes, -1 if buffer overflows
+ * @return size of output in bytes, else return error code.
  */
 static int expand_rle_row8(SgiState *s, uint8_t *out_buf,
                            int len, int pixelstride)
@@ -59,7 +60,7 @@ static int expand_rle_row8(SgiState *s, uint8_t *out_buf,
         }
 
         /* Check for buffer overflow. */
-        if (pixelstride * (count - 1) >= len) {
+        if (out_end - out_buf <= pixelstride * (count - 1)) {
             av_log(s->avctx, AV_LOG_ERROR, "Invalid pixel count.\n");
             return AVERROR_INVALIDDATA;
         }
@@ -125,7 +126,7 @@ static int expand_rle_row16(SgiState *s, uint16_t *out_buf,
  * Read a run length encoded SGI image.
  * @param out_buf output buffer
  * @param s the current image state
- * @return 0 if no error, else return error number.
+ * @return 0 if no error, else return error code.
  */
 static int read_rle_sgi(uint8_t *out_buf, SgiState *s)
 {
@@ -163,7 +164,7 @@ static int read_rle_sgi(uint8_t *out_buf, SgiState *s)
  * Read an uncompressed SGI image.
  * @param out_buf output buffer
  * @param s the current image state
- * @return 0 if read success, otherwise return -1.
+ * @return 0 if read success, else return error code.
  */
 static int read_uncompressed_sgi(unsigned char *out_buf, SgiState *s)
 {
@@ -215,27 +216,27 @@ static int decode_frame(AVCodecContext *avctx,
     }
 
     /* Test for SGI magic. */
-    if (bytestream2_get_be16(&s->g) != SGI_MAGIC) {
+    if (bytestream2_get_be16u(&s->g) != SGI_MAGIC) {
         av_log(avctx, AV_LOG_ERROR, "bad magic number\n");
         return AVERROR_INVALIDDATA;
     }
 
-    rle                  = bytestream2_get_byte(&s->g);
-    s->bytes_per_channel = bytestream2_get_byte(&s->g);
-    dimension            = bytestream2_get_be16(&s->g);
-    s->width             = bytestream2_get_be16(&s->g);
-    s->height            = bytestream2_get_be16(&s->g);
-    s->depth             = bytestream2_get_be16(&s->g);
+    rle                  = bytestream2_get_byteu(&s->g);
+    s->bytes_per_channel = bytestream2_get_byteu(&s->g);
+    dimension            = bytestream2_get_be16u(&s->g);
+    s->width             = bytestream2_get_be16u(&s->g);
+    s->height            = bytestream2_get_be16u(&s->g);
+    s->depth             = bytestream2_get_be16u(&s->g);
 
     if (s->bytes_per_channel != 1 && s->bytes_per_channel != 2) {
         av_log(avctx, AV_LOG_ERROR, "wrong channel number\n");
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
 
     /* Check for supported image dimensions. */
     if (dimension != 2 && dimension != 3) {
         av_log(avctx, AV_LOG_ERROR, "wrong dimension number\n");
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
 
     if (s->depth == SGI_GRAYSCALE) {
@@ -246,17 +247,15 @@ static int decode_frame(AVCodecContext *avctx,
         avctx->pix_fmt = s->bytes_per_channel == 2 ? AV_PIX_FMT_RGBA64BE : AV_PIX_FMT_RGBA;
     } else {
         av_log(avctx, AV_LOG_ERROR, "wrong picture format\n");
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
 
     ret = ff_set_dimensions(avctx, s->width, s->height);
     if (ret < 0)
         return ret;
 
-    if (ff_get_buffer(avctx, p, 0) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed.\n");
-        return -1;
-    }
+    if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
+        return ret;
 
     p->pict_type = AV_PICTURE_TYPE_I;
     p->key_frame = 1;
@@ -273,13 +272,11 @@ static int decode_frame(AVCodecContext *avctx,
     } else {
         ret = read_uncompressed_sgi(out_buf, s);
     }
-
-    if (ret == 0) {
-        *got_frame = 1;
-        return avpkt->size;
-    } else {
+    if (ret)
         return ret;
-    }
+
+    *got_frame = 1;
+    return avpkt->size;
 }
 
 static av_cold int sgi_decode_init(AVCodecContext *avctx)
diff --git a/libavcodec/sgienc.c b/libavcodec/sgienc.c
index bfc0995..21026af 100644
--- a/libavcodec/sgienc.c
+++ b/libavcodec/sgienc.c
@@ -2,20 +2,20 @@
  * SGI image encoder
  * Todd Kirby <doubleshot@pacbell.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -33,6 +33,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
     if (avctx->width > 65535 || avctx->height > 65535) {
         av_log(avctx, AV_LOG_ERROR,
                "Unsupported resolution %dx%d.\n", avctx->width, avctx->height);
+        av_log(avctx, AV_LOG_ERROR, "SGI does not support resolutions above 65535x65535\n");
         return AVERROR_INVALIDDATA;
     }
 
@@ -113,10 +114,8 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     else // assume ff_rl_encode() produces at most 2x size of input
         length += tablesize * 2 + depth * height * (2 * width + 1);
 
-    if ((ret = ff_alloc_packet(pkt, bytes_per_channel * length)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet of size %d.\n", length);
+    if ((ret = ff_alloc_packet2(avctx, pkt, bytes_per_channel * length)) < 0)
         return ret;
-    }
     buf     = pkt->data;
     end_buf = pkt->data + pkt->size;
 
@@ -184,13 +183,15 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
             for (y = 0; y < height; y++) {
                 for (x = 0; x < width * depth; x += depth)
-                    if (bytes_per_channel == 1)
+                    if (bytes_per_channel == 1) {
                         bytestream_put_byte(&buf, in_buf[x]);
-                    else
-                        if (put_be)
+                    } else {
+                        if (put_be) {
                             bytestream_put_be16(&buf, ((uint16_t *)in_buf)[x]);
-                        else
+                        } else {
                             bytestream_put_le16(&buf, ((uint16_t *)in_buf)[x]);
+                        }
+                    }
 
                 in_buf -= p->linesize[0];
             }
diff --git a/libavcodec/sgirledec.c b/libavcodec/sgirledec.c
index d7bc5a9..69d012e 100644
--- a/libavcodec/sgirledec.c
+++ b/libavcodec/sgirledec.c
@@ -2,20 +2,20 @@
  * Silicon Graphics RLE 8-bit video decoder
  * Copyright (c) 2012 Peter Ross
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -49,9 +49,9 @@ static av_cold int sgirle_decode_init(AVCodecContext *avctx)
  * Convert SGI RBG323 pixel into AV_PIX_FMT_BGR8
  * SGI RGB data is packed as 8bpp, (msb)3R 2B 3G(lsb)
  */
-#define RBG323_TO_BGR8(x) (((x << 3) & 0xC0) |                                \
-                           ((x << 3) & 0x38) |                                \
-                           ((x >> 5) & 7))
+#define RBG323_TO_BGR8(x) ((((x) << 3) & 0xC0) |                                \
+                           (((x) << 3) & 0x38) |                                \
+                           (((x) >> 5) & 7))
 static av_always_inline
 void rbg323_to_bgr8(uint8_t *dst, const uint8_t *src, int size)
 {
@@ -110,8 +110,8 @@ static int decode_sgirle8(AVCodecContext *avctx, uint8_t *dst,
                 v   -= length;
             } while (v > 0);
         } else {
-            av_log(avctx, AV_LOG_ERROR, "Invalid opcode %d.\n", v);
-            return AVERROR_INVALIDDATA;
+            avpriv_request_sample(avctx, "opcode %d", v);
+            return AVERROR_PATCHWELCOME;
         }
     }
     return 0;
diff --git a/libavcodec/sh4/README b/libavcodec/sh4/README
new file mode 100644
index 0000000..8dd61fe
--- /dev/null
+++ b/libavcodec/sh4/README
@@ -0,0 +1,6 @@
+SH4 optimizations have been removed in
+commit d6096a67422534918405abb46dafbbac4608cbc3
+The last revision with the optimizations is cbfc9046e1c7e295b74f252902ae6f255eef4e78
+
+If you want to maintain these (or other) SH4 optimizations in ffmpeg, then please
+contact ffmpeg-devel@ffmpeg.org
diff --git a/libavcodec/shorten.c b/libavcodec/shorten.c
index c465fff..5c4bf81 100644
--- a/libavcodec/shorten.c
+++ b/libavcodec/shorten.c
@@ -2,20 +2,20 @@
  * Shorten decoder
  * Copyright (c) 2005 Jeff Muizelaar
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -50,8 +50,12 @@
 #define ENERGYSIZE 3
 #define BITSHIFTSIZE 2
 
+#define TYPE_S8    1
+#define TYPE_U8    2
 #define TYPE_S16HL 3
+#define TYPE_U16HL 4
 #define TYPE_S16LH 5
+#define TYPE_U16LH 6
 
 #define NWRAP 3
 #define NSKIPSIZE 1
@@ -112,7 +116,6 @@ static av_cold int shorten_decode_init(AVCodecContext *avctx)
 {
     ShortenContext *s = avctx->priv_data;
     s->avctx          = avctx;
-    avctx->sample_fmt = AV_SAMPLE_FMT_S16P;
 
     return 0;
 }
@@ -175,13 +178,17 @@ static int init_offset(ShortenContext *s)
     int nblock = FFMAX(1, s->nmean);
     /* initialise offset */
     switch (s->internal_ftype) {
+    case TYPE_U8:
+        s->avctx->sample_fmt = AV_SAMPLE_FMT_U8P;
+        mean = 0x80;
+        break;
     case TYPE_S16HL:
     case TYPE_S16LH:
-        mean = 0;
+        s->avctx->sample_fmt = AV_SAMPLE_FMT_S16P;
         break;
     default:
-        av_log(s->avctx, AV_LOG_ERROR, "unknown audio type");
-        return AVERROR_INVALIDDATA;
+        av_log(s->avctx, AV_LOG_ERROR, "unknown audio type\n");
+        return AVERROR_PATCHWELCOME;
     }
 
     for (chan = 0; chan < s->channels; chan++)
@@ -193,7 +200,7 @@ static int init_offset(ShortenContext *s)
 static int decode_wave_header(AVCodecContext *avctx, const uint8_t *header,
                               int header_size)
 {
-    int len;
+    int len, bps;
     short wave_format;
     GetByteContext gb;
 
@@ -214,7 +221,7 @@ static int decode_wave_header(AVCodecContext *avctx, const uint8_t *header,
     while (bytestream2_get_le32(&gb) != MKTAG('f', 'm', 't', ' ')) {
         len = bytestream2_get_le32(&gb);
         bytestream2_skip(&gb, len);
-        if (bytestream2_get_bytes_left(&gb) < 16) {
+        if (len < 0 || bytestream2_get_bytes_left(&gb) < 16) {
             av_log(avctx, AV_LOG_ERROR, "no fmt chunk found\n");
             return AVERROR_INVALIDDATA;
         }
@@ -240,10 +247,11 @@ static int decode_wave_header(AVCodecContext *avctx, const uint8_t *header,
     avctx->sample_rate = bytestream2_get_le32(&gb);
     bytestream2_skip(&gb, 4); // skip bit rate    (represents original uncompressed bit rate)
     bytestream2_skip(&gb, 2); // skip block align (not needed)
-    avctx->bits_per_coded_sample = bytestream2_get_le16(&gb);
+    bps = bytestream2_get_le16(&gb);
+    avctx->bits_per_coded_sample = bps;
 
-    if (avctx->bits_per_coded_sample != 16) {
-        av_log(avctx, AV_LOG_ERROR, "unsupported number of bits per sample\n");
+    if (bps != 16 && bps != 8) {
+        av_log(avctx, AV_LOG_ERROR, "unsupported number of bits per sample: %d\n", bps);
         return AVERROR(ENOSYS);
     }
 
@@ -254,18 +262,6 @@ static int decode_wave_header(AVCodecContext *avctx, const uint8_t *header,
     return 0;
 }
 
-static void output_buffer(int16_t **samples, int nchan, int blocksize,
-                          int32_t **buffer)
-{
-    int i, ch;
-    for (ch = 0; ch < nchan; ch++) {
-        int32_t *in  = buffer[ch];
-        int16_t *out = samples[ch];
-        for (i = 0; i < blocksize; i++)
-            out[i] = av_clip_int16(in[i]);
-    }
-}
-
 static const int fixed_coeffs[][3] = {
     { 0,  0,  0 },
     { 1,  0,  0 },
@@ -429,13 +425,14 @@ static int shorten_decode_frame(AVCodecContext *avctx, void *data,
     /* allocate internal bitstream buffer */
     if (s->max_framesize == 0) {
         void *tmp_ptr;
-        s->max_framesize = 1024; // should hopefully be enough for the first header
+        s->max_framesize = 8192; // should hopefully be enough for the first header
         tmp_ptr = av_fast_realloc(s->bitstream, &s->allocated_bitstream_size,
                                   s->max_framesize + FF_INPUT_BUFFER_PADDING_SIZE);
         if (!tmp_ptr) {
             av_log(avctx, AV_LOG_ERROR, "error allocating bitstream buffer\n");
             return AVERROR(ENOMEM);
         }
+        memset(tmp_ptr, 0, s->allocated_bitstream_size);
         s->bitstream = tmp_ptr;
     }
 
@@ -444,7 +441,7 @@ static int shorten_decode_frame(AVCodecContext *avctx, void *data,
         buf_size       = FFMIN(buf_size, s->max_framesize - s->bitstream_size);
         input_buf_size = buf_size;
 
-        if (s->bitstream_index + s->bitstream_size + buf_size >
+        if (s->bitstream_index + s->bitstream_size + buf_size + FF_INPUT_BUFFER_PADDING_SIZE >
             s->allocated_bitstream_size) {
             memmove(s->bitstream, &s->bitstream[s->bitstream_index],
                     s->bitstream_size);
@@ -508,9 +505,16 @@ static int shorten_decode_frame(AVCodecContext *avctx, void *data,
                 while (len--)
                     get_ur_golomb_shorten(&s->gb, VERBATIM_BYTE_SIZE);
                 break;
-            case FN_BITSHIFT:
-                s->bitshift = get_ur_golomb_shorten(&s->gb, BITSHIFTSIZE);
+            case FN_BITSHIFT: {
+                unsigned bitshift = get_ur_golomb_shorten(&s->gb, BITSHIFTSIZE);
+                if (bitshift > 31) {
+                    av_log(avctx, AV_LOG_ERROR, "bitshift %d is invalid\n",
+                           bitshift);
+                    return AVERROR_PATCHWELCOME;
+                }
+                s->bitshift = bitshift;
                 break;
+            }
             case FN_BLOCKSIZE: {
                 unsigned blocksize = get_uint(s, av_log2(s->blocksize));
                 if (blocksize > s->blocksize) {
@@ -558,7 +562,7 @@ static int shorten_decode_frame(AVCodecContext *avctx, void *data,
                     sum += s->offset[channel][i];
                 coffset = sum / s->nmean;
                 if (s->version >= 2)
-                    coffset >>= FFMIN(1, s->bitshift);
+                    coffset = s->bitshift == 0 ? coffset : coffset >> s->bitshift - 1 >> 1;
             }
 
             /* decode samples for this channel */
@@ -597,15 +601,30 @@ static int shorten_decode_frame(AVCodecContext *avctx, void *data,
             /* if this is the last channel in the block, output the samples */
             s->cur_chan++;
             if (s->cur_chan == s->channels) {
+                uint8_t *samples_u8;
+                int16_t *samples_s16;
+                int chan;
+
                 /* get output buffer */
                 frame->nb_samples = s->blocksize;
-                if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-                    av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+                if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
                     return ret;
+
+                for (chan = 0; chan < s->channels; chan++) {
+                    samples_u8  = ((uint8_t **)frame->extended_data)[chan];
+                    samples_s16 = ((int16_t **)frame->extended_data)[chan];
+                    for (i = 0; i < s->blocksize; i++) {
+                        switch (s->internal_ftype) {
+                        case TYPE_U8:
+                            *samples_u8++ = av_clip_uint8(s->decoded[chan][i]);
+                            break;
+                        case TYPE_S16HL:
+                        case TYPE_S16LH:
+                            *samples_s16++ = av_clip_int16(s->decoded[chan][i]);
+                            break;
+                        }
+                    }
                 }
-                /* interleave output */
-                output_buffer((int16_t **)frame->extended_data, s->channels,
-                              s->blocksize, s->decoded);
 
                 *got_frame_ptr = 1;
             }
@@ -658,5 +677,6 @@ AVCodec ff_shorten_decoder = {
     .decode         = shorten_decode_frame,
     .capabilities   = CODEC_CAP_DELAY | CODEC_CAP_DR1,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
+                                                      AV_SAMPLE_FMT_U8P,
                                                       AV_SAMPLE_FMT_NONE },
 };
diff --git a/libavcodec/simple_idct.c b/libavcodec/simple_idct.c
index f61e9e6..eeb6279 100644
--- a/libavcodec/simple_idct.c
+++ b/libavcodec/simple_idct.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -38,6 +38,10 @@
 #include "simple_idct_template.c"
 #undef BIT_DEPTH
 
+#define BIT_DEPTH 12
+#include "simple_idct_template.c"
+#undef BIT_DEPTH
+
 /* 2x4x8 idct */
 
 #define CN_SHIFT 12
@@ -228,6 +232,8 @@ void ff_prores_idct(int16_t *block, const int16_t *qmat)
     for (i = 0; i < 8; i++)
         idctRowCondDC_10(block + i*8, 2);
 
-    for (i = 0; i < 8; i++)
+    for (i = 0; i < 8; i++) {
+        block[i] += 8192;
         idctSparseCol_10(block + i);
+    }
 }
diff --git a/libavcodec/simple_idct.h b/libavcodec/simple_idct.h
index 7f14aae..154e297 100644
--- a/libavcodec/simple_idct.h
+++ b/libavcodec/simple_idct.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -37,6 +37,11 @@ void ff_simple_idct_8(int16_t *block);
 void ff_simple_idct_put_10(uint8_t *dest, int line_size, int16_t *block);
 void ff_simple_idct_add_10(uint8_t *dest, int line_size, int16_t *block);
 void ff_simple_idct_10(int16_t *block);
+
+void ff_simple_idct_put_12(uint8_t *dest, int line_size, int16_t *block);
+void ff_simple_idct_add_12(uint8_t *dest, int line_size, int16_t *block);
+void ff_simple_idct_12(int16_t *block);
+
 /**
  * Special version of ff_simple_idct_10() which does dequantization
  * and scales by a factor of 2 more between the two IDCTs to account
diff --git a/libavcodec/simple_idct_template.c b/libavcodec/simple_idct_template.c
index b287c4f..789db8d 100644
--- a/libavcodec/simple_idct_template.c
+++ b/libavcodec/simple_idct_template.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -64,19 +64,33 @@
 #define MUL(a, b)    MUL16(a, b)
 #define MAC(a, b, c) MAC16(a, b, c)
 
-#elif BIT_DEPTH == 10
+#elif BIT_DEPTH == 10 || BIT_DEPTH == 12
 
-#define W1 90901
-#define W2 85627
-#define W3 77062
-#define W4 65535
-#define W5 51491
-#define W6 35468
-#define W7 18081
+#if BIT_DEPTH == 10
+#define W1 (22725*4)  // 90901
+#define W2 (21407*4) //  85627
+#define W3 (19265*4) //  77062
+#define W4 (16384*4) //  65535
+#define W5 (12873*4) //  51491
+#define W6 ( 8867*4) //  35468
+#define W7 ( 4520*4) //  18081
 
 #define ROW_SHIFT 15
 #define COL_SHIFT 20
 #define DC_SHIFT 1
+#else
+#define W1 45451
+#define W2 42813
+#define W3 38531
+#define W4 32767
+#define W5 25746
+#define W6 17734
+#define W7 9041
+
+#define ROW_SHIFT 16
+#define COL_SHIFT 17
+#define DC_SHIFT -1
+#endif
 
 #define MUL(a, b)    ((a) * (b))
 #define MAC(a, b, c) ((a) += (b) * (c))
@@ -95,13 +109,13 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
 #define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN)
     if (((((uint64_t *)row)[0] & ~ROW0_MASK) | ((uint64_t *)row)[1]) == 0) {
         uint64_t temp;
-        if (DC_SHIFT - extra_shift > 0) {
-            temp = (row[0] << (DC_SHIFT - extra_shift)) & 0xffff;
+        if (DC_SHIFT - extra_shift >= 0) {
+            temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
         } else {
-            temp = (row[0] >> (extra_shift - DC_SHIFT)) & 0xffff;
+            temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
         }
-        temp += temp << 16;
-        temp += temp << 32;
+        temp += temp * (1 << 16);
+        temp += temp * ((uint64_t) 1 << 32);
         ((uint64_t *)row)[0] = temp;
         ((uint64_t *)row)[1] = temp;
         return;
@@ -112,19 +126,19 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
           ((uint32_t*)row)[3] |
           row[1])) {
         uint32_t temp;
-        if (DC_SHIFT - extra_shift > 0) {
-            temp = (row[0] << (DC_SHIFT - extra_shift)) & 0xffff;
+        if (DC_SHIFT - extra_shift >= 0) {
+            temp = (row[0] * (1 << (DC_SHIFT - extra_shift))) & 0xffff;
         } else {
-            temp = (row[0] >> (extra_shift - DC_SHIFT)) & 0xffff;
+            temp = ((row[0] + (1<<(extra_shift - DC_SHIFT-1))) >> (extra_shift - DC_SHIFT)) & 0xffff;
         }
-        temp += temp << 16;
+        temp += temp * (1 << 16);
         ((uint32_t*)row)[0]=((uint32_t*)row)[1] =
             ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp;
         return;
     }
 #endif
 
-    a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
+    a0 = (W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1));
     a1 = a0;
     a2 = a0;
     a3 = a0;
diff --git a/libavcodec/sinewin.c b/libavcodec/sinewin.c
index be38dbc..1fa0e95 100644
--- a/libavcodec/sinewin.c
+++ b/libavcodec/sinewin.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/sinewin.h b/libavcodec/sinewin.h
index 478036d..2268fd5 100644
--- a/libavcodec/sinewin.h
+++ b/libavcodec/sinewin.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2008 Robert Swain
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/sinewin_tablegen.c b/libavcodec/sinewin_tablegen.c
index 90a75c2..561ae3e 100644
--- a/libavcodec/sinewin_tablegen.c
+++ b/libavcodec/sinewin_tablegen.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/sinewin_tablegen.h b/libavcodec/sinewin_tablegen.h
index 1ee225b..2b9c4f2 100644
--- a/libavcodec/sinewin_tablegen.h
+++ b/libavcodec/sinewin_tablegen.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/sipr.c b/libavcodec/sipr.c
index 98607f2..17260a0 100644
--- a/libavcodec/sipr.c
+++ b/libavcodec/sipr.c
@@ -4,20 +4,20 @@
  * Copyright (c) 2008 Vladimir Voroshilov
  * Copyright (c) 2009 Vitor Sessak
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -543,10 +543,8 @@ static int sipr_decode_frame(AVCodecContext *avctx, void *data,
     /* get output buffer */
     frame->nb_samples = mode_par->frames_per_packet * subframe_size *
                         mode_par->subframe_count;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     samples = (float *)frame->data[0];
 
     init_get_bits(&gb, buf, mode_par->bits_per_frame);
diff --git a/libavcodec/sipr.h b/libavcodec/sipr.h
index 4cdea67..34f7f99 100644
--- a/libavcodec/sipr.h
+++ b/libavcodec/sipr.h
@@ -4,20 +4,20 @@
  * Copyright (c) 2008 Vladimir Voroshilov
  * Copyright (c) 2009 Vitor Sessak
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/sipr16k.c b/libavcodec/sipr16k.c
index f7fcb34..bfd8ade 100644
--- a/libavcodec/sipr16k.c
+++ b/libavcodec/sipr16k.c
@@ -4,20 +4,20 @@
  * Copyright (c) 2008 Vladimir Voroshilov
  * Copyright (c) 2009 Vitor Sessak
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -29,7 +29,6 @@
 #include "libavutil/float_dsp.h"
 #include "libavutil/mathematics.h"
 #include "lsp.h"
-#include "celp_filters.h"
 #include "acelp_vectors.h"
 #include "acelp_pitch_delay.h"
 #include "acelp_filters.h"
diff --git a/libavcodec/sipr16kdata.h b/libavcodec/sipr16kdata.h
index ec60c29..96bf0e9 100644
--- a/libavcodec/sipr16kdata.h
+++ b/libavcodec/sipr16kdata.h
@@ -4,20 +4,20 @@
  * Copyright (c) 2008 Vladimir Voroshilov
  * Copyright (c) 2009 Vitor Sessak
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/siprdata.h b/libavcodec/siprdata.h
index 92037a4..ed804ee 100644
--- a/libavcodec/siprdata.h
+++ b/libavcodec/siprdata.h
@@ -4,20 +4,20 @@
  * Copyright (c) 2008 Vladimir Voroshilov
  * Copyright (c) 2009 Vitor Sessak
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/smacker.c b/libavcodec/smacker.c
index ba69303..518bdad 100644
--- a/libavcodec/smacker.c
+++ b/libavcodec/smacker.c
@@ -2,20 +2,20 @@
  * Smacker decoder
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -97,10 +97,14 @@ enum SmkBlockTypes {
  */
 static int smacker_decode_tree(GetBitContext *gb, HuffContext *hc, uint32_t prefix, int length)
 {
+    if(length > 32 || length > 3*SMKTREE_BITS) {
+        av_log(NULL, AV_LOG_ERROR, "length too long\n");
+        return AVERROR_INVALIDDATA;
+    }
     if(!get_bits1(gb)){ //Leaf
-        if(hc->current >= 256){
+        if(hc->current >= hc->length){
             av_log(NULL, AV_LOG_ERROR, "Tree size exceeded!\n");
-            return -1;
+            return AVERROR_INVALIDDATA;
         }
         if(length){
             hc->bits[hc->current] = prefix;
@@ -131,14 +135,14 @@ static int smacker_decode_bigtree(GetBitContext *gb, HuffContext *hc, DBCtx *ctx
 {
     if (hc->current + 1 >= hc->length) {
         av_log(NULL, AV_LOG_ERROR, "Tree size exceeded!\n");
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
     if(!get_bits1(gb)){ //Leaf
         int val, i1, i2;
         i1 = ctx->v1->table ? get_vlc2(gb, ctx->v1->table, SMKTREE_BITS, 3) : 0;
         i2 = ctx->v2->table ? get_vlc2(gb, ctx->v2->table, SMKTREE_BITS, 3) : 0;
         if (i1 < 0 || i2 < 0)
-            return -1;
+            return AVERROR_INVALIDDATA;
         val = ctx->recode1[i1] | (ctx->recode2[i2] << 8);
         if(val == ctx->escapes[0]) {
             ctx->last[0] = hc->current;
@@ -170,7 +174,7 @@ static int smacker_decode_bigtree(GetBitContext *gb, HuffContext *hc, DBCtx *ctx
 }
 
 /**
- * Store large tree as Libav's vlc codes
+ * Store large tree as FFmpeg's vlc codes
  */
 static int smacker_decode_header_tree(SmackVContext *smk, GetBitContext *gb, int **recodes, int *last, int size)
 {
@@ -184,7 +188,7 @@ static int smacker_decode_header_tree(SmackVContext *smk, GetBitContext *gb, int
 
     if(size >= UINT_MAX>>4){ // (((size + 3) >> 2) + 3) << 2 must not overflow
         av_log(smk->avctx, AV_LOG_ERROR, "size too large\n");
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
 
     tmp1.length = 256;
@@ -207,40 +211,51 @@ static int smacker_decode_header_tree(SmackVContext *smk, GetBitContext *gb, int
     }
 
     if(get_bits1(gb)) {
-        smacker_decode_tree(gb, &tmp1, 0, 0);
-        skip_bits1(gb);
-        res = init_vlc(&vlc[0], SMKTREE_BITS, tmp1.length,
-                    tmp1.lengths, sizeof(int), sizeof(int),
-                    tmp1.bits, sizeof(uint32_t), sizeof(uint32_t), INIT_VLC_LE);
-        if(res < 0) {
-            av_log(smk->avctx, AV_LOG_ERROR, "Cannot build VLC table\n");
+        res = smacker_decode_tree(gb, &tmp1, 0, 0);
+        if (res < 0) {
             err = res;
             goto error;
         }
-    } else {
+        skip_bits1(gb);
+        if(tmp1.current > 1) {
+            res = init_vlc(&vlc[0], SMKTREE_BITS, tmp1.length,
+                        tmp1.lengths, sizeof(int), sizeof(int),
+                        tmp1.bits, sizeof(uint32_t), sizeof(uint32_t), INIT_VLC_LE);
+            if(res < 0) {
+                av_log(smk->avctx, AV_LOG_ERROR, "Cannot build VLC table\n");
+                err = res;
+                goto error;
+            }
+        }
+    }
+    if (!vlc[0].table) {
         av_log(smk->avctx, AV_LOG_ERROR, "Skipping low bytes tree\n");
     }
     if(get_bits1(gb)){
-        smacker_decode_tree(gb, &tmp2, 0, 0);
-        skip_bits1(gb);
-        res = init_vlc(&vlc[1], SMKTREE_BITS, tmp2.length,
-                    tmp2.lengths, sizeof(int), sizeof(int),
-                    tmp2.bits, sizeof(uint32_t), sizeof(uint32_t), INIT_VLC_LE);
-        if(res < 0) {
-            av_log(smk->avctx, AV_LOG_ERROR, "Cannot build VLC table\n");
+        res = smacker_decode_tree(gb, &tmp2, 0, 0);
+        if (res < 0) {
             err = res;
             goto error;
         }
-    } else {
+        skip_bits1(gb);
+        if(tmp2.current > 1) {
+            res = init_vlc(&vlc[1], SMKTREE_BITS, tmp2.length,
+                        tmp2.lengths, sizeof(int), sizeof(int),
+                        tmp2.bits, sizeof(uint32_t), sizeof(uint32_t), INIT_VLC_LE);
+            if(res < 0) {
+                av_log(smk->avctx, AV_LOG_ERROR, "Cannot build VLC table\n");
+                err = res;
+                goto error;
+            }
+        }
+    }
+    if (!vlc[1].table) {
         av_log(smk->avctx, AV_LOG_ERROR, "Skipping high bytes tree\n");
     }
 
-    escapes[0]  = get_bits(gb, 8);
-    escapes[0] |= get_bits(gb, 8) << 8;
-    escapes[1]  = get_bits(gb, 8);
-    escapes[1] |= get_bits(gb, 8) << 8;
-    escapes[2]  = get_bits(gb, 8);
-    escapes[2] |= get_bits(gb, 8) << 8;
+    escapes[0]  = get_bits(gb, 16);
+    escapes[1]  = get_bits(gb, 16);
+    escapes[2]  = get_bits(gb, 16);
 
     last[0] = last[1] = last[2] = -1;
 
@@ -256,7 +271,7 @@ static int smacker_decode_header_tree(SmackVContext *smk, GetBitContext *gb, int
     huff.length = ((size + 3) >> 2) + 4;
     huff.maxlength = 0;
     huff.current = 0;
-    huff.values = av_mallocz(huff.length * sizeof(int));
+    huff.values = av_mallocz_array(huff.length, sizeof(int));
     if (!huff.values) {
         err = AVERROR(ENOMEM);
         goto error;
@@ -294,14 +309,14 @@ error:
 
 static int decode_header_trees(SmackVContext *smk) {
     GetBitContext gb;
-    int mmap_size, mclr_size, full_size, type_size;
+    int mmap_size, mclr_size, full_size, type_size, ret;
 
     mmap_size = AV_RL32(smk->avctx->extradata);
     mclr_size = AV_RL32(smk->avctx->extradata + 4);
     full_size = AV_RL32(smk->avctx->extradata + 8);
     type_size = AV_RL32(smk->avctx->extradata + 12);
 
-    init_get_bits(&gb, smk->avctx->extradata + 16, (smk->avctx->extradata_size - 16) * 8);
+    init_get_bits8(&gb, smk->avctx->extradata + 16, smk->avctx->extradata_size - 16);
 
     if(!get_bits1(&gb)) {
         av_log(smk->avctx, AV_LOG_INFO, "Skipping MMAP tree\n");
@@ -311,8 +326,9 @@ static int decode_header_trees(SmackVContext *smk) {
         smk->mmap_tbl[0] = 0;
         smk->mmap_last[0] = smk->mmap_last[1] = smk->mmap_last[2] = 1;
     } else {
-        if (smacker_decode_header_tree(smk, &gb, &smk->mmap_tbl, smk->mmap_last, mmap_size))
-            return -1;
+        ret = smacker_decode_header_tree(smk, &gb, &smk->mmap_tbl, smk->mmap_last, mmap_size);
+        if (ret < 0)
+            return ret;
     }
     if(!get_bits1(&gb)) {
         av_log(smk->avctx, AV_LOG_INFO, "Skipping MCLR tree\n");
@@ -322,8 +338,9 @@ static int decode_header_trees(SmackVContext *smk) {
         smk->mclr_tbl[0] = 0;
         smk->mclr_last[0] = smk->mclr_last[1] = smk->mclr_last[2] = 1;
     } else {
-        if (smacker_decode_header_tree(smk, &gb, &smk->mclr_tbl, smk->mclr_last, mclr_size))
-            return -1;
+        ret = smacker_decode_header_tree(smk, &gb, &smk->mclr_tbl, smk->mclr_last, mclr_size);
+        if (ret < 0)
+            return ret;
     }
     if(!get_bits1(&gb)) {
         av_log(smk->avctx, AV_LOG_INFO, "Skipping FULL tree\n");
@@ -333,8 +350,9 @@ static int decode_header_trees(SmackVContext *smk) {
         smk->full_tbl[0] = 0;
         smk->full_last[0] = smk->full_last[1] = smk->full_last[2] = 1;
     } else {
-        if (smacker_decode_header_tree(smk, &gb, &smk->full_tbl, smk->full_last, full_size))
-            return -1;
+        ret = smacker_decode_header_tree(smk, &gb, &smk->full_tbl, smk->full_last, full_size);
+        if (ret < 0)
+            return ret;
     }
     if(!get_bits1(&gb)) {
         av_log(smk->avctx, AV_LOG_INFO, "Skipping TYPE tree\n");
@@ -344,8 +362,9 @@ static int decode_header_trees(SmackVContext *smk) {
         smk->type_tbl[0] = 0;
         smk->type_last[0] = smk->type_last[1] = smk->type_last[2] = 1;
     } else {
-        if (smacker_decode_header_tree(smk, &gb, &smk->type_tbl, smk->type_last, type_size))
-            return -1;
+        ret = smacker_decode_header_tree(smk, &gb, &smk->type_tbl, smk->type_last, type_size);
+        if (ret < 0)
+            return ret;
     }
 
     return 0;
@@ -389,12 +408,10 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     int flags;
 
     if (avpkt->size <= 769)
-        return 0;
+        return AVERROR_INVALIDDATA;
 
-    if ((ret = ff_reget_buffer(avctx, smk->pic)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, smk->pic)) < 0)
         return ret;
-    }
 
     /* make the palette available on the way out */
     pal = (uint32_t*)smk->pic->data[1];
@@ -402,25 +419,25 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     flags = bytestream2_get_byteu(&gb2);
     smk->pic->palette_has_changed = flags & 1;
     smk->pic->key_frame = !!(flags & 2);
-    if(smk->pic->key_frame)
+    if (smk->pic->key_frame)
         smk->pic->pict_type = AV_PICTURE_TYPE_I;
     else
         smk->pic->pict_type = AV_PICTURE_TYPE_P;
 
     for(i = 0; i < 256; i++)
-        *pal++ = bytestream2_get_be24u(&gb2);
+        *pal++ = 0xFFU << 24 | bytestream2_get_be24u(&gb2);
 
     last_reset(smk->mmap_tbl, smk->mmap_last);
     last_reset(smk->mclr_tbl, smk->mclr_last);
     last_reset(smk->full_tbl, smk->full_last);
     last_reset(smk->type_tbl, smk->type_last);
-    init_get_bits(&gb, avpkt->data + 769, (avpkt->size - 769) * 8);
+    if ((ret = init_get_bits8(&gb, avpkt->data + 769, avpkt->size - 769)) < 0)
+        return ret;
 
     blk = 0;
     bw = avctx->width >> 2;
     bh = avctx->height >> 2;
     blocks = bw * bh;
-    out = smk->pic->data[0];
     stride = smk->pic->linesize[0];
     while(blk < blocks) {
         int type, run, mode;
@@ -481,7 +498,6 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                     out += stride;
                     out[0] = out[1] = pix & 0xFF;
                     out[2] = out[3] = pix >> 8;
-                    out += stride;
                     break;
                 case 2:
                     for(i = 0; i < 2; i++) {
@@ -560,6 +576,7 @@ static av_cold int decode_end(AVCodecContext *avctx)
 static av_cold int decode_init(AVCodecContext *avctx)
 {
     SmackVContext * const c = avctx->priv_data;
+    int ret;
 
     c->avctx = avctx;
 
@@ -572,19 +589,19 @@ static av_cold int decode_init(AVCodecContext *avctx)
     /* decode huffman trees from extradata */
     if(avctx->extradata_size < 16){
         av_log(avctx, AV_LOG_ERROR, "Extradata missing!\n");
-        return -1;
+        return AVERROR(EINVAL);
     }
 
-    if (decode_header_trees(c)) {
+    ret = decode_header_trees(c);
+    if (ret < 0) {
         decode_end(avctx);
-        return -1;
+        return ret;
     }
 
     return 0;
 }
 
 
-
 static av_cold int smka_decode_init(AVCodecContext *avctx)
 {
     if (avctx->channels < 1 || avctx->channels > 2) {
@@ -624,7 +641,13 @@ static int smka_decode_frame(AVCodecContext *avctx, void *data,
 
     unp_size = AV_RL32(buf);
 
-    init_get_bits(&gb, buf + 4, (buf_size - 4) * 8);
+    if (unp_size > (1U<<24)) {
+        av_log(avctx, AV_LOG_ERROR, "packet is too big\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if ((ret = init_get_bits8(&gb, buf + 4, buf_size - 4)) < 0)
+        return ret;
 
     if(!get_bits1(&gb)){
         av_log(avctx, AV_LOG_INFO, "Sound: no data\n");
@@ -637,17 +660,15 @@ static int smka_decode_frame(AVCodecContext *avctx, void *data,
         av_log(avctx, AV_LOG_ERROR, "channels mismatch\n");
         return AVERROR(EINVAL);
     }
-    if (bits && avctx->sample_fmt == AV_SAMPLE_FMT_U8) {
+    if (bits == (avctx->sample_fmt == AV_SAMPLE_FMT_U8)) {
         av_log(avctx, AV_LOG_ERROR, "sample format mismatch\n");
         return AVERROR(EINVAL);
     }
 
     /* get output buffer */
     frame->nb_samples = unp_size / (avctx->channels * (bits + 1));
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     samples  = (int16_t *)frame->data[0];
     samples8 =            frame->data[0];
 
@@ -687,16 +708,26 @@ static int smka_decode_frame(AVCodecContext *avctx, void *data,
         for(i = 0; i <= stereo; i++)
             *samples++ = pred[i];
         for(; i < unp_size / 2; i++) {
+            if(get_bits_left(&gb)<0)
+                return AVERROR_INVALIDDATA;
             if(i & stereo) {
                 if(vlc[2].table)
                     res = get_vlc2(&gb, vlc[2].table, SMKTREE_BITS, 3);
                 else
                     res = 0;
+                if (res < 0) {
+                    av_log(avctx, AV_LOG_ERROR, "invalid vlc\n");
+                    return AVERROR_INVALIDDATA;
+                }
                 val  = h[2].values[res];
                 if(vlc[3].table)
                     res = get_vlc2(&gb, vlc[3].table, SMKTREE_BITS, 3);
                 else
                     res = 0;
+                if (res < 0) {
+                    av_log(avctx, AV_LOG_ERROR, "invalid vlc\n");
+                    return AVERROR_INVALIDDATA;
+                }
                 val |= h[3].values[res] << 8;
                 pred[1] += sign_extend(val, 16);
                 *samples++ = pred[1];
@@ -705,11 +736,19 @@ static int smka_decode_frame(AVCodecContext *avctx, void *data,
                     res = get_vlc2(&gb, vlc[0].table, SMKTREE_BITS, 3);
                 else
                     res = 0;
+                if (res < 0) {
+                    av_log(avctx, AV_LOG_ERROR, "invalid vlc\n");
+                    return AVERROR_INVALIDDATA;
+                }
                 val  = h[0].values[res];
                 if(vlc[1].table)
                     res = get_vlc2(&gb, vlc[1].table, SMKTREE_BITS, 3);
                 else
                     res = 0;
+                if (res < 0) {
+                    av_log(avctx, AV_LOG_ERROR, "invalid vlc\n");
+                    return AVERROR_INVALIDDATA;
+                }
                 val |= h[1].values[res] << 8;
                 pred[0] += sign_extend(val, 16);
                 *samples++ = pred[0];
@@ -721,11 +760,17 @@ static int smka_decode_frame(AVCodecContext *avctx, void *data,
         for(i = 0; i <= stereo; i++)
             *samples8++ = pred[i];
         for(; i < unp_size; i++) {
+            if(get_bits_left(&gb)<0)
+                return AVERROR_INVALIDDATA;
             if(i & stereo){
                 if(vlc[1].table)
                     res = get_vlc2(&gb, vlc[1].table, SMKTREE_BITS, 3);
                 else
                     res = 0;
+                if (res < 0) {
+                    av_log(avctx, AV_LOG_ERROR, "invalid vlc\n");
+                    return AVERROR_INVALIDDATA;
+                }
                 pred[1] += sign_extend(h[1].values[res], 8);
                 *samples8++ = pred[1];
             } else {
@@ -733,6 +778,10 @@ static int smka_decode_frame(AVCodecContext *avctx, void *data,
                     res = get_vlc2(&gb, vlc[0].table, SMKTREE_BITS, 3);
                 else
                     res = 0;
+                if (res < 0) {
+                    av_log(avctx, AV_LOG_ERROR, "invalid vlc\n");
+                    return AVERROR_INVALIDDATA;
+                }
                 pred[0] += sign_extend(h[0].values[res], 8);
                 *samples8++ = pred[0];
             }
diff --git a/libavcodec/smc.c b/libavcodec/smc.c
index 46903ab..31e6c88 100644
--- a/libavcodec/smc.c
+++ b/libavcodec/smc.c
@@ -2,20 +2,20 @@
  * Quicktime Graphics (SMC) Video Decoder
  * Copyright (C) 2003 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -84,7 +84,7 @@ static void smc_decode_stream(SmcContext *s)
     int stride = s->frame->linesize[0];
     int i;
     int chunk_size;
-    int buf_size = (int) (s->gb.buffer_end - s->gb.buffer_start);
+    int buf_size = bytestream2_size(&s->gb);
     unsigned char opcode;
     int n_blocks;
     unsigned int color_flags;
@@ -436,10 +436,8 @@ static int smc_decode_frame(AVCodecContext *avctx,
 
     bytestream2_init(&s->gb, buf, buf_size);
 
-    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0) {
-        av_log(s->avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0)
         return ret;
-    }
 
     if (pal) {
         s->frame->palette_has_changed = 1;
diff --git a/libavcodec/smvjpegdec.c b/libavcodec/smvjpegdec.c
new file mode 100644
index 0000000..69327cd
--- /dev/null
+++ b/libavcodec/smvjpegdec.c
@@ -0,0 +1,209 @@
+/*
+ * SMV JPEG decoder
+ * Copyright (c) 2013 Ash Hughes
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SMV JPEG decoder.
+ */
+
+// #define DEBUG
+#include "avcodec.h"
+#include "libavutil/opt.h"
+#include "libavutil/imgutils.h"
+#include "mjpegdec.h"
+#include "internal.h"
+
+typedef struct SMVJpegDecodeContext {
+    MJpegDecodeContext jpg; /* only its picture_ptr member is touched in this file */
+    AVFrame *picture[2]; /* [0] receives the decoded JPEG, [1] is the per-call output view */
+    AVCodecContext* avctx; /* context of the nested MJPEG decoder opened in init */
+    int frames_per_jpeg; /* AV_RL32 of the first 4 extradata bytes; init fails unless > 0 */
+    int mjpeg_data_size; /* output flag of the last avcodec_decode_video2(); 0 = no picture */
+} SMVJpegDecodeContext;
+
+static inline void smv_img_pnt_plane(uint8_t      **dst, uint8_t *src,
+                                     int src_linesize, int height, int nlines)
+{
+    if (!dst || !src) /* no output slot or no source plane: leave *dst alone */
+        return;
+    src += (nlines) * src_linesize * height; /* skip nlines blocks of height rows */
+    *dst = src; /* hand back a pointer into src; nothing is copied */
+}
+
+/* Fill dst_data[] with pointers to sub-frame nlines of the stacked picture in
+ * src_data[]; no pixels are copied. width is accepted but not used. */
+static inline void smv_img_pnt(uint8_t *dst_data[4], uint8_t *src_data[4],
+                               const int src_linesizes[4],
+                               enum PixelFormat pix_fmt, int width, int height,
+                               int nlines)
+{
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
+    int i, planes_nb = 0;
+    /* no descriptor (unknown pix_fmt) or hwaccel format: leave dst_data untouched */
+    if (!desc || (desc->flags & AV_PIX_FMT_FLAG_HWACCEL))
+        return;
+    /* plane count = highest plane index used by any component, plus one */
+    for (i = 0; i < desc->nb_components; i++)
+        planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1);
+    /* planes 1 and 2 use the height shifted by log2_chroma_h, rounded up */
+    for (i = 0; i < planes_nb; i++) {
+        int h = height;
+        if (i == 1 || i == 2)
+            h = FF_CEIL_RSHIFT(height, desc->log2_chroma_h);
+        smv_img_pnt_plane(&dst_data[i], src_data[i], src_linesizes[i], h, nlines);
+    }
+    if (desc->flags & AV_PIX_FMT_FLAG_PAL ||
+        desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL)
+        dst_data[1] = src_data[1]; /* palette formats: plane 1 is passed through unshifted */
+}
+
+/* Allocate both working frames, read frames_per_jpeg from the first four
+ * extradata bytes and open a single-threaded MJPEG decoder; 0 or <0 on error. */
+static av_cold int smvjpeg_decode_init(AVCodecContext *avctx)
+{
+    SMVJpegDecodeContext *s = avctx->priv_data;
+    AVCodec *codec;
+    AVDictionary *thread_opt = NULL;
+    int ret = 0, err;
+
+    s->frames_per_jpeg = 0;
+    s->picture[0] = av_frame_alloc();
+    s->picture[1] = av_frame_alloc();
+    if (!s->picture[0] || !s->picture[1]) {
+        /* drop whichever frame was obtained; av_frame_free() tolerates NULL */
+        av_frame_free(&s->picture[0]);
+        av_frame_free(&s->picture[1]);
+        return AVERROR(ENOMEM);
+    }
+    s->jpg.picture_ptr = s->picture[0];
+
+    if (avctx->extradata_size >= 4)
+        s->frames_per_jpeg = AV_RL32(avctx->extradata);
+    if (s->frames_per_jpeg <= 0) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid number of frames per jpeg.\n");
+        ret = AVERROR_INVALIDDATA;
+    }
+
+    codec = avcodec_find_decoder(AV_CODEC_ID_MJPEG);
+    if (!codec) {
+        av_log(avctx, AV_LOG_ERROR, "MJPEG codec not found\n");
+        ret = AVERROR_DECODER_NOT_FOUND;
+    }
+
+    s->avctx = avcodec_alloc_context3(codec);
+    if (!s->avctx) /* allocation failed: the assignments below would crash */
+        return AVERROR(ENOMEM);
+    av_dict_set(&thread_opt, "threads", "1", 0);
+    s->avctx->refcounted_frames = 1;
+    s->avctx->flags = avctx->flags;
+    s->avctx->idct_algo = avctx->idct_algo;
+    if ((err = ff_codec_open2_recursive(s->avctx, codec, &thread_opt)) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "MJPEG codec failed to open\n");
+        ret = err;
+    }
+    av_dict_free(&thread_opt);
+    return ret;
+}
+
+/* Return sub-frame (pts mod frames_per_jpeg) of the stacked JPEG; the JPEG is
+ * decoded from avpkt whenever that index is 0. Returns <0 on error. */
+static int smvjpeg_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
+                            AVPacket *avpkt)
+{
+    const AVPixFmtDescriptor *desc;
+    SMVJpegDecodeContext *s = avctx->priv_data;
+    AVFrame* mjpeg_data = s->picture[0];
+    int i, cur_frame = 0, ret = 0;
+
+    /* % keeps the sign of pts; a negative index would point before the picture */
+    cur_frame = avpkt->pts % s->frames_per_jpeg;
+    if (cur_frame < 0)
+        cur_frame += s->frames_per_jpeg;
+    /* Are we at the start of a block? */
+    if (!cur_frame) {
+        av_frame_unref(mjpeg_data);
+        ret = avcodec_decode_video2(s->avctx, mjpeg_data, &s->mjpeg_data_size, avpkt);
+        if (ret < 0) {
+            s->mjpeg_data_size = 0; /* later sub-frames must not reuse this JPEG */
+            return ret;
+        }
+    } else if (!s->mjpeg_data_size)
+        return AVERROR(EINVAL);
+
+    desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
+    if (desc && mjpeg_data->height % (s->frames_per_jpeg << desc->log2_chroma_h)) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid height\n");
+        return AVERROR_INVALIDDATA;
+    }
+    *data_size = s->mjpeg_data_size; /* reuse the flag of the last decoded JPEG */
+    avctx->pix_fmt = s->avctx->pix_fmt;
+    /* frames_per_jpeg > 0 here: init fails for any other value */
+    ret = ff_set_dimensions(avctx, mjpeg_data->width, mjpeg_data->height / s->frames_per_jpeg);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to set dimensions\n");
+        return ret;
+    }
+    if (*data_size) {
+        s->picture[1]->extended_data = NULL;
+        s->picture[1]->width         = avctx->width;
+        s->picture[1]->height        = avctx->height;
+        s->picture[1]->format        = avctx->pix_fmt;
+        /* make picture[1] a view of sub-frame cur_frame inside picture[0] */
+        smv_img_pnt(s->picture[1]->data, mjpeg_data->data, mjpeg_data->linesize,
+                    avctx->pix_fmt, avctx->width, avctx->height, cur_frame);
+        for (i = 0; i < AV_NUM_DATA_POINTERS; i++)
+            s->picture[1]->linesize[i] = mjpeg_data->linesize[i];
+        ret = av_frame_ref(data, s->picture[1]);
+    }
+    return ret;
+}
+
+static av_cold int smvjpeg_decode_end(AVCodecContext *avctx)
+{
+    SMVJpegDecodeContext *s = avctx->priv_data;
+    MJpegDecodeContext *jpg = &s->jpg;
+    int ret;
+    /* picture_ptr aliases picture[0] (assigned in init); clear it before that frame is freed */
+    jpg->picture_ptr = NULL;
+    av_frame_free(&s->picture[0]);
+    av_frame_free(&s->picture[1]);
+    ret = avcodec_close(s->avctx); /* shut down the nested MJPEG decoder opened in init */
+    av_freep(&s->avctx);
+    return ret; /* whatever avcodec_close() reported */
+}
+
+static const AVClass smvjpegdec_class = { /* installed as .priv_class of ff_smvjpeg_decoder */
+    .class_name = "SMVJPEG decoder",
+    .item_name  = av_default_item_name,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+AVCodec ff_smvjpeg_decoder = { /* codec entry wiring up the callbacks defined in this file */
+    .name           = "smvjpeg",
+    .long_name      = NULL_IF_CONFIG_SMALL("SMV JPEG"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_SMVJPEG,
+    .priv_data_size = sizeof(SMVJpegDecodeContext), /* presumably the size allocated for avctx->priv_data */
+    .init           = smvjpeg_decode_init, /* allocates the frames and opens the nested MJPEG decoder */
+    .close          = smvjpeg_decode_end, /* frees the frames and the nested decoder */
+    .decode         = smvjpeg_decode_frame, /* outputs one sub-frame of the stacked JPEG per call */
+    .priv_class     = &smvjpegdec_class,
+};
diff --git a/libavcodec/snow.c b/libavcodec/snow.c
new file mode 100644
index 0000000..dc80ce6
--- /dev/null
+++ b/libavcodec/snow.c
@@ -0,0 +1,723 @@
+/*
+ * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intmath.h"
+#include "libavutil/log.h"
+#include "libavutil/opt.h"
+#include "avcodec.h"
+#include "me_cmp.h"
+#include "snow_dwt.h"
+#include "internal.h"
+#include "snow.h"
+#include "snowdata.h"
+
+#include "rangecoder.h"
+#include "mathops.h"
+#include "h263.h"
+
+
+void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+                              int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
+    int y, x; /* blends block[0..3] with the obmc weights; add!=0 reconstructs into dst8, add==0 subtracts from sb */
+    IDWTELEM * dst;
+    for(y=0; y<b_h; y++){
+        //FIXME ugly misuse of obmc_stride: it is the row pitch and, halved, also the offset between the four weight quadrants
+        const uint8_t *obmc1= obmc + y*obmc_stride;
+        const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
+        const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
+        const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
+        dst = slice_buffer_get_line(sb, src_y + y);
+        for(x=0; x<b_w; x++){
+            int v=   obmc1[x] * block[3][x + y*src_stride]
+                    +obmc2[x] * block[2][x + y*src_stride]
+                    +obmc3[x] * block[1][x + y*src_stride]
+                    +obmc4[x] * block[0][x + y*src_stride];
+
+            v <<= 8 - LOG2_OBMC_MAX; /* normalise the weight scale to 8 bits */
+            if(FRAC_BITS != 8){
+                v >>= 8 - FRAC_BITS; /* bring v to FRAC_BITS fractional bits, the scale it is combined with dst[] at */
+            }
+            if(add){
+                v += dst[x + src_x];
+                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; /* round to integer pixel value */
+                if(v&(~255)) v= ~(v>>31); /* clip: negative -> 0, above 255 -> 255 after the uint8_t store */
+                dst8[x + y*src_stride] = v;
+            }else{
+                dst[x + src_x] -= v;
+            }
+        }
+    }
+}
+
+int ff_snow_get_buffer(SnowContext *s, AVFrame *frame)
+{   /* Gets a refcounted buffer for frame; encoders get an extra EDGE_WIDTH border. Returns 0 or ff_get_buffer()'s error. */
+    int ret, i;
+    int edges_needed = av_codec_is_encoder(s->avctx->codec);
+
+    frame->width  = s->avctx->width ;
+    frame->height = s->avctx->height;
+    if (edges_needed) {
+        frame->width  += 2 * EDGE_WIDTH; /* request room for a border on every side */
+        frame->height += 2 * EDGE_WIDTH;
+    }
+    if ((ret = ff_get_buffer(s->avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
+        return ret;
+    if (edges_needed) {
+        for (i = 0; frame->data[i]; i++) {
+            int offset = (EDGE_WIDTH >> (i ? s->chroma_v_shift : 0)) *
+                            frame->linesize[i] +
+                            (EDGE_WIDTH >> (i ? s->chroma_h_shift : 0)); /* planes i>0 use the chroma shifts */
+            frame->data[i] += offset; /* data[] now points past the top/left border */
+        }
+        frame->width  = s->avctx->width;  /* report the visible size again */
+        frame->height = s->avctx->height;
+    }
+
+    return 0;
+}
+
+void ff_snow_reset_contexts(SnowContext *s){ //FIXME better initial contexts
+    int p, lvl, o; /* plane, decomposition level, sub-band orientation */
+    for (p = 0; p < 3; p++) {
+        for (lvl = 0; lvl < MAX_DECOMPOSITIONS; lvl++) {
+            for (o = lvl ? 1 : 0; o < 4; o++) { /* orientation 0 is only touched on level 0 */
+                SubBand *band = &s->plane[p].band[lvl][o];
+                memset(band->state, MID_STATE, sizeof(band->state));
+            }
+        }
+    }
+    memset(s->header_state, MID_STATE, sizeof(s->header_state));
+    memset(s->block_state,  MID_STATE, sizeof(s->block_state));
+}
+
+/* (Re)allocates s->block, zero-filled, for the current picture size and
+ * block_max_depth.  Returns 0 on success or AVERROR(ENOMEM). */
+int ff_snow_alloc_blocks(SnowContext *s){
+    const int blocks_x = FF_CEIL_RSHIFT(s->avctx->width,  LOG2_MB_SIZE);
+    const int blocks_y = FF_CEIL_RSHIFT(s->avctx->height, LOG2_MB_SIZE);
+
+    s->b_width  = blocks_x;
+    s->b_height = blocks_y;
+
+    av_free(s->block);  /* previous contents are discarded, not preserved */
+    /* each MB_SIZE block carries 4^block_max_depth nodes */
+    s->block = av_mallocz_array(blocks_x * blocks_y, sizeof(BlockNode) << (s->block_max_depth*2));
+    return s->block ? 0 : AVERROR(ENOMEM);
+}
+
+static av_cold void init_qexp(void){ /* fills ff_qexp[0..QROOT-1] with a geometric ramp starting at 128 */
+    int i;
+    double v=128;
+
+    for(i=0; i<QROOT; i++){
+        ff_qexp[i]= lrintf(v);
+        v *= pow(2, 1.0 / QROOT); /* ratio 2^(1/QROOT): v would reach 256 after QROOT steps */
+    }
+}
+static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){ /* sub-pel MC of one b_w x b_h block; dx,dy < 16; p may be NULL (fixed 6-tap filter) */
+    static const uint8_t weight[64]={ /* weight of the first source in the two-source blend, indexed (dx&7) + 8*(dy&7) */
+    8,7,6,5,4,3,2,1,
+    7,7,0,0,0,0,0,1,
+    6,0,6,0,0,0,2,0,
+    5,0,0,5,0,3,0,0,
+    4,0,0,0,4,0,0,0,
+    3,0,0,5,0,3,0,0,
+    2,0,6,0,0,0,2,0,
+    1,7,0,0,0,0,0,1,
+    };
+
+    static const uint8_t brane[256]={ /* indexed dx + 16*dy: high nibble = l, low nibble = r, both indices into hpel[] */
+    0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+    0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
+    0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
+    0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
+    0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
+    0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
+    0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
+    0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
+    0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
+    0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
+    0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
+    0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
+    0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
+    0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
+    0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
+    0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
+    };
+
+    static const uint8_t needs[16]={ /* per hpel[] index: bitmask of the filter passes below that must run; unlisted entries are 0 */
+    0,1,0,0,
+    2,4,2,0,
+    0,1,0,0,
+    15
+    };
+
+    int x, y, b, r, l;
+    int16_t tmpIt   [64*(32+HTAPS_MAX)]; /* unrounded horizontal filter output, 64 entries per row */
+    uint8_t tmp2t[3][64*(32+HTAPS_MAX)]; /* [0] horizontal, [1] vertical, [2] horizontal+vertical half-pel planes */
+    int16_t *tmpI= tmpIt;
+    uint8_t *tmp2= tmp2t[0];
+    const uint8_t *hpel[11];
+    av_assert2(dx<16 && dy<16);
+    r= brane[dx + 16*dy]&15;
+    l= brane[dx + 16*dy]>>4;
+
+    b= needs[l] | needs[r];
+    if(p && !p->diag_mc)
+        b= 15; /* without diag_mc always take the four-source bilinear path (b==15) */
+
+    if(b&5){ /* horizontal pass over b_h+HTAPS_MAX-1 rows; also feeds the b&4 pass through tmpI */
+        for(y=0; y < b_h+HTAPS_MAX-1; y++){
+            for(x=0; x < b_w; x++){
+                int a_1=src[x + HTAPS_MAX/2-4];
+                int a0= src[x + HTAPS_MAX/2-3];
+                int a1= src[x + HTAPS_MAX/2-2];
+                int a2= src[x + HTAPS_MAX/2-1];
+                int a3= src[x + HTAPS_MAX/2+0];
+                int a4= src[x + HTAPS_MAX/2+1];
+                int a5= src[x + HTAPS_MAX/2+2];
+                int a6= src[x + HTAPS_MAX/2+3];
+                int am=0;
+                if(!p || p->fast_mc){
+                    am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); /* fixed 6-tap (1,-5,20,20,-5,1), gain 32 */
+                    tmpI[x]= am;
+                    am= (am+16)>>5;
+                }else{
+                    am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6); /* symmetric 8-tap from the plane's coefficients */
+                    tmpI[x]= am;
+                    am= (am+32)>>6;
+                }
+
+                if(am&(~255)) am= ~(am>>31); /* clip to 0..255 once stored as uint8_t */
+                tmp2[x]= am;
+            }
+            tmpI+= 64;
+            tmp2+= 64;
+            src += stride;
+        }
+        src -= stride*y; /* rewind to the first row */
+    }
+    src += HTAPS_MAX/2 - 1;
+    tmp2= tmp2t[1];
+
+    if(b&2){ /* vertical pass on the source; b_w+1 columns because hpel[6] reads tmp2t[1] + 1 */
+        for(y=0; y < b_h; y++){
+            for(x=0; x < b_w+1; x++){
+                int a_1=src[x + (HTAPS_MAX/2-4)*stride];
+                int a0= src[x + (HTAPS_MAX/2-3)*stride];
+                int a1= src[x + (HTAPS_MAX/2-2)*stride];
+                int a2= src[x + (HTAPS_MAX/2-1)*stride];
+                int a3= src[x + (HTAPS_MAX/2+0)*stride];
+                int a4= src[x + (HTAPS_MAX/2+1)*stride];
+                int a5= src[x + (HTAPS_MAX/2+2)*stride];
+                int a6= src[x + (HTAPS_MAX/2+3)*stride];
+                int am=0;
+                if(!p || p->fast_mc)
+                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
+                else
+                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
+
+                if(am&(~255)) am= ~(am>>31);
+                tmp2[x]= am;
+            }
+            src += stride;
+            tmp2+= 64;
+        }
+        src -= stride*y;
+    }
+    src += stride*(HTAPS_MAX/2 - 1); /* src now addresses the block's integer-pel position */
+    tmp2= tmp2t[2];
+    tmpI= tmpIt;
+    if(b&4){ /* vertical pass on the unrounded horizontal output, hence the doubled shift */
+        for(y=0; y < b_h; y++){
+            for(x=0; x < b_w; x++){
+                int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
+                int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
+                int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
+                int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
+                int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
+                int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
+                int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
+                int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
+                int am=0;
+                if(!p || p->fast_mc)
+                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
+                else
+                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
+                if(am&(~255)) am= ~(am>>31);
+                tmp2[x]= am;
+            }
+            tmpI+= 64;
+            tmp2+= 64;
+        }
+    }
+
+    hpel[ 0]= src; /* hpel[] is a 3x3 grid with row pitch 4; indices 3 and 7 are never assigned */
+    hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1);
+    hpel[ 2]= src + 1;
+
+    hpel[ 4]= tmp2t[1];
+    hpel[ 5]= tmp2t[2];
+    hpel[ 6]= tmp2t[1] + 1;
+
+    hpel[ 8]= src + stride;
+    hpel[ 9]= hpel[1] + 64;
+    hpel[10]= hpel[8] + 1;
+
+#define MC_STRIDE(x) (needs[x] ? 64 : stride)
+
+    if(b==15){ /* bilinear blend of the four half-pel planes surrounding (dx,dy) */
+        int dxy = dx / 8 + dy / 8 * 4;
+        const uint8_t *src1 = hpel[dxy    ];
+        const uint8_t *src2 = hpel[dxy + 1];
+        const uint8_t *src3 = hpel[dxy + 4];
+        const uint8_t *src4 = hpel[dxy + 5];
+        int stride1 = MC_STRIDE(dxy);
+        int stride2 = MC_STRIDE(dxy + 1);
+        int stride3 = MC_STRIDE(dxy + 4);
+        int stride4 = MC_STRIDE(dxy + 5);
+        dx&=7;
+        dy&=7;
+        for(y=0; y < b_h; y++){
+            for(x=0; x < b_w; x++){
+                dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
+                         (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
+            }
+            src1+=stride1;
+            src2+=stride2;
+            src3+=stride3;
+            src4+=stride4;
+            dst +=stride;
+        }
+    }else{ /* two-source blend along the line joining hpel[l] and hpel[r] */
+        const uint8_t *src1= hpel[l];
+        const uint8_t *src2= hpel[r];
+        int stride1 = MC_STRIDE(l);
+        int stride2 = MC_STRIDE(r);
+        int a= weight[((dx&7) + (8*(dy&7)))];
+        int b= 8-a; /* shadows the outer pass mask b, which is no longer needed here */
+        for(y=0; y < b_h; y++){
+            for(x=0; x < b_w; x++){
+                dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
+            }
+            src1+=stride1;
+            src2+=stride2;
+            dst +=stride;
+        }
+    }
+}
+
+void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, ptrdiff_t stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ /* writes the b_w x b_h prediction of block into dst: flat colour for intra, motion compensation otherwise */
+    if(block->type & BLOCK_INTRA){
+        int x, y;
+        const unsigned color  = block->color[plane_index];
+        const unsigned color4 = color*0x01010101; /* colour byte replicated into all four bytes */
+        if(b_w==32){
+            for(y=0; y < b_h; y++){
+                *(uint32_t*)&dst[0 + y*stride]= color4;
+                *(uint32_t*)&dst[4 + y*stride]= color4;
+                *(uint32_t*)&dst[8 + y*stride]= color4;
+                *(uint32_t*)&dst[12+ y*stride]= color4;
+                *(uint32_t*)&dst[16+ y*stride]= color4;
+                *(uint32_t*)&dst[20+ y*stride]= color4;
+                *(uint32_t*)&dst[24+ y*stride]= color4;
+                *(uint32_t*)&dst[28+ y*stride]= color4;
+            }
+        }else if(b_w==16){
+            for(y=0; y < b_h; y++){
+                *(uint32_t*)&dst[0 + y*stride]= color4;
+                *(uint32_t*)&dst[4 + y*stride]= color4;
+                *(uint32_t*)&dst[8 + y*stride]= color4;
+                *(uint32_t*)&dst[12+ y*stride]= color4;
+            }
+        }else if(b_w==8){
+            for(y=0; y < b_h; y++){
+                *(uint32_t*)&dst[0 + y*stride]= color4;
+                *(uint32_t*)&dst[4 + y*stride]= color4;
+            }
+        }else if(b_w==4){
+            for(y=0; y < b_h; y++){
+                *(uint32_t*)&dst[0 + y*stride]= color4;
+            }
+        }else{ /* any other width: plain byte-wise fill */
+            for(y=0; y < b_h; y++){
+                for(x=0; x < b_w; x++){
+                    dst[x + y*stride]= color;
+                }
+            }
+        }
+    }else{
+        uint8_t *src= s->last_picture[block->ref]->data[plane_index];
+        const int scale= plane_index ?  (2*s->mv_scale)>>s->chroma_h_shift : 2*s->mv_scale;
+        int mx= block->mx*scale;
+        int my= block->my*scale;
+        const int dx= mx&15; /* fractional part in 1/16 pel */
+        const int dy= my&15;
+        const int tab_index= 3 - (b_w>>2) + (b_w>>4);
+        sx += (mx>>4) - (HTAPS_MAX/2-1); /* integer part, moved back by the filter's left/top margin */
+        sy += (my>>4) - (HTAPS_MAX/2-1);
+        src += sx + sy*stride;
+        if(   (unsigned)sx >= FFMAX(w - b_w - (HTAPS_MAX-2), 0)
+           || (unsigned)sy >= FFMAX(h - b_h - (HTAPS_MAX-2), 0)){ /* the unsigned casts also catch negative sx/sy */
+            s->vdsp.emulated_edge_mc(tmp + MB_SIZE, src,
+                                     stride, stride,
+                                     b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1,
+                                     sx, sy, w, h);
+            src= tmp + MB_SIZE; /* read from the edge-extended copy instead */
+        }
+
+        av_assert2(s->chroma_h_shift == s->chroma_v_shift); // only one mv_scale
+
+        av_assert2((tab_index>=0 && tab_index<4) || b_w==32);
+        if(    (dx&3) || (dy&3)
+            || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h)
+            || (b_w&(b_w-1))
+            || b_w == 1
+            || b_h == 1
+            || !s->plane[plane_index].fast_mc ) /* anything the qpel functions below cannot handle goes to the generic mc_block() */
+            mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
+        else if(b_w==32){ /* 32 wide: two 16x16 calls per 16 rows */
+            int y;
+            for(y=0; y<b_h; y+=16){
+                s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
+                s->h264qpel.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
+            }
+        }else if(b_w==b_h)
+            s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
+        else if(b_w==2*b_h){ /* wide block: two squares side by side */
+            s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
+            s->h264qpel.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
+        }else{ /* tall block: two squares stacked vertically */
+            av_assert2(2*b_w==b_h);
+            s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
+            s->h264qpel.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
+        }
+    }
+}
+
+#define mca(dx,dy,b_w)\
+static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int h){\
+    av_assert2(h==b_w);\
+    mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
+}
+
+mca( 0, 0,16) /* each mca() defines mc_block_hpel<dx><dy><size>: a square mc_block() call with p == NULL */
+mca( 8, 0,16)
+mca( 0, 8,16)
+mca( 8, 8,16)
+mca( 0, 0,8)
+mca( 8, 0,8)
+mca( 0, 8,8)
+mca( 8, 8,8)
+
+av_cold int ff_snow_common_init(AVCodecContext *avctx){ /* sets up DSP tables, work buffers and frames; returns 0 or AVERROR(ENOMEM) */
+    SnowContext *s = avctx->priv_data;
+    int width, height;
+    int i, j;
+
+    s->avctx= avctx;
+    s->max_ref_frames=1; //just make sure it's not an invalid value in case of no initial keyframe
+
+    ff_me_cmp_init(&s->mecc, avctx);
+    ff_hpeldsp_init(&s->hdsp, avctx->flags);
+    ff_videodsp_init(&s->vdsp, 8);
+    ff_dwt_init(&s->dwt);
+    ff_h264qpel_init(&s->h264qpel, 8);
+
+#define mcf(dx,dy)\
+    s->qdsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
+    s->qdsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
+        s->h264qpel.put_h264_qpel_pixels_tab[0][dy+dx/4];\
+    s->qdsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
+    s->qdsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
+        s->h264qpel.put_h264_qpel_pixels_tab[1][dy+dx/4];
+
+    mcf( 0, 0) /* point every qdsp quarter-pel entry at the matching H.264 qpel function */
+    mcf( 4, 0)
+    mcf( 8, 0)
+    mcf(12, 0)
+    mcf( 0, 4)
+    mcf( 4, 4)
+    mcf( 8, 4)
+    mcf(12, 4)
+    mcf( 0, 8)
+    mcf( 4, 8)
+    mcf( 8, 8)
+    mcf(12, 8)
+    mcf( 0,12)
+    mcf( 4,12)
+    mcf( 8,12)
+    mcf(12,12)
+
+#define mcfh(dx,dy)\
+    s->hdsp.put_pixels_tab       [0][dy/4+dx/8]=\
+    s->hdsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
+        mc_block_hpel ## dx ## dy ## 16;\
+    s->hdsp.put_pixels_tab       [1][dy/4+dx/8]=\
+    s->hdsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
+        mc_block_hpel ## dx ## dy ## 8;
+
+    mcfh(0, 0) /* half-pel entries use the mc_block_hpel* wrappers generated by mca() */
+    mcfh(8, 0)
+    mcfh(0, 8)
+    mcfh(8, 8)
+
+    init_qexp();
+
+//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
+
+    width= s->avctx->width;
+    height= s->avctx->height;
+
+    FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_idwt_buffer, width, height * sizeof(IDWTELEM), fail);
+    FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->spatial_dwt_buffer,  width, height * sizeof(DWTELEM),  fail); //FIXME this does not belong here
+    FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_dwt_buffer,     width, sizeof(DWTELEM),  fail);
+    FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->temp_idwt_buffer,    width, sizeof(IDWTELEM), fail);
+    FF_ALLOC_ARRAY_OR_GOTO(avctx,  s->run_buffer,          ((width + 1) >> 1), ((height + 1) >> 1) * sizeof(*s->run_buffer), fail);
+
+    for(i=0; i<MAX_REF_FRAMES; i++) {
+        for(j=0; j<MAX_REF_FRAMES; j++)
+            ff_scale_mv_ref[i][j] = 256*(i+1)/(j+1); /* 8.8 fixed-point ratio (i+1)/(j+1) */
+        s->last_picture[i] = av_frame_alloc();
+        if (!s->last_picture[i])
+            goto fail;
+    }
+
+    s->mconly_picture = av_frame_alloc();
+    s->current_picture = av_frame_alloc();
+    if (!s->mconly_picture || !s->current_picture)
+        goto fail;
+
+    return 0;
+fail: /* nothing is released here; NOTE(review): callers presumably run ff_snow_common_end() on failure -- confirm */
+    return AVERROR(ENOMEM);
+}
+
+int ff_snow_common_init_after_header(AVCodecContext *avctx) { /* allocates scratch buffers once, then lays out every sub-band; returns 0 or a negative error */
+    SnowContext *s = avctx->priv_data;
+    int plane_index, level, orientation;
+    int ret, emu_buf_size;
+
+    if(!s->scratchbuf) { /* first call only */
+        if ((ret = ff_get_buffer(s->avctx, s->mconly_picture,
+                                 AV_GET_BUFFER_FLAG_REF)) < 0)
+            return ret;
+        FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->scratchbuf, FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256), 7*MB_SIZE, fail);
+        emu_buf_size = FFMAX(s->mconly_picture->linesize[0], 2*avctx->width+256) * (2 * MB_SIZE + HTAPS_MAX - 1);
+        FF_ALLOC_OR_GOTO(avctx, s->emu_edge_buffer, emu_buf_size, fail); /* NOTE(review): if only this fails, scratchbuf stays set and a later call skips this block */
+    }
+
+    if(s->mconly_picture->format != avctx->pix_fmt) {
+        av_log(avctx, AV_LOG_ERROR, "pixel format changed\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    for(plane_index=0; plane_index < s->nb_planes; plane_index++){
+        int w= s->avctx->width;
+        int h= s->avctx->height;
+
+        if(plane_index){ /* planes after the first are subsampled by the chroma shifts */
+            w>>= s->chroma_h_shift;
+            h>>= s->chroma_v_shift;
+        }
+        s->plane[plane_index].width = w;
+        s->plane[plane_index].height= h;
+
+        for(level=s->spatial_decomposition_count-1; level>=0; level--){ /* highest level index first; w,h halve on each step */
+            for(orientation=level ? 1 : 0; orientation<4; orientation++){ /* band 0 exists only at level 0 */
+                SubBand *b= &s->plane[plane_index].band[level][orientation];
+
+                b->buf= s->spatial_dwt_buffer;
+                b->level= level;
+                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
+                b->width = (w + !(orientation&1))>>1; /* left-column bands get the extra sample of an odd width */
+                b->height= (h + !(orientation>1))>>1; /* top-row bands get the extra line of an odd height */
+
+                b->stride_line = 1 << (s->spatial_decomposition_count - level);
+                b->buf_x_offset = 0;
+                b->buf_y_offset = 0;
+
+                if(orientation&1){ /* right-hand bands start after the low-pass columns */
+                    b->buf += (w+1)>>1;
+                    b->buf_x_offset = (w+1)>>1;
+                }
+                if(orientation>1){ /* bottom bands start half a band stride further on */
+                    b->buf += b->stride>>1;
+                    b->buf_y_offset = b->stride_line >> 1;
+                }
+                b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer); /* same element offset inside the IDWT buffer */
+
+                if(level)
+                    b->parent= &s->plane[plane_index].band[level-1][orientation];
+                //FIXME avoid this realloc
+                av_freep(&b->x_coeff);
+                b->x_coeff=av_mallocz_array(((b->width+1) * b->height+1), sizeof(x_and_coeff));
+                if (!b->x_coeff)
+                    goto fail;
+            }
+            w= (w+1)>>1;
+            h= (h+1)>>1;
+        }
+    }
+
+    return 0;
+fail:
+    return AVERROR(ENOMEM);
+}
+
+#define USE_HALFPEL_PLANE 0
+
+/* Fills halfpel[1..3][p] with newly allocated H, V and HV half-pel planes of frame; returns 0 or AVERROR(ENOMEM). */
+static int halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
+    int p,x,y;
+    for(p=0; p < s->nb_planes; p++){
+        int w= p ? s->avctx->width >>s->chroma_h_shift : s->avctx->width;  /* planes > 0 are chroma */
+        int h= p ? s->avctx->height>>s->chroma_v_shift : s->avctx->height;
+        int ls= frame->linesize[p];
+        uint8_t *src= frame->data[p];
+        halfpel[1][p] = av_malloc_array(ls, h + 2 * EDGE_WIDTH);
+        halfpel[2][p] = av_malloc_array(ls, h + 2 * EDGE_WIDTH);
+        halfpel[3][p] = av_malloc_array(ls, h + 2 * EDGE_WIDTH);
+        if (!halfpel[1][p] || !halfpel[2][p] || !halfpel[3][p]) { /* test the raw pointers: a NULL is undetectable once the offset is added */
+            av_freep(&halfpel[1][p]);
+            av_freep(&halfpel[2][p]);
+            av_freep(&halfpel[3][p]);
+            return AVERROR(ENOMEM);
+        }
+        halfpel[1][p] += EDGE_WIDTH * (1 + ls); /* skip the border; ff_snow_release_buffer() subtracts this again */
+        halfpel[2][p] += EDGE_WIDTH * (1 + ls);
+        halfpel[3][p] += EDGE_WIDTH * (1 + ls);
+        halfpel[0][p]= src;
+        for(y=0; y<h; y++){ /* horizontal 6-tap */
+            for(x=0; x<w; x++){
+                int i= y*ls + x;
+                halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
+            }
+        }
+        for(y=0; y<h; y++){ /* vertical 6-tap */
+            for(x=0; x<w; x++){
+                int i= y*ls + x;
+                halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
+            }
+        }
+        src= halfpel[1][p];
+        for(y=0; y<h; y++){ /* vertical 6-tap applied to the horizontal plane */
+            for(x=0; x<w; x++){
+                int i= y*ls + x;
+                halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
+            }
+        }
+//FIXME border!
+    }
+    return 0;
+}
+
+void ff_snow_release_buffer(AVCodecContext *avctx)
+{   /* Unrefs the oldest reference frame (index max_ref_frames-1) and frees its half-pel planes. */
+    SnowContext *s = avctx->priv_data;
+    int i;
+
+    if(s->last_picture[s->max_ref_frames-1]->data[0]){
+        av_frame_unref(s->last_picture[s->max_ref_frames-1]);
+        for(i=0; i<9; i++) /* walks halfpel_plane[..][1..3][0..2] */
+            if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
+                av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture->linesize[i%3])); /* undo the border offset; assumes current_picture has the linesize used at allocation -- TODO confirm */
+    }
+}
+
+/* Rotates the reference list, counts usable references and gets a buffer for the new frame; returns 0 or a negative AVERROR. */
+int ff_snow_frame_start(SnowContext *s){
+    AVFrame *tmp;
+    int i, ret;
+    ff_snow_release_buffer(s->avctx);
+
+    tmp= s->last_picture[s->max_ref_frames-1]; /* the oldest frame object is recycled as the new current_picture */
+    for(i=s->max_ref_frames-1; i>0; i--)
+        s->last_picture[i] = s->last_picture[i-1];
+    memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
+    if(USE_HALFPEL_PLANE && s->current_picture->data[0]) {
+        if((ret = halfpel_interpol(s, s->halfpel_plane[0], s->current_picture)) < 0)
+            return ret;
+    }
+    s->last_picture[0] = s->current_picture;
+    s->current_picture = tmp;
+
+    if(s->keyframe){
+        s->ref_frames= 0;
+    }else{
+        /* count filled references, stopping right after the most recent keyframe */
+        for(i=0; i<s->max_ref_frames && s->last_picture[i]->data[0]; i++)
+            if(i && s->last_picture[i-1]->key_frame)
+                break;
+        s->ref_frames= i;
+        if(s->ref_frames==0){
+            av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
+            return AVERROR_INVALIDDATA; /* was a bare -1; still negative for callers testing < 0 */
+        }
+    }
+    if ((ret = ff_snow_get_buffer(s, s->current_picture)) < 0)
+        return ret;
+
+    s->current_picture->key_frame= s->keyframe;
+
+    return 0;
+}
+
+/* Frees everything owned by the context; safe to call after a partially failed ff_snow_common_init(). */
+av_cold void ff_snow_common_end(SnowContext *s)
+{
+    int plane_index, level, orientation, i;
+    av_freep(&s->spatial_dwt_buffer);
+    av_freep(&s->temp_dwt_buffer);
+    av_freep(&s->spatial_idwt_buffer);
+    av_freep(&s->temp_idwt_buffer);
+    av_freep(&s->run_buffer);
+
+    s->m.me.temp= NULL;
+    av_freep(&s->m.me.scratchpad);
+    av_freep(&s->m.me.map);
+    av_freep(&s->m.me.score_map);
+    av_freep(&s->m.obmc_scratchpad);
+
+    av_freep(&s->block);
+    av_freep(&s->scratchbuf);
+    av_freep(&s->emu_edge_buffer);
+
+    for(i=0; i<MAX_REF_FRAMES; i++){
+        av_freep(&s->ref_mvs[i]);
+        av_freep(&s->ref_scores[i]);
+        if(s->last_picture[i] && s->current_picture && s->last_picture[i]->data[0]) {
+            av_assert0(s->last_picture[i]->data[0] != s->current_picture->data[0]);
+        }
+        av_frame_free(&s->last_picture[i]);
+    }
+
+    for(plane_index=0; plane_index < s->nb_planes; plane_index++){
+        for(level=s->spatial_decomposition_count-1; level>=0; level--){
+            for(orientation=level ? 1 : 0; orientation<4; orientation++){
+                SubBand *b= &s->plane[plane_index].band[level][orientation];
+                /* frame pointers above may be NULL when init failed before allocating them, hence the guards */
+                av_freep(&b->x_coeff);
+            }
+        }
+    }
+    av_frame_free(&s->mconly_picture);
+    av_frame_free(&s->current_picture);
+}
diff --git a/libavcodec/snow.h b/libavcodec/snow.h
new file mode 100644
index 0000000..38810c1
--- /dev/null
+++ b/libavcodec/snow.h
@@ -0,0 +1,710 @@
+/*
+ * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2006 Robert Edele <yartrebo@earthlink.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_SNOW_H
+#define AVCODEC_SNOW_H
+
+#include "hpeldsp.h"
+#include "me_cmp.h"
+#include "qpeldsp.h"
+#include "snow_dwt.h"
+
+#include "rangecoder.h"
+#include "mathops.h"
+#include "mpegvideo.h"
+#include "h264qpel.h"
+
+#define MID_STATE 128
+
+#define MAX_PLANES 4
+#define QSHIFT 5
+#define QROOT (1<<QSHIFT)
+#define LOSSLESS_QLOG -128
+#define FRAC_BITS 4
+#define MAX_REF_FRAMES 8
+
+#define LOG2_OBMC_MAX 8
+#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
+typedef struct BlockNode{ /* per-block prediction parameters; one entry of SnowContext.block */
+    int16_t mx; /* motion vector x; scaled by mv_scale in ff_snow_pred_block() */
+    int16_t my; /* motion vector y */
+    uint8_t ref; /* index into last_picture[] */
+    uint8_t color[3]; /* per-plane fill value for intra blocks */
+    uint8_t type; /* bit mask of the BLOCK_* flags below */
+//#define TYPE_SPLIT    1
+#define BLOCK_INTRA   1
+#define BLOCK_OPT     2
+//#define TYPE_NOCOLOR  4
+    uint8_t level; //FIXME merge into type?
+}BlockNode;
+
+static const BlockNode null_block= { //FIXME add border maybe
+    .color= {128,128,128}, /* mid value on all three planes */
+    .mx= 0,
+    .my= 0,
+    .ref= 0,
+    .type= 0, /* no BLOCK_* flag set, so not intra */
+    .level= 0,
+};
+
+#define LOG2_MB_SIZE 4
+#define MB_SIZE (1<<LOG2_MB_SIZE)
+#define ENCODER_EXTRA_BITS 4
+#define HTAPS_MAX 8
+
+typedef struct x_and_coeff{ /* element type of SubBand.x_coeff */
+    int16_t x; /* presumably the coefficient's column -- TODO confirm against the users */
+    uint16_t coeff;
+} x_and_coeff;
+
+typedef struct SubBand{ /* one wavelet sub-band of a plane; laid out by ff_snow_common_init_after_header() */
+    int level;
+    int stride; /* plane width << (spatial_decomposition_count - level) */
+    int width;
+    int height;
+    int qlog;        ///< log(qscale)/log[2^(1/6)]
+    DWTELEM *buf; /* start of the band inside spatial_dwt_buffer */
+    IDWTELEM *ibuf; /* same element offset inside spatial_idwt_buffer */
+    int buf_x_offset;
+    int buf_y_offset;
+    int stride_line; ///< Stride measured in lines, not pixels.
+    x_and_coeff * x_coeff; /* heap array, freed in ff_snow_common_end() */
+    struct SubBand *parent; /* same orientation at level-1; set only when level != 0 */
+    uint8_t state[/*7*2*/ 7 + 512][32]; /* reset to MID_STATE by ff_snow_reset_contexts() */
+}SubBand;
+
+typedef struct Plane{ /* per-plane geometry, sub-bands and motion-compensation filter settings */
+    int width;
+    int height;
+    SubBand band[MAX_DECOMPOSITIONS][4]; /* indexed [level][orientation] */
+
+    int htaps;
+    int8_t hcoeff[HTAPS_MAX/2]; /* one half of the symmetric interpolation filter used by mc_block() */
+    int diag_mc; /* when 0, mc_block() always takes its four-source bilinear path */
+    int fast_mc; /* when set, mc_block() uses the fixed 6-tap filter instead of hcoeff */
+
+    int last_htaps;
+    int8_t last_hcoeff[HTAPS_MAX/2];
+    int last_diag_mc;
+}Plane;
+
+typedef struct SnowContext{ /* state used by the common snow code in snow.c */
+    AVClass *class;
+    AVCodecContext *avctx;
+    RangeCoder c;
+    MECmpContext mecc;
+    HpelDSPContext hdsp;
+    QpelDSPContext qdsp;
+    VideoDSPContext vdsp;
+    H264QpelContext h264qpel;
+    MpegvideoEncDSPContext mpvencdsp;
+    SnowDWTContext dwt;
+    AVFrame *new_picture;
+    AVFrame *input_picture;              ///< new_picture with the internal linesizes
+    AVFrame *current_picture;
+    AVFrame *last_picture[MAX_REF_FRAMES]; /* index 0 is the most recent reference */
+    uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4]; /* only filled when USE_HALFPEL_PLANE is enabled in snow.c */
+    AVFrame *mconly_picture;
+//     uint8_t q_context[16];
+    uint8_t header_state[32];
+    uint8_t block_state[128 + 32*128];
+    int keyframe;
+    int always_reset;
+    int version;
+    int spatial_decomposition_type;
+    int last_spatial_decomposition_type;
+    int temporal_decomposition_type;
+    int spatial_decomposition_count;
+    int last_spatial_decomposition_count;
+    int temporal_decomposition_count;
+    int max_ref_frames;
+    int ref_frames; /* usable references for the current frame, computed in ff_snow_frame_start() */
+    int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
+    uint32_t *ref_scores[MAX_REF_FRAMES];
+    DWTELEM *spatial_dwt_buffer;
+    DWTELEM *temp_dwt_buffer;
+    IDWTELEM *spatial_idwt_buffer;
+    IDWTELEM *temp_idwt_buffer;
+    int *run_buffer;
+    int colorspace_type;
+    int chroma_h_shift;
+    int chroma_v_shift;
+    int spatial_scalability;
+    int qlog;
+    int last_qlog;
+    int lambda;
+    int lambda2;
+    int pass1_rc;
+    int mv_scale;
+    int last_mv_scale;
+    int qbias;
+    int last_qbias;
+#define QBIAS_SHIFT 3
+    int b_width; /* picture size in MB_SIZE blocks, set by ff_snow_alloc_blocks() */
+    int b_height;
+    int block_max_depth;
+    int last_block_max_depth;
+    int nb_planes;
+    Plane plane[MAX_PLANES];
+    BlockNode *block; /* (b_width*b_height) << (2*block_max_depth) nodes */
+#define ME_CACHE_SIZE 1024
+    unsigned me_cache[ME_CACHE_SIZE];
+    unsigned me_cache_generation;
+    slice_buffer sb;
+    int memc_only;
+    int no_bitstream;
+
+    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
+
+    uint8_t *scratchbuf; /* allocated on the first ff_snow_common_init_after_header() call */
+    uint8_t *emu_edge_buffer; /* allocated together with scratchbuf */
+}SnowContext;
+
+/* Tables */
+extern const uint8_t * const ff_obmc_tab[4];
+extern uint8_t ff_qexp[QROOT];
+extern int ff_scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
+
+/* C bits used by mmx/sse2/altivec */
+
+static av_always_inline void snow_interleave_line_header(int * i, int width, IDWTELEM * low, IDWTELEM * high){ /* sets *i for snow_interleave_line_footer(); high is not used here */
+    (*i) = (width) - 2;
+
+    if (width & 1){ /* odd width: place the unpaired last low sample at index width-1 */
+        low[(*i)+1] = low[((*i)+1)>>1];
+        (*i)--; /* *i is even from here on */
+    }
+}
+
+static av_always_inline void snow_interleave_line_footer(int * i, IDWTELEM * low, IDWTELEM * high){ /* interleaves low/high into low[], in place */
+    for (; (*i)>=0; (*i)-=2){ /* back to front, so unread low[] entries are not overwritten */
+        low[(*i)+1] = high[(*i)>>1];
+        low[*i] = low[(*i)>>1];
+    }
+}
+
+static av_always_inline void snow_horizontal_compose_lift_lead_out(int i, IDWTELEM * dst, IDWTELEM * src, IDWTELEM * ref, int width, int w, int lift_high, int mul, int add, int shift){ /* scalar lifting step for columns i..w-1 plus the optional boundary column w */
+    for(; i<w; i++){
+        dst[i] = src[i] - ((mul * (ref[i] + ref[i + 1]) + add) >> shift);
+    }
+
+    if((width^lift_high)&1){ /* boundary sample: ref[w] stands in for both neighbours */
+        dst[w] = src[w] - ((mul * 2 * ref[w] + add) >> shift);
+    }
+}
+
+static av_always_inline void snow_horizontal_compose_liftS_lead_out(int i, IDWTELEM * dst, IDWTELEM * src, IDWTELEM * ref, int width, int w){
+    // Samples i..w-1: dst = src + ((ref[i] + ref[i+1] + W_BO + 4*src) >> W_BS).
+    while (i < w) {
+        dst[i] = src[i] + ((ref[i] + ref[(i+1)]+W_BO + 4 * src[i]) >> W_BS);
+        i++;
+    }
+    if (width & 1) // odd width: the edge sample at w uses ref[w] twice
+        dst[w] = src[w] + ((2 * ref[w] + W_BO + 4 * src[w]) >> W_BS);
+}
+
+/* common code */
+
+int ff_snow_common_init(AVCodecContext *avctx);
+int ff_snow_common_init_after_header(AVCodecContext *avctx);
+void ff_snow_common_end(SnowContext *s);
+void ff_snow_release_buffer(AVCodecContext *avctx);
+void ff_snow_reset_contexts(SnowContext *s);
+int ff_snow_alloc_blocks(SnowContext *s);
+int ff_snow_frame_start(SnowContext *s);
+void ff_snow_pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, ptrdiff_t stride,
+                     int sx, int sy, int b_w, int b_h, BlockNode *block,
+                     int plane_index, int w, int h);
+int ff_snow_get_buffer(SnowContext *s, AVFrame *frame);
+/* common inline functions */
+//XXX doublecheck all of them should stay inlined
+
+static inline void snow_set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
+    const int pitch     = s->b_width << s->block_max_depth; // row pitch used to index s->block[]
+    const int rem_depth = s->block_max_depth - level;
+    const int first     = (x + y*pitch) << rem_depth;       // top-left entry of the filled square
+    const int side      = 1 << rem_depth;                   // the square is side x side entries
+    BlockNode node;
+    int row, col;
+    // Assemble the node once from the arguments ...
+    node.mx       = mx;
+    node.my       = my;
+    node.ref      = ref;
+    node.type     = type;
+    node.level    = level;
+    node.color[0] = l;
+    node.color[1] = cb;
+    node.color[2] = cr;
+    // ... then store a copy in every entry of the square.
+    for (row = 0; row < side; row++) {
+        BlockNode *dst = &s->block[first + row*pitch];
+        for (col = 0; col < side; col++)
+            dst[col] = node;
+    }
+}
+
+static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
+                           const BlockNode *left, const BlockNode *top, const BlockNode *tr){
+    int sl, st, sr; // per-neighbour scale factors, applied as (v * scale + 128) >> 8
+    if (s->ref_frames == 1) { // single reference frame: mid_pred() of the unscaled neighbours
+        *mx = mid_pred(left->mx, top->mx, tr->mx);
+        *my = mid_pred(left->my, top->my, tr->my);
+        return;
+    }
+    // Otherwise scale each neighbour by ff_scale_mv_ref[ref][neighbour's ref] first.
+    sl = ff_scale_mv_ref[ref][left->ref];
+    st = ff_scale_mv_ref[ref][top ->ref];
+    sr = ff_scale_mv_ref[ref][tr  ->ref];
+    *mx = mid_pred((left->mx * sl + 128) >> 8, (top->mx * st + 128) >> 8, (tr->mx * sr + 128) >> 8);
+    *my = mid_pred((left->my * sl + 128) >> 8, (top->my * st + 128) >> 8, (tr->my * sr + 128) >> 8);
+}
+
+static av_always_inline int same_block(BlockNode *a, BlockNode *b){
+    // Returns 1 when both blocks predict identically, else 0: two intra blocks match on colour alone.
+    if ((a->type & BLOCK_INTRA) && (b->type & BLOCK_INTRA))
+        return a->color[0] == b->color[0] && a->color[1] == b->color[1] && a->color[2] == b->color[2];
+    return a->mx == b->mx && a->my == b->my && a->ref == b->ref &&
+           !((a->type ^ b->type) & BLOCK_INTRA); // and the intra flags must agree
+}
+
+//FIXME name cleanup (b_w, block_w, b_width stuff)
+//XXX should we really inline it?
+static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
+    const int b_width = s->b_width  << s->block_max_depth;
+    const int b_height= s->b_height << s->block_max_depth;
+    const int b_stride= b_width;
+    BlockNode *lt= &s->block[b_x + b_y*b_stride]; // left-top block of the 2x2 neighbourhood
+    BlockNode *rt= lt+1;                          // right-top
+    BlockNode *lb= lt+b_stride;                   // left-bottom
+    BlockNode *rb= lb+1;                          // right-bottom
+    uint8_t *block[4];                            // predictions made from lt, rt, lb, rb (entries may share storage)
+    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride; // distance between prediction buffers inside scratchbuf
+    uint8_t *tmp = s->scratchbuf;
+    uint8_t *ptmp;
+    int x,y;
+    // At the borders of the block grid, substitute the in-range neighbour for the out-of-range one.
+    if(b_x<0){
+        lt= rt;
+        lb= rb;
+    }else if(b_x + 1 >= b_width){
+        rt= lt;
+        rb= lb;
+    }
+    if(b_y<0){
+        lt= lb;
+        rt= rb;
+    }else if(b_y + 1 >= b_height){
+        lb= lt;
+        rb= rt;
+    }
+    // Clip the target rectangle to [0,w) x [0,h); obmc (and dst when !sliced && !offset_dst) skip the clipped part.
+    if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
+        obmc -= src_x;
+        b_w += src_x;
+        if(!sliced && !offset_dst)
+            dst -= src_x;
+        src_x=0;
+    }
+    if(src_x + b_w > w){
+        b_w = w - src_x;
+    }
+    if(src_y<0){
+        obmc -= src_y*obmc_stride;
+        b_h += src_y;
+        if(!sliced && !offset_dst)
+            dst -= src_y*dst_stride;
+        src_y=0;
+    }
+    if(src_y + b_h> h){
+        b_h = h - src_y;
+    }
+
+    if(b_w<=0 || b_h<=0) return; // nothing left after clipping
+
+    av_assert2(src_stride > 2*MB_SIZE + 5);
+    // With offset_dst set, dst is addressed relative to the plane origin; dst8 always is.
+    if(!sliced && offset_dst)
+        dst += src_x + src_y*dst_stride;
+    dst8+= src_x + src_y*src_stride;
+//    src += src_x + src_y*src_stride;
+    // Prediction buffers start 3*tmp_step into scratchbuf; the start of scratchbuf is passed to ff_snow_pred_block() as tmp.
+    ptmp= tmp + 3*tmp_step;
+    block[0]= ptmp;
+    ptmp+=tmp_step;
+    ff_snow_pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
+    // A neighbour that same_block() reports equal to an earlier one reuses that prediction instead of computing a new one.
+    if(same_block(lt, rt)){
+        block[1]= block[0];
+    }else{
+        block[1]= ptmp;
+        ptmp+=tmp_step;
+        ff_snow_pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
+    }
+
+    if(same_block(lt, lb)){
+        block[2]= block[0];
+    }else if(same_block(rt, lb)){
+        block[2]= block[1];
+    }else{
+        block[2]= ptmp;
+        ptmp+=tmp_step;
+        ff_snow_pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
+    }
+
+    if(same_block(lt, rb) ){
+        block[3]= block[0];
+    }else if(same_block(rt, rb)){
+        block[3]= block[1];
+    }else if(same_block(lb, rb)){
+        block[3]= block[2];
+    }else{
+        block[3]= ptmp;
+        ff_snow_pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
+    }
+    if(sliced){ // sliced mode: the blend is delegated to the DSP hook
+        s->dwt.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
+    }else{
+        for(y=0; y<b_h; y++){
+            //FIXME ugly misuse of obmc_stride
+            const uint8_t *obmc1= obmc + y*obmc_stride;                 // weights applied to block[3]
+            const uint8_t *obmc2= obmc1+ (obmc_stride>>1);              // half a row further right: weights for block[2]
+            const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);  // obmc_stride/2 rows further down: weights for block[1]
+            const uint8_t *obmc4= obmc3+ (obmc_stride>>1);              // both offsets: weights for block[0]
+            for(x=0; x<b_w; x++){
+                int v=   obmc1[x] * block[3][x + y*src_stride]
+                        +obmc2[x] * block[2][x + y*src_stride]
+                        +obmc3[x] * block[1][x + y*src_stride]
+                        +obmc4[x] * block[0][x + y*src_stride];
+                // Rescale the weighted sum from LOG2_OBMC_MAX to FRAC_BITS fractional bits.
+                v <<= 8 - LOG2_OBMC_MAX;
+                if(FRAC_BITS != 8){
+                    v >>= 8 - FRAC_BITS;
+                }
+                if(add){ // add mode: prediction + dst, rounded, clipped to 0..255, written to dst8
+                    v += dst[x + y*dst_stride];
+                    v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
+                    if(v&(~255)) v= ~(v>>31); // out of range: negative -> 0, above 255 -> all ones (255 as uint8_t)
+                    dst8[x + y*src_stride] = v;
+                }else{ // subtract mode: remove the prediction from dst
+                    dst[x + y*dst_stride] -= v;
+                }
+            }
+        }
+    }
+}
+
+static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
+    Plane *p= &s->plane[plane_index];
+    const int mb_w= s->b_width  << s->block_max_depth;
+    const int mb_h= s->b_height << s->block_max_depth;
+    int x, y, mb_x;
+    int block_size = MB_SIZE >> s->block_max_depth;
+    int block_w    = plane_index ? block_size>>s->chroma_h_shift : block_size; // planes other than 0 are scaled by the chroma shifts
+    int block_h    = plane_index ? block_size>>s->chroma_v_shift : block_size;
+    const uint8_t *obmc  = plane_index ? ff_obmc_tab[s->block_max_depth+s->chroma_h_shift] : ff_obmc_tab[s->block_max_depth];
+    const int obmc_stride= plane_index ? (2*block_size)>>s->chroma_h_shift : 2*block_size;
+    int ref_stride= s->current_picture->linesize[plane_index];
+    uint8_t *dst8= s->current_picture->data[plane_index];
+    int w= p->width;
+    int h= p->height;
+    av_assert2(s->chroma_h_shift == s->chroma_v_shift); // obmc params assume squares
+    if(s->keyframe || (s->avctx->debug&512)){ // no block prediction on this path: only a constant 128 offset is applied
+        if(mb_y==mb_h)
+            return;
+        // Rows covered by this call: block_h*mb_y up to (excluding) min(h, block_h*(mb_y+1)).
+        if(add){
+            for(y=block_h*mb_y; y<FFMIN(h,block_h*(mb_y+1)); y++){
+                for(x=0; x<w; x++){
+                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); // add the offset plus rounding term
+                    v >>= FRAC_BITS;
+                    if(v&(~255)) v= ~(v>>31); // clip: negative -> 0, above 255 -> 255 once stored as uint8_t
+                    dst8[x + y*ref_stride]= v;
+                }
+            }
+        }else{
+            for(y=block_h*mb_y; y<FFMIN(h,block_h*(mb_y+1)); y++){
+                for(x=0; x<w; x++){
+                    buf[x + y*w]-= 128<<FRAC_BITS;
+                }
+            }
+        }
+
+        return;
+    }
+    // One add_yblock() per block column, 0..mb_w inclusive; each target is shifted up/left by half a block and uses block index (mb_x-1, mb_y-1).
+    for(mb_x=0; mb_x<=mb_w; mb_x++){
+        add_yblock(s, 0, NULL, buf, dst8, obmc,
+                   block_w*mb_x - block_w/2,
+                   block_h*mb_y - block_h/2,
+                   block_w, block_h,
+                   w, h,
+                   w, ref_stride, obmc_stride,
+                   mb_x - 1, mb_y - 1,
+                   add, 1, plane_index);
+    }
+}
+
+static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
+    const int last_row = s->b_height << s->block_max_depth;
+    int row = 0;
+    while (row <= last_row) // inclusive bound: rows 0..last_row are all handed to predict_slice()
+        predict_slice(s, buf, plane_index, add, row++);
+}
+
+static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
+    const int pitch     = s->b_width << s->block_max_depth; // row pitch used to index s->block[]
+    const int rem_depth = s->block_max_depth - level;
+    const int first     = (x + y*pitch) << rem_depth;       // top-left entry of the filled area
+    const int cols      = 1<<rem_depth;
+    const int rows      = 1<<rem_depth; //FIXME "w!=h"
+    BlockNode node;
+    int r, c;
+    // Fill in a single node from the arguments ...
+    node.level    = level;
+    node.type     = type;
+    node.ref      = ref;
+    node.mx       = mx;
+    node.my       = my;
+    node.color[0] = l;
+    node.color[1] = cb;
+    node.color[2] = cr;
+    // ... and store a copy in each of the rows x cols entries.
+    for (r = 0; r < rows; r++) {
+        BlockNode *dst = &s->block[first + r*pitch];
+        for (c = 0; c < cols; c++)
+            dst[c] = node;
+    }
+}
+
+static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
+    SnowContext *s = c->avctx->priv_data;
+    const int luma_off   =   y*c->  stride + x;
+    const int chroma_off = ((y*c->uvstride + x)>>s->chroma_h_shift);
+    int plane;
+    // Slot 0 of c->src / c->ref receives the three plane pointers; only the
+    // reference pointers are advanced to (x, y). ref2 is not used here.
+    for (plane = 0; plane < 3; plane++) {
+        c->src[0][plane] = src[plane];
+        c->ref[0][plane] = ref[plane] + (plane ? chroma_off : luma_off);
+    }
+    // This helper only fills slot 0, so the caller must pass ref_index == 0.
+    av_assert2(!ref_index);
+}
+
+
+/* bitstream functions */
+
+extern const int8_t ff_quant3bA[256];
+
+#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
+
+static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
+    int i;
+    // state[] use: 0 = zero flag, 1..10 = exponent (unary), 11..21 = sign, 22..31 = mantissa bits.
+    if(v){
+        const int a= FFABS(v);
+        const int e= av_log2(a);    // count of mantissa bits written below
+        const int el= FFMIN(e, 10);
+        put_rac(c, state+0, 0);     // 0: the value is non-zero
+        // Exponent in unary: e ones and a closing zero; from the 10th position on a single state is shared.
+        for(i=0; i<el; i++){
+            put_rac(c, state+1+i, 1);  //1..10
+        }
+        for(; i<e; i++){
+            put_rac(c, state+1+9, 1);  //1..10
+        }
+        put_rac(c, state+1+FFMIN(i,9), 0);
+        // Bits e-1..0 of |v|, most significant first; bit positions >= el share state 22+9.
+        for(i=e-1; i>=el; i--){
+            put_rac(c, state+22+9, (a>>i)&1); //22..31
+        }
+        for(; i>=0; i--){
+            put_rac(c, state+22+i, (a>>i)&1); //22..31
+        }
+        // Sign (1 = negative) is written for signed symbols only.
+        if(is_signed)
+            put_rac(c, state+11 + el, v < 0); //11..21
+    }else{
+        put_rac(c, state+0, 1);     // 1: the value is zero
+    }
+}
+
+static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
+    if(get_rac(c, state+0)) // zero flag set: the symbol is 0
+        return 0;
+    else{
+        int i, e, a;
+        e= 0;
+        while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
+            e++;
+        }
+        // Rebuild the magnitude: start from 1, then shift in e decoded bits, most significant first.
+        a= 1;
+        for(i=e-1; i>=0; i--){
+            a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
+        }
+        // e becomes 0 or -1; (a^e)-e leaves a unchanged for 0 and negates it for -1.
+        e= -(is_signed && get_rac(c, state+11 + FFMIN(e,10))); //11..21
+        return (a^e)-e;
+    }
+}
+
+static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
+    int i;
+    int r= log2>=0 ? 1<<log2 : 1; // size of the current bucket
+    // Writes v >= 0 as a prefix (one 1 per bucket skipped, then a 0) followed by the low bits of the remainder.
+    av_assert2(v>=0);
+    av_assert2(log2>=-4);
+    // Each prefix bit uses state[4+log2]; the bucket size doubles once log2 has become positive.
+    while(v >= r){
+        put_rac(c, state+4+log2, 1);
+        v -= r;
+        log2++;
+        if(log2>0) r+=r;
+    }
+    put_rac(c, state+4+log2, 0);
+    // Remainder: bits log2-1..0 of v, most significant first, bit i with state[31-i].
+    for(i=log2-1; i>=0; i--){
+        put_rac(c, state+31-i, (v>>i)&1);
+    }
+}
+
+static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
+    int i;
+    int r= log2>=0 ? 1<<log2 : 1; // size of the current bucket
+    int v=0;
+    // Reads a prefix of 1-bits (each adds one bucket to v), then the low bits of the remainder.
+    av_assert2(log2>=-4);
+    // The prefix stops at a decoded 0 or, regardless of the input, once log2 reaches 28.
+    while(log2<28 && get_rac(c, state+4+log2)){
+        v+= r;
+        log2++;
+        if(log2>0) r+=r;
+    }
+    // Remainder bits log2-1..0, most significant first, bit i with state[31-i].
+    for(i=log2-1; i>=0; i--){
+        v+= get_rac(c, state+31-i)<<i;
+    }
+
+    return v;
+}
+
+static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
+    const int w= b->width;
+    const int h= b->height;
+    int x,y;
+    // Decodes the band into b->x_coeff: per row a list of (x, coeff) entries closed by an entry with x == w+1. 'orientation' is unused.
+    int run, runs;
+    x_and_coeff *xc= b->x_coeff;            // write position
+    x_and_coeff *prev_xc= NULL;             // read position in the previous row's list
+    x_and_coeff *prev2_xc= xc;              // start of the row being written (becomes prev_xc for the next row)
+    x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL; // read position in the parent band's list
+    x_and_coeff *prev_parent_xc= parent_xc; // start of the current parent row, used to rewind
+    // runs: run symbols still to be read; run: zero-context positions to skip before the next coefficient.
+    runs= get_symbol2(&s->c, b->state[30], 0);
+    if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
+    else           run= INT_MAX;
+
+    for(y=0; y<h; y++){
+        int v=0;
+        int lt=0, t=0, rt=0;
+        // l / lt / t / rt: decoded values left, top-left, top and top-right of (x, y); p: parent value at x>>1.
+        if(y && prev_xc->x == 0){
+            rt= prev_xc->coeff;
+        }
+        for(x=0; x<w; x++){
+            int p=0;
+            const int l= v;
+            // Slide the top neighbourhood one column to the right.
+            lt= t; t= rt;
+            // New top-right value from the previous row's list, 0 when that position has no entry.
+            if(y){
+                if(prev_xc->x <= x)
+                    prev_xc++;
+                if(prev_xc->x == x + 1)
+                    rt= prev_xc->coeff;
+                else
+                    rt=0;
+            }
+            if(parent_xc){
+                if(x>>1 > parent_xc->x){
+                    parent_xc++;
+                }
+                if(x>>1 == parent_xc->x){
+                    p= parent_xc->coeff;
+                }
+            }
+            if(/*ll|*/l|lt|t|rt|p){ // some neighbour is non-zero: context-coded significance flag
+                int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
+
+                v=get_rac(&s->c, &b->state[0][context]);
+                if(v){
+                    v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1); // decoded value + 1, stored above bit 0
+                    v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + ff_quant3bA[l&0xFF] + 3*ff_quant3bA[t&0xFF]]); // bit 0: one more decoded bit (presumably the sign -- confirm against the consumer)
+
+                    xc->x=x;
+                    (xc++)->coeff= v;
+                }
+            }else{ // all neighbours zero: run-length mode
+                if(!run){ // run exhausted: fetch the next run and decode a coefficient at this position
+                    if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
+                    else           run= INT_MAX;
+                    v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
+                    v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
+
+                    xc->x=x;
+                    (xc++)->coeff= v;
+                }else{
+                    int max_run;
+                    run--;
+                    v=0;
+                    av_assert2(run >= 0);
+                    if(y) max_run= FFMIN(run, prev_xc->x - x - 2); // bounded by the next entry of the previous row
+                    else  max_run= FFMIN(run, w-x-1);              // first row: bounded by the end of the row
+                    if(parent_xc)
+                        max_run= FFMIN(max_run, 2*parent_xc->x - x - 1); // and by the next parent entry
+                    av_assert2(max_run >= 0 && max_run <= run);
+                    // Jump over max_run further positions in one go instead of visiting each one.
+                    x+= max_run;
+                    run-= max_run;
+                }
+            }
+        }
+        (xc++)->x= w+1; //end marker
+        prev_xc= prev2_xc;
+        prev2_xc= xc;
+        // The parent list advances one row after every odd y; after even y it is rewound to the start of the same parent row.
+        if(parent_xc){
+            if(y&1){
+                while(parent_xc->x != parent->width+1)
+                    parent_xc++;
+                parent_xc++;
+                prev_parent_xc= parent_xc;
+            }else{
+                parent_xc= prev_parent_xc;
+            }
+        }
+    }
+
+    (xc++)->x= w+1; //end marker
+}
+
+#endif /* AVCODEC_SNOW_H */
diff --git a/libavcodec/snow_dwt.c b/libavcodec/snow_dwt.c
new file mode 100644
index 0000000..63ff7a0
--- /dev/null
+++ b/libavcodec/snow_dwt.c
@@ -0,0 +1,865 @@
+/*
+ * Copyright (C) 2004-2010 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2008 David Conrad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "me_cmp.h"
+#include "snow_dwt.h"
+
+int ff_slice_buffer_init(slice_buffer *buf, int line_count,
+                         int max_allocated_lines, int line_width,
+                         IDWTELEM *base_buffer)
+{
+    int i;
+    /* Returns 0 on success or AVERROR(ENOMEM); on failure everything allocated here is freed again. */
+    buf->base_buffer = base_buffer;
+    buf->line_count  = line_count;
+    buf->line_width  = line_width;
+    buf->data_count  = max_allocated_lines;
+    buf->line        = av_mallocz_array(line_count, sizeof(IDWTELEM *)); /* zero-filled: no line is mapped yet */
+    if (!buf->line)
+        return AVERROR(ENOMEM);
+    buf->data_stack  = av_malloc_array(max_allocated_lines, sizeof(IDWTELEM *)); /* stack of free row buffers */
+    if (!buf->data_stack) {
+        av_freep(&buf->line);
+        return AVERROR(ENOMEM);
+    }
+    /* Allocate all max_allocated_lines row buffers up front. */
+    for (i = 0; i < max_allocated_lines; i++) {
+        buf->data_stack[i] = av_malloc_array(line_width, sizeof(IDWTELEM));
+        if (!buf->data_stack[i]) {
+            for (i--; i >=0; i--) /* unwind the rows allocated so far */
+                av_freep(&buf->data_stack[i]);
+            av_freep(&buf->data_stack);
+            av_freep(&buf->line);
+            return AVERROR(ENOMEM);
+        }
+    }
+    /* Every row buffer is free at this point, so the stack top is the last entry. */
+    buf->data_stack_top = max_allocated_lines - 1;
+    return 0;
+}
+
+IDWTELEM *ff_slice_buffer_load_line(slice_buffer *buf, int line)
+{
+    IDWTELEM **slot = &buf->line[line];
+
+    /* Checked up front, i.e. also when the line is already mapped. */
+    av_assert0(buf->data_stack_top >= 0);
+//  av_assert1(!buf->line[line]);
+    if (!*slot) {
+        /* Not mapped yet: pop a free row buffer off the stack and bind it. */
+        *slot = buf->data_stack[buf->data_stack_top];
+        buf->data_stack_top--;
+    }
+
+    return *slot;
+}
+
+void ff_slice_buffer_release(slice_buffer *buf, int line)
+{
+    /* Return the row buffer mapped at 'line' to the free stack and unmap
+     * the line. Both preconditions are checked with av_assert1() only
+     * (presumably compiled out at low assert levels -- confirm). */
+    av_assert1(line >= 0 && line < buf->line_count);
+    av_assert1(buf->line[line]);
+
+    /* Push the storage on top of the stack, then mark the line unloaded. */
+    buf->data_stack[++buf->data_stack_top] = buf->line[line];
+    buf->line[line] = NULL;
+}
+
+void ff_slice_buffer_flush(slice_buffer *buf)
+{   /* Release every mapped line back to the free stack. */
+    int i; /* buf->line is NULL after a failed ff_slice_buffer_init(): then there is nothing to flush */
+    for (i = 0; buf->line && i < buf->line_count; i++)
+        if (buf->line[i])
+            ff_slice_buffer_release(buf, i);
+}
+
+void ff_slice_buffer_destroy(slice_buffer *buf)
+{   /* Free all row buffers and both tables; safe after a failed init and when called twice. */
+    int i;
+    if (buf->line) /* NULL table: nothing is mapped, and flushing would dereference it */
+        ff_slice_buffer_flush(buf);
+    for (i = buf->data_stack ? buf->data_count - 1 : -1; i >= 0; i--) /* skip when the stack itself is gone */
+        av_freep(&buf->data_stack[i]);
+    av_freep(&buf->data_stack);
+    av_freep(&buf->line);
+}
+
+static inline int mirror(int v, int m)
+{   /* Reflect index v into [0, m] about both ends; for m == 0 the only valid index, 0, is returned. */
+    while (m && (unsigned)v > (unsigned)m) { /* without the m test, m == 0 just flips v's sign forever */
+        v = -v;
+        if (v < 0)
+            v += 2 * m;
+    }
+    return m ? v : 0;
+}
+
+static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
+                                  int dst_step, int src_step, int ref_step,
+                                  int width, int mul, int add, int shift,
+                                  int highpass, int inverse)
+{
+    const int mirror_left  = !highpass;                 /* non-highpass calls handle the first sample separately */
+    const int mirror_right = (width & 1) ^ highpass;    /* parity decides whether a last edge sample exists */
+    const int w            = (width >> 1) - 1 + (highpass & width); /* number of interior samples */
+    int i;
+    /* Generic lifting step: dst = src +/- ((mul * (ref + next ref) + add) >> shift); 'inverse' selects the minus sign. */
+#define LIFT(src, ref, inv) ((src) + ((inv) ? -(ref) : +(ref)))
+    if (mirror_left) {
+        dst[0] = LIFT(src[0], ((mul * 2 * ref[0] + add) >> shift), inverse); /* left edge: ref[0] is counted twice */
+        dst   += dst_step;
+        src   += src_step;
+    }
+    /* Interior: each sample uses two consecutive ref samples. */
+    for (i = 0; i < w; i++)
+        dst[i * dst_step] = LIFT(src[i * src_step],
+                                 ((mul * (ref[i * ref_step] +
+                                          ref[(i + 1) * ref_step]) +
+                                   add) >> shift),
+                                 inverse);
+    /* Right edge: the single available ref sample is counted twice. */
+    if (mirror_right)
+        dst[w * dst_step] = LIFT(src[w * src_step],
+                                 ((mul * 2 * ref[w * ref_step] + add) >> shift),
+                                 inverse);
+}
+
+static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
+                                   int dst_step, int src_step, int ref_step,
+                                   int width, int mul, int add, int shift,
+                                   int highpass, int inverse)
+{
+    const int mirror_left  = !highpass;                 /* non-highpass calls handle the first sample separately */
+    const int mirror_right = (width & 1) ^ highpass;    /* parity decides whether a last edge sample exists */
+    const int w            = (width >> 1) - 1 + (highpass & width); /* number of interior samples */
+    int i;
+    /* Same traversal as lift(), but the update also depends on src itself; the constants below are only valid for shift == 4. */
+    av_assert1(shift == 4);
+#define LIFTS(src, ref, inv)                                            \
+    ((inv) ? (src) + (((ref) + 4 * (src)) >> shift)                     \
+           : -((-16 * (src) + (ref) + add /                             \
+                4 + 1 + (5 << 25)) / (5 * 4) - (1 << 23)))
+    if (mirror_left) {
+        dst[0] = LIFTS(src[0], mul * 2 * ref[0] + add, inverse); /* left edge: ref[0] is counted twice */
+        dst   += dst_step;
+        src   += src_step;
+    }
+    /* Interior: each sample uses two consecutive ref samples. */
+    for (i = 0; i < w; i++)
+        dst[i * dst_step] = LIFTS(src[i * src_step],
+                                  mul * (ref[i * ref_step] +
+                                         ref[(i + 1) * ref_step]) + add,
+                                  inverse);
+    /* Right edge: the single available ref sample is counted twice. */
+    if (mirror_right)
+        dst[w * dst_step] = LIFTS(src[w * src_step],
+                                  mul * 2 * ref[w * ref_step] + add,
+                                  inverse);
+}
+
+static void horizontal_decompose53i(DWTELEM *b, DWTELEM *temp, int width)
+{
+    const int width2 = width >> 1;
+    int x;
+    const int w2 = (width + 1) >> 1; /* start of the second (odd-sample) half */
+    /* De-interleave b[] into temp[]: even samples to temp[0..], odd samples to temp[w2..]. */
+    for (x = 0; x < width2; x++) {
+        temp[x]      = b[2 * x];
+        temp[x + w2] = b[2 * x + 1];
+    }
+    if (width & 1)
+        temp[x] = b[2 * x]; /* last even sample of an odd-width line */
+    lift(b + w2, temp + w2, temp,   1, 1, 1, width, -1, 0, 1, 1, 0); /* second half of b from the odd samples, ref = even samples (mul -1, shift 1) */
+    lift(b,      temp,      b + w2, 1, 1, 1, width,  1, 2, 2, 0, 0); /* first half of b from the even samples, ref = the second half just written (add 2, shift 2) */
+}
+
+static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
+                                    int width)
+{
+    int x;
+    /* Row b1, in place: subtract the sum of rows b0 and b2 shifted right by one. */
+    for (x = 0; x < width; x++)
+        b1[x] = b1[x] - ((b0[x] + b2[x]) >> 1);
+}
+
+static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
+                                    int width)
+{
+    int x;
+    /* Row b1, in place: add (b0 + b2 + 2) >> 2, i.e. a rounded quarter of the neighbour sum. */
+    for (x = 0; x < width; x++)
+        b1[x] = b1[x] + ((b0[x] + b2[x] + 2) >> 2);
+}
+
+static void spatial_decompose53i(DWTELEM *buffer, DWTELEM *temp,
+                                 int width, int height, int stride)
+{
+    int y;
+    DWTELEM *b0 = buffer + mirror(-2 - 1, height - 1) * stride;
+    DWTELEM *b1 = buffer + mirror(-2,     height - 1) * stride;
+    /* Slide a window of rows b0..b3 down the buffer two rows per pass; out-of-range row indices are reflected by mirror(). */
+    for (y = -2; y < height; y += 2) {
+        DWTELEM *b2 = buffer + mirror(y + 1, height - 1) * stride;
+        DWTELEM *b3 = buffer + mirror(y + 2, height - 1) * stride;
+        /* The unsigned compares are false for negative row indices as well as for indices >= height. */
+        if (y + 1 < (unsigned)height)
+            horizontal_decompose53i(b2, temp, width);
+        if (y + 2 < (unsigned)height)
+            horizontal_decompose53i(b3, temp, width);
+        /* Vertical lifting: H0 updates row y + 1 (b2), then L0 updates row y (b1). */
+        if (y + 1 < (unsigned)height)
+            vertical_decompose53iH0(b1, b2, b3, width);
+        if (y + 0 < (unsigned)height)
+            vertical_decompose53iL0(b0, b1, b2, width);
+        /* Advance the window by two rows. */
+        b0 = b2;
+        b1 = b3;
+    }
+}
+
+static void horizontal_decompose97i(DWTELEM *b, DWTELEM *temp, int width)
+{
+    const int w2 = (width + 1) >> 1; /* start of the second half in b[] and temp[] */
+    /* Four lifting passes (A, B, C, D constants); the first two read b[] with step 2, so they de-interleave as they go. */
+    lift(temp + w2, b + 1, b,         1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1); /* A: odd samples of b -> temp[w2..] */
+    liftS(temp,     b,     temp + w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0); /* B: even samples of b -> temp[0..] */
+    lift(b + w2, temp + w2, temp,     1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0); /* C: temp[w2..] -> b[w2..] */
+    lift(b,      temp,      b + w2,   1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0); /* D: temp[0..] -> b[0..] */
+}
+
+static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
+                                    int width)
+{
+    int x;
+    /* Row b1, in place: subtract the neighbour sum scaled by the W_A* constants. */
+    for (x = 0; x < width; x++)
+        b1[x] = b1[x] - ((W_AM * (b0[x] + b2[x]) + W_AO) >> W_AS);
+}
+
+static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
+                                    int width)
+{
+    int x;
+    /* Row b1, in place: add the neighbour sum scaled by the W_C* constants. */
+    for (x = 0; x < width; x++)
+        b1[x] = b1[x] + ((W_CM * (b0[x] + b2[x]) + W_CO) >> W_CS);
+}
+
+static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
+                                    int width)
+{
+    int x;
+    /* Row b1 is recomputed by a division by 80; (5 << 27) / 80 == 1 << 23, so the bias added before dividing is removed again afterwards. */
+    for (x = 0; x < width; x++)
+        b1[x] = (16 * 4 * b1[x] - 4 * (b0[x] + b2[x]) + W_BO * 5 + (5 << 27)) /
+                (5 * 16) - (1 << 23);
+}
+
+static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
+                                    int width)
+{
+    int x;
+    /* Row b1, in place: add the neighbour sum scaled by the W_D* constants. */
+    for (x = 0; x < width; x++)
+        b1[x] = b1[x] + ((W_DM * (b0[x] + b2[x]) + W_DO) >> W_DS);
+}
+
+static void spatial_decompose97i(DWTELEM *buffer, DWTELEM *temp,
+                                 int width, int height, int stride)
+{
+    int y;
+    DWTELEM *b0 = buffer + mirror(-4 - 1, height - 1) * stride;
+    DWTELEM *b1 = buffer + mirror(-4,     height - 1) * stride;
+    DWTELEM *b2 = buffer + mirror(-4 + 1, height - 1) * stride;
+    DWTELEM *b3 = buffer + mirror(-4 + 2, height - 1) * stride;
+    /* Slide a window of rows b0..b5 down the buffer two rows per pass; out-of-range row indices are reflected by mirror(). */
+    for (y = -4; y < height; y += 2) {
+        DWTELEM *b4 = buffer + mirror(y + 3, height - 1) * stride;
+        DWTELEM *b5 = buffer + mirror(y + 4, height - 1) * stride;
+        /* The unsigned compares are false for negative row indices as well as for indices >= height. */
+        if (y + 3 < (unsigned)height)
+            horizontal_decompose97i(b4, temp, width);
+        if (y + 4 < (unsigned)height)
+            horizontal_decompose97i(b5, temp, width);
+        /* Four vertical lifting steps, each updating a row one position higher up than the previous step: y+3, y+2, y+1, y. */
+        if (y + 3 < (unsigned)height)
+            vertical_decompose97iH0(b3, b4, b5, width);
+        if (y + 2 < (unsigned)height)
+            vertical_decompose97iL0(b2, b3, b4, width);
+        if (y + 1 < (unsigned)height)
+            vertical_decompose97iH1(b1, b2, b3, width);
+        if (y + 0 < (unsigned)height)
+            vertical_decompose97iL1(b0, b1, b2, width);
+        /* Advance the window by two rows. */
+        b0 = b2;
+        b1 = b3;
+        b2 = b4;
+        b3 = b5;
+    }
+}
+
+void ff_spatial_dwt(DWTELEM *buffer, DWTELEM *temp, int width, int height,
+                    int stride, int type, int decomposition_count)
+{
+    int level = 0;
+    /* Run one analysis pass per level. Level n works on a
+     * (width >> n) x (height >> n) area whose rows are (stride << n)
+     * elements apart. A type other than DWT_97 / DWT_53 does nothing. */
+    while (level < decomposition_count) {
+        if (type == DWT_97)
+            spatial_decompose97i(buffer, temp,
+                                 width >> level, height >> level,
+                                 stride << level);
+        else if (type == DWT_53)
+            spatial_decompose53i(buffer, temp,
+                                 width >> level, height >> level,
+                                 stride << level);
+        /* next, coarser level */
+        level++;
+    }
+}
+
+static void horizontal_compose53i(IDWTELEM *b, IDWTELEM *temp, int width)
+{
+    const int width2 = width >> 1;
+    const int w2     = (width + 1) >> 1; /* start of the second half of b[] */
+    int x;
+    /* Interleave the two halves of b[] into temp[]: b[x] -> temp[2x], b[w2 + x] -> temp[2x + 1]. */
+    for (x = 0; x < width2; x++) {
+        temp[2 * x]     = b[x];
+        temp[2 * x + 1] = b[x + w2];
+    }
+    if (width & 1)
+        temp[2 * x] = b[x];
+    /* Write the result back to b[]: each even sample first, then the odd sample between two finished even ones. */
+    b[0] = temp[0] - ((temp[1] + 1) >> 1); /* NOTE(review): temp[1] is read unconditionally, so width >= 2 looks required -- confirm with callers */
+    for (x = 2; x < width - 1; x += 2) {
+        b[x]     = temp[x]     - ((temp[x - 1] + temp[x + 1] + 2) >> 2);
+        b[x - 1] = temp[x - 1] + ((b[x - 2]    + b[x]        + 1) >> 1);
+    }
+    if (width & 1) { /* odd width: the last even sample has a single odd neighbour */
+        b[x]     = temp[x]     - ((temp[x - 1]     + 1) >> 1);
+        b[x - 1] = temp[x - 1] + ((b[x - 2] + b[x] + 1) >> 1);
+    } else /* even width: the last odd sample has a single even neighbour */
+        b[x - 1] = temp[x - 1] + b[x - 2];
+}
+
+static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
+                                  int width)
+{
+    int x;
+    /* Row b1, in place: add the sum of rows b0 and b2 shifted right by one. */
+    for (x = 0; x < width; x++)
+        b1[x] = b1[x] + ((b0[x] + b2[x]) >> 1);
+}
+
+static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
+                                  int width)
+{
+    int x;
+    /* Row b1, in place: subtract (b0 + b2 + 2) >> 2. */
+    for (x = 0; x < width; x++)
+        b1[x] = b1[x] - ((b0[x] + b2[x] + 2) >> 2);
+}
+
+static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer *sb,
+                                             int height, int stride_line)
+{
+    const int last = height - 1; /* upper bound handed to mirror() */
+    cs->y  = -1;                 /* row counter starts at -1 */
+    cs->b0 = slice_buffer_get_line(sb, mirror(-2, last) * stride_line); /* reflected row -2, fetched first */
+    cs->b1 = slice_buffer_get_line(sb, mirror(-1, last) * stride_line); /* reflected row -1 */
+}
+
+static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer,
+                                    int height, int stride)
+{   /* Prime the compose state: two row pointers plus the row counter. */
+    cs->b0 = buffer + mirror(-1 - 1, height - 1) * stride; /* row -2 reflected into [0, height - 1] */
+    cs->b1 = buffer + mirror(-1,     height - 1) * stride; /* row -1 reflected into [0, height - 1] */
+    cs->y  = -1;                                           /* the *_dy() step reads this as its current y */
+}
+
+static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer *sb,
+                                           IDWTELEM *temp,
+                                           int width, int height,
+                                           int stride_line)
+{
+    int y = cs->y;
+    /* One step of the row-by-row inverse transform: b0/b1 come from the previous step, b2/b3 are fetched from the slice buffer. */
+    IDWTELEM *b0 = cs->b0;
+    IDWTELEM *b1 = cs->b1;
+    IDWTELEM *b2 = slice_buffer_get_line(sb,
+                                         mirror(y + 1, height - 1) *
+                                         stride_line);
+    IDWTELEM *b3 = slice_buffer_get_line(sb,
+                                         mirror(y + 2, height - 1) *
+                                         stride_line);
+    /* Both rows in range: run the L0 and H0 updates fused in a single pass over x. */
+    if (y + 1 < (unsigned)height && y < (unsigned)height) {
+        int x;
+
+        for (x = 0; x < width; x++) {
+            b2[x] -= (b1[x] + b3[x] + 2) >> 2;
+            b1[x] += (b0[x] + b2[x])     >> 1;
+        }
+    } else {
+        if (y + 1 < (unsigned)height)
+            vertical_compose53iL0(b1, b2, b3, width);
+        if (y + 0 < (unsigned)height)
+            vertical_compose53iH0(b0, b1, b2, width);
+    }
+    /* Horizontal synthesis on rows y - 1 (b0) and y (b1), each only when in range. */
+    if (y - 1 < (unsigned)height)
+        horizontal_compose53i(b0, temp, width);
+    if (y + 0 < (unsigned)height)
+        horizontal_compose53i(b1, temp, width);
+    /* Save the window, moved down by two rows, for the next step. */
+    cs->b0  = b2;
+    cs->b1  = b3;
+    cs->y  += 2;
+}
+
+static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer,
+                                  IDWTELEM *temp, int width, int height,
+                                  int stride)
+{
+    int y        = cs->y;
+    IDWTELEM *b0 = cs->b0; /* rows carried over from the previous step */
+    IDWTELEM *b1 = cs->b1;
+    IDWTELEM *b2 = buffer + mirror(y + 1, height - 1) * stride; /* next two rows, reflected into range */
+    IDWTELEM *b3 = buffer + mirror(y + 2, height - 1) * stride;
+    /* Vertical updates: L0 on row y + 1 (b2), then H0 on row y (b1); the unsigned compares also reject negative rows. */
+    if (y + 1 < (unsigned)height)
+        vertical_compose53iL0(b1, b2, b3, width);
+    if (y + 0 < (unsigned)height)
+        vertical_compose53iH0(b0, b1, b2, width);
+    /* Horizontal synthesis on rows y - 1 (b0) and y (b1), each only when in range. */
+    if (y - 1 < (unsigned)height)
+        horizontal_compose53i(b0, temp, width);
+    if (y + 0 < (unsigned)height)
+        horizontal_compose53i(b1, temp, width);
+    /* Save the window, moved down by two rows, for the next step. */
+    cs->b0  = b2;
+    cs->b1  = b3;
+    cs->y  += 2;
+}
+
+void ff_snow_horizontal_compose97i(IDWTELEM *b, IDWTELEM *temp, int width)
+{
+    const int w2 = (width + 1) >> 1; /* start of the second half of b[] */
+    int x;
+    /* First pass, b[] -> temp[]: interleaves both halves while applying two updates (even slots from b[x], odd slots from b[w2 + x - 1]). */
+    temp[0] = b[0] - ((3 * b[w2] + 2) >> 2);
+    for (x = 1; x < (width >> 1); x++) {
+        temp[2 * x]     = b[x] - ((3 * (b[x + w2 - 1] + b[x + w2]) + 4) >> 3);
+        temp[2 * x - 1] = b[x + w2 - 1] - temp[2 * x - 2] - temp[2 * x];
+    }
+    if (width & 1) { /* odd width: the last even sample has a single neighbour in the second half */
+        temp[2 * x]     = b[x] - ((3 * b[x + w2 - 1] + 2) >> 2);
+        temp[2 * x - 1] = b[x + w2 - 1] - temp[2 * x - 2] - temp[2 * x];
+    } else /* even width: the last odd sample has a single even neighbour */
+        temp[2 * x - 1] = b[x + w2 - 1] - 2 * temp[2 * x - 2];
+    /* Second pass, temp[] -> b[]: two more updates, even samples first, then the odd sample between two finished even ones. */
+    b[0] = temp[0] + ((2 * temp[0] + temp[1] + 4) >> 3);
+    for (x = 2; x < width - 1; x += 2) {
+        b[x]     = temp[x] + ((4 * temp[x] + temp[x - 1] + temp[x + 1] + 8) >> 4);
+        b[x - 1] = temp[x - 1] + ((3 * (b[x - 2] + b[x])) >> 1);
+    }
+    if (width & 1) { /* odd width: edge handling as in the first pass */
+        b[x]     = temp[x] + ((2 * temp[x] + temp[x - 1] + 4) >> 3);
+        b[x - 1] = temp[x - 1] + ((3 * (b[x - 2] + b[x])) >> 1);
+    } else
+        b[x - 1] = temp[x - 1] + 3 * b[x - 2];
+}
+
+/* 9/7 vertical step H0: b1 += (W_AM * (b0 + b2) + W_AO) >> W_AS, per sample. */
+static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
+                                  int width)
+{
+    int x;
+    for (x = 0; x < width; x++)
+        b1[x] = b1[x] + ((W_AM * (b0[x] + b2[x]) + W_AO) >> W_AS);
+}
+
+/* 9/7 vertical step H1: b1 -= (W_CM * (b0 + b2) + W_CO) >> W_CS, per sample. */
+static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
+                                  int width)
+{
+    int x;
+    for (x = 0; x < width; x++)
+        b1[x] = b1[x] - ((W_CM * (b0[x] + b2[x]) + W_CO) >> W_CS);
+}
+
+/* 9/7 vertical step L0: like H0 with the W_B constants plus a 4 * b1 term. */
+static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
+                                  int width)
+{
+    int x;
+    for (x = 0; x < width; x++)
+        b1[x] = b1[x] + ((W_BM * (b0[x] + b2[x]) + 4 * b1[x] + W_BO) >> W_BS);
+}
+
+/* 9/7 vertical step L1: b1 -= (W_DM * (b0 + b2) + W_DO) >> W_DS, per sample. */
+static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
+                                  int width)
+{
+    int x;
+    for (x = 0; x < width; x++)
+        b1[x] = b1[x] - ((W_DM * (b0[x] + b2[x]) + W_DO) >> W_DS);
+}
+
+/* The four 9/7 vertical steps (L1, H1, L0, H0 in that order) per column. */
+void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
+                                 IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5,
+                                 int width)
+{
+    int x;
+    for (x = 0; x < width; x++) {
+        b4[x] = b4[x] - ((W_DM * (b3[x] + b5[x]) + W_DO) >> W_DS);
+        b3[x] = b3[x] - ((W_CM * (b2[x] + b4[x]) + W_CO) >> W_CS);
+        b2[x] = b2[x] + ((W_BM * (b1[x] + b3[x]) + 4 * b2[x] + W_BO) >> W_BS);
+        b1[x] = b1[x] + ((W_AM * (b0[x] + b2[x]) + W_AO) >> W_AS);
+    }
+}
+
+static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer *sb,
+                                             int height, int stride_line)
+{
+    cs->b0 = slice_buffer_get_line(sb, mirror(-3 - 1, height - 1) * stride_line);
+    cs->b1 = slice_buffer_get_line(sb, mirror(-3,     height - 1) * stride_line);
+    cs->b2 = slice_buffer_get_line(sb, mirror(-3 + 1, height - 1) * stride_line);
+    cs->b3 = slice_buffer_get_line(sb, mirror(-3 + 2, height - 1) * stride_line);
+    cs->y  = -3; /* b0..b3 are rows y - 1 .. y + 2, each index passed through mirror() */
+}
+
+static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height,
+                                    int stride)
+{
+    cs->b0 = buffer + mirror(-3 - 1, height - 1) * stride;
+    cs->b1 = buffer + mirror(-3,     height - 1) * stride;
+    cs->b2 = buffer + mirror(-3 + 1, height - 1) * stride;
+    cs->b3 = buffer + mirror(-3 + 2, height - 1) * stride;
+    cs->y  = -3; /* b0..b3 are rows y - 1 .. y + 2, each index passed through mirror() */
+}
+
+static void spatial_compose97i_dy_buffered(SnowDWTContext *dsp, DWTCompose *cs,
+                                           slice_buffer * sb, IDWTELEM *temp,
+                                           int width, int height,
+                                           int stride_line)
+{
+    int y = cs->y;
+    /* One 9/7 inverse step: b0..b3 come from cs, b4 and b5 are fetched here. */
+    IDWTELEM *b0 = cs->b0;
+    IDWTELEM *b1 = cs->b1;
+    IDWTELEM *b2 = cs->b2;
+    IDWTELEM *b3 = cs->b3;
+    IDWTELEM *b4 = slice_buffer_get_line(sb,
+                                         mirror(y + 3, height - 1) *
+                                         stride_line);
+    IDWTELEM *b5 = slice_buffer_get_line(sb,
+                                         mirror(y + 4, height - 1) *
+                                         stride_line);
+    /* Interior rows: one dsp call on all six rows; else four range-checked steps. */
+    if (y > 0 && y + 4 < height) {
+        dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
+    } else {
+        if (y + 3 < (unsigned)height)
+            vertical_compose97iL1(b3, b4, b5, width);
+        if (y + 2 < (unsigned)height)
+            vertical_compose97iH1(b2, b3, b4, width);
+        if (y + 1 < (unsigned)height)
+            vertical_compose97iL0(b1, b2, b3, width);
+        if (y + 0 < (unsigned)height)
+            vertical_compose97iH0(b0, b1, b2, width);
+    }
+    /* Horizontal pass on b0 (row y - 1) and b1 (row y), each only if in range. */
+    if (y - 1 < (unsigned)height)
+        dsp->horizontal_compose97i(b0, temp, width);
+    if (y + 0 < (unsigned)height)
+        dsp->horizontal_compose97i(b1, temp, width);
+    /* Slide the window: the last four rows become b0..b3 of the next step. */
+    cs->b0  = b2;
+    cs->b1  = b3;
+    cs->b2  = b4;
+    cs->b3  = b5;
+    cs->y  += 2;
+}
+
+static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer,
+                                  IDWTELEM *temp, int width, int height,
+                                  int stride)
+{
+    int y        = cs->y;
+    IDWTELEM *b0 = cs->b0;
+    IDWTELEM *b1 = cs->b1;
+    IDWTELEM *b2 = cs->b2;
+    IDWTELEM *b3 = cs->b3;
+    IDWTELEM *b4 = buffer + mirror(y + 3, height - 1) * stride;
+    IDWTELEM *b5 = buffer + mirror(y + 4, height - 1) * stride;
+    /* One 9/7 inverse step: each vertical pass is skipped if its row is out of range. */
+    if (y + 3 < (unsigned)height)
+        vertical_compose97iL1(b3, b4, b5, width);
+    if (y + 2 < (unsigned)height)
+        vertical_compose97iH1(b2, b3, b4, width);
+    if (y + 1 < (unsigned)height)
+        vertical_compose97iL0(b1, b2, b3, width);
+    if (y + 0 < (unsigned)height)
+        vertical_compose97iH0(b0, b1, b2, width);
+    /* Horizontal pass on b0 (row y - 1) and b1 (row y), each only if in range. */
+    if (y - 1 < (unsigned)height)
+        ff_snow_horizontal_compose97i(b0, temp, width);
+    if (y + 0 < (unsigned)height)
+        ff_snow_horizontal_compose97i(b1, temp, width);
+    /* Slide the window: the last four rows become b0..b3 of the next step. */
+    cs->b0  = b2;
+    cs->b1  = b3;
+    cs->b2  = b4;
+    cs->b3  = b5;
+    cs->y  += 2;
+}
+
+/* Set up the per-level compose state, from the last level down to level 0.
+ * width is accepted but not used. */
+void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer *sb, int width,
+                                   int height, int stride_line, int type,
+                                   int decomposition_count)
+{
+    int lvl = decomposition_count;
+
+    /* Any type other than DWT_97 / DWT_53 leaves cs untouched. */
+    while (--lvl >= 0) {
+        if (type == DWT_97)
+            spatial_compose97i_buffered_init(cs + lvl, sb, height >> lvl,
+                                             stride_line << lvl);
+        else if (type == DWT_53)
+            spatial_compose53i_buffered_init(cs + lvl, sb, height >> lvl,
+                                             stride_line << lvl);
+    }
+}
+
+/* Advance every level's compose state (coarsest first) far enough for output
+ * row y; does nothing unless type is DWT_97 or DWT_53. */
+void ff_spatial_idwt_buffered_slice(SnowDWTContext *dsp, DWTCompose *cs,
+                                    slice_buffer *slice_buf, IDWTELEM *temp,
+                                    int width, int height, int stride_line,
+                                    int type, int decomposition_count, int y)
+{
+    const int support = type == DWT_53 ? 3 : 5;
+    int level;
+    /* Only the two known transforms advance cs[level].y, so any other type
+     * must leave here or the while loop below could never terminate. */
+    if (type != DWT_97 && type != DWT_53)
+        return;
+
+    for (level = decomposition_count - 1; level >= 0; level--)
+        while (cs[level].y <= FFMIN((y >> level) + support, height >> level)) {
+            if (type == DWT_97)
+                spatial_compose97i_dy_buffered(dsp, cs + level, slice_buf, temp,
+                                               width >> level,
+                                               height >> level,
+                                               stride_line << level);
+            else
+                spatial_compose53i_dy_buffered(cs + level, slice_buf, temp,
+                                               width >> level,
+                                               height >> level,
+                                               stride_line << level);
+        }
+}
+
+/* Set up the per-level compose state, from the last level down to level 0.
+ * width is accepted but not used. */
+static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width,
+                                 int height, int stride, int type,
+                                 int decomposition_count)
+{
+    int lvl = decomposition_count;
+
+    /* Any type other than DWT_97 / DWT_53 leaves cs untouched. */
+    while (--lvl >= 0) {
+        if (type == DWT_97)
+            spatial_compose97i_init(cs + lvl, buffer, height >> lvl,
+                                    stride << lvl);
+        else if (type == DWT_53)
+            spatial_compose53i_init(cs + lvl, buffer, height >> lvl,
+                                    stride << lvl);
+    }
+}
+
+/* Advance every level's compose state (coarsest first) far enough for output
+ * row y; does nothing unless type is DWT_97 or DWT_53. */
+static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer,
+                                  IDWTELEM *temp, int width, int height,
+                                  int stride, int type,
+                                  int decomposition_count, int y)
+{
+    const int support = type == DWT_53 ? 3 : 5;
+    int level;
+    /* Only the two known transforms advance cs[level].y, so any other type
+     * must leave here or the while loop below could never terminate. */
+    if (type != DWT_97 && type != DWT_53)
+        return;
+
+    for (level = decomposition_count - 1; level >= 0; level--)
+        while (cs[level].y <= FFMIN((y >> level) + support, height >> level)) {
+            if (type == DWT_97)
+                spatial_compose97i_dy(cs + level, buffer, temp, width >> level,
+                                      height >> level, stride << level);
+            else
+                spatial_compose53i_dy(cs + level, buffer, temp, width >> level,
+                                      height >> level, stride << level);
+        }
+}
+
+void ff_spatial_idwt(IDWTELEM *buffer, IDWTELEM *temp, int width, int height,
+                     int stride, int type, int decomposition_count)
+{
+    DWTCompose st[MAX_DECOMPOSITIONS]; /* one compose state per level */
+    int row = 0;
+    ff_spatial_idwt_init(st, buffer, width, height, stride, type,
+                         decomposition_count);
+    for (; row < height; row += 4) /* whole plane, four rows per slice call */
+        ff_spatial_idwt_slice(st, buffer, temp, width, height, stride, type,
+                              decomposition_count, row);
+}
+
+/* Wavelet-domain distortion of two w x h blocks: forward DWT of the 16x-scaled
+ * pixel difference, then sum |coef * scale[type][..][level][ori]|, >> 9. */
+static inline int w_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size,
+                      int w, int h, int type)
+{
+    int s = 0, i, j, level, ori;
+    const int dec_count = w == 8 ? 3 : 4;
+    int tmp[32 * 32], tmp2[32];
+    static const int scale[2][2][4][4] = {
+        {
+            { // 9/7 8x8 dec=3
+                { 268, 239, 239, 213 },
+                { 0,   224, 224, 152 },
+                { 0,   135, 135, 110 },
+            },
+            { // 9/7 16x16 or 32x32 dec=4
+                { 344, 310, 310, 280 },
+                { 0,   320, 320, 228 },
+                { 0,   175, 175, 136 },
+                { 0,   129, 129, 102 },
+            }
+        },
+        {
+            { // 5/3 8x8 dec=3
+                { 275, 245, 245, 218 },
+                { 0,   230, 230, 156 },
+                { 0,   138, 138, 113 },
+            },
+            { // 5/3 16x16 or 32x32 dec=4
+                { 352, 317, 317, 286 },
+                { 0,   328, 328, 233 },
+                { 0,   180, 180, 140 },
+                { 0,   132, 132, 105 },
+            }
+        }
+    };
+    /* "* (1 << 4)", not "<< 4": left-shifting a negative difference is undefined. */
+    for (i = 0; i < h; i++) {
+        for (j = 0; j < w; j += 4) {
+            tmp[32 * i + j + 0] = (pix1[j + 0] - pix2[j + 0]) * (1 << 4);
+            tmp[32 * i + j + 1] = (pix1[j + 1] - pix2[j + 1]) * (1 << 4);
+            tmp[32 * i + j + 2] = (pix1[j + 2] - pix2[j + 2]) * (1 << 4);
+            tmp[32 * i + j + 3] = (pix1[j + 3] - pix2[j + 3]) * (1 << 4);
+        }
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+
+    ff_spatial_dwt(tmp, tmp2, w, h, 32, type, dec_count);
+
+    av_assert1(w == h);
+    for (level = 0; level < dec_count; level++)
+        for (ori = level ? 1 : 0; ori < 4; ori++) {
+            int size   = w >> (dec_count - level);
+            int sx     = (ori & 1) ? size : 0;
+            int stride = 32 << (dec_count - level);
+            int sy     = (ori & 2) ? stride >> 1 : 0;
+
+            for (i = 0; i < size; i++)
+                for (j = 0; j < size; j++) {
+                    int coef = tmp[sx + sy + i * stride + j] *
+                               scale[type][dec_count - 3][level][ori];
+                    s += FFABS(coef);
+                }
+        }
+    av_assert1(s >= 0);
+    return s >> 9;
+}
+
+static int w53_8_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    return w_c(v, pix1, pix2, line_size, 8, h, DWT_53); /* 8 wide, 5/3 */
+}
+
+static int w97_8_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    return w_c(v, pix1, pix2, line_size, 8, h, DWT_97); /* 8 wide, 9/7 */
+}
+
+static int w53_16_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    return w_c(v, pix1, pix2, line_size, 16, h, DWT_53); /* 16 wide, 5/3 */
+}
+
+static int w97_16_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    return w_c(v, pix1, pix2, line_size, 16, h, DWT_97); /* 16 wide, 9/7 */
+}
+
+int ff_w53_32_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    return w_c(v, pix1, pix2, line_size, 32, h, DWT_53); /* 32 wide, 5/3 */
+}
+
+int ff_w97_32_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    return w_c(v, pix1, pix2, line_size, 32, h, DWT_97); /* 32 wide, 9/7 */
+}
+
+void ff_dsputil_init_dwt(MECmpContext *c)
+{
+    c->w53[0] = w53_16_c; /* index 0: 16-pixel-wide comparison, 5/3 transform */
+    c->w53[1] = w53_8_c;  /* index 1: 8-pixel-wide comparison, 5/3 transform */
+    c->w97[0] = w97_16_c; /* index 0: 16-pixel-wide comparison, 9/7 transform */
+    c->w97[1] = w97_8_c;  /* index 1: 8-pixel-wide comparison, 9/7 transform */
+}
+
+void ff_dwt_init(SnowDWTContext *c)
+{
+    c->vertical_compose97i   = ff_snow_vertical_compose97i;
+    c->horizontal_compose97i = ff_snow_horizontal_compose97i;
+    c->inner_add_yblock      = ff_snow_inner_add_yblock;
+    /* Defaults are set above; ff_dwt_init_x86() is called only when HAVE_MMX is set. */
+    if (HAVE_MMX)
+        ff_dwt_init_x86(c);
+}
+
+
diff --git a/libavcodec/snow_dwt.h b/libavcodec/snow_dwt.h
new file mode 100644
index 0000000..e929189
--- /dev/null
+++ b/libavcodec/snow_dwt.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2004-2010 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_SNOW_DWT_H
+#define AVCODEC_SNOW_DWT_H
+
+#include <stdint.h>
+
+typedef int DWTELEM;
+typedef short IDWTELEM;
+
+#define MAX_DECOMPOSITIONS 8
+
+typedef struct DWTCompose {
+    IDWTELEM *b0; ///< b0..b3: row pointers kept between compose steps
+    IDWTELEM *b1;
+    IDWTELEM *b2; ///< b2 and b3 are read by the 9/7 steps (5/3 steps use b0/b1)
+    IDWTELEM *b3;
+    int y;        ///< row cursor; each compose step adds 2 to it
+} DWTCompose;
+
+/** Used to minimize the amount of memory used in order to
+ *  optimize cache performance. **/
+typedef struct slice_buffer_s {
+    IDWTELEM **line;   ///< For use by idwt and predict_slices.
+    IDWTELEM **data_stack;   ///< Used for internal purposes.
+    int data_stack_top;
+    int line_count;
+    int line_width;
+    int data_count;
+    IDWTELEM *base_buffer;  ///< Buffer that this structure is caching.
+} slice_buffer;
+
+struct MpegEncContext; /* file-scope tag for the ff_w*_32_c() prototypes below */
+/* Function pointers that ff_dwt_init() fills in. */
+typedef struct SnowDWTContext {
+    void (*vertical_compose97i)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
+                                IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5,
+                                int width);
+    void (*horizontal_compose97i)(IDWTELEM *b, IDWTELEM *temp, int width);
+    void (*inner_add_yblock)(const uint8_t *obmc, const int obmc_stride,
+                             uint8_t **block, int b_w, int b_h, int src_x,
+                             int src_y, int src_stride, slice_buffer *sb,
+                             int add, uint8_t *dst8);
+} SnowDWTContext;
+
+
+#define DWT_97 0
+#define DWT_53 1
+
+#define liftS lift
+#define W_AM 3
+#define W_AO 0
+#define W_AS 1
+
+#undef liftS
+#define W_BM 1
+#define W_BO 8
+#define W_BS 4
+
+#define W_CM 1
+#define W_CO 0
+#define W_CS 0
+
+#define W_DM 3
+#define W_DO 4
+#define W_DS 3
+
+#define slice_buffer_get_line(slice_buf, line_num)                          \
+    ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num]              \
+                                 : ff_slice_buffer_load_line((slice_buf),   \
+                                                             (line_num)))
+
+int ff_slice_buffer_init(slice_buffer *buf, int line_count,
+                         int max_allocated_lines, int line_width,
+                         IDWTELEM *base_buffer);
+void ff_slice_buffer_release(slice_buffer *buf, int line);
+void ff_slice_buffer_flush(slice_buffer *buf);
+void ff_slice_buffer_destroy(slice_buffer *buf);
+IDWTELEM *ff_slice_buffer_load_line(slice_buffer *buf, int line);
+
+void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
+                                 IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5,
+                                 int width);
+void ff_snow_horizontal_compose97i(IDWTELEM *b, IDWTELEM *temp, int width);
+void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride,
+                              uint8_t **block, int b_w, int b_h, int src_x,
+                              int src_y, int src_stride, slice_buffer *sb,
+                              int add, uint8_t *dst8);
+
+int ff_w53_32_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
+int ff_w97_32_c(struct MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
+
+void ff_spatial_dwt(int *buffer, int *temp, int width, int height, int stride,
+                    int type, int decomposition_count);
+
+void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer *sb, int width,
+                                   int height, int stride_line, int type,
+                                   int decomposition_count);
+void ff_spatial_idwt_buffered_slice(SnowDWTContext *dsp, DWTCompose *cs,
+                                    slice_buffer *slice_buf, IDWTELEM *temp,
+                                    int width, int height, int stride_line,
+                                    int type, int decomposition_count, int y);
+void ff_spatial_idwt(IDWTELEM *buffer, IDWTELEM *temp, int width, int height,
+                     int stride, int type, int decomposition_count);
+
+void ff_dwt_init(SnowDWTContext *c);
+void ff_dwt_init_x86(SnowDWTContext *c);
+
+#endif /* AVCODEC_SNOW_DWT_H */
diff --git a/libavcodec/snowdata.h b/libavcodec/snowdata.h
new file mode 100644
index 0000000..490fdf8
--- /dev/null
+++ b/libavcodec/snowdata.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2006 Robert Edele <yartrebo@earthlink.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_SNOWDATA_H
+#define AVCODEC_SNOWDATA_H
+
+#include "snow.h"
+
+static const uint8_t obmc32[1024]={
+  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
+  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
+  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
+  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
+  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
+  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
+  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
+  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
+  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
+  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
+  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
+  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
+  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
+  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
+  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
+  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
+  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
+  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
+  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
+  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
+  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
+  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
+  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
+  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
+  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
+  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
+  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
+  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
+  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
+  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
+  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
+  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
+ //error:0.000020
+};
+static const uint8_t obmc16[256]={
+  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
+  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
+  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
+  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
+  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
+ 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
+ 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
+ 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
+ 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
+ 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
+ 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
+  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
+  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
+  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
+  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
+  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
+//error:0.000015
+};
+
+//linear *64
+static const uint8_t obmc8[64]={
+  4, 12, 20, 28, 28, 20, 12,  4,
+ 12, 36, 60, 84, 84, 60, 36, 12,
+ 20, 60,100,140,140,100, 60, 20,
+ 28, 84,140,196,196,140, 84, 28,
+ 28, 84,140,196,196,140, 84, 28,
+ 20, 60,100,140,140,100, 60, 20,
+ 12, 36, 60, 84, 84, 60, 36, 12,
+  4, 12, 20, 28, 28, 20, 12,  4,
+//error:0.000000
+};
+
+//linear *64
+static const uint8_t obmc4[16]={
+ 16, 48, 48, 16,
+ 48,144,144, 48,
+ 48,144,144, 48,
+ 16, 48, 48, 16,
+//error:0.000000
+};
+
+const int8_t ff_quant3bA[256]={
+ 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+ 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
+};
+
+const uint8_t * const ff_obmc_tab[4]= {
+    obmc32, obmc16, obmc8, obmc4 /* 32x32, 16x16, 8x8, 4x4 tables, largest first */
+};
+
+/* runtime generated tables */
+uint8_t ff_qexp[QROOT];
+int ff_scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
+
+
+#endif /* AVCODEC_SNOWDATA_H */
diff --git a/libavcodec/snowdec.c b/libavcodec/snowdec.c
new file mode 100644
index 0000000..327157b
--- /dev/null
+++ b/libavcodec/snowdec.c
@@ -0,0 +1,606 @@
+/*
+ * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intmath.h"
+#include "libavutil/log.h"
+#include "libavutil/opt.h"
+#include "avcodec.h"
+#include "snow_dwt.h"
+#include "internal.h"
+#include "snow.h"
+
+#include "rangecoder.h"
+#include "mathops.h"
+
+#include "mpegvideo.h"
+#include "h263.h"
+
+static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){ /* process block row mb_y of one plane; the two paths are described below */
+    Plane *p= &s->plane[plane_index];
+    const int mb_w= s->b_width  << s->block_max_depth;
+    const int mb_h= s->b_height << s->block_max_depth;
+    int x, y, mb_x;
+    int block_size = MB_SIZE >> s->block_max_depth;
+    int block_w    = plane_index ? block_size>>s->chroma_h_shift : block_size;
+    int block_h    = plane_index ? block_size>>s->chroma_v_shift : block_size;
+    const uint8_t *obmc  = plane_index ? ff_obmc_tab[s->block_max_depth+s->chroma_h_shift] : ff_obmc_tab[s->block_max_depth];
+    int obmc_stride= plane_index ? (2*block_size)>>s->chroma_h_shift : 2*block_size;
+    int ref_stride= s->current_picture->linesize[plane_index];
+    uint8_t *dst8= s->current_picture->data[plane_index];
+    int w= p->width;
+    int h= p->height;
+    /* Path 1 (keyframe, or debug flag 512 set): add_yblock() is not called at all. */
+    if(s->keyframe || (s->avctx->debug&512)){
+        if(mb_y==mb_h)
+            return;
+        /* add!=0: write the rows of this block row to the 8-bit picture; add==0: remove the 128 offset in place. */
+        if(add){
+            for(y=block_h*mb_y; y<FFMIN(h,block_h*(mb_y+1)); y++){
+                /* the row is taken directly from sb->line[], not through slice_buffer_get_line() */
+                IDWTELEM * line = sb->line[y];
+                for(x=0; x<w; x++){
+                    /* add the 128 level offset plus half an LSB for rounding before dropping FRAC_BITS */
+                    int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
+                    v >>= FRAC_BITS;
+                    if(v&(~255)) v= ~(v>>31); /* outside 0..255: negative becomes 0, too large becomes -1 which is stored as 255 in the uint8_t */
+                    dst8[x + y*ref_stride]= v;
+                }
+            }
+        }else{
+            for(y=block_h*mb_y; y<FFMIN(h,block_h*(mb_y+1)); y++){
+                /* same direct row access as in the add branch */
+                IDWTELEM * line = sb->line[y];
+                for(x=0; x<w; x++){
+                    line[x] -= 128 << FRAC_BITS;
+                    /* only the level offset is removed here; dst8 is not written */
+                }
+            }
+        }
+
+        return;
+    }
+    /* Path 2: one add_yblock() call per block column (mb_w+1 calls), each positioned half a block up and to the left. */
+    for(mb_x=0; mb_x<=mb_w; mb_x++){
+        add_yblock(s, 1, sb, old_buffer, dst8, obmc,
+                   block_w*mb_x - block_w/2,
+                   block_h*mb_y - block_h/2,
+                   block_w, block_h,
+                   w, h,
+                   w, ref_stride, obmc_stride,
+                   mb_x - 1, mb_y - 1,
+                   add, 0, plane_index);
+    }
+}
+
+static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
+    /* Expand the stored (column, value) coefficient list of band b into rows
+     * [start_y, h) of the slice buffer, dequantizing every value on the way.
+     * Each row is cleared first, so columns without an entry stay zero.
+     * save_state[0] carries the list position from one call to the next. */
+    const int band_w = b->width;
+    const int qlog   = av_clip(s->qlog + b->qlog, 0, QROOT*16);
+    /* A band whose ibuf is the spatial IDWT buffer itself, and every band when
+     * qlog is LOSSLESS_QLOG, is expanded with unit scale and without bias. */
+    const int bypass = b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG;
+    const int qmul   = bypass ? 1<<QEXPSHIFT : ff_qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
+    const int qadd   = bypass ? 0 : (s->qbias*qmul)>>QBIAS_SHIFT;
+
+    /* Only a call starting at row 0 begins at the head of the list. */
+    int idx = start_y != 0 ? save_state[0] : 0;
+    int row;
+
+    for (row = start_y; row < h; row++) {
+        IDWTELEM *dst = slice_buffer_get_line(sb, row * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
+        int col, coded;
+
+        memset(dst, 0, b->width*sizeof(IDWTELEM));
+        /* An entry whose column is not below the band width terminates the row. */
+        for (;;) {
+            int mag, sign;
+
+            coded = b->x_coeff[idx].coeff;
+            col   = b->x_coeff[idx++].x;
+            if (col >= band_w)
+                break;
+            /* Bit 0 is the sign flag, the bits above it the quantized magnitude. */
+            mag  = ((coded>>1)*qmul + qadd)>>QEXPSHIFT;
+            sign = -(coded&1);
+            dst[col] = (mag^sign) - sign;
+        }
+    }
+
+    /* Remember where the next slice has to continue. */
+    save_state[0] = idx;
+}
+
+static int decode_q_branch(SnowContext *s, int level, int x, int y){ /* decode the block at (x,y) on tree depth level, recursing into four children when it is split; returns 0 or a negative error code */
+    const int w= s->b_width << s->block_max_depth;
+    const int rem_depth= s->block_max_depth - level;
+    const int index= (x + y*w) << rem_depth;
+    int trx= (x+1)<<rem_depth;
+    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
+    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
+    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
+    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
+    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
+    int res;
+    /* Keyframes read no block data: the block takes null_block's values and is marked intra. */
+    if(s->keyframe){
+        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
+        return 0;
+    }
+    /* At maximum depth the node is always a leaf; above that one coded flag decides between leaf and split. */
+    if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
+        int type, mx, my;
+        int l = left->color[0];
+        int cb= left->color[1];
+        int cr= left->color[2];
+        int ref = 0;
+        int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
+        int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx)); /* the tr term is multiplied by 0 and contributes nothing */
+        int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my)); /* same here */
+        /* one coded bit selects intra; its context index is 1 + the left and top block types */
+        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
+
+        if(type){
+            pred_mv(s, &mx, &my, 0, left, top, tr); /* intra: mx/my are whatever pred_mv() produces, no vector delta is read */
+            l += get_symbol(&s->c, &s->block_state[32], 1); /* colours are the left block's colours plus a coded signed change */
+            if (s->nb_planes > 2) {
+                cb+= get_symbol(&s->c, &s->block_state[64], 1);
+                cr+= get_symbol(&s->c, &s->block_state[96], 1);
+            }
+        }else{
+            if(s->ref_frames > 1) /* with a single reference the index is not coded and stays 0 */
+                ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
+            if (ref >= s->ref_frames) {
+                av_log(s->avctx, AV_LOG_ERROR, "Invalid ref\n");
+                return AVERROR_INVALIDDATA;
+            }
+            pred_mv(s, &mx, &my, ref, left, top, tr);
+            mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1); /* prediction plus coded change; ref!=0 selects a second set of contexts */
+            my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
+        }
+        set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
+    }else{
+        if ((res = decode_q_branch(s, level+1, 2*x+0, 2*y+0)) < 0 || /* children in the order top-left, top-right, bottom-left, bottom-right */
+            (res = decode_q_branch(s, level+1, 2*x+1, 2*y+0)) < 0 ||
+            (res = decode_q_branch(s, level+1, 2*x+0, 2*y+1)) < 0 ||
+            (res = decode_q_branch(s, level+1, 2*x+1, 2*y+1)) < 0)
+            return res;
+    }
+    return 0;
+}
+
+static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
+    /* Rescale, in place, the nonzero coefficients in rows [start_y, end_y) of
+     * band b. src and stride are accepted but not used by this function. */
+    const int qlog   = av_clip(s->qlog + b->qlog, 0, QROOT*16);
+    const int qmul   = ff_qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
+    const int qadd   = (s->qbias*qmul)>>QBIAS_SHIFT;
+    const int band_w = b->width;
+    int row, col;
+
+    if (s->qlog == LOSSLESS_QLOG) // nothing is rescaled in this mode
+        return;
+
+    for (row = start_y; row < end_y; row++) {
+        IDWTELEM *coefs = slice_buffer_get_line(sb, (row * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
+        for (col = 0; col < band_w; col++) {
+            const int q = coefs[col];
+            /* the bias is added to the magnitude, then the sign is put back (FIXME try different bias) */
+            if (q)
+                coefs[col] = q < 0 ? -((-q*qmul + qadd)>>QEXPSHIFT) : (q*qmul + qadd)>>QEXPSHIFT;
+        }
+    }
+}
+
+static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
+    /* For rows [start_y, end_y) of band b, add to every coefficient a predictor
+     * built from its left and upper neighbours. s, src, stride, inverse are unused. */
+    const int band_w = b->width;
+    IDWTELEM *above;
+    IDWTELEM *cur = 0; // only read as "above" when row != 0, and then it has been fetched
+    int row, col;
+
+    if (start_y)
+        cur = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
+
+    for (row = start_y; row < end_y; row++) {
+        above = cur;
+        cur   = slice_buffer_get_line(sb, (row * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
+        for (col = 0; col < band_w; col++) {
+            int pred;
+
+            if (!col)
+                pred = row ? above[0] : 0;                     // first column: the value above, if any
+            else if (!row || (use_median && col + 1 >= band_w))
+                pred = cur[col - 1];                           // top row, or last column in median mode: left only
+            else if (use_median)
+                pred = mid_pred(cur[col - 1], above[col], above[col + 1]);
+            else
+                pred = mid_pred(cur[col - 1], above[col], cur[col - 1] + above[col] - above[col - 1]);
+            cur[col] += pred;
+        }
+    }
+}
+
+static void decode_qlogs(SnowContext *s){
+    /* Fill in qlog for every band. A symbol is read only for planes 0/1 with
+     * orientation 0, 1 or 3; plane 2 copies plane 1, orientation 2 copies 1. */
+    int plane, lvl, orient;
+    for (plane = 0; plane < s->nb_planes; plane++) {
+        for (lvl = 0; lvl < s->spatial_decomposition_count; lvl++) {
+            for (orient = !!lvl; orient < 4; orient++) { // orientation 0 is visited at level 0 only
+                SubBand *const band = &s->plane[plane].band[lvl][orient];
+                band->qlog = plane  == 2 ? s->plane[1].band[lvl][orient].qlog
+                           : orient == 2 ? s->plane[plane].band[lvl][1].qlog
+                           : get_symbol(&s->c, s->header_state, 1);
+            }
+        }
+    }
+}
+
+#define GET_S(dst, check) /* decode one header symbol into the caller's tmp; dst is assigned only when check holds */ \
+    tmp= get_symbol(&s->c, s->header_state, 0);\
+    if(!(check)){\
+        av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\
+        return AVERROR_INVALIDDATA; /* returns from the function that used the macro */ \
+    }\
+    dst= tmp;
+
+static int decode_header(SnowContext *s){ /* read the per-frame header from the range coder s->c into s; returns 0 or AVERROR_INVALIDDATA */
+    int plane_index, tmp;
+    uint8_t kstate[32];
+
+    memset(kstate, MID_STATE, sizeof(kstate)); /* the keyframe flag is decoded with its own, freshly initialised state */
+
+    s->keyframe= get_rac(&s->c, kstate);
+    if(s->keyframe || s->always_reset){ /* start over: the fields zeroed here are updated by signed changes further down */
+        ff_snow_reset_contexts(s);
+        s->spatial_decomposition_type=
+        s->qlog=
+        s->qbias=
+        s->mv_scale=
+        s->block_max_depth= 0;
+    }
+    if(s->keyframe){
+        GET_S(s->version, tmp <= 0U) /* the comparison is unsigned, so only tmp == 0 passes */
+        s->always_reset= get_rac(&s->c, s->header_state);
+        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
+        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
+        GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
+        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
+        if (s->colorspace_type == 1) { /* type 1: a single grey plane */
+            s->avctx->pix_fmt= AV_PIX_FMT_GRAY8;
+            s->nb_planes = 1; /* NOTE(review): chroma_h_shift/chroma_v_shift are not assigned in this branch although the size check below reads them -- confirm they hold sane values here */
+        } else if(s->colorspace_type == 0) { /* type 0: three planes, chroma subsampling is coded next */
+            s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
+            s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
+
+            if(s->chroma_h_shift == 1 && s->chroma_v_shift==1){
+                s->avctx->pix_fmt= AV_PIX_FMT_YUV420P;
+            }else if(s->chroma_h_shift == 0 && s->chroma_v_shift==0){
+                s->avctx->pix_fmt= AV_PIX_FMT_YUV444P;
+            }else if(s->chroma_h_shift == 2 && s->chroma_v_shift==2){
+                s->avctx->pix_fmt= AV_PIX_FMT_YUV410P;
+            } else {
+                av_log(s, AV_LOG_ERROR, "unsupported color subsample mode %d %d\n", s->chroma_h_shift, s->chroma_v_shift);
+                s->chroma_h_shift = s->chroma_v_shift = 1; /* fall back to 4:2:0 values before reporting the error */
+                s->avctx->pix_fmt= AV_PIX_FMT_YUV420P;
+                return AVERROR_INVALIDDATA;
+            }
+            s->nb_planes = 3;
+        } else {
+            av_log(s, AV_LOG_ERROR, "unsupported color space\n");
+            s->chroma_h_shift = s->chroma_v_shift = 1;
+            s->avctx->pix_fmt= AV_PIX_FMT_YUV420P;
+            return AVERROR_INVALIDDATA;
+        }
+
+
+        s->spatial_scalability= get_rac(&s->c, s->header_state);
+//        s->rate_scalability= get_rac(&s->c, s->header_state);
+        GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES) /* the coded value is max_ref_frames-1, hence the increment */
+        s->max_ref_frames++;
+
+        decode_qlogs(s);
+    }
+
+    if(!s->keyframe){
+        if(get_rac(&s->c, s->header_state)){ /* flag: new interpolation filter follows */
+            for(plane_index=0; plane_index<FFMIN(s->nb_planes, 2); plane_index++){ /* taps are coded for planes 0 and 1 only */
+                int htaps, i, sum=0;
+                Plane *p= &s->plane[plane_index];
+                p->diag_mc= get_rac(&s->c, s->header_state);
+                htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
+                if((unsigned)htaps > HTAPS_MAX || htaps==0) /* NOTE(review): htaps == HTAPS_MAX passes and the loop below then writes hcoeff[HTAPS_MAX/2]; confirm hcoeff has at least HTAPS_MAX/2+1 entries */
+                    return AVERROR_INVALIDDATA;
+                p->htaps= htaps;
+                for(i= htaps/2; i; i--){
+                    p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1)); /* the coded value is negated for odd i */
+                    sum += p->hcoeff[i];
+                }
+                p->hcoeff[0]= 32-sum; /* chosen so that hcoeff[0] plus the coded coefficients equals 32 */
+            }
+            s->plane[2].diag_mc= s->plane[1].diag_mc; /* plane 2 takes over plane 1's filter */
+            s->plane[2].htaps  = s->plane[1].htaps;
+            memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
+        }
+        if(get_rac(&s->c, s->header_state)){ /* flag: new decomposition count and qlogs follow */
+            GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
+            decode_qlogs(s);
+        }
+    }
+
+    s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
+    if(s->spatial_decomposition_type > 1U){ /* unsigned comparison: anything but 0 or 1 is rejected */
+        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported\n", s->spatial_decomposition_type);
+        return AVERROR_INVALIDDATA;
+    }
+    if(FFMIN(s->avctx-> width>>s->chroma_h_shift, /* the smaller chroma-shifted dimension, shifted down by count-1, must stay above 1 */
+             s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 1){
+        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size\n", s->spatial_decomposition_count);
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* The remaining parameters are coded as signed changes added to their current values. */
+    s->qlog           += get_symbol(&s->c, s->header_state, 1);
+    s->mv_scale       += get_symbol(&s->c, s->header_state, 1);
+    s->qbias          += get_symbol(&s->c, s->header_state, 1);
+    s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
+    if(s->block_max_depth > 1 || s->block_max_depth < 0){ /* only depths 0 and 1 are accepted */
+        av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large\n", s->block_max_depth);
+        s->block_max_depth= 0;
+        return AVERROR_INVALIDDATA;
+    }
+
+    return 0;
+}
+
+static av_cold int decode_init(AVCodecContext *avctx)
+{
+    /* Init callback: run the common Snow setup. If it fails, call
+     * ff_snow_common_end() on the context and pass the error on. */
+    const int err = ff_snow_common_init(avctx);
+
+    if (err >= 0)
+        return 0;
+    ff_snow_common_end(avctx->priv_data);
+    return err;
+}
+
+static int decode_blocks(SnowContext *s){
+    /* Decode every top-level block, row by row, starting each tree at
+     * level 0. Stops at the first failure and returns its error code. */
+    const int cols = s->b_width;
+    const int rows = s->b_height;
+    int bx, by;
+
+    for (by = 0; by < rows; by++)
+        for (bx = 0; bx < cols; bx++) {
+            const int err = decode_q_branch(s, 0, bx, by);
+            if (err < 0)
+                return err;
+        }
+    return 0;
+}
+
+static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
+                        AVPacket *avpkt) /* decode one packet into the frame passed as data; returns c->bytestream - c->bytestream_start, or a negative error code */
+{
+    const uint8_t *buf = avpkt->data;
+    int buf_size = avpkt->size;
+    SnowContext *s = avctx->priv_data;
+    RangeCoder * const c= &s->c;
+    int bytes_read;
+    AVFrame *picture = data;
+    int level, orientation, plane_index;
+    int res;
+
+    ff_init_range_decoder(c, buf, buf_size);
+    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
+
+    s->current_picture->pict_type= AV_PICTURE_TYPE_I; //FIXME I vs. P
+    if ((res = decode_header(s)) < 0)
+        return res;
+    if ((res=ff_snow_common_init_after_header(avctx)) < 0)
+        return res;
+
+    // realloc slice buffer for the case that spatial_decomposition_count changed
+    ff_slice_buffer_destroy(&s->sb);
+    if ((res = ff_slice_buffer_init(&s->sb, s->plane[0].height,
+                                    (MB_SIZE >> s->block_max_depth) +
+                                    s->spatial_decomposition_count * 11 + 1,
+                                    s->plane[0].width,
+                                    s->spatial_idwt_buffer)) < 0)
+        return res;
+
+    for(plane_index=0; plane_index < s->nb_planes; plane_index++){
+        Plane *p= &s->plane[plane_index];
+        p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40 /* set only for the 6-tap filter (40,-10,2) with diag_mc, the values encode_init() installs */
+                                              && p->hcoeff[1]==-10
+                                              && p->hcoeff[2]==2;
+    }
+
+    ff_snow_alloc_blocks(s); /* NOTE(review): any result of this call is ignored here -- confirm it cannot fail, or check it */
+
+    if((res = ff_snow_frame_start(s)) < 0)
+        return res;
+    //keyframe flag duplication mess FIXME
+    if(avctx->debug&FF_DEBUG_PICT_INFO)
+        av_log(avctx, AV_LOG_ERROR, /* NOTE(review): informational output is logged at ERROR level -- confirm this is intended */
+               "keyframe:%d qlog:%d qbias: %d mvscale: %d "
+               "decomposition_type:%d decomposition_count:%d\n",
+               s->keyframe, s->qlog, s->qbias, s->mv_scale,
+               s->spatial_decomposition_type,
+               s->spatial_decomposition_count
+              );
+
+    if ((res = decode_blocks(s)) < 0)
+        return res;
+    /* Everything below runs once per plane: unpack all coefficients, then reconstruct slice by slice. */
+    for(plane_index=0; plane_index < s->nb_planes; plane_index++){
+        Plane *p= &s->plane[plane_index];
+        int w= p->width;
+        int h= p->height;
+        int x, y;
+        int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
+
+        if(s->avctx->debug&2048){ /* debug flag 2048: also keep a copy of the prediction-only picture in mconly_picture */
+            memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h); /* NOTE(review): this clears spatial_dwt_buffer while the next call uses spatial_idwt_buffer -- confirm which buffer is meant */
+            predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
+
+            for(y=0; y<h; y++){
+                for(x=0; x<w; x++){
+                    int v= s->current_picture->data[plane_index][y*s->current_picture->linesize[plane_index] + x];
+                    s->mconly_picture->data[plane_index][y*s->mconly_picture->linesize[plane_index] + x]= v;
+                }
+            }
+        }
+
+        {
+        for(level=0; level<s->spatial_decomposition_count; level++){
+            for(orientation=level ? 1 : 0; orientation<4; orientation++){ /* orientation 0 is visited at level 0 only */
+                SubBand *b= &p->band[level][orientation];
+                unpack_coeffs(s, b, b->parent, orientation);
+            }
+        }
+        }
+
+        {
+        const int mb_h= s->b_height << s->block_max_depth;
+        const int block_size = MB_SIZE >> s->block_max_depth;
+        const int block_h    = plane_index ? block_size>>s->chroma_v_shift : block_size;
+        int mb_y;
+        DWTCompose cs[MAX_DECOMPOSITIONS];
+        int yd=0, yq=0; /* next row to hand to the IDWT / next row to scale in lossless mode */
+        int y;
+        int end_y;
+
+        ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
+        for(mb_y=0; mb_y<=mb_h; mb_y++){ /* one pass per block row, plus one extra pass with mb_y == mb_h */
+
+            int slice_starty = block_h*mb_y;
+            int slice_h = block_h*(mb_y+1);
+
+            if (!(s->keyframe || s->avctx->debug&512)){ /* on the add_yblock() path the slice is moved up by half a block */
+                slice_starty = FFMAX(0, slice_starty - (block_h >> 1));
+                slice_h -= (block_h >> 1);
+            }
+
+            for(level=0; level<s->spatial_decomposition_count; level++){
+                for(orientation=level ? 1 : 0; orientation<4; orientation++){
+                    SubBand *b= &p->band[level][orientation];
+                    int start_y;
+                    int end_y; /* note: shadows the end_y declared in the enclosing block */
+                    int our_mb_start = mb_y;
+                    int our_mb_end = (mb_y + 1);
+                    const int extra= 3;
+                    start_y = (mb_y ? ((block_h * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
+                    end_y = (((block_h * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
+                    if (!(s->keyframe || s->avctx->debug&512)){
+                        start_y = FFMAX(0, start_y - (block_h >> (1+s->spatial_decomposition_count - level)));
+                        end_y = FFMAX(0, end_y - (block_h >> (1+s->spatial_decomposition_count - level)));
+                    }
+                    start_y = FFMIN(b->height, start_y);
+                    end_y = FFMIN(b->height, end_y);
+
+                    if (start_y != end_y){
+                        if (orientation == 0){ /* band [0][0] additionally goes through correlate and dequantize; it is decoded one row ahead of the rows dequantized */
+                            SubBand * correlate_band = &p->band[0][0];
+                            int correlate_end_y = FFMIN(b->height, end_y + 1);
+                            int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
+                            decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
+                            correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
+                            dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
+                        }
+                        else
+                            decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
+                    }
+                }
+            }
+            /* run the inverse transform in steps of 4 rows until it has reached the end of this slice */
+            for(; yd<slice_h; yd+=4){
+                ff_spatial_idwt_buffered_slice(&s->dwt, cs, &s->sb, s->temp_idwt_buffer, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
+            }
+
+            if(s->qlog == LOSSLESS_QLOG){ /* in this mode the rows are scaled up by FRAC_BITS here */
+                for(; yq<slice_h && yq<h; yq++){
+                    IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
+                    for(x=0; x<w; x++){
+                        line[x] <<= FRAC_BITS;
+                    }
+                }
+            }
+
+            predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
+
+            y = FFMIN(p->height, slice_starty);
+            end_y = FFMIN(p->height, slice_h);
+            while(y < end_y) /* the rows of this slice are finished; give them back to the slice buffer */
+                ff_slice_buffer_release(&s->sb, y++);
+        }
+
+        ff_slice_buffer_flush(&s->sb);
+        }
+
+    }
+
+    emms_c();
+
+    ff_snow_release_buffer(avctx);
+
+    if(!(s->avctx->debug&2048)) /* output the decoded picture, or the prediction-only copy when debug flag 2048 is set */
+        res = av_frame_ref(picture, s->current_picture);
+    else
+        res = av_frame_ref(picture, s->mconly_picture);
+
+    if (res < 0)
+        return res;
+
+    *got_frame = 1;
+
+    bytes_read= c->bytestream - c->bytestream_start;
+    if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
+
+    return bytes_read;
+}
+
+static av_cold int decode_end(AVCodecContext *avctx)
+{
+    /* Close callback: destroy the slice buffer, then run the common
+     * Snow teardown on the private context. Always returns 0. */
+    SnowContext *const snow = avctx->priv_data;
+
+    ff_slice_buffer_destroy(&snow->sb);
+    ff_snow_common_end(snow);
+    return 0;
+}
+
+AVCodec ff_snow_decoder = { /* codec descriptor that ties together the decoder callbacks defined in this file */
+    .name           = "snow",
+    .long_name      = NULL_IF_CONFIG_SMALL("Snow"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_SNOW,
+    .priv_data_size = sizeof(SnowContext), /* the callbacks read this context back from avctx->priv_data */
+    .init           = decode_init,
+    .close          = decode_end,
+    .decode         = decode_frame,
+    .capabilities   = CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
+};
diff --git a/libavcodec/snowenc.c b/libavcodec/snowenc.c
new file mode 100644
index 0000000..0fae0a6
--- /dev/null
+++ b/libavcodec/snowenc.c
@@ -0,0 +1,2025 @@
+/*
+ * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intmath.h"
+#include "libavutil/log.h"
+#include "libavutil/opt.h"
+#include "avcodec.h"
+#include "internal.h"
+#include "snow_dwt.h"
+#include "snow.h"
+
+#include "rangecoder.h"
+#include "mathops.h"
+
+#include "mpegvideo.h"
+#include "h263.h"
+
+static av_cold int encode_init(AVCodecContext *avctx) /* encoder init callback: validates options, fills the SnowContext and allocates work buffers; returns 0 or a negative value */
+{
+    SnowContext *s = avctx->priv_data;
+    int plane_index, ret;
+    int i;
+
+    if(avctx->prediction_method == DWT_97 /* reject DWT_97 combined with QSCALE and global_quality 0 (the lossless setting, per the message) */
+       && (avctx->flags & CODEC_FLAG_QSCALE)
+       && avctx->global_quality == 0){
+        av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
+        return -1; /* NOTE(review): bare -1 here, AVERROR codes elsewhere in this function -- consider unifying */
+    }
+
+    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
+
+    s->mv_scale       = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
+    s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
+
+    for(plane_index=0; plane_index<3; plane_index++){ /* every plane starts with the 6-tap filter (40,-10,2), diag_mc and fast_mc enabled */
+        s->plane[plane_index].diag_mc= 1;
+        s->plane[plane_index].htaps= 6;
+        s->plane[plane_index].hcoeff[0]=  40;
+        s->plane[plane_index].hcoeff[1]= -10;
+        s->plane[plane_index].hcoeff[2]=   2;
+        s->plane[plane_index].fast_mc= 1;
+    }
+
+    if ((ret = ff_snow_common_init(avctx)) < 0) {
+        ff_snow_common_end(avctx->priv_data);
+        return ret;
+    }
+    ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
+
+    ff_snow_alloc_blocks(s); /* NOTE(review): any result of this call is ignored -- confirm it cannot fail, or check it */
+
+    s->version=0;
+
+    s->m.avctx   = avctx;
+    s->m.flags   = avctx->flags;
+    s->m.bit_rate= avctx->bit_rate;
+
+    s->m.me.temp      = /* temp and scratchpad point at the same allocation */
+    s->m.me.scratchpad= av_mallocz_array((avctx->width+64), 2*16*2*sizeof(uint8_t));
+    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
+    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
+    s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
+    if (!s->m.me.scratchpad || !s->m.me.map || !s->m.me.score_map || !s->m.obmc_scratchpad)
+        return AVERROR(ENOMEM); /* NOTE(review): this and the later error returns free nothing themselves -- confirm the caller runs the close callback after a failed init */
+
+    ff_h263_encode_init(&s->m); //mv_penalty
+
+    s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1); /* avctx->refs limited to the range 1..MAX_REF_FRAMES */
+
+    if(avctx->flags&CODEC_FLAG_PASS1){
+        if(!avctx->stats_out)
+            avctx->stats_out = av_mallocz(256);
+
+        if (!avctx->stats_out)
+            return AVERROR(ENOMEM);
+    }
+    if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){ /* rate control is set up unless QSCALE is used without PASS2 */
+        if(ff_rate_control_init(&s->m) < 0)
+            return -1;
+    }
+    s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
+
+    switch(avctx->pix_fmt){ /* accepted input: YUV 4:4:4, 4:2:0, 4:1:0 (three planes) and GRAY8 (one plane) */
+    case AV_PIX_FMT_YUV444P:
+//    case AV_PIX_FMT_YUV422P:
+    case AV_PIX_FMT_YUV420P:
+//    case AV_PIX_FMT_YUV411P:
+    case AV_PIX_FMT_YUV410P:
+        s->nb_planes = 3;
+        s->colorspace_type= 0;
+        break;
+    case AV_PIX_FMT_GRAY8:
+        s->nb_planes = 1;
+        s->colorspace_type = 1;
+        break;
+/*    case AV_PIX_FMT_RGB32:
+        s->colorspace= 1;
+        break;*/
+    default:
+        av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
+        return -1;
+    }
+    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
+
+    ff_set_cmp(&s->mecc, s->mecc.me_cmp, s->avctx->me_cmp);
+    ff_set_cmp(&s->mecc, s->mecc.me_sub_cmp, s->avctx->me_sub_cmp);
+
+    s->input_picture = av_frame_alloc();
+    if (!s->input_picture)
+        return AVERROR(ENOMEM);
+
+    if ((ret = ff_snow_get_buffer(s, s->input_picture)) < 0)
+        return ret;
+
+    if(s->avctx->me_method == ME_ITER){ /* this method gets, per reference frame, one vector and one score for every block at maximum depth */
+        int size= s->b_width * s->b_height << 2*s->block_max_depth;
+        for(i=0; i<s->max_ref_frames; i++){
+            s->ref_mvs[i]= av_mallocz_array(size, sizeof(int16_t[2]));
+            s->ref_scores[i]= av_mallocz_array(size, sizeof(uint32_t));
+            if (!s->ref_mvs[i] || !s->ref_scores[i])
+                return AVERROR(ENOMEM);
+        }
+    }
+
+    return 0;
+}
+
+//near copy & paste from dsputil, FIXME
+/* Add up every pixel of the w x h block that starts at pix;
+ * consecutive rows are line_size bytes apart. */
+static int pix_sum(uint8_t * pix, int line_size, int w, int h)
+{
+    const uint8_t *row = pix;
+    int total = 0;
+    int r, c;
+
+    for (r = 0; r < h; r++, row += line_size)
+        for (c = 0; c < w; c++)
+            total += row[c];
+
+    return total;
+}
+
+//near copy & paste from dsputil, FIXME
+/* Accumulate ff_square_tab[256 + p] over every pixel p of the w x w block
+ * that starts at pix; consecutive rows are line_size bytes apart. */
+static int pix_norm1(uint8_t * pix, int line_size, int w)
+{
+    uint32_t *const squares = ff_square_tab + 256;
+    const uint8_t *row = pix;
+    int acc = 0;
+    int r, c;
+
+    for (r = 0; r < w; r++, row += line_size)
+        for (c = 0; c < w; c++)
+            acc += squares[row[c]];
+
+    return acc;
+}
+
+static inline int get_penalty_factor(int lambda, int lambda2, int type){
+    /* Pick the penalty factor that goes with a comparison function id. Only
+     * the low byte of type is looked at; ids not listed below are handled
+     * like FF_CMP_SAD. */
+    const int cmp = type & 0xFF;
+
+    if (cmp == FF_CMP_BIT)
+        return 1;
+
+    /* ids scaled from lambda2 */
+    if (cmp == FF_CMP_RD || cmp == FF_CMP_PSNR || cmp == FF_CMP_SSE || cmp == FF_CMP_NSSE)
+        return lambda2>>FF_LAMBDA_SHIFT;
+
+    /* ids scaled from lambda, differing only in the multiplier */
+    if (cmp == FF_CMP_DCT)
+        return (3*lambda)>>(FF_LAMBDA_SHIFT+1);
+    if (cmp == FF_CMP_W53)
+        return (4*lambda)>>(FF_LAMBDA_SHIFT);
+    if (cmp == FF_CMP_W97 || cmp == FF_CMP_SATD || cmp == FF_CMP_DCT264)
+        return (2*lambda)>>FF_LAMBDA_SHIFT;
+
+    return lambda>>FF_LAMBDA_SHIFT; // FF_CMP_SAD and every unlisted id
+}
+
+//FIXME copy&paste -- names for rows of a local array P; encode_q_branch() declares it as int P[10][2]
+#define P_LEFT P[1] /* filled from the left neighbour's mx/my in encode_q_branch() */
+#define P_TOP P[2] /* filled from the top neighbour's mx/my */
+#define P_TOPRIGHT P[3] /* filled from the top-right neighbour's mx/my */
+#define P_MEDIAN P[4]
+#define P_MV1 P[9]
+#define FLAG_QPEL   1 //must be 1
+
+/**
+ * Pick and code the cheapest representation of one node of the block tree.
+ *
+ * On keyframes the node is simply stored as intra with the left neighbour's
+ * colour and nothing is coded. Otherwise three candidates are costed:
+ *  - inter: motion search over all s->ref_frames references, trial-coded
+ *    into p_buffer;
+ *  - intra: the rounded mean of the block per plane, trial-coded into
+ *    i_buffer (only luma contributes to the distortion part of the score);
+ *  - split: when level != s->block_max_depth, the four children are encoded
+ *    recursively straight into s->c.
+ * If the split is cheapest its output is kept; otherwise the winning trial
+ * buffer is copied over the bytestream position saved on entry, which
+ * discards whatever the children wrote.
+ *
+ * @param s     encoder context; s->c, s->block_state and s->block are updated
+ * @param level depth of this node in the block tree (0 = largest block size)
+ * @param x     horizontal block index at this level
+ * @param y     vertical block index at this level
+ * @return score of the chosen candidate, or 0 on keyframes
+ */
+static int encode_q_branch(SnowContext *s, int level, int x, int y){
+    uint8_t p_buffer[1024];
+    uint8_t i_buffer[1024];
+    uint8_t p_state[sizeof(s->block_state)];
+    uint8_t i_state[sizeof(s->block_state)];
+    RangeCoder pc, ic;
+    uint8_t *pbbak= s->c.bytestream;
+    uint8_t *pbbak_start= s->c.bytestream_start;
+    int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
+    const int w= s->b_width  << s->block_max_depth;
+    const int h= s->b_height << s->block_max_depth;
+    const int rem_depth= s->block_max_depth - level;
+    const int index= (x + y*w) << rem_depth;
+    const int block_w= 1<<(LOG2_MB_SIZE - level);
+    int trx= (x+1)<<rem_depth;
+    int try= (y+1)<<rem_depth;
+    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
+    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
+    const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
+    const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
+    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
+    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
+    int pl = left->color[0];
+    int pcb= left->color[1];
+    int pcr= left->color[2];
+    int pmx, pmy;
+    int mx=0, my=0;
+    int l,cr,cb;
+    const int stride= s->current_picture->linesize[0];
+    const int uvstride= s->current_picture->linesize[1];
+    uint8_t *current_data[3]= { s->input_picture->data[0] + (x + y*  stride)*block_w,
+                                s->input_picture->data[1] + ((x*block_w)>>s->chroma_h_shift) + ((y*uvstride*block_w)>>s->chroma_v_shift),
+                                s->input_picture->data[2] + ((x*block_w)>>s->chroma_h_shift) + ((y*uvstride*block_w)>>s->chroma_v_shift)};
+    int P[10][2];
+    int16_t last_mv[3][2];
+    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
+    const int shift= 1+qpel;
+    MotionEstContext *c= &s->m.me;
+    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
+    int mx_context= av_log2(2*FFABS(left->mx - top->mx));
+    int my_context= av_log2(2*FFABS(left->my - top->my));
+    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
+    int ref, best_ref, ref_score, ref_mx, ref_my;
+
+    av_assert0(sizeof(s->block_state) >= 256);
+    if(s->keyframe){
+        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
+        return 0;
+    }
+
+//    clip predictors / edge ?
+
+    P_LEFT[0]= left->mx;
+    P_LEFT[1]= left->my;
+    P_TOP [0]= top->mx;
+    P_TOP [1]= top->my;
+    P_TOPRIGHT[0]= tr->mx;
+    P_TOPRIGHT[1]= tr->my;
+
+    last_mv[0][0]= s->block[index].mx;
+    last_mv[0][1]= s->block[index].my;
+    last_mv[1][0]= right->mx;
+    last_mv[1][1]= right->my;
+    last_mv[2][0]= bottom->mx;
+    last_mv[2][1]= bottom->my;
+
+    s->m.mb_stride=2;
+    s->m.mb_x=
+    s->m.mb_y= 0;
+    c->skip= 0;
+
+    av_assert1(c->  stride ==   stride);
+    av_assert1(c->uvstride == uvstride);
+
+    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
+    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
+    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
+    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
+
+    /* search window, relative to the block position */
+    c->xmin = - x*block_w - 16+3;
+    c->ymin = - y*block_w - 16+3;
+    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
+    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
+
+    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
+    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
+    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
+    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
+    /* xmin is always negative here, and left-shifting a negative value is
+     * undefined behaviour, so scale it by multiplication instead */
+    if(P_TOPRIGHT[0] < (c->xmin * (1<<shift))) P_TOPRIGHT[0]= (c->xmin * (1<<shift));
+    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
+    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
+
+    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
+    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
+
+    if (!y) {
+        c->pred_x= P_LEFT[0];
+        c->pred_y= P_LEFT[1];
+    } else {
+        c->pred_x = P_MEDIAN[0];
+        c->pred_y = P_MEDIAN[1];
+    }
+
+    /* motion search in every reference frame, keeping the cheapest one */
+    score= INT_MAX;
+    best_ref= 0;
+    for(ref=0; ref<s->ref_frames; ref++){
+        init_ref(c, current_data, s->last_picture[ref]->data, NULL, block_w*x, block_w*y, 0);
+
+        ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
+                                         (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
+
+        av_assert2(ref_mx >= c->xmin);
+        av_assert2(ref_mx <= c->xmax);
+        av_assert2(ref_my >= c->ymin);
+        av_assert2(ref_my <= c->ymax);
+
+        ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
+        ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
+        ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
+        if(s->ref_mvs[ref]){
+            s->ref_mvs[ref][index][0]= ref_mx;
+            s->ref_mvs[ref][index][1]= ref_my;
+            s->ref_scores[ref][index]= ref_score;
+        }
+        if(score > ref_score){
+            score= ref_score;
+            best_ref= ref;
+            mx= ref_mx;
+            my= ref_my;
+        }
+    }
+    //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
+
+    /* trial-code the inter candidate into p_buffer with a copy of the coder */
+    base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
+    pc= s->c;
+    pc.bytestream_start=
+    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
+    memcpy(p_state, s->block_state, sizeof(s->block_state));
+
+    if(level!=s->block_max_depth)
+        put_rac(&pc, &p_state[4 + s_context], 1);
+    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
+    if(s->ref_frames > 1)
+        put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
+    pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
+    put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
+    put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
+    p_len= pc.bytestream - pc.bytestream_start;
+    score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
+
+    /* intra candidate: rounded mean per plane; iscore is the luma squared
+     * error around that mean */
+    block_s= block_w*block_w;
+    sum = pix_sum(current_data[0], stride, block_w, block_w);
+    l= (sum + block_s/2)/block_s;
+    iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
+
+    if (s->nb_planes > 2) {
+        block_s= block_w*block_w>>(s->chroma_h_shift + s->chroma_v_shift);
+        sum = pix_sum(current_data[1], uvstride, block_w>>s->chroma_h_shift, block_w>>s->chroma_v_shift);
+        cb= (sum + block_s/2)/block_s;
+    //    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
+        sum = pix_sum(current_data[2], uvstride, block_w>>s->chroma_h_shift, block_w>>s->chroma_v_shift);
+        cr= (sum + block_s/2)/block_s;
+    //    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
+    }else
+        cb = cr = 0;
+
+    /* trial-code the intra candidate into i_buffer */
+    ic= s->c;
+    ic.bytestream_start=
+    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
+    memcpy(i_state, s->block_state, sizeof(s->block_state));
+    if(level!=s->block_max_depth)
+        put_rac(&ic, &i_state[4 + s_context], 1);
+    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
+    put_symbol(&ic, &i_state[32],  l-pl , 1);
+    if (s->nb_planes > 2) {
+        put_symbol(&ic, &i_state[64], cb-pcb, 1);
+        put_symbol(&ic, &i_state[96], cr-pcr, 1);
+    }
+    i_len= ic.bytestream - ic.bytestream_start;
+    iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
+
+//    assert(score==256*256*256*64-1);
+    av_assert1(iscore < 255*255*256 + s->lambda2*10);
+    av_assert1(iscore >= 0);
+    av_assert1(l>=0 && l<=255);
+    av_assert1(pl>=0 && pl<=255);
+
+    if(level==0){
+        int varc= iscore >> 8;
+        int vard= score >> 8;
+        if (vard <= 64 || vard < varc)
+            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
+        else
+            c->scene_change_score+= s->m.qscale;
+    }
+
+    /* split candidate: the children write directly into s->c */
+    if(level!=s->block_max_depth){
+        put_rac(&s->c, &s->block_state[4 + s_context], 0);
+        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
+        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
+        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
+        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
+        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
+
+        if(score2 < score && score2 < iscore)
+            return score2;
+    }
+
+    /* not split: rewind to the position saved on entry and install the
+     * winning trial coder, its bytes and its context state */
+    if(iscore < score){
+        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
+        memcpy(pbbak, i_buffer, i_len);
+        s->c= ic;
+        s->c.bytestream_start= pbbak_start;
+        s->c.bytestream= pbbak + i_len;
+        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
+        memcpy(s->block_state, i_state, sizeof(s->block_state));
+        return iscore;
+    }else{
+        memcpy(pbbak, p_buffer, p_len);
+        s->c= pc;
+        s->c.bytestream_start= pbbak_start;
+        s->c.bytestream= pbbak + p_len;
+        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
+        memcpy(s->block_state, p_state, sizeof(s->block_state));
+        return score;
+    }
+}
+
+/**
+ * Write the block tree node at (x, y) of the given level using the block
+ * data already stored in s->block.
+ *
+ * On keyframes nothing is coded; the node is stored as intra with the left
+ * neighbour's colour. Below the maximum depth one bit tells whether the four
+ * sub-blocks are identical (node coded as a whole) or the node is split, in
+ * which case the four children are written recursively.
+ */
+static void encode_q_branch2(SnowContext *s, int level, int x, int y){
+    const int w          = s->b_width << s->block_max_depth;
+    const int rem_depth  = s->block_max_depth - level;
+    const int index      = (x + y*w) << rem_depth;
+    const int trx        = (x+1) << rem_depth;
+    BlockNode *b         = &s->block[index];
+    const BlockNode *left= x ? &s->block[index-1] : &null_block;
+    const BlockNode *top = y ? &s->block[index-w] : &null_block;
+    const BlockNode *tl  = (y && x) ? &s->block[index-w-1] : left;
+    const BlockNode *tr  = (y && trx<w && (!(x&1) || !level)) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
+    /* colour predictors come from the left neighbour */
+    const int pl         = left->color[0];
+    const int pcb        = left->color[1];
+    const int pcr        = left->color[2];
+    const int has_ref    = !!b->ref;
+    const int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
+    const int mx_context = av_log2(2*FFABS(left->mx - top->mx)) + 16*has_ref;
+    const int my_context = av_log2(2*FFABS(left->my - top->my)) + 16*has_ref;
+    const int s_context  = 2*left->level + 2*top->level + tl->level + tr->level;
+    int pmx, pmy;
+
+    if(s->keyframe){
+        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
+        return;
+    }
+
+    if(level != s->block_max_depth){
+        const int merged= same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1);
+
+        put_rac(&s->c, &s->block_state[4 + s_context], merged);
+        if(!merged){
+            int child;
+
+            /* children in raster order: (0,0) (1,0) (0,1) (1,1) */
+            for(child=0; child<4; child++)
+                encode_q_branch2(s, level+1, 2*x + (child&1), 2*y + (child>>1));
+            return;
+        }
+    }
+
+    if(!(b->type & BLOCK_INTRA)){
+        /* inter: optional reference index, then the motion vector residual */
+        pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
+        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
+        if(s->ref_frames > 1)
+            put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
+        put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
+        put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
+        set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
+        return;
+    }
+
+    /* intra: colour residuals against the left neighbour */
+    pred_mv(s, &pmx, &pmy, 0, left, top, tr);
+    put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
+    put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
+    if (s->nb_planes > 2) {
+        put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
+        put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
+    }
+    set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
+}
+
+/**
+ * Estimate the intra colour of block (mb_x, mb_y) for one plane.
+ *
+ * The block is temporarily marked intra with colour 0 and add_yblock() is
+ * run for the four half-block-shifted positions around it, writing into the
+ * obmc scratchpad. For every pixel inside the plane the function then
+ * accumulates
+ *     ab += (src - (d >> FRAC_BITS)) * obmc_v
+ *     aa += obmc_v * obmc_v
+ * where obmc_v is the block's OBMC weight (with the weights that would fall
+ * outside the plane folded in) and d is derived from the scratchpad value.
+ * The block is restored from its backup before returning.
+ * NOTE(review): what add_yblock() leaves in the scratchpad is not visible
+ * here; presumably the prediction contributed by the neighbours -- confirm.
+ *
+ * @return ab scaled by 2^LOG2_OBMC_MAX and divided (rounded) by aa,
+ *         clipped to 0..255
+ */
+static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
+    int i, x2, y2;
+    Plane *p= &s->plane[plane_index];
+    const int block_size = MB_SIZE >> s->block_max_depth;
+    const int block_w    = plane_index ? block_size>>s->chroma_h_shift : block_size;
+    const int block_h    = plane_index ? block_size>>s->chroma_v_shift : block_size;
+    const uint8_t *obmc  = plane_index ? ff_obmc_tab[s->block_max_depth+s->chroma_h_shift] : ff_obmc_tab[s->block_max_depth];
+    const int obmc_stride= plane_index ? (2*block_size)>>s->chroma_h_shift : 2*block_size;
+    const int ref_stride= s->current_picture->linesize[plane_index];
+    uint8_t *src= s-> input_picture->data[plane_index];
+    IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
+    const int b_stride = s->b_width << s->block_max_depth;
+    const int w= p->width;
+    const int h= p->height;
+    int index= mb_x + mb_y*b_stride;
+    BlockNode *b= &s->block[index];
+    BlockNode backup= *b;
+    int ab=0;
+    int aa=0;
+
+    av_assert2(s->chroma_h_shift == s->chroma_v_shift); //obmc stuff above
+
+    b->type|= BLOCK_INTRA;
+    b->color[plane_index]= 0;
+    memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
+
+    for(i=0; i<4; i++){
+        int mb_x2= mb_x + (i &1) - 1;
+        int mb_y2= mb_y + (i>>1) - 1;
+        int x= block_w*mb_x2 + block_w/2;
+        int y= block_h*mb_y2 + block_h/2;
+
+        add_yblock(s, 0, NULL, dst + (i&1)*block_w + (i>>1)*obmc_stride*block_h, NULL, obmc,
+                    x, y, block_w, block_h, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
+
+        for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_h); y2++){
+            for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
+                int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_h*mb_y - block_h/2))*obmc_stride;
+                int obmc_v= obmc[index];
+                int d;
+                /* at plane borders fold in the weights that would otherwise
+                 * land outside the plane */
+                if(y<0) obmc_v += obmc[index + block_h*obmc_stride];
+                if(x<0) obmc_v += obmc[index + block_w];
+                if(y+block_h>h) obmc_v += obmc[index - block_h*obmc_stride];
+                if(x+block_w>w) obmc_v += obmc[index - block_w];
+                //FIXME precalculate this or simplify it somehow else
+
+                d = -dst[index] + (1<<(FRAC_BITS-1));
+                dst[index] = d;
+                ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
+                aa += obmc_v * obmc_v; //FIXME precalculate this
+            }
+        }
+    }
+    *b= backup;
+
+    /* ab may be negative; scale by multiplication because left-shifting a
+     * negative value is undefined behaviour */
+    return av_clip( ROUNDED_DIV(ab * (1<<LOG2_OBMC_MAX), aa), 0, 255); //FIXME we should not need clipping
+}
+
+/**
+ * Rough estimate of the side-information cost of block (x, y).
+ *
+ * Positions with x < 0, x >= row length or y >= number of rows cost 0.
+ * Intra blocks are costed from the colour differences to the left
+ * neighbour, inter blocks from the motion vector residual against
+ * pred_mv() and from the reference index.
+ *
+ * @param w width in blocks of the coded unit; only used to locate the
+ *          top-right neighbour
+ * @return the estimated cost
+ */
+static inline int get_block_bits(SnowContext *s, int x, int y, int w){
+    const int b_stride = s->b_width << s->block_max_depth;
+    const int b_height = s->b_height<< s->block_max_depth;
+    const BlockNode *b, *left, *top, *tl, *tr;
+    int index, dmx, dmy;
+//  int mx_context= av_log2(2*FFABS(left->mx - top->mx));
+//  int my_context= av_log2(2*FFABS(left->my - top->my));
+
+    /* reject out-of-range positions before forming any pointer into
+     * s->block[] from them */
+    if(x<0 || x>=b_stride || y>=b_height)
+        return 0;
+
+    index= x + y*b_stride;
+    b    = &s->block[index];
+    left = x ? &s->block[index-1] : &null_block;
+    top  = y ? &s->block[index-b_stride] : &null_block;
+    tl   = y && x ? &s->block[index-b_stride-1] : left;
+    tr   = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
+/*
+1            0      0
+01X          1-2    1
+001XX        3-6    2-3
+0001XXX      7-14   4-7
+00001XXXX   15-30   8-15
+*/
+//FIXME try accurate rate
+//FIXME intra and inter predictors if surrounding blocks are not the same type
+    if(b->type & BLOCK_INTRA){
+        return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
+                   + av_log2(2*FFABS(left->color[1] - b->color[1]))
+                   + av_log2(2*FFABS(left->color[2] - b->color[2])));
+    }else{
+        pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
+        dmx-= b->mx;
+        dmy-= b->my;
+        return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
+                    + av_log2(2*FFABS(dmy))
+                    + av_log2(2*b->ref));
+    }
+}
+
+static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, uint8_t (*obmc_edged)[MB_SIZE * 2]){ /*
+     * Rate-distortion cost of the candidate currently stored in block
+     * (mb_x, mb_y).
+     *
+     * The block's own prediction is rendered over a window of twice the block
+     * size, blended into s->current_picture using the obmc_edged weights plus
+     * the values held in the obmc scratchpad, and then compared with
+     * s->input_picture. For plane 0 an estimate of the side-information cost
+     * of this block and some of its neighbours is added.
+     * NOTE(review): the scratchpad presumably holds the neighbours' weighted
+     * contribution prepared by the caller -- confirm.
+     *
+     * Returns distortion + rate * penalty_factor.
+     */
+    Plane *p= &s->plane[plane_index];
+    const int block_size = MB_SIZE >> s->block_max_depth;
+    const int block_w    = plane_index ? block_size>>s->chroma_h_shift : block_size;
+    const int block_h    = plane_index ? block_size>>s->chroma_v_shift : block_size;
+    const int obmc_stride= plane_index ? (2*block_size)>>s->chroma_h_shift : 2*block_size;
+    const int ref_stride= s->current_picture->linesize[plane_index];
+    uint8_t *dst= s->current_picture->data[plane_index];
+    uint8_t *src= s->  input_picture->data[plane_index];
+    IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
+    uint8_t *cur = s->scratchbuf;
+    uint8_t *tmp = s->emu_edge_buffer;
+    const int b_stride = s->b_width << s->block_max_depth;
+    const int b_height = s->b_height<< s->block_max_depth;
+    const int w= p->width;
+    const int h= p->height;
+    int distortion;
+    int rate= 0;
+    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
+    int sx= block_w*mb_x - block_w/2; // top-left corner of the double-size window
+    int sy= block_h*mb_y - block_h/2;
+    int x0= FFMAX(0,-sx); // window clipped to the plane, in window coordinates
+    int y0= FFMAX(0,-sy);
+    int x1= FFMIN(block_w*2, w-sx);
+    int y1= FFMIN(block_h*2, h-sy);
+    int i,x,y;
+
+    av_assert2(s->chroma_h_shift == s->chroma_v_shift); //obmc and square assumptions below checking only block_w
+
+    ff_snow_pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_h*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
+
+    for(y=y0; y<y1; y++){
+        const uint8_t *obmc1= obmc_edged[y];
+        const IDWTELEM *pred1 = pred + y*obmc_stride;
+        uint8_t *cur1 = cur + y*ref_stride;
+        uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
+        for(x=x0; x<x1; x++){
+#if FRAC_BITS >= LOG2_OBMC_MAX
+            int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
+#else
+            int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
+#endif
+            v = (v + pred1[x]) >> FRAC_BITS;
+            if(v&(~255)) v= ~(v>>31); // out of 0..255: negative becomes 0, too large becomes 255 once stored as uint8_t
+            dst1[x] = v;
+        }
+    }
+
+    /* copy the regions where obmc[] = (uint8_t)256 */
+    if(LOG2_OBMC_MAX == 8
+        && (mb_x == 0 || mb_x == b_stride-1)
+        && (mb_y == 0 || mb_y == b_height-1)){ // only the four corner blocks
+        if(mb_x == 0)
+            x1 = block_w;
+        else
+            x0 = block_w;
+        if(mb_y == 0)
+            y1 = block_h;
+        else
+            y0 = block_h;
+        for(y=y0; y<y1; y++)
+            memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
+    }
+
+    if(block_w==16){
+        /* FIXME rearrange dsputil to fit 32x32 cmp functions */
+        /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
+        /* FIXME cmps overlap but do not cover the wavelet's whole support.
+         * So improving the score of one block is not strictly guaranteed
+         * to improve the score of the whole frame, thus iterative motion
+         * estimation does not always converge. */
+        if(s->avctx->me_cmp == FF_CMP_W97)
+            distortion = ff_w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
+        else if(s->avctx->me_cmp == FF_CMP_W53)
+            distortion = ff_w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
+        else{
+            distortion = 0;
+            for(i=0; i<4; i++){ // the 32x32 window is compared as four 16x16 quadrants
+                int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
+                distortion += s->mecc.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
+            }
+        }
+    }else{
+        av_assert2(block_w==8);
+        distortion = s->mecc.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
+    }
+
+    if(plane_index==0){
+        for(i=0; i<4; i++){ // visits (x,y), (x+1,y), (x-1,y+1) and (x,y+1)
+/* ..RRr
+ * .RXx.
+ * rxx..
+ */
+            rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
+        }
+        if(mb_x == b_stride-2)
+            rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
+    }
+    return distortion + rate*penalty_factor;
+}
+
+static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ /*
+     * Rate-distortion cost of the 2x2 group of blocks whose top-left member
+     * is (mb_x, mb_y).
+     *
+     * For each of nine block-sized regions (a 3x3 grid shifted by half a
+     * block) add_yblock() is run with s->current_picture as destination and
+     * the region is compared with s->input_picture. Before comparing, input
+     * pixels are copied over the parts of a region lying outside the plane;
+     * presumably so that those parts contribute no difference -- confirm.
+     * For plane 0 an estimate of the side-information cost is added.
+     *
+     * Returns distortion + rate * penalty_factor.
+     */
+    int i, y2;
+    Plane *p= &s->plane[plane_index];
+    const int block_size = MB_SIZE >> s->block_max_depth;
+    const int block_w    = plane_index ? block_size>>s->chroma_h_shift : block_size;
+    const int block_h    = plane_index ? block_size>>s->chroma_v_shift : block_size;
+    const uint8_t *obmc  = plane_index ? ff_obmc_tab[s->block_max_depth+s->chroma_h_shift] : ff_obmc_tab[s->block_max_depth];
+    const int obmc_stride= plane_index ? (2*block_size)>>s->chroma_h_shift : 2*block_size;
+    const int ref_stride= s->current_picture->linesize[plane_index];
+    uint8_t *dst= s->current_picture->data[plane_index];
+    uint8_t *src= s-> input_picture->data[plane_index];
+    //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst)
+    // const has only been removed from zero_dst to suppress a warning
+    static IDWTELEM zero_dst[4096]; //FIXME
+    const int b_stride = s->b_width << s->block_max_depth;
+    const int w= p->width;
+    const int h= p->height;
+    int distortion= 0;
+    int rate= 0;
+    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
+
+    av_assert2(s->chroma_h_shift == s->chroma_v_shift); //obmc and square assumptions below
+
+    for(i=0; i<9; i++){
+        int mb_x2= mb_x + (i%3) - 1;
+        int mb_y2= mb_y + (i/3) - 1;
+        int x= block_w*mb_x2 + block_w/2; // top-left pixel of this region
+        int y= block_h*mb_y2 + block_h/2;
+
+        add_yblock(s, 0, NULL, zero_dst, dst, obmc,
+                   x, y, block_w, block_h, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
+
+        //FIXME find a cleaner/simpler way to skip the outside stuff
+        for(y2= y; y2<0; y2++) // rows above the plane
+            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
+        for(y2= h; y2<y+block_h; y2++) // rows below the plane
+            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
+        if(x<0){ // columns left of the plane
+            for(y2= y; y2<y+block_h; y2++)
+                memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
+        }
+        if(x+block_w > w){ // columns right of the plane
+            for(y2= y; y2<y+block_h; y2++)
+                memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
+        }
+
+        av_assert1(block_w== 8 || block_w==16);
+        distortion += s->mecc.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_h);
+    }
+
+    if(plane_index==0){
+        BlockNode *b= &s->block[mb_x+mb_y*b_stride];
+        int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
+
+/* ..RRRr
+ * .RXXx.
+ * .RXXx.
+ * rxxx.
+ */
+        if(merged)
+            rate = get_block_bits(s, mb_x, mb_y, 2);
+        for(i=merged?4:0; i<9; i++){ // entries 0..3 are the group itself, skipped when it is costed as one merged block
+            static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
+            rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
+        }
+    }
+    return distortion + rate*penalty_factor;
+}
+
+static int encode_subband_c0run(SnowContext *s, SubBand *b, const IDWTELEM *src, const IDWTELEM *parent, int stride, int orientation){ /*
+     * Code one subband with the range coder in two passes.
+     *
+     * Pass 1 scans the coefficients and, for those whose neighbourhood (left,
+     * top-left, top, top-right and the parent coefficient) is entirely zero,
+     * records the lengths of the zero runs in s->run_buffer.
+     * Pass 2 writes the data: coefficients with a non-zero neighbourhood get
+     * an explicit significance bit, the others are covered by the run
+     * lengths; every non-zero coefficient is followed by its magnitude minus
+     * one and its sign.
+     *
+     * orientation is only referenced from commented-out code.
+     * Returns 0 on success, or -1 when fewer than w*40 bytes are left in the
+     * output buffer at the start of a row.
+     */
+    const int w= b->width;
+    const int h= b->height;
+    int x, y;
+
+    if(1){
+        int run=0;
+        int *runs = s->run_buffer;
+        int run_index=0;
+        int max_index;
+
+        for(y=0; y<h; y++){ // pass 1: collect the run lengths
+            for(x=0; x<w; x++){
+                int v, p=0;
+                int /*ll=0, */l=0, lt=0, t=0, rt=0;
+                v= src[x + y*stride];
+
+                if(y){
+                    t= src[x + (y-1)*stride];
+                    if(x){
+                        lt= src[x - 1 + (y-1)*stride];
+                    }
+                    if(x + 1 < w){
+                        rt= src[x + 1 + (y-1)*stride];
+                    }
+                }
+                if(x){
+                    l= src[x - 1 + y*stride];
+                    /*if(x > 1){
+                        if(orientation==1) ll= src[y + (x-2)*stride];
+                        else               ll= src[x - 2 + y*stride];
+                    }*/
+                }
+                if(parent){
+                    int px= x>>1;
+                    int py= y>>1;
+                    if(px<b->parent->width && py<b->parent->height)
+                        p= parent[px + py*2*stride];
+                }
+                if(!(/*ll|*/l|lt|t|rt|p)){ // only coefficients with an all-zero neighbourhood take part in runs
+                    if(v){
+                        runs[run_index++]= run;
+                        run=0;
+                    }else{
+                        run++;
+                    }
+                }
+            }
+        }
+        max_index= run_index; // number of runs that ended on a non-zero coefficient
+        runs[run_index++]= run; // trailing run; stored but never written (see the run_index <= max_index tests)
+        run_index=0;
+        run= runs[run_index++];
+
+        put_symbol2(&s->c, b->state[30], max_index, 0);
+        if(run_index <= max_index)
+            put_symbol2(&s->c, b->state[1], run, 3);
+
+        for(y=0; y<h; y++){ // pass 2: write the coefficients
+            if(s->c.bytestream_end - s->c.bytestream < w*40){
+                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
+                return -1;
+            }
+            for(x=0; x<w; x++){
+                int v, p=0;
+                int /*ll=0, */l=0, lt=0, t=0, rt=0;
+                v= src[x + y*stride];
+
+                if(y){
+                    t= src[x + (y-1)*stride];
+                    if(x){
+                        lt= src[x - 1 + (y-1)*stride];
+                    }
+                    if(x + 1 < w){
+                        rt= src[x + 1 + (y-1)*stride];
+                    }
+                }
+                if(x){
+                    l= src[x - 1 + y*stride];
+                    /*if(x > 1){
+                        if(orientation==1) ll= src[y + (x-2)*stride];
+                        else               ll= src[x - 2 + y*stride];
+                    }*/
+                }
+                if(parent){
+                    int px= x>>1;
+                    int py= y>>1;
+                    if(px<b->parent->width && py<b->parent->height)
+                        p= parent[px + py*2*stride];
+                }
+                if(/*ll|*/l|lt|t|rt|p){
+                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
+
+                    put_rac(&s->c, &b->state[0][context], !!v); // explicit significance bit
+                }else{
+                    if(!run){ // current run exhausted: this coefficient is non-zero, fetch the next run
+                        run= runs[run_index++];
+
+                        if(run_index <= max_index)
+                            put_symbol2(&s->c, b->state[1], run, 3);
+                        av_assert2(v);
+                    }else{
+                        run--;
+                        av_assert2(!v);
+                    }
+                }
+                if(v){
+                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
+                    int l2= 2*FFABS(l) + (l<0);
+                    int t2= 2*FFABS(t) + (t<0);
+
+                    put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4); // magnitude minus one
+                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + ff_quant3bA[l2&0xFF] + 3*ff_quant3bA[t2&0xFF]], v<0); // sign, context chosen from the left and top neighbours
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+/**
+ * Code one subband. Dispatches to the c0run coder; the qtree, z0run and dzr
+ * variants that used to be listed here as commented-out calls are not used.
+ *
+ * @return the result of encode_subband_c0run()
+ */
+static int encode_subband(SnowContext *s, SubBand *b, const IDWTELEM *src, const IDWTELEM *parent, int stride, int orientation){
+    const int ret = encode_subband_c0run(s, b, src, parent, stride, orientation);
+
+    return ret;
+}
+
+/**
+ * Try one candidate for block (mb_x, mb_y) and keep it if it lowers the cost.
+ *
+ * @param p          candidate: colours p[0..2] when intra is set, otherwise
+ *                   the motion vector (p[0], p[1])
+ * @param intra      non-zero to test an intra candidate
+ * @param obmc_edged weights passed through to get_block_rd()
+ * @param best_rd    best cost so far; updated when the candidate wins
+ * @return 1 if the candidate was kept, 0 if the block was left unchanged
+ *         (worse cost, or an inter candidate already found in s->me_cache)
+ */
+static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, uint8_t (*obmc_edged)[MB_SIZE * 2], int *best_rd){
+    const int b_stride= s->b_width << s->block_max_depth;
+    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
+    BlockNode backup= *block;
+    unsigned value;
+    int rd, index;
+
+    av_assert2(mb_x>=0 && mb_y>=0);
+    av_assert2(mb_x<b_stride);
+
+    if(intra){
+        block->color[0] = p[0];
+        block->color[1] = p[1];
+        block->color[2] = p[2];
+        block->type |= BLOCK_INTRA;
+    }else{
+        /* skip motion vectors that were already tried in this generation;
+         * p[1] may be negative, so scale it by multiplication because
+         * left-shifting a negative value is undefined behaviour */
+        index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
+        value= s->me_cache_generation + (p[0]>>10) + p[1]*(1<<6) + (block->ref<<12);
+        if(s->me_cache[index] == value)
+            return 0;
+        s->me_cache[index]= value;
+
+        block->mx= p[0];
+        block->my= p[1];
+        block->type &= ~BLOCK_INTRA;
+    }
+
+    rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
+
+//FIXME chroma
+    if(rd < *best_rd){
+        *best_rd= rd;
+        return 1;
+    }else{
+        *block= backup;
+        return 0;
+    }
+}
+
+/* Inter-only front end of check_block() taking the motion vector as two
+ * scalars; the temporary int[2] is discarded afterwards. Exists to work
+ * around a compilation problem with gcc 2.95. */
+static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, uint8_t (*obmc_edged)[MB_SIZE * 2], int *best_rd){
+    int mv[2];
+
+    mv[0] = p0;
+    mv[1] = p1;
+    return check_block(s, mb_x, mb_y, mv, /*intra*/ 0, obmc_edged, best_rd);
+}
+
+/**
+ * Try one motion vector / reference for the whole 2x2 group of blocks whose
+ * top-left member is (mb_x, mb_y) and keep it if it lowers the cost.
+ *
+ * @param p0,p1   candidate motion vector
+ * @param ref     candidate reference index
+ * @param best_rd best cost so far; updated when the candidate wins
+ * @return 1 if the candidate was kept, 0 if the four blocks were left
+ *         unchanged (worse cost, or candidate already found in s->me_cache)
+ */
+static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
+    const int b_stride= s->b_width << s->block_max_depth;
+    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
+    BlockNode backup[4];
+    unsigned value;
+    int rd, index;
+
+    /* We don't initialize backup[] during variable declaration, because
+     * that fails to compile on MSVC: "cannot convert from 'BlockNode' to
+     * 'int16_t'". */
+    backup[0] = block[0];
+    backup[1] = block[1];
+    backup[2] = block[b_stride];
+    backup[3] = block[b_stride + 1];
+
+    av_assert2(mb_x>=0 && mb_y>=0);
+    av_assert2(mb_x<b_stride);
+    av_assert2(((mb_x|mb_y)&1) == 0);
+
+    /* skip motion vectors that were already tried in this generation;
+     * p1 may be negative, so scale it by multiplication because
+     * left-shifting a negative value is undefined behaviour */
+    index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
+    value= s->me_cache_generation + (p0>>10) + p1*(1<<6) + (block->ref<<12);
+    if(s->me_cache[index] == value)
+        return 0;
+    s->me_cache[index]= value;
+
+    block->mx= p0;
+    block->my= p1;
+    block->ref= ref;
+    block->type &= ~BLOCK_INTRA;
+    block[1]= block[b_stride]= block[b_stride+1]= *block;
+
+    rd= get_4block_rd(s, mb_x, mb_y, 0);
+
+//FIXME chroma
+    if(rd < *best_rd){
+        *best_rd= rd;
+        return 1;
+    }else{
+        block[0]= backup[0];
+        block[1]= backup[1];
+        block[b_stride]= backup[2];
+        block[b_stride+1]= backup[3];
+        return 0;
+    }
+}
+
+/* Iterative motion estimation: sweep over all blocks (at most 25 times), re-testing every block whose
+ * neighbourhood changed, until a sweep changes nothing; with block_max_depth == 1 the 2x2 groups are tested last. */
+static void iterative_me(SnowContext *s){
+    int pass, mb_x, mb_y;
+    const int b_width = s->b_width  << s->block_max_depth;
+    const int b_height= s->b_height << s->block_max_depth;
+    const int b_stride= b_width;
+    int color[3];
+
+    { // dry run of the regular block coder; the range coder and the block contexts are restored afterwards
+        RangeCoder r = s->c;
+        uint8_t state[sizeof(s->block_state)];
+        memcpy(state, s->block_state, sizeof(s->block_state));
+        for(mb_y= 0; mb_y<s->b_height; mb_y++)
+            for(mb_x= 0; mb_x<s->b_width; mb_x++)
+                encode_q_branch(s, 0, mb_x, mb_y);
+        s->c = r;
+        memcpy(s->block_state, state, sizeof(s->block_state));
+    }
+
+    for(pass=0; pass<25; pass++){
+        int change= 0;
+        for(mb_y= 0; mb_y<b_height; mb_y++){
+            for(mb_x= 0; mb_x<b_width; mb_x++){
+                int dia_change, i, j, ref;
+                int best_rd= INT_MAX, ref_rd;
+                BlockNode backup, ref_b;
+                const int index= mb_x + mb_y * b_stride;
+                BlockNode *block= &s->block[index];
+                BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : NULL;
+                BlockNode *lb = mb_x                              ? &s->block[index         -1] : NULL;
+                BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : NULL;
+                BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : NULL;
+                BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : NULL;
+                BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : NULL;
+                BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
+                BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
+                const int b_w= (MB_SIZE >> s->block_max_depth);
+                uint8_t obmc_edged[MB_SIZE * 2][MB_SIZE * 2];
+
+                if(pass && (block->type & BLOCK_OPT)) // visited before and no neighbour changed since
+                    continue;
+                block->type |= BLOCK_OPT;
+                backup= *block; // compared against the final choice to detect a change
+
+                if(!s->me_cache_generation)
+                    memset(s->me_cache, 0, sizeof(s->me_cache));
+                s->me_cache_generation += 1<<22;
+
+                //FIXME precalculate
+                {
+                    int x, y;
+                    for (y = 0; y < b_w * 2; y++)
+                        memcpy(obmc_edged[y], ff_obmc_tab[s->block_max_depth] + y * b_w * 2, b_w * 2);
+                    if(mb_x==0)
+                        for(y=0; y<b_w*2; y++)
+                            memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
+                    if(mb_x==b_stride-1)
+                        for(y=0; y<b_w*2; y++)
+                            memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
+                    if(mb_y==0){
+                        for(x=0; x<b_w*2; x++)
+                            obmc_edged[0][x] += obmc_edged[b_w-1][x];
+                        for(y=1; y<b_w; y++)
+                            memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
+                    }
+                    if(mb_y==b_height-1){
+                        for(x=0; x<b_w*2; x++)
+                            obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
+                        for(y=b_w; y<b_w*2-1; y++)
+                            memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
+                    }
+                }
+
+                //skip stuff outside the picture
+                if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
+                    uint8_t *src= s->  input_picture->data[0];
+                    uint8_t *dst= s->current_picture->data[0];
+                    const int stride= s->current_picture->linesize[0];
+                    const int block_w= MB_SIZE >> s->block_max_depth;
+                    const int block_h= MB_SIZE >> s->block_max_depth;
+                    const int sx= block_w*mb_x - block_w/2;
+                    const int sy= block_h*mb_y - block_h/2;
+                    const int w= s->plane[0].width;
+                    const int h= s->plane[0].height;
+                    int y;
+
+                    for(y=sy; y<0; y++)
+                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
+                    for(y=h; y<sy+block_h*2; y++)
+                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
+                    if(sx<0){
+                        for(y=sy; y<sy+block_h*2; y++)
+                            memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
+                    }
+                    if(sx+block_w*2 > w){
+                        for(y=sy; y<sy+block_h*2; y++)
+                            memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
+                    }
+                }
+
+                // intra(black) = neighbors' contribution to the current block
+                for(i=0; i < s->nb_planes; i++)
+                    color[i]= get_dc(s, mb_x, mb_y, i);
+
+                // get previous score (cannot be cached due to OBMC)
+                if(pass > 0 && (block->type&BLOCK_INTRA)){
+                    int color0[3]= {block->color[0], block->color[1], block->color[2]};
+                    check_block(s, mb_x, mb_y, color0, 1, obmc_edged, &best_rd);
+                }else
+                    check_block_inter(s, mb_x, mb_y, block->mx, block->my, obmc_edged, &best_rd);
+
+                ref_b= *block;
+                ref_rd= best_rd;
+                for(ref=0; ref < s->ref_frames; ref++){
+                    int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
+                    if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
+                        continue;
+                    block->ref= ref;
+                    best_rd= INT_MAX;
+
+                    check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], obmc_edged, &best_rd);
+                    check_block_inter(s, mb_x, mb_y, 0, 0, obmc_edged, &best_rd);
+                    if(tb)
+                        check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], obmc_edged, &best_rd);
+                    if(lb)
+                        check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], obmc_edged, &best_rd);
+                    if(rb)
+                        check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], obmc_edged, &best_rd);
+                    if(bb)
+                        check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], obmc_edged, &best_rd);
+
+                    /* fullpel ME */
+                    //FIXME avoid subpel interpolation / round to nearest integer
+                    do{
+                        dia_change=0;
+                        for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
+                            for(j=0; j<i; j++){
+                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), obmc_edged, &best_rd);
+                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), obmc_edged, &best_rd);
+                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), obmc_edged, &best_rd);
+                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), obmc_edged, &best_rd);
+                            }
+                        }
+                    }while(dia_change);
+                    /* subpel ME */
+                    do{
+                        static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
+                        dia_change=0;
+                        for(i=0; i<8; i++)
+                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], obmc_edged, &best_rd);
+                    }while(dia_change);
+                    //FIXME or try the standard 2 pass qpel or similar
+
+                    mvr[0][0]= block->mx;
+                    mvr[0][1]= block->my;
+                    if(ref_rd > best_rd){
+                        ref_rd= best_rd;
+                        ref_b= *block;
+                    }
+                }
+                best_rd= ref_rd;
+                *block= ref_b;
+                check_block(s, mb_x, mb_y, color, 1, obmc_edged, &best_rd);
+                //FIXME RD style color selection
+                if(!same_block(block, &backup)){ // the block changed: all eight neighbours must be re-tested
+                    if(tb ) tb ->type &= ~BLOCK_OPT;
+                    if(lb ) lb ->type &= ~BLOCK_OPT;
+                    if(rb ) rb ->type &= ~BLOCK_OPT;
+                    if(bb ) bb ->type &= ~BLOCK_OPT;
+                    if(tlb) tlb->type &= ~BLOCK_OPT;
+                    if(trb) trb->type &= ~BLOCK_OPT;
+                    if(blb) blb->type &= ~BLOCK_OPT;
+                    if(brb) brb->type &= ~BLOCK_OPT;
+                    change ++;
+                }
+            }
+        }
+        av_log(s->avctx, AV_LOG_DEBUG, "pass:%d changed:%d\n", pass, change); // progress report, not an error
+        if(!change)
+            break;
+    }
+
+    if(s->block_max_depth == 1){ // 2x2 groups: test motion vectors shared by all four blocks
+        int change= 0;
+        for(mb_y= 0; mb_y<b_height; mb_y+=2){
+            for(mb_x= 0; mb_x<b_width; mb_x+=2){
+                int i;
+                int best_rd, init_rd;
+                const int index= mb_x + mb_y * b_stride;
+                BlockNode *b[4];
+
+                b[0]= &s->block[index];
+                b[1]= b[0]+1;
+                b[2]= b[0]+b_stride;
+                b[3]= b[2]+1;
+                if(same_block(b[0], b[1]) &&
+                   same_block(b[0], b[2]) &&
+                   same_block(b[0], b[3]))
+                    continue;
+
+                if(!s->me_cache_generation)
+                    memset(s->me_cache, 0, sizeof(s->me_cache));
+                s->me_cache_generation += 1<<22;
+
+                init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
+
+                //FIXME more multiref search?
+                check_4block_inter(s, mb_x, mb_y,
+                                   (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
+                                   (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
+
+                for(i=0; i<4; i++)
+                    if(!(b[i]->type&BLOCK_INTRA))
+                        check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
+
+                if(init_rd != best_rd)
+                    change++;
+            }
+        }
+        av_log(s->avctx, AV_LOG_DEBUG, "pass:4mv changed:%d\n", change*4); // progress report, not an error
+    }
+}
+
+/* Run the block coder on every top-level block; with search on a non-keyframe and ME_ITER, iterative_me() runs first. */
+static void encode_blocks(SnowContext *s, int search){
+    const int cols= s->b_width, rows= s->b_height;
+    int bx, by;
+
+    if(search && !s->keyframe && s->avctx->me_method == ME_ITER)
+        iterative_me(s);
+
+    for(by=0; by<rows; by++){
+        if(s->c.bytestream_end - s->c.bytestream < cols*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
+            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
+            return;
+        }
+        for(bx=0; bx<cols; bx++){
+            if(search && s->avctx->me_method != ME_ITER)
+                encode_q_branch (s, 0, bx, by);
+            else
+                encode_q_branch2(s, 0, bx, by);
+        }
+    }
+}
+
+/* Quantize the coefficients of subband b from src into dst (both indexed with stride); plain copy when s->qlog is LOSSLESS_QLOG. */
+static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
+    const int cols= b->width;
+    const int rows= b->height;
+    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
+    const int qmul= ff_qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
+    int col, row, thres1, thres2;
+
+    if(s->qlog == LOSSLESS_QLOG){
+        for(row=0; row<rows; row++)
+            for(col=0; col<cols; col++)
+                dst[col + row*stride]= src[col + row*stride];
+        return;
+    }
+
+    // a non-zero bias argument selects plain truncation, otherwise 3/8 of qmul is added before dividing
+    bias= bias ? 0 : (3*qmul)>>3;
+    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
+    thres2= 2*thres1;
+
+    if(bias){
+        for(row=0; row<rows; row++){
+            for(col=0; col<cols; col++){
+                const int pos= col + row*stride;
+                int v= src[pos];
+
+                // one unsigned compare: true when v lies in [-thres1, thres1] (for thres1 >= 0)
+                if((unsigned)(v+thres1) <= thres2){
+                    dst[pos]= 0;
+                }else if(v>=0){
+                    v<<= QEXPSHIFT;
+                    dst[pos]= (v + bias) / qmul; //FIXME optimize
+                }else{
+                    v= -v;
+                    v<<= QEXPSHIFT;
+                    dst[pos]= -((v + bias) / qmul); //FIXME optimize
+                }
+            }
+        }
+    }else{
+        // same scan as above, without the rounding offset
+        for(row=0; row<rows; row++){
+            for(col=0; col<cols; col++){
+                const int pos= col + row*stride;
+                int v= src[pos];
+
+                if((unsigned)(v+thres1) <= thres2){
+                    dst[pos]= 0;
+                }else if(v>=0){
+                    v<<= QEXPSHIFT;
+                    dst[pos]= v / qmul; //FIXME optimize
+                }else{
+                    v= -v;
+                    v<<= QEXPSHIFT;
+                    dst[pos]= -(v / qmul); //FIXME optimize
+                }
+            }
+        }
+    }
+}
+
+/* In-place inverse quantization of subband b; zero coefficients stay zero and nothing is done for LOSSLESS_QLOG. */
+static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
+    const int cols= b->width, rows= b->height;
+    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
+    const int qmul= ff_qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
+    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
+    int col, row;
+
+    if(s->qlog == LOSSLESS_QLOG)
+        return;
+    for(row=0; row<rows; row++){
+        for(col=0; col<cols; col++){
+            const int pos= col + row*stride;
+            const int v= src[pos];
+            if(v>0)
+                src[pos]=  (( v*qmul + qadd)>>(QEXPSHIFT));
+            else if(v<0)
+                src[pos]= -((-v*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
+        }
+    }
+}
+
+/* Replace every coefficient of subband b by its difference from a left/top-neighbour prediction (s and inverse are unused). */
+static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
+    const int cols= b->width;
+    const int rows= b->height;
+    int col, row;
+
+    // bottom-right to top-left, so every prediction reads neighbours that are still unmodified
+    for(row=rows-1; row>=0; row--){
+        for(col=cols-1; col>=0; col--){
+            const int pos= col + row*stride;
+            int pred;
+            if(!col)
+                pred= row ? src[pos - stride] : 0;
+            else if(!row || (use_median && col+1>=cols))
+                pred= src[pos - 1];
+            else if(use_median)
+                pred= mid_pred(src[pos - 1], src[pos - stride], src[pos - stride + 1]);
+            else
+                pred= mid_pred(src[pos - 1], src[pos - stride], src[pos - 1] + src[pos - stride] - src[pos - 1 - stride]);
+            src[pos] -= pred;
+        }
+    }
+}
+
+/* Counterpart of decorrelate(): add the left/top-neighbour prediction back onto every coefficient of subband b (s and inverse are unused). */
+static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
+    const int cols= b->width;
+    const int rows= b->height;
+    int col, row;
+
+    // top-left to bottom-right, so every prediction reads neighbours that were already restored
+    for(row=0; row<rows; row++){
+        for(col=0; col<cols; col++){
+            const int pos= col + row*stride;
+            int pred;
+            if(!col)
+                pred= row ? src[pos - stride] : 0;
+            else if(!row || (use_median && col+1>=cols))
+                pred= src[pos - 1];
+            else if(use_median)
+                pred= mid_pred(src[pos - 1], src[pos - stride], src[pos - stride + 1]);
+            else
+                pred= mid_pred(src[pos - 1], src[pos - stride], src[pos - 1] + src[pos - stride] - src[pos - 1 - stride]);
+            src[pos] += pred;
+        }
+    }
+}
+
+/* Write the qlog of each subband of the first (at most two) planes with put_symbol(), skipping orientation 2. */
+static void encode_qlogs(SnowContext *s){
+    int pl, lvl, ori;
+
+    for(pl=0; pl<FFMIN(s->nb_planes, 2); pl++){
+        Plane *p= &s->plane[pl];
+        for(lvl=0; lvl<s->spatial_decomposition_count; lvl++)
+            for(ori=!!lvl; ori<4; ori++) // orientation 0 exists on level 0 only
+                if(ori!=2)
+                    put_symbol(&s->c, s->header_state, p->band[lvl][ori].qlog, 1);
+    }
+}
+
+/* Write the frame header with the range coder s->c: keyframe flag, stream parameters or per-frame updates, then five delta-coded values. */
+static void encode_header(SnowContext *s){
+    int plane_index, i;
+    uint8_t kstate[32];
+
+    memset(kstate, MID_STATE, sizeof(kstate));
+
+    put_rac(&s->c, kstate, s->keyframe); // keyframe flag, coded with its own freshly initialized context
+    if(s->keyframe || s->always_reset){ // restart: reset the contexts and zero the "last" values the deltas below refer to
+        ff_snow_reset_contexts(s);
+        s->last_spatial_decomposition_type=
+        s->last_qlog=
+        s->last_qbias=
+        s->last_mv_scale=
+        s->last_block_max_depth= 0;
+        for(plane_index=0; plane_index<2; plane_index++){
+            Plane *p= &s->plane[plane_index];
+            p->last_htaps=0;
+            p->last_diag_mc=0;
+            memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
+        }
+    }
+    if(s->keyframe){ // keyframes carry the complete set of stream parameters
+        put_symbol(&s->c, s->header_state, s->version, 0);
+        put_rac(&s->c, s->header_state, s->always_reset);
+        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
+        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
+        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
+        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
+        if (s->nb_planes > 2) { // chroma shifts are only written when there are more than two planes
+            put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
+            put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
+        }
+        put_rac(&s->c, s->header_state, s->spatial_scalability);
+//        put_rac(&s->c, s->header_state, s->rate_scalability);
+        put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
+
+        encode_qlogs(s);
+    }
+
+    if(!s->keyframe){
+        int update_mc=0; // set when htaps, diag_mc or hcoeff of one of the first two planes differ from the last values
+        for(plane_index=0; plane_index<FFMIN(s->nb_planes, 2); plane_index++){
+            Plane *p= &s->plane[plane_index];
+            update_mc |= p->last_htaps   != p->htaps;
+            update_mc |= p->last_diag_mc != p->diag_mc;
+            update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
+        }
+        put_rac(&s->c, s->header_state, update_mc);
+        if(update_mc){
+            for(plane_index=0; plane_index<FFMIN(s->nb_planes, 2); plane_index++){
+                Plane *p= &s->plane[plane_index];
+                put_rac(&s->c, s->header_state, p->diag_mc);
+                put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
+                for(i= p->htaps/2; i; i--) // magnitudes of hcoeff[htaps/2] down to hcoeff[1]
+                    put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
+            }
+        }
+        if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
+            put_rac(&s->c, s->header_state, 1); // flag: a new decomposition count and new qlogs follow
+            put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
+            encode_qlogs(s);
+        }else
+            put_rac(&s->c, s->header_state, 0);
+    }
+    // sent for every frame: differences from the values of the previous header (zeroed above on a reset)
+    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
+    put_symbol(&s->c, s->header_state, s->qlog            - s->last_qlog    , 1);
+    put_symbol(&s->c, s->header_state, s->mv_scale        - s->last_mv_scale, 1);
+    put_symbol(&s->c, s->header_state, s->qbias           - s->last_qbias   , 1);
+    put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
+}
+
+/* Store the current header fields as the "last" values that encode_header() codes against; filter parameters only on non-keyframes. */
+static void update_last_header_values(SnowContext *s){
+    Plane *pl;
+
+    s->last_spatial_decomposition_count = s->spatial_decomposition_count;
+    s->last_spatial_decomposition_type  = s->spatial_decomposition_type;
+    s->last_block_max_depth             = s->block_max_depth;
+    s->last_mv_scale                    = s->mv_scale;
+    s->last_qbias                       = s->qbias;
+    s->last_qlog                        = s->qlog;
+    if(s->keyframe)
+        return;
+
+    for(pl= s->plane; pl < s->plane + 2; pl++){
+        memcpy(pl->last_hcoeff, pl->hcoeff, sizeof(pl->hcoeff));
+        pl->last_htaps  = pl->htaps;
+        pl->last_diag_mc= pl->diag_mc;
+    }
+}
+
+/* Convert a qscale (in FF_QP2LAMBDA units) to a qlog: QROOT steps per doubling, rounded, plus the fixed offset 61*QROOT/8. */
+static int qscale2qlog(int qscale){
+    return 61*QROOT/8 + rint(QROOT*log2(qscale / (float)FF_QP2LAMBDA)); ///< 64 > 60
+}
+
+/* Single-pass rate control: sets pict->quality, s->lambda and s->qlog; returns the change applied to s->qlog, or INT_MIN when the estimated quality is negative. */
+static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
+{
+    /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
+     * FIXME we know exact mv bits at this point,
+     * but ratecontrol isn't set up to include them. */
+    uint32_t coef_sum= 0;
+    int level, orientation, delta_qlog;
+
+    for(level=0; level<s->spatial_decomposition_count; level++){
+        for(orientation=level ? 1 : 0; orientation<4; orientation++){
+            SubBand *b= &s->plane[0].band[level][orientation]; // only plane 0 contributes to the estimate
+            IDWTELEM *buf= b->ibuf;
+            const int w= b->width;
+            const int h= b->height;
+            const int stride= b->stride;
+            const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
+            const int qmul= ff_qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
+            const int qdiv= (1<<16)/qmul; // 16.16 fixed-point reciprocal of qmul
+            int x, y;
+            //FIXME this is ugly
+            for(y=0; y<h; y++)
+                for(x=0; x<w; x++)
+                    buf[x+y*stride]= b->buf[x+y*stride]; // work on a copy in ibuf, b->buf is left untouched
+            if(orientation==0)
+                decorrelate(s, b, buf, stride, 1, 0);
+            for(y=0; y<h; y++)
+                for(x=0; x<w; x++)
+                    coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
+        }
+    }
+
+    /* ugly, ratecontrol just takes a sqrt again */
+    av_assert0(coef_sum < INT_MAX);
+    coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
+    if(pict->pict_type == AV_PICTURE_TYPE_I){ // the estimate goes into mb_var_sum for I pictures, into mc_mb_var_sum otherwise
+        s->m.current_picture.mb_var_sum= coef_sum;
+        s->m.current_picture.mc_mb_var_sum= 0;
+    }else{
+        s->m.current_picture.mc_mb_var_sum= coef_sum;
+        s->m.current_picture.mb_var_sum= 0;
+    }
+
+    pict->quality= ff_rate_estimate_qscale(&s->m, 1);
+    if (pict->quality < 0)
+        return INT_MIN; // error marker, callers test for it
+    s->lambda= pict->quality * 3/2;
+    delta_qlog= qscale2qlog(pict->quality) - s->qlog;
+    s->qlog+= delta_qlog; // i.e. s->qlog now equals qscale2qlog(pict->quality)
+    return delta_qlog;
+}
+
+/* Set b->qlog of every subband of plane p from the energy of the inverse DWT of a single coefficient placed at the band centre. */
+static void calculate_visual_weight(SnowContext *s, Plane *p){
+    const int plane_w= p->width;
+    const int plane_h= p->height;
+    const int samples= plane_w*plane_h;
+    int lvl, ori, n;
+
+    for(lvl=0; lvl<s->spatial_decomposition_count; lvl++){
+        for(ori=!!lvl; ori<4; ori++){
+            SubBand *band= &p->band[lvl][ori];
+            int64_t energy= 0;
+
+            // clear the buffer, set one coefficient of this band (ibuf presumably points into that buffer), transform back
+            memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*plane_w*plane_h);
+            band->ibuf[band->width/2 + band->height/2*band->stride]= 256*16;
+            ff_spatial_idwt(s->spatial_idwt_buffer, s->temp_idwt_buffer, plane_w, plane_h, plane_w, s->spatial_decomposition_type, s->spatial_decomposition_count);
+            for(n=0; n<samples; n++){
+                int64_t d= s->spatial_idwt_buffer[n]*16;
+                energy += d*d;
+            }
+
+            band->qlog= (int)(log(352256.0/sqrt(energy)) / log(pow(2.0, 1.0/QROOT))+0.5);
+        }
+    }
+}
+
+static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
+                        const AVFrame *pict, int *got_packet)
+{
+    SnowContext *s = avctx->priv_data;
+    RangeCoder * const c= &s->c;
+    AVFrame *pic = pict;
+    const int width= s->avctx->width;
+    const int height= s->avctx->height;
+    int level, orientation, plane_index, i, y, ret;
+    uint8_t rc_header_bak[sizeof(s->header_state)];
+    uint8_t rc_block_bak[sizeof(s->block_state)];
+
+    if ((ret = ff_alloc_packet2(avctx, pkt, s->b_width*s->b_height*MB_SIZE*MB_SIZE*3 + FF_MIN_BUFFER_SIZE)) < 0)
+        return ret;
+
+    ff_init_range_encoder(c, pkt->data, pkt->size);
+    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
+
+    for(i=0; i < s->nb_planes; i++){
+        int hshift= i ? s->chroma_h_shift : 0;
+        int vshift= i ? s->chroma_v_shift : 0;
+        for(y=0; y<(height>>vshift); y++)
+            memcpy(&s->input_picture->data[i][y * s->input_picture->linesize[i]],
+                   &pict->data[i][y * pict->linesize[i]],
+                   width>>hshift);
+        s->mpvencdsp.draw_edges(s->input_picture->data[i], s->input_picture->linesize[i],
+                                width >> hshift, height >> vshift,
+                                EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
+                                EDGE_TOP | EDGE_BOTTOM);
+
+    }
+    emms_c();
+    s->new_picture = pict;
+
+    s->m.picture_number= avctx->frame_number;
+    if(avctx->flags&CODEC_FLAG_PASS2){
+        s->m.pict_type = pic->pict_type = s->m.rc_context.entry[avctx->frame_number].new_pict_type;
+        s->keyframe = pic->pict_type == AV_PICTURE_TYPE_I;
+        if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
+            pic->quality = ff_rate_estimate_qscale(&s->m, 0);
+            if (pic->quality < 0)
+                return -1;
+        }
+    }else{
+        s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
+        s->m.pict_type = pic->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
+    }
+
+    if(s->pass1_rc && avctx->frame_number == 0)
+        pic->quality = 2*FF_QP2LAMBDA;
+    if (pic->quality) {
+        s->qlog   = qscale2qlog(pic->quality);
+        s->lambda = pic->quality * 3/2;
+    }
+    if (s->qlog < 0 || (!pic->quality && (avctx->flags & CODEC_FLAG_QSCALE))) {
+        s->qlog= LOSSLESS_QLOG;
+        s->lambda = 0;
+    }//else keep previous frame's qlog until after motion estimation
+
+    if (s->current_picture->data[0] && !(s->avctx->flags&CODEC_FLAG_EMU_EDGE)) {
+        int w = s->avctx->width;
+        int h = s->avctx->height;
+
+        s->mpvencdsp.draw_edges(s->current_picture->data[0],
+                                s->current_picture->linesize[0], w   , h   ,
+                                EDGE_WIDTH  , EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
+        if (s->current_picture->data[2]) {
+            s->mpvencdsp.draw_edges(s->current_picture->data[1],
+                                    s->current_picture->linesize[1], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
+                                    EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
+            s->mpvencdsp.draw_edges(s->current_picture->data[2],
+                                    s->current_picture->linesize[2], w>>s->chroma_h_shift, h>>s->chroma_v_shift,
+                                    EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM);
+        }
+    }
+
+    ff_snow_frame_start(s);
+    avctx->coded_frame= s->current_picture;
+
+    s->m.current_picture_ptr= &s->m.current_picture;
+    s->m.current_picture.f = s->current_picture;
+    s->m.current_picture.f->pts = pict->pts;
+    if(pic->pict_type == AV_PICTURE_TYPE_P){
+        int block_width = (width +15)>>4;
+        int block_height= (height+15)>>4;
+        int stride= s->current_picture->linesize[0];
+
+        av_assert0(s->current_picture->data[0]);
+        av_assert0(s->last_picture[0]->data[0]);
+
+        s->m.avctx= s->avctx;
+        s->m.   last_picture.f = s->last_picture[0];
+        s->m.    new_picture.f = s->input_picture;
+        s->m.   last_picture_ptr= &s->m.   last_picture;
+        s->m.linesize = stride;
+        s->m.uvlinesize= s->current_picture->linesize[1];
+        s->m.width = width;
+        s->m.height= height;
+        s->m.mb_width = block_width;
+        s->m.mb_height= block_height;
+        s->m.mb_stride=   s->m.mb_width+1;
+        s->m.b8_stride= 2*s->m.mb_width+1;
+        s->m.f_code=1;
+        s->m.pict_type = pic->pict_type;
+        s->m.me_method= s->avctx->me_method;
+        s->m.me.scene_change_score=0;
+        s->m.flags= s->avctx->flags;
+        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
+        s->m.out_format= FMT_H263;
+        s->m.unrestricted_mv= 1;
+
+        s->m.lambda = s->lambda;
+        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
+        s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
+
+        s->m.mecc= s->mecc; //move
+        s->m.qdsp= s->qdsp; //move
+        s->m.hdsp = s->hdsp;
+        ff_init_me(&s->m);
+        s->hdsp = s->m.hdsp;
+        s->mecc= s->m.mecc;
+    }
+
+    if(s->pass1_rc){
+        memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
+        memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
+    }
+
+redo_frame:
+
+    s->spatial_decomposition_count= 5;
+
+    while(   !(width >>(s->chroma_h_shift + s->spatial_decomposition_count))
+          || !(height>>(s->chroma_v_shift + s->spatial_decomposition_count)))
+        s->spatial_decomposition_count--;
+
+    if (s->spatial_decomposition_count <= 0) {
+        av_log(avctx, AV_LOG_ERROR, "Resolution too low\n");
+        return AVERROR(EINVAL);
+    }
+
+    s->m.pict_type = pic->pict_type;
+    s->qbias = pic->pict_type == AV_PICTURE_TYPE_P ? 2 : 0;
+
+    ff_snow_common_init_after_header(avctx);
+
+    if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
+        for(plane_index=0; plane_index < s->nb_planes; plane_index++){
+            calculate_visual_weight(s, &s->plane[plane_index]);
+        }
+    }
+
+    encode_header(s);
+    s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
+    encode_blocks(s, 1);
+    s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
+
+    for(plane_index=0; plane_index < s->nb_planes; plane_index++){
+        Plane *p= &s->plane[plane_index];
+        int w= p->width;
+        int h= p->height;
+        int x, y;
+//        int bits= put_bits_count(&s->c.pb);
+
+        if (!s->memc_only) {
+            //FIXME optimize
+            if(pict->data[plane_index]) //FIXME gray hack
+                for(y=0; y<h; y++){
+                    for(x=0; x<w; x++){
+                        s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
+                    }
+                }
+            predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
+
+            if(   plane_index==0
+               && pic->pict_type == AV_PICTURE_TYPE_P
+               && !(avctx->flags&CODEC_FLAG_PASS2)
+               && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
+                ff_init_range_encoder(c, pkt->data, pkt->size);
+                ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
+                pic->pict_type= AV_PICTURE_TYPE_I;
+                s->keyframe=1;
+                s->current_picture->key_frame=1;
+                goto redo_frame;
+            }
+
+            if(s->qlog == LOSSLESS_QLOG){
+                for(y=0; y<h; y++){
+                    for(x=0; x<w; x++){
+                        s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
+                    }
+                }
+            }else{
+                for(y=0; y<h; y++){
+                    for(x=0; x<w; x++){
+                        s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
+                    }
+                }
+            }
+
+            ff_spatial_dwt(s->spatial_dwt_buffer, s->temp_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
+
+            if(s->pass1_rc && plane_index==0){
+                int delta_qlog = ratecontrol_1pass(s, pic);
+                if (delta_qlog <= INT_MIN)
+                    return -1;
+                if(delta_qlog){
+                    //reordering qlog in the bitstream would eliminate this reset
+                    ff_init_range_encoder(c, pkt->data, pkt->size);
+                    memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
+                    memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
+                    encode_header(s);
+                    encode_blocks(s, 0);
+                }
+            }
+
+            for(level=0; level<s->spatial_decomposition_count; level++){
+                for(orientation=level ? 1 : 0; orientation<4; orientation++){
+                    SubBand *b= &p->band[level][orientation];
+
+                    quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
+                    if(orientation==0)
+                        decorrelate(s, b, b->ibuf, b->stride, pic->pict_type == AV_PICTURE_TYPE_P, 0);
+                    if (!s->no_bitstream)
+                    encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
+                    av_assert0(b->parent==NULL || b->parent->stride == b->stride*2);
+                    if(orientation==0)
+                        correlate(s, b, b->ibuf, b->stride, 1, 0);
+                }
+            }
+
+            for(level=0; level<s->spatial_decomposition_count; level++){
+                for(orientation=level ? 1 : 0; orientation<4; orientation++){
+                    SubBand *b= &p->band[level][orientation];
+
+                    dequantize(s, b, b->ibuf, b->stride);
+                }
+            }
+
+            ff_spatial_idwt(s->spatial_idwt_buffer, s->temp_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
+            if(s->qlog == LOSSLESS_QLOG){
+                for(y=0; y<h; y++){
+                    for(x=0; x<w; x++){
+                        s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
+                    }
+                }
+            }
+            predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
+        }else{
+            //ME/MC only
+            if(pic->pict_type == AV_PICTURE_TYPE_I){
+                for(y=0; y<h; y++){
+                    for(x=0; x<w; x++){
+                        s->current_picture->data[plane_index][y*s->current_picture->linesize[plane_index] + x]=
+                            pict->data[plane_index][y*pict->linesize[plane_index] + x];
+                    }
+                }
+            }else{
+                memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
+                predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
+            }
+        }
+        if(s->avctx->flags&CODEC_FLAG_PSNR){
+            int64_t error= 0;
+
+            if(pict->data[plane_index]) //FIXME gray hack
+                for(y=0; y<h; y++){
+                    for(x=0; x<w; x++){
+                        int d= s->current_picture->data[plane_index][y*s->current_picture->linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
+                        error += d*d;
+                    }
+                }
+            s->avctx->error[plane_index] += error;
+            s->current_picture->error[plane_index] = error;
+        }
+
+    }
+
+    update_last_header_values(s);
+
+    ff_snow_release_buffer(avctx);
+
+    s->current_picture->coded_picture_number = avctx->frame_number;
+    s->current_picture->pict_type = pict->pict_type;
+    s->current_picture->quality = pict->quality;
+    s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
+    s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
+    s->m.current_picture.f->display_picture_number =
+    s->m.current_picture.f->coded_picture_number   = avctx->frame_number;
+    s->m.current_picture.f->quality                = pic->quality;
+    s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
+    if(s->pass1_rc)
+        if (ff_rate_estimate_qscale(&s->m, 0) < 0)
+            return -1;
+    if(avctx->flags&CODEC_FLAG_PASS1)
+        ff_write_pass1_stats(&s->m);
+    s->m.last_pict_type = s->m.pict_type;
+    avctx->frame_bits = s->m.frame_bits;
+    avctx->mv_bits = s->m.mv_bits;
+    avctx->misc_bits = s->m.misc_bits;
+    avctx->p_tex_bits = s->m.p_tex_bits;
+
+    emms_c();
+
+    pkt->size = ff_rac_terminate(c);
+    if (avctx->coded_frame->key_frame)
+        pkt->flags |= AV_PKT_FLAG_KEY;
+    *got_packet = 1;
+
+    return 0;
+}
+
+/**
+ * Release everything the Snow encoder allocated for this codec context.
+ *
+ * @param avctx codec context whose priv_data is the SnowContext to tear down
+ * @return always 0
+ */
+static av_cold int encode_end(AVCodecContext *avctx)
+{
+    SnowContext *s = avctx->priv_data;
+
+    ff_snow_common_end(s);
+    ff_rate_control_uninit(&s->m);
+    av_frame_free(&s->input_picture);
+    /* av_freep() also clears the pointer, so the context is not left with a
+     * dangling stats_out after the encoder has been closed. */
+    av_freep(&avctx->stats_out);
+
+    return 0;
+}
+
+/* Byte offset of a SnowContext member, as needed by the AVOption table. */
+#define OFFSET(x) offsetof(SnowContext, x)
+/* Option flags shared by every entry below. Parenthesized so the macro stays
+ * a single operand wherever it is expanded. */
+#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
+/* Private encoder options; both are 0/1 integer switches defaulting to 0. */
+static const AVOption options[] = {
+    { "memc_only",      "Only do ME/MC (I frames -> ref, P frame -> ME+MC).",   OFFSET(memc_only), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
+    { "no_bitstream",   "Skip final bitstream writeout.",                    OFFSET(no_bitstream), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
+    { NULL },
+};
+
+/* AVClass describing the Snow encoder's private context; its .option member
+ * points at the options[] table so those switches are reachable through the
+ * generic option machinery. */
+static const AVClass snowenc_class = {
+    .class_name = "snow encoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+/* Codec descriptor for the Snow video encoder: names the init / encode2 /
+ * close callbacks, the size of the private context, the accepted pixel
+ * formats (list terminated by AV_PIX_FMT_NONE) and the private option class. */
+AVCodec ff_snow_encoder = {
+    .name           = "snow",
+    .long_name      = NULL_IF_CONFIG_SMALL("Snow"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_SNOW,
+    .priv_data_size = sizeof(SnowContext),
+    .init           = encode_init,
+    .encode2        = encode_frame,
+    .close          = encode_end,
+    .pix_fmts       = (const enum AVPixelFormat[]){
+        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV444P,
+        AV_PIX_FMT_GRAY8,
+        AV_PIX_FMT_NONE
+    },
+    .priv_class     = &snowenc_class,
+};
+
+
+#ifdef TEST
+#undef malloc
+#undef free
+#undef printf
+
+#include "libavutil/lfg.h"
+#include "libavutil/mathematics.h"
+
+/**
+ * Stand-alone DWT self test, only built when TEST is defined.
+ *
+ * It round-trips pseudo-random data through the forward and inverse
+ * transform for decomposition type 1 (exact match expected) and type 0
+ * (differences above 20 are reported), then prints a visual_weight table
+ * derived from the response of the inverse transform to a single coefficient
+ * per subband, and finally dumps the forward transform of a 2x2 pattern.
+ *
+ * @return 0 on completion, 1 if the scratch buffers cannot be allocated
+ */
+int main(void){
+#define width  256
+#define height 256
+    /* static: 2 * 256 * 256 ints (512 KiB) are kept off the stack */
+    static int buffer[2][width*height];
+    SnowContext s;
+    int i;
+    AVLFG prng;
+    s.spatial_decomposition_count=6;
+    s.spatial_decomposition_type=1;
+
+    s.temp_dwt_buffer  = av_mallocz(width * sizeof(DWTELEM));
+    s.temp_idwt_buffer = av_mallocz(width * sizeof(IDWTELEM));
+    if (!s.temp_dwt_buffer || !s.temp_idwt_buffer) {
+        av_free(s.temp_dwt_buffer);
+        av_free(s.temp_idwt_buffer);
+        return 1;
+    }
+
+    av_lfg_init(&prng, 1);
+
+    printf("testing 5/3 DWT\n");
+    for(i=0; i<width*height; i++)
+        buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
+
+    ff_spatial_dwt(buffer[0], s.temp_dwt_buffer, width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
+    ff_spatial_idwt((IDWTELEM*)buffer[0], s.temp_idwt_buffer, width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
+
+    for(i=0; i<width*height; i++)
+        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
+
+    printf("testing 9/7 DWT\n");
+    s.spatial_decomposition_type=0;
+    for(i=0; i<width*height; i++)
+        buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
+
+    ff_spatial_dwt(buffer[0], s.temp_dwt_buffer, width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
+    ff_spatial_idwt((IDWTELEM*)buffer[0], s.temp_idwt_buffer, width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
+
+    for(i=0; i<width*height; i++)
+        if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
+
+    {
+    int level, orientation, x, y;
+    int64_t errors[8][4];
+    int64_t g=0;
+
+        memset(errors, 0, sizeof(errors));
+        s.spatial_decomposition_count=3;
+        s.spatial_decomposition_type=0;
+        /* One impulse per subband: measure the energy the inverse transform
+         * spreads into the picture, and track the gcd of all results so the
+         * printed table can be normalized. */
+        for(level=0; level<s.spatial_decomposition_count; level++){
+            for(orientation=level ? 1 : 0; orientation<4; orientation++){
+                int w= width  >> (s.spatial_decomposition_count-level);
+                int h= height >> (s.spatial_decomposition_count-level);
+                int stride= width  << (s.spatial_decomposition_count-level);
+                DWTELEM *buf= buffer[0];
+                int64_t error=0;
+
+                if(orientation&1) buf+=w;
+                if(orientation>1) buf+=stride>>1;
+
+                memset(buffer[0], 0, sizeof(int)*width*height);
+                buf[w/2 + h/2*stride]= 256*256;
+                ff_spatial_idwt((IDWTELEM*)buffer[0], s.temp_idwt_buffer, width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
+                for(y=0; y<height; y++){
+                    for(x=0; x<width; x++){
+                        int64_t d= buffer[0][x + y*width];
+                        error += d*d;
+                        if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
+                    }
+                    if(FFABS(height/2-y)<9 && level==2) printf("\n");
+                }
+                error= (int)(sqrt(error)+0.5);
+                errors[level][orientation]= error;
+                if(g) g=av_gcd(g, error);
+                else g= error;
+            }
+        }
+        printf("static int const visual_weight[][4]={\n");
+        for(level=0; level<s.spatial_decomposition_count; level++){
+            printf("  {");
+            for(orientation=0; orientation<4; orientation++){
+                printf("%8"PRId64",", errors[level][orientation]/g);
+            }
+            printf("},\n");
+        }
+        printf("};\n");
+        {
+            /* Forward transform of a repeating 2x2 pattern; only the values
+             * around the picture centre are printed. The subband pointer,
+             * width/stride and error accumulator that used to be computed
+             * here were never read and have been dropped. */
+            memset(buffer[0], 0, sizeof(int)*width*height);
+            for(y=0; y<height; y++){
+                for(x=0; x<width; x++){
+                    int tab[4]={0,2,3,1};
+                    buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
+                }
+            }
+            ff_spatial_dwt(buffer[0], s.temp_dwt_buffer, width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
+            for(y=0; y<height; y++){
+                for(x=0; x<width; x++){
+                    int64_t d= buffer[0][x + y*width];
+                    if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
+                }
+                if(FFABS(height/2-y)<9) printf("\n");
+            }
+        }
+
+    }
+
+    av_free(s.temp_dwt_buffer);
+    av_free(s.temp_idwt_buffer);
+    return 0;
+}
+#endif /* TEST */
diff --git a/libavcodec/sonic.c b/libavcodec/sonic.c
new file mode 100644
index 0000000..a5e573a
--- /dev/null
+++ b/libavcodec/sonic.c
@@ -0,0 +1,1094 @@
+/*
+ * Simple free lossless/lossy audio codec
+ * Copyright (c) 2004 Alex Beregszaszi
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "avcodec.h"
+#include "get_bits.h"
+#include "golomb.h"
+#include "internal.h"
+#include "rangecoder.h"
+
+
+/**
+ * @file
+ * Simple free lossless/lossy audio codec
+ * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
+ * Written and designed by Alex Beregszaszi
+ *
+ * TODO:
+ *  - CABAC put/get_symbol
+ *  - independent quantizer for channels
+ *  - >2 channels support
+ *  - more decorrelation types
+ *  - more tap_quant tests
+ *  - selectable intlist writers/readers (bonk-style, golomb, cabac)
+ */
+
+/* Largest channel count the codec accepts (mono or stereo). */
+#define MAX_CHANNELS 2
+
+/* Inter-channel decorrelation modes, stored as a 2-bit field in extradata.
+ * The encoder writes the value 3 (no named constant) when the input is not
+ * stereo, in which case none of the three transforms is applied. */
+#define MID_SIDE 0
+#define LEFT_SIDE 1
+#define RIGHT_SIDE 2
+
+/* Private codec state shared by the Sonic encoder and decoder. */
+typedef struct SonicContext {
+    int version;                // bitstream version; the encoder always writes 2
+    int minor_version;          // stored next to version in extradata when version >= 2
+    int lossless, decorrelation; // lossless flag; decorrelation is MID_SIDE/LEFT_SIDE/RIGHT_SIDE or 3 for none
+
+    int num_taps, downsampling; // predictor order; number of consecutive input samples summed into one coded sample
+    double quantization;        // scale applied in the lossy rate control (0.0 in the lossless setup)
+
+    // block_align: coded samples per channel per frame
+    // frame_size:  channels * block_align * downsampling
+    int channels, samplerate, block_align, frame_size;
+
+    int *tap_quant;             // per-tap quantizer; the encoder fills it with ff_sqrt(i+1)
+    int *int_samples;           // frame_size working samples
+    int *coded_samples[MAX_CHANNELS]; // block_align values per channel
+
+    // for encoding
+    int *tail;                  // last tail_size samples of the previous frame
+    int tail_size;              // num_taps * channels
+    int *window;                // tail + current frame + tail_size zeros
+    int window_size;            // 2 * tail_size + frame_size
+
+    // for decoding
+    int *predictor_k;           // predictor coefficients (the encoder also stores its quantized taps here)
+    int *predictor_state[MAX_CHANNELS]; // NOTE(review): presumably per-channel lattice state; not exercised in this part of the file
+} SonicContext;
+
+/* Fixed-point scaling: products with predictor coefficients are scaled back
+ * by LATTICE_SHIFT bits, and lossy-mode samples are scaled up by SAMPLE_SHIFT
+ * bits before processing. */
+#define LATTICE_SHIFT   10
+#define SAMPLE_SHIFT    4
+#define LATTICE_FACTOR  (1 << LATTICE_SHIFT)
+#define SAMPLE_FACTOR   (1 << SAMPLE_SHIFT)
+
+/* Tuning constants of the encoder's lossy rate control. */
+#define BASE_QUANT      0.6
+#define RATE_VARIATION  3.0
+
+/**
+ * Right-shift a by b bits after adding half of the divisor, i.e. a rounded
+ * division by 2^b. b must be at least 1.
+ */
+static inline int shift(int a,int b)
+{
+    const int rounding = 1 << (b - 1);
+
+    return (a + rounding) >> b;
+}
+
+/**
+ * Right-shift a by b bits and add one to the result when a is negative.
+ */
+static inline int shift_down(int a,int b)
+{
+    int shifted = a >> b;
+
+    if (a < 0)
+        shifted++;
+    return shifted;
+}
+
+/**
+ * Write one integer with the adaptive range coder.
+ *
+ * Layout of the 32 context bytes in state: [0] zero flag, [1..10] unary
+ * exponent, [11..21] sign, [22..31] mantissa bits. Exponents above 9 share
+ * the last exponent/mantissa/sign contexts.
+ *
+ * @param c         range coder to write to
+ * @param state     array of 32 adaptive context states
+ * @param v         value to code
+ * @param is_signed when non-zero a sign bit is coded for non-zero values
+ * @param rc_stat   optional statistics table updated per coded bit, or NULL
+ * @param rc_stat2  second statistics table, only touched when rc_stat is set
+ */
+static av_always_inline av_flatten void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed, uint64_t rc_stat[256][2], uint64_t rc_stat2[32][2]){
+    int i;
+
+/* Local wrapper with the same name as the coder primitive: a macro is not
+ * re-expanded inside its own expansion, so the inner put_rac() refers to the
+ * underlying put_rac (presumably provided by rangecoder.h) while the wrapper
+ * adds the optional statistics update. */
+#define put_rac(C,S,B) \
+do{\
+    if(rc_stat){\
+        rc_stat[*(S)][B]++;\
+        rc_stat2[(S)-state][B]++;\
+    }\
+    put_rac(C,S,B);\
+}while(0)
+
+    if(v){
+        const int a= FFABS(v);
+        const int e= av_log2(a);
+        // non-zero marker
+        put_rac(c, state+0, 0);
+        if(e<=9){
+            // exponent in unary, each position with its own context
+            for(i=0; i<e; i++){
+                put_rac(c, state+1+i, 1);  //1..10
+            }
+            put_rac(c, state+1+i, 0);
+
+            // mantissa bits below the leading one, most significant first
+            for(i=e-1; i>=0; i--){
+                put_rac(c, state+22+i, (a>>i)&1); //22..31
+            }
+
+            if(is_signed)
+                put_rac(c, state+11 + e, v < 0); //11..21
+        }else{
+            // large values: same scheme, context index clamped at 9
+            for(i=0; i<e; i++){
+                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
+            }
+            put_rac(c, state+1+9, 0);
+
+            for(i=e-1; i>=0; i--){
+                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
+            }
+
+            if(is_signed)
+                put_rac(c, state+11 + 10, v < 0); //11..21
+        }
+    }else{
+        // zero is a single flag bit
+        put_rac(c, state+0, 1);
+    }
+#undef put_rac
+}
+
+/**
+ * Read one integer written by put_symbol().
+ *
+ * @param c         range decoder
+ * @param state     array of 32 adaptive context states
+ * @param is_signed when non-zero a sign bit follows every non-zero value
+ * @return the decoded value
+ */
+static inline av_flatten int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
+    int exponent  = 0;
+    int magnitude = 1;
+    int bit;
+
+    /* a set flag in context 0 means the value is zero */
+    if (get_rac(c, state + 0))
+        return 0;
+
+    /* unary exponent, contexts 1..10 */
+    while (get_rac(c, state + 1 + FFMIN(exponent, 9)))
+        exponent++;
+
+    /* mantissa below the implicit leading one, contexts 22..31 */
+    for (bit = exponent - 1; bit >= 0; bit--)
+        magnitude = magnitude + magnitude + get_rac(c, state + 22 + FFMIN(bit, 9));
+
+    /* optional sign, contexts 11..21 */
+    if (is_signed && get_rac(c, state + 11 + FFMIN(exponent, 10)))
+        return -magnitude;
+
+    return magnitude;
+}
+
+#if 1
+/**
+ * Range-coder list writer: every entry is coded as a signed symbol sharing
+ * the same context set. base_2_part is unused by this variant.
+ *
+ * @return always 1
+ */
+static inline int intlist_write(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
+{
+    int idx = 0;
+
+    while (idx < entries) {
+        put_symbol(c, state, buf[idx], 1, NULL, NULL);
+        idx++;
+    }
+
+    return 1;
+}
+
+/**
+ * Range-coder list reader: fills buf with entries signed symbols decoded
+ * with a shared context set. base_2_part is unused by this variant.
+ *
+ * @return always 1
+ */
+static inline int intlist_read(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
+{
+    int idx = 0;
+
+    while (idx < entries) {
+        buf[idx] = get_symbol(c, state, 1);
+        idx++;
+    }
+
+    return 1;
+}
+#elif 1
+/**
+ * Golomb list writer (disabled alternative): every entry is written as a
+ * signed exp-golomb code. base_2_part is unused by this variant.
+ *
+ * @return always 1
+ */
+static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
+{
+    int idx = 0;
+
+    while (idx < entries) {
+        set_se_golomb(pb, buf[idx]);
+        idx++;
+    }
+
+    return 1;
+}
+
+/**
+ * Golomb list reader (disabled alternative): fills buf with entries signed
+ * exp-golomb codes. base_2_part is unused by this variant.
+ *
+ * @return always 1
+ */
+static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
+{
+    int idx = 0;
+
+    while (idx < entries) {
+        buf[idx] = get_se_golomb(gb);
+        idx++;
+    }
+
+    return 1;
+}
+
+#else
+
+#define ADAPT_LEVEL 8
+
+/**
+ * Number of bits needed to represent x, i.e. the position of its highest set
+ * bit counted from 1; 0 for x == 0.
+ */
+static int bits_to_store(uint64_t x)
+{
+    int count;
+
+    for (count = 0; x != 0; x >>= 1)
+        count++;
+
+    return count;
+}
+
+/**
+ * Write value, known to lie in [0, max], least significant bit first, using
+ * the minimum number of bits. The top bit is only written when setting it
+ * could still yield a number that does not exceed max.
+ *
+ * @param pb    bit writer
+ * @param value number to store
+ * @param max   largest value the reader has to expect; nothing is written for 0
+ */
+static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
+{
+    int i, bits;
+
+    if (!max)
+        return;
+
+    bits = bits_to_store(max);
+
+    /* put_bits() with a width of 1 must be handed 0 or 1, so the selected
+     * bit is moved down to position 0 instead of being passed as a mask. */
+    for (i = 0; i < bits-1; i++)
+        put_bits(pb, 1, (value >> i) & 1);
+
+    if ( (value | (1U << (bits-1))) <= max)
+        put_bits(pb, 1, (value >> (bits-1)) & 1);
+}
+
+/**
+ * Read a number stored by write_uint_max() with the same max.
+ *
+ * @param gb  bit reader
+ * @param max largest value that can have been stored; 0 reads nothing
+ * @return the decoded value
+ */
+static unsigned int read_uint_max(GetBitContext *gb, int max)
+{
+    int value = 0;
+    int top, pos;
+
+    if (max == 0)
+        return 0;
+
+    top = bits_to_store(max) - 1;
+
+    /* all bits below the top one are always present, LSB first */
+    for (pos = 0; pos < top; pos++) {
+        if (get_bits1(gb))
+            value += 1 << pos;
+    }
+
+    /* the top bit is only in the stream when it could be set within max */
+    if ((value | (1 << top)) <= max && get_bits1(gb))
+        value += 1 << top;
+
+    return value;
+}
+
+/**
+ * Bonk-style list writer (disabled alternative).
+ *
+ * Each magnitude is split into low_bits raw bits and a unary-coded remainder.
+ * The unary bits of all entries are interleaved level by level and then
+ * stored with an adaptive run-length scheme; signs of non-zero entries follow.
+ *
+ * @param pb          bit writer
+ * @param buf         values to store
+ * @param entries     number of values in buf
+ * @param base_2_part when non-zero, pick a low_bits count from the average
+ *                    magnitude and store it in 4 bits
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
+{
+    int i, j, x = 0, low_bits = 0, max = 0;
+    int step = 256, pos = 0, dominant = 0;
+    int64_t total_bits = 0;
+    int *copy, *bits;
+
+    if (entries <= 0) {
+        /* Empty list: keep the stream layout (the reader still expects the
+         * 4-bit low_bits field) and avoid dividing by zero below. */
+        if (base_2_part)
+            put_bits(pb, 4, 0);
+        return 0;
+    }
+
+    copy = av_calloc(entries, sizeof(*copy));
+    if (!copy)
+        return AVERROR(ENOMEM);
+
+    if (base_2_part)
+    {
+        uint64_t energy = 0;
+
+        for (i = 0; i < entries; i++)
+            energy += abs(buf[i]);
+
+        low_bits = bits_to_store(energy / (entries * 2));
+        if (low_bits > 15)
+            low_bits = 15;
+
+        put_bits(pb, 4, low_bits);
+    }
+
+    for (i = 0; i < entries; i++)
+    {
+        const int magnitude = abs(buf[i]);
+
+        /* Only the low_bits least significant bits belong here; the rest is
+         * carried by the unary part. A zero-width field is not written. */
+        if (low_bits)
+            put_bits(pb, low_bits, magnitude & ((1 << low_bits) - 1));
+        copy[i] = magnitude >> low_bits;
+        if (copy[i] > max)
+            max = copy[i];
+        /* every entry contributes copy[i] ones and one terminating zero */
+        total_bits += (int64_t)copy[i] + 1;
+    }
+
+    if (total_bits > INT_MAX) {
+        av_free(copy);
+        return AVERROR(EINVAL);
+    }
+
+    bits = av_calloc(total_bits, sizeof(*bits));
+    if (!bits)
+    {
+        av_free(copy);
+        return AVERROR(ENOMEM);
+    }
+
+    for (i = 0; i <= max; i++)
+    {
+        for (j = 0; j < entries; j++)
+            if (copy[j] >= i)
+                bits[x++] = copy[j] > i;
+    }
+
+    // store bitstream
+    while (pos < x)
+    {
+        int steplet = step >> 8;
+        /* must start from 0 for every step, otherwise one interloper would
+         * mark all following runs as interrupted */
+        int any = 0;
+
+        if (pos + steplet > x)
+            steplet = x - pos;
+
+        for (i = 0; i < steplet; i++)
+            if (bits[i+pos] != dominant)
+                any = 1;
+
+        put_bits(pb, 1, any);
+
+        if (!any)
+        {
+            pos += steplet;
+            step += step / ADAPT_LEVEL;
+        }
+        else
+        {
+            int interloper = 0;
+
+            while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
+                interloper++;
+
+            // note change
+            write_uint_max(pb, interloper, (step >> 8) - 1);
+
+            pos += interloper + 1;
+            step -= step / ADAPT_LEVEL;
+        }
+
+        if (step < 256)
+        {
+            step = 65536 / step;
+            dominant = !dominant;
+        }
+    }
+
+    // store signs
+    for (i = 0; i < entries; i++)
+        if (buf[i])
+            put_bits(pb, 1, buf[i] < 0);
+
+    av_free(bits);
+    av_free(copy);
+
+    return 0;
+}
+
+/**
+ * Bonk-style list reader (disabled alternative), counterpart of the
+ * run-length based intlist_write().
+ *
+ * @param gb          bit reader
+ * @param buf         receives entries decoded values
+ * @param entries     number of values to decode
+ * @param base_2_part when non-zero, a 4-bit low_bits count and the raw low
+ *                    bits of every entry precede the run-length data
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
+{
+    int i, low_bits = 0, x = 0;
+    int n_zeros = 0, step = 256, dominant = 0;
+    int pos = 0, level = 0;
+    /* The unary bit vector is usually longer than the list itself (one zero
+     * per entry plus one one per unit of magnitude), so this is only the
+     * initial capacity; it grows on demand below. */
+    int bits_size = entries > 0 ? entries : 1;
+    int *bits = av_calloc(bits_size, sizeof(*bits));
+
+    if (!bits)
+        return AVERROR(ENOMEM);
+
+    if (base_2_part)
+        low_bits = get_bits(gb, 4);
+
+    /* Entries start from their raw low bits, or from 0 when there are none;
+     * the reconstruction below adds to these values. */
+    for (i = 0; i < entries; i++)
+        buf[i] = low_bits ? get_bits(gb, low_bits) : 0;
+
+//    av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits);
+
+    while (n_zeros < entries)
+    {
+        int steplet = step >> 8;
+
+        /* Either branch appends at most steplet bits: make room first. */
+        if (steplet > bits_size - x)
+        {
+            int new_size;
+            int *grown;
+
+            if (steplet > INT_MAX / 2 - x)
+            {
+                av_free(bits);
+                return AVERROR_INVALIDDATA;
+            }
+            new_size = 2 * (x + steplet);
+            grown = av_calloc(new_size, sizeof(*grown));
+            if (!grown)
+            {
+                av_free(bits);
+                return AVERROR(ENOMEM);
+            }
+            memcpy(grown, bits, x * sizeof(*bits));
+            av_free(bits);
+            bits = grown;
+            bits_size = new_size;
+        }
+
+        if (!get_bits1(gb))
+        {
+            for (i = 0; i < steplet; i++)
+                bits[x++] = dominant;
+
+            if (!dominant)
+                n_zeros += steplet;
+
+            /* a stream that never terminates must not overflow step */
+            if (step > INT_MAX - step / ADAPT_LEVEL)
+            {
+                av_free(bits);
+                return AVERROR_INVALIDDATA;
+            }
+            step += step / ADAPT_LEVEL;
+        }
+        else
+        {
+            int actual_run = read_uint_max(gb, steplet-1);
+
+//            av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run);
+
+            for (i = 0; i < actual_run; i++)
+                bits[x++] = dominant;
+
+            bits[x++] = !dominant;
+
+            if (!dominant)
+                n_zeros += actual_run;
+            else
+                n_zeros++;
+
+            step -= step / ADAPT_LEVEL;
+        }
+
+        if (step < 256)
+        {
+            step = 65536 / step;
+            dominant = !dominant;
+        }
+    }
+
+    // reconstruct unsigned values
+    n_zeros = 0;
+    for (i = 0; n_zeros < entries; i++)
+    {
+        /* skip entries whose unary part already ended at a lower level */
+        while(1)
+        {
+            if (pos >= entries)
+            {
+                pos = 0;
+                level += 1 << low_bits;
+            }
+
+            if (buf[pos] >= level)
+                break;
+
+            pos++;
+        }
+
+        if (bits[i])
+            buf[pos] += 1 << low_bits;
+        else
+            n_zeros++;
+
+        pos++;
+    }
+    av_free(bits);
+
+    // read signs
+    for (i = 0; i < entries; i++)
+        if (buf[i] && get_bits1(gb))
+            buf[i] = -buf[i];
+
+//    av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos);
+
+    return 0;
+}
+#endif
+
+/**
+ * Prime the predictor state in place from the history stored in state[],
+ * using the coefficients in k[].
+ *
+ * @param k     predictor coefficients, scaled by LATTICE_FACTOR
+ * @param state order values, updated in place
+ * @param order number of entries in state
+ */
+static void predictor_init_state(int *k, int *state, int order)
+{
+    int start;
+
+    for (start = order - 2; start >= 0; start--) {
+        int carry = state[start];
+        int tap   = 0;
+        int idx;
+
+        for (idx = start + 1; idx < order; idx++, tap++) {
+            const int coeff = k[tap];
+            const int old   = state[idx];
+
+            /* both updates use the values from before this step */
+            state[idx] = old + shift_down(coeff * carry, LATTICE_SHIFT);
+            carry     += shift_down(coeff * old, LATTICE_SHIFT);
+        }
+    }
+}
+
+/**
+ * Run one sample through the predictor: subtract the contribution of every
+ * stage from error, update the state and return the (clamped) result, which
+ * also becomes state[0].
+ *
+ * @param k     predictor coefficients, scaled by LATTICE_FACTOR
+ * @param state order state values, updated in place
+ * @param order number of stages
+ * @param error input value for this sample
+ * @return the value stored in state[0]
+ */
+static int predictor_calc_error(int *k, int *state, int order, int error)
+{
+    const int limit = SAMPLE_FACTOR << 16;
+    int residual = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT);
+    int tap;
+
+    for (tap = order - 2; tap >= 0; tap--) {
+        const int coeff = k[tap];
+        const int prev  = state[tap];
+
+        residual      -= shift_down(coeff * prev, LATTICE_SHIFT);
+        state[tap + 1] = prev + shift_down(coeff * residual, LATTICE_SHIFT);
+    }
+
+    // don't drift too far, to avoid overflows
+    if (residual > limit)
+        residual = limit;
+    else if (residual < -limit)
+        residual = -limit;
+
+    state[0] = residual;
+
+    return residual;
+}
+
+#if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER
+// Heavily modified Levinson-Durbin algorithm which
+// copes better with quantization, and calculates the
+// actual whitened result as it goes.
+
+/**
+ * Derive quantized predictor taps from window and whiten window in place.
+ *
+ * For every tap the correlation between the window shifted by
+ * (tap + 1) * channels and the running state is turned into a coefficient,
+ * quantized with tap_quant[], stored in out[] and immediately applied to
+ * both the window and the state.
+ *
+ * If the temporary state buffer cannot be allocated, all taps are set to 0
+ * and the window is left untouched. Applying zero taps does not change the
+ * window either, so the caller still gets a consistent tap/window pair.
+ *
+ * @param window         samples, modified in place
+ * @param window_entries number of values in window
+ * @param out            receives out_entries quantized taps
+ * @param out_entries    number of taps to compute
+ * @param channels       interleaving distance of the samples
+ * @param tap_quant      quantizer step for each tap
+ */
+static void modified_levinson_durbin(int *window, int window_entries,
+        int *out, int out_entries, int channels, int *tap_quant)
+{
+    int i;
+    int *state = av_calloc(window_entries, sizeof(*state));
+
+    if (!state)
+    {
+        for (i = 0; i < out_entries; i++)
+            out[i] = 0;
+        return;
+    }
+
+    memcpy(state, window, sizeof(*state) * window_entries);
+
+    for (i = 0; i < out_entries; i++)
+    {
+        int step = (i+1)*channels, k, j;
+        double xx = 0.0, xy = 0.0;
+        int *x_ptr = &(window[step]);
+        int *state_ptr = &(state[0]);
+
+        /* energy of the state and its correlation with the shifted window */
+        j = window_entries - step;
+        for (;j>0;j--,x_ptr++,state_ptr++)
+        {
+            double x_value = *x_ptr;
+            double state_value = *state_ptr;
+            xx += state_value*state_value;
+            xy += x_value*state_value;
+        }
+
+        if (xx == 0.0)
+            k = 0;
+        else
+            k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
+
+        /* keep the dequantized coefficient within +-LATTICE_FACTOR */
+        if (k > (LATTICE_FACTOR/tap_quant[i]))
+            k = LATTICE_FACTOR/tap_quant[i];
+        if (-k > (LATTICE_FACTOR/tap_quant[i]))
+            k = -(LATTICE_FACTOR/tap_quant[i]);
+
+        out[i] = k;
+        k *= tap_quant[i];
+
+        /* apply the dequantized tap to window and state simultaneously */
+        x_ptr = &(window[step]);
+        state_ptr = &(state[0]);
+        j = window_entries - step;
+        for (;j>0;j--,x_ptr++,state_ptr++)
+        {
+            int x_value = *x_ptr;
+            int state_value = *state_ptr;
+            *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
+            *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
+        }
+    }
+
+    av_free(state);
+}
+
+/**
+ * Map a sample rate in Hz to the index stored in the stream header.
+ *
+ * @return the index (0..8) or AVERROR(EINVAL) for a rate without a code
+ */
+static inline int code_samplerate(int samplerate)
+{
+    /* position in this list is the code */
+    static const int known_rates[] = {
+        44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000
+    };
+    const int count = sizeof(known_rates) / sizeof(known_rates[0]);
+    int code;
+
+    for (code = 0; code < count; code++) {
+        if (known_rates[code] == samplerate)
+            return code;
+    }
+
+    return AVERROR(EINVAL);
+}
+
+/**
+ * Set up the Sonic encoder: pick the coding parameters for the lossy or
+ * lossless variant, allocate the work buffers and write the stream header
+ * into avctx->extradata.
+ *
+ * @param avctx codec context; channels, sample_rate and codec->id are read,
+ *              extradata, extradata_size and frame_size are set
+ * @return 0 on success, a negative AVERROR code on invalid parameters or
+ *         allocation failure
+ */
+static av_cold int sonic_encode_init(AVCodecContext *avctx)
+{
+    SonicContext *s = avctx->priv_data;
+    PutBitContext pb;
+    int i, samplerate_code;
+
+    s->version = 2;
+
+    if (avctx->channels > MAX_CHANNELS)
+    {
+        av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
+        return AVERROR(EINVAL); /* only stereo or mono for now */
+    }
+
+    /* The header stores the rate as a 4-bit table index; a rate without an
+     * index cannot be signalled, so refuse it before anything is allocated. */
+    samplerate_code = code_samplerate(avctx->sample_rate);
+    if (samplerate_code < 0)
+    {
+        av_log(avctx, AV_LOG_ERROR, "Unsupported sample rate %d\n", avctx->sample_rate);
+        return samplerate_code;
+    }
+
+    if (avctx->channels == 2)
+        s->decorrelation = MID_SIDE;
+    else
+        s->decorrelation = 3;
+
+    if (avctx->codec->id == AV_CODEC_ID_SONIC_LS)
+    {
+        s->lossless = 1;
+        s->num_taps = 32;
+        s->downsampling = 1;
+        s->quantization = 0.0;
+    }
+    else
+    {
+        s->num_taps = 128;
+        s->downsampling = 2;
+        s->quantization = 1.0;
+    }
+
+    // the header field holds (num_taps / 32) - 1 in 5 bits: 32..1024
+    if (s->num_taps < 32 || s->num_taps > 1024 || s->num_taps % 32) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    // generate taps
+    s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
+    if (!s->tap_quant)
+        return AVERROR(ENOMEM);
+    for (i = 0; i < s->num_taps; i++)
+        s->tap_quant[i] = ff_sqrt(i+1);
+
+    s->channels = avctx->channels;
+    s->samplerate = avctx->sample_rate;
+
+    s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
+    s->frame_size = s->channels*s->block_align*s->downsampling;
+
+    s->tail_size = s->num_taps*s->channels;
+    s->tail = av_calloc(s->tail_size, sizeof(*s->tail));
+    if (!s->tail)
+        return AVERROR(ENOMEM);
+
+    s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k) );
+    if (!s->predictor_k)
+        return AVERROR(ENOMEM);
+
+    for (i = 0; i < s->channels; i++)
+    {
+        s->coded_samples[i] = av_calloc(s->block_align, sizeof(**s->coded_samples));
+        if (!s->coded_samples[i])
+            return AVERROR(ENOMEM);
+    }
+
+    s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
+    if (!s->int_samples)
+        return AVERROR(ENOMEM);
+
+    s->window_size = ((2*s->tail_size)+s->frame_size);
+    s->window = av_calloc(s->window_size, sizeof(*s->window));
+    if (!s->window)
+        return AVERROR(ENOMEM);
+
+    avctx->extradata = av_mallocz(16);
+    if (!avctx->extradata)
+        return AVERROR(ENOMEM);
+    init_put_bits(&pb, avctx->extradata, 16*8);
+
+    put_bits(&pb, 2, s->version); // version
+    if (s->version >= 1)
+    {
+        if (s->version >= 2) {
+            put_bits(&pb, 8, s->version);
+            put_bits(&pb, 8, s->minor_version);
+        }
+        put_bits(&pb, 2, s->channels);
+        put_bits(&pb, 4, samplerate_code);
+    }
+    put_bits(&pb, 1, s->lossless);
+    if (!s->lossless)
+        put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
+    put_bits(&pb, 2, s->decorrelation);
+    put_bits(&pb, 2, s->downsampling);
+    put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
+    put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
+
+    flush_put_bits(&pb);
+    avctx->extradata_size = put_bits_count(&pb)/8;
+
+    av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
+        s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
+
+    avctx->frame_size = s->block_align*s->downsampling;
+
+    return 0;
+}
+
+/**
+ * Free every buffer owned by the encoder context and reset the pointers.
+ *
+ * @return always 0
+ */
+static av_cold int sonic_encode_close(AVCodecContext *avctx)
+{
+    SonicContext *s = avctx->priv_data;
+    int ch = 0;
+
+    /* per-channel buffers */
+    while (ch < s->channels) {
+        av_freep(&s->coded_samples[ch]);
+        ch++;
+    }
+
+    /* shared buffers */
+    av_freep(&s->predictor_k);
+    av_freep(&s->tail);
+    av_freep(&s->tap_quant);
+    av_freep(&s->window);
+    av_freep(&s->int_samples);
+
+    return 0;
+}
+
+/**
+ * Encode one frame of interleaved 16-bit samples.
+ *
+ * Steps: convert to the internal sample scale, decorrelate the channels,
+ * build the analysis window (previous tail + frame + zero padding), derive
+ * and store the predictor taps, downsample the whitened window into the
+ * per-channel coded samples, choose a quantizer in lossy mode, and write
+ * everything with the range coder.
+ *
+ * @param avctx          codec context
+ * @param avpkt          output packet, allocated here
+ * @param frame          input audio; data[0] is read as int16_t samples
+ * @param got_packet_ptr set to 1 when a packet was produced
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
+                              const AVFrame *frame, int *got_packet_ptr)
+{
+    SonicContext *s = avctx->priv_data;
+    RangeCoder c;
+    int i, j, ch, quant = 0, x = 0;
+    int ret;
+    const short *samples = (const int16_t*)frame->data[0];
+    uint8_t state[32];
+
+    if ((ret = ff_alloc_packet2(avctx, avpkt, s->frame_size * 5 + 1000)) < 0)
+        return ret;
+
+    ff_init_range_encoder(&c, avpkt->data, avpkt->size);
+    ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
+    memset(state, 128, sizeof(state));
+
+    // short -> internal
+    for (i = 0; i < s->frame_size; i++)
+        s->int_samples[i] = samples[i];
+
+    /* Scale by multiplication: samples may be negative, and left-shifting a
+     * negative value is undefined in C. */
+    if (!s->lossless)
+        for (i = 0; i < s->frame_size; i++)
+            s->int_samples[i] = s->int_samples[i] * SAMPLE_FACTOR;
+
+    switch(s->decorrelation)
+    {
+        case MID_SIDE:
+            for (i = 0; i < s->frame_size; i += s->channels)
+            {
+                s->int_samples[i] += s->int_samples[i+1];
+                s->int_samples[i+1] -= shift(s->int_samples[i], 1);
+            }
+            break;
+        case LEFT_SIDE:
+            for (i = 0; i < s->frame_size; i += s->channels)
+                s->int_samples[i+1] -= s->int_samples[i];
+            break;
+        case RIGHT_SIDE:
+            for (i = 0; i < s->frame_size; i += s->channels)
+                s->int_samples[i] -= s->int_samples[i+1];
+            break;
+        default:
+            /* any other value (3 is used for non-stereo input): no decorrelation */
+            break;
+    }
+
+    memset(s->window, 0, sizeof(*s->window) * s->window_size);
+
+    /* window = tail of the previous frame, this frame, zero padding */
+    for (i = 0; i < s->tail_size; i++)
+        s->window[x++] = s->tail[i];
+
+    for (i = 0; i < s->frame_size; i++)
+        s->window[x++] = s->int_samples[i];
+
+    for (i = 0; i < s->tail_size; i++)
+        s->window[x++] = 0;
+
+    /* remember the end of this frame for the next call */
+    for (i = 0; i < s->tail_size; i++)
+        s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
+
+    // generate taps
+    modified_levinson_durbin(s->window, s->window_size,
+                s->predictor_k, s->num_taps, s->channels, s->tap_quant);
+    if ((ret = intlist_write(&c, state, s->predictor_k, s->num_taps, 0)) < 0)
+        return ret;
+
+    /* sum groups of "downsampling" consecutive whitened samples per channel */
+    for (ch = 0; ch < s->channels; ch++)
+    {
+        x = s->tail_size+ch;
+        for (i = 0; i < s->block_align; i++)
+        {
+            int sum = 0;
+            for (j = 0; j < s->downsampling; j++, x += s->channels)
+                sum += s->window[x];
+            s->coded_samples[ch][i] = sum;
+        }
+    }
+
+    // simple rate control code
+    if (!s->lossless)
+    {
+        double energy1 = 0.0, energy2 = 0.0;
+        for (ch = 0; ch < s->channels; ch++)
+        {
+            for (i = 0; i < s->block_align; i++)
+            {
+                double sample = s->coded_samples[ch][i];
+                energy2 += sample*sample;
+                energy1 += fabs(sample);
+            }
+        }
+
+        energy2 = sqrt(energy2/(s->channels*s->block_align));
+        energy1 = M_SQRT2*energy1/(s->channels*s->block_align);
+
+        // increase bitrate when samples are like a gaussian distribution
+        // reduce bitrate when samples are like a two-tailed exponential distribution
+
+        if (energy2 > energy1)
+            energy2 += (energy2-energy1)*RATE_VARIATION;
+
+        quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
+//        av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
+
+        quant = av_clip(quant, 1, 65534);
+
+        put_symbol(&c, state, quant, 0, NULL, NULL);
+
+        quant *= SAMPLE_FACTOR;
+    }
+
+    // write out coded samples
+    for (ch = 0; ch < s->channels; ch++)
+    {
+        if (!s->lossless)
+            for (i = 0; i < s->block_align; i++)
+                s->coded_samples[ch][i] = ROUNDED_DIV(s->coded_samples[ch][i], quant);
+
+        if ((ret = intlist_write(&c, state, s->coded_samples[ch], s->block_align, 1)) < 0)
+            return ret;
+    }
+
+//    av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8);
+
+    avpkt->size = ff_rac_terminate(&c);
+    *got_packet_ptr = 1;
+    return 0;
+
+}
+#endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */
+
+#if CONFIG_SONIC_DECODER
+/* Sample rates in Hz, indexed by the 4-bit code read from extradata. The
+ * order matches the codes returned by code_samplerate() on the encoder side. */
+static const int samplerate_table[] =
+    { 44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000 };
+
+/**
+ * Initialise the Sonic decoder from the stream header stored in extradata.
+ *
+ * Parses version, channel count, sample rate, lossless flag, decorrelation
+ * mode, downsampling factor and tap count, derives the block/frame sizes and
+ * allocates the predictor and sample buffers.
+ *
+ * @param avctx codec context; priv_data is the SonicContext to fill in
+ * @return 0 on success, AVERROR_INVALIDDATA for a missing or inconsistent
+ *         header, AVERROR(ENOMEM) if a buffer cannot be allocated
+ */
+static av_cold int sonic_decode_init(AVCodecContext *avctx)
+{
+    SonicContext *s = avctx->priv_data;
+    GetBitContext gb;
+    int i;
+
+    s->channels = avctx->channels;
+    s->samplerate = avctx->sample_rate;
+
+    if (!avctx->extradata)
+    {
+        av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    init_get_bits8(&gb, avctx->extradata, avctx->extradata_size);
+
+    s->version = get_bits(&gb, 2);
+    if (s->version >= 2) {
+        s->version       = get_bits(&gb, 8);
+        s->minor_version = get_bits(&gb, 8);
+    }
+    if (s->version != 2)
+    {
+        av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (s->version >= 1)
+    {
+        unsigned sample_rate_index;
+
+        s->channels = get_bits(&gb, 2);
+
+        // the index field is 4 bits wide but the table has fewer than 16
+        // entries, so it must be range-checked before use
+        sample_rate_index = get_bits(&gb, 4);
+        if (sample_rate_index >= sizeof(samplerate_table)/sizeof(samplerate_table[0]))
+        {
+            av_log(avctx, AV_LOG_ERROR, "Invalid sample rate index %u\n", sample_rate_index);
+            return AVERROR_INVALIDDATA;
+        }
+        s->samplerate = samplerate_table[sample_rate_index];
+        av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
+            s->channels, s->samplerate);
+    }
+
+    // a 2-bit channel field can also encode 0, which would yield empty buffers
+    if (s->channels > MAX_CHANNELS || s->channels < 1)
+    {
+        av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    s->lossless = get_bits1(&gb);
+    if (!s->lossless)
+        skip_bits(&gb, 3); // XXX FIXME
+    s->decorrelation = get_bits(&gb, 2);
+    if (s->decorrelation != 3 && s->channels != 2) {
+        av_log(avctx, AV_LOG_ERROR, "invalid decorrelation %d\n", s->decorrelation);
+        return AVERROR_INVALIDDATA;
+    }
+
+    s->downsampling = get_bits(&gb, 2);
+    if (!s->downsampling) {
+        av_log(avctx, AV_LOG_ERROR, "invalid downsampling value\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    s->num_taps = (get_bits(&gb, 5)+1)<<5;
+    if (get_bits1(&gb)) // XXX FIXME
+        av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
+
+    s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
+    s->frame_size = s->channels*s->block_align*s->downsampling;
+//    avctx->frame_size = s->block_align;
+
+    // sonic_decode_frame() reloads num_taps history samples per channel from
+    // the tail of int_samples, so one frame must hold at least that many
+    if (s->num_taps*s->channels > s->frame_size)
+    {
+        av_log(avctx, AV_LOG_ERROR,
+               "number of taps times channels (%d * %d) larger than frame size %d\n",
+               s->num_taps, s->channels, s->frame_size);
+        return AVERROR_INVALIDDATA;
+    }
+
+    av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
+        s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
+
+    // generate taps
+    s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
+    if (!s->tap_quant)
+        return AVERROR(ENOMEM);
+    for (i = 0; i < s->num_taps; i++)
+        s->tap_quant[i] = ff_sqrt(i+1);
+
+    s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k));
+    if (!s->predictor_k)
+        return AVERROR(ENOMEM);
+
+    for (i = 0; i < s->channels; i++)
+    {
+        s->predictor_state[i] = av_calloc(s->num_taps, sizeof(**s->predictor_state));
+        if (!s->predictor_state[i])
+            return AVERROR(ENOMEM);
+    }
+
+    for (i = 0; i < s->channels; i++)
+    {
+        s->coded_samples[i] = av_calloc(s->block_align, sizeof(**s->coded_samples));
+        if (!s->coded_samples[i])
+            return AVERROR(ENOMEM);
+    }
+    s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
+    if (!s->int_samples)
+        return AVERROR(ENOMEM);
+
+    avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+    return 0;
+}
+
+/**
+ * Release every buffer owned by the decoder context.
+ *
+ * @return always 0
+ */
+static av_cold int sonic_decode_close(AVCodecContext *avctx)
+{
+    SonicContext *ctx = avctx->priv_data;
+    int ch = 0;
+
+    // buffers shared by all channels
+    av_freep(&ctx->int_samples);
+    av_freep(&ctx->tap_quant);
+    av_freep(&ctx->predictor_k);
+
+    // per-channel buffers
+    while (ch < ctx->channels)
+    {
+        av_freep(&ctx->predictor_state[ch]);
+        av_freep(&ctx->coded_samples[ch]);
+        ch++;
+    }
+
+    return 0;
+}
+
+/**
+ * Decode one packet into a frame of interleaved signed 16-bit samples.
+ *
+ * The packet is read with the range decoder in this order: the quantized
+ * predictor coefficients, then (non-lossless streams only) the quantizer,
+ * then block_align coded values for each channel.
+ *
+ * @param avctx         codec context; priv_data is the SonicContext
+ * @param data          AVFrame that receives the decoded samples
+ * @param got_frame_ptr set to 1 once a frame has been produced
+ * @param avpkt         input packet
+ * @return buf_size on success, 0 for an empty packet, or the negative value
+ *         returned by ff_get_buffer()
+ */
+static int sonic_decode_frame(AVCodecContext *avctx,
+                            void *data, int *got_frame_ptr,
+                            AVPacket *avpkt)
+{
+    const uint8_t *buf = avpkt->data;
+    int buf_size = avpkt->size;
+    SonicContext *s = avctx->priv_data;
+    RangeCoder c;
+    uint8_t state[32];
+    int i, quant, ch, j, ret;
+    int16_t *samples;
+    AVFrame *frame = data;
+
+    if (buf_size == 0) return 0;
+
+    // NOTE(review): divides by avctx->channels while the rest of the function
+    // uses s->channels -- confirm both always agree and are non-zero
+    frame->nb_samples = s->frame_size / avctx->channels;
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+    samples = (int16_t *)frame->data[0];
+
+//    av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
+
+    memset(state, 128, sizeof(state));
+    ff_init_range_decoder(&c, buf, buf_size);
+    ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
+
+    // NOTE(review): the result of intlist_read() is not checked here or below
+    intlist_read(&c, state, s->predictor_k, s->num_taps, 0);
+
+    // dequantize
+    for (i = 0; i < s->num_taps; i++)
+        s->predictor_k[i] *= s->tap_quant[i];
+
+    // lossless streams carry no quantizer; otherwise it is coded per packet
+    if (s->lossless)
+        quant = 1;
+    else
+        quant = get_symbol(&c, state, 0) * SAMPLE_FACTOR;
+
+//    av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
+
+    for (ch = 0; ch < s->channels; ch++)
+    {
+        // write position in int_samples; channels are interleaved
+        int x = ch;
+
+        predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps);
+
+        intlist_read(&c, state, s->coded_samples[ch], s->block_align, 1);
+
+        for (i = 0; i < s->block_align; i++)
+        {
+            // each coded value yields `downsampling` output samples: the first
+            // downsampling-1 are predicted with a zero residual ...
+            for (j = 0; j < s->downsampling - 1; j++)
+            {
+                s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0);
+                x += s->channels;
+            }
+
+            // ... and the last one uses the coded value scaled by quant
+            s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * quant);
+            x += s->channels;
+        }
+
+        // store the last num_taps samples of this channel, newest first
+        for (i = 0; i < s->num_taps; i++)
+            s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
+    }
+
+    // undo the inter-channel decorrelation; any other mode leaves the
+    // samples untouched
+    switch(s->decorrelation)
+    {
+        case MID_SIDE:
+            for (i = 0; i < s->frame_size; i += s->channels)
+            {
+                s->int_samples[i+1] += shift(s->int_samples[i], 1);
+                s->int_samples[i] -= s->int_samples[i+1];
+            }
+            break;
+        case LEFT_SIDE:
+            for (i = 0; i < s->frame_size; i += s->channels)
+                s->int_samples[i+1] += s->int_samples[i];
+            break;
+        case RIGHT_SIDE:
+            for (i = 0; i < s->frame_size; i += s->channels)
+                s->int_samples[i] += s->int_samples[i+1];
+            break;
+    }
+
+    // non-lossless streams: rescale through shift() by SAMPLE_SHIFT
+    // (presumably a rounding right shift -- confirm against shift())
+    if (!s->lossless)
+        for (i = 0; i < s->frame_size; i++)
+            s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
+
+    // internal -> short
+    for (i = 0; i < s->frame_size; i++)
+        samples[i] = av_clip_int16(s->int_samples[i]);
+
+    *got_frame_ptr = 1;
+
+    return buf_size;
+}
+
+/* Codec table entry for the Sonic decoder; flagged experimental. */
+AVCodec ff_sonic_decoder = {
+    .name           = "sonic",
+    .long_name      = NULL_IF_CONFIG_SMALL("Sonic"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_SONIC,
+    .priv_data_size = sizeof(SonicContext),
+    .init           = sonic_decode_init,
+    .close          = sonic_decode_close,
+    .decode         = sonic_decode_frame,
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_EXPERIMENTAL,
+};
+#endif /* CONFIG_SONIC_DECODER */
+
+#if CONFIG_SONIC_ENCODER
+/* Codec table entry for the Sonic encoder; flagged experimental and limited
+ * to signed 16-bit input samples. */
+AVCodec ff_sonic_encoder = {
+    .name           = "sonic",
+    .long_name      = NULL_IF_CONFIG_SMALL("Sonic"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_SONIC,
+    .priv_data_size = sizeof(SonicContext),
+    .init           = sonic_encode_init,
+    .encode2        = sonic_encode_frame,
+    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
+    .capabilities   = CODEC_CAP_EXPERIMENTAL,
+    .close          = sonic_encode_close,
+};
+#endif
+
+#if CONFIG_SONIC_LS_ENCODER
+/* Codec table entry for the Sonic lossless encoder; it uses the same
+ * init/encode/close callbacks as ff_sonic_encoder and differs only in its
+ * name and codec id. */
+AVCodec ff_sonic_ls_encoder = {
+    .name           = "sonicls",
+    .long_name      = NULL_IF_CONFIG_SMALL("Sonic lossless"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_SONIC_LS,
+    .priv_data_size = sizeof(SonicContext),
+    .init           = sonic_encode_init,
+    .encode2        = sonic_encode_frame,
+    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
+    .capabilities   = CODEC_CAP_EXPERIMENTAL,
+    .close          = sonic_encode_close,
+};
+#endif
diff --git a/libavcodec/sp5x.h b/libavcodec/sp5x.h
index 1577302..004fcbb 100644
--- a/libavcodec/sp5x.h
+++ b/libavcodec/sp5x.h
@@ -2,20 +2,20 @@
  * Sunplus JPEG tables
  * Copyright (c) 2003 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/sp5xdec.c b/libavcodec/sp5xdec.c
index ca6b9fd..7496198 100644
--- a/libavcodec/sp5xdec.c
+++ b/libavcodec/sp5xdec.c
@@ -2,20 +2,20 @@
  * Sunplus JPEG decoder (SP5X)
  * Copyright (c) 2003 Alex Beregszaszi
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -72,7 +72,7 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
         for (i = 2; i < buf_size-2 && j < buf_size+1024-2; i++)
             recoded[j++] = buf[i];
     else
-    for (i = 14; i < buf_size && j < buf_size+1024-2; i++)
+    for (i = 14; i < buf_size && j < buf_size+1024-3; i++)
     {
         recoded[j++] = buf[i];
         if (buf[i] == 0xff)
@@ -90,9 +90,10 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
 
     av_free(recoded);
 
-    return i;
+    return i < 0 ? i : avpkt->size;
 }
 
+#if CONFIG_SP5X_DECODER
 AVCodec ff_sp5x_decoder = {
     .name           = "sp5x",
     .long_name      = NULL_IF_CONFIG_SMALL("Sunplus JPEG (SP5X)"),
@@ -103,8 +104,10 @@ AVCodec ff_sp5x_decoder = {
     .close          = ff_mjpeg_decode_end,
     .decode         = sp5x_decode_frame,
     .capabilities   = CODEC_CAP_DR1,
+    .max_lowres     = 3,
 };
-
+#endif
+#if CONFIG_AMV_DECODER
 AVCodec ff_amv_decoder = {
     .name           = "amv",
     .long_name      = NULL_IF_CONFIG_SMALL("AMV Video"),
@@ -114,4 +117,6 @@ AVCodec ff_amv_decoder = {
     .init           = ff_mjpeg_decode_init,
     .close          = ff_mjpeg_decode_end,
     .decode         = sp5x_decode_frame,
+    .max_lowres     = 3,
 };
+#endif
diff --git a/libavcodec/sparc/README b/libavcodec/sparc/README
new file mode 100644
index 0000000..f9f2349
--- /dev/null
+++ b/libavcodec/sparc/README
@@ -0,0 +1,6 @@
+SPARC optimizations have been removed in
+commit b4dd424d96f09f9bafb88e47f37df65dc4529143
+The last revision with the optimizations is fb1b70c1ed50951c5fc1a309c3c446b2eaaf564b
+
+If you want to maintain these (or other) SPARC optimizations in ffmpeg, then please
+contact ffmpeg-devel@ffmpeg.org
diff --git a/libavcodec/srtdec.c b/libavcodec/srtdec.c
index bbc8061..b16645a 100644
--- a/libavcodec/srtdec.c
+++ b/libavcodec/srtdec.c
@@ -2,25 +2,26 @@
  * SubRip subtitle decoder
  * Copyright (c) 2010  Aurelien Jacobs <aurel@gnuage.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "libavutil/avstring.h"
 #include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
 #include "libavutil/parseutils.h"
 #include "avcodec.h"
 #include "ass.h"
@@ -49,7 +50,7 @@ typedef struct {
 static const char *srt_to_ass(AVCodecContext *avctx, char *out, char *out_end,
                               const char *in, int x1, int y1, int x2, int y2)
 {
-    char c, *param, buffer[128], tmp[128];
+    char *param, buffer[128], tmp[128];
     int len, tag_close, sptr = 1, line_start = 1, an = 0, end = 0;
     SrtStack stack[16];
 
@@ -60,10 +61,11 @@ static const char *srt_to_ass(AVCodecContext *avctx, char *out, char *out_end,
 
     if (x1 >= 0 && y1 >= 0) {
         if (x2 >= 0 && y2 >= 0 && (x2 != x1 || y2 != y1))
-            out += snprintf(out, out_end-out,
+            snprintf(out, out_end-out,
                             "{\\an1}{\\move(%d,%d,%d,%d)}", x1, y1, x2, y2);
         else
-            out += snprintf(out, out_end-out, "{\\an1}{\\pos(%d,%d)}", x1, y1);
+            snprintf(out, out_end-out, "{\\an1}{\\pos(%d,%d)}", x1, y1);
+        out += strlen(out);
     }
 
     for (; out < out_end && !end && *in; in++) {
@@ -77,7 +79,8 @@ static const char *srt_to_ass(AVCodecContext *avctx, char *out, char *out_end,
             }
             while (out[-1] == ' ')
                 out--;
-            out += snprintf(out, out_end-out, "\\N");
+            snprintf(out, out_end-out, "\\N");
+            if(out<out_end) out += strlen(out);
             line_start = 1;
             break;
         case ' ':
@@ -86,16 +89,18 @@ static const char *srt_to_ass(AVCodecContext *avctx, char *out, char *out_end,
             break;
         case '{':    /* skip all {\xxx} substrings except for {\an%d}
                         and all microdvd like styles such as {Y:xxx} */
-            an += sscanf(in, "{\\an%*1u}%c", &c) == 1;
-            if ((an != 1 && sscanf(in, "{\\%*[^}]}%n%c", &len, &c) > 0) ||
-                sscanf(in, "{%*1[CcFfoPSsYy]:%*[^}]}%n%c", &len, &c) > 0) {
+            len = 0;
+            an += sscanf(in, "{\\an%*1u}%n", &len) >= 0 && len > 0;
+            if ((an != 1 && (len = 0, sscanf(in, "{\\%*[^}]}%n", &len) >= 0 && len > 0)) ||
+                (len = 0, sscanf(in, "{%*1[CcFfoPSsYy]:%*[^}]}%n", &len) >= 0 && len > 0)) {
                 in += len - 1;
             } else
                 *out++ = *in;
             break;
         case '<':
             tag_close = in[1] == '/';
-            if (sscanf(in+tag_close+1, "%127[^>]>%n%c", buffer, &len,&c) >= 2) {
+            len = 0;
+            if (sscanf(in+tag_close+1, "%127[^>]>%n", buffer, &len) >= 1 && len > 0) {
                 if ((param = strchr(buffer, ' ')))
                     *param++ = 0;
                 if ((!tag_close && sptr < FF_ARRAY_ELEMS(stack)) ||
@@ -110,8 +115,9 @@ static const char *srt_to_ass(AVCodecContext *avctx, char *out, char *out_end,
                                 if (stack[sptr-1].param[i][0])
                                     for (j=sptr-2; j>=0; j--)
                                         if (stack[j].param[i][0]) {
-                                            out += snprintf(out, out_end-out,
+                                            snprintf(out, out_end-out,
                                                             "%s", stack[j].param[i]);
+                                            if(out<out_end) out += strlen(out);
                                             break;
                                         }
                         } else {
@@ -145,13 +151,16 @@ static const char *srt_to_ass(AVCodecContext *avctx, char *out, char *out_end,
                                     param++;
                             }
                             for (i=0; i<PARAM_NUMBER; i++)
-                                if (stack[sptr].param[i][0])
-                                    out += snprintf(out, out_end-out,
+                                if (stack[sptr].param[i][0]) {
+                                    snprintf(out, out_end-out,
                                                     "%s", stack[sptr].param[i]);
+                                    if(out<out_end) out += strlen(out);
+                                }
                         }
                     } else if (!buffer[1] && strspn(buffer, "bisu") == 1) {
-                        out += snprintf(out, out_end-out,
+                        snprintf(out, out_end-out,
                                         "{\\%c%d}", buffer[0], !tag_close);
+                        if(out<out_end) out += strlen(out);
                     } else {
                         unknown = 1;
                         snprintf(tmp, sizeof(tmp), "</%s>", buffer);
@@ -180,7 +189,7 @@ static const char *srt_to_ass(AVCodecContext *avctx, char *out, char *out_end,
         out -= 2;
     while (out[-1] == ' ')
         out--;
-    out += snprintf(out, out_end-out, "\r\n");
+    snprintf(out, out_end-out, "\r\n");
     return in;
 }
 
@@ -195,7 +204,8 @@ static const char *read_ts(const char *buf, int *ts_start, int *ts_end,
                        "%*[ ]X1:%u X2:%u Y1:%u Y2:%u",
                        &hs, &ms, &ss, ts_start, &he, &me, &se, ts_end,
                        x1, x2, y1, y2);
-        buf += strcspn(buf, "\n") + 1;
+        buf += strcspn(buf, "\n");
+        buf += !!*buf;
         if (c >= 8) {
             *ts_start = 100*(ss + 60*(ms + 60*hs)) + *ts_start/10;
             *ts_end   = 100*(se + 60*(me + 60*he)) + *ts_end  /10;
@@ -213,30 +223,61 @@ static int srt_decode_frame(AVCodecContext *avctx,
     char buffer[2048];
     const char *ptr = avpkt->data;
     const char *end = avpkt->data + avpkt->size;
+    int size;
+    const uint8_t *p = av_packet_get_side_data(avpkt, AV_PKT_DATA_SUBTITLE_POSITION, &size);
+
+    if (p && size == 16) {
+        x1 = AV_RL32(p     );
+        y1 = AV_RL32(p +  4);
+        x2 = AV_RL32(p +  8);
+        y2 = AV_RL32(p + 12);
+    }
 
     if (avpkt->size <= 0)
         return avpkt->size;
 
-    ff_ass_init(sub);
-
     while (ptr < end && *ptr) {
-        ptr = read_ts(ptr, &ts_start, &ts_end, &x1, &y1, &x2, &y2);
-        if (!ptr)
-            break;
+        if (avctx->codec->id == AV_CODEC_ID_SRT) {
+            ptr = read_ts(ptr, &ts_start, &ts_end, &x1, &y1, &x2, &y2);
+            if (!ptr)
+                break;
+        } else {
+            // Do final divide-by-10 outside rescale to force rounding down.
+            ts_start = av_rescale_q(avpkt->pts,
+                                    avctx->time_base,
+                                    (AVRational){1,100});
+            ts_end   = av_rescale_q(avpkt->pts + avpkt->duration,
+                                    avctx->time_base,
+                                    (AVRational){1,100});
+        }
         ptr = srt_to_ass(avctx, buffer, buffer+sizeof(buffer), ptr,
                          x1, y1, x2, y2);
-        ff_ass_add_rect(sub, buffer, ts_start, ts_end, 0);
+        ff_ass_add_rect(sub, buffer, ts_start, ts_end-ts_start, 0);
     }
 
     *got_sub_ptr = sub->num_rects > 0;
     return avpkt->size;
 }
 
+#if CONFIG_SRT_DECODER
+/* deprecated decoder */
 AVCodec ff_srt_decoder = {
     .name         = "srt",
-    .long_name    = NULL_IF_CONFIG_SMALL("SubRip subtitle"),
+    .long_name    = NULL_IF_CONFIG_SMALL("SubRip subtitle with embedded timing"),
     .type         = AVMEDIA_TYPE_SUBTITLE,
     .id           = AV_CODEC_ID_SRT,
     .init         = ff_ass_subtitle_header_default,
     .decode       = srt_decode_frame,
 };
+#endif
+
+#if CONFIG_SUBRIP_DECODER
+/* SubRip decoder entry. For this codec id srt_decode_frame() derives the
+ * event timing from the packet pts/duration instead of parsing a timestamp
+ * line out of the payload. */
+AVCodec ff_subrip_decoder = {
+    .name         = "subrip",
+    .long_name    = NULL_IF_CONFIG_SMALL("SubRip subtitle"),
+    .type         = AVMEDIA_TYPE_SUBTITLE,
+    .id           = AV_CODEC_ID_SUBRIP,
+    .init         = ff_ass_subtitle_header_default,
+    .decode       = srt_decode_frame,
+};
+#endif
diff --git a/libavcodec/srtenc.c b/libavcodec/srtenc.c
new file mode 100644
index 0000000..89c26dc
--- /dev/null
+++ b/libavcodec/srtenc.c
@@ -0,0 +1,332 @@
+/*
+ * SubRip subtitle encoder
+ * Copyright (c) 2010  Aurelien Jacobs <aurel@gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdarg.h>
+#include "avcodec.h"
+#include "libavutil/avstring.h"
+#include "libavutil/bprint.h"
+#include "ass_split.h"
+#include "ass.h"
+
+
+#define SRT_STACK_SIZE 64
+
+/* Private state of the SubRip encoder. */
+typedef struct {
+    AVCodecContext *avctx;      /* owning codec context (logging, codec id checks) */
+    ASSSplitContext *ass_ctx;   /* result of ff_ass_split() on the subtitle header */
+    AVBPrint buffer;            /* output text; cleared at the start of each encode call */
+    unsigned timestamp_end;     /* offset in buffer of the "\r\n" ending the last timestamp line */
+    int count;                  /* running event number printed before each timestamp */
+    char stack[SRT_STACK_SIZE]; /* currently open tags, one character per tag */
+    int stack_ptr;              /* number of entries in use in stack */
+    int alignment_applied;      /* non-zero once an {\an} tag was written for the current dialog */
+} SRTContext;
+
+
+/* printf-style append of formatted text to the output buffer of s. */
+#ifdef __GNUC__
+__attribute__ ((__format__ (__printf__, 2, 3)))
+#endif
+static void srt_print(SRTContext *s, const char *str, ...)
+{
+    va_list ap;
+
+    va_start(ap, str);
+    av_vbprintf(&s->buffer, str, ap);
+    va_end(ap);
+}
+
+/* Push tag c on the open-tag stack; returns 0, or -1 if the stack is full. */
+static int srt_stack_push(SRTContext *s, const char c)
+{
+    if (s->stack_ptr < SRT_STACK_SIZE) {
+        s->stack[s->stack_ptr] = c;
+        s->stack_ptr++;
+        return 0;
+    }
+    return -1;
+}
+
+/* Pop and return the most recently pushed tag, or 0 if the stack is empty. */
+static char srt_stack_pop(SRTContext *s)
+{
+    if (s->stack_ptr > 0) {
+        s->stack_ptr--;
+        return s->stack[s->stack_ptr];
+    }
+    return 0;
+}
+
+/* Return the index of the topmost stack entry equal to c, or -1 if none. */
+static int srt_stack_find(SRTContext *s, const char c)
+{
+    int pos = s->stack_ptr;
+
+    /* walk down from the top until a match or the bottom is passed */
+    while (--pos >= 0 && s->stack[pos] != c)
+        ;
+    return pos;
+}
+
+/* Emit the closing tag for a stack entry; 'f' expands to "</font>". */
+static void srt_close_tag(SRTContext *s, char tag)
+{
+    const char *suffix = "";
+
+    if (tag == 'f')
+        suffix = "ont";
+    srt_print(s, "</%c%s>", tag, suffix);
+}
+
+/*
+ * Open or close tag c on the tag stack.
+ * close == 0: push c (an overflow is logged).
+ * close != 0: pop and close every tag down to and including the topmost c;
+ *             c == 0 closes all open tags; nothing happens if c is not open.
+ */
+static void srt_stack_push_pop(SRTContext *s, const char c, int close)
+{
+    int target;
+
+    if (!close) {
+        if (srt_stack_push(s, c) < 0)
+            av_log(s->avctx, AV_LOG_ERROR, "tag stack overflow\n");
+        return;
+    }
+
+    target = c ? srt_stack_find(s, c) : 0;
+    if (target < 0)
+        return;
+    while (s->stack_ptr != target)
+        srt_close_tag(s, srt_stack_pop(s));
+}
+
+/*
+ * Emit the markup needed to render the named ASS style: a <font> tag for a
+ * non-default face/size/colour, <b>/<i>/<u> for non-default weight flags and
+ * an {\an} tag for a non-default alignment. Unknown styles emit nothing.
+ */
+static void srt_style_apply(SRTContext *s, const char *style)
+{
+    ASSStyle *st = ff_ass_style_get(s->ass_ctx, style);
+    int color, custom_face, custom_size, custom_color;
+
+    if (!st)
+        return;
+
+    color        = st->primary_color & 0xFFFFFF;
+    custom_face  = st->font_name && strcmp(st->font_name, ASS_DEFAULT_FONT);
+    custom_size  = st->font_size != ASS_DEFAULT_FONT_SIZE;
+    custom_color = color != ASS_DEFAULT_COLOR;
+
+    if (custom_face || custom_size || custom_color) {
+        srt_print(s, "<font");
+        if (custom_face)
+            srt_print(s, " face=\"%s\"", st->font_name);
+        if (custom_size)
+            srt_print(s, " size=\"%d\"", st->font_size);
+        if (custom_color) {
+            /* swap the lowest and third byte of the colour value */
+            srt_print(s, " color=\"#%06x\"",
+                      (color & 0xFF0000) >> 16 | (color & 0xFF00) | (color & 0xFF) << 16);
+        }
+        srt_print(s, ">");
+        srt_stack_push(s, 'f');
+    }
+
+    if (st->bold != ASS_DEFAULT_BOLD) {
+        srt_print(s, "<b>");
+        srt_stack_push(s, 'b');
+    }
+    if (st->italic != ASS_DEFAULT_ITALIC) {
+        srt_print(s, "<i>");
+        srt_stack_push(s, 'i');
+    }
+    if (st->underline != ASS_DEFAULT_UNDERLINE) {
+        srt_print(s, "<u>");
+        srt_stack_push(s, 'u');
+    }
+
+    if (st->alignment != ASS_DEFAULT_ALIGNMENT) {
+        srt_print(s, "{\\an%d}", st->alignment);
+        s->alignment_applied = 1;
+    }
+}
+
+
+/*
+ * Set up the encoder: split the ASS subtitle header and initialise the
+ * output buffer. Returns AVERROR_INVALIDDATA if the header cannot be split.
+ */
+static av_cold int srt_encode_init(AVCodecContext *avctx)
+{
+    SRTContext *ctx = avctx->priv_data;
+
+    ctx->avctx   = avctx;
+    ctx->ass_ctx = ff_ass_split(avctx->subtitle_header);
+    av_bprint_init(&ctx->buffer, 0, AV_BPRINT_SIZE_UNLIMITED);
+
+    if (!ctx->ass_ctx)
+        return AVERROR_INVALIDDATA;
+    return 0;
+}
+
+/* Text callback: append len bytes of plain text to the output buffer. */
+static void srt_text_cb(void *priv, const char *text, int len)
+{
+    SRTContext *ctx = priv;
+
+    av_bprint_append_data(&ctx->buffer, text, len);
+}
+
+/* New-line callback: always writes CR LF; the forced flag is ignored. */
+static void srt_new_line_cb(void *priv, int forced)
+{
+    SRTContext *ctx = priv;
+
+    srt_print(ctx, "\r\n");
+}
+
+/* Style callback: track the tag on the stack and print its opening form. */
+static void srt_style_cb(void *priv, char style, int close)
+{
+    SRTContext *ctx = priv;
+
+    srt_stack_push_pop(ctx, style, close);
+    if (close)
+        return;
+    srt_print(ctx, "<%c>", style);
+}
+
+/*
+ * Colour callback: only colour ids 0 and 1 are handled. 0xFFFFFFFF closes
+ * the current font tag; any other value opens a new <font color> tag.
+ */
+static void srt_color_cb(void *priv, unsigned int color, unsigned int color_id)
+{
+    SRTContext *ctx = priv;
+    const int reset = color == 0xFFFFFFFF;
+
+    if (color_id > 1)
+        return;
+
+    srt_stack_push_pop(ctx, 'f', reset);
+    if (reset)
+        return;
+
+    /* swap the lowest and third byte of the colour value */
+    srt_print(ctx, "<font color=\"#%06x\">",
+              (color & 0xFF0000) >> 16 | (color & 0xFF00) | (color & 0xFF) << 16);
+}
+
+/* Font-name callback: NULL closes the font tag, a name opens a new one. */
+static void srt_font_name_cb(void *priv, const char *name)
+{
+    SRTContext *ctx = priv;
+
+    srt_stack_push_pop(ctx, 'f', !name);
+    if (!name)
+        return;
+    srt_print(ctx, "<font face=\"%s\">", name);
+}
+
+/* Font-size callback: a negative size closes the font tag, others open one. */
+static void srt_font_size_cb(void *priv, int size)
+{
+    SRTContext *ctx = priv;
+    const int reset = size < 0;
+
+    srt_stack_push_pop(ctx, 'f', reset);
+    if (reset)
+        return;
+    srt_print(ctx, "<font size=\"%d\">", size);
+}
+
+/* Alignment callback: write {\an} at most once per dialog. */
+static void srt_alignment_cb(void *priv, int alignment)
+{
+    SRTContext *ctx = priv;
+
+    if (ctx->alignment_applied || alignment < 0)
+        return;
+
+    srt_print(ctx, "{\\an%d}", alignment);
+    ctx->alignment_applied = 1;
+}
+
+/* Reset callback: close every open tag, then re-apply the named style. */
+static void srt_cancel_overrides_cb(void *priv, const char *style)
+{
+    SRTContext *ctx = priv;
+
+    srt_stack_push_pop(ctx, 0, 1);
+    srt_style_apply(ctx, style);
+}
+
+/*
+ * Move callback: for the "srt" codec id, splice the coordinates
+ * "  X1:... X2:... Y1:... Y2:..." into the already written timestamp line at
+ * offset s->timestamp_end. Other codec ids ignore the event; t1 and t2 are
+ * unused.
+ */
+static void srt_move_cb(void *priv, int x1, int y1, int x2, int y2,
+                        int t1, int t2)
+{
+    SRTContext *s = priv;
+    /* wide enough for four full-width 32-bit values, so coordinates with
+     * more than three digits are not truncated */
+    char buffer[64];
+    unsigned char *dummy;
+    unsigned room;
+    int len;
+
+    if (s->avctx->codec->id != AV_CODEC_ID_SRT)
+        return;
+
+    len = snprintf(buffer, sizeof(buffer),
+                   "  X1:%03u X2:%03u Y1:%03u Y2:%03u",
+                   (unsigned)x1, (unsigned)x2, (unsigned)y1, (unsigned)y2);
+    /* snprintf() returns the untruncated length; never copy more bytes than
+     * buffer really holds */
+    if (len <= 0 || len >= (int)sizeof(buffer))
+        return;
+
+    /* the tail is shifted together with its terminating 0, so len + 1 bytes
+     * past the current end must be writable */
+    av_bprint_get_buffer(&s->buffer, len + 1, &dummy, &room);
+    if (room > len) {
+        memmove(s->buffer.str + s->timestamp_end + len,
+                s->buffer.str + s->timestamp_end,
+                s->buffer.len - s->timestamp_end + 1);
+        memcpy(s->buffer.str + s->timestamp_end, buffer, len);
+    }
+    /* Increment even if av_bprint_get_buffer() did not return enough room:
+       the bprint structure will be treated as truncated. */
+    s->buffer.len += len;
+}
+
+/* End-of-event callback: close all open tags; the "srt" codec id also gets
+ * the blank line that separates events. */
+static void srt_end_cb(void *priv)
+{
+    SRTContext *ctx = priv;
+
+    srt_stack_push_pop(ctx, 0, 1);
+    if (ctx->avctx->codec->id != AV_CODEC_ID_SRT)
+        return;
+    srt_print(ctx, "\r\n\r\n");
+}
+
+/* Callback table handed to ff_ass_split_override_codes() by
+ * srt_encode_frame(); each entry translates one kind of ASS event into
+ * SubRip text or markup. */
+static const ASSCodesCallbacks srt_callbacks = {
+    .text             = srt_text_cb,
+    .new_line         = srt_new_line_cb,
+    .style            = srt_style_cb,
+    .color            = srt_color_cb,
+    .font_name        = srt_font_name_cb,
+    .font_size        = srt_font_size_cb,
+    .alignment        = srt_alignment_cb,
+    .cancel_overrides = srt_cancel_overrides_cb,
+    .move             = srt_move_cb,
+    .end              = srt_end_cb,
+};
+
+/*
+ * Convert every ASS rect of sub to SubRip text and copy the result to buf.
+ *
+ * Returns the number of bytes written, 0 if no text was produced,
+ * AVERROR(ENOSYS) for a non-ASS rect, AVERROR(ENOMEM) if the internal buffer
+ * is incomplete, or -1 if the text does not fit in bufsize bytes.
+ */
+static int srt_encode_frame(AVCodecContext *avctx,
+                            unsigned char *buf, int bufsize, const AVSubtitle *sub)
+{
+    SRTContext *s = avctx->priv_data;
+    ASSDialog *dialog;
+    int i, num;
+
+    av_bprint_clear(&s->buffer);
+
+    for (i=0; i<sub->num_rects; i++) {
+
+        if (sub->rects[i]->type != SUBTITLE_ASS) {
+            av_log(avctx, AV_LOG_ERROR, "Only SUBTITLE_ASS type supported.\n");
+            return AVERROR(ENOSYS);
+        }
+
+        dialog = ff_ass_split_dialog(s->ass_ctx, sub->rects[i]->ass, 0, &num);
+        for (; dialog && num--; dialog++) {
+            /* only the "srt" codec id writes the counter and timestamp line */
+            if (avctx->codec->id == AV_CODEC_ID_SRT) {
+                /* presumably start/end are in centiseconds, scaled to
+                 * milliseconds here -- confirm against ASSDialog */
+                int sh, sm, ss, sc = 10 * dialog->start;
+                int eh, em, es, ec = 10 * dialog->end;
+                /* split into hours, minutes, seconds; sc/ec keep the rest */
+                sh = sc/3600000;  sc -= 3600000*sh;
+                sm = sc/  60000;  sc -=   60000*sm;
+                ss = sc/   1000;  sc -=    1000*ss;
+                eh = ec/3600000;  ec -= 3600000*eh;
+                em = ec/  60000;  ec -=   60000*em;
+                es = ec/   1000;  ec -=    1000*es;
+                srt_print(s,"%d\r\n%02d:%02d:%02d,%03d --> %02d:%02d:%02d,%03d\r\n",
+                          ++s->count, sh, sm, ss, sc, eh, em, es, ec);
+                /* offset of the "\r\n" just written; srt_move_cb() inserts
+                 * the coordinates there */
+                s->timestamp_end = s->buffer.len - 2;
+            }
+            s->alignment_applied = 0;
+            srt_style_apply(s, dialog->style);
+            ff_ass_split_override_codes(&srt_callbacks, s, dialog->text);
+        }
+    }
+
+    if (!av_bprint_is_complete(&s->buffer))
+        return AVERROR(ENOMEM);
+    if (!s->buffer.len)
+        return 0;
+
+    if (s->buffer.len > bufsize) {
+        av_log(avctx, AV_LOG_ERROR, "Buffer too small for ASS event.\n");
+        return -1;
+    }
+    /* the terminating 0 of the bprint buffer is not copied */
+    memcpy(buf, s->buffer.str, s->buffer.len);
+
+    return s->buffer.len;
+}
+
+/* Free the split ASS header and the output buffer; always returns 0. */
+static int srt_encode_close(AVCodecContext *avctx)
+{
+    SRTContext *ctx = avctx->priv_data;
+
+    ff_ass_split_free(ctx->ass_ctx);
+    av_bprint_finalize(&ctx->buffer, NULL);
+
+    return 0;
+}
+
+#if CONFIG_SRT_ENCODER
+/* Deprecated encoder: for this codec id srt_encode_frame() writes the event
+ * counter and the timestamp line into the payload itself. */
+AVCodec ff_srt_encoder = {
+    .name           = "srt",
+    .long_name      = NULL_IF_CONFIG_SMALL("SubRip subtitle with embedded timing"),
+    .type           = AVMEDIA_TYPE_SUBTITLE,
+    .id             = AV_CODEC_ID_SRT,
+    .priv_data_size = sizeof(SRTContext),
+    .init           = srt_encode_init,
+    .encode_sub     = srt_encode_frame,
+    .close          = srt_encode_close,
+};
+#endif
+
+#if CONFIG_SUBRIP_ENCODER
+/* SubRip encoder: for this codec id srt_encode_frame() emits only the event
+ * text and markup, without a counter or timestamp line. */
+AVCodec ff_subrip_encoder = {
+    .name           = "subrip",
+    .long_name      = NULL_IF_CONFIG_SMALL("SubRip subtitle"),
+    .type           = AVMEDIA_TYPE_SUBTITLE,
+    .id             = AV_CODEC_ID_SUBRIP,
+    .priv_data_size = sizeof(SRTContext),
+    .init           = srt_encode_init,
+    .encode_sub     = srt_encode_frame,
+    .close          = srt_encode_close,
+};
+#endif
diff --git a/libavcodec/startcode.c b/libavcodec/startcode.c
index d34981e..940bbb7 100644
--- a/libavcodec/startcode.c
+++ b/libavcodec/startcode.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2003-2010 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/startcode.h b/libavcodec/startcode.h
index f38ce54..cfa02b0 100644
--- a/libavcodec/startcode.h
+++ b/libavcodec/startcode.h
@@ -1,21 +1,27 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+/**
+ * @file
+ * Accelerated start code search function for start codes common to
+ * MPEG-1/2/4 video, VC-1, H.264/5
+ */
+
 #ifndef AVCODEC_STARTCODE_H
 #define AVCODEC_STARTCODE_H
 
diff --git a/libavcodec/subviewerdec.c b/libavcodec/subviewerdec.c
new file mode 100644
index 0000000..63be418
--- /dev/null
+++ b/libavcodec/subviewerdec.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2012 Clément Bœsch
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * SubViewer subtitle decoder
+ * @see https://en.wikipedia.org/wiki/SubViewer
+ */
+
+#include "avcodec.h"
+#include "ass.h"
+#include "libavutil/bprint.h"
+
+static int subviewer_event_to_ass(AVBPrint *buf, const char *p) /* append NUL-terminated text p to buf as one CRLF-terminated line; always returns 0 */
+{
+    while (*p) {
+        if (!strncmp(p, "[br]", 4)) { /* literal [br] marker becomes the two characters \N */
+            av_bprintf(buf, "\\N");
+            p += 4; /* skip the whole marker */
+        } else {
+            if (p[0] == '\n' && p[1]) /* newline with more text after it also becomes \N */
+                av_bprintf(buf, "\\N");
+            else if (*p != '\n' && *p != '\r') /* a final newline and every carriage return are dropped */
+                av_bprint_chars(buf, *p, 1);
+            p++;
+        }
+    }
+
+    av_bprintf(buf, "\r\n"); /* close the event with a single CRLF */
+    return 0;
+}
+
+static int subviewer_decode_frame(AVCodecContext *avctx, /* .decode callback: turn the packet text into a rect on the AVSubtitle passed in data */
+                                  void *data, int *got_sub_ptr, AVPacket *avpkt)
+{
+    char c; /* only receives the character matched by the sscanf probe below */
+    AVSubtitle *sub = data;
+    const char *ptr = avpkt->data; /* packet payload, read as a NUL-terminated string */
+    AVBPrint buf;
+
+    /* To be removed later: guard that rejects packets still starting with a start,end timing line */
+    if (ptr && sscanf(ptr, "%*u:%*u:%*u.%*u,%*u:%*u:%*u.%*u%c", &c) == 1) {
+        av_log(avctx, AV_LOG_ERROR, "AVPacket is not clean (contains timing "
+               "information). You need to upgrade your libavformat or "
+               "sanitize your packet.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED); /* growable buffer without a size cap */
+    // note: no need to rescale pts & duration since they are in the same
+    // timebase as ASS (1/100)
+    if (ptr && avpkt->size > 0 && !subviewer_event_to_ass(&buf, ptr)) /* empty or NULL packets add no rect */
+        ff_ass_add_rect(sub, buf.str, avpkt->pts, avpkt->duration, 0); /* NOTE(review): buf is used without an av_bprint_is_complete() check and the return value is ignored */
+    *got_sub_ptr = sub->num_rects > 0; /* signal output only when sub holds at least one rect */
+    av_bprint_finalize(&buf, NULL); /* release the temporary buffer */
+    return avpkt->size; /* presumably means the whole packet was consumed - confirm against the decode API */
+}
+
+AVCodec ff_subviewer_decoder = { /* SubViewer decoder registered as AV_CODEC_ID_SUBVIEWER; no priv_data_size or close callback is set */
+    .name           = "subviewer",
+    .long_name      = NULL_IF_CONFIG_SMALL("SubViewer subtitle"),
+    .type           = AVMEDIA_TYPE_SUBTITLE,
+    .id             = AV_CODEC_ID_SUBVIEWER,
+    .decode         = subviewer_decode_frame,
+    .init           = ff_ass_subtitle_header_default, /* not defined in this file; presumably installs a default ASS header - confirm in ass.h */
+};
diff --git a/libavcodec/sunrast.c b/libavcodec/sunrast.c
index ffa685c..d9918f4 100644
--- a/libavcodec/sunrast.c
+++ b/libavcodec/sunrast.c
@@ -2,20 +2,20 @@
  * Sun Rasterfile (.sun/.ras/im{1,8,24}/.sunras) image decoder
  * Copyright (c) 2007, 2008 Ivo van Poorten
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -33,7 +33,7 @@ static int sunrast_decode_frame(AVCodecContext *avctx, void *data,
     const uint8_t *buf_end   = avpkt->data + avpkt->size;
     AVFrame * const p        = data;
     unsigned int w, h, depth, type, maptype, maplength, stride, x, y, len, alen;
-    uint8_t *ptr;
+    uint8_t *ptr, *ptr2 = NULL;
     const uint8_t *bufstart = buf;
     int ret;
 
@@ -53,7 +53,7 @@ static int sunrast_decode_frame(AVCodecContext *avctx, void *data,
     maplength = AV_RB32(buf + 28);
     buf      += 32;
 
-    if (type == RT_FORMAT_TIFF || type == RT_FORMAT_IFF || type == RT_EXPERIMENTAL) {
+    if (type == RT_EXPERIMENTAL) {
         avpriv_request_sample(avctx, "TIFF/IFF/EXPERIMENTAL (compression) type");
         return AVERROR_PATCHWELCOME;
     }
@@ -70,10 +70,17 @@ static int sunrast_decode_frame(AVCodecContext *avctx, void *data,
         return AVERROR_INVALIDDATA;
     }
 
+    if (type == RT_FORMAT_TIFF || type == RT_FORMAT_IFF) {
+        av_log(avctx, AV_LOG_ERROR, "unsupported (compression) type\n");
+        return -1;
+    }
 
     switch (depth) {
         case 1:
-            avctx->pix_fmt = AV_PIX_FMT_MONOWHITE;
+            avctx->pix_fmt = maplength ? AV_PIX_FMT_PAL8 : AV_PIX_FMT_MONOWHITE;
+            break;
+        case 4:
+            avctx->pix_fmt = maplength ? AV_PIX_FMT_PAL8 : AV_PIX_FMT_NONE;
             break;
         case 8:
             avctx->pix_fmt = maplength ? AV_PIX_FMT_PAL8 : AV_PIX_FMT_GRAY8;
@@ -81,6 +88,9 @@ static int sunrast_decode_frame(AVCodecContext *avctx, void *data,
         case 24:
             avctx->pix_fmt = (type == RT_FORMAT_RGB) ? AV_PIX_FMT_RGB24 : AV_PIX_FMT_BGR24;
             break;
+        case 32:
+            avctx->pix_fmt = (type == RT_FORMAT_RGB) ? AV_PIX_FMT_0RGB : AV_PIX_FMT_0BGR;
+            break;
         default:
             av_log(avctx, AV_LOG_ERROR, "invalid depth\n");
             return AVERROR_INVALIDDATA;
@@ -90,17 +100,15 @@ static int sunrast_decode_frame(AVCodecContext *avctx, void *data,
     if (ret < 0)
         return ret;
 
-    if ((ret = ff_get_buffer(avctx, p, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
         return ret;
-    }
 
     p->pict_type = AV_PICTURE_TYPE_I;
 
     if (buf_end - buf < maplength)
         return AVERROR_INVALIDDATA;
 
-    if (depth != 8 && maplength) {
+    if (depth > 8 && maplength) {
         av_log(avctx, AV_LOG_WARNING, "useless colormap found or file is corrupted, trying to recover\n");
 
     } else if (maplength) {
@@ -113,13 +121,20 @@ static int sunrast_decode_frame(AVCodecContext *avctx, void *data,
 
         ptr = p->data[1];
         for (x = 0; x < len; x++, ptr += 4)
-            *(uint32_t *)ptr = (buf[x] << 16) + (buf[len + x] << 8) + buf[len + len + x];
+            *(uint32_t *)ptr = (0xFFU<<24) + (buf[x]<<16) + (buf[len+x]<<8) + buf[len+len+x];
     }
 
     buf += maplength;
 
+    if (maplength && depth < 8) {
+        ptr = ptr2 = av_malloc((w + 15) * h);
+        if (!ptr)
+            return AVERROR(ENOMEM);
+        stride = (w + 15 >> 3) * depth;
+    } else {
     ptr    = p->data[0];
     stride = p->linesize[0];
+    }
 
     /* scanlines are aligned on 16 bit boundaries */
     len  = (depth * w + 7) >> 3;
@@ -160,6 +175,30 @@ static int sunrast_decode_frame(AVCodecContext *avctx, void *data,
             buf += alen;
         }
     }
+    if (avctx->pix_fmt == AV_PIX_FMT_PAL8 && depth < 8) {
+        uint8_t *ptr_free = ptr2;
+        ptr = p->data[0];
+        for (y=0; y<h; y++) {
+            for (x = 0; x < (w + 7 >> 3) * depth; x++) {
+                if (depth == 1) {
+                    ptr[8*x]   = ptr2[x] >> 7;
+                    ptr[8*x+1] = ptr2[x] >> 6 & 1;
+                    ptr[8*x+2] = ptr2[x] >> 5 & 1;
+                    ptr[8*x+3] = ptr2[x] >> 4 & 1;
+                    ptr[8*x+4] = ptr2[x] >> 3 & 1;
+                    ptr[8*x+5] = ptr2[x] >> 2 & 1;
+                    ptr[8*x+6] = ptr2[x] >> 1 & 1;
+                    ptr[8*x+7] = ptr2[x]      & 1;
+                } else {
+                    ptr[2*x]   = ptr2[x] >> 4;
+                    ptr[2*x+1] = ptr2[x] & 0xF;
+                }
+            }
+            ptr  += p->linesize[0];
+            ptr2 += (w + 15 >> 3) * depth;
+        }
+        av_freep(&ptr_free);
+    }
 
     *got_frame = 1;
 
diff --git a/libavcodec/sunrast.h b/libavcodec/sunrast.h
index d9fe307..d162e63 100644
--- a/libavcodec/sunrast.h
+++ b/libavcodec/sunrast.h
@@ -2,20 +2,20 @@
  * Sun Rasterfile Image Format
  * Copyright (c) 2007, 2008 Ivo van Poorten
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/sunrastenc.c b/libavcodec/sunrastenc.c
index 25ae9bd..a55e3d4 100644
--- a/libavcodec/sunrastenc.c
+++ b/libavcodec/sunrastenc.c
@@ -2,20 +2,20 @@
  * Sun Rasterfile (.sun/.ras/im{1,8,24}/.sunras) image encoder
  * Copyright (c) 2012 Aneesh Dogra (lionaneesh) <lionaneesh@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -55,7 +55,7 @@ static void sunrast_image_write_image(AVCodecContext *avctx,
 {
     SUNRASTContext *s = avctx->priv_data;
     const uint8_t *ptr;
-    int len, alen, x;
+    int len, alen, x, y;
 
     if (s->maplength) {     // palettized
         PutByteContext pb_r, pb_g;
@@ -82,33 +82,29 @@ static void sunrast_image_write_image(AVCodecContext *avctx,
      if (s->type == RT_BYTE_ENCODED) {
         uint8_t value, value2;
         int run;
-        const uint8_t *start = linesize < 0 ? pixels + (avctx->height - 1) * linesize
-                                            : pixels;
-        const uint8_t *end   = linesize < 0 ? pixels - linesize
-                                            : pixels + avctx->height * linesize;
 
         ptr = pixels;
 
-#define GET_VALUE ptr >= end || ptr < start ? 0 : x >= len ? ptr[len-1] : ptr[x]
+#define GET_VALUE y >= avctx->height ? 0 : x >= len ? ptr[len-1] : ptr[x]
 
-        x = 0;
+        x = 0, y = 0;
         value2 = GET_VALUE;
-        while (ptr < end && ptr >= start) {
+        while (y < avctx->height) {
             run = 1;
             value = value2;
             x++;
             if (x >= alen) {
                 x = 0;
-                ptr += linesize;
+                ptr += linesize, y++;
             }
 
             value2 = GET_VALUE;
-            while (value2 == value && run < 256 && ptr < end && ptr >= start) {
+            while (value2 == value && run < 256 && y < avctx->height) {
                 x++;
                 run++;
                 if (x >= alen) {
                     x = 0;
-                    ptr += linesize;
+                    ptr += linesize, y++;
                 }
                 value2 = GET_VALUE;
             }
@@ -127,7 +123,6 @@ static void sunrast_image_write_image(AVCodecContext *avctx,
         // update data length for header
         s->length = bytestream2_tell_p(&s->p) - 32 - s->maplength;
     } else {
-        int y;
         for (y = 0; y < avctx->height; y++) {
             bytestream2_put_buffer(&s->p, ptr, len);
             if (len < alen)
@@ -153,12 +148,6 @@ static av_cold int sunrast_encode_init(AVCodecContext *avctx)
         return AVERROR(EINVAL);
     }
 
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-
-    avctx->coded_frame->key_frame = 1;
-    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
     s->maptype                    = RMT_NONE;
     s->maplength                  = 0;
 
@@ -191,7 +180,7 @@ static int sunrast_encode_frame(AVCodecContext *avctx,  AVPacket *avpkt,
     SUNRASTContext *s = avctx->priv_data;
     int ret;
 
-    if ((ret = ff_alloc_packet(avpkt, s->size)) < 0)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, s->size)) < 0)
         return ret;
 
     bytestream2_init_writer(&s->p, avpkt->data, avpkt->size);
diff --git a/libavcodec/svq1.c b/libavcodec/svq1.c
index 545df80..c219f22 100644
--- a/libavcodec/svq1.c
+++ b/libavcodec/svq1.c
@@ -8,20 +8,20 @@
  *
  * SVQ1 Encoder (c) 2004 Mike Melanson <melanson@pcisys.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/svq1.h b/libavcodec/svq1.h
index 70b5c37..8380f22 100644
--- a/libavcodec/svq1.h
+++ b/libavcodec/svq1.h
@@ -8,20 +8,20 @@
  *
  * SVQ1 Encoder (c) 2004 Mike Melanson <melanson@pcisys.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/svq13.c b/libavcodec/svq13.c
index e0d2154..b821a44 100644
--- a/libavcodec/svq13.c
+++ b/libavcodec/svq13.c
@@ -1,20 +1,20 @@
 /*
  * SVQ1/SVQ3 decoder common code
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/svq1_cb.h b/libavcodec/svq1_cb.h
index e22cd60..7926ce1 100644
--- a/libavcodec/svq1_cb.h
+++ b/libavcodec/svq1_cb.h
@@ -6,20 +6,20 @@
  * Copyright (C) 2002 the xine project
  * Copyright (C) 2002 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/svq1_vlc.h b/libavcodec/svq1_vlc.h
index 834279d..e56d894 100644
--- a/libavcodec/svq1_vlc.h
+++ b/libavcodec/svq1_vlc.h
@@ -1,20 +1,20 @@
 /*
  * copyright (C) 2003 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/svq1dec.c b/libavcodec/svq1dec.c
index 000487b..1e7ab49 100644
--- a/libavcodec/svq1dec.c
+++ b/libavcodec/svq1dec.c
@@ -8,20 +8,20 @@
  *
  * SVQ1 Encoder (c) 2004 Mike Melanson <melanson@pcisys.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -110,12 +110,11 @@ static const uint8_t string_table[256] = {
                 break;                                                  \
         }                                                               \
         /* divide block if next bit set */                              \
-        if (get_bits1(bitbuf) == 0)                                     \
+        if (!get_bits1(bitbuf))                                         \
             break;                                                      \
         /* add child nodes */                                           \
         list[n++] = list[i];                                            \
-        list[n++] = list[i] +                                           \
-                    (((level & 1) ? pitch : 1) << (level / 2 + 1));     \
+        list[n++] = list[i] + (((level & 1) ? pitch : 1) << ((level >> 1) + 1));\
     }
 
 #define SVQ1_ADD_CODEBOOK()                                             \
@@ -151,7 +150,7 @@ static const uint8_t string_table[256] = {
                       16 * j) << (level + 1);                           \
     }                                                                   \
     mean -= stages * 128;                                               \
-    n4    = mean + (mean >> 31) << 16 | (mean & 0xFFFF);
+    n4    = (mean << 16) + mean;
 
 static int svq1_decode_block_intra(GetBitContext *bitbuf, uint8_t *pixels,
                                    int pitch)
@@ -341,8 +340,7 @@ static int svq1_motion_inter_block(HpelDSPContext *hdsp, GetBitContext *bitbuf,
     }
 
     result = svq1_decode_motion_vector(bitbuf, &mv, pmv);
-
-    if (result != 0)
+    if (result)
         return result;
 
     motion[0].x         =
@@ -385,8 +383,7 @@ static int svq1_motion_inter_4v_block(HpelDSPContext *hdsp, GetBitContext *bitbu
     }
 
     result = svq1_decode_motion_vector(bitbuf, &mv, pmv);
-
-    if (result != 0)
+    if (result)
         return result;
 
     /* predict and decode motion vector (1) */
@@ -398,8 +395,7 @@ static int svq1_motion_inter_4v_block(HpelDSPContext *hdsp, GetBitContext *bitbu
         pmv[1] = &motion[(x / 8) + 3];
     }
     result = svq1_decode_motion_vector(bitbuf, &motion[0], pmv);
-
-    if (result != 0)
+    if (result)
         return result;
 
     /* predict and decode motion vector (2) */
@@ -407,8 +403,7 @@ static int svq1_motion_inter_4v_block(HpelDSPContext *hdsp, GetBitContext *bitbu
     pmv[2] = &motion[(x / 8) + 1];
 
     result = svq1_decode_motion_vector(bitbuf, &motion[(x / 8) + 2], pmv);
-
-    if (result != 0)
+    if (result)
         return result;
 
     /* predict and decode motion vector (3) */
@@ -416,8 +411,7 @@ static int svq1_motion_inter_4v_block(HpelDSPContext *hdsp, GetBitContext *bitbu
     pmv[3] = &motion[(x / 8) + 3];
 
     result = svq1_decode_motion_vector(bitbuf, pmv[3], pmv);
-
-    if (result != 0)
+    if (result)
         return result;
 
     /* form predictions */
@@ -520,6 +514,8 @@ static int svq1_decode_frame_header(AVCodecContext *avctx, AVFrame *frame)
     SVQ1Context *s = avctx->priv_data;
     GetBitContext *bitbuf = &s->gb;
     int frame_size_code;
+    int width  = s->width;
+    int height = s->height;
 
     skip_bits(bitbuf, 8); /* temporal_reference */
 
@@ -558,7 +554,7 @@ static int svq1_decode_frame_header(AVCodecContext *avctx, AVFrame *frame)
             svq1_parse_string(bitbuf, msg);
 
             av_log(avctx, AV_LOG_INFO,
-                   "embedded message: \"%s\"\n", (char *)msg);
+                   "embedded message:\n%s\n", (char *)msg);
         }
 
         skip_bits(bitbuf, 2);
@@ -570,20 +566,20 @@ static int svq1_decode_frame_header(AVCodecContext *avctx, AVFrame *frame)
 
         if (frame_size_code == 7) {
             /* load width, height (12 bits each) */
-            s->width  = get_bits(bitbuf, 12);
-            s->height = get_bits(bitbuf, 12);
+            width  = get_bits(bitbuf, 12);
+            height = get_bits(bitbuf, 12);
 
-            if (!s->width || !s->height)
+            if (!width || !height)
                 return AVERROR_INVALIDDATA;
         } else {
             /* get width, height from table */
-            s->width  = ff_svq1_frame_size_table[frame_size_code][0];
-            s->height = ff_svq1_frame_size_table[frame_size_code][1];
+            width  = ff_svq1_frame_size_table[frame_size_code][0];
+            height = ff_svq1_frame_size_table[frame_size_code][1];
         }
     }
 
     /* unknown fields */
-    if (get_bits1(bitbuf) == 1) {
+    if (get_bits1(bitbuf)) {
         skip_bits1(bitbuf);    /* use packet checksum if (1) */
         skip_bits1(bitbuf);    /* component checksums after image data if (1) */
 
@@ -591,16 +587,18 @@ static int svq1_decode_frame_header(AVCodecContext *avctx, AVFrame *frame)
             return AVERROR_INVALIDDATA;
     }
 
-    if (get_bits1(bitbuf) == 1) {
+    if (get_bits1(bitbuf)) {
         skip_bits1(bitbuf);
         skip_bits(bitbuf, 4);
         skip_bits1(bitbuf);
         skip_bits(bitbuf, 2);
 
-        while (get_bits1(bitbuf) == 1)
-            skip_bits(bitbuf, 8);
+        if (skip_1stop_8data_bits(bitbuf) < 0)
+            return AVERROR_INVALIDDATA;
     }
 
+    s->width  = width;
+    s->height = height;
     return 0;
 }
 
@@ -616,7 +614,7 @@ static int svq1_decode_frame(AVCodecContext *avctx, void *data,
     svq1_pmv *pmv;
 
     /* initialize bit buffer */
-    init_get_bits(&s->gb, buf, buf_size * 8);
+    init_get_bits8(&s->gb, buf, buf_size);
 
     /* decode frame header */
     s->frame_code = get_bits(&s->gb, 22);
@@ -628,12 +626,14 @@ static int svq1_decode_frame(AVCodecContext *avctx, void *data,
     if (s->frame_code != 0x20) {
         uint32_t *src = (uint32_t *)(buf + 4);
 
+        if (buf_size < 36)
+            return AVERROR_INVALIDDATA;
+
         for (i = 0; i < 4; i++)
             src[i] = ((src[i] << 16) | (src[i] >> 16)) ^ src[7 - i];
     }
 
     result = svq1_decode_frame_header(avctx, cur);
-
     if (result != 0) {
         av_dlog(avctx, "Error in svq1_decode_frame_header %i\n", result);
         return result;
@@ -678,8 +678,8 @@ static int svq1_decode_frame(AVCodecContext *avctx, void *data,
                 for (x = 0; x < width; x += 16) {
                     result = svq1_decode_block_intra(&s->gb, &current[x],
                                                      linesize);
-                    if (result != 0) {
-                        av_log(avctx, AV_LOG_INFO,
+                    if (result) {
+                        av_log(avctx, AV_LOG_ERROR,
                                "Error in svq1_decode_block %i (keyframe)\n",
                                result);
                         goto err;
diff --git a/libavcodec/svq1enc.c b/libavcodec/svq1enc.c
index 506ee9b..95dcee4 100644
--- a/libavcodec/svq1enc.c
+++ b/libavcodec/svq1enc.c
@@ -2,20 +2,20 @@
  * SVQ1 Encoder
  * Copyright (C) 2004 Mike Melanson <melanson@pcisys.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -36,9 +36,8 @@
 #include "svq1.h"
 #include "svq1enc.h"
 #include "svq1enc_cb.h"
+#include "libavutil/avassert.h"
 
-#undef NDEBUG
-#include <assert.h>
 
 static void svq1_write_header(SVQ1EncContext *s, int frame_type)
 {
@@ -59,7 +58,7 @@ static void svq1_write_header(SVQ1EncContext *s, int frame_type)
         /* output 5 unknown bits (2 + 2 + 1) */
         put_bits(&s->pb, 5, 2); /* 2 needed by quicktime decoder */
 
-        i = ff_match_2uint16(ff_svq1_frame_size_table,
+        i = ff_match_2uint16((void*)ff_svq1_frame_size_table,
                              FF_ARRAY_ELEMS(ff_svq1_frame_size_table),
                              s->frame_width, s->frame_height);
         put_bits(&s->pb, 3, i);
@@ -78,7 +77,7 @@ static void svq1_write_header(SVQ1EncContext *s, int frame_type)
 #define THRESHOLD_MULTIPLIER 0.6
 
 static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
-                               int size)
+                               intptr_t size)
 {
     int score = 0, i;
 
@@ -153,7 +152,7 @@ static int encode_block(SVQ1EncContext *s, uint8_t *src, uint8_t *ref,
                 score  = sqr - (diff * (int64_t)diff >> (level + 3)); // FIXME: 64bit slooow
                 if (score < best_vector_score) {
                     int mean = diff + (size >> 1) >> (level + 3);
-                    assert(mean > -300 && mean < 300);
+                    av_assert2(mean > -300 && mean < 300);
                     mean               = av_clip(mean, intra ? 0 : -256, 255);
                     best_vector_score  = score;
                     best_vector[stage] = i;
@@ -161,7 +160,7 @@ static int encode_block(SVQ1EncContext *s, uint8_t *src, uint8_t *ref,
                     best_vector_mean   = mean;
                 }
             }
-            assert(best_vector_mean != -999);
+            av_assert0(best_vector_mean != -999);
             vector = codebook + stage * size * 16 + best_vector[stage] * size;
             for (j = 0; j < size; j++)
                 block[stage + 1][j] = block[stage][j] - vector[j];
@@ -205,10 +204,10 @@ static int encode_block(SVQ1EncContext *s, uint8_t *src, uint8_t *ref,
         put_bits(&s->reorder_pb[level], 1, split);
 
     if (!split) {
-        assert(best_mean >= 0 && best_mean < 256 || !intra);
-        assert(best_mean >= -256 && best_mean < 256);
-        assert(best_count >= 0 && best_count < 7);
-        assert(level < 4 || best_count == 0);
+        av_assert1(best_mean >= 0 && best_mean < 256 || !intra);
+        av_assert1(best_mean >= -256 && best_mean < 256);
+        av_assert1(best_count >= 0 && best_count < 7);
+        av_assert1(level < 4 || best_count == 0);
 
         /* output the encoding */
         put_bits(&s->reorder_pb[level],
@@ -218,7 +217,7 @@ static int encode_block(SVQ1EncContext *s, uint8_t *src, uint8_t *ref,
                  mean_vlc[best_mean][0]);
 
         for (i = 0; i < best_count; i++) {
-            assert(best_vector[i] >= 0 && best_vector[i] < 16);
+            av_assert2(best_vector[i] >= 0 && best_vector[i] < 16);
             put_bits(&s->reorder_pb[level], 4, best_vector[i]);
         }
 
@@ -232,6 +231,15 @@ static int encode_block(SVQ1EncContext *s, uint8_t *src, uint8_t *ref,
     return best_score;
 }
 
+static void init_block_index(MpegEncContext *s){ /* fill s->block_index[0..5] from s->mb_x / s->mb_y; writes nothing else in *s */
+    s->block_index[0]= s->b8_stride*(s->mb_y*2    )     + s->mb_x*2; /* [0],[1]: row mb_y*2 of the b8_stride-wide grid, columns mb_x*2 and mb_x*2 + 1 */
+    s->block_index[1]= s->b8_stride*(s->mb_y*2    ) + 1 + s->mb_x*2;
+    s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1)     + s->mb_x*2; /* [2],[3]: the same two columns on the following row (mb_y*2 + 1) */
+    s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) + 1 + s->mb_x*2;
+    s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x; /* [4],[5]: mb_stride-based, placed after the b8_stride*mb_height*2 entries; presumably the two chroma blocks -- TODO confirm */
+    s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x;
+}
+
 static int svq1_encode_plane(SVQ1EncContext *s, int plane,
                              unsigned char *src_plane,
                              unsigned char *ref_plane,
@@ -244,7 +252,7 @@ static int svq1_encode_plane(SVQ1EncContext *s, int plane,
     int block_width, block_height;
     int level;
     int threshold[6];
-    uint8_t *src     = s->scratchbuf + stride * 16;
+    uint8_t *src     = s->scratchbuf + stride * 32;
     const int lambda = (f->quality * f->quality) >>
                        (2 * FF_LAMBDA_SHIFT);
 
@@ -327,8 +335,7 @@ static int svq1_encode_plane(SVQ1EncContext *s, int plane,
 
             for (x = 0; x < block_width; x++) {
                 s->m.mb_x = x;
-                ff_init_block_index(&s->m);
-                ff_update_block_index(&s->m);
+                init_block_index(&s->m);
 
                 ff_estimate_p_frame_motion(&s->m, x, y);
             }
@@ -368,8 +375,7 @@ static int svq1_encode_plane(SVQ1EncContext *s, int plane,
             }
 
             s->m.mb_x = x;
-            ff_init_block_index(&s->m);
-            ff_update_block_index(&s->m);
+            init_block_index(&s->m);
 
             if (f->pict_type == AV_PICTURE_TYPE_I ||
                 (s->m.mb_type[x + y * s->m.mb_stride] &
@@ -410,10 +416,10 @@ static int svq1_encode_plane(SVQ1EncContext *s, int plane,
                     s->m.pb = s->reorder_pb[5];
                     mx      = motion_ptr[0];
                     my      = motion_ptr[1];
-                    assert(mx     >= -32 && mx     <= 31);
-                    assert(my     >= -32 && my     <= 31);
-                    assert(pred_x >= -32 && pred_x <= 31);
-                    assert(pred_y >= -32 && pred_y <= 31);
+                    av_assert1(mx     >= -32 && mx     <= 31);
+                    av_assert1(my     >= -32 && my     <= 31);
+                    av_assert1(pred_x >= -32 && pred_x <= 31);
+                    av_assert1(pred_y >= -32 && pred_y <= 31);
                     ff_h263_encode_motion(&s->m, mx - pred_x, 1);
                     ff_h263_encode_motion(&s->m, my - pred_y, 1);
                     s->reorder_pb[5] = s->m.pb;
@@ -421,12 +427,12 @@ static int svq1_encode_plane(SVQ1EncContext *s, int plane,
 
                     dxy = (mx & 1) + 2 * (my & 1);
 
-                    s->hdsp.put_pixels_tab[0][dxy](temp + 16,
+                    s->hdsp.put_pixels_tab[0][dxy](temp + 16*stride,
                                                    ref + (mx >> 1) +
                                                    stride * (my >> 1),
                                                    stride, 16);
 
-                    score[1] += encode_block(s, src + 16 * x, temp + 16,
+                    score[1] += encode_block(s, src + 16 * x, temp + 16*stride,
                                              decoded, stride, 5, 64, lambda, 0);
                     best      = score[1] <= score[0];
 
@@ -567,12 +573,9 @@ static int svq1_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     AVFrame *const p        = avctx->coded_frame;
     int i, ret;
 
-    if (!pkt->data &&
-        (ret = av_new_packet(pkt, s->y_block_width * s->y_block_height *
-                             MAX_MB_BYTES * 3 + FF_MIN_BUFFER_SIZE)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
+    if ((ret = ff_alloc_packet2(avctx, pkt, s->y_block_width * s->y_block_height *
+                             MAX_MB_BYTES*3 + FF_MIN_BUFFER_SIZE)) < 0)
         return ret;
-    }
 
     if (avctx->pix_fmt != AV_PIX_FMT_YUV410P) {
         av_log(avctx, AV_LOG_ERROR, "unsupported pixel format\n");
@@ -580,9 +583,11 @@ static int svq1_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     }
 
     if (!s->current_picture->data[0]) {
-        ff_get_buffer(avctx, s->current_picture, 0);
-        ff_get_buffer(avctx, s->last_picture, 0);
-        s->scratchbuf = av_malloc(s->current_picture->linesize[0] * 16 * 2);
+        if ((ret = ff_get_buffer(avctx, s->current_picture, 0))< 0 ||
+            (ret = ff_get_buffer(avctx, s->last_picture, 0))   < 0) {
+            return ret;
+        }
+        s->scratchbuf = av_malloc(s->current_picture->linesize[0] * 16 * 3);
     }
 
     FFSWAP(AVFrame*, s->current_picture, s->last_picture);
diff --git a/libavcodec/svq1enc.h b/libavcodec/svq1enc.h
index 516e875..740d2ff 100644
--- a/libavcodec/svq1enc.h
+++ b/libavcodec/svq1enc.h
@@ -1,20 +1,20 @@
 /*
  * SVQ1 encoder
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -69,7 +69,7 @@ typedef struct SVQ1EncContext {
     uint8_t *scratchbuf;
 
     int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
-                             int size);
+                             intptr_t size);
 } SVQ1EncContext;
 
 void ff_svq1enc_init_ppc(SVQ1EncContext *c);
diff --git a/libavcodec/svq1enc_cb.h b/libavcodec/svq1enc_cb.h
index a5cd179..1edb4ec 100644
--- a/libavcodec/svq1enc_cb.h
+++ b/libavcodec/svq1enc_cb.h
@@ -2,20 +2,20 @@
  * SVQ1 Encoder
  * Copyright (C) 2004 Mike Melanson <melanson@pcisys.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c
index fc2120b..97233b1 100644
--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c
@@ -1,20 +1,20 @@
 /*
- * Copyright (c) 2003 The Libav Project
+ * Copyright (c) 2003 The FFmpeg Project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -37,7 +37,7 @@
  *
  * You will know you have these parameters passed correctly when the decoder
  * correctly decodes this file:
- *  http://samples.libav.org/V-codecs/SVQ3/Vertical400kbit.sorenson3.mov
+ *  http://samples.mplayerhq.hu/V-codecs/SVQ3/Vertical400kbit.sorenson3.mov
  */
 
 #include <inttypes.h>
@@ -55,6 +55,7 @@
 #include "hpeldsp.h"
 #include "rectangle.h"
 #include "tpeldsp.h"
+#include "vdpau_internal.h"
 
 #if CONFIG_ZLIB
 #include <zlib.h>
@@ -80,6 +81,8 @@ typedef struct {
     int unknown_flag;
     int next_slice_index;
     uint32_t watermark_key;
+    uint8_t *buf;
+    int buf_size;
     int adaptive_quant;
     int next_p_frame_damaged;
     int h_edge_pos;
@@ -159,6 +162,8 @@ static const uint32_t svq3_dequant_coeff[32] = {
     61694, 68745, 77615, 89113, 100253, 109366, 126635, 141533
 };
 
+static int svq3_decode_end(AVCodecContext *avctx);
+
 void ff_svq3_luma_dc_dequant_idct_c(int16_t *output, int16_t *input, int qp)
 {
     const int qmul = svq3_dequant_coeff[qp];
@@ -240,14 +245,17 @@ static inline int svq3_decode_block(GetBitContext *gb, int16_t *block,
     static const uint8_t *const scan_patterns[4] =
     { luma_dc_zigzag_scan, zigzag_scan, svq3_scan, chroma_dc_scan };
 
-    int run, level, limit;
+    int run, level, sign, limit;
     unsigned vlc;
     const int intra           = 3 * type >> 2;
     const uint8_t *const scan = scan_patterns[type];
 
     for (limit = (16 >> intra); index < 16; index = limit, limit += 8) {
         for (; (vlc = svq3_get_ue_golomb(gb)) != 0; index++) {
-            int sign = (vlc & 1) ? 0 : -1;
+            if ((int32_t)vlc < 0)
+                return -1;
+
+            sign     = (vlc & 1) ? 0 : -1;
             vlc      = vlc + 1 >> 1;
 
             if (type == 3) {
@@ -262,20 +270,19 @@ static inline int svq3_decode_block(GetBitContext *gb, int16_t *block,
                     level = (vlc + 9 >> 2) - run;
                 }
             } else {
-                if (vlc < 16) {
+                if (vlc < 16U) {
                     run   = svq3_dct_tables[intra][vlc].run;
                     level = svq3_dct_tables[intra][vlc].level;
                 } else if (intra) {
                     run   = vlc & 0x7;
-                    level = (vlc >> 3) +
-                            ((run == 0) ? 8 : ((run < 2) ? 2 : ((run < 5) ? 0 : -1)));
+                    level = (vlc >> 3) + ((run == 0) ? 8 : ((run < 2) ? 2 : ((run < 5) ? 0 : -1)));
                 } else {
                     run   = vlc & 0xF;
-                    level = (vlc >> 4) +
-                            ((run == 0) ? 4 : ((run < 3) ? 2 : ((run < 10) ? 1 : 0)));
+                    level = (vlc >> 4) + ((run == 0) ? 4 : ((run < 3) ? 2 : ((run < 10) ? 1 : 0)));
                 }
             }
 
+
             if ((index += run) >= limit)
                 return -1;
 
@@ -617,7 +624,7 @@ static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
             for (i = 0; i < 16; i += 2) {
                 vlc = svq3_get_ue_golomb(&h->gb);
 
-                if (vlc >= 25) {
+                if (vlc >= 25U) {
                     av_log(h->avctx, AV_LOG_ERROR,
                            "luma prediction:%"PRIu32"\n", vlc);
                     return -1;
@@ -687,7 +694,7 @@ static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
 
     if (!IS_INTRA16x16(mb_type) &&
         (!IS_SKIP(mb_type) || h->pict_type == AV_PICTURE_TYPE_B)) {
-        if ((vlc = svq3_get_ue_golomb(&h->gb)) >= 48) {
+        if ((vlc = svq3_get_ue_golomb(&h->gb)) >= 48U){
             av_log(h->avctx, AV_LOG_ERROR, "cbp_vlc=%"PRIu32"\n", vlc);
             return -1;
         }
@@ -803,8 +810,8 @@ static int svq3_decode_slice_header(AVCodecContext *avctx)
                     header ^ s->watermark_key);
         }
         if (length > 0) {
-            memcpy((uint8_t *) &h->gb.buffer[get_bits_count(&h->gb) >> 3],
-                   &h->gb.buffer[h->gb.size_in_bits >> 3], length - 1);
+            memmove((uint8_t *) &h->gb.buffer[get_bits_count(&h->gb) >> 3],
+                    &h->gb.buffer[h->gb.size_in_bits >> 3], length - 1);
         }
         skip_bits_long(&h->gb, 0);
     }
@@ -838,8 +845,8 @@ static int svq3_decode_slice_header(AVCodecContext *avctx)
     skip_bits1(&h->gb);
     skip_bits(&h->gb, 2);
 
-    while (get_bits1(&h->gb))
-        skip_bits(&h->gb, 8);
+    if (skip_1stop_8data_bits(&h->gb) < 0)
+        return AVERROR_INVALIDDATA;
 
     /* reset intra predictors and invalidate motion vector references */
     if (h->mb_x > 0) {
@@ -868,25 +875,25 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx)
     unsigned char *extradata_end;
     unsigned int size;
     int marker_found = 0;
+    int ret;
 
     s->cur_pic  = av_mallocz(sizeof(*s->cur_pic));
     s->last_pic = av_mallocz(sizeof(*s->last_pic));
     s->next_pic = av_mallocz(sizeof(*s->next_pic));
     if (!s->next_pic || !s->last_pic || !s->cur_pic) {
-        av_freep(&s->cur_pic);
-        av_freep(&s->last_pic);
-        av_freep(&s->next_pic);
-        return AVERROR(ENOMEM);
+        ret = AVERROR(ENOMEM);
+        goto fail;
     }
 
-    if (ff_h264_decode_init(avctx) < 0)
-        return -1;
+    if ((ret = ff_h264_decode_init(avctx)) < 0)
+        goto fail;
 
     ff_hpeldsp_init(&s->hdsp, avctx->flags);
     ff_tpeldsp_init(&s->tdsp);
 
     h->flags           = avctx->flags;
     h->is_complex      = 1;
+    h->sps.chroma_format_idc = 1;
     h->picture_structure = PICT_FRAME;
     avctx->pix_fmt     = AV_PIX_FMT_YUVJ420P;
     avctx->color_range = AVCOL_RANGE_JPEG;
@@ -917,8 +924,10 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx)
         int frame_size_code;
 
         size = AV_RB32(&extradata[4]);
-        if (size > extradata_end - extradata - 8)
-            return AVERROR_INVALIDDATA;
+        if (size > extradata_end - extradata - 8) {
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
         init_get_bits(&gb, extradata + 8, size * 8);
 
         /* 'frame size code' and optional 'width, height' */
@@ -972,8 +981,10 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx)
         /* unknown field */
         skip_bits1(&gb);
 
-        while (get_bits1(&gb))
-            skip_bits(&gb, 8);
+        if (skip_1stop_8data_bits(&gb) < 0) {
+            ret = AVERROR_INVALIDDATA;
+            goto fail;
+        }
 
         s->unknown_flag  = get_bits1(&gb);
         avctx->has_b_frames = !h->low_delay;
@@ -990,9 +1001,11 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx)
             int offset                = get_bits_count(&gb) + 7 >> 3;
             uint8_t *buf;
 
-            if (watermark_height > 0 &&
-                (uint64_t)watermark_width * 4 > UINT_MAX / watermark_height)
-                return -1;
+            if (watermark_height <= 0 ||
+                (uint64_t)watermark_width * 4 > UINT_MAX / watermark_height) {
+                ret = -1;
+                goto fail;
+            }
 
             buf = av_malloc(buf_len);
             av_log(avctx, AV_LOG_DEBUG, "watermark size: %ux%u\n",
@@ -1005,7 +1018,8 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx)
                 av_log(avctx, AV_LOG_ERROR,
                        "could not uncompress watermark logo\n");
                 av_free(buf);
-                return -1;
+                ret = -1;
+                goto fail;
             }
             s->watermark_key = ff_svq1_packet_checksum(buf, buf_len, 0);
             s->watermark_key = s->watermark_key << 16 | s->watermark_key;
@@ -1015,7 +1029,8 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx)
 #else
             av_log(avctx, AV_LOG_ERROR,
                    "this svq3 file contains watermark which need zlib support compiled in\n");
-            return -1;
+            ret = -1;
+            goto fail;
 #endif
         }
     }
@@ -1030,12 +1045,15 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx)
     s->h_edge_pos = h->mb_width * 16;
     s->v_edge_pos = h->mb_height * 16;
 
-    if (ff_h264_alloc_tables(h) < 0) {
+    if ((ret = ff_h264_alloc_tables(h)) < 0) {
         av_log(avctx, AV_LOG_ERROR, "svq3 memory allocation failed\n");
-        return AVERROR(ENOMEM);
+        goto fail;
     }
 
     return 0;
+fail:
+    svq3_decode_end(avctx);
+    return ret;
 }
 
 static void free_picture(AVCodecContext *avctx, H264Picture *pic)
@@ -1105,10 +1123,11 @@ fail:
 static int svq3_decode_frame(AVCodecContext *avctx, void *data,
                              int *got_frame, AVPacket *avpkt)
 {
-    const uint8_t *buf = avpkt->data;
     SVQ3Context *s     = avctx->priv_data;
     H264Context *h     = &s->h;
     int buf_size       = avpkt->size;
+    int left;
+    uint8_t *buf;
     int ret, m, i;
 
     /* special case for last picture */
@@ -1123,10 +1142,20 @@ static int svq3_decode_frame(AVCodecContext *avctx, void *data,
         return 0;
     }
 
-    init_get_bits(&h->gb, buf, 8 * buf_size);
-
     h->mb_x = h->mb_y = h->mb_xy = 0;
 
+    if (s->watermark_key) {
+        av_fast_padded_malloc(&s->buf, &s->buf_size, buf_size);
+        if (!s->buf)
+            return AVERROR(ENOMEM);
+        memcpy(s->buf, avpkt->data, buf_size);
+        buf = s->buf;
+    } else {
+        buf = avpkt->data;
+    }
+
+    init_get_bits(&h->gb, buf, 8 * buf_size);
+
     if (svq3_decode_slice_header(avctx))
         return -1;
 
@@ -1166,6 +1195,7 @@ static int svq3_decode_frame(AVCodecContext *avctx, void *data,
     if (h->pict_type != AV_PICTURE_TYPE_I) {
         if (!s->last_pic->f.data[0]) {
             av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
+            av_frame_unref(&s->last_pic->f);
             ret = get_buffer(avctx, s->last_pic);
             if (ret < 0)
                 return ret;
@@ -1178,6 +1208,7 @@ static int svq3_decode_frame(AVCodecContext *avctx, void *data,
 
         if (h->pict_type == AV_PICTURE_TYPE_B && !s->next_pic->f.data[0]) {
             av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
+            av_frame_unref(&s->next_pic->f);
             ret = get_buffer(avctx, s->next_pic);
             if (ret < 0)
                 return ret;
@@ -1267,7 +1298,7 @@ static int svq3_decode_frame(AVCodecContext *avctx, void *data,
                 return -1;
             }
 
-            if (mb_type != 0)
+            if (mb_type != 0 || h->cbp)
                 ff_h264_hl_decode_mb(h);
 
             if (h->pict_type != AV_PICTURE_TYPE_B && !h->low_delay)
@@ -1281,6 +1312,18 @@ static int svq3_decode_frame(AVCodecContext *avctx, void *data,
                            h->low_delay);
     }
 
+    left = buf_size*8 - get_bits_count(&h->gb);
+
+    if (h->mb_y != h->mb_height || h->mb_x != h->mb_width) {
+        av_log(avctx, AV_LOG_INFO, "frame num %d incomplete pic x %d y %d left %d\n", avctx->frame_number, h->mb_y, h->mb_x, left);
+        //av_hex_dump(stderr, buf+buf_size-8, 8);
+    }
+
+    if (left < 0) {
+        av_log(avctx, AV_LOG_ERROR, "frame num %d left %d\n", avctx->frame_number, left);
+        return -1;
+    }
+
     if (h->pict_type == AV_PICTURE_TYPE_B || h->low_delay)
         ret = av_frame_ref(data, &s->cur_pic->f);
     else if (s->last_pic->f.data[0])
@@ -1317,6 +1360,10 @@ static av_cold int svq3_decode_end(AVCodecContext *avctx)
 
     ff_h264_free_context(h);
 
+    av_freep(&s->buf);
+    s->buf_size = 0;
+    av_freep(&h->edge_emu_buffer);
+
     return 0;
 }
 
diff --git a/libavcodec/svq3.h b/libavcodec/svq3.h
index a20e620..5007a8c 100644
--- a/libavcodec/svq3.h
+++ b/libavcodec/svq3.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/synth_filter.c b/libavcodec/synth_filter.c
index d0ace40..d49ffe6 100644
--- a/libavcodec/synth_filter.c
+++ b/libavcodec/synth_filter.c
@@ -1,20 +1,20 @@
 /*
  * copyright (c) 2008 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/synth_filter.h b/libavcodec/synth_filter.h
index f842c70..b63fd77 100644
--- a/libavcodec/synth_filter.h
+++ b/libavcodec/synth_filter.h
@@ -1,20 +1,20 @@
 /*
  * copyright (c) 2008 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/tableprint.h b/libavcodec/tableprint.h
index daa89fe..26d063e 100644
--- a/libavcodec/tableprint.h
+++ b/libavcodec/tableprint.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/tak.c b/libavcodec/tak.c
index 867a84b..ed41ca8 100644
--- a/libavcodec/tak.c
+++ b/libavcodec/tak.c
@@ -2,28 +2,49 @@
  * TAK common code
  * Copyright (c) 2012 Paul B Mahol
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "libavutil/bswap.h"
 #include "libavutil/crc.h"
 #include "libavutil/intreadwrite.h"
 #include "tak.h"
 
+static const int64_t tak_channel_layouts[] = { /* indexed by the per-channel layout value read in avpriv_tak_parse_streaminfo(); the selected entry is OR-ed into the channel mask */
+    0, /* value 0 contributes no bit to the mask */
+    AV_CH_FRONT_LEFT,
+    AV_CH_FRONT_RIGHT,
+    AV_CH_FRONT_CENTER,
+    AV_CH_LOW_FREQUENCY,
+    AV_CH_BACK_LEFT,
+    AV_CH_BACK_RIGHT,
+    AV_CH_FRONT_LEFT_OF_CENTER,
+    AV_CH_FRONT_RIGHT_OF_CENTER,
+    AV_CH_BACK_CENTER,
+    AV_CH_SIDE_LEFT,
+    AV_CH_SIDE_RIGHT,
+    AV_CH_TOP_CENTER,
+    AV_CH_TOP_FRONT_LEFT,
+    AV_CH_TOP_FRONT_CENTER,
+    AV_CH_TOP_FRONT_RIGHT,
+    AV_CH_TOP_BACK_LEFT,
+    AV_CH_TOP_BACK_CENTER,
+    AV_CH_TOP_BACK_RIGHT,
+};
+
 static const uint16_t frame_duration_type_quants[] = {
     3, 4, 6, 8, 4096, 8192, 16384, 512, 1024, 2048,
 };
@@ -51,22 +72,6 @@ static int tak_get_nb_samples(int sample_rate, enum TAKFrameSizeType type)
     return nb_samples;
 }
 
-static int crc_init = 0;
-#if CONFIG_SMALL
-#define CRC_TABLE_SIZE 257
-#else
-#define CRC_TABLE_SIZE 1024
-#endif
-static AVCRC crc_24[CRC_TABLE_SIZE];
-
-av_cold void ff_tak_init_crc(void)
-{
-    if (!crc_init) {
-        av_crc_init(crc_24, 0, 24, 0x864CFBU, sizeof(crc_24));
-        crc_init = 1;
-    }
-}
-
 int ff_tak_check_crc(const uint8_t *buf, unsigned int buf_size)
 {
     uint32_t crc, CRC;
@@ -75,8 +80,8 @@ int ff_tak_check_crc(const uint8_t *buf, unsigned int buf_size)
         return AVERROR_INVALIDDATA;
     buf_size -= 3;
 
-    CRC = av_bswap32(AV_RL24(buf + buf_size)) >> 8;
-    crc = av_crc(crc_24, 0xCE04B7U, buf, buf_size);
+    CRC = AV_RB24(buf + buf_size);
+    crc = av_crc(av_crc_get_table(AV_CRC_24_IEEE), 0xCE04B7U, buf, buf_size);
     if (CRC != crc)
         return AVERROR_INVALIDDATA;
 
@@ -108,8 +113,8 @@ void avpriv_tak_parse_streaminfo(GetBitContext *gb, TAKStreamInfo *s)
             for (i = 0; i < s->channels; i++) {
                 int value = get_bits(gb, TAK_FORMAT_CH_LAYOUT_BITS);
 
-                if (value > 0 && value <= 18)
-                    channel_mask |= 1 << (value - 1);
+                if (value < FF_ARRAY_ELEMS(tak_channel_layouts))
+                    channel_mask |= tak_channel_layouts[value];
             }
         }
     }
@@ -144,6 +149,9 @@ int ff_tak_decode_frame_header(AVCodecContext *avctx, GetBitContext *gb,
         align_get_bits(gb);
     }
 
+    if (ti->flags & TAK_FRAME_FLAG_HAS_METADATA)
+        return AVERROR_INVALIDDATA;
+
     skip_bits(gb, 24);
 
     return 0;
diff --git a/libavcodec/tak.h b/libavcodec/tak.h
index fa91149..e8e2dac 100644
--- a/libavcodec/tak.h
+++ b/libavcodec/tak.h
@@ -2,20 +2,20 @@
  * TAK decoder/demuxer common code
  * Copyright (c) 2012 Paul B Mahol
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -99,7 +99,7 @@
 
 enum TAKCodecType {
     TAK_CODEC_MONO_STEREO  = 2,
-    TAK_CODEC_MULTICHANNEL = 4
+    TAK_CODEC_MULTICHANNEL = 4,
 };
 
 enum TAKMetaDataType {
@@ -140,8 +140,6 @@ typedef struct TAKStreamInfo {
     int64_t           samples;
 } TAKStreamInfo;
 
-void ff_tak_init_crc(void);
-
 int ff_tak_check_crc(const uint8_t *buf, unsigned int buf_size);
 
 /**
@@ -162,5 +160,4 @@ void avpriv_tak_parse_streaminfo(GetBitContext *gb, TAKStreamInfo *s);
  */
 int ff_tak_decode_frame_header(AVCodecContext *avctx, GetBitContext *gb,
                                TAKStreamInfo *s, int log_level_offset);
-
 #endif /* AVCODEC_TAK_H */
diff --git a/libavcodec/tak_parser.c b/libavcodec/tak_parser.c
index 295df24..5d8460c 100644
--- a/libavcodec/tak_parser.c
+++ b/libavcodec/tak_parser.c
@@ -2,20 +2,20 @@
  * TAK parser
  * Copyright (c) 2012 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -33,12 +33,6 @@ typedef struct TAKParseContext {
     int           index;
 } TAKParseContext;
 
-static av_cold int tak_init(AVCodecParserContext *s)
-{
-    ff_tak_init_crc();
-    return 0;
-}
-
 static int tak_parse(AVCodecParserContext *s, AVCodecContext *avctx,
                      const uint8_t **poutbuf, int *poutbuf_size,
                      const uint8_t *buf, int buf_size)
@@ -67,23 +61,22 @@ static int tak_parse(AVCodecParserContext *s, AVCodecContext *avctx,
                                            buf_size);
             const uint8_t *tmp_buf = buf;
 
-            ff_combine_frame(pc, END_NOT_FOUND, &tmp_buf, &tmp_buf_size);
+            if (ff_combine_frame(pc, END_NOT_FOUND, &tmp_buf, &tmp_buf_size) != -1)
+                return AVERROR(ENOMEM);
             consumed += tmp_buf_size;
             buf      += tmp_buf_size;
             buf_size -= tmp_buf_size;
         }
 
-        for (; t->index + needed <= pc->index; t->index++)
-            if (pc->buffer[t->index]     == 0xFF &&
-                pc->buffer[t->index + 1] == 0xA0) {
+        for (; t->index + needed <= pc->index; t->index++) {
+            if (pc->buffer[ t->index     ] == 0xFF &&
+                pc->buffer[ t->index + 1 ] == 0xA0) {
                 TAKStreamInfo ti;
 
                 init_get_bits(&gb, pc->buffer + t->index,
                               8 * (pc->index - t->index));
                 if (!ff_tak_decode_frame_header(avctx, &gb,
-                                                pc->frame_start_found ? &ti
-                                                                      : &t->ti,
-                                                127) &&
+                        pc->frame_start_found ? &ti : &t->ti, 127) &&
                     !ff_tak_check_crc(pc->buffer + t->index,
                                       get_bits_count(&gb) / 8)) {
                     if (!pc->frame_start_found) {
@@ -91,6 +84,7 @@ static int tak_parse(AVCodecParserContext *s, AVCodecContext *avctx,
                         s->duration           = t->ti.last_frame_samples ?
                                                 t->ti.last_frame_samples :
                                                 t->ti.frame_samples;
+                        s->key_frame          = !!(t->ti.flags & TAK_FRAME_FLAG_HAS_INFO);
                     } else {
                         pc->frame_start_found = 0;
                         next                  = t->index - pc->index;
@@ -99,9 +93,10 @@ static int tak_parse(AVCodecParserContext *s, AVCodecContext *avctx,
                     }
                 }
             }
+        }
     }
-
 found:
+
     if (consumed && !buf_size && next == END_NOT_FOUND ||
         ff_combine_frame(pc, next, &buf, &buf_size) < 0) {
         *poutbuf      = NULL;
@@ -122,7 +117,6 @@ found:
 AVCodecParser ff_tak_parser = {
     .codec_ids      = { AV_CODEC_ID_TAK },
     .priv_data_size = sizeof(TAKParseContext),
-    .parser_init    = tak_init,
     .parser_parse   = tak_parse,
     .parser_close   = ff_parse_close,
 };
diff --git a/libavcodec/takdec.c b/libavcodec/takdec.c
index b0e84ea..5810a01 100644
--- a/libavcodec/takdec.c
+++ b/libavcodec/takdec.c
@@ -2,20 +2,20 @@
  * TAK decoder
  * Copyright (c) 2012 Paul B Mahol
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -29,42 +29,47 @@
 #include "libavutil/samplefmt.h"
 #include "tak.h"
 #include "audiodsp.h"
+#include "thread.h"
 #include "avcodec.h"
 #include "internal.h"
 #include "unary.h"
 
-#define MAX_SUBFRAMES     8                         // max number of subframes per channel
+#define MAX_SUBFRAMES     8                         ///< max number of subframes per channel
 #define MAX_PREDICTORS  256
 
 typedef struct MCDParam {
-    int8_t present;                                 // decorrelation parameter availability for this channel
-    int8_t index;                                   // index into array of decorrelation types
+    int8_t present;                                 ///< decorrelation parameter availability for this channel
+    int8_t index;                                   ///< index into array of decorrelation types
     int8_t chan1;
     int8_t chan2;
 } MCDParam;
 
 typedef struct TAKDecContext {
-    AVCodecContext *avctx;                          // parent AVCodecContext
+    AVCodecContext *avctx;                          ///< parent AVCodecContext
     AudioDSPContext adsp;
     TAKStreamInfo   ti;
-    GetBitContext   gb;                             // bitstream reader initialized to start at the current frame
+    GetBitContext   gb;                             ///< bitstream reader initialized to start at the current frame
 
     int             uval;
-    int             nb_samples;                     // number of samples in the current frame
+    int             nb_samples;                     ///< number of samples in the current frame
     uint8_t        *decode_buffer;
     unsigned int    decode_buffer_size;
-    int32_t        *decoded[TAK_MAX_CHANNELS];      // decoded samples for each channel
+    int32_t        *decoded[TAK_MAX_CHANNELS];      ///< decoded samples for each channel
 
     int8_t          lpc_mode[TAK_MAX_CHANNELS];
-    int8_t          sample_shift[TAK_MAX_CHANNELS]; // shift applied to every sample in the channel
+    int8_t          sample_shift[TAK_MAX_CHANNELS]; ///< shift applied to every sample in the channel
+    int16_t         predictors[MAX_PREDICTORS];
+    int             nb_subframes;                   ///< number of subframes in the current frame
+    int16_t         subframe_len[MAX_SUBFRAMES];    ///< subframe length in samples
     int             subframe_scale;
 
-    int8_t          dmode;                          // channel decorrelation type in the current frame
+    int8_t          dmode;                          ///< channel decorrelation type in the current frame
 
-    MCDParam        mcdparams[TAK_MAX_CHANNELS];    // multichannel decorrelation parameters
+    MCDParam        mcdparams[TAK_MAX_CHANNELS];    ///< multichannel decorrelation parameters
 
-    int16_t        *residues;
-    unsigned int    residues_buf_size;
+    int8_t          coding_mode[128];
+    DECLARE_ALIGNED(16, int16_t, filter)[MAX_PREDICTORS];
+    DECLARE_ALIGNED(16, int16_t, residues)[544];
 } TAKDecContext;
 
 static const int8_t mc_dmodes[] = { 1, 3, 4, 6, };
@@ -132,14 +137,9 @@ static const struct CParam {
     { 0x1A, 0x1800000, 0x1800000, 0x6800000, 0xC000000 },
 };
 
-static av_cold void tak_init_static_data(AVCodec *codec)
-{
-    ff_tak_init_crc();
-}
-
 static int set_bps_params(AVCodecContext *avctx)
 {
-    switch (avctx->bits_per_coded_sample) {
+    switch (avctx->bits_per_raw_sample) {
     case 8:
         avctx->sample_fmt = AV_SAMPLE_FMT_U8P;
         break;
@@ -150,11 +150,10 @@ static int set_bps_params(AVCodecContext *avctx)
         avctx->sample_fmt = AV_SAMPLE_FMT_S32P;
         break;
     default:
-        av_log(avctx, AV_LOG_ERROR, "unsupported bits per sample: %d\n",
-               avctx->bits_per_coded_sample);
+        av_log(avctx, AV_LOG_ERROR, "invalid/unsupported bits per sample: %d\n",
+               avctx->bits_per_raw_sample);
         return AVERROR_INVALIDDATA;
     }
-    avctx->bits_per_raw_sample = avctx->bits_per_coded_sample;
 
     return 0;
 }
@@ -175,6 +174,7 @@ static av_cold int tak_decode_init(AVCodecContext *avctx)
     ff_audiodsp_init(&s->adsp);
 
     s->avctx = avctx;
+    avctx->bits_per_raw_sample = avctx->bits_per_coded_sample;
 
     set_sample_rate_params(avctx);
 
@@ -236,10 +236,10 @@ static void decode_lpc(int32_t *coeffs, int mode, int length)
     }
 }
 
-static int decode_segment(GetBitContext *gb, int mode, int32_t *decoded,
-                          int len)
+static int decode_segment(TAKDecContext *s, int8_t mode, int32_t *decoded, int len)
 {
     struct CParam code;
+    GetBitContext *gb = &s->gb;
     int i;
 
     if (!mode) {
@@ -290,7 +290,6 @@ static int decode_residues(TAKDecContext *s, int32_t *decoded, int length)
 
     if (get_bits1(gb)) {
         int wlength, rval;
-        int coding_mode[128];
 
         wlength = length / s->uval;
 
@@ -304,7 +303,7 @@ static int decode_residues(TAKDecContext *s, int32_t *decoded, int length)
         if (wlength <= 1 || wlength > 128)
             return AVERROR_INVALIDDATA;
 
-        coding_mode[0] = mode = get_bits(gb, 6);
+        s->coding_mode[0] = mode = get_bits(gb, 6);
 
         for (i = 1; i < wlength; i++) {
             int c = get_unary(gb, 1, 6);
@@ -328,14 +327,14 @@ static int decode_residues(TAKDecContext *s, int32_t *decoded, int length)
                 mode--;
                 break;
             }
-            coding_mode[i] = mode;
+            s->coding_mode[i] = mode;
         }
 
         i = 0;
         while (i < wlength) {
             int len = 0;
 
-            mode = coding_mode[i];
+            mode = s->coding_mode[i];
             do {
                 if (i >= wlength - 1)
                     len += rval;
@@ -345,15 +344,15 @@ static int decode_residues(TAKDecContext *s, int32_t *decoded, int length)
 
                 if (i == wlength)
                     break;
-            } while (coding_mode[i] == mode);
+            } while (s->coding_mode[i] == mode);
 
-            if ((ret = decode_segment(gb, mode, decoded, len)) < 0)
+            if ((ret = decode_segment(s, mode, decoded, len)) < 0)
                 return ret;
             decoded += len;
         }
     } else {
         mode = get_bits(gb, 6);
-        if ((ret = decode_segment(gb, mode, decoded, length)) < 0)
+        if ((ret = decode_segment(s, mode, decoded, length)) < 0)
             return ret;
     }
 
@@ -368,62 +367,13 @@ static int get_bits_esc4(GetBitContext *gb)
         return 0;
 }
 
-static void decode_filter_coeffs(TAKDecContext *s, int filter_order, int size,
-                                 int filter_quant, int16_t *filter)
-{
-    GetBitContext *gb = &s->gb;
-    int i, j, a, b;
-    int filter_tmp[MAX_PREDICTORS];
-    int16_t predictors[MAX_PREDICTORS];
-
-    predictors[0] = get_sbits(gb, 10);
-    predictors[1] = get_sbits(gb, 10);
-    predictors[2] = get_sbits(gb, size) << (10 - size);
-    predictors[3] = get_sbits(gb, size) << (10 - size);
-    if (filter_order > 4) {
-        int av_uninit(code_size);
-        int code_size_base = size - get_bits1(gb);
-
-        for (i = 4; i < filter_order; i++) {
-            if (!(i & 3))
-                code_size = code_size_base - get_bits(gb, 2);
-            predictors[i] = get_sbits(gb, code_size) << (10 - size);
-        }
-    }
-
-    filter_tmp[0] = predictors[0] << 6;
-    for (i = 1; i < filter_order; i++) {
-        int *p1 = &filter_tmp[0];
-        int *p2 = &filter_tmp[i - 1];
-
-        for (j = 0; j < (i + 1) / 2; j++) {
-            int tmp = *p1 + (predictors[i] * *p2 + 256 >> 9);
-            *p2     = *p2 + (predictors[i] * *p1 + 256 >> 9);
-            *p1     = tmp;
-            p1++;
-            p2--;
-        }
-
-        filter_tmp[i] = predictors[i] << 6;
-    }
-
-    a = 1 << (32 - (15 - filter_quant));
-    b = 1 << ((15 - filter_quant) - 1);
-    for (i = 0, j = filter_order - 1; i < filter_order / 2; i++, j--) {
-        filter[j] = a - ((filter_tmp[i] + b) >> (15 - filter_quant));
-        filter[i] = a - ((filter_tmp[j] + b) >> (15 - filter_quant));
-    }
-}
-
 static int decode_subframe(TAKDecContext *s, int32_t *decoded,
                            int subframe_size, int prev_subframe_size)
 {
-    LOCAL_ALIGNED_16(int16_t, filter, [MAX_PREDICTORS]);
     GetBitContext *gb = &s->gb;
-    int i, ret;
+    int x, y, i, j, ret = 0;
     int dshift, size, filter_quant, filter_order;
-
-    memset(filter, 0, MAX_PREDICTORS * sizeof(*filter));
+    int tfilter[MAX_PREDICTORS];
 
     if (!get_bits1(gb))
         return decode_residues(s, decoded, subframe_size);
@@ -466,30 +416,74 @@ static int decode_subframe(TAKDecContext *s, int32_t *decoded,
             return AVERROR_INVALIDDATA;
     }
 
-    decode_filter_coeffs(s, filter_order, size, filter_quant, filter);
+    s->predictors[0] = get_sbits(gb, 10);
+    s->predictors[1] = get_sbits(gb, 10);
+    s->predictors[2] = get_sbits(gb, size) << (10 - size);
+    s->predictors[3] = get_sbits(gb, size) << (10 - size);
+    if (filter_order > 4) {
+        int tmp = size - get_bits1(gb);
+
+        for (i = 4; i < filter_order; i++) {
+            if (!(i & 3))
+                x = tmp - get_bits(gb, 2);
+            s->predictors[i] = get_sbits(gb, x) << (10 - size);
+        }
+    }
+
+    tfilter[0] = s->predictors[0] << 6;
+    for (i = 1; i < filter_order; i++) {
+        int32_t *p1 = &tfilter[0];
+        int32_t *p2 = &tfilter[i - 1];
+
+        for (j = 0; j < (i + 1) / 2; j++) {
+            x     = *p1 + (s->predictors[i] * *p2 + 256 >> 9);
+            *p2  += s->predictors[i] * *p1 + 256 >> 9;
+            *p1++ = x;
+            p2--;
+        }
+
+        tfilter[i] = s->predictors[i] << 6;
+    }
+
+    x = 1 << (32 - (15 - filter_quant));
+    y = 1 << ((15 - filter_quant) - 1);
+    for (i = 0, j = filter_order - 1; i < filter_order / 2; i++, j--) {
+        s->filter[j] = x - ((tfilter[i] + y) >> (15 - filter_quant));
+        s->filter[i] = x - ((tfilter[j] + y) >> (15 - filter_quant));
+    }
 
     if ((ret = decode_residues(s, &decoded[filter_order],
                                subframe_size - filter_order)) < 0)
         return ret;
 
-    av_fast_malloc(&s->residues, &s->residues_buf_size,
-                   FFALIGN(subframe_size + 16, 16) * sizeof(*s->residues));
-    if (!s->residues)
-        return AVERROR(ENOMEM);
-    memset(s->residues, 0, s->residues_buf_size);
-
     for (i = 0; i < filter_order; i++)
         s->residues[i] = *decoded++ >> dshift;
 
-    for (i = 0; i < subframe_size - filter_order; i++) {
-        int v = 1 << (filter_quant - 1);
-
-        v += s->adsp.scalarproduct_int16(&s->residues[i], filter,
-                                         FFALIGN(filter_order, 16));
+    y    = FF_ARRAY_ELEMS(s->residues) - filter_order;
+    x    = subframe_size - filter_order;
+    while (x > 0) {
+        int tmp = FFMIN(y, x);
+
+        for (i = 0; i < tmp; i++) {
+            int v = 1 << (filter_quant - 1);
+
+            if (filter_order & -16)
+                v += s->adsp.scalarproduct_int16(&s->residues[i], s->filter,
+                                                 filter_order & -16);
+            for (j = filter_order & -16; j < filter_order; j += 4) {
+                v += s->residues[i + j + 3] * s->filter[j + 3] +
+                     s->residues[i + j + 2] * s->filter[j + 2] +
+                     s->residues[i + j + 1] * s->filter[j + 1] +
+                     s->residues[i + j    ] * s->filter[j    ];
+            }
+            v = (av_clip(v >> filter_quant, -8192, 8191) << dshift) - *decoded;
+            *decoded++ = v;
+            s->residues[filter_order + i] = v >> dshift;
+        }
 
-        v = (av_clip(v >> filter_quant, -8192, 8191) << dshift) - *decoded;
-        *decoded++ = v;
-        s->residues[filter_order + i] = v >> dshift;
+        x -= tmp;
+        if (x > 0)
+            memcpy(s->residues, &s->residues[y], 2 * filter_order);
     }
 
     emms_c();
@@ -503,50 +497,42 @@ static int decode_channel(TAKDecContext *s, int chan)
     GetBitContext *gb     = &s->gb;
     int32_t *decoded      = s->decoded[chan];
     int left              = s->nb_samples - 1;
-    int i, prev, ret, nb_subframes;
-    int subframe_len[MAX_SUBFRAMES];
+    int i = 0, ret, prev = 0;
 
     s->sample_shift[chan] = get_bits_esc4(gb);
-    if (s->sample_shift[chan] >= avctx->bits_per_coded_sample)
+    if (s->sample_shift[chan] >= avctx->bits_per_raw_sample)
         return AVERROR_INVALIDDATA;
 
-    /* NOTE: TAK 2.2.0 appears to set the sample value to 0 if
-     *       bits_per_coded_sample - sample_shift is 1, but this produces
-     *       non-bit-exact output. Reading the 1 bit using get_sbits() instead
-     *       of skipping it produces bit-exact output. This has been reported
-     *       to the TAK author. */
-    *decoded++        = get_sbits(gb,
-                                  avctx->bits_per_coded_sample -
-                                  s->sample_shift[chan]);
+    *decoded++ = get_sbits(gb, avctx->bits_per_raw_sample - s->sample_shift[chan]);
     s->lpc_mode[chan] = get_bits(gb, 2);
-    nb_subframes      = get_bits(gb, 3) + 1;
+    s->nb_subframes   = get_bits(gb, 3) + 1;
 
-    i = 0;
-    if (nb_subframes > 1) {
-        if (get_bits_left(gb) < (nb_subframes - 1) * 6)
+    if (s->nb_subframes > 1) {
+        if (get_bits_left(gb) < (s->nb_subframes - 1) * 6)
             return AVERROR_INVALIDDATA;
 
-        prev = 0;
-        for (; i < nb_subframes - 1; i++) {
-            int subframe_end = get_bits(gb, 6) * s->subframe_scale;
-            if (subframe_end <= prev)
+        for (; i < s->nb_subframes - 1; i++) {
+            int v = get_bits(gb, 6);
+
+            s->subframe_len[i] = (v - prev) * s->subframe_scale;
+            if (s->subframe_len[i] <= 0)
                 return AVERROR_INVALIDDATA;
-            subframe_len[i] = subframe_end - prev;
-            left           -= subframe_len[i];
-            prev            = subframe_end;
+
+            left -= s->subframe_len[i];
+            prev  = v;
         }
 
         if (left <= 0)
             return AVERROR_INVALIDDATA;
     }
-    subframe_len[i] = left;
+    s->subframe_len[i] = left;
 
     prev = 0;
-    for (i = 0; i < nb_subframes; i++) {
-        if ((ret = decode_subframe(s, decoded, subframe_len[i], prev)) < 0)
+    for (i = 0; i < s->nb_subframes; i++) {
+        if ((ret = decode_subframe(s, decoded, s->subframe_len[i], prev)) < 0)
             return ret;
-        decoded += subframe_len[i];
-        prev     = subframe_len[i];
+        decoded += s->subframe_len[i];
+        prev     = s->subframe_len[i];
     }
 
     return 0;
@@ -599,11 +585,8 @@ static int decorrelate(TAKDecContext *s, int c1, int c2, int length)
     case 6:
         FFSWAP(int32_t*, p1, p2);
     case 7: {
-        LOCAL_ALIGNED_16(int16_t, filter, [MAX_PREDICTORS]);
         int length2, order_half, filter_order, dval1, dval2;
-        int av_uninit(code_size);
-
-        memset(filter, 0, MAX_PREDICTORS * sizeof(*filter));
+        int tmp, x, code_size;
 
         if (length < 256)
             return AVERROR_INVALIDDATA;
@@ -616,7 +599,7 @@ static int decorrelate(TAKDecContext *s, int c1, int c2, int length)
         for (i = 0; i < filter_order; i++) {
             if (!(i & 3))
                 code_size = 14 - get_bits(gb, 3);
-            filter[i] = get_sbits(gb, code_size);
+            s->filter[i] = get_sbits(gb, code_size);
         }
 
         order_half = filter_order / 2;
@@ -640,24 +623,40 @@ static int decorrelate(TAKDecContext *s, int c1, int c2, int length)
             }
         }
 
-        av_fast_malloc(&s->residues, &s->residues_buf_size,
-                       FFALIGN(length + 16, 16) * sizeof(*s->residues));
-        if (!s->residues)
-            return AVERROR(ENOMEM);
-        memset(s->residues, 0, s->residues_buf_size);
 
-        for (i = 0; i < length; i++)
-            s->residues[i] = p2[i] >> dshift;
+        for (i = 0; i < filter_order; i++)
+            s->residues[i] = *p2++ >> dshift;
 
         p1 += order_half;
+        x = FF_ARRAY_ELEMS(s->residues) - filter_order;
+        for (; length2 > 0; length2 -= tmp) {
+            tmp = FFMIN(length2, x);
+
+            for (i = 0; i < tmp; i++)
+                s->residues[filter_order + i] = *p2++ >> dshift;
+
+            for (i = 0; i < tmp; i++) {
+                int v = 1 << 9;
+
+                if (filter_order == 16) {
+                    v += s->adsp.scalarproduct_int16(&s->residues[i], s->filter,
+                                                     filter_order);
+                } else {
+                    v += s->residues[i + 7] * s->filter[7] +
+                         s->residues[i + 6] * s->filter[6] +
+                         s->residues[i + 5] * s->filter[5] +
+                         s->residues[i + 4] * s->filter[4] +
+                         s->residues[i + 3] * s->filter[3] +
+                         s->residues[i + 2] * s->filter[2] +
+                         s->residues[i + 1] * s->filter[1] +
+                         s->residues[i    ] * s->filter[0];
+                }
 
-        for (i = 0; i < length2; i++) {
-            int v = 1 << 9;
-
-            v += s->adsp.scalarproduct_int16(&s->residues[i], filter,
-                                             FFALIGN(filter_order, 16));
+                v = (av_clip(v >> 10, -8192, 8191) << dshift) - *p1;
+                *p1++ = v;
+            }
 
-            p1[i] = (av_clip(v >> 10, -8192, 8191) << dshift) - p1[i];
+            memcpy(s->residues, &s->residues[tmp], 2 * filter_order);
         }
 
         emms_c();
@@ -673,24 +672,21 @@ static int tak_decode_frame(AVCodecContext *avctx, void *data,
 {
     TAKDecContext *s  = avctx->priv_data;
     AVFrame *frame    = data;
+    ThreadFrame tframe = { .f = data };
     GetBitContext *gb = &s->gb;
     int chan, i, ret, hsize;
 
     if (pkt->size < TAK_MIN_FRAME_HEADER_BYTES)
         return AVERROR_INVALIDDATA;
 
-    init_get_bits(gb, pkt->data, pkt->size * 8);
+    if ((ret = init_get_bits8(gb, pkt->data, pkt->size)) < 0)
+        return ret;
 
     if ((ret = ff_tak_decode_frame_header(avctx, gb, &s->ti, 0)) < 0)
         return ret;
 
-    if (s->ti.flags & TAK_FRAME_FLAG_HAS_METADATA) {
-        avpriv_request_sample(avctx, "Frame metadata");
-        return AVERROR_PATCHWELCOME;
-    }
-
     hsize = get_bits_count(gb) / 8;
-    if (avctx->err_recognition & AV_EF_CRCCHECK) {
+    if (avctx->err_recognition & (AV_EF_CRCCHECK|AV_EF_COMPLIANT)) {
         if (ff_tak_check_crc(pkt->data, hsize)) {
             av_log(avctx, AV_LOG_ERROR, "CRC error\n");
             if (avctx->err_recognition & AV_EF_EXPLODE)
@@ -724,11 +720,9 @@ static int tak_decode_frame(AVCodecContext *avctx, void *data,
         return AVERROR_INVALIDDATA;
     }
 
-    if (s->ti.bps != avctx->bits_per_coded_sample) {
-        avctx->bits_per_coded_sample = s->ti.bps;
-        if ((ret = set_bps_params(avctx)) < 0)
-            return ret;
-    }
+    avctx->bits_per_raw_sample = s->ti.bps;
+    if ((ret = set_bps_params(avctx)) < 0)
+        return ret;
     if (s->ti.sample_rate != avctx->sample_rate) {
         avctx->sample_rate = s->ti.sample_rate;
         set_sample_rate_params(avctx);
@@ -741,10 +735,11 @@ static int tak_decode_frame(AVCodecContext *avctx, void *data,
                                              : s->ti.frame_samples;
 
     frame->nb_samples = s->nb_samples;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+    if ((ret = ff_thread_get_buffer(avctx, &tframe, 0)) < 0)
         return ret;
+    ff_thread_finish_setup(avctx);
 
-    if (avctx->bits_per_coded_sample <= 16) {
+    if (avctx->bits_per_raw_sample <= 16) {
         int buf_size = av_samples_get_buffer_size(NULL, avctx->channels,
                                                   s->nb_samples,
                                                   AV_SAMPLE_FMT_S32P, 0);
@@ -765,7 +760,7 @@ static int tak_decode_frame(AVCodecContext *avctx, void *data,
         for (chan = 0; chan < avctx->channels; chan++) {
             int32_t *decoded = s->decoded[chan];
             for (i = 0; i < s->nb_samples; i++)
-                decoded[i] = get_sbits(gb, avctx->bits_per_coded_sample);
+                decoded[i] = get_sbits(gb, avctx->bits_per_raw_sample);
         }
     } else {
         if (s->ti.codec == TAK_CODEC_MONO_STEREO) {
@@ -774,9 +769,9 @@ static int tak_decode_frame(AVCodecContext *avctx, void *data,
                     return ret;
 
             if (avctx->channels == 2) {
-                if (get_bits1(gb)) {
-                    // some kind of subframe length, but it seems to be unused
-                    skip_bits(gb, 6);
+                s->nb_subframes = get_bits(gb, 1) + 1;
+                if (s->nb_subframes > 1) {
+                    s->subframe_len[1] = get_bits(gb, 6);
                 }
 
                 s->dmode = get_bits(gb, 3);
@@ -864,7 +859,7 @@ static int tak_decode_frame(AVCodecContext *avctx, void *data,
     else if (get_bits_left(gb) > 0)
         av_log(avctx, AV_LOG_DEBUG, "underread\n");
 
-    if (avctx->err_recognition & AV_EF_CRCCHECK) {
+    if (avctx->err_recognition & (AV_EF_CRCCHECK | AV_EF_COMPLIANT)) {
         if (ff_tak_check_crc(pkt->data + hsize,
                              get_bits_count(gb) / 8 - hsize)) {
             av_log(avctx, AV_LOG_ERROR, "CRC error\n");
@@ -905,12 +900,30 @@ static int tak_decode_frame(AVCodecContext *avctx, void *data,
     return pkt->size;
 }
 
+static int init_thread_copy(AVCodecContext *avctx)
+{
+    TAKDecContext *ctx = avctx->priv_data; /* private decoder state of this context */
+    ctx->avctx = avctx;                    /* store the back-pointer to its owner */
+    return 0;
+}
+
+static int update_thread_context(AVCodecContext *dst,
+                                 const AVCodecContext *src)
+{
+    const TAKDecContext *from = src->priv_data;
+    TAKDecContext       *to   = dst->priv_data;
+
+    /* Copy the stream info from src to dst; nothing to do for a self-update. */
+    if (dst != src)
+        to->ti = from->ti;
+    return 0;
+}
+
 static av_cold int tak_decode_close(AVCodecContext *avctx)
 {
     TAKDecContext *s = avctx->priv_data;
 
     av_freep(&s->decode_buffer);
-    av_freep(&s->residues);
 
     return 0;
 }
@@ -922,10 +935,11 @@ AVCodec ff_tak_decoder = {
     .id               = AV_CODEC_ID_TAK,
     .priv_data_size   = sizeof(TAKDecContext),
     .init             = tak_decode_init,
-    .init_static_data = tak_init_static_data,
     .close            = tak_decode_close,
     .decode           = tak_decode_frame,
-    .capabilities     = CODEC_CAP_DR1,
+    .init_thread_copy = ONLY_IF_THREADS_ENABLED(init_thread_copy),
+    .update_thread_context = ONLY_IF_THREADS_ENABLED(update_thread_context),
+    .capabilities     = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
     .sample_fmts      = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_U8P,
                                                         AV_SAMPLE_FMT_S16P,
                                                         AV_SAMPLE_FMT_S32P,
diff --git a/libavcodec/targa.c b/libavcodec/targa.c
index f077c03..b0c9b55 100644
--- a/libavcodec/targa.c
+++ b/libavcodec/targa.c
@@ -2,20 +2,20 @@
  * Targa (.tga) image decoder
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -28,22 +28,37 @@
 
 typedef struct TargaContext {
     GetByteContext gb;
-
-    int color_type;
-    int compression_type;
 } TargaContext;
 
+/* Step to the next stored line of a (possibly interleaved) image; NULL when done. */
+static uint8_t *advance_line(uint8_t *start, uint8_t *line,
+                             int stride, int *y, int h, int interleave)
+{
+    int row = *y + interleave;
+
+    /* Still inside the current interleave pass: skip ahead by `interleave` lines. */
+    if (row < h) {
+        *y = row;
+        return line + interleave * stride;
+    }
+    /* Pass exhausted: wrap to the first line of the next pass, if one remains. */
+    row = (row + 1) & (interleave - 1);
+    *y  = row;
+    return (row && row < h) ? start + row * stride : NULL;
+}
+
 static int targa_decode_rle(AVCodecContext *avctx, TargaContext *s,
-                            uint8_t *dst, int w, int h, int stride, int bpp)
+                            uint8_t *start, int w, int h, int stride,
+                            int bpp, int interleave)
 {
     int x, y;
     int depth = (bpp + 1) >> 3;
     int type, count;
-    int diff;
+    uint8_t *line = start;
+    uint8_t *dst  = line;
 
-    diff = stride - w * depth;
-    x = y = 0;
-    while (y < h) {
+    x = y = count = 0;
+    while (dst) {
         if (bytestream2_get_bytes_left(&s->gb) <= 0) {
             av_log(avctx, AV_LOG_ERROR,
                    "Ran ouf of data before end-of-image\n");
@@ -52,12 +67,6 @@ static int targa_decode_rle(AVCodecContext *avctx, TargaContext *s,
         type  = bytestream2_get_byteu(&s->gb);
         count = (type & 0x7F) + 1;
         type &= 0x80;
-        if (x + count > w && x + count + 1 > (h - y) * w) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "Packet went out of bounds: position (%i,%i) size %i\n",
-                   x, y, count);
-            return AVERROR_INVALIDDATA;
-        }
         if (!type) {
             do {
                 int n  = FFMIN(count, w - x);
@@ -67,10 +76,9 @@ static int targa_decode_rle(AVCodecContext *avctx, TargaContext *s,
                 x     += n;
                 if (x == w) {
                     x    = 0;
-                    y++;
-                    dst += diff;
+                    dst = line = advance_line(start, line, stride, &y, h, interleave);
                 }
-            } while (count > 0);
+            } while (dst && count > 0);
         } else {
             uint8_t tmp[4];
             bytestream2_get_buffer(&s->gb, tmp, depth);
@@ -84,12 +92,17 @@ static int targa_decode_rle(AVCodecContext *avctx, TargaContext *s,
                 } while (--n);
                 if (x == w) {
                     x    = 0;
-                    y++;
-                    dst += diff;
+                    dst = line = advance_line(start, line, stride, &y, h, interleave);
                 }
-            } while (count > 0);
+            } while (dst && count > 0);
         }
     }
+
+    if (count) {
+        av_log(avctx, AV_LOG_ERROR, "Packet went out of bounds\n");
+        return AVERROR_INVALIDDATA;
+    }
+
     return 0;
 }
 
@@ -101,14 +114,15 @@ static int decode_frame(AVCodecContext *avctx,
     AVFrame * const p = data;
     uint8_t *dst;
     int stride;
-    int idlen, compr, y, w, h, bpp, flags, ret;
+    int idlen, pal, compr, y, w, h, bpp, flags, ret;
     int first_clr, colors, csize;
+    int interleave;
 
     bytestream2_init(&s->gb, avpkt->data, avpkt->size);
 
     /* parse image header */
     idlen     = bytestream2_get_byte(&s->gb);
-    bytestream2_skip(&s->gb, 1); /* pal */
+    pal       = bytestream2_get_byte(&s->gb);
     compr     = bytestream2_get_byte(&s->gb);
     first_clr = bytestream2_get_le16(&s->gb);
     colors    = bytestream2_get_le16(&s->gb);
@@ -117,17 +131,29 @@ static int decode_frame(AVCodecContext *avctx,
     w         = bytestream2_get_le16(&s->gb);
     h         = bytestream2_get_le16(&s->gb);
     bpp       = bytestream2_get_byte(&s->gb);
+
+    if (bytestream2_get_bytes_left(&s->gb) <= idlen) {
+        av_log(avctx, AV_LOG_ERROR,
+                "Not enough data to read header\n");
+        return AVERROR_INVALIDDATA;
+    }
+
     flags     = bytestream2_get_byte(&s->gb);
+
+    if (!pal && (first_clr || colors || csize)) {
+        av_log(avctx, AV_LOG_WARNING, "File without colormap has colormap information set.\n");
+        // specification says we should ignore those values in this case
+        first_clr = colors = csize = 0;
+    }
+
     // skip identifier if any
     bytestream2_skip(&s->gb, idlen);
 
-    switch(bpp){
+    switch (bpp) {
     case 8:
         avctx->pix_fmt = ((compr & (~TGA_RLE)) == TGA_BW) ? AV_PIX_FMT_GRAY8 : AV_PIX_FMT_PAL8;
         break;
     case 15:
-        avctx->pix_fmt = AV_PIX_FMT_RGB555LE;
-        break;
     case 16:
         avctx->pix_fmt = AV_PIX_FMT_RGB555LE;
         break;
@@ -142,28 +168,34 @@ static int decode_frame(AVCodecContext *avctx,
         return AVERROR_INVALIDDATA;
     }
 
+    if (colors && (colors + first_clr) > 256) {
+        av_log(avctx, AV_LOG_ERROR, "Incorrect palette: %i colors with offset %i\n", colors, first_clr);
+        return AVERROR_INVALIDDATA;
+    }
+
     if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
         return ret;
 
-    if ((ret = ff_get_buffer(avctx, p, 0)) < 0){
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
         return ret;
-    }
-    if(flags & 0x20){
+    p->pict_type = AV_PICTURE_TYPE_I;
+
+    if (flags & TGA_TOPTOBOTTOM) {
         dst = p->data[0];
         stride = p->linesize[0];
-    }else{ //image is upside-down
+    } else { //image is upside-down
         dst = p->data[0] + p->linesize[0] * (h - 1);
         stride = -p->linesize[0];
     }
 
-    if(colors){
+    interleave = flags & TGA_INTERLEAVE2 ? 2 :
+                 flags & TGA_INTERLEAVE4 ? 4 : 1;
+
+    if (colors) {
         int pal_size, pal_sample_size;
-        if((colors + first_clr) > 256){
-            av_log(avctx, AV_LOG_ERROR, "Incorrect palette: %i colors with offset %i\n", colors, first_clr);
-            return AVERROR_INVALIDDATA;
-        }
+
         switch (csize) {
+        case 32: pal_sample_size = 4; break;
         case 24: pal_sample_size = 3; break;
         case 16:
         case 15: pal_sample_size = 2; break;
@@ -172,9 +204,9 @@ static int decode_frame(AVCodecContext *avctx,
             return AVERROR_INVALIDDATA;
         }
         pal_size = colors * pal_sample_size;
-        if(avctx->pix_fmt != AV_PIX_FMT_PAL8)//should not occur but skip palette anyway
+        if (avctx->pix_fmt != AV_PIX_FMT_PAL8) //should not occur but skip palette anyway
             bytestream2_skip(&s->gb, pal_size);
-        else{
+        else {
             int t;
             uint32_t *pal = ((uint32_t *)p->data[1]) + first_clr;
 
@@ -184,10 +216,14 @@ static int decode_frame(AVCodecContext *avctx,
                 return AVERROR_INVALIDDATA;
             }
             switch (pal_sample_size) {
+            case 4:
+                for (t = 0; t < colors; t++)
+                    *pal++ = bytestream2_get_le32u(&s->gb);
+                break;
             case 3:
                 /* RGB24 */
                 for (t = 0; t < colors; t++)
-                    *pal++ = bytestream2_get_le24u(&s->gb);
+                    *pal++ = (0xffU<<24) | bytestream2_get_le24u(&s->gb);
                 break;
             case 2:
                 /* RGB555 */
@@ -198,30 +234,59 @@ static int decode_frame(AVCodecContext *avctx,
                         ((v & 0x001F) <<  3);
                     /* left bit replication */
                     v |= (v & 0xE0E0E0U) >> 5;
-                    *pal++ = v;
+                    *pal++ = (0xffU<<24) | v;
                 }
                 break;
             }
             p->palette_has_changed = 1;
         }
     }
+
     if ((compr & (~TGA_RLE)) == TGA_NODATA) {
         memset(p->data[0], 0, p->linesize[0] * h);
     } else {
-        if(compr & TGA_RLE){
-            int res = targa_decode_rle(avctx, s, dst, w, h, stride, bpp);
+        if (compr & TGA_RLE) {
+            int res = targa_decode_rle(avctx, s, dst, w, h, stride, bpp, interleave);
             if (res < 0)
                 return res;
         } else {
             size_t img_size = w * ((bpp + 1) >> 3);
+            uint8_t *line;
             if (bytestream2_get_bytes_left(&s->gb) < img_size * h) {
                 av_log(avctx, AV_LOG_ERROR,
                        "Not enough data available for image\n");
                 return AVERROR_INVALIDDATA;
             }
-            for (y = 0; y < h; y++) {
-                bytestream2_get_bufferu(&s->gb, dst, img_size);
-                dst += stride;
+
+            line = dst;
+            y = 0;
+            do {
+                bytestream2_get_buffer(&s->gb, line, img_size);
+                line = advance_line(dst, line, stride, &y, h, interleave);
+            } while (line);
+        }
+    }
+
+    if (flags & TGA_RIGHTTOLEFT) { // right-to-left, needs horizontal flip
+        int x;
+        for (y = 0; y < h; y++) {
+            void *line = &p->data[0][y * p->linesize[0]];
+            for (x = 0; x < w >> 1; x++) {
+                switch (bpp) {
+                case 32:
+                    FFSWAP(uint32_t, ((uint32_t *)line)[x], ((uint32_t *)line)[w - x - 1]);
+                    break;
+                case 24:
+                    FFSWAP(uint8_t, ((uint8_t *)line)[3 * x    ], ((uint8_t *)line)[3 * w - 3 * x - 3]);
+                    FFSWAP(uint8_t, ((uint8_t *)line)[3 * x + 1], ((uint8_t *)line)[3 * w - 3 * x - 2]);
+                    FFSWAP(uint8_t, ((uint8_t *)line)[3 * x + 2], ((uint8_t *)line)[3 * w - 3 * x - 1]);
+                    break;
+                case 16:
+                    FFSWAP(uint16_t, ((uint16_t *)line)[x], ((uint16_t *)line)[w - x - 1]);
+                    break;
+                case 8:
+                    FFSWAP(uint8_t, ((uint8_t *)line)[x], ((uint8_t *)line)[w - x - 1]);
+                }
             }
         }
     }
diff --git a/libavcodec/targa.h b/libavcodec/targa.h
index f4ef553..c2f5224 100644
--- a/libavcodec/targa.h
+++ b/libavcodec/targa.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -38,4 +38,11 @@ enum TargaCompr {
     TGA_RLE    = 8, // flag pointing that data is RLE-coded
 };
 
+enum TargaFlags {
+    TGA_RIGHTTOLEFT = 1 << 4, // pixels in each row are stored right-to-left
+    TGA_TOPTOBOTTOM = 1 << 5, // rows are stored top-to-bottom (no vertical flip needed)
+    TGA_INTERLEAVE2 = 1 << 6, // rows are stored with 2-way interleaving
+    TGA_INTERLEAVE4 = 1 << 7, // rows are stored with 4-way interleaving
+};
+
 #endif /* AVCODEC_TARGA_H */
diff --git a/libavcodec/targa_y216dec.c b/libavcodec/targa_y216dec.c
new file mode 100644
index 0000000..5f4eeaa
--- /dev/null
+++ b/libavcodec/targa_y216dec.c
@@ -0,0 +1,83 @@
+/*
+ * Pinnacle TARGA CineWave YUV16 decoder
+ * Copyright (c) 2012 Carl Eugen Hoyos
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "internal.h"
+
+static av_cold int y216_decode_init(AVCodecContext *avctx)
+{
+    /* Advertise the output format: YUV422P16 frames carrying 14 bits per raw sample. */
+    avctx->bits_per_raw_sample = 14;
+    avctx->pix_fmt             = AV_PIX_FMT_YUV422P16;
+    return 0;
+}
+
+/* Unpack one frame of packed 16-bit U Y V Y words into the three planes of a YUV422P16 picture. */
+static int y216_decode_frame(AVCodecContext *avctx, void *data,
+                             int *got_frame, AVPacket *avpkt)
+{
+    AVFrame *pic = data;
+    const uint16_t *src = (uint16_t *)avpkt->data;
+    uint16_t *y, *u, *v;
+    int i, j, ret, aligned_width = FFALIGN(avctx->width, 4);
+    /* aligned_width is an int: a uint16_t would truncate large widths and defeat this size check. */
+    if (avpkt->size < 4LL * avctx->height * aligned_width) {
+        av_log(avctx, AV_LOG_ERROR, "Insufficient input data.\n");
+        return AVERROR(EINVAL);
+    }
+
+    if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
+        return ret;
+
+    pic->key_frame = 1;
+    pic->pict_type = AV_PICTURE_TYPE_I;
+
+    y = (uint16_t *)pic->data[0];
+    u = (uint16_t *)pic->data[1];
+    v = (uint16_t *)pic->data[2];
+    /* Per word: shift left by two and copy the top two bits into the vacated low bits. */
+    for (i = 0; i < avctx->height; i++) {
+        for (j = 0; j < avctx->width >> 1; j++) {
+            u[    j    ] = src[4 * j    ] << 2 | src[4 * j    ] >> 14;
+            y[2 * j    ] = src[4 * j + 1] << 2 | src[4 * j + 1] >> 14;
+            v[    j    ] = src[4 * j + 2] << 2 | src[4 * j + 2] >> 14;
+            y[2 * j + 1] = src[4 * j + 3] << 2 | src[4 * j + 3] >> 14;
+        }
+
+        y += pic->linesize[0] >> 1;
+        u += pic->linesize[1] >> 1;
+        v += pic->linesize[2] >> 1;
+        src += aligned_width << 1; /* one input row = 2 * aligned_width 16-bit words */
+    }
+
+    *got_frame = 1;
+    return avpkt->size;
+}
+
+AVCodec ff_targa_y216_decoder = { /* codec table entry for the "targa_y216" video decoder */
+    .name         = "targa_y216",
+    .long_name    = NULL_IF_CONFIG_SMALL("Pinnacle TARGA CineWave YUV16"),
+    .type         = AVMEDIA_TYPE_VIDEO,
+    .id           = AV_CODEC_ID_TARGA_Y216,
+    .init         = y216_decode_init,  /* sets pix_fmt and bits_per_raw_sample */
+    .decode       = y216_decode_frame, /* unpacks one packet into one picture */
+    .capabilities = CODEC_CAP_DR1,
+};
diff --git a/libavcodec/targaenc.c b/libavcodec/targaenc.c
index 7679029..d4483ec 100644
--- a/libavcodec/targaenc.c
+++ b/libavcodec/targaenc.c
@@ -2,20 +2,20 @@
  * Targa (.tga) image encoder
  * Copyright (c) 2007 Bobby Bingham
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -77,7 +77,7 @@ static int targa_encode_normal(uint8_t *outbuf, const AVFrame *pic, int bpp, int
 static int targa_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                               const AVFrame *p, int *got_packet)
 {
-    int bpp, picsize, datasize = -1, ret;
+    int bpp, picsize, datasize = -1, ret, i;
     uint8_t *out;
 
     if(avctx->width > 0xffff || avctx->height > 0xffff) {
@@ -85,10 +85,8 @@ static int targa_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         return AVERROR(EINVAL);
     }
     picsize = avpicture_get_size(avctx->pix_fmt, avctx->width, avctx->height);
-    if ((ret = ff_alloc_packet(pkt, picsize + 45)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "encoded frame too large\n");
+    if ((ret = ff_alloc_packet2(avctx, pkt, picsize + 45)) < 0)
         return ret;
-    }
 
     /* zero out the header and only set applicable fields */
     memset(pkt->data, 0, 12);
@@ -97,13 +95,39 @@ static int targa_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     /* image descriptor byte: origin is always top-left, bits 0-3 specify alpha */
     pkt->data[17] = 0x20 | (avctx->pix_fmt == AV_PIX_FMT_BGRA ? 8 : 0);
 
+    out = pkt->data + 18;  /* skip past the header we write */
+
+    avctx->bits_per_coded_sample = av_get_bits_per_pixel(av_pix_fmt_desc_get(avctx->pix_fmt));
     switch(avctx->pix_fmt) {
+    case AV_PIX_FMT_PAL8: {
+        int pal_bpp = 24; /* Only write 32bit palette if there is transparency information */
+        for (i = 0; i < 256; i++)
+            if (AV_RN32(p->data[1] + 4 * i) >> 24 != 0xFF) {
+                pal_bpp = 32;
+                break;
+            }
+        pkt->data[1]  = 1;          /* palette present */
+        pkt->data[2]  = TGA_PAL;    /* uncompressed palettised image */
+        pkt->data[6]  = 1;          /* palette contains 256 entries */
+        pkt->data[7]  = pal_bpp;    /* palette contains pal_bpp bit entries */
+        pkt->data[16] = 8;          /* bpp */
+        for (i = 0; i < 256; i++)
+            if (pal_bpp == 32) {
+                AV_WL32(pkt->data + 18 + 4 * i, *(uint32_t *)(p->data[1] + i * 4));
+            } else {
+            AV_WL24(pkt->data + 18 + 3 * i, *(uint32_t *)(p->data[1] + i * 4));
+            }
+        out += 32 * pal_bpp;        /* skip past the palette we just output */
+        break;
+        }
     case AV_PIX_FMT_GRAY8:
         pkt->data[2]  = TGA_BW;     /* uncompressed grayscale image */
+        avctx->bits_per_coded_sample = 0x28;
         pkt->data[16] = 8;          /* bpp */
         break;
     case AV_PIX_FMT_RGB555LE:
-        pkt->data[2]  = TGA_RGB;    /* uncompresses true-color image */
+        pkt->data[2]  = TGA_RGB;    /* uncompressed true-color image */
+        avctx->bits_per_coded_sample =
         pkt->data[16] = 16;         /* bpp */
         break;
     case AV_PIX_FMT_BGR24:
@@ -121,15 +145,13 @@ static int targa_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     }
     bpp = pkt->data[16] >> 3;
 
-    out = pkt->data + 18;  /* skip past the header we just output */
-
     /* try RLE compression */
     if (avctx->coder_type != FF_CODER_TYPE_RAW)
         datasize = targa_encode_rle(out, picsize, p, bpp, avctx->width, avctx->height);
 
     /* if that worked well, mark the picture as RLE compressed */
     if(datasize >= 0)
-        pkt->data[2] |= 8;
+        pkt->data[2] |= TGA_RLE;
 
     /* if RLE didn't make it smaller, go back to no compression */
     else datasize = targa_encode_normal(out, p, bpp, avctx->width, avctx->height);
@@ -175,7 +197,7 @@ AVCodec ff_targa_encoder = {
     .close          = targa_encode_close,
     .encode2        = targa_encode_frame,
     .pix_fmts       = (const enum AVPixelFormat[]){
-        AV_PIX_FMT_BGR24, AV_PIX_FMT_BGRA, AV_PIX_FMT_RGB555LE, AV_PIX_FMT_GRAY8,
+        AV_PIX_FMT_BGR24, AV_PIX_FMT_BGRA, AV_PIX_FMT_RGB555LE, AV_PIX_FMT_GRAY8, AV_PIX_FMT_PAL8,
         AV_PIX_FMT_NONE
     },
 };
diff --git a/libavcodec/textdec.c b/libavcodec/textdec.c
new file mode 100644
index 0000000..d904023
--- /dev/null
+++ b/libavcodec/textdec.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2012 Clément Bœsch
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Raw subtitles decoder
+ */
+
+#include "avcodec.h"
+#include "ass.h"
+#include "libavutil/bprint.h"
+#include "libavutil/opt.h"
+
+typedef struct { /* private context shared by the text-based subtitle decoders in this file */
+    AVClass *class;         /* presumably must stay first for the AVOption machinery -- TODO confirm */
+    const char *linebreaks; /* forwarded to ff_ass_bprint_text_event(); linebreak_init() sets it to "|" */
+    int keep_ass_markup;    /* value of the "keep_ass_markup" option (0 or 1), forwarded likewise */
+} TextContext;
+
+#define OFFSET(x) offsetof(TextContext, x) /* byte offset of field x inside TextContext */
+#define SD AV_OPT_FLAG_SUBTITLE_PARAM | AV_OPT_FLAG_DECODING_PARAM /* NOTE(review): unparenthesized; fine for .flags=SD only */
+static const AVOption options[] = { /* option table shared by every decoder class declared in this file */
+    { "keep_ass_markup", "Set if ASS tags must be escaped", OFFSET(keep_ass_markup), AV_OPT_TYPE_INT,    {.i64=0}, 0, 1, .flags=SD },
+    { NULL }
+};
+
+static int text_decode_frame(AVCodecContext *avctx, void *data,
+                             int *got_sub_ptr, AVPacket *avpkt) /* formats the packet text and adds it to the AVSubtitle in data */
+{
+    AVBPrint buf;
+    AVSubtitle *sub = data;
+    const char *ptr = avpkt->data;
+    const TextContext *text = avctx->priv_data;
+    const int ts_start     = av_rescale_q(avpkt->pts,      avctx->time_base, (AVRational){1,100});
+    const int ts_duration  = avpkt->duration != -1 ?
+                             av_rescale_q(avpkt->duration, avctx->time_base, (AVRational){1,100}) : -1;
+    /* Both timestamps are rescaled to 1/100 units; a duration of -1 is passed through unchanged. */
+    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
+    if (ptr && avpkt->size > 0 && *ptr) { /* skip missing, empty, or NUL-leading payloads */
+        ff_ass_bprint_text_event(&buf, ptr, avpkt->size, text->linebreaks, text->keep_ass_markup);
+        if (!av_bprint_is_complete(&buf)) { /* reported to the caller as ENOMEM */
+            av_bprint_finalize(&buf, NULL);
+            return AVERROR(ENOMEM);
+        }
+        ff_ass_add_rect(sub, buf.str, ts_start, ts_duration, 0); /* NOTE(review): return value ignored -- confirm whether failures need handling */
+    }
+    *got_sub_ptr = sub->num_rects > 0; /* output is reported only when sub holds at least one rect */
+    av_bprint_finalize(&buf, NULL);
+    return avpkt->size;
+}
+
+#define DECLARE_CLASS(decname) static const AVClass decname ## _decoder_class = { /* defines <decname>_decoder_class */ \
+    .class_name = #decname " decoder",      /* e.g. "text decoder" */ \
+    .item_name  = av_default_item_name,     \
+    .option     = decname ## _options,      /* <decname>_options must be defined before use */ \
+    .version    = LIBAVUTIL_VERSION_INT,    \
+}
+
+#if CONFIG_TEXT_DECODER
+#define text_options options /* DECLARE_CLASS(text) refers to text_options */
+DECLARE_CLASS(text);
+
+AVCodec ff_text_decoder = { /* codec table entry for the "text" subtitle decoder */
+    .name           = "text",
+    .long_name      = NULL_IF_CONFIG_SMALL("Raw text subtitle"),
+    .priv_data_size = sizeof(TextContext),
+    .type           = AVMEDIA_TYPE_SUBTITLE,
+    .id             = AV_CODEC_ID_TEXT,
+    .decode         = text_decode_frame,
+    .init           = ff_ass_subtitle_header_default, /* linebreak_init() is not used, so linebreaks is not assigned here */
+    .priv_class     = &text_decoder_class,
+};
+#endif
+
+#if CONFIG_VPLAYER_DECODER || CONFIG_PJS_DECODER || CONFIG_SUBVIEWER1_DECODER
+
+/* Init for the '|'-separated formats: record "|" as the line-break string, then
+ * delegate to ff_ass_subtitle_header_default(). */
+static int linebreak_init(AVCodecContext *avctx)
+{
+    ((TextContext *)avctx->priv_data)->linebreaks = "|";
+    return ff_ass_subtitle_header_default(avctx);
+}
+
+#if CONFIG_VPLAYER_DECODER
+#define vplayer_options options /* DECLARE_CLASS(vplayer) refers to vplayer_options */
+DECLARE_CLASS(vplayer);
+
+AVCodec ff_vplayer_decoder = { /* codec table entry for the "vplayer" subtitle decoder */
+    .name           = "vplayer",
+    .long_name      = NULL_IF_CONFIG_SMALL("VPlayer subtitle"),
+    .priv_data_size = sizeof(TextContext),
+    .type           = AVMEDIA_TYPE_SUBTITLE,
+    .id             = AV_CODEC_ID_VPLAYER,
+    .decode         = text_decode_frame,
+    .init           = linebreak_init, /* sets "|" as the linebreaks string */
+    .priv_class     = &vplayer_decoder_class,
+};
+#endif
+
+#if CONFIG_PJS_DECODER
+#define pjs_options options /* DECLARE_CLASS(pjs) refers to pjs_options */
+DECLARE_CLASS(pjs);
+
+AVCodec ff_pjs_decoder = { /* codec table entry for the "pjs" subtitle decoder */
+    .name           = "pjs",
+    .long_name      = NULL_IF_CONFIG_SMALL("PJS subtitle"),
+    .priv_data_size = sizeof(TextContext),
+    .type           = AVMEDIA_TYPE_SUBTITLE,
+    .id             = AV_CODEC_ID_PJS,
+    .decode         = text_decode_frame,
+    .init           = linebreak_init, /* sets "|" as the linebreaks string */
+    .priv_class     = &pjs_decoder_class,
+};
+#endif
+
+#if CONFIG_SUBVIEWER1_DECODER
+#define subviewer1_options options /* DECLARE_CLASS(subviewer1) refers to subviewer1_options */
+DECLARE_CLASS(subviewer1);
+
+AVCodec ff_subviewer1_decoder = { /* codec table entry for the "subviewer1" subtitle decoder */
+    .name           = "subviewer1",
+    .long_name      = NULL_IF_CONFIG_SMALL("SubViewer1 subtitle"),
+    .priv_data_size = sizeof(TextContext),
+    .type           = AVMEDIA_TYPE_SUBTITLE,
+    .id             = AV_CODEC_ID_SUBVIEWER1,
+    .decode         = text_decode_frame,
+    .init           = linebreak_init, /* sets "|" as the linebreaks string */
+    .priv_class     = &subviewer1_decoder_class,
+};
+#endif
+
+#endif /* text subtitles with '|' line break */
diff --git a/libavcodec/thread.h b/libavcodec/thread.h
index 864e67e..c848d7a 100644
--- a/libavcodec/thread.h
+++ b/libavcodec/thread.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2008 Alexander Strange <astrange@ithinksw.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -98,6 +98,16 @@ void ff_thread_report_progress(ThreadFrame *f, int progress, int field);
 void ff_thread_await_progress(ThreadFrame *f, int progress, int field);
 
 /**
+ * Wrapper around get_format() for frame-multithreaded codecs.
+ * Call this function instead of avctx->get_format().
+ * Cannot be called after the codec has called ff_thread_finish_setup().
+ *
+ * @param avctx The current context.
+ * @param fmt The list of available formats.
+ */
+enum AVPixelFormat ff_thread_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt);
+
+/**
  * Wrapper around get_buffer() for frame-multithreaded codecs.
  * Call this function instead of ff_get_buffer(f).
  * Cannot be called after the codec has called ff_thread_finish_setup().
@@ -125,4 +135,9 @@ int ff_thread_ref_frame(ThreadFrame *dst, ThreadFrame *src);
 int ff_thread_init(AVCodecContext *s);
 void ff_thread_free(AVCodecContext *s);
 
+int ff_alloc_entries(AVCodecContext *avctx, int count);
+void ff_reset_entries(AVCodecContext *avctx);
+void ff_thread_report_progress2(AVCodecContext *avctx, int field, int thread, int n);
+void ff_thread_await_progress2(AVCodecContext *avctx,  int field, int thread, int shift);
+
 #endif /* AVCODEC_THREAD_H */
diff --git a/libavcodec/tiertexseqv.c b/libavcodec/tiertexseqv.c
index 33b2579..7c62208 100644
--- a/libavcodec/tiertexseqv.c
+++ b/libavcodec/tiertexseqv.c
@@ -2,20 +2,20 @@
  * Tiertex Limited SEQ Video Decoder
  * Copyright (c) 2006 Gregory Montoir (cyx@users.sourceforge.net)
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -179,7 +179,7 @@ static int seqvideo_decode(SeqVideoContext *seq, const unsigned char *data, int
         for (i = 0; i < 256; i++) {
             for (j = 0; j < 3; j++, data++)
                 c[j] = (*data << 2) | (*data >> 4);
-            palette[i] = AV_RB24(c);
+            palette[i] = 0xFFU << 24 | AV_RB24(c);
         }
         seq->frame->palette_has_changed = 1;
     }
@@ -234,10 +234,8 @@ static int seqvideo_decode_frame(AVCodecContext *avctx,
 
     SeqVideoContext *seq = avctx->priv_data;
 
-    if ((ret = ff_reget_buffer(avctx, seq->frame)) < 0) {
-        av_log(seq->avctx, AV_LOG_ERROR, "tiertexseqvideo: reget_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, seq->frame)) < 0)
         return ret;
-    }
 
     if (seqvideo_decode(seq, buf, buf_size))
         return AVERROR_INVALIDDATA;
diff --git a/libavcodec/tiff.c b/libavcodec/tiff.c
index 2aff45a..5c9823a 100644
--- a/libavcodec/tiff.c
+++ b/libavcodec/tiff.c
@@ -1,21 +1,20 @@
 /*
- * TIFF image decoder
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -31,6 +30,7 @@
 #endif
 
 #include "libavutil/attributes.h"
+#include "libavutil/avstring.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/imgutils.h"
 #include "avcodec.h"
@@ -40,6 +40,8 @@
 #include "lzw.h"
 #include "mathops.h"
 #include "tiff.h"
+#include "tiff_data.h"
+#include "thread.h"
 
 typedef struct TiffContext {
     AVCodecContext *avctx;
@@ -52,36 +54,235 @@ typedef struct TiffContext {
     int le;
     enum TiffCompr compr;
     enum TiffPhotometric photometric;
+    int planar;
+    int subsampling[2];
     int fax_opts;
     int predictor;
     int fill_order;
+    uint32_t res[4];
 
     int strips, rps, sstype;
     int sot;
     int stripsizesoff, stripsize, stripoff, strippos;
     LZWState *lzw;
+
+    uint8_t *deinvert_buf;
+    int deinvert_buf_size;
+    uint8_t *yuv_line;
+    unsigned int yuv_line_size;
+
+    int geotag_count;
+    TiffGeoTag *geotags;
 } TiffContext;
 
-static unsigned tget_short(GetByteContext *gb, int le)
+static void free_geotags(TiffContext *const s) /* Release every stored geotag value string and then the geotag array itself. */
 {
-    return le ? bytestream2_get_le16(gb) : bytestream2_get_be16(gb);
+    int i;
+    for (i = 0; i < s->geotag_count; i++) {
+        if (s->geotags[i].val)
+            av_freep(&s->geotags[i].val);
+    }
+    av_freep(&s->geotags);
+    s->geotag_count = 0; /* keep the count consistent with the released array */
+}
+
+#define RET_GEOKEY(TYPE, array, element) /* needs an int 'key' in scope; returns from the ENCLOSING function on a hit */\
+    if (key >= TIFF_##TYPE##_KEY_ID_OFFSET &&\
+        key - TIFF_##TYPE##_KEY_ID_OFFSET < FF_ARRAY_ELEMS(ff_tiff_##array##_name_type_map))\
+        return ff_tiff_##array##_name_type_map[key - TIFF_##TYPE##_KEY_ID_OFFSET].element; /* table is indexed by (key - table offset) */
+
+static const char *get_geokey_name(int key) /* Look up the name of a GeoTIFF key id in the vert/proj/geog/conf tables. */
+{
+    RET_GEOKEY(VERT, vert, name); /* each RET_GEOKEY returns right away when key falls inside that table's id range */
+    RET_GEOKEY(PROJ, proj, name);
+    RET_GEOKEY(GEOG, geog, name);
+    RET_GEOKEY(CONF, conf, name);
+
+    return NULL; /* key lies outside every table */
+}
+
+static int get_geokey_type(int key) /* Look up the 'type' field recorded for a GeoTIFF key id in the vert/proj/geog/conf tables. */
+{
+    RET_GEOKEY(VERT, vert, type); /* each RET_GEOKEY returns right away when key falls inside that table's id range */
+    RET_GEOKEY(PROJ, proj, type);
+    RET_GEOKEY(GEOG, geog, type);
+    RET_GEOKEY(CONF, conf, type);
+
+    return AVERROR_INVALIDDATA; /* key lies outside every table */
 }
 
-static unsigned tget_long(GetByteContext *gb, int le)
+static int cmp_id_key(const void *id, const void *k) /* bsearch() comparator: id points to an int, k to a TiffGeoTagKeyName element. */
 {
-    return le ? bytestream2_get_le32(gb) : bytestream2_get_be32(gb);
+    return *(const int*)id - ((const TiffGeoTagKeyName*)k)->key; /* <0, 0 or >0 as the searched id is below, equal to or above the element key */
 }
 
-static unsigned tget(GetByteContext *gb, int type, int le)
+static const char *search_keyval(const TiffGeoTagKeyName *keys, int n, int id) /* Find the name paired with id among the n entries of keys; NULL if absent. */
 {
-    switch (type) {
-    case TIFF_BYTE:  return bytestream2_get_byte(gb);
-    case TIFF_SHORT: return tget_short(gb, le);
-    case TIFF_LONG:  return tget_long(gb, le);
-    default:         return UINT_MAX;
+    TiffGeoTagKeyName *r = bsearch(&id, keys, n, sizeof(keys[0]), cmp_id_key); /* binary search: keys[] must be sorted ascending by .key */
+    if(r)
+        return r->name;
+
+    return NULL;
+}
+
+static char *get_geokey_val(int key, int val) /* Describe GeoTIFF value val of key as a newly allocated string; NULL if allocation fails. */
+{
+    char *ap;
+
+    if (val == TIFF_GEO_KEY_UNDEFINED) /* the two special values are handled before any per-key table */
+        return av_strdup("undefined");
+    if (val == TIFF_GEO_KEY_USER_DEFINED)
+        return av_strdup("User-Defined");
+
+#define RET_GEOKEY_VAL(TYPE, array) /* returns from get_geokey_val() with a copy of the table entry when val is in the table's range */\
+    if (val >= TIFF_##TYPE##_OFFSET &&\
+        val - TIFF_##TYPE##_OFFSET < FF_ARRAY_ELEMS(ff_tiff_##array##_codes))\
+        return av_strdup(ff_tiff_##array##_codes[val - TIFF_##TYPE##_OFFSET]);
+
+    switch (key) { /* every case that finds no match breaks out to the "Unknown-N" fallback below */
+    case TIFF_GT_MODEL_TYPE_GEOKEY:
+        RET_GEOKEY_VAL(GT_MODEL_TYPE, gt_model_type);
+        break;
+    case TIFF_GT_RASTER_TYPE_GEOKEY:
+        RET_GEOKEY_VAL(GT_RASTER_TYPE, gt_raster_type);
+        break;
+    case TIFF_GEOG_LINEAR_UNITS_GEOKEY: /* the three linear-unit keys share one code table */
+    case TIFF_PROJ_LINEAR_UNITS_GEOKEY:
+    case TIFF_VERTICAL_UNITS_GEOKEY:
+        RET_GEOKEY_VAL(LINEAR_UNIT, linear_unit);
+        break;
+    case TIFF_GEOG_ANGULAR_UNITS_GEOKEY: /* both angular-unit keys share one code table */
+    case TIFF_GEOG_AZIMUTH_UNITS_GEOKEY:
+        RET_GEOKEY_VAL(ANGULAR_UNIT, angular_unit);
+        break;
+    case TIFF_GEOGRAPHIC_TYPE_GEOKEY:
+        RET_GEOKEY_VAL(GCS_TYPE, gcs_type);
+        RET_GEOKEY_VAL(GCSE_TYPE, gcse_type); /* second table is consulted only when the first range did not match */
+        break;
+    case TIFF_GEOG_GEODETIC_DATUM_GEOKEY:
+        RET_GEOKEY_VAL(GEODETIC_DATUM, geodetic_datum);
+        RET_GEOKEY_VAL(GEODETIC_DATUM_E, geodetic_datum_e);
+        break;
+    case TIFF_GEOG_ELLIPSOID_GEOKEY:
+        RET_GEOKEY_VAL(ELLIPSOID, ellipsoid);
+        break;
+    case TIFF_GEOG_PRIME_MERIDIAN_GEOKEY:
+        RET_GEOKEY_VAL(PRIME_MERIDIAN, prime_meridian);
+        break;
+    case TIFF_PROJECTED_CS_TYPE_GEOKEY: /* this table is searched by key (search_keyval) rather than indexed by offset */
+        ap = av_strdup(search_keyval(ff_tiff_proj_cs_type_codes, FF_ARRAY_ELEMS(ff_tiff_proj_cs_type_codes), val));
+        if(ap) return ap; /* NULL means no entry or a failed copy; either way fall back to "Unknown-N" */
+        break;
+    case TIFF_PROJECTION_GEOKEY: /* searched by key as well */
+        ap = av_strdup(search_keyval(ff_tiff_projection_codes, FF_ARRAY_ELEMS(ff_tiff_projection_codes), val));
+        if(ap) return ap;
+        break;
+    case TIFF_PROJ_COORD_TRANS_GEOKEY:
+        RET_GEOKEY_VAL(COORD_TRANS, coord_trans);
+        break;
+    case TIFF_VERTICAL_CS_TYPE_GEOKEY:
+        RET_GEOKEY_VAL(VERT_CS, vert_cs);
+        RET_GEOKEY_VAL(ORTHO_VERT_CS, ortho_vert_cs);
+        break;
+
+    }
+
+    ap = av_malloc(14); /* 8 chars of "Unknown-" + up to 5 digits + NUL */
+    if (ap)
+        snprintf(ap, 14, "Unknown-%d", val); /* snprintf truncates anything that would not fit in the 14 bytes */
+    return ap;
+}
+
+static char *doubles2str(double *dp, int count, const char *sep) /* Join count doubles from dp with sep into a new av_malloc()ed string; NULL on failure, "" when count is 0. */
+{
+    int i;
+    char *ap, *ap0;
+    uint64_t component_len;
+    if (!sep) sep = ", "; /* default separator */
+    component_len = 24LL + strlen(sep); /* per-value budget: 24 bytes for one "%.15g" number plus the separator */
+    if (count >= (INT_MAX - 1)/component_len) /* size guard; a negative count converts to a huge unsigned value and is rejected here too */
+        return NULL;
+    ap = av_malloc(component_len * count + 1);
+    if (!ap)
+        return NULL;
+    ap0   = ap; /* remember the start; ap advances as components are appended */
+    ap[0] = '\0';
+    for (i = 0; i < count; i++) {
+        unsigned l = snprintf(ap, component_len, "%.15g%s", dp[i], sep);
+        if(l >= component_len) { /* truncated output, or a negative snprintf result wrapped to a large unsigned */
+            av_free(ap0);
+            return NULL;
+        }
+        ap += l;
+    }
+    if (count > 0) ap0[strlen(ap0) - strlen(sep)] = '\0'; /* drop the trailing separator; with no components the index would wrap below the buffer */
+    return ap0;
+}
+
+static int add_metadata(int count, int type,
+                        const char *name, const char *sep, TiffContext *s, AVFrame *frame) /* Dispatch on the TIFF field type to the matching ff_tadd_*_metadata() helper. */
+{
+    switch(type) { /* every helper reads from s->gb with byte order s->le and targets the frame's metadata */
+    case TIFF_DOUBLE: return ff_tadd_doubles_metadata(count, name, sep, &s->gb, s->le, avpriv_frame_get_metadatap(frame));
+    case TIFF_SHORT : return ff_tadd_shorts_metadata(count, name, sep, &s->gb, s->le, 0, avpriv_frame_get_metadatap(frame));
+    case TIFF_STRING: return ff_tadd_string_metadata(count, name, &s->gb, s->le, avpriv_frame_get_metadatap(frame)); /* sep is not used for strings */
+    default         : return AVERROR_INVALIDDATA; /* any other field type is rejected */
+    };
+}
+
+static void av_always_inline horizontal_fill(unsigned int bpp, uint8_t* dst,
+                                             int usePtr, const uint8_t *src,
+                                             uint8_t c, int width, int offset) /* Write width source bytes (src[] if usePtr, else the constant c) to dst, expanding 1/2/4-bit samples to one byte each. */
+{
+    switch (bpp) {
+    case 1: /* 8 samples per source byte, most significant bit first */
+        while (--width >= 0) { /* back to front, so callers in this file may pass dst == src for in-place expansion */
+            dst[(width+offset)*8+7] = (usePtr ? src[width] : c)      & 0x1;
+            dst[(width+offset)*8+6] = (usePtr ? src[width] : c) >> 1 & 0x1;
+            dst[(width+offset)*8+5] = (usePtr ? src[width] : c) >> 2 & 0x1;
+            dst[(width+offset)*8+4] = (usePtr ? src[width] : c) >> 3 & 0x1;
+            dst[(width+offset)*8+3] = (usePtr ? src[width] : c) >> 4 & 0x1;
+            dst[(width+offset)*8+2] = (usePtr ? src[width] : c) >> 5 & 0x1;
+            dst[(width+offset)*8+1] = (usePtr ? src[width] : c) >> 6 & 0x1;
+            dst[(width+offset)*8+0] = (usePtr ? src[width] : c) >> 7; /* slot +0 is written last: for width 0 in place it is the source byte itself */
+        }
+        break;
+    case 2: /* 4 samples per source byte */
+        while (--width >= 0) {
+            dst[(width+offset)*4+3] = (usePtr ? src[width] : c) & 0x3;
+            dst[(width+offset)*4+2] = (usePtr ? src[width] : c) >> 2 & 0x3;
+            dst[(width+offset)*4+1] = (usePtr ? src[width] : c) >> 4 & 0x3;
+            dst[(width+offset)*4+0] = (usePtr ? src[width] : c) >> 6;
+        }
+        break;
+    case 4: /* 2 samples per source byte */
+        while (--width >= 0) {
+            dst[(width+offset)*2+1] = (usePtr ? src[width] : c) & 0xF;
+            dst[(width+offset)*2+0] = (usePtr ? src[width] : c) >> 4;
+        }
+        break;
+    default: /* any other bpp (including 0): plain byte copy or fill; offset is then a byte offset into dst */
+        if (usePtr) {
+            memcpy(dst + offset, src, width);
+        } else {
+            memset(dst + offset, c, width);
+        }
     }
 }
 
+static int deinvert_buffer(TiffContext *s, const uint8_t *src, int size) /* Fill s->deinvert_buf with size bytes of src mapped through ff_reverse[]; 0 on success, AVERROR(ENOMEM) on failure. */
+{
+    int i;
+
+    av_fast_padded_malloc(&s->deinvert_buf, &s->deinvert_buf_size, size); /* buffer lives in the context and is reused across calls */
+    if (!s->deinvert_buf)
+        return AVERROR(ENOMEM);
+    for (i = 0; i < size; i++)
+        s->deinvert_buf[i] = ff_reverse[src[i]]; /* ff_reverse is presumably the per-byte bit-reversal table (used here for fill_order) */
+
+    return 0;
+}
+
 #if CONFIG_ZLIB
 static int tiff_uncompress(uint8_t *dst, unsigned long *len, const uint8_t *src,
                            int size)
@@ -89,7 +290,7 @@ static int tiff_uncompress(uint8_t *dst, unsigned long *len, const uint8_t *src,
     z_stream zstream = { 0 };
     int zret;
 
-    zstream.next_in   = src;
+    zstream.next_in   = (uint8_t *)src;
     zstream.avail_in  = size;
     zstream.next_out  = dst;
     zstream.avail_out = *len;
@@ -115,6 +316,13 @@ static int tiff_unpack_zlib(TiffContext *s, uint8_t *dst, int stride,
     zbuf   = av_malloc(outlen);
     if (!zbuf)
         return AVERROR(ENOMEM);
+    if (s->fill_order) {
+        if ((ret = deinvert_buffer(s, src, size)) < 0) {
+            av_free(zbuf);
+            return ret;
+        }
+        src = s->deinvert_buf;
+    }
     ret = tiff_uncompress(zbuf, &outlen, src, size);
     if (ret != Z_OK) {
         av_log(s->avctx, AV_LOG_ERROR,
@@ -125,7 +333,11 @@ static int tiff_unpack_zlib(TiffContext *s, uint8_t *dst, int stride,
     }
     src = zbuf;
     for (line = 0; line < lines; line++) {
-        memcpy(dst, src, width);
+        if (s->bpp < 8 && s->avctx->pix_fmt == AV_PIX_FMT_PAL8) {
+            horizontal_fill(s->bpp, dst, 1, src, 0, width, 0);
+        } else {
+            memcpy(dst, src, width);
+        }
         dst += stride;
         src += width;
     }
@@ -136,9 +348,10 @@ static int tiff_unpack_zlib(TiffContext *s, uint8_t *dst, int stride,
 
 
 static int tiff_unpack_fax(TiffContext *s, uint8_t *dst, int stride,
-                           const uint8_t *src, int size, int lines)
+                           const uint8_t *src, int size, int width, int lines)
 {
     int i, ret = 0;
+    int line;
     uint8_t *src2 = av_malloc((unsigned)size +
                               FF_INPUT_BUFFER_PADDING_SIZE);
 
@@ -161,21 +374,79 @@ static int tiff_unpack_fax(TiffContext *s, uint8_t *dst, int stride,
     memset(src2 + size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
     ret = ff_ccitt_unpack(s->avctx, src2, size, dst, lines, stride,
                           s->compr, s->fax_opts);
+    if (s->bpp < 8 && s->avctx->pix_fmt == AV_PIX_FMT_PAL8)
+        for (line = 0; line < lines; line++) {
+            horizontal_fill(s->bpp, dst, 1, dst, 0, width, 0);
+            dst += stride;
+        }
     av_free(src2);
     return ret;
 }
 
-static int tiff_unpack_strip(TiffContext *s, uint8_t *dst, int stride,
-                             const uint8_t *src, int size, int lines)
+static void unpack_yuv(TiffContext *s, AVFrame *p,
+                       const uint8_t *src, int lnum) /* Scatter one packed YCbCr row group from src into the three planes of p, starting at luma line lnum. */
+{
+    int i, j, k;
+    int w       = (s->width - 1) / s->subsampling[0] + 1; /* number of chroma samples per row (width / horizontal subsampling, rounded up) */
+    uint8_t *pu = &p->data[1][lnum / s->subsampling[1] * p->linesize[1]]; /* chroma row index is the luma line divided by the vertical subsampling */
+    uint8_t *pv = &p->data[2][lnum / s->subsampling[1] * p->linesize[2]];
+    if (s->width % s->subsampling[0] || s->height % s->subsampling[1]) { /* dimensions not a multiple of the subsampling: clamp luma coordinates */
+        for (i = 0; i < w; i++) {
+            for (j = 0; j < s->subsampling[1]; j++)
+                for (k = 0; k < s->subsampling[0]; k++)
+                    p->data[0][FFMIN(lnum + j, s->height-1) * p->linesize[0] +
+                               FFMIN(i * s->subsampling[0] + k, s->width-1)] = *src++; /* samples past the edge land on the last row/column */
+            *pu++ = *src++; /* each luma block is followed by one Cb and one Cr byte */
+            *pv++ = *src++;
+        }
+    }else{ /* exact multiple: same layout, no clamping needed */
+        for (i = 0; i < w; i++) {
+            for (j = 0; j < s->subsampling[1]; j++)
+                for (k = 0; k < s->subsampling[0]; k++)
+                    p->data[0][(lnum + j) * p->linesize[0] +
+                               i * s->subsampling[0] + k] = *src++;
+            *pu++ = *src++;
+            *pv++ = *src++;
+        }
+    }
+}
+
+
+static int tiff_unpack_strip(TiffContext *s, AVFrame *p, uint8_t *dst, int stride,
+                             const uint8_t *src, int size, int strip_start, int lines)
 {
     PutByteContext pb;
     int c, line, pixels, code, ret;
+    const uint8_t *ssrc = src;
     int width = ((s->width * s->bpp) + 7) >> 3;
+    int is_yuv = s->photometric == TIFF_PHOTOMETRIC_YCBCR;
+
+    if (s->planar)
+        width /= s->bppcount;
 
     if (size <= 0)
         return AVERROR_INVALIDDATA;
 
+    if (is_yuv) {
+        int bytes_per_row = (((s->width - 1) / s->subsampling[0] + 1) * s->bpp *
+                            s->subsampling[0] * s->subsampling[1] + 7) >> 3;
+        av_fast_padded_malloc(&s->yuv_line, &s->yuv_line_size, bytes_per_row);
+        if (s->yuv_line == NULL) {
+            av_log(s->avctx, AV_LOG_ERROR, "Not enough memory\n");
+            return AVERROR(ENOMEM);
+        }
+        dst = s->yuv_line;
+        stride = 0;
+        width = s->width * s->subsampling[1] + 2*(s->width / s->subsampling[0]);
+        av_assert0(width <= bytes_per_row);
+        av_assert0(s->bpp == 24);
+    }
+
     if (s->compr == TIFF_DEFLATE || s->compr == TIFF_ADOBE_DEFLATE) {
+        if (is_yuv) {
+            av_log(s->avctx, AV_LOG_ERROR, "YUV deflate is unsupported");
+            return AVERROR_PATCHWELCOME;
+        }
 #if CONFIG_ZLIB
         return tiff_unpack_zlib(s, dst, stride, src, size, width, lines);
 #else
@@ -186,6 +457,14 @@ static int tiff_unpack_strip(TiffContext *s, uint8_t *dst, int stride,
 #endif
     }
     if (s->compr == TIFF_LZW) {
+        if (s->fill_order) {
+            if ((ret = deinvert_buffer(s, src, size)) < 0)
+                return ret;
+            ssrc = src = s->deinvert_buf;
+        }
+        if (size > 1 && !src[0] && (src[1]&1)) {
+            av_log(s->avctx, AV_LOG_ERROR, "Old style LZW is unsupported\n");
+        }
         if ((ret = ff_lzw_decode_init(s->lzw, 8, src, size, FF_LZW_TIFF)) < 0) {
             av_log(s->avctx, AV_LOG_ERROR, "Error initializing LZW decoder\n");
             return ret;
@@ -197,6 +476,12 @@ static int tiff_unpack_strip(TiffContext *s, uint8_t *dst, int stride,
                        pixels, width);
                 return AVERROR_INVALIDDATA;
             }
+            if (s->bpp < 8 && s->avctx->pix_fmt == AV_PIX_FMT_PAL8)
+                horizontal_fill(s->bpp, dst, 1, dst, 0, width, 0);
+            if (is_yuv) {
+                unpack_yuv(s, p, dst, strip_start + line);
+                line += s->subsampling[1] - 1;
+            }
             dst += stride;
         }
         return 0;
@@ -204,60 +489,123 @@ static int tiff_unpack_strip(TiffContext *s, uint8_t *dst, int stride,
     if (s->compr == TIFF_CCITT_RLE ||
         s->compr == TIFF_G3        ||
         s->compr == TIFF_G4) {
-        return tiff_unpack_fax(s, dst, stride, src, size, lines);
+        if (is_yuv)
+            return AVERROR_INVALIDDATA;
+
+        return tiff_unpack_fax(s, dst, stride, src, size, width, lines);
     }
 
     bytestream2_init(&s->gb, src, size);
-    bytestream2_init_writer(&pb, dst, stride * lines);
+    bytestream2_init_writer(&pb, dst, is_yuv ? s->yuv_line_size : (stride * lines));
 
     for (line = 0; line < lines; line++) {
+        if (src - ssrc > size) {
+            av_log(s->avctx, AV_LOG_ERROR, "Source data overread\n");
+            return AVERROR_INVALIDDATA;
+        }
+
         if (bytestream2_get_bytes_left(&s->gb) == 0 || bytestream2_get_eof(&pb))
             break;
         bytestream2_seek_p(&pb, stride * line, SEEK_SET);
         switch (s->compr) {
         case TIFF_RAW:
+            if (ssrc + size - src < width)
+                return AVERROR_INVALIDDATA;
+
             if (!s->fill_order) {
-                bytestream2_copy_buffer(&pb, &s->gb, width);
+                horizontal_fill(s->bpp * (s->avctx->pix_fmt == AV_PIX_FMT_PAL8),
+                                dst, 1, src, 0, width, 0);
             } else {
                 int i;
                 for (i = 0; i < width; i++)
-                    bytestream2_put_byte(&pb, ff_reverse[bytestream2_get_byte(&s->gb)]);
+                    dst[i] = ff_reverse[src[i]];
             }
+            src += width;
             break;
         case TIFF_PACKBITS:
             for (pixels = 0; pixels < width;) {
-                code = ff_u8_to_s8(bytestream2_get_byte(&s->gb));
+                if (ssrc + size - src < 2) {
+                    av_log(s->avctx, AV_LOG_ERROR, "Read went out of bounds\n");
+                    return AVERROR_INVALIDDATA;
+                }
+                code = s->fill_order ? (int8_t) ff_reverse[*src++]: (int8_t) *src++;
                 if (code >= 0) {
                     code++;
-                    bytestream2_copy_buffer(&pb, &s->gb, code);
+                    if (pixels + code > width ||
+                        ssrc + size - src < code) {
+                        av_log(s->avctx, AV_LOG_ERROR,
+                               "Copy went out of bounds\n");
+                        return AVERROR_INVALIDDATA;
+                    }
+                    horizontal_fill(s->bpp * (s->avctx->pix_fmt == AV_PIX_FMT_PAL8),
+                                    dst, 1, src, 0, code, pixels);
+                    src    += code;
                     pixels += code;
                 } else if (code != -128) { // -127..-1
                     code = (-code) + 1;
-                    c    = bytestream2_get_byte(&s->gb);
-                    bytestream2_set_buffer(&pb, c, code);
+                    if (pixels + code > width) {
+                        av_log(s->avctx, AV_LOG_ERROR,
+                               "Run went out of bounds\n");
+                        return AVERROR_INVALIDDATA;
+                    }
+                    c = *src++;
+                    horizontal_fill(s->bpp * (s->avctx->pix_fmt == AV_PIX_FMT_PAL8),
+                                    dst, 0, NULL, c, code, pixels);
                     pixels += code;
                 }
             }
+            if (s->fill_order) {
+                int i;
+                for (i = 0; i < width; i++)
+                    dst[i] = ff_reverse[dst[i]];
+            }
             break;
         }
+        if (is_yuv) {
+            unpack_yuv(s, p, dst, strip_start + line);
+            line += s->subsampling[1] - 1;
+        }
+        dst += stride;
     }
     return 0;
 }
 
-static int init_image(TiffContext *s, AVFrame *frame)
+static int init_image(TiffContext *s, ThreadFrame *frame)
 {
     int i, ret;
     uint32_t *pal;
 
-    switch (s->bpp * 10 + s->bppcount) {
+    switch (s->planar * 1000 + s->bpp * 10 + s->bppcount) {
     case 11:
-        s->avctx->pix_fmt = AV_PIX_FMT_MONOBLACK;
-        break;
+        if (!s->palette_is_set) {
+            s->avctx->pix_fmt = AV_PIX_FMT_MONOBLACK;
+            break;
+        }
+    case 21:
+    case 41:
     case 81:
         s->avctx->pix_fmt = AV_PIX_FMT_PAL8;
         break;
     case 243:
-        s->avctx->pix_fmt = AV_PIX_FMT_RGB24;
+        if (s->photometric == TIFF_PHOTOMETRIC_YCBCR) {
+            if (s->subsampling[0] == 1 && s->subsampling[1] == 1) {
+                s->avctx->pix_fmt = AV_PIX_FMT_YUV444P;
+            } else if (s->subsampling[0] == 2 && s->subsampling[1] == 1) {
+                s->avctx->pix_fmt = AV_PIX_FMT_YUV422P;
+            } else if (s->subsampling[0] == 4 && s->subsampling[1] == 1) {
+                s->avctx->pix_fmt = AV_PIX_FMT_YUV411P;
+            } else if (s->subsampling[0] == 1 && s->subsampling[1] == 2) {
+                s->avctx->pix_fmt = AV_PIX_FMT_YUV440P;
+            } else if (s->subsampling[0] == 2 && s->subsampling[1] == 2) {
+                s->avctx->pix_fmt = AV_PIX_FMT_YUV420P;
+            } else if (s->subsampling[0] == 4 && s->subsampling[1] == 4) {
+                s->avctx->pix_fmt = AV_PIX_FMT_YUV410P;
+            } else {
+                av_log(s->avctx, AV_LOG_ERROR, "Unsupported YCbCr subsampling\n");
+                return AVERROR_PATCHWELCOME;
+            }
+        } else
+            s->avctx->pix_fmt = AV_PIX_FMT_RGB24;
         break;
     case 161:
         s->avctx->pix_fmt = s->le ? AV_PIX_FMT_GRAY16LE : AV_PIX_FMT_GRAY16BE;
@@ -272,7 +620,22 @@ static int init_image(TiffContext *s, AVFrame *frame)
         s->avctx->pix_fmt = AV_PIX_FMT_RGBA;
         break;
     case 483:
-        s->avctx->pix_fmt = s->le ? AV_PIX_FMT_RGB48LE : AV_PIX_FMT_RGB48BE;
+        s->avctx->pix_fmt = s->le ? AV_PIX_FMT_RGB48LE  : AV_PIX_FMT_RGB48BE;
+        break;
+    case 644:
+        s->avctx->pix_fmt = s->le ? AV_PIX_FMT_RGBA64LE  : AV_PIX_FMT_RGBA64BE;
+        break;
+    case 1243:
+        s->avctx->pix_fmt = AV_PIX_FMT_GBRP;
+        break;
+    case 1324:
+        s->avctx->pix_fmt = AV_PIX_FMT_GBRAP;
+        break;
+    case 1483:
+        s->avctx->pix_fmt = s->le ? AV_PIX_FMT_GBRP16LE : AV_PIX_FMT_GBRP16BE;
+        break;
+    case 1644:
+        s->avctx->pix_fmt = s->le ? AV_PIX_FMT_GBRAP16LE : AV_PIX_FMT_GBRAP16BE;
         break;
     default:
         av_log(s->avctx, AV_LOG_ERROR,
@@ -285,66 +648,63 @@ static int init_image(TiffContext *s, AVFrame *frame)
         if (ret < 0)
             return ret;
     }
-    if ((ret = ff_get_buffer(s->avctx, frame, 0)) < 0) {
-        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_thread_get_buffer(s->avctx, frame, 0)) < 0)
         return ret;
-    }
     if (s->avctx->pix_fmt == AV_PIX_FMT_PAL8) {
         if (s->palette_is_set) {
-            memcpy(frame->data[1], s->palette, sizeof(s->palette));
+            memcpy(frame->f->data[1], s->palette, sizeof(s->palette));
         } else {
             /* make default grayscale pal */
-            pal = (uint32_t *) frame->data[1];
-            for (i = 0; i < 256; i++)
-                pal[i] = i * 0x010101;
+            pal = (uint32_t *) frame->f->data[1];
+            for (i = 0; i < 1<<s->bpp; i++)
+                pal[i] = 0xFFU << 24 | i * 255 / ((1<<s->bpp) - 1) * 0x010101;
         }
     }
     return 0;
 }
 
-static int tiff_decode_tag(TiffContext *s)
+static void set_sar(TiffContext *s, unsigned tag, unsigned num, unsigned den) /* Record one resolution rational and, once both axes are known, derive the sample aspect ratio. */
+{
+    int offset = tag == TIFF_YRES ? 2 : 0; /* res[0..1] hold the TIFF_XRES num/den, res[2..3] the TIFF_YRES num/den */
+    s->res[offset++] = num;
+    s->res[offset]   = den;
+    if (s->res[0] && s->res[1] && s->res[2] && s->res[3]) /* needs all four parts non-zero, i.e. both tags seen with usable values */
+        av_reduce(&s->avctx->sample_aspect_ratio.num, &s->avctx->sample_aspect_ratio.den,
+                  s->res[2] * (uint64_t)s->res[1], s->res[0] * (uint64_t)s->res[3], INT32_MAX); /* SAR = yres / xres; products are formed in 64 bits */
+}
+
+static int tiff_decode_tag(TiffContext *s, AVFrame *frame)
 {
-    unsigned tag, type, count, off, value = 0;
+    unsigned tag, type, count, off, value = 0, value2 = 0;
     int i, start;
+    int pos;
+    int ret;
+    double *dp;
 
-    if (bytestream2_get_bytes_left(&s->gb) < 12)
-        return AVERROR_INVALIDDATA;
-    tag   = tget_short(&s->gb, s->le);
-    type  = tget_short(&s->gb, s->le);
-    count = tget_long(&s->gb, s->le);
-    off   = tget_long(&s->gb, s->le);
-    start = bytestream2_tell(&s->gb);
-
-    if (type == 0 || type >= FF_ARRAY_ELEMS(type_sizes)) {
-        av_log(s->avctx, AV_LOG_DEBUG, "Unknown tiff type (%u) encountered\n",
-               type);
-        return 0;
+    ret = ff_tread_tag(&s->gb, s->le, &tag, &type, &count, &start);
+    if (ret < 0) {
+        goto end;
     }
 
+    off = bytestream2_tell(&s->gb);
     if (count == 1) {
         switch (type) {
         case TIFF_BYTE:
         case TIFF_SHORT:
-            bytestream2_seek(&s->gb, -4, SEEK_CUR);
-            value = tget(&s->gb, type, s->le);
-            break;
         case TIFF_LONG:
-            value = off;
+            value = ff_tget(&s->gb, type, s->le);
+            break;
+        case TIFF_RATIONAL:
+            value  = ff_tget(&s->gb, TIFF_LONG, s->le);
+            value2 = ff_tget(&s->gb, TIFF_LONG, s->le);
             break;
         case TIFF_STRING:
             if (count <= 4) {
-                bytestream2_seek(&s->gb, -4, SEEK_CUR);
                 break;
             }
         default:
             value = UINT_MAX;
-            bytestream2_seek(&s->gb, off, SEEK_SET);
         }
-    } else {
-        if (count <= 4 && type_sizes[type] * count <= 4)
-            bytestream2_seek(&s->gb, -4, SEEK_CUR);
-        else
-            bytestream2_seek(&s->gb, off, SEEK_SET);
     }
 
     switch (tag) {
@@ -367,14 +727,13 @@ static int tiff_decode_tag(TiffContext *s)
         else {
             switch (type) {
             case TIFF_BYTE:
-                s->bpp = (off & 0xFF) + ((off >> 8) & 0xFF) +
-                         ((off >> 16) & 0xFF) + ((off >> 24) & 0xFF);
-                break;
             case TIFF_SHORT:
             case TIFF_LONG:
                 s->bpp = 0;
+                if (bytestream2_get_bytes_left(&s->gb) < type_sizes[type] * count)
+                    return AVERROR_INVALIDDATA;
                 for (i = 0; i < count; i++)
-                    s->bpp += tget(&s->gb, type, s->le);
+                    s->bpp += ff_tget(&s->gb, type, s->le);
                 break;
             default:
                 s->bpp = -1;
@@ -387,6 +746,11 @@ static int tiff_decode_tag(TiffContext *s)
                    "Samples per pixel requires a single value, many provided\n");
             return AVERROR_INVALIDDATA;
         }
+        if (value > 4U) {
+            av_log(s->avctx, AV_LOG_ERROR,
+                   "Samples per pixel %d is too large\n", value);
+            return AVERROR_INVALIDDATA;
+        }
         if (s->bppcount == 1)
             s->bpp *= value;
         s->bppcount = value;
@@ -449,6 +813,17 @@ static int tiff_decode_tag(TiffContext *s)
         s->strips = count;
         s->sstype = type;
         break;
+    case TIFF_XRES:
+    case TIFF_YRES:
+        set_sar(s, tag, value, value2);
+        break;
+    case TIFF_TILE_BYTE_COUNTS:
+    case TIFF_TILE_LENGTH:
+    case TIFF_TILE_OFFSETS:
+    case TIFF_TILE_WIDTH:
+        av_log(s->avctx, AV_LOG_ERROR, "Tiled images are not supported\n");
+        return AVERROR_PATCHWELCOME;
+        break;
     case TIFF_PREDICTOR:
         s->predictor = value;
         break;
@@ -458,11 +833,11 @@ static int tiff_decode_tag(TiffContext *s)
         case TIFF_PHOTOMETRIC_BLACK_IS_ZERO:
         case TIFF_PHOTOMETRIC_RGB:
         case TIFF_PHOTOMETRIC_PALETTE:
+        case TIFF_PHOTOMETRIC_YCBCR:
             s->photometric = value;
             break;
         case TIFF_PHOTOMETRIC_ALPHA_MASK:
         case TIFF_PHOTOMETRIC_SEPARATED:
-        case TIFF_PHOTOMETRIC_YCBCR:
         case TIFF_PHOTOMETRIC_CIE_LAB:
         case TIFF_PHOTOMETRIC_ICC_LAB:
         case TIFF_PHOTOMETRIC_ITU_LAB:
@@ -494,25 +869,32 @@ static int tiff_decode_tag(TiffContext *s)
         if (count / 3 > 256 ||
             bytestream2_get_bytes_left(&s->gb) < count / 3 * off * 3)
             return AVERROR_INVALIDDATA;
+
         pal_gb[0] = pal_gb[1] = pal_gb[2] = s->gb;
         bytestream2_skip(&pal_gb[1], count / 3 * off);
         bytestream2_skip(&pal_gb[2], count / 3 * off * 2);
+
         off = (type_sizes[type] - 1) << 3;
         for (i = 0; i < count / 3; i++) {
             uint32_t p = 0xFF000000;
-            p |= (tget(&pal_gb[0], type, s->le) >> off) << 16;
-            p |= (tget(&pal_gb[1], type, s->le) >> off) << 8;
-            p |=  tget(&pal_gb[2], type, s->le) >> off;
+            p |= (ff_tget(&pal_gb[0], type, s->le) >> off) << 16;
+            p |= (ff_tget(&pal_gb[1], type, s->le) >> off) << 8;
+            p |=  ff_tget(&pal_gb[2], type, s->le) >> off;
             s->palette[i] = p;
         }
         s->palette_is_set = 1;
         break;
     }
     case TIFF_PLANAR:
-        if (value == 2) {
-            avpriv_report_missing_feature(s->avctx, "Planar format");
-            return AVERROR_PATCHWELCOME;
+        s->planar = value == 2;
+        break;
+    case TIFF_YCBCR_SUBSAMPLING:
+        if (count != 2) {
+            av_log(s->avctx, AV_LOG_ERROR, "subsample count invalid\n");
+            return AVERROR_INVALIDDATA;
         }
+        for (i = 0; i < count; i++)
+            s->subsampling[i] = ff_tget(&s->gb, type, s->le);
         break;
     case TIFF_T4OPTIONS:
         if (s->compr == TIFF_G3)
@@ -522,6 +904,137 @@ static int tiff_decode_tag(TiffContext *s)
         if (s->compr == TIFF_G4)
             s->fax_opts = value;
         break;
+#define ADD_METADATA(count, name, sep)\
+    if ((ret = add_metadata(count, type, name, sep, s, frame)) < 0) {\
+        av_log(s->avctx, AV_LOG_ERROR, "Error allocating temporary buffer\n");\
+        goto end;\
+    }
+    case TIFF_MODEL_PIXEL_SCALE:
+        ADD_METADATA(count, "ModelPixelScaleTag", NULL);
+        break;
+    case TIFF_MODEL_TRANSFORMATION:
+        ADD_METADATA(count, "ModelTransformationTag", NULL);
+        break;
+    case TIFF_MODEL_TIEPOINT:
+        ADD_METADATA(count, "ModelTiepointTag", NULL);
+        break;
+    case TIFF_GEO_KEY_DIRECTORY:
+        ADD_METADATA(1, "GeoTIFF_Version", NULL);
+        ADD_METADATA(2, "GeoTIFF_Key_Revision", ".");
+        s->geotag_count   = ff_tget_short(&s->gb, s->le);
+        if (s->geotag_count > count / 4 - 1) {
+            s->geotag_count = count / 4 - 1;
+            av_log(s->avctx, AV_LOG_WARNING, "GeoTIFF key directory buffer shorter than specified\n");
+        }
+        if (bytestream2_get_bytes_left(&s->gb) < s->geotag_count * sizeof(int16_t) * 4) {
+            s->geotag_count = 0;
+            return -1;
+        }
+        s->geotags = av_mallocz_array(s->geotag_count, sizeof(TiffGeoTag));
+        if (!s->geotags) {
+            av_log(s->avctx, AV_LOG_ERROR, "Error allocating temporary buffer\n");
+            s->geotag_count = 0;
+            goto end;
+        }
+        for (i = 0; i < s->geotag_count; i++) {
+            s->geotags[i].key    = ff_tget_short(&s->gb, s->le);
+            s->geotags[i].type   = ff_tget_short(&s->gb, s->le);
+            s->geotags[i].count  = ff_tget_short(&s->gb, s->le);
+
+            if (!s->geotags[i].type)
+                s->geotags[i].val  = get_geokey_val(s->geotags[i].key, ff_tget_short(&s->gb, s->le));
+            else
+                s->geotags[i].offset = ff_tget_short(&s->gb, s->le);
+        }
+        break;
+    case TIFF_GEO_DOUBLE_PARAMS:
+        if (count >= INT_MAX / sizeof(int64_t))
+            return AVERROR_INVALIDDATA;
+        if (bytestream2_get_bytes_left(&s->gb) < count * sizeof(int64_t))
+            return AVERROR_INVALIDDATA;
+        dp = av_malloc_array(count, sizeof(double));
+        if (!dp) {
+            av_log(s->avctx, AV_LOG_ERROR, "Error allocating temporary buffer\n");
+            goto end;
+        }
+        for (i = 0; i < count; i++)
+            dp[i] = ff_tget_double(&s->gb, s->le);
+        for (i = 0; i < s->geotag_count; i++) {
+            if (s->geotags[i].type == TIFF_GEO_DOUBLE_PARAMS) {
+                if (s->geotags[i].count == 0
+                    || s->geotags[i].offset + s->geotags[i].count > count) {
+                    av_log(s->avctx, AV_LOG_WARNING, "Invalid GeoTIFF key %d\n", s->geotags[i].key);
+                } else {
+                    char *ap = doubles2str(&dp[s->geotags[i].offset], s->geotags[i].count, ", ");
+                    if (!ap) {
+                        av_log(s->avctx, AV_LOG_ERROR, "Error allocating temporary buffer\n");
+                        av_freep(&dp);
+                        return AVERROR(ENOMEM);
+                    }
+                    s->geotags[i].val = ap;
+                }
+            }
+        }
+        av_freep(&dp);
+        break;
+    case TIFF_GEO_ASCII_PARAMS:
+        pos = bytestream2_tell(&s->gb);
+        for (i = 0; i < s->geotag_count; i++) {
+            if (s->geotags[i].type == TIFF_GEO_ASCII_PARAMS) {
+                if (s->geotags[i].count == 0
+                    || s->geotags[i].offset +  s->geotags[i].count > count) {
+                    av_log(s->avctx, AV_LOG_WARNING, "Invalid GeoTIFF key %d\n", s->geotags[i].key);
+                } else {
+                    char *ap;
+
+                    bytestream2_seek(&s->gb, pos + s->geotags[i].offset, SEEK_SET);
+                    if (bytestream2_get_bytes_left(&s->gb) < s->geotags[i].count)
+                        return AVERROR_INVALIDDATA;
+                    ap = av_malloc(s->geotags[i].count);
+                    if (!ap) {
+                        av_log(s->avctx, AV_LOG_ERROR, "Error allocating temporary buffer\n");
+                        return AVERROR(ENOMEM);
+                    }
+                    bytestream2_get_bufferu(&s->gb, ap, s->geotags[i].count);
+                    ap[s->geotags[i].count - 1] = '\0'; //replace the "|" delimiter with a 0 byte
+                    s->geotags[i].val = ap;
+                }
+            }
+        }
+        break;
+    case TIFF_ARTIST:
+        ADD_METADATA(count, "artist", NULL);
+        break;
+    case TIFF_COPYRIGHT:
+        ADD_METADATA(count, "copyright", NULL);
+        break;
+    case TIFF_DATE:
+        ADD_METADATA(count, "date", NULL);
+        break;
+    case TIFF_DOCUMENT_NAME:
+        ADD_METADATA(count, "document_name", NULL);
+        break;
+    case TIFF_HOST_COMPUTER:
+        ADD_METADATA(count, "computer", NULL);
+        break;
+    case TIFF_IMAGE_DESCRIPTION:
+        ADD_METADATA(count, "description", NULL);
+        break;
+    case TIFF_MAKE:
+        ADD_METADATA(count, "make", NULL);
+        break;
+    case TIFF_MODEL:
+        ADD_METADATA(count, "model", NULL);
+        break;
+    case TIFF_PAGE_NAME:
+        ADD_METADATA(count, "page_name", NULL);
+        break;
+    case TIFF_PAGE_NUMBER:
+        ADD_METADATA(count, "page_number", " / ");
+        break;
+    case TIFF_SOFTWARE_NAME:
+        ADD_METADATA(count, "software", NULL);
+        break;
     default:
         if (s->avctx->err_recognition & AV_EF_EXPLODE) {
             av_log(s->avctx, AV_LOG_ERROR,
@@ -530,6 +1043,7 @@ static int tiff_decode_tag(TiffContext *s)
             return AVERROR_INVALIDDATA;
         }
     }
+end:
     bytestream2_seek(&s->gb, start, SEEK_SET);
     return 0;
 }
@@ -539,8 +1053,9 @@ static int decode_frame(AVCodecContext *avctx,
 {
     TiffContext *const s = avctx->priv_data;
     AVFrame *const p = data;
+    ThreadFrame frame = { .f = data };
     unsigned off;
-    int id, le, ret;
+    int le, ret, plane, planes;
     int i, j, entries, stride;
     unsigned soff, ssize;
     uint8_t *dst;
@@ -550,78 +1065,93 @@ static int decode_frame(AVCodecContext *avctx,
     bytestream2_init(&s->gb, avpkt->data, avpkt->size);
 
     // parse image header
-    if (avpkt->size < 8)
-        return AVERROR_INVALIDDATA;
-    id = bytestream2_get_le16(&s->gb);
-    if (id == 0x4949)
-        le = 1;
-    else if (id == 0x4D4D)
-        le = 0;
-    else {
-        av_log(avctx, AV_LOG_ERROR, "TIFF header not found\n");
+    if ((ret = ff_tdecode_header(&s->gb, &le, &off))) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid TIFF header\n");
+        return ret;
+    } else if (off >= UINT_MAX - 14 || avpkt->size < off + 14) {
+        av_log(avctx, AV_LOG_ERROR, "IFD offset is greater than image size\n");
         return AVERROR_INVALIDDATA;
     }
     s->le          = le;
+    // TIFF_BPP is not a required tag and defaults to 1
+    s->bppcount    = s->bpp = 1;
     s->photometric = TIFF_PHOTOMETRIC_NONE;
     s->compr       = TIFF_RAW;
     s->fill_order  = 0;
-    // As TIFF 6.0 specification puts it "An arbitrary but carefully chosen number
-    // that further identifies the file as a TIFF file"
-    if (tget_short(&s->gb, le) != 42) {
-        av_log(avctx, AV_LOG_ERROR,
-               "The answer to life, universe and everything is not correct!\n");
-        return AVERROR_INVALIDDATA;
-    }
+    free_geotags(s);
+
     // Reset these offsets so we can tell if they were set this frame
     s->stripsizesoff = s->strippos = 0;
     /* parse image file directory */
-    off = tget_long(&s->gb, le);
-    if (off >= UINT_MAX - 14 || avpkt->size < off + 14) {
-        av_log(avctx, AV_LOG_ERROR, "IFD offset is greater than image size\n");
-        return AVERROR_INVALIDDATA;
-    }
     bytestream2_seek(&s->gb, off, SEEK_SET);
-    entries = tget_short(&s->gb, le);
+    entries = ff_tget_short(&s->gb, le);
+    if (bytestream2_get_bytes_left(&s->gb) < entries * 12)
+        return AVERROR_INVALIDDATA;
     for (i = 0; i < entries; i++) {
-        if ((ret = tiff_decode_tag(s)) < 0)
+        if ((ret = tiff_decode_tag(s, p)) < 0)
+            return ret;
+    }
+
+    for (i = 0; i<s->geotag_count; i++) {
+        const char *keyname = get_geokey_name(s->geotags[i].key);
+        if (!keyname) {
+            av_log(avctx, AV_LOG_WARNING, "Unknown or unsupported GeoTIFF key %d\n", s->geotags[i].key);
+            continue;
+        }
+        if (get_geokey_type(s->geotags[i].key) != s->geotags[i].type) {
+            av_log(avctx, AV_LOG_WARNING, "Type of GeoTIFF key %d is wrong\n", s->geotags[i].key);
+            continue;
+        }
+        ret = av_dict_set(avpriv_frame_get_metadatap(p), keyname, s->geotags[i].val, 0);
+        if (ret<0) {
+            av_log(avctx, AV_LOG_ERROR, "Writing metadata with key '%s' failed\n", keyname);
             return ret;
+        }
     }
+
     if (!s->strippos && !s->stripoff) {
         av_log(avctx, AV_LOG_ERROR, "Image data is missing\n");
         return AVERROR_INVALIDDATA;
     }
     /* now we have the data and may start decoding */
-    if ((ret = init_image(s, p)) < 0)
+    if ((ret = init_image(s, &frame)) < 0)
         return ret;
 
     if (s->strips == 1 && !s->stripsize) {
         av_log(avctx, AV_LOG_WARNING, "Image data size missing\n");
         s->stripsize = avpkt->size - s->stripoff;
     }
-    stride = p->linesize[0];
-    dst    = p->data[0];
 
     if (s->stripsizesoff) {
-        if (s->stripsizesoff >= avpkt->size)
+        if (s->stripsizesoff >= (unsigned)avpkt->size)
             return AVERROR_INVALIDDATA;
         bytestream2_init(&stripsizes, avpkt->data + s->stripsizesoff,
                          avpkt->size - s->stripsizesoff);
     }
     if (s->strippos) {
-        if (s->strippos >= avpkt->size)
+        if (s->strippos >= (unsigned)avpkt->size)
             return AVERROR_INVALIDDATA;
         bytestream2_init(&stripdata, avpkt->data + s->strippos,
                          avpkt->size - s->strippos);
     }
 
+    if (s->rps <= 0) {
+        av_log(avctx, AV_LOG_ERROR, "rps %d invalid\n", s->rps);
+        return AVERROR_INVALIDDATA;
+    }
+
+    planes = s->planar ? s->bppcount : 1;
+    for (plane = 0; plane < planes; plane++) {
+        stride = p->linesize[plane];
+        dst    = p->data[plane];
     for (i = 0; i < s->height; i += s->rps) {
         if (s->stripsizesoff)
-            ssize = tget(&stripsizes, s->sstype, le);
+            ssize = ff_tget(&stripsizes, s->sstype, le);
         else
             ssize = s->stripsize;
 
         if (s->strippos)
-            soff = tget(&stripdata, s->sot, le);
+            soff = ff_tget(&stripdata, s->sot, le);
         else
             soff = s->stripoff;
 
@@ -629,7 +1159,7 @@ static int decode_frame(AVCodecContext *avctx,
             av_log(avctx, AV_LOG_ERROR, "Invalid strip size/offset\n");
             return AVERROR_INVALIDDATA;
         }
-        if ((ret = tiff_unpack_strip(s, dst, stride, avpkt->data + soff, ssize,
+        if ((ret = tiff_unpack_strip(s, p, dst, stride, avpkt->data + soff, ssize, i,
                                      FFMIN(s->rps, s->height - i))) < 0) {
             if (avctx->err_recognition & AV_EF_EXPLODE)
                 return ret;
@@ -638,16 +1168,28 @@ static int decode_frame(AVCodecContext *avctx,
         dst += s->rps * stride;
     }
     if (s->predictor == 2) {
-        dst   = p->data[0];
+        if (s->photometric == TIFF_PHOTOMETRIC_YCBCR) {
+            av_log(s->avctx, AV_LOG_ERROR, "predictor == 2 with YUV is unsupported");
+            return AVERROR_PATCHWELCOME;
+        }
+        dst   = p->data[plane];
         soff  = s->bpp >> 3;
+        if (s->planar)
+            soff  = FFMAX(soff / s->bppcount, 1);
         ssize = s->width * soff;
-        if (s->avctx->pix_fmt == AV_PIX_FMT_RGB48LE) {
+        if (s->avctx->pix_fmt == AV_PIX_FMT_RGB48LE ||
+            s->avctx->pix_fmt == AV_PIX_FMT_RGBA64LE ||
+            s->avctx->pix_fmt == AV_PIX_FMT_GBRP16LE ||
+            s->avctx->pix_fmt == AV_PIX_FMT_GBRAP16LE) {
             for (i = 0; i < s->height; i++) {
                 for (j = soff; j < ssize; j += 2)
                     AV_WL16(dst + j, AV_RL16(dst + j) + AV_RL16(dst + j - soff));
                 dst += stride;
             }
-        } else if (s->avctx->pix_fmt == AV_PIX_FMT_RGB48BE) {
+        } else if (s->avctx->pix_fmt == AV_PIX_FMT_RGB48BE ||
+                   s->avctx->pix_fmt == AV_PIX_FMT_RGBA64BE ||
+                   s->avctx->pix_fmt == AV_PIX_FMT_GBRP16BE ||
+                   s->avctx->pix_fmt == AV_PIX_FMT_GBRAP16BE) {
             for (i = 0; i < s->height; i++) {
                 for (j = soff; j < ssize; j += 2)
                     AV_WB16(dst + j, AV_RB16(dst + j) + AV_RB16(dst + j - soff));
@@ -663,13 +1205,22 @@ static int decode_frame(AVCodecContext *avctx,
     }
 
     if (s->photometric == TIFF_PHOTOMETRIC_WHITE_IS_ZERO) {
-        dst = p->data[0];
+        dst = p->data[plane];
         for (i = 0; i < s->height; i++) {
-            for (j = 0; j < p->linesize[0]; j++)
-                dst[j] = 255 - dst[j];
+            for (j = 0; j < p->linesize[plane]; j++)
+                dst[j] = (s->avctx->pix_fmt == AV_PIX_FMT_PAL8 ? (1<<s->bpp) - 1 : 255) - dst[j];
             dst += stride;
         }
     }
+    }
+
+    if (s->planar && s->bppcount > 2) {
+        FFSWAP(uint8_t*, p->data[0],     p->data[2]);
+        FFSWAP(int,      p->linesize[0], p->linesize[2]);
+        FFSWAP(uint8_t*, p->data[0],     p->data[1]);
+        FFSWAP(int,      p->linesize[0], p->linesize[1]);
+    }
+
     *got_frame = 1;
 
     return avpkt->size;
@@ -681,6 +1232,8 @@ static av_cold int tiff_init(AVCodecContext *avctx)
 
     s->width  = 0;
     s->height = 0;
+    s->subsampling[0] =
+    s->subsampling[1] = 1;
     s->avctx  = avctx;
     ff_lzw_decode_open(&s->lzw);
     ff_ccitt_unpack_init();
@@ -692,7 +1245,10 @@ static av_cold int tiff_end(AVCodecContext *avctx)
 {
     TiffContext *const s = avctx->priv_data;
 
+    free_geotags(s);
+
     ff_lzw_decode_close(&s->lzw);
+    av_freep(&s->deinvert_buf);
     return 0;
 }
 
@@ -705,5 +1261,6 @@ AVCodec ff_tiff_decoder = {
     .init           = tiff_init,
     .close          = tiff_end,
     .decode         = decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .init_thread_copy = ONLY_IF_THREADS_ENABLED(tiff_init),
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
 };
diff --git a/libavcodec/tiff.h b/libavcodec/tiff.h
index 8a3f7f7..11e9f16 100644
--- a/libavcodec/tiff.h
+++ b/libavcodec/tiff.h
@@ -1,27 +1,29 @@
 /*
- * TIFF tables
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
  * @file
  * TIFF tables
+ *
+ * For more information about the TIFF format, check the official docs at:
+ * http://partners.adobe.com/public/developer/tiff/index.html
  * @author Konstantin Shishkov
  */
 
@@ -29,6 +31,7 @@
 #define AVCODEC_TIFF_H
 
 #include <stdint.h>
+#include "tiff_common.h"
 
 /** abridged list of TIFF tags */
 enum TiffTags {
@@ -39,6 +42,10 @@ enum TiffTags {
     TIFF_COMPR,
     TIFF_PHOTOMETRIC        = 0x106,
     TIFF_FILL_ORDER         = 0x10A,
+    TIFF_DOCUMENT_NAME      = 0x10D,
+    TIFF_IMAGE_DESCRIPTION  = 0x10E,
+    TIFF_MAKE               = 0x10F,
+    TIFF_MODEL              = 0x110,
     TIFF_STRIP_OFFS         = 0x111,
     TIFF_SAMPLES_PER_PIXEL  = 0x115,
     TIFF_ROWSPERSTRIP       = 0x116,
@@ -46,18 +53,35 @@ enum TiffTags {
     TIFF_XRES               = 0x11A,
     TIFF_YRES               = 0x11B,
     TIFF_PLANAR             = 0x11C,
+    TIFF_PAGE_NAME          = 0x11D,
     TIFF_XPOS               = 0x11E,
     TIFF_YPOS               = 0x11F,
     TIFF_T4OPTIONS          = 0x124,
     TIFF_T6OPTIONS,
     TIFF_RES_UNIT           = 0x128,
+    TIFF_PAGE_NUMBER        = 0x129,
     TIFF_SOFTWARE_NAME      = 0x131,
+    TIFF_DATE               = 0x132,
+    TIFF_ARTIST             = 0x13B,
+    TIFF_HOST_COMPUTER      = 0x13C,
     TIFF_PREDICTOR          = 0x13D,
     TIFF_PAL                = 0x140,
+    TIFF_TILE_WIDTH         = 0x142,
+    TIFF_TILE_LENGTH        = 0x143,
+    TIFF_TILE_OFFSETS       = 0x144,
+    TIFF_TILE_BYTE_COUNTS   = 0x145,
+    TIFF_EXTRASAMPLES       = 0x152,
     TIFF_YCBCR_COEFFICIENTS = 0x211,
     TIFF_YCBCR_SUBSAMPLING  = 0x212,
     TIFF_YCBCR_POSITIONING  = 0x213,
     TIFF_REFERENCE_BW       = 0x214,
+    TIFF_COPYRIGHT          = 0x8298,
+    TIFF_MODEL_TIEPOINT     = 0x8482,
+    TIFF_MODEL_PIXEL_SCALE  = 0x830E,
+    TIFF_MODEL_TRANSFORMATION= 0x8480,
+    TIFF_GEO_KEY_DIRECTORY  = 0x87AF,
+    TIFF_GEO_DOUBLE_PARAMS  = 0x87B0,
+    TIFF_GEO_ASCII_PARAMS   = 0x87B1
 };
 
 /** list of TIFF compression types */
@@ -74,12 +98,52 @@ enum TiffCompr {
     TIFF_DEFLATE  = 0x80B2
 };
 
-enum TiffTypes {
-    TIFF_BYTE = 1,
-    TIFF_STRING,
-    TIFF_SHORT,
-    TIFF_LONG,
-    TIFF_RATIONAL,
+enum TiffGeoTagKey { /* numeric GeoKey identifiers; this is the type of TiffGeoTag.key */
+    TIFF_GT_MODEL_TYPE_GEOKEY                = 1024, /* 1024..1026: GT* keys, names listed in ff_tiff_conf_name_type_map */
+    TIFF_GT_RASTER_TYPE_GEOKEY               = 1025,
+    TIFF_GT_CITATION_GEOKEY                  = 1026,
+    TIFF_GEOGRAPHIC_TYPE_GEOKEY              = 2048, /* 2048..2061: Geog* keys, names listed in ff_tiff_geog_name_type_map */
+    TIFF_GEOG_CITATION_GEOKEY                = 2049,
+    TIFF_GEOG_GEODETIC_DATUM_GEOKEY          = 2050,
+    TIFF_GEOG_PRIME_MERIDIAN_GEOKEY          = 2051,
+    TIFF_GEOG_LINEAR_UNITS_GEOKEY            = 2052,
+    TIFF_GEOG_LINEAR_UNIT_SIZE_GEOKEY        = 2053,
+    TIFF_GEOG_ANGULAR_UNITS_GEOKEY           = 2054,
+    TIFF_GEOG_ANGULAR_UNIT_SIZE_GEOKEY       = 2055,
+    TIFF_GEOG_ELLIPSOID_GEOKEY               = 2056,
+    TIFF_GEOG_SEMI_MAJOR_AXIS_GEOKEY         = 2057,
+    TIFF_GEOG_SEMI_MINOR_AXIS_GEOKEY         = 2058,
+    TIFF_GEOG_INV_FLATTENING_GEOKEY          = 2059,
+    TIFF_GEOG_AZIMUTH_UNITS_GEOKEY           = 2060,
+    TIFF_GEOG_PRIME_MERIDIAN_LONG_GEOKEY     = 2061,
+    TIFF_PROJECTED_CS_TYPE_GEOKEY            = 3072, /* 3072..3095: projection-related keys */
+    TIFF_PCS_CITATION_GEOKEY                 = 3073,
+    TIFF_PROJECTION_GEOKEY                   = 3074,
+    TIFF_PROJ_COORD_TRANS_GEOKEY             = 3075,
+    TIFF_PROJ_LINEAR_UNITS_GEOKEY            = 3076,
+    TIFF_PROJ_LINEAR_UNIT_SIZE_GEOKEY        = 3077,
+    TIFF_PROJ_STD_PARALLEL1_GEOKEY           = 3078,
+    TIFF_PROJ_STD_PARALLEL2_GEOKEY           = 3079,
+    TIFF_PROJ_NAT_ORIGIN_LONG_GEOKEY         = 3080,
+    TIFF_PROJ_NAT_ORIGIN_LAT_GEOKEY          = 3081,
+    TIFF_PROJ_FALSE_EASTING_GEOKEY           = 3082,
+    TIFF_PROJ_FALSE_NORTHING_GEOKEY          = 3083,
+    TIFF_PROJ_FALSE_ORIGIN_LONG_GEOKEY       = 3084,
+    TIFF_PROJ_FALSE_ORIGIN_LAT_GEOKEY        = 3085,
+    TIFF_PROJ_FALSE_ORIGIN_EASTING_GEOKEY    = 3086,
+    TIFF_PROJ_FALSE_ORIGIN_NORTHING_GEOKEY   = 3087,
+    TIFF_PROJ_CENTER_LONG_GEOKEY             = 3088,
+    TIFF_PROJ_CENTER_LAT_GEOKEY              = 3089,
+    TIFF_PROJ_CENTER_EASTING_GEOKEY          = 3090,
+    TIFF_PROJ_CENTER_NORTHING_GEOKEY         = 3091,
+    TIFF_PROJ_SCALE_AT_NAT_ORIGIN_GEOKEY     = 3092,
+    TIFF_PROJ_SCALE_AT_CENTER_GEOKEY         = 3093,
+    TIFF_PROJ_AZIMUTH_ANGLE_GEOKEY           = 3094,
+    TIFF_PROJ_STRAIGHT_VERT_POLE_LONG_GEOKEY = 3095,
+    TIFF_VERTICAL_CS_TYPE_GEOKEY             = 4096, /* 4096..4099: vertical keys */
+    TIFF_VERTICAL_CITATION_GEOKEY            = 4097,
+    TIFF_VERTICAL_DATUM_GEOKEY               = 4098,
+    TIFF_VERTICAL_UNITS_GEOKEY               = 4099
 };
 
 enum TiffPhotometric {
@@ -100,9 +164,28 @@ enum TiffPhotometric {
     TIFF_PHOTOMETRIC_LINEAR_RAW = 34892, /* Linear Raw (DNG) */
 };
 
-/** sizes of various TIFF field types (string size = 100)*/
-static const uint8_t type_sizes[6] = {
-    0, 1, 100, 2, 4, 8
+enum TiffGeoTagType { /* value type of a GeoKey; used as the type column of TiffGeoTagNameType */
+    GEOTIFF_SHORT  = 0,
+    GEOTIFF_DOUBLE = 34736, /* same number as TIFF_GEO_DOUBLE_PARAMS (0x87B0) */
+    GEOTIFF_STRING = 34737  /* same number as TIFF_GEO_ASCII_PARAMS (0x87B1) */
 };
 
+typedef struct TiffGeoTag { /* one GeoKey entry collected while parsing the tags of a frame */
+    enum TiffGeoTagKey key; /* GeoKey identifier; looked up with get_geokey_name() in decode_frame */
+    enum TiffTags type;     /* NOTE(review): compared with get_geokey_type() in decode_frame; presumably holds TiffGeoTagType-compatible numbers -- confirm */
+    int count;              /* in the visible string path of tiff_decode_tag: number of bytes copied into val */
+    int offset;             /* in the visible string path: added to a base position before the bytes are read */
+    char *val;              /* value as text; av_malloc()ed in the visible string path, passed to av_dict_set() in decode_frame */
+} TiffGeoTag;
+
+typedef struct TiffGeoTagKeyName { /* immutable pairing of a GeoKey identifier with a name string */
+    const enum TiffGeoTagKey key;  /* GeoKey identifier */
+    const char *const name;        /* name string associated with key */
+} TiffGeoTagKeyName;
+
+typedef struct TiffGeoTagNameType { /* immutable pairing of a GeoKey name with its value type */
+    const char *const name;         /* GeoKey name, e.g. "GTModelTypeGeoKey" */
+    const enum TiffGeoTagType type; /* GEOTIFF_SHORT, GEOTIFF_DOUBLE or GEOTIFF_STRING */
+} TiffGeoTagNameType;
+
 #endif /* AVCODEC_TIFF_H */
diff --git a/libavcodec/tiff_common.c b/libavcodec/tiff_common.c
new file mode 100644
index 0000000..35119af
--- /dev/null
+++ b/libavcodec/tiff_common.c
@@ -0,0 +1,313 @@
+/*
+ * TIFF Common Routines
+ * Copyright (c) 2013 Thilo Borgmann <thilo.borgmann _at_ mail.de>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * TIFF Common Routines
+ * @author Thilo Borgmann <thilo.borgmann _at_ mail.de>
+ */
+
+#include "tiff_common.h"
+
+
+int ff_tis_ifd(unsigned tag)
+{
+    /* Linear scan of ifd_tags[]; a match is reported as its 1-based position. */
+    int idx = 0;
+
+    while (idx < FF_ARRAY_ELEMS(ifd_tags))
+        if (ifd_tags[idx++] == tag)
+            return idx;
+    return 0;
+}
+
+
+unsigned ff_tget_short(GetByteContext *gb, int le)
+{   /* 16-bit read: little-endian when le is non-zero, big-endian otherwise */
+    return !le ? bytestream2_get_be16(gb) : bytestream2_get_le16(gb);
+}
+
+
+unsigned ff_tget_long(GetByteContext *gb, int le)
+{   /* 32-bit read: little-endian when le is non-zero, big-endian otherwise */
+    return !le ? bytestream2_get_be32(gb) : bytestream2_get_le32(gb);
+}
+
+
+double ff_tget_double(GetByteContext *gb, int le)
+{   /* fetch the raw 64-bit pattern in the requested byte order, then view it as a double */
+    av_alias64 raw = { .u64 = !le ? bytestream2_get_be64(gb) : bytestream2_get_le64(gb) };
+    return raw.f64;
+}
+
+
+unsigned ff_tget(GetByteContext *gb, int type, int le)
+{
+    /* Only BYTE, SHORT and LONG are readable here; any other type yields UINT_MAX. */
+    if (type == TIFF_BYTE)
+        return bytestream2_get_byte(gb);
+    if (type == TIFF_SHORT)
+        return ff_tget_short(gb, le);
+    return type == TIFF_LONG ? ff_tget_long(gb, le) : UINT_MAX;
+}
+
+static const char *auto_sep(int count, const char *sep, int i, int columns)
+{
+    if (sep) /* explicit separator goes before every element but the first */
+        return i == 0 ? "" : sep;
+    /* automatic layout: ", " within a row, "\n" at row starts of multi-row output */
+    if (i == 0 || i % columns == 0)
+        return count > columns ? "\n" : "";
+    return ", ";
+}
+
+int ff_tadd_rational_metadata(int count, const char *name, const char *sep,
+                              GetByteContext *gb, int le, AVDictionary **metadata)
+{
+    AVBPrint bp;
+    char *ap;
+    int32_t nom, denom;
+    int i, ret;
+
+    /* count must be positive and the input must still hold count pairs of 32-bit values */
+    if (count >= INT_MAX / sizeof(int64_t) || count <= 0)
+        return AVERROR_INVALIDDATA;
+    if (bytestream2_get_bytes_left(gb) < count * sizeof(int64_t))
+        return AVERROR_INVALIDDATA;
+
+    av_bprint_init(&bp, 10 * count, AV_BPRINT_SIZE_UNLIMITED);
+
+    for (i = 0; i < count; i++) {
+        nom   = ff_tget_long(gb, le);
+        denom = ff_tget_long(gb, le);
+        av_bprintf(&bp, "%s%7i:%-7i", auto_sep(count, sep, i, 4), nom, denom);
+    }
+
+    if ((ret = av_bprint_finalize(&bp, &ap)))
+        return ret;
+    if (!ap)
+        return AVERROR(ENOMEM);
+
+    /* ap now belongs to the dictionary call; pass a failed insertion on to the caller */
+    ret = av_dict_set(metadata, name, ap, AV_DICT_DONT_STRDUP_VAL);
+
+    return ret < 0 ? ret : 0;
+}
+
+
+int ff_tadd_long_metadata(int count, const char *name, const char *sep,
+                          GetByteContext *gb, int le, AVDictionary **metadata)
+{
+    AVBPrint bp;
+    char *ap;
+    int i, ret;
+
+    /* count must be positive and the input must still hold count 32-bit values */
+    if (count >= INT_MAX / sizeof(int32_t) || count <= 0)
+        return AVERROR_INVALIDDATA;
+    if (bytestream2_get_bytes_left(gb) < count * sizeof(int32_t))
+        return AVERROR_INVALIDDATA;
+
+    av_bprint_init(&bp, 10 * count, AV_BPRINT_SIZE_UNLIMITED);
+
+    for (i = 0; i < count; i++) {
+        av_bprintf(&bp, "%s%7i", auto_sep(count, sep, i, 8), ff_tget_long(gb, le));
+    }
+
+    if ((ret = av_bprint_finalize(&bp, &ap)))
+        return ret;
+    if (!ap)
+        return AVERROR(ENOMEM);
+
+    /* ap now belongs to the dictionary call; pass a failed insertion on to the caller */
+    ret = av_dict_set(metadata, name, ap, AV_DICT_DONT_STRDUP_VAL);
+
+    return ret < 0 ? ret : 0;
+}
+
+
+int ff_tadd_doubles_metadata(int count, const char *name, const char *sep,
+                             GetByteContext *gb, int le, AVDictionary **metadata)
+{
+    AVBPrint bp;
+    char *ap;
+    int i, ret;
+
+    /* count must be positive and the input must still hold count 64-bit values */
+    if (count >= INT_MAX / sizeof(int64_t) || count <= 0)
+        return AVERROR_INVALIDDATA;
+    if (bytestream2_get_bytes_left(gb) < count * sizeof(int64_t))
+        return AVERROR_INVALIDDATA;
+
+    av_bprint_init(&bp, 10 * count, 100 * count);
+
+    for (i = 0; i < count; i++) {
+        av_bprintf(&bp, "%s%.15g", auto_sep(count, sep, i, 4), ff_tget_double(gb, le));
+    }
+
+    if ((ret = av_bprint_finalize(&bp, &ap)))
+        return ret;
+    if (!ap)
+        return AVERROR(ENOMEM);
+
+    /* ap now belongs to the dictionary call; pass a failed insertion on to the caller */
+    ret = av_dict_set(metadata, name, ap, AV_DICT_DONT_STRDUP_VAL);
+
+    return ret < 0 ? ret : 0;
+}
+
+
+int ff_tadd_shorts_metadata(int count, const char *name, const char *sep,
+                            GetByteContext *gb, int le, int is_signed, AVDictionary **metadata)
+{
+    AVBPrint bp;
+    char *ap;
+    int i, ret;
+
+    /* count must be positive and the input must still hold count 16-bit values */
+    if (count >= INT_MAX / sizeof(int16_t) || count <= 0)
+        return AVERROR_INVALIDDATA;
+    if (bytestream2_get_bytes_left(gb) < count * sizeof(int16_t))
+        return AVERROR_INVALIDDATA;
+
+    av_bprint_init(&bp, 10 * count, AV_BPRINT_SIZE_UNLIMITED);
+
+    for (i = 0; i < count; i++) {
+        int v = is_signed ? (int16_t)ff_tget_short(gb, le) :  ff_tget_short(gb, le);
+        av_bprintf(&bp, "%s%5i", auto_sep(count, sep, i, 8), v);
+    }
+
+    if ((ret = av_bprint_finalize(&bp, &ap)))
+        return ret;
+    if (!ap)
+        return AVERROR(ENOMEM);
+
+    /* ap now belongs to the dictionary call; pass a failed insertion on to the caller */
+    ret = av_dict_set(metadata, name, ap, AV_DICT_DONT_STRDUP_VAL);
+
+    return ret < 0 ? ret : 0;
+}
+
+
+int ff_tadd_bytes_metadata(int count, const char *name, const char *sep,
+                           GetByteContext *gb, int le, int is_signed, AVDictionary **metadata)
+{
+    AVBPrint bp;
+    char *ap;
+    int i, ret;
+
+    /* unlike the wider variants a count of 0 is accepted here; le is not used */
+    if (count >= INT_MAX / sizeof(int8_t) || count < 0)
+        return AVERROR_INVALIDDATA;
+    if (bytestream2_get_bytes_left(gb) < count * sizeof(int8_t))
+        return AVERROR_INVALIDDATA;
+
+    av_bprint_init(&bp, 10 * count, AV_BPRINT_SIZE_UNLIMITED);
+
+    for (i = 0; i < count; i++) {
+        int v = is_signed ? (int8_t)bytestream2_get_byte(gb) :  bytestream2_get_byte(gb);
+        av_bprintf(&bp, "%s%3i", auto_sep(count, sep, i, 16), v);
+    }
+
+    if ((ret = av_bprint_finalize(&bp, &ap)))
+        return ret;
+    if (!ap)
+        return AVERROR(ENOMEM);
+
+    /* ap now belongs to the dictionary call; pass a failed insertion on to the caller */
+    ret = av_dict_set(metadata, name, ap, AV_DICT_DONT_STRDUP_VAL);
+
+    return ret < 0 ? ret : 0;
+}
+
+int ff_tadd_string_metadata(int count, const char *name,
+                            GetByteContext *gb, int le, AVDictionary **metadata)
+{
+    char *value;
+    int ret;
+
+    if (bytestream2_get_bytes_left(gb) < count || count < 0)
+        return AVERROR_INVALIDDATA;
+
+    value = av_malloc(count + 1);
+    if (!value)
+        return AVERROR(ENOMEM);
+    bytestream2_get_bufferu(gb, value, count);
+    value[count] = 0; /* the copy is always terminated, whatever the input holds */
+    /* value is handed over to the dictionary call; report a failed insertion */
+    ret = av_dict_set(metadata, name, value, AV_DICT_DONT_STRDUP_VAL);
+    return ret < 0 ? ret : 0;
+}
+
+
+int ff_tdecode_header(GetByteContext *gb, int *le, int *ifd_offset)
+{
+    /* byte-order mark (2 bytes) + magic number (2) + first IFD offset (4) */
+    if (bytestream2_get_bytes_left(gb) < 8)
+        return AVERROR_INVALIDDATA;
+
+    /* "II" and "MM" each consist of two identical bytes, so the byte order
+     * used for this first read does not matter; on an unknown mark *le is
+     * left holding the raw 16-bit value */
+    *le = bytestream2_get_le16u(gb);
+    if (*le != AV_RB16("II") && *le != AV_RB16("MM"))
+        return AVERROR_INVALIDDATA;
+    *le = *le == AV_RB16("II");
+
+    /* the number 42 further identifies the data as TIFF */
+    if (ff_tget_short(gb, *le) != 42)
+        return AVERROR_INVALIDDATA;
+
+    /* the remaining 4 header bytes are the offset of the first IFD */
+    *ifd_offset = ff_tget_long(gb, *le);
+
+    return 0;
+}
+
+
+int ff_tread_tag(GetByteContext *gb, int le, unsigned *tag, unsigned *type,
+                 unsigned *count, int *next)
+{
+    int is_ifd, out_of_line;
+
+    /* fixed part of an entry: 16-bit tag id, 16-bit type, 32-bit count */
+    *tag   = ff_tget_short(gb, le);
+    *type  = ff_tget_short(gb, le);
+    *count = ff_tget_long(gb, le);
+
+    /* a 4-byte value/offset field comes next; the following tag starts after it */
+    *next = bytestream2_tell(gb) + 4;
+
+    /* type 0 and types beyond the type_sizes[] table are rejected */
+    if (*type == 0 || *type >= FF_ARRAY_ELEMS(type_sizes))
+        return AVERROR_INVALIDDATA;
+
+    is_ifd      = ff_tis_ifd(*tag);
+    out_of_line = *count > 4 ||
+                  (type_sizes[*type] * (*count) > 4 && *type != TIFF_STRING);
+
+    /* IFD tags and values that do not fit into the 4-byte field:
+     * read the field as an offset and continue reading from there */
+    if (is_ifd || out_of_line)
+        bytestream2_seek(gb, ff_tget_long(gb, le), SEEK_SET);
+
+    return 0;
+}
diff --git a/libavcodec/tiff_common.h b/libavcodec/tiff_common.h
new file mode 100644
index 0000000..03558c3
--- /dev/null
+++ b/libavcodec/tiff_common.h
@@ -0,0 +1,152 @@
+/*
+ * TIFF Common Routines
+ * Copyright (c) 2013 Thilo Borgmann <thilo.borgmann _at_ mail.de>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * TIFF Common Routines
+ * @author Thilo Borgmann <thilo.borgmann _at_ mail.de>
+ */
+
+#ifndef AVCODEC_TIFF_COMMON_H
+#define AVCODEC_TIFF_COMMON_H
+
+#include "avcodec.h"
+#include "tiff.h"
+#include "bytestream.h"
+#include "libavutil/bprint.h"
+
+/** data type identifiers for TIFF tags; the numeric value is the index into type_sizes[] */
+enum TiffTypes {
+    TIFF_BYTE = 1,  /* 0 is not a valid type; ff_tread_tag() rejects it */
+    TIFF_STRING,
+    TIFF_SHORT,
+    TIFF_LONG,
+    TIFF_RATIONAL,
+    TIFF_SBYTE,
+    TIFF_UNDEFINED,
+    TIFF_SSHORT,
+    TIFF_SLONG,
+    TIFF_SRATIONAL,
+    TIFF_FLOAT,
+    TIFF_DOUBLE,
+    TIFF_IFD        /* = 13, the last index covered by type_sizes[14] */
+};
+
+/** byte size of one value per TIFF field type, indexed by enum TiffTypes ([0] unused, string size = 100) */
+static const uint8_t type_sizes[14] = {
+    0, 1, 100, 2, 4, 8, 1, 1, 2, 4, 8, 4, 8, 4
+};
+
+static const uint16_t ifd_tags[] = { // tag ids treated as IFD-tags; searched by ff_tis_ifd()
+    0x8769, // EXIF IFD
+    0x8825, // GPS IFD
+    0xA005  // Interoperability IFD
+};
+
+
+/** Returns a value > 0 if the tag is a known IFD-tag, 0 otherwise.
+ *  A non-zero return value is the array index + 1 within ifd_tags[].
+ */
+int ff_tis_ifd(unsigned tag);
+
+/** Reads a 16-bit value; little-endian if le is non-zero, big-endian otherwise. */
+unsigned ff_tget_short(GetByteContext *gb, int le);
+
+/** Reads a 32-bit value; little-endian if le is non-zero, big-endian otherwise. */
+unsigned ff_tget_long(GetByteContext *gb, int le);
+
+/** Reads 64 bits in the given endianness and returns them reinterpreted as a double. */
+double   ff_tget_double(GetByteContext *gb, int le);
+
+/** Reads one TIFF_BYTE, TIFF_SHORT or TIFF_LONG value; returns UINT_MAX for any other type. */
+unsigned ff_tget(GetByteContext *gb, int type, int le);
+
+/** Returns an allocated string containing count rational values using the
+ *  given separator. NOTE(review): no definition in tiff_common.c -- confirm.
+ */
+char *ff_trationals2str(int *rp, int count, const char *sep);
+
+/** Returns an allocated string containing count long values using the
+ *  given separator. NOTE(review): no definition in tiff_common.c -- confirm.
+ */
+char *ff_tlongs2str(int32_t *lp, int count, const char *sep);
+
+/** Returns an allocated string containing count double values using the
+ *  given separator. NOTE(review): no definition in tiff_common.c -- confirm.
+ */
+char *ff_tdoubles2str(double *dp, int count, const char *sep);
+
+/** Returns an allocated string containing count short values using the
+ *  given separator. NOTE(review): no definition in tiff_common.c -- confirm.
+ */
+char *ff_tshorts2str(int16_t *sp, int count, const char *sep);
+
+/** Adds count rationals, formatted as "numerator:denominator" text, to the
+ *  metadata dictionary under name. Returns 0 or a negative error code.
+ */
+int ff_tadd_rational_metadata(int count, const char *name, const char *sep,
+                              GetByteContext *gb, int le, AVDictionary **metadata);
+
+/** Adds count 32-bit values, formatted as decimal text, to the
+ *  metadata dictionary under name. Returns 0 or a negative error code.
+ */
+int ff_tadd_long_metadata(int count, const char *name, const char *sep,
+                          GetByteContext *gb, int le, AVDictionary **metadata);
+
+/** Adds count doubles, formatted with up to 15 significant digits, to the
+ *  metadata dictionary under name. Returns 0 or a negative error code.
+ */
+int ff_tadd_doubles_metadata(int count, const char *name, const char *sep,
+                             GetByteContext *gb, int le, AVDictionary **metadata);
+
+/** Adds count 16-bit values (read as int16_t if is_signed is non-zero) as text
+ *  to the metadata dictionary under name. Returns 0 or a negative error code.
+ */
+int ff_tadd_shorts_metadata(int count, const char *name, const char *sep,
+                            GetByteContext *gb, int le, int is_signed, AVDictionary **metadata);
+
+/** Adds count bytes (read as int8_t if is_signed is non-zero) as text to the
+ *  metadata dictionary under name; le is unused. Returns 0 or a negative error code.
+ */
+int ff_tadd_bytes_metadata(int count, const char *name, const char *sep,
+                           GetByteContext *gb, int le, int is_signed, AVDictionary **metadata);
+
+/** Adds count bytes of input as a NUL-terminated string to the metadata
+ *  dictionary under name; le is unused. Returns 0 or a negative error code.
+ */
+int ff_tadd_string_metadata(int count, const char *name,
+                            GetByteContext *gb, int le, AVDictionary **metadata);
+
+/** Decodes a TIFF header from the input bytestream
+ *  and sets the endianness in *le (1 = little, 0 = big) and the offset to
+ *  the first IFD in *ifd_offset. Returns 0 or AVERROR_INVALIDDATA.
+ */
+int ff_tdecode_header(GetByteContext *gb, int *le, int *ifd_offset);
+
+/** Reads the tag id, the tag type and the value count of a TIFF tag.
+ *  On success (return 0) the bytestream is located at the first value to
+ *  read; *next holds the bytestream offset of the following tag.
+ *  Returns AVERROR_INVALIDDATA if the type is 0 or not in type_sizes[].
+ */
+int ff_tread_tag(GetByteContext *gb, int le, unsigned *tag, unsigned *type,
+                 unsigned *count, int *next);
+
+#endif /* AVCODEC_TIFF_COMMON_H */
diff --git a/libavcodec/tiff_data.c b/libavcodec/tiff_data.c
new file mode 100644
index 0000000..88c2256
--- /dev/null
+++ b/libavcodec/tiff_data.c
@@ -0,0 +1,1870 @@
+/*
+ * TIFF data tables
+ * Copyright (c) 2011 Thomas Kuehnel
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * TIFF data tables
+ * @author Thomas Kuehnel
+ * @see GeoTIFF specification at
+ * http://www.remotesensing.org/geotiff/spec/geotiffhome.html
+ */
+
+#include "tiff_data.h"
+
+const TiffGeoTagNameType ff_tiff_conf_name_type_map[] = { /* GeoKey name and value type for the "GT" keys */
+    {"GTModelTypeGeoKey",              GEOTIFF_SHORT },
+    {"GTRasterTypeGeoKey",             GEOTIFF_SHORT },
+    {"GTCitationGeoKey",               GEOTIFF_STRING}
+}; /* NOTE(review): order presumably follows the key ids; confirm before reordering */
+
+const TiffGeoTagNameType ff_tiff_geog_name_type_map[] = { /* GeoKey name and value type for the "Geog" keys */
+    {"GeographicTypeGeoKey",           GEOTIFF_SHORT },
+    {"GeogCitationGeoKey",             GEOTIFF_STRING},
+    {"GeogGeodeticDatumGeoKey",        GEOTIFF_SHORT },
+    {"GeogPrimeMeridianGeoKey",        GEOTIFF_SHORT },
+    {"GeogLinearUnitsGeoKey",          GEOTIFF_SHORT },
+    {"GeogLinearUnitSizeGeoKey",       GEOTIFF_DOUBLE},
+    {"GeogAngularUnitsGeoKey",         GEOTIFF_SHORT },
+    {"GeogAngularUnitSizeGeoKey",      GEOTIFF_DOUBLE},
+    {"GeogEllipsoidGeoKey",            GEOTIFF_SHORT },
+    {"GeogSemiMajorAxisGeoKey",        GEOTIFF_DOUBLE},
+    {"GeogSemiMinorAxisGeoKey",        GEOTIFF_DOUBLE},
+    {"GeogInvFlatteningGeoKey",        GEOTIFF_DOUBLE},
+    {"GeogAzimuthUnitsGeoKey",         GEOTIFF_SHORT },
+    {"GeogPrimeMeridianLongGeoKey",    GEOTIFF_DOUBLE}
+}; /* NOTE(review): order presumably follows the key ids; confirm before reordering */
+
+const TiffGeoTagNameType ff_tiff_proj_name_type_map[] = { /* GeoKey name and value type for the projection keys */
+    {"ProjectedCSTypeGeoKey",          GEOTIFF_SHORT },
+    {"PCSCitationGeoKey",              GEOTIFF_STRING},
+    {"ProjectionGeoKey",               GEOTIFF_SHORT },
+    {"ProjCoordTransGeoKey",           GEOTIFF_SHORT },
+    {"ProjLinearUnitsGeoKey",          GEOTIFF_SHORT },
+    {"ProjLinearUnitSizeGeoKey",       GEOTIFF_DOUBLE},
+    {"ProjStdParallel1GeoKey",         GEOTIFF_DOUBLE},
+    {"ProjStdParallel2GeoKey",         GEOTIFF_DOUBLE},
+    {"ProjNatOriginLongGeoKey",        GEOTIFF_DOUBLE},
+    {"ProjNatOriginLatGeoKey",         GEOTIFF_DOUBLE},
+    {"ProjFalseEastingGeoKey",         GEOTIFF_DOUBLE},
+    {"ProjFalseNorthingGeoKey",        GEOTIFF_DOUBLE},
+    {"ProjFalseOriginLongGeoKey",      GEOTIFF_DOUBLE},
+    {"ProjFalseOriginLatGeoKey",       GEOTIFF_DOUBLE},
+    {"ProjFalseOriginEastingGeoKey",   GEOTIFF_DOUBLE},
+    {"ProjFalseOriginNorthingGeoKey",  GEOTIFF_DOUBLE},
+    {"ProjCenterLongGeoKey",           GEOTIFF_DOUBLE},
+    {"ProjCenterLatGeoKey",            GEOTIFF_DOUBLE},
+    {"ProjCenterEastingGeoKey",        GEOTIFF_DOUBLE},
+    {"ProjCenterNorthingGeoKey",       GEOTIFF_DOUBLE},
+    {"ProjScaleAtNatOriginGeoKey",     GEOTIFF_DOUBLE},
+    {"ProjScaleAtCenterGeoKey",        GEOTIFF_DOUBLE},
+    {"ProjAzimuthAngleGeoKey",         GEOTIFF_DOUBLE},
+    {"ProjStraightVertPoleLongGeoKey", GEOTIFF_DOUBLE}
+}; /* NOTE(review): order presumably follows the key ids; confirm before reordering */
+
+const TiffGeoTagNameType ff_tiff_vert_name_type_map[] = { /* GeoKey name and value type for the "Vertical" keys */
+    {"VerticalCSTypeGeoKey",           GEOTIFF_SHORT },
+    {"VerticalCitationGeoKey",         GEOTIFF_STRING},
+    {"VerticalDatumGeoKey",            GEOTIFF_SHORT },
+    {"VerticalUnitsGeoKey",            GEOTIFF_SHORT }
+}; /* NOTE(review): order presumably follows the key ids; confirm before reordering */
+
+const char *const ff_tiff_gt_model_type_codes[] = { /* symbolic names for model type values */
+    "ModelTypeProjected",
+    "ModelTypeGeographic",
+    "ModelTypeGeocentric"
+}; /* NOTE(review): presumably indexed by position; confirm before reordering */
+
+const char *const ff_tiff_gt_raster_type_codes[] = { /* symbolic names for raster type values */
+    "RasterPixelIsArea",
+    "RasterPixelIsPoint"
+}; /* NOTE(review): presumably indexed by position; confirm before reordering */
+
+const char *const ff_tiff_linear_unit_codes[] = { /* symbolic names for linear unit values */
+    "Linear_Meter",
+    "Linear_Foot",
+    "Linear_Foot_US_Survey",
+    "Linear_Foot_Modified_American",
+    "Linear_Foot_Clarke",
+    "Linear_Foot_Indian",
+    "Linear_Link",
+    "Linear_Link_Benoit",
+    "Linear_Link_Sears",
+    "Linear_Chain_Benoit",
+    "Linear_Chain_Sears",
+    "Linear_Yard_Sears",
+    "Linear_Yard_Indian",
+    "Linear_Fathom",
+    "Linear_Mile_International_Nautical"
+}; /* NOTE(review): presumably indexed by position; confirm before reordering */
+
+const char *const ff_tiff_angular_unit_codes[] = { /* symbolic names for angular unit values */
+    "Angular_Radian",
+    "Angular_Degree",
+    "Angular_Arc_Minute",
+    "Angular_Arc_Second",
+    "Angular_Grad",
+    "Angular_Gon",
+    "Angular_DMS",
+    "Angular_DMS_Hemisphere"
+}; /* NOTE(review): presumably indexed by position; confirm before reordering */
+
+const char *const ff_tiff_gcs_type_codes[] = { /* symbolic "GCS_" names */
+    "GCS_Adindan",
+    "GCS_AGD66",
+    "GCS_AGD84",
+    "GCS_Ain_el_Abd",
+    "GCS_Afgooye",
+    "GCS_Agadez",
+    "GCS_Lisbon",
+    "GCS_Aratu",
+    "GCS_Arc_1950",
+    "GCS_Arc_1960",
+    "GCS_Batavia",
+    "GCS_Barbados",
+    "GCS_Beduaram",
+    "GCS_Beijing_1954",
+    "GCS_Belge_1950",
+    "GCS_Bermuda_1957",
+    "GCS_Bern_1898",
+    "GCS_Bogota",
+    "GCS_Bukit_Rimpah",
+    "GCS_Camacupa",
+    "GCS_Campo_Inchauspe",
+    "GCS_Cape",
+    "GCS_Carthage",
+    "GCS_Chua",
+    "GCS_Corrego_Alegre",
+    "GCS_Cote_d_Ivoire",
+    "GCS_Deir_ez_Zor",
+    "GCS_Douala",
+    "GCS_Egypt_1907",
+    "GCS_ED50",
+    "GCS_ED87",
+    "GCS_Fahud",
+    "GCS_Gandajika_1970",
+    "GCS_Garoua",
+    "GCS_Guyane_Francaise",
+    "GCS_Hu_Tzu_Shan",
+    "GCS_HD72",
+    "GCS_ID74",
+    "GCS_Indian_1954",
+    "GCS_Indian_1975",
+    "GCS_Jamaica_1875",
+    "GCS_JAD69",
+    "GCS_Kalianpur",
+    "GCS_Kandawala",
+    "GCS_Kertau",
+    "GCS_KOC",
+    "GCS_La_Canoa",
+    "GCS_PSAD56",
+    "GCS_Lake",
+    "GCS_Leigon",
+    "GCS_Liberia_1964",
+    "GCS_Lome",
+    "GCS_Luzon_1911",
+    "GCS_Hito_XVIII_1963",
+    "GCS_Herat_North",
+    "GCS_Mahe_1971",
+    "GCS_Makassar",
+    "GCS_EUREF89",
+    "GCS_Malongo_1987",
+    "GCS_Manoca",
+    "GCS_Merchich",
+    "GCS_Massawa",
+    "GCS_Minna",
+    "GCS_Mhast",
+    "GCS_Monte_Mario",
+    "GCS_M_poraloko",
+    "GCS_NAD27",
+    "GCS_NAD_Michigan",
+    "GCS_NAD83",
+    "GCS_Nahrwan_1967",
+    "GCS_Naparima_1972",
+    "GCS_GD49",
+    "GCS_NGO_1948",
+    "GCS_Datum_73",
+    "GCS_NTF",
+    "GCS_NSWC_9Z_2",
+    "GCS_OSGB_1936",
+    "GCS_OSGB70",
+    "GCS_OS_SN80",
+    "GCS_Padang",
+    "GCS_Palestine_1923",
+    "GCS_Pointe_Noire",
+    "GCS_GDA94",
+    "GCS_Pulkovo_1942",
+    "GCS_Qatar",
+    "GCS_Qatar_1948",
+    "GCS_Qornoq",
+    "GCS_Loma_Quintana",
+    "GCS_Amersfoort",
+    "GCS_RT38",
+    "GCS_SAD69",
+    "GCS_Sapper_Hill_1943",
+    "GCS_Schwarzeck",
+    "GCS_Segora",
+    "GCS_Serindung",
+    "GCS_Sudan",
+    "GCS_Tananarive",
+    "GCS_Timbalai_1948",
+    "GCS_TM65",
+    "GCS_TM75",
+    "GCS_Tokyo",
+    "GCS_Trinidad_1903",
+    "GCS_TC_1948",
+    "GCS_Voirol_1875",
+    "GCS_Voirol_Unifie",
+    "GCS_Bern_1938",
+    "GCS_Nord_Sahara_1959",
+    "GCS_Stockholm_1938",
+    "GCS_Yacare",
+    "GCS_Yoff",
+    "GCS_Zanderij",
+    "GCS_MGI",
+    "GCS_Belge_1972",
+    "GCS_DHDN",
+    "GCS_Conakry_1905",
+    "GCS_WGS_72",
+    "GCS_WGS_72BE",
+    "GCS_WGS_84",
+    "GCS_Bern_1898_Bern",
+    "GCS_Bogota_Bogota",
+    "GCS_Lisbon_Lisbon",
+    "GCS_Makassar_Jakarta",
+    "GCS_MGI_Ferro",
+    "GCS_Monte_Mario_Rome",
+    "GCS_NTF_Paris",
+    "GCS_Padang_Jakarta",
+    "GCS_Belge_1950_Brussels",
+    "GCS_Tananarive_Paris",
+    "GCS_Voirol_1875_Paris",
+    "GCS_Voirol_Unifie_Paris",
+    "GCS_Batavia_Jakarta",
+    "GCS_ATF_Paris",
+    "GCS_NDG_Paris"
+}; /* NOTE(review): presumably indexed by position; confirm before reordering */
+
+const char *const ff_tiff_gcse_type_codes[] = { /* symbolic "GCSE_" names */
+    "GCSE_Airy1830",
+    "GCSE_AiryModified1849",
+    "GCSE_AustralianNationalSpheroid",
+    "GCSE_Bessel1841",
+    "GCSE_BesselModified",
+    "GCSE_BesselNamibia",
+    "GCSE_Clarke1858",
+    "GCSE_Clarke1866",
+    "GCSE_Clarke1866Michigan",
+    "GCSE_Clarke1880_Benoit",
+    "GCSE_Clarke1880_IGN",
+    "GCSE_Clarke1880_RGS",
+    "GCSE_Clarke1880_Arc",
+    "GCSE_Clarke1880_SGA1922",
+    "GCSE_Everest1830_1937Adjustment",
+    "GCSE_Everest1830_1967Definition",
+    "GCSE_Everest1830_1975Definition",
+    "GCSE_Everest1830Modified",
+    "GCSE_GRS1980",
+    "GCSE_Helmert1906",
+    "GCSE_IndonesianNationalSpheroid",
+    "GCSE_International1924",
+    "GCSE_International1967",
+    "GCSE_Krassowsky1940",
+    "GCSE_NWL9D",
+    "GCSE_NWL10D",
+    "GCSE_Plessis1817",
+    "GCSE_Struve1860",
+    "GCSE_WarOffice",
+    "GCSE_WGS84",
+    "GCSE_GEM10C",
+    "GCSE_OSU86F",
+    "GCSE_OSU91A",
+    "GCSE_Clarke1880",
+    "GCSE_Sphere"
+}; /* NOTE(review): presumably indexed by position; confirm before reordering */
+
+const char *const ff_tiff_geodetic_datum_codes[] = { /* symbolic "Datum_" names */
+    "Datum_Adindan",
+    "Datum_Australian_Geodetic_Datum_1966",
+    "Datum_Australian_Geodetic_Datum_1984",
+    "Datum_Ain_el_Abd_1970",
+    "Datum_Afgooye",
+    "Datum_Agadez",
+    "Datum_Lisbon",
+    "Datum_Aratu",
+    "Datum_Arc_1950",
+    "Datum_Arc_1960",
+    "Datum_Batavia",
+    "Datum_Barbados",
+    "Datum_Beduaram",
+    "Datum_Beijing_1954",
+    "Datum_Reseau_National_Belge_1950",
+    "Datum_Bermuda_1957",
+    "Datum_Bern_1898",
+    "Datum_Bogota",
+    "Datum_Bukit_Rimpah",
+    "Datum_Camacupa",
+    "Datum_Campo_Inchauspe",
+    "Datum_Cape",
+    "Datum_Carthage",
+    "Datum_Chua",
+    "Datum_Corrego_Alegre",
+    "Datum_Cote_d_Ivoire",
+    "Datum_Deir_ez_Zor",
+    "Datum_Douala",
+    "Datum_Egypt_1907",
+    "Datum_European_Datum_1950",
+    "Datum_European_Datum_1987",
+    "Datum_Fahud",
+    "Datum_Gandajika_1970",
+    "Datum_Garoua",
+    "Datum_Guyane_Francaise",
+    "Datum_Hu_Tzu_Shan",
+    "Datum_Hungarian_Datum_1972",
+    "Datum_Indonesian_Datum_1974",
+    "Datum_Indian_1954",
+    "Datum_Indian_1975",
+    "Datum_Jamaica_1875",
+    "Datum_Jamaica_1969",
+    "Datum_Kalianpur",
+    "Datum_Kandawala",
+    "Datum_Kertau",
+    "Datum_Kuwait_Oil_Company",
+    "Datum_La_Canoa",
+    "Datum_Provisional_S_American_Datum_1956",
+    "Datum_Lake",
+    "Datum_Leigon",
+    "Datum_Liberia_1964",
+    "Datum_Lome",
+    "Datum_Luzon_1911",
+    "Datum_Hito_XVIII_1963",
+    "Datum_Herat_North",
+    "Datum_Mahe_1971",
+    "Datum_Makassar",
+    "Datum_European_Reference_System_1989",
+    "Datum_Malongo_1987",
+    "Datum_Manoca",
+    "Datum_Merchich",
+    "Datum_Massawa",
+    "Datum_Minna",
+    "Datum_Mhast",
+    "Datum_Monte_Mario",
+    "Datum_M_poraloko",
+    "Datum_North_American_Datum_1927",
+    "Datum_NAD_Michigan",
+    "Datum_North_American_Datum_1983",
+    "Datum_Nahrwan_1967",
+    "Datum_Naparima_1972",
+    "Datum_New_Zealand_Geodetic_Datum_1949",
+    "Datum_NGO_1948",
+    "Datum_Datum_73",
+    "Datum_Nouvelle_Triangulation_Francaise",
+    "Datum_NSWC_9Z_2",
+    "Datum_OSGB_1936",
+    "Datum_OSGB_1970_SN",
+    "Datum_OS_SN_1980",
+    "Datum_Padang_1884",
+    "Datum_Palestine_1923",
+    "Datum_Pointe_Noire",
+    "Datum_Geocentric_Datum_of_Australia_1994",
+    "Datum_Pulkovo_1942",
+    "Datum_Qatar",
+    "Datum_Qatar_1948",
+    "Datum_Qornoq",
+    "Datum_Loma_Quintana",
+    "Datum_Amersfoort",
+    "Datum_RT38",
+    "Datum_South_American_Datum_1969",
+    "Datum_Sapper_Hill_1943",
+    "Datum_Schwarzeck",
+    "Datum_Segora",
+    "Datum_Serindung",
+    "Datum_Sudan",
+    "Datum_Tananarive_1925",
+    "Datum_Timbalai_1948",
+    "Datum_TM65",
+    "Datum_TM75",
+    "Datum_Tokyo",
+    "Datum_Trinidad_1903",
+    "Datum_Trucial_Coast_1948",
+    "Datum_Voirol_1875",
+    "Datum_Voirol_Unifie_1960",
+    "Datum_Bern_1938",
+    "Datum_Nord_Sahara_1959",
+    "Datum_Stockholm_1938",
+    "Datum_Yacare",
+    "Datum_Yoff",
+    "Datum_Zanderij",
+    "Datum_Militar_Geographische_Institut",
+    "Datum_Reseau_National_Belge_1972",
+    "Datum_Deutsche_Hauptdreiecksnetz",
+    "Datum_Conakry_1905",
+    "Datum_WGS72",
+    "Datum_WGS72_Transit_Broadcast_Ephemeris",
+    "Datum_WGS84",
+    "Datum_Ancienne_Triangulation_Francaise",
+    "Datum_Nord_de_Guerre"
+}; /* NOTE(review): presumably indexed by position; confirm before reordering */
+
+const char *const ff_tiff_geodetic_datum_e_codes[] = { /* symbolic "DatumE_" names */
+    "DatumE_Airy1830",
+    "DatumE_AiryModified1849",
+    "DatumE_AustralianNationalSpheroid",
+    "DatumE_Bessel1841",
+    "DatumE_BesselModified",
+    "DatumE_BesselNamibia",
+    "DatumE_Clarke1858",
+    "DatumE_Clarke1866",
+    "DatumE_Clarke1866Michigan",
+    "DatumE_Clarke1880_Benoit",
+    "DatumE_Clarke1880_IGN",
+    "DatumE_Clarke1880_RGS",
+    "DatumE_Clarke1880_Arc",
+    "DatumE_Clarke1880_SGA1922",
+    "DatumE_Everest1830_1937Adjustment",
+    "DatumE_Everest1830_1967Definition",
+    "DatumE_Everest1830_1975Definition",
+    "DatumE_Everest1830Modified",
+    "DatumE_GRS1980",
+    "DatumE_Helmert1906",
+    "DatumE_IndonesianNationalSpheroid",
+    "DatumE_International1924",
+    "DatumE_International1967",
+    "DatumE_Krassowsky1960", /* NOTE(review): GCSE/Ellipse tables say 1940; presumably the GeoTIFF spec's spelling -- confirm */
+    "DatumE_NWL9D",
+    "DatumE_NWL10D",
+    "DatumE_Plessis1817",
+    "DatumE_Struve1860",
+    "DatumE_WarOffice",
+    "DatumE_WGS84",
+    "DatumE_GEM10C",
+    "DatumE_OSU86F",
+    "DatumE_OSU91A",
+    "DatumE_Clarke1880",
+    "DatumE_Sphere"
+}; /* NOTE(review): presumably indexed by position; confirm before reordering */
+
+const char *const ff_tiff_ellipsoid_codes[] = { /* symbolic "Ellipse_" names */
+    "Ellipse_Airy_1830",
+    "Ellipse_Airy_Modified_1849",
+    "Ellipse_Australian_National_Spheroid",
+    "Ellipse_Bessel_1841",
+    "Ellipse_Bessel_Modified",
+    "Ellipse_Bessel_Namibia",
+    "Ellipse_Clarke_1858",
+    "Ellipse_Clarke_1866",
+    "Ellipse_Clarke_1866_Michigan",
+    "Ellipse_Clarke_1880_Benoit",
+    "Ellipse_Clarke_1880_IGN",
+    "Ellipse_Clarke_1880_RGS",
+    "Ellipse_Clarke_1880_Arc",
+    "Ellipse_Clarke_1880_SGA_1922",
+    "Ellipse_Everest_1830_1937_Adjustment",
+    "Ellipse_Everest_1830_1967_Definition",
+    "Ellipse_Everest_1830_1975_Definition",
+    "Ellipse_Everest_1830_Modified",
+    "Ellipse_GRS_1980",
+    "Ellipse_Helmert_1906",
+    "Ellipse_Indonesian_National_Spheroid",
+    "Ellipse_International_1924",
+    "Ellipse_International_1967",
+    "Ellipse_Krassowsky_1940",
+    "Ellipse_NWL_9D",
+    "Ellipse_NWL_10D",
+    "Ellipse_Plessis_1817",
+    "Ellipse_Struve_1860",
+    "Ellipse_War_Office",
+    "Ellipse_WGS_84",
+    "Ellipse_GEM_10C",
+    "Ellipse_OSU86F",
+    "Ellipse_OSU91A",
+    "Ellipse_Clarke_1880",
+    "Ellipse_Sphere"
+}; /* NOTE(review): presumably indexed by position; confirm before reordering */
+
+const char *const ff_tiff_prime_meridian_codes[] = { /* symbolic "PM_" (prime meridian) names */
+    "PM_Greenwich",
+    "PM_Lisbon",
+    "PM_Paris",
+    "PM_Bogota",
+    "PM_Madrid",
+    "PM_Rome",
+    "PM_Bern",
+    "PM_Jakarta",
+    "PM_Ferro",
+    "PM_Brussels",
+    "PM_Stockholm"
+}; /* NOTE(review): presumably indexed by position; confirm before reordering */
+
+const TiffGeoTagKeyName ff_tiff_proj_cs_type_codes[] = {
+    {20137, "PCS_Adindan_UTM_zone_37N"},
+    {20138, "PCS_Adindan_UTM_zone_38N"},
+    {20248, "PCS_AGD66_AMG_zone_48"},
+    {20249, "PCS_AGD66_AMG_zone_49"},
+    {20250, "PCS_AGD66_AMG_zone_50"},
+    {20251, "PCS_AGD66_AMG_zone_51"},
+    {20252, "PCS_AGD66_AMG_zone_52"},
+    {20253, "PCS_AGD66_AMG_zone_53"},
+    {20254, "PCS_AGD66_AMG_zone_54"},
+    {20255, "PCS_AGD66_AMG_zone_55"},
+    {20256, "PCS_AGD66_AMG_zone_56"},
+    {20257, "PCS_AGD66_AMG_zone_57"},
+    {20258, "PCS_AGD66_AMG_zone_58"},
+    {20348, "PCS_AGD84_AMG_zone_48"},
+    {20349, "PCS_AGD84_AMG_zone_49"},
+    {20350, "PCS_AGD84_AMG_zone_50"},
+    {20351, "PCS_AGD84_AMG_zone_51"},
+    {20352, "PCS_AGD84_AMG_zone_52"},
+    {20353, "PCS_AGD84_AMG_zone_53"},
+    {20354, "PCS_AGD84_AMG_zone_54"},
+    {20355, "PCS_AGD84_AMG_zone_55"},
+    {20356, "PCS_AGD84_AMG_zone_56"},
+    {20357, "PCS_AGD84_AMG_zone_57"},
+    {20358, "PCS_AGD84_AMG_zone_58"},
+    {20437, "PCS_Ain_el_Abd_UTM_zone_37N"},
+    {20438, "PCS_Ain_el_Abd_UTM_zone_38N"},
+    {20439, "PCS_Ain_el_Abd_UTM_zone_39N"},
+    {20499, "PCS_Ain_el_Abd_Bahrain_Grid"},
+    {20538, "PCS_Afgooye_UTM_zone_38N"},
+    {20539, "PCS_Afgooye_UTM_zone_39N"},
+    {20700, "PCS_Lisbon_Portugese_Grid"},
+    {20822, "PCS_Aratu_UTM_zone_22S"},
+    {20823, "PCS_Aratu_UTM_zone_23S"},
+    {20824, "PCS_Aratu_UTM_zone_24S"},
+    {20973, "PCS_Arc_1950_Lo13"},
+    {20975, "PCS_Arc_1950_Lo15"},
+    {20977, "PCS_Arc_1950_Lo17"},
+    {20979, "PCS_Arc_1950_Lo19"},
+    {20981, "PCS_Arc_1950_Lo21"},
+    {20983, "PCS_Arc_1950_Lo23"},
+    {20985, "PCS_Arc_1950_Lo25"},
+    {20987, "PCS_Arc_1950_Lo27"},
+    {20989, "PCS_Arc_1950_Lo29"},
+    {20991, "PCS_Arc_1950_Lo31"},
+    {20993, "PCS_Arc_1950_Lo33"},
+    {20995, "PCS_Arc_1950_Lo35"},
+    {21100, "PCS_Batavia_NEIEZ"},
+    {21148, "PCS_Batavia_UTM_zone_48S"},
+    {21149, "PCS_Batavia_UTM_zone_49S"},
+    {21150, "PCS_Batavia_UTM_zone_50S"},
+    {21413, "PCS_Beijing_Gauss_zone_13"},
+    {21414, "PCS_Beijing_Gauss_zone_14"},
+    {21415, "PCS_Beijing_Gauss_zone_15"},
+    {21416, "PCS_Beijing_Gauss_zone_16"},
+    {21417, "PCS_Beijing_Gauss_zone_17"},
+    {21418, "PCS_Beijing_Gauss_zone_18"},
+    {21419, "PCS_Beijing_Gauss_zone_19"},
+    {21420, "PCS_Beijing_Gauss_zone_20"},
+    {21421, "PCS_Beijing_Gauss_zone_21"},
+    {21422, "PCS_Beijing_Gauss_zone_22"},
+    {21423, "PCS_Beijing_Gauss_zone_23"},
+    {21473, "PCS_Beijing_Gauss_13N"},
+    {21474, "PCS_Beijing_Gauss_14N"},
+    {21475, "PCS_Beijing_Gauss_15N"},
+    {21476, "PCS_Beijing_Gauss_16N"},
+    {21477, "PCS_Beijing_Gauss_17N"},
+    {21478, "PCS_Beijing_Gauss_18N"},
+    {21479, "PCS_Beijing_Gauss_19N"},
+    {21480, "PCS_Beijing_Gauss_20N"},
+    {21481, "PCS_Beijing_Gauss_21N"},
+    {21482, "PCS_Beijing_Gauss_22N"},
+    {21483, "PCS_Beijing_Gauss_23N"},
+    {21500, "PCS_Belge_Lambert_50"},
+    {21790, "PCS_Bern_1898_Swiss_Old"},
+    {21817, "PCS_Bogota_UTM_zone_17N"},
+    {21818, "PCS_Bogota_UTM_zone_18N"},
+    {21891, "PCS_Bogota_Colombia_3W"},
+    {21892, "PCS_Bogota_Colombia_Bogota"},
+    {21893, "PCS_Bogota_Colombia_3E"},
+    {21894, "PCS_Bogota_Colombia_6E"},
+    {22032, "PCS_Camacupa_UTM_32S"},
+    {22033, "PCS_Camacupa_UTM_33S"},
+    {22191, "PCS_C_Inchauspe_Argentina_1"},
+    {22192, "PCS_C_Inchauspe_Argentina_2"},
+    {22193, "PCS_C_Inchauspe_Argentina_3"},
+    {22194, "PCS_C_Inchauspe_Argentina_4"},
+    {22195, "PCS_C_Inchauspe_Argentina_5"},
+    {22196, "PCS_C_Inchauspe_Argentina_6"},
+    {22197, "PCS_C_Inchauspe_Argentina_7"},
+    {22332, "PCS_Carthage_UTM_zone_32N"},
+    {22391, "PCS_Carthage_Nord_Tunisie"},
+    {22392, "PCS_Carthage_Sud_Tunisie"},
+    {22523, "PCS_Corrego_Alegre_UTM_23S"},
+    {22524, "PCS_Corrego_Alegre_UTM_24S"},
+    {22832, "PCS_Douala_UTM_zone_32N"},
+    {22992, "PCS_Egypt_1907_Red_Belt"},
+    {22993, "PCS_Egypt_1907_Purple_Belt"},
+    {22994, "PCS_Egypt_1907_Ext_Purple"},
+    {23028, "PCS_ED50_UTM_zone_28N"},
+    {23029, "PCS_ED50_UTM_zone_29N"},
+    {23030, "PCS_ED50_UTM_zone_30N"},
+    {23031, "PCS_ED50_UTM_zone_31N"},
+    {23032, "PCS_ED50_UTM_zone_32N"},
+    {23033, "PCS_ED50_UTM_zone_33N"},
+    {23034, "PCS_ED50_UTM_zone_34N"},
+    {23035, "PCS_ED50_UTM_zone_35N"},
+    {23036, "PCS_ED50_UTM_zone_36N"},
+    {23037, "PCS_ED50_UTM_zone_37N"},
+    {23038, "PCS_ED50_UTM_zone_38N"},
+    {23239, "PCS_Fahud_UTM_zone_39N"},
+    {23240, "PCS_Fahud_UTM_zone_40N"},
+    {23433, "PCS_Garoua_UTM_zone_33N"},
+    {23846, "PCS_ID74_UTM_zone_46N"},
+    {23847, "PCS_ID74_UTM_zone_47N"},
+    {23848, "PCS_ID74_UTM_zone_48N"},
+    {23849, "PCS_ID74_UTM_zone_49N"},
+    {23850, "PCS_ID74_UTM_zone_50N"},
+    {23851, "PCS_ID74_UTM_zone_51N"},
+    {23852, "PCS_ID74_UTM_zone_52N"},
+    {23853, "PCS_ID74_UTM_zone_53N"},
+    {23886, "PCS_ID74_UTM_zone_46S"},
+    {23887, "PCS_ID74_UTM_zone_47S"},
+    {23888, "PCS_ID74_UTM_zone_48S"},
+    {23889, "PCS_ID74_UTM_zone_49S"},
+    {23890, "PCS_ID74_UTM_zone_50S"},
+    {23891, "PCS_ID74_UTM_zone_51S"},
+    {23892, "PCS_ID74_UTM_zone_52S"},
+    {23893, "PCS_ID74_UTM_zone_53S"},
+    {23894, "PCS_ID74_UTM_zone_54S"},
+    {23947, "PCS_Indian_1954_UTM_47N"},
+    {23948, "PCS_Indian_1954_UTM_48N"},
+    {24047, "PCS_Indian_1975_UTM_47N"},
+    {24048, "PCS_Indian_1975_UTM_48N"},
+    {24100, "PCS_Jamaica_1875_Old_Grid"},
+    {24200, "PCS_JAD69_Jamaica_Grid"},
+    {24370, "PCS_Kalianpur_India_0"},
+    {24371, "PCS_Kalianpur_India_I"},
+    {24372, "PCS_Kalianpur_India_IIa"},
+    {24373, "PCS_Kalianpur_India_IIIa"},
+    {24374, "PCS_Kalianpur_India_IVa"},
+    {24382, "PCS_Kalianpur_India_IIb"},
+    {24383, "PCS_Kalianpur_India_IIIb"},
+    {24384, "PCS_Kalianpur_India_IVb"},
+    {24500, "PCS_Kertau_Singapore_Grid"},
+    {24547, "PCS_Kertau_UTM_zone_47N"},
+    {24548, "PCS_Kertau_UTM_zone_48N"},
+    {24720, "PCS_La_Canoa_UTM_zone_20N"},
+    {24721, "PCS_La_Canoa_UTM_zone_21N"},
+    {24818, "PCS_PSAD56_UTM_zone_18N"},
+    {24819, "PCS_PSAD56_UTM_zone_19N"},
+    {24820, "PCS_PSAD56_UTM_zone_20N"},
+    {24821, "PCS_PSAD56_UTM_zone_21N"},
+    {24877, "PCS_PSAD56_UTM_zone_17S"},
+    {24878, "PCS_PSAD56_UTM_zone_18S"},
+    {24879, "PCS_PSAD56_UTM_zone_19S"},
+    {24880, "PCS_PSAD56_UTM_zone_20S"},
+    {24891, "PCS_PSAD56_Peru_west_zone"},
+    {24892, "PCS_PSAD56_Peru_central"},
+    {24893, "PCS_PSAD56_Peru_east_zone"},
+    {25000, "PCS_Leigon_Ghana_Grid"},
+    {25231, "PCS_Lome_UTM_zone_31N"},
+    {25391, "PCS_Luzon_Philippines_I"},
+    {25392, "PCS_Luzon_Philippines_II"},
+    {25393, "PCS_Luzon_Philippines_III"},
+    {25394, "PCS_Luzon_Philippines_IV"},
+    {25395, "PCS_Luzon_Philippines_V"},
+    {25700, "PCS_Makassar_NEIEZ"},
+    {25932, "PCS_Malongo_1987_UTM_32S"},
+    {26191, "PCS_Merchich_Nord_Maroc"},
+    {26192, "PCS_Merchich_Sud_Maroc"},
+    {26193, "PCS_Merchich_Sahara"},
+    {26237, "PCS_Massawa_UTM_zone_37N"},
+    {26331, "PCS_Minna_UTM_zone_31N"},
+    {26332, "PCS_Minna_UTM_zone_32N"},
+    {26391, "PCS_Minna_Nigeria_West"},
+    {26392, "PCS_Minna_Nigeria_Mid_Belt"},
+    {26393, "PCS_Minna_Nigeria_East"},
+    {26432, "PCS_Mhast_UTM_zone_32S"},
+    {26591, "PCS_Monte_Mario_Italy_1"},
+    {26592, "PCS_Monte_Mario_Italy_2"},
+    {26632, "PCS_M_poraloko_UTM_32N"},
+    {26692, "PCS_M_poraloko_UTM_32S"},
+    {26703, "PCS_NAD27_UTM_zone_3N"},
+    {26704, "PCS_NAD27_UTM_zone_4N"},
+    {26705, "PCS_NAD27_UTM_zone_5N"},
+    {26706, "PCS_NAD27_UTM_zone_6N"},
+    {26707, "PCS_NAD27_UTM_zone_7N"},
+    {26708, "PCS_NAD27_UTM_zone_8N"},
+    {26709, "PCS_NAD27_UTM_zone_9N"},
+    {26710, "PCS_NAD27_UTM_zone_10N"},
+    {26711, "PCS_NAD27_UTM_zone_11N"},
+    {26712, "PCS_NAD27_UTM_zone_12N"},
+    {26713, "PCS_NAD27_UTM_zone_13N"},
+    {26714, "PCS_NAD27_UTM_zone_14N"},
+    {26715, "PCS_NAD27_UTM_zone_15N"},
+    {26716, "PCS_NAD27_UTM_zone_16N"},
+    {26717, "PCS_NAD27_UTM_zone_17N"},
+    {26718, "PCS_NAD27_UTM_zone_18N"},
+    {26719, "PCS_NAD27_UTM_zone_19N"},
+    {26720, "PCS_NAD27_UTM_zone_20N"},
+    {26721, "PCS_NAD27_UTM_zone_21N"},
+    {26722, "PCS_NAD27_UTM_zone_22N"},
+    {26729, "PCS_NAD27_Alabama_East"},
+    {26730, "PCS_NAD27_Alabama_West"},
+    {26731, "PCS_NAD27_Alaska_zone_1"},
+    {26732, "PCS_NAD27_Alaska_zone_2"},
+    {26733, "PCS_NAD27_Alaska_zone_3"},
+    {26734, "PCS_NAD27_Alaska_zone_4"},
+    {26735, "PCS_NAD27_Alaska_zone_5"},
+    {26736, "PCS_NAD27_Alaska_zone_6"},
+    {26737, "PCS_NAD27_Alaska_zone_7"},
+    {26738, "PCS_NAD27_Alaska_zone_8"},
+    {26739, "PCS_NAD27_Alaska_zone_9"},
+    {26740, "PCS_NAD27_Alaska_zone_10"},
+    {26741, "PCS_NAD27_California_I"},
+    {26742, "PCS_NAD27_California_II"},
+    {26743, "PCS_NAD27_California_III"},
+    {26744, "PCS_NAD27_California_IV"},
+    {26745, "PCS_NAD27_California_V"},
+    {26746, "PCS_NAD27_California_VI"},
+    {26747, "PCS_NAD27_California_VII"},
+    {26748, "PCS_NAD27_Arizona_East"},
+    {26749, "PCS_NAD27_Arizona_Central"},
+    {26750, "PCS_NAD27_Arizona_West"},
+    {26751, "PCS_NAD27_Arkansas_North"},
+    {26752, "PCS_NAD27_Arkansas_South"},
+    {26753, "PCS_NAD27_Colorado_North"},
+    {26754, "PCS_NAD27_Colorado_Central"},
+    {26755, "PCS_NAD27_Colorado_South"},
+    {26756, "PCS_NAD27_Connecticut"},
+    {26757, "PCS_NAD27_Delaware"},
+    {26758, "PCS_NAD27_Florida_East"},
+    {26759, "PCS_NAD27_Florida_West"},
+    {26760, "PCS_NAD27_Florida_North"},
+    {26761, "PCS_NAD27_Hawaii_zone_1"},
+    {26762, "PCS_NAD27_Hawaii_zone_2"},
+    {26763, "PCS_NAD27_Hawaii_zone_3"},
+    {26764, "PCS_NAD27_Hawaii_zone_4"},
+    {26765, "PCS_NAD27_Hawaii_zone_5"},
+    {26766, "PCS_NAD27_Georgia_East"},
+    {26767, "PCS_NAD27_Georgia_West"},
+    {26768, "PCS_NAD27_Idaho_East"},
+    {26769, "PCS_NAD27_Idaho_Central"},
+    {26770, "PCS_NAD27_Idaho_West"},
+    {26771, "PCS_NAD27_Illinois_East"},
+    {26772, "PCS_NAD27_Illinois_West"},
+    {26773, "PCS_NAD27_Indiana_East"},
+    {26774, "PCS_NAD27_BLM_14N_feet"},
+    {26774, "PCS_NAD27_Indiana_West"},
+    {26775, "PCS_NAD27_BLM_15N_feet"},
+    {26775, "PCS_NAD27_Iowa_North"},
+    {26776, "PCS_NAD27_BLM_16N_feet"},
+    {26776, "PCS_NAD27_Iowa_South"},
+    {26777, "PCS_NAD27_BLM_17N_feet"},
+    {26777, "PCS_NAD27_Kansas_North"},
+    {26778, "PCS_NAD27_Kansas_South"},
+    {26779, "PCS_NAD27_Kentucky_North"},
+    {26780, "PCS_NAD27_Kentucky_South"},
+    {26781, "PCS_NAD27_Louisiana_North"},
+    {26782, "PCS_NAD27_Louisiana_South"},
+    {26783, "PCS_NAD27_Maine_East"},
+    {26784, "PCS_NAD27_Maine_West"},
+    {26785, "PCS_NAD27_Maryland"},
+    {26786, "PCS_NAD27_Massachusetts"},
+    {26787, "PCS_NAD27_Massachusetts_Is"},
+    {26788, "PCS_NAD27_Michigan_North"},
+    {26789, "PCS_NAD27_Michigan_Central"},
+    {26790, "PCS_NAD27_Michigan_South"},
+    {26791, "PCS_NAD27_Minnesota_North"},
+    {26792, "PCS_NAD27_Minnesota_Cent"},
+    {26793, "PCS_NAD27_Minnesota_South"},
+    {26794, "PCS_NAD27_Mississippi_East"},
+    {26795, "PCS_NAD27_Mississippi_West"},
+    {26796, "PCS_NAD27_Missouri_East"},
+    {26797, "PCS_NAD27_Missouri_Central"},
+    {26798, "PCS_NAD27_Missouri_West"},
+    {26801, "PCS_NAD_Michigan_Michigan_East"},
+    {26802, "PCS_NAD_Michigan_Michigan_Old_Central"},
+    {26803, "PCS_NAD_Michigan_Michigan_West"},
+    {26903, "PCS_NAD83_UTM_zone_3N"},
+    {26904, "PCS_NAD83_UTM_zone_4N"},
+    {26905, "PCS_NAD83_UTM_zone_5N"},
+    {26906, "PCS_NAD83_UTM_zone_6N"},
+    {26907, "PCS_NAD83_UTM_zone_7N"},
+    {26908, "PCS_NAD83_UTM_zone_8N"},
+    {26909, "PCS_NAD83_UTM_zone_9N"},
+    {26910, "PCS_NAD83_UTM_zone_10N"},
+    {26911, "PCS_NAD83_UTM_zone_11N"},
+    {26912, "PCS_NAD83_UTM_zone_12N"},
+    {26913, "PCS_NAD83_UTM_zone_13N"},
+    {26914, "PCS_NAD83_UTM_zone_14N"},
+    {26915, "PCS_NAD83_UTM_zone_15N"},
+    {26916, "PCS_NAD83_UTM_zone_16N"},
+    {26917, "PCS_NAD83_UTM_zone_17N"},
+    {26918, "PCS_NAD83_UTM_zone_18N"},
+    {26919, "PCS_NAD83_UTM_zone_19N"},
+    {26920, "PCS_NAD83_UTM_zone_20N"},
+    {26921, "PCS_NAD83_UTM_zone_21N"},
+    {26922, "PCS_NAD83_UTM_zone_22N"},
+    {26923, "PCS_NAD83_UTM_zone_23N"},
+    {26929, "PCS_NAD83_Alabama_East"},
+    {26930, "PCS_NAD83_Alabama_West"},
+    {26931, "PCS_NAD83_Alaska_zone_1"},
+    {26932, "PCS_NAD83_Alaska_zone_2"},
+    {26933, "PCS_NAD83_Alaska_zone_3"},
+    {26934, "PCS_NAD83_Alaska_zone_4"},
+    {26935, "PCS_NAD83_Alaska_zone_5"},
+    {26936, "PCS_NAD83_Alaska_zone_6"},
+    {26937, "PCS_NAD83_Alaska_zone_7"},
+    {26938, "PCS_NAD83_Alaska_zone_8"},
+    {26939, "PCS_NAD83_Alaska_zone_9"},
+    {26940, "PCS_NAD83_Alaska_zone_10"},
+    {26941, "PCS_NAD83_California_1"},
+    {26942, "PCS_NAD83_California_2"},
+    {26943, "PCS_NAD83_California_3"},
+    {26944, "PCS_NAD83_California_4"},
+    {26945, "PCS_NAD83_California_5"},
+    {26946, "PCS_NAD83_California_6"},
+    {26948, "PCS_NAD83_Arizona_East"},
+    {26949, "PCS_NAD83_Arizona_Central"},
+    {26950, "PCS_NAD83_Arizona_West"},
+    {26951, "PCS_NAD83_Arkansas_North"},
+    {26952, "PCS_NAD83_Arkansas_South"},
+    {26953, "PCS_NAD83_Colorado_North"},
+    {26954, "PCS_NAD83_Colorado_Central"},
+    {26955, "PCS_NAD83_Colorado_South"},
+    {26956, "PCS_NAD83_Connecticut"},
+    {26957, "PCS_NAD83_Delaware"},
+    {26958, "PCS_NAD83_Florida_East"},
+    {26959, "PCS_NAD83_Florida_West"},
+    {26960, "PCS_NAD83_Florida_North"},
+    {26961, "PCS_NAD83_Hawaii_zone_1"},
+    {26962, "PCS_NAD83_Hawaii_zone_2"},
+    {26963, "PCS_NAD83_Hawaii_zone_3"},
+    {26964, "PCS_NAD83_Hawaii_zone_4"},
+    {26965, "PCS_NAD83_Hawaii_zone_5"},
+    {26966, "PCS_NAD83_Georgia_East"},
+    {26967, "PCS_NAD83_Georgia_West"},
+    {26968, "PCS_NAD83_Idaho_East"},
+    {26969, "PCS_NAD83_Idaho_Central"},
+    {26970, "PCS_NAD83_Idaho_West"},
+    {26971, "PCS_NAD83_Illinois_East"},
+    {26972, "PCS_NAD83_Illinois_West"},
+    {26973, "PCS_NAD83_Indiana_East"},
+    {26974, "PCS_NAD83_Indiana_West"},
+    {26975, "PCS_NAD83_Iowa_North"},
+    {26976, "PCS_NAD83_Iowa_South"},
+    {26977, "PCS_NAD83_Kansas_North"},
+    {26978, "PCS_NAD83_Kansas_South"},
+    {26979, "PCS_NAD83_Kentucky_North"},
+    {26980, "PCS_NAD83_Kentucky_South"},
+    {26981, "PCS_NAD83_Louisiana_North"},
+    {26982, "PCS_NAD83_Louisiana_South"},
+    {26983, "PCS_NAD83_Maine_East"},
+    {26984, "PCS_NAD83_Maine_West"},
+    {26985, "PCS_NAD83_Maryland"},
+    {26986, "PCS_NAD83_Massachusetts"},
+    {26987, "PCS_NAD83_Massachusetts_Is"},
+    {26988, "PCS_NAD83_Michigan_North"},
+    {26989, "PCS_NAD83_Michigan_Central"},
+    {26990, "PCS_NAD83_Michigan_South"},
+    {26991, "PCS_NAD83_Minnesota_North"},
+    {26992, "PCS_NAD83_Minnesota_Cent"},
+    {26993, "PCS_NAD83_Minnesota_South"},
+    {26994, "PCS_NAD83_Mississippi_East"},
+    {26995, "PCS_NAD83_Mississippi_West"},
+    {26996, "PCS_NAD83_Missouri_East"},
+    {26997, "PCS_NAD83_Missouri_Central"},
+    {26998, "PCS_NAD83_Missouri_West"},
+    {27038, "PCS_Nahrwan_1967_UTM_38N"},
+    {27039, "PCS_Nahrwan_1967_UTM_39N"},
+    {27040, "PCS_Nahrwan_1967_UTM_40N"},
+    {27120, "PCS_Naparima_UTM_20N"},
+    {27200, "PCS_GD49_NZ_Map_Grid"},
+    {27291, "PCS_GD49_North_Island_Grid"},
+    {27292, "PCS_GD49_South_Island_Grid"},
+    {27429, "PCS_Datum_73_UTM_zone_29N"},
+    {27500, "PCS_ATF_Nord_de_Guerre"},
+    {27581, "PCS_NTF_France_I"},
+    {27582, "PCS_NTF_France_II"},
+    {27583, "PCS_NTF_France_III"},
+    {27591, "PCS_NTF_Nord_France"},
+    {27592, "PCS_NTF_Centre_France"},
+    {27593, "PCS_NTF_Sud_France"},
+    {27700, "PCS_British_National_Grid"},
+    {28232, "PCS_Point_Noire_UTM_32S"},
+    {28348, "PCS_GDA94_MGA_zone_48"},
+    {28349, "PCS_GDA94_MGA_zone_49"},
+    {28350, "PCS_GDA94_MGA_zone_50"},
+    {28351, "PCS_GDA94_MGA_zone_51"},
+    {28352, "PCS_GDA94_MGA_zone_52"},
+    {28353, "PCS_GDA94_MGA_zone_53"},
+    {28354, "PCS_GDA94_MGA_zone_54"},
+    {28355, "PCS_GDA94_MGA_zone_55"},
+    {28356, "PCS_GDA94_MGA_zone_56"},
+    {28357, "PCS_GDA94_MGA_zone_57"},
+    {28358, "PCS_GDA94_MGA_zone_58"},
+    {28404, "PCS_Pulkovo_Gauss_zone_4"},
+    {28405, "PCS_Pulkovo_Gauss_zone_5"},
+    {28406, "PCS_Pulkovo_Gauss_zone_6"},
+    {28407, "PCS_Pulkovo_Gauss_zone_7"},
+    {28408, "PCS_Pulkovo_Gauss_zone_8"},
+    {28409, "PCS_Pulkovo_Gauss_zone_9"},
+    {28410, "PCS_Pulkovo_Gauss_zone_10"},
+    {28411, "PCS_Pulkovo_Gauss_zone_11"},
+    {28412, "PCS_Pulkovo_Gauss_zone_12"},
+    {28413, "PCS_Pulkovo_Gauss_zone_13"},
+    {28414, "PCS_Pulkovo_Gauss_zone_14"},
+    {28415, "PCS_Pulkovo_Gauss_zone_15"},
+    {28416, "PCS_Pulkovo_Gauss_zone_16"},
+    {28417, "PCS_Pulkovo_Gauss_zone_17"},
+    {28418, "PCS_Pulkovo_Gauss_zone_18"},
+    {28419, "PCS_Pulkovo_Gauss_zone_19"},
+    {28420, "PCS_Pulkovo_Gauss_zone_20"},
+    {28421, "PCS_Pulkovo_Gauss_zone_21"},
+    {28422, "PCS_Pulkovo_Gauss_zone_22"},
+    {28423, "PCS_Pulkovo_Gauss_zone_23"},
+    {28424, "PCS_Pulkovo_Gauss_zone_24"},
+    {28425, "PCS_Pulkovo_Gauss_zone_25"},
+    {28426, "PCS_Pulkovo_Gauss_zone_26"},
+    {28427, "PCS_Pulkovo_Gauss_zone_27"},
+    {28428, "PCS_Pulkovo_Gauss_zone_28"},
+    {28429, "PCS_Pulkovo_Gauss_zone_29"},
+    {28430, "PCS_Pulkovo_Gauss_zone_30"},
+    {28431, "PCS_Pulkovo_Gauss_zone_31"},
+    {28432, "PCS_Pulkovo_Gauss_zone_32"},
+    {28464, "PCS_Pulkovo_Gauss_4N"},
+    {28465, "PCS_Pulkovo_Gauss_5N"},
+    {28466, "PCS_Pulkovo_Gauss_6N"},
+    {28467, "PCS_Pulkovo_Gauss_7N"},
+    {28468, "PCS_Pulkovo_Gauss_8N"},
+    {28469, "PCS_Pulkovo_Gauss_9N"},
+    {28470, "PCS_Pulkovo_Gauss_10N"},
+    {28471, "PCS_Pulkovo_Gauss_11N"},
+    {28472, "PCS_Pulkovo_Gauss_12N"},
+    {28473, "PCS_Pulkovo_Gauss_13N"},
+    {28474, "PCS_Pulkovo_Gauss_14N"},
+    {28475, "PCS_Pulkovo_Gauss_15N"},
+    {28476, "PCS_Pulkovo_Gauss_16N"},
+    {28477, "PCS_Pulkovo_Gauss_17N"},
+    {28478, "PCS_Pulkovo_Gauss_18N"},
+    {28479, "PCS_Pulkovo_Gauss_19N"},
+    {28480, "PCS_Pulkovo_Gauss_20N"},
+    {28481, "PCS_Pulkovo_Gauss_21N"},
+    {28482, "PCS_Pulkovo_Gauss_22N"},
+    {28483, "PCS_Pulkovo_Gauss_23N"},
+    {28484, "PCS_Pulkovo_Gauss_24N"},
+    {28485, "PCS_Pulkovo_Gauss_25N"},
+    {28486, "PCS_Pulkovo_Gauss_26N"},
+    {28487, "PCS_Pulkovo_Gauss_27N"},
+    {28488, "PCS_Pulkovo_Gauss_28N"},
+    {28489, "PCS_Pulkovo_Gauss_29N"},
+    {28490, "PCS_Pulkovo_Gauss_30N"},
+    {28491, "PCS_Pulkovo_Gauss_31N"},
+    {28492, "PCS_Pulkovo_Gauss_32N"},
+    {28600, "PCS_Qatar_National_Grid"},
+    {28991, "PCS_RD_Netherlands_Old"},
+    {28992, "PCS_RD_Netherlands_New"},
+    {29118, "PCS_SAD69_UTM_zone_18N"},
+    {29119, "PCS_SAD69_UTM_zone_19N"},
+    {29120, "PCS_SAD69_UTM_zone_20N"},
+    {29121, "PCS_SAD69_UTM_zone_21N"},
+    {29122, "PCS_SAD69_UTM_zone_22N"},
+    {29177, "PCS_SAD69_UTM_zone_17S"},
+    {29178, "PCS_SAD69_UTM_zone_18S"},
+    {29179, "PCS_SAD69_UTM_zone_19S"},
+    {29180, "PCS_SAD69_UTM_zone_20S"},
+    {29181, "PCS_SAD69_UTM_zone_21S"},
+    {29182, "PCS_SAD69_UTM_zone_22S"},
+    {29183, "PCS_SAD69_UTM_zone_23S"},
+    {29184, "PCS_SAD69_UTM_zone_24S"},
+    {29185, "PCS_SAD69_UTM_zone_25S"},
+    {29220, "PCS_Sapper_Hill_UTM_20S"},
+    {29221, "PCS_Sapper_Hill_UTM_21S"},
+    {29333, "PCS_Schwarzeck_UTM_33S"},
+    {29635, "PCS_Sudan_UTM_zone_35N"},
+    {29636, "PCS_Sudan_UTM_zone_36N"},
+    {29700, "PCS_Tananarive_Laborde"},
+    {29738, "PCS_Tananarive_UTM_38S"},
+    {29739, "PCS_Tananarive_UTM_39S"},
+    {29800, "PCS_Timbalai_1948_Borneo"},
+    {29849, "PCS_Timbalai_1948_UTM_49N"},
+    {29850, "PCS_Timbalai_1948_UTM_50N"},
+    {29900, "PCS_TM65_Irish_Nat_Grid"},
+    {30200, "PCS_Trinidad_1903_Trinidad"},
+    {30339, "PCS_TC_1948_UTM_zone_39N"},
+    {30340, "PCS_TC_1948_UTM_zone_40N"},
+    {30491, "PCS_Voirol_N_Algerie_ancien"},
+    {30492, "PCS_Voirol_S_Algerie_ancien"},
+    {30591, "PCS_Voirol_Unifie_N_Algerie"},
+    {30592, "PCS_Voirol_Unifie_S_Algerie"},
+    {30600, "PCS_Bern_1938_Swiss_New"},
+    {30729, "PCS_Nord_Sahara_UTM_29N"},
+    {30730, "PCS_Nord_Sahara_UTM_30N"},
+    {30731, "PCS_Nord_Sahara_UTM_31N"},
+    {30732, "PCS_Nord_Sahara_UTM_32N"},
+    {31028, "PCS_Yoff_UTM_zone_28N"},
+    {31121, "PCS_Zanderij_UTM_zone_21N"},
+    {31291, "PCS_MGI_Austria_West"},
+    {31292, "PCS_MGI_Austria_Central"},
+    {31293, "PCS_MGI_Austria_East"},
+    {31300, "PCS_Belge_Lambert_72"},
+    {31491, "PCS_DHDN_Germany_zone_1"},
+    {31492, "PCS_DHDN_Germany_zone_2"},
+    {31493, "PCS_DHDN_Germany_zone_3"},
+    {31494, "PCS_DHDN_Germany_zone_4"},
+    {31495, "PCS_DHDN_Germany_zone_5"},
+    {32001, "PCS_NAD27_Montana_North"},
+    {32002, "PCS_NAD27_Montana_Central"},
+    {32003, "PCS_NAD27_Montana_South"},
+    {32005, "PCS_NAD27_Nebraska_North"},
+    {32006, "PCS_NAD27_Nebraska_South"},
+    {32007, "PCS_NAD27_Nevada_East"},
+    {32008, "PCS_NAD27_Nevada_Central"},
+    {32009, "PCS_NAD27_Nevada_West"},
+    {32010, "PCS_NAD27_New_Hampshire"},
+    {32011, "PCS_NAD27_New_Jersey"},
+    {32012, "PCS_NAD27_New_Mexico_East"},
+    {32013, "PCS_NAD27_New_Mexico_Cent"},
+    {32014, "PCS_NAD27_New_Mexico_West"},
+    {32015, "PCS_NAD27_New_York_East"},
+    {32016, "PCS_NAD27_New_York_Central"},
+    {32017, "PCS_NAD27_New_York_West"},
+    {32018, "PCS_NAD27_New_York_Long_Is"},
+    {32019, "PCS_NAD27_North_Carolina"},
+    {32020, "PCS_NAD27_North_Dakota_N"},
+    {32021, "PCS_NAD27_North_Dakota_S"},
+    {32022, "PCS_NAD27_Ohio_North"},
+    {32023, "PCS_NAD27_Ohio_South"},
+    {32024, "PCS_NAD27_Oklahoma_North"},
+    {32025, "PCS_NAD27_Oklahoma_South"},
+    {32026, "PCS_NAD27_Oregon_North"},
+    {32027, "PCS_NAD27_Oregon_South"},
+    {32028, "PCS_NAD27_Pennsylvania_N"},
+    {32029, "PCS_NAD27_Pennsylvania_S"},
+    {32030, "PCS_NAD27_Rhode_Island"},
+    {32031, "PCS_NAD27_South_Carolina_N"},
+    {32033, "PCS_NAD27_South_Carolina_S"},
+    {32034, "PCS_NAD27_South_Dakota_N"},
+    {32035, "PCS_NAD27_South_Dakota_S"},
+    {32036, "PCS_NAD27_Tennessee"},
+    {32037, "PCS_NAD27_Texas_North"},
+    {32038, "PCS_NAD27_Texas_North_Cen"},
+    {32039, "PCS_NAD27_Texas_Central"},
+    {32040, "PCS_NAD27_Texas_South_Cen"},
+    {32041, "PCS_NAD27_Texas_South"},
+    {32042, "PCS_NAD27_Utah_North"},
+    {32043, "PCS_NAD27_Utah_Central"},
+    {32044, "PCS_NAD27_Utah_South"},
+    {32045, "PCS_NAD27_Vermont"},
+    {32046, "PCS_NAD27_Virginia_North"},
+    {32047, "PCS_NAD27_Virginia_South"},
+    {32048, "PCS_NAD27_Washington_North"},
+    {32049, "PCS_NAD27_Washington_South"},
+    {32050, "PCS_NAD27_West_Virginia_N"},
+    {32051, "PCS_NAD27_West_Virginia_S"},
+    {32052, "PCS_NAD27_Wisconsin_North"},
+    {32053, "PCS_NAD27_Wisconsin_Cen"},
+    {32054, "PCS_NAD27_Wisconsin_South"},
+    {32055, "PCS_NAD27_Wyoming_East"},
+    {32056, "PCS_NAD27_Wyoming_E_Cen"},
+    {32057, "PCS_NAD27_Wyoming_W_Cen"},
+    {32058, "PCS_NAD27_Wyoming_West"},
+    {32059, "PCS_NAD27_Puerto_Rico"},
+    {32060, "PCS_NAD27_St_Croix"},
+    {32100, "PCS_NAD83_Montana"},
+    {32104, "PCS_NAD83_Nebraska"},
+    {32107, "PCS_NAD83_Nevada_East"},
+    {32108, "PCS_NAD83_Nevada_Central"},
+    {32109, "PCS_NAD83_Nevada_West"},
+    {32110, "PCS_NAD83_New_Hampshire"},
+    {32111, "PCS_NAD83_New_Jersey"},
+    {32112, "PCS_NAD83_New_Mexico_East"},
+    {32113, "PCS_NAD83_New_Mexico_Cent"},
+    {32114, "PCS_NAD83_New_Mexico_West"},
+    {32115, "PCS_NAD83_New_York_East"},
+    {32116, "PCS_NAD83_New_York_Central"},
+    {32117, "PCS_NAD83_New_York_West"},
+    {32118, "PCS_NAD83_New_York_Long_Is"},
+    {32119, "PCS_NAD83_North_Carolina"},
+    {32120, "PCS_NAD83_North_Dakota_N"},
+    {32121, "PCS_NAD83_North_Dakota_S"},
+    {32122, "PCS_NAD83_Ohio_North"},
+    {32123, "PCS_NAD83_Ohio_South"},
+    {32124, "PCS_NAD83_Oklahoma_North"},
+    {32125, "PCS_NAD83_Oklahoma_South"},
+    {32126, "PCS_NAD83_Oregon_North"},
+    {32127, "PCS_NAD83_Oregon_South"},
+    {32128, "PCS_NAD83_Pennsylvania_N"},
+    {32129, "PCS_NAD83_Pennsylvania_S"},
+    {32130, "PCS_NAD83_Rhode_Island"},
+    {32133, "PCS_NAD83_South_Carolina"},
+    {32134, "PCS_NAD83_South_Dakota_N"},
+    {32135, "PCS_NAD83_South_Dakota_S"},
+    {32136, "PCS_NAD83_Tennessee"},
+    {32137, "PCS_NAD83_Texas_North"},
+    {32138, "PCS_NAD83_Texas_North_Cen"},
+    {32139, "PCS_NAD83_Texas_Central"},
+    {32140, "PCS_NAD83_Texas_South_Cen"},
+    {32141, "PCS_NAD83_Texas_South"},
+    {32142, "PCS_NAD83_Utah_North"},
+    {32143, "PCS_NAD83_Utah_Central"},
+    {32144, "PCS_NAD83_Utah_South"},
+    {32145, "PCS_NAD83_Vermont"},
+    {32146, "PCS_NAD83_Virginia_North"},
+    {32147, "PCS_NAD83_Virginia_South"},
+    {32148, "PCS_NAD83_Washington_North"},
+    {32149, "PCS_NAD83_Washington_South"},
+    {32150, "PCS_NAD83_West_Virginia_N"},
+    {32151, "PCS_NAD83_West_Virginia_S"},
+    {32152, "PCS_NAD83_Wisconsin_North"},
+    {32153, "PCS_NAD83_Wisconsin_Cen"},
+    {32154, "PCS_NAD83_Wisconsin_South"},
+    {32155, "PCS_NAD83_Wyoming_East"},
+    {32156, "PCS_NAD83_Wyoming_E_Cen"},
+    {32157, "PCS_NAD83_Wyoming_W_Cen"},
+    {32158, "PCS_NAD83_Wyoming_West"},
+    {32161, "PCS_NAD83_Puerto_Rico_Virgin_Is"},
+    {32201, "PCS_WGS72_UTM_zone_1N"},
+    {32202, "PCS_WGS72_UTM_zone_2N"},
+    {32203, "PCS_WGS72_UTM_zone_3N"},
+    {32204, "PCS_WGS72_UTM_zone_4N"},
+    {32205, "PCS_WGS72_UTM_zone_5N"},
+    {32206, "PCS_WGS72_UTM_zone_6N"},
+    {32207, "PCS_WGS72_UTM_zone_7N"},
+    {32208, "PCS_WGS72_UTM_zone_8N"},
+    {32209, "PCS_WGS72_UTM_zone_9N"},
+    {32210, "PCS_WGS72_UTM_zone_10N"},
+    {32211, "PCS_WGS72_UTM_zone_11N"},
+    {32212, "PCS_WGS72_UTM_zone_12N"},
+    {32213, "PCS_WGS72_UTM_zone_13N"},
+    {32214, "PCS_WGS72_UTM_zone_14N"},
+    {32215, "PCS_WGS72_UTM_zone_15N"},
+    {32216, "PCS_WGS72_UTM_zone_16N"},
+    {32217, "PCS_WGS72_UTM_zone_17N"},
+    {32218, "PCS_WGS72_UTM_zone_18N"},
+    {32219, "PCS_WGS72_UTM_zone_19N"},
+    {32220, "PCS_WGS72_UTM_zone_20N"},
+    {32221, "PCS_WGS72_UTM_zone_21N"},
+    {32222, "PCS_WGS72_UTM_zone_22N"},
+    {32223, "PCS_WGS72_UTM_zone_23N"},
+    {32224, "PCS_WGS72_UTM_zone_24N"},
+    {32225, "PCS_WGS72_UTM_zone_25N"},
+    {32226, "PCS_WGS72_UTM_zone_26N"},
+    {32227, "PCS_WGS72_UTM_zone_27N"},
+    {32228, "PCS_WGS72_UTM_zone_28N"},
+    {32229, "PCS_WGS72_UTM_zone_29N"},
+    {32230, "PCS_WGS72_UTM_zone_30N"},
+    {32231, "PCS_WGS72_UTM_zone_31N"},
+    {32232, "PCS_WGS72_UTM_zone_32N"},
+    {32233, "PCS_WGS72_UTM_zone_33N"},
+    {32234, "PCS_WGS72_UTM_zone_34N"},
+    {32235, "PCS_WGS72_UTM_zone_35N"},
+    {32236, "PCS_WGS72_UTM_zone_36N"},
+    {32237, "PCS_WGS72_UTM_zone_37N"},
+    {32238, "PCS_WGS72_UTM_zone_38N"},
+    {32239, "PCS_WGS72_UTM_zone_39N"},
+    {32240, "PCS_WGS72_UTM_zone_40N"},
+    {32241, "PCS_WGS72_UTM_zone_41N"},
+    {32242, "PCS_WGS72_UTM_zone_42N"},
+    {32243, "PCS_WGS72_UTM_zone_43N"},
+    {32244, "PCS_WGS72_UTM_zone_44N"},
+    {32245, "PCS_WGS72_UTM_zone_45N"},
+    {32246, "PCS_WGS72_UTM_zone_46N"},
+    {32247, "PCS_WGS72_UTM_zone_47N"},
+    {32248, "PCS_WGS72_UTM_zone_48N"},
+    {32249, "PCS_WGS72_UTM_zone_49N"},
+    {32250, "PCS_WGS72_UTM_zone_50N"},
+    {32251, "PCS_WGS72_UTM_zone_51N"},
+    {32252, "PCS_WGS72_UTM_zone_52N"},
+    {32253, "PCS_WGS72_UTM_zone_53N"},
+    {32254, "PCS_WGS72_UTM_zone_54N"},
+    {32255, "PCS_WGS72_UTM_zone_55N"},
+    {32256, "PCS_WGS72_UTM_zone_56N"},
+    {32257, "PCS_WGS72_UTM_zone_57N"},
+    {32258, "PCS_WGS72_UTM_zone_58N"},
+    {32259, "PCS_WGS72_UTM_zone_59N"},
+    {32260, "PCS_WGS72_UTM_zone_60N"},
+    {32301, "PCS_WGS72_UTM_zone_1S"},
+    {32302, "PCS_WGS72_UTM_zone_2S"},
+    {32303, "PCS_WGS72_UTM_zone_3S"},
+    {32304, "PCS_WGS72_UTM_zone_4S"},
+    {32305, "PCS_WGS72_UTM_zone_5S"},
+    {32306, "PCS_WGS72_UTM_zone_6S"},
+    {32307, "PCS_WGS72_UTM_zone_7S"},
+    {32308, "PCS_WGS72_UTM_zone_8S"},
+    {32309, "PCS_WGS72_UTM_zone_9S"},
+    {32310, "PCS_WGS72_UTM_zone_10S"},
+    {32311, "PCS_WGS72_UTM_zone_11S"},
+    {32312, "PCS_WGS72_UTM_zone_12S"},
+    {32313, "PCS_WGS72_UTM_zone_13S"},
+    {32314, "PCS_WGS72_UTM_zone_14S"},
+    {32315, "PCS_WGS72_UTM_zone_15S"},
+    {32316, "PCS_WGS72_UTM_zone_16S"},
+    {32317, "PCS_WGS72_UTM_zone_17S"},
+    {32318, "PCS_WGS72_UTM_zone_18S"},
+    {32319, "PCS_WGS72_UTM_zone_19S"},
+    {32320, "PCS_WGS72_UTM_zone_20S"},
+    {32321, "PCS_WGS72_UTM_zone_21S"},
+    {32322, "PCS_WGS72_UTM_zone_22S"},
+    {32323, "PCS_WGS72_UTM_zone_23S"},
+    {32324, "PCS_WGS72_UTM_zone_24S"},
+    {32325, "PCS_WGS72_UTM_zone_25S"},
+    {32326, "PCS_WGS72_UTM_zone_26S"},
+    {32327, "PCS_WGS72_UTM_zone_27S"},
+    {32328, "PCS_WGS72_UTM_zone_28S"},
+    {32329, "PCS_WGS72_UTM_zone_29S"},
+    {32330, "PCS_WGS72_UTM_zone_30S"},
+    {32331, "PCS_WGS72_UTM_zone_31S"},
+    {32332, "PCS_WGS72_UTM_zone_32S"},
+    {32333, "PCS_WGS72_UTM_zone_33S"},
+    {32334, "PCS_WGS72_UTM_zone_34S"},
+    {32335, "PCS_WGS72_UTM_zone_35S"},
+    {32336, "PCS_WGS72_UTM_zone_36S"},
+    {32337, "PCS_WGS72_UTM_zone_37S"},
+    {32338, "PCS_WGS72_UTM_zone_38S"},
+    {32339, "PCS_WGS72_UTM_zone_39S"},
+    {32340, "PCS_WGS72_UTM_zone_40S"},
+    {32341, "PCS_WGS72_UTM_zone_41S"},
+    {32342, "PCS_WGS72_UTM_zone_42S"},
+    {32343, "PCS_WGS72_UTM_zone_43S"},
+    {32344, "PCS_WGS72_UTM_zone_44S"},
+    {32345, "PCS_WGS72_UTM_zone_45S"},
+    {32346, "PCS_WGS72_UTM_zone_46S"},
+    {32347, "PCS_WGS72_UTM_zone_47S"},
+    {32348, "PCS_WGS72_UTM_zone_48S"},
+    {32349, "PCS_WGS72_UTM_zone_49S"},
+    {32350, "PCS_WGS72_UTM_zone_50S"},
+    {32351, "PCS_WGS72_UTM_zone_51S"},
+    {32352, "PCS_WGS72_UTM_zone_52S"},
+    {32353, "PCS_WGS72_UTM_zone_53S"},
+    {32354, "PCS_WGS72_UTM_zone_54S"},
+    {32355, "PCS_WGS72_UTM_zone_55S"},
+    {32356, "PCS_WGS72_UTM_zone_56S"},
+    {32357, "PCS_WGS72_UTM_zone_57S"},
+    {32358, "PCS_WGS72_UTM_zone_58S"},
+    {32359, "PCS_WGS72_UTM_zone_59S"},
+    {32360, "PCS_WGS72_UTM_zone_60S"},
+    {32401, "PCS_WGS72BE_UTM_zone_1N"},
+    {32402, "PCS_WGS72BE_UTM_zone_2N"},
+    {32403, "PCS_WGS72BE_UTM_zone_3N"},
+    {32404, "PCS_WGS72BE_UTM_zone_4N"},
+    {32405, "PCS_WGS72BE_UTM_zone_5N"},
+    {32406, "PCS_WGS72BE_UTM_zone_6N"},
+    {32407, "PCS_WGS72BE_UTM_zone_7N"},
+    {32408, "PCS_WGS72BE_UTM_zone_8N"},
+    {32409, "PCS_WGS72BE_UTM_zone_9N"},
+    {32410, "PCS_WGS72BE_UTM_zone_10N"},
+    {32411, "PCS_WGS72BE_UTM_zone_11N"},
+    {32412, "PCS_WGS72BE_UTM_zone_12N"},
+    {32413, "PCS_WGS72BE_UTM_zone_13N"},
+    {32414, "PCS_WGS72BE_UTM_zone_14N"},
+    {32415, "PCS_WGS72BE_UTM_zone_15N"},
+    {32416, "PCS_WGS72BE_UTM_zone_16N"},
+    {32417, "PCS_WGS72BE_UTM_zone_17N"},
+    {32418, "PCS_WGS72BE_UTM_zone_18N"},
+    {32419, "PCS_WGS72BE_UTM_zone_19N"},
+    {32420, "PCS_WGS72BE_UTM_zone_20N"},
+    {32421, "PCS_WGS72BE_UTM_zone_21N"},
+    {32422, "PCS_WGS72BE_UTM_zone_22N"},
+    {32423, "PCS_WGS72BE_UTM_zone_23N"},
+    {32424, "PCS_WGS72BE_UTM_zone_24N"},
+    {32425, "PCS_WGS72BE_UTM_zone_25N"},
+    {32426, "PCS_WGS72BE_UTM_zone_26N"},
+    {32427, "PCS_WGS72BE_UTM_zone_27N"},
+    {32428, "PCS_WGS72BE_UTM_zone_28N"},
+    {32429, "PCS_WGS72BE_UTM_zone_29N"},
+    {32430, "PCS_WGS72BE_UTM_zone_30N"},
+    {32431, "PCS_WGS72BE_UTM_zone_31N"},
+    {32432, "PCS_WGS72BE_UTM_zone_32N"},
+    {32433, "PCS_WGS72BE_UTM_zone_33N"},
+    {32434, "PCS_WGS72BE_UTM_zone_34N"},
+    {32435, "PCS_WGS72BE_UTM_zone_35N"},
+    {32436, "PCS_WGS72BE_UTM_zone_36N"},
+    {32437, "PCS_WGS72BE_UTM_zone_37N"},
+    {32438, "PCS_WGS72BE_UTM_zone_38N"},
+    {32439, "PCS_WGS72BE_UTM_zone_39N"},
+    {32440, "PCS_WGS72BE_UTM_zone_40N"},
+    {32441, "PCS_WGS72BE_UTM_zone_41N"},
+    {32442, "PCS_WGS72BE_UTM_zone_42N"},
+    {32443, "PCS_WGS72BE_UTM_zone_43N"},
+    {32444, "PCS_WGS72BE_UTM_zone_44N"},
+    {32445, "PCS_WGS72BE_UTM_zone_45N"},
+    {32446, "PCS_WGS72BE_UTM_zone_46N"},
+    {32447, "PCS_WGS72BE_UTM_zone_47N"},
+    {32448, "PCS_WGS72BE_UTM_zone_48N"},
+    {32449, "PCS_WGS72BE_UTM_zone_49N"},
+    {32450, "PCS_WGS72BE_UTM_zone_50N"},
+    {32451, "PCS_WGS72BE_UTM_zone_51N"},
+    {32452, "PCS_WGS72BE_UTM_zone_52N"},
+    {32453, "PCS_WGS72BE_UTM_zone_53N"},
+    {32454, "PCS_WGS72BE_UTM_zone_54N"},
+    {32455, "PCS_WGS72BE_UTM_zone_55N"},
+    {32456, "PCS_WGS72BE_UTM_zone_56N"},
+    {32457, "PCS_WGS72BE_UTM_zone_57N"},
+    {32458, "PCS_WGS72BE_UTM_zone_58N"},
+    {32459, "PCS_WGS72BE_UTM_zone_59N"},
+    {32460, "PCS_WGS72BE_UTM_zone_60N"},
+    {32501, "PCS_WGS72BE_UTM_zone_1S"},
+    {32502, "PCS_WGS72BE_UTM_zone_2S"},
+    {32503, "PCS_WGS72BE_UTM_zone_3S"},
+    {32504, "PCS_WGS72BE_UTM_zone_4S"},
+    {32505, "PCS_WGS72BE_UTM_zone_5S"},
+    {32506, "PCS_WGS72BE_UTM_zone_6S"},
+    {32507, "PCS_WGS72BE_UTM_zone_7S"},
+    {32508, "PCS_WGS72BE_UTM_zone_8S"},
+    {32509, "PCS_WGS72BE_UTM_zone_9S"},
+    {32510, "PCS_WGS72BE_UTM_zone_10S"},
+    {32511, "PCS_WGS72BE_UTM_zone_11S"},
+    {32512, "PCS_WGS72BE_UTM_zone_12S"},
+    {32513, "PCS_WGS72BE_UTM_zone_13S"},
+    {32514, "PCS_WGS72BE_UTM_zone_14S"},
+    {32515, "PCS_WGS72BE_UTM_zone_15S"},
+    {32516, "PCS_WGS72BE_UTM_zone_16S"},
+    {32517, "PCS_WGS72BE_UTM_zone_17S"},
+    {32518, "PCS_WGS72BE_UTM_zone_18S"},
+    {32519, "PCS_WGS72BE_UTM_zone_19S"},
+    {32520, "PCS_WGS72BE_UTM_zone_20S"},
+    {32521, "PCS_WGS72BE_UTM_zone_21S"},
+    {32522, "PCS_WGS72BE_UTM_zone_22S"},
+    {32523, "PCS_WGS72BE_UTM_zone_23S"},
+    {32524, "PCS_WGS72BE_UTM_zone_24S"},
+    {32525, "PCS_WGS72BE_UTM_zone_25S"},
+    {32526, "PCS_WGS72BE_UTM_zone_26S"},
+    {32527, "PCS_WGS72BE_UTM_zone_27S"},
+    {32528, "PCS_WGS72BE_UTM_zone_28S"},
+    {32529, "PCS_WGS72BE_UTM_zone_29S"},
+    {32530, "PCS_WGS72BE_UTM_zone_30S"},
+    {32531, "PCS_WGS72BE_UTM_zone_31S"},
+    {32532, "PCS_WGS72BE_UTM_zone_32S"},
+    {32533, "PCS_WGS72BE_UTM_zone_33S"},
+    {32534, "PCS_WGS72BE_UTM_zone_34S"},
+    {32535, "PCS_WGS72BE_UTM_zone_35S"},
+    {32536, "PCS_WGS72BE_UTM_zone_36S"},
+    {32537, "PCS_WGS72BE_UTM_zone_37S"},
+    {32538, "PCS_WGS72BE_UTM_zone_38S"},
+    {32539, "PCS_WGS72BE_UTM_zone_39S"},
+    {32540, "PCS_WGS72BE_UTM_zone_40S"},
+    {32541, "PCS_WGS72BE_UTM_zone_41S"},
+    {32542, "PCS_WGS72BE_UTM_zone_42S"},
+    {32543, "PCS_WGS72BE_UTM_zone_43S"},
+    {32544, "PCS_WGS72BE_UTM_zone_44S"},
+    {32545, "PCS_WGS72BE_UTM_zone_45S"},
+    {32546, "PCS_WGS72BE_UTM_zone_46S"},
+    {32547, "PCS_WGS72BE_UTM_zone_47S"},
+    {32548, "PCS_WGS72BE_UTM_zone_48S"},
+    {32549, "PCS_WGS72BE_UTM_zone_49S"},
+    {32550, "PCS_WGS72BE_UTM_zone_50S"},
+    {32551, "PCS_WGS72BE_UTM_zone_51S"},
+    {32552, "PCS_WGS72BE_UTM_zone_52S"},
+    {32553, "PCS_WGS72BE_UTM_zone_53S"},
+    {32554, "PCS_WGS72BE_UTM_zone_54S"},
+    {32555, "PCS_WGS72BE_UTM_zone_55S"},
+    {32556, "PCS_WGS72BE_UTM_zone_56S"},
+    {32557, "PCS_WGS72BE_UTM_zone_57S"},
+    {32558, "PCS_WGS72BE_UTM_zone_58S"},
+    {32559, "PCS_WGS72BE_UTM_zone_59S"},
+    {32560, "PCS_WGS72BE_UTM_zone_60S"},
+    {32601, "PCS_WGS84_UTM_zone_1N"},
+    {32602, "PCS_WGS84_UTM_zone_2N"},
+    {32603, "PCS_WGS84_UTM_zone_3N"},
+    {32604, "PCS_WGS84_UTM_zone_4N"},
+    {32605, "PCS_WGS84_UTM_zone_5N"},
+    {32606, "PCS_WGS84_UTM_zone_6N"},
+    {32607, "PCS_WGS84_UTM_zone_7N"},
+    {32608, "PCS_WGS84_UTM_zone_8N"},
+    {32609, "PCS_WGS84_UTM_zone_9N"},
+    {32610, "PCS_WGS84_UTM_zone_10N"},
+    {32611, "PCS_WGS84_UTM_zone_11N"},
+    {32612, "PCS_WGS84_UTM_zone_12N"},
+    {32613, "PCS_WGS84_UTM_zone_13N"},
+    {32614, "PCS_WGS84_UTM_zone_14N"},
+    {32615, "PCS_WGS84_UTM_zone_15N"},
+    {32616, "PCS_WGS84_UTM_zone_16N"},
+    {32617, "PCS_WGS84_UTM_zone_17N"},
+    {32618, "PCS_WGS84_UTM_zone_18N"},
+    {32619, "PCS_WGS84_UTM_zone_19N"},
+    {32620, "PCS_WGS84_UTM_zone_20N"},
+    {32621, "PCS_WGS84_UTM_zone_21N"},
+    {32622, "PCS_WGS84_UTM_zone_22N"},
+    {32623, "PCS_WGS84_UTM_zone_23N"},
+    {32624, "PCS_WGS84_UTM_zone_24N"},
+    {32625, "PCS_WGS84_UTM_zone_25N"},
+    {32626, "PCS_WGS84_UTM_zone_26N"},
+    {32627, "PCS_WGS84_UTM_zone_27N"},
+    {32628, "PCS_WGS84_UTM_zone_28N"},
+    {32629, "PCS_WGS84_UTM_zone_29N"},
+    {32630, "PCS_WGS84_UTM_zone_30N"},
+    {32631, "PCS_WGS84_UTM_zone_31N"},
+    {32632, "PCS_WGS84_UTM_zone_32N"},
+    {32633, "PCS_WGS84_UTM_zone_33N"},
+    {32634, "PCS_WGS84_UTM_zone_34N"},
+    {32635, "PCS_WGS84_UTM_zone_35N"},
+    {32636, "PCS_WGS84_UTM_zone_36N"},
+    {32637, "PCS_WGS84_UTM_zone_37N"},
+    {32638, "PCS_WGS84_UTM_zone_38N"},
+    {32639, "PCS_WGS84_UTM_zone_39N"},
+    {32640, "PCS_WGS84_UTM_zone_40N"},
+    {32641, "PCS_WGS84_UTM_zone_41N"},
+    {32642, "PCS_WGS84_UTM_zone_42N"},
+    {32643, "PCS_WGS84_UTM_zone_43N"},
+    {32644, "PCS_WGS84_UTM_zone_44N"},
+    {32645, "PCS_WGS84_UTM_zone_45N"},
+    {32646, "PCS_WGS84_UTM_zone_46N"},
+    {32647, "PCS_WGS84_UTM_zone_47N"},
+    {32648, "PCS_WGS84_UTM_zone_48N"},
+    {32649, "PCS_WGS84_UTM_zone_49N"},
+    {32650, "PCS_WGS84_UTM_zone_50N"},
+    {32651, "PCS_WGS84_UTM_zone_51N"},
+    {32652, "PCS_WGS84_UTM_zone_52N"},
+    {32653, "PCS_WGS84_UTM_zone_53N"},
+    {32654, "PCS_WGS84_UTM_zone_54N"},
+    {32655, "PCS_WGS84_UTM_zone_55N"},
+    {32656, "PCS_WGS84_UTM_zone_56N"},
+    {32657, "PCS_WGS84_UTM_zone_57N"},
+    {32658, "PCS_WGS84_UTM_zone_58N"},
+    {32659, "PCS_WGS84_UTM_zone_59N"},
+    {32660, "PCS_WGS84_UTM_zone_60N"},
+    {32701, "PCS_WGS84_UTM_zone_1S"},
+    {32702, "PCS_WGS84_UTM_zone_2S"},
+    {32703, "PCS_WGS84_UTM_zone_3S"},
+    {32704, "PCS_WGS84_UTM_zone_4S"},
+    {32705, "PCS_WGS84_UTM_zone_5S"},
+    {32706, "PCS_WGS84_UTM_zone_6S"},
+    {32707, "PCS_WGS84_UTM_zone_7S"},
+    {32708, "PCS_WGS84_UTM_zone_8S"},
+    {32709, "PCS_WGS84_UTM_zone_9S"},
+    {32710, "PCS_WGS84_UTM_zone_10S"},
+    {32711, "PCS_WGS84_UTM_zone_11S"},
+    {32712, "PCS_WGS84_UTM_zone_12S"},
+    {32713, "PCS_WGS84_UTM_zone_13S"},
+    {32714, "PCS_WGS84_UTM_zone_14S"},
+    {32715, "PCS_WGS84_UTM_zone_15S"},
+    {32716, "PCS_WGS84_UTM_zone_16S"},
+    {32717, "PCS_WGS84_UTM_zone_17S"},
+    {32718, "PCS_WGS84_UTM_zone_18S"},
+    {32719, "PCS_WGS84_UTM_zone_19S"},
+    {32720, "PCS_WGS84_UTM_zone_20S"},
+    {32721, "PCS_WGS84_UTM_zone_21S"},
+    {32722, "PCS_WGS84_UTM_zone_22S"},
+    {32723, "PCS_WGS84_UTM_zone_23S"},
+    {32724, "PCS_WGS84_UTM_zone_24S"},
+    {32725, "PCS_WGS84_UTM_zone_25S"},
+    {32726, "PCS_WGS84_UTM_zone_26S"},
+    {32727, "PCS_WGS84_UTM_zone_27S"},
+    {32728, "PCS_WGS84_UTM_zone_28S"},
+    {32729, "PCS_WGS84_UTM_zone_29S"},
+    {32730, "PCS_WGS84_UTM_zone_30S"},
+    {32731, "PCS_WGS84_UTM_zone_31S"},
+    {32732, "PCS_WGS84_UTM_zone_32S"},
+    {32733, "PCS_WGS84_UTM_zone_33S"},
+    {32734, "PCS_WGS84_UTM_zone_34S"},
+    {32735, "PCS_WGS84_UTM_zone_35S"},
+    {32736, "PCS_WGS84_UTM_zone_36S"},
+    {32737, "PCS_WGS84_UTM_zone_37S"},
+    {32738, "PCS_WGS84_UTM_zone_38S"},
+    {32739, "PCS_WGS84_UTM_zone_39S"},
+    {32740, "PCS_WGS84_UTM_zone_40S"},
+    {32741, "PCS_WGS84_UTM_zone_41S"},
+    {32742, "PCS_WGS84_UTM_zone_42S"},
+    {32743, "PCS_WGS84_UTM_zone_43S"},
+    {32744, "PCS_WGS84_UTM_zone_44S"},
+    {32745, "PCS_WGS84_UTM_zone_45S"},
+    {32746, "PCS_WGS84_UTM_zone_46S"},
+    {32747, "PCS_WGS84_UTM_zone_47S"},
+    {32748, "PCS_WGS84_UTM_zone_48S"},
+    {32749, "PCS_WGS84_UTM_zone_49S"},
+    {32750, "PCS_WGS84_UTM_zone_50S"},
+    {32751, "PCS_WGS84_UTM_zone_51S"},
+    {32752, "PCS_WGS84_UTM_zone_52S"},
+    {32753, "PCS_WGS84_UTM_zone_53S"},
+    {32754, "PCS_WGS84_UTM_zone_54S"},
+    {32755, "PCS_WGS84_UTM_zone_55S"},
+    {32756, "PCS_WGS84_UTM_zone_56S"},
+    {32757, "PCS_WGS84_UTM_zone_57S"},
+    {32758, "PCS_WGS84_UTM_zone_58S"},
+    {32759, "PCS_WGS84_UTM_zone_59S"},
+    {32760, "PCS_WGS84_UTM_zone_60S"}
+};
+
+const TiffGeoTagKeyName ff_tiff_projection_codes[] = {
+    {10101, "Proj_Alabama_CS27_East"},
+    {10102, "Proj_Alabama_CS27_West"},
+    {10131, "Proj_Alabama_CS83_East"},
+    {10132, "Proj_Alabama_CS83_West"},
+    {10201, "Proj_Arizona_Coordinate_System_east"},
+    {10202, "Proj_Arizona_Coordinate_System_Central"},
+    {10203, "Proj_Arizona_Coordinate_System_west"},
+    {10231, "Proj_Arizona_CS83_east"},
+    {10232, "Proj_Arizona_CS83_Central"},
+    {10233, "Proj_Arizona_CS83_west"},
+    {10301, "Proj_Arkansas_CS27_North"},
+    {10302, "Proj_Arkansas_CS27_South"},
+    {10331, "Proj_Arkansas_CS83_North"},
+    {10332, "Proj_Arkansas_CS83_South"},
+    {10401, "Proj_California_CS27_I"},
+    {10402, "Proj_California_CS27_II"},
+    {10403, "Proj_California_CS27_III"},
+    {10404, "Proj_California_CS27_IV"},
+    {10405, "Proj_California_CS27_V"},
+    {10406, "Proj_California_CS27_VI"},
+    {10407, "Proj_California_CS27_VII"},
+    {10431, "Proj_California_CS83_1"},
+    {10432, "Proj_California_CS83_2"},
+    {10433, "Proj_California_CS83_3"},
+    {10434, "Proj_California_CS83_4"},
+    {10435, "Proj_California_CS83_5"},
+    {10436, "Proj_California_CS83_6"},
+    {10501, "Proj_Colorado_CS27_North"},
+    {10502, "Proj_Colorado_CS27_Central"},
+    {10503, "Proj_Colorado_CS27_South"},
+    {10531, "Proj_Colorado_CS83_North"},
+    {10532, "Proj_Colorado_CS83_Central"},
+    {10533, "Proj_Colorado_CS83_South"},
+    {10600, "Proj_Connecticut_CS27"},
+    {10630, "Proj_Connecticut_CS83"},
+    {10700, "Proj_Delaware_CS27"},
+    {10730, "Proj_Delaware_CS83"},
+    {10901, "Proj_Florida_CS27_East"},
+    {10902, "Proj_Florida_CS27_West"},
+    {10903, "Proj_Florida_CS27_North"},
+    {10931, "Proj_Florida_CS83_East"},
+    {10932, "Proj_Florida_CS83_West"},
+    {10933, "Proj_Florida_CS83_North"},
+    {11001, "Proj_Georgia_CS27_East"},
+    {11002, "Proj_Georgia_CS27_West"},
+    {11031, "Proj_Georgia_CS83_East"},
+    {11032, "Proj_Georgia_CS83_West"},
+    {11101, "Proj_Idaho_CS27_East"},
+    {11102, "Proj_Idaho_CS27_Central"},
+    {11103, "Proj_Idaho_CS27_West"},
+    {11131, "Proj_Idaho_CS83_East"},
+    {11132, "Proj_Idaho_CS83_Central"},
+    {11133, "Proj_Idaho_CS83_West"},
+    {11201, "Proj_Illinois_CS27_East"},
+    {11202, "Proj_Illinois_CS27_West"},
+    {11231, "Proj_Illinois_CS83_East"},
+    {11232, "Proj_Illinois_CS83_West"},
+    {11301, "Proj_Indiana_CS27_East"},
+    {11302, "Proj_Indiana_CS27_West"},
+    {11331, "Proj_Indiana_CS83_East"},
+    {11332, "Proj_Indiana_CS83_West"},
+    {11401, "Proj_Iowa_CS27_North"},
+    {11402, "Proj_Iowa_CS27_South"},
+    {11431, "Proj_Iowa_CS83_North"},
+    {11432, "Proj_Iowa_CS83_South"},
+    {11501, "Proj_Kansas_CS27_North"},
+    {11502, "Proj_Kansas_CS27_South"},
+    {11531, "Proj_Kansas_CS83_North"},
+    {11532, "Proj_Kansas_CS83_South"},
+    {11601, "Proj_Kentucky_CS27_North"},
+    {11602, "Proj_Kentucky_CS27_South"},
+    {11631, "Proj_Kentucky_CS83_North"},
+    {11632, "Proj_Kentucky_CS83_South"},
+    {11701, "Proj_Louisiana_CS27_North"},
+    {11702, "Proj_Louisiana_CS27_South"},
+    {11731, "Proj_Louisiana_CS83_North"},
+    {11732, "Proj_Louisiana_CS83_South"},
+    {11801, "Proj_Maine_CS27_East"},
+    {11802, "Proj_Maine_CS27_West"},
+    {11831, "Proj_Maine_CS83_East"},
+    {11832, "Proj_Maine_CS83_West"},
+    {11900, "Proj_Maryland_CS27"},
+    {11930, "Proj_Maryland_CS83"},
+    {12001, "Proj_Massachusetts_CS27_Mainland"},
+    {12002, "Proj_Massachusetts_CS27_Island"},
+    {12031, "Proj_Massachusetts_CS83_Mainland"},
+    {12032, "Proj_Massachusetts_CS83_Island"},
+    {12101, "Proj_Michigan_State_Plane_East"},
+    {12102, "Proj_Michigan_State_Plane_Old_Central"},
+    {12103, "Proj_Michigan_State_Plane_West"},
+    {12111, "Proj_Michigan_CS27_North"},
+    {12112, "Proj_Michigan_CS27_Central"},
+    {12113, "Proj_Michigan_CS27_South"},
+    {12141, "Proj_Michigan_CS83_North"},
+    {12142, "Proj_Michigan_CS83_Central"},
+    {12143, "Proj_Michigan_CS83_South"},
+    {12201, "Proj_Minnesota_CS27_North"},
+    {12202, "Proj_Minnesota_CS27_Central"},
+    {12203, "Proj_Minnesota_CS27_South"},
+    {12231, "Proj_Minnesota_CS83_North"},
+    {12232, "Proj_Minnesota_CS83_Central"},
+    {12233, "Proj_Minnesota_CS83_South"},
+    {12301, "Proj_Mississippi_CS27_East"},
+    {12302, "Proj_Mississippi_CS27_West"},
+    {12331, "Proj_Mississippi_CS83_East"},
+    {12332, "Proj_Mississippi_CS83_West"},
+    {12401, "Proj_Missouri_CS27_East"},
+    {12402, "Proj_Missouri_CS27_Central"},
+    {12403, "Proj_Missouri_CS27_West"},
+    {12431, "Proj_Missouri_CS83_East"},
+    {12432, "Proj_Missouri_CS83_Central"},
+    {12433, "Proj_Missouri_CS83_West"},
+    {12501, "Proj_Montana_CS27_North"},
+    {12502, "Proj_Montana_CS27_Central"},
+    {12503, "Proj_Montana_CS27_South"},
+    {12530, "Proj_Montana_CS83"},
+    {12601, "Proj_Nebraska_CS27_North"},
+    {12602, "Proj_Nebraska_CS27_South"},
+    {12630, "Proj_Nebraska_CS83"},
+    {12701, "Proj_Nevada_CS27_East"},
+    {12702, "Proj_Nevada_CS27_Central"},
+    {12703, "Proj_Nevada_CS27_West"},
+    {12731, "Proj_Nevada_CS83_East"},
+    {12732, "Proj_Nevada_CS83_Central"},
+    {12733, "Proj_Nevada_CS83_West"},
+    {12800, "Proj_New_Hampshire_CS27"},
+    {12830, "Proj_New_Hampshire_CS83"},
+    {12900, "Proj_New_Jersey_CS27"},
+    {12930, "Proj_New_Jersey_CS83"},
+    {13001, "Proj_New_Mexico_CS27_East"},
+    {13002, "Proj_New_Mexico_CS27_Central"},
+    {13003, "Proj_New_Mexico_CS27_West"},
+    {13031, "Proj_New_Mexico_CS83_East"},
+    {13032, "Proj_New_Mexico_CS83_Central"},
+    {13033, "Proj_New_Mexico_CS83_West"},
+    {13101, "Proj_New_York_CS27_East"},
+    {13102, "Proj_New_York_CS27_Central"},
+    {13103, "Proj_New_York_CS27_West"},
+    {13104, "Proj_New_York_CS27_Long_Island"},
+    {13131, "Proj_New_York_CS83_East"},
+    {13132, "Proj_New_York_CS83_Central"},
+    {13133, "Proj_New_York_CS83_West"},
+    {13134, "Proj_New_York_CS83_Long_Island"},
+    {13200, "Proj_North_Carolina_CS27"},
+    {13230, "Proj_North_Carolina_CS83"},
+    {13301, "Proj_North_Dakota_CS27_North"},
+    {13302, "Proj_North_Dakota_CS27_South"},
+    {13331, "Proj_North_Dakota_CS83_North"},
+    {13332, "Proj_North_Dakota_CS83_South"},
+    {13401, "Proj_Ohio_CS27_North"},
+    {13402, "Proj_Ohio_CS27_South"},
+    {13431, "Proj_Ohio_CS83_North"},
+    {13432, "Proj_Ohio_CS83_South"},
+    {13501, "Proj_Oklahoma_CS27_North"},
+    {13502, "Proj_Oklahoma_CS27_South"},
+    {13531, "Proj_Oklahoma_CS83_North"},
+    {13532, "Proj_Oklahoma_CS83_South"},
+    {13601, "Proj_Oregon_CS27_North"},
+    {13602, "Proj_Oregon_CS27_South"},
+    {13631, "Proj_Oregon_CS83_North"},
+    {13632, "Proj_Oregon_CS83_South"},
+    {13701, "Proj_Pennsylvania_CS27_North"},
+    {13702, "Proj_Pennsylvania_CS27_South"},
+    {13731, "Proj_Pennsylvania_CS83_North"},
+    {13732, "Proj_Pennsylvania_CS83_South"},
+    {13800, "Proj_Rhode_Island_CS27"},
+    {13830, "Proj_Rhode_Island_CS83"},
+    {13901, "Proj_South_Carolina_CS27_North"},
+    {13902, "Proj_South_Carolina_CS27_South"},
+    {13930, "Proj_South_Carolina_CS83"},
+    {14001, "Proj_South_Dakota_CS27_North"},
+    {14002, "Proj_South_Dakota_CS27_South"},
+    {14031, "Proj_South_Dakota_CS83_North"},
+    {14032, "Proj_South_Dakota_CS83_South"},
+    {14100, "Proj_Tennessee_CS27"},
+    {14130, "Proj_Tennessee_CS83"},
+    {14201, "Proj_Texas_CS27_North"},
+    {14202, "Proj_Texas_CS27_North_Central"},
+    {14203, "Proj_Texas_CS27_Central"},
+    {14204, "Proj_Texas_CS27_South_Central"},
+    {14205, "Proj_Texas_CS27_South"},
+    {14231, "Proj_Texas_CS83_North"},
+    {14232, "Proj_Texas_CS83_North_Central"},
+    {14233, "Proj_Texas_CS83_Central"},
+    {14234, "Proj_Texas_CS83_South_Central"},
+    {14235, "Proj_Texas_CS83_South"},
+    {14301, "Proj_Utah_CS27_North"},
+    {14302, "Proj_Utah_CS27_Central"},
+    {14303, "Proj_Utah_CS27_South"},
+    {14331, "Proj_Utah_CS83_North"},
+    {14332, "Proj_Utah_CS83_Central"},
+    {14333, "Proj_Utah_CS83_South"},
+    {14400, "Proj_Vermont_CS27"},
+    {14430, "Proj_Vermont_CS83"},
+    {14501, "Proj_Virginia_CS27_North"},
+    {14502, "Proj_Virginia_CS27_South"},
+    {14531, "Proj_Virginia_CS83_North"},
+    {14532, "Proj_Virginia_CS83_South"},
+    {14601, "Proj_Washington_CS27_North"},
+    {14602, "Proj_Washington_CS27_South"},
+    {14631, "Proj_Washington_CS83_North"},
+    {14632, "Proj_Washington_CS83_South"},
+    {14701, "Proj_West_Virginia_CS27_North"},
+    {14702, "Proj_West_Virginia_CS27_South"},
+    {14731, "Proj_West_Virginia_CS83_North"},
+    {14732, "Proj_West_Virginia_CS83_South"},
+    {14801, "Proj_Wisconsin_CS27_North"},
+    {14802, "Proj_Wisconsin_CS27_Central"},
+    {14803, "Proj_Wisconsin_CS27_South"},
+    {14831, "Proj_Wisconsin_CS83_North"},
+    {14832, "Proj_Wisconsin_CS83_Central"},
+    {14833, "Proj_Wisconsin_CS83_South"},
+    {14901, "Proj_Wyoming_CS27_East"},
+    {14902, "Proj_Wyoming_CS27_East_Central"},
+    {14903, "Proj_Wyoming_CS27_West_Central"},
+    {14904, "Proj_Wyoming_CS27_West"},
+    {14931, "Proj_Wyoming_CS83_East"},
+    {14932, "Proj_Wyoming_CS83_East_Central"},
+    {14933, "Proj_Wyoming_CS83_West_Central"},
+    {14934, "Proj_Wyoming_CS83_West"},
+    {15001, "Proj_Alaska_CS27_1"},
+    {15002, "Proj_Alaska_CS27_2"},
+    {15003, "Proj_Alaska_CS27_3"},
+    {15004, "Proj_Alaska_CS27_4"},
+    {15005, "Proj_Alaska_CS27_5"},
+    {15006, "Proj_Alaska_CS27_6"},
+    {15007, "Proj_Alaska_CS27_7"},
+    {15008, "Proj_Alaska_CS27_8"},
+    {15009, "Proj_Alaska_CS27_9"},
+    {15010, "Proj_Alaska_CS27_10"},
+    {15031, "Proj_Alaska_CS83_1"},
+    {15032, "Proj_Alaska_CS83_2"},
+    {15033, "Proj_Alaska_CS83_3"},
+    {15034, "Proj_Alaska_CS83_4"},
+    {15035, "Proj_Alaska_CS83_5"},
+    {15036, "Proj_Alaska_CS83_6"},
+    {15037, "Proj_Alaska_CS83_7"},
+    {15038, "Proj_Alaska_CS83_8"},
+    {15039, "Proj_Alaska_CS83_9"},
+    {15040, "Proj_Alaska_CS83_10"},
+    {15101, "Proj_Hawaii_CS27_1"},
+    {15102, "Proj_Hawaii_CS27_2"},
+    {15103, "Proj_Hawaii_CS27_3"},
+    {15104, "Proj_Hawaii_CS27_4"},
+    {15105, "Proj_Hawaii_CS27_5"},
+    {15131, "Proj_Hawaii_CS83_1"},
+    {15132, "Proj_Hawaii_CS83_2"},
+    {15133, "Proj_Hawaii_CS83_3"},
+    {15134, "Proj_Hawaii_CS83_4"},
+    {15135, "Proj_Hawaii_CS83_5"},
+    {15201, "Proj_Puerto_Rico_CS27"},
+    {15202, "Proj_St_Croix"},
+    {15230, "Proj_Puerto_Rico_Virgin_Is"},
+    {15914, "Proj_BLM_14N_feet"},
+    {15915, "Proj_BLM_15N_feet"},
+    {15916, "Proj_BLM_16N_feet"},
+    {15917, "Proj_BLM_17N_feet"},
+    {17348, "Proj_Map_Grid_of_Australia_48"},
+    {17349, "Proj_Map_Grid_of_Australia_49"},
+    {17350, "Proj_Map_Grid_of_Australia_50"},
+    {17351, "Proj_Map_Grid_of_Australia_51"},
+    {17352, "Proj_Map_Grid_of_Australia_52"},
+    {17353, "Proj_Map_Grid_of_Australia_53"},
+    {17354, "Proj_Map_Grid_of_Australia_54"},
+    {17355, "Proj_Map_Grid_of_Australia_55"},
+    {17356, "Proj_Map_Grid_of_Australia_56"},
+    {17357, "Proj_Map_Grid_of_Australia_57"},
+    {17358, "Proj_Map_Grid_of_Australia_58"},
+    {17448, "Proj_Australian_Map_Grid_48"},
+    {17449, "Proj_Australian_Map_Grid_49"},
+    {17450, "Proj_Australian_Map_Grid_50"},
+    {17451, "Proj_Australian_Map_Grid_51"},
+    {17452, "Proj_Australian_Map_Grid_52"},
+    {17453, "Proj_Australian_Map_Grid_53"},
+    {17454, "Proj_Australian_Map_Grid_54"},
+    {17455, "Proj_Australian_Map_Grid_55"},
+    {17456, "Proj_Australian_Map_Grid_56"},
+    {17457, "Proj_Australian_Map_Grid_57"},
+    {17458, "Proj_Australian_Map_Grid_58"},
+    {18031, "Proj_Argentina_1"},
+    {18032, "Proj_Argentina_2"},
+    {18033, "Proj_Argentina_3"},
+    {18034, "Proj_Argentina_4"},
+    {18035, "Proj_Argentina_5"},
+    {18036, "Proj_Argentina_6"},
+    {18037, "Proj_Argentina_7"},
+    {18051, "Proj_Colombia_3W"},
+    {18052, "Proj_Colombia_Bogota"},
+    {18053, "Proj_Colombia_3E"},
+    {18054, "Proj_Colombia_6E"},
+    {18072, "Proj_Egypt_Red_Belt"},
+    {18073, "Proj_Egypt_Purple_Belt"},
+    {18074, "Proj_Extended_Purple_Belt"},
+    {18141, "Proj_New_Zealand_North_Island_Nat_Grid"},
+    {18142, "Proj_New_Zealand_South_Island_Nat_Grid"},
+    {19900, "Proj_Bahrain_Grid"},
+    {19905, "Proj_Netherlands_E_Indies_Equatorial"},
+    {19912, "Proj_RSO_Borneo"}
+};
+
+const char *const ff_tiff_coord_trans_codes[] = { /* 27 names; declared as [27] in tiff_data.h */
+    "CT_TransverseMercator",
+    "CT_TransvMercator_Modified_Alaska",
+    "CT_ObliqueMercator",
+    "CT_ObliqueMercator_Laborde",
+    "CT_ObliqueMercator_Rosenmund",
+    "CT_ObliqueMercator_Spherical",
+    "CT_Mercator",
+    "CT_LambertConfConic_2SP",
+    "CT_LambertConfConic_Helmert",
+    "CT_LambertAzimEqualArea",
+    "CT_AlbersEqualArea",
+    "CT_AzimuthalEquidistant",
+    "CT_EquidistantConic",
+    "CT_Stereographic",
+    "CT_PolarStereographic",
+    "CT_ObliqueStereographic",
+    "CT_Equirectangular",
+    "CT_CassiniSoldner",
+    "CT_Gnomonic",
+    "CT_MillerCylindrical",
+    "CT_Orthographic",
+    "CT_Polyconic",
+    "CT_Robinson",
+    "CT_Sinusoidal",
+    "CT_VanDerGrinten",
+    "CT_NewZealandMapGrid",
+    "CT_TransvMercator_SouthOriented"
+};
+
+const char *const ff_tiff_vert_cs_codes[] = { /* 32 names; declared as [32] in tiff_data.h */
+    "VertCS_Airy_1830_ellipsoid",
+    "VertCS_Airy_Modified_1849_ellipsoid",
+    "VertCS_ANS_ellipsoid",
+    "VertCS_Bessel_1841_ellipsoid",
+    "VertCS_Bessel_Modified_ellipsoid",
+    "VertCS_Bessel_Namibia_ellipsoid",
+    "VertCS_Clarke_1858_ellipsoid",
+    "VertCS_Clarke_1866_ellipsoid",
+    "VertCS_Clarke_1880_Benoit_ellipsoid",
+    "VertCS_Clarke_1880_IGN_ellipsoid",
+    "VertCS_Clarke_1880_RGS_ellipsoid",
+    "VertCS_Clarke_1880_Arc_ellipsoid",
+    "VertCS_Clarke_1880_SGA_1922_ellipsoid",
+    "VertCS_Everest_1830_1937_Adjustment_ellipsoid",
+    "VertCS_Everest_1830_1967_Definition_ellipsoid",
+    "VertCS_Everest_1830_1975_Definition_ellipsoid",
+    "VertCS_Everest_1830_Modified_ellipsoid",
+    "VertCS_GRS_1980_ellipsoid",
+    "VertCS_Helmert_1906_ellipsoid",
+    "VertCS_INS_ellipsoid",
+    "VertCS_International_1924_ellipsoid",
+    "VertCS_International_1967_ellipsoid",
+    "VertCS_Krassowsky_1940_ellipsoid",
+    "VertCS_NWL_9D_ellipsoid",
+    "VertCS_NWL_10D_ellipsoid",
+    "VertCS_Plessis_1817_ellipsoid",
+    "VertCS_Struve_1860_ellipsoid",
+    "VertCS_War_Office_ellipsoid",
+    "VertCS_WGS_84_ellipsoid",
+    "VertCS_GEM_10C_ellipsoid",
+    "VertCS_OSU86F_ellipsoid",
+    "VertCS_OSU91A_ellipsoid"
+};
+
+const char *const ff_tiff_ortho_vert_cs_codes[] = { /* 6 names; declared as [6] in tiff_data.h */
+    "VertCS_Newlyn",
+    "VertCS_North_American_Vertical_Datum_1929",
+    "VertCS_North_American_Vertical_Datum_1988",
+    "VertCS_Yellow_Sea_1956",
+    "VertCS_Baltic_Sea",
+    "VertCS_Caspian_Sea"
+};
diff --git a/libavcodec/tiff_data.h b/libavcodec/tiff_data.h
new file mode 100644
index 0000000..57515f9
--- /dev/null
+++ b/libavcodec/tiff_data.h
@@ -0,0 +1,92 @@
+/*
+ * TIFF data tables
+ * Copyright (c) 2011 Thomas Kuehnel
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Declarations of the GeoTIFF lookup tables (key name/type maps and code name tables)
+ * @author Thomas Kuehnel
+ * @see GeoTIFF specification at
+ * http://www.remotesensing.org/geotiff/spec/geotiffhome.html
+ */
+
+#ifndef AVCODEC_TIFF_DATA_H
+#define AVCODEC_TIFF_DATA_H
+
+#include "tiff.h"
+
+#define TIFF_CONF_KEY_ID_OFFSET 1024
+extern const TiffGeoTagNameType ff_tiff_conf_name_type_map[3];
+
+#define TIFF_GEOG_KEY_ID_OFFSET 2048
+extern const TiffGeoTagNameType ff_tiff_geog_name_type_map[14];
+
+#define TIFF_PROJ_KEY_ID_OFFSET 3072
+extern const TiffGeoTagNameType ff_tiff_proj_name_type_map[24];
+
+#define TIFF_VERT_KEY_ID_OFFSET 4096
+extern const TiffGeoTagNameType ff_tiff_vert_name_type_map[4];
+
+#define TIFF_GEO_KEY_UNDEFINED    0
+#define TIFF_GEO_KEY_USER_DEFINED 32767
+
+#define TIFF_GT_MODEL_TYPE_OFFSET 1
+extern const char *const ff_tiff_gt_model_type_codes[3];
+
+#define TIFF_GT_RASTER_TYPE_OFFSET 1
+extern const char *const ff_tiff_gt_raster_type_codes[2];
+
+#define TIFF_LINEAR_UNIT_OFFSET 9001
+extern const char *const ff_tiff_linear_unit_codes[15];
+
+#define TIFF_ANGULAR_UNIT_OFFSET 9101
+extern const char *const ff_tiff_angular_unit_codes[8];
+
+#define TIFF_GCS_TYPE_OFFSET 4201
+extern const char *const ff_tiff_gcs_type_codes[133];
+
+#define TIFF_GCSE_TYPE_OFFSET 4001
+extern const char *const ff_tiff_gcse_type_codes[35];
+
+#define TIFF_GEODETIC_DATUM_OFFSET 6201
+extern const char *const ff_tiff_geodetic_datum_codes[120];
+
+#define TIFF_GEODETIC_DATUM_E_OFFSET 6001
+extern const char *const ff_tiff_geodetic_datum_e_codes[35];
+
+#define TIFF_ELLIPSOID_OFFSET 7001
+extern const char *const ff_tiff_ellipsoid_codes[35];
+
+#define TIFF_PRIME_MERIDIAN_OFFSET 8901
+extern const char *const ff_tiff_prime_meridian_codes[11];
+
+extern const TiffGeoTagKeyName ff_tiff_proj_cs_type_codes[978];
+
+extern const TiffGeoTagKeyName ff_tiff_projection_codes[298];
+
+#define TIFF_COORD_TRANS_OFFSET 1
+extern const char *const ff_tiff_coord_trans_codes[27];
+
+#define TIFF_VERT_CS_OFFSET 5001
+extern const char *const ff_tiff_vert_cs_codes[32];
+
+#define TIFF_ORTHO_VERT_CS_OFFSET 5101
+extern const char *const ff_tiff_ortho_vert_cs_codes[6];
+#endif
diff --git a/libavcodec/tiffenc.c b/libavcodec/tiffenc.c
index f450edb..5a61f1a 100644
--- a/libavcodec/tiffenc.c
+++ b/libavcodec/tiffenc.c
@@ -2,20 +2,20 @@
  * TIFF image encoder
  * Copyright (c) 2007 Bartlomiej Wolowiec
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -30,11 +30,13 @@
 #include <zlib.h>
 #endif
 
+#include "libavutil/imgutils.h"
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
 #include "avcodec.h"
 #include "bytestream.h"
+#include "internal.h"
 #include "lzw.h"
 #include "put_bits.h"
 #include "rle.h"
@@ -43,8 +45,8 @@
 #define TIFF_MAX_ENTRY 32
 
 /** sizes of various TIFF field types (string size = 1)*/
-static const uint8_t type_sizes2[6] = {
-    0, 1, 1, 2, 4, 8
+static const uint8_t type_sizes2[14] = {
+    0, 1, 1, 2, 4, 8, 1, 1, 2, 4, 8, 4, 8, 4
 };
 
 typedef struct TiffEncoderContext {
@@ -58,6 +60,12 @@ typedef struct TiffEncoderContext {
     int bpp_tab_size;                       ///< bpp_tab size
     enum TiffPhotometric photometric_interpretation;  ///< photometric interpretation
     int strips;                             ///< number of strips
+    uint32_t *strip_sizes;
+    unsigned int strip_sizes_size;
+    uint32_t *strip_offsets;
+    unsigned int strip_offsets_size;
+    uint8_t *yuv_line;
+    unsigned int yuv_line_size;
     int rps;                                ///< row per strip
     uint8_t entries[TIFF_MAX_ENTRY * 12];   ///< entries in header
     int num_entries;                        ///< number of entries
@@ -66,10 +74,12 @@ typedef struct TiffEncoderContext {
     int buf_size;                           ///< buffer size
     uint16_t subsampling[2];                ///< YUV subsampling factors
     struct LZWEncodeState *lzws;            ///< LZW encode state
+    uint32_t dpi;                           ///< image resolution in DPI
 } TiffEncoderContext;
 
 /**
- * Check free space in buffer
+ * Check free space in buffer.
+ *
  * @param s Tiff context
  * @param need Needed bytes
  * @return 0 - ok, 1 - no free space
@@ -85,13 +95,13 @@ static inline int check_size(TiffEncoderContext *s, uint64_t need)
 }
 
 /**
- * Put n values to buffer
+ * Put n values to buffer.
  *
- * @param p Pointer to pointer to output buffer
- * @param n Number of values
- * @param val Pointer to values
- * @param type Type of values
- * @param flip =0 - normal copy, >0 - flip
+ * @param p pointer to pointer to output buffer
+ * @param n number of values
+ * @param val pointer to values
+ * @param type type of values
+ * @param flip = 0 - normal copy, >0 - flip
  */
 static void tnput(uint8_t **p, int n, const uint8_t *val, enum TiffTypes type,
                   int flip)
@@ -106,28 +116,29 @@ static void tnput(uint8_t **p, int n, const uint8_t *val, enum TiffTypes type,
 
 /**
  * Add entry to directory in tiff header.
+ *
  * @param s Tiff context
- * @param tag Tag that identifies the entry
- * @param type Entry type
- * @param count The number of values
- * @param ptr_val Pointer to values
+ * @param tag tag that identifies the entry
+ * @param type entry type
+ * @param count the number of values
+ * @param ptr_val pointer to values
  */
 static void add_entry(TiffEncoderContext *s, enum TiffTags tag,
                       enum TiffTypes type, int count, const void *ptr_val)
 {
     uint8_t *entries_ptr = s->entries + 12 * s->num_entries;
 
-    assert(s->num_entries < TIFF_MAX_ENTRY);
+    av_assert0(s->num_entries < TIFF_MAX_ENTRY);
 
     bytestream_put_le16(&entries_ptr, tag);
     bytestream_put_le16(&entries_ptr, type);
     bytestream_put_le32(&entries_ptr, count);
 
-    if (type_sizes[type] * count <= 4) {
+    if (type_sizes[type] * (int64_t)count <= 4) {
         tnput(&entries_ptr, count, ptr_val, type, 0);
     } else {
         bytestream_put_le32(&entries_ptr, *s->buf - s->buf_start);
-        check_size(s, count * type_sizes2[type]);
+        check_size(s, count * (int64_t)type_sizes2[type]);
         tnput(s->buf, count, ptr_val, type, 0);
     }
 
@@ -143,14 +154,14 @@ static void add_entry1(TiffEncoderContext *s,
 }
 
 /**
- * Encode one strip in tiff file
+ * Encode one strip in tiff file.
  *
  * @param s Tiff context
- * @param src Input buffer
- * @param dst Output buffer
- * @param n Size of input buffer
- * @param compr Compression method
- * @return Number of output bytes. If an output error is encountered, -1 returned
+ * @param src input buffer
+ * @param dst output buffer
+ * @param n size of input buffer
+ * @param compr compression method
+ * @return number of output bytes. If an output error is encountered, -1 is returned
  */
 static int encode_strip(TiffEncoderContext *s, const int8_t *src,
                         uint8_t *dst, int n, int compr)
@@ -190,84 +201,86 @@ static void pack_yuv(TiffEncoderContext *s, const AVFrame *p,
     int w       = (s->width - 1) / s->subsampling[0] + 1;
     uint8_t *pu = &p->data[1][lnum / s->subsampling[1] * p->linesize[1]];
     uint8_t *pv = &p->data[2][lnum / s->subsampling[1] * p->linesize[2]];
-    for (i = 0; i < w; i++) {
-        for (j = 0; j < s->subsampling[1]; j++)
-            for (k = 0; k < s->subsampling[0]; k++)
-                *dst++ = p->data[0][(lnum + j) * p->linesize[0] +
-                                    i * s->subsampling[0] + k];
-        *dst++ = *pu++;
-        *dst++ = *pv++;
+    if (s->width % s->subsampling[0] || s->height % s->subsampling[1]) {
+        for (i = 0; i < w; i++) {
+            for (j = 0; j < s->subsampling[1]; j++)
+                for (k = 0; k < s->subsampling[0]; k++)
+                    *dst++ = p->data[0][FFMIN(lnum + j, s->height-1) * p->linesize[0] +
+                                        FFMIN(i * s->subsampling[0] + k, s->width-1)];
+            *dst++ = *pu++;
+            *dst++ = *pv++;
+        }
+    }else{
+        for (i = 0; i < w; i++) {
+            for (j = 0; j < s->subsampling[1]; j++)
+                for (k = 0; k < s->subsampling[0]; k++)
+                    *dst++ = p->data[0][(lnum + j) * p->linesize[0] +
+                                        i * s->subsampling[0] + k];
+            *dst++ = *pu++;
+            *dst++ = *pv++;
+        }
     }
 }
 
 static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                         const AVFrame *pict, int *got_packet)
 {
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
     TiffEncoderContext *s = avctx->priv_data;
     const AVFrame *const p = pict;
     int i;
     uint8_t *ptr;
     uint8_t *offset;
     uint32_t strips;
-    uint32_t *strip_sizes   = NULL;
-    uint32_t *strip_offsets = NULL;
     int bytes_per_row;
-    uint32_t res[2]    = { 72, 1 };     // image resolution (72/1)
-    uint16_t bpp_tab[] = { 8, 8, 8, 8 };
-    int ret;
-    int is_yuv = 0;
-    uint8_t *yuv_line = NULL;
+    uint32_t res[2] = { s->dpi, 1 };    // image resolution as a rational: dpi/1 (dpi option, default 72)
+    uint16_t bpp_tab[4];
+    int ret = -1;
+    int is_yuv = 0, alpha = 0;
     int shift_h, shift_v;
     int packet_size;
-    const AVPixFmtDescriptor *pfd;
-
-    s->avctx = avctx;
 
     s->width          = avctx->width;
     s->height         = avctx->height;
     s->subsampling[0] = 1;
     s->subsampling[1] = 1;
 
+    avctx->bits_per_coded_sample =
+    s->bpp          = av_get_bits_per_pixel(desc);
+    s->bpp_tab_size = desc->nb_components;
+
     switch (avctx->pix_fmt) {
-    case AV_PIX_FMT_RGB48LE:
-    case AV_PIX_FMT_GRAY16LE:
+    case AV_PIX_FMT_RGBA64LE:
     case AV_PIX_FMT_RGBA:
+        alpha = 1;
+    case AV_PIX_FMT_RGB48LE:
     case AV_PIX_FMT_RGB24:
-    case AV_PIX_FMT_GRAY8:
-    case AV_PIX_FMT_PAL8:
-        pfd    = av_pix_fmt_desc_get(avctx->pix_fmt);
-        s->bpp = av_get_bits_per_pixel(pfd);
-        if (pfd->flags & AV_PIX_FMT_FLAG_PAL)
-            s->photometric_interpretation = TIFF_PHOTOMETRIC_PALETTE;
-        else if (pfd->flags & AV_PIX_FMT_FLAG_RGB)
-            s->photometric_interpretation = TIFF_PHOTOMETRIC_RGB;
-        else
-            s->photometric_interpretation = TIFF_PHOTOMETRIC_BLACK_IS_ZERO;
-        s->bpp_tab_size = pfd->nb_components;
-        for (i = 0; i < s->bpp_tab_size; i++)
-            bpp_tab[i] = s->bpp / s->bpp_tab_size;
+        s->photometric_interpretation = TIFF_PHOTOMETRIC_RGB;
         break;
+    case AV_PIX_FMT_GRAY8:
+        avctx->bits_per_coded_sample = 0x28;
+    case AV_PIX_FMT_GRAY8A:
+        alpha = avctx->pix_fmt == AV_PIX_FMT_GRAY8A;
+    case AV_PIX_FMT_GRAY16LE:
     case AV_PIX_FMT_MONOBLACK:
-        s->bpp                        = 1;
         s->photometric_interpretation = TIFF_PHOTOMETRIC_BLACK_IS_ZERO;
-        s->bpp_tab_size               = 0;
+        break;
+    case AV_PIX_FMT_PAL8:
+        s->photometric_interpretation = TIFF_PHOTOMETRIC_PALETTE;
         break;
     case AV_PIX_FMT_MONOWHITE:
-        s->bpp                        = 1;
         s->photometric_interpretation = TIFF_PHOTOMETRIC_WHITE_IS_ZERO;
-        s->bpp_tab_size               = 0;
         break;
     case AV_PIX_FMT_YUV420P:
     case AV_PIX_FMT_YUV422P:
+    case AV_PIX_FMT_YUV440P:
     case AV_PIX_FMT_YUV444P:
     case AV_PIX_FMT_YUV410P:
     case AV_PIX_FMT_YUV411P:
         av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &shift_h, &shift_v);
         s->photometric_interpretation = TIFF_PHOTOMETRIC_YCBCR;
-        s->bpp                        = 8 + (16 >> (shift_h + shift_v));
         s->subsampling[0]             = 1 << shift_h;
         s->subsampling[1]             = 1 << shift_v;
-        s->bpp_tab_size               = 3;
         is_yuv                        = 1;
         break;
     default:
@@ -276,6 +289,9 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         return -1;
     }
 
+    for (i = 0; i < s->bpp_tab_size; i++)
+        bpp_tab[i] = desc->comp[i].depth_minus1 + 1;
+
     if (s->compr == TIFF_DEFLATE       ||
         s->compr == TIFF_ADOBE_DEFLATE ||
         s->compr == TIFF_LZW)
@@ -292,11 +308,8 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     packet_size = avctx->height * ((avctx->width * s->bpp + 7) >> 3) * 2 +
                   avctx->height * 4 + FF_MIN_BUFFER_SIZE;
 
-    if (!pkt->data &&
-        (ret = av_new_packet(pkt, packet_size)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
+    if ((ret = ff_alloc_packet2(avctx, pkt, packet_size)) < 0)
         return ret;
-    }
     ptr          = pkt->data;
     s->buf_start = pkt->data;
     s->buf       = &ptr;
@@ -312,9 +325,10 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     offset = ptr;
     bytestream_put_le32(&ptr, 0);
 
-    strip_sizes   = av_mallocz(sizeof(*strip_sizes)   * strips);
-    strip_offsets = av_mallocz(sizeof(*strip_offsets) * strips);
-    if (!strip_sizes || !strip_offsets) {
+    av_fast_padded_mallocz(&s->strip_sizes  , &s->strip_sizes_size  , sizeof(s->strip_sizes  [0]) * strips);
+    av_fast_padded_mallocz(&s->strip_offsets, &s->strip_offsets_size, sizeof(s->strip_offsets[0]) * strips);
+
+    if (!s->strip_sizes || !s->strip_offsets) {
         ret = AVERROR(ENOMEM);
         goto fail;
     }
@@ -322,8 +336,8 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     bytes_per_row = (((s->width - 1) / s->subsampling[0] + 1) * s->bpp *
                      s->subsampling[0] * s->subsampling[1] + 7) >> 3;
     if (is_yuv) {
-        yuv_line = av_malloc(bytes_per_row);
-        if (yuv_line == NULL) {
+        av_fast_padded_malloc(&s->yuv_line, &s->yuv_line_size, bytes_per_row);
+        if (s->yuv_line == NULL) {
             av_log(s->avctx, AV_LOG_ERROR, "Not enough memory\n");
             ret = AVERROR(ENOMEM);
             goto fail;
@@ -342,12 +356,12 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
             ret = AVERROR(ENOMEM);
             goto fail;
         }
-        strip_offsets[0] = ptr - pkt->data;
+        s->strip_offsets[0] = ptr - pkt->data;
         zn               = 0;
         for (j = 0; j < s->rps; j++) {
             if (is_yuv) {
-                pack_yuv(s, p, yuv_line, j);
-                memcpy(zbuf + zn, yuv_line, bytes_per_row);
+                pack_yuv(s, p, s->yuv_line, j);
+                memcpy(zbuf + zn, s->yuv_line, bytes_per_row);
                 j += s->subsampling[1] - 1;
             } else
                 memcpy(zbuf + j * bytes_per_row,
@@ -361,9 +375,10 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
             goto fail;
         }
         ptr           += ret;
-        strip_sizes[0] = ptr - pkt->data - strip_offsets[0];
+        s->strip_sizes[0] = ptr - pkt->data - s->strip_offsets[0];
     } else
 #endif
+    {
     if (s->compr == TIFF_LZW) {
         s->lzws = av_malloc(ff_lzw_encode_state_size);
         if (!s->lzws) {
@@ -372,17 +387,17 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         }
     }
     for (i = 0; i < s->height; i++) {
-        if (strip_sizes[i / s->rps] == 0) {
+        if (s->strip_sizes[i / s->rps] == 0) {
             if (s->compr == TIFF_LZW) {
                 ff_lzw_encode_init(s->lzws, ptr,
                                    s->buf_size - (*s->buf - s->buf_start),
                                    12, FF_LZW_TIFF, put_bits);
             }
-            strip_offsets[i / s->rps] = ptr - pkt->data;
+            s->strip_offsets[i / s->rps] = ptr - pkt->data;
         }
         if (is_yuv) {
-            pack_yuv(s, p, yuv_line, i);
-            ret = encode_strip(s, yuv_line, ptr, bytes_per_row, s->compr);
+            pack_yuv(s, p, s->yuv_line, i);
+            ret = encode_strip(s, s->yuv_line, ptr, bytes_per_row, s->compr);
             i  += s->subsampling[1] - 1;
         } else
             ret = encode_strip(s, p->data[0] + i * p->linesize[0],
@@ -391,17 +406,18 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
             av_log(s->avctx, AV_LOG_ERROR, "Encode strip failed\n");
             goto fail;
         }
-        strip_sizes[i / s->rps] += ret;
+        s->strip_sizes[i / s->rps] += ret;
         ptr                     += ret;
         if (s->compr == TIFF_LZW &&
             (i == s->height - 1 || i % s->rps == s->rps - 1)) {
             ret = ff_lzw_encode_flush(s->lzws, flush_put_bits);
-            strip_sizes[(i / s->rps)] += ret;
-            ptr                       += ret;
+            s->strip_sizes[(i / s->rps)] += ret;
+            ptr                          += ret;
         }
     }
     if (s->compr == TIFF_LZW)
         av_free(s->lzws);
+    }
 
     s->num_entries = 0;
 
@@ -414,14 +430,21 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
     add_entry1(s, TIFF_COMPR,       TIFF_SHORT, s->compr);
     add_entry1(s, TIFF_PHOTOMETRIC, TIFF_SHORT, s->photometric_interpretation);
-    add_entry(s,  TIFF_STRIP_OFFS,  TIFF_LONG,  strips, strip_offsets);
+    add_entry(s,  TIFF_STRIP_OFFS,  TIFF_LONG,  strips, s->strip_offsets);
 
     if (s->bpp_tab_size)
         add_entry1(s, TIFF_SAMPLES_PER_PIXEL, TIFF_SHORT, s->bpp_tab_size);
 
     add_entry1(s, TIFF_ROWSPERSTRIP, TIFF_LONG,     s->rps);
-    add_entry(s,  TIFF_STRIP_SIZE,   TIFF_LONG,     strips, strip_sizes);
+    add_entry(s,  TIFF_STRIP_SIZE,   TIFF_LONG,     strips, s->strip_sizes);
     add_entry(s,  TIFF_XRES,         TIFF_RATIONAL, 1,      res);
+    if (avctx->sample_aspect_ratio.num > 0 &&
+        avctx->sample_aspect_ratio.den > 0) {
+        AVRational y = av_mul_q(av_make_q(s->dpi, 1),
+                                avctx->sample_aspect_ratio);
+        res[0] = y.num;
+        res[1] = y.den;
+    }
     add_entry(s,  TIFF_YRES,         TIFF_RATIONAL, 1,      res);
     add_entry1(s, TIFF_RES_UNIT,     TIFF_SHORT,    2);
 
@@ -439,10 +462,14 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         }
         add_entry(s, TIFF_PAL, TIFF_SHORT, 256 * 3, pal);
     }
+    if (alpha)
+        add_entry1(s,TIFF_EXTRASAMPLES,      TIFF_SHORT,            2);
     if (is_yuv) {
         /** according to CCIR Recommendation 601.1 */
         uint32_t refbw[12] = { 15, 1, 235, 1, 128, 1, 240, 1, 128, 1, 240, 1 };
         add_entry(s, TIFF_YCBCR_SUBSAMPLING, TIFF_SHORT,    2, s->subsampling);
+        if (avctx->chroma_sample_location == AVCHROMA_LOC_TOPLEFT)
+            add_entry1(s, TIFF_YCBCR_POSITIONING, TIFF_SHORT, 2);
         add_entry(s, TIFF_REFERENCE_BW,      TIFF_RATIONAL, 6, refbw);
     }
     // write offset to dir
@@ -461,33 +488,40 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     *got_packet = 1;
 
 fail:
-    av_free(strip_sizes);
-    av_free(strip_offsets);
-    av_free(yuv_line);
-    return ret;
+    return ret < 0 ? ret : 0;
 }
 
 static av_cold int encode_init(AVCodecContext *avctx)
 {
+    TiffEncoderContext *s = avctx->priv_data;
+
     avctx->coded_frame = av_frame_alloc();
     if (!avctx->coded_frame)
         return AVERROR(ENOMEM);
 
     avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
     avctx->coded_frame->key_frame = 1;
+    s->avctx = avctx; /* set once here; encode_frame() logs through s->avctx */
 
     return 0;
 }
 
 static av_cold int encode_close(AVCodecContext *avctx)
 {
+    TiffEncoderContext *s = avctx->priv_data;
+
     av_frame_free(&avctx->coded_frame);
+    av_freep(&s->strip_sizes);   /* context-owned buffers that encode_frame() (re)allocates */
+    av_freep(&s->strip_offsets); /* with av_fast_padded_malloc*() and no longer frees itself, */
+    av_freep(&s->yuv_line);      /* so they are released only here */
+
     return 0;
 }
 
 #define OFFSET(x) offsetof(TiffEncoderContext, x)
 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 static const AVOption options[] = {
+    {"dpi", "set the image resolution (in dpi)", OFFSET(dpi), AV_OPT_TYPE_INT, {.i64 = 72}, 1, 0x10000, AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_ENCODING_PARAM},
     { "compression_algo", NULL, OFFSET(compr), AV_OPT_TYPE_INT,   { .i64 = TIFF_PACKBITS }, TIFF_RAW, TIFF_DEFLATE, VE, "compression_algo" },
     { "packbits",         NULL, 0,             AV_OPT_TYPE_CONST, { .i64 = TIFF_PACKBITS }, 0,        0,            VE, "compression_algo" },
     { "raw",              NULL, 0,             AV_OPT_TYPE_CONST, { .i64 = TIFF_RAW      }, 0,        0,            VE, "compression_algo" },
@@ -513,14 +547,15 @@ AVCodec ff_tiff_encoder = {
     .priv_data_size = sizeof(TiffEncoderContext),
     .init           = encode_init,
     .close          = encode_close,
+    .capabilities   = CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
     .encode2        = encode_frame,
     .pix_fmts       = (const enum AVPixelFormat[]) {
-        AV_PIX_FMT_RGB24, AV_PIX_FMT_RGB48LE, AV_PIX_FMT_PAL8,
-        AV_PIX_FMT_RGBA,
-        AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY16LE,
+        AV_PIX_FMT_RGB24, AV_PIX_FMT_PAL8, AV_PIX_FMT_GRAY8,
+        AV_PIX_FMT_GRAY8A, AV_PIX_FMT_GRAY16LE,
         AV_PIX_FMT_MONOBLACK, AV_PIX_FMT_MONOWHITE,
-        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P,
-        AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
+        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
+        AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_RGB48LE,
+        AV_PIX_FMT_RGBA, AV_PIX_FMT_RGBA64LE,
         AV_PIX_FMT_NONE
     },
     .priv_class     = &tiffenc_class,
diff --git a/libavcodec/tmv.c b/libavcodec/tmv.c
index f04a5f5..e525a73 100644
--- a/libavcodec/tmv.c
+++ b/libavcodec/tmv.c
@@ -2,20 +2,20 @@
  * 8088flex TMV video decoder
  * Copyright (c) 2009 Daniel Verkamp <daniel at drv.nu>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -31,6 +31,7 @@
 #include "avcodec.h"
 #include "internal.h"
 #include "libavutil/internal.h"
+#include "libavutil/xga_font_data.h"
 
 #include "cga_data.h"
 
@@ -45,10 +46,8 @@ static int tmv_decode_frame(AVCodecContext *avctx, void *data,
     unsigned x, y, fg, bg, c;
     int ret;
 
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     if (avpkt->size < 2*char_rows*char_cols) {
         av_log(avctx, AV_LOG_ERROR,
@@ -63,6 +62,7 @@ static int tmv_decode_frame(AVCodecContext *avctx, void *data,
 
     frame->palette_has_changed = 1;
     memcpy(frame->data[1], ff_cga_palette, 16 * 4);
+    memset(frame->data[1] + 16 * 4, 0, AVPALETTE_SIZE - 16 * 4);
 
     for (y = 0; y < char_rows; y++) {
         for (x = 0; x < char_cols; x++) {
@@ -70,7 +70,7 @@ static int tmv_decode_frame(AVCodecContext *avctx, void *data,
             bg = *src  >> 4;
             fg = *src++ & 0xF;
             ff_draw_pc_font(dst + x * 8, frame->linesize[0],
-                            ff_cga_font, 8, c, fg, bg);
+                            avpriv_cga_font, 8, c, fg, bg);
         }
         dst += frame->linesize[0] * 8;
     }
diff --git a/libavcodec/tpeldsp.c b/libavcodec/tpeldsp.c
index 7ea1da4..cc4fed3 100644
--- a/libavcodec/tpeldsp.c
+++ b/libavcodec/tpeldsp.c
@@ -1,20 +1,20 @@
 /*
  * thirdpel DSP functions
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/tpeldsp.h b/libavcodec/tpeldsp.h
index 9c67d60..3732f17 100644
--- a/libavcodec/tpeldsp.h
+++ b/libavcodec/tpeldsp.h
@@ -1,20 +1,20 @@
 /*
  * thirdpel DSP functions
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/truemotion1.c b/libavcodec/truemotion1.c
index 5dffb4e..660ecf5 100644
--- a/libavcodec/truemotion1.c
+++ b/libavcodec/truemotion1.c
@@ -2,20 +2,20 @@
  * Duck TrueMotion 1.0 Decoder
  * Copyright (C) 2003 Alex Beregszaszi & Mike Melanson
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -415,6 +415,8 @@ static int truemotion1_decode_header(TrueMotion1Context *s)
         ff_set_sar(s->avctx, s->avctx->sample_aspect_ratio);
 
         av_fast_malloc(&s->vert_pred, &s->vert_pred_size, s->avctx->width * sizeof(unsigned int));
+        if (!s->vert_pred)
+            return AVERROR(ENOMEM);
     }
 
     /* There is 1 change bit per 4 pixels, so each change byte represents
@@ -483,6 +485,8 @@ static av_cold int truemotion1_decode_init(AVCodecContext *avctx)
     /* there is a vertical predictor for each pixel in a line; each vertical
      * predictor is 0 to start with */
     av_fast_malloc(&s->vert_pred, &s->vert_pred_size, s->avctx->width * sizeof(unsigned int));
+    if (!s->vert_pred)
+        return AVERROR(ENOMEM);
 
     return 0;
 }
@@ -871,10 +875,8 @@ static int truemotion1_decode_frame(AVCodecContext *avctx,
     if ((ret = truemotion1_decode_header(s)) < 0)
         return ret;
 
-    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0) {
-        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0)
         return ret;
-    }
 
     if (compression_types[s->compression].algorithm == ALGO_RGB24H) {
         truemotion1_decode_24bit(s);
@@ -896,7 +898,7 @@ static av_cold int truemotion1_decode_end(AVCodecContext *avctx)
     TrueMotion1Context *s = avctx->priv_data;
 
     av_frame_free(&s->frame);
-    av_free(s->vert_pred);
+    av_freep(&s->vert_pred);
 
     return 0;
 }
diff --git a/libavcodec/truemotion1data.h b/libavcodec/truemotion1data.h
index e950450..3e58143 100644
--- a/libavcodec/truemotion1data.h
+++ b/libavcodec/truemotion1data.h
@@ -6,20 +6,20 @@
  * the GNU LGPL using the common understanding that data tables necessary
  * for decoding algorithms are not necessarily copyrightable.
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #ifndef AVCODEC_TRUEMOTION1DATA_H
diff --git a/libavcodec/truemotion2.c b/libavcodec/truemotion2.c
index 122643d..18d7c1e 100644
--- a/libavcodec/truemotion2.c
+++ b/libavcodec/truemotion2.c
@@ -2,20 +2,20 @@
  * Duck/ON2 TrueMotion 2 Decoder
  * Copyright (c) 2005 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -65,6 +65,9 @@ typedef struct TM2Context {
     GetBitContext gb;
     BswapDSPContext bdsp;
 
+    uint8_t *buffer;
+    int buffer_size;
+
     /* TM2 streams */
     int *tokens[TM2_NUM_STREAMS];
     int tok_lens[TM2_NUM_STREAMS];
@@ -87,7 +90,7 @@ typedef struct TM2Context {
 * Huffman codes for each of streams
 */
 typedef struct TM2Codes {
-    VLC vlc; ///< table for Libav bitstream reader
+    VLC vlc; ///< table for FFmpeg bitstream reader
     int bits;
     int *recode; ///< table for converting from code indexes to values
     int length;
@@ -168,9 +171,14 @@ static int tm2_build_huff_table(TM2Context *ctx, TM2Codes *code)
 
     /* allocate space for codes - it is exactly ceil(nodes / 2) entries */
     huff.max_num = (huff.nodes + 1) >> 1;
-    huff.nums    = av_mallocz(huff.max_num * sizeof(int));
-    huff.bits    = av_mallocz(huff.max_num * sizeof(uint32_t));
-    huff.lens    = av_mallocz(huff.max_num * sizeof(int));
+    huff.nums    = av_calloc(huff.max_num, sizeof(int));
+    huff.bits    = av_calloc(huff.max_num, sizeof(uint32_t));
+    huff.lens    = av_calloc(huff.max_num, sizeof(int));
+
+    if (!huff.nums || !huff.bits || !huff.lens) {
+        res = AVERROR(ENOMEM);
+        goto fail;
+    }
 
     res = tm2_read_tree(ctx, 0, 0, &huff);
 
@@ -192,11 +200,16 @@ static int tm2_build_huff_table(TM2Context *ctx, TM2Codes *code)
         else {
             code->bits = huff.max_bits;
             code->length = huff.max_num;
-            code->recode = av_malloc(code->length * sizeof(int));
+            code->recode = av_malloc_array(code->length, sizeof(int));
+            if (!code->recode) {
+                res = AVERROR(ENOMEM);
+                goto fail;
+            }
             for (i = 0; i < code->length; i++)
                 code->recode[i] = huff.nums[i];
         }
     }
+fail:
     /* free allocated memory */
     av_free(huff.nums);
     av_free(huff.bits);
@@ -216,6 +229,8 @@ static inline int tm2_get_token(GetBitContext *gb, TM2Codes *code)
 {
     int val;
     val = get_vlc2(gb, code->vlc.table, code->bits, 1);
+    if(val<0)
+        return -1;
     return code->recode[val];
 }
 
@@ -247,7 +262,8 @@ static int tm2_read_deltas(TM2Context *ctx, int stream_id)
     d  = get_bits(&ctx->gb, 9);
     mb = get_bits(&ctx->gb, 5);
 
-    if ((d < 1) || (d > TM2_DELTAS) || (mb < 1) || (mb > 32)) {
+    av_assert2(mb < 32);
+    if ((d < 1) || (d > TM2_DELTAS) || (mb < 1)) {
         av_log(ctx->avctx, AV_LOG_ERROR, "Incorrect delta table: %i deltas x %i bits\n", d, mb);
         return AVERROR_INVALIDDATA;
     }
@@ -273,6 +289,11 @@ static int tm2_read_stream(TM2Context *ctx, const uint8_t *buf, int stream_id, i
     TM2Codes codes;
     GetByteContext gb;
 
+    if (buf_size < 4) {
+        av_log(ctx->avctx, AV_LOG_ERROR, "not enough space for len left\n");
+        return AVERROR_INVALIDDATA;
+    }
+
     /* get stream length in dwords */
     bytestream2_init(&gb, buf, buf_size);
     len  = bytestream2_get_be32(&gb);
@@ -281,8 +302,8 @@ static int tm2_read_stream(TM2Context *ctx, const uint8_t *buf, int stream_id, i
     if (len == 0)
         return 4;
 
-    if (len >= INT_MAX/4-1 || len < 0 || len > buf_size) {
-        av_log(ctx->avctx, AV_LOG_ERROR, "Error, invalid stream size.\n");
+    if (len >= INT_MAX/4-1 || len < 0 || skip > buf_size) {
+        av_log(ctx->avctx, AV_LOG_ERROR, "invalid stream size\n");
         return AVERROR_INVALIDDATA;
     }
 
@@ -325,7 +346,11 @@ static int tm2_read_stream(TM2Context *ctx, const uint8_t *buf, int stream_id, i
         tm2_free_codes(&codes);
         return AVERROR_INVALIDDATA;
     }
-    ctx->tokens[stream_id]   = av_realloc(ctx->tokens[stream_id], toks * sizeof(int));
+    ret = av_reallocp_array(&ctx->tokens[stream_id], toks, sizeof(int));
+    if (ret < 0) {
+        ctx->tok_lens[stream_id] = 0;
+        return ret;
+    }
     ctx->tok_lens[stream_id] = toks;
     len = bytestream2_get_be32(&gb);
     if (len > 0) {
@@ -339,7 +364,7 @@ static int tm2_read_stream(TM2Context *ctx, const uint8_t *buf, int stream_id, i
                 return AVERROR_INVALIDDATA;
             }
             ctx->tokens[stream_id][i] = tm2_get_token(&ctx->gb, &codes);
-            if (stream_id <= TM2_MOT && ctx->tokens[stream_id][i] >= TM2_DELTAS) {
+            if (stream_id <= TM2_MOT && ctx->tokens[stream_id][i] >= TM2_DELTAS || ctx->tokens[stream_id][i]<0) {
                 av_log(ctx->avctx, AV_LOG_ERROR, "Invalid delta token index %d for type %d, n=%d\n",
                        ctx->tokens[stream_id][i], stream_id, i);
                 return AVERROR_INVALIDDATA;
@@ -366,8 +391,13 @@ static inline int GET_TOK(TM2Context *ctx,int type)
         av_log(ctx->avctx, AV_LOG_ERROR, "Read token from stream %i out of bounds (%i>=%i)\n", type, ctx->tok_ptrs[type], ctx->tok_lens[type]);
         return 0;
     }
-    if (type <= TM2_MOT)
+    if (type <= TM2_MOT) {
+        if (ctx->tokens[type][ctx->tok_ptrs[type]] >= TM2_DELTAS) {
+            av_log(ctx->avctx, AV_LOG_ERROR, "token %d is too large\n", ctx->tokens[type][ctx->tok_ptrs[type]]);
+            return 0;
+        }
         return ctx->deltas[type][ctx->tokens[type][ctx->tok_ptrs[type]++]];
+    }
     return ctx->tokens[type][ctx->tok_ptrs[type]++];
 }
 
@@ -674,6 +704,11 @@ static inline void tm2_motion_block(TM2Context *ctx, AVFrame *pic, int bx, int b
     mx = av_clip(mx, -(bx * 4 + 4), ctx->avctx->width  - bx * 4);
     my = av_clip(my, -(by * 4 + 4), ctx->avctx->height - by * 4);
 
+    if (4*bx+mx<0 || 4*by+my<0 || 4*bx+mx+4 > ctx->avctx->width || 4*by+my+4 > ctx->avctx->height) {
+        av_log(ctx->avctx, AV_LOG_ERROR, "MV out of picture\n");
+        return;
+    }
+
     Yo += my * oYstride + mx;
     Uo += (my >> 1) * oUstride + (mx >> 1);
     Vo += (my >> 1) * oVstride + (mx >> 1);
@@ -844,37 +879,34 @@ static int decode_frame(AVCodecContext *avctx,
     AVFrame * const p    = l->pic;
     int offset           = TM2_HEADER_SIZE;
     int i, t, ret;
-    uint8_t *swbuf;
 
-    swbuf = av_malloc(buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
-    if (!swbuf) {
+    av_fast_padded_malloc(&l->buffer, &l->buffer_size, buf_size);
+    if (!l->buffer) {
         av_log(avctx, AV_LOG_ERROR, "Cannot allocate temporary buffer\n");
         return AVERROR(ENOMEM);
     }
 
-    if ((ret = ff_reget_buffer(avctx, p)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
-        av_free(swbuf);
+    if ((ret = ff_reget_buffer(avctx, p)) < 0)
         return ret;
-    }
 
-    l->bdsp.bswap_buf((uint32_t *) swbuf, (const uint32_t *) buf,
+    l->bdsp.bswap_buf((uint32_t *) l->buffer, (const uint32_t *) buf,
                       buf_size >> 2);
 
-    if ((ret = tm2_read_header(l, swbuf)) < 0) {
-        av_free(swbuf);
+    if ((ret = tm2_read_header(l, l->buffer)) < 0) {
         return ret;
     }
 
     for (i = 0; i < TM2_NUM_STREAMS; i++) {
         if (offset >= buf_size) {
-            av_free(swbuf);
+            av_log(avctx, AV_LOG_ERROR, "no space for tm2_read_stream\n");
             return AVERROR_INVALIDDATA;
         }
-        t = tm2_read_stream(l, swbuf + offset, tm2_stream_order[i],
+
+        t = tm2_read_stream(l, l->buffer + offset, tm2_stream_order[i],
                             buf_size - offset);
         if (t < 0) {
-            av_free(swbuf);
+            int j = tm2_stream_order[i];
+            memset(l->tokens[j], 0, sizeof(**l->tokens) * l->tok_lens[j]);
             return t;
         }
         offset += t;
@@ -888,7 +920,6 @@ static int decode_frame(AVCodecContext *avctx,
     l->cur = !l->cur;
     *got_frame      = 1;
     ret = av_frame_ref(data, l->pic);
-    av_free(swbuf);
 
     return (ret < 0) ? ret : buf_size;
 }
@@ -912,8 +943,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
 
     ff_bswapdsp_init(&l->bdsp);
 
-    l->last  = av_malloc(4 * sizeof(*l->last)  * (w >> 2));
-    l->clast = av_malloc(4 * sizeof(*l->clast) * (w >> 2));
+    l->last  = av_malloc_array(w >> 2, 4 * sizeof(*l->last) );
+    l->clast = av_malloc_array(w >> 2, 4 * sizeof(*l->clast));
 
     for (i = 0; i < TM2_NUM_STREAMS; i++) {
         l->tokens[i] = NULL;
@@ -922,15 +953,15 @@ static av_cold int decode_init(AVCodecContext *avctx)
 
     w += 8;
     h += 8;
-    l->Y1_base = av_malloc(sizeof(*l->Y1_base) * w * h);
-    l->Y2_base = av_malloc(sizeof(*l->Y2_base) * w * h);
+    l->Y1_base = av_calloc(w * h, sizeof(*l->Y1_base));
+    l->Y2_base = av_calloc(w * h, sizeof(*l->Y2_base));
     l->y_stride = w;
     w = (w + 1) >> 1;
     h = (h + 1) >> 1;
-    l->U1_base = av_malloc(sizeof(*l->U1_base) * w * h);
-    l->V1_base = av_malloc(sizeof(*l->V1_base) * w * h);
-    l->U2_base = av_malloc(sizeof(*l->U2_base) * w * h);
-    l->V2_base = av_malloc(sizeof(*l->V1_base) * w * h);
+    l->U1_base = av_calloc(w * h, sizeof(*l->U1_base));
+    l->V1_base = av_calloc(w * h, sizeof(*l->V1_base));
+    l->U2_base = av_calloc(w * h, sizeof(*l->U2_base));
+    l->V2_base = av_calloc(w * h, sizeof(*l->V2_base));
     l->uv_stride = w;
     l->cur = 0;
     if (!l->Y1_base || !l->Y2_base || !l->U1_base ||
@@ -944,6 +975,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
         av_freep(&l->V2_base);
         av_freep(&l->last);
         av_freep(&l->clast);
+        av_frame_free(&l->pic);
         return AVERROR(ENOMEM);
     }
     l->Y1 = l->Y1_base + l->y_stride  * 4 + 4;
@@ -973,6 +1005,8 @@ static av_cold int decode_end(AVCodecContext *avctx)
         av_free(l->U2_base);
         av_free(l->V2_base);
     }
+    av_freep(&l->buffer);
+    l->buffer_size = 0;
 
     av_frame_free(&l->pic);
 
diff --git a/libavcodec/truespeech.c b/libavcodec/truespeech.c
index 34b7c3b..b00a45f 100644
--- a/libavcodec/truespeech.c
+++ b/libavcodec/truespeech.c
@@ -2,20 +2,20 @@
  * DSP Group TrueSpeech compatible decoder
  * Copyright (c) 2005 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -325,10 +325,8 @@ static int truespeech_decode_frame(AVCodecContext *avctx, void *data,
 
     /* get output buffer */
     frame->nb_samples = iterations * 240;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     samples = (int16_t *)frame->data[0];
 
     memset(samples, 0, iterations * 240 * sizeof(*samples));
diff --git a/libavcodec/truespeech_data.h b/libavcodec/truespeech_data.h
index 6e9806a..73ebda5 100644
--- a/libavcodec/truespeech_data.h
+++ b/libavcodec/truespeech_data.h
@@ -2,20 +2,20 @@
  * DSP Group TrueSpeech compatible decoder
  * copyright (c) 2005 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/tscc.c b/libavcodec/tscc.c
index 16d0770..a7a0c2e 100644
--- a/libavcodec/tscc.c
+++ b/libavcodec/tscc.c
@@ -2,20 +2,20 @@
  * TechSmith Camtasia decoder
  * Copyright (c) 2004 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -47,6 +47,7 @@
 typedef struct TsccContext {
 
     AVCodecContext *avctx;
+    AVFrame *frame;
 
     // Bits per pixel
     int bpp;
@@ -67,13 +68,11 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     const uint8_t *buf = avpkt->data;
     int buf_size = avpkt->size;
     CamtasiaContext * const c = avctx->priv_data;
-    AVFrame *frame = data;
+    AVFrame *frame = c->frame;
     int ret;
 
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0){
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, frame)) < 0)
         return ret;
-    }
 
     ret = inflateReset(&c->zstream);
     if (ret != Z_OK) {
@@ -109,6 +108,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         memcpy(frame->data[1], c->pal, AVPALETTE_SIZE);
     }
 
+    if ((ret = av_frame_ref(data, frame)) < 0)
+        return ret;
     *got_frame      = 1;
 
     /* always report that the buffer was completely consumed */
@@ -134,7 +135,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
              break;
     case 32: avctx->pix_fmt = AV_PIX_FMT_RGB32; break;
     default: av_log(avctx, AV_LOG_ERROR, "Camtasia error: unknown depth %i bpp\n", avctx->bits_per_coded_sample);
-             return AVERROR_INVALIDDATA;
+             return AVERROR_PATCHWELCOME;
     }
     c->bpp = avctx->bits_per_coded_sample;
     // buffer size for RLE 'best' case when 2-byte code precedes each pixel and there may be padding after it too
@@ -157,6 +158,10 @@ static av_cold int decode_init(AVCodecContext *avctx)
         return AVERROR_UNKNOWN;
     }
 
+    c->frame = av_frame_alloc();
+    if (!c->frame)
+        return AVERROR(ENOMEM);
+
     return 0;
 }
 
@@ -165,6 +170,7 @@ static av_cold int decode_end(AVCodecContext *avctx)
     CamtasiaContext * const c = avctx->priv_data;
 
     av_freep(&c->decomp_buf);
+    av_frame_free(&c->frame);
 
     inflateEnd(&c->zstream);
 
diff --git a/libavcodec/tscc2.c b/libavcodec/tscc2.c
index bd1854b..92210f7 100644
--- a/libavcodec/tscc2.c
+++ b/libavcodec/tscc2.c
@@ -2,20 +2,20 @@
  * TechSmith Screen Codec 2 (aka Dora) decoder
  * Copyright (c) 2012 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -90,14 +90,14 @@ static av_cold int init_vlcs(TSCC2Context *c)
     return 0;
 }
 
-#define DEQUANT(val, q) ((q * val + 0x80) >> 8)
+#define DEQUANT(val, q) (((q) * (val) + 0x80) >> 8)
 #define DCT1D(d0, d1, d2, d3, s0, s1, s2, s3, OP) \
     OP(d0, 5 * ((s0) + (s1) + (s2)) + 2 * (s3));  \
     OP(d1, 5 * ((s0) - (s2) - (s3)) + 2 * (s1));  \
     OP(d2, 5 * ((s0) - (s2) + (s3)) - 2 * (s1));  \
     OP(d3, 5 * ((s0) - (s1) + (s2)) - 2 * (s3));  \
 
-#define COL_OP(a, b)  a = b
+#define COL_OP(a, b)  a = (b)
 #define ROW_OP(a, b)  a = ((b) + 0x20) >> 6
 
 static void tscc2_idct4_put(int *in, int q[3], uint8_t *dst, int stride)
@@ -194,7 +194,8 @@ static int tscc2_decode_slice(TSCC2Context *c, int mb_y,
     int i, mb_x, q, ret;
     int off;
 
-    init_get_bits(&c->gb, buf, buf_size * 8);
+    if ((ret = init_get_bits8(&c->gb, buf, buf_size)) < 0)
+        return ret;
 
     for (mb_x = 0; mb_x < c->mb_width; mb_x++) {
         q = c->slice_quants[mb_x + c->mb_width * mb_y];
@@ -234,7 +235,6 @@ static int tscc2_decode_frame(AVCodecContext *avctx, void *data,
     }
 
     if ((ret = ff_reget_buffer(avctx, c->pic)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
         return ret;
     }
 
diff --git a/libavcodec/tscc2data.h b/libavcodec/tscc2data.h
index 70a06e5..4586da7 100644
--- a/libavcodec/tscc2data.h
+++ b/libavcodec/tscc2data.h
@@ -2,20 +2,20 @@
  * TechSmith Screen Codec 2 (aka Dora) decoder
  * Copyright (c) 2012 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/tta.c b/libavcodec/tta.c
index 4d2e2a0..5fdbac8 100644
--- a/libavcodec/tta.c
+++ b/libavcodec/tta.c
@@ -2,20 +2,20 @@
  * TTA (The Lossless True Audio) decoder
  * Copyright (c) 2006 Alex Beregszaszi
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -29,35 +29,23 @@
 
 #define BITSTREAM_READER_LE
 #include <limits.h>
+#include "ttadata.h"
+#include "ttadsp.h"
 #include "avcodec.h"
 #include "get_bits.h"
+#include "thread.h"
+#include "unary.h"
 #include "internal.h"
 #include "libavutil/crc.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/opt.h"
 
 #define FORMAT_SIMPLE    1
 #define FORMAT_ENCRYPTED 2
 
-#define MAX_ORDER 16
-typedef struct TTAFilter {
-    int32_t shift, round, error;
-    int32_t qm[MAX_ORDER];
-    int32_t dx[MAX_ORDER];
-    int32_t dl[MAX_ORDER];
-} TTAFilter;
-
-typedef struct TTARice {
-    uint32_t k0, k1, sum0, sum1;
-} TTARice;
-
-typedef struct TTAChannel {
-    int32_t predictor;
-    TTAFilter filter;
-    TTARice rice;
-} TTAChannel;
-
 typedef struct TTAContext {
+    AVClass *class;
     AVCodecContext *avctx;
-    GetBitContext gb;
     const AVCRC *crc_table;
 
     int format, channels, bps;
@@ -66,128 +54,65 @@ typedef struct TTAContext {
 
     int32_t *decode_buffer;
 
+    uint8_t crc_pass[8];
+    uint8_t *pass;
     TTAChannel *ch_ctx;
+    TTADSPContext dsp;
 } TTAContext;
 
-static const uint32_t shift_1[] = {
-    0x00000001, 0x00000002, 0x00000004, 0x00000008,
-    0x00000010, 0x00000020, 0x00000040, 0x00000080,
-    0x00000100, 0x00000200, 0x00000400, 0x00000800,
-    0x00001000, 0x00002000, 0x00004000, 0x00008000,
-    0x00010000, 0x00020000, 0x00040000, 0x00080000,
-    0x00100000, 0x00200000, 0x00400000, 0x00800000,
-    0x01000000, 0x02000000, 0x04000000, 0x08000000,
-    0x10000000, 0x20000000, 0x40000000, 0x80000000,
-    0x80000000, 0x80000000, 0x80000000, 0x80000000,
-    0x80000000, 0x80000000, 0x80000000, 0x80000000
-};
-
-static const uint32_t * const shift_16 = shift_1 + 4;
-
-static const int32_t ttafilter_configs[4] = {
-    10,
-    9,
-    10,
-    12
+static const int64_t tta_channel_layouts[7] = {
+    AV_CH_LAYOUT_STEREO,
+    AV_CH_LAYOUT_STEREO|AV_CH_LOW_FREQUENCY,
+    AV_CH_LAYOUT_QUAD,
+    0,
+    AV_CH_LAYOUT_5POINT1_BACK,
+    AV_CH_LAYOUT_5POINT1_BACK|AV_CH_BACK_CENTER,
+    AV_CH_LAYOUT_7POINT1_WIDE
 };
 
-static void ttafilter_init(TTAFilter *c, int32_t shift) {
-    memset(c, 0, sizeof(TTAFilter));
-    c->shift = shift;
-   c->round = shift_1[shift-1];
-//    c->round = 1 << (shift - 1);
-}
-
-// FIXME: copy paste from original
-static inline void memshl(register int32_t *a, register int32_t *b) {
-    *a++ = *b++;
-    *a++ = *b++;
-    *a++ = *b++;
-    *a++ = *b++;
-    *a++ = *b++;
-    *a++ = *b++;
-    *a++ = *b++;
-    *a = *b;
-}
-
-static inline void ttafilter_process(TTAFilter *c, int32_t *in)
+static int tta_check_crc(TTAContext *s, const uint8_t *buf, int buf_size)
 {
-    register int32_t *dl = c->dl, *qm = c->qm, *dx = c->dx, sum = c->round;
-
-    if (!c->error) {
-        sum += *dl++ * *qm, qm++;
-        sum += *dl++ * *qm, qm++;
-        sum += *dl++ * *qm, qm++;
-        sum += *dl++ * *qm, qm++;
-        sum += *dl++ * *qm, qm++;
-        sum += *dl++ * *qm, qm++;
-        sum += *dl++ * *qm, qm++;
-        sum += *dl++ * *qm, qm++;
-        dx += 8;
-    } else if(c->error < 0) {
-        sum += *dl++ * (*qm -= *dx++), qm++;
-        sum += *dl++ * (*qm -= *dx++), qm++;
-        sum += *dl++ * (*qm -= *dx++), qm++;
-        sum += *dl++ * (*qm -= *dx++), qm++;
-        sum += *dl++ * (*qm -= *dx++), qm++;
-        sum += *dl++ * (*qm -= *dx++), qm++;
-        sum += *dl++ * (*qm -= *dx++), qm++;
-        sum += *dl++ * (*qm -= *dx++), qm++;
-    } else {
-        sum += *dl++ * (*qm += *dx++), qm++;
-        sum += *dl++ * (*qm += *dx++), qm++;
-        sum += *dl++ * (*qm += *dx++), qm++;
-        sum += *dl++ * (*qm += *dx++), qm++;
-        sum += *dl++ * (*qm += *dx++), qm++;
-        sum += *dl++ * (*qm += *dx++), qm++;
-        sum += *dl++ * (*qm += *dx++), qm++;
-        sum += *dl++ * (*qm += *dx++), qm++;
-    }
-
-    *(dx-0) = ((*(dl-1) >> 30) | 1) << 2;
-    *(dx-1) = ((*(dl-2) >> 30) | 1) << 1;
-    *(dx-2) = ((*(dl-3) >> 30) | 1) << 1;
-    *(dx-3) = ((*(dl-4) >> 30) | 1);
-
-    c->error = *in;
-    *in += (sum >> c->shift);
-    *dl = *in;
-
-    *(dl-1) = *dl - *(dl-1);
-    *(dl-2) = *(dl-1) - *(dl-2);
-    *(dl-3) = *(dl-2) - *(dl-3);
+    uint32_t crc, CRC;
 
-    memshl(c->dl, c->dl + 1);
-    memshl(c->dx, c->dx + 1);
-}
+    CRC = AV_RL32(buf + buf_size);
+    crc = av_crc(s->crc_table, 0xFFFFFFFFU, buf, buf_size);
+    if (CRC != (crc ^ 0xFFFFFFFFU)) {
+        av_log(s->avctx, AV_LOG_ERROR, "CRC error\n");
+        return AVERROR_INVALIDDATA;
+    }
 
-static void rice_init(TTARice *c, uint32_t k0, uint32_t k1)
-{
-    c->k0 = k0;
-    c->k1 = k1;
-    c->sum0 = shift_16[k0];
-    c->sum1 = shift_16[k1];
+    return 0;
 }
 
-static int tta_get_unary(GetBitContext *gb)
+static uint64_t tta_check_crc64(uint8_t *pass)
 {
-    int ret = 0;
+    uint64_t crc = UINT64_MAX, poly = 0x42F0E1EBA9EA3693U;
+    uint8_t *end = pass + strlen((const char *)pass);
+    int i;
+
+    while (pass < end) {
+        crc ^= (uint64_t)*pass++ << 56;
+        for (i = 0; i < 8; i++)
+            crc = (crc << 1) ^ (poly & (((int64_t) crc) >> 63));
+    }
 
-    // count ones
-    while (get_bits_left(gb) > 0 && get_bits1(gb))
-        ret++;
-    return ret;
+    return crc ^ UINT64_MAX;
 }
 
-static int tta_check_crc(TTAContext *s, const uint8_t *buf, int buf_size)
+static int allocate_buffers(AVCodecContext *avctx)
 {
-    uint32_t crc, CRC;
+    TTAContext *s = avctx->priv_data;
 
-    CRC = AV_RL32(buf + buf_size);
-    crc = av_crc(s->crc_table, 0xFFFFFFFFU, buf, buf_size);
-    if (CRC != (crc ^ 0xFFFFFFFFU)) {
-        av_log(s->avctx, AV_LOG_ERROR, "CRC error\n");
-        return AVERROR_INVALIDDATA;
+    if (s->bps < 3) {
+        s->decode_buffer = av_mallocz_array(sizeof(int32_t)*s->frame_length, s->channels);
+        if (!s->decode_buffer)
+            return AVERROR(ENOMEM);
+    } else
+        s->decode_buffer = NULL;
+    s->ch_ctx = av_malloc_array(avctx->channels, sizeof(*s->ch_ctx));
+    if (!s->ch_ctx) {
+        av_freep(&s->decode_buffer);
+        return AVERROR(ENOMEM);
     }
 
     return 0;
@@ -196,58 +121,59 @@ static int tta_check_crc(TTAContext *s, const uint8_t *buf, int buf_size)
 static av_cold int tta_decode_init(AVCodecContext * avctx)
 {
     TTAContext *s = avctx->priv_data;
+    GetBitContext gb;
     int total_frames;
 
     s->avctx = avctx;
 
-    // 30bytes includes a seektable with one frame
-    if (avctx->extradata_size < 30)
-        return -1;
-
-    init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size * 8);
-    if (show_bits_long(&s->gb, 32) == AV_RL32("TTA1"))
-    {
-        if (avctx->err_recognition & AV_EF_CRCCHECK) {
-            s->crc_table = av_crc_get_table(AV_CRC_32_IEEE_LE);
-            tta_check_crc(s, avctx->extradata, 18);
-        }
+    // 22 bytes: size of the TTA1 header, including its trailing CRC32
+    if (avctx->extradata_size < 22)
+        return AVERROR_INVALIDDATA;
 
+    s->crc_table = av_crc_get_table(AV_CRC_32_IEEE_LE);
+    init_get_bits8(&gb, avctx->extradata, avctx->extradata_size);
+    if (show_bits_long(&gb, 32) == AV_RL32("TTA1")) {
         /* signature */
-        skip_bits_long(&s->gb, 32);
+        skip_bits_long(&gb, 32);
 
-        s->format = get_bits(&s->gb, 16);
+        s->format = get_bits(&gb, 16);
         if (s->format > 2) {
-            av_log(s->avctx, AV_LOG_ERROR, "Invalid format\n");
-            return -1;
+            av_log(avctx, AV_LOG_ERROR, "Invalid format\n");
+            return AVERROR_INVALIDDATA;
         }
         if (s->format == FORMAT_ENCRYPTED) {
-            avpriv_report_missing_feature(s->avctx, "Encrypted TTA");
-            return AVERROR_PATCHWELCOME;
+            if (!s->pass) {
+                av_log(avctx, AV_LOG_ERROR, "Missing password for encrypted stream. Please use the -password option\n");
+                return AVERROR(EINVAL);
+            }
+            AV_WL64(s->crc_pass, tta_check_crc64(s->pass));
         }
-        avctx->channels = s->channels = get_bits(&s->gb, 16);
-        avctx->bits_per_coded_sample = get_bits(&s->gb, 16);
-        s->bps = (avctx->bits_per_coded_sample + 7) / 8;
-        avctx->sample_rate = get_bits_long(&s->gb, 32);
-        s->data_length = get_bits_long(&s->gb, 32);
-        skip_bits_long(&s->gb, 32); // CRC32 of header
+        avctx->channels = s->channels = get_bits(&gb, 16);
+        if (s->channels > 1 && s->channels < 9)
+            avctx->channel_layout = tta_channel_layouts[s->channels-2];
+        avctx->bits_per_raw_sample = get_bits(&gb, 16);
+        s->bps = (avctx->bits_per_raw_sample + 7) / 8;
+        avctx->sample_rate = get_bits_long(&gb, 32);
+        s->data_length = get_bits_long(&gb, 32);
+        skip_bits_long(&gb, 32); // CRC32 of header
 
         if (s->channels == 0) {
-            av_log(s->avctx, AV_LOG_ERROR, "Invalid number of channels\n");
+            av_log(avctx, AV_LOG_ERROR, "Invalid number of channels\n");
             return AVERROR_INVALIDDATA;
         } else if (avctx->sample_rate == 0) {
-            av_log(s->avctx, AV_LOG_ERROR, "Invalid samplerate\n");
+            av_log(avctx, AV_LOG_ERROR, "Invalid samplerate\n");
             return AVERROR_INVALIDDATA;
         }
 
         switch(s->bps) {
+        case 1: avctx->sample_fmt = AV_SAMPLE_FMT_U8; break;
         case 2:
             avctx->sample_fmt = AV_SAMPLE_FMT_S16;
-            avctx->bits_per_raw_sample = 16;
             break;
         case 3:
             avctx->sample_fmt = AV_SAMPLE_FMT_S32;
-            avctx->bits_per_raw_sample = 24;
             break;
+        //case 4: avctx->sample_fmt = AV_SAMPLE_FMT_S32; break;
         default:
             av_log(avctx, AV_LOG_ERROR, "Invalid/unsupported sample format.\n");
             return AVERROR_INVALIDDATA;
@@ -264,54 +190,35 @@ static av_cold int tta_decode_init(AVCodecContext * avctx)
         total_frames = s->data_length / s->frame_length +
                        (s->last_frame_length ? 1 : 0);
 
-        av_log(s->avctx, AV_LOG_DEBUG, "format: %d chans: %d bps: %d rate: %d block: %d\n",
+        av_log(avctx, AV_LOG_DEBUG, "format: %d chans: %d bps: %d rate: %d block: %d\n",
             s->format, avctx->channels, avctx->bits_per_coded_sample, avctx->sample_rate,
             avctx->block_align);
-        av_log(s->avctx, AV_LOG_DEBUG, "data_length: %d frame_length: %d last: %d total: %d\n",
+        av_log(avctx, AV_LOG_DEBUG, "data_length: %d frame_length: %d last: %d total: %d\n",
             s->data_length, s->frame_length, s->last_frame_length, total_frames);
 
-        // FIXME: seek table
-        if (avctx->extradata_size <= 26 || total_frames > INT_MAX / 4 ||
-            avctx->extradata_size - 26 < total_frames * 4)
-            av_log(avctx, AV_LOG_WARNING, "Seek table missing or too small\n");
-        else if (avctx->err_recognition & AV_EF_CRCCHECK) {
-            int ret = tta_check_crc(s, avctx->extradata + 22, total_frames * 4);
-            if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE)
-                return AVERROR_INVALIDDATA;
-        }
-        skip_bits_long(&s->gb, 32 * total_frames);
-        skip_bits_long(&s->gb, 32); // CRC32 of seektable
-
         if(s->frame_length >= UINT_MAX / (s->channels * sizeof(int32_t))){
             av_log(avctx, AV_LOG_ERROR, "frame_length too large\n");
-            return -1;
-        }
-
-        if (s->bps == 2) {
-            s->decode_buffer = av_mallocz(sizeof(int32_t)*s->frame_length*s->channels);
-            if (!s->decode_buffer)
-                return AVERROR(ENOMEM);
-        }
-        s->ch_ctx = av_malloc(avctx->channels * sizeof(*s->ch_ctx));
-        if (!s->ch_ctx) {
-            av_freep(&s->decode_buffer);
-            return AVERROR(ENOMEM);
+            return AVERROR_INVALIDDATA;
         }
     } else {
         av_log(avctx, AV_LOG_ERROR, "Wrong extradata present\n");
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
 
-    return 0;
+    ff_ttadsp_init(&s->dsp);
+
+    return allocate_buffers(avctx);
 }
 
 static int tta_decode_frame(AVCodecContext *avctx, void *data,
                             int *got_frame_ptr, AVPacket *avpkt)
 {
     AVFrame *frame     = data;
+    ThreadFrame tframe = { .f = data };
     const uint8_t *buf = avpkt->data;
     int buf_size = avpkt->size;
     TTAContext *s = avctx->priv_data;
+    GetBitContext gb;
     int i, ret;
     int cur_chan = 0, framelen = s->frame_length;
     int32_t *p;
@@ -322,14 +229,13 @@ static int tta_decode_frame(AVCodecContext *avctx, void *data,
             return AVERROR_INVALIDDATA;
     }
 
-    init_get_bits(&s->gb, buf, buf_size*8);
+    if ((ret = init_get_bits8(&gb, avpkt->data, avpkt->size)) < 0)
+        return ret;
 
     /* get output buffer */
     frame->nb_samples = framelen;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_thread_get_buffer(avctx, &tframe, 0)) < 0)
         return ret;
-    }
 
     // decode directly to output buffer for 24-bit sample format
     if (s->bps == 3)
@@ -337,9 +243,15 @@ static int tta_decode_frame(AVCodecContext *avctx, void *data,
 
     // init per channel states
     for (i = 0; i < s->channels; i++) {
+        TTAFilter *filter = &s->ch_ctx[i].filter;
         s->ch_ctx[i].predictor = 0;
-        ttafilter_init(&s->ch_ctx[i].filter, ttafilter_configs[s->bps-1]);
-        rice_init(&s->ch_ctx[i].rice, 10, 10);
+        ff_tta_filter_init(filter, ff_tta_filter_configs[s->bps-1]);
+        if (s->format == FORMAT_ENCRYPTED) {
+            int i;
+            for (i = 0; i < 8; i++)
+                filter->qm[i] = sign_extend(s->crc_pass[i], 8);
+        }
+        ff_tta_rice_init(&s->ch_ctx[i].rice, 10, 10);
     }
 
     i = 0;
@@ -350,7 +262,7 @@ static int tta_decode_frame(AVCodecContext *avctx, void *data,
         uint32_t unary, depth, k;
         int32_t value;
 
-        unary = tta_get_unary(&s->gb);
+        unary = get_unary(&gb, 0, get_bits_left(&gb));
 
         if (unary == 0) {
             depth = 0;
@@ -361,7 +273,7 @@ static int tta_decode_frame(AVCodecContext *avctx, void *data,
             unary--;
         }
 
-        if (get_bits_left(&s->gb) < k) {
+        if (get_bits_left(&gb) < k) {
             ret = AVERROR_INVALIDDATA;
             goto error;
         }
@@ -371,7 +283,7 @@ static int tta_decode_frame(AVCodecContext *avctx, void *data,
                 ret = AVERROR_INVALIDDATA;
                 goto error;
             }
-            value = (unary << k) + get_bits(&s->gb, k);
+            value = (unary << k) + get_bits(&gb, k);
         } else
             value = unary;
 
@@ -379,16 +291,16 @@ static int tta_decode_frame(AVCodecContext *avctx, void *data,
         switch (depth) {
         case 1:
             rice->sum1 += value - (rice->sum1 >> 4);
-            if (rice->k1 > 0 && rice->sum1 < shift_16[rice->k1])
+            if (rice->k1 > 0 && rice->sum1 < ff_tta_shift_16[rice->k1])
                 rice->k1--;
-            else if(rice->sum1 > shift_16[rice->k1 + 1])
+            else if(rice->sum1 > ff_tta_shift_16[rice->k1 + 1])
                 rice->k1++;
-            value += shift_1[rice->k0];
+            value += ff_tta_shift_1[rice->k0];
         default:
             rice->sum0 += value - (rice->sum0 >> 4);
-            if (rice->k0 > 0 && rice->sum0 < shift_16[rice->k0])
+            if (rice->k0 > 0 && rice->sum0 < ff_tta_shift_16[rice->k0])
                 rice->k0--;
-            else if(rice->sum0 > shift_16[rice->k0 + 1])
+            else if(rice->sum0 > ff_tta_shift_16[rice->k0 + 1])
                 rice->k0++;
         }
 
@@ -396,10 +308,11 @@ static int tta_decode_frame(AVCodecContext *avctx, void *data,
         *p = 1 + ((value >> 1) ^ ((value & 1) - 1));
 
         // run hybrid filter
-        ttafilter_process(filter, p);
+        s->dsp.ttafilter_process_dec(filter->qm, filter->dx, filter->dl, &filter->error, p,
+                                     filter->shift, filter->round);
 
         // fixed order prediction
-#define PRED(x, k) (int32_t)((((uint64_t)x << k) - x) >> k)
+#define PRED(x, k) (int32_t)((((uint64_t)(x) << (k)) - (x)) >> (k))
         switch (s->bps) {
         case 1: *p += PRED(*predictor, 4); break;
         case 2:
@@ -421,32 +334,43 @@ static int tta_decode_frame(AVCodecContext *avctx, void *data,
             cur_chan = 0;
             i++;
             // check for last frame
-            if (i == s->last_frame_length && get_bits_left(&s->gb) / 8 == 4) {
+            if (i == s->last_frame_length && get_bits_left(&gb) / 8 == 4) {
                 frame->nb_samples = framelen = s->last_frame_length;
                 break;
             }
         }
     }
 
-    align_get_bits(&s->gb);
-    if (get_bits_left(&s->gb) < 32) {
+    align_get_bits(&gb);
+    if (get_bits_left(&gb) < 32) {
         ret = AVERROR_INVALIDDATA;
         goto error;
     }
-    skip_bits_long(&s->gb, 32); // frame crc
+    skip_bits_long(&gb, 32); // frame crc
 
     // convert to output buffer
-    if (s->bps == 2) {
+    switch (s->bps) {
+    case 1: {
+        uint8_t *samples = (uint8_t *)frame->data[0];
+        for (p = s->decode_buffer; p < s->decode_buffer + (framelen * s->channels); p++)
+            *samples++ = *p + 0x80;
+        break;
+        }
+    case 2: {
         int16_t *samples = (int16_t *)frame->data[0];
         for (p = s->decode_buffer; p < s->decode_buffer + (framelen * s->channels); p++)
             *samples++ = *p;
-    } else {
+        break;
+        }
+    case 3: {
         // shift samples for 24-bit sample format
         int32_t *samples = (int32_t *)frame->data[0];
         for (i = 0; i < framelen * s->channels; i++)
             *samples++ <<= 8;
         // reset decode buffer
         s->decode_buffer = NULL;
+        break;
+        }
     }
 
     *got_frame_ptr = 1;
@@ -459,15 +383,38 @@ error:
     return ret;
 }
 
+static int init_thread_copy(AVCodecContext *avctx)
+{
+    TTAContext *s = avctx->priv_data;
+    s->avctx = avctx;
+    return allocate_buffers(avctx);
+}
+
 static av_cold int tta_decode_close(AVCodecContext *avctx) {
     TTAContext *s = avctx->priv_data;
 
-    av_free(s->decode_buffer);
+    if (s->bps < 3)
+        av_free(s->decode_buffer);
+    s->decode_buffer = NULL;
     av_freep(&s->ch_ctx);
 
     return 0;
 }
 
+#define OFFSET(x) offsetof(TTAContext, x)
+#define DEC (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM)
+static const AVOption options[] = {
+    { "password", "Set decoding password", OFFSET(pass), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, DEC },
+    { NULL },
+};
+
+static const AVClass tta_decoder_class = {
+    .class_name = "TTA Decoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVCodec ff_tta_decoder = {
     .name           = "tta",
     .long_name      = NULL_IF_CONFIG_SMALL("TTA (True Audio)"),
@@ -477,5 +424,7 @@ AVCodec ff_tta_decoder = {
     .init           = tta_decode_init,
     .close          = tta_decode_close,
     .decode         = tta_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .init_thread_copy = ONLY_IF_THREADS_ENABLED(init_thread_copy),
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
+    .priv_class     = &tta_decoder_class,
 };
diff --git a/libavcodec/ttadata.c b/libavcodec/ttadata.c
new file mode 100644
index 0000000..bf793a4
--- /dev/null
+++ b/libavcodec/ttadata.c
@@ -0,0 +1,52 @@
+/*
+ * TTA (The Lossless True Audio) data
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "ttadata.h"
+
+const uint32_t ff_tta_shift_1[] = { /* entry i is 1 << i for i = 0..31; entries 32..39 repeat 0x80000000 */
+    0x00000001, 0x00000002, 0x00000004, 0x00000008,
+    0x00000010, 0x00000020, 0x00000040, 0x00000080,
+    0x00000100, 0x00000200, 0x00000400, 0x00000800,
+    0x00001000, 0x00002000, 0x00004000, 0x00008000,
+    0x00010000, 0x00020000, 0x00040000, 0x00080000,
+    0x00100000, 0x00200000, 0x00400000, 0x00800000,
+    0x01000000, 0x02000000, 0x04000000, 0x08000000,
+    0x10000000, 0x20000000, 0x40000000, 0x80000000,
+    0x80000000, 0x80000000, 0x80000000, 0x80000000,
+    0x80000000, 0x80000000, 0x80000000, 0x80000000
+};
+
+const uint32_t * const ff_tta_shift_16 = ff_tta_shift_1 + 4; /* ff_tta_shift_16[k] == ff_tta_shift_1[k + 4], i.e. 16 << k while k <= 27 */
+
+const uint8_t ff_tta_filter_configs[] = { 10, 9, 10, 12 }; /* filter shift values; the codec indexes this with (bytes per sample - 1) */
+
+void ff_tta_rice_init(TTARice *c, uint32_t k0, uint32_t k1)
+{   /* Reset the Rice coder state *c to the parameters k0 and k1. */
+    c->k0 = k0;
+    c->k1 = k1;
+    c->sum0 = ff_tta_shift_16[k0]; /* each running sum starts at the table value for its k */
+    c->sum1 = ff_tta_shift_16[k1];
+}
+
+void ff_tta_filter_init(TTAFilter *c, int32_t shift) { /* Clear the whole filter state, then set its shift and rounding term. */
+    memset(c, 0, sizeof(TTAFilter));
+    c->shift = shift;
+    c->round = ff_tta_shift_1[shift-1]; /* 1 << (shift - 1); shift must be >= 1 for the index to be valid */
+}
diff --git a/libavcodec/ttadata.h b/libavcodec/ttadata.h
new file mode 100644
index 0000000..48c4cd0
--- /dev/null
+++ b/libavcodec/ttadata.h
@@ -0,0 +1,50 @@
+/*
+ * TTA (The Lossless True Audio) data
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_TTADATA_H
+#define AVCODEC_TTADATA_H
+
+#include "internal.h"
+
+#define MAX_ORDER 16
+typedef struct TTAFilter {
+    int32_t shift, round, error;
+    int32_t qm[MAX_ORDER];
+    int32_t dx[MAX_ORDER];
+    int32_t dl[MAX_ORDER];
+} TTAFilter;
+
+typedef struct TTARice {
+    uint32_t k0, k1, sum0, sum1;
+} TTARice;
+
+typedef struct TTAChannel {
+    int32_t predictor;
+    TTAFilter filter;
+    TTARice rice;
+} TTAChannel;
+
+extern const uint32_t ff_tta_shift_1[];
+extern const uint32_t * const ff_tta_shift_16;
+extern const uint8_t ff_tta_filter_configs[];
+
+void ff_tta_rice_init(TTARice *c, uint32_t k0, uint32_t k1);
+void ff_tta_filter_init(TTAFilter *c, int32_t shift);
+#endif /* AVCODEC_TTADATA_H */
diff --git a/libavcodec/ttadsp.c b/libavcodec/ttadsp.c
new file mode 100644
index 0000000..30b7ab9
--- /dev/null
+++ b/libavcodec/ttadsp.c
@@ -0,0 +1,57 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "ttadsp.h"
+
+static void ttafilter_process_dec_c(int32_t *qm, int32_t *dx, int32_t *dl,
+                                    int32_t *error, int32_t *in, int32_t shift,
+                                    int32_t round) { /* One decoder filter step: updates qm/dx/dl/*error and adds the prediction to *in in place. */
+    if (*error < 0) { /* step all 8 weights by -dx or +dx according to the sign of the stored *error */
+        qm[0] -= dx[0]; qm[1] -= dx[1]; qm[2] -= dx[2]; qm[3] -= dx[3];
+        qm[4] -= dx[4]; qm[5] -= dx[5]; qm[6] -= dx[6]; qm[7] -= dx[7];
+    } else if (*error > 0) {
+        qm[0] += dx[0]; qm[1] += dx[1]; qm[2] += dx[2]; qm[3] += dx[3];
+        qm[4] += dx[4]; qm[5] += dx[5]; qm[6] += dx[6]; qm[7] += dx[7];
+    }
+
+    round +=    dl[0] * qm[0] + dl[1] * qm[1] + dl[2] * qm[2] + dl[3] * qm[3] +
+                dl[4] * qm[4] + dl[5] * qm[5] + dl[6] * qm[6] + dl[7] * qm[7]; /* round now holds rounding term + dot(dl, qm) */
+
+    dx[0] = dx[1]; dx[1] = dx[2]; dx[2] = dx[3]; dx[3] = dx[4]; /* slide slots 1..4 down into 0..3 */
+    dl[0] = dl[1]; dl[1] = dl[2]; dl[2] = dl[3]; dl[3] = dl[4];
+    /* dx[4..7] are rebuilt from dl[4..7] >> 30 combined with fixed low-bit masks, before dl[4..7] change below */
+    dx[4] = ((dl[4] >> 30) | 1);
+    dx[5] = ((dl[5] >> 30) | 2) & ~1;
+    dx[6] = ((dl[6] >> 30) | 2) & ~1;
+    dx[7] = ((dl[7] >> 30) | 4) & ~3;
+
+    *error = *in; /* keep the incoming value; its sign selects the weight update on the next call */
+    *in += (round >> shift); /* add the scaled prediction to the input */
+    /* fold the updated *in into dl[4..7]; dl[7] ends up holding the updated *in */
+    dl[4] = -dl[5]; dl[5] = -dl[6];
+    dl[6] = *in - dl[7]; dl[7] = *in;
+    dl[5] += dl[6]; dl[4] += dl[5];
+}
+
+av_cold void ff_ttadsp_init(TTADSPContext *c)
+{   /* Install the C filter routine in *c, then hand *c to the x86 init when ARCH_X86 is set. */
+    c->ttafilter_process_dec = ttafilter_process_dec_c;
+
+    if (ARCH_X86)
+        ff_ttadsp_init_x86(c); /* NOTE(review): presumably replaces the pointer with an optimized version - not visible here */
+}
diff --git a/libavcodec/ttadsp.h b/libavcodec/ttadsp.h
new file mode 100644
index 0000000..56930f1
--- /dev/null
+++ b/libavcodec/ttadsp.h
@@ -0,0 +1,34 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_TTADSP_H
+#define AVCODEC_TTADSP_H
+
+#include <stdint.h>
+#include "ttadata.h"
+
+typedef struct TTADSPContext {
+    void (*ttafilter_process_dec)(int32_t *qm, int32_t *dx, int32_t *dl,
+                                  int32_t *error, int32_t *in, int32_t shift,
+                                  int32_t round);
+} TTADSPContext;
+
+void ff_ttadsp_init(TTADSPContext *c);
+void ff_ttadsp_init_x86(TTADSPContext *c);
+
+#endif /* AVCODEC_TTADSP_H */
diff --git a/libavcodec/ttaenc.c b/libavcodec/ttaenc.c
new file mode 100644
index 0000000..ccd41a9
--- /dev/null
+++ b/libavcodec/ttaenc.c
@@ -0,0 +1,232 @@
+/*
+ * TTA (The Lossless True Audio) encoder
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define BITSTREAM_WRITER_LE
+#include "ttadata.h"
+#include "avcodec.h"
+#include "put_bits.h"
+#include "internal.h"
+#include "libavutil/crc.h"
+
+typedef struct TTAEncContext { /* private state of the TTA encoder */
+    const AVCRC *crc_table; /* CRC table used for the per-packet checksum */
+    int bps;                /* bytes per sample (bits_per_raw_sample >> 3) */
+    TTAChannel *ch_ctx;     /* array with one predictor/filter/Rice state per channel */
+} TTAEncContext;
+
+static av_cold int tta_encode_init(AVCodecContext *avctx)
+{   /* Set up the encoder state; returns 0 on success or AVERROR(ENOMEM). */
+    TTAEncContext *s = avctx->priv_data;
+
+    s->crc_table = av_crc_get_table(AV_CRC_32_IEEE_LE);
+
+    switch (avctx->sample_fmt) { /* choose the coded bit depth from the input sample format */
+    case AV_SAMPLE_FMT_U8:
+        avctx->bits_per_raw_sample = 8;
+        break;
+    case AV_SAMPLE_FMT_S16:
+        avctx->bits_per_raw_sample = 16;
+        break;
+    case AV_SAMPLE_FMT_S32:
+        if (avctx->bits_per_raw_sample > 24)
+            av_log(avctx, AV_LOG_WARNING, "encoding as 24 bits-per-sample\n");
+        avctx->bits_per_raw_sample = 24; /* S32 input is always coded as 24 bits */
+    }
+
+    s->bps = avctx->bits_per_raw_sample >> 3; /* bytes per sample */
+    avctx->frame_size = 256 * avctx->sample_rate / 245; /* samples per frame, derived from the sample rate */
+
+    s->ch_ctx = av_malloc_array(avctx->channels, sizeof(*s->ch_ctx)); /* one state slot per channel */
+    if (!s->ch_ctx)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+static inline void ttafilter_process(TTAFilter *c, int32_t *in)
+{   /* One encoder filter step: updates the state in *c and replaces *in with *in minus the prediction. */
+    register int32_t *dl = c->dl, *qm = c->qm, *dx = c->dx, sum = c->round;
+
+    if (c->error < 0) { /* step all 8 weights by -dx or +dx according to the sign of the stored error */
+        qm[0] -= dx[0]; qm[1] -= dx[1]; qm[2] -= dx[2]; qm[3] -= dx[3];
+        qm[4] -= dx[4]; qm[5] -= dx[5]; qm[6] -= dx[6]; qm[7] -= dx[7];
+    } else if (c->error > 0) {
+        qm[0] += dx[0]; qm[1] += dx[1]; qm[2] += dx[2]; qm[3] += dx[3];
+        qm[4] += dx[4]; qm[5] += dx[5]; qm[6] += dx[6]; qm[7] += dx[7];
+    }
+
+    sum += dl[0] * qm[0] + dl[1] * qm[1] + dl[2] * qm[2] + dl[3] * qm[3] +
+           dl[4] * qm[4] + dl[5] * qm[5] + dl[6] * qm[6] + dl[7] * qm[7]; /* sum = rounding term + dot(dl, qm) */
+
+    dx[0] = dx[1]; dx[1] = dx[2]; dx[2] = dx[3]; dx[3] = dx[4]; /* slide slots 1..4 down into 0..3 */
+    dl[0] = dl[1]; dl[1] = dl[2]; dl[2] = dl[3]; dl[3] = dl[4];
+    /* dx[4..7] are rebuilt from dl[4..7] >> 30 combined with fixed low-bit masks, before dl[4..7] change below */
+    dx[4] = ((dl[4] >> 30) | 1);
+    dx[5] = ((dl[5] >> 30) | 2) & ~1;
+    dx[6] = ((dl[6] >> 30) | 2) & ~1;
+    dx[7] = ((dl[7] >> 30) | 4) & ~3;
+    /* fold the unmodified input into dl[4..7]; dl[7] ends up holding that input */
+    dl[4] = -dl[5]; dl[5] = -dl[6];
+    dl[6] = *in - dl[7]; dl[7] = *in;
+    dl[5] += dl[6]; dl[4] += dl[5];
+
+    *in -= (sum >> c->shift); /* subtract the scaled prediction, leaving the residual in *in */
+    c->error = *in;           /* the residual's sign selects the weight update on the next call */
+}
+
+static int32_t get_sample(const AVFrame *frame, int sample,
+                          enum AVSampleFormat format)
+{   /* Return element `sample` of frame->data[0] as a signed value, read according to `format`. */
+    int32_t ret;
+
+    if (format == AV_SAMPLE_FMT_U8) {
+        ret = frame->data[0][sample] - 0x80; /* unsigned 8-bit -> signed, centred on zero */
+    } else if (format == AV_SAMPLE_FMT_S16) {
+        const int16_t *ptr = (const int16_t *)frame->data[0];
+        ret = ptr[sample];
+    } else { /* every other format is read as 32-bit */
+        const int32_t *ptr = (const int32_t *)frame->data[0];
+        ret = ptr[sample] >> 8; /* drop the low 8 bits, leaving a 24-bit value */
+    }
+
+    return ret;
+}
+
+static int tta_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
+                            const AVFrame *frame, int *got_packet_ptr)
+{   /* Encode the interleaved samples of `frame` into avpkt (coded residuals + CRC32); returns 0 or a negative error. */
+    TTAEncContext *s = avctx->priv_data;
+    PutBitContext pb;
+    int ret, i, out_bytes, cur_chan = 0, res = 0, samples = 0;
+    // packet buffer is sized at twice the raw PCM size (2 * bps bytes per sample per channel)
+    if ((ret = ff_alloc_packet2(avctx, avpkt, frame->nb_samples * 2 * avctx->channels * s->bps)) < 0)
+        return ret;
+    init_put_bits(&pb, avpkt->data, avpkt->size);
+
+    // init per channel states
+    for (i = 0; i < avctx->channels; i++) {
+        s->ch_ctx[i].predictor = 0;
+        ff_tta_filter_init(&s->ch_ctx[i].filter, ff_tta_filter_configs[s->bps - 1]);
+        ff_tta_rice_init(&s->ch_ctx[i].rice, 10, 10);
+    }
+
+    for (i = 0; i < frame->nb_samples * avctx->channels; i++) {
+        TTAChannel *c = &s->ch_ctx[cur_chan];
+        TTAFilter *filter = &c->filter;
+        TTARice *rice = &c->rice;
+        uint32_t k, unary, outval;
+        int32_t value, temp;
+
+        value = get_sample(frame, samples++, avctx->sample_fmt);
+        // every channel but the last is coded as (next channel - this one); the last has half the preceding difference subtracted
+        if (avctx->channels > 1) {
+            if (cur_chan < avctx->channels - 1)
+                value  = res = get_sample(frame, samples, avctx->sample_fmt) - value;
+            else
+                value -= res / 2;
+        }
+
+        temp = value; // pre-prediction value becomes the next predictor
+#define PRED(x, k) (int32_t)((((uint64_t)(x) << (k)) - (x)) >> (k))
+        switch (s->bps) {
+        case 1: value -= PRED(c->predictor, 4); break;
+        case 2:
+        case 3: value -= PRED(c->predictor, 5); break;
+        }
+        c->predictor = temp;
+
+        ttafilter_process(filter, &value);
+        outval = (value > 0) ? (value << 1) - 1: -value << 1; // map to unsigned: v > 0 -> 2v - 1, v <= 0 -> -2v
+
+        k = rice->k0; // parameter in effect before this sample's adaptation
+
+        rice->sum0 += outval - (rice->sum0 >> 4);
+        if (rice->k0 > 0 && rice->sum0 < ff_tta_shift_16[rice->k0])
+            rice->k0--;
+        else if (rice->sum0 > ff_tta_shift_16[rice->k0 + 1])
+            rice->k0++;
+
+        if (outval >= ff_tta_shift_1[k]) { // does not fit in k bits: subtract the threshold and code with k1
+            outval -= ff_tta_shift_1[k];
+            k = rice->k1;
+
+            rice->sum1 += outval - (rice->sum1 >> 4);
+            if (rice->k1 > 0 && rice->sum1 < ff_tta_shift_16[rice->k1])
+                rice->k1--;
+            else if (rice->sum1 > ff_tta_shift_16[rice->k1 + 1])
+                rice->k1++;
+
+            unary = 1 + (outval >> k); // number of one bits to emit, written in chunks of at most 31
+            do {
+                if (unary > 31) {
+                    put_bits(&pb, 31, 0x7FFFFFFF);
+                    unary -= 31;
+                } else {
+                    put_bits(&pb, unary, (1U << unary) - 1); // 1U: unary may be 31 and 1 << 31 overflows int
+                    unary = 0;
+                }
+            } while (unary);
+        }
+
+        put_bits(&pb, 1, 0); // zero bit terminating the run of ones
+
+        if (k)
+            put_bits(&pb, k, outval & (ff_tta_shift_1[k] - 1)); // low k bits of outval
+
+        if (cur_chan < avctx->channels - 1)
+            cur_chan++;
+        else
+            cur_chan = 0;
+    }
+
+    flush_put_bits(&pb);
+    out_bytes = put_bits_count(&pb) >> 3;
+    put_bits32(&pb, av_crc(s->crc_table, UINT32_MAX, avpkt->data, out_bytes) ^ UINT32_MAX); // CRC over the coded bytes, appended as 4 more bytes
+    flush_put_bits(&pb);
+
+    avpkt->pts      = frame->pts;
+    avpkt->size     = out_bytes + 4;
+    avpkt->duration = ff_samples_to_time_base(avctx, frame->nb_samples);
+    *got_packet_ptr = 1;
+    return 0;
+}
+
+static av_cold int tta_encode_close(AVCodecContext *avctx)
+{   /* Release the encoder's per-channel state; always returns 0. */
+    TTAEncContext *s = avctx->priv_data;
+    av_freep(&s->ch_ctx); /* frees the array allocated in tta_encode_init */
+    return 0;
+}
+
+AVCodec ff_tta_encoder = { /* codec descriptor wiring up the TTA encoder callbacks defined in this file */
+    .name           = "tta",
+    .long_name      = NULL_IF_CONFIG_SMALL("TTA (True Audio)"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_TTA,
+    .priv_data_size = sizeof(TTAEncContext),
+    .init           = tta_encode_init,
+    .close          = tta_encode_close,
+    .encode2        = tta_encode_frame,
+    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_LOSSLESS,
+    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_U8, /* the three input formats handled by tta_encode_init/get_sample */
+                                                     AV_SAMPLE_FMT_S16,
+                                                     AV_SAMPLE_FMT_S32,
+                                                     AV_SAMPLE_FMT_NONE },
+};
diff --git a/libavcodec/twinvq.c b/libavcodec/twinvq.c
index e1b1c7b..08a7a9f 100644
--- a/libavcodec/twinvq.c
+++ b/libavcodec/twinvq.c
@@ -2,20 +2,20 @@
  * TwinVQ decoder
  * Copyright (c) 2009 Vitor Sessak
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -487,10 +487,8 @@ int ff_twinvq_decode_frame(AVCodecContext *avctx, void *data,
     /* get output buffer */
     if (tctx->discarded_packets >= 2) {
         frame->nb_samples = mtab->size * tctx->frames_per_packet;
-        if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
             return ret;
-        }
         out = (float **)frame->extended_data;
     }
 
diff --git a/libavcodec/twinvq.h b/libavcodec/twinvq.h
index 7601e5b..c4e9688 100644
--- a/libavcodec/twinvq.h
+++ b/libavcodec/twinvq.h
@@ -2,20 +2,20 @@
  * TwinVQ decoder
  * Copyright (c) 2009 Vitor Sessak
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/twinvq_data.h b/libavcodec/twinvq_data.h
index 01a54a5..375acc2 100644
--- a/libavcodec/twinvq_data.h
+++ b/libavcodec/twinvq_data.h
@@ -2,20 +2,20 @@
  * TwinVQ decoder
  * Copyright (c) 2009 Vitor Sessak
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/twinvqdec.c b/libavcodec/twinvqdec.c
index 65028a6..67ce031 100644
--- a/libavcodec/twinvqdec.c
+++ b/libavcodec/twinvqdec.c
@@ -2,20 +2,20 @@
  * TwinVQ decoder
  * Copyright (c) 2009 Vitor Sessak
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -312,7 +312,7 @@ static int twinvq_read_bitstream(AVCodecContext *avctx, TwinVQContext *tctx,
         }
     }
 
-    return 0;
+    return (get_bits_count(&gb) + 7) / 8;
 }
 
 static av_cold int twinvq_decode_init(AVCodecContext *avctx)
diff --git a/libavcodec/txd.c b/libavcodec/txd.c
index 8f12291..ad1a015 100644
--- a/libavcodec/txd.c
+++ b/libavcodec/txd.c
@@ -4,27 +4,27 @@
  *
  * See also: http://wiki.multimedia.cx/index.php?title=TXD
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "libavutil/intreadwrite.h"
 #include "libavutil/imgutils.h"
-#include "avcodec.h"
 #include "bytestream.h"
+#include "avcodec.h"
 #include "internal.h"
 #include "s3tc.h"
 
@@ -66,10 +66,8 @@ static int txd_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
         return ret;
 
-    if ((ret = ff_get_buffer(avctx, p, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
         return ret;
-    }
 
     p->pict_type = AV_PICTURE_TYPE_I;
 
@@ -82,6 +80,8 @@ static int txd_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             v = bytestream2_get_be32(&gb);
             pal[y] = (v >> 8) + (v << 24);
         }
+        if (bytestream2_get_bytes_left(&gb) < w * h)
+            return AVERROR_INVALIDDATA;
         bytestream2_skip(&gb, 4);
         for (y=0; y<h; y++) {
             bytestream2_get_buffer(&gb, ptr, w);
@@ -94,9 +94,13 @@ static int txd_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             if (!(flags & 1))
                 goto unsupported;
         case FF_S3TC_DXT1:
+            if (bytestream2_get_bytes_left(&gb) < (w/4) * (h/4) * 8)
+                return AVERROR_INVALIDDATA;
             ff_decode_dxt1(&gb, ptr, w, h, stride);
             break;
         case FF_S3TC_DXT3:
+            if (bytestream2_get_bytes_left(&gb) < (w/4) * (h/4) * 16)
+                return AVERROR_INVALIDDATA;
             ff_decode_dxt3(&gb, ptr, w, h, stride);
             break;
         default:
@@ -106,6 +110,8 @@ static int txd_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         switch (d3d_format) {
         case 0x15:
         case 0x16:
+            if (bytestream2_get_bytes_left(&gb) < h * w * 4)
+                return AVERROR_INVALIDDATA;
             for (y=0; y<h; y++) {
                 bytestream2_get_buffer(&gb, ptr, w * 4);
                 ptr += stride;
diff --git a/libavcodec/ulti.c b/libavcodec/ulti.c
index 186f1a6..6b76214 100644
--- a/libavcodec/ulti.c
+++ b/libavcodec/ulti.c
@@ -2,20 +2,20 @@
  * IBM Ultimotion Video Decoder
  * Copyright (C) 2004 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -227,10 +227,8 @@ static int ulti_decode_frame(AVCodecContext *avctx,
     int skip;
     int tmp;
 
-    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, s->frame)) < 0)
         return ret;
-    }
 
     bytestream2_init(&s->gb, buf, buf_size);
 
diff --git a/libavcodec/ulti_cb.h b/libavcodec/ulti_cb.h
index 0bd83ff..7061d83 100644
--- a/libavcodec/ulti_cb.h
+++ b/libavcodec/ulti_cb.h
@@ -2,20 +2,20 @@
  * IBM Ultimotion Video Decoder
  * copyright (C) 2004 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/unary.h b/libavcodec/unary.h
index d14929f..908dc93 100644
--- a/libavcodec/unary.h
+++ b/libavcodec/unary.h
@@ -1,20 +1,20 @@
 /*
  * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index dc7ffc4..6071380 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2001 Fabrice Bellard
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,9 +26,11 @@
  */
 
 #include "config.h"
+#include "libavutil/atomic.h"
 #include "libavutil/attributes.h"
 #include "libavutil/avassert.h"
 #include "libavutil/avstring.h"
+#include "libavutil/bprint.h"
 #include "libavutil/channel_layout.h"
 #include "libavutil/crc.h"
 #include "libavutil/frame.h"
@@ -43,46 +45,139 @@
 #include "me_cmp.h"
 #include "mpegvideo.h"
 #include "thread.h"
+#include "frame_thread_encoder.h"
 #include "internal.h"
+#include "raw.h"
 #include "bytestream.h"
 #include "version.h"
 #include <stdlib.h>
 #include <stdarg.h>
 #include <limits.h>
 #include <float.h>
+#if CONFIG_ICONV
+# include <iconv.h>
+#endif
+
+#if HAVE_PTHREADS
+#include <pthread.h>
+#elif HAVE_W32THREADS
+#include "compat/w32pthreads.h"
+#elif HAVE_OS2THREADS
+#include "compat/os2threads.h"
+#endif
+
+#if HAVE_PTHREADS || HAVE_W32THREADS || HAVE_OS2THREADS
+static int default_lockmgr_cb(void **arg, enum AVLockOp op)
+{
+    void * volatile * mutex = arg;
+    int err;
+    /* Built-in lock manager callback: *arg holds a heap-allocated pthread mutex that is created on first OBTAIN. */
+    switch (op) {
+    case AV_LOCK_CREATE: /* nothing to do here; creation is deferred to AV_LOCK_OBTAIN */
+        return 0;
+    case AV_LOCK_OBTAIN:
+        if (!*mutex) { /* no mutex installed yet: build one and try to publish it */
+            pthread_mutex_t *tmp = av_malloc(sizeof(pthread_mutex_t));
+            if (!tmp)
+                return AVERROR(ENOMEM);
+            if ((err = pthread_mutex_init(tmp, NULL))) {
+                av_free(tmp);
+                return AVERROR(err);
+            }
+            if (avpriv_atomic_ptr_cas(mutex, NULL, tmp)) { /* non-NULL result: presumably another mutex was installed first -- TODO confirm CAS return convention */
+                pthread_mutex_destroy(tmp); /* discard our unused candidate */
+                av_free(tmp);
+            }
+        }
+        /* lock whichever mutex is now stored in *mutex */
+        if ((err = pthread_mutex_lock(*mutex)))
+            return AVERROR(err);
+
+        return 0;
+    case AV_LOCK_RELEASE: /* NOTE(review): *mutex is not checked for NULL here; assumes a prior successful OBTAIN */
+        if ((err = pthread_mutex_unlock(*mutex)))
+            return AVERROR(err);
+
+        return 0;
+    case AV_LOCK_DESTROY:
+        if (*mutex)
+            pthread_mutex_destroy(*mutex);
+        av_free(*mutex);
+        avpriv_atomic_ptr_cas(mutex, *mutex, NULL); /* reset the slot to NULL after freeing */
+        return 0;
+    }
+    return 1; /* op matched none of the cases above */
+}
+static int (*lockmgr_cb)(void **mutex, enum AVLockOp op) = default_lockmgr_cb;
+#else
+static int (*lockmgr_cb)(void **mutex, enum AVLockOp op) = NULL;
+#endif
 
+
+volatile int ff_avcodec_locked;
 static int volatile entangled_thread_counter = 0;
-static int (*lockmgr_cb)(void **mutex, enum AVLockOp op);
 static void *codec_mutex;
 static void *avformat_mutex;
 
+#if CONFIG_RAISE_MAJOR
+#    define LIBNAME "LIBAVCODEC_155"
+#else
+#    define LIBNAME "LIBAVCODEC_55"
+#endif
+
 #if FF_API_FAST_MALLOC && CONFIG_SHARED && HAVE_SYMVER
-FF_SYMVER(void*, av_fast_realloc, (void *ptr, unsigned int *size, size_t min_size), "LIBAVCODEC_55")
+FF_SYMVER(void*, av_fast_realloc, (void *ptr, unsigned int *size, size_t min_size), LIBNAME)
 {
     return av_fast_realloc(ptr, size, min_size);
 }
 
-FF_SYMVER(void, av_fast_malloc, (void *ptr, unsigned int *size, size_t min_size), "LIBAVCODEC_55")
+FF_SYMVER(void, av_fast_malloc, (void *ptr, unsigned int *size, size_t min_size), LIBNAME)
 {
     av_fast_malloc(ptr, size, min_size);
 }
 #endif
 
-void av_fast_padded_malloc(void *ptr, unsigned int *size, size_t min_size)
+static inline int ff_fast_malloc(void *ptr, unsigned int *size, size_t min_size, int zero_realloc)
 {
     void **p = ptr;
+    if (min_size < *size) /* current allocation is already strictly larger than the request */
+        return 0; /* 0: buffer kept untouched, nothing reallocated */
+    min_size = FFMAX(17 * min_size / 16 + 32, min_size); /* grow by ~1/16 plus 32 bytes, but never below the requested size */
+    av_free(*p); /* old contents are dropped, not copied into the new buffer */
+    *p = zero_realloc ? av_mallocz(min_size) : av_malloc(min_size); /* zero_realloc selects a zero-filled allocation */
+    if (!*p)
+        min_size = 0; /* allocation failed: report a zero-sized buffer */
+    *size = min_size;
+    return 1; /* 1: the old buffer was freed and a new allocation was attempted */
+}
+
+void av_fast_padded_malloc(void *ptr, unsigned int *size, size_t min_size)
+{
+    uint8_t **p = ptr;
     if (min_size > SIZE_MAX - FF_INPUT_BUFFER_PADDING_SIZE) {
         av_freep(p);
         *size = 0;
         return;
     }
-    av_fast_malloc(p, size, min_size + FF_INPUT_BUFFER_PADDING_SIZE);
-    if (*size)
-        memset((uint8_t *)*p + min_size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+    if (!ff_fast_malloc(p, size, min_size + FF_INPUT_BUFFER_PADDING_SIZE, 1))
+        memset(*p + min_size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+}
+
+void av_fast_padded_mallocz(void *ptr, unsigned int *size, size_t min_size) /* like ff_fast_malloc() plus padding, with the whole buffer zeroed on return */
+{
+    uint8_t **p = ptr;
+    if (min_size > SIZE_MAX - FF_INPUT_BUFFER_PADDING_SIZE) { /* adding the padding would overflow size_t */
+        av_freep(p); /* give up: release the buffer and report size 0 */
+        *size = 0;
+        return;
+    }
+    if (!ff_fast_malloc(p, size, min_size + FF_INPUT_BUFFER_PADDING_SIZE, 1)) /* 0 means the existing buffer was reused */
+        memset(*p, 0, min_size + FF_INPUT_BUFFER_PADDING_SIZE); /* reused buffer: clear payload and padding by hand (fresh ones come from av_mallocz) */
 }
 
 /* encoder management */
 static AVCodec *first_avcodec = NULL;
+static AVCodec **last_avcodec = &first_avcodec;
 
 AVCodec *av_codec_next(const AVCodec *c)
 {
@@ -118,12 +213,13 @@ av_cold void avcodec_register(AVCodec *codec)
 {
     AVCodec **p;
     avcodec_init();
-    p = &first_avcodec;
-    while (*p != NULL)
-        p = &(*p)->next;
-    *p          = codec;
+    p = last_avcodec;
     codec->next = NULL;
 
+    while(*p || avpriv_atomic_ptr_cas((void * volatile *)p, NULL, codec))
+        p = &(*p)->next;
+    last_avcodec = &codec->next;
+
     if (codec->init_static_data)
         codec->init_static_data(codec);
 }
@@ -138,7 +234,10 @@ unsigned avcodec_get_edge_width(void)
 #if FF_API_SET_DIMENSIONS
 void avcodec_set_dimensions(AVCodecContext *s, int width, int height)
 {
-    ff_set_dimensions(s, width, height);
+    int ret = ff_set_dimensions(s, width, height);
+    if (ret < 0) {
+        av_log(s, AV_LOG_WARNING, "Failed to set dimensions %d %d\n", width, height);
+    }
 }
 #endif
 
@@ -148,8 +247,11 @@ int ff_set_dimensions(AVCodecContext *s, int width, int height)
 
     if (ret < 0)
         width = height = 0;
-    s->width  = s->coded_width  = width;
-    s->height = s->coded_height = height;
+
+    s->coded_width  = width;
+    s->coded_height = height;
+    s->width        = FF_CEIL_RSHIFT(width,  s->lowres);
+    s->height       = FF_CEIL_RSHIFT(height, s->lowres);
 
     return ret;
 }
@@ -189,12 +291,6 @@ int ff_side_data_update_matrix_encoding(AVFrame *frame,
     return 0;
 }
 
-#if HAVE_SIMD_ALIGN_16
-#   define STRIDE_ALIGN 16
-#else
-#   define STRIDE_ALIGN 8
-#endif
-
 void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height,
                                int linesize_align[AV_NUM_DATA_POINTERS])
 {
@@ -210,6 +306,7 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height,
     case AV_PIX_FMT_YUV422P:
     case AV_PIX_FMT_YUV440P:
     case AV_PIX_FMT_YUV444P:
+    case AV_PIX_FMT_GBRAP:
     case AV_PIX_FMT_GBRP:
     case AV_PIX_FMT_GRAY8:
     case AV_PIX_FMT_GRAY16BE:
@@ -225,26 +322,63 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height,
     case AV_PIX_FMT_YUV420P9BE:
     case AV_PIX_FMT_YUV420P10LE:
     case AV_PIX_FMT_YUV420P10BE:
+    case AV_PIX_FMT_YUV420P12LE:
+    case AV_PIX_FMT_YUV420P12BE:
+    case AV_PIX_FMT_YUV420P14LE:
+    case AV_PIX_FMT_YUV420P14BE:
+    case AV_PIX_FMT_YUV420P16LE:
+    case AV_PIX_FMT_YUV420P16BE:
+    case AV_PIX_FMT_YUVA420P9LE:
+    case AV_PIX_FMT_YUVA420P9BE:
+    case AV_PIX_FMT_YUVA420P10LE:
+    case AV_PIX_FMT_YUVA420P10BE:
+    case AV_PIX_FMT_YUVA420P16LE:
+    case AV_PIX_FMT_YUVA420P16BE:
     case AV_PIX_FMT_YUV422P9LE:
     case AV_PIX_FMT_YUV422P9BE:
     case AV_PIX_FMT_YUV422P10LE:
     case AV_PIX_FMT_YUV422P10BE:
+    case AV_PIX_FMT_YUV422P12LE:
+    case AV_PIX_FMT_YUV422P12BE:
+    case AV_PIX_FMT_YUV422P14LE:
+    case AV_PIX_FMT_YUV422P14BE:
+    case AV_PIX_FMT_YUV422P16LE:
+    case AV_PIX_FMT_YUV422P16BE:
+    case AV_PIX_FMT_YUVA422P9LE:
+    case AV_PIX_FMT_YUVA422P9BE:
     case AV_PIX_FMT_YUVA422P10LE:
     case AV_PIX_FMT_YUVA422P10BE:
+    case AV_PIX_FMT_YUVA422P16LE:
+    case AV_PIX_FMT_YUVA422P16BE:
     case AV_PIX_FMT_YUV444P9LE:
     case AV_PIX_FMT_YUV444P9BE:
     case AV_PIX_FMT_YUV444P10LE:
     case AV_PIX_FMT_YUV444P10BE:
+    case AV_PIX_FMT_YUV444P12LE:
+    case AV_PIX_FMT_YUV444P12BE:
+    case AV_PIX_FMT_YUV444P14LE:
+    case AV_PIX_FMT_YUV444P14BE:
+    case AV_PIX_FMT_YUV444P16LE:
+    case AV_PIX_FMT_YUV444P16BE:
+    case AV_PIX_FMT_YUVA444P9LE:
+    case AV_PIX_FMT_YUVA444P9BE:
     case AV_PIX_FMT_YUVA444P10LE:
     case AV_PIX_FMT_YUVA444P10BE:
+    case AV_PIX_FMT_YUVA444P16LE:
+    case AV_PIX_FMT_YUVA444P16BE:
     case AV_PIX_FMT_GBRP9LE:
     case AV_PIX_FMT_GBRP9BE:
     case AV_PIX_FMT_GBRP10LE:
     case AV_PIX_FMT_GBRP10BE:
+    case AV_PIX_FMT_GBRP12LE:
+    case AV_PIX_FMT_GBRP12BE:
+    case AV_PIX_FMT_GBRP14LE:
+    case AV_PIX_FMT_GBRP14BE:
         w_align = 16; //FIXME assume 16 pixel per macroblock
         h_align = 16 * 2; // interlaced needs 2 macroblocks height
         break;
     case AV_PIX_FMT_YUV411P:
+    case AV_PIX_FMT_YUVJ411P:
     case AV_PIX_FMT_UYYVYY411:
         w_align = 32;
         h_align = 8;
@@ -254,15 +388,18 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height,
             w_align = 64;
             h_align = 64;
         }
+        break;
     case AV_PIX_FMT_RGB555:
         if (s->codec_id == AV_CODEC_ID_RPZA) {
             w_align = 4;
             h_align = 4;
         }
+        break;
     case AV_PIX_FMT_PAL8:
     case AV_PIX_FMT_BGR8:
     case AV_PIX_FMT_RGB8:
-        if (s->codec_id == AV_CODEC_ID_SMC) {
+        if (s->codec_id == AV_CODEC_ID_SMC ||
+            s->codec_id == AV_CODEC_ID_CINEPAK) {
             w_align = 4;
             h_align = 4;
         }
@@ -274,16 +411,27 @@ void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height,
             h_align = 4;
         }
         break;
+    case AV_PIX_FMT_RGB24:
+        if (s->codec_id == AV_CODEC_ID_CINEPAK) {
+            w_align = 4;
+            h_align = 4;
+        }
+        break;
     default:
         w_align = 1;
         h_align = 1;
         break;
     }
 
+    if (s->codec_id == AV_CODEC_ID_IFF_ILBM || s->codec_id == AV_CODEC_ID_IFF_BYTERUN1) {
+        w_align = FFMAX(w_align, 8);
+    }
+
     *width  = FFALIGN(*width, w_align);
     *height = FFALIGN(*height, h_align);
-    if (s->codec_id == AV_CODEC_ID_H264)
+    if (s->codec_id == AV_CODEC_ID_H264 || s->lowres)
         // some of the optimized chroma MC reads one line too much
+        // which is also done in mpeg decoders with lowres > 0
         *height += 2;
 
     for (i = 0; i < 4; i++)
@@ -305,6 +453,29 @@ void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height)
     *width              = FFALIGN(*width, align);
 }
 
+int avcodec_enum_to_chroma_pos(int *xpos, int *ypos, enum AVChromaLocation pos)
+{
+    if (pos <= AVCHROMA_LOC_UNSPECIFIED || pos >= AVCHROMA_LOC_NB) /* only values strictly between UNSPECIFIED and NB are accepted */
+        return AVERROR(EINVAL);
+    pos--; /* shift so the first accepted value (UNSPECIFIED + 1) maps to UNSPECIFIED */
+    /* Convert a chroma location enum into x/y offsets written to *xpos and *ypos; returns 0 on success. */
+    *xpos = (pos&1) * 128; /* low bit selects 0 or 128 */
+    *ypos = ((pos>>1)^(pos<4)) * 128; /* (pos>>1) XOR (pos<4): 128 while that XOR is 1, 0 when it is 0, 256 when it is 2 */
+
+    return 0;
+}
+
+enum AVChromaLocation avcodec_chroma_pos_to_enum(int xpos, int ypos)
+{
+    int pos, xout, yout;
+    /* Inverse of avcodec_enum_to_chroma_pos(): try every accepted enum value and return the first whose offsets match. */
+    for (pos = AVCHROMA_LOC_UNSPECIFIED + 1; pos < AVCHROMA_LOC_NB; pos++) {
+        if (avcodec_enum_to_chroma_pos(&xout, &yout, pos) == 0 && xout == xpos && yout == ypos)
+            return pos;
+    }
+    return AVCHROMA_LOC_UNSPECIFIED; /* no enum value produces (xpos, ypos) */
+}
+
 int avcodec_fill_audio_frame(AVFrame *frame, int nb_channels,
                              enum AVSampleFormat sample_fmt, const uint8_t *buf,
                              int buf_size, int align)
@@ -319,7 +490,7 @@ int avcodec_fill_audio_frame(AVFrame *frame, int nb_channels,
 
     planar = av_sample_fmt_is_planar(sample_fmt);
     if (planar && nb_channels > AV_NUM_DATA_POINTERS) {
-        if (!(frame->extended_data = av_mallocz(nb_channels *
+        if (!(frame->extended_data = av_mallocz_array(nb_channels,
                                                 sizeof(*frame->extended_data))))
             return AVERROR(ENOMEM);
     } else {
@@ -327,10 +498,10 @@ int avcodec_fill_audio_frame(AVFrame *frame, int nb_channels,
     }
 
     if ((ret = av_samples_fill_arrays(frame->extended_data, &frame->linesize[0],
-                                      buf, nb_channels, frame->nb_samples,
+                                      (uint8_t *)(intptr_t)buf, nb_channels, frame->nb_samples,
                                       sample_fmt, align)) < 0) {
         if (frame->extended_data != frame->data)
-            av_free(frame->extended_data);
+            av_freep(&frame->extended_data);
         return ret;
     }
     if (frame->extended_data != frame->data) {
@@ -385,7 +556,10 @@ static int update_frame_pool(AVCodecContext *avctx, AVFrame *frame)
             av_buffer_pool_uninit(&pool->pools[i]);
             pool->linesize[i] = picture.linesize[i];
             if (size[i]) {
-                pool->pools[i] = av_buffer_pool_init(size[i] + 16, NULL);
+                pool->pools[i] = av_buffer_pool_init(size[i] + 16 + STRIDE_ALIGN - 1,
+                                                     CONFIG_MEMORY_POISONING ?
+                                                        NULL :
+                                                        av_buffer_allocz);
                 if (!pool->pools[i]) {
                     ret = AVERROR(ENOMEM);
                     goto fail;
@@ -399,7 +573,7 @@ static int update_frame_pool(AVCodecContext *avctx, AVFrame *frame)
         break;
         }
     case AVMEDIA_TYPE_AUDIO: {
-        int ch     = av_get_channel_layout_nb_channels(frame->channel_layout);
+        int ch     = av_frame_get_channels(frame); //av_get_channel_layout_nb_channels(frame->channel_layout);
         int planar = av_sample_fmt_is_planar(frame->format);
         int planes = planar ? ch : 1;
 
@@ -446,17 +620,19 @@ static int audio_get_buffer(AVCodecContext *avctx, AVFrame *frame)
     frame->linesize[0] = pool->linesize[0];
 
     if (planes > AV_NUM_DATA_POINTERS) {
-        frame->extended_data = av_mallocz(planes * sizeof(*frame->extended_data));
+        frame->extended_data = av_mallocz_array(planes, sizeof(*frame->extended_data));
         frame->nb_extended_buf = planes - AV_NUM_DATA_POINTERS;
-        frame->extended_buf  = av_mallocz(frame->nb_extended_buf *
+        frame->extended_buf  = av_mallocz_array(frame->nb_extended_buf,
                                           sizeof(*frame->extended_buf));
         if (!frame->extended_data || !frame->extended_buf) {
             av_freep(&frame->extended_data);
             av_freep(&frame->extended_buf);
             return AVERROR(ENOMEM);
         }
-    } else
+    } else {
         frame->extended_data = frame->data;
+        av_assert0(frame->nb_extended_buf == 0);
+    }
 
     for (i = 0; i < FFMIN(planes, AV_NUM_DATA_POINTERS); i++) {
         frame->buf[i] = av_buffer_pool_get(pool->pools[0]);
@@ -518,6 +694,29 @@ fail:
     return AVERROR(ENOMEM);
 }
 
+void avpriv_color_frame(AVFrame *frame, const int c[4])
+{
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); /* NOTE(review): dereferenced below without a NULL check -- confirm callers only pass valid formats */
+    int p, y, x;
+    /* Fill each component plane p of a planar frame with the constant c[p]. */
+    av_assert0(desc->flags & AV_PIX_FMT_FLAG_PLANAR);
+    /* one pass per component; the loop bound is desc->nb_components */
+    for (p = 0; p<desc->nb_components; p++) {
+        uint8_t *dst = frame->data[p];
+        int is_chroma = p == 1 || p == 2; /* planes 1 and 2 use the subsampled dimensions */
+        int bytes  = is_chroma ? FF_CEIL_RSHIFT(frame->width,  desc->log2_chroma_w) : frame->width; /* samples per row; equals bytes only on the memset path */
+        int height = is_chroma ? FF_CEIL_RSHIFT(frame->height, desc->log2_chroma_h) : frame->height;
+        for (y = 0; y < height; y++) {
+            if (desc->comp[0].depth_minus1 >= 8) { /* component 0 deeper than 8 bits: write 16-bit samples */
+                for (x = 0; x<bytes; x++)
+                    ((uint16_t*)dst)[x] = c[p];
+            }else
+                memset(dst, c[p], bytes); /* 8 bits or fewer: one byte per sample */
+            dst += frame->linesize[p]; /* advance to the next row of this plane */
+        }
+    }
+}
+
 int avcodec_default_get_buffer2(AVCodecContext *avctx, AVFrame *frame, int flags)
 {
     int ret;
@@ -541,98 +740,67 @@ FF_ENABLE_DEPRECATION_WARNINGS
     }
 }
 
-#if FF_API_GET_BUFFER
-FF_DISABLE_DEPRECATION_WARNINGS
-int avcodec_default_get_buffer(AVCodecContext *avctx, AVFrame *frame)
-{
-    return avcodec_default_get_buffer2(avctx, frame, 0);
-}
-
-typedef struct CompatReleaseBufPriv {
-    AVCodecContext avctx;
-    AVFrame frame;
-} CompatReleaseBufPriv;
-
-static void compat_free_buffer(void *opaque, uint8_t *data)
-{
-    CompatReleaseBufPriv *priv = opaque;
-    if (priv->avctx.release_buffer)
-        priv->avctx.release_buffer(&priv->avctx, &priv->frame);
-    av_freep(&priv);
-}
-
-static void compat_release_buffer(void *opaque, uint8_t *data)
-{
-    AVBufferRef *buf = opaque;
-    av_buffer_unref(&buf);
-}
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif
-
-int ff_decode_frame_props(AVCodecContext *avctx, AVFrame *frame)
+int ff_init_buffer_info(AVCodecContext *avctx, AVFrame *frame)
 {
     AVPacket *pkt = avctx->internal->pkt;
-    uint8_t *packet_sd;
-    int size;
-    AVFrameSideData *frame_sd;
-
-#if FF_API_AVFRAME_COLORSPACE
-    frame->color_primaries = avctx->color_primaries;
-    frame->color_trc       = avctx->color_trc;
-    frame->colorspace      = avctx->colorspace;
-    frame->color_range     = avctx->color_range;
-    frame->chroma_location = avctx->chroma_sample_location;
-#endif
-
-    frame->reordered_opaque = avctx->reordered_opaque;
-    if (!pkt) {
-        frame->pkt_pts = AV_NOPTS_VALUE;
-        return 0;
-    }
 
-    frame->pkt_pts = pkt->pts;
+    if (pkt) {
+        uint8_t *packet_sd;
+        AVFrameSideData *frame_sd;
+        int size;
+        frame->pkt_pts = pkt->pts;
+        av_frame_set_pkt_pos     (frame, pkt->pos);
+        av_frame_set_pkt_duration(frame, pkt->duration);
+        av_frame_set_pkt_size    (frame, pkt->size);
+
+        /* copy the replaygain data to the output frame */
+        packet_sd = av_packet_get_side_data(pkt, AV_PKT_DATA_REPLAYGAIN, &size);
+        if (packet_sd) {
+            frame_sd = av_frame_new_side_data(frame, AV_FRAME_DATA_REPLAYGAIN, size);
+            if (!frame_sd)
+                return AVERROR(ENOMEM);
+
+            memcpy(frame_sd->data, packet_sd, size);
+        }
 
-    /* copy the replaygain data to the output frame */
-    packet_sd = av_packet_get_side_data(pkt, AV_PKT_DATA_REPLAYGAIN, &size);
-    if (packet_sd) {
-        frame_sd = av_frame_new_side_data(frame, AV_FRAME_DATA_REPLAYGAIN, size);
-        if (!frame_sd)
-            return AVERROR(ENOMEM);
+        /* copy the displaymatrix to the output frame */
+        packet_sd = av_packet_get_side_data(pkt, AV_PKT_DATA_DISPLAYMATRIX, &size);
+        if (packet_sd) {
+            frame_sd = av_frame_new_side_data(frame, AV_FRAME_DATA_DISPLAYMATRIX, size);
+            if (!frame_sd)
+                return AVERROR(ENOMEM);
 
-        memcpy(frame_sd->data, packet_sd, size);
-    }
-    /* copy the displaymatrix to the output frame */
-    packet_sd = av_packet_get_side_data(pkt, AV_PKT_DATA_DISPLAYMATRIX, &size);
-    if (packet_sd) {
-        frame_sd = av_frame_new_side_data(frame, AV_FRAME_DATA_DISPLAYMATRIX, size);
-        if (!frame_sd)
-            return AVERROR(ENOMEM);
-
-        memcpy(frame_sd->data, packet_sd, size);
+            memcpy(frame_sd->data, packet_sd, size);
+        }
+    } else {
+        frame->pkt_pts = AV_NOPTS_VALUE;
+        av_frame_set_pkt_pos     (frame, -1);
+        av_frame_set_pkt_duration(frame, 0);
+        av_frame_set_pkt_size    (frame, -1);
     }
+    frame->reordered_opaque = avctx->reordered_opaque;
 
-    return 0;
-}
-
-int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
-{
-    const AVHWAccel *hwaccel = avctx->hwaccel;
-    int override_dimensions = 1;
-    int ret;
+#if FF_API_AVFRAME_COLORSPACE
+    if (frame->color_primaries == AVCOL_PRI_UNSPECIFIED)
+        frame->color_primaries = avctx->color_primaries;
+    if (frame->color_trc == AVCOL_TRC_UNSPECIFIED)
+        frame->color_trc = avctx->color_trc;
+    if (av_frame_get_colorspace(frame) == AVCOL_SPC_UNSPECIFIED)
+        av_frame_set_colorspace(frame, avctx->colorspace);
+    if (av_frame_get_color_range(frame) == AVCOL_RANGE_UNSPECIFIED)
+        av_frame_set_color_range(frame, avctx->color_range);
+    if (frame->chroma_location == AVCHROMA_LOC_UNSPECIFIED)
+        frame->chroma_location = avctx->chroma_sample_location;
+#endif
 
-    switch (avctx->codec_type) {
+    switch (avctx->codec->type) {
     case AVMEDIA_TYPE_VIDEO:
-        if (frame->width <= 0 || frame->height <= 0) {
-            frame->width  = FFMAX(avctx->width, avctx->coded_width);
-            frame->height = FFMAX(avctx->height, avctx->coded_height);
-            override_dimensions = 0;
-        }
-        if (frame->format < 0)
-            frame->format              = avctx->pix_fmt;
+        frame->format              = avctx->pix_fmt;
         if (!frame->sample_aspect_ratio.num)
             frame->sample_aspect_ratio = avctx->sample_aspect_ratio;
 
-        if (av_image_check_sar(frame->width, frame->height,
+        if (frame->width && frame->height &&
+            av_image_check_sar(frame->width, frame->height,
                                frame->sample_aspect_ratio) < 0) {
             av_log(avctx, AV_LOG_WARNING, "ignoring invalid SAR: %u/%u\n",
                    frame->sample_aspect_ratio.num,
@@ -640,8 +808,6 @@ int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
             frame->sample_aspect_ratio = (AVRational){ 0, 1 };
         }
 
-        if ((ret = av_image_check_size(avctx->width, avctx->height, 0, avctx)) < 0)
-            return ret;
         break;
     case AVMEDIA_TYPE_AUDIO:
         if (!frame->sample_rate)
@@ -664,19 +830,72 @@ int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
                            avctx->channels);
                     return AVERROR(ENOSYS);
                 }
-
-                frame->channel_layout = av_get_default_channel_layout(avctx->channels);
-                if (!frame->channel_layout)
-                    frame->channel_layout = (1ULL << avctx->channels) - 1;
             }
         }
+        av_frame_set_channels(frame, avctx->channels);
         break;
-    default: return AVERROR(EINVAL);
     }
+    return 0;
+}
+
+#if FF_API_GET_BUFFER
+FF_DISABLE_DEPRECATION_WARNINGS
+int avcodec_default_get_buffer(AVCodecContext *avctx, AVFrame *frame)
+{
+    return avcodec_default_get_buffer2(avctx, frame, 0); /* legacy entry point: delegate to get_buffer2 with flags = 0 */
+}
+
+typedef struct CompatReleaseBufPriv {
+    AVCodecContext avctx; // held by value; compat_free_buffer() calls its release_buffer callback
+    AVFrame frame; // held by value; passed to that release_buffer callback
+    uint8_t avframe_padding[1024]; // hack to allow linking to an avutil with a larger AVFrame
+} CompatReleaseBufPriv;
+
+static void compat_free_buffer(void *opaque, uint8_t *data)
+{
+    CompatReleaseBufPriv *priv = opaque; /* opaque carries the private state; data is not used */
+    if (priv->avctx.release_buffer) /* the callback is optional */
+        priv->avctx.release_buffer(&priv->avctx, &priv->frame);
+    av_freep(&priv); /* finally free the private state itself */
+}
+
+static void compat_release_buffer(void *opaque, uint8_t *data)
+{
+    AVBufferRef *buf = opaque; /* opaque is an AVBufferRef; data is not used */
+    av_buffer_unref(&buf); /* drop that reference */
+}
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+int ff_decode_frame_props(AVCodecContext *avctx, AVFrame *frame)
+{
+    return ff_init_buffer_info(avctx, frame); /* thin wrapper: result of ff_init_buffer_info() is returned unchanged */
+}
+
+static int get_buffer_internal(AVCodecContext *avctx, AVFrame *frame, int flags)
+{
+    const AVHWAccel *hwaccel = avctx->hwaccel;
+    int override_dimensions = 1;
+    int ret;
 
+    if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
+        if ((ret = av_image_check_size(avctx->width, avctx->height, 0, avctx)) < 0 || avctx->pix_fmt<0) {
+            av_log(avctx, AV_LOG_ERROR, "video_get_buffer: image parameters invalid\n");
+            return AVERROR(EINVAL);
+        }
+    }
+    if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
+        if (frame->width <= 0 || frame->height <= 0) {
+            frame->width  = FFMAX(avctx->width,  FF_CEIL_RSHIFT(avctx->coded_width,  avctx->lowres));
+            frame->height = FFMAX(avctx->height, FF_CEIL_RSHIFT(avctx->coded_height, avctx->lowres));
+            override_dimensions = 0;
+        }
+    }
     ret = ff_decode_frame_props(avctx, frame);
     if (ret < 0)
         return ret;
+    if ((ret = ff_init_buffer_info(avctx, frame)) < 0)
+        return ret;
 
     if (hwaccel && hwaccel->alloc_frame) {
         ret = hwaccel->alloc_frame(avctx, frame);
@@ -686,7 +905,7 @@ int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
 #if FF_API_GET_BUFFER
 FF_DISABLE_DEPRECATION_WARNINGS
     /*
-     * Wrap an old get_buffer()-allocated buffer in an bunch of AVBuffers.
+     * Wrap an old get_buffer()-allocated buffer in a bunch of AVBuffers.
      * We wrap each plane in its own AVBuffer. Each of those has a reference to
      * a dummy AVBuffer as its private data, unreffing it on free.
      * When all the planes are freed, the dummy buffer's free callback calls
@@ -709,7 +928,7 @@ FF_DISABLE_DEPRECATION_WARNINGS
          * avcodec_default_get_buffer
          */
         if (frame->buf[0])
-            return 0;
+            goto end0;
 
         priv = av_mallocz(sizeof(*priv));
         if (!priv) {
@@ -766,7 +985,7 @@ do {                                                                    \
 
             if (planes > FF_ARRAY_ELEMS(frame->buf)) {
                 frame->nb_extended_buf = planes - FF_ARRAY_ELEMS(frame->buf);
-                frame->extended_buf = av_malloc(sizeof(*frame->extended_buf) *
+                frame->extended_buf = av_malloc_array(sizeof(*frame->extended_buf),
                                                 frame->nb_extended_buf);
                 if (!frame->extended_buf) {
                     ret = AVERROR(ENOMEM);
@@ -785,6 +1004,7 @@ do {                                                                    \
 
         av_buffer_unref(&dummy_buf);
 
+end0:
         frame->width  = avctx->width;
         frame->height = avctx->height;
 
@@ -810,13 +1030,29 @@ end:
     return ret;
 }
 
-int ff_reget_buffer(AVCodecContext *avctx, AVFrame *frame)
+int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
+{
+    int ret = get_buffer_internal(avctx, frame, flags); /* all real work happens in the internal helper */
+    if (ret < 0) /* log every failure in one place */
+        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    return ret; /* helper's result is passed through unchanged */
+}
+
+static int reget_buffer_internal(AVCodecContext *avctx, AVFrame *frame)
 {
     AVFrame *tmp;
     int ret;
 
     av_assert0(avctx->codec_type == AVMEDIA_TYPE_VIDEO);
 
+    if (frame->data[0] && (frame->width != avctx->width || frame->height != avctx->height || frame->format != avctx->pix_fmt)) {
+        av_log(avctx, AV_LOG_WARNING, "Picture changed from size:%dx%d fmt:%s to size:%dx%d fmt:%s in reget buffer()\n",
+               frame->width, frame->height, av_get_pix_fmt_name(frame->format), avctx->width, avctx->height, av_get_pix_fmt_name(avctx->pix_fmt));
+        av_frame_unref(frame);
+    }
+
+    ff_init_buffer_info(avctx, frame);
+
     if (!frame->data[0])
         return ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF);
 
@@ -841,9 +1077,19 @@ int ff_reget_buffer(AVCodecContext *avctx, AVFrame *frame)
     return 0;
 }
 
+int ff_reget_buffer(AVCodecContext *avctx, AVFrame *frame)
+{
+    int ret = reget_buffer_internal(avctx, frame); /* all real work happens in the internal helper */
+    if (ret < 0) /* log every failure in one place */
+        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+    return ret; /* helper's result is passed through unchanged */
+}
+
 #if FF_API_GET_BUFFER
 void avcodec_default_release_buffer(AVCodecContext *s, AVFrame *pic)
 {
+    av_assert0(s->codec_type == AVMEDIA_TYPE_VIDEO);
+
     av_frame_unref(pic);
 }
 
@@ -878,6 +1124,17 @@ int avcodec_default_execute2(AVCodecContext *c, int (*func)(AVCodecContext *c2,
     return 0;
 }
 
+enum AVPixelFormat avpriv_find_pix_fmt(const PixelFormatTag *tags,
+                                       unsigned int fourcc)
+{
+    while (tags->pix_fmt >= 0) { /* the table ends at the first entry with a negative pix_fmt */
+        if (tags->fourcc == fourcc)
+            return tags->pix_fmt; /* first entry with a matching fourcc wins */
+        tags++;
+    }
+    return AV_PIX_FMT_NONE; /* no entry matched */
+}
+
 static int is_hwaccel_pix_fmt(enum AVPixelFormat pix_fmt)
 {
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
@@ -918,7 +1175,8 @@ int ff_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt)
     av_freep(&avctx->internal->hwaccel_priv_data);
     avctx->hwaccel = NULL;
 
-    if (desc->flags & AV_PIX_FMT_FLAG_HWACCEL) {
+    if (desc->flags & AV_PIX_FMT_FLAG_HWACCEL &&
+        !(avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)) {
         AVHWAccel *hwaccel;
         int err;
 
@@ -952,30 +1210,20 @@ int ff_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt)
 #if FF_API_AVFRAME_LAVC
 void avcodec_get_frame_defaults(AVFrame *frame)
 {
-    if (frame->extended_data != frame->data)
+#if LIBAVCODEC_VERSION_MAJOR >= 55
+     // extended_data should explicitly be freed when needed, this code is unsafe currently
+     // also this is not compatible with the <55 ABI/API
+    if (frame->extended_data != frame->data && 0)
         av_freep(&frame->extended_data);
+#endif
 
     memset(frame, 0, sizeof(AVFrame));
-
-    frame->pts                 = AV_NOPTS_VALUE;
-    frame->key_frame           = 1;
-    frame->sample_aspect_ratio = (AVRational) {0, 1 };
-    frame->format              = -1; /* unknown */
-    frame->extended_data       = frame->data;
+    av_frame_unref(frame);
 }
 
 AVFrame *avcodec_alloc_frame(void)
 {
-    AVFrame *frame = av_mallocz(sizeof(AVFrame));
-
-    if (frame == NULL)
-        return NULL;
-
-FF_DISABLE_DEPRECATION_WARNINGS
-    avcodec_get_frame_defaults(frame);
-FF_ENABLE_DEPRECATION_WARNINGS
-
-    return frame;
+    return av_frame_alloc();
 }
 
 void avcodec_free_frame(AVFrame **frame)
@@ -984,6 +1232,58 @@ void avcodec_free_frame(AVFrame **frame)
 }
 #endif
 
+MAKE_ACCESSORS(AVCodecContext, codec, AVRational, pkt_timebase)
+MAKE_ACCESSORS(AVCodecContext, codec, const AVCodecDescriptor *, codec_descriptor)
+MAKE_ACCESSORS(AVCodecContext, codec, int, lowres)
+MAKE_ACCESSORS(AVCodecContext, codec, int, seek_preroll)
+MAKE_ACCESSORS(AVCodecContext, codec, uint16_t*, chroma_intra_matrix)
+
+int av_codec_get_max_lowres(const AVCodec *codec)
+{
+    return codec->max_lowres; /* plain field read; codec is dereferenced without a NULL check */
+}
+
+static void avcodec_get_subtitle_defaults(AVSubtitle *sub)
+{
+    memset(sub, 0, sizeof(*sub)); /* zero every field first */
+    sub->pts = AV_NOPTS_VALUE; /* then set pts to the AV_NOPTS_VALUE sentinel */
+}
+
+static int get_bit_rate(AVCodecContext *ctx)
+{
+    int bit_rate;
+    int bits_per_sample;
+
+    switch (ctx->codec_type) { /* the result depends on the media type of the context */
+    case AVMEDIA_TYPE_VIDEO:
+    case AVMEDIA_TYPE_DATA:
+    case AVMEDIA_TYPE_SUBTITLE:
+    case AVMEDIA_TYPE_ATTACHMENT:
+        bit_rate = ctx->bit_rate; /* these types report the stored value as-is */
+        break;
+    case AVMEDIA_TYPE_AUDIO:
+        bits_per_sample = av_get_bits_per_sample(ctx->codec_id); /* 0 selects the ctx->bit_rate fallback below */
+        bit_rate = bits_per_sample ? ctx->sample_rate * ctx->channels * bits_per_sample : ctx->bit_rate; /* NOTE(review): product is computed in int - confirm inputs are bounded */
+        break;
+    default:
+        bit_rate = 0; /* any other media type reports 0 */
+        break;
+    }
+    return bit_rate;
+}
+
+int attribute_align_arg ff_codec_open2_recursive(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **options)
+{
+    int ret = 0;
+
+    ff_unlock_avcodec(); /* presumably the caller holds the codec lock; release it because avcodec_open2() takes it itself */
+
+    ret = avcodec_open2(avctx, codec, options);
+
+    ff_lock_avcodec(avctx); /* take the lock again before returning; its return value is not checked */
+    return ret; /* result of the nested avcodec_open2() */
+}
+
 int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **options)
 {
     int ret = 0;
@@ -993,12 +1293,12 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
         return 0;
 
     if ((!codec && !avctx->codec)) {
-        av_log(avctx, AV_LOG_ERROR, "No codec provided to avcodec_open2().\n");
+        av_log(avctx, AV_LOG_ERROR, "No codec provided to avcodec_open2()\n");
         return AVERROR(EINVAL);
     }
     if ((codec && avctx->codec && codec != avctx->codec)) {
         av_log(avctx, AV_LOG_ERROR, "This AVCodecContext was allocated for %s, "
-                                    "but %s passed to avcodec_open2().\n", avctx->codec->name, codec->name);
+                                    "but %s passed to avcodec_open2()\n", avctx->codec->name, codec->name);
         return AVERROR(EINVAL);
     }
     if (!codec)
@@ -1010,18 +1310,9 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
     if (options)
         av_dict_copy(&tmp, *options, 0);
 
-    /* If there is a user-supplied mutex locking routine, call it. */
-    if (lockmgr_cb) {
-        if ((*lockmgr_cb)(&codec_mutex, AV_LOCK_OBTAIN))
-            return -1;
-    }
-
-    entangled_thread_counter++;
-    if (entangled_thread_counter != 1) {
-        av_log(avctx, AV_LOG_ERROR, "insufficient thread locking around avcodec_open/close()\n");
-        ret = -1;
-        goto end;
-    }
+    ret = ff_lock_avcodec(avctx);
+    if (ret < 0)
+        return ret;
 
     avctx->internal = av_mallocz(sizeof(AVCodecInternal));
     if (!avctx->internal) {
@@ -1061,17 +1352,21 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
     if ((ret = av_opt_set_dict(avctx, &tmp)) < 0)
         goto free_and_end;
 
-    if (avctx->coded_width && avctx->coded_height && !avctx->width && !avctx->height)
+    // only call ff_set_dimensions() for non H.264/VP6F codecs so as not to overwrite previously setup dimensions
+    if (!(avctx->coded_width && avctx->coded_height && avctx->width && avctx->height &&
+          (avctx->codec_id == AV_CODEC_ID_H264 || avctx->codec_id == AV_CODEC_ID_VP6F))) {
+    if (avctx->coded_width && avctx->coded_height)
         ret = ff_set_dimensions(avctx, avctx->coded_width, avctx->coded_height);
     else if (avctx->width && avctx->height)
         ret = ff_set_dimensions(avctx, avctx->width, avctx->height);
     if (ret < 0)
         goto free_and_end;
+    }
 
     if ((avctx->coded_width || avctx->coded_height || avctx->width || avctx->height)
         && (  av_image_check_size(avctx->coded_width, avctx->coded_height, 0, avctx) < 0
            || av_image_check_size(avctx->width,       avctx->height,       0, avctx) < 0)) {
-        av_log(avctx, AV_LOG_WARNING, "ignoring invalid width/height values\n");
+        av_log(avctx, AV_LOG_WARNING, "Ignoring invalid width/height values\n");
         ff_set_dimensions(avctx, 0, 0);
     }
 
@@ -1103,14 +1398,25 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
     }
     if (avctx->codec_id != codec->id || (avctx->codec_type != codec->type
                                          && avctx->codec_type != AVMEDIA_TYPE_ATTACHMENT)) {
-        av_log(avctx, AV_LOG_ERROR, "codec type or id mismatches\n");
+        av_log(avctx, AV_LOG_ERROR, "Codec type or id mismatches\n");
         ret = AVERROR(EINVAL);
         goto free_and_end;
     }
     avctx->frame_number = 0;
+    avctx->codec_descriptor = avcodec_descriptor_get(avctx->codec_id);
 
     if (avctx->codec->capabilities & CODEC_CAP_EXPERIMENTAL &&
         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
+        const char *codec_string = av_codec_is_encoder(codec) ? "encoder" : "decoder";
+        AVCodec *codec2;
+        av_log(avctx, AV_LOG_ERROR,
+               "The %s '%s' is experimental but experimental codecs are not enabled, "
+               "add '-strict %d' if you want to use it.\n",
+               codec_string, codec->name, FF_COMPLIANCE_EXPERIMENTAL);
+        codec2 = av_codec_is_encoder(codec) ? avcodec_find_encoder(codec->id) : avcodec_find_decoder(codec->id);
+        if (!(codec2->capabilities & CODEC_CAP_EXPERIMENTAL))
+            av_log(avctx, AV_LOG_ERROR, "Alternatively use the non experimental %s '%s'.\n",
+                codec_string, codec2->name);
         ret = AVERROR_EXPERIMENTAL;
         goto free_and_end;
     }
@@ -1121,7 +1427,19 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
         avctx->time_base.den = avctx->sample_rate;
     }
 
-    if (HAVE_THREADS) {
+    if (!HAVE_THREADS)
+        av_log(avctx, AV_LOG_WARNING, "Warning: not compiled with thread support, using thread emulation\n");
+
+    if (CONFIG_FRAME_THREAD_ENCODER) {
+        ff_unlock_avcodec(); //we will instantiate a few encoders thus kick the counter to prevent false detection of a problem
+        ret = ff_frame_thread_encoder_init(avctx, options ? *options : NULL);
+        ff_lock_avcodec(avctx);
+        if (ret < 0)
+            goto free_and_end;
+    }
+
+    if (HAVE_THREADS
+        && !(avctx->internal->frame_thread_encoder && (avctx->active_thread_type&FF_THREAD_FRAME))) {
         ret = ff_thread_init(avctx);
         if (ret < 0) {
             goto free_and_end;
@@ -1130,6 +1448,13 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
     if (!HAVE_THREADS && !(codec->capabilities & CODEC_CAP_AUTO_THREADS))
         avctx->thread_count = 1;
 
+    if (avctx->codec->max_lowres < avctx->lowres || avctx->lowres < 0) {
+        av_log(avctx, AV_LOG_ERROR, "The maximum value for lowres supported by the decoder is %d\n",
+               avctx->codec->max_lowres);
+        ret = AVERROR(EINVAL);
+        goto free_and_end;
+    }
+
     if (av_codec_is_encoder(avctx->codec)) {
         int i;
         if (avctx->codec->sample_fmts) {
@@ -1144,7 +1469,10 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
                 }
             }
             if (avctx->codec->sample_fmts[i] == AV_SAMPLE_FMT_NONE) {
-                av_log(avctx, AV_LOG_ERROR, "Specified sample_fmt is not supported.\n");
+                char buf[128];
+                snprintf(buf, sizeof(buf), "%d", avctx->sample_fmt);
+                av_log(avctx, AV_LOG_ERROR, "Specified sample format %s is invalid or not supported\n",
+                       (char *)av_x_if_null(av_get_sample_fmt_name(avctx->sample_fmt), buf));
                 ret = AVERROR(EINVAL);
                 goto free_and_end;
             }
@@ -1153,8 +1481,13 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
             for (i = 0; avctx->codec->pix_fmts[i] != AV_PIX_FMT_NONE; i++)
                 if (avctx->pix_fmt == avctx->codec->pix_fmts[i])
                     break;
-            if (avctx->codec->pix_fmts[i] == AV_PIX_FMT_NONE) {
-                av_log(avctx, AV_LOG_ERROR, "Specified pix_fmt is not supported\n");
+            if (avctx->codec->pix_fmts[i] == AV_PIX_FMT_NONE
+                && !((avctx->codec_id == AV_CODEC_ID_MJPEG || avctx->codec_id == AV_CODEC_ID_LJPEG)
+                     && avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL)) {
+                char buf[128];
+                snprintf(buf, sizeof(buf), "%d", avctx->pix_fmt);
+                av_log(avctx, AV_LOG_ERROR, "Specified pixel format %s is invalid or not supported\n",
+                       (char *)av_x_if_null(av_get_pix_fmt_name(avctx->pix_fmt), buf));
                 ret = AVERROR(EINVAL);
                 goto free_and_end;
             }
@@ -1164,55 +1497,90 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
                 if (avctx->sample_rate == avctx->codec->supported_samplerates[i])
                     break;
             if (avctx->codec->supported_samplerates[i] == 0) {
-                av_log(avctx, AV_LOG_ERROR, "Specified sample_rate is not supported\n");
+                av_log(avctx, AV_LOG_ERROR, "Specified sample rate %d is not supported\n",
+                       avctx->sample_rate);
                 ret = AVERROR(EINVAL);
                 goto free_and_end;
             }
         }
         if (avctx->codec->channel_layouts) {
             if (!avctx->channel_layout) {
-                av_log(avctx, AV_LOG_WARNING, "channel_layout not specified\n");
+                av_log(avctx, AV_LOG_WARNING, "Channel layout not specified\n");
             } else {
                 for (i = 0; avctx->codec->channel_layouts[i] != 0; i++)
                     if (avctx->channel_layout == avctx->codec->channel_layouts[i])
                         break;
                 if (avctx->codec->channel_layouts[i] == 0) {
-                    av_log(avctx, AV_LOG_ERROR, "Specified channel_layout is not supported\n");
+                    char buf[512];
+                    av_get_channel_layout_string(buf, sizeof(buf), -1, avctx->channel_layout);
+                    av_log(avctx, AV_LOG_ERROR, "Specified channel layout '%s' is not supported\n", buf);
                     ret = AVERROR(EINVAL);
                     goto free_and_end;
                 }
             }
         }
         if (avctx->channel_layout && avctx->channels) {
-            if (av_get_channel_layout_nb_channels(avctx->channel_layout) != avctx->channels) {
-                av_log(avctx, AV_LOG_ERROR, "channel layout does not match number of channels\n");
+            int channels = av_get_channel_layout_nb_channels(avctx->channel_layout);
+            if (channels != avctx->channels) {
+                char buf[512];
+                av_get_channel_layout_string(buf, sizeof(buf), -1, avctx->channel_layout);
+                av_log(avctx, AV_LOG_ERROR,
+                       "Channel layout '%s' with %d channels does not match number of specified channels %d\n",
+                       buf, channels, avctx->channels);
                 ret = AVERROR(EINVAL);
                 goto free_and_end;
             }
         } else if (avctx->channel_layout) {
             avctx->channels = av_get_channel_layout_nb_channels(avctx->channel_layout);
         }
+        if(avctx->codec_type == AVMEDIA_TYPE_VIDEO &&
+           avctx->codec_id != AV_CODEC_ID_PNG // For mplayer
+        ) {
+            if (avctx->width <= 0 || avctx->height <= 0) {
+                av_log(avctx, AV_LOG_ERROR, "dimensions not set\n");
+                ret = AVERROR(EINVAL);
+                goto free_and_end;
+            }
+        }
+        if (   (avctx->codec_type == AVMEDIA_TYPE_VIDEO || avctx->codec_type == AVMEDIA_TYPE_AUDIO)
+            && avctx->bit_rate>0 && avctx->bit_rate<1000) {
+            av_log(avctx, AV_LOG_WARNING, "Bitrate %d is extremely low, maybe you mean %dk\n", avctx->bit_rate, avctx->bit_rate);
+        }
 
         if (!avctx->rc_initial_buffer_occupancy)
             avctx->rc_initial_buffer_occupancy = avctx->rc_buffer_size * 3 / 4;
     }
 
-    if (avctx->codec->init && !(avctx->active_thread_type & FF_THREAD_FRAME)) {
+    avctx->pts_correction_num_faulty_pts =
+    avctx->pts_correction_num_faulty_dts = 0;
+    avctx->pts_correction_last_pts =
+    avctx->pts_correction_last_dts = INT64_MIN;
+
+    if (   avctx->codec->init && (!(avctx->active_thread_type&FF_THREAD_FRAME)
+        || avctx->internal->frame_thread_encoder)) {
         ret = avctx->codec->init(avctx);
         if (ret < 0) {
             goto free_and_end;
         }
     }
 
+    ret=0;
+
     if (av_codec_is_decoder(avctx->codec)) {
+        if (!avctx->bit_rate)
+            avctx->bit_rate = get_bit_rate(avctx);
         /* validate channel layout from the decoder */
         if (avctx->channel_layout) {
             int channels = av_get_channel_layout_nb_channels(avctx->channel_layout);
             if (!avctx->channels)
                 avctx->channels = channels;
             else if (channels != avctx->channels) {
+                char buf[512];
+                av_get_channel_layout_string(buf, sizeof(buf), -1, avctx->channel_layout);
                 av_log(avctx, AV_LOG_WARNING,
-                       "channel layout does not match number of channels\n");
+                       "Channel layout '%s' with %d channels does not match specified number of channels %d: "
+                       "ignoring specified channel layout\n",
+                       buf, channels, avctx->channels);
                 avctx->channel_layout = 0;
             }
         }
@@ -1221,14 +1589,46 @@ int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *code
             ret = AVERROR(EINVAL);
             goto free_and_end;
         }
+        if (avctx->sub_charenc) {
+            if (avctx->codec_type != AVMEDIA_TYPE_SUBTITLE) {
+                av_log(avctx, AV_LOG_ERROR, "Character encoding is only "
+                       "supported with subtitles codecs\n");
+                ret = AVERROR(EINVAL);
+                goto free_and_end;
+            } else if (avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB) {
+                av_log(avctx, AV_LOG_WARNING, "Codec '%s' is bitmap-based, "
+                       "subtitles character encoding will be ignored\n",
+                       avctx->codec_descriptor->name);
+                avctx->sub_charenc_mode = FF_SUB_CHARENC_MODE_DO_NOTHING;
+            } else {
+                /* input character encoding is set for a text based subtitle
+                 * codec at this point */
+                if (avctx->sub_charenc_mode == FF_SUB_CHARENC_MODE_AUTOMATIC)
+                    avctx->sub_charenc_mode = FF_SUB_CHARENC_MODE_PRE_DECODER;
+
+                if (avctx->sub_charenc_mode == FF_SUB_CHARENC_MODE_PRE_DECODER) {
+#if CONFIG_ICONV
+                    iconv_t cd = iconv_open("UTF-8", avctx->sub_charenc);
+                    if (cd == (iconv_t)-1) {
+                        av_log(avctx, AV_LOG_ERROR, "Unable to open iconv context "
+                               "with input character encoding \"%s\"\n", avctx->sub_charenc);
+                        ret = AVERROR(errno);
+                        goto free_and_end;
+                    }
+                    iconv_close(cd);
+#else
+                    av_log(avctx, AV_LOG_ERROR, "Character encoding subtitles "
+                           "conversion needs a libavcodec built with iconv support "
+                           "for this codec\n");
+                    ret = AVERROR(ENOSYS);
+                    goto free_and_end;
+#endif
+                }
+            }
+        }
     }
 end:
-    entangled_thread_counter--;
-
-    /* Release any user-supplied mutex. */
-    if (lockmgr_cb) {
-        (*lockmgr_cb)(&codec_mutex, AV_LOCK_RELEASE);
-    }
+    ff_unlock_avcodec();
     if (options) {
         av_dict_free(options);
         *options = tmp;
@@ -1247,10 +1647,31 @@ free_and_end:
     goto end;
 }
 
-int ff_alloc_packet(AVPacket *avpkt, int size)
+int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int64_t size)
 {
-    if (size > INT_MAX - FF_INPUT_BUFFER_PADDING_SIZE)
+    if (avpkt->size < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid negative user packet size %d\n", avpkt->size);
         return AVERROR(EINVAL);
+    }
+    if (size < 0 || size > INT_MAX - FF_INPUT_BUFFER_PADDING_SIZE) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid minimum required packet size %"PRId64" (max allowed is %d)\n",
+               size, INT_MAX - FF_INPUT_BUFFER_PADDING_SIZE);
+        return AVERROR(EINVAL);
+    }
+
+    if (avctx) {
+        av_assert0(!avpkt->data || avpkt->data != avctx->internal->byte_buffer);
+        if (!avpkt->data || avpkt->size < size) {
+            av_fast_padded_malloc(&avctx->internal->byte_buffer, &avctx->internal->byte_buffer_size, size);
+            avpkt->data = avctx->internal->byte_buffer;
+            avpkt->size = avctx->internal->byte_buffer_size;
+#if FF_API_DESTRUCT_PACKET
+FF_DISABLE_DEPRECATION_WARNINGS
+            avpkt->destruct = NULL;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+        }
+    }
 
     if (avpkt->data) {
         AVBufferRef *buf = avpkt->buf;
@@ -1260,8 +1681,10 @@ FF_DISABLE_DEPRECATION_WARNINGS
 FF_ENABLE_DEPRECATION_WARNINGS
 #endif
 
-        if (avpkt->size < size)
+        if (avpkt->size < size) {
+            av_log(avctx, AV_LOG_ERROR, "User packet is too small (%d < %"PRId64")\n", avpkt->size, size);
             return AVERROR(EINVAL);
+        }
 
         av_init_packet(avpkt);
 #if FF_API_DESTRUCT_PACKET
@@ -1273,10 +1696,18 @@ FF_ENABLE_DEPRECATION_WARNINGS
         avpkt->size     = size;
         return 0;
     } else {
-        return av_new_packet(avpkt, size);
+        int ret = av_new_packet(avpkt, size);
+        if (ret < 0)
+            av_log(avctx, AV_LOG_ERROR, "Failed to allocate packet of size %"PRId64"\n", size);
+        return ret;
     }
 }
 
+int ff_alloc_packet(AVPacket *avpkt, int size)
+{
+    return ff_alloc_packet2(NULL, avpkt, size); /* NULL context: ff_alloc_packet2() skips its avctx->internal byte_buffer path */
+}
+
 /**
  * Pad last frame with silence.
  */
@@ -1290,6 +1721,7 @@ static int pad_last_frame(AVCodecContext *s, AVFrame **dst, const AVFrame *src)
 
     frame->format         = src->format;
     frame->channel_layout = src->channel_layout;
+    av_frame_set_channels(frame, av_frame_get_channels(src));
     frame->nb_samples     = s->frame_size;
     ret = av_frame_get_buffer(frame, 32);
     if (ret < 0)
@@ -1321,10 +1753,11 @@ int attribute_align_arg avcodec_encode_audio2(AVCodecContext *avctx,
                                               const AVFrame *frame,
                                               int *got_packet_ptr)
 {
-    AVFrame tmp;
+    AVFrame *extended_frame = NULL;
     AVFrame *padded_frame = NULL;
     int ret;
-    int user_packet = !!avpkt->data;
+    AVPacket user_pkt = *avpkt;
+    int needs_realloc = !user_pkt.data;
 
     *got_packet_ptr = 0;
 
@@ -1345,28 +1778,36 @@ int attribute_align_arg avcodec_encode_audio2(AVCodecContext *avctx,
         }
         av_log(avctx, AV_LOG_WARNING, "extended_data is not set.\n");
 
-        tmp = *frame;
-        tmp.extended_data = tmp.data;
-        frame = &tmp;
+        extended_frame = av_frame_alloc();
+        if (!extended_frame)
+            return AVERROR(ENOMEM);
+
+        memcpy(extended_frame, frame, sizeof(AVFrame));
+        extended_frame->extended_data = extended_frame->data;
+        frame = extended_frame;
     }
 
     /* check for valid frame size */
     if (frame) {
         if (avctx->codec->capabilities & CODEC_CAP_SMALL_LAST_FRAME) {
-            if (frame->nb_samples > avctx->frame_size)
-                return AVERROR(EINVAL);
+            if (frame->nb_samples > avctx->frame_size) {
+                av_log(avctx, AV_LOG_ERROR, "more samples than frame size (avcodec_encode_audio2)\n");
+                ret = AVERROR(EINVAL);
+                goto end;
+            }
         } else if (!(avctx->codec->capabilities & CODEC_CAP_VARIABLE_FRAME_SIZE)) {
             if (frame->nb_samples < avctx->frame_size &&
                 !avctx->internal->last_audio_frame) {
                 ret = pad_last_frame(avctx, &padded_frame, frame);
                 if (ret < 0)
-                    return ret;
+                    goto end;
 
                 frame = padded_frame;
                 avctx->internal->last_audio_frame = 1;
             }
 
             if (frame->nb_samples != avctx->frame_size) {
+                av_log(avctx, AV_LOG_ERROR, "nb_samples (%d) != frame_size (%d) (avcodec_encode_audio2)\n", frame->nb_samples, avctx->frame_size);
                 ret = AVERROR(EINVAL);
                 goto end;
             }
@@ -1387,9 +1828,34 @@ int attribute_align_arg avcodec_encode_audio2(AVCodecContext *avctx,
         } else {
             avpkt->size = 0;
         }
+    }
+    if (avpkt->data && avpkt->data == avctx->internal->byte_buffer) {
+        needs_realloc = 0;
+        if (user_pkt.data) {
+            if (user_pkt.size >= avpkt->size) {
+                memcpy(user_pkt.data, avpkt->data, avpkt->size);
+            } else {
+                av_log(avctx, AV_LOG_ERROR, "Provided packet is too small, needs to be %d\n", avpkt->size);
+                avpkt->size = user_pkt.size;
+                ret = -1;
+            }
+            avpkt->buf      = user_pkt.buf;
+            avpkt->data     = user_pkt.data;
+#if FF_API_DESTRUCT_PACKET
+FF_DISABLE_DEPRECATION_WARNINGS
+            avpkt->destruct = user_pkt.destruct;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+        } else {
+            if (av_dup_packet(avpkt) < 0) {
+                ret = AVERROR(ENOMEM);
+            }
+        }
+    }
 
-        if (!user_packet && avpkt->size) {
-            ret = av_buffer_realloc(&avpkt->buf, avpkt->size);
+    if (!ret) {
+        if (needs_realloc && avpkt->data) {
+            ret = av_buffer_realloc(&avpkt->buf, avpkt->size + FF_INPUT_BUFFER_PADDING_SIZE);
             if (ret >= 0)
                 avpkt->data = avpkt->buf->data;
         }
@@ -1410,20 +1876,149 @@ int attribute_align_arg avcodec_encode_audio2(AVCodecContext *avctx,
 
 end:
     av_frame_free(&padded_frame);
+    av_free(extended_frame);
 
     return ret;
 }
 
+#if FF_API_OLD_ENCODE_AUDIO
+/* Legacy entry point: wraps buf in an AVPacket and samples in a temporary AVFrame, then calls
+ * avcodec_encode_audio2(). Returns that call's non-zero result, else pkt.size; failures while
+ * setting up the frame return early with an error code. */
+int attribute_align_arg avcodec_encode_audio(AVCodecContext *avctx,
+                                             uint8_t *buf, int buf_size,
+                                             const short *samples)
+{
+    AVPacket pkt;
+    AVFrame *frame;
+    int ret, samples_size, got_packet;
+
+    av_init_packet(&pkt);
+    pkt.data = buf;
+    pkt.size = buf_size;
+
+    if (samples) {
+        frame = av_frame_alloc();
+        if (!frame)
+            return AVERROR(ENOMEM);
+
+        if (avctx->frame_size) {
+            frame->nb_samples = avctx->frame_size;
+        } else {
+            /* frame_size is not set: the number of samples is calculated from the buffer size */
+            int64_t nb_samples;
+            if (!av_get_bits_per_sample(avctx->codec_id)) {
+                av_log(avctx, AV_LOG_ERROR, "avcodec_encode_audio() does not "
+                                            "support this codec\n");
+                av_frame_free(&frame);
+                return AVERROR(EINVAL);
+            }
+            nb_samples = (int64_t)buf_size * 8 /
+                         (av_get_bits_per_sample(avctx->codec_id) *
+                          avctx->channels);
+            if (nb_samples >= INT_MAX) {
+                av_frame_free(&frame);
+                return AVERROR(EINVAL);
+            }
+            frame->nb_samples = nb_samples;
+        }
+
+        /* it is assumed that the samples buffer is large enough based on the relevant parameters */
+        samples_size = av_samples_get_buffer_size(NULL, avctx->channels,
+                                                  frame->nb_samples,
+                                                  avctx->sample_fmt, 1);
+        if ((ret = avcodec_fill_audio_frame(frame, avctx->channels,
+                                            avctx->sample_fmt,
+                                            (const uint8_t *)samples,
+                                            samples_size, 1)) < 0) {
+            av_frame_free(&frame);
+            return ret;
+        }
+
+        /* fabricate frame pts from sample count: the avcodec_encode_audio() API
+         * does not have a way for the user to provide pts */
+        if (avctx->sample_rate && avctx->time_base.num)
+            frame->pts = ff_samples_to_time_base(avctx,
+                                                 avctx->internal->sample_count);
+        else
+            frame->pts = AV_NOPTS_VALUE;
+        avctx->internal->sample_count += frame->nb_samples;
+    } else {
+        frame = NULL; /* no samples: avcodec_encode_audio2() is called without an input frame */
+    }
+
+    got_packet = 0;
+    ret = avcodec_encode_audio2(avctx, &pkt, frame, &got_packet);
+    if (!ret && got_packet && avctx->coded_frame) {
+        avctx->coded_frame->pts       = pkt.pts;
+        avctx->coded_frame->key_frame = !!(pkt.flags & AV_PKT_FLAG_KEY);
+    }
+    /* free any side data since we cannot return it */
+    av_packet_free_side_data(&pkt);
+    /* extended_data that does not alias frame->data is freed by hand before the frame itself */
+    if (frame && frame->extended_data != frame->data)
+        av_freep(&frame->extended_data);
+
+    av_frame_free(&frame);
+    return ret ? ret : pkt.size;
+}
+
+#endif
+
+#if FF_API_OLD_ENCODE_VIDEO
+/**
+ * Legacy video encode entry point: wraps buf in an AVPacket and forwards it to
+ * avcodec_encode_video2().
+ * @return -1 if buf_size is below FF_MIN_BUFFER_SIZE, the non-zero result of
+ *         avcodec_encode_video2() if there is one, otherwise pkt.size
+ */
+int attribute_align_arg avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf, int buf_size,
+                                             const AVFrame *pict)
+{
+    AVPacket pkt;
+    int ret, got_packet = 0;
+
+    if (buf_size < FF_MIN_BUFFER_SIZE) {
+        av_log(avctx, AV_LOG_ERROR, "buffer smaller than minimum size\n");
+        return -1;
+    }
+
+    av_init_packet(&pkt);
+    pkt.data = buf;
+    pkt.size = buf_size;
+
+    ret = avcodec_encode_video2(avctx, &pkt, pict, &got_packet);
+    if (!ret && got_packet && avctx->coded_frame) {
+        avctx->coded_frame->pts       = pkt.pts;
+        avctx->coded_frame->key_frame = !!(pkt.flags & AV_PKT_FLAG_KEY);
+    }
+
+    /* free any side data since we cannot return it (same helper as avcodec_encode_audio()) */
+    av_packet_free_side_data(&pkt);
+
+    return ret ? ret : pkt.size;
+}
+
+#endif
+
 int attribute_align_arg avcodec_encode_video2(AVCodecContext *avctx,
                                               AVPacket *avpkt,
                                               const AVFrame *frame,
                                               int *got_packet_ptr)
 {
     int ret;
-    int user_packet = !!avpkt->data;
+    AVPacket user_pkt = *avpkt;
+    int needs_realloc = !user_pkt.data;
 
     *got_packet_ptr = 0;
 
+    if(CONFIG_FRAME_THREAD_ENCODER &&
+       avctx->internal->frame_thread_encoder && (avctx->active_thread_type&FF_THREAD_FRAME))
+        return ff_thread_video_encode_frame(avctx, avpkt, frame, got_packet_ptr);
+
+    if ((avctx->flags&CODEC_FLAG_PASS1) && avctx->stats_out)
+        avctx->stats_out[0] = '\0';
+
     if (!(avctx->codec->capabilities & CODEC_CAP_DELAY) && !frame) {
         av_free_packet(avpkt);
         av_init_packet(avpkt);
@@ -1437,14 +2032,40 @@ int attribute_align_arg avcodec_encode_video2(AVCodecContext *avctx,
     av_assert0(avctx->codec->encode2);
 
     ret = avctx->codec->encode2(avctx, avpkt, frame, got_packet_ptr);
+    av_assert0(ret <= 0);
+
+    if (avpkt->data && avpkt->data == avctx->internal->byte_buffer) {
+        needs_realloc = 0;
+        if (user_pkt.data) {
+            if (user_pkt.size >= avpkt->size) {
+                memcpy(user_pkt.data, avpkt->data, avpkt->size);
+            } else {
+                av_log(avctx, AV_LOG_ERROR, "Provided packet is too small, needs to be %d\n", avpkt->size);
+                avpkt->size = user_pkt.size;
+                ret = -1;
+            }
+            avpkt->buf      = user_pkt.buf;
+            avpkt->data     = user_pkt.data;
+#if FF_API_DESTRUCT_PACKET
+FF_DISABLE_DEPRECATION_WARNINGS
+            avpkt->destruct = user_pkt.destruct;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+        } else {
+            if (av_dup_packet(avpkt) < 0) {
+                ret = AVERROR(ENOMEM);
+            }
+        }
+    }
+
     if (!ret) {
         if (!*got_packet_ptr)
             avpkt->size = 0;
         else if (!(avctx->codec->capabilities & CODEC_CAP_DELAY))
             avpkt->pts = avpkt->dts = frame->pts;
 
-        if (!user_packet && avpkt->size) {
-            ret = av_buffer_realloc(&avpkt->buf, avpkt->size);
+        if (needs_realloc && avpkt->data) {
+            ret = av_buffer_realloc(&avpkt->buf, avpkt->size + FF_INPUT_BUFFER_PADDING_SIZE);
             if (ret >= 0)
                 avpkt->data = avpkt->buf->data;
         }
@@ -1454,6 +2075,8 @@ int attribute_align_arg avcodec_encode_video2(AVCodecContext *avctx,
 
     if (ret < 0 || !*got_packet_ptr)
         av_free_packet(avpkt);
+    else
+        av_packet_merge_side_data(avpkt);
 
     emms_c();
     return ret;
@@ -1467,13 +2090,48 @@ int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size,
         av_log(avctx, AV_LOG_ERROR, "start_display_time must be 0.\n");
         return -1;
     }
-    if (sub->num_rects == 0 || !sub->rects)
-        return -1;
+
     ret = avctx->codec->encode_sub(avctx, buf, buf_size, sub);
     avctx->frame_number++;
     return ret;
 }
 
+/**
+ * Choose between the reordered pts and the dts of a decoded frame, favouring the one
+ * seen going non-increasing less often. Input may wrap around; the output then does too.
+ *
+ * @param ctx           context holding the pts_correction_* history
+ * @param reordered_pts pts of the decoded AVPacket, as passed through AVFrame.pkt_pts
+ * @param dts           dts of the decoded AVPacket
+ * @return one of the two input timestamps, may be AV_NOPTS_VALUE
+ */
+static int64_t guess_correct_pts(AVCodecContext *ctx,
+                                 int64_t reordered_pts, int64_t dts)
+{
+    const int has_dts = dts           != AV_NOPTS_VALUE;
+    const int has_pts = reordered_pts != AV_NOPTS_VALUE;
+
+    if (has_dts) {
+        if (dts <= ctx->pts_correction_last_dts)
+            ctx->pts_correction_num_faulty_dts++;
+        ctx->pts_correction_last_dts = dts;
+    } else if (has_pts) {
+        ctx->pts_correction_last_dts = reordered_pts;
+    }
+
+    if (has_pts) {
+        if (reordered_pts <= ctx->pts_correction_last_pts)
+            ctx->pts_correction_num_faulty_pts++;
+        ctx->pts_correction_last_pts = reordered_pts;
+    } else if (has_dts) {
+        ctx->pts_correction_last_pts = dts;
+    }
+
+    if (has_pts && (!has_dts || ctx->pts_correction_num_faulty_pts <= ctx->pts_correction_num_faulty_dts))
+        return reordered_pts;
+    return dts;
+}
+
 static int apply_param_change(AVCodecContext *avctx, AVPacket *avpkt)
 {
     int size = 0, ret;
@@ -1531,6 +2189,18 @@ fail:
     return AVERROR_INVALIDDATA;
 }
 
+/* Unpack the AV_PKT_DATA_STRINGS_METADATA side data of avctx->internal->pkt into frame's metadata. */
+static int add_metadata_from_side_data(AVCodecContext *avctx, AVFrame *frame)
+{
+    AVDictionary **dst = avpriv_frame_get_metadatap(frame);
+    const uint8_t *blob;
+    int blob_size;
+
+    /* the result of av_packet_unpack_dictionary() is handed back unchanged */
+    blob = av_packet_get_side_data(avctx->internal->pkt, AV_PKT_DATA_STRINGS_METADATA, &blob_size);
+    return av_packet_unpack_dictionary(blob, blob_size, dst);
+}
+
 static int unrefcount_frame(AVCodecInternal *avci, AVFrame *frame)
 {
     int ret;
@@ -1549,7 +2219,7 @@ static int unrefcount_frame(AVCodecInternal *avci, AVFrame *frame)
     memcpy(frame->data,     avci->to_free->data,     sizeof(frame->data));
     memcpy(frame->linesize, avci->to_free->linesize, sizeof(frame->linesize));
     if (avci->to_free->extended_data != avci->to_free->data) {
-        int planes = av_get_channel_layout_nb_channels(avci->to_free->channel_layout);
+        int planes = av_frame_get_channels(avci->to_free);
         int size   = planes * sizeof(*frame->extended_data);
 
         if (!size) {
@@ -1572,50 +2242,75 @@ static int unrefcount_frame(AVCodecInternal *avci, AVFrame *frame)
     frame->height         = avci->to_free->height;
     frame->channel_layout = avci->to_free->channel_layout;
     frame->nb_samples     = avci->to_free->nb_samples;
+    av_frame_set_channels(frame, av_frame_get_channels(avci->to_free));
 
     return 0;
 }
 
 int attribute_align_arg avcodec_decode_video2(AVCodecContext *avctx, AVFrame *picture,
                                               int *got_picture_ptr,
-                                              AVPacket *avpkt)
+                                              const AVPacket *avpkt)
 {
     AVCodecInternal *avci = avctx->internal;
     int ret;
+    // copy to ensure we do not change avpkt
+    AVPacket tmp = *avpkt;
+
+    if (!avctx->codec)
+        return AVERROR(EINVAL);
+    if (avctx->codec->type != AVMEDIA_TYPE_VIDEO) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid media type for video\n");
+        return AVERROR(EINVAL);
+    }
 
     *got_picture_ptr = 0;
     if ((avctx->coded_width || avctx->coded_height) && av_image_check_size(avctx->coded_width, avctx->coded_height, 0, avctx))
-        return -1;
-
-    avctx->internal->pkt = avpkt;
-    ret = apply_param_change(avctx, avpkt);
-    if (ret < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error applying parameter changes.\n");
-        if (avctx->err_recognition & AV_EF_EXPLODE)
-            return ret;
-    }
+        return AVERROR(EINVAL);
 
     av_frame_unref(picture);
 
     if ((avctx->codec->capabilities & CODEC_CAP_DELAY) || avpkt->size || (avctx->active_thread_type & FF_THREAD_FRAME)) {
+        int did_split = av_packet_split_side_data(&tmp);
+        ret = apply_param_change(avctx, &tmp);
+        if (ret < 0) {
+            av_log(avctx, AV_LOG_ERROR, "Error applying parameter changes.\n");
+            if (avctx->err_recognition & AV_EF_EXPLODE)
+                goto fail;
+        }
+
+        avctx->internal->pkt = &tmp;
         if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME)
             ret = ff_thread_decode_frame(avctx, picture, got_picture_ptr,
-                                         avpkt);
+                                         &tmp);
         else {
             ret = avctx->codec->decode(avctx, picture, got_picture_ptr,
-                                       avpkt);
+                                       &tmp);
             picture->pkt_dts = avpkt->dts;
+
+            if(!avctx->has_b_frames){
+                av_frame_set_pkt_pos(picture, avpkt->pos);
+            }
+            //FIXME these should be under if(!avctx->has_b_frames)
             /* get_buffer is supposed to set frame parameters */
             if (!(avctx->codec->capabilities & CODEC_CAP_DR1)) {
-                picture->sample_aspect_ratio = avctx->sample_aspect_ratio;
-                picture->width               = avctx->width;
-                picture->height              = avctx->height;
-                picture->format              = avctx->pix_fmt;
+                if (!picture->sample_aspect_ratio.num)    picture->sample_aspect_ratio = avctx->sample_aspect_ratio;
+                if (!picture->width)                      picture->width               = avctx->width;
+                if (!picture->height)                     picture->height              = avctx->height;
+                if (picture->format == AV_PIX_FMT_NONE)   picture->format              = avctx->pix_fmt;
             }
         }
+        add_metadata_from_side_data(avctx, picture);
 
+fail:
         emms_c(); //needed to avoid an emms_c() call before every return;
 
+        avctx->internal->pkt = NULL;
+        if (did_split) {
+            av_packet_free_side_data(&tmp);
+            if(ret == tmp.size)
+                ret = avpkt->size;
+        }
+
         if (*got_picture_ptr) {
             if (!avctx->refcounted_frames) {
                 int err = unrefcount_frame(avci, picture);
@@ -1624,48 +2319,199 @@ int attribute_align_arg avcodec_decode_video2(AVCodecContext *avctx, AVFrame *pi
             }
 
             avctx->frame_number++;
+            av_frame_set_best_effort_timestamp(picture,
+                                               guess_correct_pts(avctx,
+                                                                 picture->pkt_pts,
+                                                                 picture->pkt_dts));
         } else
             av_frame_unref(picture);
     } else
         ret = 0;
 
+    /* many decoders assign whole AVFrames, thus overwriting extended_data;
+     * make sure it's set correctly */
+    av_assert0(!picture->extended_data || picture->extended_data == picture->data);
+
     return ret;
 }
 
+#if FF_API_OLD_DECODE_AUDIO
+/* Legacy audio decode entry point: decodes through avcodec_decode_audio4() into a temporary frame
+ * and copies the result into samples, plane after plane. *frame_size_ptr is checked against the
+ * decoded data size on entry and receives that size on success, or 0 when no frame was produced. */
+int attribute_align_arg avcodec_decode_audio3(AVCodecContext *avctx, int16_t *samples,
+                                              int *frame_size_ptr, AVPacket *avpkt)
+{
+    AVFrame *frame = av_frame_alloc();
+    int ret, got_frame = 0;
+
+    if (!frame)
+        return AVERROR(ENOMEM);
+    if (avctx->get_buffer != avcodec_default_get_buffer) {
+        av_log(avctx, AV_LOG_ERROR, "Custom get_buffer() for use with "
+                                    "avcodec_decode_audio3() detected. Overriding with avcodec_default_get_buffer\n");
+        av_log(avctx, AV_LOG_ERROR, "Please port your application to "
+                                    "avcodec_decode_audio4()\n");
+        avctx->get_buffer = avcodec_default_get_buffer;
+        avctx->release_buffer = avcodec_default_release_buffer;
+    }
+
+    ret = avcodec_decode_audio4(avctx, frame, &got_frame, avpkt);
+    if (ret >= 0 && got_frame) {
+        int ch, plane_size;
+        int planar    = av_sample_fmt_is_planar(avctx->sample_fmt);
+        int data_size = av_samples_get_buffer_size(&plane_size, avctx->channels, frame->nb_samples,
+                                                   avctx->sample_fmt, 1);
+        if (*frame_size_ptr < data_size) {
+            av_log(avctx, AV_LOG_ERROR, "output buffer size is too small for "
+                                        "the current frame (%d < %d)\n", *frame_size_ptr, data_size);
+            av_frame_free(&frame);
+            return AVERROR(EINVAL);
+        }
+
+        memcpy(samples, frame->extended_data[0], plane_size);
+        /* planar formats: append the remaining channel planes right after the first one */
+        if (planar && avctx->channels > 1) {
+            uint8_t *out = ((uint8_t *)samples) + plane_size;
+            for (ch = 1; ch < avctx->channels; ch++) {
+                memcpy(out, frame->extended_data[ch], plane_size);
+                out += plane_size;
+            }
+        }
+        *frame_size_ptr = data_size;
+    } else {
+        *frame_size_ptr = 0;
+    }
+    av_frame_free(&frame);
+    return ret;
+}
+
+#endif
+
 int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
                                               AVFrame *frame,
                                               int *got_frame_ptr,
-                                              AVPacket *avpkt)
+                                              const AVPacket *avpkt)
 {
     AVCodecInternal *avci = avctx->internal;
     int ret = 0;
 
     *got_frame_ptr = 0;
 
-    avctx->internal->pkt = avpkt;
-
     if (!avpkt->data && avpkt->size) {
         av_log(avctx, AV_LOG_ERROR, "invalid packet: NULL data, size != 0\n");
         return AVERROR(EINVAL);
     }
-
-    ret = apply_param_change(avctx, avpkt);
-    if (ret < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error applying parameter changes.\n");
-        if (avctx->err_recognition & AV_EF_EXPLODE)
-            return ret;
+    if (!avctx->codec)
+        return AVERROR(EINVAL);
+    if (avctx->codec->type != AVMEDIA_TYPE_AUDIO) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid media type for audio\n");
+        return AVERROR(EINVAL);
     }
 
     av_frame_unref(frame);
 
-    if ((avctx->codec->capabilities & CODEC_CAP_DELAY) || avpkt->size) {
-        ret = avctx->codec->decode(avctx, frame, got_frame_ptr, avpkt);
+    if ((avctx->codec->capabilities & CODEC_CAP_DELAY) || avpkt->size || (avctx->active_thread_type & FF_THREAD_FRAME)) {
+        uint8_t *side;
+        int side_size;
+        uint32_t discard_padding = 0;
+        // copy to ensure we do not change avpkt
+        AVPacket tmp = *avpkt;
+        int did_split = av_packet_split_side_data(&tmp);
+        ret = apply_param_change(avctx, &tmp);
+        if (ret < 0) {
+            av_log(avctx, AV_LOG_ERROR, "Error applying parameter changes.\n");
+            if (avctx->err_recognition & AV_EF_EXPLODE)
+                goto fail;
+        }
+
+        avctx->internal->pkt = &tmp;
+        if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME)
+            ret = ff_thread_decode_frame(avctx, frame, got_frame_ptr, &tmp);
+        else {
+            ret = avctx->codec->decode(avctx, frame, got_frame_ptr, &tmp);
+            frame->pkt_dts = avpkt->dts;
+        }
         if (ret >= 0 && *got_frame_ptr) {
+            add_metadata_from_side_data(avctx, frame);
             avctx->frame_number++;
-            frame->pkt_dts = avpkt->dts;
+            av_frame_set_best_effort_timestamp(frame,
+                                               guess_correct_pts(avctx,
+                                                                 frame->pkt_pts,
+                                                                 frame->pkt_dts));
             if (frame->format == AV_SAMPLE_FMT_NONE)
                 frame->format = avctx->sample_fmt;
+            if (!frame->channel_layout)
+                frame->channel_layout = avctx->channel_layout;
+            if (!av_frame_get_channels(frame))
+                av_frame_set_channels(frame, avctx->channels);
+            if (!frame->sample_rate)
+                frame->sample_rate = avctx->sample_rate;
+        }
 
+        side= av_packet_get_side_data(avctx->internal->pkt, AV_PKT_DATA_SKIP_SAMPLES, &side_size);
+        if(side && side_size>=10) {
+            avctx->internal->skip_samples = AV_RL32(side);
+            av_log(avctx, AV_LOG_DEBUG, "skip %d samples due to side data\n",
+                   avctx->internal->skip_samples);
+            discard_padding = AV_RL32(side + 4);
+        }
+        if (avctx->internal->skip_samples && *got_frame_ptr) {
+            if(frame->nb_samples <= avctx->internal->skip_samples){
+                *got_frame_ptr = 0;
+                avctx->internal->skip_samples -= frame->nb_samples;
+                av_log(avctx, AV_LOG_DEBUG, "skip whole frame, skip left: %d\n",
+                       avctx->internal->skip_samples);
+            } else {
+                av_samples_copy(frame->extended_data, frame->extended_data, 0, avctx->internal->skip_samples,
+                                frame->nb_samples - avctx->internal->skip_samples, avctx->channels, frame->format);
+                if(avctx->pkt_timebase.num && avctx->sample_rate) {
+                    int64_t diff_ts = av_rescale_q(avctx->internal->skip_samples,
+                                                   (AVRational){1, avctx->sample_rate},
+                                                   avctx->pkt_timebase);
+                    if(frame->pkt_pts!=AV_NOPTS_VALUE)
+                        frame->pkt_pts += diff_ts;
+                    if(frame->pkt_dts!=AV_NOPTS_VALUE)
+                        frame->pkt_dts += diff_ts;
+                    if (av_frame_get_pkt_duration(frame) >= diff_ts)
+                        av_frame_set_pkt_duration(frame, av_frame_get_pkt_duration(frame) - diff_ts);
+                } else {
+                    av_log(avctx, AV_LOG_WARNING, "Could not update timestamps for skipped samples.\n");
+                }
+                av_log(avctx, AV_LOG_DEBUG, "skip %d/%d samples\n",
+                       avctx->internal->skip_samples, frame->nb_samples);
+                frame->nb_samples -= avctx->internal->skip_samples;
+                avctx->internal->skip_samples = 0;
+            }
+        }
+
+        if (discard_padding > 0 && discard_padding <= frame->nb_samples && *got_frame_ptr) {
+            if (discard_padding == frame->nb_samples) {
+                *got_frame_ptr = 0;
+            } else {
+                if(avctx->pkt_timebase.num && avctx->sample_rate) {
+                    int64_t diff_ts = av_rescale_q(frame->nb_samples - discard_padding,
+                                                   (AVRational){1, avctx->sample_rate},
+                                                   avctx->pkt_timebase);
+                    if (av_frame_get_pkt_duration(frame) >= diff_ts)
+                        av_frame_set_pkt_duration(frame, av_frame_get_pkt_duration(frame) - diff_ts);
+                } else {
+                    av_log(avctx, AV_LOG_WARNING, "Could not update timestamps for discarded samples.\n");
+                }
+                av_log(avctx, AV_LOG_DEBUG, "discard %d/%d samples\n",
+                       discard_padding, frame->nb_samples);
+                frame->nb_samples -= discard_padding;
+            }
+        }
+fail:
+        avctx->internal->pkt = NULL;
+        if (did_split) {
+            av_packet_free_side_data(&tmp);
+            if(ret == tmp.size)
+                ret = avpkt->size;
+        }
+
+        if (ret >= 0 && *got_frame_ptr) {
             if (!avctx->refcounted_frames) {
                 int err = unrefcount_frame(avci, frame);
                 if (err < 0)
@@ -1675,21 +2521,178 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
             av_frame_unref(frame);
     }
 
+    return ret;
+}
+
+#define UTF8_MAX_BYTES 4 /* 5 and 6 bytes sequences should not be used */
+/* Recode inpkt from avctx->sub_charenc to UTF-8 into outpkt; returns 0 on success or an error code. */
+static int recode_subtitle(AVCodecContext *avctx, AVPacket *outpkt, const AVPacket *inpkt)
+{
+#if CONFIG_ICONV
+    iconv_t cd = (iconv_t)-1;
+    int ret = 0;
+    char *inb, *outb;
+    size_t inl, outl;
+    AVPacket tmp;
+#endif
+
+    if (avctx->sub_charenc_mode != FF_SUB_CHARENC_MODE_PRE_DECODER || inpkt->size == 0)
+        return 0; /* nothing to recode: outpkt is left exactly as the caller set it up */
+
+#if CONFIG_ICONV
+    cd = iconv_open("UTF-8", avctx->sub_charenc);
+    av_assert0(cd != (iconv_t)-1);
+
+    inb = inpkt->data;
+    inl = inpkt->size;
+
+    if (inl >= INT_MAX / UTF8_MAX_BYTES - FF_INPUT_BUFFER_PADDING_SIZE) {
+        av_log(avctx, AV_LOG_ERROR, "Subtitles packet is too big for recoding\n");
+        ret = AVERROR(ENOMEM);
+        goto end;
+    }
+    /* size the output for the worst case of UTF8_MAX_BYTES output bytes per input byte */
+    ret = av_new_packet(&tmp, inl * UTF8_MAX_BYTES);
+    if (ret < 0)
+        goto end;
+    outpkt->buf  = tmp.buf;
+    outpkt->data = tmp.data;
+    outpkt->size = tmp.size;
+    outb = outpkt->data;
+    outl = outpkt->size;
+    /* on failure, errno is read before logging/freeing can clobber it and is never mapped to 0 */
+    if (iconv(cd, &inb, &inl, &outb, &outl) == (size_t)-1 ||
+        iconv(cd, NULL, NULL, &outb, &outl) == (size_t)-1 ||
+        outl >= outpkt->size || inl != 0) {
+        ret = FFMIN(AVERROR(errno), -1); /* the outl/inl checks can fail with errno == 0 */
+        av_log(avctx, AV_LOG_ERROR, "Unable to recode subtitle event \"%s\" "
+               "from %s to UTF-8\n", inpkt->data, avctx->sub_charenc);
+        av_free_packet(&tmp);
+        goto end;
+    }
+    outpkt->size -= outl; /* keep only the bytes iconv actually produced */
+    memset(outpkt->data + outpkt->size, 0, outl);
 
+end:
+    if (cd != (iconv_t)-1)
+        iconv_close(cd);
     return ret;
+#else
+    av_log(avctx, AV_LOG_ERROR, "requesting subtitles recoding without iconv");
+    return AVERROR(EINVAL);
+#endif
+}
+
+/* Return 1 if every sequence of the NUL-terminated str decodes and passes the checks below, else 0. */
+static int utf8_check(const uint8_t *str)
+{
+    while (*str) {
+        const uint8_t *next = str;
+        uint32_t cp, lowest;
+        int len;
+
+        GET_UTF8(cp, *(next++), return 0;);
+        len    = next - str; /* number of bytes the sequence occupied */
+        lowest = len == 1 ? 0 : len == 2 ? 0x80 : 1 << (5 * len - 4);
+        /* reject overlong encodings, values from 0x110000 up, 0xFFFE and UTF-16 surrogates */
+        if (cp < lowest || cp >= 0x110000 || cp == 0xFFFE || (cp >= 0xD800 && cp <= 0xDFFF))
+            return 0;
+        str = next;
+    }
+    return 1;
 }
 
 int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
                              int *got_sub_ptr,
                              AVPacket *avpkt)
 {
-    int ret;
+    int i, ret = 0;
+
+    if (!avpkt->data && avpkt->size) {
+        av_log(avctx, AV_LOG_ERROR, "invalid packet: NULL data, size != 0\n");
+        return AVERROR(EINVAL);
+    }
+    if (!avctx->codec)
+        return AVERROR(EINVAL);
+    if (avctx->codec->type != AVMEDIA_TYPE_SUBTITLE) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid media type for subtitles\n");
+        return AVERROR(EINVAL);
+    }
 
-    avctx->internal->pkt = avpkt;
     *got_sub_ptr = 0;
-    ret = avctx->codec->decode(avctx, sub, got_sub_ptr, avpkt);
-    if (*got_sub_ptr)
-        avctx->frame_number++;
+    avcodec_get_subtitle_defaults(sub);
+
+    if ((avctx->codec->capabilities & CODEC_CAP_DELAY) || avpkt->size) {
+        AVPacket pkt_recoded;
+        AVPacket tmp = *avpkt;
+        int did_split = av_packet_split_side_data(&tmp);
+        //apply_param_change(avctx, &tmp);
+
+        if (did_split) {
+            /* FFMIN() prevents overflow in case the packet wasn't allocated with
+             * proper padding.
+             * If the side data is smaller than the buffer padding size, the
+             * remaining bytes should have already been filled with zeros by the
+             * original packet allocation anyway. */
+            memset(tmp.data + tmp.size, 0,
+                   FFMIN(avpkt->size - tmp.size, FF_INPUT_BUFFER_PADDING_SIZE));
+        }
+
+        pkt_recoded = tmp;
+        ret = recode_subtitle(avctx, &pkt_recoded, &tmp);
+        if (ret < 0) {
+            *got_sub_ptr = 0;
+        } else {
+            avctx->internal->pkt = &pkt_recoded;
+
+            if (avctx->pkt_timebase.den && avpkt->pts != AV_NOPTS_VALUE)
+                sub->pts = av_rescale_q(avpkt->pts,
+                                        avctx->pkt_timebase, AV_TIME_BASE_Q);
+            ret = avctx->codec->decode(avctx, sub, got_sub_ptr, &pkt_recoded);
+            av_assert1((ret >= 0) >= !!*got_sub_ptr &&
+                       !!*got_sub_ptr >= !!sub->num_rects);
+
+            if (sub->num_rects && !sub->end_display_time && avpkt->duration &&
+                avctx->pkt_timebase.num) {
+                AVRational ms = { 1, 1000 };
+                sub->end_display_time = av_rescale_q(avpkt->duration,
+                                                     avctx->pkt_timebase, ms);
+            }
+
+            for (i = 0; i < sub->num_rects; i++) {
+                if (sub->rects[i]->ass && !utf8_check(sub->rects[i]->ass)) {
+                    av_log(avctx, AV_LOG_ERROR,
+                           "Invalid UTF-8 in decoded subtitles text; "
+                           "maybe missing -sub_charenc option\n");
+                    avsubtitle_free(sub);
+                    return AVERROR_INVALIDDATA;
+                }
+            }
+
+            if (tmp.data != pkt_recoded.data) { // did we recode?
+                /* prevent from destroying side data from original packet */
+                pkt_recoded.side_data = NULL;
+                pkt_recoded.side_data_elems = 0;
+
+                av_free_packet(&pkt_recoded);
+            }
+            if (avctx->codec_descriptor->props & AV_CODEC_PROP_BITMAP_SUB)
+                sub->format = 0;
+            else if (avctx->codec_descriptor->props & AV_CODEC_PROP_TEXT_SUB)
+                sub->format = 1;
+            avctx->internal->pkt = NULL;
+        }
+
+        if (did_split) {
+            av_packet_free_side_data(&tmp);
+            if(ret == tmp.size)
+                ret = avpkt->size;
+        }
+
+        if (*got_sub_ptr)
+            avctx->frame_number++;
+    }
+
     return ret;
 }
 
@@ -1714,14 +2717,23 @@ void avsubtitle_free(AVSubtitle *sub)
 
 av_cold int avcodec_close(AVCodecContext *avctx)
 {
+    if (!avctx)
+        return 0;
+
     if (avcodec_is_open(avctx)) {
         FramePool *pool = avctx->internal->pool;
         int i;
+        if (CONFIG_FRAME_THREAD_ENCODER &&
+            avctx->internal->frame_thread_encoder && avctx->thread_count > 1) {
+            ff_frame_thread_encoder_free(avctx);
+        }
         if (HAVE_THREADS && avctx->internal->thread_ctx)
             ff_thread_free(avctx);
         if (avctx->codec && avctx->codec->close)
             avctx->codec->close(avctx);
         avctx->coded_frame = NULL;
+        avctx->internal->byte_buffer_size = 0;
+        av_freep(&avctx->internal->byte_buffer);
         av_frame_free(&avctx->internal->to_free);
         for (i = 0; i < FF_ARRAY_ELEMS(pool->pools); i++)
             av_buffer_pool_uninit(&pool->pools[i]);
@@ -1746,10 +2758,39 @@ av_cold int avcodec_close(AVCodecContext *avctx)
     return 0;
 }
 
+static enum AVCodecID remap_deprecated_codec_id(enum AVCodecID id)
+{
+    switch(id){
+        //This is for future deprecated codec ids, it's empty since
+        //last major bump but will fill up again over time, please don't remove it
+//         case AV_CODEC_ID_UTVIDEO_DEPRECATED: return AV_CODEC_ID_UTVIDEO;
+        case AV_CODEC_ID_BRENDER_PIX_DEPRECATED         : return AV_CODEC_ID_BRENDER_PIX;
+        case AV_CODEC_ID_OPUS_DEPRECATED                : return AV_CODEC_ID_OPUS;
+        case AV_CODEC_ID_TAK_DEPRECATED                 : return AV_CODEC_ID_TAK;
+        case AV_CODEC_ID_PAF_AUDIO_DEPRECATED           : return AV_CODEC_ID_PAF_AUDIO;
+        case AV_CODEC_ID_PCM_S24LE_PLANAR_DEPRECATED    : return AV_CODEC_ID_PCM_S24LE_PLANAR;
+        case AV_CODEC_ID_PCM_S32LE_PLANAR_DEPRECATED    : return AV_CODEC_ID_PCM_S32LE_PLANAR;
+        case AV_CODEC_ID_ADPCM_VIMA_DEPRECATED          : return AV_CODEC_ID_ADPCM_VIMA;
+        case AV_CODEC_ID_ESCAPE130_DEPRECATED           : return AV_CODEC_ID_ESCAPE130;
+        case AV_CODEC_ID_EXR_DEPRECATED                 : return AV_CODEC_ID_EXR;
+        case AV_CODEC_ID_G2M_DEPRECATED                 : return AV_CODEC_ID_G2M;
+        case AV_CODEC_ID_PAF_VIDEO_DEPRECATED           : return AV_CODEC_ID_PAF_VIDEO;
+        case AV_CODEC_ID_WEBP_DEPRECATED                : return AV_CODEC_ID_WEBP;
+        case AV_CODEC_ID_HEVC_DEPRECATED                : return AV_CODEC_ID_HEVC;
+        case AV_CODEC_ID_MVC1_DEPRECATED                : return AV_CODEC_ID_MVC1;
+        case AV_CODEC_ID_MVC2_DEPRECATED                : return AV_CODEC_ID_MVC2;
+        case AV_CODEC_ID_SANM_DEPRECATED                : return AV_CODEC_ID_SANM;
+        case AV_CODEC_ID_SGIRLE_DEPRECATED              : return AV_CODEC_ID_SGIRLE;
+        case AV_CODEC_ID_VP7_DEPRECATED                 : return AV_CODEC_ID_VP7;
+        default                                         : return id;
+    }
+}
+
 static AVCodec *find_encdec(enum AVCodecID id, int encoder)
 {
     AVCodec *p, *experimental = NULL;
     p = first_avcodec;
+    id= remap_deprecated_codec_id(id);
     while (p) {
         if ((encoder ? av_codec_is_encoder(p) : av_codec_is_decoder(p)) &&
             p->id == id) {
@@ -1801,27 +2842,24 @@ AVCodec *avcodec_find_decoder_by_name(const char *name)
     return NULL;
 }
 
-static int get_bit_rate(AVCodecContext *ctx)
+const char *avcodec_get_name(enum AVCodecID id)
 {
-    int bit_rate;
-    int bits_per_sample;
+    const AVCodecDescriptor *cd;
+    AVCodec *codec;
 
-    switch (ctx->codec_type) {
-    case AVMEDIA_TYPE_VIDEO:
-    case AVMEDIA_TYPE_DATA:
-    case AVMEDIA_TYPE_SUBTITLE:
-    case AVMEDIA_TYPE_ATTACHMENT:
-        bit_rate = ctx->bit_rate;
-        break;
-    case AVMEDIA_TYPE_AUDIO:
-        bits_per_sample = av_get_bits_per_sample(ctx->codec_id);
-        bit_rate = bits_per_sample ? ctx->sample_rate * ctx->channels * bits_per_sample : ctx->bit_rate;
-        break;
-    default:
-        bit_rate = 0;
-        break;
-    }
-    return bit_rate;
+    if (id == AV_CODEC_ID_NONE)
+        return "none";
+    cd = avcodec_descriptor_get(id);
+    if (cd)
+        return cd->name;
+    av_log(NULL, AV_LOG_WARNING, "Codec 0x%x is not in the full list.\n", id);
+    codec = avcodec_find_decoder(id);
+    if (codec)
+        return codec->name;
+    codec = avcodec_find_encoder(id);
+    if (codec)
+        return codec->name;
+    return "unknown_codec";
 }
 
 size_t av_get_codec_tag_string(char *buf, size_t buf_size, unsigned int codec_tag)
@@ -1831,7 +2869,7 @@ size_t av_get_codec_tag_string(char *buf, size_t buf_size, unsigned int codec_ta
 #define TAG_PRINT(x)                                              \
     (((x) >= '0' && (x) <= '9') ||                                \
      ((x) >= 'a' && (x) <= 'z') || ((x) >= 'A' && (x) <= 'Z') ||  \
-     ((x) == '.' || (x) == ' '))
+     ((x) == '.' || (x) == ' ' || (x) == '-' || (x) == '_'))
 
     for (i = 0; i < 4; i++) {
         len = snprintf(buf, buf_size,
@@ -1846,47 +2884,66 @@ size_t av_get_codec_tag_string(char *buf, size_t buf_size, unsigned int codec_ta
 
 void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
 {
+    const char *codec_type;
     const char *codec_name;
     const char *profile = NULL;
     const AVCodec *p;
-    char buf1[32];
     int bitrate;
     AVRational display_aspect_ratio;
 
-    if (enc->codec)
-        p = enc->codec;
-    else if (encode)
-        p = avcodec_find_encoder(enc->codec_id);
-    else
-        p = avcodec_find_decoder(enc->codec_id);
-
-    if (p) {
-        codec_name = p->name;
-        profile = av_get_profile_name(p, enc->profile);
-    } else if (enc->codec_id == AV_CODEC_ID_MPEG2TS) {
-        /* fake mpeg2 transport stream codec (currently not
-         * registered) */
-        codec_name = "mpeg2ts";
-    } else {
-        /* output avi tags */
+    if (!buf || buf_size <= 0)
+        return;
+    codec_type = av_get_media_type_string(enc->codec_type);
+    codec_name = avcodec_get_name(enc->codec_id);
+    if (enc->profile != FF_PROFILE_UNKNOWN) {
+        if (enc->codec)
+            p = enc->codec;
+        else
+            p = encode ? avcodec_find_encoder(enc->codec_id) :
+                        avcodec_find_decoder(enc->codec_id);
+        if (p)
+            profile = av_get_profile_name(p, enc->profile);
+    }
+
+    snprintf(buf, buf_size, "%s: %s", codec_type ? codec_type : "unknown",
+             codec_name);
+    buf[0] ^= 'a' ^ 'A'; /* first letter in uppercase */
+
+    if (enc->codec && strcmp(enc->codec->name, codec_name))
+        snprintf(buf + strlen(buf), buf_size - strlen(buf), " (%s)", enc->codec->name);
+
+    if (profile)
+        snprintf(buf + strlen(buf), buf_size - strlen(buf), " (%s)", profile);
+    if (enc->codec_tag) {
         char tag_buf[32];
         av_get_codec_tag_string(tag_buf, sizeof(tag_buf), enc->codec_tag);
-        snprintf(buf1, sizeof(buf1), "%s / 0x%04X", tag_buf, enc->codec_tag);
-        codec_name = buf1;
+        snprintf(buf + strlen(buf), buf_size - strlen(buf),
+                 " (%s / 0x%04X)", tag_buf, enc->codec_tag);
     }
 
     switch (enc->codec_type) {
     case AVMEDIA_TYPE_VIDEO:
-        snprintf(buf, buf_size,
-                 "Video: %s%s",
-                 codec_name, enc->mb_decision ? " (hq)" : "");
-        if (profile)
-            snprintf(buf + strlen(buf), buf_size - strlen(buf),
-                     " (%s)", profile);
         if (enc->pix_fmt != AV_PIX_FMT_NONE) {
+            char detail[256] = "(";
+            const char *colorspace_name;
             snprintf(buf + strlen(buf), buf_size - strlen(buf),
                      ", %s",
                      av_get_pix_fmt_name(enc->pix_fmt));
+            if (enc->bits_per_raw_sample &&
+                enc->bits_per_raw_sample <= av_pix_fmt_desc_get(enc->pix_fmt)->comp[0].depth_minus1)
+                av_strlcatf(detail, sizeof(detail), "%d bpc, ", enc->bits_per_raw_sample);
+            if (enc->color_range != AVCOL_RANGE_UNSPECIFIED)
+                av_strlcatf(detail, sizeof(detail),
+                            enc->color_range == AVCOL_RANGE_MPEG ? "tv, ": "pc, ");
+
+            colorspace_name = av_get_colorspace_name(enc->colorspace);
+            if (colorspace_name)
+                av_strlcatf(detail, sizeof(detail), "%s, ", colorspace_name);
+
+            if (strlen(detail) > 1) {
+                detail[strlen(detail) - 2] = 0;
+                av_strlcatf(buf, buf_size, "%s)", detail);
+            }
         }
         if (enc->width) {
             snprintf(buf + strlen(buf), buf_size - strlen(buf),
@@ -1898,7 +2955,7 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
                           enc->height * enc->sample_aspect_ratio.den,
                           1024 * 1024);
                 snprintf(buf + strlen(buf), buf_size - strlen(buf),
-                         " [PAR %d:%d DAR %d:%d]",
+                         " [SAR %d:%d DAR %d:%d]",
                          enc->sample_aspect_ratio.num, enc->sample_aspect_ratio.den,
                          display_aspect_ratio.num, display_aspect_ratio.den);
             }
@@ -1915,12 +2972,6 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
         }
         break;
     case AVMEDIA_TYPE_AUDIO:
-        snprintf(buf, buf_size,
-                 "Audio: %s",
-                 codec_name);
-        if (profile)
-            snprintf(buf + strlen(buf), buf_size - strlen(buf),
-                     " (%s)", profile);
         if (enc->sample_rate) {
             snprintf(buf + strlen(buf), buf_size - strlen(buf),
                      ", %d Hz", enc->sample_rate);
@@ -1933,16 +2984,20 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
         }
         break;
     case AVMEDIA_TYPE_DATA:
-        snprintf(buf, buf_size, "Data: %s", codec_name);
+        if (av_log_get_level() >= AV_LOG_DEBUG) {
+            int g = av_gcd(enc->time_base.num, enc->time_base.den);
+            if (g)
+                snprintf(buf + strlen(buf), buf_size - strlen(buf),
+                         ", %d/%d",
+                         enc->time_base.num / g, enc->time_base.den / g);
+        }
         break;
     case AVMEDIA_TYPE_SUBTITLE:
-        snprintf(buf, buf_size, "Subtitle: %s", codec_name);
-        break;
-    case AVMEDIA_TYPE_ATTACHMENT:
-        snprintf(buf, buf_size, "Attachment: %s", codec_name);
+        if (enc->width)
+            snprintf(buf + strlen(buf), buf_size - strlen(buf),
+                     ", %dx%d", enc->width, enc->height);
         break;
     default:
-        snprintf(buf, buf_size, "Invalid Codec type %d", enc->codec_type);
         return;
     }
     if (encode) {
@@ -1957,6 +3012,9 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
     if (bitrate != 0) {
         snprintf(buf + strlen(buf), buf_size - strlen(buf),
                  ", %d kb/s", bitrate / 1000);
+    } else if (enc->rc_max_rate > 0) {
+        snprintf(buf + strlen(buf), buf_size - strlen(buf),
+                 ", max. %d kb/s", enc->rc_max_rate / 1000);
     }
 }
 
@@ -1975,18 +3033,30 @@ const char *av_get_profile_name(const AVCodec *codec, int profile)
 
 unsigned avcodec_version(void)
 {
+//    av_assert0(AV_CODEC_ID_V410==164);
+    av_assert0(AV_CODEC_ID_PCM_S8_PLANAR==65563);
+    av_assert0(AV_CODEC_ID_ADPCM_G722==69660);
+//     av_assert0(AV_CODEC_ID_BMV_AUDIO==86071);
+    av_assert0(AV_CODEC_ID_SRT==94216);
+    av_assert0(LIBAVCODEC_VERSION_MICRO >= 100);
+
+    av_assert0(CODEC_ID_CLLC == AV_CODEC_ID_CLLC);
+    av_assert0(CODEC_ID_PCM_S8_PLANAR == AV_CODEC_ID_PCM_S8_PLANAR);
+    av_assert0(CODEC_ID_ADPCM_IMA_APC == AV_CODEC_ID_ADPCM_IMA_APC);
+    av_assert0(CODEC_ID_ILBC == AV_CODEC_ID_ILBC);
+    av_assert0(CODEC_ID_SRT == AV_CODEC_ID_SRT);
     return LIBAVCODEC_VERSION_INT;
 }
 
 const char *avcodec_configuration(void)
 {
-    return LIBAV_CONFIGURATION;
+    return FFMPEG_CONFIGURATION;
 }
 
 const char *avcodec_license(void)
 {
 #define LICENSE_PREFIX "libavcodec license: "
-    return LICENSE_PREFIX LIBAV_LICENSE + sizeof(LICENSE_PREFIX) - 1;
+    return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
 }
 
 void avcodec_flush_buffers(AVCodecContext *avctx)
@@ -1996,6 +3066,9 @@ void avcodec_flush_buffers(AVCodecContext *avctx)
     else if (avctx->codec->flush)
         avctx->codec->flush(avctx);
 
+    avctx->pts_correction_last_pts =
+    avctx->pts_correction_last_dts = INT64_MIN;
+
     if (!avctx->refcounted_frames)
         av_frame_unref(avctx->internal->to_free);
 }
@@ -2003,20 +3076,29 @@ void avcodec_flush_buffers(AVCodecContext *avctx)
 int av_get_exact_bits_per_sample(enum AVCodecID codec_id)
 {
     switch (codec_id) {
+    case AV_CODEC_ID_8SVX_EXP:
+    case AV_CODEC_ID_8SVX_FIB:
     case AV_CODEC_ID_ADPCM_CT:
     case AV_CODEC_ID_ADPCM_IMA_APC:
     case AV_CODEC_ID_ADPCM_IMA_EA_SEAD:
+    case AV_CODEC_ID_ADPCM_IMA_OKI:
     case AV_CODEC_ID_ADPCM_IMA_WS:
     case AV_CODEC_ID_ADPCM_G722:
     case AV_CODEC_ID_ADPCM_YAMAHA:
         return 4;
+    case AV_CODEC_ID_DSD_LSBF:
+    case AV_CODEC_ID_DSD_MSBF:
+    case AV_CODEC_ID_DSD_LSBF_PLANAR:
+    case AV_CODEC_ID_DSD_MSBF_PLANAR:
     case AV_CODEC_ID_PCM_ALAW:
     case AV_CODEC_ID_PCM_MULAW:
     case AV_CODEC_ID_PCM_S8:
+    case AV_CODEC_ID_PCM_S8_PLANAR:
     case AV_CODEC_ID_PCM_U8:
     case AV_CODEC_ID_PCM_ZORK:
         return 8;
     case AV_CODEC_ID_PCM_S16BE:
+    case AV_CODEC_ID_PCM_S16BE_PLANAR:
     case AV_CODEC_ID_PCM_S16LE:
     case AV_CODEC_ID_PCM_S16LE_PLANAR:
     case AV_CODEC_ID_PCM_U16BE:
@@ -2045,6 +3127,27 @@ int av_get_exact_bits_per_sample(enum AVCodecID codec_id)
     }
 }
 
+enum AVCodecID av_get_pcm_codec(enum AVSampleFormat fmt, int be)
+{
+    static const enum AVCodecID map[AV_SAMPLE_FMT_NB][2] = {
+        [AV_SAMPLE_FMT_U8  ] = { AV_CODEC_ID_PCM_U8,    AV_CODEC_ID_PCM_U8    },
+        [AV_SAMPLE_FMT_S16 ] = { AV_CODEC_ID_PCM_S16LE, AV_CODEC_ID_PCM_S16BE },
+        [AV_SAMPLE_FMT_S32 ] = { AV_CODEC_ID_PCM_S32LE, AV_CODEC_ID_PCM_S32BE },
+        [AV_SAMPLE_FMT_FLT ] = { AV_CODEC_ID_PCM_F32LE, AV_CODEC_ID_PCM_F32BE },
+        [AV_SAMPLE_FMT_DBL ] = { AV_CODEC_ID_PCM_F64LE, AV_CODEC_ID_PCM_F64BE },
+        [AV_SAMPLE_FMT_U8P ] = { AV_CODEC_ID_PCM_U8,    AV_CODEC_ID_PCM_U8    },
+        [AV_SAMPLE_FMT_S16P] = { AV_CODEC_ID_PCM_S16LE, AV_CODEC_ID_PCM_S16BE },
+        [AV_SAMPLE_FMT_S32P] = { AV_CODEC_ID_PCM_S32LE, AV_CODEC_ID_PCM_S32BE },
+        [AV_SAMPLE_FMT_FLTP] = { AV_CODEC_ID_PCM_F32LE, AV_CODEC_ID_PCM_F32BE },
+        [AV_SAMPLE_FMT_DBLP] = { AV_CODEC_ID_PCM_F64LE, AV_CODEC_ID_PCM_F64BE },
+    };
+    if (fmt < 0 || fmt >= AV_SAMPLE_FMT_NB)
+        return AV_CODEC_ID_NONE;
+    if (be < 0 || be > 1)
+        be = AV_NE(1, 0);
+    return map[fmt][be];
+}
+
 int av_get_bits_per_sample(enum AVCodecID codec_id)
 {
     switch (codec_id) {
@@ -2075,8 +3178,8 @@ int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes)
     bps = av_get_exact_bits_per_sample(avctx->codec_id);
 
     /* codecs with an exact constant bits per sample */
-    if (bps > 0 && ch > 0 && frame_bytes > 0)
-        return (frame_bytes * 8) / (bps * ch);
+    if (bps > 0 && ch > 0 && frame_bytes > 0 && ch < 32768 && bps < 32768)
+        return (frame_bytes * 8LL) / (bps * ch);
     bps = avctx->bits_per_coded_sample;
 
     /* codecs with a fixed packet duration */
@@ -2085,11 +3188,10 @@ int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes)
     case AV_CODEC_ID_ADPCM_IMA_QT: return   64;
     case AV_CODEC_ID_ADPCM_EA_XAS: return  128;
     case AV_CODEC_ID_AMR_NB:
+    case AV_CODEC_ID_EVRC:
     case AV_CODEC_ID_GSM:
     case AV_CODEC_ID_QCELP:
-    case AV_CODEC_ID_RA_144:
     case AV_CODEC_ID_RA_288:       return  160;
-    case AV_CODEC_ID_IMC:          return  256;
     case AV_CODEC_ID_AMR_WB:
     case AV_CODEC_ID_GSM_MS:       return  320;
     case AV_CODEC_ID_MP1:          return  384;
@@ -2135,6 +3237,10 @@ int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes)
             return 240 * (frame_bytes / 32);
         if (id == AV_CODEC_ID_NELLYMOSER)
             return 256 * (frame_bytes / 64);
+        if (id == AV_CODEC_ID_RA_144)
+            return 160 * (frame_bytes / 20);
+        if (id == AV_CODEC_ID_G723_1)
+            return 240 * (frame_bytes / 24);
 
         if (bps > 0) {
             /* calc from frame_bytes and bits_per_coded_sample */
@@ -2145,6 +3251,10 @@ int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes)
         if (ch > 0) {
             /* calc from frame_bytes and channels */
             switch (id) {
+            case AV_CODEC_ID_ADPCM_AFC:
+                return frame_bytes / (9 * ch) * 16;
+            case AV_CODEC_ID_ADPCM_DTK:
+                return frame_bytes / (16 * ch) * 28;
             case AV_CODEC_ID_ADPCM_4XM:
             case AV_CODEC_ID_ADPCM_IMA_ISS:
                 return (frame_bytes - 4 * ch) * 2 / ch;
@@ -2166,6 +3276,9 @@ int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes)
                 return 6 * frame_bytes / ch;
             case AV_CODEC_ID_PCM_LXF:
                 return 2 * (frame_bytes / (5 * ch));
+            case AV_CODEC_ID_IAC:
+            case AV_CODEC_ID_IMC:
+                return 4 * frame_bytes / ch;
             }
 
             if (tag) {
@@ -2183,11 +3296,15 @@ int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes)
                 int blocks = frame_bytes / ba;
                 switch (avctx->codec_id) {
                 case AV_CODEC_ID_ADPCM_IMA_WAV:
-                    return blocks * (1 + (ba - 4 * ch) / (4 * ch) * 8);
+                    if (bps < 2 || bps > 5)
+                        return 0;
+                    return blocks * (1 + (ba - 4 * ch) / (bps * ch) * 8);
                 case AV_CODEC_ID_ADPCM_IMA_DK3:
                     return blocks * (((ba - 16) * 2 / 3 * 4) / ch);
                 case AV_CODEC_ID_ADPCM_IMA_DK4:
                     return blocks * (1 + (ba - 4 * ch) * 2 / ch);
+                case AV_CODEC_ID_ADPCM_IMA_RAD:
+                    return blocks * ((ba - 4 * ch) * 2 / ch);
                 case AV_CODEC_ID_ADPCM_MS:
                     return blocks * (2 + (ba - 7 * ch) * 2 / ch);
                 }
@@ -2197,8 +3314,12 @@ int av_get_audio_frame_duration(AVCodecContext *avctx, int frame_bytes)
                 /* calc from frame_bytes, channels, and bits_per_coded_sample */
                 switch (avctx->codec_id) {
                 case AV_CODEC_ID_PCM_DVD:
+                    if(bps<4)
+                        return 0;
                     return 2 * (frame_bytes / ((bps * 2 / 8) * ch));
                 case AV_CODEC_ID_PCM_BLURAY:
+                    if(bps<4)
+                        return 0;
                     return frame_bytes / ((FFALIGN(ch, 2) * bps) / 8);
                 case AV_CODEC_ID_S302M:
                     return 2 * (frame_bytes / ((bps + 4) / 4)) / ch;
@@ -2243,7 +3364,7 @@ int ff_match_2uint16(const uint16_t(*tab)[2], int size, int a, int b)
 FF_DISABLE_DEPRECATION_WARNINGS
 void av_log_missing_feature(void *avc, const char *feature, int want_sample)
 {
-    av_log(avc, AV_LOG_WARNING, "%s is not implemented. Update your Libav "
+    av_log(avc, AV_LOG_WARNING, "%s is not implemented. Update your FFmpeg "
             "version to the newest one from Git. If the problem still "
             "occurs, it means that your file has a feature which has not "
             "been implemented.\n", feature);
@@ -2260,8 +3381,8 @@ void av_log_ask_for_sample(void *avc, const char *msg, ...)
     if (msg)
         av_vlog(avc, AV_LOG_WARNING, msg, argument_list);
     av_log(avc, AV_LOG_WARNING, "If you want to help, upload a sample "
-            "of this file to ftp://upload.libav.org/incoming/ "
-            "and contact the libav-devel mailing list.\n");
+            "of this file to ftp://upload.ffmpeg.org/incoming/ "
+            "and contact the ffmpeg-devel mailing list. (ffmpeg-devel@ffmpeg.org)\n");
 
     va_end(argument_list);
 }
@@ -2269,14 +3390,15 @@ FF_ENABLE_DEPRECATION_WARNINGS
 #endif /* FF_API_MISSING_SAMPLE */
 
 static AVHWAccel *first_hwaccel = NULL;
+static AVHWAccel **last_hwaccel = &first_hwaccel;
 
 void av_register_hwaccel(AVHWAccel *hwaccel)
 {
-    AVHWAccel **p = &first_hwaccel;
-    while (*p)
-        p = &(*p)->next;
-    *p = hwaccel;
+    AVHWAccel **p = last_hwaccel;
     hwaccel->next = NULL;
+    while(*p || avpriv_atomic_ptr_cas((void * volatile *)p, NULL, hwaccel))
+        p = &(*p)->next;
+    last_hwaccel = &hwaccel->next;
 }
 
 AVHWAccel *av_hwaccel_next(const AVHWAccel *hwaccel)
@@ -2304,6 +3426,38 @@ int av_lockmgr_register(int (*cb)(void **mutex, enum AVLockOp op))
     return 0;
 }
 
+int ff_lock_avcodec(AVCodecContext *log_ctx)
+{
+    if (lockmgr_cb) {
+        if ((*lockmgr_cb)(&codec_mutex, AV_LOCK_OBTAIN))
+            return -1;
+    }
+    entangled_thread_counter++;
+    if (entangled_thread_counter != 1) {
+        av_log(log_ctx, AV_LOG_ERROR, "Insufficient thread locking around avcodec_open/close()\n");
+        if (!lockmgr_cb)
+            av_log(log_ctx, AV_LOG_ERROR, "No lock manager is set, please see av_lockmgr_register()\n");
+        ff_avcodec_locked = 1;
+        ff_unlock_avcodec();
+        return AVERROR(EINVAL);
+    }
+    av_assert0(!ff_avcodec_locked);
+    ff_avcodec_locked = 1;
+    return 0;
+}
+
+int ff_unlock_avcodec(void)
+{
+    av_assert0(ff_avcodec_locked);
+    ff_avcodec_locked = 0;
+    entangled_thread_counter--;
+    if (lockmgr_cb) {
+        if ((*lockmgr_cb)(&codec_mutex, AV_LOCK_RELEASE))
+            return -1;
+    }
+    return 0;
+}
+
 int avpriv_lock_avformat(void)
 {
     if (lockmgr_cb) {
@@ -2327,7 +3481,7 @@ unsigned int avpriv_toupper4(unsigned int x)
     return av_toupper(x & 0xFF) +
           (av_toupper((x >>  8) & 0xFF) << 8)  +
           (av_toupper((x >> 16) & 0xFF) << 16) +
-          (av_toupper((x >> 24) & 0xFF) << 24);
+((unsigned)av_toupper((x >> 24) & 0xFF) << 24);
 }
 
 int ff_thread_ref_frame(ThreadFrame *dst, ThreadFrame *src)
@@ -2351,6 +3505,11 @@ int ff_thread_ref_frame(ThreadFrame *dst, ThreadFrame *src)
 
 #if !HAVE_THREADS
 
+enum AVPixelFormat ff_thread_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt)
+{
+    return ff_get_format(avctx, fmt);
+}
+
 int ff_thread_get_buffer(AVCodecContext *avctx, ThreadFrame *f, int flags)
 {
     f->owner = avctx;
@@ -2375,10 +3534,38 @@ void ff_thread_await_progress(ThreadFrame *f, int progress, int field)
 {
 }
 
+int ff_thread_can_start_frame(AVCodecContext *avctx)
+{
+    return 1;
+}
+
+int ff_alloc_entries(AVCodecContext *avctx, int count)
+{
+    return 0;
+}
+
+void ff_reset_entries(AVCodecContext *avctx)
+{
+}
+
+void ff_thread_await_progress2(AVCodecContext *avctx, int field, int thread, int shift)
+{
+}
+
+void ff_thread_report_progress2(AVCodecContext *avctx, int field, int thread, int n)
+{
+}
+
 #endif
 
 enum AVMediaType avcodec_get_type(enum AVCodecID codec_id)
 {
+    AVCodec *c= avcodec_find_decoder(codec_id);
+    if(!c)
+        c= avcodec_find_encoder(codec_id);
+    if(c)
+        return c->type;
+
     if (codec_id <= AV_CODEC_ID_NONE)
         return AVMEDIA_TYPE_UNKNOWN;
     else if (codec_id < AV_CODEC_ID_FIRST_AUDIO)
@@ -2396,13 +3583,31 @@ int avcodec_is_open(AVCodecContext *s)
     return !!s->internal;
 }
 
-const uint8_t *avpriv_find_start_code(const uint8_t *restrict p,
+int avpriv_bprint_to_extradata(AVCodecContext *avctx, struct AVBPrint *buf)
+{
+    int ret;
+    char *str;
+
+    ret = av_bprint_finalize(buf, &str);
+    if (ret < 0)
+        return ret;
+    avctx->extradata = str;
+    /* Note: the string is NUL terminated (so extradata can be read as a
+     * string), but the ending character is not accounted for in the size (in
+     * binary formats you are likely not supposed to mux that character). When
+     * extradata is copied, it is also padded with FF_INPUT_BUFFER_PADDING_SIZE
+     * zeros. */
+    avctx->extradata_size = buf->len;
+    return 0;
+}
+
+const uint8_t *avpriv_find_start_code(const uint8_t *av_restrict p,
                                       const uint8_t *end,
-                                      uint32_t * restrict state)
+                                      uint32_t *av_restrict state)
 {
     int i;
 
-    assert(p <= end);
+    av_assert0(p <= end);
     if (p >= end)
         return end;
 
diff --git a/libavcodec/utvideo.c b/libavcodec/utvideo.c
index eb5a924..308adb7 100644
--- a/libavcodec/utvideo.c
+++ b/libavcodec/utvideo.c
@@ -2,20 +2,20 @@
  * Common Ut Video code
  * Copyright (c) 2011 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/utvideo.h b/libavcodec/utvideo.h
index 718273c..78c3ec5 100644
--- a/libavcodec/utvideo.h
+++ b/libavcodec/utvideo.h
@@ -2,20 +2,20 @@
  * Common Ut Video header
  * Copyright (c) 2011 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/utvideodec.c b/libavcodec/utvideodec.c
index 7d75c59..afd56ea 100644
--- a/libavcodec/utvideodec.c
+++ b/libavcodec/utvideodec.c
@@ -2,20 +2,20 @@
  * Ut Video decoder
  * Copyright (c) 2011 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -71,7 +71,7 @@ static int build_huff(const uint8_t *src, VLC *vlc, int *fsym)
         code += 0x80000000u >> (he[i].len - 1);
     }
 
-    return ff_init_vlc_sparse(vlc, FFMIN(he[last].len, 9), last + 1,
+    return ff_init_vlc_sparse(vlc, FFMIN(he[last].len, 11), last + 1,
                               bits,  sizeof(*bits),  sizeof(*bits),
                               codes, sizeof(*codes), sizeof(*codes),
                               syms,  sizeof(*syms),  sizeof(*syms), 0);
@@ -156,7 +156,7 @@ static int decode_plane(UtvideoContext *c, int plane_no,
                            "Slice decoding ran out of bits\n");
                     goto fail;
                 }
-                pix = get_vlc2(&gb, vlc.table, vlc.bits, 4);
+                pix = get_vlc2(&gb, vlc.table, vlc.bits, 3);
                 if (pix < 0) {
                     av_log(c->avctx, AV_LOG_ERROR, "Decoding error\n");
                     goto fail;
@@ -335,12 +335,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     GetByteContext gb;
     ThreadFrame frame = { .f = data };
 
-    if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
         return ret;
-    }
-
-    ff_thread_finish_setup(avctx);
 
     /* parse plane structure to get frame flags and validate slice offsets */
     bytestream2_init(&gb, buf, buf_size);
diff --git a/libavcodec/utvideoenc.c b/libavcodec/utvideoenc.c
index 8dc208b..cbd34d0 100644
--- a/libavcodec/utvideoenc.c
+++ b/libavcodec/utvideoenc.c
@@ -2,20 +2,20 @@
  * Ut Video encoder
  * Copyright (c) 2012 Jan Ekström
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -401,6 +401,7 @@ static int encode_plane(AVCodecContext *avctx, uint8_t *src,
     uint32_t offset = 0, slice_len = 0;
     int      i, sstart, send = 0;
     int      symbol;
+    int      ret;
 
     /* Do prediction / make planes */
     switch (c->frame_pred) {
@@ -443,7 +444,7 @@ static int encode_plane(AVCodecContext *avctx, uint8_t *src,
         /* If non-zero count is found, see if it matches width * height */
         if (counts[symbol]) {
             /* Special case if only one symbol was used */
-            if (counts[symbol] == width * height) {
+            if (counts[symbol] == width * (int64_t)height) {
                 /*
                  * Write a zero for the single symbol
                  * used in the plane, else 0xFF.
@@ -467,7 +468,8 @@ static int encode_plane(AVCodecContext *avctx, uint8_t *src,
     }
 
     /* Calculate huffman lengths */
-    ff_huff_gen_len_table(lengths, counts);
+    if ((ret = ff_huff_gen_len_table(lengths, counts, 256, 1)) < 0)
+        return ret;
 
     /*
      * Write the plane's header into the output packet:
@@ -494,7 +496,7 @@ static int encode_plane(AVCodecContext *avctx, uint8_t *src,
          * get the offset in bits and convert to bytes.
          */
         offset += write_huff_codes(dst + sstart * width, c->slice_bits,
-                                   width * (send - sstart), width,
+                                   width * height + 4, width,
                                    send - sstart, he) >> 3;
 
         slice_len = offset - slice_len;
@@ -541,22 +543,17 @@ static int utvideo_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     int i, ret = 0;
 
     /* Allocate a new packet if needed, and set it to the pointer dst */
-    ret = ff_alloc_packet(pkt, (256 + 4 * c->slices + width * height) *
-                          c->planes + 4);
+    ret = ff_alloc_packet2(avctx, pkt, (256 + 4 * c->slices + width * height) *
+                           c->planes + 4);
 
-    if (ret < 0) {
-        av_log(avctx, AV_LOG_ERROR,
-               "Error allocating the output packet, or the provided packet "
-               "was too small.\n");
+    if (ret < 0)
         return ret;
-    }
 
     dst = pkt->data;
 
     bytestream2_init_writer(&pb, dst, pkt->size);
 
-    av_fast_malloc(&c->slice_bits, &c->slice_bits_size,
-                   width * height + FF_INPUT_BUFFER_PADDING_SIZE);
+    av_fast_padded_malloc(&c->slice_bits, &c->slice_bits_size, width * height + 4);
 
     if (!c->slice_bits) {
         av_log(avctx, AV_LOG_ERROR, "Cannot allocate temporary buffer 2.\n");
@@ -645,6 +642,7 @@ AVCodec ff_utvideo_encoder = {
     .init           = utvideo_encode_init,
     .encode2        = utvideo_encode_frame,
     .close          = utvideo_encode_close,
+    .capabilities   = CODEC_CAP_FRAME_THREADS | CODEC_CAP_INTRA_ONLY,
     .pix_fmts       = (const enum AVPixelFormat[]) {
                           AV_PIX_FMT_RGB24, AV_PIX_FMT_RGBA, AV_PIX_FMT_YUV422P,
                           AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE
diff --git a/libavcodec/v210dec.c b/libavcodec/v210dec.c
index 8827397..ae03952 100644
--- a/libavcodec/v210dec.c
+++ b/libavcodec/v210dec.c
@@ -4,31 +4,55 @@
  * Copyright (C) 2009 Michael Niedermayer <michaelni@gmx.at>
  * Copyright (c) 2009 Baptiste Coudurier <baptiste dot coudurier at gmail dot com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "avcodec.h"
 #include "internal.h"
+#include "v210dec.h"
 #include "libavutil/bswap.h"
 #include "libavutil/internal.h"
 #include "libavutil/mem.h"
 
+/* Split one little-endian 32-bit word read from src into three 10-bit samples. */
+#define READ_PIXELS(a, b, c)         \
+    do {                             \
+        val  = av_le2ne32(*src++);   \
+        *a++ =  val & 0x3FF;         \
+        *b++ = (val >> 10) & 0x3FF;  \
+        *c++ = (val >> 20) & 0x3FF;  \
+    } while (0)
+/* Unpack complete 6-pixel groups (four 32-bit words each) into the y/u/v planes. */
+static void v210_planar_unpack_c(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width)
+{
+    uint32_t val;
+    int remaining;
+    for (remaining = width; remaining > 5; remaining -= 6) {
+        READ_PIXELS(u, y, v);
+        READ_PIXELS(y, u, y);
+        READ_PIXELS(v, y, u);
+        READ_PIXELS(y, v, y);
+    }
+}
+
 static av_cold int decode_init(AVCodecContext *avctx)
 {
+    V210DecContext *s = avctx->priv_data;
+
     if (avctx->width & 1) {
         av_log(avctx, AV_LOG_ERROR, "v210 needs even width\n");
         return AVERROR_INVALIDDATA;
@@ -36,22 +60,48 @@ static av_cold int decode_init(AVCodecContext *avctx)
     avctx->pix_fmt             = AV_PIX_FMT_YUV422P10;
     avctx->bits_per_raw_sample = 10;
 
+    s->unpack_frame            = v210_planar_unpack_c;
+
+    if (HAVE_MMX)
+        v210_x86_init(s);
+
     return 0;
 }
 
 static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                         AVPacket *avpkt)
 {
-    int h, w, ret;
+    V210DecContext *s = avctx->priv_data;
+
+    int h, w, ret, stride, aligned_input;
     AVFrame *pic = data;
     const uint8_t *psrc = avpkt->data;
     uint16_t *y, *u, *v;
-    int aligned_width = ((avctx->width + 47) / 48) * 48;
-    int stride = aligned_width * 8 / 3;
+
+    if (s->custom_stride )
+        stride = s->custom_stride;
+    else {
+        int aligned_width = ((avctx->width + 47) / 48) * 48;
+        stride = aligned_width * 8 / 3;
+    }
 
     if (avpkt->size < stride * avctx->height) {
-        av_log(avctx, AV_LOG_ERROR, "packet too small\n");
-        return AVERROR_INVALIDDATA;
+        if ((((avctx->width + 23) / 24) * 24 * 8) / 3 * avctx->height == avpkt->size) {
+            stride = avpkt->size / avctx->height;
+            if (!s->stride_warning_shown)
+                av_log(avctx, AV_LOG_WARNING, "Broken v210 with too small padding (64 byte) detected\n");
+            s->stride_warning_shown = 1;
+        } else {
+            av_log(avctx, AV_LOG_ERROR, "packet too small\n");
+            return AVERROR_INVALIDDATA;
+        }
+    }
+
+    aligned_input = !((uintptr_t)psrc & 0xf) && !(stride & 0xf);
+    if (aligned_input != s->aligned_input) {
+        s->aligned_input = aligned_input;
+        if (HAVE_MMX)
+            v210_x86_init(s);
     }
 
     if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
@@ -63,36 +113,31 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     pic->pict_type = AV_PICTURE_TYPE_I;
     pic->key_frame = 1;
 
-#define READ_PIXELS(a, b, c)         \
-    do {                             \
-        val  = av_le2ne32(*src++);   \
-        *a++ =  val & 0x3FF;         \
-        *b++ = (val >> 10) & 0x3FF;  \
-        *c++ = (val >> 20) & 0x3FF;  \
-    } while (0)
-
     for (h = 0; h < avctx->height; h++) {
         const uint32_t *src = (const uint32_t*)psrc;
         uint32_t val;
-        for (w = 0; w < avctx->width - 5; w += 6) {
-            READ_PIXELS(u, y, v);
-            READ_PIXELS(y, u, y);
-            READ_PIXELS(v, y, u);
-            READ_PIXELS(y, v, y);
-        }
+
+        w = (avctx->width / 6) * 6;
+        s->unpack_frame(src, y, u, v, w);
+
+        y += w;
+        u += w >> 1;
+        v += w >> 1;
+        src += (w << 1) / 3;
+
         if (w < avctx->width - 1) {
             READ_PIXELS(u, y, v);
 
             val  = av_le2ne32(*src++);
             *y++ =  val & 0x3FF;
-        }
-        if (w < avctx->width - 3) {
-            *u++ = (val >> 10) & 0x3FF;
-            *y++ = (val >> 20) & 0x3FF;
+            if (w < avctx->width - 3) {
+                *u++ = (val >> 10) & 0x3FF;
+                *y++ = (val >> 20) & 0x3FF;
 
-            val  = av_le2ne32(*src++);
-            *v++ =  val & 0x3FF;
-            *y++ = (val >> 10) & 0x3FF;
+                val  = av_le2ne32(*src++);
+                *v++ =  val & 0x3FF;
+                *y++ = (val >> 10) & 0x3FF;
+            }
         }
 
         psrc += stride;
@@ -101,17 +146,40 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         v += pic->linesize[2] / 2 - avctx->width / 2;
     }
 
+    if (avctx->field_order > AV_FIELD_PROGRESSIVE) {
+        /* we have interlaced material flagged in container */
+        pic->interlaced_frame = 1;
+        if (avctx->field_order == AV_FIELD_TT || avctx->field_order == AV_FIELD_TB)
+            pic->top_field_first = 1;
+    }
+
     *got_frame      = 1;
 
     return avpkt->size;
 }
 
+/* custom_stride: 0 (default) derives the stride from the width; >0 overrides it. */
+#define V210DEC_FLAGS (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+static const AVOption v210dec_options[] = {
+    {"custom_stride", "Custom V210 stride", offsetof(V210DecContext, custom_stride), AV_OPT_TYPE_INT,
+     {.i64 = 0}, 0, INT_MAX, V210DEC_FLAGS}, /* a negative stride would step before the packet data */
+    {NULL}
+};
+static const AVClass v210dec_class = {
+    .class_name = "V210 Decoder",
+    .item_name  = av_default_item_name,
+    .option     = v210dec_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVCodec ff_v210_decoder = {
     .name           = "v210",
     .long_name      = NULL_IF_CONFIG_SMALL("Uncompressed 4:2:2 10-bit"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_V210,
+    .priv_data_size = sizeof(V210DecContext),
     .init           = decode_init,
     .decode         = decode_frame,
     .capabilities   = CODEC_CAP_DR1,
+    .priv_class     = &v210dec_class,
 };
diff --git a/libavcodec/v210dec.h b/libavcodec/v210dec.h
new file mode 100644
index 0000000..a8db7d6
--- /dev/null
+++ b/libavcodec/v210dec.h
@@ -0,0 +1,36 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_V210DEC_H
+#define AVCODEC_V210DEC_H
+
+#include "libavutil/log.h"
+#include "libavutil/opt.h"
+
+
+typedef struct {
+    AVClass *av_class;        /* class pointer; the v210 decoder sets .priv_class to its AVClass */
+    int custom_stride;        /* "custom_stride" option; when non-zero it replaces the computed line stride */
+    int aligned_input;        /* alignment state recorded by decode_frame (data pointer and stride both 16-byte aligned) */
+    int stride_warning_shown; /* set to 1 once the "too small padding" warning has been logged */
+    void (*unpack_frame)(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width); /* line unpacker; decode_init installs the C version */
+} V210DecContext;
+
+void v210_x86_init(V210DecContext *s);
+
+#endif /* AVCODEC_V210DEC_H */
diff --git a/libavcodec/v210enc.c b/libavcodec/v210enc.c
index ef0d6ab..1e53bdb 100644
--- a/libavcodec/v210enc.c
+++ b/libavcodec/v210enc.c
@@ -4,20 +4,20 @@
  * Copyright (C) 2009 Michael Niedermayer <michaelni@gmx.at>
  * Copyright (c) 2009 Baptiste Coudurier <baptiste dot coudurier at gmail dot com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -57,10 +57,8 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     const uint16_t *v = (const uint16_t*)pic->data[2];
     PutByteContext p;
 
-    if ((ret = ff_alloc_packet(pkt, avctx->height * stride)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
+    if ((ret = ff_alloc_packet2(avctx, pkt, avctx->height * stride)) < 0)
         return ret;
-    }
 
     bytestream2_init_writer(&p, pkt->data, pkt->size);
 
@@ -88,13 +86,13 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
             val = CLIP(*y++);
             if (w == avctx->width - 2)
                 bytestream2_put_le32u(&p, val);
-        }
-        if (w < avctx->width - 3) {
-            val |= (CLIP(*u++) << 10) | (CLIP(*y++) << 20);
-            bytestream2_put_le32u(&p, val);
+            if (w < avctx->width - 3) {
+                val |= (CLIP(*u++) << 10) | (CLIP(*y++) << 20);
+                bytestream2_put_le32u(&p, val);
 
-            val = CLIP(*v++) | (CLIP(*y++) << 10);
-            bytestream2_put_le32u(&p, val);
+                val = CLIP(*v++) | (CLIP(*y++) << 10);
+                bytestream2_put_le32u(&p, val);
+            }
         }
 
         bytestream2_set_buffer(&p, 0, line_padding);
diff --git a/libavcodec/v210x.c b/libavcodec/v210x.c
index 2922c05..6330715 100644
--- a/libavcodec/v210x.c
+++ b/libavcodec/v210x.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2009 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/v308dec.c b/libavcodec/v308dec.c
new file mode 100644
index 0000000..1d31f0a
--- /dev/null
+++ b/libavcodec/v308dec.c
@@ -0,0 +1,83 @@
+/*
+ * v308 decoder
+ * Copyright (c) 2011 Carl Eugen Hoyos
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "internal.h"
+
+/* Select planar YUV 4:4:4 output; an odd width only triggers a warning. */
+static av_cold int v308_decode_init(AVCodecContext *avctx)
+{
+    const int odd_width = avctx->width % 2 != 0;
+
+    avctx->pix_fmt = AV_PIX_FMT_YUV444P;
+    if (odd_width)
+        av_log(avctx, AV_LOG_WARNING, "v308 requires width to be even.\n");
+    return 0;
+}
+
+/* Decode one packet of packed V,Y,U bytes into a planar YUV 4:4:4 picture. */
+static int v308_decode_frame(AVCodecContext *avctx, void *data,
+                             int *got_frame, AVPacket *avpkt)
+{
+    AVFrame *pic = data;
+    const uint8_t *in = avpkt->data;
+    uint8_t *luma, *cb, *cr;
+    int row, col, ret;
+
+    /* Every pixel occupies three bytes in the packet. */
+    if (avpkt->size < 3 * avctx->height * avctx->width) {
+        av_log(avctx, AV_LOG_ERROR, "Insufficient input data.\n");
+        return AVERROR(EINVAL);
+    }
+
+    ret = ff_get_buffer(avctx, pic, 0);
+    if (ret < 0)
+        return ret;
+
+    pic->pict_type = AV_PICTURE_TYPE_I;
+    pic->key_frame = 1;
+
+    luma = pic->data[0];
+    cb   = pic->data[1];
+    cr   = pic->data[2];
+    for (row = 0; row < avctx->height; row++) {
+        for (col = 0; col < avctx->width; col++, in += 3) {
+            cr[col]   = in[0];
+            luma[col] = in[1];
+            cb[col]   = in[2];
+        }
+        luma += pic->linesize[0];
+        cb   += pic->linesize[1];
+        cr   += pic->linesize[2];
+    }
+
+    *got_frame = 1;
+    return avpkt->size;
+}
+
+AVCodec ff_v308_decoder = { /* v308 decoder: packed V,Y,U bytes in, planar YUV 4:4:4 out */
+    .name         = "v308",
+    .long_name    = NULL_IF_CONFIG_SMALL("Uncompressed packed 4:4:4"),
+    .type         = AVMEDIA_TYPE_VIDEO,
+    .id           = AV_CODEC_ID_V308,
+    .init         = v308_decode_init,
+    .decode       = v308_decode_frame,
+    .capabilities = CODEC_CAP_DR1,
+};
diff --git a/libavcodec/v308enc.c b/libavcodec/v308enc.c
new file mode 100644
index 0000000..c6c5ac5
--- /dev/null
+++ b/libavcodec/v308enc.c
@@ -0,0 +1,94 @@
+/*
+ * v308 encoder
+ *
+ * Copyright (c) 2011 Carl Eugen Hoyos
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "avcodec.h"
+#include "internal.h"
+
+/* Reject odd widths and allocate the coded_frame used by the encoder. */
+static av_cold int v308_encode_init(AVCodecContext *avctx)
+{
+    if (avctx->width % 2 != 0) {
+        av_log(avctx, AV_LOG_ERROR, "v308 requires width to be even.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    avctx->coded_frame = av_frame_alloc();
+    if (avctx->coded_frame)
+        return 0;
+
+    /* Allocation failed. */
+    av_log(avctx, AV_LOG_ERROR, "Could not allocate frame.\n");
+    return AVERROR(ENOMEM);
+}
+
+/* Pack a planar YUV 4:4:4 frame into V,Y,U byte triplets, one per pixel. */
+static int v308_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
+                             const AVFrame *pic, int *got_packet)
+{
+    uint8_t *out, *luma, *cb, *cr;
+    int row, col, ret;
+
+    ret = ff_alloc_packet2(avctx, pkt, avctx->width * avctx->height * 3);
+    if (ret < 0)
+        return ret;
+    out = pkt->data;
+
+    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+    avctx->coded_frame->key_frame = 1;
+
+    luma = pic->data[0];
+    cb   = pic->data[1];
+    cr   = pic->data[2];
+    for (row = 0; row < avctx->height; row++) {
+        for (col = 0; col < avctx->width; col++, out += 3) {
+            out[0] = cr[col];
+            out[1] = luma[col];
+            out[2] = cb[col];
+        }
+        luma += pic->linesize[0];
+        cb   += pic->linesize[1];
+        cr   += pic->linesize[2];
+    }
+
+    *got_packet = 1;
+    pkt->flags |= AV_PKT_FLAG_KEY;
+    return 0;
+}
+
+/* Free the coded_frame obtained from av_frame_alloc() in v308_encode_init(). */
+static av_cold int v308_encode_close(AVCodecContext *avctx)
+{
+    av_frame_free(&avctx->coded_frame);
+    return 0;
+}
+
+AVCodec ff_v308_encoder = { /* v308 encoder: planar YUV 4:4:4 in, packed V,Y,U bytes out */
+    .name         = "v308",
+    .long_name    = NULL_IF_CONFIG_SMALL("Uncompressed packed 4:4:4"),
+    .type         = AVMEDIA_TYPE_VIDEO,
+    .id           = AV_CODEC_ID_V308,
+    .init         = v308_encode_init,
+    .encode2      = v308_encode_frame,
+    .close        = v308_encode_close,
+    .pix_fmts     = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV444P, AV_PIX_FMT_NONE },
+};
diff --git a/libavcodec/v408dec.c b/libavcodec/v408dec.c
new file mode 100644
index 0000000..be442fa
--- /dev/null
+++ b/libavcodec/v408dec.c
@@ -0,0 +1,103 @@
+/*
+ * v408 decoder
+ * Copyright (c) 2012 Carl Eugen Hoyos
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "internal.h"
+
+/* Shared init of the AYUV and v408 decoders: both output planar YUVA 4:4:4. */
+static av_cold int v408_decode_init(AVCodecContext *avctx)
+{
+    avctx->pix_fmt = AV_PIX_FMT_YUVA444P;
+    return 0;
+}
+
+/* Decode packed 4:4:4:4 data (AYUV: V,U,Y,A bytes; v408: U,Y,V,A) to planar YUVA. */
+static int v408_decode_frame(AVCodecContext *avctx, void *data,
+                             int *got_frame, AVPacket *avpkt)
+{
+    AVFrame *pic = data;
+    const uint8_t *in = avpkt->data;
+    uint8_t *luma, *cb, *cr, *alpha;
+    int row, col, ret;
+    /* Byte position of each component inside a four-byte pixel. */
+    const int is_ayuv = avctx->codec_id == AV_CODEC_ID_AYUV;
+    const int y_pos   = is_ayuv ? 2 : 1;
+    const int u_pos   = is_ayuv ? 1 : 0;
+    const int v_pos   = is_ayuv ? 0 : 2;
+
+    /* Every pixel occupies four bytes in the packet. */
+    if (avpkt->size < 4 * avctx->height * avctx->width) {
+        av_log(avctx, AV_LOG_ERROR, "Insufficient input data.\n");
+        return AVERROR(EINVAL);
+    }
+
+    ret = ff_get_buffer(avctx, pic, 0);
+    if (ret < 0)
+        return ret;
+
+    pic->pict_type = AV_PICTURE_TYPE_I;
+    pic->key_frame = 1;
+
+    luma  = pic->data[0];
+    cb    = pic->data[1];
+    cr    = pic->data[2];
+    alpha = pic->data[3];
+
+    for (row = 0; row < avctx->height; row++) {
+        for (col = 0; col < avctx->width; col++, in += 4) {
+            luma[col]  = in[y_pos];
+            cb[col]    = in[u_pos];
+            cr[col]    = in[v_pos];
+            alpha[col] = in[3];
+        }
+        luma  += pic->linesize[0];
+        cb    += pic->linesize[1];
+        cr    += pic->linesize[2];
+        alpha += pic->linesize[3];
+    }
+
+    *got_frame = 1;
+
+    return avpkt->size;
+}
+
+#if CONFIG_AYUV_DECODER
+AVCodec ff_ayuv_decoder = { /* AYUV flavour: v408_decode_frame reads V,U,Y,A bytes for this codec id */
+    .name         = "ayuv",
+    .long_name    = NULL_IF_CONFIG_SMALL("Uncompressed packed MS 4:4:4:4"),
+    .type         = AVMEDIA_TYPE_VIDEO,
+    .id           = AV_CODEC_ID_AYUV,
+    .init         = v408_decode_init,
+    .decode       = v408_decode_frame,
+    .capabilities = CODEC_CAP_DR1,
+};
+#endif
+#if CONFIG_V408_DECODER
+AVCodec ff_v408_decoder = { /* v408 flavour: v408_decode_frame reads U,Y,V,A bytes for this codec id */
+    .name         = "v408",
+    .long_name    = NULL_IF_CONFIG_SMALL("Uncompressed packed QT 4:4:4:4"),
+    .type         = AVMEDIA_TYPE_VIDEO,
+    .id           = AV_CODEC_ID_V408,
+    .init         = v408_decode_init,
+    .decode       = v408_decode_frame,
+    .capabilities = CODEC_CAP_DR1,
+};
+#endif
diff --git a/libavcodec/v408enc.c b/libavcodec/v408enc.c
new file mode 100644
index 0000000..20f08c7
--- /dev/null
+++ b/libavcodec/v408enc.c
@@ -0,0 +1,113 @@
+/*
+ * v408 encoder
+ *
+ * Copyright (c) 2012 Carl Eugen Hoyos
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "avcodec.h"
+#include "internal.h"
+
+/* Allocate the coded_frame that v408_encode_frame() marks as an intra key frame. */
+static av_cold int v408_encode_init(AVCodecContext *avctx)
+{
+    AVFrame *frame = av_frame_alloc();
+
+    avctx->coded_frame = frame;
+    if (frame)
+        return 0;
+    av_log(avctx, AV_LOG_ERROR, "Could not allocate frame.\n");
+    return AVERROR(ENOMEM);
+}
+
+/* Pack planar YUVA 4:4:4 into four-byte pixels (AYUV: V,U,Y,A; v408: U,Y,V,A). */
+static int v408_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
+                             const AVFrame *pic, int *got_packet)
+{
+    uint8_t *out, *luma, *cb, *cr, *alpha;
+    int row, col, ret;
+    int is_ayuv, y_pos, u_pos, v_pos;
+
+    ret = ff_alloc_packet2(avctx, pkt, avctx->width * avctx->height * 4);
+    if (ret < 0)
+        return ret;
+    out = pkt->data;
+
+    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+    avctx->coded_frame->key_frame = 1;
+
+    /* Byte position of each component inside a four-byte pixel. */
+    is_ayuv = avctx->codec_id == AV_CODEC_ID_AYUV;
+    y_pos   = is_ayuv ? 2 : 1;
+    u_pos   = is_ayuv ? 1 : 0;
+    v_pos   = is_ayuv ? 0 : 2;
+
+    luma  = pic->data[0];
+    cb    = pic->data[1];
+    cr    = pic->data[2];
+    alpha = pic->data[3];
+    for (row = 0; row < avctx->height; row++) {
+        for (col = 0; col < avctx->width; col++, out += 4) {
+            out[y_pos] = luma[col];
+            out[u_pos] = cb[col];
+            out[v_pos] = cr[col];
+            out[3]     = alpha[col];
+        }
+        luma  += pic->linesize[0];
+        cb    += pic->linesize[1];
+        cr    += pic->linesize[2];
+        alpha += pic->linesize[3];
+    }
+
+    *got_packet = 1;
+    pkt->flags |= AV_PKT_FLAG_KEY;
+    return 0;
+}
+
+/* Free the coded_frame obtained from av_frame_alloc() in v408_encode_init(). */
+static av_cold int v408_encode_close(AVCodecContext *avctx)
+{
+    av_frame_free(&avctx->coded_frame);
+    return 0;
+}
+
+#if CONFIG_AYUV_ENCODER
+AVCodec ff_ayuv_encoder = { /* AYUV flavour: v408_encode_frame writes V,U,Y,A bytes for this codec id */
+    .name         = "ayuv",
+    .long_name    = NULL_IF_CONFIG_SMALL("Uncompressed packed MS 4:4:4:4"),
+    .type         = AVMEDIA_TYPE_VIDEO,
+    .id           = AV_CODEC_ID_AYUV,
+    .init         = v408_encode_init,
+    .encode2      = v408_encode_frame,
+    .close        = v408_encode_close,
+    .pix_fmts     = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUVA444P, AV_PIX_FMT_NONE },
+};
+#endif
+#if CONFIG_V408_ENCODER
+AVCodec ff_v408_encoder = { /* v408 flavour: v408_encode_frame writes U,Y,V,A bytes for this codec id */
+    .name         = "v408",
+    .long_name    = NULL_IF_CONFIG_SMALL("Uncompressed packed QT 4:4:4:4"),
+    .type         = AVMEDIA_TYPE_VIDEO,
+    .id           = AV_CODEC_ID_V408,
+    .init         = v408_encode_init,
+    .encode2      = v408_encode_frame,
+    .close        = v408_encode_close,
+    .pix_fmts     = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUVA444P, AV_PIX_FMT_NONE },
+};
+#endif
diff --git a/libavcodec/v410dec.c b/libavcodec/v410dec.c
index 07be502..e7a9c0e 100644
--- a/libavcodec/v410dec.c
+++ b/libavcodec/v410dec.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2011 Derek Buitenhuis
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -49,17 +49,15 @@ static int v410_decode_frame(AVCodecContext *avctx, void *data,
     uint8_t *src = avpkt->data;
     uint16_t *y, *u, *v;
     uint32_t val;
-    int i, j;
+    int i, j, ret;
 
     if (avpkt->size < 4 * avctx->height * avctx->width) {
         av_log(avctx, AV_LOG_ERROR, "Insufficient input data.\n");
         return AVERROR(EINVAL);
     }
 
-    if (ff_get_buffer(avctx, pic, 0) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Could not allocate buffer.\n");
-        return AVERROR(ENOMEM);
-    }
+    if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
+        return ret;
 
     pic->key_frame = 1;
     pic->pict_type = AV_PICTURE_TYPE_I;
diff --git a/libavcodec/v410enc.c b/libavcodec/v410enc.c
index 77b32d7..0e2e82a 100644
--- a/libavcodec/v410enc.c
+++ b/libavcodec/v410enc.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2011 Derek Buitenhuis
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -28,7 +28,7 @@
 static av_cold int v410_encode_init(AVCodecContext *avctx)
 {
     if (avctx->width & 1) {
-        av_log(avctx, AV_LOG_ERROR, "v410 requires even width.\n");
+        av_log(avctx, AV_LOG_ERROR, "v410 requires width to be even.\n");
         return AVERROR_INVALIDDATA;
     }
 
@@ -50,10 +50,8 @@ static int v410_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     uint32_t val;
     int i, j, ret;
 
-    if ((ret = ff_alloc_packet(pkt, avctx->width * avctx->height * 4)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
+    if ((ret = ff_alloc_packet2(avctx, pkt, avctx->width * avctx->height * 4)) < 0)
         return ret;
-    }
     dst = pkt->data;
 
     avctx->coded_frame->key_frame = 1;
diff --git a/libavcodec/vaapi.c b/libavcodec/vaapi.c
index b2dc41d..db4d29d 100644
--- a/libavcodec/vaapi.c
+++ b/libavcodec/vaapi.c
@@ -4,20 +4,20 @@
  *
  * Copyright (C) 2008-2009 Splitted-Desktop Systems
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -46,6 +46,9 @@ int ff_vaapi_render_picture(struct vaapi_context *vactx, VASurfaceID surface)
     VABufferID va_buffers[3];
     unsigned int n_va_buffers = 0;
 
+    if (!vactx->pic_param_buf_id)
+        return 0;
+
     vaUnmapBuffer(vactx->display, vactx->pic_param_buf_id);
     va_buffers[n_va_buffers++] = vactx->pic_param_buf_id;
 
@@ -194,26 +197,4 @@ void ff_vaapi_common_end_frame(AVCodecContext *avctx)
     vactx->slice_params_alloc  = 0;
 }
 
-int ff_vaapi_mpeg_end_frame(AVCodecContext *avctx)
-{
-    struct vaapi_context * const vactx = avctx->hwaccel_context;
-    MpegEncContext *s = avctx->priv_data;
-    int ret;
-
-    ret = ff_vaapi_commit_slices(vactx);
-    if (ret < 0)
-        goto finish;
-
-    ret = ff_vaapi_render_picture(vactx,
-                                  ff_vaapi_get_surface_id(s->current_picture_ptr->f));
-    if (ret < 0)
-        goto finish;
-
-    ff_mpeg_draw_horiz_band(s, 0, s->avctx->height);
-
-finish:
-    ff_vaapi_common_end_frame(avctx);
-    return ret;
-}
-
 /* @} */
diff --git a/libavcodec/vaapi.h b/libavcodec/vaapi.h
index 39e8825..815a27e 100644
--- a/libavcodec/vaapi.h
+++ b/libavcodec/vaapi.h
@@ -1,23 +1,23 @@
 /*
- * Video Acceleration API (shared data between Libav and the video player)
+ * Video Acceleration API (shared data between FFmpeg and the video player)
  * HW decode acceleration for MPEG-2, MPEG-4, H.264 and VC-1
  *
  * Copyright (C) 2008-2009 Splitted-Desktop Systems
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -39,7 +39,7 @@
  */
 
 /**
- * This structure is used to share data between the Libav library and
+ * This structure is used to share data between the FFmpeg library and
  * the client video application.
  * This shall be zero-allocated and available as
  * AVCodecContext.hwaccel_context. All user members can be set once
diff --git a/libavcodec/vaapi_h264.c b/libavcodec/vaapi_h264.c
index 7529f04..8eb8a66 100644
--- a/libavcodec/vaapi_h264.c
+++ b/libavcodec/vaapi_h264.c
@@ -3,20 +3,20 @@
  *
  * Copyright (C) 2008-2009 Splitted-Desktop Systems
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,7 +26,7 @@
 
 /**
  * @file
- * This file implements the glue code between Libav's and VA API's
+ * This file implements the glue code between FFmpeg's and VA API's
  * structures for H.264 decoding.
  */
 
@@ -44,10 +44,10 @@ static void init_vaapi_pic(VAPictureH264 *va_pic)
 }
 
 /**
- * Translate an Libav Picture into its VA API form.
+ * Translate an FFmpeg Picture into its VA API form.
  *
  * @param[out] va_pic          A pointer to VA API's own picture struct
- * @param[in]  pic             A pointer to the Libav picture struct to convert
+ * @param[in]  pic             A pointer to the FFmpeg picture struct to convert
  * @param[in]  pic_structure   The picture field type (as defined in mpegvideo.h),
  *                             supersedes pic's field type if nonzero.
  */
@@ -148,11 +148,11 @@ static int fill_vaapi_ReferenceFrames(VAPictureParameterBufferH264 *pic_param,
 }
 
 /**
- * Fill in VA API reference picture lists from the Libav reference
+ * Fill in VA API reference picture lists from the FFmpeg reference
  * picture list.
  *
  * @param[out] RefPicList  VA API internal reference picture list
- * @param[in]  ref_list    A pointer to the Libav reference list
+ * @param[in]  ref_list    A pointer to the FFmpeg reference list
  * @param[in]  ref_count   The number of reference pictures in ref_list
  */
 static void fill_vaapi_RefPicList(VAPictureH264 RefPicList[32],
@@ -259,7 +259,7 @@ static int vaapi_h264_start_frame(AVCodecContext          *avctx,
     pic_param->seq_fields.bits.delta_pic_order_always_zero_flag = h->sps.delta_pic_order_always_zero_flag;
     pic_param->num_slice_groups_minus1                          = h->pps.slice_group_count - 1;
     pic_param->slice_group_map_type                             = h->pps.mb_slice_group_map_type;
-    pic_param->slice_group_change_rate_minus1                   = 0; /* XXX: unimplemented in Libav */
+    pic_param->slice_group_change_rate_minus1                   = 0; /* XXX: unimplemented in FFmpeg */
     pic_param->pic_init_qp_minus26                              = h->pps.init_qp - 26;
     pic_param->pic_init_qs_minus26                              = h->pps.init_qs - 26;
     pic_param->chroma_qp_index_offset                           = h->pps.chroma_qp_index_offset[0];
@@ -282,7 +282,8 @@ static int vaapi_h264_start_frame(AVCodecContext          *avctx,
     if (!iq_matrix)
         return -1;
     memcpy(iq_matrix->ScalingList4x4, h->pps.scaling_matrix4, sizeof(iq_matrix->ScalingList4x4));
-    memcpy(iq_matrix->ScalingList8x8, h->pps.scaling_matrix8, sizeof(iq_matrix->ScalingList8x8));
+    memcpy(iq_matrix->ScalingList8x8[0], h->pps.scaling_matrix8[0], sizeof(iq_matrix->ScalingList8x8[0]));
+    memcpy(iq_matrix->ScalingList8x8[1], h->pps.scaling_matrix8[3], sizeof(iq_matrix->ScalingList8x8[0]));
     return 0;
 }
 
diff --git a/libavcodec/vaapi_internal.h b/libavcodec/vaapi_internal.h
index d0fa7ae..918c718 100644
--- a/libavcodec/vaapi_internal.h
+++ b/libavcodec/vaapi_internal.h
@@ -4,20 +4,20 @@
  *
  * Copyright (C) 2008-2009 Splitted-Desktop Systems
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/vaapi_mpeg.c b/libavcodec/vaapi_mpeg.c
new file mode 100644
index 0000000..63879e5
--- /dev/null
+++ b/libavcodec/vaapi_mpeg.c
@@ -0,0 +1,48 @@
+/*
+ * Video Acceleration API (video decoding)
+ * HW decode acceleration for MPEG-2, MPEG-4, H.264 and VC-1
+ *
+ * Copyright (C) 2013 Anton Khirnov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "vaapi_internal.h"
+/** Finish a VA API frame for a decoder whose priv_data is an MpegEncContext; returns ret from the last step run. */
+int ff_vaapi_mpeg_end_frame(AVCodecContext *avctx)
+{
+    struct vaapi_context * const vactx = avctx->hwaccel_context;
+    MpegEncContext *s = avctx->priv_data;
+    int ret;
+    /* Step 1: commit the slices; a negative result skips rendering but still reaches the cleanup. */
+    ret = ff_vaapi_commit_slices(vactx);
+    if (ret < 0)
+        goto finish;
+    /* Step 2: render into the VA surface obtained from the current picture's frame. */
+    ret = ff_vaapi_render_picture(vactx,
+                                  ff_vaapi_get_surface_id(s->current_picture_ptr->f));
+    if (ret < 0)
+        goto finish;
+    /* Reached only if both calls above succeeded; passes y = 0 and the full picture height. */
+    ff_mpeg_draw_horiz_band(s, 0, s->avctx->height);
+    /* Cleanup runs on the success path and on every error path; ret is returned unchanged. */
+finish:
+    ff_vaapi_common_end_frame(avctx);
+    return ret;
+}
+
diff --git a/libavcodec/vaapi_mpeg2.c b/libavcodec/vaapi_mpeg2.c
index 31e0218..0d9059b 100644
--- a/libavcodec/vaapi_mpeg2.c
+++ b/libavcodec/vaapi_mpeg2.c
@@ -3,20 +3,20 @@
  *
  * Copyright (C) 2008-2009 Splitted-Desktop Systems
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -116,8 +116,8 @@ static int vaapi_mpeg2_decode_slice(AVCodecContext *avctx, const uint8_t *buffer
     intra_slice_flag = get_bits1(&gb);
     if (intra_slice_flag) {
         skip_bits(&gb, 8);
-        while (get_bits1(&gb) != 0)
-            skip_bits(&gb, 8);
+        if (skip_1stop_8data_bits(&gb) < 0)
+            return AVERROR_INVALIDDATA;
     }
     macroblock_offset = get_bits_count(&gb);
 
diff --git a/libavcodec/vaapi_mpeg4.c b/libavcodec/vaapi_mpeg4.c
index abdb6d9..19adb2b 100644
--- a/libavcodec/vaapi_mpeg4.c
+++ b/libavcodec/vaapi_mpeg4.c
@@ -3,20 +3,20 @@
  *
  * Copyright (C) 2008-2009 Splitted-Desktop Systems
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -124,25 +124,14 @@ static int vaapi_mpeg4_decode_slice(AVCodecContext *avctx, const uint8_t *buffer
 
     av_dlog(avctx, "vaapi_mpeg4_decode_slice(): buffer %p, size %d\n", buffer, size);
 
-    /* video_plane_with_short_video_header() contains all GOBs
-     * in-order, and this is what VA API (Intel backend) expects: only
-     * a single slice param. So fake macroblock_number for Libav so
-     * that we don't call vaapi_mpeg4_decode_slice() again
-     */
-    if (avctx->codec->id == AV_CODEC_ID_H263)
-        size = s->gb.buffer_end - buffer;
-
     /* Fill in VASliceParameterBufferMPEG4 */
     slice_param = (VASliceParameterBufferMPEG4 *)ff_vaapi_alloc_slice(avctx->hwaccel_context, buffer, size);
     if (!slice_param)
         return -1;
     slice_param->macroblock_offset      = get_bits_count(&s->gb) % 8;
-    slice_param->macroblock_number      = s->mb_y * s->mb_width + s->mb_x;
+    slice_param->macroblock_number      = 0;
     slice_param->quant_scale            = s->qscale;
 
-    if (avctx->codec->id == AV_CODEC_ID_H263)
-        s->mb_y = s->mb_height;
-
     return 0;
 }
 
diff --git a/libavcodec/vaapi_vc1.c b/libavcodec/vaapi_vc1.c
index 8886b0b..bba46d3 100644
--- a/libavcodec/vaapi_vc1.c
+++ b/libavcodec/vaapi_vc1.c
@@ -3,20 +3,20 @@
  *
  * Copyright (C) 2008-2009 Splitted-Desktop Systems
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -24,7 +24,7 @@
 #include "vc1.h"
 #include "vc1data.h"
 
-/** Translate Libav MV modes to VA API */
+/** Translate FFmpeg MV modes to VA API */
 static int get_VAMvModeVC1(enum MVModes mv_mode)
 {
     switch (mv_mode) {
@@ -128,7 +128,7 @@ static inline int vc1_get_TTFRM(VC1Context *v)
     return 0;
 }
 
-/** Pack Libav bitplanes into a VABitPlaneBuffer element */
+/** Pack FFmpeg bitplanes into a VABitPlaneBuffer element */
 static inline void vc1_pack_bitplanes(uint8_t *bitplane, int n, const uint8_t *ff_bp[3], int x, int y, int stride)
 {
     const int bitplane_index = n / 2;
diff --git a/libavcodec/vb.c b/libavcodec/vb.c
index 56094d8..3c89a29 100644
--- a/libavcodec/vb.c
+++ b/libavcodec/vb.c
@@ -2,20 +2,20 @@
  * Beam Software VB decoder
  * Copyright (c) 2007 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -73,7 +73,7 @@ static void vb_decode_palette(VBDecContext *c, int data_size)
         return;
     }
     for (i = start; i <= start + size; i++)
-        c->pal[i] = bytestream2_get_be24(&c->stream);
+        c->pal[i] = 0xFFU << 24 | bytestream2_get_be24(&c->stream);
 }
 
 static inline int check_pixel(uint8_t *buf, uint8_t *start, uint8_t *end)
@@ -197,10 +197,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
 
     bytestream2_init(&c->stream, avpkt->data, avpkt->size);
 
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     flags = bytestream2_get_le16(&c->stream);
 
@@ -211,6 +209,10 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     }
     if (flags & VB_HAS_VIDEO) {
         size = bytestream2_get_le32(&c->stream);
+        if(size > bytestream2_get_bytes_left(&c->stream)+4 || size<4){
+            av_log(avctx, AV_LOG_ERROR, "Frame size invalid\n");
+            return -1;
+        }
         vb_decode_framedata(c, offset);
         bytestream2_skip(&c->stream, size - 4);
     }
diff --git a/libavcodec/vble.c b/libavcodec/vble.c
index 4d5cd96..1bf25ba 100644
--- a/libavcodec/vble.c
+++ b/libavcodec/vble.c
@@ -2,20 +2,20 @@
  * VBLE Decoder
  * Copyright (c) 2011 Derek Buitenhuis
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -37,56 +37,52 @@ typedef struct {
     HuffYUVDSPContext hdsp;
 
     int            size;
-    uint8_t        *val; /* First holds the lengths of vlc symbols and then their values */
+    uint8_t        *val; ///< This array first holds the lengths of vlc symbols and then their values.
 } VBLEContext;
 
-static uint8_t vble_read_reverse_unary(GetBitContext *gb)
-{
-    /* At most we need to read 9 bits total to get indices up to 8 */
-    uint8_t val = show_bits(gb, 8);
-
-    if (val) {
-        val = 7 - av_log2_16bit(ff_reverse[val]);
-        skip_bits(gb, val + 1);
-        return val;
-    } else {
-        skip_bits(gb, 8);
-        if (get_bits1(gb))
-            return 8;
-    }
-
-    /* Return something larger than 8 on error */
-    return UINT8_MAX;
-}
-
 static int vble_unpack(VBLEContext *ctx, GetBitContext *gb)
 {
     int i;
+    int allbits = 0; /* sum of all decoded lengths, compared against the bits left below */
+    static const uint8_t LUT[256] = { /* LUT[b]: index of the lowest set bit of b; LUT[0] is never looked up */
+        8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
+        5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
+        6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
+        5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
+        7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
+        5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
+        6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
+        5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,
+    };
 
     /* Read all the lengths in first */
     for (i = 0; i < ctx->size; i++) {
-        ctx->val[i] = vble_read_reverse_unary(gb);
-
-        if (ctx->val[i] == UINT8_MAX)
-            return -1;
-    }
-
-    for (i = 0; i < ctx->size; i++) {
-        /* Check we have enough bits left */
-        if (get_bits_left(gb) < ctx->val[i])
-            return -1;
-
-        /* get_bits can't take a length of 0 */
-        if (ctx->val[i])
-            ctx->val[i] = (1 << ctx->val[i]) + get_bits(gb, ctx->val[i]) - 1;
+        /* At most we need to read 9 bits total to get indices up to 8 */
+        int val = show_bits(gb, 8);
+
+        // read reverse unary
+        if (val) {
+            val = LUT[val];
+            skip_bits(gb, val + 1); /* the length code occupies val + 1 bits */
+            ctx->val[i] = val;
+        } else {
+            skip_bits(gb, 8); /* all eight peeked bits were zero */
+            if (!get_bits1(gb)) /* a ninth zero bit is not a valid length code (maximum is 8) */
+                return -1;
+            ctx->val[i] = 8;
+        }
+        allbits += ctx->val[i];
     }
 
+    /* Check we have enough bits left */
+    if (get_bits_left(gb) < allbits)
+        return -1;
     return 0;
 }
 
 static void vble_restore_plane(VBLEContext *ctx, AVFrame *pic,
-                               int plane, int offset,
-                               int width, int height)
+                               GetBitContext *gb, int plane,
+                               int offset, int width, int height)
 {
     uint8_t *dst = pic->data[plane];
     uint8_t *val = ctx->val + offset;
@@ -94,9 +90,13 @@ static void vble_restore_plane(VBLEContext *ctx, AVFrame *pic,
     int i, j, left, left_top;
 
     for (i = 0; i < height; i++) {
-        for (j = 0; j < width; j++)
-            val[j] = (val[j] >> 1) ^ -(val[j] & 1);
-
+        for (j = 0; j < width; j++) {
+            /* get_bits can't take a length of 0 */
+            if (val[j]) {
+                int v = (1 << val[j]) + get_bits(gb, val[j]) - 1;
+                val[j] = (v >> 1) ^ -(v & 1);
+            }
+        }
         if (i) {
             left = 0;
             left_top = dst[-stride];
@@ -122,13 +122,17 @@ static int vble_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     int version;
     int offset = 0;
     int width_uv = avctx->width / 2, height_uv = avctx->height / 2;
+    int ret;
 
-    /* Allocate buffer */
-    if (ff_get_buffer(avctx, pic, 0) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Could not allocate buffer.\n");
-        return AVERROR(ENOMEM);
+    if (avpkt->size < 4 || avpkt->size - 4 > INT_MAX/8) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid packet size\n");
+        return AVERROR_INVALIDDATA;
     }
 
+    /* Allocate buffer */
+    if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
+        return ret;
+
     /* Set flags */
     pic->key_frame = 1;
     pic->pict_type = AV_PICTURE_TYPE_I;
@@ -148,15 +152,15 @@ static int vble_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     }
 
     /* Restore planes. Should be almost identical to Huffyuv's. */
-    vble_restore_plane(ctx, pic, 0, offset, avctx->width, avctx->height);
+    vble_restore_plane(ctx, pic, &gb, 0, offset, avctx->width, avctx->height);
 
     /* Chroma */
     if (!(ctx->avctx->flags & CODEC_FLAG_GRAY)) {
         offset += avctx->width * avctx->height;
-        vble_restore_plane(ctx, pic, 1, offset, width_uv, height_uv);
+        vble_restore_plane(ctx, pic, &gb, 1, offset, width_uv, height_uv);
 
         offset += width_uv * height_uv;
-        vble_restore_plane(ctx, pic, 2, offset, width_uv, height_uv);
+        vble_restore_plane(ctx, pic, &gb, 2, offset, width_uv, height_uv);
     }
 
     *got_frame       = 1;
@@ -186,7 +190,7 @@ static av_cold int vble_decode_init(AVCodecContext *avctx)
     ctx->size = avpicture_get_size(avctx->pix_fmt,
                                    avctx->width, avctx->height);
 
-    ctx->val = av_malloc(ctx->size * sizeof(*ctx->val));
+    ctx->val = av_malloc_array(ctx->size, sizeof(*ctx->val));
 
     if (!ctx->val) {
         av_log(avctx, AV_LOG_ERROR, "Could not allocate values buffer.\n");
diff --git a/libavcodec/vc1.c b/libavcodec/vc1.c
index 1978b08..d23efec 100644
--- a/libavcodec/vc1.c
+++ b/libavcodec/vc1.c
@@ -4,20 +4,20 @@
  * Copyright (c) 2006-2007 Konstantin Shishkov
  * Partly based on vc9.c (c) 2005 Anonymous, Alex Beregszaszi, Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -47,21 +47,6 @@
  * @{
  */
 
-/**
- * Imode types
- * @{
- */
-enum Imode {
-    IMODE_RAW,
-    IMODE_NORM2,
-    IMODE_DIFF2,
-    IMODE_NORM6,
-    IMODE_DIFF6,
-    IMODE_ROWSKIP,
-    IMODE_COLSKIP
-};
-/** @} */ //imode defines
-
 /** Decode rows by checking if they are skipped
  * @param plane Buffer to store decoded bits
  * @param[in] width Width of this buffer
@@ -137,12 +122,16 @@ static int bitplane_decoding(uint8_t* data, int *raw_flag, VC1Context *v)
     case IMODE_NORM2:
         if ((height * width) & 1) {
             *planep++ = get_bits1(gb);
-            offset    = 1;
+            y = offset = 1;
+            if (offset == width) {
+                offset = 0;
+                planep += stride - width;
+            }
         }
         else
-            offset = 0;
+            y = offset = 0;
         // decode bitplane as one long line
-        for (y = offset; y < height * width; y += 2) {
+        for (; y < height * width; y += 2) {
             code = get_vlc2(gb, ff_vc1_norm2_vlc.table, VC1_NORM2_VLC_BITS, 1);
             *planep++ = code & 1;
             offset++;
@@ -293,7 +282,7 @@ static int decode_sequence_header_adv(VC1Context *v, GetBitContext *gb);
  */
 int ff_vc1_decode_sequence_header(AVCodecContext *avctx, VC1Context *v, GetBitContext *gb)
 {
-    av_log(avctx, AV_LOG_DEBUG, "Header: %0X\n", show_bits(gb, 32));
+    av_log(avctx, AV_LOG_DEBUG, "Header: %0X\n", show_bits_long(gb, 32));
     v->profile = get_bits(gb, 2);
     if (v->profile == PROFILE_COMPLEX) {
         av_log(avctx, AV_LOG_WARNING, "WMV3 Complex Profile is not fully supported\n");
@@ -304,6 +293,7 @@ int ff_vc1_decode_sequence_header(AVCodecContext *avctx, VC1Context *v, GetBitCo
         v->zz_4x8 = ff_vc1_adv_progressive_4x8_zz;
         return decode_sequence_header_adv(v, gb);
     } else {
+        v->chromaformat = 1;
         v->zz_8x4 = ff_wmv2_scantableA;
         v->zz_4x8 = ff_wmv2_scantableB;
         v->res_y411   = get_bits1(gb);
@@ -380,8 +370,13 @@ int ff_vc1_decode_sequence_header(AVCodecContext *avctx, VC1Context *v, GetBitCo
     v->finterpflag = get_bits1(gb); //common
 
     if (v->res_sprite) {
-        v->s.avctx->width  = v->s.avctx->coded_width  = get_bits(gb, 11);
-        v->s.avctx->height = v->s.avctx->coded_height = get_bits(gb, 11);
+        int w = get_bits(gb, 11);
+        int h = get_bits(gb, 11);
+        int ret = ff_set_dimensions(v->s.avctx, w, h);
+        if (ret < 0) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to set dimensions %d %d\n", w, h);
+            return ret;
+        }
         skip_bits(gb, 5); //frame rate
         v->res_x8 = get_bits1(gb);
         if (get_bits1(gb)) { // something to do with DC VLC selection
@@ -433,10 +428,8 @@ static int decode_sequence_header_adv(VC1Context *v, GetBitContext *gb)
     v->bitrtq_postproc       = get_bits(gb, 5); //common
     v->postprocflag          = get_bits1(gb);   //common
 
-    v->s.avctx->coded_width  = (get_bits(gb, 12) + 1) << 1;
-    v->s.avctx->coded_height = (get_bits(gb, 12) + 1) << 1;
-    v->s.avctx->width        = v->s.avctx->coded_width;
-    v->s.avctx->height       = v->s.avctx->coded_height;
+    v->max_coded_width       = (get_bits(gb, 12) + 1) << 1;
+    v->max_coded_height      = (get_bits(gb, 12) + 1) << 1;
     v->broadcast             = get_bits1(gb);
     v->interlace             = get_bits1(gb);
     v->tfcntrflag            = get_bits1(gb);
@@ -503,9 +496,10 @@ static int decode_sequence_header_adv(VC1Context *v, GetBitContext *gb)
         }
 
         if (get_bits1(gb)) {
-            v->color_prim    = get_bits(gb, 8);
-            v->transfer_char = get_bits(gb, 8);
-            v->matrix_coef   = get_bits(gb, 8);
+            v->s.avctx->color_primaries = get_bits(gb, 8);
+            v->s.avctx->color_trc       = get_bits(gb, 8);
+            v->s.avctx->colorspace      = get_bits(gb, 8);
+            v->s.avctx->color_range     = AVCOL_RANGE_MPEG;
         }
     }
 
@@ -526,6 +520,8 @@ static int decode_sequence_header_adv(VC1Context *v, GetBitContext *gb)
 int ff_vc1_decode_entry_point(AVCodecContext *avctx, VC1Context *v, GetBitContext *gb)
 {
     int i;
+    int w,h;
+    int ret;
 
     av_log(avctx, AV_LOG_DEBUG, "Entry point: %08X\n", show_bits_long(gb, 32));
     v->broken_link    = get_bits1(gb);
@@ -533,6 +529,8 @@ int ff_vc1_decode_entry_point(AVCodecContext *avctx, VC1Context *v, GetBitContex
     v->panscanflag    = get_bits1(gb);
     v->refdist_flag   = get_bits1(gb);
     v->s.loop_filter  = get_bits1(gb);
+    if (v->s.avctx->skip_loop_filter >= AVDISCARD_ALL)
+        v->s.loop_filter = 0;
     v->fastuvmc       = get_bits1(gb);
     v->extended_mv    = get_bits1(gb);
     v->dquant         = get_bits(gb, 2);
@@ -546,10 +544,18 @@ int ff_vc1_decode_entry_point(AVCodecContext *avctx, VC1Context *v, GetBitContex
         }
     }
 
-    if (get_bits1(gb)) {
-        avctx->width  = avctx->coded_width  = (get_bits(gb, 12) + 1) << 1;
-        avctx->height = avctx->coded_height = (get_bits(gb, 12) + 1) << 1;
+    if(get_bits1(gb)){
+        w = (get_bits(gb, 12)+1)<<1;
+        h = (get_bits(gb, 12)+1)<<1;
+    } else {
+        w = v->max_coded_width;
+        h = v->max_coded_height;
     }
+    if ((ret = ff_set_dimensions(avctx, w, h)) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to set dimensions %d %d\n", w, h);
+        return ret;
+    }
+
     if (v->extended_mv)
         v->extended_dmv = get_bits1(gb);
     if ((v->range_mapy_flag = get_bits1(gb))) {
@@ -601,32 +607,44 @@ static void rotate_luts(VC1Context *v)
             C = A;                                            \
         } else {                                              \
             DEF;                                              \
-            memcpy(&tmp, &L  , sizeof(tmp));                  \
-            memcpy(&L  , &N  , sizeof(tmp));                  \
-            memcpy(&N  , &tmp, sizeof(tmp));                  \
+            memcpy(&tmp, L   , sizeof(tmp));                  \
+            memcpy(L   , N   , sizeof(tmp));                  \
+            memcpy(N   , &tmp, sizeof(tmp));                  \
             C = N;                                            \
         }                                                     \
     } while(0)
 
-    ROTATE(int tmp,             v->last_use_ic, v->next_use_ic, v->curr_use_ic, v->aux_use_ic);
+    ROTATE(int tmp,             &v->last_use_ic, &v->next_use_ic, v->curr_use_ic, &v->aux_use_ic);
     ROTATE(uint8_t tmp[2][256], v->last_luty,   v->next_luty,   v->curr_luty,   v->aux_luty);
     ROTATE(uint8_t tmp[2][256], v->last_lutuv,  v->next_lutuv,  v->curr_lutuv,  v->aux_lutuv);
 
     INIT_LUT(32, 0, v->curr_luty[0], v->curr_lutuv[0], 0);
     INIT_LUT(32, 0, v->curr_luty[1], v->curr_lutuv[1], 0);
-    v->curr_use_ic = 0;
-    if (v->curr_luty == v->next_luty) {
-        // If we just initialized next_lut, clear next_use_ic to match.
-        v->next_use_ic = 0;
+    *v->curr_use_ic = 0;
+}
+/** Read the bfraction VLC from gb and store its index and LUT value in v; returns 0 or AVERROR_INVALIDDATA. */
+static int read_bfraction(VC1Context *v, GetBitContext* gb) {
+    int bfraction_lut_index = get_vlc2(gb, ff_vc1_bfraction_vlc.table, VC1_BFRACTION_VLC_BITS, 1);
+    /* Index 21 and negative results are rejected before the LUT lookup below; v is left untouched on error. */
+    if (bfraction_lut_index == 21 || bfraction_lut_index < 0) {
+        av_log(v->s.avctx, AV_LOG_ERROR, "bfraction invalid\n");
+        return AVERROR_INVALIDDATA;
     }
+    v->bfraction_lut_index = bfraction_lut_index;
+    v->bfraction           = ff_vc1_bfraction_lut[v->bfraction_lut_index];
+    return 0;
 }
 
 int ff_vc1_parse_frame_header(VC1Context *v, GetBitContext* gb)
 {
     int pqindex, lowquant, status;
 
+    v->field_mode = 0;
+    v->fcm = 0;
     if (v->finterpflag)
         v->interpfrm = get_bits1(gb);
+    if (!v->s.avctx->codec)
+        return -1;
     if (v->s.avctx->codec_id == AV_CODEC_ID_MSS2)
         v->respic   =
         v->rangered =
@@ -650,8 +668,8 @@ int ff_vc1_parse_frame_header(VC1Context *v, GetBitContext* gb)
 
     v->bi_type = 0;
     if (v->s.pict_type == AV_PICTURE_TYPE_B) {
-        v->bfraction_lut_index = get_vlc2(gb, ff_vc1_bfraction_vlc.table, VC1_BFRACTION_VLC_BITS, 1);
-        v->bfraction           = ff_vc1_bfraction_lut[v->bfraction_lut_index];
+        if (read_bfraction(v, gb) < 0)
+            return AVERROR_INVALIDDATA;
         if (v->bfraction == 0) {
             v->s.pict_type = AV_PICTURE_TYPE_BI;
         }
@@ -846,6 +864,8 @@ int ff_vc1_parse_frame_header_adv(VC1Context *v, GetBitContext* gb)
     v->numref          = 0;
     v->p_frame_skipped = 0;
     if (v->second_field) {
+        if(v->fcm!=2 || v->field_mode!=1)
+            return -1;
         v->s.pict_type = (v->fptype & 1) ? AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_I;
         if (v->fptype & 4)
             v->s.pict_type = (v->fptype & 1) ? AV_PICTURE_TYPE_BI : AV_PICTURE_TYPE_B;
@@ -869,12 +889,15 @@ int ff_vc1_parse_frame_header_adv(VC1Context *v, GetBitContext* gb)
     v->field_mode = field_mode;
     v->fcm = fcm;
 
+    av_assert0(    v->s.mb_height == v->s.height + 15 >> 4
+                || v->s.mb_height == FFALIGN(v->s.height + 15 >> 4, 2));
     if (v->field_mode) {
         v->s.mb_height = FFALIGN(v->s.height + 15 >> 4, 2);
         v->fptype = get_bits(gb, 3);
         v->s.pict_type = (v->fptype & 2) ? AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_I;
         if (v->fptype & 4) // B-picture
             v->s.pict_type = (v->fptype & 2) ? AV_PICTURE_TYPE_BI : AV_PICTURE_TYPE_B;
+
     } else {
         v->s.mb_height = v->s.height + 15 >> 4;
         switch (get_unary(gb, 0, 4)) {
@@ -905,6 +928,8 @@ int ff_vc1_parse_frame_header_adv(VC1Context *v, GetBitContext* gb)
             v->tff = get_bits1(gb);
             v->rff = get_bits1(gb);
         }
+    } else {
+        v->tff = 1;
     }
     if (v->panscanflag) {
         avpriv_report_missing_feature(v->s.avctx, "Pan-scan");
@@ -916,6 +941,8 @@ int ff_vc1_parse_frame_header_adv(VC1Context *v, GetBitContext* gb)
     v->rnd = get_bits1(gb);
     if (v->interlace)
         v->uvsamp = get_bits1(gb);
+    if(!ff_vc1_bfraction_vlc.table)
+        return 0; //parsing only, vlc tables haven't been allocated
     if (v->field_mode) {
         if (!v->refdist_flag)
             v->refdist = 0;
@@ -925,8 +952,8 @@ int ff_vc1_parse_frame_header_adv(VC1Context *v, GetBitContext* gb)
                 v->refdist += get_unary(gb, 0, 16);
         }
         if ((v->s.pict_type == AV_PICTURE_TYPE_B) || (v->s.pict_type == AV_PICTURE_TYPE_BI)) {
-            v->bfraction_lut_index = get_vlc2(gb, ff_vc1_bfraction_vlc.table, VC1_BFRACTION_VLC_BITS, 1);
-            v->bfraction           = ff_vc1_bfraction_lut[v->bfraction_lut_index];
+            if (read_bfraction(v, gb) < 0)
+                return AVERROR_INVALIDDATA;
             v->frfd = (v->bfraction * v->refdist) >> 8;
             v->brfd = v->refdist - v->frfd - 1;
             if (v->brfd < 0)
@@ -938,8 +965,8 @@ int ff_vc1_parse_frame_header_adv(VC1Context *v, GetBitContext* gb)
         if (v->finterpflag)
             v->interpfrm = get_bits1(gb);
         if (v->s.pict_type == AV_PICTURE_TYPE_B) {
-            v->bfraction_lut_index = get_vlc2(gb, ff_vc1_bfraction_vlc.table, VC1_BFRACTION_VLC_BITS, 1);
-            v->bfraction           = ff_vc1_bfraction_lut[v->bfraction_lut_index];
+            if (read_bfraction(v, gb) < 0)
+                return AVERROR_INVALIDDATA;
             if (v->bfraction == 0) {
                 v->s.pict_type = AV_PICTURE_TYPE_BI; /* XXX: should not happen here */
             }
@@ -1100,7 +1127,7 @@ int ff_vc1_parse_frame_header_adv(VC1Context *v, GetBitContext* gb)
                         INIT_LUT(v->lumscale2, v->lumshift2, v->curr_luty[v->cur_field_type^1], v->curr_lutuv[v->cur_field_type^1], 0);
                         INIT_LUT(v->lumscale , v->lumshift , v->last_luty[v->cur_field_type  ], v->last_lutuv[v->cur_field_type  ], 1);
                     }
-                    v->next_use_ic = v->curr_use_ic = 1;
+                    v->next_use_ic = *v->curr_use_ic = 1;
                 } else {
                     INIT_LUT(v->lumscale , v->lumshift , v->last_luty[0], v->last_lutuv[0], 1);
                     INIT_LUT(v->lumscale2, v->lumshift2, v->last_luty[1], v->last_lutuv[1], 1);
@@ -1183,8 +1210,8 @@ int ff_vc1_parse_frame_header_adv(VC1Context *v, GetBitContext* gb)
         break;
     case AV_PICTURE_TYPE_B:
         if (v->fcm == ILACE_FRAME) {
-            v->bfraction_lut_index = get_vlc2(gb, ff_vc1_bfraction_vlc.table, VC1_BFRACTION_VLC_BITS, 1);
-            v->bfraction           = ff_vc1_bfraction_lut[v->bfraction_lut_index];
+            if (read_bfraction(v, gb) < 0)
+                return AVERROR_INVALIDDATA;
             if (v->bfraction == 0) {
                 return -1;
             }
@@ -1207,6 +1234,7 @@ int ff_vc1_parse_frame_header_adv(VC1Context *v, GetBitContext* gb)
 
         if (v->field_mode) {
             int mvmode;
+            av_log(v->s.avctx, AV_LOG_DEBUG, "B Fields\n");
             if (v->extended_dmv)
                 v->dmvrange = get_unary(gb, 0, 3);
             mvmode = get_unary(gb, 1, 3);
@@ -1688,5 +1716,7 @@ av_cold int ff_vc1_init_common(VC1Context *v)
     v->pq      = -1;
     v->mvrange = 0; /* 7.1.1.18, p80 */
 
+    ff_vc1dsp_init(&v->vc1dsp);
+
     return 0;
 }
diff --git a/libavcodec/vc1.h b/libavcodec/vc1.h
index 9db8edd..662b58b 100644
--- a/libavcodec/vc1.h
+++ b/libavcodec/vc1.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2006-2007 Konstantin Shishkov
  * Partly based on vc9.c (c) 2005 Anonymous, Alex Beregszaszi, Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -175,6 +175,21 @@ enum FrameCodingMode {
     ILACE_FIELD         ///<  in the bitstream is reported as 11b
 };
 
+/**
+ * Imode types
+ * @{
+ */
+enum Imode {
+    IMODE_RAW,
+    IMODE_NORM2,
+    IMODE_DIFF2,
+    IMODE_NORM6,
+    IMODE_DIFF6,
+    IMODE_ROWSKIP,
+    IMODE_COLSKIP
+};
+/** @} */ //imode defines
+
 /** The VC1 Context
  * @todo Change size wherever another size is more efficient
  * Many members are only used for Advanced Profile
@@ -212,9 +227,6 @@ typedef struct VC1Context{
     int panscanflag;      ///< NUMPANSCANWIN, TOPLEFT{X,Y}, BOTRIGHT{X,Y} present
     int refdist_flag;     ///< REFDIST syntax element present in II, IP, PI or PP field picture headers
     int extended_dmv;     ///< Additional extended dmv range at P/B frame-level
-    int color_prim;       ///< 8bits, chroma coordinates of the color primaries
-    int transfer_char;    ///< 8bits, Opto-electronic transfer characteristics
-    int matrix_coef;      ///< 8bits, Color primaries->YCbCr transform matrix
     int hrd_param_flag;   ///< Presence of Hypothetical Reference
                           ///< Decoder parameters
     int psf;              ///< Progressive Segmented Frame
@@ -227,6 +239,7 @@ typedef struct VC1Context{
     int profile;          ///< 2bits, Profile
     int frmrtq_postproc;  ///< 3bits,
     int bitrtq_postproc;  ///< 5bits, quantized framerate-based postprocessing strength
+    int max_coded_width, max_coded_height;
     int fastuvmc;         ///< Rounding of qpel vector to hpel ? (not in Simple)
     int extended_mv;      ///< Ext MV in P/B (not in Simple)
     int dquant;           ///< How qscale varies with MBs, 2bits (not in Simple)
@@ -302,7 +315,7 @@ typedef struct VC1Context{
     uint8_t  aux_luty[2][256],  aux_lutuv[2][256];  ///< lookup tables used for intensity compensation
     uint8_t next_luty[2][256], next_lutuv[2][256];  ///< lookup tables used for intensity compensation
     uint8_t (*curr_luty)[256]  ,(*curr_lutuv)[256];
-    int last_use_ic, curr_use_ic, next_use_ic, aux_use_ic;
+    int last_use_ic, *curr_use_ic, next_use_ic, aux_use_ic;
     int rnd;                        ///< rounding control
 
     /** Frame decoding info for S/M profiles only */
@@ -353,7 +366,7 @@ typedef struct VC1Context{
     uint8_t fourmvbp;
     uint8_t* fieldtx_plane;
     int fieldtx_is_raw;
-    int8_t zzi_8x8[64];
+    uint8_t zzi_8x8[64];
     uint8_t *blk_mv_type_base, *blk_mv_type;    ///< 0: frame MV, 1: field MV (interlaced frame)
     uint8_t *mv_f_base, *mv_f[2];               ///< 0: MV obtained from same field, 1: opposite field
     uint8_t *mv_f_next_base, *mv_f_next[2];
diff --git a/libavcodec/vc1_parser.c b/libavcodec/vc1_parser.c
index 1bedd98..0021feb 100644
--- a/libavcodec/vc1_parser.c
+++ b/libavcodec/vc1_parser.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2006-2007 Konstantin Shishkov
  * Partly based on vc9.c (c) 2005 Anonymous, Alex Beregszaszi, Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -30,117 +30,88 @@
 #include "vc1.h"
 #include "get_bits.h"
 
+/** The maximum number of bytes of a sequence, entry point or
+ *  frame header whose values we pay any attention to */
+#define UNESCAPED_THRESHOLD 37
+
+/** The maximum number of bytes of a sequence, entry point or
+ *  frame header which must be valid memory (because they are
+ *  used to update the bitstream cache in skip_bits() calls)
+ */
+#define UNESCAPED_LIMIT 144
+
+typedef enum {
+    NO_MATCH,
+    ONE_ZERO,
+    TWO_ZEROS,
+    ONE
+} VC1ParseSearchState;
+
 typedef struct {
     ParseContext pc;
     VC1Context v;
+    uint8_t prev_start_code;
+    size_t bytes_to_skip;
+    uint8_t unesc_buffer[UNESCAPED_LIMIT];
+    size_t unesc_index;
+    VC1ParseSearchState search_state;
 } VC1ParseContext;
 
-static void vc1_extract_headers(AVCodecParserContext *s, AVCodecContext *avctx,
-                                const uint8_t *buf, int buf_size)
+static void vc1_extract_header(AVCodecParserContext *s, AVCodecContext *avctx,
+                               const uint8_t *buf, int buf_size)
 {
+    /* Parse the header we just finished unescaping */
     VC1ParseContext *vpc = s->priv_data;
     GetBitContext gb;
-    const uint8_t *start, *end, *next;
-    uint8_t *buf2 = av_mallocz(buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
-
+    int ret;
     vpc->v.s.avctx = avctx;
     vpc->v.parse_only = 1;
-    next = buf;
-    s->repeat_pict = 0;
-
-    for(start = buf, end = buf + buf_size; next < end; start = next){
-        int buf2_size, size;
-
-        next = find_next_marker(start + 4, end);
-        size = next - start - 4;
-        buf2_size = vc1_unescape_buffer(start + 4, size, buf2);
-        init_get_bits(&gb, buf2, buf2_size * 8);
-        if(size <= 0) continue;
-        switch(AV_RB32(start)){
-        case VC1_CODE_SEQHDR:
-            ff_vc1_decode_sequence_header(avctx, &vpc->v, &gb);
-            break;
-        case VC1_CODE_ENTRYPOINT:
-            ff_vc1_decode_entry_point(avctx, &vpc->v, &gb);
-            break;
-        case VC1_CODE_FRAME:
-            if(vpc->v.profile < PROFILE_ADVANCED)
-                ff_vc1_parse_frame_header    (&vpc->v, &gb);
-            else
-                ff_vc1_parse_frame_header_adv(&vpc->v, &gb);
-
-            /* keep AV_PICTURE_TYPE_BI internal to VC1 */
-            if (vpc->v.s.pict_type == AV_PICTURE_TYPE_BI)
-                s->pict_type = AV_PICTURE_TYPE_B;
-            else
-                s->pict_type = vpc->v.s.pict_type;
-
-            if (avctx->ticks_per_frame > 1){
-                // process pulldown flags
-                s->repeat_pict = 1;
-                // Pulldown flags are only valid when 'broadcast' has been set.
-                // So ticks_per_frame will be 2
-                if (vpc->v.rff){
-                    // repeat field
-                    s->repeat_pict = 2;
-                }else if (vpc->v.rptfrm){
-                    // repeat frames
-                    s->repeat_pict = vpc->v.rptfrm * 2 + 1;
-                }
-            }
-
-            if (vpc->v.broadcast && vpc->v.interlace && !vpc->v.psf)
-                s->field_order = vpc->v.tff ? AV_FIELD_TT : AV_FIELD_BB;
-            else
-                s->field_order = AV_FIELD_PROGRESSIVE;
+    init_get_bits(&gb, buf, buf_size * 8);
+    switch (vpc->prev_start_code) {
+    case VC1_CODE_SEQHDR & 0xFF:
+        ff_vc1_decode_sequence_header(avctx, &vpc->v, &gb);
+        break;
+    case VC1_CODE_ENTRYPOINT & 0xFF:
+        ff_vc1_decode_entry_point(avctx, &vpc->v, &gb);
+        break;
+    case VC1_CODE_FRAME & 0xFF:
+        if(vpc->v.profile < PROFILE_ADVANCED)
+            ret = ff_vc1_parse_frame_header    (&vpc->v, &gb);
+        else
+            ret = ff_vc1_parse_frame_header_adv(&vpc->v, &gb);
 
+        if (ret < 0)
             break;
-        }
-    }
 
-    av_free(buf2);
-}
+        /* keep AV_PICTURE_TYPE_BI internal to VC1 */
+        if (vpc->v.s.pict_type == AV_PICTURE_TYPE_BI)
+            s->pict_type = AV_PICTURE_TYPE_B;
+        else
+            s->pict_type = vpc->v.s.pict_type;
 
-/**
- * Find the end of the current frame in the bitstream.
- * @return the position of the first byte of the next frame, or -1
- */
-static int vc1_find_frame_end(ParseContext *pc, const uint8_t *buf,
-                               int buf_size) {
-    int pic_found, i;
-    uint32_t state;
-
-    pic_found= pc->frame_start_found;
-    state= pc->state;
-
-    i=0;
-    if(!pic_found){
-        for(i=0; i<buf_size; i++){
-            state= (state<<8) | buf[i];
-            if(state == VC1_CODE_FRAME || state == VC1_CODE_FIELD){
-                i++;
-                pic_found=1;
-                break;
+        if (avctx->ticks_per_frame > 1){
+            // process pulldown flags
+            s->repeat_pict = 1;
+            // Pulldown flags are only valid when 'broadcast' has been set.
+            // So ticks_per_frame will be 2
+            if (vpc->v.rff){
+                // repeat field
+                s->repeat_pict = 2;
+            }else if (vpc->v.rptfrm){
+                // repeat frames
+                s->repeat_pict = vpc->v.rptfrm * 2 + 1;
             }
+        }else{
+            s->repeat_pict = 0;
         }
-    }
 
-    if(pic_found){
-        /* EOF considered as end of frame */
-        if (buf_size == 0)
-            return 0;
-        for(; i<buf_size; i++){
-            state= (state<<8) | buf[i];
-            if(IS_MARKER(state) && state != VC1_CODE_FIELD && state != VC1_CODE_SLICE){
-                pc->frame_start_found=0;
-                pc->state=-1;
-                return i-3;
-            }
-        }
+        if (vpc->v.broadcast && vpc->v.interlace && !vpc->v.psf)
+            s->field_order = vpc->v.tff ? AV_FIELD_TT : AV_FIELD_BB;
+        else
+            s->field_order = AV_FIELD_PROGRESSIVE;
+
+        break;
     }
-    pc->frame_start_found= pic_found;
-    pc->state= state;
-    return END_NOT_FOUND;
 }
 
 static int vc1_parse(AVCodecParserContext *s,
@@ -148,22 +119,127 @@ static int vc1_parse(AVCodecParserContext *s,
                            const uint8_t **poutbuf, int *poutbuf_size,
                            const uint8_t *buf, int buf_size)
 {
+    /* Here we do the searching for frame boundaries and headers at
+     * the same time. Only a minimal amount at the start of each
+     * header is unescaped. */
     VC1ParseContext *vpc = s->priv_data;
-    int next;
+    int pic_found = vpc->pc.frame_start_found;
+    uint8_t *unesc_buffer = vpc->unesc_buffer;
+    size_t unesc_index = vpc->unesc_index;
+    VC1ParseSearchState search_state = vpc->search_state;
+    int next = END_NOT_FOUND;
+    int i = vpc->bytes_to_skip;
 
-    if(s->flags & PARSER_FLAG_COMPLETE_FRAMES){
-        next= buf_size;
-    }else{
-        next= vc1_find_frame_end(&vpc->pc, buf, buf_size);
+    if (pic_found && buf_size == 0) {
+        /* EOF considered as end of frame */
+        memset(unesc_buffer + unesc_index, 0, UNESCAPED_THRESHOLD - unesc_index);
+        vc1_extract_header(s, avctx, unesc_buffer, unesc_index);
+        next = 0;
+    }
+    while (i < buf_size) {
+        int start_code_found = 0;
+        uint8_t b;
+        while (i < buf_size && unesc_index < UNESCAPED_THRESHOLD) {
+            b = buf[i++];
+            unesc_buffer[unesc_index++] = b;
+            if (search_state <= ONE_ZERO)
+                search_state = b ? NO_MATCH : search_state + 1;
+            else if (search_state == TWO_ZEROS) {
+                if (b == 1)
+                    search_state = ONE;
+                else if (b > 1) {
+                    if (b == 3)
+                        unesc_index--; // swallow emulation prevention byte
+                    search_state = NO_MATCH;
+                }
+            }
+            else { // search_state == ONE
+                // Header unescaping terminates early due to detection of next start code
+                search_state = NO_MATCH;
+                start_code_found = 1;
+                break;
+            }
+        }
+        if ((s->flags & PARSER_FLAG_COMPLETE_FRAMES) &&
+                unesc_index >= UNESCAPED_THRESHOLD &&
+                vpc->prev_start_code == (VC1_CODE_FRAME & 0xFF))
+        {
+            // No need to keep scanning the rest of the buffer for
+            // start codes if we know it contains a complete frame and
+            // we've already unescaped all we need of the frame header
+            vc1_extract_header(s, avctx, unesc_buffer, unesc_index);
+            break;
+        }
+        if (unesc_index >= UNESCAPED_THRESHOLD && !start_code_found) {
+            while (i < buf_size) {
+                if (search_state == NO_MATCH) {
+                    i += vpc->v.vc1dsp.vc1_find_start_code_candidate(buf + i, buf_size - i);
+                    if (i < buf_size) {
+                        search_state = ONE_ZERO;
+                    }
+                    i++;
+                } else {
+                    b = buf[i++];
+                    if (search_state == ONE_ZERO)
+                        search_state = b ? NO_MATCH : TWO_ZEROS;
+                    else if (search_state == TWO_ZEROS) {
+                        if (b >= 1)
+                            search_state = b == 1 ? ONE : NO_MATCH;
+                    }
+                    else { // search_state == ONE
+                        search_state = NO_MATCH;
+                        start_code_found = 1;
+                        break;
+                    }
+                }
+            }
+        }
+        if (start_code_found) {
+            vc1_extract_header(s, avctx, unesc_buffer, unesc_index);
+
+            vpc->prev_start_code = b;
+            unesc_index = 0;
+
+            if (!(s->flags & PARSER_FLAG_COMPLETE_FRAMES)) {
+                if (!pic_found && (b == (VC1_CODE_FRAME & 0xFF) || b == (VC1_CODE_FIELD & 0xFF))) {
+                    pic_found = 1;
+                }
+                else if (pic_found && b != (VC1_CODE_FIELD & 0xFF) && b != (VC1_CODE_SLICE & 0xFF)) {
+                    next = i - 4;
+                    pic_found = b == (VC1_CODE_FRAME & 0xFF);
+                    break;
+                }
+            }
+        }
+    }
 
+    vpc->pc.frame_start_found = pic_found;
+    vpc->unesc_index = unesc_index;
+    vpc->search_state = search_state;
+
+    if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
+        next = buf_size;
+    } else {
         if (ff_combine_frame(&vpc->pc, next, &buf, &buf_size) < 0) {
+            vpc->bytes_to_skip = 0;
             *poutbuf = NULL;
             *poutbuf_size = 0;
             return buf_size;
         }
     }
 
-    vc1_extract_headers(s, avctx, buf, buf_size);
+    vpc->v.first_pic_header_flag = 1;
+
+    /* If we return with a valid pointer to a combined frame buffer
+     * then on the next call we'll have been unhelpfully rewound
+     * by up to 4 bytes (depending upon whether the start code
+     * overlapped the input buffer, and if so by how much). We don't
+     * want this: it will either cause spurious second detections of
+     * the start code we've already seen, or cause extra bytes to be
+     * inserted at the start of the unescaped buffer. */
+    vpc->bytes_to_skip = 4;
+    if (next < 0 && next != END_NOT_FOUND)
+        vpc->bytes_to_skip += next;
 
     *poutbuf = buf;
     *poutbuf_size = buf_size;
@@ -194,6 +270,11 @@ static av_cold int vc1_parse_init(AVCodecParserContext *s)
 {
     VC1ParseContext *vpc = s->priv_data;
     vpc->v.s.slice_context_count = 1;
+    vpc->v.first_pic_header_flag = 1;
+    vpc->prev_start_code = 0;
+    vpc->bytes_to_skip = 0;
+    vpc->unesc_index = 0;
+    vpc->search_state = NO_MATCH;
     return ff_vc1_init_common(&vpc->v);
 }
 
diff --git a/libavcodec/vc1acdata.h b/libavcodec/vc1acdata.h
index 73ebe40..a70b44a 100644
--- a/libavcodec/vc1acdata.h
+++ b/libavcodec/vc1acdata.h
@@ -2,20 +2,20 @@
  * VC-1 and WMV3 decoder
  * copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/vc1data.c b/libavcodec/vc1data.c
index 70cead8..fc9ba6d 100644
--- a/libavcodec/vc1data.c
+++ b/libavcodec/vc1data.c
@@ -4,20 +4,20 @@
  * copyright (c) 2006 Konstantin Shishkov
  * (c) 2005 anonymous, Alex Beregszaszi, Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -1019,21 +1019,21 @@ const uint8_t ff_vc1_mv_diff_bits[4][73] = {
 /* DC differentials low+hi-mo, p217 are the same as in msmpeg4data .h */
 
 /* Table 232 */
-const int8_t ff_vc1_simple_progressive_4x4_zz [16] = {
+const uint8_t ff_vc1_simple_progressive_4x4_zz [16] = {
      0,     8,    16,     1,
      9,    24,    17,     2,
     10,    18,    25,     3,
     11,    26,    19,    27
 };
 
-const int8_t ff_vc1_adv_progressive_8x4_zz [32] = { /* Table 233 */
+const uint8_t ff_vc1_adv_progressive_8x4_zz [32] = { /* Table 233 */
      0,     8,     1,    16,     2,     9,    10,     3,
     24,    17,     4,    11,    18,    12,     5,    19,
     25,    13,    20,    26,    27,     6,    21,    28,
     14,    22,    29,     7,    30,    15,    23,    31
 };
 
-const int8_t ff_vc1_adv_progressive_4x8_zz [32] = { /* Table 234 */
+const uint8_t ff_vc1_adv_progressive_4x8_zz [32] = { /* Table 234 */
      0,     1,     8,     2,
      9,    16,    17,    24,
     10,    32,    25,    18,
@@ -1044,7 +1044,7 @@ const int8_t ff_vc1_adv_progressive_4x8_zz [32] = { /* Table 234 */
     35,    43,    51,    59
 };
 
-const int8_t ff_vc1_adv_interlaced_8x8_zz [64] = { /* Table 235 */
+const uint8_t ff_vc1_adv_interlaced_8x8_zz [64] = { /* Table 235 */
      0,     8,     1,    16,    24,     9,     2,    32,
     40,    48,    56,    17,    10,     3,    25,    18,
     11,     4,    33,    41,    49,    57,    26,    34,
@@ -1055,14 +1055,14 @@ const int8_t ff_vc1_adv_interlaced_8x8_zz [64] = { /* Table 235 */
     61,    62,    54,    46,    39,    47,    55,    63
 };
 
-const int8_t ff_vc1_adv_interlaced_8x4_zz [32] = { /* Table 236 */
+const uint8_t ff_vc1_adv_interlaced_8x4_zz [32] = { /* Table 236 */
      0,     8,    16,    24,     1,     9,     2,    17,
     25,    10,     3,    18,    26,     4,    11,    19,
     12,     5,    13,    20,    27,     6,    21,    28,
     14,    22,    29,     7,    30,    15,    23,    31
 };
 
-const int8_t ff_vc1_adv_interlaced_4x8_zz [32] = { /* Table 237 */
+const uint8_t ff_vc1_adv_interlaced_4x8_zz [32] = { /* Table 237 */
      0,     1,     2,     8,
     16,     9,    24,    17,
     10,     3,    32,    40,
@@ -1073,7 +1073,7 @@ const int8_t ff_vc1_adv_interlaced_4x8_zz [32] = { /* Table 237 */
     35,    43,    51,    59
 };
 
-const int8_t ff_vc1_adv_interlaced_4x4_zz [16] = { /* Table 238 */
+const uint8_t ff_vc1_adv_interlaced_4x4_zz [16] = { /* Table 238 */
      0,     8,    16,    24,
      1,     9,    17,     2,
     25,    10,    18,     3,
diff --git a/libavcodec/vc1data.h b/libavcodec/vc1data.h
index 84e8188..eecb045 100644
--- a/libavcodec/vc1data.h
+++ b/libavcodec/vc1data.h
@@ -3,20 +3,20 @@
  * copyright (c) 2006 Konstantin Shishkov
  * (c) 2005 anonymous, Alex Beregszaszi, Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -183,15 +183,15 @@ extern const uint8_t ff_vc1_2ref_mvdata_bits[8][126];
 /* DC differentials low+hi-mo, p217 are the same as in msmpeg4data .h */
 
 /* Scantables/ZZ scan are at 11.9 (p262) and 8.1.1.12 (p10) */
-extern const int8_t ff_vc1_simple_progressive_4x4_zz [16];
-extern const int8_t ff_vc1_adv_progressive_8x4_zz [32];
-extern const int8_t ff_vc1_adv_progressive_4x8_zz [32];
-extern const int8_t ff_vc1_adv_interlaced_8x8_zz [64];
-extern const int8_t ff_vc1_adv_interlaced_8x4_zz [32];
-extern const int8_t ff_vc1_adv_interlaced_4x8_zz [32];
-extern const int8_t ff_vc1_adv_interlaced_4x4_zz [16];
-extern const int8_t ff_vc1_intra_horz_8x8_zz [64];
-extern const int8_t ff_vc1_intra_vert_8x8_zz [64];
+extern const uint8_t ff_vc1_simple_progressive_4x4_zz [16];
+extern const uint8_t ff_vc1_adv_progressive_8x4_zz [32];
+extern const uint8_t ff_vc1_adv_progressive_4x8_zz [32];
+extern const uint8_t ff_vc1_adv_interlaced_8x8_zz [64];
+extern const uint8_t ff_vc1_adv_interlaced_8x4_zz [32];
+extern const uint8_t ff_vc1_adv_interlaced_4x8_zz [32];
+extern const uint8_t ff_vc1_adv_interlaced_4x4_zz [16];
+extern const uint8_t ff_vc1_intra_horz_8x8_zz [64];
+extern const uint8_t ff_vc1_intra_vert_8x8_zz [64];
 
 /* DQScale as specified in 8.1.3.9 - almost identical to 0x40000/i */
 extern const int32_t ff_vc1_dqscale[63];
diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c
index c83bb4f..06deb7f 100644
--- a/libavcodec/vc1dec.c
+++ b/libavcodec/vc1dec.c
@@ -4,20 +4,20 @@
  * Copyright (c) 2006-2007 Konstantin Shishkov
  * Partly based on vc9.c (c) 2005 Anonymous, Alex Beregszaszi, Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -41,6 +41,8 @@
 #include "msmpeg4data.h"
 #include "unary.h"
 #include "mathops.h"
+#include "vdpau_internal.h"
+#include "libavutil/avassert.h"
 
 #undef NDEBUG
 #include <assert.h>
@@ -60,20 +62,6 @@ static const int offset_table2[9] = {  0,  1,  3,  7, 15, 31, 63, 127, 255 };
  * @{
  */
 
-/**
- * Imode types
- * @{
- */
-enum Imode {
-    IMODE_RAW,
-    IMODE_NORM2,
-    IMODE_DIFF2,
-    IMODE_NORM6,
-    IMODE_DIFF6,
-    IMODE_ROWSKIP,
-    IMODE_COLSKIP
-};
-/** @} */ //imode defines
 
 static void init_block_index(VC1Context *v)
 {
@@ -392,7 +380,7 @@ static void vc1_mc_1mv(VC1Context *v, int dir)
             srcV = s->current_picture.f->data[2];
             luty  = v->curr_luty;
             lutuv = v->curr_lutuv;
-            use_ic = v->curr_use_ic;
+            use_ic = *v->curr_use_ic;
         } else {
             srcY = s->last_picture.f->data[0];
             srcU = s->last_picture.f->data[1];
@@ -452,7 +440,8 @@ static void vc1_mc_1mv(VC1Context *v, int dir)
         || s->h_edge_pos < 22 || v_edge_pos < 22
         || (unsigned)(src_x - s->mspel) > s->h_edge_pos - (mx&3) - 16 - s->mspel * 3
         || (unsigned)(src_y - 1)        > v_edge_pos    - (my&3) - 16 - 3) {
-        uint8_t *uvbuf = s->edge_emu_buffer + 19 * s->linesize;
+        uint8_t *ubuf = s->edge_emu_buffer + 19 * s->linesize;
+        uint8_t *vbuf = ubuf + 9 * s->uvlinesize;
 
         srcY -= s->mspel * (1 + s->linesize);
         s->vdsp.emulated_edge_mc(s->edge_emu_buffer, srcY,
@@ -461,16 +450,18 @@ static void vc1_mc_1mv(VC1Context *v, int dir)
                                  src_x - s->mspel, src_y - s->mspel,
                                  s->h_edge_pos, v_edge_pos);
         srcY = s->edge_emu_buffer;
-        s->vdsp.emulated_edge_mc(uvbuf, srcU,
+        s->vdsp.emulated_edge_mc(ubuf, srcU,
                                  s->uvlinesize, s->uvlinesize,
                                  8 + 1, 8 + 1,
-                                 uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, v_edge_pos >> 1);
-        s->vdsp.emulated_edge_mc(uvbuf + 16, srcV,
+                                 uvsrc_x, uvsrc_y,
+                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
+        s->vdsp.emulated_edge_mc(vbuf, srcV,
                                  s->uvlinesize, s->uvlinesize,
                                  8 + 1, 8 + 1,
-                                 uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, v_edge_pos >> 1);
-        srcU = uvbuf;
-        srcV = uvbuf + 16;
+                                 uvsrc_x, uvsrc_y,
+                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
+        srcU = ubuf;
+        srcV = vbuf;
         /* if we deal with range reduction we need to scale source blocks */
         if (v->rangeredfrm) {
             int i, j;
@@ -522,11 +513,7 @@ static void vc1_mc_1mv(VC1Context *v, int dir)
 
     if (s->mspel) {
         dxy = ((my & 3) << 2) | (mx & 3);
-        v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0]    , srcY    , s->linesize, v->rnd);
-        v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8, srcY + 8, s->linesize, v->rnd);
-        srcY += s->linesize * 8;
-        v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize    , srcY    , s->linesize, v->rnd);
-        v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd);
+        v->vc1dsp.put_vc1_mspel_pixels_tab[0][dxy](s->dest[0]    , srcY    , s->linesize, v->rnd);
     } else { // hpel mc - always used for luma
         dxy = (my & 2) | ((mx & 2) >> 1);
         if (!v->rnd)
@@ -584,7 +571,7 @@ static void vc1_mc_4mv_luma(VC1Context *v, int n, int dir, int avg)
         if (v->field_mode && (v->cur_field_type != v->ref_field_type[dir]) && v->second_field) {
             srcY = s->current_picture.f->data[0];
             luty = v->curr_luty;
-            use_ic = v->curr_use_ic;
+            use_ic = *v->curr_use_ic;
         } else {
             srcY = s->last_picture.f->data[0];
             luty = v->last_luty;
@@ -633,6 +620,8 @@ static void vc1_mc_4mv_luma(VC1Context *v, int n, int dir, int avg)
             tx = (chosen_mv[f][0][0] + chosen_mv[f][1][0]) / 2;
             ty = (chosen_mv[f][0][1] + chosen_mv[f][1][1]) / 2;
             break;
+        default:
+            av_assert0(0);
         }
         s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][0] = tx;
         s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][1] = ty;
@@ -738,9 +727,9 @@ static void vc1_mc_4mv_luma(VC1Context *v, int n, int dir, int avg)
     if (s->mspel) {
         dxy = ((my & 3) << 2) | (mx & 3);
         if (avg)
-            v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd);
+            v->vc1dsp.avg_vc1_mspel_pixels_tab[1][dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd);
         else
-            v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd);
+            v->vc1dsp.put_vc1_mspel_pixels_tab[1][dxy](s->dest[0] + off, srcY, s->linesize << fieldmv, v->rnd);
     } else { // hpel mc - always used for luma
         dxy = (my & 2) | ((mx & 2) >> 1);
         if (!v->rnd)
@@ -884,7 +873,7 @@ static void vc1_mc_4mv_chroma(VC1Context *v, int dir)
             srcU = s->current_picture.f->data[1];
             srcV = s->current_picture.f->data[2];
             lutuv = v->curr_lutuv;
-            use_ic = v->curr_use_ic;
+            use_ic = *v->curr_use_ic;
         } else {
             srcU = s->last_picture.f->data[1];
             srcV = s->last_picture.f->data[2];
@@ -1014,21 +1003,26 @@ static void vc1_mc_4mv_chroma4(VC1Context *v, int dir, int dir2, int avg)
         uvsrc_x = av_clip(uvsrc_x, -8, s->avctx->coded_width  >> 1);
         uvsrc_y = av_clip(uvsrc_y, -8, s->avctx->coded_height >> 1);
         if (i < 2 ? dir : dir2) {
-            srcU = s->next_picture.f->data[1] + uvsrc_y * s->uvlinesize + uvsrc_x;
-            srcV = s->next_picture.f->data[2] + uvsrc_y * s->uvlinesize + uvsrc_x;
+            srcU = s->next_picture.f->data[1];
+            srcV = s->next_picture.f->data[2];
             lutuv  = v->next_lutuv;
             use_ic = v->next_use_ic;
         } else {
-            srcU = s->last_picture.f->data[1] + uvsrc_y * s->uvlinesize + uvsrc_x;
-            srcV = s->last_picture.f->data[2] + uvsrc_y * s->uvlinesize + uvsrc_x;
+            srcU = s->last_picture.f->data[1];
+            srcV = s->last_picture.f->data[2];
             lutuv  = v->last_lutuv;
             use_ic = v->last_use_ic;
         }
+        if (!srcU)
+            return;
+        srcU += uvsrc_y * s->uvlinesize + uvsrc_x;
+        srcV += uvsrc_y * s->uvlinesize + uvsrc_x;
         uvmx_field[i] = (uvmx_field[i] & 3) << 1;
         uvmy_field[i] = (uvmy_field[i] & 3) << 1;
 
         if (fieldmv && !(uvsrc_y & 1))
-            v_edge_pos--;
+            v_edge_pos = (s->v_edge_pos >> 1) - 1;
+
         if (fieldmv && (uvsrc_y & 1) && uvsrc_y < 2)
             uvsrc_y--;
         if (use_ic
@@ -1217,6 +1211,7 @@ static av_always_inline void get_mvdata_interlaced(VC1Context *v, int *dmv_x,
         }
     }
     else {
+        av_assert0(index < esc);
         if (extend_x)
             offs_tab = offset_table2;
         else
@@ -1679,7 +1674,7 @@ static inline void vc1_pred_mv_intfr(VC1Context *v, int n, int dmv_x, int dmv_y,
     MpegEncContext *s = &v->s;
     int xy, wrap, off = 0;
     int A[2], B[2], C[2];
-    int px, py;
+    int px = 0, py = 0;
     int a_valid = 0, b_valid = 0, c_valid = 0;
     int field_a, field_b, field_c; // 0: same, 1: opposit
     int total_valid, num_samefield, num_oppfield;
@@ -1813,11 +1808,10 @@ static inline void vc1_pred_mv_intfr(VC1Context *v, int n, int dmv_x, int dmv_y,
                 px = mid_pred(A[0], B[0], C[0]);
                 py = mid_pred(A[1], B[1], C[1]);
             } else if (total_valid) {
-                if (a_valid) { px = A[0]; py = A[1]; }
-                if (b_valid) { px = B[0]; py = B[1]; }
-                if (c_valid) { px = C[0]; py = C[1]; }
-            } else
-                px = py = 0;
+                if      (a_valid) { px = A[0]; py = A[1]; }
+                else if (b_valid) { px = B[0]; py = B[1]; }
+                else              { px = C[0]; py = C[1]; }
+            }
         }
     } else {
         if (a_valid)
@@ -1856,28 +1850,28 @@ static inline void vc1_pred_mv_intfr(VC1Context *v, int n, int dmv_x, int dmv_y,
                 } else if (!field_b && b_valid) {
                     px = B[0];
                     py = B[1];
-                } else if (c_valid) {
+                } else /*if (c_valid)*/ {
+                    av_assert1(c_valid);
                     px = C[0];
                     py = C[1];
-                } else px = py = 0;
+                } /*else px = py = 0;*/
             } else {
                 if (field_a && a_valid) {
                     px = A[0];
                     py = A[1];
-                } else if (field_b && b_valid) {
+                } else /*if (field_b && b_valid)*/ {
+                    av_assert1(field_b && b_valid);
                     px = B[0];
                     py = B[1];
-                } else if (c_valid) {
+                } /*else if (c_valid) {
                     px = C[0];
                     py = C[1];
-                } else
-                    px = py = 0;
+                }*/
             }
         } else if (total_valid == 1) {
             px = (a_valid) ? A[0] : ((b_valid) ? B[0] : C[0]);
             py = (a_valid) ? A[1] : ((b_valid) ? B[1] : C[1]);
-        } else
-            px = py = 0;
+        }
     }
 
     /* store MV using signed modulus of MV range defined in 4.11 */
@@ -1918,9 +1912,10 @@ static void vc1_interp_mc(VC1Context *v)
     uvmx = (mx + ((mx & 3) == 3)) >> 1;
     uvmy = (my + ((my & 3) == 3)) >> 1;
     if (v->field_mode) {
-        if (v->cur_field_type != v->ref_field_type[1])
+        if (v->cur_field_type != v->ref_field_type[1]) {
             my   = my   - 2 + 4 * v->cur_field_type;
             uvmy = uvmy - 2 + 4 * v->cur_field_type;
+        }
     }
     if (v->fastuvmc) {
         uvmx = uvmx + ((uvmx < 0) ? -(uvmx & 1) : (uvmx & 1));
@@ -1966,7 +1961,8 @@ static void vc1_interp_mc(VC1Context *v)
     if (v->rangeredfrm || s->h_edge_pos < 22 || v_edge_pos < 22 || use_ic
         || (unsigned)(src_x - 1) > s->h_edge_pos - (mx & 3) - 16 - 3
         || (unsigned)(src_y - 1) > v_edge_pos    - (my & 3) - 16 - 3) {
-        uint8_t *uvbuf = s->edge_emu_buffer + 19 * s->linesize;
+        uint8_t *ubuf = s->edge_emu_buffer + 19 * s->linesize;
+        uint8_t *vbuf = ubuf + 9 * s->uvlinesize;
 
         srcY -= s->mspel * (1 + s->linesize);
         s->vdsp.emulated_edge_mc(s->edge_emu_buffer, srcY,
@@ -1975,16 +1971,18 @@ static void vc1_interp_mc(VC1Context *v)
                                  src_x - s->mspel, src_y - s->mspel,
                                  s->h_edge_pos, v_edge_pos);
         srcY = s->edge_emu_buffer;
-        s->vdsp.emulated_edge_mc(uvbuf, srcU,
+        s->vdsp.emulated_edge_mc(ubuf, srcU,
                                  s->uvlinesize, s->uvlinesize,
                                  8 + 1, 8 + 1,
-                                 uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, v_edge_pos >> 1);
-        s->vdsp.emulated_edge_mc(uvbuf + 16, srcV,
+                                 uvsrc_x, uvsrc_y,
+                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
+        s->vdsp.emulated_edge_mc(vbuf, srcV,
                                  s->uvlinesize, s->uvlinesize,
                                  8 + 1, 8 + 1,
-                                 uvsrc_x, uvsrc_y, s->h_edge_pos >> 1, v_edge_pos >> 1);
-        srcU = uvbuf;
-        srcV = uvbuf + 16;
+                                 uvsrc_x, uvsrc_y,
+                                 s->h_edge_pos >> 1, v_edge_pos >> 1);
+        srcU = ubuf;
+        srcV = vbuf;
         /* if we deal with range reduction we need to scale source blocks */
         if (v->rangeredfrm) {
             int i, j;
@@ -2041,11 +2039,7 @@ static void vc1_interp_mc(VC1Context *v)
 
     if (s->mspel) {
         dxy = ((my & 3) << 2) | (mx & 3);
-        v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + off    , srcY    , s->linesize, v->rnd);
-        v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + off + 8, srcY + 8, s->linesize, v->rnd);
-        srcY += s->linesize * 8;
-        v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + off + 8 * s->linesize    , srcY    , s->linesize, v->rnd);
-        v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + off + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd);
+        v->vc1dsp.avg_vc1_mspel_pixels_tab[0][dxy](s->dest[0] + off    , srcY    , s->linesize, v->rnd);
     } else { // hpel mc
         dxy = (my & 2) | ((mx & 2) >> 1);
 
@@ -2117,6 +2111,8 @@ static inline void vc1_pred_b_mv(VC1Context *v, int dmv_x[2], int dmv_y[2],
     int r_x, r_y;
     const uint8_t *is_intra = v->mb_type[0];
 
+    av_assert0(!v->field_mode);
+
     r_x = v->range_x;
     r_y = v->range_y;
     /* scale MV difference to be quad-pel */
@@ -2129,13 +2125,15 @@ static inline void vc1_pred_b_mv(VC1Context *v, int dmv_x[2], int dmv_y[2],
     xy = s->block_index[0];
 
     if (s->mb_intra) {
-        s->current_picture.motion_val[0][xy + v->blocks_off][0] =
-        s->current_picture.motion_val[0][xy + v->blocks_off][1] =
-        s->current_picture.motion_val[1][xy + v->blocks_off][0] =
-        s->current_picture.motion_val[1][xy + v->blocks_off][1] = 0;
+        s->current_picture.motion_val[0][xy][0] =
+        s->current_picture.motion_val[0][xy][1] =
+        s->current_picture.motion_val[1][xy][0] =
+        s->current_picture.motion_val[1][xy][1] = 0;
         return;
     }
-    if (!v->field_mode) {
+        if (direct && s->next_picture_ptr->field_picture)
+            av_log(s->avctx, AV_LOG_WARNING, "Mixed frame/field direct mode not supported\n");
+
         s->mv[0][0][0] = scale_mv(s->next_picture.motion_val[1][xy][0], v->bfraction, 0, s->quarter_sample);
         s->mv[0][0][1] = scale_mv(s->next_picture.motion_val[1][xy][1], v->bfraction, 0, s->quarter_sample);
         s->mv[1][0][0] = scale_mv(s->next_picture.motion_val[1][xy][0], v->bfraction, 1, s->quarter_sample);
@@ -2146,12 +2144,11 @@ static inline void vc1_pred_b_mv(VC1Context *v, int dmv_x[2], int dmv_y[2],
         s->mv[0][0][1] = av_clip(s->mv[0][0][1], -60 - (s->mb_y << 6), (s->mb_height << 6) - 4 - (s->mb_y << 6));
         s->mv[1][0][0] = av_clip(s->mv[1][0][0], -60 - (s->mb_x << 6), (s->mb_width  << 6) - 4 - (s->mb_x << 6));
         s->mv[1][0][1] = av_clip(s->mv[1][0][1], -60 - (s->mb_y << 6), (s->mb_height << 6) - 4 - (s->mb_y << 6));
-    }
     if (direct) {
-        s->current_picture.motion_val[0][xy + v->blocks_off][0] = s->mv[0][0][0];
-        s->current_picture.motion_val[0][xy + v->blocks_off][1] = s->mv[0][0][1];
-        s->current_picture.motion_val[1][xy + v->blocks_off][0] = s->mv[1][0][0];
-        s->current_picture.motion_val[1][xy + v->blocks_off][1] = s->mv[1][0][1];
+        s->current_picture.motion_val[0][xy][0] = s->mv[0][0][0];
+        s->current_picture.motion_val[0][xy][1] = s->mv[0][0][1];
+        s->current_picture.motion_val[1][xy][0] = s->mv[1][0][0];
+        s->current_picture.motion_val[1][xy][1] = s->mv[1][0][1];
         return;
     }
 
@@ -2799,7 +2796,7 @@ static int vc1_decode_i_block_adv(VC1Context *v, int16_t block[64], int n,
     MpegEncContext *s = &v->s;
     int dc_pred_dir = 0; /* Direction of the DC prediction used */
     int i;
-    int16_t *dc_val;
+    int16_t *dc_val = NULL;
     int16_t *ac_val, *ac_val2;
     int dcdiff;
     int a_avail = v->a_avail, c_avail = v->c_avail;
@@ -3011,7 +3008,7 @@ static int vc1_decode_intra_block(VC1Context *v, int16_t block[64], int n,
     MpegEncContext *s = &v->s;
     int dc_pred_dir = 0; /* Direction of the DC prediction used */
     int i;
-    int16_t *dc_val;
+    int16_t *dc_val = NULL;
     int16_t *ac_val, *ac_val2;
     int dcdiff;
     int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
@@ -3791,7 +3788,7 @@ static int vc1_decode_p_mb_intfr(VC1Context *v)
     int idx_mbmode = 0, mvbp;
     int stride_y, fieldtx;
 
-    mquant = v->pq; /* Loosy initialization */
+    mquant = v->pq; /* Lossy initialization */
 
     if (v->skip_is_raw)
         skipped = get_bits1(gb);
@@ -3999,11 +3996,11 @@ static int vc1_decode_p_mb_intfi(VC1Context *v)
     int val; /* temp values */
     int first_block = 1;
     int dst_idx, off;
-    int pred_flag;
+    int pred_flag = 0;
     int block_cbp = 0, pat, block_tt = 0;
     int idx_mbmode = 0;
 
-    mquant = v->pq; /* Loosy initialization */
+    mquant = v->pq; /* Lossy initialization */
 
     idx_mbmode = get_vlc2(gb, v->mbmode_vlc->table, VC1_IF_MBMODE_VLC_BITS, 2);
     if (idx_mbmode <= 1) { // intra MB
@@ -4277,9 +4274,9 @@ static void vc1_decode_b_mb_intfi(VC1Context *v)
     int fwd;
     int dmv_x[2], dmv_y[2], pred_flag[2];
     int bmvtype = BMV_TYPE_BACKWARD;
-    int idx_mbmode, interpmvp;
+    int idx_mbmode;
 
-    mquant      = v->pq; /* Loosy initialization */
+    mquant      = v->pq; /* Lossy initialization */
     s->mb_intra = 0;
 
     idx_mbmode = get_vlc2(gb, v->mbmode_vlc->table, VC1_IF_MBMODE_VLC_BITS, 2);
@@ -4333,6 +4330,7 @@ static void vc1_decode_b_mb_intfi(VC1Context *v)
         else
             fwd = v->forward_mb_plane[mb_pos];
         if (idx_mbmode <= 5) { // 1-MV
+            int interpmvp = 0;
             dmv_x[0]     = dmv_x[1] = dmv_y[0] = dmv_y[1] = 0;
             pred_flag[0] = pred_flag[1] = 0;
             if (fwd)
@@ -4355,12 +4353,16 @@ static void vc1_decode_b_mb_intfi(VC1Context *v)
             if (bmvtype != BMV_TYPE_DIRECT && idx_mbmode & 1) {
                 get_mvdata_interlaced(v, &dmv_x[bmvtype == BMV_TYPE_BACKWARD], &dmv_y[bmvtype == BMV_TYPE_BACKWARD], &pred_flag[bmvtype == BMV_TYPE_BACKWARD]);
             }
-            if (bmvtype == BMV_TYPE_INTERPOLATED && interpmvp) {
+            if (interpmvp) {
                 get_mvdata_interlaced(v, &dmv_x[1], &dmv_y[1], &pred_flag[1]);
             }
             if (bmvtype == BMV_TYPE_DIRECT) {
                 dmv_x[0] = dmv_y[0] = pred_flag[0] = 0;
                 dmv_x[1] = dmv_y[1] = pred_flag[0] = 0;
+                if (!s->next_picture_ptr->field_picture) {
+                    av_log(s->avctx, AV_LOG_ERROR, "Mixed field/frame direct mode not supported\n");
+                    return;
+                }
             }
             vc1_pred_b_mv_intfi(v, 0, dmv_x, dmv_y, 1, pred_flag);
             vc1_b_mc(v, dmv_x, dmv_y, (bmvtype == BMV_TYPE_DIRECT), bmvtype);
@@ -4468,6 +4470,8 @@ static int vc1_decode_b_mb_intfr(VC1Context *v)
         direct = v->direct_mb_plane[mb_pos];
 
     if (direct) {
+        if (s->next_picture_ptr->field_picture)
+            av_log(s->avctx, AV_LOG_WARNING, "Mixed frame/field direct mode not supported\n");
         s->mv[0][0][0] = s->current_picture.motion_val[0][s->block_index[0]][0] = scale_mv(s->next_picture.motion_val[1][s->block_index[0]][0], v->bfraction, 0, s->quarter_sample);
         s->mv[0][0][1] = s->current_picture.motion_val[0][s->block_index[0]][1] = scale_mv(s->next_picture.motion_val[1][s->block_index[0]][1], v->bfraction, 0, s->quarter_sample);
         s->mv[1][0][0] = s->current_picture.motion_val[1][s->block_index[0]][0] = scale_mv(s->next_picture.motion_val[1][s->block_index[0]][0], v->bfraction, 1, s->quarter_sample);
@@ -4570,9 +4574,9 @@ static int vc1_decode_b_mb_intfr(VC1Context *v)
             if (mb_has_coeffs)
                 cbp = 1 + get_vlc2(&v->s.gb, v->cbpcy_vlc->table, VC1_CBPCY_P_VLC_BITS, 2);
             if (!direct) {
-                if (bmvtype == BMV_TYPE_INTERPOLATED & twomv) {
+                if (bmvtype == BMV_TYPE_INTERPOLATED && twomv) {
                     v->fourmvbp = get_vlc2(gb, v->fourmvbp_vlc->table, VC1_4MV_BLOCK_PATTERN_VLC_BITS, 1);
-                } else if (bmvtype == BMV_TYPE_INTERPOLATED | twomv) {
+                } else if (bmvtype == BMV_TYPE_INTERPOLATED || twomv) {
                     v->twomvbp = get_vlc2(gb, v->twomvbp_vlc->table, VC1_2MV_BLOCK_PATTERN_VLC_BITS, 1);
                 }
             }
@@ -5299,7 +5303,7 @@ static void vc1_sprite_parse_transform(GetBitContext* gb, int c[7])
         c[6] = 1 << 16;
 }
 
-static void vc1_parse_sprites(VC1Context *v, GetBitContext* gb, SpriteData* sd)
+static int vc1_parse_sprites(VC1Context *v, GetBitContext* gb, SpriteData* sd)
 {
     AVCodecContext *avctx = v->s.avctx;
     int sprite, i;
@@ -5343,7 +5347,7 @@ static void vc1_parse_sprites(VC1Context *v, GetBitContext* gb, SpriteData* sd)
         sd->effect_pcount2 = get_bits(gb, 16);
         if (sd->effect_pcount2 > 10) {
             av_log(avctx, AV_LOG_ERROR, "Too many effect parameters\n");
-            return;
+            return AVERROR_INVALIDDATA;
         } else if (sd->effect_pcount2) {
             i = -1;
             av_log(avctx, AV_LOG_DEBUG, "Effect params 2: ");
@@ -5360,10 +5364,14 @@ static void vc1_parse_sprites(VC1Context *v, GetBitContext* gb, SpriteData* sd)
         av_log(avctx, AV_LOG_DEBUG, "Effect flag set\n");
 
     if (get_bits_count(gb) >= gb->size_in_bits +
-       (avctx->codec_id == AV_CODEC_ID_WMV3IMAGE ? 64 : 0))
+       (avctx->codec_id == AV_CODEC_ID_WMV3IMAGE ? 64 : 0)) {
         av_log(avctx, AV_LOG_ERROR, "Buffer overrun\n");
+        return AVERROR_INVALIDDATA;
+    }
     if (get_bits_count(gb) < gb->size_in_bits - 8)
         av_log(avctx, AV_LOG_WARNING, "Buffer not fully read\n");
+
+    return 0;
 }
 
 static void vc1_draw_sprites(VC1Context *v, SpriteData* sd)
@@ -5375,7 +5383,7 @@ static void vc1_draw_sprites(VC1Context *v, SpriteData* sd)
     int ysub[2];
     MpegEncContext *s = &v->s;
 
-    for (i = 0; i < 2; i++) {
+    for (i = 0; i <= v->two_sprites; i++) {
         xoff[i] = av_clip(sd->coefs[i][2], 0, v->sprite_width-1 << 16);
         xadv[i] = sd->coefs[i][0];
         if (xadv[i] != 1<<16 || (v->sprite_width << 16) - (v->output_width << 16) - xoff[i])
@@ -5453,7 +5461,7 @@ static void vc1_draw_sprites(VC1Context *v, SpriteData* sd)
         }
 
         if (!plane) {
-            for (i = 0; i < 2; i++) {
+            for (i = 0; i <= v->two_sprites; i++) {
                 xoff[i] >>= 1;
                 yoff[i] >>= 1;
             }
@@ -5465,11 +5473,16 @@ static void vc1_draw_sprites(VC1Context *v, SpriteData* sd)
 
 static int vc1_decode_sprites(VC1Context *v, GetBitContext* gb)
 {
+    int ret;
     MpegEncContext *s     = &v->s;
     AVCodecContext *avctx = s->avctx;
     SpriteData sd;
 
-    vc1_parse_sprites(v, gb, &sd);
+    memset(&sd, 0, sizeof(sd));
+
+    ret = vc1_parse_sprites(v, gb, &sd);
+    if (ret < 0)
+        return ret;
 
     if (!s->current_picture.f->data[0]) {
         av_log(avctx, AV_LOG_ERROR, "Got no sprites\n");
@@ -5482,10 +5495,8 @@ static int vc1_decode_sprites(VC1Context *v, GetBitContext* gb)
     }
 
     av_frame_unref(v->sprite_output_frame);
-    if (ff_get_buffer(avctx, v->sprite_output_frame, 0) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
-        return -1;
-    }
+    if ((ret = ff_get_buffer(avctx, v->sprite_output_frame, 0)) < 0)
+        return ret;
 
     vc1_draw_sprites(v, &sd);
 
@@ -5534,7 +5545,7 @@ av_cold int ff_vc1_decode_init_alloc_tables(VC1Context *v)
     v->ttblk            = v->ttblk_base + s->mb_stride;
     v->is_intra_base    = av_mallocz(sizeof(v->is_intra_base[0]) * 2 * s->mb_stride);
     v->is_intra         = v->is_intra_base + s->mb_stride;
-    v->luma_mv_base     = av_malloc(sizeof(v->luma_mv_base[0]) * 2 * s->mb_stride);
+    v->luma_mv_base     = av_mallocz(sizeof(v->luma_mv_base[0]) * 2 * s->mb_stride);
     v->luma_mv          = v->luma_mv_base + s->mb_stride;
 
     /* allocate block type info in that way so it could be used with s->block_index[] */
@@ -5565,7 +5576,8 @@ av_cold int ff_vc1_decode_init_alloc_tables(VC1Context *v)
 
     if (s->avctx->codec_id == AV_CODEC_ID_WMV3IMAGE || s->avctx->codec_id == AV_CODEC_ID_VC1IMAGE) {
         for (i = 0; i < 4; i++)
-            if (!(v->sr_rows[i >> 1][i & 1] = av_malloc(v->output_width))) return -1;
+            if (!(v->sr_rows[i >> 1][i & 1] = av_malloc(v->output_width)))
+                return AVERROR(ENOMEM);
     }
 
     if (!v->mv_type_mb_plane || !v->direct_mb_plane || !v->acpred_plane || !v->over_flags_plane ||
@@ -5591,7 +5603,7 @@ av_cold void ff_vc1_init_transposed_scantables(VC1Context *v)
 {
     int i;
     for (i = 0; i < 64; i++) {
-#define transpose(x) ((x >> 3) | ((x & 7) << 3))
+#define transpose(x) (((x) >> 3) | (((x) & 7) << 3))
         v->zz_8x8[0][i] = transpose(ff_wmv1_scantable[0][i]);
         v->zz_8x8[1][i] = transpose(ff_wmv1_scantable[1][i]);
         v->zz_8x8[2][i] = transpose(ff_wmv1_scantable[2][i]);
@@ -5611,6 +5623,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
     VC1Context *v = avctx->priv_data;
     MpegEncContext *s = &v->s;
     GetBitContext gb;
+    int ret;
 
     /* save the container output size for WMImage */
     v->output_width  = avctx->width;
@@ -5624,12 +5637,21 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
         avctx->pix_fmt = AV_PIX_FMT_GRAY8;
     v->s.avctx = avctx;
 
-    if (ff_vc1_init_common(v) < 0)
-        return -1;
+    if ((ret = ff_vc1_init_common(v)) < 0)
+        return ret;
+    // ensure static VLC tables are initialized
+    if ((ret = ff_msmpeg4_decode_init(avctx)) < 0)
+        return ret;
+    if ((ret = ff_vc1_decode_init_alloc_tables(v)) < 0)
+        return ret;
+    // Hack to ensure the above functions will be called
+    // again once we know all necessary settings.
+    // That this is necessary might indicate a bug.
+    ff_vc1_decode_end(avctx);
+
     ff_blockdsp_init(&s->bdsp, avctx);
     ff_h264chroma_init(&v->h264chroma, 8);
     ff_qpeldsp_init(&s->qdsp);
-    ff_vc1dsp_init(&v->vc1dsp);
 
     if (avctx->codec_id == AV_CODEC_ID_WMV3 || avctx->codec_id == AV_CODEC_ID_WMV3IMAGE) {
         int count = 0;
@@ -5641,8 +5663,8 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
 
         init_get_bits(&gb, avctx->extradata, avctx->extradata_size*8);
 
-        if (ff_vc1_decode_sequence_header(avctx, v, &gb) < 0)
-          return -1;
+        if ((ret = ff_vc1_decode_sequence_header(avctx, v, &gb)) < 0)
+          return ret;
 
         count = avctx->extradata_size*8 - get_bits_count(&gb);
         if (count > 0) {
@@ -5676,16 +5698,16 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
             init_get_bits(&gb, buf2, buf2_size * 8);
             switch (AV_RB32(start)) {
             case VC1_CODE_SEQHDR:
-                if (ff_vc1_decode_sequence_header(avctx, v, &gb) < 0) {
+                if ((ret = ff_vc1_decode_sequence_header(avctx, v, &gb)) < 0) {
                     av_free(buf2);
-                    return -1;
+                    return ret;
                 }
                 seq_initialized = 1;
                 break;
             case VC1_CODE_ENTRYPOINT:
-                if (ff_vc1_decode_entry_point(avctx, v, &gb) < 0) {
+                if ((ret = ff_vc1_decode_entry_point(avctx, v, &gb)) < 0) {
                     av_free(buf2);
-                    return -1;
+                    return ret;
                 }
                 ep_initialized = 1;
                 break;
@@ -5732,6 +5754,11 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
             v->sprite_height > 1 << 14 ||
             v->output_width  > 1 << 14 ||
             v->output_height > 1 << 14) return -1;
+
+        if ((v->sprite_width&1) || (v->sprite_height&1)) {
+            avpriv_request_sample(avctx, "odd sprites support");
+            return AVERROR_PATCHWELCOME;
+        }
     }
     return 0;
 }
@@ -5783,14 +5810,19 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
     MpegEncContext *s = &v->s;
     AVFrame *pict = data;
     uint8_t *buf2 = NULL;
-    const uint8_t *buf_start = buf;
-    int mb_height, n_slices1;
+    const uint8_t *buf_start = buf, *buf_start_second_field = NULL;
+    int mb_height, n_slices1=-1;
     struct {
         uint8_t *buf;
         GetBitContext gb;
         int mby_start;
     } *slices = NULL, *tmp;
 
+    v->second_field = 0;
+
+    if(s->flags & CODEC_FLAG_LOW_DELAY)
+        s->low_delay = 1;
+
     /* no supplementary picture */
     if (buf_size == 0 || (buf_size == 4 && AV_RB32(buf) == VC1_CODE_ENDOFSEQ)) {
         /* special case for last picture */
@@ -5802,13 +5834,22 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
             *got_frame = 1;
         }
 
-        return 0;
+        return buf_size;
+    }
+
+    if (s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) {
+        if (v->profile < PROFILE_ADVANCED)
+            avctx->pix_fmt = AV_PIX_FMT_VDPAU_WMV3;
+        else
+            avctx->pix_fmt = AV_PIX_FMT_VDPAU_VC1;
     }
 
     //for advanced profile we may need to parse and unescape data
     if (avctx->codec_id == AV_CODEC_ID_VC1 || avctx->codec_id == AV_CODEC_ID_VC1IMAGE) {
         int buf_size2 = 0;
         buf2 = av_mallocz(buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
+        if (!buf2)
+            return AVERROR(ENOMEM);
 
         if (IS_MARKER(AV_RB32(buf))) { /* frame starts with marker and needs to be parsed */
             const uint8_t *start, *end, *next;
@@ -5821,13 +5862,17 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
                 if (size <= 0) continue;
                 switch (AV_RB32(start)) {
                 case VC1_CODE_FRAME:
-                    if (avctx->hwaccel)
+                    if (avctx->hwaccel ||
+                        s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
                         buf_start = start;
                     buf_size2 = vc1_unescape_buffer(start + 4, size, buf2);
                     break;
                 case VC1_CODE_FIELD: {
                     int buf_size3;
-                    tmp = av_realloc(slices, sizeof(*slices) * (n_slices+1));
+                    if (avctx->hwaccel ||
+                        s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
+                        buf_start_second_field = start;
+                    tmp = av_realloc_array(slices, sizeof(*slices), (n_slices+1));
                     if (!tmp)
                         goto err;
                     slices = tmp;
@@ -5840,7 +5885,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
                                   buf_size3 << 3);
                     /* assuming that the field marker is at the exact middle,
                        hope it's correct */
-                    slices[n_slices].mby_start = s->mb_height >> 1;
+                    slices[n_slices].mby_start = s->mb_height + 1 >> 1;
                     n_slices1 = n_slices - 1; // index of the last slice of the first field
                     n_slices++;
                     break;
@@ -5852,7 +5897,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
                     break;
                 case VC1_CODE_SLICE: {
                     int buf_size3;
-                    tmp = av_realloc(slices, sizeof(*slices) * (n_slices+1));
+                    tmp = av_realloc_array(slices, sizeof(*slices), (n_slices+1));
                     if (!tmp)
                         goto err;
                     slices = tmp;
@@ -5878,7 +5923,10 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
                 av_log(avctx, AV_LOG_ERROR, "Error in WVC1 interlaced frame\n");
                 goto err;
             } else { // found field marker, unescape second field
-                tmp = av_realloc(slices, sizeof(*slices) * (n_slices+1));
+                if (avctx->hwaccel ||
+                    s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
+                    buf_start_second_field = divider;
+                tmp = av_realloc_array(slices, sizeof(*slices), (n_slices+1));
                 if (!tmp)
                     goto err;
                 slices = tmp;
@@ -5888,7 +5936,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
                 buf_size3 = vc1_unescape_buffer(divider + 4, buf + buf_size - divider - 4, slices[n_slices].buf);
                 init_get_bits(&slices[n_slices].gb, slices[n_slices].buf,
                               buf_size3 << 3);
-                slices[n_slices].mby_start = s->mb_height >> 1;
+                slices[n_slices].mby_start = s->mb_height + 1 >> 1;
                 n_slices1 = n_slices - 1;
                 n_slices++;
             }
@@ -5935,6 +5983,8 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
         s->low_delay = !avctx->has_b_frames || v->res_sprite;
 
         if (v->profile == PROFILE_ADVANCED) {
+            if(avctx->coded_width<=1 || avctx->coded_height<=1)
+                goto err;
             s->h_edge_pos = avctx->coded_width;
             s->v_edge_pos = avctx->coded_height;
         }
@@ -5954,18 +6004,27 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
     }
     v->first_pic_header_flag = 0;
 
+    if (avctx->debug & FF_DEBUG_PICT_INFO)
+        av_log(v->s.avctx, AV_LOG_DEBUG, "pict_type: %c\n", av_get_picture_type_char(s->pict_type));
+
     if ((avctx->codec_id == AV_CODEC_ID_WMV3IMAGE || avctx->codec_id == AV_CODEC_ID_VC1IMAGE)
         && s->pict_type != AV_PICTURE_TYPE_I) {
         av_log(v->s.avctx, AV_LOG_ERROR, "Sprite decoder: expected I-frame\n");
         goto err;
     }
 
+    if ((s->mb_height >> v->field_mode) == 0) {
+        av_log(v->s.avctx, AV_LOG_ERROR, "image too short\n");
+        goto err;
+    }
+
     // for skipping the frame
     s->current_picture.f->pict_type = s->pict_type;
     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
 
     /* skip B-frames if we don't have reference frames */
     if (s->last_picture_ptr == NULL && (s->pict_type == AV_PICTURE_TYPE_B || s->droppable)) {
+        av_log(v->s.avctx, AV_LOG_DEBUG, "Skipping B frame without reference frames\n");
         goto end;
     }
     if ((avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type == AV_PICTURE_TYPE_B) ||
@@ -5985,6 +6044,10 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
         goto err;
     }
 
+    v->s.current_picture_ptr->field_picture = v->field_mode;
+    v->s.current_picture_ptr->f->interlaced_frame = (v->fcm != PROGRESSIVE);
+    v->s.current_picture_ptr->f->top_field_first  = v->tff;
+
     // process pulldown flags
     s->current_picture_ptr->f->repeat_pict = 0;
     // Pulldown flags are only valid when 'broadcast' has been set.
@@ -6000,13 +6063,51 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
     s->me.qpel_put = s->qdsp.put_qpel_pixels_tab;
     s->me.qpel_avg = s->qdsp.avg_qpel_pixels_tab;
 
-    if (avctx->hwaccel) {
-        if (avctx->hwaccel->start_frame(avctx, buf, buf_size) < 0)
-            goto err;
-        if (avctx->hwaccel->decode_slice(avctx, buf_start, (buf + buf_size) - buf_start) < 0)
-            goto err;
-        if (avctx->hwaccel->end_frame(avctx) < 0)
-            goto err;
+    if ((CONFIG_VC1_VDPAU_DECODER)
+        &&s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) {
+        if (v->field_mode && buf_start_second_field) {
+            ff_vdpau_vc1_decode_picture(s, buf_start, buf_start_second_field - buf_start);
+            ff_vdpau_vc1_decode_picture(s, buf_start_second_field, (buf + buf_size) - buf_start_second_field);
+        } else {
+            ff_vdpau_vc1_decode_picture(s, buf_start, (buf + buf_size) - buf_start);
+        }
+    } else if (avctx->hwaccel) {
+        if (v->field_mode && buf_start_second_field) {
+            // decode first field
+            s->picture_structure = PICT_BOTTOM_FIELD - v->tff;
+            if (avctx->hwaccel->start_frame(avctx, buf_start, buf_start_second_field - buf_start) < 0)
+                goto err;
+            if (avctx->hwaccel->decode_slice(avctx, buf_start, buf_start_second_field - buf_start) < 0)
+                goto err;
+            if (avctx->hwaccel->end_frame(avctx) < 0)
+                goto err;
+
+            // decode second field
+            s->gb = slices[n_slices1 + 1].gb;
+            s->picture_structure = PICT_TOP_FIELD + v->tff;
+            v->second_field = 1;
+            v->pic_header_flag = 0;
+            if (ff_vc1_parse_frame_header_adv(v, &s->gb) < 0) {
+                av_log(avctx, AV_LOG_ERROR, "parsing header for second field failed\n");
+                goto err;
+            }
+            v->s.current_picture_ptr->f->pict_type = v->s.pict_type;
+
+            if (avctx->hwaccel->start_frame(avctx, buf_start_second_field, (buf + buf_size) - buf_start_second_field) < 0)
+                goto err;
+            if (avctx->hwaccel->decode_slice(avctx, buf_start_second_field, (buf + buf_size) - buf_start_second_field) < 0)
+                goto err;
+            if (avctx->hwaccel->end_frame(avctx) < 0)
+                goto err;
+        } else {
+            s->picture_structure = PICT_FRAME;
+            if (avctx->hwaccel->start_frame(avctx, buf_start, (buf + buf_size) - buf_start) < 0)
+                goto err;
+            if (avctx->hwaccel->decode_slice(avctx, buf_start, (buf + buf_size) - buf_start) < 0)
+                goto err;
+            if (avctx->hwaccel->end_frame(avctx) < 0)
+                goto err;
+        }
     } else {
         int header_ret = 0;
 
@@ -6023,10 +6124,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
         }
         mb_height = s->mb_height >> v->field_mode;
 
-        if (!mb_height) {
-            av_log(v->s.avctx, AV_LOG_ERROR, "Invalid mb_height.\n");
-            goto err;
-        }
+        av_assert0 (mb_height > 0);
 
         for (i = 0; i <= n_slices; i++) {
             if (i > 0 &&  slices[i - 1].mby_start >= mb_height) {
@@ -6037,7 +6135,8 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
                     continue;
                 }
                 v->second_field = 1;
-                v->blocks_off   = s->mb_width  * s->mb_height << 1;
+                av_assert0((s->mb_height & 1) == 0);
+                v->blocks_off   = s->b8_stride * (s->mb_height&~1);
                 v->mb_off       = s->mb_stride * s->mb_height >> 1;
             } else {
                 v->second_field = 0;
@@ -6068,8 +6167,21 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
             s->start_mb_y = (i == 0) ? 0 : FFMAX(0, slices[i-1].mby_start % mb_height);
             if (!v->field_mode || v->second_field)
                 s->end_mb_y = (i == n_slices     ) ? mb_height : FFMIN(mb_height, slices[i].mby_start % mb_height);
-            else
+            else {
+                if (i >= n_slices) {
+                    av_log(v->s.avctx, AV_LOG_ERROR, "first field slice count too large\n");
+                    continue;
+                }
                 s->end_mb_y = (i <= n_slices1 + 1) ? mb_height : FFMIN(mb_height, slices[i].mby_start % mb_height);
+            }
+            if (s->end_mb_y <= s->start_mb_y) {
+                av_log(v->s.avctx, AV_LOG_ERROR, "end mb y %d %d invalid\n", s->end_mb_y, s->start_mb_y);
+                continue;
+            }
+            if (!v->p_frame_skipped && s->pict_type != AV_PICTURE_TYPE_I && !v->cbpcy_vlc) {
+                av_log(v->s.avctx, AV_LOG_ERROR, "missing cbpcy_vlc\n");
+                continue;
+            }
             ff_vc1_decode_blocks(v);
             if (i != n_slices)
                 s->gb = slices[i].gb;
@@ -6090,6 +6202,8 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
                 get_bits_count(&s->gb), s->gb.size_in_bits);
 //  if (get_bits_count(&s->gb) > buf_size * 8)
 //      return -1;
+        if(s->er.error_occurred && s->pict_type == AV_PICTURE_TYPE_B)
+            goto err;
         if (!v->field_mode)
             ff_er_frame_end(&s->er);
     }
@@ -6113,12 +6227,12 @@ image:
         if (s->pict_type == AV_PICTURE_TYPE_B || s->low_delay) {
             if ((ret = av_frame_ref(pict, s->current_picture_ptr->f)) < 0)
                 goto err;
-            ff_print_debug_info(s, s->current_picture_ptr);
+            ff_print_debug_info(s, s->current_picture_ptr, pict);
             *got_frame = 1;
         } else if (s->last_picture_ptr != NULL) {
             if ((ret = av_frame_ref(pict, s->last_picture_ptr->f)) < 0)
                 goto err;
-            ff_print_debug_info(s, s->last_picture_ptr);
+            ff_print_debug_info(s, s->last_picture_ptr, pict);
             *got_frame = 1;
         }
     }
@@ -6193,6 +6307,38 @@ AVCodec ff_wmv3_decoder = {
 };
 #endif
 
+#if CONFIG_WMV3_VDPAU_DECODER
+AVCodec ff_wmv3_vdpau_decoder = {
+    .name           = "wmv3_vdpau",
+    .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 9 VDPAU"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_WMV3,
+    .priv_data_size = sizeof(VC1Context),
+    .init           = vc1_decode_init,
+    .close          = ff_vc1_decode_end,
+    .decode         = vc1_decode_frame,
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
+    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_VDPAU_WMV3, AV_PIX_FMT_NONE },
+    .profiles       = NULL_IF_CONFIG_SMALL(profiles)
+};
+#endif
+
+#if CONFIG_VC1_VDPAU_DECODER
+AVCodec ff_vc1_vdpau_decoder = {
+    .name           = "vc1_vdpau",
+    .long_name      = NULL_IF_CONFIG_SMALL("SMPTE VC-1 VDPAU"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_VC1,
+    .priv_data_size = sizeof(VC1Context),
+    .init           = vc1_decode_init,
+    .close          = ff_vc1_decode_end,
+    .decode         = vc1_decode_frame,
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
+    .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_VDPAU_VC1, AV_PIX_FMT_NONE },
+    .profiles       = NULL_IF_CONFIG_SMALL(profiles)
+};
+#endif
+
 #if CONFIG_WMV3IMAGE_DECODER
 AVCodec ff_wmv3image_decoder = {
     .name           = "wmv3image",
diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c
index 3b92eb2..fae9dae 100644
--- a/libavcodec/vc1dsp.c
+++ b/libavcodec/vc1dsp.c
@@ -2,20 +2,20 @@
  * VC-1 and WMV3 decoder - DSP functions
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,10 +25,14 @@
  *
  */
 
+#include "libavutil/avassert.h"
 #include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
 #include "h264chroma.h"
 #include "qpeldsp.h"
+#include "rnd_avg.h"
 #include "vc1dsp.h"
+#include "startcode.h"
 
 /* Apply overlap transform to horizontal edge */
 static void vc1_v_overlap_c(uint8_t *src, int stride)
@@ -581,10 +585,10 @@ static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride,
 }
 
 /* Function used to do motion compensation with bicubic interpolation */
-#define VC1_MSPEL_MC(OP, OPNAME)                                              \
+#define VC1_MSPEL_MC(OP, OP4, OPNAME)                                         \
 static av_always_inline void OPNAME ## vc1_mspel_mc(uint8_t *dst,             \
                                                     const uint8_t *src,       \
-                                                    int stride,               \
+                                                    ptrdiff_t stride,         \
                                                     int hmode,                \
                                                     int vmode,                \
                                                     int rnd)                  \
@@ -639,13 +643,93 @@ static av_always_inline void OPNAME ## vc1_mspel_mc(uint8_t *dst,             \
         dst += stride;                                                        \
         src += stride;                                                        \
     }                                                                         \
+}\
+static av_always_inline void OPNAME ## vc1_mspel_mc_16(uint8_t *dst,          \
+                                                       const uint8_t *src,    \
+                                                       ptrdiff_t stride,      \
+                                                       int hmode,             \
+                                                       int vmode,             \
+                                                       int rnd)               \
+{                                                                             \
+    int i, j;                                                                 \
+                                                                              \
+    if (vmode) { /* Vertical filter to apply */                               \
+        int r;                                                                \
+                                                                              \
+        if (hmode) { /* Horizontal filter too, vertical output to tmp */      \
+            static const int shift_value[] = { 0, 5, 1, 5 };                  \
+            int shift = (shift_value[hmode] + shift_value[vmode]) >> 1;       \
+            int16_t tmp[19 * 16], *tptr = tmp;                                \
+                                                                              \
+            r = (1 << (shift - 1)) + rnd - 1;                                 \
+                                                                              \
+            src -= 1;                                                         \
+            for (j = 0; j < 16; j++) {                                        \
+                for (i = 0; i < 19; i++)                                      \
+                    tptr[i] = (vc1_mspel_ver_filter_16bits(src + i, stride, vmode) + r) >> shift; \
+                src  += stride;                                               \
+                tptr += 19;                                                   \
+            }                                                                 \
+                                                                              \
+            r    = 64 - rnd;                                                  \
+            tptr = tmp + 1;                                                   \
+            for (j = 0; j < 16; j++) {                                        \
+                for (i = 0; i < 16; i++)                                      \
+                    OP(dst[i], (vc1_mspel_hor_filter_16bits(tptr + i, 1, hmode) + r) >> 7); \
+                dst  += stride;                                               \
+                tptr += 19;                                                   \
+            }                                                                 \
+                                                                              \
+            return;                                                           \
+        } else { /* No horizontal filter, output 16 lines to dst */           \
+            r = 1 - rnd;                                                      \
+                                                                              \
+            for (j = 0; j < 16; j++) {                                        \
+                for (i = 0; i < 16; i++)                                      \
+                    OP(dst[i], vc1_mspel_filter(src + i, stride, vmode, r));  \
+                src += stride;                                                \
+                dst += stride;                                                \
+            }                                                                 \
+            return;                                                           \
+        }                                                                     \
+    }                                                                         \
+                                                                              \
+    /* Horizontal mode with no vertical mode */                               \
+    for (j = 0; j < 16; j++) {                                                \
+        for (i = 0; i < 16; i++)                                              \
+            OP(dst[i], vc1_mspel_filter(src + i, 1, hmode, rnd));             \
+        dst += stride;                                                        \
+        src += stride;                                                        \
+    }                                                                         \
+}\
+static void OPNAME ## pixels8x8_c(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int rnd){\
+    int i;\
+    for(i=0; i<8; i++){\
+        OP4(*(uint32_t*)(block  ), AV_RN32(pixels  ));\
+        OP4(*(uint32_t*)(block+4), AV_RN32(pixels+4));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+static void OPNAME ## pixels16x16_c(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int rnd){\
+    int i;\
+    for(i=0; i<16; i++){\
+        OP4(*(uint32_t*)(block   ), AV_RN32(pixels   ));\
+        OP4(*(uint32_t*)(block+ 4), AV_RN32(pixels+ 4));\
+        OP4(*(uint32_t*)(block+ 8), AV_RN32(pixels+ 8));\
+        OP4(*(uint32_t*)(block+12), AV_RN32(pixels+12));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
 }
 
 #define op_put(a, b) a = av_clip_uint8(b)
 #define op_avg(a, b) a = (a + av_clip_uint8(b) + 1) >> 1
+#define op4_avg(a, b) a = rnd_avg32(a, b)
+#define op4_put(a, b) a = b
 
-VC1_MSPEL_MC(op_put, put_)
-VC1_MSPEL_MC(op_avg, avg_)
+VC1_MSPEL_MC(op_put, op4_put, put_)
+VC1_MSPEL_MC(op_avg, op4_avg, avg_)
 
 /* pixel functions - really are entry points to vc1_mspel_mc */
 
@@ -661,6 +745,18 @@ static void avg_vc1_mspel_mc ## a ## b ## _c(uint8_t *dst,                    \
                                              ptrdiff_t stride, int rnd)       \
 {                                                                             \
     avg_vc1_mspel_mc(dst, src, stride, a, b, rnd);                            \
+}                                                                             \
+static void put_vc1_mspel_mc ## a ## b ## _16_c(uint8_t *dst,                 \
+                                                const uint8_t *src,           \
+                                                ptrdiff_t stride, int rnd)    \
+{                                                                             \
+    put_vc1_mspel_mc_16(dst, src, stride, a, b, rnd);                         \
+}                                                                             \
+static void avg_vc1_mspel_mc ## a ## b ## _16_c(uint8_t *dst,                 \
+                                                const uint8_t *src,           \
+                                                ptrdiff_t stride, int rnd)    \
+{                                                                             \
+    avg_vc1_mspel_mc_16(dst, src, stride, a, b, rnd);                         \
 }
 
 PUT_VC1_MSPEL(1, 0)
@@ -682,19 +778,6 @@ PUT_VC1_MSPEL(1, 3)
 PUT_VC1_MSPEL(2, 3)
 PUT_VC1_MSPEL(3, 3)
 
-
-static void put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src,
-                                 ptrdiff_t stride, int rnd)
-{
-    ff_put_pixels8x8_c(dst, src, stride);
-}
-
-static void avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src,
-                                 ptrdiff_t stride, int rnd)
-{
-    ff_avg_pixels8x8_c(dst, src, stride);
-}
-
 #define chroma_mc(a) \
     ((A * src[a] + B * src[a + 1] + \
       C * src[stride + a] + D * src[stride + a + 1] + 32 - 4) >> 6)
@@ -708,7 +791,7 @@ static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */,
     const int D =     (x) *     (y);
     int i;
 
-    assert(x < 8 && y < 8 && x >= 0 && y >= 0);
+    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
 
     for (i = 0; i < h; i++) {
         dst[0] = chroma_mc(0);
@@ -733,7 +816,7 @@ static void put_no_rnd_vc1_chroma_mc4_c(uint8_t *dst, uint8_t *src,
     const int D =     (x) *     (y);
     int i;
 
-    assert(x < 8 && y < 8 && x >= 0 && y >= 0);
+    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
 
     for (i = 0; i < h; i++) {
         dst[0] = chroma_mc(0);
@@ -756,7 +839,7 @@ static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /* align 8 */,
     const int D =     (x) *     (y);
     int i;
 
-    assert(x < 8 && y < 8 && x >= 0 && y >= 0);
+    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
 
     for (i = 0; i < h; i++) {
         dst[0] = avg2(dst[0], chroma_mc(0));
@@ -782,7 +865,7 @@ static void avg_no_rnd_vc1_chroma_mc4_c(uint8_t *dst /* align 8 */,
     const int D = (    x) * (    y);
     int i;
 
-    assert(x < 8 && y < 8 && x >= 0 && y >= 0);
+    av_assert2(x < 8 && y < 8 && x >= 0 && y >= 0);
 
     for (i = 0; i < h; i++) {
         dst[0] = avg2(dst[0], chroma_mc(0));
@@ -877,6 +960,11 @@ static void sprite_v_double_twoscale_c(uint8_t *dst,
 }
 
 #endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */
+#define FN_ASSIGN(X, Y) do { /* fill the [1] (plain) and [0] (_16) put/avg slots for mode X,Y */ \
+    dsp->put_vc1_mspel_pixels_tab[1][(X) + 4 * (Y)] = put_vc1_mspel_mc##X##Y##_c; \
+    dsp->put_vc1_mspel_pixels_tab[0][(X) + 4 * (Y)] = put_vc1_mspel_mc##X##Y##_16_c; \
+    dsp->avg_vc1_mspel_pixels_tab[1][(X) + 4 * (Y)] = avg_vc1_mspel_mc##X##Y##_c; \
+    dsp->avg_vc1_mspel_pixels_tab[0][(X) + 4 * (Y)] = avg_vc1_mspel_mc##X##Y##_16_c; } while (0)
 
 av_cold void ff_vc1dsp_init(VC1DSPContext *dsp)
 {
@@ -901,39 +989,28 @@ av_cold void ff_vc1dsp_init(VC1DSPContext *dsp)
     dsp->vc1_v_loop_filter16  = vc1_v_loop_filter16_c;
     dsp->vc1_h_loop_filter16  = vc1_h_loop_filter16_c;
 
-    dsp->put_vc1_mspel_pixels_tab[0]  = put_vc1_mspel_mc00_c;
-    dsp->put_vc1_mspel_pixels_tab[1]  = put_vc1_mspel_mc10_c;
-    dsp->put_vc1_mspel_pixels_tab[2]  = put_vc1_mspel_mc20_c;
-    dsp->put_vc1_mspel_pixels_tab[3]  = put_vc1_mspel_mc30_c;
-    dsp->put_vc1_mspel_pixels_tab[4]  = put_vc1_mspel_mc01_c;
-    dsp->put_vc1_mspel_pixels_tab[5]  = put_vc1_mspel_mc11_c;
-    dsp->put_vc1_mspel_pixels_tab[6]  = put_vc1_mspel_mc21_c;
-    dsp->put_vc1_mspel_pixels_tab[7]  = put_vc1_mspel_mc31_c;
-    dsp->put_vc1_mspel_pixels_tab[8]  = put_vc1_mspel_mc02_c;
-    dsp->put_vc1_mspel_pixels_tab[9]  = put_vc1_mspel_mc12_c;
-    dsp->put_vc1_mspel_pixels_tab[10] = put_vc1_mspel_mc22_c;
-    dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_c;
-    dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_c;
-    dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_c;
-    dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_c;
-    dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_c;
-
-    dsp->avg_vc1_mspel_pixels_tab[0]  = avg_vc1_mspel_mc00_c;
-    dsp->avg_vc1_mspel_pixels_tab[1]  = avg_vc1_mspel_mc10_c;
-    dsp->avg_vc1_mspel_pixels_tab[2]  = avg_vc1_mspel_mc20_c;
-    dsp->avg_vc1_mspel_pixels_tab[3]  = avg_vc1_mspel_mc30_c;
-    dsp->avg_vc1_mspel_pixels_tab[4]  = avg_vc1_mspel_mc01_c;
-    dsp->avg_vc1_mspel_pixels_tab[5]  = avg_vc1_mspel_mc11_c;
-    dsp->avg_vc1_mspel_pixels_tab[6]  = avg_vc1_mspel_mc21_c;
-    dsp->avg_vc1_mspel_pixels_tab[7]  = avg_vc1_mspel_mc31_c;
-    dsp->avg_vc1_mspel_pixels_tab[8]  = avg_vc1_mspel_mc02_c;
-    dsp->avg_vc1_mspel_pixels_tab[9]  = avg_vc1_mspel_mc12_c;
-    dsp->avg_vc1_mspel_pixels_tab[10] = avg_vc1_mspel_mc22_c;
-    dsp->avg_vc1_mspel_pixels_tab[11] = avg_vc1_mspel_mc32_c;
-    dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_c;
-    dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_c;
-    dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_c;
-    dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_c;
+    dsp->put_vc1_mspel_pixels_tab[0][0] = put_pixels16x16_c;
+    dsp->avg_vc1_mspel_pixels_tab[0][0] = avg_pixels16x16_c;
+    dsp->put_vc1_mspel_pixels_tab[1][0] = put_pixels8x8_c;
+    dsp->avg_vc1_mspel_pixels_tab[1][0] = avg_pixels8x8_c;
+    FN_ASSIGN(0, 1);
+    FN_ASSIGN(0, 2);
+    FN_ASSIGN(0, 3);
+
+    FN_ASSIGN(1, 0);
+    FN_ASSIGN(1, 1);
+    FN_ASSIGN(1, 2);
+    FN_ASSIGN(1, 3);
+
+    FN_ASSIGN(2, 0);
+    FN_ASSIGN(2, 1);
+    FN_ASSIGN(2, 2);
+    FN_ASSIGN(2, 3);
+
+    FN_ASSIGN(3, 0);
+    FN_ASSIGN(3, 1);
+    FN_ASSIGN(3, 2);
+    FN_ASSIGN(3, 3);
 
     dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = put_no_rnd_vc1_chroma_mc8_c;
     dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = avg_no_rnd_vc1_chroma_mc8_c;
@@ -948,6 +1025,8 @@ av_cold void ff_vc1dsp_init(VC1DSPContext *dsp)
     dsp->sprite_v_double_twoscale = sprite_v_double_twoscale_c;
 #endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */
 
+    dsp->vc1_find_start_code_candidate = ff_startcode_find_candidate_c;
+
     if (ARCH_AARCH64)
         ff_vc1dsp_init_aarch64(dsp);
     if (ARCH_ARM)
diff --git a/libavcodec/vc1dsp.h b/libavcodec/vc1dsp.h
index 7de6a3d..aa5b6d3 100644
--- a/libavcodec/vc1dsp.h
+++ b/libavcodec/vc1dsp.h
@@ -2,20 +2,20 @@
  * VC-1 and WMV3 decoder - DSP functions
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -31,6 +31,8 @@
 #include "hpeldsp.h"
 #include "h264chroma.h"
 
+typedef void (*vc1op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, ptrdiff_t line_size, int rnd/*round value, not a height*/);
+
 typedef struct VC1DSPContext {
     /* vc1 functions */
     void (*vc1_inv_trans_8x8)(int16_t *b);
@@ -55,8 +57,8 @@ typedef struct VC1DSPContext {
     /* put 8x8 block with bicubic interpolation and quarterpel precision
      * last argument is actually round value instead of height
      */
-    op_pixels_func put_vc1_mspel_pixels_tab[16];
-    op_pixels_func avg_vc1_mspel_pixels_tab[16];
+    vc1op_pixels_func put_vc1_mspel_pixels_tab[2][16];
+    vc1op_pixels_func avg_vc1_mspel_pixels_tab[2][16];
 
     /* This is really one func used in VC-1 decoding */
     h264_chroma_mc_func put_no_rnd_vc1_chroma_pixels_tab[3];
@@ -71,6 +73,14 @@ typedef struct VC1DSPContext {
     void (*sprite_v_double_twoscale)(uint8_t *dst, const uint8_t *src1a, const uint8_t *src1b, int offset1,
                                                    const uint8_t *src2a, const uint8_t *src2b, int offset2,
                                      int alpha, int width);
+
+    /**
+     * Search buf from the start for up to size bytes. Return the index
+     * of a zero byte, or >= size if not found. Ideally, use lookahead
+     * to filter out any zero bytes that are known to not be followed by
+     * one or more further zero bytes and a one byte.
+     */
+    int (*vc1_find_start_code_candidate)(const uint8_t *buf, int size);
 } VC1DSPContext;
 
 void ff_vc1dsp_init(VC1DSPContext* c);
diff --git a/libavcodec/vcr1.c b/libavcodec/vcr1.c
index 161704f..f8281ea 100644
--- a/libavcodec/vcr1.c
+++ b/libavcodec/vcr1.c
@@ -2,20 +2,20 @@
  * ATI VCR1 codec
  * Copyright (c) 2003 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,6 +26,7 @@
 
 #include "avcodec.h"
 #include "internal.h"
+#include "libavutil/avassert.h"
 #include "libavutil/internal.h"
 
 typedef struct VCR1Context {
@@ -37,8 +38,8 @@ static av_cold int vcr1_decode_init(AVCodecContext *avctx)
 {
     avctx->pix_fmt = AV_PIX_FMT_YUV410P;
 
-    if (avctx->width & 7) {
-        av_log(avctx, AV_LOG_ERROR, "Width %d is not divisble by 8.\n", avctx->width);
+    if (avctx->width % 8 || avctx->height%4) {
+        avpriv_request_sample(avctx, "odd dimensions (%d x %d) support", avctx->width, avctx->height);
         return AVERROR_INVALIDDATA;
     }
 
@@ -48,27 +49,25 @@ static av_cold int vcr1_decode_init(AVCodecContext *avctx)
 static int vcr1_decode_frame(AVCodecContext *avctx, void *data,
                              int *got_frame, AVPacket *avpkt)
 {
-    const uint8_t *buf        = avpkt->data;
-    int buf_size              = avpkt->size;
     VCR1Context *const a      = avctx->priv_data;
     AVFrame *const p          = data;
-    const uint8_t *bytestream = buf;
+    const uint8_t *bytestream = avpkt->data;
+    const uint8_t *bytestream_end = bytestream + avpkt->size;
     int i, x, y, ret;
 
-    if ((ret = ff_get_buffer(avctx, p, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
-        return ret;
+    if(avpkt->size < 32 + avctx->height + avctx->width*avctx->height*5/8){
+        av_log(avctx, AV_LOG_ERROR, "Insufficient input data. %d < %d\n", avpkt->size ,  32 + avctx->height + avctx->width*avctx->height*5/8);
+        return AVERROR_INVALIDDATA; /* short packet is bad input data, not a bad caller argument */
+    }
+
+    if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
+        return ret;
     p->pict_type = AV_PICTURE_TYPE_I;
     p->key_frame = 1;
 
-    if (buf_size < 32)
-        goto packet_small;
-
     for (i = 0; i < 16; i++) {
         a->delta[i] = *bytestream++;
         bytestream++;
-        buf_size--;
     }
 
     for (y = 0; y < avctx->height; y++) {
@@ -79,12 +78,10 @@ static int vcr1_decode_frame(AVCodecContext *avctx, void *data,
             uint8_t *cb = &p->data[1][(y >> 2) * p->linesize[1]];
             uint8_t *cr = &p->data[2][(y >> 2) * p->linesize[2]];
 
-            if (buf_size < 4 + avctx->width)
-                goto packet_small;
+            av_assert0 (bytestream_end - bytestream >= 4 + avctx->width);
 
             for (i = 0; i < 4; i++)
                 a->offset[i] = *bytestream++;
-            buf_size -= 4;
 
             offset = a->offset[0] - a->delta[bytestream[2] & 0xF];
             for (x = 0; x < avctx->width; x += 4) {
@@ -98,11 +95,9 @@ static int vcr1_decode_frame(AVCodecContext *avctx, void *data,
                 *cr++       = bytestream[1];
 
                 bytestream += 4;
-                buf_size   -= 4;
             }
         } else {
-            if (buf_size < avctx->width / 2)
-                goto packet_small;
+            av_assert0 (bytestream_end - bytestream >= avctx->width / 2);
 
             offset = a->offset[y & 3] - a->delta[bytestream[2] & 0xF];
 
@@ -117,17 +112,13 @@ static int vcr1_decode_frame(AVCodecContext *avctx, void *data,
                 luma[7]     = offset += a->delta[bytestream[1] >>  4];
                 luma       += 8;
                 bytestream += 4;
-                buf_size   -= 4;
             }
         }
     }
 
     *got_frame = 1;
 
-    return buf_size;
-packet_small:
-    av_log(avctx, AV_LOG_ERROR, "Input packet too small.\n");
-    return AVERROR_INVALIDDATA;
+    return bytestream - avpkt->data;
 }
 
 AVCodec ff_vcr1_decoder = {
diff --git a/libavcodec/vda.c b/libavcodec/vda.c
index f71fb16..170580f 100644
--- a/libavcodec/vda.c
+++ b/libavcodec/vda.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/vda.h b/libavcodec/vda.h
index 9aa5d29..12330aa 100644
--- a/libavcodec/vda.h
+++ b/libavcodec/vda.h
@@ -3,20 +3,20 @@
  *
  * copyright (c) 2011 Sebastien Zwickert
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -30,7 +30,6 @@
  */
 
 #include "libavcodec/avcodec.h"
-#include "libavcodec/version.h"
 
 #include <stdint.h>
 
@@ -42,6 +41,14 @@
 #include <VideoDecodeAcceleration/VDADecoder.h>
 #undef Picture
 
+#include "libavcodec/version.h"
+
+// extra flags not defined in VDADecoder.h
+enum {
+    kVDADecodeInfo_Asynchronous = 1UL << 0, // bit 0 of the decode info flags
+    kVDADecodeInfo_FrameDropped = 1UL << 1 // bit 1; tested against infoFlags in vda_decoder_callback() (vda_h264.c)
+};
+
 /**
  * @defgroup lavc_codec_hwaccel_vda VDA
  * @ingroup lavc_codec_hwaccel
@@ -51,7 +58,7 @@
 
 /**
  * This structure is used to provide the necessary configurations and data
- * to the VDA Libav HWAccel implementation.
+ * to the VDA FFmpeg HWAccel implementation.
  *
  * The application must make it available as AVCodecContext.hwaccel_context.
  */
@@ -126,6 +133,17 @@ struct vda_context {
      * unused
      */
     int                 priv_allocated_size;
+
+    /**
+     * Use av_buffer to manage buffer.
+     * When the flag is set, the CVPixelBuffers returned by the decoder will
+     * be released automatically, so you have to retain them if necessary.
+     * Not setting this flag may cause a memory leak.
+     *
+     * encoding: unused
+     * decoding: Set by user.
+     */
+    int                 use_ref_buffer;
 };
 
 /** Create the video decoder. */
diff --git a/libavcodec/vda_h264.c b/libavcodec/vda_h264.c
index c7f6a74..61fb3c0 100644
--- a/libavcodec/vda_h264.c
+++ b/libavcodec/vda_h264.c
@@ -1,33 +1,37 @@
 /*
- * VDA H.264 hardware acceleration
+ * VDA H264 HW acceleration.
  *
  * copyright (c) 2011 Sebastien Zwickert
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include <CoreFoundation/CFDictionary.h>
 #include <CoreFoundation/CFNumber.h>
 #include <CoreFoundation/CFData.h>
-#include <CoreFoundation/CFString.h>
 
+#include "vda.h"
 #include "libavutil/avutil.h"
 #include "h264.h"
+
+struct vda_buffer { /* opaque payload of the AVBufferRef created in vda_old_h264_end_frame() */
+    CVPixelBufferRef cv_buffer; /* released by vda_h264_release_buffer() */
+};
 #include "internal.h"
-#include "vda.h"
 #include "vda_internal.h"
 
 typedef struct VDAContext {
@@ -43,7 +47,7 @@ typedef struct VDAContext {
     CVImageBufferRef frame;
 } VDAContext;
 
-/* Decoder callback that adds the VDA frame to the queue in display order. */
+/* Decoder callback that adds the vda frame to the queue in display order. */
 static void vda_decoder_callback(void *vda_hw_ctx,
                                  CFDictionaryRef user_info,
                                  OSStatus status,
@@ -52,6 +56,9 @@ static void vda_decoder_callback(void *vda_hw_ctx,
 {
     struct vda_context *vda_ctx = vda_hw_ctx;
 
+    if (infoFlags & kVDADecodeInfo_FrameDropped)
+        vda_ctx->cv_buffer = NULL;
+
     if (!image_buffer)
         return;
 
@@ -87,7 +94,7 @@ static int vda_old_h264_start_frame(AVCodecContext *avctx,
                                 av_unused uint32_t size)
 {
     VDAContext *vda = avctx->internal->hwaccel_priv_data;
-    struct vda_context *vda_ctx         = avctx->hwaccel_context;
+    struct vda_context *vda_ctx = avctx->hwaccel_context;
 
     if (!vda_ctx->decoder)
         return -1;
@@ -101,8 +108,8 @@ static int vda_old_h264_decode_slice(AVCodecContext *avctx,
                                  const uint8_t *buffer,
                                  uint32_t size)
 {
-    VDAContext *vda                     = avctx->internal->hwaccel_priv_data;
-    struct vda_context *vda_ctx         = avctx->hwaccel_context;
+    VDAContext *vda             = avctx->internal->hwaccel_priv_data;
+    struct vda_context *vda_ctx = avctx->hwaccel_context;
     void *tmp;
 
     if (!vda_ctx->decoder)
@@ -124,12 +131,21 @@ static int vda_old_h264_decode_slice(AVCodecContext *avctx,
     return 0;
 }
 
+static void vda_h264_release_buffer(void *opaque, uint8_t *data) /* free callback passed to av_buffer_create() in vda_old_h264_end_frame() */
+{
+    struct vda_buffer *context = opaque; /* the vda_buffer registered as the buffer's opaque pointer */
+    CVPixelBufferRelease(context->cv_buffer);
+    av_free(context); /* data is not used: the buffer is created with NULL data and size 0 */
+}
+
 static int vda_old_h264_end_frame(AVCodecContext *avctx)
 {
     H264Context *h                      = avctx->priv_data;
     VDAContext *vda                     = avctx->internal->hwaccel_priv_data;
     struct vda_context *vda_ctx         = avctx->hwaccel_context;
     AVFrame *frame                      = &h->cur_pic_ptr->f;
+    struct vda_buffer *context;
+    AVBufferRef *buffer;
     int status;
 
     if (!vda_ctx->decoder || !vda->bitstream)
@@ -141,6 +157,20 @@ static int vda_old_h264_end_frame(AVCodecContext *avctx)
     if (status)
         av_log(avctx, AV_LOG_ERROR, "Failed to decode frame (%d)\n", status);
 
+    if (!vda_ctx->use_ref_buffer || status)
+        return status;
+
+    context = av_mallocz(sizeof(*context));
+    buffer = av_buffer_create(NULL, 0, vda_h264_release_buffer, context, 0);
+    if (!context || !buffer) {
+        CVPixelBufferRelease(vda_ctx->cv_buffer);
+        av_free(context);
+        return -1;
+    }
+
+    context->cv_buffer = vda_ctx->cv_buffer;
+    frame->buf[3] = buffer;
+
     return status;
 }
 
@@ -148,7 +178,7 @@ int ff_vda_create_decoder(struct vda_context *vda_ctx,
                           uint8_t *extradata,
                           int extradata_size)
 {
-    OSStatus status = kVDADecoderNoErr;
+    OSStatus status;
     CFNumberRef height;
     CFNumberRef width;
     CFNumberRef format;
@@ -158,7 +188,10 @@ int ff_vda_create_decoder(struct vda_context *vda_ctx,
     CFMutableDictionaryRef io_surface_properties;
     CFNumberRef cv_pix_fmt;
 
-    /* Each VCL NAL in the bistream sent to the decoder
+    vda_ctx->priv_bitstream = NULL;
+    vda_ctx->priv_allocated_size = 0;
+
+    /* Each VCL NAL in the bitstream sent to the decoder
      * is preceded by a 4 bytes length header.
      * Change the avcC atom header if needed, to signal headers of 4 bytes. */
     if (extradata_size >= 4 && (extradata[4] & 0x03) != 0x03) {
@@ -200,9 +233,9 @@ int ff_vda_create_decoder(struct vda_context *vda_ctx,
                                                       0,
                                                       &kCFTypeDictionaryKeyCallBacks,
                                                       &kCFTypeDictionaryValueCallBacks);
-    cv_pix_fmt      = CFNumberCreate(kCFAllocatorDefault,
-                                     kCFNumberSInt32Type,
-                                     &vda_ctx->cv_pix_fmt_type);
+    cv_pix_fmt  = CFNumberCreate(kCFAllocatorDefault,
+                                 kCFNumberSInt32Type,
+                                 &vda_ctx->cv_pix_fmt_type);
     CFDictionarySetValue(buffer_attributes,
                          kCVPixelBufferPixelFormatTypeKey,
                          cv_pix_fmt);
@@ -241,9 +274,11 @@ int ff_vda_destroy_decoder(struct vda_context *vda_ctx)
 static int vda_h264_uninit(AVCodecContext *avctx)
 {
     VDAContext *vda = avctx->internal->hwaccel_priv_data;
-    av_freep(&vda->bitstream);
-    if (vda->frame)
-        CVPixelBufferRelease(vda->frame);
+    if (vda) {
+        av_freep(&vda->bitstream);
+        if (vda->frame)
+            CVPixelBufferRelease(vda->frame);
+    }
     return 0;
 }
 
@@ -383,7 +418,7 @@ int ff_vda_default_init(AVCodecContext *avctx)
 
     // kCVPixelFormatType_420YpCbCr8Planar;
 
-    /* Each VCL NAL in the bistream sent to the decoder
+    /* Each VCL NAL in the bitstream sent to the decoder
      * is preceded by a 4 bytes length header.
      * Change the avcC atom header if needed, to signal headers of 4 bytes. */
     if (avctx->extradata_size >= 4 && (avctx->extradata[4] & 0x03) != 0x03) {
diff --git a/libavcodec/vda_h264_dec.c b/libavcodec/vda_h264_dec.c
new file mode 100644
index 0000000..c00e7e4
--- /dev/null
+++ b/libavcodec/vda_h264_dec.c
@@ -0,0 +1,273 @@
+/*
+ * Copyright (c) 2012, Xidorn Quan
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * H.264 decoder via VDA
+ * @author Xidorn Quan <quanxunzhen@gmail.com>
+ */
+
+#include <string.h>
+#include <CoreFoundation/CoreFoundation.h>
+
+#include "vda.h"
+#include "h264.h"
+#include "avcodec.h"
+
+#ifndef kCFCoreFoundationVersionNumber10_7
+#define kCFCoreFoundationVersionNumber10_7      635.00
+#endif
+
+extern AVCodec ff_h264_decoder, ff_h264_vda_decoder;
+
+static const enum AVPixelFormat vda_pixfmts_prior_10_7[] = { /* installed as ff_h264_vda_decoder.pix_fmts by vdadec_init() when CoreFoundation is older than 10.7 */
+    AV_PIX_FMT_UYVY422,
+    AV_PIX_FMT_YUV420P,
+    AV_PIX_FMT_NONE
+};
+
+static const enum AVPixelFormat vda_pixfmts[] = { /* installed as ff_h264_vda_decoder.pix_fmts by vdadec_init() when CoreFoundation is 10.7 or newer */
+    AV_PIX_FMT_UYVY422,
+    AV_PIX_FMT_YUYV422,
+    AV_PIX_FMT_NV12,
+    AV_PIX_FMT_YUV420P,
+    AV_PIX_FMT_NONE
+};
+
+typedef struct { /* private data of the h264_vda decoder (see .priv_data_size of ff_h264_vda_decoder) */
+    H264Context h264ctx; /* NOTE(review): presumably must stay first, since ff_h264_decoder runs on the same priv_data - confirm */
+    int h264_initialized; /* set to 1 by vdadec_init() once ff_h264_decoder.init succeeded; checked by vdadec_close() */
+    struct vda_context vda_ctx; /* installed as avctx->hwaccel_context by set_context() */
+    enum AVPixelFormat pix_fmt; /* format obtained from avctx->get_format() in vdadec_init() */
+
+    /* Backups of the fields set by the user: set_context() saves them here
+     * before overriding them and restore_context() puts them back. */
+    void *hwaccel_context;
+    enum AVPixelFormat (*get_format)(struct AVCodecContext *s, const enum AVPixelFormat * fmt);
+    int (*get_buffer2)(struct AVCodecContext *s, AVFrame *frame, int flags);
+#if FF_API_GET_BUFFER
+    int (*get_buffer)(struct AVCodecContext *c, AVFrame *pic);
+#endif
+} VDADecoderContext;
+
+static enum AVPixelFormat get_format(struct AVCodecContext *avctx,
+        const enum AVPixelFormat *fmt)
+{   /* get_format hook installed by set_context(); both arguments are ignored */
+    return AV_PIX_FMT_VDA_VLD; /* always select the VDA hwaccel format */
+}
+
+typedef struct { /* opaque payload of the AVBufferRef created in get_buffer2() */
+    CVPixelBufferRef cv_buffer; /* set in vdadec_decode(); unlocked and released in release_buffer() */
+} VDABufferContext;
+
+static void release_buffer(void *opaque, uint8_t *data) /* free callback passed to av_buffer_create() in get_buffer2() */
+{
+    VDABufferContext *context = opaque; /* NOTE(review): cv_buffer is still NULL (av_mallocz) if the frame never reached the got_frame path of vdadec_decode() - confirm both calls below tolerate NULL */
+    CVPixelBufferUnlockBaseAddress(context->cv_buffer, 0); /* pairs with the lock taken in vdadec_decode() */
+    CVPixelBufferRelease(context->cv_buffer); /* pairs with the retain taken in vdadec_decode() */
+    av_free(context);
+}
+
+/* get_buffer2 hook: attach a VDABufferContext-backed AVBufferRef to pic; 0 or AVERROR(ENOMEM). */
+static int get_buffer2(AVCodecContext *avctx, AVFrame *pic, int flag)
+{
+    VDABufferContext *context = av_mallocz(sizeof(*context));
+    AVBufferRef *buffer = context ? av_buffer_create(NULL, 0, release_buffer, context, 0) : NULL; /* never create (and leak) a ref without its context */
+    if (!buffer) {
+        av_free(context); /* no-op when the context allocation itself failed */
+        return AVERROR(ENOMEM);
+    }
+    pic->buf[0] = buffer;
+    pic->data[0] = (void *)1; /* dummy non-NULL value; vdadec_decode() stores the real plane pointers */
+    return 0;
+}
+
+static inline void set_context(AVCodecContext *avctx)
+{   /* back up the user's fields in the private context, then install this decoder's own values */
+    VDADecoderContext *ctx = avctx->priv_data;
+    ctx->hwaccel_context = avctx->hwaccel_context;
+    avctx->hwaccel_context = &ctx->vda_ctx;
+    ctx->get_format = avctx->get_format;
+    avctx->get_format = get_format;
+    ctx->get_buffer2 = avctx->get_buffer2;
+    avctx->get_buffer2 = get_buffer2;
+#if FF_API_GET_BUFFER
+    ctx->get_buffer = avctx->get_buffer;
+    avctx->get_buffer = NULL; /* the old-style callback is cleared rather than replaced */
+#endif
+}
+
+static inline void restore_context(AVCodecContext *avctx)
+{   /* undo set_context(): copy the backed-up user fields back into avctx */
+    VDADecoderContext *ctx = avctx->priv_data;
+    avctx->hwaccel_context = ctx->hwaccel_context;
+    avctx->get_format = ctx->get_format;
+    avctx->get_buffer2 = ctx->get_buffer2;
+#if FF_API_GET_BUFFER
+    avctx->get_buffer = ctx->get_buffer;
+#endif
+}
+
+static int vdadec_decode(AVCodecContext *avctx,
+        void *data, int *got_frame, AVPacket *avpkt)
+{   /* run ff_h264_decoder on the packet and, when a frame is output, expose its CVPixelBuffer planes in the AVFrame */
+    VDADecoderContext *ctx = avctx->priv_data;
+    AVFrame *pic = data;
+    int ret;
+
+    set_context(avctx); /* our hooks are only installed for the duration of the inner call */
+    ret = ff_h264_decoder.decode(avctx, data, got_frame, avpkt);
+    restore_context(avctx);
+    if (*got_frame) {
+        AVBufferRef *buffer = pic->buf[0]; /* the ref attached by get_buffer2() */
+        VDABufferContext *context = av_buffer_get_opaque(buffer);
+        CVPixelBufferRef cv_buffer = (CVPixelBufferRef)pic->data[3]; /* the decoded pixel buffer is read from data[3] */
+
+        CVPixelBufferRetain(cv_buffer); /* retain + lock are undone in release_buffer() */
+        CVPixelBufferLockBaseAddress(cv_buffer, 0);
+        context->cv_buffer = cv_buffer; /* remembered so release_buffer() can unlock and release it */
+        pic->format = ctx->pix_fmt; /* report the format chosen in vdadec_init() */
+        if (CVPixelBufferIsPlanar(cv_buffer)) {
+            int i, count = CVPixelBufferGetPlaneCount(cv_buffer);
+            av_assert0(count < 4); /* planes are written to pic->data[0..count-1] */
+            for (i = 0; i < count; i++) {
+                pic->data[i] = CVPixelBufferGetBaseAddressOfPlane(cv_buffer, i);
+                pic->linesize[i] = CVPixelBufferGetBytesPerRowOfPlane(cv_buffer, i);
+            }
+        } else {
+            pic->data[0] = CVPixelBufferGetBaseAddress(cv_buffer);
+            pic->linesize[0] = CVPixelBufferGetBytesPerRow(cv_buffer);
+        }
+    }
+    avctx->pix_fmt = ctx->pix_fmt; /* done on every call, whether or not a frame was output */
+
+    return ret; /* result of ff_h264_decoder.decode, passed through unchanged */
+}
+
+static av_cold int vdadec_close(AVCodecContext *avctx)
+{   /* tear down the VDA decoder and, if it was opened, the wrapped H.264 decoder; also used as the failure path of vdadec_init() */
+    VDADecoderContext *ctx = avctx->priv_data;
+    /* release buffers and decoder */
+    ff_vda_destroy_decoder(&ctx->vda_ctx); /* its return value is not checked */
+    /* close H.264 decoder */
+    if (ctx->h264_initialized) { /* only set once ff_h264_decoder.init succeeded in vdadec_init() */
+        set_context(avctx);
+        ff_h264_decoder.close(avctx);
+        restore_context(avctx);
+    }
+    return 0; /* always reports success */
+}
+
+/* Set up the VDA decoder and the wrapped H.264 decoder; returns 0, or -1 after vdadec_close(). */
+static av_cold int vdadec_init(AVCodecContext *avctx)
+{
+    VDADecoderContext *ctx = avctx->priv_data;
+    struct vda_context *vda_ctx = &ctx->vda_ctx;
+    OSStatus status;
+    int ret, i;
+
+    ctx->h264_initialized = 0; /* lets vdadec_close() know whether the H.264 decoder must be closed */
+
+    /* init pix_fmts of codec (only while unset; the list depends on the CoreFoundation version) */
+    if (!ff_h264_vda_decoder.pix_fmts) {
+        if (kCFCoreFoundationVersionNumber < kCFCoreFoundationVersionNumber10_7)
+            ff_h264_vda_decoder.pix_fmts = vda_pixfmts_prior_10_7;
+        else
+            ff_h264_vda_decoder.pix_fmts = vda_pixfmts;
+    }
+
+    /* init vda */
+    memset(vda_ctx, 0, sizeof(struct vda_context));
+    vda_ctx->width = avctx->width;
+    vda_ctx->height = avctx->height;
+    vda_ctx->format = 'avc1';
+    vda_ctx->use_sync_decoding = 1;
+    vda_ctx->use_ref_buffer = 1;
+    ctx->pix_fmt = avctx->get_format(avctx, avctx->codec->pix_fmts); /* the user's callback picks the output format */
+    switch (ctx->pix_fmt) { /* map the chosen format to the matching CoreVideo pixel format code */
+    case AV_PIX_FMT_UYVY422:
+        vda_ctx->cv_pix_fmt_type = '2vuy';
+        break;
+    case AV_PIX_FMT_YUYV422:
+        vda_ctx->cv_pix_fmt_type = 'yuvs';
+        break;
+    case AV_PIX_FMT_NV12:
+        vda_ctx->cv_pix_fmt_type = '420v';
+        break;
+    case AV_PIX_FMT_YUV420P:
+        vda_ctx->cv_pix_fmt_type = 'y420';
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Unsupported pixel format: %d\n", ctx->pix_fmt); /* log the value that was actually rejected */
+        goto failed;
+    }
+    status = ff_vda_create_decoder(vda_ctx,
+                                   avctx->extradata, avctx->extradata_size);
+    if (status != kVDADecoderNoErr) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to init VDA decoder: %d.\n", (int)status);
+        goto failed;
+    }
+
+    /* init H.264 decoder (with this decoder's hooks temporarily installed) */
+    set_context(avctx);
+    ret = ff_h264_decoder.init(avctx);
+    restore_context(avctx);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to open H.264 decoder.\n");
+        goto failed;
+    }
+    ctx->h264_initialized = 1;
+
+    for (i = 0; i < MAX_SPS_COUNT; i++) { /* reject any SPS with non-8-bit luma or chroma_format_idc 2/3 */
+        SPS *sps = ctx->h264ctx.sps_buffers[i];
+        if (sps && (sps->bit_depth_luma != 8 ||
+                sps->chroma_format_idc == 2 ||
+                sps->chroma_format_idc == 3)) {
+            av_log(avctx, AV_LOG_ERROR, "Format is not supported.\n");
+            goto failed;
+        }
+    }
+
+    return 0;
+
+failed:
+    vdadec_close(avctx); /* closes the H.264 decoder only if h264_initialized was set above */
+    return -1;
+}
+
+static void vdadec_flush(AVCodecContext *avctx)
+{   /* flush the wrapped H.264 decoder with this decoder's hooks temporarily installed */
+    set_context(avctx);
+    ff_h264_decoder.flush(avctx);
+    restore_context(avctx);
+}
+
+AVCodec ff_h264_vda_decoder = { /* codec entry; .pix_fmts is left unset here and filled in by vdadec_init() */
+    .name           = "h264_vda",
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_H264,
+    .priv_data_size = sizeof(VDADecoderContext), /* priv_data holds a VDADecoderContext */
+    .init           = vdadec_init,
+    .close          = vdadec_close,
+    .decode         = vdadec_decode,
+    .capabilities   = CODEC_CAP_DELAY,
+    .flush          = vdadec_flush,
+    .long_name      = NULL_IF_CONFIG_SMALL("H.264 (VDA acceleration)"),
+};
diff --git a/libavcodec/vda_internal.h b/libavcodec/vda_internal.h
index 9d0ed80..457916b 100644
--- a/libavcodec/vda_internal.h
+++ b/libavcodec/vda_internal.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/vdpau.c b/libavcodec/vdpau.c
index 5406874..0dc5355 100644
--- a/libavcodec/vdpau.c
+++ b/libavcodec/vdpau.c
@@ -4,20 +4,20 @@
  *
  * Copyright (c) 2008 NVIDIA
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -38,6 +38,13 @@
  * @{
  */
 
+AVVDPAUContext *av_alloc_vdpaucontext(void)
+{
+    return av_vdpau_alloc_context(); /* plain forwarder: the result is returned unchanged */
+}
+
+MAKE_ACCESSORS(AVVDPAUContext, vdpau_hwaccel, AVVDPAU_Render2, render2)
+
 int ff_vdpau_common_start_frame(struct vdpau_picture_context *pic_ctx,
                                 av_unused const uint8_t *buffer,
                                 av_unused uint32_t size)
@@ -53,19 +60,41 @@ int ff_vdpau_common_start_frame(struct vdpau_picture_context *pic_ctx,
     CONFIG_VC1_VDPAU_HWACCEL   || CONFIG_WMV3_VDPAU_HWACCEL
 int ff_vdpau_mpeg_end_frame(AVCodecContext *avctx)
 {
+    int res = 0;
     AVVDPAUContext *hwctx = avctx->hwaccel_context;
     MpegEncContext *s = avctx->priv_data;
     Picture *pic = s->current_picture_ptr;
     struct vdpau_picture_context *pic_ctx = pic->hwaccel_picture_private;
     VdpVideoSurface surf = ff_vdpau_get_surface_id(pic->f);
 
+#if FF_API_BUFS_VDPAU
+FF_DISABLE_DEPRECATION_WARNINGS
+    hwctx->info = pic_ctx->info;
+    hwctx->bitstream_buffers = pic_ctx->bitstream_buffers;
+    hwctx->bitstream_buffers_used = pic_ctx->bitstream_buffers_used;
+    hwctx->bitstream_buffers_allocated = pic_ctx->bitstream_buffers_allocated;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    if (!hwctx->render) {
+        res = hwctx->render2(avctx, pic->f, (void *)&pic_ctx->info,
+                             pic_ctx->bitstream_buffers_used, pic_ctx->bitstream_buffers);
+    } else
     hwctx->render(hwctx->decoder, surf, (void *)&pic_ctx->info,
                   pic_ctx->bitstream_buffers_used, pic_ctx->bitstream_buffers);
 
     ff_mpeg_draw_horiz_band(s, 0, s->avctx->height);
     av_freep(&pic_ctx->bitstream_buffers);
 
-    return 0;
+#if FF_API_BUFS_VDPAU
+FF_DISABLE_DEPRECATION_WARNINGS
+    hwctx->bitstream_buffers = NULL;
+    hwctx->bitstream_buffers_used = 0;
+    hwctx->bitstream_buffers_allocated = 0;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    return res;
 }
 #endif
 
@@ -88,6 +117,343 @@ int ff_vdpau_add_buffer(struct vdpau_picture_context *pic_ctx,
     return 0;
 }
 
+/* Obsolete non-hwaccel VDPAU support below... */
+
+void ff_vdpau_h264_set_reference_frames(H264Context *h)
+{
+    struct vdpau_render_state *render, *render_ref;
+    VdpReferenceFrameH264 *rf, *rf2;
+    H264Picture *pic;
+    int i, list, pic_frame_idx;
+
+    render = (struct vdpau_render_state *)h->cur_pic_ptr->f.data[0];
+    assert(render);
+
+    rf = &render->info.h264.referenceFrames[0];
+#define H264_RF_COUNT FF_ARRAY_ELEMS(render->info.h264.referenceFrames)
+
+    for (list = 0; list < 2; ++list) {
+        H264Picture **lp = list ? h->long_ref : h->short_ref;
+        int ls = list ? 16 : h->short_ref_count;
+
+        for (i = 0; i < ls; ++i) {
+            pic = lp[i];
+            if (!pic || !pic->reference)
+                continue;
+            pic_frame_idx = pic->long_ref ? pic->pic_id : pic->frame_num;
+
+            render_ref = (struct vdpau_render_state *)pic->f.data[0];
+            assert(render_ref);
+
+            rf2 = &render->info.h264.referenceFrames[0];
+            while (rf2 != rf) {
+                if (
+                    (rf2->surface == render_ref->surface)
+                    && (rf2->is_long_term == pic->long_ref)
+                    && (rf2->frame_idx == pic_frame_idx)
+                )
+                    break;
+                ++rf2;
+            }
+            if (rf2 != rf) {
+                rf2->top_is_reference    |= (pic->reference & PICT_TOP_FIELD)    ? VDP_TRUE : VDP_FALSE;
+                rf2->bottom_is_reference |= (pic->reference & PICT_BOTTOM_FIELD) ? VDP_TRUE : VDP_FALSE;
+                continue;
+            }
+
+            if (rf >= &render->info.h264.referenceFrames[H264_RF_COUNT])
+                continue;
+
+            rf->surface             = render_ref->surface;
+            rf->is_long_term        = pic->long_ref;
+            rf->top_is_reference    = (pic->reference & PICT_TOP_FIELD)    ? VDP_TRUE : VDP_FALSE;
+            rf->bottom_is_reference = (pic->reference & PICT_BOTTOM_FIELD) ? VDP_TRUE : VDP_FALSE;
+            rf->field_order_cnt[0]  = pic->field_poc[0];
+            rf->field_order_cnt[1]  = pic->field_poc[1];
+            rf->frame_idx           = pic_frame_idx;
+
+            ++rf;
+        }
+    }
+
+    for (; rf < &render->info.h264.referenceFrames[H264_RF_COUNT]; ++rf) {
+        rf->surface             = VDP_INVALID_HANDLE;
+        rf->is_long_term        = 0;
+        rf->top_is_reference    = 0;
+        rf->bottom_is_reference = 0;
+        rf->field_order_cnt[0]  = 0;
+        rf->field_order_cnt[1]  = 0;
+        rf->frame_idx           = 0;
+    }
+}
+
+/* Append buf/buf_size as one more VdpBitstreamBuffer entry of the render state passed in data. */
+void ff_vdpau_add_data_chunk(uint8_t *data, const uint8_t *buf, int buf_size)
+{
+    struct vdpau_render_state *render = (struct vdpau_render_state*)data;
+    VdpBitstreamBuffer *buffers;
+    assert(render);
+    /* Grow through a temporary: a failed realloc must not overwrite (and leak) the old array. */
+    buffers = av_fast_realloc(render->bitstream_buffers, &render->bitstream_buffers_allocated,
+                              sizeof(*buffers) * (render->bitstream_buffers_used + 1));
+    if (!buffers) return; /* out of memory: drop this chunk rather than write through NULL */
+    render->bitstream_buffers = buffers;
+    buffers[render->bitstream_buffers_used].struct_version  = VDP_BITSTREAM_BUFFER_VERSION;
+    buffers[render->bitstream_buffers_used].bitstream       = buf;
+    buffers[render->bitstream_buffers_used].bitstream_bytes = buf_size;
+    render->bitstream_buffers_used++;
+}
+
+#if CONFIG_H264_VDPAU_DECODER
+void ff_vdpau_h264_picture_start(H264Context *h)
+{
+    struct vdpau_render_state *render;
+    int i;
+
+    render = (struct vdpau_render_state *)h->cur_pic_ptr->f.data[0]; /* render state is read from data[0] of the current picture */
+    assert(render);
+
+    for (i = 0; i < 2; ++i) { /* copy both field_poc[] entries of the current picture */
+        int foc = h->cur_pic_ptr->field_poc[i];
+        if (foc == INT_MAX) /* INT_MAX is stored as 0 instead */
+            foc = 0;
+        render->info.h264.field_order_cnt[i] = foc;
+    }
+
+    render->info.h264.frame_num = h->frame_num;
+}
+
+void ff_vdpau_h264_picture_complete(H264Context *h)
+{
+    struct vdpau_render_state *render;
+
+    render = (struct vdpau_render_state *)h->cur_pic_ptr->f.data[0];
+    assert(render);
+
+    render->info.h264.slice_count = h->slice_num;
+    if (render->info.h264.slice_count < 1)
+        return;
+
+    render->info.h264.is_reference                           = (h->cur_pic_ptr->reference & 3) ? VDP_TRUE : VDP_FALSE;
+    render->info.h264.field_pic_flag                         = h->picture_structure != PICT_FRAME;
+    render->info.h264.bottom_field_flag                      = h->picture_structure == PICT_BOTTOM_FIELD;
+    render->info.h264.num_ref_frames                         = h->sps.ref_frame_count;
+    render->info.h264.mb_adaptive_frame_field_flag           = h->sps.mb_aff && !render->info.h264.field_pic_flag;
+    render->info.h264.constrained_intra_pred_flag            = h->pps.constrained_intra_pred;
+    render->info.h264.weighted_pred_flag                     = h->pps.weighted_pred;
+    render->info.h264.weighted_bipred_idc                    = h->pps.weighted_bipred_idc;
+    render->info.h264.frame_mbs_only_flag                    = h->sps.frame_mbs_only_flag;
+    render->info.h264.transform_8x8_mode_flag                = h->pps.transform_8x8_mode;
+    render->info.h264.chroma_qp_index_offset                 = h->pps.chroma_qp_index_offset[0];
+    render->info.h264.second_chroma_qp_index_offset          = h->pps.chroma_qp_index_offset[1];
+    render->info.h264.pic_init_qp_minus26                    = h->pps.init_qp - 26;
+    render->info.h264.num_ref_idx_l0_active_minus1           = h->pps.ref_count[0] - 1;
+    render->info.h264.num_ref_idx_l1_active_minus1           = h->pps.ref_count[1] - 1;
+    render->info.h264.log2_max_frame_num_minus4              = h->sps.log2_max_frame_num - 4;
+    render->info.h264.pic_order_cnt_type                     = h->sps.poc_type;
+    render->info.h264.log2_max_pic_order_cnt_lsb_minus4      = h->sps.poc_type ? 0 : h->sps.log2_max_poc_lsb - 4;
+    render->info.h264.delta_pic_order_always_zero_flag       = h->sps.delta_pic_order_always_zero_flag;
+    render->info.h264.direct_8x8_inference_flag              = h->sps.direct_8x8_inference_flag;
+    render->info.h264.entropy_coding_mode_flag               = h->pps.cabac;
+    render->info.h264.pic_order_present_flag                 = h->pps.pic_order_present;
+    render->info.h264.deblocking_filter_control_present_flag = h->pps.deblocking_filter_parameters_present;
+    render->info.h264.redundant_pic_cnt_present_flag         = h->pps.redundant_pic_cnt_present;
+    memcpy(render->info.h264.scaling_lists_4x4, h->pps.scaling_matrix4, sizeof(render->info.h264.scaling_lists_4x4));
+    memcpy(render->info.h264.scaling_lists_8x8[0], h->pps.scaling_matrix8[0], sizeof(render->info.h264.scaling_lists_8x8[0]));
+    memcpy(render->info.h264.scaling_lists_8x8[1], h->pps.scaling_matrix8[3], sizeof(render->info.h264.scaling_lists_8x8[0]));
+
+    ff_h264_draw_horiz_band(h, 0, h->avctx->height);
+    render->bitstream_buffers_used = 0;
+}
+#endif /* CONFIG_H264_VDPAU_DECODER */
+
+#if CONFIG_MPEG_VDPAU_DECODER || CONFIG_MPEG1_VDPAU_DECODER
+void ff_vdpau_mpeg_picture_complete(MpegEncContext *s, const uint8_t *buf,
+                                    int buf_size, int slice_count)
+{
+    struct vdpau_render_state *render, *last, *next;
+    int i;
+
+    if (!s->current_picture_ptr) return; /* nothing to fill in without a current picture */
+
+    render = (struct vdpau_render_state *)s->current_picture_ptr->f->data[0];
+    assert(render);
+
+    /* fill VdpPictureInfoMPEG1Or2 struct */
+    render->info.mpeg.picture_structure          = s->picture_structure;
+    render->info.mpeg.picture_coding_type        = s->pict_type;
+    render->info.mpeg.intra_dc_precision         = s->intra_dc_precision;
+    render->info.mpeg.frame_pred_frame_dct       = s->frame_pred_frame_dct;
+    render->info.mpeg.concealment_motion_vectors = s->concealment_motion_vectors;
+    render->info.mpeg.intra_vlc_format           = s->intra_vlc_format;
+    render->info.mpeg.alternate_scan             = s->alternate_scan;
+    render->info.mpeg.q_scale_type               = s->q_scale_type;
+    render->info.mpeg.top_field_first            = s->top_field_first;
+    render->info.mpeg.full_pel_forward_vector    = s->full_pel[0]; // MPEG-1 only.  Set 0 for MPEG-2
+    render->info.mpeg.full_pel_backward_vector   = s->full_pel[1]; // MPEG-1 only.  Set 0 for MPEG-2
+    render->info.mpeg.f_code[0][0]               = s->mpeg_f_code[0][0]; // For MPEG-1 fill both horiz. & vert.
+    render->info.mpeg.f_code[0][1]               = s->mpeg_f_code[0][1];
+    render->info.mpeg.f_code[1][0]               = s->mpeg_f_code[1][0];
+    render->info.mpeg.f_code[1][1]               = s->mpeg_f_code[1][1];
+    for (i = 0; i < 64; ++i) {
+        render->info.mpeg.intra_quantizer_matrix[i]     = s->intra_matrix[i];
+        render->info.mpeg.non_intra_quantizer_matrix[i] = s->inter_matrix[i];
+    }
+
+    render->info.mpeg.forward_reference          = VDP_INVALID_HANDLE;
+    render->info.mpeg.backward_reference         = VDP_INVALID_HANDLE;
+
+    switch(s->pict_type){
+    case  AV_PICTURE_TYPE_B:
+        next = (struct vdpau_render_state *)s->next_picture.f->data[0];
+        assert(next);
+        render->info.mpeg.backward_reference     = next->surface;
+        // no break here, going to set forward prediction
+    case  AV_PICTURE_TYPE_P:
+        last = (struct vdpau_render_state *)s->last_picture.f->data[0];
+        if (!last) // FIXME: Does this test make sense?
+            last = render; // predict second field from the first
+        render->info.mpeg.forward_reference      = last->surface;
+    }
+
+    ff_vdpau_add_data_chunk(s->current_picture_ptr->f->data[0], buf, buf_size);
+
+    render->info.mpeg.slice_count                = slice_count;
+
+    if (slice_count)
+        ff_mpeg_draw_horiz_band(s, 0, s->avctx->height);
+    render->bitstream_buffers_used               = 0;
+}
+#endif /* CONFIG_MPEG_VDPAU_DECODER || CONFIG_MPEG1_VDPAU_DECODER */
+
+#if CONFIG_VC1_VDPAU_DECODER
+void ff_vdpau_vc1_decode_picture(MpegEncContext *s, const uint8_t *buf,
+                                 int buf_size)
+{
+    VC1Context *v = s->avctx->priv_data;
+    struct vdpau_render_state *render, *last, *next;
+
+    render = (struct vdpau_render_state *)s->current_picture.f->data[0];
+    assert(render);
+
+    /* fill VdpPictureInfoVC1 struct */
+    render->info.vc1.frame_coding_mode  = v->fcm ? v->fcm + 1 : 0;
+    render->info.vc1.postprocflag       = v->postprocflag;
+    render->info.vc1.pulldown           = v->broadcast;
+    render->info.vc1.interlace          = v->interlace;
+    render->info.vc1.tfcntrflag         = v->tfcntrflag;
+    render->info.vc1.finterpflag        = v->finterpflag;
+    render->info.vc1.psf                = v->psf;
+    render->info.vc1.dquant             = v->dquant;
+    render->info.vc1.panscan_flag       = v->panscanflag;
+    render->info.vc1.refdist_flag       = v->refdist_flag;
+    render->info.vc1.quantizer          = v->quantizer_mode;
+    render->info.vc1.extended_mv        = v->extended_mv;
+    render->info.vc1.extended_dmv       = v->extended_dmv;
+    render->info.vc1.overlap            = v->overlap;
+    render->info.vc1.vstransform        = v->vstransform;
+    render->info.vc1.loopfilter         = v->s.loop_filter;
+    render->info.vc1.fastuvmc           = v->fastuvmc;
+    render->info.vc1.range_mapy_flag    = v->range_mapy_flag;
+    render->info.vc1.range_mapy         = v->range_mapy;
+    render->info.vc1.range_mapuv_flag   = v->range_mapuv_flag;
+    render->info.vc1.range_mapuv        = v->range_mapuv;
+    /* Specific to simple/main profile only */
+    render->info.vc1.multires           = v->multires;
+    render->info.vc1.syncmarker         = v->resync_marker;
+    render->info.vc1.rangered           = v->rangered | (v->rangeredfrm << 1);
+    render->info.vc1.maxbframes         = v->s.max_b_frames;
+
+    render->info.vc1.deblockEnable      = v->postprocflag & 1;
+    render->info.vc1.pquant             = v->pq;
+
+    render->info.vc1.forward_reference  = VDP_INVALID_HANDLE;
+    render->info.vc1.backward_reference = VDP_INVALID_HANDLE;
+
+    if (v->bi_type)
+        render->info.vc1.picture_type = 4;
+    else
+        render->info.vc1.picture_type = s->pict_type - 1 + s->pict_type / 3;
+
+    switch(s->pict_type){
+    case  AV_PICTURE_TYPE_B:
+        next = (struct vdpau_render_state *)s->next_picture.f->data[0];
+        assert(next);
+        render->info.vc1.backward_reference = next->surface;
+        // no break here, going to set forward prediction
+    case  AV_PICTURE_TYPE_P:
+        last = (struct vdpau_render_state *)s->last_picture.f->data[0];
+        if (!last) // FIXME: Does this test make sense?
+            last = render; // predict second field from the first
+        render->info.vc1.forward_reference = last->surface;
+    }
+
+    ff_vdpau_add_data_chunk(s->current_picture_ptr->f->data[0], buf, buf_size); /* NOTE(review): render above comes from current_picture, this uses current_picture_ptr (unchecked for NULL here) - confirm both name the same frame */
+
+    render->info.vc1.slice_count          = 1;
+
+    ff_mpeg_draw_horiz_band(s, 0, s->avctx->height);
+    render->bitstream_buffers_used        = 0;
+}
+#endif /* CONFIG_VC1_VDPAU_DECODER */
+
+#if CONFIG_MPEG4_VDPAU_DECODER
+void ff_vdpau_mpeg4_decode_picture(Mpeg4DecContext *ctx, const uint8_t *buf,
+                                   int buf_size)
+{
+    MpegEncContext *s = &ctx->m;
+    struct vdpau_render_state *render, *last, *next;
+    int i;
+
+    if (!s->current_picture_ptr) return;
+
+    render = (struct vdpau_render_state *)s->current_picture_ptr->f->data[0];
+    assert(render);
+
+    /* fill VdpPictureInfoMPEG4Part2 struct */
+    render->info.mpeg4.trd[0]                            = s->pp_time;
+    render->info.mpeg4.trb[0]                            = s->pb_time;
+    render->info.mpeg4.trd[1]                            = s->pp_field_time >> 1;
+    render->info.mpeg4.trb[1]                            = s->pb_field_time >> 1;
+    render->info.mpeg4.vop_time_increment_resolution     = s->avctx->time_base.den;
+    render->info.mpeg4.vop_coding_type                   = 0;
+    render->info.mpeg4.vop_fcode_forward                 = s->f_code;
+    render->info.mpeg4.vop_fcode_backward                = s->b_code;
+    render->info.mpeg4.resync_marker_disable             = !ctx->resync_marker;
+    render->info.mpeg4.interlaced                        = !s->progressive_sequence;
+    render->info.mpeg4.quant_type                        = s->mpeg_quant;
+    render->info.mpeg4.quarter_sample                    = s->quarter_sample;
+    render->info.mpeg4.short_video_header                = s->avctx->codec->id == AV_CODEC_ID_H263;
+    render->info.mpeg4.rounding_control                  = s->no_rounding;
+    render->info.mpeg4.alternate_vertical_scan_flag      = s->alternate_scan;
+    render->info.mpeg4.top_field_first                   = s->top_field_first;
+    for (i = 0; i < 64; ++i) {
+        render->info.mpeg4.intra_quantizer_matrix[i]     = s->intra_matrix[i];
+        render->info.mpeg4.non_intra_quantizer_matrix[i] = s->inter_matrix[i];
+    }
+    render->info.mpeg4.forward_reference                 = VDP_INVALID_HANDLE;
+    render->info.mpeg4.backward_reference                = VDP_INVALID_HANDLE;
+
+    switch (s->pict_type) {
+    case AV_PICTURE_TYPE_B:
+        next = (struct vdpau_render_state *)s->next_picture.f->data[0];
+        assert(next);
+        render->info.mpeg4.backward_reference     = next->surface;
+        render->info.mpeg4.vop_coding_type        = 2;
+        // no break here, going to set forward prediction
+    case AV_PICTURE_TYPE_P:
+        last = (struct vdpau_render_state *)s->last_picture.f->data[0];
+        assert(last);
+        render->info.mpeg4.forward_reference      = last->surface;
+    }
+
+    ff_vdpau_add_data_chunk(s->current_picture_ptr->f->data[0], buf, buf_size);
+
+    ff_mpeg_draw_horiz_band(s, 0, s->avctx->height);
+    render->bitstream_buffers_used = 0;
+}
+#endif /* CONFIG_MPEG4_VDPAU_DECODER */
+
 int av_vdpau_get_profile(AVCodecContext *avctx, VdpDecoderProfile *profile)
 {
 #define PROFILE(prof)       \
diff --git a/libavcodec/vdpau.h b/libavcodec/vdpau.h
index 75cb1bf..e25cc42 100644
--- a/libavcodec/vdpau.h
+++ b/libavcodec/vdpau.h
@@ -4,20 +4,20 @@
  *
  * Copyright (C) 2008 NVIDIA
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -39,7 +39,7 @@
  * - VDPAU decoding
  * - VDPAU presentation
  *
- * The VDPAU decoding module parses all headers using Libav
+ * The VDPAU decoding module parses all headers using FFmpeg
  * parsing mechanisms and uses VDPAU for the actual decoding.
  *
  * As per the current implementation, the actual decoding
@@ -51,7 +51,7 @@
 
 #include <vdpau/vdpau.h>
 #include <vdpau/vdpau_x11.h>
-
+#include "libavutil/avconfig.h"
 #include "libavutil/attributes.h"
 
 #include "avcodec.h"
@@ -66,10 +66,18 @@ union AVVDPAUPictureInfo {
 };
 #endif
 
+struct AVCodecContext;
+struct AVFrame;
+
+typedef int (*AVVDPAU_Render2)(struct AVCodecContext *, struct AVFrame *,
+                               const VdpPictureInfo *, uint32_t,
+                               const VdpBitstreamBuffer *);
+
 /**
  * This structure is used to share data between the libavcodec library and
  * the client video application.
- * The user shall zero-allocate the structure and make it available as
+ * The user shall allocate the structure via the av_alloc_vdpaucontext()
+ * function and make it available as
  * AVCodecContext.hwaccel_context. Members can be set by the user once
  * during initialization or through each AVCodecContext.get_buffer()
  * function call. In any case, they must be valid prior to calling
@@ -128,9 +136,20 @@ typedef struct AVVDPAUContext {
     attribute_deprecated
     VdpBitstreamBuffer *bitstream_buffers;
 #endif
+    AVVDPAU_Render2 render2;
 } AVVDPAUContext;
 
 /**
+ * @brief allocation function for AVVDPAUContext
+ *
+ * Allows extending the struct without breaking API/ABI
+ */
+AVVDPAUContext *av_alloc_vdpaucontext(void);
+
+AVVDPAU_Render2 av_vdpau_hwaccel_get_render2(const AVVDPAUContext *);
+void av_vdpau_hwaccel_set_render2(AVVDPAUContext *, AVVDPAU_Render2);
+
+/**
  * Allocate an AVVDPAUContext.
  *
  * @return Newly-allocated AVVDPAUContext or NULL on failure.
@@ -161,19 +180,21 @@ int av_vdpau_get_profile(AVCodecContext *avctx, VdpDecoderProfile *profile);
 #define FF_VDPAU_STATE_USED_FOR_REFERENCE 2
 
 /**
- * @brief This structure is used as a callback between the Libav
+ * @brief This structure is used as a callback between the FFmpeg
  * decoder (vd_) and presentation (vo_) module.
  * This is used for defining a video frame containing surface,
  * picture parameter, bitstream information etc which are passed
- * between the Libav decoder and its clients.
+ * between the FFmpeg decoder and its clients.
  */
 struct vdpau_render_state {
     VdpVideoSurface surface; ///< Used as rendered surface, never changed.
 
     int state; ///< Holds FF_VDPAU_STATE_* values.
 
+#if AV_HAVE_INCOMPATIBLE_LIBAV_ABI
     /** picture parameter information for all supported codecs */
     union AVVDPAUPictureInfo info;
+#endif
 
     /** Describe size/location of the compressed video data.
         Set to 0 when freeing bitstream_buffers. */
@@ -181,6 +202,11 @@ struct vdpau_render_state {
     int bitstream_buffers_used;
     /** The user is responsible for freeing this buffer using av_freep(). */
     VdpBitstreamBuffer *bitstream_buffers;
+
+#if !AV_HAVE_INCOMPATIBLE_LIBAV_ABI
+    /** picture parameter information for all supported codecs */
+    union AVVDPAUPictureInfo info;
+#endif
 };
 #endif
 
diff --git a/libavcodec/vdpau_h264.c b/libavcodec/vdpau_h264.c
index 32e9c28..05a41d0 100644
--- a/libavcodec/vdpau_h264.c
+++ b/libavcodec/vdpau_h264.c
@@ -4,20 +4,20 @@
  * Copyright (c) 2008 NVIDIA
  * Copyright (c) 2013 Rémi Denis-Courmont
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software Foundation,
+ * License along with FFmpeg; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -189,19 +189,41 @@ static int vdpau_h264_decode_slice(AVCodecContext *avctx,
 
 static int vdpau_h264_end_frame(AVCodecContext *avctx)
 {
+    int res = 0;
     AVVDPAUContext *hwctx = avctx->hwaccel_context;
     H264Context *h = avctx->priv_data;
     H264Picture *pic = h->cur_pic_ptr;
     struct vdpau_picture_context *pic_ctx = pic->hwaccel_picture_private;
     VdpVideoSurface surf = ff_vdpau_get_surface_id(&pic->f);
 
+#if FF_API_BUFS_VDPAU
+FF_DISABLE_DEPRECATION_WARNINGS
+    hwctx->info = pic_ctx->info;
+    hwctx->bitstream_buffers = pic_ctx->bitstream_buffers;
+    hwctx->bitstream_buffers_used = pic_ctx->bitstream_buffers_used;
+    hwctx->bitstream_buffers_allocated = pic_ctx->bitstream_buffers_allocated;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    if (!hwctx->render) {
+        res = hwctx->render2(avctx, &pic->f, (void *)&pic_ctx->info,
+                             pic_ctx->bitstream_buffers_used, pic_ctx->bitstream_buffers);
+    } else
     hwctx->render(hwctx->decoder, surf, (void *)&pic_ctx->info,
                   pic_ctx->bitstream_buffers_used, pic_ctx->bitstream_buffers);
 
     ff_h264_draw_horiz_band(h, 0, h->avctx->height);
     av_freep(&pic_ctx->bitstream_buffers);
 
-    return 0;
+#if FF_API_BUFS_VDPAU
+FF_DISABLE_DEPRECATION_WARNINGS
+    hwctx->bitstream_buffers = NULL;
+    hwctx->bitstream_buffers_used = 0;
+    hwctx->bitstream_buffers_allocated = 0;
+FF_ENABLE_DEPRECATION_WARNINGS
+#endif
+
+    return res;
 }
 
 AVHWAccel ff_h264_vdpau_hwaccel = {
diff --git a/libavcodec/vdpau_internal.h b/libavcodec/vdpau_internal.h
index 2443e0a..0f3652b 100644
--- a/libavcodec/vdpau_internal.h
+++ b/libavcodec/vdpau_internal.h
@@ -4,30 +4,35 @@
  *
  * Copyright (C) 2008 NVIDIA
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #ifndef AVCODEC_VDPAU_INTERNAL_H
 #define AVCODEC_VDPAU_INTERNAL_H
 
+#include "config.h"
 #include <stdint.h>
+#if CONFIG_VDPAU
 #include <vdpau/vdpau.h>
+#endif
+#include "h264.h"
 
 #include "avcodec.h"
+#include "mpeg4video.h"
 #include "mpegvideo.h"
 #include "version.h"
 
@@ -37,6 +42,8 @@ static inline uintptr_t ff_vdpau_get_surface_id(AVFrame *pic)
     return (uintptr_t)pic->data[3];
 }
 
+struct vdpau_picture_context;
+#if CONFIG_VDPAU
 #if !FF_API_BUFS_VDPAU
 union AVVDPAUPictureInfo {
     VdpPictureInfoH264        h264;
@@ -69,6 +76,7 @@ struct vdpau_picture_context {
      */
     VdpBitstreamBuffer *bitstream_buffers;
 };
+#endif
 
 int ff_vdpau_common_start_frame(struct vdpau_picture_context *pic,
                                 const uint8_t *buffer, uint32_t size);
@@ -76,4 +84,21 @@ int ff_vdpau_mpeg_end_frame(AVCodecContext *avctx);
 int ff_vdpau_add_buffer(struct vdpau_picture_context *pic, const uint8_t *buf,
                         uint32_t buf_size);
 
+
+void ff_vdpau_add_data_chunk(uint8_t *data, const uint8_t *buf,
+                             int buf_size);
+
+void ff_vdpau_mpeg_picture_complete(MpegEncContext *s, const uint8_t *buf,
+                                    int buf_size, int slice_count);
+
+void ff_vdpau_h264_picture_start(H264Context *h);
+void ff_vdpau_h264_set_reference_frames(H264Context *h);
+void ff_vdpau_h264_picture_complete(H264Context *h);
+
+void ff_vdpau_vc1_decode_picture(MpegEncContext *s, const uint8_t *buf,
+                                 int buf_size);
+
+void ff_vdpau_mpeg4_decode_picture(Mpeg4DecContext *s, const uint8_t *buf,
+                                   int buf_size);
+
 #endif /* AVCODEC_VDPAU_INTERNAL_H */
diff --git a/libavcodec/vdpau_mpeg12.c b/libavcodec/vdpau_mpeg12.c
index 2b53e66..84a971c 100644
--- a/libavcodec/vdpau_mpeg12.c
+++ b/libavcodec/vdpau_mpeg12.c
@@ -4,20 +4,20 @@
  * Copyright (c) 2008 NVIDIA
  * Copyright (c) 2013 Rémi Denis-Courmont
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software Foundation,
+ * License along with FFmpeg; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/vdpau_mpeg4.c b/libavcodec/vdpau_mpeg4.c
index 64e781d..64669a6 100644
--- a/libavcodec/vdpau_mpeg4.c
+++ b/libavcodec/vdpau_mpeg4.c
@@ -4,20 +4,20 @@
  * Copyright (c) 2008 NVIDIA
  * Copyright (c) 2013 Rémi Denis-Courmont
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software Foundation,
+ * License along with FFmpeg; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/vdpau_vc1.c b/libavcodec/vdpau_vc1.c
index f7a7ecc..13c41df 100644
--- a/libavcodec/vdpau_vc1.c
+++ b/libavcodec/vdpau_vc1.c
@@ -4,20 +4,20 @@
  * Copyright (c) 2008 NVIDIA
  * Copyright (c) 2013 Rémi Denis-Courmont
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software Foundation,
+ * License along with FFmpeg; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -44,14 +44,18 @@ static int vdpau_vc1_start_frame(AVCodecContext *avctx,
 
     switch (s->pict_type) {
     case AV_PICTURE_TYPE_B:
+        if (s->next_picture_ptr) {
         ref = ff_vdpau_get_surface_id(s->next_picture.f);
         assert(ref != VDP_INVALID_HANDLE);
         info->backward_reference = ref;
+        }
         /* fall-through */
     case AV_PICTURE_TYPE_P:
+        if (s->last_picture_ptr) {
         ref = ff_vdpau_get_surface_id(s->last_picture.f);
         assert(ref != VDP_INVALID_HANDLE);
         info->forward_reference  = ref;
+        }
     }
 
     info->slice_count       = 0;
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 2c22adb..dc7af8b 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -1,19 +1,19 @@
 /*
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -29,8 +29,8 @@
 #include "libavutil/version.h"
 
 #define LIBAVCODEC_VERSION_MAJOR 55
-#define LIBAVCODEC_VERSION_MINOR 57
-#define LIBAVCODEC_VERSION_MICRO  2
+#define LIBAVCODEC_VERSION_MINOR  72
+#define LIBAVCODEC_VERSION_MICRO 101
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
                                                LIBAVCODEC_VERSION_MINOR, \
@@ -51,6 +51,24 @@
 #ifndef FF_API_REQUEST_CHANNELS
 #define FF_API_REQUEST_CHANNELS (LIBAVCODEC_VERSION_MAJOR < 56)
 #endif
+#ifndef FF_API_OLD_DECODE_AUDIO
+#define FF_API_OLD_DECODE_AUDIO (LIBAVCODEC_VERSION_MAJOR < 56)
+#endif
+#ifndef FF_API_OLD_ENCODE_AUDIO
+#define FF_API_OLD_ENCODE_AUDIO (LIBAVCODEC_VERSION_MAJOR < 56)
+#endif
+#ifndef FF_API_OLD_ENCODE_VIDEO
+#define FF_API_OLD_ENCODE_VIDEO (LIBAVCODEC_VERSION_MAJOR < 56)
+#endif
+#ifndef FF_API_CODEC_ID
+#define FF_API_CODEC_ID          (LIBAVCODEC_VERSION_MAJOR < 56)
+#endif
+#ifndef FF_API_AUDIO_CONVERT
+#define FF_API_AUDIO_CONVERT     (LIBAVCODEC_VERSION_MAJOR < 56)
+#endif
+#ifndef FF_API_AVCODEC_RESAMPLE
+#define FF_API_AVCODEC_RESAMPLE  FF_API_AUDIO_CONVERT
+#endif
 #ifndef FF_API_DEINTERLACE
 #define FF_API_DEINTERLACE       (LIBAVCODEC_VERSION_MAJOR < 56)
 #endif
@@ -123,6 +141,9 @@
 #ifndef FF_API_EMU_EDGE
 #define FF_API_EMU_EDGE          (LIBAVCODEC_VERSION_MAJOR < 56)
 #endif
+#ifndef FF_API_DSPUTIL
+#define FF_API_DSPUTIL           (LIBAVCODEC_VERSION_MAJOR < 56)
+#endif
 #ifndef FF_API_ARCH_SH4
 #define FF_API_ARCH_SH4          (LIBAVCODEC_VERSION_MAJOR < 56)
 #endif
diff --git a/libavcodec/videodsp.c b/libavcodec/videodsp.c
index e6d9303..ba618a7 100644
--- a/libavcodec/videodsp.c
+++ b/libavcodec/videodsp.c
@@ -1,24 +1,25 @@
 /*
  * Copyright (C) 2012 Ronald S. Bultje
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
 #include "libavutil/common.h"
 #include "videodsp.h"
 
diff --git a/libavcodec/videodsp.h b/libavcodec/videodsp.h
index 04c012a..fc01a31 100644
--- a/libavcodec/videodsp.h
+++ b/libavcodec/videodsp.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2012 Ronald S. Bultje
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -29,14 +29,25 @@
 #include <stddef.h>
 #include <stdint.h>
 
+#define EMULATED_EDGE(depth) /* emits the prototype of ff_emulated_edge_mc_<depth>() */ \
+void ff_emulated_edge_mc_ ## depth(uint8_t *dst, const uint8_t *src, \
+                                   ptrdiff_t dst_stride, ptrdiff_t src_stride, \
+                                   int block_w, int block_h,\
+                                   int src_x, int src_y, int w, int h);
+
+EMULATED_EDGE(8)  /* declares ff_emulated_edge_mc_8; the macro already supplies the ';' */
+EMULATED_EDGE(16) /* declares ff_emulated_edge_mc_16 */
+
 typedef struct VideoDSPContext {
     /**
      * Copy a rectangular area of samples to a temporary buffer and replicate
      * the border samples.
      *
-     * @param buf destination buffer
+     * @param dst destination buffer
+     * @param dst_stride number of bytes between 2 vertically adjacent samples
+     *                   in destination buffer
      * @param src source buffer
-     * @param buf_linesize number of bytes between 2 vertically adjacent
+     * @param dst_linesize number of bytes between 2 vertically adjacent
      *                     samples in the destination buffer
      * @param src_linesize number of bytes between 2 vertically adjacent
      *                     samples in both the source buffer
@@ -49,8 +60,8 @@ typedef struct VideoDSPContext {
      * @param w width of the source buffer
      * @param h height of the source buffer
      */
-    void (*emulated_edge_mc)(uint8_t *buf, const uint8_t *src,
-                             ptrdiff_t buf_linesize,
+    void (*emulated_edge_mc)(uint8_t *dst, const uint8_t *src,
+                             ptrdiff_t dst_linesize,
                              ptrdiff_t src_linesize,
                              int block_w, int block_h,
                              int src_x, int src_y, int w, int h);
diff --git a/libavcodec/videodsp_template.c b/libavcodec/videodsp_template.c
index 28b8c32..f4ff2ba 100644
--- a/libavcodec/videodsp_template.c
+++ b/libavcodec/videodsp_template.c
@@ -1,42 +1,46 @@
 /*
- * Copyright (c) 2002-2004 Michael Niedermayer
+ * Copyright (c) 2002-2012 Michael Niedermayer
  * Copyright (C) 2012 Ronald S. Bultje
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include <assert.h>
 
 #include "bit_depth_template.c"
-
-static void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src,
-                                      ptrdiff_t buf_linesize,
-                                      ptrdiff_t src_linesize,
-                                      int block_w, int block_h,
-                                      int src_x, int src_y, int w, int h)
+void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src,
+                               ptrdiff_t buf_linesize,
+                               ptrdiff_t src_linesize,
+                               int block_w, int block_h,
+                               int src_x, int src_y, int w, int h)
 {
     int x, y;
     int start_y, start_x, end_y, end_x;
 
+    if (!w || !h)
+        return;
+
     if (src_y >= h) {
-        src  += (h - 1 - src_y) * src_linesize;
+        src -= src_y * src_linesize;
+        src += (h - 1) * src_linesize;
         src_y = h - 1;
     } else if (src_y <= -block_h) {
-        src  += (1 - block_h - src_y) * src_linesize;
+        src -= src_y * src_linesize;
+        src += (1 - block_h) * src_linesize;
         src_y = 1 - block_h;
     }
     if (src_x >= w) {
@@ -51,8 +55,8 @@ static void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src,
     start_x = FFMAX(0, -src_x);
     end_y = FFMIN(block_h, h-src_y);
     end_x = FFMIN(block_w, w-src_x);
-    assert(start_y < end_y && block_h);
-    assert(start_x < end_x && block_w);
+    av_assert2(start_y < end_y && block_h);
+    av_assert2(start_x < end_x && block_w);
 
     w    = end_x - start_x;
     src += start_y * src_linesize + start_x * sizeof(pixel);
diff --git a/libavcodec/vima.c b/libavcodec/vima.c
index 14a3bca..ba3f07a 100644
--- a/libavcodec/vima.c
+++ b/libavcodec/vima.c
@@ -2,20 +2,20 @@
  * LucasArts VIMA decoder
  * Copyright (c) 2012 Paul B Mahol
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -216,3 +216,13 @@ AVCodec ff_adpcm_vima_decoder = {
     .decode       = decode_frame,
     .capabilities = CODEC_CAP_DR1,
 };
+
+AVCodec ff_vima_decoder = { /* additional decoder descriptor using the short name "vima" */
+    .name         = "vima",
+    .long_name    = NULL_IF_CONFIG_SMALL("LucasArts VIMA audio"),
+    .type         = AVMEDIA_TYPE_AUDIO,
+    .id           = AV_CODEC_ID_ADPCM_VIMA, /* NOTE(review): presumably the same id as ff_adpcm_vima_decoder - confirm */
+    .init         = decode_init,
+    .decode       = decode_frame, /* same decode callback as ff_adpcm_vima_decoder above */
+    .capabilities = CODEC_CAP_DR1,
+};
diff --git a/libavcodec/vmdaudio.c b/libavcodec/vmdaudio.c
index 66c5865..0090aef 100644
--- a/libavcodec/vmdaudio.c
+++ b/libavcodec/vmdaudio.c
@@ -1,20 +1,21 @@
 /*
  * Sierra VMD audio decoder
+ * Copyright (C) 2004 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -34,6 +35,7 @@
 
 #include <string.h>
 
+#include "libavutil/avassert.h"
 #include "libavutil/channel_layout.h"
 #include "libavutil/common.h"
 #include "libavutil/intreadwrite.h"
@@ -74,7 +76,7 @@ static av_cold int vmdaudio_decode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_ERROR, "invalid number of channels\n");
         return AVERROR(EINVAL);
     }
-    if (avctx->block_align < 1) {
+    if (avctx->block_align < 1 || avctx->block_align % avctx->channels) {
         av_log(avctx, AV_LOG_ERROR, "invalid block align\n");
         return AVERROR(EINVAL);
     }
@@ -180,17 +182,16 @@ static int vmdaudio_decode_frame(AVCodecContext *avctx, void *data,
     /* get output buffer */
     frame->nb_samples = ((silent_chunks + audio_chunks) * avctx->block_align) /
                         avctx->channels;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     output_samples_u8  =            frame->data[0];
     output_samples_s16 = (int16_t *)frame->data[0];
 
     /* decode silent chunks */
     if (silent_chunks > 0) {
-        int silent_size = FFMIN(avctx->block_align * silent_chunks,
-                                frame->nb_samples * avctx->channels);
+        int silent_size = avctx->block_align * silent_chunks;
+        av_assert0(avctx->block_align * silent_chunks <= frame->nb_samples * avctx->channels);
+
         if (s->out_bps == 2) {
             memset(output_samples_s16, 0x00, silent_size * 2);
             output_samples_s16 += silent_size;
@@ -202,8 +203,9 @@ static int vmdaudio_decode_frame(AVCodecContext *avctx, void *data,
 
     /* decode audio chunks */
     if (audio_chunks > 0) {
-        buf_end = buf + (buf_size & ~(avctx->channels > 1));
-        while (buf + s->chunk_size <= buf_end) {
+        buf_end = buf + buf_size;
+        av_assert0((buf_size & (avctx->channels > 1)) == 0);
+        while (buf_end - buf >= s->chunk_size) {
             if (s->out_bps == 2) {
                 decode_audio_s16(output_samples_s16, buf, s->chunk_size,
                                  avctx->channels);
diff --git a/libavcodec/vmdvideo.c b/libavcodec/vmdvideo.c
index aaeff43..279c56a 100644
--- a/libavcodec/vmdvideo.c
+++ b/libavcodec/vmdvideo.c
@@ -1,20 +1,21 @@
 /*
  * Sierra VMD video decoder
+ * Copyright (C) 2004 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -62,7 +63,7 @@ typedef struct VmdVideoContext {
 #define QUEUE_SIZE 0x1000
 #define QUEUE_MASK 0x0FFF
 
-static void lz_unpack(const unsigned char *src, int src_len,
+static int lz_unpack(const unsigned char *src, int src_len,
                       unsigned char *dest, int dest_len)
 {
     unsigned char *d;
@@ -83,9 +84,9 @@ static void lz_unpack(const unsigned char *src, int src_len,
     dataleft = bytestream2_get_le32(&gb);
     memset(queue, 0x20, QUEUE_SIZE);
     if (bytestream2_get_bytes_left(&gb) < 4)
-        return;
+        return AVERROR_INVALIDDATA;
     if (bytestream2_peek_le32(&gb) == 0x56781234) {
-        bytestream2_get_le32(&gb);
+        bytestream2_skipu(&gb, 4);
         qpos = 0x111;
         speclen = 0xF + 3;
     } else {
@@ -96,8 +97,8 @@ static void lz_unpack(const unsigned char *src, int src_len,
     while (dataleft > 0 && bytestream2_get_bytes_left(&gb) > 0) {
         tag = bytestream2_get_byteu(&gb);
         if ((tag == 0xFF) && (dataleft > 8)) {
-            if (d + 8 > d_end || bytestream2_get_bytes_left(&gb) < 8)
-                return;
+            if (d_end - d < 8 || bytestream2_get_bytes_left(&gb) < 8)
+                return AVERROR_INVALIDDATA;
             for (i = 0; i < 8; i++) {
                 queue[qpos++] = *d++ = bytestream2_get_byteu(&gb);
                 qpos &= QUEUE_MASK;
@@ -108,9 +109,9 @@ static void lz_unpack(const unsigned char *src, int src_len,
                 if (dataleft == 0)
                     break;
                 if (tag & 0x01) {
-                    if (d + 1 > d_end || bytestream2_get_bytes_left(&gb) < 1)
-                        return;
-                    queue[qpos++] = *d++ = bytestream2_get_byte(&gb);
+                    if (d_end - d < 1 || bytestream2_get_bytes_left(&gb) < 1)
+                        return AVERROR_INVALIDDATA;
+                    queue[qpos++] = *d++ = bytestream2_get_byteu(&gb);
                     qpos &= QUEUE_MASK;
                     dataleft--;
                 } else {
@@ -120,8 +121,8 @@ static void lz_unpack(const unsigned char *src, int src_len,
                     if (chainlen == speclen) {
                         chainlen = bytestream2_get_byte(&gb) + 0xF + 3;
                     }
-                    if (d + chainlen > d_end)
-                        return;
+                    if (d_end - d < chainlen)
+                        return AVERROR_INVALIDDATA;
                     for (j = 0; j < chainlen; j++) {
                         *d = queue[chainofs++ & QUEUE_MASK];
                         queue[qpos++] = *d++;
@@ -133,10 +134,10 @@ static void lz_unpack(const unsigned char *src, int src_len,
             }
         }
     }
+    return d - dest;
 }
-
 static int rle_unpack(const unsigned char *src, unsigned char *dest,
-    int src_count, int src_size, int dest_len)
+                      int src_count, int src_size, int dest_len)
 {
     unsigned char *pd;
     int i, l, used = 0;
@@ -159,12 +160,12 @@ static int rle_unpack(const unsigned char *src, unsigned char *dest,
         l = bytestream2_get_byteu(&gb);
         if (l & 0x80) {
             l = (l & 0x7F) * 2;
-            if (pd + l > dest_end || bytestream2_get_bytes_left(&gb) < l)
+            if (dest_end - pd < l || bytestream2_get_bytes_left(&gb) < l)
                 return bytestream2_tell(&gb);
-            bytestream2_get_buffer(&gb, pd, l);
+            bytestream2_get_bufferu(&gb, pd, l);
             pd += l;
         } else {
-            if (pd + l > dest_end || bytestream2_get_bytes_left(&gb) < 2)
+            if (dest_end - pd < 2*l || bytestream2_get_bytes_left(&gb) < 2)
                 return bytestream2_tell(&gb);
             run_val = bytestream2_get_ne16(&gb);
             for (i = 0; i < l; i++) {
@@ -200,6 +201,16 @@ static int vmd_decode(VmdVideoContext *s, AVFrame *frame)
     frame_y = AV_RL16(&s->buf[8]);
     frame_width = AV_RL16(&s->buf[10]) - frame_x + 1;
     frame_height = AV_RL16(&s->buf[12]) - frame_y + 1;
+
+    if ((frame_width == s->avctx->width && frame_height == s->avctx->height) &&
+        (frame_x || frame_y)) {
+
+        s->x_off = frame_x;
+        s->y_off = frame_y;
+    }
+    frame_x -= s->x_off;
+    frame_y -= s->y_off;
+
     if (frame_x < 0 || frame_width < 0 ||
         frame_x >= s->avctx->width ||
         frame_width > s->avctx->width ||
@@ -219,15 +230,6 @@ static int vmd_decode(VmdVideoContext *s, AVFrame *frame)
         return AVERROR_INVALIDDATA;
     }
 
-    if ((frame_width == s->avctx->width && frame_height == s->avctx->height) &&
-        (frame_x || frame_y)) {
-
-        s->x_off = frame_x;
-        s->y_off = frame_y;
-    }
-    frame_x -= s->x_off;
-    frame_y -= s->y_off;
-
     /* if only a certain region will be updated, copy the entire previous
      * frame before the decode */
     if (s->prev_frame->data[0] &&
@@ -248,13 +250,13 @@ static int vmd_decode(VmdVideoContext *s, AVFrame *frame)
                 r = bytestream2_get_byteu(&gb) * 4;
                 g = bytestream2_get_byteu(&gb) * 4;
                 b = bytestream2_get_byteu(&gb) * 4;
-                palette32[i] = (r << 16) | (g << 8) | (b);
+                palette32[i] = 0xFFU << 24 | (r << 16) | (g << 8) | (b);
+                palette32[i] |= palette32[i] >> 6 & 0x30303;
             }
         } else {
             av_log(s->avctx, AV_LOG_ERROR, "Incomplete palette\n");
             return AVERROR_INVALIDDATA;
         }
-        s->size -= PALETTE_COUNT * 3 + 2;
     }
 
     if (!s->size)
@@ -265,15 +267,18 @@ static int vmd_decode(VmdVideoContext *s, AVFrame *frame)
         return AVERROR_INVALIDDATA;
     meth = bytestream2_get_byteu(&gb);
     if (meth & 0x80) {
+        int size;
         if (!s->unpack_buffer_size) {
             av_log(s->avctx, AV_LOG_ERROR,
                    "Trying to unpack LZ-compressed frame with no LZ buffer\n");
             return AVERROR_INVALIDDATA;
         }
-        lz_unpack(gb.buffer, bytestream2_get_bytes_left(&gb),
-                  s->unpack_buffer, s->unpack_buffer_size);
+        size = lz_unpack(gb.buffer, bytestream2_get_bytes_left(&gb),
+                         s->unpack_buffer, s->unpack_buffer_size);
+        if (size < 0)
+            return size;
         meth &= 0x7F;
-        bytestream2_init(&gb, s->unpack_buffer, s->unpack_buffer_size);
+        bytestream2_init(&gb, s->unpack_buffer, size);
     }
 
     dp = &frame->data[0][frame_y * frame->linesize[0] + frame_x];
@@ -289,7 +294,7 @@ static int vmd_decode(VmdVideoContext *s, AVFrame *frame)
                     if (ofs + len > frame_width ||
                         bytestream2_get_bytes_left(&gb) < len)
                         return AVERROR_INVALIDDATA;
-                    bytestream2_get_buffer(&gb, &dp[ofs], len);
+                    bytestream2_get_bufferu(&gb, &dp[ofs], len);
                     ofs += len;
                 } else {
                     /* interframe pixel copy */
@@ -301,7 +306,7 @@ static int vmd_decode(VmdVideoContext *s, AVFrame *frame)
             } while (ofs < frame_width);
             if (ofs > frame_width) {
                 av_log(s->avctx, AV_LOG_ERROR,
-                       "VMD video: offset > width (%d > %d)\n",
+                       "offset > width (%d > %d)\n",
                        ofs, frame_width);
                 return AVERROR_INVALIDDATA;
             }
@@ -347,7 +352,7 @@ static int vmd_decode(VmdVideoContext *s, AVFrame *frame)
             } while (ofs < frame_width);
             if (ofs > frame_width) {
                 av_log(s->avctx, AV_LOG_ERROR,
-                       "VMD video: offset > width (%d > %d)\n",
+                       "offset > width (%d > %d)\n",
                        ofs, frame_width);
                 return AVERROR_INVALIDDATA;
             }
@@ -364,7 +369,8 @@ static av_cold int vmdvideo_decode_end(AVCodecContext *avctx)
     VmdVideoContext *s = avctx->priv_data;
 
     av_frame_free(&s->prev_frame);
-    av_free(s->unpack_buffer);
+    av_freep(&s->unpack_buffer);
+    s->unpack_buffer_size = 0;
 
     return 0;
 }
@@ -384,9 +390,9 @@ static av_cold int vmdvideo_decode_init(AVCodecContext *avctx)
 
     /* make sure the VMD header made it */
     if (s->avctx->extradata_size != VMD_HEADER_SIZE) {
-        av_log(s->avctx, AV_LOG_ERROR, "VMD video: expected extradata size of %d\n",
+        av_log(s->avctx, AV_LOG_ERROR, "expected extradata size of %d\n",
             VMD_HEADER_SIZE);
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
     vmd_header = (unsigned char *)avctx->extradata;
 
@@ -404,7 +410,8 @@ static av_cold int vmdvideo_decode_init(AVCodecContext *avctx)
         r = raw_palette[palette_index++] * 4;
         g = raw_palette[palette_index++] * 4;
         b = raw_palette[palette_index++] * 4;
-        palette32[i] = (r << 16) | (g << 8) | (b);
+        palette32[i] = 0xFFU << 24 | (r << 16) | (g << 8) | (b);
+        palette32[i] |= palette32[i] >> 6 & 0x30303;
     }
 
     s->prev_frame = av_frame_alloc();
@@ -432,10 +439,8 @@ static int vmdvideo_decode_frame(AVCodecContext *avctx,
     if (buf_size < 16)
         return AVERROR_INVALIDDATA;
 
-    if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0) {
-        av_log(s->avctx, AV_LOG_ERROR, "VMD Video: get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
         return ret;
-    }
 
     if ((ret = vmd_decode(s, frame)) < 0)
         return ret;
diff --git a/libavcodec/vmnc.c b/libavcodec/vmnc.c
index 16984fb..46bd52e 100644
--- a/libavcodec/vmnc.c
+++ b/libavcodec/vmnc.c
@@ -2,20 +2,20 @@
  * VMware Screen Codec (VMnc) decoder
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -291,6 +291,11 @@ static int decode_hextile(VmncContext *c, uint8_t* dst, GetByteContext *gb,
                         fg = vmnc_get_pixel(gb, bpp, c->bigendian);
                     xy = bytestream2_get_byte(gb);
                     wh = bytestream2_get_byte(gb);
+                    if (   (xy >> 4) + (wh >> 4) + 1 > w - i
+                        || (xy & 0xF) + (wh & 0xF)+1 > h - j) {
+                        av_log(c->avctx, AV_LOG_ERROR, "Rectangle outside picture\n");
+                        return AVERROR_INVALIDDATA;
+                    }
                     paint_rect(dst2, xy >> 4, xy & 0xF,
                                (wh>>4)+1, (wh & 0xF)+1, fg, bpp, stride);
                 }
@@ -307,6 +312,8 @@ static void reset_buffers(VmncContext *c)
     av_freep(&c->curmask);
     av_freep(&c->screendta);
     c->cur_w = c->cur_h = 0;
+    c->cur_hx = c->cur_hy = 0;
+
 }
 
 static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
@@ -319,10 +326,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     uint8_t *outptr;
     int dx, dy, w, h, depth, enc, chunks, res, size_left, ret;
 
-    if ((ret = ff_reget_buffer(avctx, c->pic)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, c->pic)) < 0)
         return ret;
-    }
 
     bytestream2_init(gb, buf, buf_size);
 
@@ -360,6 +365,10 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     bytestream2_skip(gb, 2);
     chunks = bytestream2_get_be16(gb);
     while (chunks--) {
+        if (bytestream2_get_bytes_left(gb) < 12) { /* refuse to read a chunk header from a truncated packet */
+            av_log(avctx, AV_LOG_ERROR, "Premature end of data!\n");
+            return AVERROR_INVALIDDATA; /* matches the other data errors in this file instead of a bare -1 */
+        }
         dx  = bytestream2_get_be16(gb);
         dy  = bytestream2_get_be16(gb);
         w   = bytestream2_get_be16(gb);
@@ -369,6 +378,10 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         size_left = bytestream2_get_bytes_left(gb);
         switch (enc) {
         case MAGIC_WMVd: // cursor
+            if (w*(int64_t)h*c->bpp2 > INT_MAX/2 - 2) {
+                av_log(avctx, AV_LOG_ERROR, "dimensions too large\n");
+                return AVERROR_INVALIDDATA;
+            }
             if (size_left < 2 + w * h * c->bpp2 * 2) {
                 av_log(avctx, AV_LOG_ERROR,
                        "Premature end of data! (need %i got %i)\n",
@@ -545,9 +558,9 @@ static av_cold int decode_end(AVCodecContext *avctx)
 
     av_frame_free(&c->pic);
 
-    av_free(c->curbits);
-    av_free(c->curmask);
-    av_free(c->screendta);
+    av_freep(&c->curbits);
+    av_freep(&c->curmask);
+    av_freep(&c->screendta);
     return 0;
 }
 
diff --git a/libavcodec/vorbis.c b/libavcodec/vorbis.c
index 66fa21b..86d1040 100644
--- a/libavcodec/vorbis.c
+++ b/libavcodec/vorbis.c
@@ -1,18 +1,22 @@
-/*
- * This file is part of Libav.
+/**
+ * @file
+ * Common code for Vorbis I encoder and decoder
+ * @author Denes Balatoni  ( dbalatoni programozo hu )
  *
- * Libav is free software; you can redistribute it and/or
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -67,7 +71,7 @@ int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, unsigned num)
 
     codes[p] = 0;
     if (bits[p] > 32)
-        return 1;
+        return AVERROR_INVALIDDATA;
     for (i = 0; i < bits[p]; ++i)
         exit_at_level[i+1] = 1 << i;
 
@@ -81,9 +85,14 @@ int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, unsigned num)
 
     ++p;
 
+    for (i = p; (bits[i] == 0) && (i < num); ++i)
+        ;
+    if (i == num)
+        return 0;
+
     for (; p < num; ++p) {
         if (bits[p] > 32)
-             return 1;
+             return AVERROR_INVALIDDATA;
         if (bits[p] == 0)
              continue;
         // find corresponding exit(node which the tree can grow further from)
@@ -91,7 +100,7 @@ int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, unsigned num)
             if (exit_at_level[i])
                 break;
         if (!i) // overspecified tree
-             return 1;
+             return AVERROR_INVALIDDATA;
         code = exit_at_level[i];
         exit_at_level[i] = 0;
         // construct code (append 0s to end) and introduce new exits
@@ -112,7 +121,7 @@ int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, unsigned num)
     //no exits should be left (underspecified tree - ie. unused valid vlcs - not allowed by SPEC)
     for (p = 1; p < 33; p++)
         if (exit_at_level[p])
-            return 1;
+            return AVERROR_INVALIDDATA;
 
     return 0;
 }
diff --git a/libavcodec/vorbis.h b/libavcodec/vorbis.h
index 5ae20ac..98dd14f 100644
--- a/libavcodec/vorbis.h
+++ b/libavcodec/vorbis.h
@@ -1,20 +1,20 @@
 /*
  * copyright (c) 2006 Oded Shimon <ods15@ods15.dyndns.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/vorbis_data.c b/libavcodec/vorbis_data.c
index bafb77b..063a075 100644
--- a/libavcodec/vorbis_data.c
+++ b/libavcodec/vorbis_data.c
@@ -1,20 +1,20 @@
 /*
  * copyright (c) 2005 Denes Balatoni ( dbalatoni programozo hu )
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/vorbis_enc_data.h b/libavcodec/vorbis_enc_data.h
index a1e743e..a51aaec 100644
--- a/libavcodec/vorbis_enc_data.h
+++ b/libavcodec/vorbis_enc_data.h
@@ -1,20 +1,20 @@
 /*
  * copyright (c) 2006 Oded Shimon <ods15@ods15.dyndns.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -492,13 +492,13 @@ static const struct {
     int dim;
     int subclass;
     int masterbook;
-    const int *nbooks;
+    const int nbooks[4];
 } floor_classes[] = {
-    { 3, 0, 0, (const int[]){  4             } },
-    { 4, 1, 0, (const int[]){  5,  6         } },
-    { 3, 1, 1, (const int[]){  7,  8         } },
-    { 4, 2, 2, (const int[]){ -1,  9, 10, 11 } },
-    { 3, 2, 3, (const int[]){ -1, 12, 13, 14 } },
+    { 3, 0, 0, {  4             } },
+    { 4, 1, 0, {  5,  6         } },
+    { 3, 1, 1, {  7,  8         } },
+    { 4, 2, 2, { -1,  9, 10, 11 } },
+    { 3, 2, 3, { -1, 12, 13, 14 } },
 };
 
 #endif /* AVCODEC_VORBIS_ENC_DATA_H */
diff --git a/libavcodec/vorbis_parser.c b/libavcodec/vorbis_parser.c
index c413135..1e2cab3 100644
--- a/libavcodec/vorbis_parser.c
+++ b/libavcodec/vorbis_parser.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2012 Justin Ruggles
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -165,7 +165,7 @@ static int parse_setup_header(AVCodecContext *avctx, VorbisParseContext *s,
     skip_bits_long(&gb, got_framing_bit);
     for (i = mode_count - 1; i >= 0; i--) {
         skip_bits_long(&gb, 40);
-        s->mode_blocksize[i] = s->blocksize[get_bits1(&gb)];
+        s->mode_blocksize[i] = get_bits1(&gb);
     }
 
 bad_header:
@@ -196,13 +196,13 @@ int avpriv_vorbis_parse_extradata(AVCodecContext *avctx, VorbisParseContext *s)
         return ret;
 
     s->valid_extradata = 1;
-    s->previous_blocksize = s->mode_blocksize[0];
+    s->previous_blocksize = s->blocksize[s->mode_blocksize[0]];
 
     return 0;
 }
 
-int avpriv_vorbis_parse_frame(VorbisParseContext *s, const uint8_t *buf,
-                              int buf_size)
+int avpriv_vorbis_parse_frame_flags(VorbisParseContext *s, const uint8_t *buf,
+                                    int buf_size, int *flags)
 {
     int duration = 0;
 
@@ -211,6 +211,22 @@ int avpriv_vorbis_parse_frame(VorbisParseContext *s, const uint8_t *buf,
         int previous_blocksize = s->previous_blocksize;
 
         if (buf[0] & 1) {
+            /* If the user doesn't care about special packets, it's a bad one. */
+            if (!flags)
+                goto bad_packet;
+
+            /* Set the flag for which kind of special packet it is. */
+            if (buf[0] == 1)
+                *flags |= VORBIS_FLAG_HEADER;
+            else if (buf[0] == 3)
+                *flags |= VORBIS_FLAG_COMMENT;
+            else
+                goto bad_packet;
+
+            /* Special packets have no duration. */
+            return 0;
+
+bad_packet:
             av_log(s->avctx, AV_LOG_ERROR, "Invalid packet\n");
             return AVERROR_INVALIDDATA;
         }
@@ -222,11 +238,11 @@ int avpriv_vorbis_parse_frame(VorbisParseContext *s, const uint8_t *buf,
             av_log(s->avctx, AV_LOG_ERROR, "Invalid mode in packet\n");
             return AVERROR_INVALIDDATA;
         }
-        if (mode) {
+        if(s->mode_blocksize[mode]){
             int flag = !!(buf[0] & s->prev_mask);
             previous_blocksize = s->blocksize[flag];
         }
-        current_blocksize     = s->mode_blocksize[mode];
+        current_blocksize     = s->blocksize[s->mode_blocksize[mode]];
         duration              = (previous_blocksize + current_blocksize) >> 2;
         s->previous_blocksize = current_blocksize;
     }
@@ -234,10 +250,16 @@ int avpriv_vorbis_parse_frame(VorbisParseContext *s, const uint8_t *buf,
     return duration;
 }
 
+int avpriv_vorbis_parse_frame(VorbisParseContext *s, const uint8_t *buf,
+                              int buf_size)
+{
+    return avpriv_vorbis_parse_frame_flags(s, buf, buf_size, NULL);
+}
+
 void avpriv_vorbis_parse_reset(VorbisParseContext *s)
 {
     if (s->valid_extradata)
-        s->previous_blocksize = s->mode_blocksize[0];
+        s->previous_blocksize = s->blocksize[0];
 }
 
 #if CONFIG_VORBIS_PARSER
diff --git a/libavcodec/vorbis_parser.h b/libavcodec/vorbis_parser.h
index 480a918..590101b 100644
--- a/libavcodec/vorbis_parser.h
+++ b/libavcodec/vorbis_parser.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2012 Justin Ruggles
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -50,6 +50,24 @@ typedef struct VorbisParseContext {
  */
 int avpriv_vorbis_parse_extradata(AVCodecContext *avctx, VorbisParseContext *s);
 
+#define VORBIS_FLAG_HEADER  0x00000001
+#define VORBIS_FLAG_COMMENT 0x00000002
+
+/**
+ * Get the duration for a Vorbis packet.
+ *
+ * avpriv_vorbis_parse_extradata() must have been successfully called prior to
+ * this in order for a correct duration to be returned. If @p flags is @c NULL,
+ * special frames are considered invalid.
+ *
+ * @param s        Vorbis parser context
+ * @param buf      buffer containing a Vorbis frame
+ * @param buf_size size of the buffer
+ * @param flags    flags for special frames
+ */
+int avpriv_vorbis_parse_frame_flags(VorbisParseContext *s, const uint8_t *buf,
+                                    int buf_size, int *flags);
+
 /**
  * Get the duration for a Vorbis packet.
  *
diff --git a/libavcodec/vorbisdec.c b/libavcodec/vorbisdec.c
index d7fec98..87d1bbb 100644
--- a/libavcodec/vorbisdec.c
+++ b/libavcodec/vorbisdec.c
@@ -1,18 +1,22 @@
-/*
- * This file is part of Libav.
+/**
+ * @file
+ * Vorbis I decoder
+ * @author Denes Balatoni  ( dbalatoni programozo hu )
+ *
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,6 +31,7 @@
 
 #define BITSTREAM_READER_LE
 #include "libavutil/float_dsp.h"
+#include "libavutil/avassert.h"
 #include "avcodec.h"
 #include "get_bits.h"
 #include "fft.h"
@@ -146,7 +151,7 @@ typedef struct vorbis_context_s {
     uint8_t       mode_count;
     vorbis_mode  *modes;
     uint8_t       mode_number; // mode number for the current packet
-    uint8_t       previous_window;
+    int8_t       previous_window;
     float        *channel_residues;
     float        *saved;
 } vorbis_context;
@@ -189,37 +194,41 @@ static void vorbis_free(vorbis_context *vc)
     av_freep(&vc->channel_residues);
     av_freep(&vc->saved);
 
-    for (i = 0; i < vc->residue_count; i++)
-        av_free(vc->residues[i].classifs);
+    if (vc->residues)
+        for (i = 0; i < vc->residue_count; i++)
+            av_free(vc->residues[i].classifs);
     av_freep(&vc->residues);
     av_freep(&vc->modes);
 
     ff_mdct_end(&vc->mdct[0]);
     ff_mdct_end(&vc->mdct[1]);
 
-    for (i = 0; i < vc->codebook_count; ++i) {
-        av_free(vc->codebooks[i].codevectors);
-        ff_free_vlc(&vc->codebooks[i].vlc);
-    }
+    if (vc->codebooks)
+        for (i = 0; i < vc->codebook_count; ++i) {
+            av_free(vc->codebooks[i].codevectors);
+            ff_free_vlc(&vc->codebooks[i].vlc);
+        }
     av_freep(&vc->codebooks);
 
-    for (i = 0; i < vc->floor_count; ++i) {
-        if (vc->floors[i].floor_type == 0) {
-            av_free(vc->floors[i].data.t0.map[0]);
-            av_free(vc->floors[i].data.t0.map[1]);
-            av_free(vc->floors[i].data.t0.book_list);
-            av_free(vc->floors[i].data.t0.lsp);
-        } else {
-            av_free(vc->floors[i].data.t1.list);
+    if (vc->floors)
+        for (i = 0; i < vc->floor_count; ++i) {
+            if (vc->floors[i].floor_type == 0) {
+                av_free(vc->floors[i].data.t0.map[0]);
+                av_free(vc->floors[i].data.t0.map[1]);
+                av_free(vc->floors[i].data.t0.book_list);
+                av_free(vc->floors[i].data.t0.lsp);
+            } else {
+                av_free(vc->floors[i].data.t1.list);
+            }
         }
-    }
     av_freep(&vc->floors);
 
-    for (i = 0; i < vc->mapping_count; ++i) {
-        av_free(vc->mappings[i].magnitude);
-        av_free(vc->mappings[i].angle);
-        av_free(vc->mappings[i].mux);
-    }
+    if (vc->mappings)
+        for (i = 0; i < vc->mapping_count; ++i) {
+            av_free(vc->mappings[i].magnitude);
+            av_free(vc->mappings[i].angle);
+            av_free(vc->mappings[i].mux);
+        }
     av_freep(&vc->mappings);
 }
 
@@ -369,7 +378,7 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc)
             }
 
 // Weed out unused vlcs and build codevector vector
-            codebook_setup->codevectors = used_entries ? av_mallocz(used_entries *
+            codebook_setup->codevectors = used_entries ? av_mallocz_array(used_entries,
                                                                     codebook_setup->dimensions *
                                                                     sizeof(*codebook_setup->codevectors))
                                                        : NULL;
@@ -552,7 +561,7 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc)
             for (j = 0; j < floor_setup->data.t1.partitions; ++j)
                 floor_setup->data.t1.x_list_dim+=floor_setup->data.t1.class_dimensions[floor_setup->data.t1.partition_class[j]];
 
-            floor_setup->data.t1.list = av_mallocz(floor_setup->data.t1.x_list_dim *
+            floor_setup->data.t1.list = av_mallocz_array(floor_setup->data.t1.x_list_dim,
                                                    sizeof(*floor_setup->data.t1.list));
             if (!floor_setup->data.t1.list)
                 return AVERROR(ENOMEM);
@@ -631,8 +640,8 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc)
             /* codebook dim is for padding if codebook dim doesn't *
              * divide order+1 then we need to read more data       */
             floor_setup->data.t0.lsp =
-                av_malloc((floor_setup->data.t0.order + 1 + max_codebook_dim)
-                          * sizeof(*floor_setup->data.t0.lsp));
+                av_malloc_array((floor_setup->data.t0.order + 1 + max_codebook_dim),
+                                sizeof(*floor_setup->data.t0.lsp));
             if (!floor_setup->data.t0.lsp)
                 return AVERROR(ENOMEM);
 
@@ -692,8 +701,7 @@ static int vorbis_parse_setup_hdr_residues(vorbis_context *vc)
         res_setup->partition_size = get_bits(gb, 24) + 1;
         /* Validations to prevent a buffer overflow later. */
         if (res_setup->begin>res_setup->end ||
-            res_setup->end > (res_setup->type == 2 ? vc->avctx->channels : 1) * vc->blocksize[1] / 2 ||
-            (res_setup->end-res_setup->begin) / res_setup->partition_size > V_MAX_PARTITIONS) {
+            (res_setup->end-res_setup->begin) / res_setup->partition_size > FFMIN(V_MAX_PARTITIONS, 65535)) {
             av_log(vc->avctx, AV_LOG_ERROR,
                    "partition out of bounds: type, begin, end, size, blocksize: %"PRIu16", %"PRIu32", %"PRIu32", %u, %"PRIu32"\n",
                    res_setup->type, res_setup->begin, res_setup->end,
@@ -706,7 +714,7 @@ static int vorbis_parse_setup_hdr_residues(vorbis_context *vc)
 
         res_setup->ptns_to_read =
             (res_setup->end - res_setup->begin) / res_setup->partition_size;
-        res_setup->classifs = av_malloc(res_setup->ptns_to_read *
+        res_setup->classifs = av_malloc_array(res_setup->ptns_to_read,
                                         vc->audio_channels *
                                         sizeof(*res_setup->classifs));
         if (!res_setup->classifs)
@@ -799,7 +807,7 @@ static int vorbis_parse_setup_hdr_mappings(vorbis_context *vc)
         }
 
         if (mapping_setup->submaps>1) {
-            mapping_setup->mux = av_mallocz(vc->audio_channels *
+            mapping_setup->mux = av_mallocz_array(vc->audio_channels,
                                             sizeof(*mapping_setup->mux));
             if (!mapping_setup->mux)
                 return AVERROR(ENOMEM);
@@ -834,7 +842,7 @@ static int create_map(vorbis_context *vc, unsigned floor_number)
     for (blockflag = 0; blockflag < 2; ++blockflag) {
         n = vc->blocksize[blockflag] / 2;
         floors[floor_number].data.t0.map[blockflag] =
-            av_malloc((n + 1) * sizeof(int32_t)); // n + sentinel
+            av_malloc_array(n + 1, sizeof(int32_t)); // n + sentinel
         if (!floors[floor_number].data.t0.map[blockflag])
             return AVERROR(ENOMEM);
 
@@ -961,12 +969,12 @@ static int vorbis_parse_id_hdr(vorbis_context *vc)
     vc->bitrate_minimum = get_bits_long(gb, 32);
     bl0 = get_bits(gb, 4);
     bl1 = get_bits(gb, 4);
-    vc->blocksize[0] = (1 << bl0);
-    vc->blocksize[1] = (1 << bl1);
     if (bl0 > 13 || bl0 < 6 || bl1 > 13 || bl1 < 6 || bl1 < bl0) {
         av_log(vc->avctx, AV_LOG_ERROR, " Vorbis id header packet corrupt (illegal blocksize). \n");
         return AVERROR_INVALIDDATA;
     }
+    vc->blocksize[0] = (1 << bl0);
+    vc->blocksize[1] = (1 << bl1);
     vc->win[0] = ff_vorbis_vwin[bl0 - 6];
     vc->win[1] = ff_vorbis_vwin[bl1 - 6];
 
@@ -975,12 +983,12 @@ static int vorbis_parse_id_hdr(vorbis_context *vc)
         return AVERROR_INVALIDDATA;
     }
 
-    vc->channel_residues =  av_malloc((vc->blocksize[1]  / 2) * vc->audio_channels * sizeof(*vc->channel_residues));
-    vc->saved            =  av_mallocz((vc->blocksize[1] / 4) * vc->audio_channels * sizeof(*vc->saved));
+    vc->channel_residues =  av_malloc_array(vc->blocksize[1]  / 2, vc->audio_channels * sizeof(*vc->channel_residues));
+    vc->saved            =  av_mallocz_array(vc->blocksize[1] / 4, vc->audio_channels * sizeof(*vc->saved));
     if (!vc->channel_residues || !vc->saved)
         return AVERROR(ENOMEM);
 
-    vc->previous_window  = 0;
+    vc->previous_window  = -1;
 
     ff_mdct_init(&vc->mdct[0], bl0, 1, -1.0);
     ff_mdct_init(&vc->mdct[1], bl1, 1, -1.0);
@@ -1185,7 +1193,7 @@ static int vorbis_floor1_decode(vorbis_context *vc,
     uint16_t floor1_Y[258];
     uint16_t floor1_Y_final[258];
     int floor1_flag[258];
-    unsigned class, cdim, cbits, csub, cval, offset, i, j;
+    unsigned partition_class, cdim, cbits, csub, cval, offset, i, j;
     int book, adx, ady, dy, off, predicted, err;
 
 
@@ -1201,28 +1209,31 @@ static int vorbis_floor1_decode(vorbis_context *vc,
 
     offset = 2;
     for (i = 0; i < vf->partitions; ++i) {
-        class = vf->partition_class[i];
-        cdim   = vf->class_dimensions[class];
-        cbits  = vf->class_subclasses[class];
+        partition_class = vf->partition_class[i];
+        cdim   = vf->class_dimensions[partition_class];
+        cbits  = vf->class_subclasses[partition_class];
         csub = (1 << cbits) - 1;
         cval = 0;
 
         av_dlog(NULL, "Cbits %u\n", cbits);
 
         if (cbits) // this reads all subclasses for this partition's class
-            cval = get_vlc2(gb, vc->codebooks[vf->class_masterbook[class]].vlc.table,
-                            vc->codebooks[vf->class_masterbook[class]].nb_bits, 3);
+            cval = get_vlc2(gb, vc->codebooks[vf->class_masterbook[partition_class]].vlc.table,
+                            vc->codebooks[vf->class_masterbook[partition_class]].nb_bits, 3);
 
         for (j = 0; j < cdim; ++j) {
-            book = vf->subclass_books[class][cval & csub];
+            book = vf->subclass_books[partition_class][cval & csub];
 
             av_dlog(NULL, "book %d Cbits %u cval %u  bits:%d\n",
                     book, cbits, cval, get_bits_count(gb));
 
             cval = cval >> cbits;
             if (book > -1) {
-                floor1_Y[offset+j] = get_vlc2(gb, vc->codebooks[book].vlc.table,
-                vc->codebooks[book].nb_bits, 3);
+                int v = get_vlc2(gb, vc->codebooks[book].vlc.table,
+                                 vc->codebooks[book].nb_bits, 3);
+                if (v < 0)
+                    return AVERROR_INVALIDDATA;
+                floor1_Y[offset+j] = v;
             } else {
                 floor1_Y[offset+j] = 0;
             }
@@ -1316,6 +1327,11 @@ static av_always_inline int setup_classifs(vorbis_context *vc,
 
             av_dlog(NULL, "Classword: %u\n", temp);
 
+            if ((int)temp < 0)
+                return temp;
+
+            av_assert0(vr->classifications > 1); //needed for inverse[]
+
             if (temp <= 65536) {
                 for (i = partition_count + c_p_c - 1; i >= partition_count; i--) {
                     temp2 = (((uint64_t)temp) * inverse_class) >> 32;
@@ -1355,6 +1371,7 @@ static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc,
     unsigned pass, ch_used, i, j, k, l;
     unsigned max_output = (ch - 1) * vlen;
     int ptns_to_read = vr->ptns_to_read;
+    int libvorbis_bug = 0;
 
     if (vr_type == 2) {
         for (j = 1; j < ch; ++j)
@@ -1369,8 +1386,13 @@ static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc,
     }
 
     if (max_output > ch_left * vlen) {
-        av_log(vc->avctx, AV_LOG_ERROR, "Insufficient output buffer\n");
-        return AVERROR_INVALIDDATA;
+        if (max_output <= ch_left * vlen + vr->partition_size*ch_used/ch) {
+            ptns_to_read--;
+            libvorbis_bug = 1;
+        } else {
+            av_log(vc->avctx, AV_LOG_ERROR, "Insufficient output buffer\n");
+            return AVERROR_INVALIDDATA;
+        }
     }
 
     av_dlog(NULL, " residue type 0/1/2 decode begin, ch: %d  cpc %d  \n", ch, c_p_c);
@@ -1381,7 +1403,9 @@ static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc,
         voffset = vr->begin;
         for (partition_count = 0; partition_count < ptns_to_read;) {  // SPEC        error
             if (!pass) {
-                setup_classifs(vc, vr, do_not_decode, ch_used, partition_count);
+                int ret;
+                if ((ret = setup_classifs(vc, vr, do_not_decode, ch_used, partition_count)) < 0)
+                    return ret;
             }
             for (i = 0; (i < c_p_c) && (partition_count < ptns_to_read); ++i) {
                 for (j_times_ptns_to_read = 0, j = 0; j < ch_used; ++j) {
@@ -1477,6 +1501,14 @@ static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc,
                 voffset += vr->partition_size;
             }
         }
+        if (libvorbis_bug && !pass) {
+            for (j = 0; j < ch_used; ++j) {
+                if (!do_not_decode[j]) {
+                    get_vlc2(&vc->gb, vc->codebooks[vr->classbook].vlc.table,
+                                vc->codebooks[vr->classbook].nb_bits, 3);
+                }
+            }
+        }
     }
     return 0;
 }
@@ -1529,7 +1561,7 @@ static int vorbis_parse_audio_packet(vorbis_context *vc, float **floor_ptr)
 {
     GetBitContext *gb = &vc->gb;
     FFTContext *mdct;
-    unsigned previous_window = vc->previous_window;
+    int previous_window = vc->previous_window;
     unsigned mode_number, blockflag, blocksize;
     int i, j;
     uint8_t no_residue[255];
@@ -1562,9 +1594,11 @@ static int vorbis_parse_audio_packet(vorbis_context *vc, float **floor_ptr)
     blocksize = vc->blocksize[blockflag];
     vlen = blocksize / 2;
     if (blockflag) {
-        previous_window = get_bits(gb, 1);
-        skip_bits1(gb); // next_window
-    }
+        int code = get_bits(gb, 2);
+        if (previous_window < 0)
+            previous_window = code>>1;
+    } else if (previous_window < 0)
+        previous_window = 0;
 
     memset(ch_res_ptr,   0, sizeof(float) * vc->audio_channels * vlen); //FIXME can this be removed ?
     for (i = 0; i < vc->audio_channels; ++i)
@@ -1699,12 +1733,49 @@ static int vorbis_decode_frame(AVCodecContext *avctx, void *data,
 
     av_dlog(NULL, "packet length %d \n", buf_size);
 
+    if (*buf == 1 && buf_size > 7) {
+        init_get_bits(gb, buf+1, buf_size*8 - 8);
+        vorbis_free(vc);
+        if ((ret = vorbis_parse_id_hdr(vc))) {
+            av_log(avctx, AV_LOG_ERROR, "Id header corrupt.\n");
+            vorbis_free(vc);
+            return ret;
+        }
+
+        if (vc->audio_channels > 8)
+            avctx->channel_layout = 0;
+        else
+            avctx->channel_layout = ff_vorbis_channel_layouts[vc->audio_channels - 1];
+
+        avctx->channels    = vc->audio_channels;
+        avctx->sample_rate = vc->audio_samplerate;
+        return buf_size;
+    }
+
+    if (*buf == 3 && buf_size > 7) {
+        av_log(avctx, AV_LOG_DEBUG, "Ignoring comment header\n");
+        return buf_size;
+    }
+
+    if (*buf == 5 && buf_size > 7 && vc->channel_residues && !vc->modes) {
+        init_get_bits(gb, buf+1, buf_size*8 - 8);
+        if ((ret = vorbis_parse_setup_hdr(vc))) {
+            av_log(avctx, AV_LOG_ERROR, "Setup header corrupt.\n");
+            vorbis_free(vc);
+            return ret;
+        }
+        return buf_size;
+    }
+
+    if (!vc->channel_residues || !vc->modes) {
+        av_log(avctx, AV_LOG_ERROR, "Data packet before valid headers\n");
+        return AVERROR_INVALIDDATA;
+    }
+
     /* get output buffer */
     frame->nb_samples = vc->blocksize[1] / 2;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
 
     if (vc->audio_channels > 8) {
         for (i = 0; i < vc->audio_channels; i++)
@@ -1756,7 +1827,8 @@ static av_cold void vorbis_decode_flush(AVCodecContext *avctx)
         memset(vc->saved, 0, (vc->blocksize[1] / 4) * vc->audio_channels *
                              sizeof(*vc->saved));
     }
-    vc->previous_window = 0;
+    vc->previous_window = -1;
+    vc->first_frame = 0;
 }
 
 AVCodec ff_vorbis_decoder = {
diff --git a/libavcodec/vorbisdsp.c b/libavcodec/vorbisdsp.c
index c37e2c4..362a276 100644
--- a/libavcodec/vorbisdsp.c
+++ b/libavcodec/vorbisdsp.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/vorbisdsp.h b/libavcodec/vorbisdsp.h
index ea41c40..7abec4e 100644
--- a/libavcodec/vorbisdsp.h
+++ b/libavcodec/vorbisdsp.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c
index 830e5d1..fa6bccc 100644
--- a/libavcodec/vorbisenc.c
+++ b/libavcodec/vorbisenc.c
@@ -1,20 +1,20 @@
 /*
  * copyright (c) 2006 Oded Shimon <ods15@ods15.dyndns.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -142,9 +142,9 @@ typedef struct {
 static inline int put_codeword(PutBitContext *pb, vorbis_enc_codebook *cb,
                                int entry)
 {
-    assert(entry >= 0);
-    assert(entry < cb->nentries);
-    assert(cb->lens[entry]);
+    av_assert2(entry >= 0);
+    av_assert2(entry < cb->nentries);
+    av_assert2(cb->lens[entry]);
     if (pb->size_in_bits - put_bits_count(pb) < cb->lens[entry])
         return AVERROR(EINVAL);
     put_bits(pb, cb->lens[entry], cb->codewords[entry]);
@@ -200,7 +200,7 @@ static int ready_codebook(vorbis_enc_codebook *cb)
 static int ready_residue(vorbis_enc_residue *rc, vorbis_enc_context *venc)
 {
     int i;
-    assert(rc->type == 2);
+    av_assert0(rc->type == 2);
     rc->maxes = av_mallocz(sizeof(float[2]) * rc->classifications);
     if (!rc->maxes)
         return AVERROR(ENOMEM);
@@ -880,8 +880,8 @@ static int residue_encode(vorbis_enc_context *venc, vorbis_enc_residue *rc,
     int classes[MAX_CHANNELS][NUM_RESIDUE_PARTITIONS];
     int classwords = venc->codebooks[rc->classbook].ndimensions;
 
-    assert(rc->type == 2);
-    assert(real_ch == 2);
+    av_assert0(rc->type == 2);
+    av_assert0(real_ch == 2);
     for (p = 0; p < partitions; p++) {
         float max1 = 0.0, max2 = 0.0;
         int s = rc->begin + p * psize;
@@ -1015,7 +1015,6 @@ static int apply_window_and_mdct(vorbis_enc_context *venc,
     return 1;
 }
 
-
 static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                                const AVFrame *frame, int *got_packet_ptr)
 {
@@ -1031,10 +1030,8 @@ static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
         return 0;
     samples = 1 << (venc->log2_blocksize[0] - 1);
 
-    if ((ret = ff_alloc_packet(avpkt, 8192))) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+    if ((ret = ff_alloc_packet2(avctx, avpkt, 8192)) < 0)
         return ret;
-    }
 
     init_put_bits(&pb, avpkt->data, avpkt->size);
 
@@ -1091,10 +1088,10 @@ static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
     avpkt->size = put_bits_count(&pb) >> 3;
 
     avpkt->duration = ff_samples_to_time_base(avctx, avctx->frame_size);
-    if (frame)
+    if (frame) {
         if (frame->pts != AV_NOPTS_VALUE)
             avpkt->pts = ff_samples_to_time_base(avctx, frame->pts);
-    else
+    } else
         avpkt->pts = venc->next_pts;
     if (avpkt->pts != AV_NOPTS_VALUE)
         venc->next_pts = avpkt->pts + avpkt->duration;
@@ -1169,7 +1166,7 @@ static av_cold int vorbis_encode_init(AVCodecContext *avctx)
     int ret;
 
     if (avctx->channels != 2) {
-        av_log(avctx, AV_LOG_ERROR, "Current Libav Vorbis encoder only supports 2 channels.\n");
+        av_log(avctx, AV_LOG_ERROR, "Current FFmpeg Vorbis encoder only supports 2 channels.\n");
         return -1;
     }
 
@@ -1180,7 +1177,7 @@ static av_cold int vorbis_encode_init(AVCodecContext *avctx)
     if (avctx->flags & CODEC_FLAG_QSCALE)
         venc->quality = avctx->global_quality / (float)FF_QP2LAMBDA;
     else
-        venc->quality = 3.0;
+        venc->quality = 8;
     venc->quality *= venc->quality;
 
     if ((ret = put_main_header(venc, (uint8_t**)&avctx->extradata)) < 0)
diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c
index c215fbb..3f04d8f 100644
--- a/libavcodec/vp3.c
+++ b/libavcodec/vp3.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2003-2004 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -77,6 +77,10 @@ typedef struct Vp3Fragment {
 /* special internal mode */
 #define MODE_COPY             8
 
+static int theora_decode_header(AVCodecContext *avctx, GetBitContext *gb);
+static int theora_decode_tables(AVCodecContext *avctx, GetBitContext *gb);
+
+
 /* There are 6 preset schemes, plus a free-form scheme */
 static const int ModeAlphabet[6][CODING_MODE_COUNT] = {
     /* scheme 1: Last motion vector dominates */
@@ -258,6 +262,20 @@ typedef struct Vp3DecodeContext {
  * VP3 specific functions
  ************************************************************************/
 
+static av_cold void free_tables(AVCodecContext *avctx) /* release the decoder's per-stream tables */
+{
+    Vp3DecodeContext *s = avctx->priv_data;
+
+    av_freep(&s->superblock_coding);
+    av_freep(&s->all_fragments); /* vp3_decode_frame() tests this pointer to detect missing headers */
+    av_freep(&s->coded_fragment_list[0]); /* only element 0 is freed here */
+    av_freep(&s->dct_tokens_base);
+    av_freep(&s->superblock_fragments);
+    av_freep(&s->macroblock_coding);
+    av_freep(&s->motion_val[0]);
+    av_freep(&s->motion_val[1]);
+} /* called from vp3_decode_end() and at the start of allocate_tables() */
+
+
 static void vp3_decode_flush(AVCodecContext *avctx)
 {
     Vp3DecodeContext *s = avctx->priv_data;
@@ -275,16 +293,11 @@ static av_cold int vp3_decode_end(AVCodecContext *avctx)
     Vp3DecodeContext *s = avctx->priv_data;
     int i;
 
-    av_freep(&s->superblock_coding);
-    av_freep(&s->all_fragments);
-    av_freep(&s->coded_fragment_list[0]);
-    av_freep(&s->dct_tokens_base);
-    av_freep(&s->superblock_fragments);
-    av_freep(&s->macroblock_coding);
-    av_freep(&s->motion_val[0]);
-    av_freep(&s->motion_val[1]);
+    free_tables(avctx);
     av_freep(&s->edge_emu_buffer);
 
+    s->theora_tables = 0;
+
     /* release all frames */
     vp3_decode_flush(avctx);
     av_frame_free(&s->current_frame.f);
@@ -310,7 +323,7 @@ static av_cold int vp3_decode_end(AVCodecContext *avctx)
     return 0;
 }
 
-/*
+/**
  * This function sets up all of the various blocks mappings:
  * superblocks <-> fragments, macroblocks <-> fragments,
  * superblocks <-> macroblocks
@@ -401,7 +414,7 @@ static void init_loop_filter(Vp3DecodeContext *s)
     int value;
 
     filter_limit = s->filter_limit_values[s->qps[0]];
-    assert(filter_limit < 128);
+    av_assert0(filter_limit < 128U);
 
     /* set up the bounding values */
     memset(s->bounding_values_array, 0, 256 * sizeof(int));
@@ -1019,7 +1032,7 @@ static int unpack_vlcs(Vp3DecodeContext *s, GetBitContext *gb,
     if (blocks_ended > s->num_coded_frags[plane][coeff_index])
         av_log(s->avctx, AV_LOG_ERROR, "More blocks ended than coded!\n");
 
-    // decrement the number of blocks that have higher coeffecients for each
+    // decrement the number of blocks that have higher coefficients for each
     // EOB run at this level
     if (blocks_ended)
         for (i = coeff_index + 1; i < 64; i++)
@@ -1598,20 +1611,14 @@ static void render_slice(Vp3DecodeContext *s, int slice)
                         /* invert DCT and place (or add) in final output */
 
                         if (s->all_fragments[i].coding_method == MODE_INTRA) {
-                            int index;
-                            index = vp3_dequant(s, s->all_fragments + i,
-                                                plane, 0, block);
-                            if (index > 63)
-                                continue;
+                            vp3_dequant(s, s->all_fragments + i,
+                                        plane, 0, block);
                             s->vp3dsp.idct_put(output_plane + first_pixel,
                                                stride,
                                                block);
                         } else {
-                            int index = vp3_dequant(s, s->all_fragments + i,
-                                                    plane, 1, block);
-                            if (index > 63)
-                                continue;
-                            if (index > 0) {
+                            if (vp3_dequant(s, s->all_fragments + i,
+                                            plane, 1, block)) {
                                 s->vp3dsp.idct_add(output_plane + first_pixel,
                                                    stride,
                                                    block);
@@ -1655,22 +1662,24 @@ static av_cold int allocate_tables(AVCodecContext *avctx)
     Vp3DecodeContext *s = avctx->priv_data;
     int y_fragment_count, c_fragment_count;
 
+    free_tables(avctx);
+
     y_fragment_count = s->fragment_width[0] * s->fragment_height[0];
     c_fragment_count = s->fragment_width[1] * s->fragment_height[1];
 
-    s->superblock_coding = av_malloc(s->superblock_count);
-    s->all_fragments     = av_malloc(s->fragment_count * sizeof(Vp3Fragment));
+    s->superblock_coding = av_mallocz(s->superblock_count);
+    s->all_fragments     = av_mallocz_array(s->fragment_count, sizeof(Vp3Fragment));
 
-    s->coded_fragment_list[0] = av_malloc(s->fragment_count * sizeof(int));
+    s->coded_fragment_list[0] = av_mallocz_array(s->fragment_count, sizeof(int));
 
-    s->dct_tokens_base = av_malloc(64 * s->fragment_count *
-                                   sizeof(*s->dct_tokens_base));
-    s->motion_val[0] = av_malloc(y_fragment_count * sizeof(*s->motion_val[0]));
-    s->motion_val[1] = av_malloc(c_fragment_count * sizeof(*s->motion_val[1]));
+    s->dct_tokens_base = av_mallocz_array(s->fragment_count,
+                                          64 * sizeof(*s->dct_tokens_base));
+    s->motion_val[0] = av_mallocz_array(y_fragment_count, sizeof(*s->motion_val[0]));
+    s->motion_val[1] = av_mallocz_array(c_fragment_count, sizeof(*s->motion_val[1]));
 
     /* work out the block mapping tables */
-    s->superblock_fragments = av_malloc(s->superblock_count * 16 * sizeof(int));
-    s->macroblock_coding    = av_malloc(s->macroblock_count + 1);
+    s->superblock_fragments = av_mallocz_array(s->superblock_count, 16 * sizeof(int));
+    s->macroblock_coding    = av_mallocz(s->macroblock_count + 1);
 
     if (!s->superblock_coding    || !s->all_fragments          ||
         !s->dct_tokens_base      || !s->coded_fragment_list[0] ||
@@ -1723,7 +1732,7 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx)
     s->avctx  = avctx;
     s->width  = FFALIGN(avctx->width, 16);
     s->height = FFALIGN(avctx->height, 16);
-    if (avctx->pix_fmt == AV_PIX_FMT_NONE)
+    if (avctx->codec_id != AV_CODEC_ID_THEORA)
         avctx->pix_fmt = AV_PIX_FMT_YUV420P;
     avctx->chroma_sample_location = AVCHROMA_LOC_CENTER;
     ff_hpeldsp_init(&s->hdsp, avctx->flags | CODEC_FLAG_BITEXACT);
@@ -1731,7 +1740,7 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx)
     ff_vp3dsp_init(&s->vp3dsp, avctx->flags);
 
     for (i = 0; i < 64; i++) {
-#define TRANSPOSE(x) (x >> 3) | ((x & 7) << 3)
+#define TRANSPOSE(x) (((x) >> 3) | (((x) & 7) << 3))
         s->idct_permutation[i] = TRANSPOSE(i);
         s->idct_scantable[i]   = TRANSPOSE(ff_zigzag_direct[i]);
 #undef TRANSPOSE
@@ -1742,8 +1751,7 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx)
     for (i = 0; i < 3; i++)
         s->qps[i] = -1;
 
-    av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift,
-                                     &s->chroma_y_shift);
+    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
 
     s->y_superblock_width  = (s->width  + 31) / 32;
     s->y_superblock_height = (s->height + 31) / 32;
@@ -1989,13 +1997,44 @@ static int vp3_decode_frame(AVCodecContext *avctx,
 
     init_get_bits(&gb, buf, buf_size * 8);
 
+#if CONFIG_THEORA_DECODER
     if (s->theora && get_bits1(&gb)) {
+        int type = get_bits(&gb, 7);
+        skip_bits_long(&gb, 6*8); /* "theora" */
+
+        if (s->avctx->active_thread_type&FF_THREAD_FRAME) {
+            av_log(avctx, AV_LOG_ERROR, "midstream reconfiguration with multithreading is unsupported, try -threads 1\n");
+            return AVERROR_PATCHWELCOME;
+        }
+        if (type == 0) {
+            vp3_decode_end(avctx);
+            ret = theora_decode_header(avctx, &gb);
+
+            if (ret < 0) {
+                vp3_decode_end(avctx);
+            } else
+                ret = vp3_decode_init(avctx);
+            return ret;
+        } else if (type == 2) {
+            ret = theora_decode_tables(avctx, &gb);
+            if (ret < 0) {
+                vp3_decode_end(avctx);
+            } else
+                ret = vp3_decode_init(avctx);
+            return ret;
+        }
+
         av_log(avctx, AV_LOG_ERROR,
                "Header packet passed to frame decoder, skipping\n");
         return -1;
     }
+#endif
 
     s->keyframe = !get_bits1(&gb);
+    if (!s->all_fragments) {
+        av_log(avctx, AV_LOG_ERROR, "Data packet without prior valid headers\n");
+        return -1;
+    }
     if (!s->theora)
         skip_bits(&gb, 1);
     for (i = 0; i < 3; i++)
@@ -2030,10 +2069,9 @@ static int vp3_decode_frame(AVCodecContext *avctx,
 
     s->current_frame.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
                                                 : AV_PICTURE_TYPE_P;
-    if (ff_thread_get_buffer(avctx, &s->current_frame, AV_GET_BUFFER_FLAG_REF) < 0) {
-        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    s->current_frame.f->key_frame = s->keyframe;
+    if (ff_thread_get_buffer(avctx, &s->current_frame, AV_GET_BUFFER_FLAG_REF) < 0)
         goto error;
-    }
 
     if (!s->edge_emu_buffer)
         s->edge_emu_buffer = av_malloc(9 * FFABS(s->current_frame.f->linesize[0]));
@@ -2062,10 +2100,8 @@ static int vp3_decode_frame(AVCodecContext *avctx,
 
             s->golden_frame.f->pict_type = AV_PICTURE_TYPE_I;
             if (ff_thread_get_buffer(avctx, &s->golden_frame,
-                                     AV_GET_BUFFER_FLAG_REF) < 0) {
-                av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+                                     AV_GET_BUFFER_FLAG_REF) < 0)
                 goto error;
-            }
             ff_thread_release_buffer(avctx, &s->last_frame);
             if ((ret = ff_thread_ref_frame(&s->last_frame,
                                            &s->golden_frame)) < 0)
@@ -2256,6 +2292,10 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext *gb)
     if (s->theora >= 0x030200) {
         skip_bits(gb, 5); /* keyframe frequency force */
         avctx->pix_fmt = theora_pix_fmts[get_bits(gb, 2)];
+        if (avctx->pix_fmt == AV_PIX_FMT_NONE) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid pixel format\n");
+            return AVERROR_INVALIDDATA;
+        }
         skip_bits(gb, 3); /* reserved */
     }
 
@@ -2401,6 +2441,8 @@ static av_cold int theora_decode_init(AVCodecContext *avctx)
     int header_len[3];
     int i;
 
+    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
+
     s->theora = 1;
 
     if (!avctx->extradata_size) {
@@ -2431,7 +2473,8 @@ static av_cold int theora_decode_init(AVCodecContext *avctx)
 
         switch (ptype) {
         case 0x80:
-            theora_decode_header(avctx, &gb);
+            if (theora_decode_header(avctx, &gb) < 0)
+                return -1;
             break;
         case 0x81:
 // FIXME: is this needed? it breaks sometimes
diff --git a/libavcodec/vp3_parser.c b/libavcodec/vp3_parser.c
index e8fdcca..7ee046c 100644
--- a/libavcodec/vp3_parser.c
+++ b/libavcodec/vp3_parser.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2008 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/vp3data.h b/libavcodec/vp3data.h
index da325c0..bffc5bc 100644
--- a/libavcodec/vp3data.h
+++ b/libavcodec/vp3data.h
@@ -1,20 +1,20 @@
 /*
  * copyright (C) 2003 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/vp3dsp.c b/libavcodec/vp3dsp.c
index af687ec..6cf7e12 100644
--- a/libavcodec/vp3dsp.c
+++ b/libavcodec/vp3dsp.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2004 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,7 +25,6 @@
  */
 
 #include "libavutil/attributes.h"
-#include "libavutil/intreadwrite.h"
 #include "libavutil/common.h"
 #include "libavutil/intreadwrite.h"
 
diff --git a/libavcodec/vp3dsp.h b/libavcodec/vp3dsp.h
index 3099a7e..b95adae 100644
--- a/libavcodec/vp3dsp.h
+++ b/libavcodec/vp3dsp.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/vp5.c b/libavcodec/vp5.c
index b609282..1923d63 100644
--- a/libavcodec/vp5.c
+++ b/libavcodec/vp5.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -35,8 +35,7 @@
 #include "vp5data.h"
 
 
-static int vp5_parse_header(VP56Context *s, const uint8_t *buf, int buf_size,
-                            int *golden_frame)
+static int vp5_parse_header(VP56Context *s, const uint8_t *buf, int buf_size)
 {
     VP56RangeCoder *c = &s->c;
     int rows, cols;
@@ -86,7 +85,7 @@ static void vp5_parse_vector_adjustment(VP56Context *s, VP56mv *vect)
 
     for (comp=0; comp<2; comp++) {
         int delta = 0;
-        if (vp56_rac_get_prob(c, model->vector_dct[comp])) {
+        if (vp56_rac_get_prob_branchy(c, model->vector_dct[comp])) {
             int sign = vp56_rac_get_prob(c, model->vector_sig[comp]);
             di  = vp56_rac_get_prob(c, model->vector_pdi[comp][0]);
             di |= vp56_rac_get_prob(c, model->vector_pdi[comp][1]) << 1;
@@ -109,19 +108,19 @@ static void vp5_parse_vector_models(VP56Context *s)
     int comp, node;
 
     for (comp=0; comp<2; comp++) {
-        if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][0]))
+        if (vp56_rac_get_prob_branchy(c, vp5_vmc_pct[comp][0]))
             model->vector_dct[comp] = vp56_rac_gets_nn(c, 7);
-        if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][1]))
+        if (vp56_rac_get_prob_branchy(c, vp5_vmc_pct[comp][1]))
             model->vector_sig[comp] = vp56_rac_gets_nn(c, 7);
-        if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][2]))
+        if (vp56_rac_get_prob_branchy(c, vp5_vmc_pct[comp][2]))
             model->vector_pdi[comp][0] = vp56_rac_gets_nn(c, 7);
-        if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][3]))
+        if (vp56_rac_get_prob_branchy(c, vp5_vmc_pct[comp][3]))
             model->vector_pdi[comp][1] = vp56_rac_gets_nn(c, 7);
     }
 
     for (comp=0; comp<2; comp++)
         for (node=0; node<7; node++)
-            if (vp56_rac_get_prob(c, vp5_vmc_pct[comp][4 + node]))
+            if (vp56_rac_get_prob_branchy(c, vp5_vmc_pct[comp][4 + node]))
                 model->vector_pdv[comp][node] = vp56_rac_gets_nn(c, 7);
 }
 
@@ -138,7 +137,7 @@ static int vp5_parse_coeff_models(VP56Context *s)
 
     for (pt=0; pt<2; pt++)
         for (node=0; node<11; node++)
-            if (vp56_rac_get_prob(c, vp5_dccv_pct[pt][node])) {
+            if (vp56_rac_get_prob_branchy(c, vp5_dccv_pct[pt][node])) {
                 def_prob[node] = vp56_rac_gets_nn(c, 7);
                 model->coeff_dccv[pt][node] = def_prob[node];
             } else if (s->frames[VP56_FRAME_CURRENT]->key_frame) {
@@ -149,7 +148,7 @@ static int vp5_parse_coeff_models(VP56Context *s)
         for (pt=0; pt<2; pt++)
             for (cg=0; cg<6; cg++)
                 for (node=0; node<11; node++)
-                    if (vp56_rac_get_prob(c, vp5_ract_pct[ct][pt][cg][node])) {
+                    if (vp56_rac_get_prob_branchy(c, vp5_ract_pct[ct][pt][cg][node])) {
                         def_prob[node] = vp56_rac_gets_nn(c, 7);
                         model->coeff_ract[pt][ct][cg][node] = def_prob[node];
                     } else if (s->frames[VP56_FRAME_CURRENT]->key_frame) {
@@ -194,9 +193,9 @@ static void vp5_parse_coeff(VP56Context *s)
 
         coeff_idx = 0;
         for (;;) {
-            if (vp56_rac_get_prob(c, model2[0])) {
-                if (vp56_rac_get_prob(c, model2[2])) {
-                    if (vp56_rac_get_prob(c, model2[3])) {
+            if (vp56_rac_get_prob_branchy(c, model2[0])) {
+                if (vp56_rac_get_prob_branchy(c, model2[2])) {
+                    if (vp56_rac_get_prob_branchy(c, model2[3])) {
                         s->coeff_ctx[ff_vp56_b6to4[b]][coeff_idx] = 4;
                         idx = vp56_rac_get_tree(c, ff_vp56_pc_tree, model1);
                         sign = vp56_rac_get(c);
@@ -204,7 +203,7 @@ static void vp5_parse_coeff(VP56Context *s)
                         for (i=ff_vp56_coeff_bit_length[idx]; i>=0; i--)
                             coeff += vp56_rac_get_prob(c, ff_vp56_coeff_parse_table[idx][i]) << i;
                     } else {
-                        if (vp56_rac_get_prob(c, model2[4])) {
+                        if (vp56_rac_get_prob_branchy(c, model2[4])) {
                             coeff = 3 + vp56_rac_get_prob(c, model1[5]);
                             s->coeff_ctx[ff_vp56_b6to4[b]][coeff_idx] = 3;
                         } else {
@@ -225,7 +224,7 @@ static void vp5_parse_coeff(VP56Context *s)
                     coeff *= s->dequant_ac;
                 s->block_coeff[b][permute[coeff_idx]] = coeff;
             } else {
-                if (ct && !vp56_rac_get_prob(c, model2[1]))
+                if (ct && !vp56_rac_get_prob_branchy(c, model2[1]))
                     break;
                 ct = 0;
                 s->coeff_ctx[ff_vp56_b6to4[b]][coeff_idx] = 0;
diff --git a/libavcodec/vp56.c b/libavcodec/vp56.c
index b9d3c87..ba39b56 100644
--- a/libavcodec/vp56.c
+++ b/libavcodec/vp56.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -83,16 +83,16 @@ static void vp56_parse_mb_type_models(VP56Context *s)
     int i, ctx, type;
 
     for (ctx=0; ctx<3; ctx++) {
-        if (vp56_rac_get_prob(c, 174)) {
+        if (vp56_rac_get_prob_branchy(c, 174)) {
             int idx = vp56_rac_gets(c, 4);
             memcpy(model->mb_types_stats[ctx],
                    ff_vp56_pre_def_mb_type_stats[idx][ctx],
                    sizeof(model->mb_types_stats[ctx]));
         }
-        if (vp56_rac_get_prob(c, 254)) {
+        if (vp56_rac_get_prob_branchy(c, 254)) {
             for (type=0; type<10; type++) {
                 for(i=0; i<2; i++) {
-                    if (vp56_rac_get_prob(c, 205)) {
+                    if (vp56_rac_get_prob_branchy(c, 205)) {
                         int delta, sign = vp56_rac_get(c);
 
                         delta = vp56_rac_get_tree(c, ff_vp56_pmbtm_tree,
@@ -153,7 +153,7 @@ static VP56mb vp56_parse_mb_type(VP56Context *s,
     uint8_t *mb_type_model = s->modelp->mb_type[ctx][prev_type];
     VP56RangeCoder *c = &s->c;
 
-    if (vp56_rac_get_prob(c, mb_type_model[0]))
+    if (vp56_rac_get_prob_branchy(c, mb_type_model[0]))
         return prev_type;
     else
         return vp56_rac_get_tree(c, ff_vp56_pmbt_tree, mb_type_model);
@@ -340,11 +340,11 @@ static void vp56_mc(VP56Context *s, int b, int plane, uint8_t *src,
     if (x<0 || x+12>=s->plane_width[plane] ||
         y<0 || y+12>=s->plane_height[plane]) {
         s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
-                            src + s->block_offset[b] + (dy-2)*stride + (dx-2),
-                            stride, stride,
-                            12, 12, x, y,
-                            s->plane_width[plane],
-                            s->plane_height[plane]);
+                                 src + s->block_offset[b] + (dy-2)*stride + (dx-2),
+                                 stride, stride,
+                                 12, 12, x, y,
+                                 s->plane_width[plane],
+                                 s->plane_height[plane]);
         src_block = s->edge_emu_buffer;
         src_offset = 2 + 2*stride;
     } else if (deblock_filtering) {
@@ -453,9 +453,9 @@ static void vp56_decode_mb(VP56Context *s, int row, int col, int is_alpha)
     }
 }
 
-static int vp56_size_changed(AVCodecContext *avctx)
+static int vp56_size_changed(VP56Context *s)
 {
-    VP56Context *s = avctx->priv_data;
+    AVCodecContext *avctx = s->avctx;
     int stride = s->frames[VP56_FRAME_CURRENT]->linesize[0];
     int i;
 
@@ -476,19 +476,26 @@ static int vp56_size_changed(AVCodecContext *avctx)
         return -1;
     }
 
-    s->above_blocks = av_realloc(s->above_blocks,
-                                 (4*s->mb_width+6) * sizeof(*s->above_blocks));
-    s->macroblocks = av_realloc(s->macroblocks,
-                                s->mb_width*s->mb_height*sizeof(*s->macroblocks));
+    av_reallocp_array(&s->above_blocks, 4*s->mb_width+6,
+                      sizeof(*s->above_blocks));
+    av_reallocp_array(&s->macroblocks, s->mb_width*s->mb_height,
+                      sizeof(*s->macroblocks));
     av_free(s->edge_emu_buffer_alloc);
     s->edge_emu_buffer_alloc = av_malloc(16*stride);
     s->edge_emu_buffer = s->edge_emu_buffer_alloc;
+    if (!s->above_blocks || !s->macroblocks || !s->edge_emu_buffer_alloc)
+        return AVERROR(ENOMEM);
     if (s->flip < 0)
         s->edge_emu_buffer += 15 * stride;
 
+    if (s->alpha_context)
+        return vp56_size_changed(s->alpha_context);
+
     return 0;
 }
 
+static int ff_vp56_decode_mbs(AVCodecContext *avctx, void *, int, int);
+
 int ff_vp56_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                          AVPacket *avpkt)
 {
@@ -496,8 +503,9 @@ int ff_vp56_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     VP56Context *s = avctx->priv_data;
     AVFrame *const p = s->frames[VP56_FRAME_CURRENT];
     int remaining_buf_size = avpkt->size;
-    int is_alpha, av_uninit(alpha_offset);
-    int res;
+    int av_uninit(alpha_offset);
+    int i, res;
+    int ret;
 
     if (s->has_alpha) {
         if (remaining_buf_size < 3)
@@ -508,155 +516,183 @@ int ff_vp56_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
             return -1;
     }
 
-    for (is_alpha=0; is_alpha < 1+s->has_alpha; is_alpha++) {
-        int mb_row, mb_col, mb_row_flip, mb_offset = 0;
-        int block, y, uv;
-        ptrdiff_t stride_y, stride_uv;
-        int golden_frame = 0;
+    res = s->parse_header(s, buf, remaining_buf_size);
+    if (res < 0)
+        return res;
 
-        s->modelp = &s->models[is_alpha];
+    if (res == VP56_SIZE_CHANGE) {
+        for (i = 0; i < 4; i++) {
+            av_frame_unref(s->frames[i]);
+            if (s->alpha_context)
+                av_frame_unref(s->alpha_context->frames[i]);
+        }
+    }
 
-        res = s->parse_header(s, buf, remaining_buf_size, &golden_frame);
-        if (res < 0) {
-            int i;
-            for (i = 0; i < 4; i++)
-                av_frame_unref(s->frames[i]);
-            return res;
+    if (ff_get_buffer(avctx, p, AV_GET_BUFFER_FLAG_REF) < 0)
+        return -1;
+
+    if (avctx->pix_fmt == AV_PIX_FMT_YUVA420P) {
+        av_frame_unref(s->alpha_context->frames[VP56_FRAME_CURRENT]);
+        if ((ret = av_frame_ref(s->alpha_context->frames[VP56_FRAME_CURRENT], p)) < 0) {
+            av_frame_unref(p);
+            return ret;
         }
+    }
 
-        if (res == VP56_SIZE_CHANGE) {
-            int i;
-            for (i = 0; i < 4; i++)
-                av_frame_unref(s->frames[i]);
-            if (is_alpha) {
-                ff_set_dimensions(avctx, 0, 0);
-                return -1;
-            }
+    if (res == VP56_SIZE_CHANGE) {
+        if (vp56_size_changed(s)) {
+            av_frame_unref(p);
+            return -1;
         }
+    }
 
-        if (!is_alpha) {
-            if (ff_get_buffer(avctx, p, AV_GET_BUFFER_FLAG_REF) < 0) {
-                av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
-                return -1;
+    if (avctx->pix_fmt == AV_PIX_FMT_YUVA420P) {
+        int bak_w = avctx->width;
+        int bak_h = avctx->height;
+        int bak_cw = avctx->coded_width;
+        int bak_ch = avctx->coded_height;
+        buf += alpha_offset;
+        remaining_buf_size -= alpha_offset;
+
+        res = s->alpha_context->parse_header(s->alpha_context, buf, remaining_buf_size);
+        if (res != 0) {
+            if(res==VP56_SIZE_CHANGE) {
+                av_log(avctx, AV_LOG_ERROR, "Alpha reconfiguration\n");
+                avctx->width  = bak_w;
+                avctx->height = bak_h;
+                avctx->coded_width  = bak_cw;
+                avctx->coded_height = bak_ch;
             }
-
-            if (res == VP56_SIZE_CHANGE)
-                if (vp56_size_changed(avctx)) {
-                    av_frame_unref(p);
-                    return -1;
-                }
+            av_frame_unref(p);
+            return -1;
         }
+    }
 
-        if (p->key_frame) {
-            p->pict_type = AV_PICTURE_TYPE_I;
-            s->default_models_init(s);
-            for (block=0; block<s->mb_height*s->mb_width; block++)
-                s->macroblocks[block].type = VP56_MB_INTRA;
-        } else {
-            p->pict_type = AV_PICTURE_TYPE_P;
-            vp56_parse_mb_type_models(s);
-            s->parse_vector_models(s);
-            s->mb_type = VP56_MB_INTER_NOVEC_PF;
-        }
+    avctx->execute2(avctx, ff_vp56_decode_mbs, 0, 0, (avctx->pix_fmt == AV_PIX_FMT_YUVA420P) + 1);
 
-        if (s->parse_coeff_models(s))
-            goto next;
+    if ((res = av_frame_ref(data, p)) < 0)
+        return res;
+    *got_frame = 1;
 
-        memset(s->prev_dc, 0, sizeof(s->prev_dc));
-        s->prev_dc[1][VP56_FRAME_CURRENT] = 128;
-        s->prev_dc[2][VP56_FRAME_CURRENT] = 128;
+    return avpkt->size;
+}
 
-        for (block=0; block < 4*s->mb_width+6; block++) {
-            s->above_blocks[block].ref_frame = VP56_FRAME_NONE;
-            s->above_blocks[block].dc_coeff = 0;
-            s->above_blocks[block].not_null_dc = 0;
-        }
-        s->above_blocks[2*s->mb_width + 2].ref_frame = VP56_FRAME_CURRENT;
-        s->above_blocks[3*s->mb_width + 4].ref_frame = VP56_FRAME_CURRENT;
+static int ff_vp56_decode_mbs(AVCodecContext *avctx, void *data,
+                              int jobnr, int threadnr)
+{
+    VP56Context *s0 = avctx->priv_data;
+    int is_alpha = (jobnr == 1);
+    VP56Context *s = is_alpha ? s0->alpha_context : s0;
+    AVFrame *const p = s->frames[VP56_FRAME_CURRENT];
+    int mb_row, mb_col, mb_row_flip, mb_offset = 0;
+    int block, y, uv;
+    ptrdiff_t stride_y, stride_uv;
+    int res;
+
+    if (p->key_frame) {
+        p->pict_type = AV_PICTURE_TYPE_I;
+        s->default_models_init(s);
+        for (block=0; block<s->mb_height*s->mb_width; block++)
+            s->macroblocks[block].type = VP56_MB_INTRA;
+    } else {
+        p->pict_type = AV_PICTURE_TYPE_P;
+        vp56_parse_mb_type_models(s);
+        s->parse_vector_models(s);
+        s->mb_type = VP56_MB_INTER_NOVEC_PF;
+    }
 
-        stride_y  = p->linesize[0];
-        stride_uv = p->linesize[1];
+    if (s->parse_coeff_models(s))
+        goto next;
 
+    memset(s->prev_dc, 0, sizeof(s->prev_dc));
+    s->prev_dc[1][VP56_FRAME_CURRENT] = 128;
+    s->prev_dc[2][VP56_FRAME_CURRENT] = 128;
+
+    for (block=0; block < 4*s->mb_width+6; block++) {
+        s->above_blocks[block].ref_frame = VP56_FRAME_NONE;
+        s->above_blocks[block].dc_coeff = 0;
+        s->above_blocks[block].not_null_dc = 0;
+    }
+    s->above_blocks[2*s->mb_width + 2].ref_frame = VP56_FRAME_CURRENT;
+    s->above_blocks[3*s->mb_width + 4].ref_frame = VP56_FRAME_CURRENT;
+
+    stride_y  = p->linesize[0];
+    stride_uv = p->linesize[1];
+
+    if (s->flip < 0)
+        mb_offset = 7;
+
+    /* main macroblocks loop */
+    for (mb_row=0; mb_row<s->mb_height; mb_row++) {
         if (s->flip < 0)
-            mb_offset = 7;
-
-        /* main macroblocks loop */
-        for (mb_row=0; mb_row<s->mb_height; mb_row++) {
-            if (s->flip < 0)
-                mb_row_flip = s->mb_height - mb_row - 1;
-            else
-                mb_row_flip = mb_row;
-
-            for (block=0; block<4; block++) {
-                s->left_block[block].ref_frame = VP56_FRAME_NONE;
-                s->left_block[block].dc_coeff = 0;
-                s->left_block[block].not_null_dc = 0;
-            }
-            memset(s->coeff_ctx, 0, sizeof(s->coeff_ctx));
-            memset(s->coeff_ctx_last, 24, sizeof(s->coeff_ctx_last));
-
-            s->above_block_idx[0] = 1;
-            s->above_block_idx[1] = 2;
-            s->above_block_idx[2] = 1;
-            s->above_block_idx[3] = 2;
-            s->above_block_idx[4] = 2*s->mb_width + 2 + 1;
-            s->above_block_idx[5] = 3*s->mb_width + 4 + 1;
-
-            s->block_offset[s->frbi] = (mb_row_flip*16 + mb_offset) * stride_y;
-            s->block_offset[s->srbi] = s->block_offset[s->frbi] + 8*stride_y;
-            s->block_offset[1] = s->block_offset[0] + 8;
-            s->block_offset[3] = s->block_offset[2] + 8;
-            s->block_offset[4] = (mb_row_flip*8 + mb_offset) * stride_uv;
-            s->block_offset[5] = s->block_offset[4];
-
-            for (mb_col=0; mb_col<s->mb_width; mb_col++) {
-                vp56_decode_mb(s, mb_row, mb_col, is_alpha);
-
-                for (y=0; y<4; y++) {
-                    s->above_block_idx[y] += 2;
-                    s->block_offset[y] += 16;
-                }
+            mb_row_flip = s->mb_height - mb_row - 1;
+        else
+            mb_row_flip = mb_row;
 
-                for (uv=4; uv<6; uv++) {
-                    s->above_block_idx[uv] += 1;
-                    s->block_offset[uv] += 8;
-                }
-            }
+        for (block=0; block<4; block++) {
+            s->left_block[block].ref_frame = VP56_FRAME_NONE;
+            s->left_block[block].dc_coeff = 0;
+            s->left_block[block].not_null_dc = 0;
         }
+        memset(s->coeff_ctx, 0, sizeof(s->coeff_ctx));
+        memset(s->coeff_ctx_last, 24, sizeof(s->coeff_ctx_last));
+
+        s->above_block_idx[0] = 1;
+        s->above_block_idx[1] = 2;
+        s->above_block_idx[2] = 1;
+        s->above_block_idx[3] = 2;
+        s->above_block_idx[4] = 2*s->mb_width + 2 + 1;
+        s->above_block_idx[5] = 3*s->mb_width + 4 + 1;
+
+        s->block_offset[s->frbi] = (mb_row_flip*16 + mb_offset) * stride_y;
+        s->block_offset[s->srbi] = s->block_offset[s->frbi] + 8*stride_y;
+        s->block_offset[1] = s->block_offset[0] + 8;
+        s->block_offset[3] = s->block_offset[2] + 8;
+        s->block_offset[4] = (mb_row_flip*8 + mb_offset) * stride_uv;
+        s->block_offset[5] = s->block_offset[4];
+
+        for (mb_col=0; mb_col<s->mb_width; mb_col++) {
+            vp56_decode_mb(s, mb_row, mb_col, is_alpha);
+
+            for (y=0; y<4; y++) {
+                s->above_block_idx[y] += 2;
+                s->block_offset[y] += 16;
+            }
 
-    next:
-        if (p->key_frame || golden_frame) {
-            av_frame_unref(s->frames[VP56_FRAME_GOLDEN]);
-            if ((res = av_frame_ref(s->frames[VP56_FRAME_GOLDEN], p)) < 0)
-                return res;
+            for (uv=4; uv<6; uv++) {
+                s->above_block_idx[uv] += 1;
+                s->block_offset[uv] += 8;
+            }
         }
+    }
 
-        if (s->has_alpha) {
-            FFSWAP(AVFrame *, s->frames[VP56_FRAME_GOLDEN],
-                              s->frames[VP56_FRAME_GOLDEN2]);
-            buf += alpha_offset;
-            remaining_buf_size -= alpha_offset;
-        }
+next:
+    if (p->key_frame || s->golden_frame) {
+        av_frame_unref(s->frames[VP56_FRAME_GOLDEN]);
+        if ((res = av_frame_ref(s->frames[VP56_FRAME_GOLDEN], p)) < 0)
+            return res;
     }
 
     av_frame_unref(s->frames[VP56_FRAME_PREVIOUS]);
     FFSWAP(AVFrame *, s->frames[VP56_FRAME_CURRENT],
                       s->frames[VP56_FRAME_PREVIOUS]);
-
-    if ((res = av_frame_ref(data, p)) < 0)
-        return res;
-    *got_frame = 1;
-
-    return avpkt->size;
+    return 0;
 }
 
 av_cold int ff_vp56_init(AVCodecContext *avctx, int flip, int has_alpha)
 {
     VP56Context *s = avctx->priv_data;
+    return ff_vp56_init_context(avctx, s, flip, has_alpha);
+}
+
+av_cold int ff_vp56_init_context(AVCodecContext *avctx, VP56Context *s,
+                                  int flip, int has_alpha)
+{
     int i;
 
     s->avctx = avctx;
     avctx->pix_fmt = has_alpha ? AV_PIX_FMT_YUVA420P : AV_PIX_FMT_YUV420P;
+    if (avctx->skip_alpha) avctx->pix_fmt = AV_PIX_FMT_YUV420P;
 
     ff_h264chroma_init(&s->h264chroma, 8);
     ff_hpeldsp_init(&s->hdsp, avctx->flags);
@@ -682,10 +718,14 @@ av_cold int ff_vp56_init(AVCodecContext *avctx, int flip, int has_alpha)
     s->macroblocks = NULL;
     s->quantizer = -1;
     s->deblock_filtering = 1;
+    s->golden_frame = 0;
 
     s->filter = NULL;
 
     s->has_alpha = has_alpha;
+
+    s->modelp = &s->model;
+
     if (flip) {
         s->flip = -1;
         s->frbi = 2;
@@ -702,6 +742,11 @@ av_cold int ff_vp56_init(AVCodecContext *avctx, int flip, int has_alpha)
 av_cold int ff_vp56_free(AVCodecContext *avctx)
 {
     VP56Context *s = avctx->priv_data;
+    return ff_vp56_free_context(s);
+}
+
+av_cold int ff_vp56_free_context(VP56Context *s)
+{
     int i;
 
     av_freep(&s->above_blocks);
diff --git a/libavcodec/vp56.h b/libavcodec/vp56.h
index a852ff8..1af5eaf 100644
--- a/libavcodec/vp56.h
+++ b/libavcodec/vp56.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -79,7 +79,7 @@ typedef void (*VP56DefaultModelsInit)(VP56Context *s);
 typedef void (*VP56ParseVectorModels)(VP56Context *s);
 typedef int  (*VP56ParseCoeffModels)(VP56Context *s);
 typedef int  (*VP56ParseHeader)(VP56Context *s, const uint8_t *buf,
-                                int buf_size, int *golden_frame);
+                                int buf_size);
 
 typedef struct VP56RangeCoder {
     int high;
@@ -135,6 +135,7 @@ struct vp56_context {
     int sub_version;
 
     /* frame info */
+    int golden_frame;
     int plane_width[4];
     int plane_height[4];
     int mb_width;   /* number of horizontal MB */
@@ -189,8 +190,11 @@ struct vp56_context {
     VP56ParseCoeffModels parse_coeff_models;
     VP56ParseHeader parse_header;
 
+    /* for "slice" parallelism between YUV and A */
+    VP56Context *alpha_context;
+
     VP56Model *modelp;
-    VP56Model models[2];
+    VP56Model model;
 
     /* huffman decoding */
     int use_huffman;
@@ -203,7 +207,10 @@ struct vp56_context {
 
 
 int ff_vp56_init(AVCodecContext *avctx, int flip, int has_alpha);
+int ff_vp56_init_context(AVCodecContext *avctx, VP56Context *s,
+                          int flip, int has_alpha);
 int ff_vp56_free(AVCodecContext *avctx);
+int ff_vp56_free_context(VP56Context *s);
 void ff_vp56_init_dequant(VP56Context *s, int quantizer);
 int ff_vp56_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                          AVPacket *avpkt);
@@ -356,7 +363,7 @@ int vp56_rac_get_tree(VP56RangeCoder *c,
                       const uint8_t *probs)
 {
     while (tree->val > 0) {
-        if (vp56_rac_get_prob(c, probs[tree->prob_idx]))
+        if (vp56_rac_get_prob_branchy(c, probs[tree->prob_idx]))
             tree += tree->val;
         else
             tree++;
@@ -364,15 +371,13 @@ int vp56_rac_get_tree(VP56RangeCoder *c,
     return -tree->val;
 }
 
-/**
- * This is identical to vp8_rac_get_tree except for the possibility of starting
- * on a node other than the root node, needed for coeff decode where this is
- * used to save a bit after a 0 token (by disallowing EOB to immediately follow.)
- */
-static av_always_inline
-int vp8_rac_get_tree_with_offset(VP56RangeCoder *c, const int8_t (*tree)[2],
-                                 const uint8_t *probs, int i)
+// how probabilities are associated with decisions is different I think
+// well, the new scheme fits in the old but this way has one fewer branches per decision
+static av_always_inline int vp8_rac_get_tree(VP56RangeCoder *c, const int8_t (*tree)[2],
+                                   const uint8_t *probs)
 {
+    int i = 0;
+
     do {
         i = tree[i][vp56_rac_get_prob(c, probs[i])];
     } while (i > 0);
@@ -380,15 +385,6 @@ int vp8_rac_get_tree_with_offset(VP56RangeCoder *c, const int8_t (*tree)[2],
     return -i;
 }
 
-// how probabilities are associated with decisions is different I think
-// well, the new scheme fits in the old but this way has one fewer branches per decision
-static av_always_inline
-int vp8_rac_get_tree(VP56RangeCoder *c, const int8_t (*tree)[2],
-                     const uint8_t *probs)
-{
-    return vp8_rac_get_tree_with_offset(c, tree, probs, 0);
-}
-
 // DCTextra
 static av_always_inline int vp8_rac_get_coeff(VP56RangeCoder *c, const uint8_t *prob)
 {
diff --git a/libavcodec/vp56data.c b/libavcodec/vp56data.c
index 989c76a..0080370 100644
--- a/libavcodec/vp56data.c
+++ b/libavcodec/vp56data.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/vp56data.h b/libavcodec/vp56data.h
index 21907bd..3be268c 100644
--- a/libavcodec/vp56data.h
+++ b/libavcodec/vp56data.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/vp56dsp.c b/libavcodec/vp56dsp.c
index 5e09d24..fa533ec 100644
--- a/libavcodec/vp56dsp.c
+++ b/libavcodec/vp56dsp.c
@@ -2,20 +2,20 @@
  * Copyright (c) 2006 Aurelien Jacobs <aurel@gnuage.org>
  * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/vp56dsp.h b/libavcodec/vp56dsp.h
index 389d359..7807baa 100644
--- a/libavcodec/vp56dsp.h
+++ b/libavcodec/vp56dsp.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/vp56rac.c b/libavcodec/vp56rac.c
index 270a3ca..6061b7e 100644
--- a/libavcodec/vp56rac.c
+++ b/libavcodec/vp56rac.c
@@ -2,20 +2,20 @@
  * VP5/6/8 decoder
  * Copyright (c) 2010 Fiona Glaser <fiona@x264.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/vp5data.h b/libavcodec/vp5data.h
index b11b99d..e16ff2d 100644
--- a/libavcodec/vp5data.h
+++ b/libavcodec/vp5data.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/vp6.c b/libavcodec/vp6.c
index d10a640..a18b8ff 100644
--- a/libavcodec/vp6.c
+++ b/libavcodec/vp6.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -43,8 +43,7 @@
 static void vp6_parse_coeff(VP56Context *s);
 static void vp6_parse_coeff_huffman(VP56Context *s);
 
-static int vp6_parse_header(VP56Context *s, const uint8_t *buf, int buf_size,
-                            int *golden_frame)
+static int vp6_parse_header(VP56Context *s, const uint8_t *buf, int buf_size)
 {
     VP56RangeCoder *c = &s->c;
     int parse_filter_info = 0;
@@ -113,6 +112,7 @@ static int vp6_parse_header(VP56Context *s, const uint8_t *buf, int buf_size,
         if (sub_version < 8)
             vrt_shift = 5;
         s->sub_version = sub_version;
+        s->golden_frame = 0;
     } else {
         if (!s->sub_version || !s->avctx->coded_width || !s->avctx->coded_height)
             return AVERROR_INVALIDDATA;
@@ -124,7 +124,7 @@ static int vp6_parse_header(VP56Context *s, const uint8_t *buf, int buf_size,
         }
         ff_vp56_init_range_decoder(c, buf+1, buf_size-1);
 
-        *golden_frame = vp56_rac_get(c);
+        s->golden_frame = vp56_rac_get(c);
         if (s->filter_header) {
             s->deblock_filtering = vp56_rac_get(c);
             if (s->deblock_filtering)
@@ -211,20 +211,20 @@ static void vp6_parse_vector_models(VP56Context *s)
     int comp, node;
 
     for (comp=0; comp<2; comp++) {
-        if (vp56_rac_get_prob(c, vp6_sig_dct_pct[comp][0]))
+        if (vp56_rac_get_prob_branchy(c, vp6_sig_dct_pct[comp][0]))
             model->vector_dct[comp] = vp56_rac_gets_nn(c, 7);
-        if (vp56_rac_get_prob(c, vp6_sig_dct_pct[comp][1]))
+        if (vp56_rac_get_prob_branchy(c, vp6_sig_dct_pct[comp][1]))
             model->vector_sig[comp] = vp56_rac_gets_nn(c, 7);
     }
 
     for (comp=0; comp<2; comp++)
         for (node=0; node<7; node++)
-            if (vp56_rac_get_prob(c, vp6_pdv_pct[comp][node]))
+            if (vp56_rac_get_prob_branchy(c, vp6_pdv_pct[comp][node]))
                 model->vector_pdv[comp][node] = vp56_rac_gets_nn(c, 7);
 
     for (comp=0; comp<2; comp++)
         for (node=0; node<8; node++)
-            if (vp56_rac_get_prob(c, vp6_fdv_pct[comp][node]))
+            if (vp56_rac_get_prob_branchy(c, vp6_fdv_pct[comp][node]))
                 model->vector_fdv[comp][node] = vp56_rac_gets_nn(c, 7);
 }
 
@@ -252,7 +252,8 @@ static int vp6_build_huff_tree(VP56Context *s, uint8_t coeff_model[],
 
     ff_free_vlc(vlc);
     /* then build the huffman tree according to probabilities */
-    return ff_huff_build_tree(s->avctx, vlc, size, nodes, vp6_huff_cmp,
+    return ff_huff_build_tree(s->avctx, vlc, size, FF_HUFFMAN_BITS,
+                              nodes, vp6_huff_cmp,
                               FF_HUFFMAN_FLAG_HNODE_FIRST);
 }
 
@@ -269,7 +270,7 @@ static int vp6_parse_coeff_models(VP56Context *s)
 
     for (pt=0; pt<2; pt++)
         for (node=0; node<11; node++)
-            if (vp56_rac_get_prob(c, vp6_dccv_pct[pt][node])) {
+            if (vp56_rac_get_prob_branchy(c, vp6_dccv_pct[pt][node])) {
                 def_prob[node] = vp56_rac_gets_nn(c, 7);
                 model->coeff_dccv[pt][node] = def_prob[node];
             } else if (s->frames[VP56_FRAME_CURRENT]->key_frame) {
@@ -278,21 +279,21 @@ static int vp6_parse_coeff_models(VP56Context *s)
 
     if (vp56_rac_get(c)) {
         for (pos=1; pos<64; pos++)
-            if (vp56_rac_get_prob(c, vp6_coeff_reorder_pct[pos]))
+            if (vp56_rac_get_prob_branchy(c, vp6_coeff_reorder_pct[pos]))
                 model->coeff_reorder[pos] = vp56_rac_gets(c, 4);
         vp6_coeff_order_table_init(s);
     }
 
     for (cg=0; cg<2; cg++)
         for (node=0; node<14; node++)
-            if (vp56_rac_get_prob(c, vp6_runv_pct[cg][node]))
+            if (vp56_rac_get_prob_branchy(c, vp6_runv_pct[cg][node]))
                 model->coeff_runv[cg][node] = vp56_rac_gets_nn(c, 7);
 
     for (ct=0; ct<3; ct++)
         for (pt=0; pt<2; pt++)
             for (cg=0; cg<6; cg++)
                 for (node=0; node<11; node++)
-                    if (vp56_rac_get_prob(c, vp6_ract_pct[ct][pt][cg][node])) {
+                    if (vp56_rac_get_prob_branchy(c, vp6_ract_pct[ct][pt][cg][node])) {
                         def_prob[node] = vp56_rac_gets_nn(c, 7);
                         model->coeff_ract[pt][ct][cg][node] = def_prob[node];
                     } else if (s->frames[VP56_FRAME_CURRENT]->key_frame) {
@@ -338,7 +339,7 @@ static void vp6_parse_vector_adjustment(VP56Context *s, VP56mv *vect)
     for (comp=0; comp<2; comp++) {
         int i, delta = 0;
 
-        if (vp56_rac_get_prob(c, model->vector_dct[comp])) {
+        if (vp56_rac_get_prob_branchy(c, model->vector_dct[comp])) {
             static const uint8_t prob_order[] = {0, 1, 2, 7, 6, 5, 4};
             for (i=0; i<sizeof(prob_order); i++) {
                 int j = prob_order[i];
@@ -353,7 +354,7 @@ static void vp6_parse_vector_adjustment(VP56Context *s, VP56mv *vect)
                                       model->vector_pdv[comp]);
         }
 
-        if (delta && vp56_rac_get_prob(c, model->vector_sig[comp]))
+        if (delta && vp56_rac_get_prob_branchy(c, model->vector_sig[comp]))
             delta = -delta;
 
         if (!comp)
@@ -402,11 +403,11 @@ static void vp6_parse_coeff_huffman(VP56Context *s)
             } else {
                 if (get_bits_left(&s->gb) <= 0)
                     return;
-                coeff = get_vlc2(&s->gb, vlc_coeff->table, 9, 3);
+                coeff = get_vlc2(&s->gb, vlc_coeff->table, FF_HUFFMAN_BITS, 3);
                 if (coeff == 0) {
                     if (coeff_idx) {
                         int pt = (coeff_idx >= 6);
-                        run += get_vlc2(&s->gb, s->runv_vlc[pt].table, 9, 3);
+                        run += get_vlc2(&s->gb, s->runv_vlc[pt].table, FF_HUFFMAN_BITS, 3);
                         if (run >= 9)
                             run += get_bits(&s->gb, 6);
                     } else
@@ -461,16 +462,16 @@ static void vp6_parse_coeff(VP56Context *s)
 
         coeff_idx = 0;
         for (;;) {
-            if ((coeff_idx>1 && ct==0) || vp56_rac_get_prob(c, model2[0])) {
+            if ((coeff_idx>1 && ct==0) || vp56_rac_get_prob_branchy(c, model2[0])) {
                 /* parse a coeff */
-                if (vp56_rac_get_prob(c, model2[2])) {
-                    if (vp56_rac_get_prob(c, model2[3])) {
+                if (vp56_rac_get_prob_branchy(c, model2[2])) {
+                    if (vp56_rac_get_prob_branchy(c, model2[3])) {
                         idx = vp56_rac_get_tree(c, ff_vp56_pc_tree, model1);
                         coeff = ff_vp56_coeff_bias[idx+5];
                         for (i=ff_vp56_coeff_bit_length[idx]; i>=0; i--)
                             coeff += vp56_rac_get_prob(c, ff_vp56_coeff_parse_table[idx][i]) << i;
                     } else {
-                        if (vp56_rac_get_prob(c, model2[4]))
+                        if (vp56_rac_get_prob_branchy(c, model2[4]))
                             coeff = 3 + vp56_rac_get_prob(c, model1[5]);
                         else
                             coeff = 2;
@@ -491,7 +492,7 @@ static void vp6_parse_coeff(VP56Context *s)
                 /* parse a run */
                 ct = 0;
                 if (coeff_idx > 0) {
-                    if (!vp56_rac_get_prob(c, model2[1]))
+                    if (!vp56_rac_get_prob_branchy(c, model2[1]))
                         break;
 
                     model3 = model->coeff_runv[coeff_idx >= 6];
@@ -603,6 +604,8 @@ static void vp6_filter(VP56Context *s, uint8_t *dst, uint8_t *src,
     }
 }
 
+static av_cold void vp6_decode_init_context(VP56Context *s);
+
 static av_cold int vp6_decode_init(AVCodecContext *avctx)
 {
     VP56Context *s = avctx->priv_data;
@@ -612,6 +615,26 @@ static av_cold int vp6_decode_init(AVCodecContext *avctx)
                             avctx->codec->id == AV_CODEC_ID_VP6A)) < 0)
         return ret;
 
+    vp6_decode_init_context(s);
+
+    if (s->has_alpha) {
+        s->alpha_context = av_mallocz(sizeof(VP56Context));
+        if (!s->alpha_context) {
+            /* out of memory: undo the main-context setup before failing */
+            ff_vp56_free(avctx);
+            return AVERROR(ENOMEM);
+        }
+        ff_vp56_init_context(avctx, s->alpha_context,
+                             s->flip == -1, s->has_alpha);
+        vp6_decode_init_context(s->alpha_context);
+    }
+
+    return 0;
+}
+
+static av_cold void vp6_decode_init_context(VP56Context *s)
+{
+    s->deblock_filtering = 0;
     s->vp56_coord_div = vp6_coord_div;
     s->parse_vector_adjustment = vp6_parse_vector_adjustment;
     s->filter = vp6_filter;
@@ -619,16 +637,29 @@ static av_cold int vp6_decode_init(AVCodecContext *avctx)
     s->parse_vector_models = vp6_parse_vector_models;
     s->parse_coeff_models = vp6_parse_coeff_models;
     s->parse_header = vp6_parse_header;
-
-    return 0;
 }
 
+static av_cold void vp6_decode_free_context(VP56Context *s);
+
 static av_cold int vp6_decode_free(AVCodecContext *avctx)
 {
     VP56Context *s = avctx->priv_data;
-    int pt, ct, cg;
 
     ff_vp56_free(avctx);
+    vp6_decode_free_context(s);
+
+    if (s->alpha_context) {
+        ff_vp56_free_context(s->alpha_context);
+        vp6_decode_free_context(s->alpha_context);
+        av_freep(&s->alpha_context); /* free and reset the pointer to NULL */
+    }
+
+    return 0;
+}
+
+static av_cold void vp6_decode_free_context(VP56Context *s)
+{
+    int pt, ct, cg;
 
     for (pt=0; pt<2; pt++) {
         ff_free_vlc(&s->dccv_vlc[pt]);
@@ -637,7 +668,6 @@ static av_cold int vp6_decode_free(AVCodecContext *avctx)
             for (cg=0; cg<6; cg++)
                 ff_free_vlc(&s->ract_vlc[pt][ct][cg]);
     }
-    return 0;
 }
 
 AVCodec ff_vp6_decoder = {
@@ -675,5 +705,5 @@ AVCodec ff_vp6a_decoder = {
     .init           = vp6_decode_init,
     .close          = vp6_decode_free,
     .decode         = ff_vp56_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_SLICE_THREADS,
 };
diff --git a/libavcodec/vp6data.h b/libavcodec/vp6data.h
index 2de90e7..539e19a 100644
--- a/libavcodec/vp6data.h
+++ b/libavcodec/vp6data.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/vp6dsp.c b/libavcodec/vp6dsp.c
index 54a96ed..67c6be0 100644
--- a/libavcodec/vp6dsp.c
+++ b/libavcodec/vp6dsp.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index e3c2bd9..60accf6 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -7,20 +7,20 @@
  * Copyright (C) 2012 Daniel Kang
  * Copyright (C) 2014 Peter Ross
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -37,6 +37,14 @@
 #   include "arm/vp8.h"
 #endif
 
+#if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER // both built: choose by the vp7 flag
+#define VPX(vp7, f) ((vp7) ? vp7_ ## f : vp8_ ## f)
+#elif CONFIG_VP7_DECODER // VP7 only: flag is ignored
+#define VPX(vp7, f) vp7_ ## f
+#else // CONFIG_VP8_DECODER
+#define VPX(vp7, f) vp8_ ## f
+#endif
+
 static void free_buffers(VP8Context *s)
 {
     int i;
@@ -143,7 +151,7 @@ int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
     AVCodecContext *avctx = s->avctx;
     int i, ret;
 
-    if (width  != s->avctx->width ||
+    if (width  != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
         height != s->avctx->height) {
         vp8_decode_flush_impl(s->avctx, 1);
 
@@ -196,6 +204,7 @@ static int vp8_update_dimensions(VP8Context *s, int width, int height)
     return update_dimensions(s, width, height, IS_VP8);
 }
 
+
 static void parse_segment_info(VP8Context *s)
 {
     VP56RangeCoder *c = &s->c;
@@ -286,7 +295,7 @@ static void vp7_get_quants(VP8Context *s)
     s->qmat[0].chroma_qmul[1]  =       vp7_yac_qlookup[uvac_qi];
 }
 
-static void get_quants(VP8Context *s)
+static void vp8_get_quants(VP8Context *s)
 {
     VP56RangeCoder *c = &s->c;
     int i, base_qi;
@@ -407,7 +416,7 @@ static void update_refs(VP8Context *s)
     s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
 }
 
-static void copy_luma(AVFrame *dst, AVFrame *src, int width, int height)
+static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
 {
     int i, j;
 
@@ -418,16 +427,16 @@ static void copy_luma(AVFrame *dst, AVFrame *src, int width, int height)
     }
 }
 
-static void fade(uint8_t *dst, uint8_t *src,
-                 int width, int height, int linesize,
+static void fade(uint8_t *dst, int dst_linesize,
+                 const uint8_t *src, int src_linesize,
+                 int width, int height,
                  int alpha, int beta)
 {
     int i, j;
-
     for (j = 0; j < height; j++) {
         for (i = 0; i < width; i++) {
-            uint8_t y = src[j * linesize + i];
-            dst[j * linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
+            uint8_t y = src[j * src_linesize + i];
+            dst[j * dst_linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
         }
     }
 }
@@ -443,8 +452,11 @@ static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
         int height = s->mb_height * 16;
         AVFrame *src, *dst;
 
-        if (!s->framep[VP56_FRAME_PREVIOUS])
+        if (!s->framep[VP56_FRAME_PREVIOUS] ||
+            !s->framep[VP56_FRAME_GOLDEN]) {
+            av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
             return AVERROR_INVALIDDATA;
+        }
 
         dst =
         src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
@@ -453,15 +465,16 @@ static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
         if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
             s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
             if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
-               return ret;
+                return ret;
 
             dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
 
-            copy_luma(dst, src, width, height);
+            copy_chroma(dst, src, width, height);
         }
 
-        fade(dst->data[0], src->data[0],
-             width, height, dst->linesize[0], alpha, beta);
+        fade(dst->data[0], dst->linesize[0],
+             src->data[0], src->linesize[0],
+             width, height, alpha, beta);
     }
 
     return 0;
@@ -484,6 +497,11 @@ static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_si
     s->invisible = 0;
     part1_size   = AV_RL24(buf) >> 4;
 
+    if (buf_size < 4 - s->profile + part1_size) {
+        av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, 4 - s->profile + part1_size);
+        return AVERROR_INVALIDDATA;
+    }
+
     buf      += 4 - s->profile;
     buf_size -= 4 - s->profile;
 
@@ -529,10 +547,10 @@ static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_si
                  s->feature_index_prob[i][j] =
                      vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
 
-             if (vp7_feature_value_size[i])
+             if (vp7_feature_value_size[s->profile][i])
                  for (j = 0; j < 4; j++)
                      s->feature_value[i][j] =
-                         vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
+                        vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
         }
     }
 
@@ -694,11 +712,12 @@ static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_si
     }
 
     if (!s->macroblocks_base || /* first frame */
-        width != s->avctx->width || height != s->avctx->height)
+        width != s->avctx->width || height != s->avctx->height ||
+        (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
         if ((ret = vp8_update_dimensions(s, width, height)) < 0)
             return ret;
 
-    get_quants(s);
+    vp8_get_quants(s);
 
     if (!s->keyframe) {
         update_refs(s);
@@ -738,7 +757,7 @@ void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
 /**
  * Motion vector coding, 17.1.
  */
-static int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
+static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
 {
     int bit, x = 0;
 
@@ -766,6 +785,16 @@ static int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
     return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
 }
 
+static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
+{
+    return read_mv_component(c, p, 1);
+}
+
+static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
+{
+    return read_mv_component(c, p, 0);
+}
+
 static av_always_inline
 const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
 {
@@ -956,8 +985,8 @@ void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                     mb->mode = VP8_MVMODE_SPLIT;
                     mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
                 } else {
-                    mb->mv.y += read_mv_component(c, s->prob->mvc[0], IS_VP7);
-                    mb->mv.x += read_mv_component(c, s->prob->mvc[1], IS_VP7);
+                    mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
+                    mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
                     mb->bmv[0] = mb->mv;
                 }
             } else {
@@ -1056,8 +1085,8 @@ void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                     mb->mode = VP8_MVMODE_SPLIT;
                     mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
                 } else {
-                    mb->mv.y  += read_mv_component(c, s->prob->mvc[0], IS_VP8);
-                    mb->mv.x  += read_mv_component(c, s->prob->mvc[1], IS_VP8);
+                    mb->mv.y  += vp8_read_mv_component(c, s->prob->mvc[0]);
+                    mb->mv.x  += vp8_read_mv_component(c, s->prob->mvc[1]);
                     mb->bmv[0] = mb->mv;
                 }
             } else {
@@ -1081,7 +1110,7 @@ void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
 {
     uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
 
-    if (layout == 1) {
+    if (layout) {
         VP8Macroblock *mb_top = mb - s->mb_width - 1;
         memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
     }
@@ -1089,7 +1118,7 @@ void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
         int x, y;
         uint8_t *top;
         uint8_t *const left = s->intra4x4_pred_mode_left;
-        if (layout == 1)
+        if (layout)
             top = mb->intra4x4_pred_mode_top;
         else
             top = s->intra4x4_pred_mode_top + 4 * mb_x;
@@ -1124,7 +1153,7 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
         *segment = 0;
         for (i = 0; i < 4; i++) {
             if (s->feature_enabled[i]) {
-                if (vp56_rac_get_prob(c, s->feature_present_prob[i])) {
+                if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
                       int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
                                                    s->feature_index_prob[i]);
                       av_log(s->avctx, AV_LOG_WARNING,
@@ -1133,9 +1162,10 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                 }
            }
         }
-    } else if (s->segmentation.update_map)
-        *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
-    else if (s->segmentation.enabled)
+    } else if (s->segmentation.update_map) {
+        int bit  = vp56_rac_get_prob(c, s->prob->segmentid[0]);
+        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
+    } else if (s->segmentation.enabled)
         *segment = ref ? *ref : *segment;
     mb->segment = *segment;
 
@@ -1150,7 +1180,7 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
         } else {
             const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
                                            : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
-            if (s->mb_layout == 1)
+            if (s->mb_layout)
                 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
             else
                 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
@@ -1314,6 +1344,7 @@ static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
  * @param zero_nhood the initial prediction context for number of surrounding
  *                   all-zero blocks (only left/top, so 0-2)
  * @param qmul       array holding the dc/ac dequant factor at position 0/1
+ * @param scan       scan pattern (VP7 only)
  *
  * @return 0 if no coeffs were decoded
  *         otherwise, the index of the last coeff decoded plus one
@@ -1754,7 +1785,8 @@ void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
             s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                      src1 - my_idx * linesize - mx_idx,
                                      EDGE_EMU_LINESIZE, linesize,
-                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
+                                     block_w + subpel_idx[1][mx],
+                                     block_h + subpel_idx[1][my],
                                      x_off - mx_idx, y_off - my_idx, width, height);
             src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
             mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
@@ -1762,7 +1794,8 @@ void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
             s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                      src2 - my_idx * linesize - mx_idx,
                                      EDGE_EMU_LINESIZE, linesize,
-                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
+                                     block_w + subpel_idx[1][mx],
+                                     block_h + subpel_idx[1][my],
                                      x_off - mx_idx, y_off - my_idx, width, height);
             src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
             mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
@@ -2223,7 +2256,7 @@ static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
 #define update_pos(td, mb_y, mb_x)
 #endif
 
-static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
+static av_always_inline void decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                         int jobnr, int threadnr, int is_vp7)
 {
     VP8Context *s = avctx->priv_data;
@@ -2344,7 +2377,19 @@ static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
     }
 }
 
-static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
+static void vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
+                                        int jobnr, int threadnr)
+{
+    decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
+}
+
+static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
+                                        int jobnr, int threadnr)
+{
+    decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
+}
+
+static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
                               int jobnr, int threadnr, int is_vp7)
 {
     VP8Context *s = avctx->priv_data;
@@ -2403,6 +2448,18 @@ static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
     }
 }
 
+static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
+                              int jobnr, int threadnr)
+{
+    filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
+}
+
+static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
+                              int jobnr, int threadnr)
+{
+    filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
+}
+
 static av_always_inline
 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
                               int threadnr, int is_vp7)
@@ -2418,9 +2475,9 @@ int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
         if (mb_y >= s->mb_height)
             break;
         td->thread_mb_pos = mb_y << 16;
-        vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, is_vp7);
+        s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
         if (s->deblock_filter)
-            vp8_filter_mb_row(avctx, tdata, jobnr, threadnr, is_vp7);
+            s->filter_mb_row(avctx, tdata, jobnr, threadnr);
         update_pos(td, mb_y, INT_MAX & 0xFFFF);
 
         s->mv_min.y -= 64;
@@ -2506,10 +2563,8 @@ int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     curframe->tf.f->key_frame = s->keyframe;
     curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
                                             : AV_PICTURE_TYPE_P;
-    if ((ret = vp8_alloc_frame(s, curframe, referenced))) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
+    if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
         goto err;
-    }
 
     // check if golden and altref are swapped
     if (s->update_altref != VP56_FRAME_NONE)
@@ -2529,7 +2584,8 @@ int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
 
     s->next_framep[VP56_FRAME_CURRENT] = curframe;
 
-    ff_thread_finish_setup(avctx);
+    if (avctx->codec->update_thread_context)
+        ff_thread_finish_setup(avctx);
 
     s->linesize   = curframe->tf.f->linesize[0];
     s->uvlinesize = curframe->tf.f->linesize[1];
@@ -2642,6 +2698,7 @@ int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
     int ret;
 
     s->avctx = avctx;
+    s->vp7   = avctx->codec->id == AV_CODEC_ID_VP7;
     avctx->pix_fmt = AV_PIX_FMT_YUV420P;
     avctx->internal->allocate_progress = 1;
 
@@ -2651,9 +2708,13 @@ int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
     if (CONFIG_VP7_DECODER && is_vp7) {
         ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
         ff_vp7dsp_init(&s->vp8dsp);
+        s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
+        s->filter_mb_row           = vp7_filter_mb_row;
     } else if (CONFIG_VP8_DECODER && !is_vp7) {
         ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
         ff_vp8dsp_init(&s->vp8dsp);
+        s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
+        s->filter_mb_row           = vp8_filter_mb_row;
     }
 
     /* does not change for VP8 */
@@ -2695,7 +2756,7 @@ static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
     return 0;
 }
 
-#define REBASE(pic) pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
+#define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
 
 static int vp8_decode_update_thread_context(AVCodecContext *dst,
                                             const AVCodecContext *src)
diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h
index 365e7b7..83729c8 100644
--- a/libavcodec/vp8.h
+++ b/libavcodec/vp8.h
@@ -6,20 +6,20 @@
  * Copyright (C) 2010 Fiona Glaser
  * Copyright (C) 2012 Daniel Kang
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -35,6 +35,8 @@
 
 #if HAVE_PTHREADS
 #   include <pthread.h>
+#elif HAVE_OS2THREADS
+#   include "compat/os2threads.h"
 #elif HAVE_W32THREADS
 #   include "compat/w32pthreads.h"
 #endif
@@ -272,6 +274,11 @@ typedef struct VP8Context {
      */
     int mb_layout;
 
+    void (*decode_mb_row_no_filter)(AVCodecContext *avctx, void *tdata, int jobnr, int threadnr);
+    void (*filter_mb_row)(AVCodecContext *avctx, void *tdata, int jobnr, int threadnr);
+
+    int vp7;
+
     /**
      * Fade bit present in bitstream (VP7)
      */
diff --git a/libavcodec/vp8_parser.c b/libavcodec/vp8_parser.c
index 8f6459c..afc7f99 100644
--- a/libavcodec/vp8_parser.c
+++ b/libavcodec/vp8_parser.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2008 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/vp8data.h b/libavcodec/vp8data.h
index b49dea9..f9dbf56 100644
--- a/libavcodec/vp8data.h
+++ b/libavcodec/vp8data.h
@@ -2,20 +2,20 @@
  * Copyright (C) 2010 David Conrad
  * Copyright (C) 2010 Ronald S. Bultje
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/vp8dsp.c b/libavcodec/vp8dsp.c
index 4e4012f..e1a91bb 100644
--- a/libavcodec/vp8dsp.c
+++ b/libavcodec/vp8dsp.c
@@ -3,20 +3,20 @@
  * Copyright (C) 2010 Ronald S. Bultje
  * Copyright (C) 2014 Peter Ross
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,6 +26,7 @@
  */
 
 #include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
 
 #include "mathops.h"
 #include "vp8dsp.h"
@@ -71,10 +72,7 @@ static void vp7_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16])
         b1 = (tmp[i + 0] - tmp[i + 8]) * 23170;
         c1 = tmp[i + 4] * 12540 - tmp[i + 12] * 30274;
         d1 = tmp[i + 4] * 30274 + tmp[i + 12] * 12540;
-        dc[i * 4 + 0] = 0;
-        dc[i * 4 + 1] = 0;
-        dc[i * 4 + 2] = 0;
-        dc[i * 4 + 3] = 0;
+        AV_ZERO64(dc + i * 4);
         block[0][i][0] = (a1 + d1 + 0x20000) >> 18;
         block[3][i][0] = (a1 - d1 + 0x20000) >> 18;
         block[1][i][0] = (b1 + c1 + 0x20000) >> 18;
@@ -105,10 +103,7 @@ static void vp7_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
         b1 = (block[i * 4 + 0] - block[i * 4 + 2]) * 23170;
         c1 = block[i * 4 + 1] * 12540 - block[i * 4 + 3] * 30274;
         d1 = block[i * 4 + 1] * 30274 + block[i * 4 + 3] * 12540;
-        block[i * 4 + 0] = 0;
-        block[i * 4 + 1] = 0;
-        block[i * 4 + 2] = 0;
-        block[i * 4 + 3] = 0;
+        AV_ZERO64(block + i * 4);
         tmp[i * 4 + 0] = (a1 + d1) >> 14;
         tmp[i * 4 + 3] = (a1 - d1) >> 14;
         tmp[i * 4 + 1] = (b1 + c1) >> 14;
@@ -171,10 +166,7 @@ static void vp8_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16])
         t1 = dc[i * 4 + 1] + dc[i * 4 + 2];
         t2 = dc[i * 4 + 1] - dc[i * 4 + 2];
         t3 = dc[i * 4 + 0] - dc[i * 4 + 3] + 3; // rounding
-        dc[i * 4 + 0] = 0;
-        dc[i * 4 + 1] = 0;
-        dc[i * 4 + 2] = 0;
-        dc[i * 4 + 3] = 0;
+        AV_ZERO64(dc + i * 4);
 
         block[i][0][0] = (t0 + t1) >> 3;
         block[i][1][0] = (t3 + t2) >> 3;
@@ -262,7 +254,7 @@ MK_IDCT_DC_ADD4_C(vp8)
     int av_unused q2 = p[ 2 * stride];                                        \
     int av_unused q3 = p[ 3 * stride];
 
-#define clip_int8(n) (cm[n + 0x80] - 0x80)
+#define clip_int8(n) (cm[(n) + 0x80] - 0x80)
 
 static av_always_inline void filter_common(uint8_t *p, ptrdiff_t stride,
                                            int is4tap, int is_vp7)
diff --git a/libavcodec/vp8dsp.h b/libavcodec/vp8dsp.h
index 4864cf7..5fdd3af 100644
--- a/libavcodec/vp8dsp.h
+++ b/libavcodec/vp8dsp.h
@@ -2,20 +2,20 @@
  * Copyright (C) 2010 David Conrad
  * Copyright (C) 2010 Ronald S. Bultje
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index 2a6a138..96da823 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -4,99 +4,397 @@
  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
  * Copyright (C) 2013 Clément Bœsch <u pkh me>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "libavutil/avassert.h"
-
 #include "avcodec.h"
 #include "get_bits.h"
 #include "internal.h"
+#include "thread.h"
 #include "videodsp.h"
 #include "vp56.h"
 #include "vp9.h"
 #include "vp9data.h"
+#include "vp9dsp.h"
+#include "libavutil/avassert.h"
 
 #define VP9_SYNCCODE 0x498342
-#define MAX_PROB 255
 
-static void vp9_decode_flush(AVCodecContext *avctx)
+enum CompPredMode {
+    PRED_SINGLEREF,
+    PRED_COMPREF,
+    PRED_SWITCHABLE,
+};
+
+enum BlockLevel {
+    BL_64X64,
+    BL_32X32,
+    BL_16X16,
+    BL_8X8,
+};
+
+enum BlockSize {
+    BS_64x64,
+    BS_64x32,
+    BS_32x64,
+    BS_32x32,
+    BS_32x16,
+    BS_16x32,
+    BS_16x16,
+    BS_16x8,
+    BS_8x16,
+    BS_8x8,
+    BS_8x4,
+    BS_4x8,
+    BS_4x4,
+    N_BS_SIZES,
+};
+
+struct VP9mvrefPair {
+    VP56mv mv[2];
+    int8_t ref[2];
+};
+
+typedef struct VP9Frame {
+    ThreadFrame tf;
+    AVBufferRef *extradata;
+    uint8_t *segmentation_map;
+    struct VP9mvrefPair *mv;
+} VP9Frame;
+
+struct VP9Filter {
+    uint8_t level[8 * 8];
+    uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
+                              [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
+};
+
+typedef struct VP9Block {
+    uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
+    enum FilterMode filter;
+    VP56mv mv[4 /* b_idx */][2 /* ref */];
+    enum BlockSize bs;
+    enum TxfmMode tx, uvtx;
+    enum BlockLevel bl;
+    enum BlockPartition bp;
+} VP9Block;
+
+typedef struct VP9Context {
+    VP9DSPContext dsp;
+    VideoDSPContext vdsp;
+    GetBitContext gb;
+    VP56RangeCoder c;
+    VP56RangeCoder *c_b;
+    unsigned c_b_size;
+    VP9Block *b_base, *b;
+    int pass, uses_2pass, last_uses_2pass;
+    int row, row7, col, col7;
+    uint8_t *dst[3];
+    ptrdiff_t y_stride, uv_stride;
+
+    // bitstream header
+    uint8_t profile;
+    uint8_t keyframe, last_keyframe;
+    uint8_t invisible;
+    uint8_t use_last_frame_mvs;
+    uint8_t errorres;
+    uint8_t colorspace;
+    uint8_t fullrange;
+    uint8_t intraonly;
+    uint8_t resetctx;
+    uint8_t refreshrefmask;
+    uint8_t highprecisionmvs;
+    enum FilterMode filtermode;
+    uint8_t allowcompinter;
+    uint8_t fixcompref;
+    uint8_t refreshctx;
+    uint8_t parallelmode;
+    uint8_t framectxid;
+    uint8_t refidx[3];
+    uint8_t signbias[3];
+    uint8_t varcompref[2];
+    ThreadFrame refs[8], next_refs[8];
+#define CUR_FRAME 0
+#define LAST_FRAME 1
+    VP9Frame frames[2];
+
+    struct {
+        uint8_t level;
+        int8_t sharpness;
+        uint8_t lim_lut[64];
+        uint8_t mblim_lut[64];
+    } filter;
+    struct {
+        uint8_t enabled;
+        int8_t mode[2];
+        int8_t ref[4];
+    } lf_delta;
+    uint8_t yac_qi;
+    int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
+    uint8_t lossless;
+    struct {
+        uint8_t enabled;
+        uint8_t temporal;
+        uint8_t absolute_vals;
+        uint8_t update_map;
+        struct {
+            uint8_t q_enabled;
+            uint8_t lf_enabled;
+            uint8_t ref_enabled;
+            uint8_t skip_enabled;
+            uint8_t ref_val;
+            int16_t q_val;
+            int8_t lf_val;
+            int16_t qmul[2][2];
+            uint8_t lflvl[4][2];
+        } feat[8];
+    } segmentation;
+    struct {
+        unsigned log2_tile_cols, log2_tile_rows;
+        unsigned tile_cols, tile_rows;
+        unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
+    } tiling;
+    unsigned sb_cols, sb_rows, rows, cols;
+    struct {
+        prob_context p;
+        uint8_t coef[4][2][2][6][6][3];
+    } prob_ctx[4];
+    struct {
+        prob_context p;
+        uint8_t coef[4][2][2][6][6][11];
+        uint8_t seg[7];
+        uint8_t segpred[3];
+    } prob;
+    struct {
+        unsigned y_mode[4][10];
+        unsigned uv_mode[10][10];
+        unsigned filter[4][3];
+        unsigned mv_mode[7][4];
+        unsigned intra[4][2];
+        unsigned comp[5][2];
+        unsigned single_ref[5][2][2];
+        unsigned comp_ref[5][2];
+        unsigned tx32p[2][4];
+        unsigned tx16p[2][3];
+        unsigned tx8p[2][2];
+        unsigned skip[3][2];
+        unsigned mv_joint[4];
+        struct {
+            unsigned sign[2];
+            unsigned classes[11];
+            unsigned class0[2];
+            unsigned bits[10][2];
+            unsigned class0_fp[2][4];
+            unsigned fp[4];
+            unsigned class0_hp[2];
+            unsigned hp[2];
+        } mv_comp[2];
+        unsigned partition[4][4][4];
+        unsigned coef[4][2][2][6][6][3];
+        unsigned eob[4][2][2][6][6][2];
+    } counts;
+    enum TxfmMode txfmmode;
+    enum CompPredMode comppredmode;
+
+    // contextual (left/above) cache
+    DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
+    DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
+    DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
+    DECLARE_ALIGNED(8, uint8_t, left_uv_nnz_ctx)[2][8];
+    DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
+    DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
+    DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
+    DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
+    DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
+    DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
+    DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
+    DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
+    uint8_t *above_partition_ctx;
+    uint8_t *above_mode_ctx;
+    // FIXME maybe merge some of the below in a flags field?
+    uint8_t *above_y_nnz_ctx;
+    uint8_t *above_uv_nnz_ctx[2];
+    uint8_t *above_skip_ctx; // 1bit
+    uint8_t *above_txfm_ctx; // 2bit
+    uint8_t *above_segpred_ctx; // 1bit
+    uint8_t *above_intra_ctx; // 1bit
+    uint8_t *above_comp_ctx; // 1bit
+    uint8_t *above_ref_ctx; // 2bit
+    uint8_t *above_filter_ctx;
+    VP56mv (*above_mv_ctx)[2];
+
+    // whole-frame cache
+    uint8_t *intra_pred_data[3];
+    struct VP9Filter *lflvl;
+    DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[71*80];
+
+    // block reconstruction intermediates
+    int block_alloc_using_2pass;
+    int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
+    uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
+    struct { int x, y; } min_mv, max_mv;
+    DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64];
+    DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32];
+} VP9Context;
+
+static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
+    {
+        { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
+        { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
+    }, {
+        { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
+        { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
+    }
+};
+
+static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
 {
-    VP9Context *s = avctx->priv_data;
-    int i;
+    VP9Context *s = ctx->priv_data;
+    int ret, sz;
 
-    for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++)
-        av_frame_unref(s->refs[i]);
+    if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
+        return ret;
+    sz = 64 * s->sb_cols * s->sb_rows;
+    if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
+        ff_thread_release_buffer(ctx, &f->tf);
+        return AVERROR(ENOMEM);
+    }
+
+    f->segmentation_map = f->extradata->data;
+    f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
+
+    // segmentation on but no map update (inter frames only): copy last frame's map
+    if (s->segmentation.enabled && !s->segmentation.update_map &&
+        !s->intraonly && !s->keyframe) {
+        memcpy(f->segmentation_map, s->frames[LAST_FRAME].segmentation_map, sz);
+    }
+
+    return 0;
+}
+
+static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
+{
+    ff_thread_release_buffer(ctx, &f->tf);
+    av_buffer_unref(&f->extradata);
+}
+
+static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
+{
+    int res;
+
+    if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
+        return res;
+    } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
+        vp9_unref_frame(ctx, dst);
+        return AVERROR(ENOMEM);
+    }
+
+    dst->segmentation_map = src->segmentation_map;
+    dst->mv = src->mv;
+
+    return 0;
 }
 
-static int update_size(AVCodecContext *avctx, int w, int h)
+static int update_size(AVCodecContext *ctx, int w, int h)
 {
-    VP9Context *s = avctx->priv_data;
+    VP9Context *s = ctx->priv_data;
     uint8_t *p;
 
-    if (s->above_partition_ctx && w == avctx->width && h == avctx->height)
-        return 0;
+    av_assert0(w > 0 && h > 0);
 
-    vp9_decode_flush(avctx);
+    if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height)
+        return 0;
 
-    if (w <= 0 || h <= 0)
-        return AVERROR_INVALIDDATA;
+    ctx->width  = w;
+    ctx->height = h;
+    s->sb_cols  = (w + 63) >> 6;
+    s->sb_rows  = (h + 63) >> 6;
+    s->cols     = (w + 7) >> 3;
+    s->rows     = (h + 7) >> 3;
 
-    avctx->width  = w;
-    avctx->height = h;
-    s->sb_cols    = (w + 63) >> 6;
-    s->sb_rows    = (h + 63) >> 6;
-    s->cols       = (w +  7) >> 3;
-    s->rows       = (h +  7) >> 3;
-
-#define assign(var, type, n) var = (type)p; p += s->sb_cols * n * sizeof(*var)
-    av_free(s->above_partition_ctx);
-    p = av_malloc(s->sb_cols *
-                  (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx) +
-                   64 * s->sb_rows * (1 + sizeof(*s->mv[0]) * 2)));
+#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
+    av_freep(&s->intra_pred_data[0]);
+    p = av_malloc(s->sb_cols * (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
     if (!p)
         return AVERROR(ENOMEM);
-    assign(s->above_partition_ctx, uint8_t *,     8);
-    assign(s->above_skip_ctx,      uint8_t *,     8);
-    assign(s->above_txfm_ctx,      uint8_t *,     8);
-    assign(s->above_mode_ctx,      uint8_t *,    16);
-    assign(s->above_y_nnz_ctx,     uint8_t *,    16);
-    assign(s->above_uv_nnz_ctx[0], uint8_t *,     8);
-    assign(s->above_uv_nnz_ctx[1], uint8_t *,     8);
-    assign(s->intra_pred_data[0],  uint8_t *,    64);
-    assign(s->intra_pred_data[1],  uint8_t *,    32);
-    assign(s->intra_pred_data[2],  uint8_t *,    32);
-    assign(s->above_segpred_ctx,   uint8_t *,     8);
-    assign(s->above_intra_ctx,     uint8_t *,     8);
-    assign(s->above_comp_ctx,      uint8_t *,     8);
-    assign(s->above_ref_ctx,       uint8_t *,     8);
-    assign(s->above_filter_ctx,    uint8_t *,     8);
-    assign(s->lflvl,               VP9Filter *,   1);
-    assign(s->above_mv_ctx,        VP56mv(*)[2], 16);
-    assign(s->segmentation_map,    uint8_t *,      64 * s->sb_rows);
-    assign(s->mv[0],               VP9MVRefPair *, 64 * s->sb_rows);
-    assign(s->mv[1],               VP9MVRefPair *, 64 * s->sb_rows);
+    assign(s->intra_pred_data[0],  uint8_t *,             64);
+    assign(s->intra_pred_data[1],  uint8_t *,             32);
+    assign(s->intra_pred_data[2],  uint8_t *,             32);
+    assign(s->above_y_nnz_ctx,     uint8_t *,             16);
+    assign(s->above_mode_ctx,      uint8_t *,             16);
+    assign(s->above_mv_ctx,        VP56mv(*)[2],          16);
+    assign(s->above_partition_ctx, uint8_t *,              8);
+    assign(s->above_skip_ctx,      uint8_t *,              8);
+    assign(s->above_txfm_ctx,      uint8_t *,              8);
+    assign(s->above_uv_nnz_ctx[0], uint8_t *,              8);
+    assign(s->above_uv_nnz_ctx[1], uint8_t *,              8);
+    assign(s->above_segpred_ctx,   uint8_t *,              8);
+    assign(s->above_intra_ctx,     uint8_t *,              8);
+    assign(s->above_comp_ctx,      uint8_t *,              8);
+    assign(s->above_ref_ctx,       uint8_t *,              8);
+    assign(s->above_filter_ctx,    uint8_t *,              8);
+    assign(s->lflvl,               struct VP9Filter *,     1);
 #undef assign
 
+    // freed here; update_block_buffers() allocates them again when they are NULL
+    av_freep(&s->b_base);
+    av_freep(&s->block_base);
+
+    return 0;
+}
+
+static int update_block_buffers(AVCodecContext *ctx)
+{
+    VP9Context *s = ctx->priv_data;
+
+    if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->uses_2pass)
+        return 0;
+
+    av_free(s->b_base);
+    av_free(s->block_base);
+    if (s->uses_2pass) {
+        int sbs = s->sb_cols * s->sb_rows;
+
+        s->b_base = av_malloc(sizeof(VP9Block) * s->cols * s->rows);
+        s->block_base = av_mallocz((64 * 64 + 128) * sbs * 3);
+        if (!s->b_base || !s->block_base)
+            return AVERROR(ENOMEM);
+        s->uvblock_base[0] = s->block_base + sbs * 64 * 64;
+        s->uvblock_base[1] = s->uvblock_base[0] + sbs * 32 * 32;
+        s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * 32 * 32);
+        s->uveob_base[0] = s->eob_base + 256 * sbs;
+        s->uveob_base[1] = s->uveob_base[0] + 64 * sbs;
+    } else {
+        s->b_base = av_malloc(sizeof(VP9Block));
+        s->block_base = av_mallocz((64 * 64 + 128) * 3);
+        if (!s->b_base || !s->block_base)
+            return AVERROR(ENOMEM);
+        s->uvblock_base[0] = s->block_base + 64 * 64;
+        s->uvblock_base[1] = s->uvblock_base[0] + 32 * 32;
+        s->eob_base = (uint8_t *) (s->uvblock_base[1] + 32 * 32);
+        s->uveob_base[0] = s->eob_base + 256;
+        s->uveob_base[1] = s->uveob_base[0] + 64;
+    }
+    s->block_alloc_using_2pass = s->uses_2pass;
+
     return 0;
 }
 
-// The sign bit is at the end, not the start, of a bit sequence
-static av_always_inline int get_bits_with_sign(GetBitContext *gb, int n)
+// reads n magnitude bits followed by a sign bit (the sign comes last, not first)
+static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
 {
     int v = get_bits(gb, n);
     return get_bits1(gb) ? -v : v;
@@ -104,17 +402,13 @@ static av_always_inline int get_bits_with_sign(GetBitContext *gb, int n)
 
 static av_always_inline int inv_recenter_nonneg(int v, int m)
 {
-    if (v > 2 * m)
-        return v;
-    if (v & 1)
-        return m - ((v + 1) >> 1);
-    return m + (v >> 1);
+    return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
 }
 
 // differential forward probability updates
 static int update_prob(VP56RangeCoder *c, int p)
 {
-    static const int inv_map_table[MAX_PROB - 1] = {
+    static const int inv_map_table[254] = {
           7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
         189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
          10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
@@ -139,13 +433,13 @@ static int update_prob(VP56RangeCoder *c, int p)
 
     /* This code is trying to do a differential probability update. For a
      * current probability A in the range [1, 255], the difference to a new
-     * probability of any value can be expressed differentially as 1-A, 255-A
+     * probability of any value can be expressed differentially as [1-A, 255-A]
      * where some part of this (absolute range) exists both in positive as
      * well as the negative part, whereas another part only exists in one
      * half. We're trying to code this shared part differentially, i.e.
      * times two where the value of the lowest bit specifies the sign, and
      * the single part is then coded on top of this. This absolute difference
-     * then again has a value of [0, 254], but a bigger value in this range
+     * then again has a value of [0,254], but a bigger value in this range
      * indicates that we're further away from the original value A, so we
      * can code this as a VLC code, since higher values are increasingly
      * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
@@ -160,65 +454,59 @@ static int update_prob(VP56RangeCoder *c, int p)
         d = vp8_rac_get_uint(c, 5) + 32;
     } else {
         d = vp8_rac_get_uint(c, 7);
-        if (d >= 65) {
+        if (d >= 65)
             d = (d << 1) - 65 + vp8_rac_get(c);
-            d = av_clip(d, 0, MAX_PROB - 65 - 1);
-        }
         d += 64;
     }
 
-    return p <= 128
-           ?   1 + inv_recenter_nonneg(inv_map_table[d], p - 1)
-           : 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
+    return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
+                    255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
 }
 
-static int decode_frame_header(AVCodecContext *avctx,
+static int decode_frame_header(AVCodecContext *ctx,
                                const uint8_t *data, int size, int *ref)
 {
-    VP9Context *s = avctx->priv_data;
-    int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
+    VP9Context *s = ctx->priv_data;
+    int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
     int last_invisible;
     const uint8_t *data2;
 
     /* general header */
-    if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
-        return ret;
+    if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
+        return res;
     }
     if (get_bits(&s->gb, 2) != 0x2) { // frame marker
-        av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
+        av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
         return AVERROR_INVALIDDATA;
     }
     s->profile = get_bits1(&s->gb);
     if (get_bits1(&s->gb)) { // reserved bit
-        av_log(avctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
+        av_log(ctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
         return AVERROR_INVALIDDATA;
     }
     if (get_bits1(&s->gb)) {
         *ref = get_bits(&s->gb, 3);
         return 0;
     }
-
-    s->last_keyframe = s->keyframe;
-    s->keyframe      = !get_bits1(&s->gb);
-
-    last_invisible = s->invisible;
-    s->invisible   = !get_bits1(&s->gb);
-    s->errorres    = get_bits1(&s->gb);
-    // FIXME disable this upon resolution change
+    s->last_uses_2pass = s->uses_2pass;
+    s->last_keyframe  = s->keyframe;
+    s->keyframe       = !get_bits1(&s->gb);
+    last_invisible    = s->invisible;
+    s->invisible      = !get_bits1(&s->gb);
+    s->errorres       = get_bits1(&s->gb);
     s->use_last_frame_mvs = !s->errorres && !last_invisible;
-
     if (s->keyframe) {
         if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
-            av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
+            av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
             return AVERROR_INVALIDDATA;
         }
         s->colorspace = get_bits(&s->gb, 3);
         if (s->colorspace == 7) { // RGB = profile 1
-            av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
+            av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
             return AVERROR_INVALIDDATA;
         }
-        s->fullrange = get_bits1(&s->gb);
+        s->fullrange  = get_bits1(&s->gb);
         // for profile 1, here follows the subsampling bits
         s->refreshrefmask = 0xff;
         w = get_bits(&s->gb, 16) + 1;
@@ -226,11 +514,11 @@ static int decode_frame_header(AVCodecContext *avctx,
         if (get_bits1(&s->gb)) // display size
             skip_bits(&s->gb, 32);
     } else {
-        s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
-        s->resetctx  = s->errorres ? 0 : get_bits(&s->gb, 2);
+        s->intraonly  = s->invisible ? get_bits1(&s->gb) : 0;
+        s->resetctx   = s->errorres ? 0 : get_bits(&s->gb, 2);
         if (s->intraonly) {
             if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
-                av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
+                av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
                 return AVERROR_INVALIDDATA;
             }
             s->refreshrefmask = get_bits(&s->gb, 8);
@@ -246,33 +534,37 @@ static int decode_frame_header(AVCodecContext *avctx,
             s->signbias[1]    = get_bits1(&s->gb);
             s->refidx[2]      = get_bits(&s->gb, 3);
             s->signbias[2]    = get_bits1(&s->gb);
-            if (!s->refs[s->refidx[0]]->buf[0] ||
-                !s->refs[s->refidx[1]]->buf[0] ||
-                !s->refs[s->refidx[2]]->buf[0]) {
-                av_log(avctx, AV_LOG_ERROR,
-                       "Not all references are available\n");
+            if (!s->refs[s->refidx[0]].f->data[0] ||
+                !s->refs[s->refidx[1]].f->data[0] ||
+                !s->refs[s->refidx[2]].f->data[0]) {
+                av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
                 return AVERROR_INVALIDDATA;
             }
             if (get_bits1(&s->gb)) {
-                w = s->refs[s->refidx[0]]->width;
-                h = s->refs[s->refidx[0]]->height;
+                w = s->refs[s->refidx[0]].f->width;
+                h = s->refs[s->refidx[0]].f->height;
             } else if (get_bits1(&s->gb)) {
-                w = s->refs[s->refidx[1]]->width;
-                h = s->refs[s->refidx[1]]->height;
+                w = s->refs[s->refidx[1]].f->width;
+                h = s->refs[s->refidx[1]].f->height;
             } else if (get_bits1(&s->gb)) {
-                w = s->refs[s->refidx[2]]->width;
-                h = s->refs[s->refidx[2]]->height;
+                w = s->refs[s->refidx[2]].f->width;
+                h = s->refs[s->refidx[2]].f->height;
             } else {
                 w = get_bits(&s->gb, 16) + 1;
                 h = get_bits(&s->gb, 16) + 1;
             }
+            // Note: the new frame has not been allocated yet at this point,
+            // so frames[CUR_FRAME] still holds the previously decoded frame,
+            // i.e. this compares against the _last_ frame's dimensions
+            s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
+                                     s->frames[CUR_FRAME].tf.f->height == h;
             if (get_bits1(&s->gb)) // display size
                 skip_bits(&s->gb, 32);
             s->highprecisionmvs = get_bits1(&s->gb);
-            s->filtermode       = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
-                                  get_bits(&s->gb, 2);
-            s->allowcompinter   = s->signbias[0] != s->signbias[1] ||
-                                  s->signbias[0] != s->signbias[2];
+            s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
+                                                get_bits(&s->gb, 2);
+            s->allowcompinter = s->signbias[0] != s->signbias[1] ||
+                                s->signbias[0] != s->signbias[2];
             if (s->allowcompinter) {
                 if (s->signbias[0] == s->signbias[1]) {
                     s->fixcompref    = 2;
@@ -290,16 +582,15 @@ static int decode_frame_header(AVCodecContext *avctx,
             }
         }
     }
-
     s->refreshctx   = s->errorres ? 0 : get_bits1(&s->gb);
     s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
     s->framectxid   = c = get_bits(&s->gb, 2);
 
     /* loopfilter header data */
     s->filter.level = get_bits(&s->gb, 6);
-    sharp           = get_bits(&s->gb, 3);
-    /* If sharpness changed, reinit lim/mblim LUTs. if it didn't change,
-     * keep the old cache values since they are still valid. */
+    sharp = get_bits(&s->gb, 3);
+    // If sharpness changed, reset the cached lim/mblim LUTs (lim_lut is zeroed
+    // here); if it didn't change, the old cached values are still valid.
     if (s->filter.sharpness != sharp)
         memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
     s->filter.sharpness = sharp;
@@ -307,10 +598,10 @@ static int decode_frame_header(AVCodecContext *avctx,
         if (get_bits1(&s->gb)) {
             for (i = 0; i < 4; i++)
                 if (get_bits1(&s->gb))
-                    s->lf_delta.ref[i] = get_bits_with_sign(&s->gb, 6);
+                    s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
             for (i = 0; i < 2; i++)
                 if (get_bits1(&s->gb))
-                    s->lf_delta.mode[i] = get_bits_with_sign(&s->gb, 6);
+                    s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
         }
     } else {
         memset(&s->lf_delta, 0, sizeof(s->lf_delta));
@@ -318,9 +609,9 @@ static int decode_frame_header(AVCodecContext *avctx,
 
     /* quantization header data */
     s->yac_qi      = get_bits(&s->gb, 8);
-    s->ydc_qdelta  = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
-    s->uvdc_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
-    s->uvac_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
+    s->ydc_qdelta  = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
+    s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
+    s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
     s->lossless    = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
                      s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
 
@@ -330,19 +621,28 @@ static int decode_frame_header(AVCodecContext *avctx,
             for (i = 0; i < 7; i++)
                 s->prob.seg[i] = get_bits1(&s->gb) ?
                                  get_bits(&s->gb, 8) : 255;
-            if ((s->segmentation.temporal = get_bits1(&s->gb)))
+            if ((s->segmentation.temporal = get_bits1(&s->gb))) {
                 for (i = 0; i < 3; i++)
                     s->prob.segpred[i] = get_bits1(&s->gb) ?
                                          get_bits(&s->gb, 8) : 255;
+            }
+        }
+        if ((!s->segmentation.update_map || s->segmentation.temporal) &&
+            (w != s->frames[CUR_FRAME].tf.f->width ||
+             h != s->frames[CUR_FRAME].tf.f->height)) {
+            av_log(ctx, AV_LOG_ERROR,
+                   "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
+                   s->segmentation.temporal, s->segmentation.update_map);
+            return AVERROR_INVALIDDATA;
         }
 
         if (get_bits1(&s->gb)) {
             s->segmentation.absolute_vals = get_bits1(&s->gb);
             for (i = 0; i < 8; i++) {
                 if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
-                    s->segmentation.feat[i].q_val = get_bits_with_sign(&s->gb, 8);
+                    s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
                 if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
-                    s->segmentation.feat[i].lf_val = get_bits_with_sign(&s->gb, 6);
+                    s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
                 if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
                     s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
                 s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
@@ -365,17 +665,17 @@ static int decode_frame_header(AVCodecContext *avctx,
             else
                 qyac = s->yac_qi + s->segmentation.feat[i].q_val;
         } else {
-            qyac = s->yac_qi;
+            qyac  = s->yac_qi;
         }
         qydc  = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
         quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
         quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
         qyac  = av_clip_uintp2(qyac, 8);
 
-        s->segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[qydc];
-        s->segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[qyac];
-        s->segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[quvdc];
-        s->segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[quvac];
+        s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[qydc];
+        s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[qyac];
+        s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[quvdc];
+        s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[quvac];
 
         sh = s->filter.level >= 32;
         if (s->segmentation.feat[i].lf_enabled) {
@@ -384,7 +684,7 @@ static int decode_frame_header(AVCodecContext *avctx,
             else
                 lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
         } else {
-            lflvl = s->filter.level;
+            lflvl  = s->filter.level;
         }
         s->segmentation.feat[i].lflvl[0][0] =
         s->segmentation.feat[i].lflvl[0][1] =
@@ -400,10 +700,9 @@ static int decode_frame_header(AVCodecContext *avctx,
     }
 
     /* tiling info */
-    if ((ret = update_size(avctx, w, h)) < 0) {
-        av_log(avctx, AV_LOG_ERROR,
-               "Failed to initialize decoder for %dx%d\n", w, h);
-        return ret;
+    if ((res = update_size(ctx, w, h)) < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d\n", w, h);
+        return res;
     }
     for (s->tiling.log2_tile_cols = 0;
          (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
@@ -417,56 +716,51 @@ static int decode_frame_header(AVCodecContext *avctx,
             break;
     }
     s->tiling.log2_tile_rows = decode012(&s->gb);
-    s->tiling.tile_rows      = 1 << s->tiling.log2_tile_rows;
+    s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
     if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
         s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
-        s->c_b              = av_fast_realloc(s->c_b, &s->c_b_size,
-                                              sizeof(VP56RangeCoder) *
-                                              s->tiling.tile_cols);
+        s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
+                                 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
         if (!s->c_b) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "Ran out of memory during range coder init\n");
+            av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
             return AVERROR(ENOMEM);
         }
     }
 
     if (s->keyframe || s->errorres || s->intraonly) {
-        s->prob_ctx[0].p =
-        s->prob_ctx[1].p =
-        s->prob_ctx[2].p =
-        s->prob_ctx[3].p = ff_vp9_default_probs;
-        memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
-               sizeof(ff_vp9_default_coef_probs));
-        memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
-               sizeof(ff_vp9_default_coef_probs));
-        memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
-               sizeof(ff_vp9_default_coef_probs));
-        memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
-               sizeof(ff_vp9_default_coef_probs));
+        s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
+                           s->prob_ctx[3].p = vp9_default_probs;
+        memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
+               sizeof(vp9_default_coef_probs));
+        memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
+               sizeof(vp9_default_coef_probs));
+        memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
+               sizeof(vp9_default_coef_probs));
+        memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
+               sizeof(vp9_default_coef_probs));
     }
 
     // next 16 bits is size of the rest of the header (arith-coded)
     size2 = get_bits(&s->gb, 16);
     data2 = align_get_bits(&s->gb);
     if (size2 > size - (data2 - data)) {
-        av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
+        av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
         return AVERROR_INVALIDDATA;
     }
     ff_vp56_init_range_decoder(&s->c, data2, size2);
     if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
-        av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
+        av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
         return AVERROR_INVALIDDATA;
     }
 
-    if (s->keyframe || s->intraonly)
-        memset(s->counts.coef, 0,
-               sizeof(s->counts.coef) + sizeof(s->counts.eob));
-    else
+    if (s->keyframe || s->intraonly) {
+        memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
+    } else {
         memset(&s->counts, 0, sizeof(s->counts));
-
-    /* FIXME is it faster to not copy here, but do it down in the fw updates
-     * as explicit copies if the fw update is missing (and skip the copy upon
-     * fw update)? */
+    }
+    // FIXME is it faster to not copy here, but do it down in the fw updates
+    // as explicit copies if the fw update is missing (and skip the copy upon
+    // fw update)?
     s->prob.p = s->prob_ctx[c].p;
 
     // txfm updates
@@ -507,10 +801,11 @@ static int decode_frame_header(AVCodecContext *avctx,
                             if (m >= 3 && l == 0) // dc only has 3 pt
                                 break;
                             for (n = 0; n < 3; n++) {
-                                if (vp56_rac_get_prob_branchy(&s->c, 252))
+                                if (vp56_rac_get_prob_branchy(&s->c, 252)) {
                                     p[n] = update_prob(&s->c, r[n]);
-                                else
+                                } else {
                                     p[n] = r[n];
+                                }
                             }
                             p[3] = 0;
                         }
@@ -595,8 +890,7 @@ static int decode_frame_header(AVCodecContext *avctx,
                 for (k = 0; k < 3; k++)
                     if (vp56_rac_get_prob_branchy(&s->c, 252))
                         s->prob.p.partition[3 - i][j][k] =
-                            update_prob(&s->c,
-                                        s->prob.p.partition[3 - i][j][k]);
+                            update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
 
         // mv fields don't use the update_prob subexp model for some reason
         for (i = 0; i < 3; i++)
@@ -605,8 +899,7 @@ static int decode_frame_header(AVCodecContext *avctx,
 
         for (i = 0; i < 2; i++) {
             if (vp56_rac_get_prob_branchy(&s->c, 252))
-                s->prob.p.mv_comp[i].sign =
-                    (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
+                s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
 
             for (j = 0; j < 10; j++)
                 if (vp56_rac_get_prob_branchy(&s->c, 252))
@@ -614,8 +907,7 @@ static int decode_frame_header(AVCodecContext *avctx,
                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
 
             if (vp56_rac_get_prob_branchy(&s->c, 252))
-                s->prob.p.mv_comp[i].class0 =
-                    (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
+                s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
 
             for (j = 0; j < 10; j++)
                 if (vp56_rac_get_prob_branchy(&s->c, 252))
@@ -652,123 +944,2281 @@ static int decode_frame_header(AVCodecContext *avctx,
     return (data2 - data) + size2;
 }
 
-static int decode_subblock(AVCodecContext *avctx, int row, int col,
-                           VP9Filter *lflvl,
-                           ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
+/* Clip both components of *src to [s->min_mv, s->max_mv] and store them in *dst. */
+static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src, VP9Context *s)
+{
+    dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
+    dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
+}
+
+/* Store in *pmv the first (idx == 0) or second distinct (idx != 0) MV candidate for
+ * reference ref, or a zero MV if none; z indexes b->mv[][z], sb >= 0 adds sub-block candidates. */
+static void find_ref_mvs(VP9Context *s, VP56mv *pmv, int ref, int z, int idx, int sb)
+{
+    static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
+        [BS_64x64] = {{  3, -1 }, { -1,  3 }, {  4, -1 }, { -1,  4 },
+                      { -1, -1 }, {  0, -1 }, { -1,  0 }, {  6, -1 }},
+        [BS_64x32] = {{  0, -1 }, { -1,  0 }, {  4, -1 }, { -1,  2 },
+                      { -1, -1 }, {  0, -3 }, { -3,  0 }, {  2, -1 }},
+        [BS_32x64] = {{ -1,  0 }, {  0, -1 }, { -1,  4 }, {  2, -1 },
+                      { -1, -1 }, { -3,  0 }, {  0, -3 }, { -1,  2 }},
+        [BS_32x32] = {{  1, -1 }, { -1,  1 }, {  2, -1 }, { -1,  2 },
+                      { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
+        [BS_32x16] = {{  0, -1 }, { -1,  0 }, {  2, -1 }, { -1, -1 },
+                      { -1,  1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
+        [BS_16x32] = {{ -1,  0 }, {  0, -1 }, { -1,  2 }, { -1, -1 },
+                      {  1, -1 }, { -3,  0 }, {  0, -3 }, { -3, -3 }},
+        [BS_16x16] = {{  0, -1 }, { -1,  0 }, {  1, -1 }, { -1,  1 },
+                      { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
+        [BS_16x8]  = {{  0, -1 }, { -1,  0 }, {  1, -1 }, { -1, -1 },
+                      {  0, -2 }, { -2,  0 }, { -2, -1 }, { -1, -2 }},
+        [BS_8x16]  = {{ -1,  0 }, {  0, -1 }, { -1,  1 }, { -1, -1 },
+                      { -2,  0 }, {  0, -2 }, { -1, -2 }, { -2, -1 }},
+        [BS_8x8]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
+                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
+        [BS_8x4]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
+                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
+        [BS_4x8]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
+                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
+        [BS_4x4]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
+                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
+    };
+    VP9Block *b = s->b;
+    int row = s->row, col = s->col, row7 = s->row7;
+    const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
+#define INVALID_MV 0x80008000U
+    uint32_t mem = INVALID_MV; // first candidate seen while looking for the second one
+    int i = 0; // first entry of p[] to scan; raised to 2 by the sb >= 0 path below
+
+#define RETURN_DIRECT_MV(mv) \
+    do { \
+        uint32_t m = AV_RN32A(&mv); \
+        if (!idx) { \
+            AV_WN32A(pmv, m); \
+            return; \
+        } else if (mem == INVALID_MV) { \
+            mem = m; \
+        } else if (m != mem) { \
+            AV_WN32A(pmv, m); \
+            return; \
+        } \
+    } while (0)
+
+    if (sb >= 0) {
+        if (sb == 2 || sb == 1) {
+            RETURN_DIRECT_MV(b->mv[0][z]);
+        } else if (sb == 3) {
+            RETURN_DIRECT_MV(b->mv[2][z]);
+            RETURN_DIRECT_MV(b->mv[1][z]);
+            RETURN_DIRECT_MV(b->mv[0][z]);
+        }
+
+#define RETURN_MV(mv) \
+    do { \
+        if (sb > 0) { \
+            VP56mv tmp; \
+            uint32_t m; \
+            clamp_mv(&tmp, &mv, s); \
+            m = AV_RN32A(&tmp); \
+            if (!idx) { \
+                AV_WN32A(pmv, m); \
+                return; \
+            } else if (mem == INVALID_MV) { \
+                mem = m; \
+            } else if (m != mem) { \
+                AV_WN32A(pmv, m); \
+                return; \
+            } \
+        } else { \
+            uint32_t m = AV_RN32A(&mv); \
+            if (!idx) { \
+                clamp_mv(pmv, &mv, s); \
+                return; \
+            } else if (mem == INVALID_MV) { \
+                mem = m; \
+            } else if (m != mem) { \
+                clamp_mv(pmv, &mv, s); \
+                return; \
+            } \
+        } \
+    } while (0)
+
+        if (row > 0) {
+            struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
+            if (mv->ref[0] == ref) {
+                RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
+            } else if (mv->ref[1] == ref) {
+                RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
+            }
+        }
+        if (col > s->tiling.tile_col_start) {
+            struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
+            if (mv->ref[0] == ref) {
+                RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
+            } else if (mv->ref[1] == ref) {
+                RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
+            }
+        }
+        i = 2;
+    }
+
+    // previously coded MVs in this neighbourhood, using same reference frame
+    for (; i < 8; i++) {
+        int c = p[i][0] + col, r = p[i][1] + row;
+
+        if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
+            struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
+
+            if (mv->ref[0] == ref) {
+                RETURN_MV(mv->mv[0]);
+            } else if (mv->ref[1] == ref) {
+                RETURN_MV(mv->mv[1]);
+            }
+        }
+    }
+
+    // MV at this position in previous frame, using same reference frame
+    if (s->use_last_frame_mvs) {
+        struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];
+
+        if (!s->last_uses_2pass)
+            ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
+        if (mv->ref[0] == ref) {
+            RETURN_MV(mv->mv[0]);
+        } else if (mv->ref[1] == ref) {
+            RETURN_MV(mv->mv[1]);
+        }
+    }
+
+#define RETURN_SCALE_MV(mv, scale) \
+    do { \
+        if (scale) { \
+            VP56mv mv_temp = { -mv.x, -mv.y }; \
+            RETURN_MV(mv_temp); \
+        } else { \
+            RETURN_MV(mv); \
+        } \
+    } while (0)
+
+    // previously coded MVs in this neighbourhood, using different reference frame
+    for (i = 0; i < 8; i++) {
+        int c = p[i][0] + col, r = p[i][1] + row;
+
+        if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
+            struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
+
+            if (mv->ref[0] != ref && mv->ref[0] >= 0) {
+                RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
+            }
+            if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
+                // BUG - libvpx has this condition regardless of whether
+                // we used the first ref MV and pre-scaling
+                AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
+                RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
+            }
+        }
+    }
+
+    // MV at this position in previous frame, using different reference frame
+    if (s->use_last_frame_mvs) {
+        struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];
+
+        // no need to await_progress, because we already did that above
+        if (mv->ref[0] != ref && mv->ref[0] >= 0) {
+            RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
+        }
+        if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
+            // BUG - libvpx has this condition regardless of whether
+            // we used the first ref MV and pre-scaling
+            AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
+            RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
+        }
+    }
+
+    AV_ZERO32(pmv);
+#undef INVALID_MV
+#undef RETURN_DIRECT_MV
+#undef RETURN_MV
+#undef RETURN_SCALE_MV
+}
+
+/*
+ * Decode one motion vector delta component from the range coder s->c.
+ * idx selects the s->prob.p.mv_comp[] / s->counts.mv_comp[] entry; when hp is
+ * zero the lowest bit is not read from the bitstream but taken as 1.
+ * Each decoded symbol bumps its counter in s->counts. The returned delta is
+ * signed and never zero.
+ */
+static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
+{
+    int k, bit, mag, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
+    int cls = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
+                               s->prob.p.mv_comp[idx].classes);
+
+    s->counts.mv_comp[idx].sign[sign]++;
+    s->counts.mv_comp[idx].classes[cls]++;
+    if (!cls) {
+        // class 0: a single integer bit followed by the fractional part
+        mag = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
+        s->counts.mv_comp[idx].class0[mag]++;
+        bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
+                               s->prob.p.mv_comp[idx].class0_fp[mag]);
+        s->counts.mv_comp[idx].class0_fp[mag][bit]++;
+        mag = (mag << 3) | (bit << 1);
+        // bug in libvpx - the low bit is counted (as 1) for bw entropy
+        // purposes even if it wasn't coded
+        bit = hp ? vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp) : 1;
+        s->counts.mv_comp[idx].class0_hp[bit]++;
+        mag |= bit;
+    } else {
+        // classes > 0: cls integer bits, least significant first
+        for (mag = 0, k = 0; k < cls; k++) {
+            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[k]);
+            mag |= bit << k;
+            s->counts.mv_comp[idx].bits[k][bit]++;
+        }
+        // make room for the fractional part and the low bit
+        mag <<= 3;
+        bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
+        mag |= bit << 1;
+        s->counts.mv_comp[idx].fp[bit]++;
+        // same libvpx counting quirk as in the class 0 branch
+        bit = hp ? vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp) : 1;
+        s->counts.mv_comp[idx].hp[bit]++;
+        mag |= bit;
+        // add the base offset of this class
+        mag += 8 << cls;
+    }
+    // the decoded value is biased by one, so a zero delta cannot be produced
+    mag++;
+
+    return sign ? -mag : mag;
+}
+
+/*
+ * Make both components of *mv even by stepping odd ones towards zero.
+ */
+static av_always_inline void fill_mv_round_even(VP56mv *mv)
+{
+    if (mv->y & 1)
+        mv->y += mv->y < 0 ? 1 : -1;
+    if (mv->x & 1)
+        mv->x += mv->x < 0 ? 1 : -1;
+}
+
+/*
+ * Derive the motion vector(s) of the current block (s->b) for an inter mode.
+ *
+ * s    - decoder context
+ * mv   - output: mv[0] is always written, mv[1] too when b->comp is set;
+ *        ZEROMV clears 64 bits at mv (presumably both entries - confirm)
+ * mode - inter mode; ZEROMV, NEARMV and NEWMV get special treatment
+ * sb   - forwarded to find_ref_mvs() unless mode is NEWMV, which passes -1
+ *
+ * For every mode but ZEROMV the predictor comes from find_ref_mvs(): the
+ * second distinct candidate for NEARMV, the first one otherwise. For NEWMV
+ * or sb == -1 the predictor is then made even, except when
+ * s->highprecisionmvs is set and both components are below 64 in magnitude.
+ * NEWMV finally adds a delta read from the bitstream, using the same
+ * precision decision.
+ *
+ * mv[0] is completely processed (including its bitstream reads) before mv[1].
+ */
+static void fill_mv(VP9Context *s,
+                    VP56mv *mv, int mode, int sb)
 {
-    VP9Context *s = avctx->priv_data;
-    int c = ((s->above_partition_ctx[col]       >> (3 - bl)) & 1) |
+    VP9Block *b = s->b;
+    int z, nb_refs;
+
+    if (mode == ZEROMV) {
+        AV_ZERO64(mv);
+        return;
+    }
+
+    // one pass per reference: the second only for compound prediction
+    nb_refs = b->comp ? 2 : 1;
+    for (z = 0; z < nb_refs; z++) {
+        int hp = 0;
+
+        // FIXME cache this value and reuse for other subblocks
+        find_ref_mvs(s, &mv[z], b->ref[z], z, mode == NEARMV,
+                     mode == NEWMV ? -1 : sb);
+
+        // FIXME maybe move this code into find_ref_mvs()
+        if (mode == NEWMV || sb == -1) {
+            // hp is only consumed for NEWMV, which always takes this branch
+            hp = s->highprecisionmvs &&
+                 abs(mv[z].x) < 64 && abs(mv[z].y) < 64;
+            if (!hp)
+                fill_mv_round_even(&mv[z]);
+        }
+
+        if (mode == NEWMV) {
+            // the joint symbol tells which components carry a coded delta;
+            // the y one, if any, is read first
+            enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
+                                              s->prob.p.mv_joint);
+
+            s->counts.mv_joint[j]++;
+            if (j >= MV_JOINT_V)
+                mv[z].y += read_mv_component(s, 0, hp);
+            if (j & 1)
+                mv[z].x += read_mv_component(s, 1, hp);
+        }
+    }
+}
+
+/*
+ * Set every byte of a w x h rectangle, whose rows start `stride` bytes apart
+ * at ptr, to v. Widths 1, 2, 4 and 8 take one store per row through the
+ * AV_WN*A macros (presumably requiring aligned rows - confirm); every other
+ * positive width, e.g. one clipped against the frame edge, uses memset().
+ */
+static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
+                                       ptrdiff_t stride, int v)
+{
+    // unsigned multiply: replicating v >= 0x80 would overflow a signed int
+    const uint32_t v32 = (unsigned) v * 0x01010101U;
+    int y;
+
+    switch (w) {
+    case 1:
+        for (y = 0; y < h; y++, ptr += stride)
+            *ptr = v;
+        break;
+    case 2:
+        for (y = 0; y < h; y++, ptr += stride)
+            AV_WN16A(ptr, v32 & 0xffff);
+        break;
+    case 4:
+        for (y = 0; y < h; y++, ptr += stride)
+            AV_WN32A(ptr, v32);
+        break;
+    case 8: {
+#if HAVE_FAST_64BIT
+        const uint64_t v64 = v * 0x0101010101010101ULL;
+        for (y = 0; y < h; y++, ptr += stride)
+            AV_WN64A(ptr, v64);
+#else
+        for (y = 0; y < h; y++, ptr += stride) {
+            AV_WN32A(ptr,     v32);
+            AV_WN32A(ptr + 4, v32);
+        }
+#endif
+        break;
+    }
+    default: // no single-store width: fill row by row, ignoring w <= 0
+        for (y = 0; w > 0 && y < h; y++, ptr += stride)
+            memset(ptr, v, w);
+        break;
+    }
+}
+
+static void decode_mode(AVCodecContext *ctx)
+{
+    static const uint8_t left_ctx[N_BS_SIZES] = {
+        0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
+    };
+    static const uint8_t above_ctx[N_BS_SIZES] = {
+        0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
+    };
+    static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
+        TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
+        TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
+    };
+    VP9Context *s = ctx->priv_data;
+    VP9Block *b = s->b;
+    int row = s->row, col = s->col, row7 = s->row7;
+    enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
+    int w4 = FFMIN(s->cols - col, bwh_tab[1][b->bs][0]);
+    int h4 = FFMIN(s->rows - row, bwh_tab[1][b->bs][1]), y;
+    int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
+    int vref, filter_id;
+
+    if (!s->segmentation.enabled) {
+        b->seg_id = 0;
+    } else if (s->keyframe || s->intraonly) {
+        b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
+    } else if (!s->segmentation.update_map ||
+               (s->segmentation.temporal &&
+                vp56_rac_get_prob_branchy(&s->c,
+                    s->prob.segpred[s->above_segpred_ctx[col] +
+                                    s->left_segpred_ctx[row7]]))) {
+        int pred = 8, x;
+        uint8_t *refsegmap = s->frames[LAST_FRAME].segmentation_map;
+
+        if (!s->last_uses_2pass)
+            ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
+        for (y = 0; y < h4; y++)
+            for (x = 0; x < w4; x++)
+                pred = FFMIN(pred, refsegmap[(y + row) * 8 * s->sb_cols + x + col]);
+        av_assert1(pred < 8);
+        b->seg_id = pred;
+
+        memset(&s->above_segpred_ctx[col], 1, w4);
+        memset(&s->left_segpred_ctx[row7], 1, h4);
+    } else {
+        b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
+                                     s->prob.seg);
+
+        memset(&s->above_segpred_ctx[col], 0, w4);
+        memset(&s->left_segpred_ctx[row7], 0, h4);
+    }
+    if (s->segmentation.enabled &&
+        (s->segmentation.update_map || s->keyframe || s->intraonly)) {
+        setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
+                  w4, h4, 8 * s->sb_cols, b->seg_id);
+    }
+
+    b->skip = s->segmentation.enabled &&
+        s->segmentation.feat[b->seg_id].skip_enabled;
+    if (!b->skip) {
+        int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
+        b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
+        s->counts.skip[c][b->skip]++;
+    }
+
+    if (s->keyframe || s->intraonly) {
+        b->intra = 1;
+    } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
+        b->intra = !s->segmentation.feat[b->seg_id].ref_val;
+    } else {
+        int c, bit;
+
+        if (have_a && have_l) {
+            c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
+            c += (c == 2);
+        } else {
+            c = have_a ? 2 * s->above_intra_ctx[col] :
+                have_l ? 2 * s->left_intra_ctx[row7] : 0;
+        }
+        bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
+        s->counts.intra[c][bit]++;
+        b->intra = !bit;
+    }
+
+    if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
+        int c;
+        if (have_a) {
+            if (have_l) {
+                c = (s->above_skip_ctx[col] ? max_tx :
+                     s->above_txfm_ctx[col]) +
+                    (s->left_skip_ctx[row7] ? max_tx :
+                     s->left_txfm_ctx[row7]) > max_tx;
+            } else {
+                c = s->above_skip_ctx[col] ? 1 :
+                    (s->above_txfm_ctx[col] * 2 > max_tx);
+            }
+        } else if (have_l) {
+            c = s->left_skip_ctx[row7] ? 1 :
+                (s->left_txfm_ctx[row7] * 2 > max_tx);
+        } else {
+            c = 1;
+        }
+        switch (max_tx) {
+        case TX_32X32:
+            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
+            if (b->tx) {
+                b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
+                if (b->tx == 2)
+                    b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
+            }
+            s->counts.tx32p[c][b->tx]++;
+            break;
+        case TX_16X16:
+            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
+            if (b->tx)
+                b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
+            s->counts.tx16p[c][b->tx]++;
+            break;
+        case TX_8X8:
+            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
+            s->counts.tx8p[c][b->tx]++;
+            break;
+        case TX_4X4:
+            b->tx = TX_4X4;
+            break;
+        }
+    } else {
+        b->tx = FFMIN(max_tx, s->txfmmode);
+    }
+
+    if (s->keyframe || s->intraonly) {
+        uint8_t *a = &s->above_mode_ctx[col * 2];
+        uint8_t *l = &s->left_mode_ctx[(row7) << 1];
+
+        b->comp = 0;
+        if (b->bs > BS_8x8) {
+            // FIXME the memory storage intermediates here aren't really
+            // necessary, they're just there to make the code slightly
+            // simpler for now
+            b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
+                                    vp9_default_kf_ymode_probs[a[0]][l[0]]);
+            if (b->bs != BS_8x4) {
+                b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
+                                 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
+                l[0] = a[1] = b->mode[1];
+            } else {
+                l[0] = a[1] = b->mode[1] = b->mode[0];
+            }
+            if (b->bs != BS_4x8) {
+                b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
+                                        vp9_default_kf_ymode_probs[a[0]][l[1]]);
+                if (b->bs != BS_8x4) {
+                    b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
+                                  vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
+                    l[1] = a[1] = b->mode[3];
+                } else {
+                    l[1] = a[1] = b->mode[3] = b->mode[2];
+                }
+            } else {
+                b->mode[2] = b->mode[0];
+                l[1] = a[1] = b->mode[3] = b->mode[1];
+            }
+        } else {
+            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
+                                          vp9_default_kf_ymode_probs[*a][*l]);
+            b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
+            // FIXME this can probably be optimized
+            memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
+            memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
+        }
+        b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
+                                     vp9_default_kf_uvmode_probs[b->mode[3]]);
+    } else if (b->intra) {
+        b->comp = 0;
+        if (b->bs > BS_8x8) {
+            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
+                                          s->prob.p.y_mode[0]);
+            s->counts.y_mode[0][b->mode[0]]++;
+            if (b->bs != BS_8x4) {
+                b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
+                                              s->prob.p.y_mode[0]);
+                s->counts.y_mode[0][b->mode[1]]++;
+            } else {
+                b->mode[1] = b->mode[0];
+            }
+            if (b->bs != BS_4x8) {
+                b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
+                                              s->prob.p.y_mode[0]);
+                s->counts.y_mode[0][b->mode[2]]++;
+                if (b->bs != BS_8x4) {
+                    b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
+                                                  s->prob.p.y_mode[0]);
+                    s->counts.y_mode[0][b->mode[3]]++;
+                } else {
+                    b->mode[3] = b->mode[2];
+                }
+            } else {
+                b->mode[2] = b->mode[0];
+                b->mode[3] = b->mode[1];
+            }
+        } else {
+            static const uint8_t size_group[10] = {
+                3, 3, 3, 3, 2, 2, 2, 1, 1, 1
+            };
+            int sz = size_group[b->bs];
+
+            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
+                                          s->prob.p.y_mode[sz]);
+            b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
+            s->counts.y_mode[sz][b->mode[3]]++;
+        }
+        b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
+                                     s->prob.p.uv_mode[b->mode[3]]);
+        s->counts.uv_mode[b->mode[3]][b->uvmode]++;
+    } else {
+        static const uint8_t inter_mode_ctx_lut[14][14] = {
+            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
+            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
+            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
+            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
+            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
+        };
+
+        if (s->segmentation.feat[b->seg_id].ref_enabled) {
+            av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
+            b->comp = 0;
+            b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
+        } else {
+            // read comp_pred flag
+            if (s->comppredmode != PRED_SWITCHABLE) {
+                b->comp = s->comppredmode == PRED_COMPREF;
+            } else {
+                int c;
+
+                // FIXME add intra as ref=0xff (or -1) to make these easier?
+                if (have_a) {
+                    if (have_l) {
+                        if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
+                            c = 4;
+                        } else if (s->above_comp_ctx[col]) {
+                            c = 2 + (s->left_intra_ctx[row7] ||
+                                     s->left_ref_ctx[row7] == s->fixcompref);
+                        } else if (s->left_comp_ctx[row7]) {
+                            c = 2 + (s->above_intra_ctx[col] ||
+                                     s->above_ref_ctx[col] == s->fixcompref);
+                        } else {
+                            c = (!s->above_intra_ctx[col] &&
+                                 s->above_ref_ctx[col] == s->fixcompref) ^
+                            (!s->left_intra_ctx[row7] &&
+                             s->left_ref_ctx[row & 7] == s->fixcompref);
+                        }
+                    } else {
+                        c = s->above_comp_ctx[col] ? 3 :
+                        (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
+                    }
+                } else if (have_l) {
+                    c = s->left_comp_ctx[row7] ? 3 :
+                    (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
+                } else {
+                    c = 1;
+                }
+                b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
+                s->counts.comp[c][b->comp]++;
+            }
+
+            // read actual references
+            // FIXME probably cache a few variables here to prevent repetitive
+            // memory accesses below
+            if (b->comp) /* two references */ {
+                int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
+
+                b->ref[fix_idx] = s->fixcompref;
+                // FIXME can this codeblob be replaced by some sort of LUT?
+                if (have_a) {
+                    if (have_l) {
+                        if (s->above_intra_ctx[col]) {
+                            if (s->left_intra_ctx[row7]) {
+                                c = 2;
+                            } else {
+                                c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
+                            }
+                        } else if (s->left_intra_ctx[row7]) {
+                            c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
+                        } else {
+                            int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
+
+                            if (refl == refa && refa == s->varcompref[1]) {
+                                c = 0;
+                            } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
+                                if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
+                                    (refl == s->fixcompref && refa == s->varcompref[0])) {
+                                    c = 4;
+                                } else {
+                                    c = (refa == refl) ? 3 : 1;
+                                }
+                            } else if (!s->left_comp_ctx[row7]) {
+                                if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
+                                    c = 1;
+                                } else {
+                                    c = (refl == s->varcompref[1] &&
+                                         refa != s->varcompref[1]) ? 2 : 4;
+                                }
+                            } else if (!s->above_comp_ctx[col]) {
+                                if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
+                                    c = 1;
+                                } else {
+                                    c = (refa == s->varcompref[1] &&
+                                         refl != s->varcompref[1]) ? 2 : 4;
+                                }
+                            } else {
+                                c = (refl == refa) ? 4 : 2;
+                            }
+                        }
+                    } else {
+                        if (s->above_intra_ctx[col]) {
+                            c = 2;
+                        } else if (s->above_comp_ctx[col]) {
+                            c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
+                        } else {
+                            c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
+                        }
+                    }
+                } else if (have_l) {
+                    if (s->left_intra_ctx[row7]) {
+                        c = 2;
+                    } else if (s->left_comp_ctx[row7]) {
+                        c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
+                    } else {
+                        c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
+                    }
+                } else {
+                    c = 2;
+                }
+                bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
+                b->ref[var_idx] = s->varcompref[bit];
+                s->counts.comp_ref[c][bit]++;
+            } else /* single reference */ {
+                int bit, c;
+
+                if (have_a && !s->above_intra_ctx[col]) {
+                    if (have_l && !s->left_intra_ctx[row7]) {
+                        if (s->left_comp_ctx[row7]) {
+                            if (s->above_comp_ctx[col]) {
+                                c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
+                                         !s->above_ref_ctx[col]);
+                            } else {
+                                c = (3 * !s->above_ref_ctx[col]) +
+                                    (!s->fixcompref || !s->left_ref_ctx[row7]);
+                            }
+                        } else if (s->above_comp_ctx[col]) {
+                            c = (3 * !s->left_ref_ctx[row7]) +
+                                (!s->fixcompref || !s->above_ref_ctx[col]);
+                        } else {
+                            c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
+                        }
+                    } else if (s->above_intra_ctx[col]) {
+                        c = 2;
+                    } else if (s->above_comp_ctx[col]) {
+                        c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
+                    } else {
+                        c = 4 * (!s->above_ref_ctx[col]);
+                    }
+                } else if (have_l && !s->left_intra_ctx[row7]) {
+                    if (s->left_intra_ctx[row7]) {
+                        c = 2;
+                    } else if (s->left_comp_ctx[row7]) {
+                        c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
+                    } else {
+                        c = 4 * (!s->left_ref_ctx[row7]);
+                    }
+                } else {
+                    c = 2;
+                }
+                bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
+                s->counts.single_ref[c][0][bit]++;
+                if (!bit) {
+                    b->ref[0] = 0;
+                } else {
+                    // FIXME can this codeblob be replaced by some sort of LUT?
+                    if (have_a) {
+                        if (have_l) {
+                            if (s->left_intra_ctx[row7]) {
+                                if (s->above_intra_ctx[col]) {
+                                    c = 2;
+                                } else if (s->above_comp_ctx[col]) {
+                                    c = 1 + 2 * (s->fixcompref == 1 ||
+                                                 s->above_ref_ctx[col] == 1);
+                                } else if (!s->above_ref_ctx[col]) {
+                                    c = 3;
+                                } else {
+                                    c = 4 * (s->above_ref_ctx[col] == 1);
+                                }
+                            } else if (s->above_intra_ctx[col]) {
+                                if (s->left_intra_ctx[row7]) {
+                                    c = 2;
+                                } else if (s->left_comp_ctx[row7]) {
+                                    c = 1 + 2 * (s->fixcompref == 1 ||
+                                                 s->left_ref_ctx[row7] == 1);
+                                } else if (!s->left_ref_ctx[row7]) {
+                                    c = 3;
+                                } else {
+                                    c = 4 * (s->left_ref_ctx[row7] == 1);
+                                }
+                            } else if (s->above_comp_ctx[col]) {
+                                if (s->left_comp_ctx[row7]) {
+                                    if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
+                                        c = 3 * (s->fixcompref == 1 ||
+                                                 s->left_ref_ctx[row7] == 1);
+                                    } else {
+                                        c = 2;
+                                    }
+                                } else if (!s->left_ref_ctx[row7]) {
+                                    c = 1 + 2 * (s->fixcompref == 1 ||
+                                                 s->above_ref_ctx[col] == 1);
+                                } else {
+                                    c = 3 * (s->left_ref_ctx[row7] == 1) +
+                                    (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
+                                }
+                            } else if (s->left_comp_ctx[row7]) {
+                                if (!s->above_ref_ctx[col]) {
+                                    c = 1 + 2 * (s->fixcompref == 1 ||
+                                                 s->left_ref_ctx[row7] == 1);
+                                } else {
+                                    c = 3 * (s->above_ref_ctx[col] == 1) +
+                                    (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
+                                }
+                            } else if (!s->above_ref_ctx[col]) {
+                                if (!s->left_ref_ctx[row7]) {
+                                    c = 3;
+                                } else {
+                                    c = 4 * (s->left_ref_ctx[row7] == 1);
+                                }
+                            } else if (!s->left_ref_ctx[row7]) {
+                                c = 4 * (s->above_ref_ctx[col] == 1);
+                            } else {
+                                c = 2 * (s->left_ref_ctx[row7] == 1) +
+                                2 * (s->above_ref_ctx[col] == 1);
+                            }
+                        } else {
+                            if (s->above_intra_ctx[col] ||
+                                (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
+                                c = 2;
+                            } else if (s->above_comp_ctx[col]) {
+                                c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
+                            } else {
+                                c = 4 * (s->above_ref_ctx[col] == 1);
+                            }
+                        }
+                    } else if (have_l) {
+                        if (s->left_intra_ctx[row7] ||
+                            (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
+                            c = 2;
+                        } else if (s->left_comp_ctx[row7]) {
+                            c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
+                        } else {
+                            c = 4 * (s->left_ref_ctx[row7] == 1);
+                        }
+                    } else {
+                        c = 2;
+                    }
+                    bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
+                    s->counts.single_ref[c][1][bit]++;
+                    b->ref[0] = 1 + bit;
+                }
+            }
+        }
+
+        if (b->bs <= BS_8x8) {
+            if (s->segmentation.feat[b->seg_id].skip_enabled) {
+                b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
+            } else {
+                static const uint8_t off[10] = {
+                    3, 0, 0, 1, 0, 0, 0, 0, 0, 0
+                };
+
+                // FIXME this needs to use the LUT tables from find_ref_mvs
+                // because not all are -1,0/0,-1
+                int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
+                                          [s->left_mode_ctx[row7 + off[b->bs]]];
+
+                b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
+                                              s->prob.p.mv_mode[c]);
+                b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
+                s->counts.mv_mode[c][b->mode[0] - 10]++;
+            }
+        }
+
+        if (s->filtermode == FILTER_SWITCHABLE) {
+            int c;
+
+            if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
+                if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
+                    c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
+                        s->left_filter_ctx[row7] : 3;
+                } else {
+                    c = s->above_filter_ctx[col];
+                }
+            } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
+                c = s->left_filter_ctx[row7];
+            } else {
+                c = 3;
+            }
+
+            filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
+                                         s->prob.p.filter[c]);
+            s->counts.filter[c][filter_id]++;
+            b->filter = vp9_filter_lut[filter_id];
+        } else {
+            b->filter = s->filtermode;
+        }
+
+        if (b->bs > BS_8x8) {
+            int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
+
+            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
+                                          s->prob.p.mv_mode[c]);
+            s->counts.mv_mode[c][b->mode[0] - 10]++;
+            fill_mv(s, b->mv[0], b->mode[0], 0);
+
+            if (b->bs != BS_8x4) {
+                b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
+                                              s->prob.p.mv_mode[c]);
+                s->counts.mv_mode[c][b->mode[1] - 10]++;
+                fill_mv(s, b->mv[1], b->mode[1], 1);
+            } else {
+                b->mode[1] = b->mode[0];
+                AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
+                AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
+            }
+
+            if (b->bs != BS_4x8) {
+                b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
+                                              s->prob.p.mv_mode[c]);
+                s->counts.mv_mode[c][b->mode[2] - 10]++;
+                fill_mv(s, b->mv[2], b->mode[2], 2);
+
+                if (b->bs != BS_8x4) {
+                    b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
+                                                  s->prob.p.mv_mode[c]);
+                    s->counts.mv_mode[c][b->mode[3] - 10]++;
+                    fill_mv(s, b->mv[3], b->mode[3], 3);
+                } else {
+                    b->mode[3] = b->mode[2];
+                    AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
+                    AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
+                }
+            } else {
+                b->mode[2] = b->mode[0];
+                AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
+                AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
+                b->mode[3] = b->mode[1];
+                AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
+                AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
+            }
+        } else {
+            fill_mv(s, b->mv[0], b->mode[0], -1);
+            AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
+            AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
+            AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
+            AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
+            AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
+            AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
+        }
+
+        vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
+    }
+
+#if HAVE_FAST_64BIT
+#define SPLAT_CTX(var, val, n) \
+    switch (n) { \
+    case 1:  var = val;                                    break; \
+    case 2:  AV_WN16A(&var, val *             0x0101);     break; \
+    case 4:  AV_WN32A(&var, val *         0x01010101);     break; \
+    case 8:  AV_WN64A(&var, val * 0x0101010101010101ULL);  break; \
+    case 16: { \
+        uint64_t v64 = val * 0x0101010101010101ULL; \
+        AV_WN64A(              &var,     v64); \
+        AV_WN64A(&((uint8_t *) &var)[8], v64); \
+        break; \
+    } \
+    }
+#else
+#define SPLAT_CTX(var, val, n) \
+    switch (n) { \
+    case 1:  var = val;                         break; \
+    case 2:  AV_WN16A(&var, val *     0x0101);  break; \
+    case 4:  AV_WN32A(&var, val * 0x01010101);  break; \
+    case 8: { \
+        uint32_t v32 = val * 0x01010101; \
+        AV_WN32A(              &var,     v32); \
+        AV_WN32A(&((uint8_t *) &var)[4], v32); \
+        break; \
+    } \
+    case 16: { \
+        uint32_t v32 = val * 0x01010101; \
+        AV_WN32A(              &var,      v32); \
+        AV_WN32A(&((uint8_t *) &var)[4],  v32); \
+        AV_WN32A(&((uint8_t *) &var)[8],  v32); \
+        AV_WN32A(&((uint8_t *) &var)[12], v32); \
+        break; \
+    } \
+    }
+#endif
+
+    switch (bwh_tab[1][b->bs][0]) {
+#define SET_CTXS(dir, off, n) \
+    do { \
+        SPLAT_CTX(s->dir##_skip_ctx[off],      b->skip,          n); \
+        SPLAT_CTX(s->dir##_txfm_ctx[off],      b->tx,            n); \
+        SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
+        if (!s->keyframe && !s->intraonly) { \
+            SPLAT_CTX(s->dir##_intra_ctx[off], b->intra,   n); \
+            SPLAT_CTX(s->dir##_comp_ctx[off],  b->comp,    n); \
+            SPLAT_CTX(s->dir##_mode_ctx[off],  b->mode[3], n); \
+            if (!b->intra) { \
+                SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
+                if (s->filtermode == FILTER_SWITCHABLE) { \
+                    SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
+                } \
+            } \
+        } \
+    } while (0)
+    case 1: SET_CTXS(above, col, 1); break;
+    case 2: SET_CTXS(above, col, 2); break;
+    case 4: SET_CTXS(above, col, 4); break;
+    case 8: SET_CTXS(above, col, 8); break;
+    }
+    switch (bwh_tab[1][b->bs][1]) {
+    case 1: SET_CTXS(left, row7, 1); break;
+    case 2: SET_CTXS(left, row7, 2); break;
+    case 4: SET_CTXS(left, row7, 4); break;
+    case 8: SET_CTXS(left, row7, 8); break;
+    }
+#undef SPLAT_CTX
+#undef SET_CTXS
+
+    if (!s->keyframe && !s->intraonly) {
+        if (b->bs > BS_8x8) {
+            int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
+
+            AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
+            AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
+            AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
+            AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
+            AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
+            AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
+            AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
+            AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
+        } else {
+            int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
+
+            for (n = 0; n < w4 * 2; n++) {
+                AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
+                AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
+            }
+            for (n = 0; n < h4 * 2; n++) {
+                AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
+                AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
+            }
+        }
+    }
+
+    // FIXME kinda ugly
+    for (y = 0; y < h4; y++) {
+        int x, o = (row + y) * s->sb_cols * 8 + col;
+        struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
+
+        if (b->intra) {
+            for (x = 0; x < w4; x++) {
+                mv[x].ref[0] =
+                mv[x].ref[1] = -1;
+            }
+        } else if (b->comp) {
+            for (x = 0; x < w4; x++) {
+                mv[x].ref[0] = b->ref[0];
+                mv[x].ref[1] = b->ref[1];
+                AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
+                AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
+            }
+        } else {
+            for (x = 0; x < w4; x++) {
+                mv[x].ref[0] = b->ref[0];
+                mv[x].ref[1] = -1;
+                AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
+            }
+        }
+    }
+}
+
+// Token decoder for one transform block. FIXME merge cnt/eob arguments?
+static av_always_inline int
+decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
+                        int is_tx32x32, unsigned (*cnt)[6][3],
+                        unsigned (*eob)[6][2], uint8_t (*p)[6][11],
+                        int nnz, const int16_t *scan, const int16_t (*nb)[2],
+                        const int16_t *band_counts, const int16_t *qmul)
+{
+    int i = 0, band = 0, band_left = band_counts[band]; // i: scan position
+    uint8_t *tp = p[0][nnz]; // probabilities for the current (band, nnz) context
+    uint8_t cache[1024]; // magnitude class (0..5) per coefficient index
+    // NOTE(review): cache[] is never cleared; presumably nb[] only names written slots
+    do {
+        int val, rc;
+
+        val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
+        eob[band][nnz][val]++; // count this eob decision for the context
+        if (!val)
+            break;
+        // a zero token jumps back to this label, so no eob flag is read right after it
+    skip_eob:
+        if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
+            cnt[band][nnz][0]++;
+            if (!--band_left) // band exhausted: continue with the next one
+                band_left = band_counts[++band];
+            cache[scan[i]] = 0;
+            nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
+            tp = p[band][nnz];
+            if (++i == n_coeffs)
+                break; //invalid input; blocks should end with EOB
+            goto skip_eob;
+        }
+        // non-zero token: its class goes to cache[rc], its magnitude to val
+        rc = scan[i]; // index into coef[] / cache[] for this scan position
+        if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
+            cnt[band][nnz][1]++;
+            val = 1;
+            cache[rc] = 1;
+        } else {
+            // fill in p[3-10] (model fill) - only once per frame for each pos
+            if (!tp[3])
+                memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
+
+            cnt[band][nnz][2]++;
+            if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
+                if (!vp56_rac_get_prob_branchy(c, tp[4])) {
+                    cache[rc] = val = 2;
+                } else {
+                    val = 3 + vp56_rac_get_prob(c, tp[5]);
+                    cache[rc] = 3;
+                }
+            } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
+                cache[rc] = 4;
+                if (!vp56_rac_get_prob_branchy(c, tp[7])) {
+                    val = 5 + vp56_rac_get_prob(c, 159);
+                } else {
+                    val  = 7 + (vp56_rac_get_prob(c, 165) << 1);
+                    val +=      vp56_rac_get_prob(c, 145);
+                }
+            } else { // cat 3-6
+                cache[rc] = 5;
+                if (!vp56_rac_get_prob_branchy(c, tp[8])) {
+                    if (!vp56_rac_get_prob_branchy(c, tp[9])) {
+                        val  = 11 + (vp56_rac_get_prob(c, 173) << 2);
+                        val +=      (vp56_rac_get_prob(c, 148) << 1);
+                        val +=       vp56_rac_get_prob(c, 140);
+                    } else {
+                        val  = 19 + (vp56_rac_get_prob(c, 176) << 3);
+                        val +=      (vp56_rac_get_prob(c, 155) << 2);
+                        val +=      (vp56_rac_get_prob(c, 140) << 1);
+                        val +=       vp56_rac_get_prob(c, 135);
+                    }
+                } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
+                    val  = 35 + (vp56_rac_get_prob(c, 180) << 4);
+                    val +=      (vp56_rac_get_prob(c, 157) << 3);
+                    val +=      (vp56_rac_get_prob(c, 141) << 2);
+                    val +=      (vp56_rac_get_prob(c, 134) << 1);
+                    val +=       vp56_rac_get_prob(c, 130);
+                } else {
+                    val  = 67 + (vp56_rac_get_prob(c, 254) << 13);
+                    val +=      (vp56_rac_get_prob(c, 254) << 12);
+                    val +=      (vp56_rac_get_prob(c, 254) << 11);
+                    val +=      (vp56_rac_get_prob(c, 252) << 10);
+                    val +=      (vp56_rac_get_prob(c, 249) << 9);
+                    val +=      (vp56_rac_get_prob(c, 243) << 8);
+                    val +=      (vp56_rac_get_prob(c, 230) << 7);
+                    val +=      (vp56_rac_get_prob(c, 196) << 6);
+                    val +=      (vp56_rac_get_prob(c, 177) << 5);
+                    val +=      (vp56_rac_get_prob(c, 153) << 4);
+                    val +=      (vp56_rac_get_prob(c, 140) << 3);
+                    val +=      (vp56_rac_get_prob(c, 133) << 2);
+                    val +=      (vp56_rac_get_prob(c, 130) << 1);
+                    val +=       vp56_rac_get_prob(c, 129);
+                }
+            }
+        }
+        if (!--band_left)
+            band_left = band_counts[++band];
+        if (is_tx32x32) // sign bit is read below; the 32x32 variant halves the product
+            coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2;
+        else
+            coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i]; // qmul[0] only at i == 0
+        nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1; // rounded mean of 2 classes
+        tp = p[band][nnz];
+    } while (++i < n_coeffs);
+
+    return i; // scan position at which decoding stopped (0 if the first eob flag was 0)
+}
+
+static int decode_coeffs_b(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
+                           unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
+                           uint8_t (*p)[6][11], int nnz, const int16_t *scan,
+                           const int16_t (*nb)[2], const int16_t *band_counts,
+                           const int16_t *qmul)
+{ // Forwards to decode_coeffs_b_generic() with is_tx32x32 = 0 (no halving).
+    return decode_coeffs_b_generic(c, coef, n_coeffs, 0, cnt, eob, p,
+                                   nnz, scan, nb, band_counts, qmul);
+}
+
+static int decode_coeffs_b32(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
+                             unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
+                             uint8_t (*p)[6][11], int nnz, const int16_t *scan,
+                             const int16_t (*nb)[2], const int16_t *band_counts,
+                             const int16_t *qmul)
+{ // Forwards to decode_coeffs_b_generic() with is_tx32x32 = 1 (values halved).
+    return decode_coeffs_b_generic(c, coef, n_coeffs, 1, cnt, eob, p,
+                                   nnz, scan, nb, band_counts, qmul);
+}
+
+static void decode_coeffs(AVCodecContext *ctx)
+{ // Decode coefficients of the current block s->b: luma, then two chroma planes.
+    VP9Context *s = ctx->priv_data;
+    VP9Block *b = s->b;
+    int row = s->row, col = s->col;
+    uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
+    unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
+    unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
+    int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
+    int end_x = FFMIN(2 * (s->cols - col), w4); // x limit, clipped at s->cols
+    int end_y = FFMIN(2 * (s->rows - row), h4); // y limit, clipped at s->rows
+    int n, pl, x, y, res;
+    int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul; // [0] luma, [1] chroma
+    int tx = 4 * s->lossless + b->tx; // scan table index, offset by 4 when lossless
+    const int16_t * const *yscans = vp9_scans[tx];
+    const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
+    const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
+    const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
+    uint8_t *a = &s->above_y_nnz_ctx[col * 2];
+    uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
+    static const int16_t band_counts[4][8] = { // one row per transform size
+        { 1, 2, 3, 4,  3,   16 - 13 },
+        { 1, 2, 3, 4, 11,   64 - 21 },
+        { 1, 2, 3, 4, 11,  256 - 21 },
+        { 1, 2, 3, 4, 11, 1024 - 21 },
+    };
+    const int16_t *y_band_counts = band_counts[b->tx];
+    const int16_t *uv_band_counts = band_counts[b->uvtx];
+    // MERGE: la[n] becomes 1 if rd() of the group starting at la[n] is non-zero, else 0
+#define MERGE(la, end, step, rd) \
+    for (n = 0; n < end; n += step) \
+        la[n] = !!rd(&la[n])
+#define MERGE_CTX(step, rd) \
+    do { \
+        MERGE(l, end_y, step, rd); \
+        MERGE(a, end_x, step, rd); \
+    } while (0)
+    // DECODE_Y_COEF_LOOP: decode each luma transform block, store its eob and nnz flags
+#define DECODE_Y_COEF_LOOP(step, mode_index, v) \
+    for (n = 0, y = 0; y < end_y; y += step) { \
+        for (x = 0; x < end_x; x += step, n += step * step) { \
+            enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
+            res = decode_coeffs_b##v(&s->c, s->block + 16 * n, 16 * step * step, \
+                                     c, e, p, a[x] + l[y], yscans[txtp], \
+                                     ynbs[txtp], y_band_counts, qmul[0]); \
+            a[x] = l[y] = !!res; \
+            if (step >= 4) { \
+                AV_WN16A(&s->eob[n], res); \
+            } else { \
+                s->eob[n] = res; \
+            } \
+        } \
+    }
+    // SPLAT: replicate la[n] over its step-sized group; without cond, clamp the fill to end
+#define SPLAT(la, end, step, cond) \
+    if (step == 2) { \
+        for (n = 1; n < end; n += step) \
+            la[n] = la[n - 1]; \
+    } else if (step == 4) { \
+        if (cond) { \
+            for (n = 0; n < end; n += step) \
+                AV_WN32A(&la[n], la[n] * 0x01010101); \
+        } else { \
+            for (n = 0; n < end; n += step) \
+                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
+        } \
+    } else /* step == 8 */ { \
+        if (cond) { \
+            if (HAVE_FAST_64BIT) { \
+                for (n = 0; n < end; n += step) \
+                    AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
+            } else { \
+                for (n = 0; n < end; n += step) { \
+                    uint32_t v32 = la[n] * 0x01010101; \
+                    AV_WN32A(&la[n],     v32); \
+                    AV_WN32A(&la[n + 4], v32); \
+                } \
+            } \
+        } else { \
+            for (n = 0; n < end; n += step) \
+                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
+        } \
+    }
+#define SPLAT_CTX(step) \
+    do { \
+        SPLAT(a, end_x, step, end_x == w4); \
+        SPLAT(l, end_y, step, end_y == h4); \
+    } while (0)
+
+    /* y tokens */
+    switch (b->tx) {
+    case TX_4X4:
+        DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
+        break;
+    case TX_8X8:
+        MERGE_CTX(2, AV_RN16A);
+        DECODE_Y_COEF_LOOP(2, 0,);
+        SPLAT_CTX(2);
+        break;
+    case TX_16X16:
+        MERGE_CTX(4, AV_RN32A);
+        DECODE_Y_COEF_LOOP(4, 0,);
+        SPLAT_CTX(4);
+        break;
+    case TX_32X32:
+        MERGE_CTX(8, AV_RN64A);
+        DECODE_Y_COEF_LOOP(8, 0, 32);
+        SPLAT_CTX(8);
+        break;
+    }
+    // Chroma version of the loop above: fixed uvscan/uvnb, output to s->uvblock/uveob[pl]
+#define DECODE_UV_COEF_LOOP(step) \
+    for (n = 0, y = 0; y < end_y; y += step) { \
+        for (x = 0; x < end_x; x += step, n += step * step) { \
+            res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n, \
+                                  16 * step * step, c, e, p, a[x] + l[y], \
+                                  uvscan, uvnb, uv_band_counts, qmul[1]); \
+            a[x] = l[y] = !!res; \
+            if (step >= 4) { \
+                AV_WN16A(&s->uveob[pl][n], res); \
+            } else { \
+                s->uveob[pl][n] = res; \
+            } \
+        } \
+    }
+    // chroma pass: switch to the uv tables and halve all dimensions and limits
+    p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
+    c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
+    e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
+    w4 >>= 1;
+    h4 >>= 1;
+    end_x >>= 1;
+    end_y >>= 1;
+    for (pl = 0; pl < 2; pl++) {
+        a = &s->above_uv_nnz_ctx[pl][col];
+        l = &s->left_uv_nnz_ctx[pl][row & 7];
+        switch (b->uvtx) {
+        case TX_4X4:
+            DECODE_UV_COEF_LOOP(1);
+            break;
+        case TX_8X8:
+            MERGE_CTX(2, AV_RN16A);
+            DECODE_UV_COEF_LOOP(2);
+            SPLAT_CTX(2);
+            break;
+        case TX_16X16:
+            MERGE_CTX(4, AV_RN32A);
+            DECODE_UV_COEF_LOOP(4);
+            SPLAT_CTX(4);
+            break;
+        case TX_32X32:
+            MERGE_CTX(8, AV_RN64A);
+            // a 64x64 (max) uv block can ever only contain 1 tx32x32 block
+            // so there is no need to loop
+            res = decode_coeffs_b32(&s->c, s->uvblock[pl],
+                                    1024, c, e, p, a[0] + l[0],
+                                    uvscan, uvnb, uv_band_counts, qmul[1]);
+            a[0] = l[0] = !!res;
+            AV_WN16A(&s->uveob[pl][0], res);
+            SPLAT_CTX(8);
+            break;
+        }
+    }
+}
+
+static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
+                                             uint8_t *dst_edge, ptrdiff_t stride_edge,
+                                             uint8_t *dst_inner, ptrdiff_t stride_inner,
+                                             uint8_t *l, int col, int x, int w,
+                                             int row, int y, enum TxfmMode tx,
+                                             int p)
+{ // remap mode for unavailable neighbours, fill the top (*a) / left (l) edges, return the remapped mode
+    int have_top = row > 0 || y > 0; // false only at row 0, y 0
+    int have_left = col > s->tiling.tile_col_start || x > 0; // false only at x 0 of the tile's first column
+    int have_right = x < w - 1; // false once x reaches w - 1
+    static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = { // replacement mode per edge availability
+        [VERT_PRED]            = { { DC_127_PRED,          VERT_PRED },
+                                   { DC_127_PRED,          VERT_PRED } },
+        [HOR_PRED]             = { { DC_129_PRED,          DC_129_PRED },
+                                   { HOR_PRED,             HOR_PRED } },
+        [DC_PRED]              = { { DC_128_PRED,          TOP_DC_PRED },
+                                   { LEFT_DC_PRED,         DC_PRED } },
+        [DIAG_DOWN_LEFT_PRED]  = { { DC_127_PRED,          DIAG_DOWN_LEFT_PRED },
+                                   { DC_127_PRED,          DIAG_DOWN_LEFT_PRED } },
+        [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
+                                   { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
+        [VERT_RIGHT_PRED]      = { { VERT_RIGHT_PRED,      VERT_RIGHT_PRED },
+                                   { VERT_RIGHT_PRED,      VERT_RIGHT_PRED } },
+        [HOR_DOWN_PRED]        = { { HOR_DOWN_PRED,        HOR_DOWN_PRED },
+                                   { HOR_DOWN_PRED,        HOR_DOWN_PRED } },
+        [VERT_LEFT_PRED]       = { { DC_127_PRED,          VERT_LEFT_PRED },
+                                   { DC_127_PRED,          VERT_LEFT_PRED } },
+        [HOR_UP_PRED]          = { { DC_129_PRED,          DC_129_PRED },
+                                   { HOR_UP_PRED,          HOR_UP_PRED } },
+        [TM_VP8_PRED]          = { { DC_129_PRED,          VERT_PRED },
+                                   { HOR_PRED,             TM_VP8_PRED } },
+    };
+    static const struct { // which edge pixels each (remapped) mode reads
+        uint8_t needs_left:1;
+        uint8_t needs_top:1;
+        uint8_t needs_topleft:1;
+        uint8_t needs_topright:1;
+    } edges[N_INTRA_PRED_MODES] = {
+        [VERT_PRED]            = { .needs_top  = 1 },
+        [HOR_PRED]             = { .needs_left = 1 },
+        [DC_PRED]              = { .needs_top  = 1, .needs_left = 1 },
+        [DIAG_DOWN_LEFT_PRED]  = { .needs_top  = 1, .needs_topright = 1 },
+        [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
+        [VERT_RIGHT_PRED]      = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
+        [HOR_DOWN_PRED]        = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
+        [VERT_LEFT_PRED]       = { .needs_top  = 1, .needs_topright = 1 },
+        [HOR_UP_PRED]          = { .needs_left = 1 },
+        [TM_VP8_PRED]          = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
+        [LEFT_DC_PRED]         = { .needs_left = 1 },
+        [TOP_DC_PRED]          = { .needs_top  = 1 },
+        [DC_128_PRED]          = { 0 },
+        [DC_127_PRED]          = { 0 },
+        [DC_129_PRED]          = { 0 }
+    };
+
+    av_assert2(mode >= 0 && mode < 10); // mode_conv only has 10 rows
+    mode = mode_conv[mode][have_left][have_top];
+    if (edges[mode].needs_top) {
+        uint8_t *top, *topleft;
+        int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4; // have: bytes left up to s->cols (count doubled when p == 0)
+        int n_px_need_tr = 0;
+
+        if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
+            n_px_need_tr = 4; // top-right extension is only handled for 4x4 here
+
+        // if top of sb64-row, use s->intra_pred_data[] instead of
+        // dst[-stride] for intra prediction (it contains pre- instead of
+        // post-loopfilter data)
+        if (have_top) {
+            top = !(row & 7) && !y ?
+                s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
+                y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
+            if (have_left) // topleft is only assigned (and only read) when have_left is set
+                topleft = !(row & 7) && !y ?
+                    s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
+                    y == 0 || x == 0 ? &dst_edge[-stride_edge] :
+                    &dst_inner[-stride_inner];
+        }
+
+        if (have_top &&
+            (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
+            (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
+            n_px_need + n_px_need_tr <= n_px_have) {
+            *a = top; // everything needed is readable in place: redirect the caller's pointer, no copy
+        } else {
+            if (have_top) { // otherwise assemble the edge in the caller-provided buffer
+                if (n_px_need <= n_px_have) {
+                    memcpy(*a, top, n_px_need);
+                } else {
+                    memcpy(*a, top, n_px_have);
+                    memset(&(*a)[n_px_have], (*a)[n_px_have - 1], // pad with the last available pixel
+                           n_px_need - n_px_have);
+                }
+            } else {
+                memset(*a, 127, n_px_need); // no row above: constant 127
+            }
+            if (edges[mode].needs_topleft) {
+                if (have_left && have_top) {
+                    (*a)[-1] = topleft[-1];
+                } else {
+                    (*a)[-1] = have_top ? 129 : 127; // constant stand-in for the missing corner
+                }
+            }
+            if (tx == TX_4X4 && edges[mode].needs_topright) {
+                if (have_top && have_right &&
+                    n_px_need + n_px_need_tr <= n_px_have) {
+                    memcpy(&(*a)[4], &top[4], 4);
+                } else {
+                    memset(&(*a)[4], (*a)[3], 4); // replicate the last top pixel into the top-right
+                }
+            }
+        }
+    }
+    if (edges[mode].needs_left) {
+        if (have_left) {
+            int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4; // same bookkeeping as for the top edge, vertically
+            uint8_t *dst = x == 0 ? dst_edge : dst_inner;
+            ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
+
+            if (n_px_need <= n_px_have) {
+                for (i = 0; i < n_px_need; i++)
+                    l[n_px_need - 1 - i] = dst[i * stride - 1]; // l[] is stored in reverse: topmost pixel last
+            } else {
+                for (i = 0; i < n_px_have; i++)
+                    l[n_px_need - 1 - i] = dst[i * stride - 1];
+                memset(l, l[n_px_need - n_px_have], n_px_need - n_px_have); // pad the unavailable rows with the last one read
+            }
+        } else {
+            memset(l, 129, 4 << tx); // no left neighbour: constant 129
+        }
+    }
+
+    return mode;
+}
+
+static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
+{ // intra-predict and add the residual for each luma, then chroma, transform block of the current block (s->b)
+    VP9Context *s = ctx->priv_data;
+    VP9Block *b = s->b;
+    int row = s->row, col = s->col;
+    int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
+    int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
+    int end_x = FFMIN(2 * (s->cols - col), w4); // clip the loop extent to s->cols
+    int end_y = FFMIN(2 * (s->rows - row), h4); // clip the loop extent to s->rows
+    int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless; // lossless selects itxfm_add[] entries 4 further on
+    int uvstep1d = 1 << b->uvtx, p;
+    uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off; // dst: write target; dst_r: current frame plane at y_off
+    LOCAL_ALIGNED_32(uint8_t, a_buf, [64]); // scratch for the top edge; check_intra_mode() may redirect 'a' away from it
+    LOCAL_ALIGNED_32(uint8_t, l, [32]); // scratch for the left edge
+
+    for (n = 0, y = 0; y < end_y; y += step1d) {
+        uint8_t *ptr = dst, *ptr_r = dst_r;
+        for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
+                               ptr_r += 4 * step1d, n += step) {
+            int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ? // per-position mode only for bs > BS_8x8 with 4x4 transform
+                               y * 2 + x : 0];
+            uint8_t *a = &a_buf[32];
+            enum TxfmType txtp = vp9_intra_txfm_type[mode]; // looked up before mode gets remapped below
+            int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n]; // skip => no residual; tx > 8x8 reads eob via AV_RN16A
+
+            mode = check_intra_mode(s, mode, &a, ptr_r,
+                                    s->frames[CUR_FRAME].tf.f->linesize[0],
+                                    ptr, s->y_stride, l,
+                                    col, x, w4, row, y, b->tx, 0);
+            s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
+            if (eob) // residual is added only when eob is non-zero
+                s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
+                                           s->block + 16 * n, eob);
+        }
+        dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
+        dst   += 4 * step1d * s->y_stride;
+    }
+
+    // U/V: same loop with the luma extents halved
+    w4 >>= 1;
+    end_x >>= 1;
+    end_y >>= 1;
+    step = 1 << (b->uvtx * 2);
+    for (p = 0; p < 2; p++) {
+        dst   = s->dst[1 + p];
+        dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
+        for (n = 0, y = 0; y < end_y; y += uvstep1d) {
+            uint8_t *ptr = dst, *ptr_r = dst_r;
+            for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
+                                   ptr_r += 4 * uvstep1d, n += step) {
+                int mode = b->uvmode; // one mode for the whole chroma block
+                uint8_t *a = &a_buf[16];
+                int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
+
+                mode = check_intra_mode(s, mode, &a, ptr_r,
+                                        s->frames[CUR_FRAME].tf.f->linesize[1],
+                                        ptr, s->uv_stride, l,
+                                        col, x, w4, row, y, b->uvtx, p + 1);
+                s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
+                if (eob)
+                    s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride, // chroma always uses DCT_DCT here
+                                                    s->uvblock[p] + 16 * n, eob);
+            }
+            dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
+            dst   += 4 * uvstep1d * s->uv_stride;
+        }
+    }
+}
+
+static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
+                                         uint8_t *dst, ptrdiff_t dst_stride,
+                                         const uint8_t *ref, ptrdiff_t ref_stride,
+                                         ThreadFrame *ref_frame,
+                                         ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
+                                         int bw, int bh, int w, int h)
+{ // motion-compensate one bw x bh luma block: read ref at (x, y) displaced by *mv, write to dst
+    int mx = mv->x, my = mv->y, th;
+
+    y += my >> 3; // integer part of the MV (low 3 bits are the fraction)
+    x += mx >> 3;
+    ref += y * ref_stride + x;
+    mx &= 7; // keep only the fractional part
+    my &= 7;
+    // FIXME bilinear filter only needs 0/1 pixels, not 3/4
+    // we use +7 because the last 7 pixels of each sbrow can be changed in
+    // the longest loopfilter of the next sbrow
+    th = (y + bh + 4 * !!my + 7) >> 6;
+    ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0); // presumably waits until ref_frame reports progress >= th; TODO confirm
+    if (x < !!mx * 3 || y < !!my * 3 || // block plus filter margin (3 before, 4 after) leaves the w x h reference
+        x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
+        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
+                                 ref - !!my * 3 * ref_stride - !!mx * 3,
+                                 80, ref_stride,
+                                 bw + !!mx * 7, bh + !!my * 7,
+                                 x - !!mx * 3, y - !!my * 3, w, h);
+        ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3; // continue from the emulated copy (stride 80)
+        ref_stride = 80;
+    }
+    mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1); // fraction is passed doubled (chroma passes 0..15 as-is)
+}
+
+static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
+                                           uint8_t *dst_u, uint8_t *dst_v,
+                                           ptrdiff_t dst_stride,
+                                           const uint8_t *ref_u, ptrdiff_t src_stride_u,
+                                           const uint8_t *ref_v, ptrdiff_t src_stride_v,
+                                           ThreadFrame *ref_frame,
+                                           ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
+                                           int bw, int bh, int w, int h)
+{ // motion-compensate one bw x bh block in both chroma planes with the same MV
+    int mx = mv->x, my = mv->y, th;
+
+    y += my >> 4; // integer part of the MV (low 4 bits are the fraction)
+    x += mx >> 4;
+    ref_u += y * src_stride_u + x;
+    ref_v += y * src_stride_v + x;
+    mx &= 15; // keep only the fractional part
+    my &= 15;
+    // FIXME bilinear filter only needs 0/1 pixels, not 3/4
+    // we use +7 because the last 7 pixels of each sbrow can be changed in
+    // the longest loopfilter of the next sbrow
+    th = (y + bh + 4 * !!my + 7) >> 5;
+    ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0); // presumably waits until ref_frame reports progress >= th; TODO confirm
+    if (x < !!mx * 3 || y < !!my * 3 || // block plus filter margin (3 before, 4 after) leaves the w x h reference
+        x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
+        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
+                                 ref_u - !!my * 3 * src_stride_u - !!mx * 3,
+                                 80, src_stride_u,
+                                 bw + !!mx * 7, bh + !!my * 7,
+                                 x - !!mx * 3, y - !!my * 3, w, h);
+        ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
+        mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my); // U must be filtered before V reuses the same edge_emu_buffer
+
+        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
+                                 ref_v - !!my * 3 * src_stride_v - !!mx * 3,
+                                 80, src_stride_v,
+                                 bw + !!mx * 7, bh + !!my * 7,
+                                 x - !!mx * 3, y - !!my * 3, w, h);
+        ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
+        mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
+    } else { // fully inside the reference: filter straight from the planes
+        mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
+        mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
+    }
+}
+
+static void inter_recon(AVCodecContext *ctx)
+{ // inter-predict luma and chroma for the current block (s->b), then add the residual unless b->skip
+    static const uint8_t bwlog_tab[2][N_BS_SIZES] = { // s->dsp.mc[] index per block size: row 0 luma, row 1 chroma
+        { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
+        { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
+    };
+    VP9Context *s = ctx->priv_data;
+    VP9Block *b = s->b;
+    int row = s->row, col = s->col;
+    ThreadFrame *tref1 = &s->refs[s->refidx[b->ref[0]]], *tref2;
+    AVFrame *ref1 = tref1->f, *ref2;
+    int w1 = ref1->width, h1 = ref1->height, w2, h2;
+    ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride;
+
+    if (b->comp) { // second reference is only set up, and only read, when b->comp is set
+        tref2 = &s->refs[s->refidx[b->ref[1]]];
+        ref2 = tref2->f;
+        w2 = ref2->width;
+        h2 = ref2->height;
+    }
+
+    // y inter pred
+    if (b->bs > BS_8x8) { // 8x4 / 4x8 / 4x4: each sub-block uses its own MV from b->mv[]
+        if (b->bs == BS_8x4) {
+            mc_luma_dir(s, s->dsp.mc[3][b->filter][0], s->dst[0], ls_y,
+                        ref1->data[0], ref1->linesize[0], tref1,
+                        row << 3, col << 3, &b->mv[0][0], 8, 4, w1, h1);
+            mc_luma_dir(s, s->dsp.mc[3][b->filter][0],
+                        s->dst[0] + 4 * ls_y, ls_y,
+                        ref1->data[0], ref1->linesize[0], tref1,
+                        (row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w1, h1); // lower half uses mv[2]
+
+            if (b->comp) { // [1] function set for the 2nd reference: presumably averages into dst; TODO confirm
+                mc_luma_dir(s, s->dsp.mc[3][b->filter][1], s->dst[0], ls_y,
+                            ref2->data[0], ref2->linesize[0], tref2,
+                            row << 3, col << 3, &b->mv[0][1], 8, 4, w2, h2);
+                mc_luma_dir(s, s->dsp.mc[3][b->filter][1],
+                            s->dst[0] + 4 * ls_y, ls_y,
+                            ref2->data[0], ref2->linesize[0], tref2,
+                            (row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w2, h2);
+            }
+        } else if (b->bs == BS_4x8) {
+            mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
+                        ref1->data[0], ref1->linesize[0], tref1,
+                        row << 3, col << 3, &b->mv[0][0], 4, 8, w1, h1);
+            mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
+                        ref1->data[0], ref1->linesize[0], tref1,
+                        row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1); // right half uses mv[1]
+
+            if (b->comp) {
+                mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
+                            ref2->data[0], ref2->linesize[0], tref2,
+                            row << 3, col << 3, &b->mv[0][1], 4, 8, w2, h2);
+                mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
+                            ref2->data[0], ref2->linesize[0], tref2,
+                            row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2);
+            }
+        } else {
+            av_assert2(b->bs == BS_4x4);
+
+            // FIXME if two horizontally adjacent blocks have the same MV,
+            // do a w8 instead of a w4 call
+            mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
+                        ref1->data[0], ref1->linesize[0], tref1,
+                        row << 3, col << 3, &b->mv[0][0], 4, 4, w1, h1);
+            mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
+                        ref1->data[0], ref1->linesize[0], tref1,
+                        row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w1, h1);
+            mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
+                        s->dst[0] + 4 * ls_y, ls_y,
+                        ref1->data[0], ref1->linesize[0], tref1,
+                        (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w1, h1);
+            mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
+                        s->dst[0] + 4 * ls_y + 4, ls_y,
+                        ref1->data[0], ref1->linesize[0], tref1,
+                        (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1);
+
+            if (b->comp) {
+                mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
+                            ref2->data[0], ref2->linesize[0], tref2,
+                            row << 3, col << 3, &b->mv[0][1], 4, 4, w2, h2);
+                mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
+                            ref2->data[0], ref2->linesize[0], tref2,
+                            row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w2, h2);
+                mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
+                            s->dst[0] + 4 * ls_y, ls_y,
+                            ref2->data[0], ref2->linesize[0], tref2,
+                            (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w2, h2);
+                mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
+                            s->dst[0] + 4 * ls_y + 4, ls_y,
+                            ref2->data[0], ref2->linesize[0], tref2,
+                            (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2);
+            }
+        }
+    } else { // 8x8 and larger: a single MV (mv[0]) covers the whole block
+        int bwl = bwlog_tab[0][b->bs];
+        int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;
+
+        mc_luma_dir(s, s->dsp.mc[bwl][b->filter][0], s->dst[0], ls_y,
+                    ref1->data[0], ref1->linesize[0], tref1,
+                    row << 3, col << 3, &b->mv[0][0],bw, bh, w1, h1);
+
+        if (b->comp)
+            mc_luma_dir(s, s->dsp.mc[bwl][b->filter][1], s->dst[0], ls_y,
+                        ref2->data[0], ref2->linesize[0], tref2,
+                        row << 3, col << 3, &b->mv[0][1], bw, bh, w2, h2);
+    }
+
+    // uv inter pred
+    {
+        int bwl = bwlog_tab[1][b->bs];
+        int bw = bwh_tab[1][b->bs][0] * 4, bh = bwh_tab[1][b->bs][1] * 4;
+        VP56mv mvuv;
+
+        w1 = (w1 + 1) >> 1; // reference dimensions halved, rounded up, for the chroma planes
+        h1 = (h1 + 1) >> 1;
+        if (b->comp) {
+            w2 = (w2 + 1) >> 1;
+            h2 = (h2 + 1) >> 1;
+        }
+        if (b->bs > BS_8x8) { // sub-8x8: chroma MV is the rounded mean of the four luma MVs
+            mvuv.x = ROUNDED_DIV(b->mv[0][0].x + b->mv[1][0].x + b->mv[2][0].x + b->mv[3][0].x, 4);
+            mvuv.y = ROUNDED_DIV(b->mv[0][0].y + b->mv[1][0].y + b->mv[2][0].y + b->mv[3][0].y, 4);
+        } else {
+            mvuv = b->mv[0][0];
+        }
+
+        mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][0],
+                      s->dst[1], s->dst[2], ls_uv,
+                      ref1->data[1], ref1->linesize[1],
+                      ref1->data[2], ref1->linesize[2], tref1,
+                      row << 2, col << 2, &mvuv, bw, bh, w1, h1);
+
+        if (b->comp) {
+            if (b->bs > BS_8x8) { // same averaging for the second reference's MVs
+                mvuv.x = ROUNDED_DIV(b->mv[0][1].x + b->mv[1][1].x + b->mv[2][1].x + b->mv[3][1].x, 4);
+                mvuv.y = ROUNDED_DIV(b->mv[0][1].y + b->mv[1][1].y + b->mv[2][1].y + b->mv[3][1].y, 4);
+            } else {
+                mvuv = b->mv[0][1];
+            }
+            mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][1],
+                          s->dst[1], s->dst[2], ls_uv,
+                          ref2->data[1], ref2->linesize[1],
+                          ref2->data[2], ref2->linesize[2], tref2,
+                          row << 2, col << 2, &mvuv, bw, bh, w2, h2);
+        }
+    }
+
+    if (!b->skip) {
+        /* mostly copied from intra_recon() */
+
+        int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
+        int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
+        int end_x = FFMIN(2 * (s->cols - col), w4); // clip the loop extent to s->cols
+        int end_y = FFMIN(2 * (s->rows - row), h4); // clip the loop extent to s->rows
+        int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless; // lossless selects itxfm_add[] entries 4 further on
+        int uvstep1d = 1 << b->uvtx, p;
+        uint8_t *dst = s->dst[0];
+
+        // y itxfm add
+        for (n = 0, y = 0; y < end_y; y += step1d) {
+            uint8_t *ptr = dst;
+            for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
+                int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n]; // tx > 8x8 reads eob via AV_RN16A
+
+                if (eob) // residual is added only when eob is non-zero
+                    s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
+                                                  s->block + 16 * n, eob);
+            }
+            dst += 4 * s->y_stride * step1d;
+        }
+
+        // uv itxfm add
+        end_x >>= 1;
+        end_y >>= 1;
+        step = 1 << (b->uvtx * 2);
+        for (p = 0; p < 2; p++) {
+            dst = s->dst[p + 1];
+            for (n = 0, y = 0; y < end_y; y += uvstep1d) {
+                uint8_t *ptr = dst;
+                for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
+                    int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
+
+                    if (eob)
+                        s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
+                                                        s->uvblock[p] + 16 * n, eob);
+                }
+                dst += 4 * uvstep1d * s->uv_stride;
+            }
+        }
+    }
+}
+
+static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
+                                        int row_and_7, int col_and_7,
+                                        int w, int h, int col_end, int row_end,
+                                        enum TxfmMode tx, int skip_inter)
+{ // OR this block's filterable edges into lflvl->mask[is_uv]; nothing else is written
+    // FIXME I'm pretty sure all loops can be replaced by a single LUT if
+    // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
+    // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
+    // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
+
+    // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
+    // edges. This means that for UV, we work on two subsampled blocks at
+    // a time, and we only use the topleft block's mode information to set
+    // things like block strength. Thus, for any block size smaller than
+    // 16x16, ignore the odd portion of the block.
+    if (tx == TX_4X4 && is_uv) {
+        if (h == 1) {
+            if (row_and_7 & 1)
+                return; // odd row of a 1-high block: nothing to mark
+            if (!row_end)
+                h += 1; // widen to cover the odd row as well
+        }
+        if (w == 1) {
+            if (col_and_7 & 1)
+                return; // odd column of a 1-wide block: nothing to mark
+            if (!col_end)
+                w += 1; // widen to cover the odd column as well
+        }
+    }
+
+    if (tx == TX_4X4 && !skip_inter) {
+        int t = 1 << col_and_7, m_col = (t << w) - t, y; // t: bit of the first column; m_col: w consecutive bits from there
+        int m_col_odd = (t << (w - 1)) - t; // same run, one column shorter
+
+        // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
+        if (is_uv) {
+            int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8; // bit 0 goes to mask slot 1, the remaining bits to slot 2
+
+            for (y = row_and_7; y < h + row_and_7; y++) {
+                int col_mask_id = 2 - !(y & 7); // slot 1 when y is a multiple of 8, else slot 2
+
+                lflvl->mask[is_uv][0][y][1] |= m_row_8;
+                lflvl->mask[is_uv][0][y][2] |= m_row_4;
+                // for odd lines, if the odd col is not being filtered,
+                // skip odd row also:
+                // .---. <-- a
+                // |   |
+                // |___| <-- b
+                // ^   ^
+                // c   d
+                //
+                // if a/c are even row/col and b/d are odd, and d is skipped,
+                // e.g. right edge of size-66x66.webm, then skip b also (bug)
+                if ((col_end & 1) && (y & 1)) {
+                    lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
+                } else {
+                    lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
+                }
+            }
+        } else {
+            int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8; // bits 0 and 4 go to mask slot 1, the remaining bits to slot 2
+
+            for (y = row_and_7; y < h + row_and_7; y++) {
+                int col_mask_id = 2 - !(y & 3); // slot 1 when y is a multiple of 4, else slot 2
+
+                lflvl->mask[is_uv][0][y][1] |= m_row_8; // row edge
+                lflvl->mask[is_uv][0][y][2] |= m_row_4;
+                lflvl->mask[is_uv][1][y][col_mask_id] |= m_col; // col edge
+                lflvl->mask[is_uv][0][y][3] |= m_col;
+                lflvl->mask[is_uv][1][y][3] |= m_col;
+            }
+        }
+    } else {
+        int y, t = 1 << col_and_7, m_col = (t << w) - t; // t: bit of the first column; m_col: w consecutive bits from there
+
+        if (!skip_inter) {
+            int mask_id = (tx == TX_8X8);
+            int l2 = tx + is_uv - 1, step1d = 1 << l2;
+            static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 }; // every 1st / 2nd / 4th / 8th column bit, chosen by l2
+            int m_row = m_col & masks[l2];
+
+            // at odd UV col/row edges tx16/tx32 loopfilter edges, force
+            // 8wd loopfilter to prevent going off the visible edge.
+            if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) { // (w ^ (w - 1)) == 1 holds exactly when w is odd
+                int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
+                int m_row_8 = m_row - m_row_16;
+
+                for (y = row_and_7; y < h + row_and_7; y++) {
+                    lflvl->mask[is_uv][0][y][0] |= m_row_16;
+                    lflvl->mask[is_uv][0][y][1] |= m_row_8;
+                }
+            } else {
+                for (y = row_and_7; y < h + row_and_7; y++)
+                    lflvl->mask[is_uv][0][y][mask_id] |= m_row;
+            }
+
+            if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) { // same odd-size handling, vertically
+                for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
+                    lflvl->mask[is_uv][1][y][0] |= m_col;
+                if (y - row_and_7 == h - 1)
+                    lflvl->mask[is_uv][1][y][1] |= m_col;
+            } else {
+                for (y = row_and_7; y < h + row_and_7; y += step1d)
+                    lflvl->mask[is_uv][1][y][mask_id] |= m_col;
+            }
+        } else if (tx != TX_4X4) { // skip_inter set: only the block's first row (m_col) and first column bit (t) are marked
+            int mask_id;
+
+            mask_id = (tx == TX_8X8) || (is_uv && h == 1);
+            lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
+            mask_id = (tx == TX_8X8) || (is_uv && w == 1);
+            for (y = row_and_7; y < h + row_and_7; y++)
+                lflvl->mask[is_uv][0][y][mask_id] |= t;
+        } else if (is_uv) {
+            int t8 = t & 0x01, t4 = t - t8; // split t between slot 1 (bit 0) and slot 2 (any other bit)
+
+            for (y = row_and_7; y < h + row_and_7; y++) {
+                lflvl->mask[is_uv][0][y][2] |= t4;
+                lflvl->mask[is_uv][0][y][1] |= t8;
+            }
+            lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
+        } else {
+            int t8 = t & 0x11, t4 = t - t8; // split t between slot 1 (bits 0 and 4) and slot 2 (any other bit)
+
+            for (y = row_and_7; y < h + row_and_7; y++) {
+                lflvl->mask[is_uv][0][y][2] |= t4;
+                lflvl->mask[is_uv][0][y][1] |= t8;
+            }
+            lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
+        }
+    }
+}
+
+static void decode_b(AVCodecContext *ctx, int row, int col,
+                     struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
+                     enum BlockLevel bl, enum BlockPartition bp)
+{ // parse (s->pass < 2) and/or reconstruct (s->pass != 1) one block at (row, col), then update loop-filter state
+    VP9Context *s = ctx->priv_data;
+    VP9Block *b = s->b;
+    enum BlockSize bs = bl * 3 + bp; // block size derived from level and partition
+    int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
+    int emu[2]; // [0] luma, [1] chroma: reconstruct into a temp buffer instead of the frame
+    AVFrame *f = s->frames[CUR_FRAME].tf.f;
+
+    s->row = row;
+    s->row7 = row & 7;
+    s->col = col;
+    s->col7 = col & 7;
+    s->min_mv.x = -(128 + col * 64); // MV limits derived from the block position and s->cols / s->rows
+    s->min_mv.y = -(128 + row * 64);
+    s->max_mv.x = 128 + (s->cols - col - w4) * 64;
+    s->max_mv.y = 128 + (s->rows - row - h4) * 64;
+    if (s->pass < 2) { // passes 0 and 1 parse the block; pass 2 reuses what is already stored in *b
+        b->bs = bs;
+        b->bl = bl;
+        b->bp = bp;
+        decode_mode(ctx);
+        b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx)); // one size below tx when tx spans the block's full width or height
+
+        if (!b->skip) {
+            decode_coeffs(ctx);
+        } else { // skipped block: zero the above/left nnz contexts it covers
+            int row7 = s->row7;
+
+#define SPLAT_ZERO_CTX(v, n) \
+    switch (n) { \
+    case 1:  v = 0;          break; \
+    case 2:  AV_ZERO16(&v);  break; \
+    case 4:  AV_ZERO32(&v);  break; \
+    case 8:  AV_ZERO64(&v);  break; \
+    case 16: AV_ZERO128(&v); break; \
+    }
+#define SPLAT_ZERO_YUV(dir, var, off, n) \
+    do { \
+        SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
+        SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
+        SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
+    } while (0)
+
+            switch (w4) { // literal sizes so SPLAT_ZERO_CTX's inner switch sees a constant
+            case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1); break;
+            case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2); break;
+            case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4); break;
+            case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8); break;
+            }
+            switch (h4) {
+            case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1); break;
+            case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2); break;
+            case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4); break;
+            case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8); break;
+            }
+        }
+        if (s->pass == 1) { // pass 1 only parses: advance the per-block storage and stop before reconstruction
+            s->b++;
+            s->block += w4 * h4 * 64;
+            s->uvblock[0] += w4 * h4 * 16;
+            s->uvblock[1] += w4 * h4 * 16;
+            s->eob += 4 * w4 * h4;
+            s->uveob[0] += w4 * h4;
+            s->uveob[1] += w4 * h4;
+
+            return;
+        }
+    }
+
+    // emulated overhangs if the stride of the target buffer can't hold. This
+    // allows to support emu-edge and so on even if we have large block
+    // overhangs
+    emu[0] = (col + w4) * 8 > f->linesize[0] ||
+             (row + h4) > s->rows;
+    emu[1] = (col + w4) * 4 > f->linesize[1] ||
+             (row + h4) > s->rows;
+    if (emu[0]) {
+        s->dst[0] = s->tmp_y; // temp luma buffer, stride 64
+        s->y_stride = 64;
+    } else {
+        s->dst[0] = f->data[0] + yoff;
+        s->y_stride = f->linesize[0];
+    }
+    if (emu[1]) {
+        s->dst[1] = s->tmp_uv[0]; // temp chroma buffers, stride 32
+        s->dst[2] = s->tmp_uv[1];
+        s->uv_stride = 32;
+    } else {
+        s->dst[1] = f->data[1] + uvoff;
+        s->dst[2] = f->data[2] + uvoff;
+        s->uv_stride = f->linesize[1];
+    }
+    if (b->intra) {
+        intra_recon(ctx, yoff, uvoff);
+    } else {
+        inter_recon(ctx);
+    }
+    if (emu[0]) { // transfer the in-frame part of the temp luma buffer to the frame
+        int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
+
+        for (n = 0; o < w; n++) { // one call per set bit of w, widest (64) first
+            int bw = 64 >> n;
+
+            av_assert2(n <= 4);
+            if (w & bw) {
+                s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0], // presumably the plain-copy function for width bw; TODO confirm
+                                         s->tmp_y + o, 64, h, 0, 0);
+                o += bw;
+            }
+        }
+    }
+    if (emu[1]) { // same transfer for both chroma planes, starting at width 32
+        int w = FFMIN(s->cols - col, w4) * 4, h = FFMIN(s->rows - row, h4) * 4, n, o = 0;
+
+        for (n = 1; o < w; n++) {
+            int bw = 64 >> n;
+
+            av_assert2(n <= 4);
+            if (w & bw) {
+                s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
+                                         s->tmp_uv[0] + o, 32, h, 0, 0);
+                s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
+                                         s->tmp_uv[1] + o, 32, h, 0, 0);
+                o += bw;
+            }
+        }
+    }
+
+    // pick filter level and find edges to apply filter to
+    if (s->filter.level &&
+        (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
+                                                    [b->mode[3] != ZEROMV]) > 0) {
+        int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4); // block extent clipped to s->cols / s->rows
+        int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
+
+        setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
+        mask_edges(lflvl, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
+        mask_edges(lflvl, 1, row7, col7, x_end, y_end, // chroma also gets col_end / row_end when s->cols / s->rows is odd and the block reaches that edge
+                   s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
+                   s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
+                   b->uvtx, skip_inter);
+
+        if (!s->filter.lim_lut[lvl]) { // fill the limit LUT entries for this level on first use
+            int sharp = s->filter.sharpness;
+            int limit = lvl;
+
+            if (sharp > 0) {
+                limit >>= (sharp + 3) >> 2;
+                limit = FFMIN(limit, 9 - sharp);
+            }
+            limit = FFMAX(limit, 1); // at least 1, so a filled entry never looks unset to the check above
+
+            s->filter.lim_lut[lvl] = limit;
+            s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
+        }
+    }
+
+    if (s->pass == 2) { // pass 2 consumes stored per-block data: advance the same pointers as pass 1 did
+        s->b++;
+        s->block += w4 * h4 * 64;
+        s->uvblock[0] += w4 * h4 * 16;
+        s->uvblock[1] += w4 * h4 * 16;
+        s->eob += 4 * w4 * h4;
+        s->uveob[0] += w4 * h4;
+        s->uveob[1] += w4 * h4;
+    }
+}
+
+static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
+                      ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
+{
+    VP9Context *s = ctx->priv_data;
+    int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) | // 2-bit context: bit 0 from the above entry, bit 1 from the left entry
             (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
-    int ret;
-    const uint8_t *p = s->keyframe ? ff_vp9_default_kf_partition_probs[bl][c]
-                                   : s->prob.p.partition[bl][c];
+    const uint8_t *p = s->keyframe ? vp9_default_kf_partition_probs[bl][c] :
+                                     s->prob.p.partition[bl][c]; // keyframes use the fixed default table
     enum BlockPartition bp;
     ptrdiff_t hbs = 4 >> bl;
+    AVFrame *f = s->frames[CUR_FRAME].tf.f;
+    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
 
     if (bl == BL_8X8) {
-        bp  = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
-        ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
-    } else if (col + hbs < s->cols) {
-        if (row + hbs < s->rows) {
-            bp = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
+        bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
+        decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp); // recursion bottoms out here: bp is passed on, never split further
+    } else if (col + hbs < s->cols) { // FIXME why not <=?
+        if (row + hbs < s->rows) { // FIXME why not <=?
+            bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
             switch (bp) {
             case PARTITION_NONE:
-                ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
-                                          bl, bp);
+                decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
                 break;
             case PARTITION_H:
-                ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
-                                          bl, bp);
-                if (!ret) {
-                    yoff  += hbs * 8 * s->cur_frame->linesize[0];
-                    uvoff += hbs * 4 * s->cur_frame->linesize[1];
-                    ret    = ff_vp9_decode_block(avctx, row + hbs, col, lflvl,
-                                                 yoff, uvoff, bl, bp);
-                }
+                decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
+                yoff  += hbs * 8 * y_stride; // lower block: hbs * 8 lines further down in plane 0
+                uvoff += hbs * 4 * uv_stride; // and hbs * 4 lines further down in the plane-1 stride
+                decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
                 break;
             case PARTITION_V:
-                ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
-                                          bl, bp);
-                if (!ret) {
-                    yoff  += hbs * 8;
-                    uvoff += hbs * 4;
-                    ret    = ff_vp9_decode_block(avctx, row, col + hbs, lflvl,
-                                                 yoff, uvoff, bl, bp);
-                }
+                decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
+                yoff  += hbs * 8; // right block: offsets move within the line instead of across lines
+                uvoff += hbs * 4;
+                decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
                 break;
             case PARTITION_SPLIT:
-                ret = decode_subblock(avctx, row, col, lflvl,
-                                      yoff, uvoff, bl + 1);
-                if (!ret) {
-                    ret = decode_subblock(avctx, row, col + hbs, lflvl,
-                                          yoff + 8 * hbs, uvoff + 4 * hbs,
-                                          bl + 1);
-                    if (!ret) {
-                        yoff  += hbs * 8 * s->cur_frame->linesize[0];
-                        uvoff += hbs * 4 * s->cur_frame->linesize[1];
-                        ret    = decode_subblock(avctx, row + hbs, col, lflvl,
-                                                 yoff, uvoff, bl + 1);
-                        if (!ret) {
-                            ret = decode_subblock(avctx, row + hbs, col + hbs,
-                                                  lflvl, yoff + 8 * hbs,
-                                                  uvoff + 4 * hbs, bl + 1);
-                        }
-                    }
-                }
+                decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1); // quadrants in order: top-left, top-right, bottom-left, bottom-right
+                decode_sb(ctx, row, col + hbs, lflvl,
+                          yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
+                yoff  += hbs * 8 * y_stride;
+                uvoff += hbs * 4 * uv_stride;
+                decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
+                decode_sb(ctx, row + hbs, col + hbs, lflvl,
+                          yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
                 break;
             default:
-                av_log(avctx, AV_LOG_ERROR, "Unexpected partition %d.", bp);
-                return AVERROR_INVALIDDATA;
+                av_assert0(0); // NOTE(review): assumes the tree only yields the four partitions handled above - confirm
             }
         } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
-            bp  = PARTITION_SPLIT;
-            ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
-            if (!ret)
-                ret = decode_subblock(avctx, row, col + hbs, lflvl,
-                                      yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
+            bp = PARTITION_SPLIT; // row + hbs >= s->rows here: only the two upper quadrants are decoded
+            decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
+            decode_sb(ctx, row, col + hbs, lflvl,
+                      yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
         } else {
-            bp  = PARTITION_H;
-            ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
-                                      bl, bp);
+            bp = PARTITION_H; // row + hbs >= s->rows here: only the upper block is decoded
+            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
         }
-    } else if (row + hbs < s->rows) {
+    } else if (row + hbs < s->rows) { // FIXME why not <=?
         if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
-            bp  = PARTITION_SPLIT;
-            ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
-            if (!ret) {
-                yoff  += hbs * 8 * s->cur_frame->linesize[0];
-                uvoff += hbs * 4 * s->cur_frame->linesize[1];
-                ret    = decode_subblock(avctx, row + hbs, col, lflvl,
-                                         yoff, uvoff, bl + 1);
-            }
+            bp = PARTITION_SPLIT; // col + hbs >= s->cols here: only the two left quadrants are decoded
+            decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
+            yoff  += hbs * 8 * y_stride;
+            uvoff += hbs * 4 * uv_stride;
+            decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
         } else {
-            bp  = PARTITION_V;
-            ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
-                                      bl, bp);
+            bp = PARTITION_V; // col + hbs >= s->cols here: only the left block is decoded
+            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
         }
     } else {
-        bp  = PARTITION_SPLIT;
-        ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
+        bp = PARTITION_SPLIT; // neither bound holds: nothing is read from the range coder, only the top-left quadrant is decoded
+        decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
     }
     s->counts.partition[bl][c][bp]++;
+}
 
-    return ret;
+static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
+                          ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
+{
+    VP9Context *s = ctx->priv_data;
+    VP9Block *b = s->b; // level/partition come from this stored block, not from the range coder; NOTE(review): decode_b() appears to advance s->b in pass 2, hence the snapshot - confirm
+    ptrdiff_t hbs = 4 >> bl;
+    AVFrame *f = s->frames[CUR_FRAME].tf.f;
+    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
+
+    if (bl == BL_8X8) {
+        av_assert2(b->bl == BL_8X8);
+        decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
+    } else if (s->b->bl == bl) { // stored block sits at this level: replay its partition
+        decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
+        if (b->bp == PARTITION_H && row + hbs < s->rows) { // lower block, skipped when it would start at or beyond s->rows
+            yoff  += hbs * 8 * y_stride;
+            uvoff += hbs * 4 * uv_stride;
+            decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
+        } else if (b->bp == PARTITION_V && col + hbs < s->cols) { // right block, skipped when it would start at or beyond s->cols
+            yoff  += hbs * 8;
+            uvoff += hbs * 4;
+            decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
+        }
+    } else { // stored block sits at a deeper level: recurse into each quadrant that starts inside s->cols / s->rows
+        decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
+        if (col + hbs < s->cols) { // FIXME why not <=?
+            if (row + hbs < s->rows) {
+                decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs,
+                              uvoff + 4 * hbs, bl + 1);
+                yoff  += hbs * 8 * y_stride;
+                uvoff += hbs * 4 * uv_stride;
+                decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
+                decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
+                                    yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
+            } else {
+                yoff  += hbs * 8;
+                uvoff += hbs * 4;
+                decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
+            }
+        } else if (row + hbs < s->rows) {
+            yoff  += hbs * 8 * y_stride;
+            uvoff += hbs * 4 * uv_stride;
+            decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
+        }
+    }
 }
 
-static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
-                                int row, int col,
-                                ptrdiff_t yoff, ptrdiff_t uvoff)
+static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
+                          int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
 {
-    VP9Context *s = avctx->priv_data;
-    uint8_t *dst   = s->cur_frame->data[0] + yoff, *lvl = lflvl->level;
-    ptrdiff_t ls_y = s->cur_frame->linesize[0], ls_uv = s->cur_frame->linesize[1];
+    VP9Context *s = ctx->priv_data;
+    AVFrame *f = s->frames[CUR_FRAME].tf.f;
+    uint8_t *dst = f->data[0] + yoff, *lvl = lflvl->level;
+    ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
     int y, x, p;
 
-    /* FIXME: In how far can we interleave the v/h loopfilter calls? E.g.
-     * if you think of them as acting on a 8x8 block max, we can interleave
-     * each v/h within the single x loop, but that only works if we work on
-     * 8 pixel blocks, and we won't always do that (we want at least 16px
-     * to use SSE2 optimizations, perhaps 32 for AVX2). */
+    // FIXME to what extent can we interleave the v/h loopfilter calls? E.g.
+    // if you think of them as acting on an 8x8 block max, we can interleave
+    // each v/h within the single x loop, but that only works if we work on
+    // 8 pixel blocks, and we won't always do that (we want at least 16px
+    // to use SSE2 optimizations, perhaps 32 for AVX2)
 
     // filter edges between columns, Y plane (e.g. block1 | block2)
     for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
@@ -776,7 +3226,7 @@ static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
         uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
         unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
         unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
-        unsigned hm  = hm1 | hm2 | hm13 | hm23;
+        unsigned hm = hm1 | hm2 | hm13 | hm23;
 
         for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
             if (hm1 & x) {
@@ -792,7 +3242,7 @@ static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
                             s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
                         }
                     } else if (hm2 & x) {
-                        L  = l[8];
+                        L = l[8];
                         H |= (L >> 4) << 8;
                         E |= s->filter.mblim_lut[L] << 8;
                         I |= s->filter.lim_lut[L] << 8;
@@ -818,7 +3268,7 @@ static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
 
                 if (hm23 & x) {
-                    L  = l[8];
+                    L = l[8];
                     H |= (L >> 4) << 8;
                     E |= s->filter.mblim_lut[L] << 8;
                     I |= s->filter.lim_lut[L] << 8;
@@ -838,7 +3288,7 @@ static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
     //                                          block1
     // filter edges between rows, Y plane (e.g. ------)
     //                                          block2
-    dst = s->cur_frame->data[0] + yoff;
+    dst = f->data[0] + yoff;
     lvl = lflvl->level;
     for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
         uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
@@ -858,7 +3308,7 @@ static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
                             s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
                         }
                     } else if (vm & (x << 1)) {
-                        L  = l[1];
+                        L = l[1];
                         H |= (L >> 4) << 8;
                         E |= s->filter.mblim_lut[L] << 8;
                         I |= s->filter.lim_lut[L] << 8;
@@ -882,7 +3332,7 @@ static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
 
                 if (vm3 & (x << 1)) {
-                    L  = l[1];
+                    L = l[1];
                     H |= (L >> 4) << 8;
                     E |= s->filter.mblim_lut[L] << 8;
                     I |= s->filter.lim_lut[L] << 8;
@@ -902,7 +3352,7 @@ static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
     // same principle but for U/V planes
     for (p = 0; p < 2; p++) {
         lvl = lflvl->level;
-        dst = s->cur_frame->data[1 + p] + uvoff;
+        dst = f->data[1 + p] + uvoff;
         for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
             uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
             uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
@@ -913,8 +3363,7 @@ static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
                 if (col || x > 1) {
                     if (hm1 & x) {
                         int L = *l, H = L >> 4;
-                        int E = s->filter.mblim_lut[L];
-                        int I = s->filter.lim_lut[L];
+                        int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
 
                         if (hmask1[0] & x) {
                             if (hmask2[0] & x) {
@@ -924,7 +3373,7 @@ static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
                                 s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
                             }
                         } else if (hm2 & x) {
-                            L  = l[16];
+                            L = l[16];
                             H |= (L >> 4) << 8;
                             E |= s->filter.mblim_lut[L] << 8;
                             I |= s->filter.lim_lut[L] << 8;
@@ -937,8 +3386,7 @@ static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
                         }
                     } else if (hm2 & x) {
                         int L = l[16], H = L >> 4;
-                        int E = s->filter.mblim_lut[L];
-                        int I = s->filter.lim_lut[L];
+                        int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
 
                         s->dsp.loop_filter_8[!!(hmask2[1] & x)]
                                             [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
@@ -949,7 +3397,7 @@ static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
             }
         }
         lvl = lflvl->level;
-        dst = s->cur_frame->data[1 + p] + uvoff;
+        dst = f->data[1 + p] + uvoff;
         for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
             uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
             unsigned vm = vmask[0] | vmask[1] | vmask[2];
@@ -958,8 +3406,7 @@ static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
                 if (row || y) {
                     if (vm & x) {
                         int L = *l, H = L >> 4;
-                        int E = s->filter.mblim_lut[L];
-                        int I = s->filter.lim_lut[L];
+                        int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
 
                         if (vmask[0] & x) {
                             if (vmask[0] & (x << 2)) {
@@ -969,7 +3416,7 @@ static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
                                 s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
                             }
                         } else if (vm & (x << 2)) {
-                            L  = l[2];
+                            L = l[2];
                             H |= (L >> 4) << 8;
                             E |= s->filter.mblim_lut[L] << 8;
                             I |= s->filter.lim_lut[L] << 8;
@@ -982,8 +3429,7 @@ static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
                         }
                     } else if (vm & (x << 2)) {
                         int L = l[2], H = L >> 4;
-                        int E = s->filter.mblim_lut[L];
-                        int I = s->filter.lim_lut[L];
+                        int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
 
                         s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
                                             [1](ptr + 8, ls_uv, E, I, H);
@@ -998,282 +3444,625 @@ static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
 
 static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
 {
-    int sb_start =  (idx      * n) >> log2_n;
+    int sb_start = ( idx      * n) >> log2_n; // tile idx spans [idx * n, (idx + 1) * n) >> log2_n; both ends are clamped to n and scaled by 8 below
     int sb_end   = ((idx + 1) * n) >> log2_n;
     *start = FFMIN(sb_start, n) << 3;
     *end   = FFMIN(sb_end,   n) << 3;
 }
 
-static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
-                            int *got_frame, const uint8_t *data, int size)
+static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
+                                        int max_count, int update_factor)
 {
-    VP9Context *s = avctx->priv_data;
-    int ret, tile_row, tile_col, i, ref = -1, row, col;
-    ptrdiff_t yoff = 0, uvoff = 0;
+    unsigned ct = ct0 + ct1, p2, p1;
 
-    ret = decode_frame_header(avctx, data, size, &ref);
-    if (ret < 0) {
-        return ret;
-    } else if (!ret) {
-        if (!s->refs[ref]->buf[0]) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "Requested reference %d not available\n", ref);
-            return AVERROR_INVALIDDATA;
+    if (!ct) // nothing was counted: leave *p untouched
+        return;
+
+    p1 = *p;
+    p2 = ((ct0 << 8) + (ct >> 1)) / ct; // observed share of ct0, scaled by 256 and rounded to nearest
+    p2 = av_clip(p2, 1, 255);
+    ct = FFMIN(ct, max_count);
+    update_factor = FASTDIV(update_factor * ct, max_count); // weight scaled by ct / max_count; NOTE(review): presumably FASTDIV is a plain quotient - confirm
+
+    // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
+    *p = p1 + (((p2 - p1) * update_factor + 128) >> 8); // p1/p2 are unsigned: when p2 < p1 the difference wraps, but the excess is a multiple of 256 after the shift and drops out in the uint8_t store
+}
+
+static void adapt_probs(VP9Context *s)
+{
+    int i, j, k, l, m;
+    prob_context *p = &s->prob_ctx[s->framectxid].p; // destination: the stored context selected by s->framectxid
+    int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128; // 128 only when none of keyframe/intraonly is set and last_keyframe is set
+
+    // coefficients: adapted with max_count 24 and update factor uf
+    for (i = 0; i < 4; i++)
+        for (j = 0; j < 2; j++)
+            for (k = 0; k < 2; k++)
+                for (l = 0; l < 6; l++)
+                    for (m = 0; m < 6; m++) {
+                        uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
+                        unsigned *e = s->counts.eob[i][j][k][l][m];
+                        unsigned *c = s->counts.coef[i][j][k][l][m];
+
+                        if (l == 0 && m >= 3) // dc only has 3 pt
+                            break;
+
+                        adapt_prob(&pp[0], e[0], e[1], 24, uf);
+                        adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
+                        adapt_prob(&pp[2], c[1], c[2], 24, uf);
+                    }
+
+    if (s->keyframe || s->intraonly) { // keyframe/intra-only: copy skip and tx probabilities from s->prob.p unadapted, then stop
+        memcpy(p->skip,  s->prob.p.skip,  sizeof(p->skip));
+        memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
+        memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
+        memcpy(p->tx8p,  s->prob.p.tx8p,  sizeof(p->tx8p));
+        return;
+    }
+
+    // skip flag (every call from here on uses max_count 20, update factor 128)
+    for (i = 0; i < 3; i++)
+        adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
+
+    // intra/inter flag
+    for (i = 0; i < 4; i++)
+        adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
+
+    // comppred flag
+    if (s->comppredmode == PRED_SWITCHABLE) {
+      for (i = 0; i < 5; i++)
+          adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
+    }
+
+    // reference frames
+    if (s->comppredmode != PRED_SINGLEREF) {
+      for (i = 0; i < 5; i++)
+          adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
+                     s->counts.comp_ref[i][1], 20, 128);
+    }
+
+    if (s->comppredmode != PRED_COMPREF) {
+      for (i = 0; i < 5; i++) {
+          uint8_t *pp = p->single_ref[i];
+          unsigned (*c)[2] = s->counts.single_ref[i];
+
+          adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
+          adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
+      }
+    }
+
+    // block partitioning
+    for (i = 0; i < 4; i++)
+        for (j = 0; j < 4; j++) {
+            uint8_t *pp = p->partition[i][j];
+            unsigned *c = s->counts.partition[i][j];
+
+            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
+            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
+            adapt_prob(&pp[2], c[2], c[3], 20, 128);
+        }
+
+    // tx size
+    if (s->txfmmode == TX_SWITCHABLE) {
+      for (i = 0; i < 2; i++) {
+          unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
+
+          adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
+          adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
+          adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
+          adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
+          adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
+          adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
+      }
+    }
+
+    // interpolation filter
+    if (s->filtermode == FILTER_SWITCHABLE) {
+        for (i = 0; i < 4; i++) {
+            uint8_t *pp = p->filter[i];
+            unsigned *c = s->counts.filter[i];
+
+            adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
+            adapt_prob(&pp[1], c[1], c[2], 20, 128);
+        }
+    }
+
+    // inter modes
+    for (i = 0; i < 7; i++) {
+        uint8_t *pp = p->mv_mode[i];
+        unsigned *c = s->counts.mv_mode[i];
+
+        adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128); // probability order differs from count order: c[2] first, then c[0], then c[1] vs c[3]
+        adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
+        adapt_prob(&pp[2], c[1], c[3], 20, 128);
+    }
+
+    // mv joints
+    {
+        uint8_t *pp = p->mv_joint;
+        unsigned *c = s->counts.mv_joint;
+
+        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
+        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
+        adapt_prob(&pp[2], c[2], c[3], 20, 128);
+    }
+
+    // mv components
+    for (i = 0; i < 2; i++) {
+        uint8_t *pp;
+        unsigned *c, (*c2)[2], sum;
+
+        adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
+                   s->counts.mv_comp[i].sign[1], 20, 128);
+
+        pp = p->mv_comp[i].classes;
+        c = s->counts.mv_comp[i].classes;
+        sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10]; // running total of the classes not yet split off; reduced step by step below
+        adapt_prob(&pp[0], c[0], sum, 20, 128);
+        sum -= c[1];
+        adapt_prob(&pp[1], c[1], sum, 20, 128);
+        sum -= c[2] + c[3];
+        adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
+        adapt_prob(&pp[3], c[2], c[3], 20, 128);
+        sum -= c[4] + c[5];
+        adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
+        adapt_prob(&pp[5], c[4], c[5], 20, 128);
+        sum -= c[6];
+        adapt_prob(&pp[6], c[6], sum, 20, 128);
+        adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
+        adapt_prob(&pp[8], c[7], c[8], 20, 128);
+        adapt_prob(&pp[9], c[9], c[10], 20, 128);
+
+        adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
+                   s->counts.mv_comp[i].class0[1], 20, 128);
+        pp = p->mv_comp[i].bits;
+        c2 = s->counts.mv_comp[i].bits;
+        for (j = 0; j < 10; j++)
+            adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
+
+        for (j = 0; j < 2; j++) {
+            pp = p->mv_comp[i].class0_fp[j];
+            c = s->counts.mv_comp[i].class0_fp[j];
+            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
+            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
+            adapt_prob(&pp[2], c[2], c[3], 20, 128);
         }
+        pp = p->mv_comp[i].fp;
+        c = s->counts.mv_comp[i].fp;
+        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
+        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
+        adapt_prob(&pp[2], c[2], c[3], 20, 128);
 
-        ret = av_frame_ref(frame, s->refs[ref]);
-        if (ret < 0)
-            return ret;
+        if (s->highprecisionmvs) { // the two hp probabilities are only adapted when s->highprecisionmvs is set
+            adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
+                       s->counts.mv_comp[i].class0_hp[1], 20, 128);
+            adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
+                       s->counts.mv_comp[i].hp[1], 20, 128);
+        }
+    }
+
+    // y intra modes
+    for (i = 0; i < 4; i++) {
+        uint8_t *pp = p->y_mode[i];
+        unsigned *c = s->counts.y_mode[i], sum, s2;
+
+        sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9]; // every count except c[2]; NOTE(review): presumably DC_PRED == 2 - confirm against the enum
+        adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
+        sum -= c[TM_VP8_PRED];
+        adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
+        sum -= c[VERT_PRED];
+        adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
+        s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
+        sum -= s2;
+        adapt_prob(&pp[3], s2, sum, 20, 128);
+        s2 -= c[HOR_PRED];
+        adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
+        adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
+        sum -= c[DIAG_DOWN_LEFT_PRED];
+        adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
+        sum -= c[VERT_LEFT_PRED];
+        adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
+        adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
+    }
+
+    // uv intra modes
+    for (i = 0; i < 10; i++) {
+        uint8_t *pp = p->uv_mode[i];
+        unsigned *c = s->counts.uv_mode[i], sum, s2;
+
+        sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9]; // every count except c[2]; NOTE(review): presumably DC_PRED == 2 - confirm against the enum
+        adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
+        sum -= c[TM_VP8_PRED];
+        adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
+        sum -= c[VERT_PRED];
+        adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
+        s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
+        sum -= s2;
+        adapt_prob(&pp[3], s2, sum, 20, 128);
+        s2 -= c[HOR_PRED];
+        adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
+        adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
+        sum -= c[DIAG_DOWN_LEFT_PRED];
+        adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
+        sum -= c[VERT_LEFT_PRED];
+        adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
+        adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
+    }
+}
+
+static void free_buffers(VP9Context *s)
+{
+    av_freep(&s->intra_pred_data[0]); // only index 0 is released; NOTE(review): presumably the other intra_pred_data entries point into the same allocation - confirm
+    av_freep(&s->b_base);
+    av_freep(&s->block_base);
+}
+
+static av_cold int vp9_decode_free(AVCodecContext *ctx)
+{
+    VP9Context *s = ctx->priv_data;
+    int i;
+
+    for (i = 0; i < 2; i++) { // the two s->frames[] slots: drop any held picture, then free the AVFrame itself
+        if (s->frames[i].tf.f->data[0]) // NOTE(review): dereferences tf.f unconditionally - assumes every AVFrame was allocated earlier; confirm for failed-init paths
+            vp9_unref_frame(ctx, &s->frames[i]);
+        av_frame_free(&s->frames[i].tf.f);
+    }
+    for (i = 0; i < 8; i++) { // eight reference slots, each with a refs[] and a next_refs[] entry
+        if (s->refs[i].f->data[0])
+            ff_thread_release_buffer(ctx, &s->refs[i]);
+        av_frame_free(&s->refs[i].f);
+        if (s->next_refs[i].f->data[0])
+            ff_thread_release_buffer(ctx, &s->next_refs[i]);
+        av_frame_free(&s->next_refs[i].f);
+    }
+    free_buffers(s);
+    av_freep(&s->c_b);
+    s->c_b_size = 0; // keep the recorded size in step with the released c_b buffer
+
+    return 0;
+}
+
+
+static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
+                            int *got_frame, AVPacket *pkt)
+{
+    const uint8_t *data = pkt->data;
+    int size = pkt->size;
+    VP9Context *s = ctx->priv_data;
+    int res, tile_row, tile_col, i, ref, row, col;
+    ptrdiff_t yoff, uvoff, ls_y, ls_uv;
+    AVFrame *f;
+
+    if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
+        return res;
+    } else if (res == 0) {
+        if (!s->refs[ref].f->data[0]) {
+            av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
+            return AVERROR_INVALIDDATA;
+        }
+        if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
+            return res;
         *got_frame = 1;
         return 0;
     }
-    data += ret;
-    size -= ret;
-
-    s->cur_frame = frame;
+    data += res;
+    size -= res;
+
+    if (s->frames[LAST_FRAME].tf.f->data[0])
+        vp9_unref_frame(ctx, &s->frames[LAST_FRAME]);
+    if (!s->keyframe && s->frames[CUR_FRAME].tf.f->data[0] &&
+        (res = vp9_ref_frame(ctx, &s->frames[LAST_FRAME], &s->frames[CUR_FRAME])) < 0)
+        return res;
+    if (s->frames[CUR_FRAME].tf.f->data[0])
+        vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
+    if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
+        return res;
+    f = s->frames[CUR_FRAME].tf.f;
+    f->key_frame = s->keyframe;
+    f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
+    ls_y = f->linesize[0];
+    ls_uv =f->linesize[1];
 
-    av_frame_unref(s->cur_frame);
-    if ((ret = ff_get_buffer(avctx, s->cur_frame,
-                             s->refreshrefmask ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
-        return ret;
-    s->cur_frame->key_frame = s->keyframe;
-    s->cur_frame->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
-                                          : AV_PICTURE_TYPE_P;
+    // ref frame setup
+    for (i = 0; i < 8; i++) {
+        if (s->next_refs[i].f->data[0])
+            ff_thread_release_buffer(ctx, &s->next_refs[i]);
+        if (s->refreshrefmask & (1 << i)) {
+            res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
+        } else {
+            res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
+        }
+        if (res < 0)
+            return res;
+    }
 
     // main tile decode loop
     memset(s->above_partition_ctx, 0, s->cols);
     memset(s->above_skip_ctx, 0, s->cols);
-    if (s->keyframe || s->intraonly)
+    if (s->keyframe || s->intraonly) {
         memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
-    else
+    } else {
         memset(s->above_mode_ctx, NEARESTMV, s->cols);
+    }
     memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
     memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
     memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
     memset(s->above_segpred_ctx, 0, s->cols);
-    for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
-        set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
-                        tile_row, s->tiling.log2_tile_rows, s->sb_rows);
-        for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
-            int64_t tile_size;
-
-            if (tile_col == s->tiling.tile_cols - 1 &&
-                tile_row == s->tiling.tile_rows - 1) {
-                tile_size = size;
-            } else {
-                tile_size = AV_RB32(data);
-                data     += 4;
-                size     -= 4;
-            }
-            if (tile_size > size)
-                return AVERROR_INVALIDDATA;
-            ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
-            if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) // marker bit
-                return AVERROR_INVALIDDATA;
-            data += tile_size;
-            size -= tile_size;
-        }
-
-        for (row = s->tiling.tile_row_start;
-             row < s->tiling.tile_row_end;
-             row += 8, yoff += s->cur_frame->linesize[0] * 64,
-             uvoff += s->cur_frame->linesize[1] * 32) {
-            VP9Filter *lflvl = s->lflvl;
-            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
-
-            for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
-                set_tile_offset(&s->tiling.tile_col_start,
-                                &s->tiling.tile_col_end,
-                                tile_col, s->tiling.log2_tile_cols, s->sb_cols);
-
-                memset(s->left_partition_ctx, 0, 8);
-                memset(s->left_skip_ctx, 0, 8);
-                if (s->keyframe || s->intraonly)
-                    memset(s->left_mode_ctx, DC_PRED, 16);
-                else
-                    memset(s->left_mode_ctx, NEARESTMV, 8);
-                memset(s->left_y_nnz_ctx, 0, 16);
-                memset(s->left_uv_nnz_ctx, 0, 16);
-                memset(s->left_segpred_ctx, 0, 8);
-
-                memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
-                for (col = s->tiling.tile_col_start;
-                     col < s->tiling.tile_col_end;
-                     col += 8, yoff2 += 64, uvoff2 += 32, lflvl++) {
-                    // FIXME integrate with lf code (i.e. zero after each
-                    // use, similar to invtxfm coefficients, or similar)
-                    memset(lflvl->mask, 0, sizeof(lflvl->mask));
-
-                    if ((ret = decode_subblock(avctx, row, col, lflvl,
-                                               yoff2, uvoff2, BL_64X64)) < 0)
-                        return ret;
-                }
-                memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
-            }
-
-            // backup pre-loopfilter reconstruction data for intra
-            // prediction of next row of sb64s
-            if (row + 8 < s->rows) {
-                memcpy(s->intra_pred_data[0],
-                       s->cur_frame->data[0] + yoff +
-                       63 * s->cur_frame->linesize[0],
-                       8 * s->cols);
-                memcpy(s->intra_pred_data[1],
-                       s->cur_frame->data[1] + uvoff +
-                       31 * s->cur_frame->linesize[1],
-                       4 * s->cols);
-                memcpy(s->intra_pred_data[2],
-                       s->cur_frame->data[2] + uvoff +
-                       31 * s->cur_frame->linesize[2],
-                       4 * s->cols);
-            }
-
-            // loopfilter one row
-            if (s->filter.level) {
-                yoff2  = yoff;
-                uvoff2 = uvoff;
-                lflvl  = s->lflvl;
-                for (col = 0; col < s->cols;
-                     col += 8, yoff2 += 64, uvoff2 += 32, lflvl++)
-                    loopfilter_subblock(avctx, lflvl, row, col, yoff2, uvoff2);
-            }
-        }
-    }
-
-    // bw adaptivity (or in case of parallel decoding mode, fw adaptivity
-    // probability maintenance between frames)
-    if (s->refreshctx) {
-        if (s->parallelmode) {
-            int j, k, l, m;
-            for (i = 0; i < 4; i++) {
-                for (j = 0; j < 2; j++)
-                    for (k = 0; k < 2; k++)
-                        for (l = 0; l < 6; l++)
-                            for (m = 0; m < 6; m++)
-                                memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
-                                       s->prob.coef[i][j][k][l][m], 3);
-                if (s->txfmmode == i)
-                    break;
-            }
-            s->prob_ctx[s->framectxid].p = s->prob.p;
-        } else {
-            ff_vp9_adapt_probs(s);
-        }
+    s->pass = s->uses_2pass =
+        ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
+    if ((res = update_block_buffers(ctx)) < 0) {
+        av_log(ctx, AV_LOG_ERROR,
+               "Failed to allocate block buffers\n");
+        return res;
     }
-    FFSWAP(VP9MVRefPair *, s->mv[0], s->mv[1]);
+    if (s->refreshctx && s->parallelmode) {
+        int j, k, l, m;
 
-    // ref frame setup
-    for (i = 0; i < 8; i++)
-        if (s->refreshrefmask & (1 << i)) {
-            av_frame_unref(s->refs[i]);
-            ret = av_frame_ref(s->refs[i], s->cur_frame);
-            if (ret < 0)
-                return ret;
+        for (i = 0; i < 4; i++) {
+            for (j = 0; j < 2; j++)
+                for (k = 0; k < 2; k++)
+                    for (l = 0; l < 6; l++)
+                        for (m = 0; m < 6; m++)
+                            memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
+                                   s->prob.coef[i][j][k][l][m], 3);
+            if (s->txfmmode == i)
+                break;
         }
+        s->prob_ctx[s->framectxid].p = s->prob.p;
+        ff_thread_finish_setup(ctx);
+    }
 
-    if (s->invisible)
-        av_frame_unref(s->cur_frame);
-    else
-        *got_frame = 1;
+    do {
+        yoff = uvoff = 0;
+        s->b = s->b_base;
+        s->block = s->block_base;
+        s->uvblock[0] = s->uvblock_base[0];
+        s->uvblock[1] = s->uvblock_base[1];
+        s->eob = s->eob_base;
+        s->uveob[0] = s->uveob_base[0];
+        s->uveob[1] = s->uveob_base[1];
+
+        for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
+            set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
+                            tile_row, s->tiling.log2_tile_rows, s->sb_rows);
+            if (s->pass != 2) {
+                for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
+                    unsigned tile_size;
+
+                    if (tile_col == s->tiling.tile_cols - 1 &&
+                        tile_row == s->tiling.tile_rows - 1) {
+                        tile_size = size;
+                    } else {
+                        tile_size = AV_RB32(data);
+                        data += 4;
+                        size -= 4;
+                    }
+                    if (tile_size > size) {
+                        ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
+                        return AVERROR_INVALIDDATA;
+                    }
+                    ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
+                    if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
+                        ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
+                        return AVERROR_INVALIDDATA;
+                    }
+                    data += tile_size;
+                    size -= tile_size;
+                }
+            }
 
-    return 0;
-}
+            for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
+                 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 32) {
+                struct VP9Filter *lflvl_ptr = s->lflvl;
+                ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
 
-static int vp9_decode_packet(AVCodecContext *avctx, void *frame,
-                             int *got_frame, AVPacket *avpkt)
-{
-    const uint8_t *data = avpkt->data;
-    int size            = avpkt->size;
-    int marker, ret;
-
-    /* Read superframe index - this is a collection of individual frames
-     * that together lead to one visible frame */
-    marker = data[size - 1];
-    if ((marker & 0xe0) == 0xc0) {
-        int nbytes   = 1 + ((marker >> 3) & 0x3);
-        int n_frames = 1 + (marker & 0x7);
-        int idx_sz   = 2 + n_frames * nbytes;
-
-        if (size >= idx_sz && data[size - idx_sz] == marker) {
-            const uint8_t *idx = data + size + 1 - idx_sz;
-
-            while (n_frames--) {
-                unsigned sz = AV_RL32(idx);
-
-                if (nbytes < 4)
-                    sz &= (1 << (8 * nbytes)) - 1;
-                idx += nbytes;
-
-                if (sz > size) {
-                    av_log(avctx, AV_LOG_ERROR,
-                           "Superframe packet size too big: %u > %d\n",
-                           sz, size);
-                    return AVERROR_INVALIDDATA;
+                for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
+                    set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
+                                    tile_col, s->tiling.log2_tile_cols, s->sb_cols);
+
+                    if (s->pass != 2) {
+                        memset(s->left_partition_ctx, 0, 8);
+                        memset(s->left_skip_ctx, 0, 8);
+                        if (s->keyframe || s->intraonly) {
+                            memset(s->left_mode_ctx, DC_PRED, 16);
+                        } else {
+                            memset(s->left_mode_ctx, NEARESTMV, 8);
+                        }
+                        memset(s->left_y_nnz_ctx, 0, 16);
+                        memset(s->left_uv_nnz_ctx, 0, 16);
+                        memset(s->left_segpred_ctx, 0, 8);
+
+                        memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
+                    }
+
+                    for (col = s->tiling.tile_col_start;
+                         col < s->tiling.tile_col_end;
+                         col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
+                        // FIXME integrate with lf code (i.e. zero after each
+                        // use, similar to invtxfm coefficients, or similar)
+                        if (s->pass != 1) {
+                            memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
+                        }
+
+                        if (s->pass == 2) {
+                            decode_sb_mem(ctx, row, col, lflvl_ptr,
+                                          yoff2, uvoff2, BL_64X64);
+                        } else {
+                            decode_sb(ctx, row, col, lflvl_ptr,
+                                      yoff2, uvoff2, BL_64X64);
+                        }
+                    }
+                    if (s->pass != 2) {
+                        memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
+                    }
+                }
+
+                if (s->pass == 1) {
+                    continue;
+                }
+
+                // backup pre-loopfilter reconstruction data for intra
+                // prediction of next row of sb64s
+                if (row + 8 < s->rows) {
+                    memcpy(s->intra_pred_data[0],
+                           f->data[0] + yoff + 63 * ls_y,
+                           8 * s->cols);
+                    memcpy(s->intra_pred_data[1],
+                           f->data[1] + uvoff + 31 * ls_uv,
+                           4 * s->cols);
+                    memcpy(s->intra_pred_data[2],
+                           f->data[2] + uvoff + 31 * ls_uv,
+                           4 * s->cols);
+                }
+
+                // loopfilter one row
+                if (s->filter.level) {
+                    yoff2 = yoff;
+                    uvoff2 = uvoff;
+                    lflvl_ptr = s->lflvl;
+                    for (col = 0; col < s->cols;
+                         col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
+                        loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
+                    }
                 }
 
-                ret = vp9_decode_frame(avctx, frame, got_frame, data, sz);
-                if (ret < 0)
-                    return ret;
-                data += sz;
-                size -= sz;
+                // FIXME maybe we can make this more fine-grained by running the
+                // loopfilter per-block instead of after each sbrow
+                // In fact that would also make intra pred left preparation easier?
+                ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
             }
-            return size;
         }
+
+        if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
+            adapt_probs(s);
+            ff_thread_finish_setup(ctx);
+        }
+    } while (s->pass++ == 1);
+    ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
+
+    // ref frame setup
+    for (i = 0; i < 8; i++) {
+        if (s->refs[i].f->data[0])
+            ff_thread_release_buffer(ctx, &s->refs[i]);
+        ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
     }
 
-    /* If we get here, there was no valid superframe index, i.e. this is just
-     * one whole single frame. Decode it as such from the complete input buf. */
-    if ((ret = vp9_decode_frame(avctx, frame, got_frame, data, size)) < 0)
-        return ret;
-    return size;
+    if (!s->invisible) {
+        if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
+            return res;
+        *got_frame = 1;
+    }
+
+    return 0;
 }
 
-static av_cold int vp9_decode_free(AVCodecContext *avctx)
+static void vp9_decode_flush(AVCodecContext *ctx) // flush callback: releases s->frames[0..1] and s->refs[0..7]
 {
-    VP9Context *s = avctx->priv_data;
+    VP9Context *s = ctx->priv_data;
     int i;
 
-    for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++)
-        av_frame_free(&s->refs[i]);
+    for (i = 0; i < 2; i++) // NOTE(review): no data[0] check here, unlike the decode path - confirm callee tolerates empty frames
+        vp9_unref_frame(ctx, &s->frames[i]);
+    for (i = 0; i < 8; i++) // only s->refs[] is released; s->next_refs[] is not touched by flush
+        ff_thread_release_buffer(ctx, &s->refs[i]);
+}
 
-    av_freep(&s->c_b);
-    av_freep(&s->above_partition_ctx);
+static int init_frames(AVCodecContext *ctx) // allocates the AVFrame holders of s->frames[], s->refs[], s->next_refs[]; 0 or AVERROR(ENOMEM)
+{
+    VP9Context *s = ctx->priv_data;
+    int i;
+
+    for (i = 0; i < 2; i++) { // the two s->frames[] entries
+        s->frames[i].tf.f = av_frame_alloc();
+        if (!s->frames[i].tf.f) {
+            vp9_decode_free(ctx); // NOTE(review): presumably frees what was allocated so far - confirm
+            av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
+            return AVERROR(ENOMEM);
+        }
+    }
+    for (i = 0; i < 8; i++) { // eight reference slots, each with a current and a "next" holder
+        s->refs[i].f = av_frame_alloc();
+        s->next_refs[i].f = av_frame_alloc();
+        if (!s->refs[i].f || !s->next_refs[i].f) { // either allocation failing aborts the whole setup
+            vp9_decode_free(ctx);
+            av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
+            return AVERROR(ENOMEM);
+        }
+    }
 
     return 0;
 }
 
-static av_cold int vp9_decode_init(AVCodecContext *avctx)
+static av_cold int vp9_decode_init(AVCodecContext *ctx) // codec init: pixel format, DSP tables, then frame holders via init_frames()
 {
-    VP9Context *s = avctx->priv_data;
-    int i;
-
-    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
+    VP9Context *s = ctx->priv_data;
 
+    ctx->internal->allocate_progress = 1; // NOTE(review): presumably enables progress tracking used by ff_thread_report_progress() - confirm
+    ctx->pix_fmt = AV_PIX_FMT_YUV420P;
     ff_vp9dsp_init(&s->dsp);
     ff_videodsp_init(&s->vdsp, 8);
+    s->filter.sharpness = -1; // NOTE(review): -1 looks like a "not set yet" sentinel - confirm against the header parser
 
-    for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++) {
-        s->refs[i] = av_frame_alloc();
-        if (!s->refs[i]) {
-            vp9_decode_free(avctx);
-            return AVERROR(ENOMEM);
+    return init_frames(ctx); // the allocation result is the init result
+}
+
+static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx) // init_thread_copy callback; only allocates the frame holders
+{
+    return init_frames(avctx); // unlike vp9_decode_init(), no pix_fmt/DSP/sharpness setup is done here
+}
+
+static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src) // imports frames, refs and selected state from src into dst; 0 or a negative error
+{
+    int i, res;
+    VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
+
+    // detect size changes in other threads: dst has buffers but src has none or different cols/rows
+    if (s->intra_pred_data[0] &&
+        (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
+        free_buffers(s); // NOTE(review): presumably they are re-allocated later for the new size - confirm
+    }
+
+    for (i = 0; i < 2; i++) { // replace dst's s->frames[] with references to src's
+        if (s->frames[i].tf.f->data[0])
+            vp9_unref_frame(dst, &s->frames[i]);
+        if (ssrc->frames[i].tf.f->data[0]) { // an empty source frame leaves the destination empty
+            if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
+                return res;
+        }
+    }
+    for (i = 0; i < 8; i++) { // dst refs[] are taken from src next_refs[], not from src refs[]
+        if (s->refs[i].f->data[0])
+            ff_thread_release_buffer(dst, &s->refs[i]);
+        if (ssrc->next_refs[i].f->data[0]) {
+            if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
+                return res;
         }
     }
 
-    s->filter.sharpness = -1;
+    s->invisible = ssrc->invisible; // scalar state copied over as-is
+    s->keyframe = ssrc->keyframe;
+    s->uses_2pass = ssrc->uses_2pass;
+    memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
+    memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
+    if (ssrc->segmentation.enabled) { // feature table is copied only when src has segmentation enabled
+        memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
+               sizeof(s->segmentation.feat));
+    }
 
     return 0;
 }
 
 AVCodec ff_vp9_decoder = {
-    .name           = "vp9",
-    .long_name      = NULL_IF_CONFIG_SMALL("Google VP9"),
-    .type           = AVMEDIA_TYPE_VIDEO,
-    .id             = AV_CODEC_ID_VP9,
-    .priv_data_size = sizeof(VP9Context),
-    .init           = vp9_decode_init,
-    .decode         = vp9_decode_packet,
-    .flush          = vp9_decode_flush,
-    .close          = vp9_decode_free,
-    .capabilities   = CODEC_CAP_DR1,
+    .name                  = "vp9",
+    .long_name             = NULL_IF_CONFIG_SMALL("Google VP9"),
+    .type                  = AVMEDIA_TYPE_VIDEO,
+    .id                    = AV_CODEC_ID_VP9,
+    .priv_data_size        = sizeof(VP9Context),
+    .init                  = vp9_decode_init,
+    .close                 = vp9_decode_free,
+    .decode                = vp9_decode_frame, // NOTE(review): vp9_decode_packet (superframe splitting) is removed here; presumably vp9_parser.c takes over - confirm
+    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS, // frame threading is advertised in addition to DR1
+    .flush                 = vp9_decode_flush,
+    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy), // allocates the frame holders only
+    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context), // copies frames, refs and selected state from src to dst
 };
diff --git a/libavcodec/vp9.h b/libavcodec/vp9.h
index 0a6c6ee..9a29416 100644
--- a/libavcodec/vp9.h
+++ b/libavcodec/vp9.h
@@ -4,34 +4,26 @@
  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
  * Copyright (C) 2013 Clément Bœsch <u pkh me>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #ifndef AVCODEC_VP9_H
 #define AVCODEC_VP9_H
 
-#include <stddef.h>
-#include <stdint.h>
-
-#include "libavutil/internal.h"
-
-#include "avcodec.h"
-#include "vp56.h"
-
 enum TxfmMode {
     TX_4X4,
     TX_8X8,
@@ -77,343 +69,4 @@ enum FilterMode {
     FILTER_SWITCHABLE,
 };
 
-enum BlockPartition {
-    PARTITION_NONE,    // [ ] <-.
-    PARTITION_H,       // [-]   |
-    PARTITION_V,       // [|]   |
-    PARTITION_SPLIT,   // [+] --'
-};
-
-enum InterPredMode {
-    NEARESTMV = 10,
-    NEARMV    = 11,
-    ZEROMV    = 12,
-    NEWMV     = 13,
-};
-
-enum MVJoint {
-    MV_JOINT_ZERO,
-    MV_JOINT_H,
-    MV_JOINT_V,
-    MV_JOINT_HV,
-};
-
-typedef struct ProbContext {
-    uint8_t y_mode[4][9];
-    uint8_t uv_mode[10][9];
-    uint8_t filter[4][2];
-    uint8_t mv_mode[7][3];
-    uint8_t intra[4];
-    uint8_t comp[5];
-    uint8_t single_ref[5][2];
-    uint8_t comp_ref[5];
-    uint8_t tx32p[2][3];
-    uint8_t tx16p[2][2];
-    uint8_t tx8p[2];
-    uint8_t skip[3];
-    uint8_t mv_joint[3];
-    struct {
-        uint8_t sign;
-        uint8_t classes[10];
-        uint8_t class0;
-        uint8_t bits[10];
-        uint8_t class0_fp[2][3];
-        uint8_t fp[3];
-        uint8_t class0_hp;
-        uint8_t hp;
-    } mv_comp[2];
-    uint8_t partition[4][4][3];
-} ProbContext;
-
-typedef void (*vp9_mc_func)(uint8_t *dst, const uint8_t *ref,
-                            ptrdiff_t dst_stride,
-                            ptrdiff_t ref_stride,
-                            int h, int mx, int my);
-
-typedef struct VP9DSPContext {
-    /*
-     * dimension 1: 0=4x4, 1=8x8, 2=16x16, 3=32x32
-     * dimension 2: intra prediction modes
-     *
-     * dst/left/top is aligned by transform-size (i.e. 4, 8, 16 or 32 pixels)
-     * stride is aligned by 16 pixels
-     * top[-1] is top/left; top[4,7] is top-right for 4x4
-     */
-    // FIXME(rbultje) maybe replace left/top pointers with HAVE_TOP/
-    // HAVE_LEFT/HAVE_TOPRIGHT flags instead, and then handle it in-place?
-    // also needs to fit in with what h264/vp8/etc do
-    void (*intra_pred[N_TXFM_SIZES][N_INTRA_PRED_MODES])(uint8_t *dst,
-                                                         ptrdiff_t stride,
-                                                         const uint8_t *left,
-                                                         const uint8_t *top);
-
-    /*
-     * dimension 1: 0=4x4, 1=8x8, 2=16x16, 3=32x32, 4=lossless (3-4=dct only)
-     * dimension 2: 0=dct/dct, 1=dct/adst, 2=adst/dct, 3=adst/adst
-     *
-     * dst is aligned by transform-size (i.e. 4, 8, 16 or 32 pixels)
-     * stride is aligned by 16 pixels
-     * block is 16-byte aligned
-     * eob indicates the position (+1) of the last non-zero coefficient,
-     * in scan-order. This can be used to write faster versions, e.g. a
-     * dc-only 4x4/8x8/16x16/32x32, or a 4x4-only (eob<10) 8x8/16x16/32x32,
-     * etc.
-     */
-    // FIXME also write idct_add_block() versions for whole (inter) pred
-    // blocks, so we can do 2 4x4s at once
-    void (*itxfm_add[N_TXFM_SIZES + 1][N_TXFM_TYPES])(uint8_t *dst,
-                                                      ptrdiff_t stride,
-                                                      int16_t *block, int eob);
-
-    /*
-     * dimension 1: width of filter (0=4, 1=8, 2=16)
-     * dimension 2: 0=col-edge filter (h), 1=row-edge filter (v)
-     *
-     * dst/stride are aligned by 8
-     */
-    void (*loop_filter_8[3][2])(uint8_t *dst, ptrdiff_t stride,
-                                int mb_lim, int lim, int hev_thr);
-
-    /*
-     * dimension 1: 0=col-edge filter (h), 1=row-edge filter (v)
-     *
-     * The width of filter is assumed to be 16; dst/stride are aligned by 16
-     */
-    void (*loop_filter_16[2])(uint8_t *dst, ptrdiff_t stride,
-                              int mb_lim, int lim, int hev_thr);
-
-    /*
-     * dimension 1/2: width of filter (0=4, 1=8) for each filter half
-     * dimension 3: 0=col-edge filter (h), 1=row-edge filter (v)
-     *
-     * dst/stride are aligned by operation size
-     * this basically calls loop_filter[d1][d3][0](), followed by
-     * loop_filter[d2][d3][0]() on the next 8 pixels
-     * mb_lim/lim/hev_thr contain two values in the lowest two bytes of the
-     * integer.
-     */
-    // FIXME perhaps a mix4 that operates on 32px (for AVX2)
-    void (*loop_filter_mix2[2][2][2])(uint8_t *dst, ptrdiff_t stride,
-                                      int mb_lim, int lim, int hev_thr);
-
-    /*
-     * dimension 1: hsize (0: 64, 1: 32, 2: 16, 3: 8, 4: 4)
-     * dimension 2: filter type (0: smooth, 1: regular, 2: sharp, 3: bilin)
-     * dimension 3: averaging type (0: put, 1: avg)
-     * dimension 4: x subpel interpolation (0: none, 1: 8tap/bilin)
-     * dimension 5: y subpel interpolation (1: none, 1: 8tap/bilin)
-     *
-     * dst/stride are aligned by hsize
-     */
-    vp9_mc_func mc[5][4][2][2][2];
-} VP9DSPContext;
-
-enum CompPredMode {
-    PRED_SINGLEREF,
-    PRED_COMPREF,
-    PRED_SWITCHABLE,
-};
-
-typedef struct VP9MVRefPair {
-    VP56mv mv[2];
-    int8_t ref[2];
-} VP9MVRefPair;
-
-typedef struct VP9Filter {
-    uint8_t level[8 * 8];
-    uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
-                              [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
-} VP9Filter;
-
-enum BlockLevel {
-    BL_64X64,
-    BL_32X32,
-    BL_16X16,
-    BL_8X8,
-};
-
-enum BlockSize {
-    BS_64x64,
-    BS_64x32,
-    BS_32x64,
-    BS_32x32,
-    BS_32x16,
-    BS_16x32,
-    BS_16x16,
-    BS_16x8,
-    BS_8x16,
-    BS_8x8,
-    BS_8x4,
-    BS_4x8,
-    BS_4x4,
-    N_BS_SIZES,
-};
-
-typedef struct VP9Block {
-    uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
-    enum FilterMode filter;
-    VP56mv mv[4 /* b_idx */][2 /* ref */];
-    enum BlockSize bs;
-    enum TxfmMode tx, uvtx;
-
-    int row, row7, col, col7;
-    uint8_t *dst[3];
-    ptrdiff_t y_stride, uv_stride;
-} VP9Block;
-
-typedef struct VP9Context {
-    VP9DSPContext dsp;
-    VideoDSPContext vdsp;
-    GetBitContext gb;
-    VP56RangeCoder c;
-    VP56RangeCoder *c_b;
-    unsigned c_b_size;
-    VP9Block b;
-
-    // bitstream header
-    uint8_t profile;
-    uint8_t keyframe, last_keyframe;
-    uint8_t invisible;
-    uint8_t use_last_frame_mvs;
-    uint8_t errorres;
-    uint8_t colorspace;
-    uint8_t fullrange;
-    uint8_t intraonly;
-    uint8_t resetctx;
-    uint8_t refreshrefmask;
-    uint8_t highprecisionmvs;
-    enum FilterMode filtermode;
-    uint8_t allowcompinter;
-    uint8_t fixcompref;
-    uint8_t refreshctx;
-    uint8_t parallelmode;
-    uint8_t framectxid;
-    uint8_t refidx[3];
-    uint8_t signbias[3];
-    uint8_t varcompref[2];
-    AVFrame *refs[8];
-    AVFrame *cur_frame;
-
-    struct {
-        uint8_t level;
-        int8_t sharpness;
-        uint8_t lim_lut[64];
-        uint8_t mblim_lut[64];
-    } filter;
-    struct {
-        uint8_t enabled;
-        int8_t mode[2];
-        int8_t ref[4];
-    } lf_delta;
-    uint8_t yac_qi;
-    int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
-    uint8_t lossless;
-    struct {
-        uint8_t enabled;
-        uint8_t temporal;
-        uint8_t absolute_vals;
-        uint8_t update_map;
-        struct {
-            uint8_t q_enabled;
-            uint8_t lf_enabled;
-            uint8_t ref_enabled;
-            uint8_t skip_enabled;
-            uint8_t ref_val;
-            int16_t q_val;
-            int8_t lf_val;
-            int16_t qmul[2][2];
-            uint8_t lflvl[4][2];
-        } feat[8];
-    } segmentation;
-    struct {
-        unsigned log2_tile_cols, log2_tile_rows;
-        unsigned tile_cols, tile_rows;
-        unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
-    } tiling;
-    unsigned sb_cols, sb_rows, rows, cols;
-    struct {
-        ProbContext p;
-        uint8_t coef[4][2][2][6][6][3];
-    } prob_ctx[4];
-    struct {
-        ProbContext p;
-        uint8_t coef[4][2][2][6][6][11];
-        uint8_t seg[7];
-        uint8_t segpred[3];
-    } prob;
-    struct {
-        unsigned y_mode[4][10];
-        unsigned uv_mode[10][10];
-        unsigned filter[4][3];
-        unsigned mv_mode[7][4];
-        unsigned intra[4][2];
-        unsigned comp[5][2];
-        unsigned single_ref[5][2][2];
-        unsigned comp_ref[5][2];
-        unsigned tx32p[2][4];
-        unsigned tx16p[2][3];
-        unsigned tx8p[2][2];
-        unsigned skip[3][2];
-        unsigned mv_joint[4];
-        struct {
-            unsigned sign[2];
-            unsigned classes[11];
-            unsigned class0[2];
-            unsigned bits[10][2];
-            unsigned class0_fp[2][4];
-            unsigned fp[4];
-            unsigned class0_hp[2];
-            unsigned hp[2];
-        } mv_comp[2];
-        unsigned partition[4][4][4];
-        unsigned coef[4][2][2][6][6][3];
-        unsigned eob[4][2][2][6][6][2];
-    } counts;
-    enum TxfmMode txfmmode;
-    enum CompPredMode comppredmode;
-
-    // contextual (left/above) cache
-    uint8_t left_partition_ctx[8], *above_partition_ctx;
-    uint8_t left_mode_ctx[16], *above_mode_ctx;
-    // FIXME maybe merge some of the below in a flags field?
-    uint8_t left_y_nnz_ctx[16], *above_y_nnz_ctx;
-    uint8_t left_uv_nnz_ctx[2][8], *above_uv_nnz_ctx[2];
-    uint8_t left_skip_ctx[8], *above_skip_ctx; // 1bit
-    uint8_t left_txfm_ctx[8], *above_txfm_ctx; // 2bit
-    uint8_t left_segpred_ctx[8], *above_segpred_ctx; // 1bit
-    uint8_t left_intra_ctx[8], *above_intra_ctx; // 1bit
-    uint8_t left_comp_ctx[8], *above_comp_ctx; // 1bit
-    uint8_t left_ref_ctx[8], *above_ref_ctx; // 2bit
-    uint8_t left_filter_ctx[8], *above_filter_ctx;
-    VP56mv left_mv_ctx[16][2], (*above_mv_ctx)[2];
-
-    // whole-frame cache
-    uint8_t *intra_pred_data[3];
-    uint8_t *segmentation_map;
-    VP9MVRefPair *mv[2];
-    VP9Filter *lflvl;
-    DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[71 * 80];
-
-    // block reconstruction intermediates
-    DECLARE_ALIGNED(32, int16_t, block)[4096];
-    DECLARE_ALIGNED(32, int16_t, uvblock)[2][1024];
-    uint8_t eob[256];
-    uint8_t uveob[2][64];
-    VP56mv min_mv, max_mv;
-    DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64];
-    DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32 * 32];
-} VP9Context;
-
-void ff_vp9dsp_init(VP9DSPContext *dsp);
-
-void ff_vp9dsp_init_x86(VP9DSPContext *dsp);
-
-void ff_vp9_fill_mv(VP9Context *s, VP56mv *mv, int mode, int sb);
-
-void ff_vp9_adapt_probs(VP9Context *s);
-
-int ff_vp9_decode_block(AVCodecContext *avctx, int row, int col,
-                        VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
-                        enum BlockLevel bl, enum BlockPartition bp);
-
 #endif /* AVCODEC_VP9_H */
diff --git a/libavcodec/vp9_parser.c b/libavcodec/vp9_parser.c
new file mode 100644
index 0000000..af033c2
--- /dev/null
+++ b/libavcodec/vp9_parser.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2008 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/intreadwrite.h"
+#include "parser.h"
+
+/**
+ * State kept between parse() calls while the frames of one superframe are
+ * handed out one per call.
+ */
+typedef struct VP9ParseContext {
+    int n_frames; // queued frames not yet returned (0-7)
+    int size[8];  // queued frame sizes; size[n_frames - 1] is returned next
+} VP9ParseContext;
+
+/**
+ * Derive the picture type and key frame flag from the first byte of a frame:
+ * bit 0x4 set selects a P picture, bit 0x4 clear an I picture and key frame.
+ * The size argument is not used.
+ */
+static void parse_frame(AVCodecParserContext *ctx, const uint8_t *buf, int size)
+{
+    if (buf[0] & 0x4) {
+        ctx->pict_type = AV_PICTURE_TYPE_P;
+        ctx->key_frame = 0;
+    } else {
+        ctx->pict_type = AV_PICTURE_TYPE_I;
+        ctx->key_frame = 1;
+    }
+}
+
+/**
+ * Split the input into frames, returning one frame per call.
+ *
+ * If the input ends in a superframe index, the first frame is returned and
+ * the sizes of the remaining ones are queued in the parser context; later
+ * calls return the queued frames in order. Any other input is returned
+ * unchanged as a single frame.
+ *
+ * @return number of input bytes consumed
+ */
+static int parse(AVCodecParserContext *ctx,
+                 AVCodecContext *avctx,
+                 const uint8_t **out_data, int *out_size,
+                 const uint8_t *data, int size)
+{
+    VP9ParseContext *s = ctx->priv_data;
+    const uint8_t *const pkt = data; // data and size get advanced below
+    const int pkt_size = size;
+    int marker;
+
+    if (size <= 0) {
+        *out_size = 0;
+        *out_data = data;
+
+        return 0;
+    }
+
+    if (s->n_frames > 0 && s->size[s->n_frames - 1] > size) {
+        /* The queued sizes cannot describe this input; drop them and parse
+         * the input from scratch instead of returning an oversized frame. */
+        av_log(avctx, AV_LOG_ERROR,
+               "Queued superframe packet size too big: %d > %d\n",
+               s->size[s->n_frames - 1], size);
+        s->n_frames = 0;
+    }
+
+    if (s->n_frames > 0) {
+        *out_data = data;
+        *out_size = s->size[--s->n_frames];
+        parse_frame(ctx, *out_data, *out_size);
+
+        return s->n_frames > 0 ? *out_size : size /* i.e. include idx tail */;
+    }
+
+    marker = data[size - 1];
+    if ((marker & 0xe0) == 0xc0) {
+        int nbytes = 1 + ((marker >> 3) & 0x3);
+        int n_frames = 1 + (marker & 0x7), idx_sz = 2 + n_frames * nbytes;
+
+        if (size >= idx_sz && data[size - idx_sz] == marker) {
+            const uint8_t *idx = data + size + 1 - idx_sz;
+            int first = 1;
+
+            switch (nbytes) {
+#define case_n(a, rd) \
+            case a: \
+                while (n_frames--) { \
+                    unsigned sz = rd; \
+                    idx += a; \
+                    if (sz > size) { \
+                        /* bad index: return the packet unsplit */ \
+                        s->n_frames = 0; \
+                        *out_data = pkt; \
+                        *out_size = pkt_size; \
+                        av_log(avctx, AV_LOG_ERROR, \
+                               "Superframe packet size too big: %u > %d\n", \
+                               sz, size); \
+                        parse_frame(ctx, pkt, pkt_size); \
+                        return pkt_size; \
+                    } \
+                    if (first) { \
+                        first = 0; \
+                        *out_data = data; \
+                        *out_size = sz; \
+                        s->n_frames = n_frames; \
+                    } else { \
+                        s->size[n_frames] = sz; \
+                    } \
+                    data += sz; \
+                    size -= sz; \
+                } \
+                parse_frame(ctx, *out_data, *out_size); \
+                /* a lone frame also consumes the index tail */ \
+                return s->n_frames > 0 ? *out_size : pkt_size
+
+                case_n(1, *idx);
+                case_n(2, AV_RL16(idx));
+                case_n(3, AV_RL24(idx));
+                case_n(4, AV_RL32(idx));
+#undef case_n
+            }
+        }
+    }
+
+    *out_data = data;
+    *out_size = size;
+    parse_frame(ctx, data, size);
+
+    return size;
+}
+
+AVCodecParser ff_vp9_parser = {
+    .codec_ids      = { AV_CODEC_ID_VP9 },
+    .priv_data_size = sizeof(VP9ParseContext),
+    .parser_parse   = parse,
+};
diff --git a/libavcodec/vp9block.c b/libavcodec/vp9block.c
deleted file mode 100644
index caf3bcc..0000000
--- a/libavcodec/vp9block.c
+++ /dev/null
@@ -1,1684 +0,0 @@
-/*
- * VP9 compatible video decoder
- *
- * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
- * Copyright (C) 2013 Clément Bœsch <u pkh me>
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/avassert.h"
-
-#include "avcodec.h"
-#include "get_bits.h"
-#include "internal.h"
-#include "videodsp.h"
-#include "vp56.h"
-#include "vp9.h"
-#include "vp9data.h"
-
-static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
-    {
-        { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
-        {  4,  4 }, {  4, 2 }, { 2,  4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
-    },  {
-        {  8,  8 }, {  8, 4 }, { 4,  8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
-        {  2,  2 }, {  2, 1 }, { 1,  2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
-    }
-};
-
-// differential forward probability updates
-static void decode_mode(VP9Context *s, VP9Block *const b)
-{
-    static const uint8_t left_ctx[N_BS_SIZES] = {
-        0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
-    };
-    static const uint8_t above_ctx[N_BS_SIZES] = {
-        0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
-    };
-    static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
-        TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
-        TX_16X16, TX_8X8,   TX_8X8,   TX_8X8,   TX_4X4,   TX_4X4,  TX_4X4
-    };
-    int row = b->row, col = b->col, row7 = b->row7;
-    enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
-    int w4 = FFMIN(s->cols - col, bwh_tab[1][b->bs][0]);
-    int h4 = FFMIN(s->rows - row, bwh_tab[1][b->bs][1]);
-    int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
-    int y;
-
-    if (!s->segmentation.enabled) {
-        b->seg_id = 0;
-    } else if (s->keyframe || s->intraonly) {
-        b->seg_id = s->segmentation.update_map ?
-                    vp8_rac_get_tree(&s->c, ff_vp9_segmentation_tree, s->prob.seg) : 0;
-    } else if (!s->segmentation.update_map ||
-               (s->segmentation.temporal &&
-                vp56_rac_get_prob_branchy(&s->c,
-                                          s->prob.segpred[s->above_segpred_ctx[col] +
-                                                          s->left_segpred_ctx[row7]]))) {
-        int pred = 8, x;
-
-        for (y = 0; y < h4; y++)
-            for (x = 0; x < w4; x++)
-                pred = FFMIN(pred,
-                             s->segmentation_map[(y + row) * 8 * s->sb_cols + x + col]);
-        b->seg_id = pred;
-
-        memset(&s->above_segpred_ctx[col], 1, w4);
-        memset(&s->left_segpred_ctx[row7], 1, h4);
-    } else {
-        b->seg_id = vp8_rac_get_tree(&s->c, ff_vp9_segmentation_tree,
-                                     s->prob.seg);
-
-        memset(&s->above_segpred_ctx[col], 0, w4);
-        memset(&s->left_segpred_ctx[row7], 0, h4);
-    }
-    if ((s->segmentation.enabled && s->segmentation.update_map) || s->keyframe) {
-        for (y = 0; y < h4; y++)
-            memset(&s->segmentation_map[(y + row) * 8 * s->sb_cols + col],
-                   b->seg_id, w4);
-    }
-
-    b->skip = s->segmentation.enabled &&
-              s->segmentation.feat[b->seg_id].skip_enabled;
-    if (!b->skip) {
-        int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
-        b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
-        s->counts.skip[c][b->skip]++;
-    }
-
-    if (s->keyframe || s->intraonly) {
-        b->intra = 1;
-    } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
-        b->intra = !s->segmentation.feat[b->seg_id].ref_val;
-    } else {
-        int c, bit;
-
-        if (have_a && have_l) {
-            c  = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
-            c += (c == 2);
-        } else {
-            c = have_a ? 2 * s->above_intra_ctx[col] :
-                have_l ? 2 * s->left_intra_ctx[row7] : 0;
-        }
-        bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
-        s->counts.intra[c][bit]++;
-        b->intra = !bit;
-    }
-
-    if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
-        int c;
-        if (have_a) {
-            if (have_l) {
-                c = (s->above_skip_ctx[col] ? max_tx :
-                     s->above_txfm_ctx[col]) +
-                    (s->left_skip_ctx[row7] ? max_tx :
-                     s->left_txfm_ctx[row7]) > max_tx;
-            } else {
-                c = s->above_skip_ctx[col] ? 1 :
-                    (s->above_txfm_ctx[col] * 2 > max_tx);
-            }
-        } else if (have_l) {
-            c = s->left_skip_ctx[row7] ? 1 :
-                (s->left_txfm_ctx[row7] * 2 > max_tx);
-        } else {
-            c = 1;
-        }
-        switch (max_tx) {
-        case TX_32X32:
-            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
-            if (b->tx) {
-                b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
-                if (b->tx == 2)
-                    b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
-            }
-            s->counts.tx32p[c][b->tx]++;
-            break;
-        case TX_16X16:
-            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
-            if (b->tx)
-                b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
-            s->counts.tx16p[c][b->tx]++;
-            break;
-        case TX_8X8:
-            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
-            s->counts.tx8p[c][b->tx]++;
-            break;
-        case TX_4X4:
-            b->tx = TX_4X4;
-            break;
-        }
-    } else {
-        b->tx = FFMIN(max_tx, s->txfmmode);
-    }
-
-    if (s->keyframe || s->intraonly) {
-        uint8_t *a = &s->above_mode_ctx[col * 2];
-        uint8_t *l = &s->left_mode_ctx[(row7) << 1];
-
-        b->comp = 0;
-        if (b->bs > BS_8x8) {
-            // FIXME the memory storage intermediates here aren't really
-            // necessary, they're just there to make the code slightly
-            // simpler for now
-            b->mode[0] =
-            a[0]       = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
-                                          ff_vp9_default_kf_ymode_probs[a[0]][l[0]]);
-            if (b->bs != BS_8x4) {
-                b->mode[1] = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
-                                              ff_vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
-                l[0]       =
-                a[1]       = b->mode[1];
-            } else {
-                l[0]       =
-                a[1]       =
-                b->mode[1] = b->mode[0];
-            }
-            if (b->bs != BS_4x8) {
-                b->mode[2] =
-                a[0]       = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
-                                              ff_vp9_default_kf_ymode_probs[a[0]][l[1]]);
-                if (b->bs != BS_8x4) {
-                    b->mode[3] = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
-                                                  ff_vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
-                    l[1]       =
-                    a[1]       = b->mode[3];
-                } else {
-                    l[1]       =
-                    a[1]       =
-                    b->mode[3] = b->mode[2];
-                }
-            } else {
-                b->mode[2] = b->mode[0];
-                l[1]       =
-                a[1]       =
-                b->mode[3] = b->mode[1];
-            }
-        } else {
-            b->mode[0] = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
-                                          ff_vp9_default_kf_ymode_probs[*a][*l]);
-            b->mode[3] =
-            b->mode[2] =
-            b->mode[1] = b->mode[0];
-            // FIXME this can probably be optimized
-            memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
-            memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
-        }
-        b->uvmode = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
-                                     ff_vp9_default_kf_uvmode_probs[b->mode[3]]);
-    } else if (b->intra) {
-        b->comp = 0;
-        if (b->bs > BS_8x8) {
-            b->mode[0] = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
-                                          s->prob.p.y_mode[0]);
-            s->counts.y_mode[0][b->mode[0]]++;
-            if (b->bs != BS_8x4) {
-                b->mode[1] = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
-                                              s->prob.p.y_mode[0]);
-                s->counts.y_mode[0][b->mode[1]]++;
-            } else {
-                b->mode[1] = b->mode[0];
-            }
-            if (b->bs != BS_4x8) {
-                b->mode[2] = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
-                                              s->prob.p.y_mode[0]);
-                s->counts.y_mode[0][b->mode[2]]++;
-                if (b->bs != BS_8x4) {
-                    b->mode[3] = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
-                                                  s->prob.p.y_mode[0]);
-                    s->counts.y_mode[0][b->mode[3]]++;
-                } else {
-                    b->mode[3] = b->mode[2];
-                }
-            } else {
-                b->mode[2] = b->mode[0];
-                b->mode[3] = b->mode[1];
-            }
-        } else {
-            static const uint8_t size_group[10] = {
-                3, 3, 3, 3, 2, 2, 2, 1, 1, 1
-            };
-            int sz = size_group[b->bs];
-
-            b->mode[0] = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
-                                          s->prob.p.y_mode[sz]);
-            b->mode[1] =
-            b->mode[2] =
-            b->mode[3] = b->mode[0];
-            s->counts.y_mode[sz][b->mode[3]]++;
-        }
-        b->uvmode = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
-                                     s->prob.p.uv_mode[b->mode[3]]);
-        s->counts.uv_mode[b->mode[3]][b->uvmode]++;
-    } else {
-        static const uint8_t inter_mode_ctx_lut[14][14] = {
-            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
-            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
-            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
-            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
-            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
-            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
-            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
-            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
-            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
-            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
-            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
-            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
-            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
-            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
-        };
-
-        if (s->segmentation.feat[b->seg_id].ref_enabled) {
-            av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
-            b->comp   = 0;
-            b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
-        } else {
-            // read comp_pred flag
-            if (s->comppredmode != PRED_SWITCHABLE) {
-                b->comp = s->comppredmode == PRED_COMPREF;
-            } else {
-                int c;
-
-                // FIXME add intra as ref=0xff (or -1) to make these easier?
-                if (have_a) {
-                    if (have_l) {
-                        if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
-                            c = 4;
-                        } else if (s->above_comp_ctx[col]) {
-                            c = 2 + (s->left_intra_ctx[row7] ||
-                                     s->left_ref_ctx[row7] == s->fixcompref);
-                        } else if (s->left_comp_ctx[row7]) {
-                            c = 2 + (s->above_intra_ctx[col] ||
-                                     s->above_ref_ctx[col] == s->fixcompref);
-                        } else {
-                            c = (!s->above_intra_ctx[col] &&
-                                 s->above_ref_ctx[col] == s->fixcompref) ^
-                                (!s->left_intra_ctx[row7] &&
-                                 s->left_ref_ctx[row & 7] == s->fixcompref);
-                        }
-                    } else {
-                        c = s->above_comp_ctx[col] ? 3 :
-                            (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
-                    }
-                } else if (have_l) {
-                    c = s->left_comp_ctx[row7] ? 3 :
-                        (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
-                } else {
-                    c = 1;
-                }
-                b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
-                s->counts.comp[c][b->comp]++;
-            }
-
-            // read actual references
-            // FIXME probably cache a few variables here to prevent repetitive
-            // memory accesses below
-            if (b->comp) { /* two references */
-                int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
-
-                b->ref[fix_idx] = s->fixcompref;
-                // FIXME can this codeblob be replaced by some sort of LUT?
-                if (have_a) {
-                    if (have_l) {
-                        if (s->above_intra_ctx[col]) {
-                            if (s->left_intra_ctx[row7]) {
-                                c = 2;
-                            } else {
-                                c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
-                            }
-                        } else if (s->left_intra_ctx[row7]) {
-                            c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
-                        } else {
-                            int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
-
-                            if (refl == refa && refa == s->varcompref[1]) {
-                                c = 0;
-                            } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
-                                if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
-                                    (refl == s->fixcompref && refa == s->varcompref[0])) {
-                                    c = 4;
-                                } else {
-                                    c = (refa == refl) ? 3 : 1;
-                                }
-                            } else if (!s->left_comp_ctx[row7]) {
-                                if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
-                                    c = 1;
-                                } else {
-                                    c = (refl == s->varcompref[1] &&
-                                         refa != s->varcompref[1]) ? 2 : 4;
-                                }
-                            } else if (!s->above_comp_ctx[col]) {
-                                if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
-                                    c = 1;
-                                } else {
-                                    c = (refa == s->varcompref[1] &&
-                                         refl != s->varcompref[1]) ? 2 : 4;
-                                }
-                            } else {
-                                c = (refl == refa) ? 4 : 2;
-                            }
-                        }
-                    } else {
-                        if (s->above_intra_ctx[col]) {
-                            c = 2;
-                        } else if (s->above_comp_ctx[col]) {
-                            c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
-                        } else {
-                            c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
-                        }
-                    }
-                } else if (have_l) {
-                    if (s->left_intra_ctx[row7]) {
-                        c = 2;
-                    } else if (s->left_comp_ctx[row7]) {
-                        c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
-                    } else {
-                        c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
-                    }
-                } else {
-                    c = 2;
-                }
-                bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
-                b->ref[var_idx] = s->varcompref[bit];
-                s->counts.comp_ref[c][bit]++;
-            } else { /* single reference */
-                int bit, c;
-
-                if (have_a && !s->above_intra_ctx[col]) {
-                    if (have_l && !s->left_intra_ctx[row7]) {
-                        if (s->left_comp_ctx[row7]) {
-                            if (s->above_comp_ctx[col]) {
-                                c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
-                                         !s->above_ref_ctx[col]);
-                            } else {
-                                c = (3 * !s->above_ref_ctx[col]) +
-                                    (!s->fixcompref || !s->left_ref_ctx[row7]);
-                            }
-                        } else if (s->above_comp_ctx[col]) {
-                            c = (3 * !s->left_ref_ctx[row7]) +
-                                (!s->fixcompref || !s->above_ref_ctx[col]);
-                        } else {
-                            c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
-                        }
-                    } else if (s->above_intra_ctx[col]) {
-                        c = 2;
-                    } else if (s->above_comp_ctx[col]) {
-                        c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
-                    } else {
-                        c = 4 * (!s->above_ref_ctx[col]);
-                    }
-                } else if (have_l && !s->left_intra_ctx[row7]) {
-                    if (s->left_intra_ctx[row7]) {
-                        c = 2;
-                    } else if (s->left_comp_ctx[row7]) {
-                        c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
-                    } else {
-                        c = 4 * (!s->left_ref_ctx[row7]);
-                    }
-                } else {
-                    c = 2;
-                }
-                bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
-                s->counts.single_ref[c][0][bit]++;
-                if (!bit) {
-                    b->ref[0] = 0;
-                } else {
-                    // FIXME can this codeblob be replaced by some sort of LUT?
-                    if (have_a) {
-                        if (have_l) {
-                            if (s->left_intra_ctx[row7]) {
-                                if (s->above_intra_ctx[col]) {
-                                    c = 2;
-                                } else if (s->above_comp_ctx[col]) {
-                                    c = 1 + 2 * (s->fixcompref == 1 ||
-                                                 s->above_ref_ctx[col] == 1);
-                                } else if (!s->above_ref_ctx[col]) {
-                                    c = 3;
-                                } else {
-                                    c = 4 * (s->above_ref_ctx[col] == 1);
-                                }
-                            } else if (s->above_intra_ctx[col]) {
-                                if (s->left_intra_ctx[row7]) {
-                                    c = 2;
-                                } else if (s->left_comp_ctx[row7]) {
-                                    c = 1 + 2 * (s->fixcompref == 1 ||
-                                                 s->left_ref_ctx[row7] == 1);
-                                } else if (!s->left_ref_ctx[row7]) {
-                                    c = 3;
-                                } else {
-                                    c = 4 * (s->left_ref_ctx[row7] == 1);
-                                }
-                            } else if (s->above_comp_ctx[col]) {
-                                if (s->left_comp_ctx[row7]) {
-                                    if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
-                                        c = 3 * (s->fixcompref == 1 ||
-                                                 s->left_ref_ctx[row7] == 1);
-                                    } else {
-                                        c = 2;
-                                    }
-                                } else if (!s->left_ref_ctx[row7]) {
-                                    c = 1 + 2 * (s->fixcompref == 1 ||
-                                                 s->above_ref_ctx[col] == 1);
-                                } else {
-                                    c = 3 * (s->left_ref_ctx[row7] == 1) +
-                                        (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
-                                }
-                            } else if (s->left_comp_ctx[row7]) {
-                                if (!s->above_ref_ctx[col]) {
-                                    c = 1 + 2 * (s->fixcompref == 1 ||
-                                                 s->left_ref_ctx[row7] == 1);
-                                } else {
-                                    c = 3 * (s->above_ref_ctx[col] == 1) +
-                                        (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
-                                }
-                            } else if (!s->above_ref_ctx[col]) {
-                                if (!s->left_ref_ctx[row7]) {
-                                    c = 3;
-                                } else {
-                                    c = 4 * (s->left_ref_ctx[row7] == 1);
-                                }
-                            } else if (!s->left_ref_ctx[row7]) {
-                                c = 4 * (s->above_ref_ctx[col] == 1);
-                            } else {
-                                c = 2 * (s->left_ref_ctx[row7] == 1) +
-                                    2 * (s->above_ref_ctx[col] == 1);
-                            }
-                        } else {
-                            if (s->above_intra_ctx[col] ||
-                                (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
-                                c = 2;
-                            } else if (s->above_comp_ctx[col]) {
-                                c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
-                            } else {
-                                c = 4 * (s->above_ref_ctx[col] == 1);
-                            }
-                        }
-                    } else if (have_l) {
-                        if (s->left_intra_ctx[row7] ||
-                            (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
-                            c = 2;
-                        } else if (s->left_comp_ctx[row7]) {
-                            c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
-                        } else {
-                            c = 4 * (s->left_ref_ctx[row7] == 1);
-                        }
-                    } else {
-                        c = 2;
-                    }
-                    bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
-                    s->counts.single_ref[c][1][bit]++;
-                    b->ref[0] = 1 + bit;
-                }
-            }
-        }
-
-        if (b->bs <= BS_8x8) {
-            if (s->segmentation.feat[b->seg_id].skip_enabled) {
-                b->mode[0] =
-                b->mode[1] =
-                b->mode[2] =
-                b->mode[3] = ZEROMV;
-            } else {
-                static const uint8_t off[10] = {
-                    3, 0, 0, 1, 0, 0, 0, 0, 0, 0
-                };
-
-                // FIXME this needs to use the LUT tables from find_ref_mvs
-                // because not all are -1,0/0,-1
-                int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
-                                          [s->left_mode_ctx[row7 + off[b->bs]]];
-
-                b->mode[0] = vp8_rac_get_tree(&s->c, ff_vp9_inter_mode_tree,
-                                              s->prob.p.mv_mode[c]);
-                b->mode[1] =
-                b->mode[2] =
-                b->mode[3] = b->mode[0];
-                s->counts.mv_mode[c][b->mode[0] - 10]++;
-            }
-        }
-
-        if (s->filtermode == FILTER_SWITCHABLE) {
-            int c;
-
-            if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
-                if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
-                    c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
-                        s->left_filter_ctx[row7] : 3;
-                } else {
-                    c = s->above_filter_ctx[col];
-                }
-            } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
-                c = s->left_filter_ctx[row7];
-            } else {
-                c = 3;
-            }
-
-            b->filter = vp8_rac_get_tree(&s->c, ff_vp9_filter_tree,
-                                         s->prob.p.filter[c]);
-            s->counts.filter[c][b->filter]++;
-        } else {
-            b->filter = s->filtermode;
-        }
-
-        if (b->bs > BS_8x8) {
-            int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
-
-            b->mode[0] = vp8_rac_get_tree(&s->c, ff_vp9_inter_mode_tree,
-                                          s->prob.p.mv_mode[c]);
-            s->counts.mv_mode[c][b->mode[0] - 10]++;
-            ff_vp9_fill_mv(s, b->mv[0], b->mode[0], 0);
-
-            if (b->bs != BS_8x4) {
-                b->mode[1] = vp8_rac_get_tree(&s->c, ff_vp9_inter_mode_tree,
-                                              s->prob.p.mv_mode[c]);
-                s->counts.mv_mode[c][b->mode[1] - 10]++;
-                ff_vp9_fill_mv(s, b->mv[1], b->mode[1], 1);
-            } else {
-                b->mode[1] = b->mode[0];
-                AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
-                AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
-            }
-
-            if (b->bs != BS_4x8) {
-                b->mode[2] = vp8_rac_get_tree(&s->c, ff_vp9_inter_mode_tree,
-                                              s->prob.p.mv_mode[c]);
-                s->counts.mv_mode[c][b->mode[2] - 10]++;
-                ff_vp9_fill_mv(s, b->mv[2], b->mode[2], 2);
-
-                if (b->bs != BS_8x4) {
-                    b->mode[3] = vp8_rac_get_tree(&s->c, ff_vp9_inter_mode_tree,
-                                                  s->prob.p.mv_mode[c]);
-                    s->counts.mv_mode[c][b->mode[3] - 10]++;
-                    ff_vp9_fill_mv(s, b->mv[3], b->mode[3], 3);
-                } else {
-                    b->mode[3] = b->mode[2];
-                    AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
-                    AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
-                }
-            } else {
-                b->mode[2] = b->mode[0];
-                AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
-                AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
-                b->mode[3] = b->mode[1];
-                AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
-                AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
-            }
-        } else {
-            ff_vp9_fill_mv(s, b->mv[0], b->mode[0], -1);
-            AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
-            AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
-            AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
-            AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
-            AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
-            AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
-        }
-    }
-
-    // FIXME this can probably be optimized
-    memset(&s->above_skip_ctx[col], b->skip, w4);
-    memset(&s->left_skip_ctx[row7], b->skip, h4);
-    memset(&s->above_txfm_ctx[col], b->tx, w4);
-    memset(&s->left_txfm_ctx[row7], b->tx, h4);
-    memset(&s->above_partition_ctx[col], above_ctx[b->bs], w4);
-    memset(&s->left_partition_ctx[row7], left_ctx[b->bs], h4);
-    if (!s->keyframe && !s->intraonly) {
-        memset(&s->above_intra_ctx[col], b->intra, w4);
-        memset(&s->left_intra_ctx[row7], b->intra, h4);
-        memset(&s->above_comp_ctx[col], b->comp, w4);
-        memset(&s->left_comp_ctx[row7], b->comp, h4);
-        memset(&s->above_mode_ctx[col], b->mode[3], w4);
-        memset(&s->left_mode_ctx[row7], b->mode[3], h4);
-        if (s->filtermode == FILTER_SWITCHABLE && !b->intra) {
-            memset(&s->above_filter_ctx[col], b->filter, w4);
-            memset(&s->left_filter_ctx[row7], b->filter, h4);
-            b->filter = ff_vp9_filter_lut[b->filter];
-        }
-        if (b->bs > BS_8x8) {
-            int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
-
-            AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
-            AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
-            AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
-            AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
-            AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
-            AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
-            AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
-            AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
-        } else {
-            int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
-
-            for (n = 0; n < w4 * 2; n++) {
-                AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
-                AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
-            }
-            for (n = 0; n < h4 * 2; n++) {
-                AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
-                AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
-            }
-        }
-
-        if (!b->intra) { // FIXME write 0xff or -1 if intra, so we can use this
-                         // as a direct check in above branches
-            int vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
-
-            memset(&s->above_ref_ctx[col], vref, w4);
-            memset(&s->left_ref_ctx[row7], vref, h4);
-        }
-    }
-
-    // FIXME kinda ugly
-    for (y = 0; y < h4; y++) {
-        int x, o = (row + y) * s->sb_cols * 8 + col;
-
-        if (b->intra) {
-            for (x = 0; x < w4; x++) {
-                s->mv[0][o + x].ref[0] =
-                s->mv[0][o + x].ref[1] = -1;
-            }
-        } else if (b->comp) {
-            for (x = 0; x < w4; x++) {
-                s->mv[0][o + x].ref[0] = b->ref[0];
-                s->mv[0][o + x].ref[1] = b->ref[1];
-                AV_COPY32(&s->mv[0][o + x].mv[0], &b->mv[3][0]);
-                AV_COPY32(&s->mv[0][o + x].mv[1], &b->mv[3][1]);
-            }
-        } else {
-            for (x = 0; x < w4; x++) {
-                s->mv[0][o + x].ref[0] = b->ref[0];
-                s->mv[0][o + x].ref[1] = -1;
-                AV_COPY32(&s->mv[0][o + x].mv[0], &b->mv[3][0]);
-            }
-        }
-    }
-}
-
-// FIXME remove tx argument, and merge cnt/eob arguments?
-static int decode_block_coeffs(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
-                               enum TxfmMode tx, unsigned (*cnt)[6][3],
-                               unsigned (*eob)[6][2], uint8_t(*p)[6][11],
-                               int nnz, const int16_t *scan,
-                               const int16_t(*nb)[2],
-                               const int16_t *band_counts, const int16_t *qmul)
-{
-    int i = 0, band = 0, band_left = band_counts[band];
-    uint8_t *tp = p[0][nnz];
-    uint8_t cache[1024];
-
-    do {
-        int val, rc;
-
-        val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
-        eob[band][nnz][val]++;
-        if (!val)
-            break;
-
-skip_eob:
-        if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
-            cnt[band][nnz][0]++;
-            if (!--band_left)
-                band_left = band_counts[++band];
-            cache[scan[i]] = 0;
-            nnz            = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
-            tp             = p[band][nnz];
-            if (++i == n_coeffs)
-                break;  //invalid input; blocks should end with EOB
-            goto skip_eob;
-        }
-
-        rc = scan[i];
-        if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
-            cnt[band][nnz][1]++;
-            val       = 1;
-            cache[rc] = 1;
-        } else {
-            // fill in p[3-10] (model fill) - only once per frame for each pos
-            if (!tp[3])
-                memcpy(&tp[3], ff_vp9_model_pareto8[tp[2]], 8);
-
-            cnt[band][nnz][2]++;
-            if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
-                if (!vp56_rac_get_prob_branchy(c, tp[4])) {
-                    cache[rc] = val = 2;
-                } else {
-                    val       = 3 + vp56_rac_get_prob(c, tp[5]);
-                    cache[rc] = 3;
-                }
-            } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
-                cache[rc] = 4;
-                if (!vp56_rac_get_prob_branchy(c, tp[7])) {
-                    val  =  vp56_rac_get_prob(c, 159) + 5;
-                } else {
-                    val  = (vp56_rac_get_prob(c, 165) << 1) + 7;
-                    val +=  vp56_rac_get_prob(c, 145);
-                }
-            } else { // cat 3-6
-                cache[rc] = 5;
-                if (!vp56_rac_get_prob_branchy(c, tp[8])) {
-                    if (!vp56_rac_get_prob_branchy(c, tp[9])) {
-                        val  = (vp56_rac_get_prob(c, 173) << 2) + 11;
-                        val += (vp56_rac_get_prob(c, 148) << 1);
-                        val +=  vp56_rac_get_prob(c, 140);
-                    } else {
-                        val  = (vp56_rac_get_prob(c, 176) << 3) + 19;
-                        val += (vp56_rac_get_prob(c, 155) << 2);
-                        val += (vp56_rac_get_prob(c, 140) << 1);
-                        val +=  vp56_rac_get_prob(c, 135);
-                    }
-                } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
-                    val  = (vp56_rac_get_prob(c, 180) << 4) + 35;
-                    val += (vp56_rac_get_prob(c, 157) << 3);
-                    val += (vp56_rac_get_prob(c, 141) << 2);
-                    val += (vp56_rac_get_prob(c, 134) << 1);
-                    val +=  vp56_rac_get_prob(c, 130);
-                } else {
-                    val  = (vp56_rac_get_prob(c, 254) << 13) + 67;
-                    val += (vp56_rac_get_prob(c, 254) << 12);
-                    val += (vp56_rac_get_prob(c, 254) << 11);
-                    val += (vp56_rac_get_prob(c, 252) << 10);
-                    val += (vp56_rac_get_prob(c, 249) << 9);
-                    val += (vp56_rac_get_prob(c, 243) << 8);
-                    val += (vp56_rac_get_prob(c, 230) << 7);
-                    val += (vp56_rac_get_prob(c, 196) << 6);
-                    val += (vp56_rac_get_prob(c, 177) << 5);
-                    val += (vp56_rac_get_prob(c, 153) << 4);
-                    val += (vp56_rac_get_prob(c, 140) << 3);
-                    val += (vp56_rac_get_prob(c, 133) << 2);
-                    val += (vp56_rac_get_prob(c, 130) << 1);
-                    val +=  vp56_rac_get_prob(c, 129);
-                }
-            }
-        }
-        if (!--band_left)
-            band_left = band_counts[++band];
-        if (tx == TX_32X32) // FIXME slow
-            coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2;
-        else
-            coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i];
-        nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
-        tp  = p[band][nnz];
-    } while (++i < n_coeffs);
-
-    return i;
-}
-
-static int decode_coeffs(AVCodecContext *avctx)
-{
-    VP9Context *s = avctx->priv_data;
-    VP9Block *const b = &s->b;
-    int row = b->row, col = b->col;
-    uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
-    unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
-    unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
-    int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
-    int end_x = FFMIN(2 * (s->cols - col), w4);
-    int end_y = FFMIN(2 * (s->rows - row), h4);
-    int n, pl, x, y, step1d = 1 << b->tx, step = 1 << (b->tx * 2);
-    int uvstep1d = 1 << b->uvtx, uvstep = 1 << (b->uvtx * 2), ret;
-    int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
-    int tx = 4 * s->lossless + b->tx;
-    const int16_t **yscans = ff_vp9_scans[tx];
-    const int16_t (**ynbs)[2] = ff_vp9_scans_nb[tx];
-    const int16_t *uvscan = ff_vp9_scans[b->uvtx][DCT_DCT];
-    const int16_t (*uvnb)[2] = ff_vp9_scans_nb[b->uvtx][DCT_DCT];
-    uint8_t *a = &s->above_y_nnz_ctx[col * 2];
-    uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
-    static const int16_t band_counts[4][8] = {
-        { 1, 2, 3, 4,  3,   16 - 13, 0 },
-        { 1, 2, 3, 4, 11,   64 - 21, 0 },
-        { 1, 2, 3, 4, 11,  256 - 21, 0 },
-        { 1, 2, 3, 4, 11, 1024 - 21, 0 },
-    };
-    const int16_t *y_band_counts  = band_counts[b->tx];
-    const int16_t *uv_band_counts = band_counts[b->uvtx];
-
-    /* y tokens */
-    if (b->tx > TX_4X4) { // FIXME slow
-        for (y = 0; y < end_y; y += step1d)
-            for (x = 1; x < step1d; x++)
-                l[y] |= l[y + x];
-        for (x = 0; x < end_x; x += step1d)
-            for (y = 1; y < step1d; y++)
-                a[x] |= a[x + y];
-    }
-    for (n = 0, y = 0; y < end_y; y += step1d) {
-        for (x = 0; x < end_x; x += step1d, n += step) {
-            enum TxfmType txtp = ff_vp9_intra_txfm_type[b->mode[b->tx == TX_4X4 &&
-                                                                b->bs > BS_8x8 ?
-                                                                n : 0]];
-            int nnz = a[x] + l[y];
-            if ((ret = decode_block_coeffs(&s->c, s->block + 16 * n, 16 * step,
-                                           b->tx, c, e, p, nnz, yscans[txtp],
-                                           ynbs[txtp], y_band_counts,
-                                           qmul[0])) < 0)
-                return ret;
-            a[x] = l[y] = !!ret;
-            if (b->tx > TX_8X8)
-                AV_WN16A(&s->eob[n], ret);
-            else
-                s->eob[n] = ret;
-        }
-    }
-    if (b->tx > TX_4X4) { // FIXME slow
-        for (y = 0; y < end_y; y += step1d)
-            memset(&l[y + 1], l[y], FFMIN(end_y - y - 1, step1d - 1));
-        for (x = 0; x < end_x; x += step1d)
-            memset(&a[x + 1], a[x], FFMIN(end_x - x - 1, step1d - 1));
-    }
-
-    p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
-    c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
-    e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
-    w4    >>= 1;
-    h4    >>= 1;
-    end_x >>= 1;
-    end_y >>= 1;
-    for (pl = 0; pl < 2; pl++) {
-        a = &s->above_uv_nnz_ctx[pl][col];
-        l = &s->left_uv_nnz_ctx[pl][row & 7];
-        if (b->uvtx > TX_4X4) { // FIXME slow
-            for (y = 0; y < end_y; y += uvstep1d)
-                for (x = 1; x < uvstep1d; x++)
-                    l[y] |= l[y + x];
-            for (x = 0; x < end_x; x += uvstep1d)
-                for (y = 1; y < uvstep1d; y++)
-                    a[x] |= a[x + y];
-        }
-        for (n = 0, y = 0; y < end_y; y += uvstep1d) {
-            for (x = 0; x < end_x; x += uvstep1d, n += uvstep) {
-                int nnz = a[x] + l[y];
-                if ((ret = decode_block_coeffs(&s->c, s->uvblock[pl] + 16 * n,
-                                               16 * uvstep, b->uvtx, c, e, p,
-                                               nnz, uvscan, uvnb,
-                                               uv_band_counts, qmul[1])) < 0)
-                    return ret;
-                a[x] = l[y] = !!ret;
-                if (b->uvtx > TX_8X8)
-                    AV_WN16A(&s->uveob[pl][n], ret);
-                else
-                    s->uveob[pl][n] = ret;
-            }
-        }
-        if (b->uvtx > TX_4X4) { // FIXME slow
-            for (y = 0; y < end_y; y += uvstep1d)
-                memset(&l[y + 1], l[y], FFMIN(end_y - y - 1, uvstep1d - 1));
-            for (x = 0; x < end_x; x += uvstep1d)
-                memset(&a[x + 1], a[x], FFMIN(end_x - x - 1, uvstep1d - 1));
-        }
-    }
-
-    return 0;
-}
-
-static av_always_inline int check_intra_mode(VP9Context *s, int mode,
-                                             uint8_t **a,
-                                             uint8_t *dst_edge,
-                                             ptrdiff_t stride_edge,
-                                             uint8_t *dst_inner,
-                                             ptrdiff_t stride_inner,
-                                             uint8_t *l, int col, int x, int w,
-                                             int row, int y, enum TxfmMode tx,
-                                             int p)
-{
-    int have_top   = row > 0 || y > 0;
-    int have_left  = col > s->tiling.tile_col_start || x > 0;
-    int have_right = x < w - 1;
-    static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
-        [VERT_PRED]            = { { DC_127_PRED,          VERT_PRED            },
-                                   { DC_127_PRED,          VERT_PRED            } },
-        [HOR_PRED]             = { { DC_129_PRED,          DC_129_PRED          },
-                                   { HOR_PRED,             HOR_PRED             } },
-        [DC_PRED]              = { { DC_128_PRED,          TOP_DC_PRED          },
-                                   { LEFT_DC_PRED,         DC_PRED              } },
-        [DIAG_DOWN_LEFT_PRED]  = { { DC_127_PRED,          DIAG_DOWN_LEFT_PRED  },
-                                   { DC_127_PRED,          DIAG_DOWN_LEFT_PRED  } },
-        [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
-                                   { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
-        [VERT_RIGHT_PRED]      = { { VERT_RIGHT_PRED,      VERT_RIGHT_PRED      },
-                                   { VERT_RIGHT_PRED,      VERT_RIGHT_PRED      } },
-        [HOR_DOWN_PRED]        = { { HOR_DOWN_PRED,        HOR_DOWN_PRED        },
-                                   { HOR_DOWN_PRED,        HOR_DOWN_PRED        } },
-        [VERT_LEFT_PRED]       = { { DC_127_PRED,          VERT_LEFT_PRED       },
-                                   { DC_127_PRED,          VERT_LEFT_PRED       } },
-        [HOR_UP_PRED]          = { { DC_129_PRED,          DC_129_PRED          },
-                                   { HOR_UP_PRED,          HOR_UP_PRED          } },
-        [TM_VP8_PRED]          = { { DC_129_PRED,          VERT_PRED            },
-                                   { HOR_PRED,             TM_VP8_PRED          } },
-    };
-    static const struct {
-        uint8_t needs_left:1;
-        uint8_t needs_top:1;
-        uint8_t needs_topleft:1;
-        uint8_t needs_topright:1;
-    } edges[N_INTRA_PRED_MODES] = {
-        [VERT_PRED]            = { .needs_top  = 1 },
-        [HOR_PRED]             = { .needs_left = 1 },
-        [DC_PRED]              = { .needs_top  = 1, .needs_left = 1 },
-        [DIAG_DOWN_LEFT_PRED]  = { .needs_top  = 1, .needs_topright = 1 },
-        [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1,
-                                   .needs_topleft = 1 },
-        [VERT_RIGHT_PRED]      = { .needs_left = 1, .needs_top = 1,
-                                   .needs_topleft = 1 },
-        [HOR_DOWN_PRED]        = { .needs_left = 1, .needs_top = 1,
-                                   .needs_topleft = 1 },
-        [VERT_LEFT_PRED]       = { .needs_top  = 1, .needs_topright = 1 },
-        [HOR_UP_PRED]          = { .needs_left = 1 },
-        [TM_VP8_PRED]          = { .needs_left = 1, .needs_top = 1,
-                                   .needs_topleft = 1 },
-        [LEFT_DC_PRED]         = { .needs_left = 1 },
-        [TOP_DC_PRED]          = { .needs_top  = 1 },
-        [DC_128_PRED]          = { 0 },
-        [DC_127_PRED]          = { 0 },
-        [DC_129_PRED]          = { 0 }
-    };
-
-    av_assert2(mode >= 0 && mode < 10);
-    mode = mode_conv[mode][have_left][have_top];
-    if (edges[mode].needs_top) {
-        uint8_t *top = NULL, *topleft = NULL;
-        int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
-        int n_px_need_tr = 0;
-
-        if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
-            n_px_need_tr = 4;
-
-        // if top of sb64-row, use s->intra_pred_data[] instead of
-        // dst[-stride] for intra prediction (it contains pre- instead of
-        // post-loopfilter data)
-        if (have_top) {
-            top = !(row & 7) && !y ?
-                  s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
-                  y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
-            if (have_left)
-                topleft = !(row & 7) && !y ?
-                          s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
-                          y == 0 || x == 0 ? &dst_edge[-stride_edge] :
-                          &dst_inner[-stride_inner];
-        }
-
-        if (have_top &&
-            (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
-            (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
-            n_px_need + n_px_need_tr <= n_px_have) {
-            *a = top;
-        } else {
-            if (have_top) {
-                if (n_px_need <= n_px_have) {
-                    memcpy(*a, top, n_px_need);
-                } else {
-                    memcpy(*a, top, n_px_have);
-                    memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
-                           n_px_need - n_px_have);
-                }
-            } else {
-                memset(*a, 127, n_px_need);
-            }
-            if (edges[mode].needs_topleft) {
-                if (have_left && have_top)
-                    (*a)[-1] = topleft[-1];
-                else
-                    (*a)[-1] = have_top ? 129 : 127;
-            }
-            if (tx == TX_4X4 && edges[mode].needs_topright) {
-                if (have_top && have_right &&
-                    n_px_need + n_px_need_tr <= n_px_have) {
-                    memcpy(&(*a)[4], &top[4], 4);
-                } else {
-                    memset(&(*a)[4], (*a)[3], 4);
-                }
-            }
-        }
-    }
-    if (edges[mode].needs_left) {
-        if (have_left) {
-            int i;
-            int n_px_need = 4 << tx;
-            int n_px_have = (((s->rows - row) << !p) - y) * 4;
-            uint8_t *dst     = x == 0 ? dst_edge : dst_inner;
-            ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
-
-            if (n_px_need <= n_px_have) {
-                for (i = 0; i < n_px_need; i++)
-                    l[i] = dst[i * stride - 1];
-            } else {
-                for (i = 0; i < n_px_have; i++)
-                    l[i] = dst[i * stride - 1];
-                memset(&l[i], l[i - 1], n_px_need - n_px_have);
-            }
-        } else {
-            memset(l, 129, 4 << tx);
-        }
-    }
-
-    return mode;
-}
-
-static void intra_recon(AVCodecContext *avctx, ptrdiff_t y_off, ptrdiff_t uv_off)
-{
-    VP9Context *s = avctx->priv_data;
-    VP9Block *const b = &s->b;
-    int row = b->row, col = b->col;
-    int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
-    int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
-    int end_x = FFMIN(2 * (s->cols - col), w4);
-    int end_y = FFMIN(2 * (s->rows - row), h4);
-    int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
-    int uvstep1d = 1 << b->uvtx, p;
-    uint8_t *dst = b->dst[0], *dst_r = s->cur_frame->data[0] + y_off;
-
-    for (n = 0, y = 0; y < end_y; y += step1d) {
-        uint8_t *ptr = dst, *ptr_r = dst_r;
-        for (x = 0; x < end_x;
-             x += step1d, ptr += 4 * step1d, ptr_r += 4 * step1d, n += step) {
-            int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
-                               y * 2 + x : 0];
-            LOCAL_ALIGNED_16(uint8_t, a_buf, [48]);
-            uint8_t *a = &a_buf[16], l[32];
-            enum TxfmType txtp = ff_vp9_intra_txfm_type[mode];
-            int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
-
-            mode = check_intra_mode(s, mode, &a, ptr_r,
-                                    s->cur_frame->linesize[0],
-                                    ptr, b->y_stride, l,
-                                    col, x, w4, row, y, b->tx, 0);
-            s->dsp.intra_pred[b->tx][mode](ptr, b->y_stride, l, a);
-            if (eob)
-                s->dsp.itxfm_add[tx][txtp](ptr, b->y_stride,
-                                           s->block + 16 * n, eob);
-        }
-        dst_r += 4 * s->cur_frame->linesize[0] * step1d;
-        dst   += 4 * b->y_stride * step1d;
-    }
-
-    // U/V
-    h4    >>= 1;
-    w4    >>= 1;
-    end_x >>= 1;
-    end_y >>= 1;
-    step    = 1 << (b->uvtx * 2);
-    for (p = 0; p < 2; p++) {
-        dst   = b->dst[1 + p];
-        dst_r = s->cur_frame->data[1 + p] + uv_off;
-        for (n = 0, y = 0; y < end_y; y += uvstep1d) {
-            uint8_t *ptr = dst, *ptr_r = dst_r;
-            for (x = 0; x < end_x;
-                 x += uvstep1d, ptr += 4 * uvstep1d,
-                 ptr_r += 4 * uvstep1d, n += step) {
-                int mode = b->uvmode;
-                LOCAL_ALIGNED_16(uint8_t, a_buf, [48]);
-                uint8_t *a = &a_buf[16], l[32];
-                int eob    = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n])
-                                              : s->uveob[p][n];
-
-                mode = check_intra_mode(s, mode, &a, ptr_r,
-                                        s->cur_frame->linesize[1],
-                                        ptr, b->uv_stride, l,
-                                        col, x, w4, row, y, b->uvtx, p + 1);
-                s->dsp.intra_pred[b->uvtx][mode](ptr, b->uv_stride, l, a);
-                if (eob)
-                    s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, b->uv_stride,
-                                                    s->uvblock[p] + 16 * n,
-                                                    eob);
-            }
-            dst_r += 4 * uvstep1d * s->cur_frame->linesize[1];
-            dst   += 4 * uvstep1d * b->uv_stride;
-        }
-    }
-}
-
-static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func(*mc)[2],
-                                         uint8_t *dst, ptrdiff_t dst_stride,
-                                         const uint8_t *ref,
-                                         ptrdiff_t ref_stride,
-                                         ptrdiff_t y, ptrdiff_t x,
-                                         const VP56mv *mv,
-                                         int bw, int bh, int w, int h)
-{
-    int mx = mv->x, my = mv->y;
-
-    y   += my >> 3;
-    x   += mx >> 3;
-    ref += y * ref_stride + x;
-    mx  &= 7;
-    my  &= 7;
-    // FIXME bilinear filter only needs 0/1 pixels, not 3/4
-    if (x < !!mx * 3 || y < !!my * 3 ||
-        x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
-        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
-                                 ref - !!my * 3 * ref_stride - !!mx * 3,
-                                 80,
-                                 ref_stride,
-                                 bw + !!mx * 7, bh + !!my * 7,
-                                 x - !!mx * 3, y - !!my * 3, w, h);
-        ref        = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
-        ref_stride = 80;
-    }
-    mc[!!mx][!!my](dst, ref, dst_stride, ref_stride, bh, mx << 1, my << 1);
-}
-
-static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func(*mc)[2],
-                                           uint8_t *dst_u, uint8_t *dst_v,
-                                           ptrdiff_t dst_stride,
-                                           const uint8_t *ref_u,
-                                           ptrdiff_t src_stride_u,
-                                           const uint8_t *ref_v,
-                                           ptrdiff_t src_stride_v,
-                                           ptrdiff_t y, ptrdiff_t x,
-                                           const VP56mv *mv,
-                                           int bw, int bh, int w, int h)
-{
-    int mx = mv->x, my = mv->y;
-
-    y     += my >> 4;
-    x     += mx >> 4;
-    ref_u += y * src_stride_u + x;
-    ref_v += y * src_stride_v + x;
-    mx    &= 15;
-    my    &= 15;
-    // FIXME bilinear filter only needs 0/1 pixels, not 3/4
-    if (x < !!mx * 3 || y < !!my * 3 ||
-        x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
-        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
-                                 ref_u - !!my * 3 * src_stride_u - !!mx * 3,
-                                 80,
-                                 src_stride_u,
-                                 bw + !!mx * 7, bh + !!my * 7,
-                                 x - !!mx * 3, y - !!my * 3, w, h);
-        ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
-        mc[!!mx][!!my](dst_u, ref_u, dst_stride, 80, bh, mx, my);
-
-        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
-                                 ref_v - !!my * 3 * src_stride_v - !!mx * 3,
-                                 80,
-                                 src_stride_v,
-                                 bw + !!mx * 7, bh + !!my * 7,
-                                 x - !!mx * 3, y - !!my * 3, w, h);
-        ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
-        mc[!!mx][!!my](dst_v, ref_v, dst_stride, 80, bh, mx, my);
-    } else {
-        mc[!!mx][!!my](dst_u, ref_u, dst_stride, src_stride_u, bh, mx, my);
-        mc[!!mx][!!my](dst_v, ref_v, dst_stride, src_stride_v, bh, mx, my);
-    }
-}
-
-static int inter_recon(AVCodecContext *avctx)
-{
-    static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
-        { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
-        { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
-    };
-    VP9Context *s = avctx->priv_data;
-    VP9Block *const b = &s->b;
-    int row = b->row, col = b->col;
-    AVFrame *ref1 = s->refs[s->refidx[b->ref[0]]];
-    AVFrame *ref2 = b->comp ? s->refs[s->refidx[b->ref[1]]] : NULL;
-    int w = avctx->width, h = avctx->height;
-    ptrdiff_t ls_y = b->y_stride, ls_uv = b->uv_stride;
-
-    if (!ref1->data[0] || (b->comp && !ref2->data[0]))
-        return AVERROR_INVALIDDATA;
-
-    // y inter pred
-    if (b->bs > BS_8x8) {
-        if (b->bs == BS_8x4) {
-            mc_luma_dir(s, s->dsp.mc[3][b->filter][0], b->dst[0], ls_y,
-                        ref1->data[0], ref1->linesize[0],
-                        row << 3, col << 3, &b->mv[0][0], 8, 4, w, h);
-            mc_luma_dir(s, s->dsp.mc[3][b->filter][0],
-                        b->dst[0] + 4 * ls_y, ls_y,
-                        ref1->data[0], ref1->linesize[0],
-                        (row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w, h);
-
-            if (b->comp) {
-                mc_luma_dir(s, s->dsp.mc[3][b->filter][1], b->dst[0], ls_y,
-                            ref2->data[0], ref2->linesize[0],
-                            row << 3, col << 3, &b->mv[0][1], 8, 4, w, h);
-                mc_luma_dir(s, s->dsp.mc[3][b->filter][1],
-                            b->dst[0] + 4 * ls_y, ls_y,
-                            ref2->data[0], ref2->linesize[0],
-                            (row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w, h);
-            }
-        } else if (b->bs == BS_4x8) {
-            mc_luma_dir(s, s->dsp.mc[4][b->filter][0], b->dst[0], ls_y,
-                        ref1->data[0], ref1->linesize[0],
-                        row << 3, col << 3, &b->mv[0][0], 4, 8, w, h);
-            mc_luma_dir(s, s->dsp.mc[4][b->filter][0], b->dst[0] + 4, ls_y,
-                        ref1->data[0], ref1->linesize[0],
-                        row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w, h);
-
-            if (b->comp) {
-                mc_luma_dir(s, s->dsp.mc[4][b->filter][1], b->dst[0], ls_y,
-                            ref2->data[0], ref2->linesize[0],
-                            row << 3, col << 3, &b->mv[0][1], 4, 8, w, h);
-                mc_luma_dir(s, s->dsp.mc[4][b->filter][1], b->dst[0] + 4, ls_y,
-                            ref2->data[0], ref2->linesize[0],
-                            row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w, h);
-            }
-        } else {
-            av_assert2(b->bs == BS_4x4);
-
-            // FIXME if two horizontally adjacent blocks have the same MV,
-            // do a w8 instead of a w4 call
-            mc_luma_dir(s, s->dsp.mc[4][b->filter][0], b->dst[0], ls_y,
-                        ref1->data[0], ref1->linesize[0],
-                        row << 3, col << 3, &b->mv[0][0], 4, 4, w, h);
-            mc_luma_dir(s, s->dsp.mc[4][b->filter][0], b->dst[0] + 4, ls_y,
-                        ref1->data[0], ref1->linesize[0],
-                        row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w, h);
-            mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
-                        b->dst[0] + 4 * ls_y, ls_y,
-                        ref1->data[0], ref1->linesize[0],
-                        (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w, h);
-            mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
-                        b->dst[0] + 4 * ls_y + 4, ls_y,
-                        ref1->data[0], ref1->linesize[0],
-                        (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w, h);
-
-            if (b->comp) {
-                mc_luma_dir(s, s->dsp.mc[4][b->filter][1], b->dst[0], ls_y,
-                            ref2->data[0], ref2->linesize[0],
-                            row << 3, col << 3, &b->mv[0][1], 4, 4, w, h);
-                mc_luma_dir(s, s->dsp.mc[4][b->filter][1], b->dst[0] + 4, ls_y,
-                            ref2->data[0], ref2->linesize[0],
-                            row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w, h);
-                mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
-                            b->dst[0] + 4 * ls_y, ls_y,
-                            ref2->data[0], ref2->linesize[0],
-                            (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w, h);
-                mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
-                            b->dst[0] + 4 * ls_y + 4, ls_y,
-                            ref2->data[0], ref2->linesize[0],
-                            (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w, h);
-            }
-        }
-    } else {
-        int bwl = bwlog_tab[0][b->bs];
-        int bw  = bwh_tab[0][b->bs][0] * 4;
-        int bh  = bwh_tab[0][b->bs][1] * 4;
-
-        mc_luma_dir(s, s->dsp.mc[bwl][b->filter][0], b->dst[0], ls_y,
-                    ref1->data[0], ref1->linesize[0],
-                    row << 3, col << 3, &b->mv[0][0], bw, bh, w, h);
-
-        if (b->comp)
-            mc_luma_dir(s, s->dsp.mc[bwl][b->filter][1], b->dst[0], ls_y,
-                        ref2->data[0], ref2->linesize[0],
-                        row << 3, col << 3, &b->mv[0][1], bw, bh, w, h);
-    }
-
-    // uv inter pred
-    {
-        int bwl = bwlog_tab[1][b->bs];
-        int bw  = bwh_tab[1][b->bs][0] * 4, bh = bwh_tab[1][b->bs][1] * 4;
-        VP56mv mvuv;
-
-        w = (w + 1) >> 1;
-        h = (h + 1) >> 1;
-        if (b->bs > BS_8x8) {
-            mvuv.x = ROUNDED_DIV(b->mv[0][0].x + b->mv[1][0].x +
-                                 b->mv[2][0].x + b->mv[3][0].x, 4);
-            mvuv.y = ROUNDED_DIV(b->mv[0][0].y + b->mv[1][0].y +
-                                 b->mv[2][0].y + b->mv[3][0].y, 4);
-        } else {
-            mvuv = b->mv[0][0];
-        }
-
-        mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][0],
-                      b->dst[1], b->dst[2], ls_uv,
-                      ref1->data[1], ref1->linesize[1],
-                      ref1->data[2], ref1->linesize[2],
-                      row << 2, col << 2, &mvuv, bw, bh, w, h);
-
-        if (b->comp) {
-            if (b->bs > BS_8x8) {
-                mvuv.x = ROUNDED_DIV(b->mv[0][1].x + b->mv[1][1].x +
-                                     b->mv[2][1].x + b->mv[3][1].x, 4);
-                mvuv.y = ROUNDED_DIV(b->mv[0][1].y + b->mv[1][1].y +
-                                     b->mv[2][1].y + b->mv[3][1].y, 4);
-            } else {
-                mvuv = b->mv[0][1];
-            }
-            mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][1],
-                          b->dst[1], b->dst[2], ls_uv,
-                          ref2->data[1], ref2->linesize[1],
-                          ref2->data[2], ref2->linesize[2],
-                          row << 2, col << 2, &mvuv, bw, bh, w, h);
-        }
-    }
-
-    if (!b->skip) {
-        /* mostly copied intra_reconn() */
-
-        int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
-        int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
-        int end_x = FFMIN(2 * (s->cols - col), w4);
-        int end_y = FFMIN(2 * (s->rows - row), h4);
-        int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
-        int uvstep1d = 1 << b->uvtx, p;
-        uint8_t *dst = b->dst[0];
-
-        // y itxfm add
-        for (n = 0, y = 0; y < end_y; y += step1d) {
-            uint8_t *ptr = dst;
-            for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
-                int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
-
-                if (eob)
-                    s->dsp.itxfm_add[tx][DCT_DCT](ptr, b->y_stride,
-                                                  s->block + 16 * n, eob);
-            }
-            dst += 4 * b->y_stride * step1d;
-        }
-
-        // uv itxfm add
-        h4    >>= 1;
-        w4    >>= 1;
-        end_x >>= 1;
-        end_y >>= 1;
-        step    = 1 << (b->uvtx * 2);
-        for (p = 0; p < 2; p++) {
-            dst = b->dst[p + 1];
-            for (n = 0, y = 0; y < end_y; y += uvstep1d) {
-                uint8_t *ptr = dst;
-                for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
-                    int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n])
-                                               : s->uveob[p][n];
-                    if (eob)
-                        s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, b->uv_stride,
-                                                        s->uvblock[p] + 16 * n, eob);
-                }
-                dst += 4 * uvstep1d * b->uv_stride;
-            }
-        }
-    }
-    return 0;
-}
-
-static av_always_inline void mask_edges(VP9Filter *lflvl, int is_uv,
-                                        int row_and_7, int col_and_7,
-                                        int w, int h, int col_end, int row_end,
-                                        enum TxfmMode tx, int skip_inter)
-{
-    // FIXME I'm pretty sure all loops can be replaced by a single LUT if
-    // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
-    // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
-    // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
-
-    // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
-    // edges. This means that for UV, we work on two subsampled blocks at
-    // a time, and we only use the topleft block's mode information to set
-    // things like block strength. Thus, for any block size smaller than
-    // 16x16, ignore the odd portion of the block.
-    if (tx == TX_4X4 && is_uv) {
-        if (h == 1) {
-            if (row_and_7 & 1)
-                return;
-            if (!row_end)
-                h += 1;
-        }
-        if (w == 1) {
-            if (col_and_7 & 1)
-                return;
-            if (!col_end)
-                w += 1;
-        }
-    }
-
-    if (tx == TX_4X4 && !skip_inter) {
-        int t = 1 << col_and_7, m_col = (t << w) - t, y;
-        int m_col_odd = (t << (w - 1)) - t;
-
-        // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
-        if (is_uv) {
-            int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
-
-            for (y = row_and_7; y < h + row_and_7; y++) {
-                int col_mask_id = 2 - !(y & 7);
-
-                lflvl->mask[is_uv][0][y][1] |= m_row_8;
-                lflvl->mask[is_uv][0][y][2] |= m_row_4;
-                // for odd lines, if the odd col is not being filtered,
-                // skip odd row also:
-                // .---. <-- a
-                // |   |
-                // |___| <-- b
-                // ^   ^
-                // c   d
-                //
-                // if a/c are even row/col and b/d are odd, and d is skipped,
-                // e.g. right edge of size-66x66.webm, then skip b also (bug)
-                if ((col_end & 1) && (y & 1)) {
-                    lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
-                } else {
-                    lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
-                }
-            }
-        } else {
-            int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
-
-            for (y = row_and_7; y < h + row_and_7; y++) {
-                int col_mask_id = 2 - !(y & 3);
-
-                lflvl->mask[is_uv][0][y][1]           |= m_row_8; // row edge
-                lflvl->mask[is_uv][0][y][2]           |= m_row_4;
-                lflvl->mask[is_uv][1][y][col_mask_id] |= m_col; // col edge
-                lflvl->mask[is_uv][0][y][3]           |= m_col;
-                lflvl->mask[is_uv][1][y][3]           |= m_col;
-            }
-        }
-    } else {
-        int y, t = 1 << col_and_7, m_col = (t << w) - t;
-
-        if (!skip_inter) {
-            int mask_id = (tx == TX_8X8);
-            int l2 = tx + is_uv - 1, step1d = 1 << l2;
-            static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
-            int m_row = m_col & masks[l2];
-
-            // at odd UV col/row edges tx16/tx32 loopfilter edges, force
-            // 8wd loopfilter to prevent going off the visible edge.
-            if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
-                int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
-                int m_row_8  = m_row - m_row_16;
-
-                for (y = row_and_7; y < h + row_and_7; y++) {
-                    lflvl->mask[is_uv][0][y][0] |= m_row_16;
-                    lflvl->mask[is_uv][0][y][1] |= m_row_8;
-                }
-            } else {
-                for (y = row_and_7; y < h + row_and_7; y++)
-                    lflvl->mask[is_uv][0][y][mask_id] |= m_row;
-            }
-
-            if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
-                for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
-                    lflvl->mask[is_uv][1][y][0] |= m_col;
-                if (y - row_and_7 == h - 1)
-                    lflvl->mask[is_uv][1][y][1] |= m_col;
-            } else {
-                for (y = row_and_7; y < h + row_and_7; y += step1d)
-                    lflvl->mask[is_uv][1][y][mask_id] |= m_col;
-            }
-        } else if (tx != TX_4X4) {
-            int mask_id;
-
-            mask_id = (tx == TX_8X8) || (is_uv && h == 1);
-            lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
-            mask_id = (tx == TX_8X8) || (is_uv && w == 1);
-            for (y = row_and_7; y < h + row_and_7; y++)
-                lflvl->mask[is_uv][0][y][mask_id] |= t;
-        } else if (is_uv) {
-            int t8 = t & 0x01, t4 = t - t8;
-
-            for (y = row_and_7; y < h + row_and_7; y++) {
-                lflvl->mask[is_uv][0][y][2] |= t4;
-                lflvl->mask[is_uv][0][y][1] |= t8;
-            }
-            lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
-        } else {
-            int t8 = t & 0x11, t4 = t - t8;
-
-            for (y = row_and_7; y < h + row_and_7; y++) {
-                lflvl->mask[is_uv][0][y][2] |= t4;
-                lflvl->mask[is_uv][0][y][1] |= t8;
-            }
-            lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
-        }
-    }
-}
-
-int ff_vp9_decode_block(AVCodecContext *avctx, int row, int col,
-                        VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
-                        enum BlockLevel bl, enum BlockPartition bp)
-{
-    VP9Context *s = avctx->priv_data;
-    VP9Block *const b = &s->b;
-    enum BlockSize bs = bl * 3 + bp;
-    int ret, y, w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
-    int emu[2];
-
-    b->row  = row;
-    b->row7 = row & 7;
-    b->col  = col;
-    b->col7 = col & 7;
-
-    s->min_mv.x = -(128 + col * 64);
-    s->min_mv.y = -(128 + row * 64);
-    s->max_mv.x = 128 + (s->cols - col - w4) * 64;
-    s->max_mv.y = 128 + (s->rows - row - h4) * 64;
-
-    b->bs = bs;
-    decode_mode(s, b);
-    b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));
-
-    if (!b->skip) {
-        if ((ret = decode_coeffs(avctx)) < 0)
-            return ret;
-    } else {
-        int pl;
-
-        memset(&s->above_y_nnz_ctx[col * 2], 0, w4 * 2);
-        memset(&s->left_y_nnz_ctx[(row & 7) << 1], 0, h4 * 2);
-        for (pl = 0; pl < 2; pl++) {
-            memset(&s->above_uv_nnz_ctx[pl][col], 0, w4);
-            memset(&s->left_uv_nnz_ctx[pl][row & 7], 0, h4);
-        }
-    }
-
-    /* Emulated overhangs if the stride of the target buffer can't hold.
-     * This allows to support emu-edge and so on even if we have large
-     * block overhangs. */
-    emu[0] = (col + w4) * 8 > s->cur_frame->linesize[0] ||
-             (row + h4) > s->rows;
-    emu[1] = (col + w4) * 4 > s->cur_frame->linesize[1] ||
-             (row + h4) > s->rows;
-    if (emu[0]) {
-        b->dst[0]   = s->tmp_y;
-        b->y_stride = 64;
-    } else {
-        b->dst[0]   = s->cur_frame->data[0] + yoff;
-        b->y_stride = s->cur_frame->linesize[0];
-    }
-    if (emu[1]) {
-        b->dst[1]    = s->tmp_uv[0];
-        b->dst[2]    = s->tmp_uv[1];
-        b->uv_stride = 32;
-    } else {
-        b->dst[1]    = s->cur_frame->data[1] + uvoff;
-        b->dst[2]    = s->cur_frame->data[2] + uvoff;
-        b->uv_stride = s->cur_frame->linesize[1];
-    }
-    if (b->intra) {
-        intra_recon(avctx, yoff, uvoff);
-    } else {
-        if ((ret = inter_recon(avctx)) < 0)
-            return ret;
-    }
-    if (emu[0]) {
-        int w = FFMIN(s->cols - col, w4) * 8;
-        int h = FFMIN(s->rows - row, h4) * 8;
-        int n, o = 0;
-
-        for (n = 0; o < w; n++) {
-            int bw = 64 >> n;
-
-            av_assert2(n <= 4);
-            if (w & bw) {
-                s->dsp.mc[n][0][0][0][0](s->cur_frame->data[0] + yoff + o,
-                                         s->tmp_y + o,
-                                         s->cur_frame->linesize[0],
-                                         64, h, 0, 0);
-                o += bw;
-            }
-        }
-    }
-    if (emu[1]) {
-        int w = FFMIN(s->cols - col, w4) * 4;
-        int h = FFMIN(s->rows - row, h4) * 4;
-        int n, o = 0;
-
-        for (n = 1; o < w; n++) {
-            int bw = 64 >> n;
-
-            av_assert2(n <= 4);
-            if (w & bw) {
-                s->dsp.mc[n][0][0][0][0](s->cur_frame->data[1] + uvoff + o,
-                                         s->tmp_uv[0] + o,
-                                         s->cur_frame->linesize[1],
-                                         32, h, 0, 0);
-                s->dsp.mc[n][0][0][0][0](s->cur_frame->data[2] + uvoff + o,
-                                         s->tmp_uv[1] + o,
-                                         s->cur_frame->linesize[2],
-                                         32, h, 0, 0);
-                o += bw;
-            }
-        }
-    }
-
-    // pick filter level and find edges to apply filter to
-    if (s->filter.level &&
-        (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
-                                                    [b->mode[3] != ZEROMV]) > 0) {
-        int x_end = FFMIN(s->cols - col, w4);
-        int y_end = FFMIN(s->rows - row, h4);
-        int skip_inter = !b->intra && b->skip;
-
-        for (y = 0; y < h4; y++)
-            memset(&lflvl->level[((row & 7) + y) * 8 + (col & 7)], lvl, w4);
-        mask_edges(lflvl, 0, row & 7, col & 7, x_end, y_end, 0, 0, b->tx, skip_inter);
-        mask_edges(lflvl, 1, row & 7, col & 7, x_end, y_end,
-                   s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
-                   s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
-                   b->uvtx, skip_inter);
-
-        if (!s->filter.lim_lut[lvl]) {
-            int sharp = s->filter.sharpness;
-            int limit = lvl;
-
-            if (sharp > 0) {
-                limit >>= (sharp + 3) >> 2;
-                limit   = FFMIN(limit, 9 - sharp);
-            }
-            limit = FFMAX(limit, 1);
-
-            s->filter.lim_lut[lvl]   = limit;
-            s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
-        }
-    }
-
-    return 0;
-}
diff --git a/libavcodec/vp9data.c b/libavcodec/vp9data.c
deleted file mode 100644
index 374fa8b..0000000
--- a/libavcodec/vp9data.c
+++ /dev/null
@@ -1,2133 +0,0 @@
-/*
- * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
- * Copyright (C) 2013 Clément Bœsch <u pkh me>
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "vp9.h"
-#include "vp9data.h"
-
-const int8_t ff_vp9_partition_tree[3][2] = {
-    { -PARTITION_NONE,                1 }, // '0'
-    {    -PARTITION_H,                2 }, // '10'
-    {    -PARTITION_V, -PARTITION_SPLIT }, // '110', '111'
-};
-
-const uint8_t ff_vp9_default_kf_partition_probs[4][4][3] = {
-    { /* 64x64 -> 32x32 */
-        { 174,  35,  49 } /* a/l both not split */,
-        {  68,  11,  27 } /* a split, l not split */,
-        {  57,  15,   9 } /* l split, a not split */,
-        {  12,   3,   3 } /* a/l both split */
-    }, { /* 32x32 -> 16x16 */
-        { 150,  40,  39 } /* a/l both not split */,
-        {  78,  12,  26 } /* a split, l not split */,
-        {  67,  33,  11 } /* l split, a not split */,
-        {  24,   7,   5 } /* a/l both split */,
-    }, { /* 16x16 -> 8x8 */
-        { 149,  53,  53 } /* a/l both not split */,
-        {  94,  20,  48 } /* a split, l not split */,
-        {  83,  53,  24 } /* l split, a not split */,
-        {  52,  18,  18 } /* a/l both split */,
-    }, { /* 8x8 -> 4x4 */
-        { 158,  97,  94 } /* a/l both not split */,
-        {  93,  24,  99 } /* a split, l not split */,
-        {  85, 119,  44 } /* l split, a not split */,
-        {  62,  59,  67 } /* a/l both split */,
-    },
-};
-
-const int8_t ff_vp9_segmentation_tree[7][2] = {
-    {  1,  2 },
-    {  3,  4 },
-    {  5,  6 },
-    { -0, -1 }, // '00x'
-    { -2, -3 }, // '01x'
-    { -4, -5 }, // '10x'
-    { -6, -7 }, // '11x'
-};
-
-const int8_t ff_vp9_intramode_tree[9][2] = {
-    {              -DC_PRED,                1 }, // '0'
-    {          -TM_VP8_PRED,                2 }, // '10'
-    {            -VERT_PRED,                3 }, // '110'
-    {                     4,                6 },
-    {             -HOR_PRED,                5 }, // '11100'
-    { -DIAG_DOWN_RIGHT_PRED, -VERT_RIGHT_PRED }, // '11101x'
-    {  -DIAG_DOWN_LEFT_PRED,                7 }, // '11110'
-    {       -VERT_LEFT_PRED,                8 }, // '111110'
-    {        -HOR_DOWN_PRED,     -HOR_UP_PRED }, // '111111x'
-};
-
-const uint8_t ff_vp9_default_kf_ymode_probs[10][10][9] = {
-    { /* above = v */
-        {  43,  46, 168, 134, 107, 128,  69, 142,  92 } /* left = v */,
-        {  44,  29,  68, 159, 201, 177,  50,  57,  77 } /* left = h */,
-        {  63,  36, 126, 146, 123, 158,  60,  90,  96 } /* left = dc */,
-        {  58,  38,  76, 114,  97, 172,  78, 133,  92 } /* left = d45 */,
-        {  46,  41,  76, 140,  63, 184,  69, 112,  57 } /* left = d135 */,
-        {  38,  32,  85, 140,  46, 112,  54, 151, 133 } /* left = d117 */,
-        {  39,  27,  61, 131, 110, 175,  44,  75, 136 } /* left = d153 */,
-        {  47,  35,  80, 100,  74, 143,  64, 163,  74 } /* left = d63 */,
-        {  52,  30,  74, 113, 130, 175,  51,  64,  58 } /* left = d27 */,
-        {  36,  61, 116, 114, 128, 162,  80, 125,  82 } /* left = tm */
-    }, { /* above = h */
-        {  55,  44,  68, 166, 179, 192,  57,  57, 108 } /* left = v */,
-        {  42,  26,  11, 199, 241, 228,  23,  15,  85 } /* left = h */,
-        {  82,  26,  26, 171, 208, 204,  44,  32, 105 } /* left = dc */,
-        {  68,  42,  19, 131, 160, 199,  55,  52,  83 } /* left = d45 */,
-        {  58,  50,  25, 139, 115, 232,  39,  52, 118 } /* left = d135 */,
-        {  50,  35,  33, 153, 104, 162,  64,  59, 131 } /* left = d117 */,
-        {  44,  24,  16, 150, 177, 202,  33,  19, 156 } /* left = d153 */,
-        {  53,  49,  21, 110, 116, 168,  59,  80,  76 } /* left = d63 */,
-        {  55,  27,  12, 153, 203, 218,  26,  27,  49 } /* left = d27 */,
-        {  38,  72,  19, 168, 203, 212,  50,  50, 107 } /* left = tm */
-    }, { /* above = dc */
-        {  92,  45, 102, 136, 116, 180,  74,  90, 100 } /* left = v */,
-        {  73,  32,  19, 187, 222, 215,  46,  34, 100 } /* left = h */,
-        { 137,  30,  42, 148, 151, 207,  70,  52,  91 } /* left = dc */,
-        {  91,  30,  32, 116, 121, 186,  93,  86,  94 } /* left = d45 */,
-        {  72,  35,  36, 149,  68, 206,  68,  63, 105 } /* left = d135 */,
-        {  73,  31,  28, 138,  57, 124,  55, 122, 151 } /* left = d117 */,
-        {  67,  23,  21, 140, 126, 197,  40,  37, 171 } /* left = d153 */,
-        {  74,  32,  27, 107,  86, 160,  63, 134, 102 } /* left = d63 */,
-        {  86,  27,  28, 128, 154, 212,  45,  43,  53 } /* left = d27 */,
-        {  59,  67,  44, 140, 161, 202,  78,  67, 119 } /* left = tm */
-    }, { /* above = d45 */
-        {  59,  38,  83, 112, 103, 162,  98, 136,  90 } /* left = v */,
-        {  62,  30,  23, 158, 200, 207,  59,  57,  50 } /* left = h */,
-        { 103,  26,  36, 129, 132, 201,  83,  80,  93 } /* left = dc */,
-        {  67,  30,  29,  84,  86, 191, 102,  91,  59 } /* left = d45 */,
-        {  60,  32,  33, 112,  71, 220,  64,  89, 104 } /* left = d135 */,
-        {  53,  26,  34, 130,  56, 149,  84, 120, 103 } /* left = d117 */,
-        {  53,  21,  23, 133, 109, 210,  56,  77, 172 } /* left = d153 */,
-        {  61,  29,  29,  93,  97, 165,  83, 175, 162 } /* left = d63 */,
-        {  77,  19,  29, 112, 142, 228,  55,  66,  36 } /* left = d27 */,
-        {  47,  47,  43, 114, 137, 181, 100,  99,  95 } /* left = tm */
-    }, { /* above = d135 */
-        {  53,  40,  55, 139,  69, 183,  61,  80, 110 } /* left = v */,
-        {  40,  29,  19, 161, 180, 207,  43,  24,  91 } /* left = h */,
-        {  69,  23,  29, 128,  83, 199,  46,  44, 101 } /* left = dc */,
-        {  60,  34,  19, 105,  61, 198,  53,  64,  89 } /* left = d45 */,
-        {  52,  31,  22, 158,  40, 209,  58,  62,  89 } /* left = d135 */,
-        {  44,  31,  29, 147,  46, 158,  56, 102, 198 } /* left = d117 */,
-        {  35,  19,  12, 135,  87, 209,  41,  45, 167 } /* left = d153 */,
-        {  51,  38,  25, 113,  58, 164,  70,  93,  97 } /* left = d63 */,
-        {  55,  25,  21, 118,  95, 215,  38,  39,  66 } /* left = d27 */,
-        {  47,  54,  34, 146, 108, 203,  72, 103, 151 } /* left = tm */
-    }, { /* above = d117 */
-        {  46,  27,  80, 150,  55, 124,  55, 121, 135 } /* left = v */,
-        {  36,  23,  27, 165, 149, 166,  54,  64, 118 } /* left = h */,
-        {  64,  19,  37, 156,  66, 138,  49,  95, 133 } /* left = dc */,
-        {  53,  21,  36, 131,  63, 163,  60, 109,  81 } /* left = d45 */,
-        {  40,  26,  35, 154,  40, 185,  51,  97, 123 } /* left = d135 */,
-        {  35,  19,  34, 179,  19,  97,  48, 129, 124 } /* left = d117 */,
-        {  36,  20,  26, 136,  62, 164,  33,  77, 154 } /* left = d153 */,
-        {  45,  26,  28, 129,  45, 129,  49, 147, 123 } /* left = d63 */,
-        {  45,  18,  32, 130,  90, 157,  40,  79,  91 } /* left = d27 */,
-        {  38,  44,  51, 136,  74, 162,  57,  97, 121 } /* left = tm */
-    }, { /* above = d153 */
-        {  56,  39,  58, 133, 117, 173,  48,  53, 187 } /* left = v */,
-        {  35,  21,  12, 161, 212, 207,  20,  23, 145 } /* left = h */,
-        {  75,  17,  22, 136, 138, 185,  32,  34, 166 } /* left = dc */,
-        {  56,  29,  19, 117, 109, 181,  55,  68, 112 } /* left = d45 */,
-        {  47,  29,  17, 153,  64, 220,  59,  51, 114 } /* left = d135 */,
-        {  46,  16,  24, 136,  76, 147,  41,  64, 172 } /* left = d117 */,
-        {  34,  17,  11, 108, 152, 187,  13,  15, 209 } /* left = d153 */,
-        {  55,  30,  18, 122,  79, 179,  44,  88, 116 } /* left = d63 */,
-        {  51,  24,  14, 115, 133, 209,  32,  26, 104 } /* left = d27 */,
-        {  37,  49,  25, 129, 168, 164,  41,  54, 148 } /* left = tm */
-    }, { /* above = d63 */
-        {  48,  34,  86, 101,  92, 146,  78, 179, 134 } /* left = v */,
-        {  47,  22,  24, 138, 187, 178,  68,  69,  59 } /* left = h */,
-        {  78,  23,  39, 111, 117, 170,  74, 124,  94 } /* left = dc */,
-        {  56,  25,  33, 105, 112, 187,  95, 177, 129 } /* left = d45 */,
-        {  48,  31,  27, 114,  63, 183,  82, 116,  56 } /* left = d135 */,
-        {  43,  28,  37, 121,  63, 123,  61, 192, 169 } /* left = d117 */,
-        {  42,  17,  24, 109,  97, 177,  56,  76, 122 } /* left = d153 */,
-        {  46,  23,  32,  74,  86, 150,  67, 183,  88 } /* left = d63 */,
-        {  58,  18,  28, 105, 139, 182,  70,  92,  63 } /* left = d27 */,
-        {  36,  38,  48,  92, 122, 165,  88, 137,  91 } /* left = tm */
-    }, { /* above = d27 */
-        {  62,  44,  61, 123, 105, 189,  48,  57,  64 } /* left = v */,
-        {  47,  25,  17, 175, 222, 220,  24,  30,  86 } /* left = h */,
-        {  82,  22,  32, 127, 143, 213,  39,  41,  70 } /* left = dc */,
-        {  68,  36,  17, 106, 102, 206,  59,  74,  74 } /* left = d45 */,
-        {  57,  39,  23, 151,  68, 216,  55,  63,  58 } /* left = d135 */,
-        {  49,  30,  35, 141,  70, 168,  82,  40, 115 } /* left = d117 */,
-        {  51,  25,  15, 136, 129, 202,  38,  35, 139 } /* left = d153 */,
-        {  59,  39,  19, 114,  75, 180,  77, 104,  42 } /* left = d63 */,
-        {  68,  26,  16, 111, 141, 215,  29,  28,  28 } /* left = d27 */,
-        {  40,  61,  26, 126, 152, 206,  61,  59,  93 } /* left = tm */
-    }, { /* above = tm */
-        {  44,  78, 115, 132, 119, 173,  71, 112,  93 } /* left = v */,
-        {  39,  38,  21, 184, 227, 206,  42,  32,  64 } /* left = h */,
-        {  65,  70,  60, 155, 159, 199,  61,  60,  81 } /* left = dc */,
-        {  58,  47,  36, 124, 137, 193,  80,  82,  78 } /* left = d45 */,
-        {  49,  50,  35, 144,  95, 205,  63,  78,  59 } /* left = d135 */,
-        {  41,  53,  52, 148,  71, 142,  65, 128,  51 } /* left = d117 */,
-        {  40,  36,  28, 143, 143, 202,  40,  55, 137 } /* left = d153 */,
-        {  42,  44,  44, 104, 105, 164,  64, 130,  80 } /* left = d63 */,
-        {  52,  34,  29, 129, 183, 227,  42,  35,  43 } /* left = d27 */,
-        {  43,  81,  53, 140, 169, 204,  68,  84,  72 } /* left = tm */
-    }
-};
-
-const uint8_t ff_vp9_default_kf_uvmode_probs[10][9] = {
-    { 118,  15, 123, 148, 131, 101,  44,  93, 131 } /* y = v */,
-    { 113,  12,  23, 188, 226, 142,  26,  32, 125 } /* y = h */,
-    { 144,  11,  54, 157, 195, 130,  46,  58, 108 } /* y = dc */,
-    { 120,  11,  50, 123, 163, 135,  64,  77, 103 } /* y = d45 */,
-    { 113,   9,  36, 155, 111, 157,  32,  44, 161 } /* y = d135 */,
-    { 116,   9,  55, 176,  76,  96,  37,  61, 149 } /* y = d117 */,
-    { 115,   9,  28, 141, 161, 167,  21,  25, 193 } /* y = d153 */,
-    { 116,  12,  64, 120, 140, 125,  49, 115, 121 } /* y = d63 */,
-    { 120,  12,  32, 145, 195, 142,  32,  38,  86 } /* y = d27 */,
-    { 102,  19,  66, 162, 182, 122,  35,  59, 128 } /* y = tm */
-};
-
-const int8_t ff_vp9_inter_mode_tree[3][2] = {
-    {    -ZEROMV,      1 }, // '0'
-    { -NEARESTMV,      2 }, // '10'
-    {    -NEARMV, -NEWMV }, // '11x'
-};
-
-const int8_t ff_vp9_filter_tree[2][2] = {
-    { -0,  1 },  // '0'
-    { -1, -2 },  // '1x'
-};
-
-const enum FilterMode ff_vp9_filter_lut[3] = {
-    FILTER_8TAP_REGULAR,
-    FILTER_8TAP_SMOOTH,
-    FILTER_8TAP_SHARP,
-};
-
-const int16_t ff_vp9_dc_qlookup[256] = {
-       4,    8,    8,    9,   10,   11,   12,   12,
-      13,   14,   15,   16,   17,   18,   19,   19,
-      20,   21,   22,   23,   24,   25,   26,   26,
-      27,   28,   29,   30,   31,   32,   32,   33,
-      34,   35,   36,   37,   38,   38,   39,   40,
-      41,   42,   43,   43,   44,   45,   46,   47,
-      48,   48,   49,   50,   51,   52,   53,   53,
-      54,   55,   56,   57,   57,   58,   59,   60,
-      61,   62,   62,   63,   64,   65,   66,   66,
-      67,   68,   69,   70,   70,   71,   72,   73,
-      74,   74,   75,   76,   77,   78,   78,   79,
-      80,   81,   81,   82,   83,   84,   85,   85,
-      87,   88,   90,   92,   93,   95,   96,   98,
-      99,  101,  102,  104,  105,  107,  108,  110,
-     111,  113,  114,  116,  117,  118,  120,  121,
-     123,  125,  127,  129,  131,  134,  136,  138,
-     140,  142,  144,  146,  148,  150,  152,  154,
-     156,  158,  161,  164,  166,  169,  172,  174,
-     177,  180,  182,  185,  187,  190,  192,  195,
-     199,  202,  205,  208,  211,  214,  217,  220,
-     223,  226,  230,  233,  237,  240,  243,  247,
-     250,  253,  257,  261,  265,  269,  272,  276,
-     280,  284,  288,  292,  296,  300,  304,  309,
-     313,  317,  322,  326,  330,  335,  340,  344,
-     349,  354,  359,  364,  369,  374,  379,  384,
-     389,  395,  400,  406,  411,  417,  423,  429,
-     435,  441,  447,  454,  461,  467,  475,  482,
-     489,  497,  505,  513,  522,  530,  539,  549,
-     559,  569,  579,  590,  602,  614,  626,  640,
-     654,  668,  684,  700,  717,  736,  755,  775,
-     796,  819,  843,  869,  896,  925,  955,  988,
-    1022, 1058, 1098, 1139, 1184, 1232, 1282, 1336,
-};
-
-const int16_t ff_vp9_ac_qlookup[256] = {
-       4,    8,    9,   10,   11,   12,   13,   14,
-      15,   16,   17,   18,   19,   20,   21,   22,
-      23,   24,   25,   26,   27,   28,   29,   30,
-      31,   32,   33,   34,   35,   36,   37,   38,
-      39,   40,   41,   42,   43,   44,   45,   46,
-      47,   48,   49,   50,   51,   52,   53,   54,
-      55,   56,   57,   58,   59,   60,   61,   62,
-      63,   64,   65,   66,   67,   68,   69,   70,
-      71,   72,   73,   74,   75,   76,   77,   78,
-      79,   80,   81,   82,   83,   84,   85,   86,
-      87,   88,   89,   90,   91,   92,   93,   94,
-      95,   96,   97,   98,   99,  100,  101,  102,
-     104,  106,  108,  110,  112,  114,  116,  118,
-     120,  122,  124,  126,  128,  130,  132,  134,
-     136,  138,  140,  142,  144,  146,  148,  150,
-     152,  155,  158,  161,  164,  167,  170,  173,
-     176,  179,  182,  185,  188,  191,  194,  197,
-     200,  203,  207,  211,  215,  219,  223,  227,
-     231,  235,  239,  243,  247,  251,  255,  260,
-     265,  270,  275,  280,  285,  290,  295,  300,
-     305,  311,  317,  323,  329,  335,  341,  347,
-     353,  359,  366,  373,  380,  387,  394,  401,
-     408,  416,  424,  432,  440,  448,  456,  465,
-     474,  483,  492,  501,  510,  520,  530,  540,
-     550,  560,  571,  582,  593,  604,  615,  627,
-     639,  651,  663,  676,  689,  702,  715,  729,
-     743,  757,  771,  786,  801,  816,  832,  848,
-     864,  881,  898,  915,  933,  951,  969,  988,
-    1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151,
-    1173, 1196, 1219, 1243, 1267, 1292, 1317, 1343,
-    1369, 1396, 1423, 1451, 1479, 1508, 1537, 1567,
-    1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828,
-};
-
-const enum TxfmType ff_vp9_intra_txfm_type[14] = {
-    [VERT_PRED]            = ADST_DCT,
-    [HOR_PRED]             = DCT_ADST,
-    [DC_PRED]              = DCT_DCT,
-    [DIAG_DOWN_LEFT_PRED]  = DCT_DCT,
-    [DIAG_DOWN_RIGHT_PRED] = ADST_ADST,
-    [VERT_RIGHT_PRED]      = ADST_DCT,
-    [HOR_DOWN_PRED]        = DCT_ADST,
-    [VERT_LEFT_PRED]       = ADST_DCT,
-    [HOR_UP_PRED]          = DCT_ADST,
-    [TM_VP8_PRED]          = ADST_ADST,
-    [NEARESTMV]            = DCT_DCT,
-    [NEARMV]               = DCT_DCT,
-    [ZEROMV]               = DCT_DCT,
-    [NEWMV]                = DCT_DCT,
-};
-
-const int16_t ff_vp9_default_scan_4x4[16] = {
-     0,  1,  4,  5,
-     2,  8,  3,  6,
-    12,  9,  7, 10,
-    13, 11, 14, 15,
-};
-
-const int16_t ff_vp9_col_scan_4x4[16] = {
-     0,  1,  2,  4,
-     3,  5,  6,  8,
-     7,  9, 10, 12,
-    13, 11, 14, 15,
-};
-
-const int16_t ff_vp9_row_scan_4x4[16] = {
-     0,  4,  1,  8,
-     5, 12,  9,  2,
-     6, 13,  3, 10,
-     7, 14, 11, 15,
-};
-
-const int16_t ff_vp9_default_scan_8x8[64] = {
-     0,  1,  8,  2,  9, 16, 10,  3,
-    17, 24, 18, 11,  4, 25, 32, 19,
-    12, 26,  5, 33, 20, 27, 40, 13,
-    34,  6, 41, 28, 21, 35, 42, 48,
-    14,  7, 36, 29, 43, 56, 49, 22,
-    15, 37, 50, 44, 57, 30, 23, 51,
-    45, 58, 38, 31, 52, 59, 39, 46,
-    53, 60, 47, 54, 61, 55, 62, 63,
-};
-
-const int16_t ff_vp9_col_scan_8x8[64] = {
-     0,  1,  2,  8,  3,  9,  4, 10,
-    16,  5, 11, 17, 12, 18,  6, 24,
-    19, 13, 25,  7, 26, 20, 32, 14,
-    27, 21, 33, 28, 34, 15, 22, 35,
-    40, 29, 41, 36, 23, 30, 42, 37,
-    48, 43, 31, 44, 49, 38, 50, 56,
-    45, 39, 51, 57, 52, 46, 58, 53,
-    59, 47, 60, 54, 61, 55, 62, 63,
-};
-
-const int16_t ff_vp9_row_scan_8x8[64] = {
-     0,  8, 16,  1,  9, 24,  2, 17,
-    32, 10, 25,  3, 40, 18, 11, 33,
-    26, 19,  4, 48, 41, 34, 12, 27,
-    56, 20,  5, 42, 35, 13, 49, 28,
-     6, 21, 43, 36, 14, 50, 29, 57,
-     7, 44, 22, 37, 51, 15, 58, 30,
-    23, 45, 52, 38, 59, 31, 46, 53,
-    39, 60, 47, 61, 54, 62, 55, 63,
-};
-
-const int16_t ff_vp9_default_scan_16x16[256] = {
-      0,   1,  16,   2,  17,  32,   3,  18,  33,  48,   4,  34,  19,  49,  20,   5,
-     35,  64,  50,  36,  65,  21,   6,  51,  80,  66,  37,  22,  52,   7,  81,  67,
-     38,  82,  53,  23,  96,  68,   8,  83,  97,  54,  39,  69, 112,  24,  98,  84,
-     70,  55,   9,  40,  85,  99, 113, 128,  25, 114, 100,  71,  86,  56,  10,  41,
-    115, 101, 129, 116,  72,  87,  26, 130, 144, 102,  57,  11,  42, 117, 131, 145,
-     88, 103,  27,  73, 132, 118, 146,  58, 160,  12,  43, 133, 147, 104,  89, 119,
-    161,  74, 148, 134,  28, 162,  59,  13, 176, 120, 149,  90, 135, 105, 163,  44,
-     75, 177, 164,  29, 150, 121, 136, 178, 165,  14, 106,  60,  91, 151,  45, 179,
-    192, 137, 166, 122,  76, 180, 152,  30,  61,  15, 107, 167, 181, 193,  92, 208,
-     46, 138, 123, 153, 194,  77, 168, 182,  31, 195, 209, 183, 108, 139,  62, 154,
-     47, 196,  93, 169, 210, 197, 224, 124, 184, 211,  78, 109, 170, 155,  63, 198,
-    212, 185, 225, 240, 140,  94, 199, 125,  79, 213, 226, 171, 186, 156, 214, 200,
-    110, 227, 141,  95, 241, 215, 228, 201, 126, 242, 187, 172, 157, 229, 111, 216,
-    243, 142, 202, 230, 127, 217, 244, 173, 188, 231, 158, 203, 143, 245, 218, 232,
-    189, 246, 159, 174, 233, 247, 219, 204, 175, 190, 248, 234, 205, 220, 249, 191,
-    235, 221, 250, 206, 222, 251, 236, 207, 237, 223, 252, 238, 253, 239, 254, 255,
-};
-
-const int16_t ff_vp9_col_scan_16x16[256] = {
-      0,   1,   2,   3,  16,   4,  17,   5,  18,   6,  19,  32,  20,   7,  33,  21,
-     34,   8,  35,  22,  48,  36,   9,  49,  23,  50,  37,  10,  38,  51,  24,  64,
-     52,  11,  65,  39,  25,  53,  66,  54,  40,  67,  12,  80,  26,  68,  55,  81,
-     41,  69,  13,  27,  82,  56,  70,  83,  42,  14,  84,  96,  71,  28,  57,  85,
-     97,  15,  72,  98,  43,  86,  58,  99,  29,  87, 100, 112,  73,  44, 101,  59,
-     30, 113,  88, 114,  74, 128, 102,  45,  31, 115,  60, 103,  89, 116,  75, 129,
-    117,  46, 104,  90,  61, 130, 118, 131, 132, 105,  76,  47, 119, 144,  91,  62,
-    133, 106, 145, 120, 146, 134,  77, 147, 121,  92, 135, 148,  63, 107, 136, 122,
-     93, 149, 160,  78, 150, 137, 108, 161, 162, 151, 123,  79, 138, 163, 152,  94,
-    164, 109, 165, 153, 124, 139, 176, 166,  95, 177, 167, 110, 154, 178, 125, 179,
-    140, 168, 155, 111, 180, 192, 181, 169, 141, 126, 182, 193, 194, 156, 183, 170,
-    195, 127, 142, 196, 184, 208, 197, 157, 171, 143, 185, 198, 209, 199, 210, 172,
-    158, 186, 211, 224, 212, 200, 240, 159, 213, 225, 187, 201, 173, 226, 214, 215,
-    227, 202, 228, 188, 241, 216, 174, 229, 242, 203, 243, 217, 230, 175, 189, 244,
-    231, 204, 218, 232, 245, 219, 246, 190, 233, 205, 191, 247, 234, 248, 220, 206,
-    249, 235, 221, 207, 250, 236, 222, 251, 223, 237, 238, 252, 239, 253, 254, 255,
-};
-
-const int16_t ff_vp9_row_scan_16x16[256] = {
-      0,  16,  32,   1,  48,  17,  64,  33,   2,  80,  18,  49,  96,  34,   3,  65,
-     19, 112,  50,  81,  35,   4, 128,  66,  20,  97,  51,  82,   5, 144,  36,  67,
-    113,  98,  21,  52, 160,  83, 129,  37,  68,   6, 114, 176,  99,  53,  22,  84,
-    145,  38,  69, 130,   7, 115, 192, 100,  54,  23,  85, 161, 146, 131,  39,  70,
-    208, 116,   8, 101, 177,  55,  86,  24, 162, 147, 132,  71, 224, 117,  40, 102,
-      9, 148,  56,  87, 193, 163, 240, 133, 178,  25, 118,  72,  41, 103, 164,  10,
-    149,  88, 134, 209, 179,  57, 119, 194,  26,  73, 165, 150, 104,  42, 135,  11,
-    180, 120,  89, 225, 195,  58,  27, 210, 151, 181, 166,  74,  43, 105,  12, 136,
-     90,  59, 241, 121,  28, 196, 167, 211, 152,  44, 182, 137,  75,  13, 226, 106,
-    122,  60, 197,  91, 168,  29, 183, 153,  14,  76, 212, 138,  45, 107,  15, 198,
-     92, 227, 169,  30, 123, 154,  61, 242, 184, 213, 139,  46,  77,  31, 108, 170,
-    199, 185, 124, 228,  93, 155, 214,  62, 140, 243,  78,  47, 200, 109, 186, 171,
-    201,  94,  63, 215, 229, 156,  79, 125, 141, 110, 216, 187, 172, 244, 202, 230,
-    217,  95, 157, 126, 245, 111, 142, 231, 188, 127, 158, 218, 173, 232, 246, 233,
-    203, 143, 247, 174, 189, 159, 219, 204, 248, 234, 249, 175, 190, 220, 205, 250,
-    235, 191, 221, 251, 236, 206, 252, 222, 207, 237, 223, 253, 238, 254, 239, 255,
-};
-
-const int16_t ff_vp9_default_scan_32x32[1024] = {
-       0,    1,   32,    2,   33,   64,    3,   34,   65,    4,   96,   35,   66,    5,   36,   97,
-      67,  128,   98,   68,   37,    6,  129,   99,    7,  160,   69,   38,  130,  100,  161,  131,
-      39,   70,    8,  101,  162,  132,  192,   71,   40,    9,  102,  163,  133,  193,   72,  224,
-     103,   41,  164,   10,  194,  134,  165,   73,  104,  135,  225,   42,  195,   11,  256,  166,
-     226,  196,   74,  105,  136,   43,   12,  167,  197,  227,  257,   75,  106,  137,  228,   44,
-     198,  168,  258,  288,   13,  229,   76,  107,  199,  138,  259,  169,  289,   45,  230,  260,
-     200,  108,   14,  170,  139,  320,  290,   77,  231,  261,   46,  201,  140,  291,  109,  232,
-     321,  262,  171,   78,  292,   15,  322,  202,  263,  352,  172,  293,  233,  141,  323,  110,
-      47,  203,  264,  234,  294,  353,  324,   16,   79,  204,  265,  295,  325,  173,  354,  142,
-     235,  384,   48,  296,  111,  266,  355,  326,   80,   17,  205,  236,  174,  356,  385,  327,
-     143,  297,  267,  357,  386,  112,   49,  328,  298,  206,  416,  237,  358,  387,   81,  175,
-      18,  329,  359,  388,  299,  330,  389,  113,  417,  238,  360,   50,  207,  418,  390,  331,
-      19,  448,  361,   82,  419,  391,  239,   51,  362,  420,  114,  449,  480,  421,   83,  363,
-     450,  422,  512,  451,  423,  115,  452,  481,  453,  482,  454,  544,  483,  455,  513,  484,
-     514,  485,  515,  486,  545,  576,  487,  546,  547,  608,  577,  578,  579,  609,  610,  611,
-      20,  144,  268,  392,  516,  640,   21,   52,  145,  176,  269,  300,  393,  424,  517,  548,
-     641,  672,   22,   53,   84,  146,  177,  208,  270,  301,  332,  394,  425,  456,  518,  549,
-     580,  642,  673,  704,   23,   54,   85,  116,  147,  178,  209,  240,  271,  302,  333,  364,
-     395,  426,  457,  488,  519,  550,  581,  612,  643,  674,  705,  736,   55,   86,  117,  179,
-     210,  241,  303,  334,  365,  427,  458,  489,  551,  582,  613,  675,  706,  737,   87,  118,
-     211,  242,  335,  366,  459,  490,  583,  614,  707,  738,  119,  243,  367,  491,  615,  739,
-      24,  148,  272,  396,  520,  644,  768,   25,   56,  149,  180,  273,  304,  397,  428,  521,
-     552,  645,  676,  769,  800,   26,   57,   88,  150,  181,  212,  274,  305,  336,  398,  429,
-     460,  522,  553,  584,  646,  677,  708,  770,  801,  832,   27,   58,   89,  120,  151,  182,
-     213,  244,  275,  306,  337,  368,  399,  430,  461,  492,  523,  554,  585,  616,  647,  678,
-     709,  740,  771,  802,  833,  864,   59,   90,  121,  183,  214,  245,  307,  338,  369,  431,
-     462,  493,  555,  586,  617,  679,  710,  741,  803,  834,  865,   91,  122,  215,  246,  339,
-     370,  463,  494,  587,  618,  711,  742,  835,  866,  123,  247,  371,  495,  619,  743,  867,
-      28,  152,  276,  400,  524,  648,  772,  896,   29,   60,  153,  184,  277,  308,  401,  432,
-     525,  556,  649,  680,  773,  804,  897,  928,   30,   61,   92,  154,  185,  216,  278,  309,
-     340,  402,  433,  464,  526,  557,  588,  650,  681,  712,  774,  805,  836,  898,  929,  960,
-      31,   62,   93,  124,  155,  186,  217,  248,  279,  310,  341,  372,  403,  434,  465,  496,
-     527,  558,  589,  620,  651,  682,  713,  744,  775,  806,  837,  868,  899,  930,  961,  992,
-      63,   94,  125,  187,  218,  249,  311,  342,  373,  435,  466,  497,  559,  590,  621,  683,
-     714,  745,  807,  838,  869,  931,  962,  993,   95,  126,  219,  250,  343,  374,  467,  498,
-     591,  622,  715,  746,  839,  870,  963,  994,  127,  251,  375,  499,  623,  747,  871,  995,
-     156,  280,  404,  528,  652,  776,  900,  157,  188,  281,  312,  405,  436,  529,  560,  653,
-     684,  777,  808,  901,  932,  158,  189,  220,  282,  313,  344,  406,  437,  468,  530,  561,
-     592,  654,  685,  716,  778,  809,  840,  902,  933,  964,  159,  190,  221,  252,  283,  314,
-     345,  376,  407,  438,  469,  500,  531,  562,  593,  624,  655,  686,  717,  748,  779,  810,
-     841,  872,  903,  934,  965,  996,  191,  222,  253,  315,  346,  377,  439,  470,  501,  563,
-     594,  625,  687,  718,  749,  811,  842,  873,  935,  966,  997,  223,  254,  347,  378,  471,
-     502,  595,  626,  719,  750,  843,  874,  967,  998,  255,  379,  503,  627,  751,  875,  999,
-     284,  408,  532,  656,  780,  904,  285,  316,  409,  440,  533,  564,  657,  688,  781,  812,
-     905,  936,  286,  317,  348,  410,  441,  472,  534,  565,  596,  658,  689,  720,  782,  813,
-     844,  906,  937,  968,  287,  318,  349,  380,  411,  442,  473,  504,  535,  566,  597,  628,
-     659,  690,  721,  752,  783,  814,  845,  876,  907,  938,  969, 1000,  319,  350,  381,  443,
-     474,  505,  567,  598,  629,  691,  722,  753,  815,  846,  877,  939,  970, 1001,  351,  382,
-     475,  506,  599,  630,  723,  754,  847,  878,  971, 1002,  383,  507,  631,  755,  879, 1003,
-     412,  536,  660,  784,  908,  413,  444,  537,  568,  661,  692,  785,  816,  909,  940,  414,
-     445,  476,  538,  569,  600,  662,  693,  724,  786,  817,  848,  910,  941,  972,  415,  446,
-     477,  508,  539,  570,  601,  632,  663,  694,  725,  756,  787,  818,  849,  880,  911,  942,
-     973, 1004,  447,  478,  509,  571,  602,  633,  695,  726,  757,  819,  850,  881,  943,  974,
-    1005,  479,  510,  603,  634,  727,  758,  851,  882,  975, 1006,  511,  635,  759,  883, 1007,
-     540,  664,  788,  912,  541,  572,  665,  696,  789,  820,  913,  944,  542,  573,  604,  666,
-     697,  728,  790,  821,  852,  914,  945,  976,  543,  574,  605,  636,  667,  698,  729,  760,
-     791,  822,  853,  884,  915,  946,  977, 1008,  575,  606,  637,  699,  730,  761,  823,  854,
-     885,  947,  978, 1009,  607,  638,  731,  762,  855,  886,  979, 1010,  639,  763,  887, 1011,
-     668,  792,  916,  669,  700,  793,  824,  917,  948,  670,  701,  732,  794,  825,  856,  918,
-     949,  980,  671,  702,  733,  764,  795,  826,  857,  888,  919,  950,  981, 1012,  703,  734,
-     765,  827,  858,  889,  951,  982, 1013,  735,  766,  859,  890,  983, 1014,  767,  891, 1015,
-     796,  920,  797,  828,  921,  952,  798,  829,  860,  922,  953,  984,  799,  830,  861,  892,
-     923,  954,  985, 1016,  831,  862,  893,  955,  986, 1017,  863,  894,  987, 1018,  895, 1019,
-     924,  925,  956,  926,  957,  988,  927,  958,  989, 1020,  959,  990, 1021,  991, 1022, 1023,
-};
-
-const int16_t *ff_vp9_scans[5][4] = {
-    {
-        ff_vp9_default_scan_4x4, ff_vp9_col_scan_4x4,
-        ff_vp9_row_scan_4x4, ff_vp9_default_scan_4x4
-    }, {
-        ff_vp9_default_scan_8x8, ff_vp9_col_scan_8x8,
-        ff_vp9_row_scan_8x8, ff_vp9_default_scan_8x8
-    }, {
-        ff_vp9_default_scan_16x16, ff_vp9_col_scan_16x16,
-        ff_vp9_row_scan_16x16, ff_vp9_default_scan_16x16
-    }, {
-        ff_vp9_default_scan_32x32, ff_vp9_default_scan_32x32,
-        ff_vp9_default_scan_32x32, ff_vp9_default_scan_32x32
-    }, { // lossless
-        ff_vp9_default_scan_4x4, ff_vp9_default_scan_4x4,
-        ff_vp9_default_scan_4x4, ff_vp9_default_scan_4x4
-    }
-};
-
-const int16_t ff_vp9_default_scan_4x4_nb[16][2] = {
-    {  0,  0 }, {  0,  0 }, {  4,  1 }, {  1,  1 },
-    {  4,  4 }, {  2,  2 }, {  5,  2 }, {  8,  8 },
-    {  8,  5 }, {  6,  3 }, {  9,  6 }, { 12,  9 },
-    { 10,  7 }, { 13, 10 }, { 14, 11 }, {  0,  0 },
-};
-
-const int16_t ff_vp9_col_scan_4x4_nb[16][2] = {
-    {  0,  0 }, {  1,  1 }, {  0,  0 }, {  2,  2 },
-    {  4,  4 }, {  5,  5 }, {  4,  4 }, {  6,  6 },
-    {  8,  8 }, {  9,  9 }, {  8,  8 }, { 12, 12 },
-    { 10, 10 }, { 13, 13 }, { 14, 14 }, {  0,  0 },
-};
-
-const int16_t ff_vp9_row_scan_4x4_nb[16][2] = {
-    {  0,  0 }, {  0,  0 }, {  4,  4 }, {  1,  1 },
-    {  8,  8 }, {  5,  5 }, {  1,  1 }, {  2,  2 },
-    {  9,  9 }, {  2,  2 }, {  6,  6 }, {  3,  3 },
-    { 10, 10 }, {  7,  7 }, { 11, 11 }, {  0,  0 },
-};
-
-const int16_t ff_vp9_default_scan_8x8_nb[64][2] = {
-    {  0,  0 }, {  0,  0 }, {  1,  1 }, {  8,  1 },
-    {  8,  8 }, {  9,  2 }, {  2,  2 }, { 16,  9 },
-    { 16, 16 }, { 17, 10 }, { 10,  3 }, {  3,  3 },
-    { 24, 17 }, { 24, 24 }, { 18, 11 }, { 11,  4 },
-    { 25, 18 }, {  4,  4 }, { 32, 25 }, { 19, 12 },
-    { 26, 19 }, { 32, 32 }, { 12,  5 }, { 33, 26 },
-    {  5,  5 }, { 40, 33 }, { 27, 20 }, { 20, 13 },
-    { 34, 27 }, { 41, 34 }, { 40, 40 }, { 13,  6 },
-    {  6,  6 }, { 35, 28 }, { 28, 21 }, { 42, 35 },
-    { 48, 48 }, { 48, 41 }, { 21, 14 }, { 14,  7 },
-    { 36, 29 }, { 49, 42 }, { 43, 36 }, { 56, 49 },
-    { 29, 22 }, { 22, 15 }, { 50, 43 }, { 44, 37 },
-    { 57, 50 }, { 37, 30 }, { 30, 23 }, { 51, 44 },
-    { 58, 51 }, { 38, 31 }, { 45, 38 }, { 52, 45 },
-    { 59, 52 }, { 46, 39 }, { 53, 46 }, { 60, 53 },
-    { 54, 47 }, { 61, 54 }, { 62, 55 }, {  0,  0 },
-};
-
-const int16_t ff_vp9_col_scan_8x8_nb[64][2] = {
-    {  0,  0 }, {  1,  1 }, {  0,  0 }, {  2,  2 },
-    {  8,  8 }, {  3,  3 }, {  9,  9 }, {  8,  8 },
-    {  4,  4 }, { 10, 10 }, { 16, 16 }, { 11, 11 },
-    { 17, 17 }, {  5,  5 }, { 16, 16 }, { 18, 18 },
-    { 12, 12 }, { 24, 24 }, {  6,  6 }, { 25, 25 },
-    { 19, 19 }, { 24, 24 }, { 13, 13 }, { 26, 26 },
-    { 20, 20 }, { 32, 32 }, { 27, 27 }, { 33, 33 },
-    { 14, 14 }, { 21, 21 }, { 34, 34 }, { 32, 32 },
-    { 28, 28 }, { 40, 40 }, { 35, 35 }, { 22, 22 },
-    { 29, 29 }, { 41, 41 }, { 36, 36 }, { 40, 40 },
-    { 42, 42 }, { 30, 30 }, { 43, 43 }, { 48, 48 },
-    { 37, 37 }, { 49, 49 }, { 48, 48 }, { 44, 44 },
-    { 38, 38 }, { 50, 50 }, { 56, 56 }, { 51, 51 },
-    { 45, 45 }, { 57, 57 }, { 52, 52 }, { 58, 58 },
-    { 46, 46 }, { 59, 59 }, { 53, 53 }, { 60, 60 },
-    { 54, 54 }, { 61, 61 }, { 62, 62 }, {  0,  0 },
-};
-
-const int16_t ff_vp9_row_scan_8x8_nb[64][2] = {
-    {  0,  0 }, {  8,  8 }, {  0,  0 }, {  1,  1 },
-    { 16, 16 }, {  1,  1 }, {  9,  9 }, { 24, 24 },
-    {  2,  2 }, { 17, 17 }, {  2,  2 }, { 32, 32 },
-    { 10, 10 }, {  3,  3 }, { 25, 25 }, { 18, 18 },
-    { 11, 11 }, {  3,  3 }, { 40, 40 }, { 33, 33 },
-    { 26, 26 }, {  4,  4 }, { 19, 19 }, { 48, 48 },
-    { 12, 12 }, {  4,  4 }, { 34, 34 }, { 27, 27 },
-    {  5,  5 }, { 41, 41 }, { 20, 20 }, {  5,  5 },
-    { 13, 13 }, { 35, 35 }, { 28, 28 }, {  6,  6 },
-    { 42, 42 }, { 21, 21 }, { 49, 49 }, {  6,  6 },
-    { 36, 36 }, { 14, 14 }, { 29, 29 }, { 43, 43 },
-    {  7,  7 }, { 50, 50 }, { 22, 22 }, { 15, 15 },
-    { 37, 37 }, { 44, 44 }, { 30, 30 }, { 51, 51 },
-    { 23, 23 }, { 38, 38 }, { 45, 45 }, { 31, 31 },
-    { 52, 52 }, { 39, 39 }, { 53, 53 }, { 46, 46 },
-    { 54, 54 }, { 47, 47 }, { 55, 55 }, {  0,  0 },
-};
-
-const int16_t ff_vp9_default_scan_16x16_nb[256][2] = {
-    {   0,   0 }, {   0,   0 }, {   1,   1 }, {  16,   1 },
-    {  16,  16 }, {   2,   2 }, {  17,   2 }, {  32,  17 },
-    {  32,  32 }, {   3,   3 }, {  33,  18 }, {  18,   3 },
-    {  48,  33 }, {  19,   4 }, {   4,   4 }, {  34,  19 },
-    {  48,  48 }, {  49,  34 }, {  35,  20 }, {  64,  49 },
-    {  20,   5 }, {   5,   5 }, {  50,  35 }, {  64,  64 },
-    {  65,  50 }, {  36,  21 }, {  21,   6 }, {  51,  36 },
-    {   6,   6 }, {  80,  65 }, {  66,  51 }, {  37,  22 },
-    {  81,  66 }, {  52,  37 }, {  22,   7 }, {  80,  80 },
-    {  67,  52 }, {   7,   7 }, {  82,  67 }, {  96,  81 },
-    {  53,  38 }, {  38,  23 }, {  68,  53 }, {  96,  96 },
-    {  23,   8 }, {  97,  82 }, {  83,  68 }, {  69,  54 },
-    {  54,  39 }, {   8,   8 }, {  39,  24 }, {  84,  69 },
-    {  98,  83 }, { 112,  97 }, { 112, 112 }, {  24,   9 },
-    { 113,  98 }, {  99,  84 }, {  70,  55 }, {  85,  70 },
-    {  55,  40 }, {   9,   9 }, {  40,  25 }, { 114,  99 },
-    { 100,  85 }, { 128, 113 }, { 115, 100 }, {  71,  56 },
-    {  86,  71 }, {  25,  10 }, { 129, 114 }, { 128, 128 },
-    { 101,  86 }, {  56,  41 }, {  10,  10 }, {  41,  26 },
-    { 116, 101 }, { 130, 115 }, { 144, 129 }, {  87,  72 },
-    { 102,  87 }, {  26,  11 }, {  72,  57 }, { 131, 116 },
-    { 117, 102 }, { 145, 130 }, {  57,  42 }, { 144, 144 },
-    {  11,  11 }, {  42,  27 }, { 132, 117 }, { 146, 131 },
-    { 103,  88 }, {  88,  73 }, { 118, 103 }, { 160, 145 },
-    {  73,  58 }, { 147, 132 }, { 133, 118 }, {  27,  12 },
-    { 161, 146 }, {  58,  43 }, {  12,  12 }, { 160, 160 },
-    { 119, 104 }, { 148, 133 }, {  89,  74 }, { 134, 119 },
-    { 104,  89 }, { 162, 147 }, {  43,  28 }, {  74,  59 },
-    { 176, 161 }, { 163, 148 }, {  28,  13 }, { 149, 134 },
-    { 120, 105 }, { 135, 120 }, { 177, 162 }, { 164, 149 },
-    {  13,  13 }, { 105,  90 }, {  59,  44 }, {  90,  75 },
-    { 150, 135 }, {  44,  29 }, { 178, 163 }, { 176, 176 },
-    { 136, 121 }, { 165, 150 }, { 121, 106 }, {  75,  60 },
-    { 179, 164 }, { 151, 136 }, {  29,  14 }, {  60,  45 },
-    {  14,  14 }, { 106,  91 }, { 166, 151 }, { 180, 165 },
-    { 192, 177 }, {  91,  76 }, { 192, 192 }, {  45,  30 },
-    { 137, 122 }, { 122, 107 }, { 152, 137 }, { 193, 178 },
-    {  76,  61 }, { 167, 152 }, { 181, 166 }, {  30,  15 },
-    { 194, 179 }, { 208, 193 }, { 182, 167 }, { 107,  92 },
-    { 138, 123 }, {  61,  46 }, { 153, 138 }, {  46,  31 },
-    { 195, 180 }, {  92,  77 }, { 168, 153 }, { 209, 194 },
-    { 196, 181 }, { 208, 208 }, { 123, 108 }, { 183, 168 },
-    { 210, 195 }, {  77,  62 }, { 108,  93 }, { 169, 154 },
-    { 154, 139 }, {  62,  47 }, { 197, 182 }, { 211, 196 },
-    { 184, 169 }, { 224, 209 }, { 224, 224 }, { 139, 124 },
-    {  93,  78 }, { 198, 183 }, { 124, 109 }, {  78,  63 },
-    { 212, 197 }, { 225, 210 }, { 170, 155 }, { 185, 170 },
-    { 155, 140 }, { 213, 198 }, { 199, 184 }, { 109,  94 },
-    { 226, 211 }, { 140, 125 }, {  94,  79 }, { 240, 225 },
-    { 214, 199 }, { 227, 212 }, { 200, 185 }, { 125, 110 },
-    { 241, 226 }, { 186, 171 }, { 171, 156 }, { 156, 141 },
-    { 228, 213 }, { 110,  95 }, { 215, 200 }, { 242, 227 },
-    { 141, 126 }, { 201, 186 }, { 229, 214 }, { 126, 111 },
-    { 216, 201 }, { 243, 228 }, { 172, 157 }, { 187, 172 },
-    { 230, 215 }, { 157, 142 }, { 202, 187 }, { 142, 127 },
-    { 244, 229 }, { 217, 202 }, { 231, 216 }, { 188, 173 },
-    { 245, 230 }, { 158, 143 }, { 173, 158 }, { 232, 217 },
-    { 246, 231 }, { 218, 203 }, { 203, 188 }, { 174, 159 },
-    { 189, 174 }, { 247, 232 }, { 233, 218 }, { 204, 189 },
-    { 219, 204 }, { 248, 233 }, { 190, 175 }, { 234, 219 },
-    { 220, 205 }, { 249, 234 }, { 205, 190 }, { 221, 206 },
-    { 250, 235 }, { 235, 220 }, { 206, 191 }, { 236, 221 },
-    { 222, 207 }, { 251, 236 }, { 237, 222 }, { 252, 237 },
-    { 238, 223 }, { 253, 238 }, { 254, 239 }, {   0,   0 },
-};
-
-const int16_t ff_vp9_col_scan_16x16_nb[256][2] = {
-    {   0,   0 }, {   1,   1 }, {   2,   2 }, {   0,   0 },
-    {   3,   3 }, {  16,  16 }, {   4,   4 }, {  17,  17 },
-    {   5,   5 }, {  18,  18 }, {  16,  16 }, {  19,  19 },
-    {   6,   6 }, {  32,  32 }, {  20,  20 }, {  33,  33 },
-    {   7,   7 }, {  34,  34 }, {  21,  21 }, {  32,  32 },
-    {  35,  35 }, {   8,   8 }, {  48,  48 }, {  22,  22 },
-    {  49,  49 }, {  36,  36 }, {   9,   9 }, {  37,  37 },
-    {  50,  50 }, {  23,  23 }, {  48,  48 }, {  51,  51 },
-    {  10,  10 }, {  64,  64 }, {  38,  38 }, {  24,  24 },
-    {  52,  52 }, {  65,  65 }, {  53,  53 }, {  39,  39 },
-    {  66,  66 }, {  11,  11 }, {  64,  64 }, {  25,  25 },
-    {  67,  67 }, {  54,  54 }, {  80,  80 }, {  40,  40 },
-    {  68,  68 }, {  12,  12 }, {  26,  26 }, {  81,  81 },
-    {  55,  55 }, {  69,  69 }, {  82,  82 }, {  41,  41 },
-    {  13,  13 }, {  83,  83 }, {  80,  80 }, {  70,  70 },
-    {  27,  27 }, {  56,  56 }, {  84,  84 }, {  96,  96 },
-    {  14,  14 }, {  71,  71 }, {  97,  97 }, {  42,  42 },
-    {  85,  85 }, {  57,  57 }, {  98,  98 }, {  28,  28 },
-    {  86,  86 }, {  99,  99 }, {  96,  96 }, {  72,  72 },
-    {  43,  43 }, { 100, 100 }, {  58,  58 }, {  29,  29 },
-    { 112, 112 }, {  87,  87 }, { 113, 113 }, {  73,  73 },
-    { 112, 112 }, { 101, 101 }, {  44,  44 }, {  30,  30 },
-    { 114, 114 }, {  59,  59 }, { 102, 102 }, {  88,  88 },
-    { 115, 115 }, {  74,  74 }, { 128, 128 }, { 116, 116 },
-    {  45,  45 }, { 103, 103 }, {  89,  89 }, {  60,  60 },
-    { 129, 129 }, { 117, 117 }, { 130, 130 }, { 131, 131 },
-    { 104, 104 }, {  75,  75 }, {  46,  46 }, { 118, 118 },
-    { 128, 128 }, {  90,  90 }, {  61,  61 }, { 132, 132 },
-    { 105, 105 }, { 144, 144 }, { 119, 119 }, { 145, 145 },
-    { 133, 133 }, {  76,  76 }, { 146, 146 }, { 120, 120 },
-    {  91,  91 }, { 134, 134 }, { 147, 147 }, {  62,  62 },
-    { 106, 106 }, { 135, 135 }, { 121, 121 }, {  92,  92 },
-    { 148, 148 }, { 144, 144 }, {  77,  77 }, { 149, 149 },
-    { 136, 136 }, { 107, 107 }, { 160, 160 }, { 161, 161 },
-    { 150, 150 }, { 122, 122 }, {  78,  78 }, { 137, 137 },
-    { 162, 162 }, { 151, 151 }, {  93,  93 }, { 163, 163 },
-    { 108, 108 }, { 164, 164 }, { 152, 152 }, { 123, 123 },
-    { 138, 138 }, { 160, 160 }, { 165, 165 }, {  94,  94 },
-    { 176, 176 }, { 166, 166 }, { 109, 109 }, { 153, 153 },
-    { 177, 177 }, { 124, 124 }, { 178, 178 }, { 139, 139 },
-    { 167, 167 }, { 154, 154 }, { 110, 110 }, { 179, 179 },
-    { 176, 176 }, { 180, 180 }, { 168, 168 }, { 140, 140 },
-    { 125, 125 }, { 181, 181 }, { 192, 192 }, { 193, 193 },
-    { 155, 155 }, { 182, 182 }, { 169, 169 }, { 194, 194 },
-    { 126, 126 }, { 141, 141 }, { 195, 195 }, { 183, 183 },
-    { 192, 192 }, { 196, 196 }, { 156, 156 }, { 170, 170 },
-    { 142, 142 }, { 184, 184 }, { 197, 197 }, { 208, 208 },
-    { 198, 198 }, { 209, 209 }, { 171, 171 }, { 157, 157 },
-    { 185, 185 }, { 210, 210 }, { 208, 208 }, { 211, 211 },
-    { 199, 199 }, { 224, 224 }, { 158, 158 }, { 212, 212 },
-    { 224, 224 }, { 186, 186 }, { 200, 200 }, { 172, 172 },
-    { 225, 225 }, { 213, 213 }, { 214, 214 }, { 226, 226 },
-    { 201, 201 }, { 227, 227 }, { 187, 187 }, { 240, 240 },
-    { 215, 215 }, { 173, 173 }, { 228, 228 }, { 241, 241 },
-    { 202, 202 }, { 242, 242 }, { 216, 216 }, { 229, 229 },
-    { 174, 174 }, { 188, 188 }, { 243, 243 }, { 230, 230 },
-    { 203, 203 }, { 217, 217 }, { 231, 231 }, { 244, 244 },
-    { 218, 218 }, { 245, 245 }, { 189, 189 }, { 232, 232 },
-    { 204, 204 }, { 190, 190 }, { 246, 246 }, { 233, 233 },
-    { 247, 247 }, { 219, 219 }, { 205, 205 }, { 248, 248 },
-    { 234, 234 }, { 220, 220 }, { 206, 206 }, { 249, 249 },
-    { 235, 235 }, { 221, 221 }, { 250, 250 }, { 222, 222 },
-    { 236, 236 }, { 237, 237 }, { 251, 251 }, { 238, 238 },
-    { 252, 252 }, { 253, 253 }, { 254, 254 }, {   0,   0 },
-};
-
-const int16_t ff_vp9_row_scan_16x16_nb[256][2] = {
-    {   0,   0 }, {  16,  16 }, {   0,   0 }, {  32,  32 },
-    {   1,   1 }, {  48,  48 }, {  17,  17 }, {   1,   1 },
-    {  64,  64 }, {   2,   2 }, {  33,  33 }, {  80,  80 },
-    {  18,  18 }, {   2,   2 }, {  49,  49 }, {   3,   3 },
-    {  96,  96 }, {  34,  34 }, {  65,  65 }, {  19,  19 },
-    {   3,   3 }, { 112, 112 }, {  50,  50 }, {   4,   4 },
-    {  81,  81 }, {  35,  35 }, {  66,  66 }, {   4,   4 },
-    { 128, 128 }, {  20,  20 }, {  51,  51 }, {  97,  97 },
-    {  82,  82 }, {   5,   5 }, {  36,  36 }, { 144, 144 },
-    {  67,  67 }, { 113, 113 }, {  21,  21 }, {  52,  52 },
-    {   5,   5 }, {  98,  98 }, { 160, 160 }, {  83,  83 },
-    {  37,  37 }, {   6,   6 }, {  68,  68 }, { 129, 129 },
-    {  22,  22 }, {  53,  53 }, { 114, 114 }, {   6,   6 },
-    {  99,  99 }, { 176, 176 }, {  84,  84 }, {  38,  38 },
-    {   7,   7 }, {  69,  69 }, { 145, 145 }, { 130, 130 },
-    { 115, 115 }, {  23,  23 }, {  54,  54 }, { 192, 192 },
-    { 100, 100 }, {   7,   7 }, {  85,  85 }, { 161, 161 },
-    {  39,  39 }, {  70,  70 }, {   8,   8 }, { 146, 146 },
-    { 131, 131 }, { 116, 116 }, {  55,  55 }, { 208, 208 },
-    { 101, 101 }, {  24,  24 }, {  86,  86 }, {   8,   8 },
-    { 132, 132 }, {  40,  40 }, {  71,  71 }, { 177, 177 },
-    { 147, 147 }, { 224, 224 }, { 117, 117 }, { 162, 162 },
-    {   9,   9 }, { 102, 102 }, {  56,  56 }, {  25,  25 },
-    {  87,  87 }, { 148, 148 }, {   9,   9 }, { 133, 133 },
-    {  72,  72 }, { 118, 118 }, { 193, 193 }, { 163, 163 },
-    {  41,  41 }, { 103, 103 }, { 178, 178 }, {  10,  10 },
-    {  57,  57 }, { 149, 149 }, { 134, 134 }, {  88,  88 },
-    {  26,  26 }, { 119, 119 }, {  10,  10 }, { 164, 164 },
-    { 104, 104 }, {  73,  73 }, { 209, 209 }, { 179, 179 },
-    {  42,  42 }, {  11,  11 }, { 194, 194 }, { 135, 135 },
-    { 165, 165 }, { 150, 150 }, {  58,  58 }, {  27,  27 },
-    {  89,  89 }, {  11,  11 }, { 120, 120 }, {  74,  74 },
-    {  43,  43 }, { 225, 225 }, { 105, 105 }, {  12,  12 },
-    { 180, 180 }, { 151, 151 }, { 195, 195 }, { 136, 136 },
-    {  28,  28 }, { 166, 166 }, { 121, 121 }, {  59,  59 },
-    {  12,  12 }, { 210, 210 }, {  90,  90 }, { 106, 106 },
-    {  44,  44 }, { 181, 181 }, {  75,  75 }, { 152, 152 },
-    {  13,  13 }, { 167, 167 }, { 137, 137 }, {  13,  13 },
-    {  60,  60 }, { 196, 196 }, { 122, 122 }, {  29,  29 },
-    {  91,  91 }, {  14,  14 }, { 182, 182 }, {  76,  76 },
-    { 211, 211 }, { 153, 153 }, {  14,  14 }, { 107, 107 },
-    { 138, 138 }, {  45,  45 }, { 226, 226 }, { 168, 168 },
-    { 197, 197 }, { 123, 123 }, {  30,  30 }, {  61,  61 },
-    {  15,  15 }, {  92,  92 }, { 154, 154 }, { 183, 183 },
-    { 169, 169 }, { 108, 108 }, { 212, 212 }, {  77,  77 },
-    { 139, 139 }, { 198, 198 }, {  46,  46 }, { 124, 124 },
-    { 227, 227 }, {  62,  62 }, {  31,  31 }, { 184, 184 },
-    {  93,  93 }, { 170, 170 }, { 155, 155 }, { 185, 185 },
-    {  78,  78 }, {  47,  47 }, { 199, 199 }, { 213, 213 },
-    { 140, 140 }, {  63,  63 }, { 109, 109 }, { 125, 125 },
-    {  94,  94 }, { 200, 200 }, { 171, 171 }, { 156, 156 },
-    { 228, 228 }, { 186, 186 }, { 214, 214 }, { 201, 201 },
-    {  79,  79 }, { 141, 141 }, { 110, 110 }, { 229, 229 },
-    {  95,  95 }, { 126, 126 }, { 215, 215 }, { 172, 172 },
-    { 111, 111 }, { 142, 142 }, { 202, 202 }, { 157, 157 },
-    { 216, 216 }, { 230, 230 }, { 217, 217 }, { 187, 187 },
-    { 127, 127 }, { 231, 231 }, { 158, 158 }, { 173, 173 },
-    { 143, 143 }, { 203, 203 }, { 188, 188 }, { 232, 232 },
-    { 218, 218 }, { 233, 233 }, { 159, 159 }, { 174, 174 },
-    { 204, 204 }, { 189, 189 }, { 234, 234 }, { 219, 219 },
-    { 175, 175 }, { 205, 205 }, { 235, 235 }, { 220, 220 },
-    { 190, 190 }, { 236, 236 }, { 206, 206 }, { 191, 191 },
-    { 221, 221 }, { 207, 207 }, { 237, 237 }, { 222, 222 },
-    { 238, 238 }, { 223, 223 }, { 239, 239 }, {   0,   0 },
-};
-
-const int16_t ff_vp9_default_scan_32x32_nb[1024][2] = {
-    {    0,    0 }, {    0,    0 }, {    1,    1 }, {   32,    1 },
-    {   32,   32 }, {    2,    2 }, {   33,    2 }, {   64,   33 },
-    {    3,    3 }, {   64,   64 }, {   34,    3 }, {   65,   34 },
-    {    4,    4 }, {   35,    4 }, {   96,   65 }, {   66,   35 },
-    {   96,   96 }, {   97,   66 }, {   67,   36 }, {   36,    5 },
-    {    5,    5 }, {  128,   97 }, {   98,   67 }, {    6,    6 },
-    {  128,  128 }, {   68,   37 }, {   37,    6 }, {  129,   98 },
-    {   99,   68 }, {  160,  129 }, {  130,   99 }, {   38,    7 },
-    {   69,   38 }, {    7,    7 }, {  100,   69 }, {  161,  130 },
-    {  131,  100 }, {  160,  160 }, {   70,   39 }, {   39,    8 },
-    {    8,    8 }, {  101,   70 }, {  162,  131 }, {  132,  101 },
-    {  192,  161 }, {   71,   40 }, {  192,  192 }, {  102,   71 },
-    {   40,    9 }, {  163,  132 }, {    9,    9 }, {  193,  162 },
-    {  133,  102 }, {  164,  133 }, {   72,   41 }, {  103,   72 },
-    {  134,  103 }, {  224,  193 }, {   41,   10 }, {  194,  163 },
-    {   10,   10 }, {  224,  224 }, {  165,  134 }, {  225,  194 },
-    {  195,  164 }, {   73,   42 }, {  104,   73 }, {  135,  104 },
-    {   42,   11 }, {   11,   11 }, {  166,  135 }, {  196,  165 },
-    {  226,  195 }, {  256,  225 }, {   74,   43 }, {  105,   74 },
-    {  136,  105 }, {  227,  196 }, {   43,   12 }, {  197,  166 },
-    {  167,  136 }, {  257,  226 }, {  256,  256 }, {   12,   12 },
-    {  228,  197 }, {   75,   44 }, {  106,   75 }, {  198,  167 },
-    {  137,  106 }, {  258,  227 }, {  168,  137 }, {  288,  257 },
-    {   44,   13 }, {  229,  198 }, {  259,  228 }, {  199,  168 },
-    {  107,   76 }, {   13,   13 }, {  169,  138 }, {  138,  107 },
-    {  288,  288 }, {  289,  258 }, {   76,   45 }, {  230,  199 },
-    {  260,  229 }, {   45,   14 }, {  200,  169 }, {  139,  108 },
-    {  290,  259 }, {  108,   77 }, {  231,  200 }, {  320,  289 },
-    {  261,  230 }, {  170,  139 }, {   77,   46 }, {  291,  260 },
-    {   14,   14 }, {  321,  290 }, {  201,  170 }, {  262,  231 },
-    {  320,  320 }, {  171,  140 }, {  292,  261 }, {  232,  201 },
-    {  140,  109 }, {  322,  291 }, {  109,   78 }, {   46,   15 },
-    {  202,  171 }, {  263,  232 }, {  233,  202 }, {  293,  262 },
-    {  352,  321 }, {  323,  292 }, {   15,   15 }, {   78,   47 },
-    {  203,  172 }, {  264,  233 }, {  294,  263 }, {  324,  293 },
-    {  172,  141 }, {  353,  322 }, {  141,  110 }, {  234,  203 },
-    {  352,  352 }, {   47,   16 }, {  295,  264 }, {  110,   79 },
-    {  265,  234 }, {  354,  323 }, {  325,  294 }, {   79,   48 },
-    {   16,   16 }, {  204,  173 }, {  235,  204 }, {  173,  142 },
-    {  355,  324 }, {  384,  353 }, {  326,  295 }, {  142,  111 },
-    {  296,  265 }, {  266,  235 }, {  356,  325 }, {  385,  354 },
-    {  111,   80 }, {   48,   17 }, {  327,  296 }, {  297,  266 },
-    {  205,  174 }, {  384,  384 }, {  236,  205 }, {  357,  326 },
-    {  386,  355 }, {   80,   49 }, {  174,  143 }, {   17,   17 },
-    {  328,  297 }, {  358,  327 }, {  387,  356 }, {  298,  267 },
-    {  329,  298 }, {  388,  357 }, {  112,   81 }, {  416,  385 },
-    {  237,  206 }, {  359,  328 }, {   49,   18 }, {  206,  175 },
-    {  417,  386 }, {  389,  358 }, {  330,  299 }, {   18,   18 },
-    {  416,  416 }, {  360,  329 }, {   81,   50 }, {  418,  387 },
-    {  390,  359 }, {  238,  207 }, {   50,   19 }, {  361,  330 },
-    {  419,  388 }, {  113,   82 }, {  448,  417 }, {  448,  448 },
-    {  420,  389 }, {   82,   51 }, {  362,  331 }, {  449,  418 },
-    {  421,  390 }, {  480,  480 }, {  450,  419 }, {  422,  391 },
-    {  114,   83 }, {  451,  420 }, {  480,  449 }, {  452,  421 },
-    {  481,  450 }, {  453,  422 }, {  512,  512 }, {  482,  451 },
-    {  454,  423 }, {  512,  481 }, {  483,  452 }, {  513,  482 },
-    {  484,  453 }, {  514,  483 }, {  485,  454 }, {  544,  513 },
-    {  544,  544 }, {  486,  455 }, {  545,  514 }, {  546,  515 },
-    {  576,  576 }, {  576,  545 }, {  577,  546 }, {  578,  547 },
-    {  608,  577 }, {  609,  578 }, {  610,  579 }, {   19,   19 },
-    {  143,  112 }, {  267,  236 }, {  391,  360 }, {  515,  484 },
-    {  608,  608 }, {   20,   20 }, {   51,   20 }, {  144,  113 },
-    {  175,  144 }, {  268,  237 }, {  299,  268 }, {  392,  361 },
-    {  423,  392 }, {  516,  485 }, {  547,  516 }, {  640,  609 },
-    {  640,  640 }, {   21,   21 }, {   52,   21 }, {   83,   52 },
-    {  145,  114 }, {  176,  145 }, {  207,  176 }, {  269,  238 },
-    {  300,  269 }, {  331,  300 }, {  393,  362 }, {  424,  393 },
-    {  455,  424 }, {  517,  486 }, {  548,  517 }, {  579,  548 },
-    {  641,  610 }, {  672,  641 }, {  672,  672 }, {   22,   22 },
-    {   53,   22 }, {   84,   53 }, {  115,   84 }, {  146,  115 },
-    {  177,  146 }, {  208,  177 }, {  239,  208 }, {  270,  239 },
-    {  301,  270 }, {  332,  301 }, {  363,  332 }, {  394,  363 },
-    {  425,  394 }, {  456,  425 }, {  487,  456 }, {  518,  487 },
-    {  549,  518 }, {  580,  549 }, {  611,  580 }, {  642,  611 },
-    {  673,  642 }, {  704,  673 }, {  704,  704 }, {   54,   23 },
-    {   85,   54 }, {  116,   85 }, {  178,  147 }, {  209,  178 },
-    {  240,  209 }, {  302,  271 }, {  333,  302 }, {  364,  333 },
-    {  426,  395 }, {  457,  426 }, {  488,  457 }, {  550,  519 },
-    {  581,  550 }, {  612,  581 }, {  674,  643 }, {  705,  674 },
-    {  736,  705 }, {   86,   55 }, {  117,   86 }, {  210,  179 },
-    {  241,  210 }, {  334,  303 }, {  365,  334 }, {  458,  427 },
-    {  489,  458 }, {  582,  551 }, {  613,  582 }, {  706,  675 },
-    {  737,  706 }, {  118,   87 }, {  242,  211 }, {  366,  335 },
-    {  490,  459 }, {  614,  583 }, {  738,  707 }, {   23,   23 },
-    {  147,  116 }, {  271,  240 }, {  395,  364 }, {  519,  488 },
-    {  643,  612 }, {  736,  736 }, {   24,   24 }, {   55,   24 },
-    {  148,  117 }, {  179,  148 }, {  272,  241 }, {  303,  272 },
-    {  396,  365 }, {  427,  396 }, {  520,  489 }, {  551,  520 },
-    {  644,  613 }, {  675,  644 }, {  768,  737 }, {  768,  768 },
-    {   25,   25 }, {   56,   25 }, {   87,   56 }, {  149,  118 },
-    {  180,  149 }, {  211,  180 }, {  273,  242 }, {  304,  273 },
-    {  335,  304 }, {  397,  366 }, {  428,  397 }, {  459,  428 },
-    {  521,  490 }, {  552,  521 }, {  583,  552 }, {  645,  614 },
-    {  676,  645 }, {  707,  676 }, {  769,  738 }, {  800,  769 },
-    {  800,  800 }, {   26,   26 }, {   57,   26 }, {   88,   57 },
-    {  119,   88 }, {  150,  119 }, {  181,  150 }, {  212,  181 },
-    {  243,  212 }, {  274,  243 }, {  305,  274 }, {  336,  305 },
-    {  367,  336 }, {  398,  367 }, {  429,  398 }, {  460,  429 },
-    {  491,  460 }, {  522,  491 }, {  553,  522 }, {  584,  553 },
-    {  615,  584 }, {  646,  615 }, {  677,  646 }, {  708,  677 },
-    {  739,  708 }, {  770,  739 }, {  801,  770 }, {  832,  801 },
-    {  832,  832 }, {   58,   27 }, {   89,   58 }, {  120,   89 },
-    {  182,  151 }, {  213,  182 }, {  244,  213 }, {  306,  275 },
-    {  337,  306 }, {  368,  337 }, {  430,  399 }, {  461,  430 },
-    {  492,  461 }, {  554,  523 }, {  585,  554 }, {  616,  585 },
-    {  678,  647 }, {  709,  678 }, {  740,  709 }, {  802,  771 },
-    {  833,  802 }, {  864,  833 }, {   90,   59 }, {  121,   90 },
-    {  214,  183 }, {  245,  214 }, {  338,  307 }, {  369,  338 },
-    {  462,  431 }, {  493,  462 }, {  586,  555 }, {  617,  586 },
-    {  710,  679 }, {  741,  710 }, {  834,  803 }, {  865,  834 },
-    {  122,   91 }, {  246,  215 }, {  370,  339 }, {  494,  463 },
-    {  618,  587 }, {  742,  711 }, {  866,  835 }, {   27,   27 },
-    {  151,  120 }, {  275,  244 }, {  399,  368 }, {  523,  492 },
-    {  647,  616 }, {  771,  740 }, {  864,  864 }, {   28,   28 },
-    {   59,   28 }, {  152,  121 }, {  183,  152 }, {  276,  245 },
-    {  307,  276 }, {  400,  369 }, {  431,  400 }, {  524,  493 },
-    {  555,  524 }, {  648,  617 }, {  679,  648 }, {  772,  741 },
-    {  803,  772 }, {  896,  865 }, {  896,  896 }, {   29,   29 },
-    {   60,   29 }, {   91,   60 }, {  153,  122 }, {  184,  153 },
-    {  215,  184 }, {  277,  246 }, {  308,  277 }, {  339,  308 },
-    {  401,  370 }, {  432,  401 }, {  463,  432 }, {  525,  494 },
-    {  556,  525 }, {  587,  556 }, {  649,  618 }, {  680,  649 },
-    {  711,  680 }, {  773,  742 }, {  804,  773 }, {  835,  804 },
-    {  897,  866 }, {  928,  897 }, {  928,  928 }, {   30,   30 },
-    {   61,   30 }, {   92,   61 }, {  123,   92 }, {  154,  123 },
-    {  185,  154 }, {  216,  185 }, {  247,  216 }, {  278,  247 },
-    {  309,  278 }, {  340,  309 }, {  371,  340 }, {  402,  371 },
-    {  433,  402 }, {  464,  433 }, {  495,  464 }, {  526,  495 },
-    {  557,  526 }, {  588,  557 }, {  619,  588 }, {  650,  619 },
-    {  681,  650 }, {  712,  681 }, {  743,  712 }, {  774,  743 },
-    {  805,  774 }, {  836,  805 }, {  867,  836 }, {  898,  867 },
-    {  929,  898 }, {  960,  929 }, {  960,  960 }, {   62,   31 },
-    {   93,   62 }, {  124,   93 }, {  186,  155 }, {  217,  186 },
-    {  248,  217 }, {  310,  279 }, {  341,  310 }, {  372,  341 },
-    {  434,  403 }, {  465,  434 }, {  496,  465 }, {  558,  527 },
-    {  589,  558 }, {  620,  589 }, {  682,  651 }, {  713,  682 },
-    {  744,  713 }, {  806,  775 }, {  837,  806 }, {  868,  837 },
-    {  930,  899 }, {  961,  930 }, {  992,  961 }, {   94,   63 },
-    {  125,   94 }, {  218,  187 }, {  249,  218 }, {  342,  311 },
-    {  373,  342 }, {  466,  435 }, {  497,  466 }, {  590,  559 },
-    {  621,  590 }, {  714,  683 }, {  745,  714 }, {  838,  807 },
-    {  869,  838 }, {  962,  931 }, {  993,  962 }, {  126,   95 },
-    {  250,  219 }, {  374,  343 }, {  498,  467 }, {  622,  591 },
-    {  746,  715 }, {  870,  839 }, {  994,  963 }, {  155,  124 },
-    {  279,  248 }, {  403,  372 }, {  527,  496 }, {  651,  620 },
-    {  775,  744 }, {  899,  868 }, {  156,  125 }, {  187,  156 },
-    {  280,  249 }, {  311,  280 }, {  404,  373 }, {  435,  404 },
-    {  528,  497 }, {  559,  528 }, {  652,  621 }, {  683,  652 },
-    {  776,  745 }, {  807,  776 }, {  900,  869 }, {  931,  900 },
-    {  157,  126 }, {  188,  157 }, {  219,  188 }, {  281,  250 },
-    {  312,  281 }, {  343,  312 }, {  405,  374 }, {  436,  405 },
-    {  467,  436 }, {  529,  498 }, {  560,  529 }, {  591,  560 },
-    {  653,  622 }, {  684,  653 }, {  715,  684 }, {  777,  746 },
-    {  808,  777 }, {  839,  808 }, {  901,  870 }, {  932,  901 },
-    {  963,  932 }, {  158,  127 }, {  189,  158 }, {  220,  189 },
-    {  251,  220 }, {  282,  251 }, {  313,  282 }, {  344,  313 },
-    {  375,  344 }, {  406,  375 }, {  437,  406 }, {  468,  437 },
-    {  499,  468 }, {  530,  499 }, {  561,  530 }, {  592,  561 },
-    {  623,  592 }, {  654,  623 }, {  685,  654 }, {  716,  685 },
-    {  747,  716 }, {  778,  747 }, {  809,  778 }, {  840,  809 },
-    {  871,  840 }, {  902,  871 }, {  933,  902 }, {  964,  933 },
-    {  995,  964 }, {  190,  159 }, {  221,  190 }, {  252,  221 },
-    {  314,  283 }, {  345,  314 }, {  376,  345 }, {  438,  407 },
-    {  469,  438 }, {  500,  469 }, {  562,  531 }, {  593,  562 },
-    {  624,  593 }, {  686,  655 }, {  717,  686 }, {  748,  717 },
-    {  810,  779 }, {  841,  810 }, {  872,  841 }, {  934,  903 },
-    {  965,  934 }, {  996,  965 }, {  222,  191 }, {  253,  222 },
-    {  346,  315 }, {  377,  346 }, {  470,  439 }, {  501,  470 },
-    {  594,  563 }, {  625,  594 }, {  718,  687 }, {  749,  718 },
-    {  842,  811 }, {  873,  842 }, {  966,  935 }, {  997,  966 },
-    {  254,  223 }, {  378,  347 }, {  502,  471 }, {  626,  595 },
-    {  750,  719 }, {  874,  843 }, {  998,  967 }, {  283,  252 },
-    {  407,  376 }, {  531,  500 }, {  655,  624 }, {  779,  748 },
-    {  903,  872 }, {  284,  253 }, {  315,  284 }, {  408,  377 },
-    {  439,  408 }, {  532,  501 }, {  563,  532 }, {  656,  625 },
-    {  687,  656 }, {  780,  749 }, {  811,  780 }, {  904,  873 },
-    {  935,  904 }, {  285,  254 }, {  316,  285 }, {  347,  316 },
-    {  409,  378 }, {  440,  409 }, {  471,  440 }, {  533,  502 },
-    {  564,  533 }, {  595,  564 }, {  657,  626 }, {  688,  657 },
-    {  719,  688 }, {  781,  750 }, {  812,  781 }, {  843,  812 },
-    {  905,  874 }, {  936,  905 }, {  967,  936 }, {  286,  255 },
-    {  317,  286 }, {  348,  317 }, {  379,  348 }, {  410,  379 },
-    {  441,  410 }, {  472,  441 }, {  503,  472 }, {  534,  503 },
-    {  565,  534 }, {  596,  565 }, {  627,  596 }, {  658,  627 },
-    {  689,  658 }, {  720,  689 }, {  751,  720 }, {  782,  751 },
-    {  813,  782 }, {  844,  813 }, {  875,  844 }, {  906,  875 },
-    {  937,  906 }, {  968,  937 }, {  999,  968 }, {  318,  287 },
-    {  349,  318 }, {  380,  349 }, {  442,  411 }, {  473,  442 },
-    {  504,  473 }, {  566,  535 }, {  597,  566 }, {  628,  597 },
-    {  690,  659 }, {  721,  690 }, {  752,  721 }, {  814,  783 },
-    {  845,  814 }, {  876,  845 }, {  938,  907 }, {  969,  938 },
-    { 1000,  969 }, {  350,  319 }, {  381,  350 }, {  474,  443 },
-    {  505,  474 }, {  598,  567 }, {  629,  598 }, {  722,  691 },
-    {  753,  722 }, {  846,  815 }, {  877,  846 }, {  970,  939 },
-    { 1001,  970 }, {  382,  351 }, {  506,  475 }, {  630,  599 },
-    {  754,  723 }, {  878,  847 }, { 1002,  971 }, {  411,  380 },
-    {  535,  504 }, {  659,  628 }, {  783,  752 }, {  907,  876 },
-    {  412,  381 }, {  443,  412 }, {  536,  505 }, {  567,  536 },
-    {  660,  629 }, {  691,  660 }, {  784,  753 }, {  815,  784 },
-    {  908,  877 }, {  939,  908 }, {  413,  382 }, {  444,  413 },
-    {  475,  444 }, {  537,  506 }, {  568,  537 }, {  599,  568 },
-    {  661,  630 }, {  692,  661 }, {  723,  692 }, {  785,  754 },
-    {  816,  785 }, {  847,  816 }, {  909,  878 }, {  940,  909 },
-    {  971,  940 }, {  414,  383 }, {  445,  414 }, {  476,  445 },
-    {  507,  476 }, {  538,  507 }, {  569,  538 }, {  600,  569 },
-    {  631,  600 }, {  662,  631 }, {  693,  662 }, {  724,  693 },
-    {  755,  724 }, {  786,  755 }, {  817,  786 }, {  848,  817 },
-    {  879,  848 }, {  910,  879 }, {  941,  910 }, {  972,  941 },
-    { 1003,  972 }, {  446,  415 }, {  477,  446 }, {  508,  477 },
-    {  570,  539 }, {  601,  570 }, {  632,  601 }, {  694,  663 },
-    {  725,  694 }, {  756,  725 }, {  818,  787 }, {  849,  818 },
-    {  880,  849 }, {  942,  911 }, {  973,  942 }, { 1004,  973 },
-    {  478,  447 }, {  509,  478 }, {  602,  571 }, {  633,  602 },
-    {  726,  695 }, {  757,  726 }, {  850,  819 }, {  881,  850 },
-    {  974,  943 }, { 1005,  974 }, {  510,  479 }, {  634,  603 },
-    {  758,  727 }, {  882,  851 }, { 1006,  975 }, {  539,  508 },
-    {  663,  632 }, {  787,  756 }, {  911,  880 }, {  540,  509 },
-    {  571,  540 }, {  664,  633 }, {  695,  664 }, {  788,  757 },
-    {  819,  788 }, {  912,  881 }, {  943,  912 }, {  541,  510 },
-    {  572,  541 }, {  603,  572 }, {  665,  634 }, {  696,  665 },
-    {  727,  696 }, {  789,  758 }, {  820,  789 }, {  851,  820 },
-    {  913,  882 }, {  944,  913 }, {  975,  944 }, {  542,  511 },
-    {  573,  542 }, {  604,  573 }, {  635,  604 }, {  666,  635 },
-    {  697,  666 }, {  728,  697 }, {  759,  728 }, {  790,  759 },
-    {  821,  790 }, {  852,  821 }, {  883,  852 }, {  914,  883 },
-    {  945,  914 }, {  976,  945 }, { 1007,  976 }, {  574,  543 },
-    {  605,  574 }, {  636,  605 }, {  698,  667 }, {  729,  698 },
-    {  760,  729 }, {  822,  791 }, {  853,  822 }, {  884,  853 },
-    {  946,  915 }, {  977,  946 }, { 1008,  977 }, {  606,  575 },
-    {  637,  606 }, {  730,  699 }, {  761,  730 }, {  854,  823 },
-    {  885,  854 }, {  978,  947 }, { 1009,  978 }, {  638,  607 },
-    {  762,  731 }, {  886,  855 }, { 1010,  979 }, {  667,  636 },
-    {  791,  760 }, {  915,  884 }, {  668,  637 }, {  699,  668 },
-    {  792,  761 }, {  823,  792 }, {  916,  885 }, {  947,  916 },
-    {  669,  638 }, {  700,  669 }, {  731,  700 }, {  793,  762 },
-    {  824,  793 }, {  855,  824 }, {  917,  886 }, {  948,  917 },
-    {  979,  948 }, {  670,  639 }, {  701,  670 }, {  732,  701 },
-    {  763,  732 }, {  794,  763 }, {  825,  794 }, {  856,  825 },
-    {  887,  856 }, {  918,  887 }, {  949,  918 }, {  980,  949 },
-    { 1011,  980 }, {  702,  671 }, {  733,  702 }, {  764,  733 },
-    {  826,  795 }, {  857,  826 }, {  888,  857 }, {  950,  919 },
-    {  981,  950 }, { 1012,  981 }, {  734,  703 }, {  765,  734 },
-    {  858,  827 }, {  889,  858 }, {  982,  951 }, { 1013,  982 },
-    {  766,  735 }, {  890,  859 }, { 1014,  983 }, {  795,  764 },
-    {  919,  888 }, {  796,  765 }, {  827,  796 }, {  920,  889 },
-    {  951,  920 }, {  797,  766 }, {  828,  797 }, {  859,  828 },
-    {  921,  890 }, {  952,  921 }, {  983,  952 }, {  798,  767 },
-    {  829,  798 }, {  860,  829 }, {  891,  860 }, {  922,  891 },
-    {  953,  922 }, {  984,  953 }, { 1015,  984 }, {  830,  799 },
-    {  861,  830 }, {  892,  861 }, {  954,  923 }, {  985,  954 },
-    { 1016,  985 }, {  862,  831 }, {  893,  862 }, {  986,  955 },
-    { 1017,  986 }, {  894,  863 }, { 1018,  987 }, {  923,  892 },
-    {  924,  893 }, {  955,  924 }, {  925,  894 }, {  956,  925 },
-    {  987,  956 }, {  926,  895 }, {  957,  926 }, {  988,  957 },
-    { 1019,  988 }, {  958,  927 }, {  989,  958 }, { 1020,  989 },
-    {  990,  959 }, { 1021,  990 }, { 1022,  991 }, {    0,    0 },
-};
-
-const int16_t (*ff_vp9_scans_nb[5][4])[2] = {
-    {
-        ff_vp9_default_scan_4x4_nb, ff_vp9_col_scan_4x4_nb,
-        ff_vp9_row_scan_4x4_nb, ff_vp9_default_scan_4x4_nb
-    }, {
-        ff_vp9_default_scan_8x8_nb, ff_vp9_col_scan_8x8_nb,
-        ff_vp9_row_scan_8x8_nb, ff_vp9_default_scan_8x8_nb
-    }, {
-        ff_vp9_default_scan_16x16_nb, ff_vp9_col_scan_16x16_nb,
-        ff_vp9_row_scan_16x16_nb, ff_vp9_default_scan_16x16_nb
-    }, {
-        ff_vp9_default_scan_32x32_nb, ff_vp9_default_scan_32x32_nb,
-        ff_vp9_default_scan_32x32_nb, ff_vp9_default_scan_32x32_nb
-    }, { // lossless
-        ff_vp9_default_scan_4x4_nb, ff_vp9_default_scan_4x4_nb,
-        ff_vp9_default_scan_4x4_nb, ff_vp9_default_scan_4x4_nb
-    }
-};
-
-const uint8_t ff_vp9_model_pareto8[256][8] = {
-    {   6,  86, 128,  11,  87,  42,  91,  52 },
-    {   3,  86, 128,   6,  86,  23,  88,  29 },
-    {   6,  86, 128,  11,  87,  42,  91,  52 },
-    {   9,  86, 129,  17,  88,  61,  94,  76 },
-    {  12,  86, 129,  22,  88,  77,  97,  93 },
-    {  15,  87, 129,  28,  89,  93, 100, 110 },
-    {  17,  87, 129,  33,  90, 105, 103, 123 },
-    {  20,  88, 130,  38,  91, 118, 106, 136 },
-    {  23,  88, 130,  43,  91, 128, 108, 146 },
-    {  26,  89, 131,  48,  92, 139, 111, 156 },
-    {  28,  89, 131,  53,  93, 147, 114, 163 },
-    {  31,  90, 131,  58,  94, 156, 117, 171 },
-    {  34,  90, 131,  62,  94, 163, 119, 177 },
-    {  37,  90, 132,  66,  95, 171, 122, 184 },
-    {  39,  90, 132,  70,  96, 177, 124, 189 },
-    {  42,  91, 132,  75,  97, 183, 127, 194 },
-    {  44,  91, 132,  79,  97, 188, 129, 198 },
-    {  47,  92, 133,  83,  98, 193, 132, 202 },
-    {  49,  92, 133,  86,  99, 197, 134, 205 },
-    {  52,  93, 133,  90, 100, 201, 137, 208 },
-    {  54,  93, 133,  94, 100, 204, 139, 211 },
-    {  57,  94, 134,  98, 101, 208, 142, 214 },
-    {  59,  94, 134, 101, 102, 211, 144, 216 },
-    {  62,  94, 135, 105, 103, 214, 146, 218 },
-    {  64,  94, 135, 108, 103, 216, 148, 220 },
-    {  66,  95, 135, 111, 104, 219, 151, 222 },
-    {  68,  95, 135, 114, 105, 221, 153, 223 },
-    {  71,  96, 136, 117, 106, 224, 155, 225 },
-    {  73,  96, 136, 120, 106, 225, 157, 226 },
-    {  76,  97, 136, 123, 107, 227, 159, 228 },
-    {  78,  97, 136, 126, 108, 229, 160, 229 },
-    {  80,  98, 137, 129, 109, 231, 162, 231 },
-    {  82,  98, 137, 131, 109, 232, 164, 232 },
-    {  84,  98, 138, 134, 110, 234, 166, 233 },
-    {  86,  98, 138, 137, 111, 235, 168, 234 },
-    {  89,  99, 138, 140, 112, 236, 170, 235 },
-    {  91,  99, 138, 142, 112, 237, 171, 235 },
-    {  93, 100, 139, 145, 113, 238, 173, 236 },
-    {  95, 100, 139, 147, 114, 239, 174, 237 },
-    {  97, 101, 140, 149, 115, 240, 176, 238 },
-    {  99, 101, 140, 151, 115, 241, 177, 238 },
-    { 101, 102, 140, 154, 116, 242, 179, 239 },
-    { 103, 102, 140, 156, 117, 242, 180, 239 },
-    { 105, 103, 141, 158, 118, 243, 182, 240 },
-    { 107, 103, 141, 160, 118, 243, 183, 240 },
-    { 109, 104, 141, 162, 119, 244, 185, 241 },
-    { 111, 104, 141, 164, 119, 244, 186, 241 },
-    { 113, 104, 142, 166, 120, 245, 187, 242 },
-    { 114, 104, 142, 168, 121, 245, 188, 242 },
-    { 116, 105, 143, 170, 122, 246, 190, 243 },
-    { 118, 105, 143, 171, 122, 246, 191, 243 },
-    { 120, 106, 143, 173, 123, 247, 192, 244 },
-    { 121, 106, 143, 175, 124, 247, 193, 244 },
-    { 123, 107, 144, 177, 125, 248, 195, 244 },
-    { 125, 107, 144, 178, 125, 248, 196, 244 },
-    { 127, 108, 145, 180, 126, 249, 197, 245 },
-    { 128, 108, 145, 181, 127, 249, 198, 245 },
-    { 130, 109, 145, 183, 128, 249, 199, 245 },
-    { 132, 109, 145, 184, 128, 249, 200, 245 },
-    { 134, 110, 146, 186, 129, 250, 201, 246 },
-    { 135, 110, 146, 187, 130, 250, 202, 246 },
-    { 137, 111, 147, 189, 131, 251, 203, 246 },
-    { 138, 111, 147, 190, 131, 251, 204, 246 },
-    { 140, 112, 147, 192, 132, 251, 205, 247 },
-    { 141, 112, 147, 193, 132, 251, 206, 247 },
-    { 143, 113, 148, 194, 133, 251, 207, 247 },
-    { 144, 113, 148, 195, 134, 251, 207, 247 },
-    { 146, 114, 149, 197, 135, 252, 208, 248 },
-    { 147, 114, 149, 198, 135, 252, 209, 248 },
-    { 149, 115, 149, 199, 136, 252, 210, 248 },
-    { 150, 115, 149, 200, 137, 252, 210, 248 },
-    { 152, 115, 150, 201, 138, 252, 211, 248 },
-    { 153, 115, 150, 202, 138, 252, 212, 248 },
-    { 155, 116, 151, 204, 139, 253, 213, 249 },
-    { 156, 116, 151, 205, 139, 253, 213, 249 },
-    { 158, 117, 151, 206, 140, 253, 214, 249 },
-    { 159, 117, 151, 207, 141, 253, 215, 249 },
-    { 161, 118, 152, 208, 142, 253, 216, 249 },
-    { 162, 118, 152, 209, 142, 253, 216, 249 },
-    { 163, 119, 153, 210, 143, 253, 217, 249 },
-    { 164, 119, 153, 211, 143, 253, 217, 249 },
-    { 166, 120, 153, 212, 144, 254, 218, 250 },
-    { 167, 120, 153, 212, 145, 254, 219, 250 },
-    { 168, 121, 154, 213, 146, 254, 220, 250 },
-    { 169, 121, 154, 214, 146, 254, 220, 250 },
-    { 171, 122, 155, 215, 147, 254, 221, 250 },
-    { 172, 122, 155, 216, 147, 254, 221, 250 },
-    { 173, 123, 155, 217, 148, 254, 222, 250 },
-    { 174, 123, 155, 217, 149, 254, 222, 250 },
-    { 176, 124, 156, 218, 150, 254, 223, 250 },
-    { 177, 124, 156, 219, 150, 254, 223, 250 },
-    { 178, 125, 157, 220, 151, 254, 224, 251 },
-    { 179, 125, 157, 220, 151, 254, 224, 251 },
-    { 180, 126, 157, 221, 152, 254, 225, 251 },
-    { 181, 126, 157, 221, 152, 254, 225, 251 },
-    { 183, 127, 158, 222, 153, 254, 226, 251 },
-    { 184, 127, 158, 223, 154, 254, 226, 251 },
-    { 185, 128, 159, 224, 155, 255, 227, 251 },
-    { 186, 128, 159, 224, 155, 255, 227, 251 },
-    { 187, 129, 160, 225, 156, 255, 228, 251 },
-    { 188, 130, 160, 225, 156, 255, 228, 251 },
-    { 189, 131, 160, 226, 157, 255, 228, 251 },
-    { 190, 131, 160, 226, 158, 255, 228, 251 },
-    { 191, 132, 161, 227, 159, 255, 229, 251 },
-    { 192, 132, 161, 227, 159, 255, 229, 251 },
-    { 193, 133, 162, 228, 160, 255, 230, 252 },
-    { 194, 133, 162, 229, 160, 255, 230, 252 },
-    { 195, 134, 163, 230, 161, 255, 231, 252 },
-    { 196, 134, 163, 230, 161, 255, 231, 252 },
-    { 197, 135, 163, 231, 162, 255, 231, 252 },
-    { 198, 135, 163, 231, 162, 255, 231, 252 },
-    { 199, 136, 164, 232, 163, 255, 232, 252 },
-    { 200, 136, 164, 232, 164, 255, 232, 252 },
-    { 201, 137, 165, 233, 165, 255, 233, 252 },
-    { 201, 137, 165, 233, 165, 255, 233, 252 },
-    { 202, 138, 166, 233, 166, 255, 233, 252 },
-    { 203, 138, 166, 233, 166, 255, 233, 252 },
-    { 204, 139, 166, 234, 167, 255, 234, 252 },
-    { 205, 139, 166, 234, 167, 255, 234, 252 },
-    { 206, 140, 167, 235, 168, 255, 235, 252 },
-    { 206, 140, 167, 235, 168, 255, 235, 252 },
-    { 207, 141, 168, 236, 169, 255, 235, 252 },
-    { 208, 141, 168, 236, 170, 255, 235, 252 },
-    { 209, 142, 169, 237, 171, 255, 236, 252 },
-    { 209, 143, 169, 237, 171, 255, 236, 252 },
-    { 210, 144, 169, 237, 172, 255, 236, 252 },
-    { 211, 144, 169, 237, 172, 255, 236, 252 },
-    { 212, 145, 170, 238, 173, 255, 237, 252 },
-    { 213, 145, 170, 238, 173, 255, 237, 252 },
-    { 214, 146, 171, 239, 174, 255, 237, 253 },
-    { 214, 146, 171, 239, 174, 255, 237, 253 },
-    { 215, 147, 172, 240, 175, 255, 238, 253 },
-    { 215, 147, 172, 240, 175, 255, 238, 253 },
-    { 216, 148, 173, 240, 176, 255, 238, 253 },
-    { 217, 148, 173, 240, 176, 255, 238, 253 },
-    { 218, 149, 173, 241, 177, 255, 239, 253 },
-    { 218, 149, 173, 241, 178, 255, 239, 253 },
-    { 219, 150, 174, 241, 179, 255, 239, 253 },
-    { 219, 151, 174, 241, 179, 255, 239, 253 },
-    { 220, 152, 175, 242, 180, 255, 240, 253 },
-    { 221, 152, 175, 242, 180, 255, 240, 253 },
-    { 222, 153, 176, 242, 181, 255, 240, 253 },
-    { 222, 153, 176, 242, 181, 255, 240, 253 },
-    { 223, 154, 177, 243, 182, 255, 240, 253 },
-    { 223, 154, 177, 243, 182, 255, 240, 253 },
-    { 224, 155, 178, 244, 183, 255, 241, 253 },
-    { 224, 155, 178, 244, 183, 255, 241, 253 },
-    { 225, 156, 178, 244, 184, 255, 241, 253 },
-    { 225, 157, 178, 244, 184, 255, 241, 253 },
-    { 226, 158, 179, 244, 185, 255, 242, 253 },
-    { 227, 158, 179, 244, 185, 255, 242, 253 },
-    { 228, 159, 180, 245, 186, 255, 242, 253 },
-    { 228, 159, 180, 245, 186, 255, 242, 253 },
-    { 229, 160, 181, 245, 187, 255, 242, 253 },
-    { 229, 160, 181, 245, 187, 255, 242, 253 },
-    { 230, 161, 182, 246, 188, 255, 243, 253 },
-    { 230, 162, 182, 246, 188, 255, 243, 253 },
-    { 231, 163, 183, 246, 189, 255, 243, 253 },
-    { 231, 163, 183, 246, 189, 255, 243, 253 },
-    { 232, 164, 184, 247, 190, 255, 243, 253 },
-    { 232, 164, 184, 247, 190, 255, 243, 253 },
-    { 233, 165, 185, 247, 191, 255, 244, 253 },
-    { 233, 165, 185, 247, 191, 255, 244, 253 },
-    { 234, 166, 185, 247, 192, 255, 244, 253 },
-    { 234, 167, 185, 247, 192, 255, 244, 253 },
-    { 235, 168, 186, 248, 193, 255, 244, 253 },
-    { 235, 168, 186, 248, 193, 255, 244, 253 },
-    { 236, 169, 187, 248, 194, 255, 244, 253 },
-    { 236, 169, 187, 248, 194, 255, 244, 253 },
-    { 236, 170, 188, 248, 195, 255, 245, 253 },
-    { 236, 170, 188, 248, 195, 255, 245, 253 },
-    { 237, 171, 189, 249, 196, 255, 245, 254 },
-    { 237, 172, 189, 249, 196, 255, 245, 254 },
-    { 238, 173, 190, 249, 197, 255, 245, 254 },
-    { 238, 173, 190, 249, 197, 255, 245, 254 },
-    { 239, 174, 191, 249, 198, 255, 245, 254 },
-    { 239, 174, 191, 249, 198, 255, 245, 254 },
-    { 240, 175, 192, 249, 199, 255, 246, 254 },
-    { 240, 176, 192, 249, 199, 255, 246, 254 },
-    { 240, 177, 193, 250, 200, 255, 246, 254 },
-    { 240, 177, 193, 250, 200, 255, 246, 254 },
-    { 241, 178, 194, 250, 201, 255, 246, 254 },
-    { 241, 178, 194, 250, 201, 255, 246, 254 },
-    { 242, 179, 195, 250, 202, 255, 246, 254 },
-    { 242, 180, 195, 250, 202, 255, 246, 254 },
-    { 242, 181, 196, 250, 203, 255, 247, 254 },
-    { 242, 181, 196, 250, 203, 255, 247, 254 },
-    { 243, 182, 197, 251, 204, 255, 247, 254 },
-    { 243, 183, 197, 251, 204, 255, 247, 254 },
-    { 244, 184, 198, 251, 205, 255, 247, 254 },
-    { 244, 184, 198, 251, 205, 255, 247, 254 },
-    { 244, 185, 199, 251, 206, 255, 247, 254 },
-    { 244, 185, 199, 251, 206, 255, 247, 254 },
-    { 245, 186, 200, 251, 207, 255, 247, 254 },
-    { 245, 187, 200, 251, 207, 255, 247, 254 },
-    { 246, 188, 201, 252, 207, 255, 248, 254 },
-    { 246, 188, 201, 252, 207, 255, 248, 254 },
-    { 246, 189, 202, 252, 208, 255, 248, 254 },
-    { 246, 190, 202, 252, 208, 255, 248, 254 },
-    { 247, 191, 203, 252, 209, 255, 248, 254 },
-    { 247, 191, 203, 252, 209, 255, 248, 254 },
-    { 247, 192, 204, 252, 210, 255, 248, 254 },
-    { 247, 193, 204, 252, 210, 255, 248, 254 },
-    { 248, 194, 205, 252, 211, 255, 248, 254 },
-    { 248, 194, 205, 252, 211, 255, 248, 254 },
-    { 248, 195, 206, 252, 212, 255, 249, 254 },
-    { 248, 196, 206, 252, 212, 255, 249, 254 },
-    { 249, 197, 207, 253, 213, 255, 249, 254 },
-    { 249, 197, 207, 253, 213, 255, 249, 254 },
-    { 249, 198, 208, 253, 214, 255, 249, 254 },
-    { 249, 199, 209, 253, 214, 255, 249, 254 },
-    { 250, 200, 210, 253, 215, 255, 249, 254 },
-    { 250, 200, 210, 253, 215, 255, 249, 254 },
-    { 250, 201, 211, 253, 215, 255, 249, 254 },
-    { 250, 202, 211, 253, 215, 255, 249, 254 },
-    { 250, 203, 212, 253, 216, 255, 249, 254 },
-    { 250, 203, 212, 253, 216, 255, 249, 254 },
-    { 251, 204, 213, 253, 217, 255, 250, 254 },
-    { 251, 205, 213, 253, 217, 255, 250, 254 },
-    { 251, 206, 214, 254, 218, 255, 250, 254 },
-    { 251, 206, 215, 254, 218, 255, 250, 254 },
-    { 252, 207, 216, 254, 219, 255, 250, 254 },
-    { 252, 208, 216, 254, 219, 255, 250, 254 },
-    { 252, 209, 217, 254, 220, 255, 250, 254 },
-    { 252, 210, 217, 254, 220, 255, 250, 254 },
-    { 252, 211, 218, 254, 221, 255, 250, 254 },
-    { 252, 212, 218, 254, 221, 255, 250, 254 },
-    { 253, 213, 219, 254, 222, 255, 250, 254 },
-    { 253, 213, 220, 254, 222, 255, 250, 254 },
-    { 253, 214, 221, 254, 223, 255, 250, 254 },
-    { 253, 215, 221, 254, 223, 255, 250, 254 },
-    { 253, 216, 222, 254, 224, 255, 251, 254 },
-    { 253, 217, 223, 254, 224, 255, 251, 254 },
-    { 253, 218, 224, 254, 225, 255, 251, 254 },
-    { 253, 219, 224, 254, 225, 255, 251, 254 },
-    { 254, 220, 225, 254, 225, 255, 251, 254 },
-    { 254, 221, 226, 254, 225, 255, 251, 254 },
-    { 254, 222, 227, 255, 226, 255, 251, 254 },
-    { 254, 223, 227, 255, 226, 255, 251, 254 },
-    { 254, 224, 228, 255, 227, 255, 251, 254 },
-    { 254, 225, 229, 255, 227, 255, 251, 254 },
-    { 254, 226, 230, 255, 228, 255, 251, 254 },
-    { 254, 227, 230, 255, 229, 255, 251, 254 },
-    { 255, 228, 231, 255, 230, 255, 251, 254 },
-    { 255, 229, 232, 255, 230, 255, 251, 254 },
-    { 255, 230, 233, 255, 231, 255, 252, 254 },
-    { 255, 231, 234, 255, 231, 255, 252, 254 },
-    { 255, 232, 235, 255, 232, 255, 252, 254 },
-    { 255, 233, 236, 255, 232, 255, 252, 254 },
-    { 255, 235, 237, 255, 233, 255, 252, 254 },
-    { 255, 236, 238, 255, 234, 255, 252, 254 },
-    { 255, 238, 240, 255, 235, 255, 252, 255 },
-    { 255, 239, 241, 255, 235, 255, 252, 254 },
-    { 255, 241, 243, 255, 236, 255, 252, 254 },
-    { 255, 243, 245, 255, 237, 255, 252, 254 },
-    { 255, 246, 247, 255, 239, 255, 253, 255 },
-};
-
-const ProbContext ff_vp9_default_probs = {
-    { /* y_mode */
-        {  65,  32,  18, 144, 162, 194,  41,  51,  98 } /* bsize < 8x8 */,
-        { 132,  68,  18, 165, 217, 196,  45,  40,  78 } /* bsize < 16x16 */,
-        { 173,  80,  19, 176, 240, 193,  64,  35,  46 } /* bsize < 32x32 */,
-        { 221, 135,  38, 194, 248, 121,  96,  85,  29 } /* bsize >= 32x32 */
-    }, { /* uv_mode */
-        {  48,  12, 154, 155, 139,  90,  34, 117, 119 } /* y = v */,
-        {  67,   6,  25, 204, 243, 158,  13,  21,  96 } /* y = h */,
-        { 120,   7,  76, 176, 208, 126,  28,  54, 103 } /* y = dc */,
-        {  97,   5,  44, 131, 176, 139,  48,  68,  97 } /* y = d45 */,
-        {  83,   5,  42, 156, 111, 152,  26,  49, 152 } /* y = d135 */,
-        {  80,   5,  58, 178,  74,  83,  33,  62, 145 } /* y = d117 */,
-        {  86,   5,  32, 154, 192, 168,  14,  22, 163 } /* y = d153 */,
-        {  77,   7,  64, 116, 132, 122,  37, 126, 120 } /* y = d63 */,
-        {  85,   5,  32, 156, 216, 148,  19,  29,  73 } /* y = d27 */,
-        { 101,  21, 107, 181, 192, 103,  19,  67, 125 } /* y = tm */
-    }, { /* filter */
-        { 235, 162, },
-        {  36, 255, },
-        {  34,   3, },
-        { 149, 144, },
-    }, { /* mv_mode */
-        {  2, 173,  34 },  // 0 = both zero mv
-        {  7, 145,  85 },  // 1 = one zero mv + one a predicted mv
-        {  7, 166,  63 },  // 2 = two predicted mvs
-        {  7,  94,  66 },  // 3 = one predicted/zero and one new mv
-        {  8,  64,  46 },  // 4 = two new mvs
-        { 17,  81,  31 },  // 5 = one intra neighbor + x
-        { 25,  29,  30 },  // 6 = two intra neighbors
-    }, { /* intra */
-        9, 102, 187, 225
-    }, { /* comp */
-        239, 183, 119,  96,  41
-    }, { /* single_ref */
-        {  33,  16 },
-        {  77,  74 },
-        { 142, 142 },
-        { 172, 170 },
-        { 238, 247 }
-    }, { /* comp_ref */
-        50, 126, 123, 221, 226
-    }, { /* tx32p */
-        { 3, 136, 37, },
-        { 5,  52, 13, },
-    }, { /* tx16p */
-        { 20, 152, },
-        { 15, 101, },
-    }, { /* tx8p */
-        100, 66
-    }, { /* skip */
-        192, 128, 64
-    }, { /* mv_joint */
-        32, 64, 96
-    }, {
-        { /* mv vertical component */
-            128, /* sign */
-            { 224, 144, 192, 168, 192, 176, 192, 198, 198, 245 }, /* class */
-            216, /* class0 */
-            { 136, 140, 148, 160, 176, 192, 224, 234, 234, 240 }, /* bits */
-            { /* class0_fp */
-                { 128, 128, 64 },
-                {  96, 112, 64 }
-            },
-            { 64, 96, 64 }, /* fp */
-            160, /* class0_hp bit */
-            128, /* hp */
-        }, { /* mv horizontal component */
-            128, /* sign */
-            { 216, 128, 176, 160, 176, 176, 192, 198, 198, 208 }, /* class */
-            208, /* class0 */
-            { 136, 140, 148, 160, 176, 192, 224, 234, 234, 240 }, /* bits */
-            { /* class0_fp */
-                { 128, 128, 64 },
-                {  96, 112, 64 }
-            },
-            { 64, 96, 64 }, /* fp */
-            160, /* class0_hp bit */
-            128, /* hp */
-        }
-    }, { /* partition */
-        { /* 64x64 -> 32x32 */
-            { 222,  34,  30 } /* a/l both not split */,
-            {  72,  16,  44 } /* a split, l not split */,
-            {  58,  32,  12 } /* l split, a not split */,
-            {  10,   7,   6 } /* a/l both split */,
-        }, { /* 32x32 -> 16x16 */
-            { 177,  58,  59 } /* a/l both not split */,
-            {  68,  26,  63 } /* a split, l not split */,
-            {  52,  79,  25 } /* l split, a not split */,
-            {  17,  14,  12 } /* a/l both split */,
-        }, { /* 16x16 -> 8x8 */
-            { 174,  73,  87 } /* a/l both not split */,
-            {  92,  41,  83 } /* a split, l not split */,
-            {  82,  99,  50 } /* l split, a not split */,
-            {  53,  39,  39 } /* a/l both split */,
-        }, { /* 8x8 -> 4x4 */
-            { 199, 122, 141 } /* a/l both not split */,
-            { 147,  63, 159 } /* a split, l not split */,
-            { 148, 133, 118 } /* l split, a not split */,
-            { 121, 104, 114 } /* a/l both split */,
-        }
-    },
-};
-
-const uint8_t ff_vp9_default_coef_probs[4][2][2][6][6][3] = {
-    { /* tx = 4x4 */
-        { /* block Type 0 */
-            { /* Intra */
-                { /* Coeff Band 0 */
-                    { 195,  29, 183 },
-                    {  84,  49, 136 },
-                    {   8,  42,  71 }
-                }, { /* Coeff Band 1 */
-                    {  31, 107, 169 },
-                    {  35,  99, 159 },
-                    {  17,  82, 140 },
-                    {   8,  66, 114 },
-                    {   2,  44,  76 },
-                    {   1,  19,  32 }
-                }, { /* Coeff Band 2 */
-                    {  40, 132, 201 },
-                    {  29, 114, 187 },
-                    {  13,  91, 157 },
-                    {   7,  75, 127 },
-                    {   3,  58,  95 },
-                    {   1,  28,  47 }
-                }, { /* Coeff Band 3 */
-                    {  69, 142, 221 },
-                    {  42, 122, 201 },
-                    {  15,  91, 159 },
-                    {   6,  67, 121 },
-                    {   1,  42,  77 },
-                    {   1,  17,  31 }
-                }, { /* Coeff Band 4 */
-                    { 102, 148, 228 },
-                    {  67, 117, 204 },
-                    {  17,  82, 154 },
-                    {   6,  59, 114 },
-                    {   2,  39,  75 },
-                    {   1,  15,  29 }
-                }, { /* Coeff Band 5 */
-                    { 156,  57, 233 },
-                    { 119,  57, 212 },
-                    {  58,  48, 163 },
-                    {  29,  40, 124 },
-                    {  12,  30,  81 },
-                    {   3,  12,  31 }
-                }
-            }, { /* Inter */
-                { /* Coeff Band 0 */
-                    { 191, 107, 226 },
-                    { 124, 117, 204 },
-                    {  25,  99, 155 }
-                }, { /* Coeff Band 1 */
-                    {  29, 148, 210 },
-                    {  37, 126, 194 },
-                    {   8,  93, 157 },
-                    {   2,  68, 118 },
-                    {   1,  39,  69 },
-                    {   1,  17,  33 }
-                }, { /* Coeff Band 2 */
-                    {  41, 151, 213 },
-                    {  27, 123, 193 },
-                    {   3,  82, 144 },
-                    {   1,  58, 105 },
-                    {   1,  32,  60 },
-                    {   1,  13,  26 }
-                }, { /* Coeff Band 3 */
-                    {  59, 159, 220 },
-                    {  23, 126, 198 },
-                    {   4,  88, 151 },
-                    {   1,  66, 114 },
-                    {   1,  38,  71 },
-                    {   1,  18,  34 }
-                }, { /* Coeff Band 4 */
-                    { 114, 136, 232 },
-                    {  51, 114, 207 },
-                    {  11,  83, 155 },
-                    {   3,  56, 105 },
-                    {   1,  33,  65 },
-                    {   1,  17,  34 }
-                }, { /* Coeff Band 5 */
-                    { 149,  65, 234 },
-                    { 121,  57, 215 },
-                    {  61,  49, 166 },
-                    {  28,  36, 114 },
-                    {  12,  25,  76 },
-                    {   3,  16,  42 }
-                }
-            }
-        }, { /* block Type 1 */
-            { /* Intra */
-                { /* Coeff Band 0 */
-                    { 214,  49, 220 },
-                    { 132,  63, 188 },
-                    {  42,  65, 137 }
-                }, { /* Coeff Band 1 */
-                    {  85, 137, 221 },
-                    { 104, 131, 216 },
-                    {  49, 111, 192 },
-                    {  21,  87, 155 },
-                    {   2,  49,  87 },
-                    {   1,  16,  28 }
-                }, { /* Coeff Band 2 */
-                    {  89, 163, 230 },
-                    {  90, 137, 220 },
-                    {  29, 100, 183 },
-                    {  10,  70, 135 },
-                    {   2,  42,  81 },
-                    {   1,  17,  33 }
-                }, { /* Coeff Band 3 */
-                    { 108, 167, 237 },
-                    {  55, 133, 222 },
-                    {  15,  97, 179 },
-                    {   4,  72, 135 },
-                    {   1,  45,  85 },
-                    {   1,  19,  38 }
-                }, { /* Coeff Band 4 */
-                    { 124, 146, 240 },
-                    {  66, 124, 224 },
-                    {  17,  88, 175 },
-                    {   4,  58, 122 },
-                    {   1,  36,  75 },
-                    {   1,  18,  37 }
-                }, { /* Coeff Band 5 */
-                    { 141,  79, 241 },
-                    { 126,  70, 227 },
-                    {  66,  58, 182 },
-                    {  30,  44, 136 },
-                    {  12,  34,  96 },
-                    {   2,  20,  47 }
-                }
-            }, { /* Inter */
-                { /* Coeff Band 0 */
-                    { 229,  99, 249 },
-                    { 143, 111, 235 },
-                    {  46, 109, 192 }
-                }, { /* Coeff Band 1 */
-                    {  82, 158, 236 },
-                    {  94, 146, 224 },
-                    {  25, 117, 191 },
-                    {   9,  87, 149 },
-                    {   3,  56,  99 },
-                    {   1,  33,  57 }
-                }, { /* Coeff Band 2 */
-                    {  83, 167, 237 },
-                    {  68, 145, 222 },
-                    {  10, 103, 177 },
-                    {   2,  72, 131 },
-                    {   1,  41,  79 },
-                    {   1,  20,  39 }
-                }, { /* Coeff Band 3 */
-                    {  99, 167, 239 },
-                    {  47, 141, 224 },
-                    {  10, 104, 178 },
-                    {   2,  73, 133 },
-                    {   1,  44,  85 },
-                    {   1,  22,  47 }
-                }, { /* Coeff Band 4 */
-                    { 127, 145, 243 },
-                    {  71, 129, 228 },
-                    {  17,  93, 177 },
-                    {   3,  61, 124 },
-                    {   1,  41,  84 },
-                    {   1,  21,  52 }
-                }, { /* Coeff Band 5 */
-                    { 157,  78, 244 },
-                    { 140,  72, 231 },
-                    {  69,  58, 184 },
-                    {  31,  44, 137 },
-                    {  14,  38, 105 },
-                    {   8,  23,  61 }
-                }
-            }
-        }
-    }, { /* tx = 8x8 */
-        { /* block Type 0 */
-            { /* Intra */
-                { /* Coeff Band 0 */
-                    { 125,  34, 187 },
-                    {  52,  41, 133 },
-                    {   6,  31,  56 }
-                }, { /* Coeff Band 1 */
-                    {  37, 109, 153 },
-                    {  51, 102, 147 },
-                    {  23,  87, 128 },
-                    {   8,  67, 101 },
-                    {   1,  41,  63 },
-                    {   1,  19,  29 }
-                }, { /* Coeff Band 2 */
-                    {  31, 154, 185 },
-                    {  17, 127, 175 },
-                    {   6,  96, 145 },
-                    {   2,  73, 114 },
-                    {   1,  51,  82 },
-                    {   1,  28,  45 }
-                }, { /* Coeff Band 3 */
-                    {  23, 163, 200 },
-                    {  10, 131, 185 },
-                    {   2,  93, 148 },
-                    {   1,  67, 111 },
-                    {   1,  41,  69 },
-                    {   1,  14,  24 }
-                }, { /* Coeff Band 4 */
-                    {  29, 176, 217 },
-                    {  12, 145, 201 },
-                    {   3, 101, 156 },
-                    {   1,  69, 111 },
-                    {   1,  39,  63 },
-                    {   1,  14,  23 }
-                }, { /* Coeff Band 5 */
-                    {  57, 192, 233 },
-                    {  25, 154, 215 },
-                    {   6, 109, 167 },
-                    {   3,  78, 118 },
-                    {   1,  48,  69 },
-                    {   1,  21,  29 }
-                }
-            }, { /* Inter */
-                { /* Coeff Band 0 */
-                    { 202, 105, 245 },
-                    { 108, 106, 216 },
-                    {  18,  90, 144 }
-                }, { /* Coeff Band 1 */
-                    {  33, 172, 219 },
-                    {  64, 149, 206 },
-                    {  14, 117, 177 },
-                    {   5,  90, 141 },
-                    {   2,  61,  95 },
-                    {   1,  37,  57 }
-                }, { /* Coeff Band 2 */
-                    {  33, 179, 220 },
-                    {  11, 140, 198 },
-                    {   1,  89, 148 },
-                    {   1,  60, 104 },
-                    {   1,  33,  57 },
-                    {   1,  12,  21 }
-                }, { /* Coeff Band 3 */
-                    {  30, 181, 221 },
-                    {   8, 141, 198 },
-                    {   1,  87, 145 },
-                    {   1,  58, 100 },
-                    {   1,  31,  55 },
-                    {   1,  12,  20 }
-                }, { /* Coeff Band 4 */
-                    {  32, 186, 224 },
-                    {   7, 142, 198 },
-                    {   1,  86, 143 },
-                    {   1,  58, 100 },
-                    {   1,  31,  55 },
-                    {   1,  12,  22 }
-                }, { /* Coeff Band 5 */
-                    {  57, 192, 227 },
-                    {  20, 143, 204 },
-                    {   3,  96, 154 },
-                    {   1,  68, 112 },
-                    {   1,  42,  69 },
-                    {   1,  19,  32 }
-                }
-            }
-        }, { /* block Type 1 */
-            { /* Intra */
-                { /* Coeff Band 0 */
-                    { 212,  35, 215 },
-                    { 113,  47, 169 },
-                    {  29,  48, 105 }
-                }, { /* Coeff Band 1 */
-                    {  74, 129, 203 },
-                    { 106, 120, 203 },
-                    {  49, 107, 178 },
-                    {  19,  84, 144 },
-                    {   4,  50,  84 },
-                    {   1,  15,  25 }
-                }, { /* Coeff Band 2 */
-                    {  71, 172, 217 },
-                    {  44, 141, 209 },
-                    {  15, 102, 173 },
-                    {   6,  76, 133 },
-                    {   2,  51,  89 },
-                    {   1,  24,  42 }
-                }, { /* Coeff Band 3 */
-                    {  64, 185, 231 },
-                    {  31, 148, 216 },
-                    {   8, 103, 175 },
-                    {   3,  74, 131 },
-                    {   1,  46,  81 },
-                    {   1,  18,  30 }
-                }, { /* Coeff Band 4 */
-                    {  65, 196, 235 },
-                    {  25, 157, 221 },
-                    {   5, 105, 174 },
-                    {   1,  67, 120 },
-                    {   1,  38,  69 },
-                    {   1,  15,  30 }
-                }, { /* Coeff Band 5 */
-                    {  65, 204, 238 },
-                    {  30, 156, 224 },
-                    {   7, 107, 177 },
-                    {   2,  70, 124 },
-                    {   1,  42,  73 },
-                    {   1,  18,  34 }
-                }
-            }, { /* Inter */
-                { /* Coeff Band 0 */
-                    { 225,  86, 251 },
-                    { 144, 104, 235 },
-                    {  42,  99, 181 }
-                }, { /* Coeff Band 1 */
-                    {  85, 175, 239 },
-                    { 112, 165, 229 },
-                    {  29, 136, 200 },
-                    {  12, 103, 162 },
-                    {   6,  77, 123 },
-                    {   2,  53,  84 }
-                }, { /* Coeff Band 2 */
-                    {  75, 183, 239 },
-                    {  30, 155, 221 },
-                    {   3, 106, 171 },
-                    {   1,  74, 128 },
-                    {   1,  44,  76 },
-                    {   1,  17,  28 }
-                }, { /* Coeff Band 3 */
-                    {  73, 185, 240 },
-                    {  27, 159, 222 },
-                    {   2, 107, 172 },
-                    {   1,  75, 127 },
-                    {   1,  42,  73 },
-                    {   1,  17,  29 }
-                }, { /* Coeff Band 4 */
-                    {  62, 190, 238 },
-                    {  21, 159, 222 },
-                    {   2, 107, 172 },
-                    {   1,  72, 122 },
-                    {   1,  40,  71 },
-                    {   1,  18,  32 }
-                }, { /* Coeff Band 5 */
-                    {  61, 199, 240 },
-                    {  27, 161, 226 },
-                    {   4, 113, 180 },
-                    {   1,  76, 129 },
-                    {   1,  46,  80 },
-                    {   1,  23,  41 }
-                }
-            }
-        }
-    }, { /* tx = 16x16 */
-        { /* block Type 0 */
-            { /* Intra */
-                { /* Coeff Band 0 */
-                    {   7,  27, 153 },
-                    {   5,  30,  95 },
-                    {   1,  16,  30 }
-                }, { /* Coeff Band 1 */
-                    {  50,  75, 127 },
-                    {  57,  75, 124 },
-                    {  27,  67, 108 },
-                    {  10,  54,  86 },
-                    {   1,  33,  52 },
-                    {   1,  12,  18 }
-                }, { /* Coeff Band 2 */
-                    {  43, 125, 151 },
-                    {  26, 108, 148 },
-                    {   7,  83, 122 },
-                    {   2,  59,  89 },
-                    {   1,  38,  60 },
-                    {   1,  17,  27 }
-                }, { /* Coeff Band 3 */
-                    {  23, 144, 163 },
-                    {  13, 112, 154 },
-                    {   2,  75, 117 },
-                    {   1,  50,  81 },
-                    {   1,  31,  51 },
-                    {   1,  14,  23 }
-                }, { /* Coeff Band 4 */
-                    {  18, 162, 185 },
-                    {   6, 123, 171 },
-                    {   1,  78, 125 },
-                    {   1,  51,  86 },
-                    {   1,  31,  54 },
-                    {   1,  14,  23 }
-                }, { /* Coeff Band 5 */
-                    {  15, 199, 227 },
-                    {   3, 150, 204 },
-                    {   1,  91, 146 },
-                    {   1,  55,  95 },
-                    {   1,  30,  53 },
-                    {   1,  11,  20 }
-                }
-            }, { /* Inter */
-                { /* Coeff Band 0 */
-                    {  19,  55, 240 },
-                    {  19,  59, 196 },
-                    {   3,  52, 105 }
-                }, { /* Coeff Band 1 */
-                    {  41, 166, 207 },
-                    { 104, 153, 199 },
-                    {  31, 123, 181 },
-                    {  14, 101, 152 },
-                    {   5,  72, 106 },
-                    {   1,  36,  52 }
-                }, { /* Coeff Band 2 */
-                    {  35, 176, 211 },
-                    {  12, 131, 190 },
-                    {   2,  88, 144 },
-                    {   1,  60, 101 },
-                    {   1,  36,  60 },
-                    {   1,  16,  28 }
-                }, { /* Coeff Band 3 */
-                    {  28, 183, 213 },
-                    {   8, 134, 191 },
-                    {   1,  86, 142 },
-                    {   1,  56,  96 },
-                    {   1,  30,  53 },
-                    {   1,  12,  20 }
-                }, { /* Coeff Band 4 */
-                    {  20, 190, 215 },
-                    {   4, 135, 192 },
-                    {   1,  84, 139 },
-                    {   1,  53,  91 },
-                    {   1,  28,  49 },
-                    {   1,  11,  20 }
-                }, { /* Coeff Band 5 */
-                    {  13, 196, 216 },
-                    {   2, 137, 192 },
-                    {   1,  86, 143 },
-                    {   1,  57,  99 },
-                    {   1,  32,  56 },
-                    {   1,  13,  24 }
-                }
-            }
-        }, { /* block Type 1 */
-            { /* Intra */
-                { /* Coeff Band 0 */
-                    { 211,  29, 217 },
-                    {  96,  47, 156 },
-                    {  22,  43,  87 }
-                }, { /* Coeff Band 1 */
-                    {  78, 120, 193 },
-                    { 111, 116, 186 },
-                    {  46, 102, 164 },
-                    {  15,  80, 128 },
-                    {   2,  49,  76 },
-                    {   1,  18,  28 }
-                }, { /* Coeff Band 2 */
-                    {  71, 161, 203 },
-                    {  42, 132, 192 },
-                    {  10,  98, 150 },
-                    {   3,  69, 109 },
-                    {   1,  44,  70 },
-                    {   1,  18,  29 }
-                }, { /* Coeff Band 3 */
-                    {  57, 186, 211 },
-                    {  30, 140, 196 },
-                    {   4,  93, 146 },
-                    {   1,  62, 102 },
-                    {   1,  38,  65 },
-                    {   1,  16,  27 }
-                }, { /* Coeff Band 4 */
-                    {  47, 199, 217 },
-                    {  14, 145, 196 },
-                    {   1,  88, 142 },
-                    {   1,  57,  98 },
-                    {   1,  36,  62 },
-                    {   1,  15,  26 }
-                }, { /* Coeff Band 5 */
-                    {  26, 219, 229 },
-                    {   5, 155, 207 },
-                    {   1,  94, 151 },
-                    {   1,  60, 104 },
-                    {   1,  36,  62 },
-                    {   1,  16,  28 }
-                }
-            }, { /* Inter */
-                { /* Coeff Band 0 */
-                    { 233,  29, 248 },
-                    { 146,  47, 220 },
-                    {  43,  52, 140 }
-                }, { /* Coeff Band 1 */
-                    { 100, 163, 232 },
-                    { 179, 161, 222 },
-                    {  63, 142, 204 },
-                    {  37, 113, 174 },
-                    {  26,  89, 137 },
-                    {  18,  68,  97 }
-                }, { /* Coeff Band 2 */
-                    {  85, 181, 230 },
-                    {  32, 146, 209 },
-                    {   7, 100, 164 },
-                    {   3,  71, 121 },
-                    {   1,  45,  77 },
-                    {   1,  18,  30 }
-                }, { /* Coeff Band 3 */
-                    {  65, 187, 230 },
-                    {  20, 148, 207 },
-                    {   2,  97, 159 },
-                    {   1,  68, 116 },
-                    {   1,  40,  70 },
-                    {   1,  14,  29 }
-                }, { /* Coeff Band 4 */
-                    {  40, 194, 227 },
-                    {   8, 147, 204 },
-                    {   1,  94, 155 },
-                    {   1,  65, 112 },
-                    {   1,  39,  66 },
-                    {   1,  14,  26 }
-                }, { /* Coeff Band 5 */
-                    {  16, 208, 228 },
-                    {   3, 151, 207 },
-                    {   1,  98, 160 },
-                    {   1,  67, 117 },
-                    {   1,  41,  74 },
-                    {   1,  17,  31 }
-                }
-            }
-        }
-    }, { /* tx = 32x32 */
-        { /* block Type 0 */
-            { /* Intra */
-                { /* Coeff Band 0 */
-                    {  17,  38, 140 },
-                    {   7,  34,  80 },
-                    {   1,  17,  29 }
-                }, { /* Coeff Band 1 */
-                    {  37,  75, 128 },
-                    {  41,  76, 128 },
-                    {  26,  66, 116 },
-                    {  12,  52,  94 },
-                    {   2,  32,  55 },
-                    {   1,  10,  16 }
-                }, { /* Coeff Band 2 */
-                    {  50, 127, 154 },
-                    {  37, 109, 152 },
-                    {  16,  82, 121 },
-                    {   5,  59,  85 },
-                    {   1,  35,  54 },
-                    {   1,  13,  20 }
-                }, { /* Coeff Band 3 */
-                    {  40, 142, 167 },
-                    {  17, 110, 157 },
-                    {   2,  71, 112 },
-                    {   1,  44,  72 },
-                    {   1,  27,  45 },
-                    {   1,  11,  17 }
-                }, { /* Coeff Band 4 */
-                    {  30, 175, 188 },
-                    {   9, 124, 169 },
-                    {   1,  74, 116 },
-                    {   1,  48,  78 },
-                    {   1,  30,  49 },
-                    {   1,  11,  18 }
-                }, { /* Coeff Band 5 */
-                    {  10, 222, 223 },
-                    {   2, 150, 194 },
-                    {   1,  83, 128 },
-                    {   1,  48,  79 },
-                    {   1,  27,  45 },
-                    {   1,  11,  17 }
-                }
-            }, { /* Inter */
-                { /* Coeff Band 0 */
-                    {  36,  41, 235 },
-                    {  29,  36, 193 },
-                    {  10,  27, 111 }
-                }, { /* Coeff Band 1 */
-                    {  85, 165, 222 },
-                    { 177, 162, 215 },
-                    { 110, 135, 195 },
-                    {  57, 113, 168 },
-                    {  23,  83, 120 },
-                    {  10,  49,  61 }
-                }, { /* Coeff Band 2 */
-                    {  85, 190, 223 },
-                    {  36, 139, 200 },
-                    {   5,  90, 146 },
-                    {   1,  60, 103 },
-                    {   1,  38,  65 },
-                    {   1,  18,  30 }
-                }, { /* Coeff Band 3 */
-                    {  72, 202, 223 },
-                    {  23, 141, 199 },
-                    {   2,  86, 140 },
-                    {   1,  56,  97 },
-                    {   1,  36,  61 },
-                    {   1,  16,  27 }
-                }, { /* Coeff Band 4 */
-                    {  55, 218, 225 },
-                    {  13, 145, 200 },
-                    {   1,  86, 141 },
-                    {   1,  57,  99 },
-                    {   1,  35,  61 },
-                    {   1,  13,  22 }
-                }, { /* Coeff Band 5 */
-                    {  15, 235, 212 },
-                    {   1, 132, 184 },
-                    {   1,  84, 139 },
-                    {   1,  57,  97 },
-                    {   1,  34,  56 },
-                    {   1,  14,  23 }
-                }
-            }
-        }, { /* block Type 1 */
-            { /* Intra */
-                { /* Coeff Band 0 */
-                    { 181,  21, 201 },
-                    {  61,  37, 123 },
-                    {  10,  38,  71 }
-                }, { /* Coeff Band 1 */
-                    {  47, 106, 172 },
-                    {  95, 104, 173 },
-                    {  42,  93, 159 },
-                    {  18,  77, 131 },
-                    {   4,  50,  81 },
-                    {   1,  17,  23 }
-                }, { /* Coeff Band 2 */
-                    {  62, 147, 199 },
-                    {  44, 130, 189 },
-                    {  28, 102, 154 },
-                    {  18,  75, 115 },
-                    {   2,  44,  65 },
-                    {   1,  12,  19 }
-                }, { /* Coeff Band 3 */
-                    {  55, 153, 210 },
-                    {  24, 130, 194 },
-                    {   3,  93, 146 },
-                    {   1,  61,  97 },
-                    {   1,  31,  50 },
-                    {   1,  10,  16 }
-                }, { /* Coeff Band 4 */
-                    {  49, 186, 223 },
-                    {  17, 148, 204 },
-                    {   1,  96, 142 },
-                    {   1,  53,  83 },
-                    {   1,  26,  44 },
-                    {   1,  11,  17 }
-                }, { /* Coeff Band 5 */
-                    {  13, 217, 212 },
-                    {   2, 136, 180 },
-                    {   1,  78, 124 },
-                    {   1,  50,  83 },
-                    {   1,  29,  49 },
-                    {   1,  14,  23 }
-                }
-            }, { /* Inter */
-                { /* Coeff Band 0 */
-                    { 197,  13, 247 },
-                    {  82,  17, 222 },
-                    {  25,  17, 162 }
-                }, { /* Coeff Band 1 */
-                    { 126, 186, 247 },
-                    { 234, 191, 243 },
-                    { 176, 177, 234 },
-                    { 104, 158, 220 },
-                    {  66, 128, 186 },
-                    {  55,  90, 137 }
-                }, { /* Coeff Band 2 */
-                    { 111, 197, 242 },
-                    {  46, 158, 219 },
-                    {   9, 104, 171 },
-                    {   2,  65, 125 },
-                    {   1,  44,  80 },
-                    {   1,  17,  91 }
-                }, { /* Coeff Band 3 */
-                    { 104, 208, 245 },
-                    {  39, 168, 224 },
-                    {   3, 109, 162 },
-                    {   1,  79, 124 },
-                    {   1,  50, 102 },
-                    {   1,  43, 102 }
-                }, { /* Coeff Band 4 */
-                    {  84, 220, 246 },
-                    {  31, 177, 231 },
-                    {   2, 115, 180 },
-                    {   1,  79, 134 },
-                    {   1,  55,  77 },
-                    {   1,  60,  79 }
-                }, { /* Coeff Band 5 */
-                    {  43, 243, 240 },
-                    {   8, 180, 217 },
-                    {   1, 115, 166 },
-                    {   1,  84, 121 },
-                    {   1,  51,  67 },
-                    {   1,  16,   6 }
-                }
-            }
-        }
-    }
-};
-
-const int8_t ff_vp9_mv_joint_tree[3][2] = {
-    { -MV_JOINT_ZERO,            1 }, // '0'
-    {    -MV_JOINT_H,            2 }, // '10'
-    {    -MV_JOINT_V, -MV_JOINT_HV }, // '11x'
-};
-
-const int8_t ff_vp9_mv_class_tree[10][2] = {
-    { -0,   1 }, // '0'
-    { -1,   2 }, // '10'
-    {  3,   4 },
-    { -2,  -3 }, // '110x'
-    {  5,   6 },
-    { -4,  -5 }, // '1110x'
-    { -6,   7 }, // '11110'
-    {  8,   9 },
-    { -7,  -8 }, // '111110x'
-    { -9, -10 }, // '111111x'
-};
-
-const int8_t ff_vp9_mv_fp_tree[3][2] = {
-    { -0,  1 },   // '0'
-    { -1,  2 },   // '10'
-    { -2, -3 },   // '11x'
-};
diff --git a/libavcodec/vp9data.h b/libavcodec/vp9data.h
index a52cc0a..625b60e 100644
--- a/libavcodec/vp9data.h
+++ b/libavcodec/vp9data.h
@@ -2,20 +2,20 @@
  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
  * Copyright (C) 2013 Clément Bœsch <u pkh me>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,45 +26,2130 @@
 
 #include "vp9.h"
 
-extern const int8_t ff_vp9_partition_tree[3][2];
-extern const uint8_t ff_vp9_default_kf_partition_probs[4][4][3];
-extern const int8_t ff_vp9_segmentation_tree[7][2];
-extern const int8_t ff_vp9_intramode_tree[9][2];
-extern const uint8_t ff_vp9_default_kf_ymode_probs[10][10][9];
-extern const uint8_t ff_vp9_default_kf_uvmode_probs[10][9];
-extern const int8_t ff_vp9_inter_mode_tree[3][2];
-extern const int8_t ff_vp9_filter_tree[2][2];
-extern const enum FilterMode ff_vp9_filter_lut[3];
-extern const int16_t ff_vp9_dc_qlookup[256];
-extern const int16_t ff_vp9_ac_qlookup[256];
-extern const enum TxfmType ff_vp9_intra_txfm_type[14];
-extern const int16_t ff_vp9_default_scan_4x4[16];
-extern const int16_t ff_vp9_col_scan_4x4[16];
-extern const int16_t ff_vp9_row_scan_4x4[16];
-extern const int16_t ff_vp9_default_scan_8x8[64];
-extern const int16_t ff_vp9_col_scan_8x8[64];
-extern const int16_t ff_vp9_row_scan_8x8[64];
-extern const int16_t ff_vp9_default_scan_16x16[256];
-extern const int16_t ff_vp9_col_scan_16x16[256];
-extern const int16_t ff_vp9_row_scan_16x16[256];
-extern const int16_t ff_vp9_default_scan_32x32[1024];
-extern const int16_t *ff_vp9_scans[5][4];
-extern const int16_t ff_vp9_default_scan_4x4_nb[16][2];
-extern const int16_t ff_vp9_col_scan_4x4_nb[16][2];
-extern const int16_t ff_vp9_row_scan_4x4_nb[16][2];
-extern const int16_t ff_vp9_default_scan_8x8_nb[64][2];
-extern const int16_t ff_vp9_col_scan_8x8_nb[64][2];
-extern const int16_t ff_vp9_row_scan_8x8_nb[64][2];
-extern const int16_t ff_vp9_default_scan_16x16_nb[256][2];
-extern const int16_t ff_vp9_col_scan_16x16_nb[256][2];
-extern const int16_t ff_vp9_row_scan_16x16_nb[256][2];
-extern const int16_t ff_vp9_default_scan_32x32_nb[1024][2];
-extern const int16_t (*ff_vp9_scans_nb[5][4])[2];
-extern const uint8_t ff_vp9_model_pareto8[256][8];
-extern const ProbContext ff_vp9_default_probs;
-extern const uint8_t ff_vp9_default_coef_probs[4][2][2][6][6][3];
-extern const int8_t ff_vp9_mv_joint_tree[3][2];
-extern const int8_t ff_vp9_mv_class_tree[10][2];
-extern const int8_t ff_vp9_mv_fp_tree[3][2];
+enum BlockPartition {
+    PARTITION_NONE,    // [ ] <-.
+    PARTITION_H,       // [-]   |
+    PARTITION_V,       // [|]   |
+    PARTITION_SPLIT,   // [+] --'
+};
+
+static const int8_t vp9_partition_tree[3][2] = {
+    { -PARTITION_NONE, 1 },               // '0'
+     { -PARTITION_H, 2 },                 // '10'
+      { -PARTITION_V, -PARTITION_SPLIT }, // '110', '111'
+};
+
+static const uint8_t vp9_default_kf_partition_probs[4][4][3] = {
+    { /* 64x64 -> 32x32 */
+        { 174,  35,  49 } /* a/l both not split */,
+        {  68,  11,  27 } /* a split, l not split */,
+        {  57,  15,   9 } /* l split, a not split */,
+        {  12,   3,   3 } /* a/l both split */
+    }, { /* 32x32 -> 16x16 */
+        { 150,  40,  39 } /* a/l both not split */,
+        {  78,  12,  26 } /* a split, l not split */,
+        {  67,  33,  11 } /* l split, a not split */,
+        {  24,   7,   5 } /* a/l both split */,
+    }, { /* 16x16 -> 8x8 */
+        { 149,  53,  53 } /* a/l both not split */,
+        {  94,  20,  48 } /* a split, l not split */,
+        {  83,  53,  24 } /* l split, a not split */,
+        {  52,  18,  18 } /* a/l both split */,
+    }, { /* 8x8 -> 4x4 */
+        { 158,  97,  94 } /* a/l both not split */,
+        {  93,  24,  99 } /* a split, l not split */,
+        {  85, 119,  44 } /* l split, a not split */,
+        {  62,  59,  67 } /* a/l both split */,
+    },
+};
+
+static const int8_t vp9_segmentation_tree[7][2] = {
+    { 1, 2 },
+     { 3, 4 },
+     { 5, 6 },
+      { -0, -1 }, // '00x'
+      { -2, -3 }, // '01x'
+      { -4, -5 }, // '10x'
+      { -6, -7 }, // '11x'
+};
+
+static const int8_t vp9_intramode_tree[9][2] = {
+    { -DC_PRED, 1 },                                  // '0'
+     { -TM_VP8_PRED, 2 },                             // '10'
+      { -VERT_PRED, 3 },                              // '110'
+       { 4, 6 },
+        { -HOR_PRED, 5 },                             // '11100'
+         { -DIAG_DOWN_RIGHT_PRED, -VERT_RIGHT_PRED }, // '11101x'
+        { -DIAG_DOWN_LEFT_PRED, 7 },                  // '11110'
+         { -VERT_LEFT_PRED, 8 },                      // '111110'
+          { -HOR_DOWN_PRED, -HOR_UP_PRED },           // '111111x'
+};
+
+static const uint8_t vp9_default_kf_ymode_probs[10][10][9] = {
+    { /* above = v */
+        {  43,  46, 168, 134, 107, 128,  69, 142,  92 } /* left = v */,
+        {  44,  29,  68, 159, 201, 177,  50,  57,  77 } /* left = h */,
+        {  63,  36, 126, 146, 123, 158,  60,  90,  96 } /* left = dc */,
+        {  58,  38,  76, 114,  97, 172,  78, 133,  92 } /* left = d45 */,
+        {  46,  41,  76, 140,  63, 184,  69, 112,  57 } /* left = d135 */,
+        {  38,  32,  85, 140,  46, 112,  54, 151, 133 } /* left = d117 */,
+        {  39,  27,  61, 131, 110, 175,  44,  75, 136 } /* left = d153 */,
+        {  47,  35,  80, 100,  74, 143,  64, 163,  74 } /* left = d63 */,
+        {  52,  30,  74, 113, 130, 175,  51,  64,  58 } /* left = d27 */,
+        {  36,  61, 116, 114, 128, 162,  80, 125,  82 } /* left = tm */
+    }, { /* above = h */
+        {  55,  44,  68, 166, 179, 192,  57,  57, 108 } /* left = v */,
+        {  42,  26,  11, 199, 241, 228,  23,  15,  85 } /* left = h */,
+        {  82,  26,  26, 171, 208, 204,  44,  32, 105 } /* left = dc */,
+        {  68,  42,  19, 131, 160, 199,  55,  52,  83 } /* left = d45 */,
+        {  58,  50,  25, 139, 115, 232,  39,  52, 118 } /* left = d135 */,
+        {  50,  35,  33, 153, 104, 162,  64,  59, 131 } /* left = d117 */,
+        {  44,  24,  16, 150, 177, 202,  33,  19, 156 } /* left = d153 */,
+        {  53,  49,  21, 110, 116, 168,  59,  80,  76 } /* left = d63 */,
+        {  55,  27,  12, 153, 203, 218,  26,  27,  49 } /* left = d27 */,
+        {  38,  72,  19, 168, 203, 212,  50,  50, 107 } /* left = tm */
+    }, { /* above = dc */
+        {  92,  45, 102, 136, 116, 180,  74,  90, 100 } /* left = v */,
+        {  73,  32,  19, 187, 222, 215,  46,  34, 100 } /* left = h */,
+        { 137,  30,  42, 148, 151, 207,  70,  52,  91 } /* left = dc */,
+        {  91,  30,  32, 116, 121, 186,  93,  86,  94 } /* left = d45 */,
+        {  72,  35,  36, 149,  68, 206,  68,  63, 105 } /* left = d135 */,
+        {  73,  31,  28, 138,  57, 124,  55, 122, 151 } /* left = d117 */,
+        {  67,  23,  21, 140, 126, 197,  40,  37, 171 } /* left = d153 */,
+        {  74,  32,  27, 107,  86, 160,  63, 134, 102 } /* left = d63 */,
+        {  86,  27,  28, 128, 154, 212,  45,  43,  53 } /* left = d27 */,
+        {  59,  67,  44, 140, 161, 202,  78,  67, 119 } /* left = tm */
+    }, { /* above = d45 */
+        {  59,  38,  83, 112, 103, 162,  98, 136,  90 } /* left = v */,
+        {  62,  30,  23, 158, 200, 207,  59,  57,  50 } /* left = h */,
+        { 103,  26,  36, 129, 132, 201,  83,  80,  93 } /* left = dc */,
+        {  67,  30,  29,  84,  86, 191, 102,  91,  59 } /* left = d45 */,
+        {  60,  32,  33, 112,  71, 220,  64,  89, 104 } /* left = d135 */,
+        {  53,  26,  34, 130,  56, 149,  84, 120, 103 } /* left = d117 */,
+        {  53,  21,  23, 133, 109, 210,  56,  77, 172 } /* left = d153 */,
+        {  61,  29,  29,  93,  97, 165,  83, 175, 162 } /* left = d63 */,
+        {  77,  19,  29, 112, 142, 228,  55,  66,  36 } /* left = d27 */,
+        {  47,  47,  43, 114, 137, 181, 100,  99,  95 } /* left = tm */
+    }, { /* above = d135 */
+        {  53,  40,  55, 139,  69, 183,  61,  80, 110 } /* left = v */,
+        {  40,  29,  19, 161, 180, 207,  43,  24,  91 } /* left = h */,
+        {  69,  23,  29, 128,  83, 199,  46,  44, 101 } /* left = dc */,
+        {  60,  34,  19, 105,  61, 198,  53,  64,  89 } /* left = d45 */,
+        {  52,  31,  22, 158,  40, 209,  58,  62,  89 } /* left = d135 */,
+        {  44,  31,  29, 147,  46, 158,  56, 102, 198 } /* left = d117 */,
+        {  35,  19,  12, 135,  87, 209,  41,  45, 167 } /* left = d153 */,
+        {  51,  38,  25, 113,  58, 164,  70,  93,  97 } /* left = d63 */,
+        {  55,  25,  21, 118,  95, 215,  38,  39,  66 } /* left = d27 */,
+        {  47,  54,  34, 146, 108, 203,  72, 103, 151 } /* left = tm */
+    }, { /* above = d117 */
+        {  46,  27,  80, 150,  55, 124,  55, 121, 135 } /* left = v */,
+        {  36,  23,  27, 165, 149, 166,  54,  64, 118 } /* left = h */,
+        {  64,  19,  37, 156,  66, 138,  49,  95, 133 } /* left = dc */,
+        {  53,  21,  36, 131,  63, 163,  60, 109,  81 } /* left = d45 */,
+        {  40,  26,  35, 154,  40, 185,  51,  97, 123 } /* left = d135 */,
+        {  35,  19,  34, 179,  19,  97,  48, 129, 124 } /* left = d117 */,
+        {  36,  20,  26, 136,  62, 164,  33,  77, 154 } /* left = d153 */,
+        {  45,  26,  28, 129,  45, 129,  49, 147, 123 } /* left = d63 */,
+        {  45,  18,  32, 130,  90, 157,  40,  79,  91 } /* left = d27 */,
+        {  38,  44,  51, 136,  74, 162,  57,  97, 121 } /* left = tm */
+    }, { /* above = d153 */
+        {  56,  39,  58, 133, 117, 173,  48,  53, 187 } /* left = v */,
+        {  35,  21,  12, 161, 212, 207,  20,  23, 145 } /* left = h */,
+        {  75,  17,  22, 136, 138, 185,  32,  34, 166 } /* left = dc */,
+        {  56,  29,  19, 117, 109, 181,  55,  68, 112 } /* left = d45 */,
+        {  47,  29,  17, 153,  64, 220,  59,  51, 114 } /* left = d135 */,
+        {  46,  16,  24, 136,  76, 147,  41,  64, 172 } /* left = d117 */,
+        {  34,  17,  11, 108, 152, 187,  13,  15, 209 } /* left = d153 */,
+        {  55,  30,  18, 122,  79, 179,  44,  88, 116 } /* left = d63 */,
+        {  51,  24,  14, 115, 133, 209,  32,  26, 104 } /* left = d27 */,
+        {  37,  49,  25, 129, 168, 164,  41,  54, 148 } /* left = tm */
+    }, { /* above = d63 */
+        {  48,  34,  86, 101,  92, 146,  78, 179, 134 } /* left = v */,
+        {  47,  22,  24, 138, 187, 178,  68,  69,  59 } /* left = h */,
+        {  78,  23,  39, 111, 117, 170,  74, 124,  94 } /* left = dc */,
+        {  56,  25,  33, 105, 112, 187,  95, 177, 129 } /* left = d45 */,
+        {  48,  31,  27, 114,  63, 183,  82, 116,  56 } /* left = d135 */,
+        {  43,  28,  37, 121,  63, 123,  61, 192, 169 } /* left = d117 */,
+        {  42,  17,  24, 109,  97, 177,  56,  76, 122 } /* left = d153 */,
+        {  46,  23,  32,  74,  86, 150,  67, 183,  88 } /* left = d63 */,
+        {  58,  18,  28, 105, 139, 182,  70,  92,  63 } /* left = d27 */,
+        {  36,  38,  48,  92, 122, 165,  88, 137,  91 } /* left = tm */
+    }, { /* above = d27 */
+        {  62,  44,  61, 123, 105, 189,  48,  57,  64 } /* left = v */,
+        {  47,  25,  17, 175, 222, 220,  24,  30,  86 } /* left = h */,
+        {  82,  22,  32, 127, 143, 213,  39,  41,  70 } /* left = dc */,
+        {  68,  36,  17, 106, 102, 206,  59,  74,  74 } /* left = d45 */,
+        {  57,  39,  23, 151,  68, 216,  55,  63,  58 } /* left = d135 */,
+        {  49,  30,  35, 141,  70, 168,  82,  40, 115 } /* left = d117 */,
+        {  51,  25,  15, 136, 129, 202,  38,  35, 139 } /* left = d153 */,
+        {  59,  39,  19, 114,  75, 180,  77, 104,  42 } /* left = d63 */,
+        {  68,  26,  16, 111, 141, 215,  29,  28,  28 } /* left = d27 */,
+        {  40,  61,  26, 126, 152, 206,  61,  59,  93 } /* left = tm */
+    }, { /* above = tm */
+        {  44,  78, 115, 132, 119, 173,  71, 112,  93 } /* left = v */,
+        {  39,  38,  21, 184, 227, 206,  42,  32,  64 } /* left = h */,
+        {  65,  70,  60, 155, 159, 199,  61,  60,  81 } /* left = dc */,
+        {  58,  47,  36, 124, 137, 193,  80,  82,  78 } /* left = d45 */,
+        {  49,  50,  35, 144,  95, 205,  63,  78,  59 } /* left = d135 */,
+        {  41,  53,  52, 148,  71, 142,  65, 128,  51 } /* left = d117 */,
+        {  40,  36,  28, 143, 143, 202,  40,  55, 137 } /* left = d153 */,
+        {  42,  44,  44, 104, 105, 164,  64, 130,  80 } /* left = d63 */,
+        {  52,  34,  29, 129, 183, 227,  42,  35,  43 } /* left = d27 */,
+        {  43,  81,  53, 140, 169, 204,  68,  84,  72 } /* left = tm */
+    }
+};
+
+static const uint8_t vp9_default_kf_uvmode_probs[10][9] = {
+    { 118,  15, 123, 148, 131, 101,  44,  93, 131 } /* y = v */,
+    { 113,  12,  23, 188, 226, 142,  26,  32, 125 } /* y = h */,
+    { 144,  11,  54, 157, 195, 130,  46,  58, 108 } /* y = dc */,
+    { 120,  11,  50, 123, 163, 135,  64,  77, 103 } /* y = d45 */,
+    { 113,   9,  36, 155, 111, 157,  32,  44, 161 } /* y = d135 */,
+    { 116,   9,  55, 176,  76,  96,  37,  61, 149 } /* y = d117 */,
+    { 115,   9,  28, 141, 161, 167,  21,  25, 193 } /* y = d153 */,
+    { 116,  12,  64, 120, 140, 125,  49, 115, 121 } /* y = d63 */,
+    { 120,  12,  32, 145, 195, 142,  32,  38,  86 } /* y = d27 */,
+    { 102,  19,  66, 162, 182, 122,  35,  59, 128 } /* y = tm */
+};
+
+enum InterPredMode {
+    NEARESTMV = 10,
+    NEARMV = 11,
+    ZEROMV = 12,
+    NEWMV = 13,
+};
+
+static const int8_t vp9_inter_mode_tree[3][2] = {
+    { -ZEROMV, 1 },        // '0'
+     { -NEARESTMV, 2 },    // '10'
+      { -NEARMV, -NEWMV }, // '11x'
+};
+
+static const int8_t vp9_filter_tree[2][2] = {
+    { -0, 1 },   // '0'
+     { -1, -2 }, // '1x'
+};
+
+static const enum FilterMode vp9_filter_lut[3] = {
+    FILTER_8TAP_REGULAR,
+    FILTER_8TAP_SMOOTH,
+    FILTER_8TAP_SHARP,
+};
+
+static const int16_t vp9_dc_qlookup[256] = {
+       4,    8,    8,    9,   10,   11,   12,   12,
+      13,   14,   15,   16,   17,   18,   19,   19,
+      20,   21,   22,   23,   24,   25,   26,   26,
+      27,   28,   29,   30,   31,   32,   32,   33,
+      34,   35,   36,   37,   38,   38,   39,   40,
+      41,   42,   43,   43,   44,   45,   46,   47,
+      48,   48,   49,   50,   51,   52,   53,   53,
+      54,   55,   56,   57,   57,   58,   59,   60,
+      61,   62,   62,   63,   64,   65,   66,   66,
+      67,   68,   69,   70,   70,   71,   72,   73,
+      74,   74,   75,   76,   77,   78,   78,   79,
+      80,   81,   81,   82,   83,   84,   85,   85,
+      87,   88,   90,   92,   93,   95,   96,   98,
+      99,  101,  102,  104,  105,  107,  108,  110,
+     111,  113,  114,  116,  117,  118,  120,  121,
+     123,  125,  127,  129,  131,  134,  136,  138,
+     140,  142,  144,  146,  148,  150,  152,  154,
+     156,  158,  161,  164,  166,  169,  172,  174,
+     177,  180,  182,  185,  187,  190,  192,  195,
+     199,  202,  205,  208,  211,  214,  217,  220,
+     223,  226,  230,  233,  237,  240,  243,  247,
+     250,  253,  257,  261,  265,  269,  272,  276,
+     280,  284,  288,  292,  296,  300,  304,  309,
+     313,  317,  322,  326,  330,  335,  340,  344,
+     349,  354,  359,  364,  369,  374,  379,  384,
+     389,  395,  400,  406,  411,  417,  423,  429,
+     435,  441,  447,  454,  461,  467,  475,  482,
+     489,  497,  505,  513,  522,  530,  539,  549,
+     559,  569,  579,  590,  602,  614,  626,  640,
+     654,  668,  684,  700,  717,  736,  755,  775,
+     796,  819,  843,  869,  896,  925,  955,  988,
+    1022, 1058, 1098, 1139, 1184, 1232, 1282, 1336,
+};
+
+static const int16_t vp9_ac_qlookup[256] = {
+       4,    8,    9,   10,   11,   12,   13,   14,
+      15,   16,   17,   18,   19,   20,   21,   22,
+      23,   24,   25,   26,   27,   28,   29,   30,
+      31,   32,   33,   34,   35,   36,   37,   38,
+      39,   40,   41,   42,   43,   44,   45,   46,
+      47,   48,   49,   50,   51,   52,   53,   54,
+      55,   56,   57,   58,   59,   60,   61,   62,
+      63,   64,   65,   66,   67,   68,   69,   70,
+      71,   72,   73,   74,   75,   76,   77,   78,
+      79,   80,   81,   82,   83,   84,   85,   86,
+      87,   88,   89,   90,   91,   92,   93,   94,
+      95,   96,   97,   98,   99,  100,  101,  102,
+     104,  106,  108,  110,  112,  114,  116,  118,
+     120,  122,  124,  126,  128,  130,  132,  134,
+     136,  138,  140,  142,  144,  146,  148,  150,
+     152,  155,  158,  161,  164,  167,  170,  173,
+     176,  179,  182,  185,  188,  191,  194,  197,
+     200,  203,  207,  211,  215,  219,  223,  227,
+     231,  235,  239,  243,  247,  251,  255,  260,
+     265,  270,  275,  280,  285,  290,  295,  300,
+     305,  311,  317,  323,  329,  335,  341,  347,
+     353,  359,  366,  373,  380,  387,  394,  401,
+     408,  416,  424,  432,  440,  448,  456,  465,
+     474,  483,  492,  501,  510,  520,  530,  540,
+     550,  560,  571,  582,  593,  604,  615,  627,
+     639,  651,  663,  676,  689,  702,  715,  729,
+     743,  757,  771,  786,  801,  816,  832,  848,
+     864,  881,  898,  915,  933,  951,  969,  988,
+    1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151,
+    1173, 1196, 1219, 1243, 1267, 1292, 1317, 1343,
+    1369, 1396, 1423, 1451, 1479, 1508, 1537, 1567,
+    1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828,
+};
+
+static const enum TxfmType vp9_intra_txfm_type[14] = {
+    [VERT_PRED]            = ADST_DCT,
+    [HOR_PRED]             = DCT_ADST,
+    [DC_PRED]              = DCT_DCT,
+    [DIAG_DOWN_LEFT_PRED]  = DCT_DCT,
+    [DIAG_DOWN_RIGHT_PRED] = ADST_ADST,
+    [VERT_RIGHT_PRED]      = ADST_DCT,
+    [HOR_DOWN_PRED]        = DCT_ADST,
+    [VERT_LEFT_PRED]       = ADST_DCT,
+    [HOR_UP_PRED]          = DCT_ADST,
+    [TM_VP8_PRED]          = ADST_ADST,
+    [NEARESTMV]            = DCT_DCT,
+    [NEARMV]               = DCT_DCT,
+    [ZEROMV]               = DCT_DCT,
+    [NEWMV]                = DCT_DCT,
+};
+
+static const int16_t vp9_default_scan_4x4[16] = {
+     0,  1,  4,  5,
+     2,  8,  3,  6,
+    12,  9,  7, 10,
+    13, 11, 14, 15,
+};
+
+static const int16_t vp9_col_scan_4x4[16] = {
+     0,  1,  2,  4,
+     3,  5,  6,  8,
+     7,  9, 10, 12,
+    13, 11, 14, 15,
+};
+
+static const int16_t vp9_row_scan_4x4[16] = {
+     0,  4,  1,  8,
+     5, 12,  9,  2,
+     6, 13,  3, 10,
+     7, 14, 11, 15,
+};
+
+static const int16_t vp9_default_scan_8x8[64] = {
+     0,  1,  8,  2,  9, 16, 10,  3,
+    17, 24, 18, 11,  4, 25, 32, 19,
+    12, 26,  5, 33, 20, 27, 40, 13,
+    34,  6, 41, 28, 21, 35, 42, 48,
+    14,  7, 36, 29, 43, 56, 49, 22,
+    15, 37, 50, 44, 57, 30, 23, 51,
+    45, 58, 38, 31, 52, 59, 39, 46,
+    53, 60, 47, 54, 61, 55, 62, 63,
+};
+
+static const int16_t vp9_col_scan_8x8[64] = {
+     0,  1,  2,  8,  3,  9,  4, 10,
+    16,  5, 11, 17, 12, 18,  6, 24,
+    19, 13, 25,  7, 26, 20, 32, 14,
+    27, 21, 33, 28, 34, 15, 22, 35,
+    40, 29, 41, 36, 23, 30, 42, 37,
+    48, 43, 31, 44, 49, 38, 50, 56,
+    45, 39, 51, 57, 52, 46, 58, 53,
+    59, 47, 60, 54, 61, 55, 62, 63,
+};
+
+static const int16_t vp9_row_scan_8x8[64] = {
+     0,  8, 16,  1,  9, 24,  2, 17,
+    32, 10, 25,  3, 40, 18, 11, 33,
+    26, 19,  4, 48, 41, 34, 12, 27,
+    56, 20,  5, 42, 35, 13, 49, 28,
+     6, 21, 43, 36, 14, 50, 29, 57,
+     7, 44, 22, 37, 51, 15, 58, 30,
+    23, 45, 52, 38, 59, 31, 46, 53,
+    39, 60, 47, 61, 54, 62, 55, 63,
+};
+
+static const int16_t vp9_default_scan_16x16[256] = {
+      0,   1,  16,   2,  17,  32,   3,  18,  33,  48,   4,  34,  19,  49,  20,   5,
+     35,  64,  50,  36,  65,  21,   6,  51,  80,  66,  37,  22,  52,   7,  81,  67,
+     38,  82,  53,  23,  96,  68,   8,  83,  97,  54,  39,  69, 112,  24,  98,  84,
+     70,  55,   9,  40,  85,  99, 113, 128,  25, 114, 100,  71,  86,  56,  10,  41,
+    115, 101, 129, 116,  72,  87,  26, 130, 144, 102,  57,  11,  42, 117, 131, 145,
+     88, 103,  27,  73, 132, 118, 146,  58, 160,  12,  43, 133, 147, 104,  89, 119,
+    161,  74, 148, 134,  28, 162,  59,  13, 176, 120, 149,  90, 135, 105, 163,  44,
+     75, 177, 164,  29, 150, 121, 136, 178, 165,  14, 106,  60,  91, 151,  45, 179,
+    192, 137, 166, 122,  76, 180, 152,  30,  61,  15, 107, 167, 181, 193,  92, 208,
+     46, 138, 123, 153, 194,  77, 168, 182,  31, 195, 209, 183, 108, 139,  62, 154,
+     47, 196,  93, 169, 210, 197, 224, 124, 184, 211,  78, 109, 170, 155,  63, 198,
+    212, 185, 225, 240, 140,  94, 199, 125,  79, 213, 226, 171, 186, 156, 214, 200,
+    110, 227, 141,  95, 241, 215, 228, 201, 126, 242, 187, 172, 157, 229, 111, 216,
+    243, 142, 202, 230, 127, 217, 244, 173, 188, 231, 158, 203, 143, 245, 218, 232,
+    189, 246, 159, 174, 233, 247, 219, 204, 175, 190, 248, 234, 205, 220, 249, 191,
+    235, 221, 250, 206, 222, 251, 236, 207, 237, 223, 252, 238, 253, 239, 254, 255,
+};
+
+static const int16_t vp9_col_scan_16x16[256] = {
+      0,   1,   2,   3,  16,   4,  17,   5,  18,   6,  19,  32,  20,   7,  33,  21,
+     34,   8,  35,  22,  48,  36,   9,  49,  23,  50,  37,  10,  38,  51,  24,  64,
+     52,  11,  65,  39,  25,  53,  66,  54,  40,  67,  12,  80,  26,  68,  55,  81,
+     41,  69,  13,  27,  82,  56,  70,  83,  42,  14,  84,  96,  71,  28,  57,  85,
+     97,  15,  72,  98,  43,  86,  58,  99,  29,  87, 100, 112,  73,  44, 101,  59,
+     30, 113,  88, 114,  74, 128, 102,  45,  31, 115,  60, 103,  89, 116,  75, 129,
+    117,  46, 104,  90,  61, 130, 118, 131, 132, 105,  76,  47, 119, 144,  91,  62,
+    133, 106, 145, 120, 146, 134,  77, 147, 121,  92, 135, 148,  63, 107, 136, 122,
+     93, 149, 160,  78, 150, 137, 108, 161, 162, 151, 123,  79, 138, 163, 152,  94,
+    164, 109, 165, 153, 124, 139, 176, 166,  95, 177, 167, 110, 154, 178, 125, 179,
+    140, 168, 155, 111, 180, 192, 181, 169, 141, 126, 182, 193, 194, 156, 183, 170,
+    195, 127, 142, 196, 184, 208, 197, 157, 171, 143, 185, 198, 209, 199, 210, 172,
+    158, 186, 211, 224, 212, 200, 240, 159, 213, 225, 187, 201, 173, 226, 214, 215,
+    227, 202, 228, 188, 241, 216, 174, 229, 242, 203, 243, 217, 230, 175, 189, 244,
+    231, 204, 218, 232, 245, 219, 246, 190, 233, 205, 191, 247, 234, 248, 220, 206,
+    249, 235, 221, 207, 250, 236, 222, 251, 223, 237, 238, 252, 239, 253, 254, 255,
+};
+
+static const int16_t vp9_row_scan_16x16[256] = {
+      0,  16,  32,   1,  48,  17,  64,  33,   2,  80,  18,  49,  96,  34,   3,  65,
+     19, 112,  50,  81,  35,   4, 128,  66,  20,  97,  51,  82,   5, 144,  36,  67,
+    113,  98,  21,  52, 160,  83, 129,  37,  68,   6, 114, 176,  99,  53,  22,  84,
+    145,  38,  69, 130,   7, 115, 192, 100,  54,  23,  85, 161, 146, 131,  39,  70,
+    208, 116,   8, 101, 177,  55,  86,  24, 162, 147, 132,  71, 224, 117,  40, 102,
+      9, 148,  56,  87, 193, 163, 240, 133, 178,  25, 118,  72,  41, 103, 164,  10,
+    149,  88, 134, 209, 179,  57, 119, 194,  26,  73, 165, 150, 104,  42, 135,  11,
+    180, 120,  89, 225, 195,  58,  27, 210, 151, 181, 166,  74,  43, 105,  12, 136,
+     90,  59, 241, 121,  28, 196, 167, 211, 152,  44, 182, 137,  75,  13, 226, 106,
+    122,  60, 197,  91, 168,  29, 183, 153,  14,  76, 212, 138,  45, 107,  15, 198,
+     92, 227, 169,  30, 123, 154,  61, 242, 184, 213, 139,  46,  77,  31, 108, 170,
+    199, 185, 124, 228,  93, 155, 214,  62, 140, 243,  78,  47, 200, 109, 186, 171,
+    201,  94,  63, 215, 229, 156,  79, 125, 141, 110, 216, 187, 172, 244, 202, 230,
+    217,  95, 157, 126, 245, 111, 142, 231, 188, 127, 158, 218, 173, 232, 246, 233,
+    203, 143, 247, 174, 189, 159, 219, 204, 248, 234, 249, 175, 190, 220, 205, 250,
+    235, 191, 221, 251, 236, 206, 252, 222, 207, 237, 223, 253, 238, 254, 239, 255,
+};
+
+static const int16_t vp9_default_scan_32x32[1024] = {
+       0,    1,   32,    2,   33,   64,    3,   34,   65,    4,   96,   35,   66,    5,   36,   97,   67,  128,   98,   68,   37,    6,  129,   99,    7,  160,   69,   38,  130,  100,  161,  131,
+      39,   70,    8,  101,  162,  132,  192,   71,   40,    9,  102,  163,  133,  193,   72,  224,  103,   41,  164,   10,  194,  134,  165,   73,  104,  135,  225,   42,  195,   11,  256,  166,
+     226,  196,   74,  105,  136,   43,   12,  167,  197,  227,  257,   75,  106,  137,  228,   44,  198,  168,  258,  288,   13,  229,   76,  107,  199,  138,  259,  169,  289,   45,  230,  260,
+     200,  108,   14,  170,  139,  320,  290,   77,  231,  261,   46,  201,  140,  291,  109,  232,  321,  262,  171,   78,  292,   15,  322,  202,  263,  352,  172,  293,  233,  141,  323,  110,
+      47,  203,  264,  234,  294,  353,  324,   16,   79,  204,  265,  295,  325,  173,  354,  142,  235,  384,   48,  296,  111,  266,  355,  326,   80,   17,  205,  236,  174,  356,  385,  327,
+     143,  297,  267,  357,  386,  112,   49,  328,  298,  206,  416,  237,  358,  387,   81,  175,   18,  329,  359,  388,  299,  330,  389,  113,  417,  238,  360,   50,  207,  418,  390,  331,
+      19,  448,  361,   82,  419,  391,  239,   51,  362,  420,  114,  449,  480,  421,   83,  363,  450,  422,  512,  451,  423,  115,  452,  481,  453,  482,  454,  544,  483,  455,  513,  484,
+     514,  485,  515,  486,  545,  576,  487,  546,  547,  608,  577,  578,  579,  609,  610,  611,   20,  144,  268,  392,  516,  640,   21,   52,  145,  176,  269,  300,  393,  424,  517,  548,
+     641,  672,   22,   53,   84,  146,  177,  208,  270,  301,  332,  394,  425,  456,  518,  549,  580,  642,  673,  704,   23,   54,   85,  116,  147,  178,  209,  240,  271,  302,  333,  364,
+     395,  426,  457,  488,  519,  550,  581,  612,  643,  674,  705,  736,   55,   86,  117,  179,  210,  241,  303,  334,  365,  427,  458,  489,  551,  582,  613,  675,  706,  737,   87,  118,
+     211,  242,  335,  366,  459,  490,  583,  614,  707,  738,  119,  243,  367,  491,  615,  739,   24,  148,  272,  396,  520,  644,  768,   25,   56,  149,  180,  273,  304,  397,  428,  521,
+     552,  645,  676,  769,  800,   26,   57,   88,  150,  181,  212,  274,  305,  336,  398,  429,  460,  522,  553,  584,  646,  677,  708,  770,  801,  832,   27,   58,   89,  120,  151,  182,
+     213,  244,  275,  306,  337,  368,  399,  430,  461,  492,  523,  554,  585,  616,  647,  678,  709,  740,  771,  802,  833,  864,   59,   90,  121,  183,  214,  245,  307,  338,  369,  431,
+     462,  493,  555,  586,  617,  679,  710,  741,  803,  834,  865,   91,  122,  215,  246,  339,  370,  463,  494,  587,  618,  711,  742,  835,  866,  123,  247,  371,  495,  619,  743,  867,
+      28,  152,  276,  400,  524,  648,  772,  896,   29,   60,  153,  184,  277,  308,  401,  432,  525,  556,  649,  680,  773,  804,  897,  928,   30,   61,   92,  154,  185,  216,  278,  309,
+     340,  402,  433,  464,  526,  557,  588,  650,  681,  712,  774,  805,  836,  898,  929,  960,   31,   62,   93,  124,  155,  186,  217,  248,  279,  310,  341,  372,  403,  434,  465,  496,
+     527,  558,  589,  620,  651,  682,  713,  744,  775,  806,  837,  868,  899,  930,  961,  992,   63,   94,  125,  187,  218,  249,  311,  342,  373,  435,  466,  497,  559,  590,  621,  683,
+     714,  745,  807,  838,  869,  931,  962,  993,   95,  126,  219,  250,  343,  374,  467,  498,  591,  622,  715,  746,  839,  870,  963,  994,  127,  251,  375,  499,  623,  747,  871,  995,
+     156,  280,  404,  528,  652,  776,  900,  157,  188,  281,  312,  405,  436,  529,  560,  653,  684,  777,  808,  901,  932,  158,  189,  220,  282,  313,  344,  406,  437,  468,  530,  561,
+     592,  654,  685,  716,  778,  809,  840,  902,  933,  964,  159,  190,  221,  252,  283,  314,  345,  376,  407,  438,  469,  500,  531,  562,  593,  624,  655,  686,  717,  748,  779,  810,
+     841,  872,  903,  934,  965,  996,  191,  222,  253,  315,  346,  377,  439,  470,  501,  563,  594,  625,  687,  718,  749,  811,  842,  873,  935,  966,  997,  223,  254,  347,  378,  471,
+     502,  595,  626,  719,  750,  843,  874,  967,  998,  255,  379,  503,  627,  751,  875,  999,  284,  408,  532,  656,  780,  904,  285,  316,  409,  440,  533,  564,  657,  688,  781,  812,
+     905,  936,  286,  317,  348,  410,  441,  472,  534,  565,  596,  658,  689,  720,  782,  813,  844,  906,  937,  968,  287,  318,  349,  380,  411,  442,  473,  504,  535,  566,  597,  628,
+     659,  690,  721,  752,  783,  814,  845,  876,  907,  938,  969, 1000,  319,  350,  381,  443,  474,  505,  567,  598,  629,  691,  722,  753,  815,  846,  877,  939,  970, 1001,  351,  382,
+     475,  506,  599,  630,  723,  754,  847,  878,  971, 1002,  383,  507,  631,  755,  879, 1003,  412,  536,  660,  784,  908,  413,  444,  537,  568,  661,  692,  785,  816,  909,  940,  414,
+     445,  476,  538,  569,  600,  662,  693,  724,  786,  817,  848,  910,  941,  972,  415,  446,  477,  508,  539,  570,  601,  632,  663,  694,  725,  756,  787,  818,  849,  880,  911,  942,
+     973, 1004,  447,  478,  509,  571,  602,  633,  695,  726,  757,  819,  850,  881,  943,  974, 1005,  479,  510,  603,  634,  727,  758,  851,  882,  975, 1006,  511,  635,  759,  883, 1007,
+     540,  664,  788,  912,  541,  572,  665,  696,  789,  820,  913,  944,  542,  573,  604,  666,  697,  728,  790,  821,  852,  914,  945,  976,  543,  574,  605,  636,  667,  698,  729,  760,
+     791,  822,  853,  884,  915,  946,  977, 1008,  575,  606,  637,  699,  730,  761,  823,  854,  885,  947,  978, 1009,  607,  638,  731,  762,  855,  886,  979, 1010,  639,  763,  887, 1011,
+     668,  792,  916,  669,  700,  793,  824,  917,  948,  670,  701,  732,  794,  825,  856,  918,  949,  980,  671,  702,  733,  764,  795,  826,  857,  888,  919,  950,  981, 1012,  703,  734,
+     765,  827,  858,  889,  951,  982, 1013,  735,  766,  859,  890,  983, 1014,  767,  891, 1015,  796,  920,  797,  828,  921,  952,  798,  829,  860,  922,  953,  984,  799,  830,  861,  892,
+     923,  954,  985, 1016,  831,  862,  893,  955,  986, 1017,  863,  894,  987, 1018,  895, 1019,  924,  925,  956,  926,  957,  988,  927,  958,  989, 1020,  959,  990, 1021,  991, 1022, 1023,
+};
+
+static const int16_t * const vp9_scans[5][4] = {
+    {
+        vp9_default_scan_4x4, vp9_col_scan_4x4,
+        vp9_row_scan_4x4, vp9_default_scan_4x4
+    }, {
+        vp9_default_scan_8x8, vp9_col_scan_8x8,
+        vp9_row_scan_8x8, vp9_default_scan_8x8
+    }, {
+        vp9_default_scan_16x16, vp9_col_scan_16x16,
+        vp9_row_scan_16x16, vp9_default_scan_16x16
+    }, {
+        vp9_default_scan_32x32, vp9_default_scan_32x32,
+        vp9_default_scan_32x32, vp9_default_scan_32x32
+    }, { // lossless
+        vp9_default_scan_4x4, vp9_default_scan_4x4,
+        vp9_default_scan_4x4, vp9_default_scan_4x4
+    }
+};
+
+static const int16_t vp9_default_scan_4x4_nb[16][2] = {
+    {  0,  0 }, {  0,  0 }, {  4,  1 }, {  1,  1 },
+    {  4,  4 }, {  2,  2 }, {  5,  2 }, {  8,  8 },
+    {  8,  5 }, {  6,  3 }, {  9,  6 }, { 12,  9 },
+    { 10,  7 }, { 13, 10 }, { 14, 11 }, {  0,  0 },
+};
+
+static const int16_t vp9_col_scan_4x4_nb[16][2] = {
+    {  0,  0 }, {  1,  1 }, {  0,  0 }, {  2,  2 },
+    {  4,  4 }, {  5,  5 }, {  4,  4 }, {  6,  6 },
+    {  8,  8 }, {  9,  9 }, {  8,  8 }, { 12, 12 },
+    { 10, 10 }, { 13, 13 }, { 14, 14 }, {  0,  0 },
+};
+
+static const int16_t vp9_row_scan_4x4_nb[16][2] = {
+    {  0,  0 }, {  0,  0 }, {  4,  4 }, {  1,  1 },
+    {  8,  8 }, {  5,  5 }, {  1,  1 }, {  2,  2 },
+    {  9,  9 }, {  2,  2 }, {  6,  6 }, {  3,  3 },
+    { 10, 10 }, {  7,  7 }, { 11, 11 }, {  0,  0 },
+};
+
+static const int16_t vp9_default_scan_8x8_nb[64][2] = { // 64 pairs of values, every value < 64 (8x8 block, default scan)
+    {  0,  0 }, {  0,  0 }, {  1,  1 }, {  8,  1 }, // NOTE(review): "_nb" presumably = two neighbour positions per scan step - confirm in the decoder
+    {  8,  8 }, {  9,  2 }, {  2,  2 }, { 16,  9 },
+    { 16, 16 }, { 17, 10 }, { 10,  3 }, {  3,  3 },
+    { 24, 17 }, { 24, 24 }, { 18, 11 }, { 11,  4 },
+    { 25, 18 }, {  4,  4 }, { 32, 25 }, { 19, 12 },
+    { 26, 19 }, { 32, 32 }, { 12,  5 }, { 33, 26 },
+    {  5,  5 }, { 40, 33 }, { 27, 20 }, { 20, 13 },
+    { 34, 27 }, { 41, 34 }, { 40, 40 }, { 13,  6 },
+    {  6,  6 }, { 35, 28 }, { 28, 21 }, { 42, 35 },
+    { 48, 48 }, { 48, 41 }, { 21, 14 }, { 14,  7 },
+    { 36, 29 }, { 49, 42 }, { 43, 36 }, { 56, 49 },
+    { 29, 22 }, { 22, 15 }, { 50, 43 }, { 44, 37 },
+    { 57, 50 }, { 37, 30 }, { 30, 23 }, { 51, 44 },
+    { 58, 51 }, { 38, 31 }, { 45, 38 }, { 52, 45 },
+    { 59, 52 }, { 46, 39 }, { 53, 46 }, { 60, 53 },
+    { 54, 47 }, { 61, 54 }, { 62, 55 }, {  0,  0 }, // table ends with a { 0, 0 } pair
+};
+
+static const int16_t vp9_col_scan_8x8_nb[64][2] = { // 64 pairs, every value < 64; both values of each pair are equal here
+    {  0,  0 }, {  1,  1 }, {  0,  0 }, {  2,  2 }, // NOTE(review): "_nb" presumably = neighbour positions for the column scan - confirm in the decoder
+    {  8,  8 }, {  3,  3 }, {  9,  9 }, {  8,  8 },
+    {  4,  4 }, { 10, 10 }, { 16, 16 }, { 11, 11 },
+    { 17, 17 }, {  5,  5 }, { 16, 16 }, { 18, 18 },
+    { 12, 12 }, { 24, 24 }, {  6,  6 }, { 25, 25 },
+    { 19, 19 }, { 24, 24 }, { 13, 13 }, { 26, 26 },
+    { 20, 20 }, { 32, 32 }, { 27, 27 }, { 33, 33 },
+    { 14, 14 }, { 21, 21 }, { 34, 34 }, { 32, 32 },
+    { 28, 28 }, { 40, 40 }, { 35, 35 }, { 22, 22 },
+    { 29, 29 }, { 41, 41 }, { 36, 36 }, { 40, 40 },
+    { 42, 42 }, { 30, 30 }, { 43, 43 }, { 48, 48 },
+    { 37, 37 }, { 49, 49 }, { 48, 48 }, { 44, 44 },
+    { 38, 38 }, { 50, 50 }, { 56, 56 }, { 51, 51 },
+    { 45, 45 }, { 57, 57 }, { 52, 52 }, { 58, 58 },
+    { 46, 46 }, { 59, 59 }, { 53, 53 }, { 60, 60 },
+    { 54, 54 }, { 61, 61 }, { 62, 62 }, {  0,  0 }, // table ends with a { 0, 0 } pair
+};
+
+static const int16_t vp9_row_scan_8x8_nb[64][2] = { // 64 pairs, every value < 64; both values of each pair are equal here
+    {  0,  0 }, {  8,  8 }, {  0,  0 }, {  1,  1 }, // NOTE(review): "_nb" presumably = neighbour positions for the row scan - confirm in the decoder
+    { 16, 16 }, {  1,  1 }, {  9,  9 }, { 24, 24 },
+    {  2,  2 }, { 17, 17 }, {  2,  2 }, { 32, 32 },
+    { 10, 10 }, {  3,  3 }, { 25, 25 }, { 18, 18 },
+    { 11, 11 }, {  3,  3 }, { 40, 40 }, { 33, 33 },
+    { 26, 26 }, {  4,  4 }, { 19, 19 }, { 48, 48 },
+    { 12, 12 }, {  4,  4 }, { 34, 34 }, { 27, 27 },
+    {  5,  5 }, { 41, 41 }, { 20, 20 }, {  5,  5 },
+    { 13, 13 }, { 35, 35 }, { 28, 28 }, {  6,  6 },
+    { 42, 42 }, { 21, 21 }, { 49, 49 }, {  6,  6 },
+    { 36, 36 }, { 14, 14 }, { 29, 29 }, { 43, 43 },
+    {  7,  7 }, { 50, 50 }, { 22, 22 }, { 15, 15 },
+    { 37, 37 }, { 44, 44 }, { 30, 30 }, { 51, 51 },
+    { 23, 23 }, { 38, 38 }, { 45, 45 }, { 31, 31 },
+    { 52, 52 }, { 39, 39 }, { 53, 53 }, { 46, 46 },
+    { 54, 54 }, { 47, 47 }, { 55, 55 }, {  0,  0 }, // table ends with a { 0, 0 } pair
+};
+
+static const int16_t vp9_default_scan_16x16_nb[256][2] = { // 256 pairs of values, every value < 256 (16x16 block, default scan)
+    {   0,   0 }, {   0,   0 }, {   1,   1 }, {  16,   1 }, // NOTE(review): "_nb" presumably = two neighbour positions per scan step - confirm in the decoder
+    {  16,  16 }, {   2,   2 }, {  17,   2 }, {  32,  17 },
+    {  32,  32 }, {   3,   3 }, {  33,  18 }, {  18,   3 },
+    {  48,  33 }, {  19,   4 }, {   4,   4 }, {  34,  19 },
+    {  48,  48 }, {  49,  34 }, {  35,  20 }, {  64,  49 },
+    {  20,   5 }, {   5,   5 }, {  50,  35 }, {  64,  64 },
+    {  65,  50 }, {  36,  21 }, {  21,   6 }, {  51,  36 },
+    {   6,   6 }, {  80,  65 }, {  66,  51 }, {  37,  22 },
+    {  81,  66 }, {  52,  37 }, {  22,   7 }, {  80,  80 },
+    {  67,  52 }, {   7,   7 }, {  82,  67 }, {  96,  81 },
+    {  53,  38 }, {  38,  23 }, {  68,  53 }, {  96,  96 },
+    {  23,   8 }, {  97,  82 }, {  83,  68 }, {  69,  54 },
+    {  54,  39 }, {   8,   8 }, {  39,  24 }, {  84,  69 },
+    {  98,  83 }, { 112,  97 }, { 112, 112 }, {  24,   9 },
+    { 113,  98 }, {  99,  84 }, {  70,  55 }, {  85,  70 },
+    {  55,  40 }, {   9,   9 }, {  40,  25 }, { 114,  99 },
+    { 100,  85 }, { 128, 113 }, { 115, 100 }, {  71,  56 },
+    {  86,  71 }, {  25,  10 }, { 129, 114 }, { 128, 128 },
+    { 101,  86 }, {  56,  41 }, {  10,  10 }, {  41,  26 },
+    { 116, 101 }, { 130, 115 }, { 144, 129 }, {  87,  72 },
+    { 102,  87 }, {  26,  11 }, {  72,  57 }, { 131, 116 },
+    { 117, 102 }, { 145, 130 }, {  57,  42 }, { 144, 144 },
+    {  11,  11 }, {  42,  27 }, { 132, 117 }, { 146, 131 },
+    { 103,  88 }, {  88,  73 }, { 118, 103 }, { 160, 145 },
+    {  73,  58 }, { 147, 132 }, { 133, 118 }, {  27,  12 },
+    { 161, 146 }, {  58,  43 }, {  12,  12 }, { 160, 160 },
+    { 119, 104 }, { 148, 133 }, {  89,  74 }, { 134, 119 },
+    { 104,  89 }, { 162, 147 }, {  43,  28 }, {  74,  59 },
+    { 176, 161 }, { 163, 148 }, {  28,  13 }, { 149, 134 },
+    { 120, 105 }, { 135, 120 }, { 177, 162 }, { 164, 149 },
+    {  13,  13 }, { 105,  90 }, {  59,  44 }, {  90,  75 },
+    { 150, 135 }, {  44,  29 }, { 178, 163 }, { 176, 176 },
+    { 136, 121 }, { 165, 150 }, { 121, 106 }, {  75,  60 },
+    { 179, 164 }, { 151, 136 }, {  29,  14 }, {  60,  45 },
+    {  14,  14 }, { 106,  91 }, { 166, 151 }, { 180, 165 },
+    { 192, 177 }, {  91,  76 }, { 192, 192 }, {  45,  30 },
+    { 137, 122 }, { 122, 107 }, { 152, 137 }, { 193, 178 },
+    {  76,  61 }, { 167, 152 }, { 181, 166 }, {  30,  15 },
+    { 194, 179 }, { 208, 193 }, { 182, 167 }, { 107,  92 },
+    { 138, 123 }, {  61,  46 }, { 153, 138 }, {  46,  31 },
+    { 195, 180 }, {  92,  77 }, { 168, 153 }, { 209, 194 },
+    { 196, 181 }, { 208, 208 }, { 123, 108 }, { 183, 168 },
+    { 210, 195 }, {  77,  62 }, { 108,  93 }, { 169, 154 },
+    { 154, 139 }, {  62,  47 }, { 197, 182 }, { 211, 196 },
+    { 184, 169 }, { 224, 209 }, { 224, 224 }, { 139, 124 },
+    {  93,  78 }, { 198, 183 }, { 124, 109 }, {  78,  63 },
+    { 212, 197 }, { 225, 210 }, { 170, 155 }, { 185, 170 },
+    { 155, 140 }, { 213, 198 }, { 199, 184 }, { 109,  94 },
+    { 226, 211 }, { 140, 125 }, {  94,  79 }, { 240, 225 },
+    { 214, 199 }, { 227, 212 }, { 200, 185 }, { 125, 110 },
+    { 241, 226 }, { 186, 171 }, { 171, 156 }, { 156, 141 },
+    { 228, 213 }, { 110,  95 }, { 215, 200 }, { 242, 227 },
+    { 141, 126 }, { 201, 186 }, { 229, 214 }, { 126, 111 },
+    { 216, 201 }, { 243, 228 }, { 172, 157 }, { 187, 172 },
+    { 230, 215 }, { 157, 142 }, { 202, 187 }, { 142, 127 },
+    { 244, 229 }, { 217, 202 }, { 231, 216 }, { 188, 173 },
+    { 245, 230 }, { 158, 143 }, { 173, 158 }, { 232, 217 },
+    { 246, 231 }, { 218, 203 }, { 203, 188 }, { 174, 159 },
+    { 189, 174 }, { 247, 232 }, { 233, 218 }, { 204, 189 },
+    { 219, 204 }, { 248, 233 }, { 190, 175 }, { 234, 219 },
+    { 220, 205 }, { 249, 234 }, { 205, 190 }, { 221, 206 },
+    { 250, 235 }, { 235, 220 }, { 206, 191 }, { 236, 221 },
+    { 222, 207 }, { 251, 236 }, { 237, 222 }, { 252, 237 },
+    { 238, 223 }, { 253, 238 }, { 254, 239 }, {   0,   0 }, // table ends with a { 0, 0 } pair
+};
+
+static const int16_t vp9_col_scan_16x16_nb[256][2] = { // 256 pairs, every value < 256; both values of each pair are equal here
+    {   0,   0 }, {   1,   1 }, {   2,   2 }, {   0,   0 }, // NOTE(review): "_nb" presumably = neighbour positions for the column scan - confirm in the decoder
+    {   3,   3 }, {  16,  16 }, {   4,   4 }, {  17,  17 },
+    {   5,   5 }, {  18,  18 }, {  16,  16 }, {  19,  19 },
+    {   6,   6 }, {  32,  32 }, {  20,  20 }, {  33,  33 },
+    {   7,   7 }, {  34,  34 }, {  21,  21 }, {  32,  32 },
+    {  35,  35 }, {   8,   8 }, {  48,  48 }, {  22,  22 },
+    {  49,  49 }, {  36,  36 }, {   9,   9 }, {  37,  37 },
+    {  50,  50 }, {  23,  23 }, {  48,  48 }, {  51,  51 },
+    {  10,  10 }, {  64,  64 }, {  38,  38 }, {  24,  24 },
+    {  52,  52 }, {  65,  65 }, {  53,  53 }, {  39,  39 },
+    {  66,  66 }, {  11,  11 }, {  64,  64 }, {  25,  25 },
+    {  67,  67 }, {  54,  54 }, {  80,  80 }, {  40,  40 },
+    {  68,  68 }, {  12,  12 }, {  26,  26 }, {  81,  81 },
+    {  55,  55 }, {  69,  69 }, {  82,  82 }, {  41,  41 },
+    {  13,  13 }, {  83,  83 }, {  80,  80 }, {  70,  70 },
+    {  27,  27 }, {  56,  56 }, {  84,  84 }, {  96,  96 },
+    {  14,  14 }, {  71,  71 }, {  97,  97 }, {  42,  42 },
+    {  85,  85 }, {  57,  57 }, {  98,  98 }, {  28,  28 },
+    {  86,  86 }, {  99,  99 }, {  96,  96 }, {  72,  72 },
+    {  43,  43 }, { 100, 100 }, {  58,  58 }, {  29,  29 },
+    { 112, 112 }, {  87,  87 }, { 113, 113 }, {  73,  73 },
+    { 112, 112 }, { 101, 101 }, {  44,  44 }, {  30,  30 },
+    { 114, 114 }, {  59,  59 }, { 102, 102 }, {  88,  88 },
+    { 115, 115 }, {  74,  74 }, { 128, 128 }, { 116, 116 },
+    {  45,  45 }, { 103, 103 }, {  89,  89 }, {  60,  60 },
+    { 129, 129 }, { 117, 117 }, { 130, 130 }, { 131, 131 },
+    { 104, 104 }, {  75,  75 }, {  46,  46 }, { 118, 118 },
+    { 128, 128 }, {  90,  90 }, {  61,  61 }, { 132, 132 },
+    { 105, 105 }, { 144, 144 }, { 119, 119 }, { 145, 145 },
+    { 133, 133 }, {  76,  76 }, { 146, 146 }, { 120, 120 },
+    {  91,  91 }, { 134, 134 }, { 147, 147 }, {  62,  62 },
+    { 106, 106 }, { 135, 135 }, { 121, 121 }, {  92,  92 },
+    { 148, 148 }, { 144, 144 }, {  77,  77 }, { 149, 149 },
+    { 136, 136 }, { 107, 107 }, { 160, 160 }, { 161, 161 },
+    { 150, 150 }, { 122, 122 }, {  78,  78 }, { 137, 137 },
+    { 162, 162 }, { 151, 151 }, {  93,  93 }, { 163, 163 },
+    { 108, 108 }, { 164, 164 }, { 152, 152 }, { 123, 123 },
+    { 138, 138 }, { 160, 160 }, { 165, 165 }, {  94,  94 },
+    { 176, 176 }, { 166, 166 }, { 109, 109 }, { 153, 153 },
+    { 177, 177 }, { 124, 124 }, { 178, 178 }, { 139, 139 },
+    { 167, 167 }, { 154, 154 }, { 110, 110 }, { 179, 179 },
+    { 176, 176 }, { 180, 180 }, { 168, 168 }, { 140, 140 },
+    { 125, 125 }, { 181, 181 }, { 192, 192 }, { 193, 193 },
+    { 155, 155 }, { 182, 182 }, { 169, 169 }, { 194, 194 },
+    { 126, 126 }, { 141, 141 }, { 195, 195 }, { 183, 183 },
+    { 192, 192 }, { 196, 196 }, { 156, 156 }, { 170, 170 },
+    { 142, 142 }, { 184, 184 }, { 197, 197 }, { 208, 208 },
+    { 198, 198 }, { 209, 209 }, { 171, 171 }, { 157, 157 },
+    { 185, 185 }, { 210, 210 }, { 208, 208 }, { 211, 211 },
+    { 199, 199 }, { 224, 224 }, { 158, 158 }, { 212, 212 },
+    { 224, 224 }, { 186, 186 }, { 200, 200 }, { 172, 172 },
+    { 225, 225 }, { 213, 213 }, { 214, 214 }, { 226, 226 },
+    { 201, 201 }, { 227, 227 }, { 187, 187 }, { 240, 240 },
+    { 215, 215 }, { 173, 173 }, { 228, 228 }, { 241, 241 },
+    { 202, 202 }, { 242, 242 }, { 216, 216 }, { 229, 229 },
+    { 174, 174 }, { 188, 188 }, { 243, 243 }, { 230, 230 },
+    { 203, 203 }, { 217, 217 }, { 231, 231 }, { 244, 244 },
+    { 218, 218 }, { 245, 245 }, { 189, 189 }, { 232, 232 },
+    { 204, 204 }, { 190, 190 }, { 246, 246 }, { 233, 233 },
+    { 247, 247 }, { 219, 219 }, { 205, 205 }, { 248, 248 },
+    { 234, 234 }, { 220, 220 }, { 206, 206 }, { 249, 249 },
+    { 235, 235 }, { 221, 221 }, { 250, 250 }, { 222, 222 },
+    { 236, 236 }, { 237, 237 }, { 251, 251 }, { 238, 238 },
+    { 252, 252 }, { 253, 253 }, { 254, 254 }, {   0,   0 }, // table ends with a { 0, 0 } pair
+};
+
+static const int16_t vp9_row_scan_16x16_nb[256][2] = { // 256 pairs, every value < 256; both values of each pair are equal here
+    {   0,   0 }, {  16,  16 }, {   0,   0 }, {  32,  32 }, // NOTE(review): "_nb" presumably = neighbour positions for the row scan - confirm in the decoder
+    {   1,   1 }, {  48,  48 }, {  17,  17 }, {   1,   1 },
+    {  64,  64 }, {   2,   2 }, {  33,  33 }, {  80,  80 },
+    {  18,  18 }, {   2,   2 }, {  49,  49 }, {   3,   3 },
+    {  96,  96 }, {  34,  34 }, {  65,  65 }, {  19,  19 },
+    {   3,   3 }, { 112, 112 }, {  50,  50 }, {   4,   4 },
+    {  81,  81 }, {  35,  35 }, {  66,  66 }, {   4,   4 },
+    { 128, 128 }, {  20,  20 }, {  51,  51 }, {  97,  97 },
+    {  82,  82 }, {   5,   5 }, {  36,  36 }, { 144, 144 },
+    {  67,  67 }, { 113, 113 }, {  21,  21 }, {  52,  52 },
+    {   5,   5 }, {  98,  98 }, { 160, 160 }, {  83,  83 },
+    {  37,  37 }, {   6,   6 }, {  68,  68 }, { 129, 129 },
+    {  22,  22 }, {  53,  53 }, { 114, 114 }, {   6,   6 },
+    {  99,  99 }, { 176, 176 }, {  84,  84 }, {  38,  38 },
+    {   7,   7 }, {  69,  69 }, { 145, 145 }, { 130, 130 },
+    { 115, 115 }, {  23,  23 }, {  54,  54 }, { 192, 192 },
+    { 100, 100 }, {   7,   7 }, {  85,  85 }, { 161, 161 },
+    {  39,  39 }, {  70,  70 }, {   8,   8 }, { 146, 146 },
+    { 131, 131 }, { 116, 116 }, {  55,  55 }, { 208, 208 },
+    { 101, 101 }, {  24,  24 }, {  86,  86 }, {   8,   8 },
+    { 132, 132 }, {  40,  40 }, {  71,  71 }, { 177, 177 },
+    { 147, 147 }, { 224, 224 }, { 117, 117 }, { 162, 162 },
+    {   9,   9 }, { 102, 102 }, {  56,  56 }, {  25,  25 },
+    {  87,  87 }, { 148, 148 }, {   9,   9 }, { 133, 133 },
+    {  72,  72 }, { 118, 118 }, { 193, 193 }, { 163, 163 },
+    {  41,  41 }, { 103, 103 }, { 178, 178 }, {  10,  10 },
+    {  57,  57 }, { 149, 149 }, { 134, 134 }, {  88,  88 },
+    {  26,  26 }, { 119, 119 }, {  10,  10 }, { 164, 164 },
+    { 104, 104 }, {  73,  73 }, { 209, 209 }, { 179, 179 },
+    {  42,  42 }, {  11,  11 }, { 194, 194 }, { 135, 135 },
+    { 165, 165 }, { 150, 150 }, {  58,  58 }, {  27,  27 },
+    {  89,  89 }, {  11,  11 }, { 120, 120 }, {  74,  74 },
+    {  43,  43 }, { 225, 225 }, { 105, 105 }, {  12,  12 },
+    { 180, 180 }, { 151, 151 }, { 195, 195 }, { 136, 136 },
+    {  28,  28 }, { 166, 166 }, { 121, 121 }, {  59,  59 },
+    {  12,  12 }, { 210, 210 }, {  90,  90 }, { 106, 106 },
+    {  44,  44 }, { 181, 181 }, {  75,  75 }, { 152, 152 },
+    {  13,  13 }, { 167, 167 }, { 137, 137 }, {  13,  13 },
+    {  60,  60 }, { 196, 196 }, { 122, 122 }, {  29,  29 },
+    {  91,  91 }, {  14,  14 }, { 182, 182 }, {  76,  76 },
+    { 211, 211 }, { 153, 153 }, {  14,  14 }, { 107, 107 },
+    { 138, 138 }, {  45,  45 }, { 226, 226 }, { 168, 168 },
+    { 197, 197 }, { 123, 123 }, {  30,  30 }, {  61,  61 },
+    {  15,  15 }, {  92,  92 }, { 154, 154 }, { 183, 183 },
+    { 169, 169 }, { 108, 108 }, { 212, 212 }, {  77,  77 },
+    { 139, 139 }, { 198, 198 }, {  46,  46 }, { 124, 124 },
+    { 227, 227 }, {  62,  62 }, {  31,  31 }, { 184, 184 },
+    {  93,  93 }, { 170, 170 }, { 155, 155 }, { 185, 185 },
+    {  78,  78 }, {  47,  47 }, { 199, 199 }, { 213, 213 },
+    { 140, 140 }, {  63,  63 }, { 109, 109 }, { 125, 125 },
+    {  94,  94 }, { 200, 200 }, { 171, 171 }, { 156, 156 },
+    { 228, 228 }, { 186, 186 }, { 214, 214 }, { 201, 201 },
+    {  79,  79 }, { 141, 141 }, { 110, 110 }, { 229, 229 },
+    {  95,  95 }, { 126, 126 }, { 215, 215 }, { 172, 172 },
+    { 111, 111 }, { 142, 142 }, { 202, 202 }, { 157, 157 },
+    { 216, 216 }, { 230, 230 }, { 217, 217 }, { 187, 187 },
+    { 127, 127 }, { 231, 231 }, { 158, 158 }, { 173, 173 },
+    { 143, 143 }, { 203, 203 }, { 188, 188 }, { 232, 232 },
+    { 218, 218 }, { 233, 233 }, { 159, 159 }, { 174, 174 },
+    { 204, 204 }, { 189, 189 }, { 234, 234 }, { 219, 219 },
+    { 175, 175 }, { 205, 205 }, { 235, 235 }, { 220, 220 },
+    { 190, 190 }, { 236, 236 }, { 206, 206 }, { 191, 191 },
+    { 221, 221 }, { 207, 207 }, { 237, 237 }, { 222, 222 },
+    { 238, 238 }, { 223, 223 }, { 239, 239 }, {   0,   0 }, // table ends with a { 0, 0 } pair
+};
+
+static const int16_t vp9_default_scan_32x32_nb[1024][2] = { // 1024 pairs of values, every value < 1024 (32x32 block, default scan)
+    {    0,    0 }, {    0,    0 }, {    1,    1 }, {   32,    1 }, // NOTE(review): "_nb" presumably = two neighbour positions per scan step - confirm in the decoder
+    {   32,   32 }, {    2,    2 }, {   33,    2 }, {   64,   33 },
+    {    3,    3 }, {   64,   64 }, {   34,    3 }, {   65,   34 },
+    {    4,    4 }, {   35,    4 }, {   96,   65 }, {   66,   35 },
+    {   96,   96 }, {   97,   66 }, {   67,   36 }, {   36,    5 },
+    {    5,    5 }, {  128,   97 }, {   98,   67 }, {    6,    6 },
+    {  128,  128 }, {   68,   37 }, {   37,    6 }, {  129,   98 },
+    {   99,   68 }, {  160,  129 }, {  130,   99 }, {   38,    7 },
+    {   69,   38 }, {    7,    7 }, {  100,   69 }, {  161,  130 },
+    {  131,  100 }, {  160,  160 }, {   70,   39 }, {   39,    8 },
+    {    8,    8 }, {  101,   70 }, {  162,  131 }, {  132,  101 },
+    {  192,  161 }, {   71,   40 }, {  192,  192 }, {  102,   71 },
+    {   40,    9 }, {  163,  132 }, {    9,    9 }, {  193,  162 },
+    {  133,  102 }, {  164,  133 }, {   72,   41 }, {  103,   72 },
+    {  134,  103 }, {  224,  193 }, {   41,   10 }, {  194,  163 },
+    {   10,   10 }, {  224,  224 }, {  165,  134 }, {  225,  194 },
+    {  195,  164 }, {   73,   42 }, {  104,   73 }, {  135,  104 },
+    {   42,   11 }, {   11,   11 }, {  166,  135 }, {  196,  165 },
+    {  226,  195 }, {  256,  225 }, {   74,   43 }, {  105,   74 },
+    {  136,  105 }, {  227,  196 }, {   43,   12 }, {  197,  166 },
+    {  167,  136 }, {  257,  226 }, {  256,  256 }, {   12,   12 },
+    {  228,  197 }, {   75,   44 }, {  106,   75 }, {  198,  167 },
+    {  137,  106 }, {  258,  227 }, {  168,  137 }, {  288,  257 },
+    {   44,   13 }, {  229,  198 }, {  259,  228 }, {  199,  168 },
+    {  107,   76 }, {   13,   13 }, {  169,  138 }, {  138,  107 },
+    {  288,  288 }, {  289,  258 }, {   76,   45 }, {  230,  199 },
+    {  260,  229 }, {   45,   14 }, {  200,  169 }, {  139,  108 },
+    {  290,  259 }, {  108,   77 }, {  231,  200 }, {  320,  289 },
+    {  261,  230 }, {  170,  139 }, {   77,   46 }, {  291,  260 },
+    {   14,   14 }, {  321,  290 }, {  201,  170 }, {  262,  231 },
+    {  320,  320 }, {  171,  140 }, {  292,  261 }, {  232,  201 },
+    {  140,  109 }, {  322,  291 }, {  109,   78 }, {   46,   15 },
+    {  202,  171 }, {  263,  232 }, {  233,  202 }, {  293,  262 },
+    {  352,  321 }, {  323,  292 }, {   15,   15 }, {   78,   47 },
+    {  203,  172 }, {  264,  233 }, {  294,  263 }, {  324,  293 },
+    {  172,  141 }, {  353,  322 }, {  141,  110 }, {  234,  203 },
+    {  352,  352 }, {   47,   16 }, {  295,  264 }, {  110,   79 },
+    {  265,  234 }, {  354,  323 }, {  325,  294 }, {   79,   48 },
+    {   16,   16 }, {  204,  173 }, {  235,  204 }, {  173,  142 },
+    {  355,  324 }, {  384,  353 }, {  326,  295 }, {  142,  111 },
+    {  296,  265 }, {  266,  235 }, {  356,  325 }, {  385,  354 },
+    {  111,   80 }, {   48,   17 }, {  327,  296 }, {  297,  266 },
+    {  205,  174 }, {  384,  384 }, {  236,  205 }, {  357,  326 },
+    {  386,  355 }, {   80,   49 }, {  174,  143 }, {   17,   17 },
+    {  328,  297 }, {  358,  327 }, {  387,  356 }, {  298,  267 },
+    {  329,  298 }, {  388,  357 }, {  112,   81 }, {  416,  385 },
+    {  237,  206 }, {  359,  328 }, {   49,   18 }, {  206,  175 },
+    {  417,  386 }, {  389,  358 }, {  330,  299 }, {   18,   18 },
+    {  416,  416 }, {  360,  329 }, {   81,   50 }, {  418,  387 },
+    {  390,  359 }, {  238,  207 }, {   50,   19 }, {  361,  330 },
+    {  419,  388 }, {  113,   82 }, {  448,  417 }, {  448,  448 },
+    {  420,  389 }, {   82,   51 }, {  362,  331 }, {  449,  418 },
+    {  421,  390 }, {  480,  480 }, {  450,  419 }, {  422,  391 },
+    {  114,   83 }, {  451,  420 }, {  480,  449 }, {  452,  421 },
+    {  481,  450 }, {  453,  422 }, {  512,  512 }, {  482,  451 },
+    {  454,  423 }, {  512,  481 }, {  483,  452 }, {  513,  482 },
+    {  484,  453 }, {  514,  483 }, {  485,  454 }, {  544,  513 },
+    {  544,  544 }, {  486,  455 }, {  545,  514 }, {  546,  515 },
+    {  576,  576 }, {  576,  545 }, {  577,  546 }, {  578,  547 },
+    {  608,  577 }, {  609,  578 }, {  610,  579 }, {   19,   19 },
+    {  143,  112 }, {  267,  236 }, {  391,  360 }, {  515,  484 },
+    {  608,  608 }, {   20,   20 }, {   51,   20 }, {  144,  113 },
+    {  175,  144 }, {  268,  237 }, {  299,  268 }, {  392,  361 },
+    {  423,  392 }, {  516,  485 }, {  547,  516 }, {  640,  609 },
+    {  640,  640 }, {   21,   21 }, {   52,   21 }, {   83,   52 },
+    {  145,  114 }, {  176,  145 }, {  207,  176 }, {  269,  238 },
+    {  300,  269 }, {  331,  300 }, {  393,  362 }, {  424,  393 },
+    {  455,  424 }, {  517,  486 }, {  548,  517 }, {  579,  548 },
+    {  641,  610 }, {  672,  641 }, {  672,  672 }, {   22,   22 },
+    {   53,   22 }, {   84,   53 }, {  115,   84 }, {  146,  115 },
+    {  177,  146 }, {  208,  177 }, {  239,  208 }, {  270,  239 },
+    {  301,  270 }, {  332,  301 }, {  363,  332 }, {  394,  363 },
+    {  425,  394 }, {  456,  425 }, {  487,  456 }, {  518,  487 },
+    {  549,  518 }, {  580,  549 }, {  611,  580 }, {  642,  611 },
+    {  673,  642 }, {  704,  673 }, {  704,  704 }, {   54,   23 },
+    {   85,   54 }, {  116,   85 }, {  178,  147 }, {  209,  178 },
+    {  240,  209 }, {  302,  271 }, {  333,  302 }, {  364,  333 },
+    {  426,  395 }, {  457,  426 }, {  488,  457 }, {  550,  519 },
+    {  581,  550 }, {  612,  581 }, {  674,  643 }, {  705,  674 },
+    {  736,  705 }, {   86,   55 }, {  117,   86 }, {  210,  179 },
+    {  241,  210 }, {  334,  303 }, {  365,  334 }, {  458,  427 },
+    {  489,  458 }, {  582,  551 }, {  613,  582 }, {  706,  675 },
+    {  737,  706 }, {  118,   87 }, {  242,  211 }, {  366,  335 },
+    {  490,  459 }, {  614,  583 }, {  738,  707 }, {   23,   23 },
+    {  147,  116 }, {  271,  240 }, {  395,  364 }, {  519,  488 },
+    {  643,  612 }, {  736,  736 }, {   24,   24 }, {   55,   24 },
+    {  148,  117 }, {  179,  148 }, {  272,  241 }, {  303,  272 },
+    {  396,  365 }, {  427,  396 }, {  520,  489 }, {  551,  520 },
+    {  644,  613 }, {  675,  644 }, {  768,  737 }, {  768,  768 },
+    {   25,   25 }, {   56,   25 }, {   87,   56 }, {  149,  118 },
+    {  180,  149 }, {  211,  180 }, {  273,  242 }, {  304,  273 },
+    {  335,  304 }, {  397,  366 }, {  428,  397 }, {  459,  428 },
+    {  521,  490 }, {  552,  521 }, {  583,  552 }, {  645,  614 },
+    {  676,  645 }, {  707,  676 }, {  769,  738 }, {  800,  769 },
+    {  800,  800 }, {   26,   26 }, {   57,   26 }, {   88,   57 },
+    {  119,   88 }, {  150,  119 }, {  181,  150 }, {  212,  181 },
+    {  243,  212 }, {  274,  243 }, {  305,  274 }, {  336,  305 },
+    {  367,  336 }, {  398,  367 }, {  429,  398 }, {  460,  429 },
+    {  491,  460 }, {  522,  491 }, {  553,  522 }, {  584,  553 },
+    {  615,  584 }, {  646,  615 }, {  677,  646 }, {  708,  677 },
+    {  739,  708 }, {  770,  739 }, {  801,  770 }, {  832,  801 },
+    {  832,  832 }, {   58,   27 }, {   89,   58 }, {  120,   89 },
+    {  182,  151 }, {  213,  182 }, {  244,  213 }, {  306,  275 },
+    {  337,  306 }, {  368,  337 }, {  430,  399 }, {  461,  430 },
+    {  492,  461 }, {  554,  523 }, {  585,  554 }, {  616,  585 },
+    {  678,  647 }, {  709,  678 }, {  740,  709 }, {  802,  771 },
+    {  833,  802 }, {  864,  833 }, {   90,   59 }, {  121,   90 },
+    {  214,  183 }, {  245,  214 }, {  338,  307 }, {  369,  338 },
+    {  462,  431 }, {  493,  462 }, {  586,  555 }, {  617,  586 },
+    {  710,  679 }, {  741,  710 }, {  834,  803 }, {  865,  834 },
+    {  122,   91 }, {  246,  215 }, {  370,  339 }, {  494,  463 },
+    {  618,  587 }, {  742,  711 }, {  866,  835 }, {   27,   27 },
+    {  151,  120 }, {  275,  244 }, {  399,  368 }, {  523,  492 },
+    {  647,  616 }, {  771,  740 }, {  864,  864 }, {   28,   28 },
+    {   59,   28 }, {  152,  121 }, {  183,  152 }, {  276,  245 },
+    {  307,  276 }, {  400,  369 }, {  431,  400 }, {  524,  493 },
+    {  555,  524 }, {  648,  617 }, {  679,  648 }, {  772,  741 },
+    {  803,  772 }, {  896,  865 }, {  896,  896 }, {   29,   29 },
+    {   60,   29 }, {   91,   60 }, {  153,  122 }, {  184,  153 },
+    {  215,  184 }, {  277,  246 }, {  308,  277 }, {  339,  308 },
+    {  401,  370 }, {  432,  401 }, {  463,  432 }, {  525,  494 },
+    {  556,  525 }, {  587,  556 }, {  649,  618 }, {  680,  649 },
+    {  711,  680 }, {  773,  742 }, {  804,  773 }, {  835,  804 },
+    {  897,  866 }, {  928,  897 }, {  928,  928 }, {   30,   30 },
+    {   61,   30 }, {   92,   61 }, {  123,   92 }, {  154,  123 },
+    {  185,  154 }, {  216,  185 }, {  247,  216 }, {  278,  247 },
+    {  309,  278 }, {  340,  309 }, {  371,  340 }, {  402,  371 },
+    {  433,  402 }, {  464,  433 }, {  495,  464 }, {  526,  495 },
+    {  557,  526 }, {  588,  557 }, {  619,  588 }, {  650,  619 },
+    {  681,  650 }, {  712,  681 }, {  743,  712 }, {  774,  743 },
+    {  805,  774 }, {  836,  805 }, {  867,  836 }, {  898,  867 },
+    {  929,  898 }, {  960,  929 }, {  960,  960 }, {   62,   31 },
+    {   93,   62 }, {  124,   93 }, {  186,  155 }, {  217,  186 },
+    {  248,  217 }, {  310,  279 }, {  341,  310 }, {  372,  341 },
+    {  434,  403 }, {  465,  434 }, {  496,  465 }, {  558,  527 },
+    {  589,  558 }, {  620,  589 }, {  682,  651 }, {  713,  682 },
+    {  744,  713 }, {  806,  775 }, {  837,  806 }, {  868,  837 },
+    {  930,  899 }, {  961,  930 }, {  992,  961 }, {   94,   63 },
+    {  125,   94 }, {  218,  187 }, {  249,  218 }, {  342,  311 },
+    {  373,  342 }, {  466,  435 }, {  497,  466 }, {  590,  559 },
+    {  621,  590 }, {  714,  683 }, {  745,  714 }, {  838,  807 },
+    {  869,  838 }, {  962,  931 }, {  993,  962 }, {  126,   95 },
+    {  250,  219 }, {  374,  343 }, {  498,  467 }, {  622,  591 },
+    {  746,  715 }, {  870,  839 }, {  994,  963 }, {  155,  124 },
+    {  279,  248 }, {  403,  372 }, {  527,  496 }, {  651,  620 },
+    {  775,  744 }, {  899,  868 }, {  156,  125 }, {  187,  156 },
+    {  280,  249 }, {  311,  280 }, {  404,  373 }, {  435,  404 },
+    {  528,  497 }, {  559,  528 }, {  652,  621 }, {  683,  652 },
+    {  776,  745 }, {  807,  776 }, {  900,  869 }, {  931,  900 },
+    {  157,  126 }, {  188,  157 }, {  219,  188 }, {  281,  250 },
+    {  312,  281 }, {  343,  312 }, {  405,  374 }, {  436,  405 },
+    {  467,  436 }, {  529,  498 }, {  560,  529 }, {  591,  560 },
+    {  653,  622 }, {  684,  653 }, {  715,  684 }, {  777,  746 },
+    {  808,  777 }, {  839,  808 }, {  901,  870 }, {  932,  901 },
+    {  963,  932 }, {  158,  127 }, {  189,  158 }, {  220,  189 },
+    {  251,  220 }, {  282,  251 }, {  313,  282 }, {  344,  313 },
+    {  375,  344 }, {  406,  375 }, {  437,  406 }, {  468,  437 },
+    {  499,  468 }, {  530,  499 }, {  561,  530 }, {  592,  561 },
+    {  623,  592 }, {  654,  623 }, {  685,  654 }, {  716,  685 },
+    {  747,  716 }, {  778,  747 }, {  809,  778 }, {  840,  809 },
+    {  871,  840 }, {  902,  871 }, {  933,  902 }, {  964,  933 },
+    {  995,  964 }, {  190,  159 }, {  221,  190 }, {  252,  221 },
+    {  314,  283 }, {  345,  314 }, {  376,  345 }, {  438,  407 },
+    {  469,  438 }, {  500,  469 }, {  562,  531 }, {  593,  562 },
+    {  624,  593 }, {  686,  655 }, {  717,  686 }, {  748,  717 },
+    {  810,  779 }, {  841,  810 }, {  872,  841 }, {  934,  903 },
+    {  965,  934 }, {  996,  965 }, {  222,  191 }, {  253,  222 },
+    {  346,  315 }, {  377,  346 }, {  470,  439 }, {  501,  470 },
+    {  594,  563 }, {  625,  594 }, {  718,  687 }, {  749,  718 },
+    {  842,  811 }, {  873,  842 }, {  966,  935 }, {  997,  966 },
+    {  254,  223 }, {  378,  347 }, {  502,  471 }, {  626,  595 },
+    {  750,  719 }, {  874,  843 }, {  998,  967 }, {  283,  252 },
+    {  407,  376 }, {  531,  500 }, {  655,  624 }, {  779,  748 },
+    {  903,  872 }, {  284,  253 }, {  315,  284 }, {  408,  377 },
+    {  439,  408 }, {  532,  501 }, {  563,  532 }, {  656,  625 },
+    {  687,  656 }, {  780,  749 }, {  811,  780 }, {  904,  873 },
+    {  935,  904 }, {  285,  254 }, {  316,  285 }, {  347,  316 },
+    {  409,  378 }, {  440,  409 }, {  471,  440 }, {  533,  502 },
+    {  564,  533 }, {  595,  564 }, {  657,  626 }, {  688,  657 },
+    {  719,  688 }, {  781,  750 }, {  812,  781 }, {  843,  812 },
+    {  905,  874 }, {  936,  905 }, {  967,  936 }, {  286,  255 },
+    {  317,  286 }, {  348,  317 }, {  379,  348 }, {  410,  379 },
+    {  441,  410 }, {  472,  441 }, {  503,  472 }, {  534,  503 },
+    {  565,  534 }, {  596,  565 }, {  627,  596 }, {  658,  627 },
+    {  689,  658 }, {  720,  689 }, {  751,  720 }, {  782,  751 },
+    {  813,  782 }, {  844,  813 }, {  875,  844 }, {  906,  875 },
+    {  937,  906 }, {  968,  937 }, {  999,  968 }, {  318,  287 },
+    {  349,  318 }, {  380,  349 }, {  442,  411 }, {  473,  442 },
+    {  504,  473 }, {  566,  535 }, {  597,  566 }, {  628,  597 },
+    {  690,  659 }, {  721,  690 }, {  752,  721 }, {  814,  783 },
+    {  845,  814 }, {  876,  845 }, {  938,  907 }, {  969,  938 },
+    { 1000,  969 }, {  350,  319 }, {  381,  350 }, {  474,  443 },
+    {  505,  474 }, {  598,  567 }, {  629,  598 }, {  722,  691 },
+    {  753,  722 }, {  846,  815 }, {  877,  846 }, {  970,  939 },
+    { 1001,  970 }, {  382,  351 }, {  506,  475 }, {  630,  599 },
+    {  754,  723 }, {  878,  847 }, { 1002,  971 }, {  411,  380 },
+    {  535,  504 }, {  659,  628 }, {  783,  752 }, {  907,  876 },
+    {  412,  381 }, {  443,  412 }, {  536,  505 }, {  567,  536 },
+    {  660,  629 }, {  691,  660 }, {  784,  753 }, {  815,  784 },
+    {  908,  877 }, {  939,  908 }, {  413,  382 }, {  444,  413 },
+    {  475,  444 }, {  537,  506 }, {  568,  537 }, {  599,  568 },
+    {  661,  630 }, {  692,  661 }, {  723,  692 }, {  785,  754 },
+    {  816,  785 }, {  847,  816 }, {  909,  878 }, {  940,  909 },
+    {  971,  940 }, {  414,  383 }, {  445,  414 }, {  476,  445 },
+    {  507,  476 }, {  538,  507 }, {  569,  538 }, {  600,  569 },
+    {  631,  600 }, {  662,  631 }, {  693,  662 }, {  724,  693 },
+    {  755,  724 }, {  786,  755 }, {  817,  786 }, {  848,  817 },
+    {  879,  848 }, {  910,  879 }, {  941,  910 }, {  972,  941 },
+    { 1003,  972 }, {  446,  415 }, {  477,  446 }, {  508,  477 },
+    {  570,  539 }, {  601,  570 }, {  632,  601 }, {  694,  663 },
+    {  725,  694 }, {  756,  725 }, {  818,  787 }, {  849,  818 },
+    {  880,  849 }, {  942,  911 }, {  973,  942 }, { 1004,  973 },
+    {  478,  447 }, {  509,  478 }, {  602,  571 }, {  633,  602 },
+    {  726,  695 }, {  757,  726 }, {  850,  819 }, {  881,  850 },
+    {  974,  943 }, { 1005,  974 }, {  510,  479 }, {  634,  603 },
+    {  758,  727 }, {  882,  851 }, { 1006,  975 }, {  539,  508 },
+    {  663,  632 }, {  787,  756 }, {  911,  880 }, {  540,  509 },
+    {  571,  540 }, {  664,  633 }, {  695,  664 }, {  788,  757 },
+    {  819,  788 }, {  912,  881 }, {  943,  912 }, {  541,  510 },
+    {  572,  541 }, {  603,  572 }, {  665,  634 }, {  696,  665 },
+    {  727,  696 }, {  789,  758 }, {  820,  789 }, {  851,  820 },
+    {  913,  882 }, {  944,  913 }, {  975,  944 }, {  542,  511 },
+    {  573,  542 }, {  604,  573 }, {  635,  604 }, {  666,  635 },
+    {  697,  666 }, {  728,  697 }, {  759,  728 }, {  790,  759 },
+    {  821,  790 }, {  852,  821 }, {  883,  852 }, {  914,  883 },
+    {  945,  914 }, {  976,  945 }, { 1007,  976 }, {  574,  543 },
+    {  605,  574 }, {  636,  605 }, {  698,  667 }, {  729,  698 },
+    {  760,  729 }, {  822,  791 }, {  853,  822 }, {  884,  853 },
+    {  946,  915 }, {  977,  946 }, { 1008,  977 }, {  606,  575 },
+    {  637,  606 }, {  730,  699 }, {  761,  730 }, {  854,  823 },
+    {  885,  854 }, {  978,  947 }, { 1009,  978 }, {  638,  607 },
+    {  762,  731 }, {  886,  855 }, { 1010,  979 }, {  667,  636 },
+    {  791,  760 }, {  915,  884 }, {  668,  637 }, {  699,  668 },
+    {  792,  761 }, {  823,  792 }, {  916,  885 }, {  947,  916 },
+    {  669,  638 }, {  700,  669 }, {  731,  700 }, {  793,  762 },
+    {  824,  793 }, {  855,  824 }, {  917,  886 }, {  948,  917 },
+    {  979,  948 }, {  670,  639 }, {  701,  670 }, {  732,  701 },
+    {  763,  732 }, {  794,  763 }, {  825,  794 }, {  856,  825 },
+    {  887,  856 }, {  918,  887 }, {  949,  918 }, {  980,  949 },
+    { 1011,  980 }, {  702,  671 }, {  733,  702 }, {  764,  733 },
+    {  826,  795 }, {  857,  826 }, {  888,  857 }, {  950,  919 },
+    {  981,  950 }, { 1012,  981 }, {  734,  703 }, {  765,  734 },
+    {  858,  827 }, {  889,  858 }, {  982,  951 }, { 1013,  982 },
+    {  766,  735 }, {  890,  859 }, { 1014,  983 }, {  795,  764 },
+    {  919,  888 }, {  796,  765 }, {  827,  796 }, {  920,  889 },
+    {  951,  920 }, {  797,  766 }, {  828,  797 }, {  859,  828 },
+    {  921,  890 }, {  952,  921 }, {  983,  952 }, {  798,  767 },
+    {  829,  798 }, {  860,  829 }, {  891,  860 }, {  922,  891 },
+    {  953,  922 }, {  984,  953 }, { 1015,  984 }, {  830,  799 },
+    {  861,  830 }, {  892,  861 }, {  954,  923 }, {  985,  954 },
+    { 1016,  985 }, {  862,  831 }, {  893,  862 }, {  986,  955 },
+    { 1017,  986 }, {  894,  863 }, { 1018,  987 }, {  923,  892 },
+    {  924,  893 }, {  955,  924 }, {  925,  894 }, {  956,  925 },
+    {  987,  956 }, {  926,  895 }, {  957,  926 }, {  988,  957 },
+    { 1019,  988 }, {  958,  927 }, {  989,  958 }, { 1020,  989 },
+    {  990,  959 }, { 1021,  990 }, { 1022,  991 }, {    0,    0 }, // table ends with a { 0, 0 } pair
+};
+
+static const int16_t (* const vp9_scans_nb[5][4])[2] = { // 5 groups x 4 slots of pointers to the [N][2] "_nb" tables
+    { // 4x4; slot order is default, col, row, default - NOTE(review): presumably indexed [transform size][transform type], confirm at the use site
+        vp9_default_scan_4x4_nb, vp9_col_scan_4x4_nb,
+        vp9_row_scan_4x4_nb, vp9_default_scan_4x4_nb
+    }, { // 8x8
+        vp9_default_scan_8x8_nb, vp9_col_scan_8x8_nb,
+        vp9_row_scan_8x8_nb, vp9_default_scan_8x8_nb
+    }, { // 16x16
+        vp9_default_scan_16x16_nb, vp9_col_scan_16x16_nb,
+        vp9_row_scan_16x16_nb, vp9_default_scan_16x16_nb
+    }, { // 32x32: the default table fills all four slots
+        vp9_default_scan_32x32_nb, vp9_default_scan_32x32_nb,
+        vp9_default_scan_32x32_nb, vp9_default_scan_32x32_nb
+    }, { // lossless
+        vp9_default_scan_4x4_nb, vp9_default_scan_4x4_nb,
+        vp9_default_scan_4x4_nb, vp9_default_scan_4x4_nb
+    }
+};
+
+static const uint8_t vp9_model_pareto8[256][8] = { /* 256 rows of 8 byte values; NOTE(review): presumably a Pareto-model probability lookup as the name suggests - confirm what the row index means */
+    {   6,  86, 128,  11,  87,  42,  91,  52 }, /* row 0 holds the same values as row 2 */
+    {   3,  86, 128,   6,  86,  23,  88,  29 },
+    {   6,  86, 128,  11,  87,  42,  91,  52 },
+    {   9,  86, 129,  17,  88,  61,  94,  76 },
+    {  12,  86, 129,  22,  88,  77,  97,  93 },
+    {  15,  87, 129,  28,  89,  93, 100, 110 },
+    {  17,  87, 129,  33,  90, 105, 103, 123 },
+    {  20,  88, 130,  38,  91, 118, 106, 136 },
+    {  23,  88, 130,  43,  91, 128, 108, 146 },
+    {  26,  89, 131,  48,  92, 139, 111, 156 },
+    {  28,  89, 131,  53,  93, 147, 114, 163 },
+    {  31,  90, 131,  58,  94, 156, 117, 171 },
+    {  34,  90, 131,  62,  94, 163, 119, 177 },
+    {  37,  90, 132,  66,  95, 171, 122, 184 },
+    {  39,  90, 132,  70,  96, 177, 124, 189 },
+    {  42,  91, 132,  75,  97, 183, 127, 194 },
+    {  44,  91, 132,  79,  97, 188, 129, 198 },
+    {  47,  92, 133,  83,  98, 193, 132, 202 },
+    {  49,  92, 133,  86,  99, 197, 134, 205 },
+    {  52,  93, 133,  90, 100, 201, 137, 208 },
+    {  54,  93, 133,  94, 100, 204, 139, 211 },
+    {  57,  94, 134,  98, 101, 208, 142, 214 },
+    {  59,  94, 134, 101, 102, 211, 144, 216 },
+    {  62,  94, 135, 105, 103, 214, 146, 218 },
+    {  64,  94, 135, 108, 103, 216, 148, 220 },
+    {  66,  95, 135, 111, 104, 219, 151, 222 },
+    {  68,  95, 135, 114, 105, 221, 153, 223 },
+    {  71,  96, 136, 117, 106, 224, 155, 225 },
+    {  73,  96, 136, 120, 106, 225, 157, 226 },
+    {  76,  97, 136, 123, 107, 227, 159, 228 },
+    {  78,  97, 136, 126, 108, 229, 160, 229 },
+    {  80,  98, 137, 129, 109, 231, 162, 231 },
+    {  82,  98, 137, 131, 109, 232, 164, 232 },
+    {  84,  98, 138, 134, 110, 234, 166, 233 },
+    {  86,  98, 138, 137, 111, 235, 168, 234 },
+    {  89,  99, 138, 140, 112, 236, 170, 235 },
+    {  91,  99, 138, 142, 112, 237, 171, 235 },
+    {  93, 100, 139, 145, 113, 238, 173, 236 },
+    {  95, 100, 139, 147, 114, 239, 174, 237 },
+    {  97, 101, 140, 149, 115, 240, 176, 238 },
+    {  99, 101, 140, 151, 115, 241, 177, 238 },
+    { 101, 102, 140, 154, 116, 242, 179, 239 },
+    { 103, 102, 140, 156, 117, 242, 180, 239 },
+    { 105, 103, 141, 158, 118, 243, 182, 240 },
+    { 107, 103, 141, 160, 118, 243, 183, 240 },
+    { 109, 104, 141, 162, 119, 244, 185, 241 },
+    { 111, 104, 141, 164, 119, 244, 186, 241 },
+    { 113, 104, 142, 166, 120, 245, 187, 242 },
+    { 114, 104, 142, 168, 121, 245, 188, 242 },
+    { 116, 105, 143, 170, 122, 246, 190, 243 },
+    { 118, 105, 143, 171, 122, 246, 191, 243 },
+    { 120, 106, 143, 173, 123, 247, 192, 244 },
+    { 121, 106, 143, 175, 124, 247, 193, 244 },
+    { 123, 107, 144, 177, 125, 248, 195, 244 },
+    { 125, 107, 144, 178, 125, 248, 196, 244 },
+    { 127, 108, 145, 180, 126, 249, 197, 245 },
+    { 128, 108, 145, 181, 127, 249, 198, 245 },
+    { 130, 109, 145, 183, 128, 249, 199, 245 },
+    { 132, 109, 145, 184, 128, 249, 200, 245 },
+    { 134, 110, 146, 186, 129, 250, 201, 246 },
+    { 135, 110, 146, 187, 130, 250, 202, 246 },
+    { 137, 111, 147, 189, 131, 251, 203, 246 },
+    { 138, 111, 147, 190, 131, 251, 204, 246 },
+    { 140, 112, 147, 192, 132, 251, 205, 247 },
+    { 141, 112, 147, 193, 132, 251, 206, 247 },
+    { 143, 113, 148, 194, 133, 251, 207, 247 },
+    { 144, 113, 148, 195, 134, 251, 207, 247 },
+    { 146, 114, 149, 197, 135, 252, 208, 248 },
+    { 147, 114, 149, 198, 135, 252, 209, 248 },
+    { 149, 115, 149, 199, 136, 252, 210, 248 },
+    { 150, 115, 149, 200, 137, 252, 210, 248 },
+    { 152, 115, 150, 201, 138, 252, 211, 248 },
+    { 153, 115, 150, 202, 138, 252, 212, 248 },
+    { 155, 116, 151, 204, 139, 253, 213, 249 },
+    { 156, 116, 151, 205, 139, 253, 213, 249 },
+    { 158, 117, 151, 206, 140, 253, 214, 249 },
+    { 159, 117, 151, 207, 141, 253, 215, 249 },
+    { 161, 118, 152, 208, 142, 253, 216, 249 },
+    { 162, 118, 152, 209, 142, 253, 216, 249 },
+    { 163, 119, 153, 210, 143, 253, 217, 249 },
+    { 164, 119, 153, 211, 143, 253, 217, 249 },
+    { 166, 120, 153, 212, 144, 254, 218, 250 },
+    { 167, 120, 153, 212, 145, 254, 219, 250 },
+    { 168, 121, 154, 213, 146, 254, 220, 250 },
+    { 169, 121, 154, 214, 146, 254, 220, 250 },
+    { 171, 122, 155, 215, 147, 254, 221, 250 },
+    { 172, 122, 155, 216, 147, 254, 221, 250 },
+    { 173, 123, 155, 217, 148, 254, 222, 250 },
+    { 174, 123, 155, 217, 149, 254, 222, 250 },
+    { 176, 124, 156, 218, 150, 254, 223, 250 },
+    { 177, 124, 156, 219, 150, 254, 223, 250 },
+    { 178, 125, 157, 220, 151, 254, 224, 251 },
+    { 179, 125, 157, 220, 151, 254, 224, 251 },
+    { 180, 126, 157, 221, 152, 254, 225, 251 },
+    { 181, 126, 157, 221, 152, 254, 225, 251 },
+    { 183, 127, 158, 222, 153, 254, 226, 251 },
+    { 184, 127, 158, 223, 154, 254, 226, 251 },
+    { 185, 128, 159, 224, 155, 255, 227, 251 },
+    { 186, 128, 159, 224, 155, 255, 227, 251 },
+    { 187, 129, 160, 225, 156, 255, 228, 251 },
+    { 188, 130, 160, 225, 156, 255, 228, 251 },
+    { 189, 131, 160, 226, 157, 255, 228, 251 },
+    { 190, 131, 160, 226, 158, 255, 228, 251 },
+    { 191, 132, 161, 227, 159, 255, 229, 251 },
+    { 192, 132, 161, 227, 159, 255, 229, 251 },
+    { 193, 133, 162, 228, 160, 255, 230, 252 },
+    { 194, 133, 162, 229, 160, 255, 230, 252 },
+    { 195, 134, 163, 230, 161, 255, 231, 252 },
+    { 196, 134, 163, 230, 161, 255, 231, 252 },
+    { 197, 135, 163, 231, 162, 255, 231, 252 },
+    { 198, 135, 163, 231, 162, 255, 231, 252 },
+    { 199, 136, 164, 232, 163, 255, 232, 252 },
+    { 200, 136, 164, 232, 164, 255, 232, 252 },
+    { 201, 137, 165, 233, 165, 255, 233, 252 },
+    { 201, 137, 165, 233, 165, 255, 233, 252 },
+    { 202, 138, 166, 233, 166, 255, 233, 252 },
+    { 203, 138, 166, 233, 166, 255, 233, 252 },
+    { 204, 139, 166, 234, 167, 255, 234, 252 },
+    { 205, 139, 166, 234, 167, 255, 234, 252 },
+    { 206, 140, 167, 235, 168, 255, 235, 252 },
+    { 206, 140, 167, 235, 168, 255, 235, 252 },
+    { 207, 141, 168, 236, 169, 255, 235, 252 },
+    { 208, 141, 168, 236, 170, 255, 235, 252 },
+    { 209, 142, 169, 237, 171, 255, 236, 252 },
+    { 209, 143, 169, 237, 171, 255, 236, 252 },
+    { 210, 144, 169, 237, 172, 255, 236, 252 },
+    { 211, 144, 169, 237, 172, 255, 236, 252 },
+    { 212, 145, 170, 238, 173, 255, 237, 252 },
+    { 213, 145, 170, 238, 173, 255, 237, 252 },
+    { 214, 146, 171, 239, 174, 255, 237, 253 },
+    { 214, 146, 171, 239, 174, 255, 237, 253 },
+    { 215, 147, 172, 240, 175, 255, 238, 253 },
+    { 215, 147, 172, 240, 175, 255, 238, 253 },
+    { 216, 148, 173, 240, 176, 255, 238, 253 },
+    { 217, 148, 173, 240, 176, 255, 238, 253 },
+    { 218, 149, 173, 241, 177, 255, 239, 253 },
+    { 218, 149, 173, 241, 178, 255, 239, 253 },
+    { 219, 150, 174, 241, 179, 255, 239, 253 },
+    { 219, 151, 174, 241, 179, 255, 239, 253 },
+    { 220, 152, 175, 242, 180, 255, 240, 253 },
+    { 221, 152, 175, 242, 180, 255, 240, 253 },
+    { 222, 153, 176, 242, 181, 255, 240, 253 },
+    { 222, 153, 176, 242, 181, 255, 240, 253 },
+    { 223, 154, 177, 243, 182, 255, 240, 253 },
+    { 223, 154, 177, 243, 182, 255, 240, 253 },
+    { 224, 155, 178, 244, 183, 255, 241, 253 },
+    { 224, 155, 178, 244, 183, 255, 241, 253 },
+    { 225, 156, 178, 244, 184, 255, 241, 253 },
+    { 225, 157, 178, 244, 184, 255, 241, 253 },
+    { 226, 158, 179, 244, 185, 255, 242, 253 },
+    { 227, 158, 179, 244, 185, 255, 242, 253 },
+    { 228, 159, 180, 245, 186, 255, 242, 253 },
+    { 228, 159, 180, 245, 186, 255, 242, 253 },
+    { 229, 160, 181, 245, 187, 255, 242, 253 },
+    { 229, 160, 181, 245, 187, 255, 242, 253 },
+    { 230, 161, 182, 246, 188, 255, 243, 253 },
+    { 230, 162, 182, 246, 188, 255, 243, 253 },
+    { 231, 163, 183, 246, 189, 255, 243, 253 },
+    { 231, 163, 183, 246, 189, 255, 243, 253 },
+    { 232, 164, 184, 247, 190, 255, 243, 253 },
+    { 232, 164, 184, 247, 190, 255, 243, 253 },
+    { 233, 165, 185, 247, 191, 255, 244, 253 },
+    { 233, 165, 185, 247, 191, 255, 244, 253 },
+    { 234, 166, 185, 247, 192, 255, 244, 253 },
+    { 234, 167, 185, 247, 192, 255, 244, 253 },
+    { 235, 168, 186, 248, 193, 255, 244, 253 },
+    { 235, 168, 186, 248, 193, 255, 244, 253 },
+    { 236, 169, 187, 248, 194, 255, 244, 253 },
+    { 236, 169, 187, 248, 194, 255, 244, 253 },
+    { 236, 170, 188, 248, 195, 255, 245, 253 },
+    { 236, 170, 188, 248, 195, 255, 245, 253 },
+    { 237, 171, 189, 249, 196, 255, 245, 254 },
+    { 237, 172, 189, 249, 196, 255, 245, 254 },
+    { 238, 173, 190, 249, 197, 255, 245, 254 },
+    { 238, 173, 190, 249, 197, 255, 245, 254 },
+    { 239, 174, 191, 249, 198, 255, 245, 254 },
+    { 239, 174, 191, 249, 198, 255, 245, 254 },
+    { 240, 175, 192, 249, 199, 255, 246, 254 },
+    { 240, 176, 192, 249, 199, 255, 246, 254 },
+    { 240, 177, 193, 250, 200, 255, 246, 254 },
+    { 240, 177, 193, 250, 200, 255, 246, 254 },
+    { 241, 178, 194, 250, 201, 255, 246, 254 },
+    { 241, 178, 194, 250, 201, 255, 246, 254 },
+    { 242, 179, 195, 250, 202, 255, 246, 254 },
+    { 242, 180, 195, 250, 202, 255, 246, 254 },
+    { 242, 181, 196, 250, 203, 255, 247, 254 },
+    { 242, 181, 196, 250, 203, 255, 247, 254 },
+    { 243, 182, 197, 251, 204, 255, 247, 254 },
+    { 243, 183, 197, 251, 204, 255, 247, 254 },
+    { 244, 184, 198, 251, 205, 255, 247, 254 },
+    { 244, 184, 198, 251, 205, 255, 247, 254 },
+    { 244, 185, 199, 251, 206, 255, 247, 254 },
+    { 244, 185, 199, 251, 206, 255, 247, 254 },
+    { 245, 186, 200, 251, 207, 255, 247, 254 },
+    { 245, 187, 200, 251, 207, 255, 247, 254 },
+    { 246, 188, 201, 252, 207, 255, 248, 254 },
+    { 246, 188, 201, 252, 207, 255, 248, 254 },
+    { 246, 189, 202, 252, 208, 255, 248, 254 },
+    { 246, 190, 202, 252, 208, 255, 248, 254 },
+    { 247, 191, 203, 252, 209, 255, 248, 254 },
+    { 247, 191, 203, 252, 209, 255, 248, 254 },
+    { 247, 192, 204, 252, 210, 255, 248, 254 },
+    { 247, 193, 204, 252, 210, 255, 248, 254 },
+    { 248, 194, 205, 252, 211, 255, 248, 254 },
+    { 248, 194, 205, 252, 211, 255, 248, 254 },
+    { 248, 195, 206, 252, 212, 255, 249, 254 },
+    { 248, 196, 206, 252, 212, 255, 249, 254 },
+    { 249, 197, 207, 253, 213, 255, 249, 254 },
+    { 249, 197, 207, 253, 213, 255, 249, 254 },
+    { 249, 198, 208, 253, 214, 255, 249, 254 },
+    { 249, 199, 209, 253, 214, 255, 249, 254 },
+    { 250, 200, 210, 253, 215, 255, 249, 254 },
+    { 250, 200, 210, 253, 215, 255, 249, 254 },
+    { 250, 201, 211, 253, 215, 255, 249, 254 },
+    { 250, 202, 211, 253, 215, 255, 249, 254 },
+    { 250, 203, 212, 253, 216, 255, 249, 254 },
+    { 250, 203, 212, 253, 216, 255, 249, 254 },
+    { 251, 204, 213, 253, 217, 255, 250, 254 },
+    { 251, 205, 213, 253, 217, 255, 250, 254 },
+    { 251, 206, 214, 254, 218, 255, 250, 254 },
+    { 251, 206, 215, 254, 218, 255, 250, 254 },
+    { 252, 207, 216, 254, 219, 255, 250, 254 },
+    { 252, 208, 216, 254, 219, 255, 250, 254 },
+    { 252, 209, 217, 254, 220, 255, 250, 254 },
+    { 252, 210, 217, 254, 220, 255, 250, 254 },
+    { 252, 211, 218, 254, 221, 255, 250, 254 },
+    { 252, 212, 218, 254, 221, 255, 250, 254 },
+    { 253, 213, 219, 254, 222, 255, 250, 254 },
+    { 253, 213, 220, 254, 222, 255, 250, 254 },
+    { 253, 214, 221, 254, 223, 255, 250, 254 },
+    { 253, 215, 221, 254, 223, 255, 250, 254 },
+    { 253, 216, 222, 254, 224, 255, 251, 254 },
+    { 253, 217, 223, 254, 224, 255, 251, 254 },
+    { 253, 218, 224, 254, 225, 255, 251, 254 },
+    { 253, 219, 224, 254, 225, 255, 251, 254 },
+    { 254, 220, 225, 254, 225, 255, 251, 254 },
+    { 254, 221, 226, 254, 225, 255, 251, 254 },
+    { 254, 222, 227, 255, 226, 255, 251, 254 },
+    { 254, 223, 227, 255, 226, 255, 251, 254 },
+    { 254, 224, 228, 255, 227, 255, 251, 254 },
+    { 254, 225, 229, 255, 227, 255, 251, 254 },
+    { 254, 226, 230, 255, 228, 255, 251, 254 },
+    { 254, 227, 230, 255, 229, 255, 251, 254 },
+    { 255, 228, 231, 255, 230, 255, 251, 254 },
+    { 255, 229, 232, 255, 230, 255, 251, 254 },
+    { 255, 230, 233, 255, 231, 255, 252, 254 },
+    { 255, 231, 234, 255, 231, 255, 252, 254 },
+    { 255, 232, 235, 255, 232, 255, 252, 254 },
+    { 255, 233, 236, 255, 232, 255, 252, 254 },
+    { 255, 235, 237, 255, 233, 255, 252, 254 },
+    { 255, 236, 238, 255, 234, 255, 252, 254 },
+    { 255, 238, 240, 255, 235, 255, 252, 255 },
+    { 255, 239, 241, 255, 235, 255, 252, 254 },
+    { 255, 241, 243, 255, 236, 255, 252, 254 },
+    { 255, 243, 245, 255, 237, 255, 252, 254 },
+    { 255, 246, 247, 255, 239, 255, 253, 255 },
+};
+
+typedef struct { /* bundle of uint8_t tables (probabilities, going by the type name); field order must match the positional initializer vp9_default_probs */
+    uint8_t y_mode[4][9]; /* 4 rows; the defaults label them bsize < 8x8, < 16x16, < 32x32, >= 32x32 */
+    uint8_t uv_mode[10][9]; /* 10 rows; the defaults label them by y mode: v, h, dc, d45, d135, d117, d153, d63, d27, tm */
+    uint8_t filter[4][2];
+    uint8_t mv_mode[7][3]; /* 7 rows; the defaults number them 0 (both zero mv) through 6 (two intra neighbours) */
+    uint8_t intra[4];
+    uint8_t comp[5];
+    uint8_t single_ref[5][2];
+    uint8_t comp_ref[5];
+    uint8_t tx32p[2][3];
+    uint8_t tx16p[2][2];
+    uint8_t tx8p[2];
+    uint8_t skip[3];
+    uint8_t mv_joint[3];
+    struct { /* values kept separately for each motion vector component */
+        uint8_t sign;
+        uint8_t classes[10];
+        uint8_t class0;
+        uint8_t bits[10];
+        uint8_t class0_fp[2][3];
+        uint8_t fp[3];
+        uint8_t class0_hp;
+        uint8_t hp;
+    } mv_comp[2]; /* [0] = vertical, [1] = horizontal, per the labels in vp9_default_probs */
+    uint8_t partition[4][4][3]; /* [level: 64x64->32x32 down to 8x8->4x4][a/l split state][3 values], per the defaults' labels */
+} prob_context;
+
+static const prob_context vp9_default_probs = { /* positional initializer: the groups below follow the field order of prob_context */
+    { /* y_mode */
+        {  65,  32,  18, 144, 162, 194,  41,  51,  98 } /* bsize < 8x8 */,
+        { 132,  68,  18, 165, 217, 196,  45,  40,  78 } /* bsize < 16x16 */,
+        { 173,  80,  19, 176, 240, 193,  64,  35,  46 } /* bsize < 32x32 */,
+        { 221, 135,  38, 194, 248, 121,  96,  85,  29 } /* bsize >= 32x32 */
+    }, { /* uv_mode */
+        {  48,  12, 154, 155, 139,  90,  34, 117, 119 } /* y = v */,
+        {  67,   6,  25, 204, 243, 158,  13,  21,  96 } /* y = h */,
+        { 120,   7,  76, 176, 208, 126,  28,  54, 103 } /* y = dc */,
+        {  97,   5,  44, 131, 176, 139,  48,  68,  97 } /* y = d45 */,
+        {  83,   5,  42, 156, 111, 152,  26,  49, 152 } /* y = d135 */,
+        {  80,   5,  58, 178,  74,  83,  33,  62, 145 } /* y = d117 */,
+        {  86,   5,  32, 154, 192, 168,  14,  22, 163 } /* y = d153 */,
+        {  77,   7,  64, 116, 132, 122,  37, 126, 120 } /* y = d63 */,
+        {  85,   5,  32, 156, 216, 148,  19,  29,  73 } /* y = d27 */,
+        { 101,  21, 107, 181, 192, 103,  19,  67, 125 } /* y = tm */
+    }, { /* filter */
+        { 235, 162, },
+        {  36, 255, },
+        {  34,   3, },
+        { 149, 144, },
+    }, { /* mv_mode */
+        {  2, 173,  34},  // 0 = both zero mv
+        {  7, 145,  85},  // 1 = one zero mv + one a predicted mv
+        {  7, 166,  63},  // 2 = two predicted mvs
+        {  7,  94,  66},  // 3 = one predicted/zero and one new mv
+        {  8,  64,  46},  // 4 = two new mvs
+        { 17,  81,  31},  // 5 = one intra neighbour + x
+        { 25,  29,  30},  // 6 = two intra neighbours
+    }, { /* intra */
+        9, 102, 187, 225
+    }, { /* comp */
+        239, 183, 119,  96,  41
+    }, { /* single_ref */
+        {  33,  16 },
+        {  77,  74 },
+        { 142, 142 },
+        { 172, 170 },
+        { 238, 247 }
+    }, { /* comp_ref */
+        50, 126, 123, 221, 226
+    }, { /* tx32p */
+        { 3, 136, 37, },
+        { 5,  52, 13, },
+    }, { /* tx16p */
+        { 20, 152, },
+        { 15, 101, },
+    }, { /* tx8p */
+        100, 66
+    }, { /* skip */
+        192, 128, 64
+    }, { /* mv_joint */
+        32, 64, 96
+    }, { /* mv_comp: element 0 is the vertical component, element 1 the horizontal one */
+        { /* mv vertical component */
+            128, /* sign */
+            { 224, 144, 192, 168, 192, 176, 192, 198, 198, 245 }, /* classes */
+            216, /* class0 */
+            { 136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, /* bits */
+            { /* class0_fp */
+                { 128, 128, 64 },
+                {  96, 112, 64 }
+            },
+            { 64, 96, 64 }, /* fp */
+            160, /* class0_hp bit */
+            128, /* hp */
+        }, { /* mv horizontal component */
+            128, /* sign */
+            { 216, 128, 176, 160, 176, 176, 192, 198, 198, 208 }, /* classes */
+            208, /* class0 */
+            { 136, 140, 148, 160, 176, 192, 224, 234, 234, 240 }, /* bits */
+            { /* class0_fp */
+                { 128, 128, 64 },
+                {  96, 112, 64 }
+            },
+            { 64, 96, 64 }, /* fp */
+            160, /* class0_hp bit */
+            128, /* hp */
+        }
+    }, { /* partition: one group per size level, four rows per group, three values per row */
+        { /* 64x64 -> 32x32 */
+            { 222,  34,  30 } /* a/l both not split */,
+            {  72,  16,  44 } /* a split, l not split */,
+            {  58,  32,  12 } /* l split, a not split */,
+            {  10,   7,   6 } /* a/l both split */,
+        }, { /* 32x32 -> 16x16 */
+            { 177,  58,  59 } /* a/l both not split */,
+            {  68,  26,  63 } /* a split, l not split */,
+            {  52,  79,  25 } /* l split, a not split */,
+            {  17,  14,  12 } /* a/l both split */,
+        }, { /* 16x16 -> 8x8 */
+            { 174,  73,  87 } /* a/l both not split */,
+            {  92,  41,  83 } /* a split, l not split */,
+            {  82,  99,  50 } /* l split, a not split */,
+            {  53,  39,  39 } /* a/l both split */,
+        }, { /* 8x8 -> 4x4 */
+            { 199, 122, 141 } /* a/l both not split */,
+            { 147,  63, 159 } /* a split, l not split */,
+            { 148, 133, 118 } /* l split, a not split */,
+            { 121, 104, 114 } /* a/l both split */,
+        }
+    },
+};
+
+static const uint8_t vp9_default_coef_probs[4][2][2][6][6][3] = {
+    { /* tx = 4x4 */
+        { /* block Type 0 */
+            { /* Intra */
+                { /* Coeff Band 0 */
+                    { 195,  29, 183 },
+                    {  84,  49, 136 },
+                    {   8,  42,  71 }
+                }, { /* Coeff Band 1 */
+                    {  31, 107, 169 },
+                    {  35,  99, 159 },
+                    {  17,  82, 140 },
+                    {   8,  66, 114 },
+                    {   2,  44,  76 },
+                    {   1,  19,  32 }
+                }, { /* Coeff Band 2 */
+                    {  40, 132, 201 },
+                    {  29, 114, 187 },
+                    {  13,  91, 157 },
+                    {   7,  75, 127 },
+                    {   3,  58,  95 },
+                    {   1,  28,  47 }
+                }, { /* Coeff Band 3 */
+                    {  69, 142, 221 },
+                    {  42, 122, 201 },
+                    {  15,  91, 159 },
+                    {   6,  67, 121 },
+                    {   1,  42,  77 },
+                    {   1,  17,  31 }
+                }, { /* Coeff Band 4 */
+                    { 102, 148, 228 },
+                    {  67, 117, 204 },
+                    {  17,  82, 154 },
+                    {   6,  59, 114 },
+                    {   2,  39,  75 },
+                    {   1,  15,  29 }
+                }, { /* Coeff Band 5 */
+                    { 156,  57, 233 },
+                    { 119,  57, 212 },
+                    {  58,  48, 163 },
+                    {  29,  40, 124 },
+                    {  12,  30,  81 },
+                    {   3,  12,  31 }
+                }
+            }, { /* Inter */
+                { /* Coeff Band 0 */
+                    { 191, 107, 226 },
+                    { 124, 117, 204 },
+                    {  25,  99, 155 }
+                }, { /* Coeff Band 1 */
+                    {  29, 148, 210 },
+                    {  37, 126, 194 },
+                    {   8,  93, 157 },
+                    {   2,  68, 118 },
+                    {   1,  39,  69 },
+                    {   1,  17,  33 }
+                }, { /* Coeff Band 2 */
+                    {  41, 151, 213 },
+                    {  27, 123, 193 },
+                    {   3,  82, 144 },
+                    {   1,  58, 105 },
+                    {   1,  32,  60 },
+                    {   1,  13,  26 }
+                }, { /* Coeff Band 3 */
+                    {  59, 159, 220 },
+                    {  23, 126, 198 },
+                    {   4,  88, 151 },
+                    {   1,  66, 114 },
+                    {   1,  38,  71 },
+                    {   1,  18,  34 }
+                }, { /* Coeff Band 4 */
+                    { 114, 136, 232 },
+                    {  51, 114, 207 },
+                    {  11,  83, 155 },
+                    {   3,  56, 105 },
+                    {   1,  33,  65 },
+                    {   1,  17,  34 }
+                }, { /* Coeff Band 5 */
+                    { 149,  65, 234 },
+                    { 121,  57, 215 },
+                    {  61,  49, 166 },
+                    {  28,  36, 114 },
+                    {  12,  25,  76 },
+                    {   3,  16,  42 }
+                }
+            }
+        }, { /* block Type 1 */
+            { /* Intra */
+                { /* Coeff Band 0 */
+                    { 214,  49, 220 },
+                    { 132,  63, 188 },
+                    {  42,  65, 137 }
+                }, { /* Coeff Band 1 */
+                    {  85, 137, 221 },
+                    { 104, 131, 216 },
+                    {  49, 111, 192 },
+                    {  21,  87, 155 },
+                    {   2,  49,  87 },
+                    {   1,  16,  28 }
+                }, { /* Coeff Band 2 */
+                    {  89, 163, 230 },
+                    {  90, 137, 220 },
+                    {  29, 100, 183 },
+                    {  10,  70, 135 },
+                    {   2,  42,  81 },
+                    {   1,  17,  33 }
+                }, { /* Coeff Band 3 */
+                    { 108, 167, 237 },
+                    {  55, 133, 222 },
+                    {  15,  97, 179 },
+                    {   4,  72, 135 },
+                    {   1,  45,  85 },
+                    {   1,  19,  38 }
+                }, { /* Coeff Band 4 */
+                    { 124, 146, 240 },
+                    {  66, 124, 224 },
+                    {  17,  88, 175 },
+                    {   4,  58, 122 },
+                    {   1,  36,  75 },
+                    {   1,  18,  37 }
+                }, { /* Coeff Band 5 */
+                    { 141,  79, 241 },
+                    { 126,  70, 227 },
+                    {  66,  58, 182 },
+                    {  30,  44, 136 },
+                    {  12,  34,  96 },
+                    {   2,  20,  47 }
+                }
+            }, { /* Inter */
+                { /* Coeff Band 0 */
+                    { 229,  99, 249 },
+                    { 143, 111, 235 },
+                    {  46, 109, 192 }
+                }, { /* Coeff Band 1 */
+                    {  82, 158, 236 },
+                    {  94, 146, 224 },
+                    {  25, 117, 191 },
+                    {   9,  87, 149 },
+                    {   3,  56,  99 },
+                    {   1,  33,  57 }
+                }, { /* Coeff Band 2 */
+                    {  83, 167, 237 },
+                    {  68, 145, 222 },
+                    {  10, 103, 177 },
+                    {   2,  72, 131 },
+                    {   1,  41,  79 },
+                    {   1,  20,  39 }
+                }, { /* Coeff Band 3 */
+                    {  99, 167, 239 },
+                    {  47, 141, 224 },
+                    {  10, 104, 178 },
+                    {   2,  73, 133 },
+                    {   1,  44,  85 },
+                    {   1,  22,  47 }
+                }, { /* Coeff Band 4 */
+                    { 127, 145, 243 },
+                    {  71, 129, 228 },
+                    {  17,  93, 177 },
+                    {   3,  61, 124 },
+                    {   1,  41,  84 },
+                    {   1,  21,  52 }
+                }, { /* Coeff Band 5 */
+                    { 157,  78, 244 },
+                    { 140,  72, 231 },
+                    {  69,  58, 184 },
+                    {  31,  44, 137 },
+                    {  14,  38, 105 },
+                    {   8,  23,  61 }
+                }
+            }
+        }
+    }, { /* tx = 8x8 */
+        { /* block Type 0 */
+            { /* Intra */
+                { /* Coeff Band 0 */
+                    { 125,  34, 187 },
+                    {  52,  41, 133 },
+                    {   6,  31,  56 }
+                }, { /* Coeff Band 1 */
+                    {  37, 109, 153 },
+                    {  51, 102, 147 },
+                    {  23,  87, 128 },
+                    {   8,  67, 101 },
+                    {   1,  41,  63 },
+                    {   1,  19,  29 }
+                }, { /* Coeff Band 2 */
+                    {  31, 154, 185 },
+                    {  17, 127, 175 },
+                    {   6,  96, 145 },
+                    {   2,  73, 114 },
+                    {   1,  51,  82 },
+                    {   1,  28,  45 }
+                }, { /* Coeff Band 3 */
+                    {  23, 163, 200 },
+                    {  10, 131, 185 },
+                    {   2,  93, 148 },
+                    {   1,  67, 111 },
+                    {   1,  41,  69 },
+                    {   1,  14,  24 }
+                }, { /* Coeff Band 4 */
+                    {  29, 176, 217 },
+                    {  12, 145, 201 },
+                    {   3, 101, 156 },
+                    {   1,  69, 111 },
+                    {   1,  39,  63 },
+                    {   1,  14,  23 }
+                }, { /* Coeff Band 5 */
+                    {  57, 192, 233 },
+                    {  25, 154, 215 },
+                    {   6, 109, 167 },
+                    {   3,  78, 118 },
+                    {   1,  48,  69 },
+                    {   1,  21,  29 }
+                }
+            }, { /* Inter */
+                { /* Coeff Band 0 */
+                    { 202, 105, 245 },
+                    { 108, 106, 216 },
+                    {  18,  90, 144 }
+                }, { /* Coeff Band 1 */
+                    {  33, 172, 219 },
+                    {  64, 149, 206 },
+                    {  14, 117, 177 },
+                    {   5,  90, 141 },
+                    {   2,  61,  95 },
+                    {   1,  37,  57 }
+                }, { /* Coeff Band 2 */
+                    {  33, 179, 220 },
+                    {  11, 140, 198 },
+                    {   1,  89, 148 },
+                    {   1,  60, 104 },
+                    {   1,  33,  57 },
+                    {   1,  12,  21 }
+                }, { /* Coeff Band 3 */
+                    {  30, 181, 221 },
+                    {   8, 141, 198 },
+                    {   1,  87, 145 },
+                    {   1,  58, 100 },
+                    {   1,  31,  55 },
+                    {   1,  12,  20 }
+                }, { /* Coeff Band 4 */
+                    {  32, 186, 224 },
+                    {   7, 142, 198 },
+                    {   1,  86, 143 },
+                    {   1,  58, 100 },
+                    {   1,  31,  55 },
+                    {   1,  12,  22 }
+                }, { /* Coeff Band 5 */
+                    {  57, 192, 227 },
+                    {  20, 143, 204 },
+                    {   3,  96, 154 },
+                    {   1,  68, 112 },
+                    {   1,  42,  69 },
+                    {   1,  19,  32 }
+                }
+            }
+        }, { /* block Type 1 */
+            { /* Intra */
+                { /* Coeff Band 0 */
+                    { 212,  35, 215 },
+                    { 113,  47, 169 },
+                    {  29,  48, 105 }
+                }, { /* Coeff Band 1 */
+                    {  74, 129, 203 },
+                    { 106, 120, 203 },
+                    {  49, 107, 178 },
+                    {  19,  84, 144 },
+                    {   4,  50,  84 },
+                    {   1,  15,  25 }
+                }, { /* Coeff Band 2 */
+                    {  71, 172, 217 },
+                    {  44, 141, 209 },
+                    {  15, 102, 173 },
+                    {   6,  76, 133 },
+                    {   2,  51,  89 },
+                    {   1,  24,  42 }
+                }, { /* Coeff Band 3 */
+                    {  64, 185, 231 },
+                    {  31, 148, 216 },
+                    {   8, 103, 175 },
+                    {   3,  74, 131 },
+                    {   1,  46,  81 },
+                    {   1,  18,  30 }
+                }, { /* Coeff Band 4 */
+                    {  65, 196, 235 },
+                    {  25, 157, 221 },
+                    {   5, 105, 174 },
+                    {   1,  67, 120 },
+                    {   1,  38,  69 },
+                    {   1,  15,  30 }
+                }, { /* Coeff Band 5 */
+                    {  65, 204, 238 },
+                    {  30, 156, 224 },
+                    {   7, 107, 177 },
+                    {   2,  70, 124 },
+                    {   1,  42,  73 },
+                    {   1,  18,  34 }
+                }
+            }, { /* Inter */
+                { /* Coeff Band 0 */
+                    { 225,  86, 251 },
+                    { 144, 104, 235 },
+                    {  42,  99, 181 }
+                }, { /* Coeff Band 1 */
+                    {  85, 175, 239 },
+                    { 112, 165, 229 },
+                    {  29, 136, 200 },
+                    {  12, 103, 162 },
+                    {   6,  77, 123 },
+                    {   2,  53,  84 }
+                }, { /* Coeff Band 2 */
+                    {  75, 183, 239 },
+                    {  30, 155, 221 },
+                    {   3, 106, 171 },
+                    {   1,  74, 128 },
+                    {   1,  44,  76 },
+                    {   1,  17,  28 }
+                }, { /* Coeff Band 3 */
+                    {  73, 185, 240 },
+                    {  27, 159, 222 },
+                    {   2, 107, 172 },
+                    {   1,  75, 127 },
+                    {   1,  42,  73 },
+                    {   1,  17,  29 }
+                }, { /* Coeff Band 4 */
+                    {  62, 190, 238 },
+                    {  21, 159, 222 },
+                    {   2, 107, 172 },
+                    {   1,  72, 122 },
+                    {   1,  40,  71 },
+                    {   1,  18,  32 }
+                }, { /* Coeff Band 5 */
+                    {  61, 199, 240 },
+                    {  27, 161, 226 },
+                    {   4, 113, 180 },
+                    {   1,  76, 129 },
+                    {   1,  46,  80 },
+                    {   1,  23,  41 }
+                }
+            }
+        }
+    }, { /* tx = 16x16 */
+        { /* block Type 0 */
+            { /* Intra */
+                { /* Coeff Band 0 */
+                    {   7,  27, 153 },
+                    {   5,  30,  95 },
+                    {   1,  16,  30 }
+                }, { /* Coeff Band 1 */
+                    {  50,  75, 127 },
+                    {  57,  75, 124 },
+                    {  27,  67, 108 },
+                    {  10,  54,  86 },
+                    {   1,  33,  52 },
+                    {   1,  12,  18 }
+                }, { /* Coeff Band 2 */
+                    {  43, 125, 151 },
+                    {  26, 108, 148 },
+                    {   7,  83, 122 },
+                    {   2,  59,  89 },
+                    {   1,  38,  60 },
+                    {   1,  17,  27 }
+                }, { /* Coeff Band 3 */
+                    {  23, 144, 163 },
+                    {  13, 112, 154 },
+                    {   2,  75, 117 },
+                    {   1,  50,  81 },
+                    {   1,  31,  51 },
+                    {   1,  14,  23 }
+                }, { /* Coeff Band 4 */
+                    {  18, 162, 185 },
+                    {   6, 123, 171 },
+                    {   1,  78, 125 },
+                    {   1,  51,  86 },
+                    {   1,  31,  54 },
+                    {   1,  14,  23 }
+                }, { /* Coeff Band 5 */
+                    {  15, 199, 227 },
+                    {   3, 150, 204 },
+                    {   1,  91, 146 },
+                    {   1,  55,  95 },
+                    {   1,  30,  53 },
+                    {   1,  11,  20 }
+                }
+            }, { /* Inter */
+                { /* Coeff Band 0 */
+                    {  19,  55, 240 },
+                    {  19,  59, 196 },
+                    {   3,  52, 105 }
+                }, { /* Coeff Band 1 */
+                    {  41, 166, 207 },
+                    { 104, 153, 199 },
+                    {  31, 123, 181 },
+                    {  14, 101, 152 },
+                    {   5,  72, 106 },
+                    {   1,  36,  52 }
+                }, { /* Coeff Band 2 */
+                    {  35, 176, 211 },
+                    {  12, 131, 190 },
+                    {   2,  88, 144 },
+                    {   1,  60, 101 },
+                    {   1,  36,  60 },
+                    {   1,  16,  28 }
+                }, { /* Coeff Band 3 */
+                    {  28, 183, 213 },
+                    {   8, 134, 191 },
+                    {   1,  86, 142 },
+                    {   1,  56,  96 },
+                    {   1,  30,  53 },
+                    {   1,  12,  20 }
+                }, { /* Coeff Band 4 */
+                    {  20, 190, 215 },
+                    {   4, 135, 192 },
+                    {   1,  84, 139 },
+                    {   1,  53,  91 },
+                    {   1,  28,  49 },
+                    {   1,  11,  20 }
+                }, { /* Coeff Band 5 */
+                    {  13, 196, 216 },
+                    {   2, 137, 192 },
+                    {   1,  86, 143 },
+                    {   1,  57,  99 },
+                    {   1,  32,  56 },
+                    {   1,  13,  24 }
+                }
+            }
+        }, { /* block Type 1 */
+            { /* Intra */
+                { /* Coeff Band 0 */
+                    { 211,  29, 217 },
+                    {  96,  47, 156 },
+                    {  22,  43,  87 }
+                }, { /* Coeff Band 1 */
+                    {  78, 120, 193 },
+                    { 111, 116, 186 },
+                    {  46, 102, 164 },
+                    {  15,  80, 128 },
+                    {   2,  49,  76 },
+                    {   1,  18,  28 }
+                }, { /* Coeff Band 2 */
+                    {  71, 161, 203 },
+                    {  42, 132, 192 },
+                    {  10,  98, 150 },
+                    {   3,  69, 109 },
+                    {   1,  44,  70 },
+                    {   1,  18,  29 }
+                }, { /* Coeff Band 3 */
+                    {  57, 186, 211 },
+                    {  30, 140, 196 },
+                    {   4,  93, 146 },
+                    {   1,  62, 102 },
+                    {   1,  38,  65 },
+                    {   1,  16,  27 }
+                }, { /* Coeff Band 4 */
+                    {  47, 199, 217 },
+                    {  14, 145, 196 },
+                    {   1,  88, 142 },
+                    {   1,  57,  98 },
+                    {   1,  36,  62 },
+                    {   1,  15,  26 }
+                }, { /* Coeff Band 5 */
+                    {  26, 219, 229 },
+                    {   5, 155, 207 },
+                    {   1,  94, 151 },
+                    {   1,  60, 104 },
+                    {   1,  36,  62 },
+                    {   1,  16,  28 }
+                }
+            }, { /* Inter */
+                { /* Coeff Band 0 */
+                    { 233,  29, 248 },
+                    { 146,  47, 220 },
+                    {  43,  52, 140 }
+                }, { /* Coeff Band 1 */
+                    { 100, 163, 232 },
+                    { 179, 161, 222 },
+                    {  63, 142, 204 },
+                    {  37, 113, 174 },
+                    {  26,  89, 137 },
+                    {  18,  68,  97 }
+                }, { /* Coeff Band 2 */
+                    {  85, 181, 230 },
+                    {  32, 146, 209 },
+                    {   7, 100, 164 },
+                    {   3,  71, 121 },
+                    {   1,  45,  77 },
+                    {   1,  18,  30 }
+                }, { /* Coeff Band 3 */
+                    {  65, 187, 230 },
+                    {  20, 148, 207 },
+                    {   2,  97, 159 },
+                    {   1,  68, 116 },
+                    {   1,  40,  70 },
+                    {   1,  14,  29 }
+                }, { /* Coeff Band 4 */
+                    {  40, 194, 227 },
+                    {   8, 147, 204 },
+                    {   1,  94, 155 },
+                    {   1,  65, 112 },
+                    {   1,  39,  66 },
+                    {   1,  14,  26 }
+                }, { /* Coeff Band 5 */
+                    {  16, 208, 228 },
+                    {   3, 151, 207 },
+                    {   1,  98, 160 },
+                    {   1,  67, 117 },
+                    {   1,  41,  74 },
+                    {   1,  17,  31 }
+                }
+            }
+        }
+    }, { /* tx = 32x32 */
+        { /* block Type 0 */
+            { /* Intra */
+                { /* Coeff Band 0 */
+                    {  17,  38, 140 },
+                    {   7,  34,  80 },
+                    {   1,  17,  29 }
+                }, { /* Coeff Band 1 */
+                    {  37,  75, 128 },
+                    {  41,  76, 128 },
+                    {  26,  66, 116 },
+                    {  12,  52,  94 },
+                    {   2,  32,  55 },
+                    {   1,  10,  16 }
+                }, { /* Coeff Band 2 */
+                    {  50, 127, 154 },
+                    {  37, 109, 152 },
+                    {  16,  82, 121 },
+                    {   5,  59,  85 },
+                    {   1,  35,  54 },
+                    {   1,  13,  20 }
+                }, { /* Coeff Band 3 */
+                    {  40, 142, 167 },
+                    {  17, 110, 157 },
+                    {   2,  71, 112 },
+                    {   1,  44,  72 },
+                    {   1,  27,  45 },
+                    {   1,  11,  17 }
+                }, { /* Coeff Band 4 */
+                    {  30, 175, 188 },
+                    {   9, 124, 169 },
+                    {   1,  74, 116 },
+                    {   1,  48,  78 },
+                    {   1,  30,  49 },
+                    {   1,  11,  18 }
+                }, { /* Coeff Band 5 */
+                    {  10, 222, 223 },
+                    {   2, 150, 194 },
+                    {   1,  83, 128 },
+                    {   1,  48,  79 },
+                    {   1,  27,  45 },
+                    {   1,  11,  17 }
+                }
+            }, { /* Inter */
+                { /* Coeff Band 0 */
+                    {  36,  41, 235 },
+                    {  29,  36, 193 },
+                    {  10,  27, 111 }
+                }, { /* Coeff Band 1 */
+                    {  85, 165, 222 },
+                    { 177, 162, 215 },
+                    { 110, 135, 195 },
+                    {  57, 113, 168 },
+                    {  23,  83, 120 },
+                    {  10,  49,  61 }
+                }, { /* Coeff Band 2 */
+                    {  85, 190, 223 },
+                    {  36, 139, 200 },
+                    {   5,  90, 146 },
+                    {   1,  60, 103 },
+                    {   1,  38,  65 },
+                    {   1,  18,  30 }
+                }, { /* Coeff Band 3 */
+                    {  72, 202, 223 },
+                    {  23, 141, 199 },
+                    {   2,  86, 140 },
+                    {   1,  56,  97 },
+                    {   1,  36,  61 },
+                    {   1,  16,  27 }
+                }, { /* Coeff Band 4 */
+                    {  55, 218, 225 },
+                    {  13, 145, 200 },
+                    {   1,  86, 141 },
+                    {   1,  57,  99 },
+                    {   1,  35,  61 },
+                    {   1,  13,  22 }
+                }, { /* Coeff Band 5 */
+                    {  15, 235, 212 },
+                    {   1, 132, 184 },
+                    {   1,  84, 139 },
+                    {   1,  57,  97 },
+                    {   1,  34,  56 },
+                    {   1,  14,  23 }
+                }
+            }
+        }, { /* block Type 1 */
+            { /* Intra */
+                { /* Coeff Band 0 */
+                    { 181,  21, 201 },
+                    {  61,  37, 123 },
+                    {  10,  38,  71 }
+                }, { /* Coeff Band 1 */
+                    {  47, 106, 172 },
+                    {  95, 104, 173 },
+                    {  42,  93, 159 },
+                    {  18,  77, 131 },
+                    {   4,  50,  81 },
+                    {   1,  17,  23 }
+                }, { /* Coeff Band 2 */
+                    {  62, 147, 199 },
+                    {  44, 130, 189 },
+                    {  28, 102, 154 },
+                    {  18,  75, 115 },
+                    {   2,  44,  65 },
+                    {   1,  12,  19 }
+                }, { /* Coeff Band 3 */
+                    {  55, 153, 210 },
+                    {  24, 130, 194 },
+                    {   3,  93, 146 },
+                    {   1,  61,  97 },
+                    {   1,  31,  50 },
+                    {   1,  10,  16 }
+                }, { /* Coeff Band 4 */
+                    {  49, 186, 223 },
+                    {  17, 148, 204 },
+                    {   1,  96, 142 },
+                    {   1,  53,  83 },
+                    {   1,  26,  44 },
+                    {   1,  11,  17 }
+                }, { /* Coeff Band 5 */
+                    {  13, 217, 212 },
+                    {   2, 136, 180 },
+                    {   1,  78, 124 },
+                    {   1,  50,  83 },
+                    {   1,  29,  49 },
+                    {   1,  14,  23 }
+                }
+            }, { /* Inter */
+                { /* Coeff Band 0 */
+                    { 197,  13, 247 },
+                    {  82,  17, 222 },
+                    {  25,  17, 162 }
+                }, { /* Coeff Band 1 */
+                    { 126, 186, 247 },
+                    { 234, 191, 243 },
+                    { 176, 177, 234 },
+                    { 104, 158, 220 },
+                    {  66, 128, 186 },
+                    {  55,  90, 137 }
+                }, { /* Coeff Band 2 */
+                    { 111, 197, 242 },
+                    {  46, 158, 219 },
+                    {   9, 104, 171 },
+                    {   2,  65, 125 },
+                    {   1,  44,  80 },
+                    {   1,  17,  91 }
+                }, { /* Coeff Band 3 */
+                    { 104, 208, 245 },
+                    {  39, 168, 224 },
+                    {   3, 109, 162 },
+                    {   1,  79, 124 },
+                    {   1,  50, 102 },
+                    {   1,  43, 102 }
+                }, { /* Coeff Band 4 */
+                    {  84, 220, 246 },
+                    {  31, 177, 231 },
+                    {   2, 115, 180 },
+                    {   1,  79, 134 },
+                    {   1,  55,  77 },
+                    {   1,  60,  79 }
+                }, { /* Coeff Band 5 */
+                    {  43, 243, 240 },
+                    {   8, 180, 217 },
+                    {   1, 115, 166 },
+                    {   1,  84, 121 },
+                    {   1,  51,  67 },
+                    {   1,  16,   6 }
+                }
+            }
+        }
+    }
+};
+
+enum MVJoint {     // symbols stored (negated) as the leaf entries of vp9_mv_joint_tree
+    MV_JOINT_ZERO, // 0; presumably "neither MV component is coded" -- TODO confirm in the MV reader
+    MV_JOINT_H,    // 1; presumably "only the horizontal component is coded" -- TODO confirm
+    MV_JOINT_V,    // 2; presumably "only the vertical component is coded" -- TODO confirm
+    MV_JOINT_HV,   // 3; presumably "both components are coded" -- TODO confirm
+};
+
+static const int8_t vp9_mv_joint_tree[3][2] = { // binary tree over enum MVJoint: entries > 0 are row indices, entries <= 0 are negated leaves
+    { -MV_JOINT_ZERO, 1 },           // '0'   -> MV_JOINT_ZERO (stored as 0), otherwise continue at row 1
+     { -MV_JOINT_H, 2 },             // '10'  -> MV_JOINT_H, otherwise continue at row 2
+      { -MV_JOINT_V, -MV_JOINT_HV }, // '11x' -> MV_JOINT_V for x = 0, MV_JOINT_HV for x = 1
+}; // NOTE(review): the code that walks this tree is not in this header -- confirm it follows the convention above
+
+static const int8_t vp9_mv_class_tree[10][2] = { // binary tree with leaf values 0..10: entries > 0 are row indices, entries <= 0 are negated leaves
+    { -0, 1 },         // '0'
+     { -1, 2 },        // '10'
+      { 3, 4 },        // '11' has no leaf: bit 0 continues at row 3, bit 1 at row 4
+       { -2, -3 },     // '110x'
+       { 5, 6 },       // '111' has no leaf: bit 0 continues at row 5, bit 1 at row 6
+        { -4, -5 },    // '1110x'
+        { -6, 7 },     // '11110'
+         { 8, 9 },     // '11111' has no leaf: bit 0 continues at row 8, bit 1 at row 9
+          { -7, -8 },  // '111110x'
+          { -9, -10 }, // '111111x'
+}; // NOTE(review): the code that walks this tree is not in this header -- confirm it follows the convention above
+
+static const int8_t vp9_mv_fp_tree[3][2] = { // binary tree with leaf values 0..3: entries > 0 are row indices, entries <= 0 are negated leaves
+    { -0, 1 },    // '0'   -> 0, otherwise continue at row 1
+     { -1, 2 },   // '10'  -> 1, otherwise continue at row 2
+      { -2, -3 }, // '11x' -> 2 for x = 0, 3 for x = 1
+}; // NOTE(review): "fp" presumably means the fractional part of an MV component -- confirm against the reader
 
 #endif /* AVCODEC_VP9DATA_H */
diff --git a/libavcodec/vp9dsp.c b/libavcodec/vp9dsp.c
index c83defe..6356add 100644
--- a/libavcodec/vp9dsp.c
+++ b/libavcodec/vp9dsp.c
@@ -4,28 +4,27 @@
  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
  * Copyright (C) 2013 Clément Bœsch <u pkh me>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "libavutil/common.h"
 #include "libavutil/intreadwrite.h"
-
+#include "vp9dsp.h"
 #include "rnd_avg.h"
-#include "vp9.h"
 
 // FIXME see whether we can merge parts of this (perhaps at least 4x4 and 8x8)
 // back with h264pred.[ch]
@@ -85,10 +84,10 @@ static void vert_32x32_c(uint8_t *dst, ptrdiff_t stride,
 static void hor_4x4_c(uint8_t *dst, ptrdiff_t stride,
                       const uint8_t *left, const uint8_t *top)
 {
-    AV_WN32A(dst + stride * 0, left[0] * 0x01010101U);
-    AV_WN32A(dst + stride * 1, left[1] * 0x01010101U);
-    AV_WN32A(dst + stride * 2, left[2] * 0x01010101U);
-    AV_WN32A(dst + stride * 3, left[3] * 0x01010101U);
+    AV_WN32A(dst + stride * 0, left[3] * 0x01010101U); // left[] is indexed in reverse: output row y uses left[3 - y]
+    AV_WN32A(dst + stride * 1, left[2] * 0x01010101U); // byte * 0x01010101 repeats the byte in all four positions of the 32-bit value
+    AV_WN32A(dst + stride * 2, left[1] * 0x01010101U);
+    AV_WN32A(dst + stride * 3, left[0] * 0x01010101U); // 'top' is not read by this predictor
 }
 
 static void hor_8x8_c(uint8_t *dst, ptrdiff_t stride,
@@ -97,7 +96,7 @@ static void hor_8x8_c(uint8_t *dst, ptrdiff_t stride,
     int y;
 
     for (y = 0; y < 8; y++) {
-        AV_WN64A(dst, left[y] * 0x0101010101010101ULL);
+        AV_WN64A(dst, left[7 - y] * 0x0101010101010101ULL);
         dst += stride;
     }
 }
@@ -108,7 +107,7 @@ static void hor_16x16_c(uint8_t *dst, ptrdiff_t stride,
     int y;
 
     for (y = 0; y < 16; y++) {
-        uint64_t p8 = left[y] * 0x0101010101010101ULL;
+        uint64_t p8 = left[15 - y] * 0x0101010101010101ULL;
 
         AV_WN64A(dst + 0, p8);
         AV_WN64A(dst + 8, p8);
@@ -122,7 +121,7 @@ static void hor_32x32_c(uint8_t *dst, ptrdiff_t stride,
     int y;
 
     for (y = 0; y < 32; y++) {
-        uint64_t p8 = left[y] * 0x0101010101010101ULL;
+        uint64_t p8 = left[31 - y] * 0x0101010101010101ULL;
 
         AV_WN64A(dst +  0, p8);
         AV_WN64A(dst +  8, p8);
@@ -138,13 +137,13 @@ static void tm_4x4_c(uint8_t *dst, ptrdiff_t stride,
     int y, tl = top[-1];
 
     for (y = 0; y < 4; y++) {
-        int l_m_tl = left[y] - tl;
+        int l_m_tl = left[3 - y] - tl;
 
         dst[0] = av_clip_uint8(top[0] + l_m_tl);
         dst[1] = av_clip_uint8(top[1] + l_m_tl);
         dst[2] = av_clip_uint8(top[2] + l_m_tl);
         dst[3] = av_clip_uint8(top[3] + l_m_tl);
-        dst   += stride;
+        dst += stride;
     }
 }
 
@@ -154,7 +153,7 @@ static void tm_8x8_c(uint8_t *dst, ptrdiff_t stride,
     int y, tl = top[-1];
 
     for (y = 0; y < 8; y++) {
-        int l_m_tl = left[y] - tl;
+        int l_m_tl = left[7 - y] - tl;
 
         dst[0] = av_clip_uint8(top[0] + l_m_tl);
         dst[1] = av_clip_uint8(top[1] + l_m_tl);
@@ -164,7 +163,7 @@ static void tm_8x8_c(uint8_t *dst, ptrdiff_t stride,
         dst[5] = av_clip_uint8(top[5] + l_m_tl);
         dst[6] = av_clip_uint8(top[6] + l_m_tl);
         dst[7] = av_clip_uint8(top[7] + l_m_tl);
-        dst   += stride;
+        dst += stride;
     }
 }
 
@@ -174,25 +173,25 @@ static void tm_16x16_c(uint8_t *dst, ptrdiff_t stride,
     int y, tl = top[-1];
 
     for (y = 0; y < 16; y++) {
-        int l_m_tl = left[y] - tl;
-
-        dst[0]  = av_clip_uint8(top[0]  + l_m_tl);
-        dst[1]  = av_clip_uint8(top[1]  + l_m_tl);
-        dst[2]  = av_clip_uint8(top[2]  + l_m_tl);
-        dst[3]  = av_clip_uint8(top[3]  + l_m_tl);
-        dst[4]  = av_clip_uint8(top[4]  + l_m_tl);
-        dst[5]  = av_clip_uint8(top[5]  + l_m_tl);
-        dst[6]  = av_clip_uint8(top[6]  + l_m_tl);
-        dst[7]  = av_clip_uint8(top[7]  + l_m_tl);
-        dst[8]  = av_clip_uint8(top[8]  + l_m_tl);
-        dst[9]  = av_clip_uint8(top[9]  + l_m_tl);
+        int l_m_tl = left[15 - y] - tl;
+
+        dst[ 0] = av_clip_uint8(top[ 0] + l_m_tl);
+        dst[ 1] = av_clip_uint8(top[ 1] + l_m_tl);
+        dst[ 2] = av_clip_uint8(top[ 2] + l_m_tl);
+        dst[ 3] = av_clip_uint8(top[ 3] + l_m_tl);
+        dst[ 4] = av_clip_uint8(top[ 4] + l_m_tl);
+        dst[ 5] = av_clip_uint8(top[ 5] + l_m_tl);
+        dst[ 6] = av_clip_uint8(top[ 6] + l_m_tl);
+        dst[ 7] = av_clip_uint8(top[ 7] + l_m_tl);
+        dst[ 8] = av_clip_uint8(top[ 8] + l_m_tl);
+        dst[ 9] = av_clip_uint8(top[ 9] + l_m_tl);
         dst[10] = av_clip_uint8(top[10] + l_m_tl);
         dst[11] = av_clip_uint8(top[11] + l_m_tl);
         dst[12] = av_clip_uint8(top[12] + l_m_tl);
         dst[13] = av_clip_uint8(top[13] + l_m_tl);
         dst[14] = av_clip_uint8(top[14] + l_m_tl);
         dst[15] = av_clip_uint8(top[15] + l_m_tl);
-        dst    += stride;
+        dst += stride;
     }
 }
 
@@ -202,18 +201,18 @@ static void tm_32x32_c(uint8_t *dst, ptrdiff_t stride,
     int y, tl = top[-1];
 
     for (y = 0; y < 32; y++) {
-        int l_m_tl = left[y] - tl;
-
-        dst[0]  = av_clip_uint8(top[0]  + l_m_tl);
-        dst[1]  = av_clip_uint8(top[1]  + l_m_tl);
-        dst[2]  = av_clip_uint8(top[2]  + l_m_tl);
-        dst[3]  = av_clip_uint8(top[3]  + l_m_tl);
-        dst[4]  = av_clip_uint8(top[4]  + l_m_tl);
-        dst[5]  = av_clip_uint8(top[5]  + l_m_tl);
-        dst[6]  = av_clip_uint8(top[6]  + l_m_tl);
-        dst[7]  = av_clip_uint8(top[7]  + l_m_tl);
-        dst[8]  = av_clip_uint8(top[8]  + l_m_tl);
-        dst[9]  = av_clip_uint8(top[9]  + l_m_tl);
+        int l_m_tl = left[31 - y] - tl;
+
+        dst[ 0] = av_clip_uint8(top[ 0] + l_m_tl);
+        dst[ 1] = av_clip_uint8(top[ 1] + l_m_tl);
+        dst[ 2] = av_clip_uint8(top[ 2] + l_m_tl);
+        dst[ 3] = av_clip_uint8(top[ 3] + l_m_tl);
+        dst[ 4] = av_clip_uint8(top[ 4] + l_m_tl);
+        dst[ 5] = av_clip_uint8(top[ 5] + l_m_tl);
+        dst[ 6] = av_clip_uint8(top[ 6] + l_m_tl);
+        dst[ 7] = av_clip_uint8(top[ 7] + l_m_tl);
+        dst[ 8] = av_clip_uint8(top[ 8] + l_m_tl);
+        dst[ 9] = av_clip_uint8(top[ 9] + l_m_tl);
         dst[10] = av_clip_uint8(top[10] + l_m_tl);
         dst[11] = av_clip_uint8(top[11] + l_m_tl);
         dst[12] = av_clip_uint8(top[12] + l_m_tl);
@@ -236,16 +235,15 @@ static void tm_32x32_c(uint8_t *dst, ptrdiff_t stride,
         dst[29] = av_clip_uint8(top[29] + l_m_tl);
         dst[30] = av_clip_uint8(top[30] + l_m_tl);
         dst[31] = av_clip_uint8(top[31] + l_m_tl);
-        dst    += stride;
+        dst += stride;
     }
 }
 
 static void dc_4x4_c(uint8_t *dst, ptrdiff_t stride,
                      const uint8_t *left, const uint8_t *top)
 {
-    unsigned dc = 0x01010101U *
-                  ((left[0] + left[1] + left[2] + left[3] +
-                    top[0]  + top[1]  + top[2]  + top[3]  + 4) >> 3);
+    unsigned dc = 0x01010101U * ((left[0] + left[1] + left[2] + left[3] +
+                                  top[0] + top[1] + top[2] + top[3] + 4) >> 3);
 
     AV_WN32A(dst + stride * 0, dc);
     AV_WN32A(dst + stride * 1, dc);
@@ -257,10 +255,9 @@ static void dc_8x8_c(uint8_t *dst, ptrdiff_t stride,
                      const uint8_t *left, const uint8_t *top)
 {
     uint64_t dc = 0x0101010101010101ULL *
-                  ((left[0] + left[1] + left[2] + left[3] +
-                    left[4] + left[5] + left[6] + left[7] +
-                    top[0]  + top[1]  + top[2]  + top[3]  +
-                    top[4]  + top[5]  + top[6]  + top[7]  + 8) >> 4);
+        ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] +
+          left[6] + left[7] + top[0] + top[1] + top[2] + top[3] +
+          top[4] + top[5] + top[6] + top[7] + 8) >> 4);
     int y;
 
     for (y = 0; y < 8; y++) {
@@ -273,14 +270,11 @@ static void dc_16x16_c(uint8_t *dst, ptrdiff_t stride,
                        const uint8_t *left, const uint8_t *top)
 {
     uint64_t dc = 0x0101010101010101ULL *
-                  ((left[0]  + left[1]  + left[2]  + left[3]  +
-                    left[4]  + left[5]  + left[6]  + left[7]  +
-                    left[8]  + left[9]  + left[10] + left[11] +
-                    left[12] + left[13] + left[14] + left[15] +
-                    top[0]   + top[1]   + top[2]   + top[3]   +
-                    top[4]   + top[5]   + top[6]   + top[7]   +
-                    top[8]   + top[9]   + top[10]  + top[11]  +
-                    top[12]  + top[13]  + top[14]  + top[15]  + 16) >> 5);
+        ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] + left[6] +
+          left[7] + left[8] + left[9] + left[10] + left[11] + left[12] +
+          left[13] + left[14] + left[15] + top[0] + top[1] + top[2] + top[3] +
+          top[4] + top[5] + top[6] + top[7] + top[8] + top[9] + top[10] +
+          top[11] + top[12] + top[13] + top[14] + top[15] + 16) >> 5);
     int y;
 
     for (y = 0; y < 16; y++) {
@@ -294,22 +288,16 @@ static void dc_32x32_c(uint8_t *dst, ptrdiff_t stride,
                        const uint8_t *left, const uint8_t *top)
 {
     uint64_t dc = 0x0101010101010101ULL *
-                  ((left[0]  + left[1]  + left[2]  + left[3]  +
-                    left[4]  + left[5]  + left[6]  + left[7]  +
-                    left[8]  + left[9]  + left[10] + left[11] +
-                    left[12] + left[13] + left[14] + left[15] +
-                    left[16] + left[17] + left[18] + left[19] +
-                    left[20] + left[21] + left[22] + left[23] +
-                    left[24] + left[25] + left[26] + left[27] +
-                    left[28] + left[29] + left[30] + left[31] +
-                    top[0]   + top[1]   + top[2]   + top[3]   +
-                    top[4]   + top[5]   + top[6]   + top[7]   +
-                    top[8]   + top[9]   + top[10]  + top[11]  +
-                    top[12]  + top[13]  + top[14]  + top[15]  +
-                    top[16]  + top[17]  + top[18]  + top[19]  +
-                    top[20]  + top[21]  + top[22]  + top[23]  +
-                    top[24]  + top[25]  + top[26]  + top[27]  +
-                    top[28]  + top[29]  + top[30]  + top[31]  + 32) >> 6);
+        ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] + left[6] +
+          left[7] + left[8] + left[9] + left[10] + left[11] + left[12] +
+          left[13] + left[14] + left[15] + left[16] + left[17] + left[18] +
+          left[19] + left[20] + left[21] + left[22] + left[23] + left[24] +
+          left[25] + left[26] + left[27] + left[28] + left[29] + left[30] +
+          left[31] + top[0] + top[1] + top[2] + top[3] + top[4] + top[5] +
+          top[6] + top[7] + top[8] + top[9] + top[10] + top[11] + top[12] +
+          top[13] + top[14] + top[15] + top[16] + top[17] + top[18] + top[19] +
+          top[20] + top[21] + top[22] + top[23] + top[24] + top[25] + top[26] +
+          top[27] + top[28] + top[29] + top[30] + top[31] + 32) >> 6);
     int y;
 
     for (y = 0; y < 32; y++) {
@@ -324,8 +312,7 @@ static void dc_32x32_c(uint8_t *dst, ptrdiff_t stride,
 static void dc_left_4x4_c(uint8_t *dst, ptrdiff_t stride,
                           const uint8_t *left, const uint8_t *top)
 {
-    unsigned dc = 0x01010101U *
-                  ((left[0] + left[1] + left[2] + left[3] + 2) >> 2);
+    unsigned dc = 0x01010101U * ((left[0] + left[1] + left[2] + left[3] + 2) >> 2);
 
     AV_WN32A(dst + stride * 0, dc);
     AV_WN32A(dst + stride * 1, dc);
@@ -337,8 +324,8 @@ static void dc_left_8x8_c(uint8_t *dst, ptrdiff_t stride,
                           const uint8_t *left, const uint8_t *top)
 {
     uint64_t dc = 0x0101010101010101ULL *
-                  ((left[0] + left[1] + left[2] + left[3] +
-                    left[4] + left[5] + left[6] + left[7] + 4) >> 3);
+        ((left[0] + left[1] + left[2] + left[3] +
+          left[4] + left[5] + left[6] + left[7] + 4) >> 3);
     int y;
 
     for (y = 0; y < 8; y++) {
@@ -351,10 +338,9 @@ static void dc_left_16x16_c(uint8_t *dst, ptrdiff_t stride,
                             const uint8_t *left, const uint8_t *top)
 {
     uint64_t dc = 0x0101010101010101ULL *
-                  ((left[0]  + left[1]  + left[2]  + left[3]  +
-                    left[4]  + left[5]  + left[6]  + left[7]  +
-                    left[8]  + left[9]  + left[10] + left[11] +
-                    left[12] + left[13] + left[14] + left[15] + 8) >> 4);
+        ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] +
+          left[6] + left[7] + left[8] + left[9] + left[10] + left[11] +
+          left[12] + left[13] + left[14] + left[15] + 8) >> 4);
     int y;
 
     for (y = 0; y < 16; y++) {
@@ -368,14 +354,12 @@ static void dc_left_32x32_c(uint8_t *dst, ptrdiff_t stride,
                             const uint8_t *left, const uint8_t *top)
 {
     uint64_t dc = 0x0101010101010101ULL *
-                  ((left[0]  + left[1]  + left[2]  + left[3]  +
-                    left[4]  + left[5]  + left[6]  + left[7]  +
-                    left[8]  + left[9]  + left[10] + left[11] +
-                    left[12] + left[13] + left[14] + left[15] +
-                    left[16] + left[17] + left[18] + left[19] +
-                    left[20] + left[21] + left[22] + left[23] +
-                    left[24] + left[25] + left[26] + left[27] +
-                    left[28] + left[29] + left[30] + left[31] + 16) >> 5);
+        ((left[0] + left[1] + left[2] + left[3] + left[4] + left[5] +
+          left[6] + left[7] + left[8] + left[9] + left[10] + left[11] +
+          left[12] + left[13] + left[14] + left[15] + left[16] + left[17] +
+          left[18] + left[19] + left[20] + left[21] + left[22] + left[23] +
+          left[24] + left[25] + left[26] + left[27] + left[28] + left[29] +
+          left[30] + left[31] + 16) >> 5);
     int y;
 
     for (y = 0; y < 32; y++) {
@@ -402,8 +386,8 @@ static void dc_top_8x8_c(uint8_t *dst, ptrdiff_t stride,
                          const uint8_t *left, const uint8_t *top)
 {
     uint64_t dc = 0x0101010101010101ULL *
-                  ((top[0] + top[1] + top[2] + top[3] +
-                    top[4] + top[5] + top[6] + top[7] + 4) >> 3);
+        ((top[0] + top[1] + top[2] + top[3] +
+          top[4] + top[5] + top[6] + top[7] + 4) >> 3);
     int y;
 
     for (y = 0; y < 8; y++) {
@@ -416,10 +400,9 @@ static void dc_top_16x16_c(uint8_t *dst, ptrdiff_t stride,
                            const uint8_t *left, const uint8_t *top)
 {
     uint64_t dc = 0x0101010101010101ULL *
-                  ((top[0]  + top[1]  + top[2]  + top[3]  +
-                    top[4]  + top[5]  + top[6]  + top[7]  +
-                    top[8]  + top[9]  + top[10] + top[11] +
-                    top[12] + top[13] + top[14] + top[15] + 8) >> 4);
+        ((top[0] + top[1] + top[2] + top[3] + top[4] + top[5] +
+          top[6] + top[7] + top[8] + top[9] + top[10] + top[11] +
+          top[12] + top[13] + top[14] + top[15] + 8) >> 4);
     int y;
 
     for (y = 0; y < 16; y++) {
@@ -433,14 +416,12 @@ static void dc_top_32x32_c(uint8_t *dst, ptrdiff_t stride,
                            const uint8_t *left, const uint8_t *top)
 {
     uint64_t dc = 0x0101010101010101ULL *
-                  ((top[0]  + top[1]  + top[2]  + top[3]  +
-                    top[4]  + top[5]  + top[6]  + top[7]  +
-                    top[8]  + top[9]  + top[10] + top[11] +
-                    top[12] + top[13] + top[14] + top[15] +
-                    top[16] + top[17] + top[18] + top[19] +
-                    top[20] + top[21] + top[22] + top[23] +
-                    top[24] + top[25] + top[26] + top[27] +
-                    top[28] + top[29] + top[30] + top[31] + 16) >> 5);
+        ((top[0] + top[1] + top[2] + top[3] + top[4] + top[5] +
+          top[6] + top[7] + top[8] + top[9] + top[10] + top[11] +
+          top[12] + top[13] + top[14] + top[15] + top[16] + top[17] +
+          top[18] + top[19] + top[20] + top[21] + top[22] + top[23] +
+          top[24] + top[25] + top[26] + top[27] + top[28] + top[29] +
+          top[30] + top[31] + 16) >> 5);
     int y;
 
     for (y = 0; y < 32; y++) {
@@ -598,41 +579,30 @@ static void diag_downleft_4x4_c(uint8_t *dst, ptrdiff_t stride,
     int a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3],
         a4 = top[4], a5 = top[5], a6 = top[6], a7 = top[7];
 
-    DST(0, 0) = (a0 + a1 * 2 + a2 + 2) >> 2;
-    DST(1, 0) =
-    DST(0, 1) = (a1 + a2 * 2 + a3 + 2) >> 2;
-    DST(2, 0) =
-    DST(1, 1) =
-    DST(0, 2) = (a2 + a3 * 2 + a4 + 2) >> 2;
-    DST(3, 0) =
-    DST(2, 1) =
-    DST(1, 2) =
-    DST(0, 3) = (a3 + a4 * 2 + a5 + 2) >> 2;
-    DST(3, 1) =
-    DST(2, 2) =
-    DST(1, 3) = (a4 + a5 * 2 + a6 + 2) >> 2;
-    DST(3, 2) =
-    DST(2, 3) = (a5 + a6 * 2 + a7 + 2) >> 2;
-    DST(3, 3) = a7;  // note: this is different from vp8 and such
-}
-
-#define def_diag_downleft(size)                                             \
-static void diag_downleft_ ## size ## x ## size ## _c(uint8_t *dst,         \
-                                                      ptrdiff_t stride,     \
-                                                      const uint8_t *left,  \
-                                                      const uint8_t *top)   \
-{                                                                           \
-    int i, j;                                                               \
-    uint8_t v[size - 1];                                                    \
-                                                                            \
-    for (i = 0; i < size - 2; i++)                                          \
-        v[i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2;             \
-    v[size - 2] = (top[size - 2] + top[size - 1] * 3 + 2) >> 2;             \
-                                                                            \
-    for (j = 0; j < size; j++) {                                            \
-        memcpy(dst + j * stride, v + j, size - 1 - j);                      \
-        memset(dst + j * stride + size - 1 - j, top[size - 1], j + 1);      \
-    }                                                                       \
+    DST(0,0) = (a0 + a1 * 2 + a2 + 2) >> 2;
+    DST(1,0) = DST(0,1) = (a1 + a2 * 2 + a3 + 2) >> 2;
+    DST(2,0) = DST(1,1) = DST(0,2) = (a2 + a3 * 2 + a4 + 2) >> 2;
+    DST(3,0) = DST(2,1) = DST(1,2) = DST(0,3) = (a3 + a4 * 2 + a5 + 2) >> 2;
+    DST(3,1) = DST(2,2) = DST(1,3) = (a4 + a5 * 2 + a6 + 2) >> 2;
+    DST(3,2) = DST(2,3) = (a5 + a6 * 2 + a7 + 2) >> 2;
+    DST(3,3) = a7;  // note: this is different from vp8 and such
+}
+
+#define def_diag_downleft(size) /* emits diag_downleft_<size>x<size>_c(); its 'left' argument is never read */ \
+static void diag_downleft_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \
+                                              const uint8_t *left, const uint8_t *top) \
+{ \
+    int i, j; \
+    uint8_t v[size - 1]; /* filtered copy of top[], reused by every output row */ \
+\
+    for (i = 0; i < size - 2; i++) \
+        v[i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; /* rounded (1,2,1)/4 filter */ \
+    v[size - 2] = (top[size - 2] + top[size - 1] * 3 + 2) >> 2; /* top[size] is not read: top[size - 1] gets weight 3 */ \
+\
+    for (j = 0; j < size; j++) { \
+        memcpy(dst + j*stride, v + j, size - 1 - j); /* row j begins with v[j] .. v[size - 2] */ \
+        memset(dst + j*stride + size - 1 - j, top[size - 1], j + 1); /* last j + 1 pixels: unfiltered top[size - 1] */ \
+    } \
 }
 
 def_diag_downleft(8)
@@ -643,49 +613,34 @@ static void diag_downright_4x4_c(uint8_t *dst, ptrdiff_t stride,
                                  const uint8_t *left, const uint8_t *top)
 {
     int tl = top[-1], a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3],
-        l0 = left[0], l1 = left[1], l2 = left[2], l3 = left[3];
-
-    DST(0, 3) = (l1 + l2 * 2 + l3 + 2) >> 2;
-    DST(0, 2) =
-    DST(1, 3) = (l0 + l1 * 2 + l2 + 2) >> 2;
-    DST(0, 1) =
-    DST(1, 2) =
-    DST(2, 3) = (tl + l0 * 2 + l1 + 2) >> 2;
-    DST(0, 0) =
-    DST(1, 1) =
-    DST(2, 2) =
-    DST(3, 3) = (l0 + tl * 2 + a0 + 2) >> 2;
-    DST(1, 0) =
-    DST(2, 1) =
-    DST(3, 2) = (tl + a0 * 2 + a1 + 2) >> 2;
-    DST(2, 0) =
-    DST(3, 1) = (a0 + a1 * 2 + a2 + 2) >> 2;
-    DST(3, 0) = (a1 + a2 * 2 + a3 + 2) >> 2;
-}
-
-#define def_diag_downright(size)                                            \
-static void diag_downright_ ## size ## x ## size ## _c(uint8_t *dst,        \
-                                                       ptrdiff_t stride,    \
-                                                       const uint8_t *left, \
-                                                       const uint8_t *top)  \
-{                                                                           \
-    int i, j;                                                               \
-    uint8_t v[size + size - 1];                                             \
-                                                                            \
-    for (i = 0; i < size - 2; i++) {                                        \
-        v[i]            = (left[size - 1 - i] +                             \
-                           left[size - 2 - i] * 2 +                         \
-                           left[size - 3 - i] + 2) >> 2;                    \
-        v[size + 1 + i] = (top[i]             +                             \
-                           top[i + 1]         * 2 +                         \
-                           top[i + 2]         + 2) >> 2;                    \
-    }                                                                       \
-    v[size - 2] = (left[1] + left[0] * 2 + top[-1] + 2) >> 2;               \
-    v[size - 1] = (left[0] + top[-1] * 2 + top[0]  + 2) >> 2;               \
-    v[size]     = (top[-1] + top[0]  * 2 + top[1]  + 2) >> 2;               \
-                                                                            \
-    for (j = 0; j < size; j++)                                              \
-        memcpy(dst + j * stride, v + size - 1 - j, size);                   \
+        l0 = left[3], l1 = left[2], l2 = left[1], l3 = left[0];
+
+    DST(0,3) = (l1 + l2 * 2 + l3 + 2) >> 2;
+    DST(0,2) = DST(1,3) = (l0 + l1 * 2 + l2 + 2) >> 2;
+    DST(0,1) = DST(1,2) = DST(2,3) = (tl + l0 * 2 + l1 + 2) >> 2;
+    DST(0,0) = DST(1,1) = DST(2,2) = DST(3,3) = (l0 + tl * 2 + a0 + 2) >> 2;
+    DST(1,0) = DST(2,1) = DST(3,2) = (tl + a0 * 2 + a1 + 2) >> 2;
+    DST(2,0) = DST(3,1) = (a0 + a1 * 2 + a2 + 2) >> 2;
+    DST(3,0) = (a1 + a2 * 2 + a3 + 2) >> 2;
+}
+
+#define def_diag_downright(size) \
+static void diag_downright_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \
+                                               const uint8_t *left, const uint8_t *top) \
+{ \
+    int i, j; \
+    uint8_t v[size + size - 1]; \
+\
+    for (i = 0; i < size - 2; i++) { \
+        v[i           ] = (left[i] + left[i + 1] * 2 + left[i + 2] + 2) >> 2; \
+        v[size + 1 + i] = (top[i]  + top[i + 1]  * 2 + top[i + 2]  + 2) >> 2; \
+    } \
+    v[size - 2] = (left[size - 2] + left[size - 1] * 2 + top[-1] + 2) >> 2; \
+    v[size - 1] = (left[size - 1] + top[-1] * 2 + top[ 0] + 2) >> 2; \
+    v[size    ] = (top[-1] + top[0]  * 2 + top[ 1] + 2) >> 2; \
+\
+    for (j = 0; j < size; j++) \
+        memcpy(dst + j*stride, v + size - 1 - j, size); \
 }
 
 def_diag_downright(8)
@@ -696,57 +651,45 @@ static void vert_right_4x4_c(uint8_t *dst, ptrdiff_t stride,
                              const uint8_t *left, const uint8_t *top)
 {
     int tl = top[-1], a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3],
-        l0 = left[0], l1 = left[1], l2 = left[2];
-
-    DST(0, 3) = (l0 + l1 * 2 + l2 + 2) >> 2;
-    DST(0, 2) = (tl + l0 * 2 + l1 + 2) >> 2;
-    DST(0, 0) =
-    DST(1, 2) = (tl + a0          + 1) >> 1;
-    DST(0, 1) =
-    DST(1, 3) = (l0 + tl * 2 + a0 + 2) >> 2;
-    DST(1, 0) =
-    DST(2, 2) = (a0 + a1          + 1) >> 1;
-    DST(1, 1) =
-    DST(2, 3) = (tl + a0 * 2 + a1 + 2) >> 2;
-    DST(2, 0) =
-    DST(3, 2) = (a1 + a2          + 1) >> 1;
-    DST(2, 1) =
-    DST(3, 3) = (a0 + a1 * 2 + a2 + 2) >> 2;
-    DST(3, 0) = (a2 + a3          + 1) >> 1;
-    DST(3, 1) = (a1 + a2 * 2 + a3 + 2) >> 2;
-}
-
-#define def_vert_right(size)                                                \
-static void vert_right_ ## size ## x ## size ## _c(uint8_t *dst,            \
-                                                   ptrdiff_t stride,        \
-                                                   const uint8_t *left,     \
-                                                   const uint8_t *top)      \
-{                                                                           \
-    int i, j;                                                               \
-    uint8_t ve[size + size / 2 - 1], vo[size + size / 2 - 1];               \
-                                                                            \
-    for (i = 0; i < size / 2 - 2; i++) {                                    \
-        vo[i] = (left[size - 4 - i * 2] +                                   \
-                 left[size - 3 - i * 2] * 2 +                               \
-                 left[size - 2 - i * 2] + 2) >> 2;                          \
-        ve[i] = (left[size - 5 - i * 2] +                                   \
-                 left[size - 4 - i * 2] * 2 +                               \
-                 left[size - 3 - i * 2] + 2) >> 2;                          \
-    }                                                                       \
-    vo[size / 2 - 2] = (left[0] + left[1] * 2 + left[2] + 2) >> 2;          \
-    ve[size / 2 - 2] = (top[-1] + left[0] * 2 + left[1] + 2) >> 2;          \
-                                                                            \
-    ve[size / 2 - 1] = (top[-1] + top[0] + 1) >> 1;                         \
-    vo[size / 2 - 1] = (left[0] + top[-1] * 2 + top[0] + 2) >> 2;           \
-    for (i = 0; i < size - 1; i++) {                                        \
-        ve[size / 2 + i] = (top[i] + top[i + 1] + 1) >> 1;                  \
-        vo[size / 2 + i] = (top[i - 1] + top[i] * 2 + top[i + 1] + 2) >> 2; \
-    }                                                                       \
-                                                                            \
-    for (j = 0; j < size / 2; j++) {                                        \
-        memcpy(dst +  j * 2      * stride, ve + size / 2 - 1 - j, size);    \
-        memcpy(dst + (j * 2 + 1) * stride, vo + size / 2 - 1 - j, size);    \
-    }                                                                       \
+        l0 = left[3], l1 = left[2], l2 = left[1];
+
+    DST(0,3) = (l0 + l1 * 2 + l2 + 2) >> 2;
+    DST(0,2) = (tl + l0 * 2 + l1 + 2) >> 2;
+    DST(0,0) = DST(1,2) = (tl + a0 + 1) >> 1;
+    DST(0,1) = DST(1,3) = (l0 + tl * 2 + a0 + 2) >> 2;
+    DST(1,0) = DST(2,2) = (a0 + a1 + 1) >> 1;
+    DST(1,1) = DST(2,3) = (tl + a0 * 2 + a1 + 2) >> 2;
+    DST(2,0) = DST(3,2) = (a1 + a2 + 1) >> 1;
+    DST(2,1) = DST(3,3) = (a0 + a1 * 2 + a2 + 2) >> 2;
+    DST(3,0) = (a2 + a3 + 1) >> 1;
+    DST(3,1) = (a1 + a2 * 2 + a3 + 2) >> 2;
+}
+
+#define def_vert_right(size) \
+static void vert_right_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \
+                                           const uint8_t *left, const uint8_t *top) \
+{ \
+    int i, j; \
+    uint8_t ve[size + size/2 - 1], vo[size + size/2 - 1]; \
+\
+    for (i = 0; i < size/2 - 2; i++) { \
+        vo[i] = (left[i*2 + 3] + left[i*2 + 2] * 2 + left[i*2 + 1] + 2) >> 2; \
+        ve[i] = (left[i*2 + 4] + left[i*2 + 3] * 2 + left[i*2 + 2] + 2) >> 2; \
+    } \
+    vo[size/2 - 2] = (left[size - 1] + left[size - 2] * 2 + left[size - 3] + 2) >> 2; \
+    ve[size/2 - 2] = (top[-1] + left[size - 1] * 2 + left[size - 2] + 2) >> 2; \
+\
+    ve[size/2 - 1] = (top[-1] + top[0] + 1) >> 1; \
+    vo[size/2 - 1] = (left[size - 1] + top[-1] * 2 + top[0] + 2) >> 2; \
+    for (i = 0; i < size - 1; i++) { \
+        ve[size/2 + i] = (top[i] + top[i + 1] + 1) >> 1; \
+        vo[size/2 + i] = (top[i - 1] + top[i] * 2 + top[i + 1] + 2) >> 2; \
+    } \
+\
+    for (j = 0; j < size / 2; j++) { \
+        memcpy(dst +  j*2     *stride, ve + size/2 - 1 - j, size); \
+        memcpy(dst + (j*2 + 1)*stride, vo + size/2 - 1 - j, size); \
+    } \
 }
 
 def_vert_right(8)
@@ -756,53 +699,40 @@ def_vert_right(32)
 static void hor_down_4x4_c(uint8_t *dst, ptrdiff_t stride,
                            const uint8_t *left, const uint8_t *top)
 {
-    int l0 = left[0], l1 = left[1], l2 = left[2], l3 = left[3],
+    int l0 = left[3], l1 = left[2], l2 = left[1], l3 = left[0],
         tl = top[-1], a0 = top[0], a1 = top[1], a2 = top[2];
 
-    DST(2, 0) = (tl + a0 * 2 + a1 + 2) >> 2;
-    DST(3, 0) = (a0 + a1 * 2 + a2 + 2) >> 2;
-    DST(0, 0) =
-    DST(2, 1) = (tl + l0          + 1) >> 1;
-    DST(1, 0) =
-    DST(3, 1) = (a0 + tl * 2 + l0 + 2) >> 2;
-    DST(0, 1) =
-    DST(2, 2) = (l0 + l1          + 1) >> 1;
-    DST(1, 1) =
-    DST(3, 2) = (tl + l0 * 2 + l1 + 2) >> 2;
-    DST(0, 2) =
-    DST(2, 3) = (l1 + l2          + 1) >> 1;
-    DST(1, 2) =
-    DST(3, 3) = (l0 + l1 * 2 + l2 + 2) >> 2;
-    DST(0, 3) = (l2 + l3          + 1) >> 1;
-    DST(1, 3) = (l1 + l2 * 2 + l3 + 2) >> 2;
-}
-
-#define def_hor_down(size)                                              \
-static void hor_down_ ## size ## x ## size ## _c(uint8_t *dst,          \
-                                                 ptrdiff_t stride,      \
-                                                 const uint8_t *left,   \
-                                                 const uint8_t *top)    \
-{                                                                       \
-    int i, j;                                                           \
-    uint8_t v[size * 3 - 2];                                            \
-                                                                        \
-    for (i = 0; i < size - 2; i++) {                                    \
-        v[i * 2]        = (left[size - 2 - i] +                         \
-                           left[size - 1 - i] + 1) >> 1;                \
-        v[i * 2    + 1] = (left[size - 3 - i] +                         \
-                           left[size - 2 - i] * 2 +                     \
-                           left[size - 1 - i] + 2) >> 2;                \
-        v[size * 2 + i] = (top[i - 1] +                                 \
-                           top[i] * 2 +                                 \
-                           top[i + 1] + 2) >> 2;                        \
-    }                                                                   \
-    v[size * 2 - 2] = (top[-1] + left[0] + 1) >> 1;                     \
-    v[size * 2 - 4] = (left[0] + left[1] + 1) >> 1;                     \
-    v[size * 2 - 1] = (top[0]  + top[-1] * 2 + left[0] + 2) >> 2;       \
-    v[size * 2 - 3] = (top[-1] + left[0] * 2 + left[1] + 2) >> 2;       \
-                                                                        \
-    for (j = 0; j < size; j++)                                          \
-        memcpy(dst + j * stride, v + size * 2 - 2 - j * 2, size);       \
+    DST(2,0) = (tl + a0 * 2 + a1 + 2) >> 2;
+    DST(3,0) = (a0 + a1 * 2 + a2 + 2) >> 2;
+    DST(0,0) = DST(2,1) = (tl + l0 + 1) >> 1;
+    DST(1,0) = DST(3,1) = (a0 + tl * 2 + l0 + 2) >> 2;
+    DST(0,1) = DST(2,2) = (l0 + l1 + 1) >> 1;
+    DST(1,1) = DST(3,2) = (tl + l0 * 2 + l1 + 2) >> 2;
+    DST(0,2) = DST(2,3) = (l1 + l2 + 1) >> 1;
+    DST(1,2) = DST(3,3) = (l0 + l1 * 2 + l2 + 2) >> 2;
+    DST(0,3) = (l2 + l3 + 1) >> 1;
+    DST(1,3) = (l1 + l2 * 2 + l3 + 2) >> 2;
+}
+
+#define def_hor_down(size) \
+static void hor_down_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \
+                                         const uint8_t *left, const uint8_t *top) \
+{ \
+    int i, j; \
+    uint8_t v[size * 3 - 2]; \
+\
+    for (i = 0; i < size - 2; i++) { \
+        v[i*2       ] = (left[i + 1] + left[i + 0] + 1) >> 1; \
+        v[i*2    + 1] = (left[i + 2] + left[i + 1] * 2 + left[i + 0] + 2) >> 2; \
+        v[size*2 + i] = (top[i - 1] + top[i] * 2 + top[i + 1] + 2) >> 2; \
+    } \
+    v[size*2 - 2] = (top[-1] + left[size - 1] + 1) >> 1; \
+    v[size*2 - 4] = (left[size - 1] + left[size - 2] + 1) >> 1; \
+    v[size*2 - 1] = (top[0]  + top[-1] * 2 + left[size - 1] + 2) >> 2; \
+    v[size*2 - 3] = (top[-1] + left[size - 1] * 2 + left[size - 2] + 2) >> 2; \
+\
+    for (j = 0; j < size; j++) \
+        memcpy(dst + j*stride, v + size*2 - 2 - j*2, size); \
 }
 
 def_hor_down(8)
@@ -815,48 +745,38 @@ static void vert_left_4x4_c(uint8_t *dst, ptrdiff_t stride,
     int a0 = top[0], a1 = top[1], a2 = top[2], a3 = top[3],
         a4 = top[4], a5 = top[5], a6 = top[6];
 
-    DST(0, 0) = (a0 + a1          + 1) >> 1;
-    DST(0, 1) = (a0 + a1 * 2 + a2 + 2) >> 2;
-    DST(1, 0) =
-    DST(0, 2) = (a1 + a2          + 1) >> 1;
-    DST(1, 1) =
-    DST(0, 3) = (a1 + a2 * 2 + a3 + 2) >> 2;
-    DST(2, 0) =
-    DST(1, 2) = (a2 + a3          + 1) >> 1;
-    DST(2, 1) =
-    DST(1, 3) = (a2 + a3 * 2 + a4 + 2) >> 2;
-    DST(3, 0) =
-    DST(2, 2) = (a3 + a4          + 1) >> 1;
-    DST(3, 1) =
-    DST(2, 3) = (a3 + a4 * 2 + a5 + 2) >> 2;
-    DST(3, 2) = (a4 + a5          + 1) >> 1;
-    DST(3, 3) = (a4 + a5 * 2 + a6 + 2) >> 2;
-}
-
-#define def_vert_left(size)                                             \
-static void vert_left_ ## size ## x ## size ## _c(uint8_t *dst,         \
-                                                  ptrdiff_t stride,     \
-                                                  const uint8_t *left,  \
-                                                  const uint8_t *top)   \
-{                                                                       \
-    int i, j;                                                           \
-    uint8_t ve[size - 1], vo[size - 1];                                 \
-                                                                        \
-    for (i = 0; i < size - 2; i++) {                                    \
-        ve[i] = (top[i] + top[i + 1] + 1) >> 1;                         \
-        vo[i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2;        \
-    }                                                                   \
-    ve[size - 2] = (top[size - 2] + top[size - 1] + 1) >> 1;            \
-    vo[size - 2] = (top[size - 2] + top[size - 1] * 3 + 2) >> 2;        \
-                                                                        \
-    for (j = 0; j < size / 2; j++) {                                    \
-        memcpy(dst +  j * 2      * stride, ve + j, size - (j + 1));     \
-        memset(dst +  j * 2      * stride + size - j - 1,               \
-               top[size - 1], j + 1);                                   \
-        memcpy(dst + (j * 2 + 1) * stride, vo + j, size - (j + 1));     \
-        memset(dst + (j * 2 + 1) * stride + size - j - 1,               \
-               top[size - 1], j + 1);                                   \
-    }                                                                   \
+    DST(0,0) = (a0 + a1 + 1) >> 1;
+    DST(0,1) = (a0 + a1 * 2 + a2 + 2) >> 2;
+    DST(1,0) = DST(0,2) = (a1 + a2 + 1) >> 1;
+    DST(1,1) = DST(0,3) = (a1 + a2 * 2 + a3 + 2) >> 2;
+    DST(2,0) = DST(1,2) = (a2 + a3 + 1) >> 1;
+    DST(2,1) = DST(1,3) = (a2 + a3 * 2 + a4 + 2) >> 2;
+    DST(3,0) = DST(2,2) = (a3 + a4 + 1) >> 1;
+    DST(3,1) = DST(2,3) = (a3 + a4 * 2 + a5 + 2) >> 2;
+    DST(3,2) = (a4 + a5 + 1) >> 1;
+    DST(3,3) = (a4 + a5 * 2 + a6 + 2) >> 2;
+}
+
+#define def_vert_left(size) \
+static void vert_left_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \
+                                          const uint8_t *left, const uint8_t *top) \
+{ \
+    int i, j; \
+    uint8_t ve[size - 1], vo[size - 1]; \
+\
+    for (i = 0; i < size - 2; i++) { \
+        ve[i] = (top[i] + top[i + 1] + 1) >> 1; \
+        vo[i] = (top[i] + top[i + 1] * 2 + top[i + 2] + 2) >> 2; \
+    } \
+    ve[size - 2] = (top[size - 2] + top[size - 1] + 1) >> 1; \
+    vo[size - 2] = (top[size - 2] + top[size - 1] * 3 + 2) >> 2; \
+\
+    for (j = 0; j < size / 2; j++) { \
+        memcpy(dst +  j*2      * stride, ve + j, size - j - 1); \
+        memset(dst +  j*2      * stride + size - j - 1, top[size - 1], j + 1); \
+        memcpy(dst + (j*2 + 1) * stride, vo + j, size - j - 1); \
+        memset(dst + (j*2 + 1) * stride + size - j - 1, top[size - 1], j + 1); \
+    } \
 }
 
 def_vert_left(8)
@@ -866,49 +786,38 @@ def_vert_left(32)
 static void hor_up_4x4_c(uint8_t *dst, ptrdiff_t stride,
                          const uint8_t *left, const uint8_t *top)
 {
-    int l0 = left[0], l1 = left[1], l2 = left[2], l3 = left[3];
-
-    DST(0, 0) = (l0 + l1          + 1) >> 1;
-    DST(1, 0) = (l0 + l1 * 2 + l2 + 2) >> 2;
-    DST(0, 1) =
-    DST(2, 0) = (l1 + l2          + 1) >> 1;
-    DST(1, 1) =
-    DST(3, 0) = (l1 + l2 * 2 + l3 + 2) >> 2;
-    DST(0, 2) =
-    DST(2, 1) = (l2 + l3          + 1) >> 1;
-    DST(1, 2) =
-    DST(3, 1) = (l2 + l3 * 3      + 2) >> 2;
-    DST(0, 3) =
-    DST(1, 3) =
-    DST(2, 2) =
-    DST(2, 3) =
-    DST(3, 2) =
-    DST(3, 3) = l3;
-}
-
-#define def_hor_up(size)                                                    \
-static void hor_up_ ## size ## x ## size ## _c(uint8_t *dst,                \
-                                               ptrdiff_t stride,            \
-                                               const uint8_t *left,         \
-                                               const uint8_t *top)          \
-{                                                                           \
-    int i, j;                                                               \
-    uint8_t v[size * 2 - 2];                                                \
-                                                                            \
-    for (i = 0; i < size - 2; i++) {                                        \
-        v[i * 2]     = (left[i] + left[i + 1] + 1) >> 1;                    \
-        v[i * 2 + 1] = (left[i] + left[i + 1] * 2 + left[i + 2] + 2) >> 2;  \
-    }                                                                       \
-    v[size * 2 - 4] = (left[size - 2] + left[size - 1]     + 1) >> 1;       \
-    v[size * 2 - 3] = (left[size - 2] + left[size - 1] * 3 + 2) >> 2;       \
-                                                                            \
-    for (j = 0; j < size / 2; j++)                                          \
-        memcpy(dst + j * stride, v + j * 2, size);                          \
-    for (j = size / 2; j < size; j++) {                                     \
-        memcpy(dst + j * stride, v + j * 2, size * 2 - 2 - j * 2);          \
-        memset(dst + j * stride + size * 2 - 2 - j * 2, left[size - 1],     \
-               2 + j * 2 - size);                                           \
-    }                                                                       \
+    int l0 = left[3], l1 = left[2], l2 = left[1], l3 = left[0];
+
+    DST(0,0) = (l0 + l1 + 1) >> 1;
+    DST(1,0) = (l0 + l1 * 2 + l2 + 2) >> 2;
+    DST(0,1) = DST(2,0) = (l1 + l2 + 1) >> 1;
+    DST(1,1) = DST(3,0) = (l1 + l2 * 2 + l3 + 2) >> 2;
+    DST(0,2) = DST(2,1) = (l2 + l3 + 1) >> 1;
+    DST(1,2) = DST(3,1) = (l2 + l3 * 3 + 2) >> 2;
+    DST(0,3) = DST(1,3) = DST(2,2) = DST(2,3) = DST(3,2) = DST(3,3) = l3;
+}
+
+#define def_hor_up(size) \
+static void hor_up_##size##x##size##_c(uint8_t *dst, ptrdiff_t stride, \
+                                       const uint8_t *left, const uint8_t *top) \
+{ \
+    int i, j; \
+    uint8_t v[size*2 - 2]; \
+\
+    for (i = 0; i < size - 2; i++) { \
+        v[i*2    ] = (left[size - i - 1] + left[size - i - 2] + 1) >> 1; \
+        v[i*2 + 1] = (left[size - i - 1] + left[size - i - 2] * 2 + left[size - i - 3] + 2) >> 2; \
+    } \
+    v[size*2 - 4] = (left[1] + left[0] + 1) >> 1; \
+    v[size*2 - 3] = (left[1] + left[0] * 3 + 2) >> 2; \
+\
+    for (j = 0; j < size / 2; j++) \
+        memcpy(dst + j*stride, v + j*2, size); \
+    for (j = size / 2; j < size; j++) { \
+        memcpy(dst + j*stride, v + j*2, size*2 - 2 - j*2); \
+        memset(dst + j*stride + size*2 - 2 - j*2, left[0], \
+               2 + j*2 - size); \
+    } \
 }
 
 def_hor_up(8)
@@ -919,22 +828,22 @@ def_hor_up(32)
 
 static av_cold void vp9dsp_intrapred_init(VP9DSPContext *dsp)
 {
-#define init_intra_pred(tx, sz)                                              \
-    dsp->intra_pred[tx][VERT_PRED]            = vert_           ## sz ## _c; \
-    dsp->intra_pred[tx][HOR_PRED]             = hor_            ## sz ## _c; \
-    dsp->intra_pred[tx][DC_PRED]              = dc_             ## sz ## _c; \
-    dsp->intra_pred[tx][DIAG_DOWN_LEFT_PRED]  = diag_downleft_  ## sz ## _c; \
-    dsp->intra_pred[tx][DIAG_DOWN_RIGHT_PRED] = diag_downright_ ## sz ## _c; \
-    dsp->intra_pred[tx][VERT_RIGHT_PRED]      = vert_right_     ## sz ## _c; \
-    dsp->intra_pred[tx][HOR_DOWN_PRED]        = hor_down_       ## sz ## _c; \
-    dsp->intra_pred[tx][VERT_LEFT_PRED]       = vert_left_      ## sz ## _c; \
-    dsp->intra_pred[tx][HOR_UP_PRED]          = hor_up_         ## sz ## _c; \
-    dsp->intra_pred[tx][TM_VP8_PRED]          = tm_             ## sz ## _c; \
-    dsp->intra_pred[tx][LEFT_DC_PRED]         = dc_left_        ## sz ## _c; \
-    dsp->intra_pred[tx][TOP_DC_PRED]          = dc_top_         ## sz ## _c; \
-    dsp->intra_pred[tx][DC_128_PRED]          = dc_128_         ## sz ## _c; \
-    dsp->intra_pred[tx][DC_127_PRED]          = dc_127_         ## sz ## _c; \
-    dsp->intra_pred[tx][DC_129_PRED]          = dc_129_         ## sz ## _c
+#define init_intra_pred(tx, sz) \
+    dsp->intra_pred[tx][VERT_PRED]            = vert_##sz##_c; \
+    dsp->intra_pred[tx][HOR_PRED]             = hor_##sz##_c; \
+    dsp->intra_pred[tx][DC_PRED]              = dc_##sz##_c; \
+    dsp->intra_pred[tx][DIAG_DOWN_LEFT_PRED]  = diag_downleft_##sz##_c; \
+    dsp->intra_pred[tx][DIAG_DOWN_RIGHT_PRED] = diag_downright_##sz##_c; \
+    dsp->intra_pred[tx][VERT_RIGHT_PRED]      = vert_right_##sz##_c; \
+    dsp->intra_pred[tx][HOR_DOWN_PRED]        = hor_down_##sz##_c; \
+    dsp->intra_pred[tx][VERT_LEFT_PRED]       = vert_left_##sz##_c; \
+    dsp->intra_pred[tx][HOR_UP_PRED]          = hor_up_##sz##_c; \
+    dsp->intra_pred[tx][TM_VP8_PRED]          = tm_##sz##_c; \
+    dsp->intra_pred[tx][LEFT_DC_PRED]         = dc_left_##sz##_c; \
+    dsp->intra_pred[tx][TOP_DC_PRED]          = dc_top_##sz##_c; \
+    dsp->intra_pred[tx][DC_128_PRED]          = dc_128_##sz##_c; \
+    dsp->intra_pred[tx][DC_127_PRED]          = dc_127_##sz##_c; \
+    dsp->intra_pred[tx][DC_129_PRED]          = dc_129_##sz##_c
 
     init_intra_pred(TX_4X4,   4x4);
     init_intra_pred(TX_8X8,   8x8);
@@ -944,46 +853,60 @@ static av_cold void vp9dsp_intrapred_init(VP9DSPContext *dsp)
 #undef init_intra_pred
 }
 
-#define itxfm_wrapper(type_a, type_b, sz, bits)                             \
-static void                                                                 \
-type_a ## _ ## type_b ## _ ## sz ## x ## sz ## _add_c(uint8_t *dst,         \
-                                                      ptrdiff_t stride,     \
-                                                      int16_t *block,       \
-                                                      int eob)              \
-{                                                                           \
-    int i, j;                                                               \
-    int16_t tmp[sz * sz], out[sz];                                          \
-    for (i = 0; i < sz; i++)                                                \
-        type_a ## sz ## _1d(tmp + i * sz, block + i, sz, 0);                \
-    memset(block, 0, sz * sz * sizeof(*block));                             \
-    for (i = 0; i < sz; i++) {                                              \
-        type_b ## sz ## _1d(out, tmp + i, sz, 1);                           \
-        for (j = 0; j < sz; j++)                                            \
-            dst[j * stride] =                                               \
-                av_clip_uint8(dst[j * stride] +                             \
-                              (bits ? (out[j] + (1 << (bits - 1))) >> bits  \
-                                    : out[j]));                             \
-        dst++;                                                              \
-    }                                                                       \
-}
-
-#define itxfm_wrap(sz, bits)             \
-    itxfm_wrapper(idct, idct, sz, bits)  \
-    itxfm_wrapper(iadst, idct, sz, bits) \
-    itxfm_wrapper(idct, iadst, sz, bits) \
-    itxfm_wrapper(iadst, iadst, sz, bits)
-
-#define IN(x) in[x * stride]
-
-static av_always_inline void idct4_1d(int16_t *out, const int16_t *in,
-                                      ptrdiff_t stride, int pass)
+#define itxfm_wrapper(type_a, type_b, sz, bits, has_dconly) \
+static void type_a##_##type_b##_##sz##x##sz##_add_c(uint8_t *dst, \
+                                                    ptrdiff_t stride, \
+                                                    int16_t *block, int eob) \
+{ \
+    int i, j; \
+    int16_t tmp[sz * sz], out[sz]; \
+\
+    if (has_dconly && eob == 1) { \
+        const int t  = (((block[0] * 11585 + (1 << 13)) >> 14) \
+                                   * 11585 + (1 << 13)) >> 14; \
+        block[0] = 0; \
+        for (i = 0; i < sz; i++) { \
+            for (j = 0; j < sz; j++) \
+                dst[j * stride] = av_clip_uint8(dst[j * stride] + \
+                                                (bits ? \
+                                                 (t + (1 << (bits - 1))) >> bits : \
+                                                 t)); \
+            dst++; \
+        } \
+        return; \
+    } \
+\
+    for (i = 0; i < sz; i++) \
+        type_a##sz##_1d(block + i, sz, tmp + i * sz, 0); \
+    memset(block, 0, sz * sz * sizeof(*block)); \
+    for (i = 0; i < sz; i++) { \
+        type_b##sz##_1d(tmp + i, sz, out, 1); \
+        for (j = 0; j < sz; j++) \
+            dst[j * stride] = av_clip_uint8(dst[j * stride] + \
+                                            (bits ? \
+                                             (out[j] + (1 << (bits - 1))) >> bits : \
+                                             out[j])); \
+        dst++; \
+    } \
+}
+
+#define itxfm_wrap(sz, bits) \
+itxfm_wrapper(idct,  idct,  sz, bits, 1) \
+itxfm_wrapper(iadst, idct,  sz, bits, 0) \
+itxfm_wrapper(idct,  iadst, sz, bits, 0) \
+itxfm_wrapper(iadst, iadst, sz, bits, 0)
+
+#define IN(x) in[(x) * stride]
+
+static av_always_inline void idct4_1d(const int16_t *in, ptrdiff_t stride,
+                                      int16_t *out, int pass)
 {
     int t0, t1, t2, t3;
 
-    t0 = ((IN(0)        + IN(2)) * 11585 + (1 << 13)) >> 14;
-    t1 = ((IN(0)        - IN(2)) * 11585 + (1 << 13)) >> 14;
-    t2 = (IN(1) *  6270 - IN(3)  * 15137 + (1 << 13)) >> 14;
-    t3 = (IN(1) * 15137 + IN(3)  *  6270 + (1 << 13)) >> 14;
+    t0 = ((IN(0) + IN(2)) * 11585 + (1 << 13)) >> 14;
+    t1 = ((IN(0) - IN(2)) * 11585 + (1 << 13)) >> 14;
+    t2 = (IN(1) *  6270 - IN(3) * 15137 + (1 << 13)) >> 14;
+    t3 = (IN(1) * 15137 + IN(3) *  6270 + (1 << 13)) >> 14;
 
     out[0] = t0 + t3;
     out[1] = t1 + t2;
@@ -991,8 +914,8 @@ static av_always_inline void idct4_1d(int16_t *out, const int16_t *in,
     out[3] = t0 - t3;
 }
 
-static av_always_inline void iadst4_1d(int16_t *out, const int16_t *in,
-                                       ptrdiff_t stride, int pass)
+static av_always_inline void iadst4_1d(const int16_t *in, ptrdiff_t stride,
+                                       int16_t *out, int pass)
 {
     int t0, t1, t2, t3;
 
@@ -1009,19 +932,19 @@ static av_always_inline void iadst4_1d(int16_t *out, const int16_t *in,
 
 itxfm_wrap(4, 4)
 
-static av_always_inline void idct8_1d(int16_t *out, const int16_t *in,
-                                      ptrdiff_t stride, int pass)
+static av_always_inline void idct8_1d(const int16_t *in, ptrdiff_t stride,
+                                      int16_t *out, int pass)
 {
     int t0, t0a, t1, t1a, t2, t2a, t3, t3a, t4, t4a, t5, t5a, t6, t6a, t7, t7a;
 
-    t0a = ((IN(0)        + IN(4)) * 11585 + (1 << 13)) >> 14;
-    t1a = ((IN(0)        - IN(4)) * 11585 + (1 << 13)) >> 14;
-    t2a = (IN(2) *  6270 - IN(6)  * 15137 + (1 << 13)) >> 14;
-    t3a = (IN(2) * 15137 + IN(6)  *  6270 + (1 << 13)) >> 14;
-    t4a = (IN(1) *  3196 - IN(7)  * 16069 + (1 << 13)) >> 14;
-    t5a = (IN(5) * 13623 - IN(3)  *  9102 + (1 << 13)) >> 14;
-    t6a = (IN(5) *  9102 + IN(3)  * 13623 + (1 << 13)) >> 14;
-    t7a = (IN(1) * 16069 + IN(7)  *  3196 + (1 << 13)) >> 14;
+    t0a = ((IN(0) + IN(4)) * 11585 + (1 << 13)) >> 14;
+    t1a = ((IN(0) - IN(4)) * 11585 + (1 << 13)) >> 14;
+    t2a = (IN(2) *  6270 - IN(6) * 15137 + (1 << 13)) >> 14;
+    t3a = (IN(2) * 15137 + IN(6) *  6270 + (1 << 13)) >> 14;
+    t4a = (IN(1) *  3196 - IN(7) * 16069 + (1 << 13)) >> 14;
+    t5a = (IN(5) * 13623 - IN(3) *  9102 + (1 << 13)) >> 14;
+    t6a = (IN(5) *  9102 + IN(3) * 13623 + (1 << 13)) >> 14;
+    t7a = (IN(1) * 16069 + IN(7) *  3196 + (1 << 13)) >> 14;
 
     t0  = t0a + t3a;
     t1  = t1a + t2a;
@@ -1045,8 +968,8 @@ static av_always_inline void idct8_1d(int16_t *out, const int16_t *in,
     out[7] = t0 - t7;
 }
 
-static av_always_inline void iadst8_1d(int16_t *out, const int16_t *in,
-                                       ptrdiff_t stride, int pass)
+static av_always_inline void iadst8_1d(const int16_t *in, ptrdiff_t stride,
+                                       int16_t *out, int pass)
 {
     int t0, t0a, t1, t1a, t2, t2a, t3, t3a, t4, t4a, t5, t5a, t6, t6a, t7, t7a;
 
@@ -1059,14 +982,14 @@ static av_always_inline void iadst8_1d(int16_t *out, const int16_t *in,
     t6a =  4756 * IN(1) + 15679 * IN(6);
     t7a = 15679 * IN(1) -  4756 * IN(6);
 
-    t0  = (t0a + t4a + (1 << 13)) >> 14;
-    t1  = (t1a + t5a + (1 << 13)) >> 14;
-    t2  = (t2a + t6a + (1 << 13)) >> 14;
-    t3  = (t3a + t7a + (1 << 13)) >> 14;
-    t4  = (t0a - t4a + (1 << 13)) >> 14;
-    t5  = (t1a - t5a + (1 << 13)) >> 14;
-    t6  = (t2a - t6a + (1 << 13)) >> 14;
-    t7  = (t3a - t7a + (1 << 13)) >> 14;
+    t0 = (t0a + t4a + (1 << 13)) >> 14;
+    t1 = (t1a + t5a + (1 << 13)) >> 14;
+    t2 = (t2a + t6a + (1 << 13)) >> 14;
+    t3 = (t3a + t7a + (1 << 13)) >> 14;
+    t4 = (t0a - t4a + (1 << 13)) >> 14;
+    t5 = (t1a - t5a + (1 << 13)) >> 14;
+    t6 = (t2a - t6a + (1 << 13)) >> 14;
+    t7 = (t3a - t7a + (1 << 13)) >> 14;
 
     t4a = 15137 * t4 +  6270 * t5;
     t5a =  6270 * t4 - 15137 * t5;
@@ -1091,15 +1014,15 @@ static av_always_inline void iadst8_1d(int16_t *out, const int16_t *in,
 
 itxfm_wrap(8, 5)
 
-static av_always_inline void idct16_1d(int16_t *out, const int16_t *in,
-                                       ptrdiff_t stride, int pass)
+static av_always_inline void idct16_1d(const int16_t *in, ptrdiff_t stride,
+                                       int16_t *out, int pass)
 {
     int t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15;
     int t0a, t1a, t2a, t3a, t4a, t5a, t6a, t7a;
     int t8a, t9a, t10a, t11a, t12a, t13a, t14a, t15a;
 
-    t0a  = ((IN(0)         + IN(8)) * 11585 + (1 << 13)) >> 14;
-    t1a  = ((IN(0)         - IN(8)) * 11585 + (1 << 13)) >> 14;
+    t0a  = ((IN(0) + IN(8)) * 11585 + (1 << 13)) >> 14;
+    t1a  = ((IN(0) - IN(8)) * 11585 + (1 << 13)) >> 14;
     t2a  = (IN(4)  *  6270 - IN(12) * 15137 + (1 << 13)) >> 14;
     t3a  = (IN(4)  * 15137 + IN(12) *  6270 + (1 << 13)) >> 14;
     t4a  = (IN(2)  *  3196 - IN(14) * 16069 + (1 << 13)) >> 14;
@@ -1115,29 +1038,29 @@ static av_always_inline void idct16_1d(int16_t *out, const int16_t *in,
     t11a = (IN(13) * 15679 - IN(3)  *  4756 + (1 << 13)) >> 14;
     t12a = (IN(13) *  4756 + IN(3)  * 15679 + (1 << 13)) >> 14;
 
-    t0   = t0a  + t3a;
-    t1   = t1a  + t2a;
-    t2   = t1a  - t2a;
-    t3   = t0a  - t3a;
-    t4   = t4a  + t5a;
-    t5   = t4a  - t5a;
-    t6   = t7a  - t6a;
-    t7   = t7a  + t6a;
-    t8   = t8a  + t9a;
-    t9   = t8a  - t9a;
-    t10  = t11a - t10a;
-    t11  = t11a + t10a;
-    t12  = t12a + t13a;
-    t13  = t12a - t13a;
-    t14  = t15a - t14a;
-    t15  = t15a + t14a;
-
-    t5a  =   ((t6         - t5) * 11585  + (1 << 13)) >> 14;
-    t6a  =   ((t6         + t5) * 11585  + (1 << 13)) >> 14;
-    t9a  =   (t14 *  6270 - t9  * 15137  + (1 << 13)) >> 14;
-    t14a =   (t14 * 15137 + t9  *  6270  + (1 << 13)) >> 14;
+    t0  = t0a  + t3a;
+    t1  = t1a  + t2a;
+    t2  = t1a  - t2a;
+    t3  = t0a  - t3a;
+    t4  = t4a  + t5a;
+    t5  = t4a  - t5a;
+    t6  = t7a  - t6a;
+    t7  = t7a  + t6a;
+    t8  = t8a  + t9a;
+    t9  = t8a  - t9a;
+    t10 = t11a - t10a;
+    t11 = t11a + t10a;
+    t12 = t12a + t13a;
+    t13 = t12a - t13a;
+    t14 = t15a - t14a;
+    t15 = t15a + t14a;
+
+    t5a  = ((t6 - t5) * 11585 + (1 << 13)) >> 14;
+    t6a  = ((t6 + t5) * 11585 + (1 << 13)) >> 14;
+    t9a  = (  t14 *  6270 - t9  * 15137  + (1 << 13)) >> 14;
+    t14a = (  t14 * 15137 + t9  *  6270  + (1 << 13)) >> 14;
     t10a = (-(t13 * 15137 + t10 *  6270) + (1 << 13)) >> 14;
-    t13a =   (t13 *  6270 - t10 * 15137  + (1 << 13)) >> 14;
+    t13a = (  t13 *  6270 - t10 * 15137  + (1 << 13)) >> 14;
 
     t0a  = t0   + t7;
     t1a  = t1   + t6a;
@@ -1161,16 +1084,16 @@ static av_always_inline void idct16_1d(int16_t *out, const int16_t *in,
     t11  = ((t12a - t11a) * 11585 + (1 << 13)) >> 14;
     t12  = ((t12a + t11a) * 11585 + (1 << 13)) >> 14;
 
-    out[0]  = t0a + t15a;
-    out[1]  = t1a + t14;
-    out[2]  = t2a + t13a;
-    out[3]  = t3a + t12;
-    out[4]  = t4  + t11;
-    out[5]  = t5  + t10a;
-    out[6]  = t6  + t9;
-    out[7]  = t7  + t8a;
-    out[8]  = t7  - t8a;
-    out[9]  = t6  - t9;
+    out[ 0] = t0a + t15a;
+    out[ 1] = t1a + t14;
+    out[ 2] = t2a + t13a;
+    out[ 3] = t3a + t12;
+    out[ 4] = t4  + t11;
+    out[ 5] = t5  + t10a;
+    out[ 6] = t6  + t9;
+    out[ 7] = t7  + t8a;
+    out[ 8] = t7  - t8a;
+    out[ 9] = t6  - t9;
     out[10] = t5  - t10a;
     out[11] = t4  - t11;
     out[12] = t3a - t12;
@@ -1179,8 +1102,8 @@ static av_always_inline void idct16_1d(int16_t *out, const int16_t *in,
     out[15] = t0a - t15a;
 }
 
-static av_always_inline void iadst16_1d(int16_t *out, const int16_t *in,
-                                        ptrdiff_t stride, int pass)
+static av_always_inline void iadst16_1d(const int16_t *in, ptrdiff_t stride,
+                                        int16_t *out, int pass)
 {
     int t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15;
     int t0a, t1a, t2a, t3a, t4a, t5a, t6a, t7a;
@@ -1229,14 +1152,14 @@ static av_always_inline void iadst16_1d(int16_t *out, const int16_t *in,
     t14  = t15a *  9102 - t14a * 13623;
     t15  = t15a * 13623 + t14a *  9102;
 
-    t0   = t0a  + t4a;
-    t1   = t1a  + t5a;
-    t2   = t2a  + t6a;
-    t3   = t3a  + t7a;
-    t4   = t0a  - t4a;
-    t5   = t1a  - t5a;
-    t6   = t2a  - t6a;
-    t7   = t3a  - t7a;
+    t0   = t0a + t4a;
+    t1   = t1a + t5a;
+    t2   = t2a + t6a;
+    t3   = t3a + t7a;
+    t4   = t0a - t4a;
+    t5   = t1a - t5a;
+    t6   = t2a - t6a;
+    t7   = t3a - t7a;
     t8a  = (t8  + t12 + (1 << 13)) >> 14;
     t9a  = (t9  + t13 + (1 << 13)) >> 14;
     t10a = (t10 + t14 + (1 << 13)) >> 14;
@@ -1246,79 +1169,79 @@ static av_always_inline void iadst16_1d(int16_t *out, const int16_t *in,
     t14a = (t10 - t14 + (1 << 13)) >> 14;
     t15a = (t11 - t15 + (1 << 13)) >> 14;
 
-    t4a  = t4   * 15137 + t5   *  6270;
-    t5a  = t4   *  6270 - t5   * 15137;
-    t6a  = t7   * 15137 - t6   *  6270;
-    t7a  = t7   *  6270 + t6   * 15137;
+    t4a  = t4 * 15137 + t5 *  6270;
+    t5a  = t4 *  6270 - t5 * 15137;
+    t6a  = t7 * 15137 - t6 *  6270;
+    t7a  = t7 *  6270 + t6 * 15137;
     t12  = t12a * 15137 + t13a *  6270;
     t13  = t12a *  6270 - t13a * 15137;
     t14  = t15a * 15137 - t14a *  6270;
     t15  = t15a *  6270 + t14a * 15137;
 
-    out[0]  =     t0 + t2;
-    out[15] =   -(t1 + t3);
-    t2a     =     t0 - t2;
-    t3a     =     t1 - t3;
-    out[3]  = -((t4a + t6a + (1 << 13)) >> 14);
+    out[ 0] =   t0 + t2;
+    out[15] = -(t1 + t3);
+    t2a     =   t0 - t2;
+    t3a     =   t1 - t3;
+    out[ 3] = -((t4a + t6a + (1 << 13)) >> 14);
     out[12] =   (t5a + t7a + (1 << 13)) >> 14;
     t6      =   (t4a - t6a + (1 << 13)) >> 14;
     t7      =   (t5a - t7a + (1 << 13)) >> 14;
-    out[1]  =  -(t8a + t10a);
-    out[14] =    t9a + t11a;
-    t10     =    t8a - t10a;
-    t11     =    t9a - t11a;
-    out[2]  =   (t12 + t14 + (1 << 13)) >> 14;
+    out[ 1] = -(t8a + t10a);
+    out[14] =   t9a + t11a;
+    t10     =   t8a - t10a;
+    t11     =   t9a - t11a;
+    out[ 2] =   (t12 + t14 + (1 << 13)) >> 14;
     out[13] = -((t13 + t15 + (1 << 13)) >> 14);
     t14a    =   (t12 - t14 + (1 << 13)) >> 14;
     t15a    =   (t13 - t15 + (1 << 13)) >> 14;
 
-    out[7]  = ((t2a  + t3a)  * -11585 + (1 << 13)) >> 14;
-    out[8]  = ((t2a  - t3a)  *  11585 + (1 << 13)) >> 14;
-    out[4]  = ((t7   + t6)   *  11585 + (1 << 13)) >> 14;
+    out[ 7] = ((t2a  + t3a)  * -11585 + (1 << 13)) >> 14;
+    out[ 8] = ((t2a  - t3a)  *  11585 + (1 << 13)) >> 14;
+    out[ 4] = ((t7   + t6)   *  11585 + (1 << 13)) >> 14;
     out[11] = ((t7   - t6)   *  11585 + (1 << 13)) >> 14;
-    out[6]  = ((t11  + t10)  *  11585 + (1 << 13)) >> 14;
-    out[9]  = ((t11  - t10)  *  11585 + (1 << 13)) >> 14;
-    out[5]  = ((t14a + t15a) * -11585 + (1 << 13)) >> 14;
+    out[ 6] = ((t11  + t10)  *  11585 + (1 << 13)) >> 14;
+    out[ 9] = ((t11  - t10)  *  11585 + (1 << 13)) >> 14;
+    out[ 5] = ((t14a + t15a) * -11585 + (1 << 13)) >> 14;
     out[10] = ((t14a - t15a) *  11585 + (1 << 13)) >> 14;
 }
 
 itxfm_wrap(16, 6)
 
-static av_always_inline void idct32_1d(int16_t *out, const int16_t *in,
-                                       ptrdiff_t stride, int pass)
+static av_always_inline void idct32_1d(const int16_t *in, ptrdiff_t stride,
+                                       int16_t *out, int pass)
 {
-    int t0a  = ((IN(0)         + IN(16)) * 11585 + (1 << 13)) >> 14;
-    int t1a  = ((IN(0)         - IN(16)) * 11585 + (1 << 13)) >> 14;
-    int t2a  = (IN(8)  *  6270 - IN(24)  * 15137 + (1 << 13)) >> 14;
-    int t3a  = (IN(8)  * 15137 + IN(24)  *  6270 + (1 << 13)) >> 14;
-    int t4a  = (IN(4)  *  3196 - IN(28)  * 16069 + (1 << 13)) >> 14;
-    int t7a  = (IN(4)  * 16069 + IN(28)  *  3196 + (1 << 13)) >> 14;
-    int t5a  = (IN(20) * 13623 - IN(12)  *  9102 + (1 << 13)) >> 14;
-    int t6a  = (IN(20) *  9102 + IN(12)  * 13623 + (1 << 13)) >> 14;
-    int t8a  = (IN(2)  *  1606 - IN(30)  * 16305 + (1 << 13)) >> 14;
-    int t15a = (IN(2)  * 16305 + IN(30)  *  1606 + (1 << 13)) >> 14;
-    int t9a  = (IN(18) * 12665 - IN(14)  * 10394 + (1 << 13)) >> 14;
-    int t14a = (IN(18) * 10394 + IN(14)  * 12665 + (1 << 13)) >> 14;
-    int t10a = (IN(10) *  7723 - IN(22)  * 14449 + (1 << 13)) >> 14;
-    int t13a = (IN(10) * 14449 + IN(22)  *  7723 + (1 << 13)) >> 14;
-    int t11a = (IN(26) * 15679 - IN(6)   *  4756 + (1 << 13)) >> 14;
-    int t12a = (IN(26) *  4756 + IN(6)   * 15679 + (1 << 13)) >> 14;
-    int t16a = (IN(1)  *   804 - IN(31)  * 16364 + (1 << 13)) >> 14;
-    int t31a = (IN(1)  * 16364 + IN(31)  *   804 + (1 << 13)) >> 14;
-    int t17a = (IN(17) * 12140 - IN(15)  * 11003 + (1 << 13)) >> 14;
-    int t30a = (IN(17) * 11003 + IN(15)  * 12140 + (1 << 13)) >> 14;
-    int t18a = (IN(9)  *  7005 - IN(23)  * 14811 + (1 << 13)) >> 14;
-    int t29a = (IN(9)  * 14811 + IN(23)  *  7005 + (1 << 13)) >> 14;
-    int t19a = (IN(25) * 15426 - IN(7)   *  5520 + (1 << 13)) >> 14;
-    int t28a = (IN(25) *  5520 + IN(7)   * 15426 + (1 << 13)) >> 14;
-    int t20a = (IN(5)  *  3981 - IN(27)  * 15893 + (1 << 13)) >> 14;
-    int t27a = (IN(5)  * 15893 + IN(27)  *  3981 + (1 << 13)) >> 14;
-    int t21a = (IN(21) * 14053 - IN(11)  *  8423 + (1 << 13)) >> 14;
-    int t26a = (IN(21) *  8423 + IN(11)  * 14053 + (1 << 13)) >> 14;
-    int t22a = (IN(13) *  9760 - IN(19)  * 13160 + (1 << 13)) >> 14;
-    int t25a = (IN(13) * 13160 + IN(19)  *  9760 + (1 << 13)) >> 14;
-    int t23a = (IN(29) * 16207 - IN(3)   *  2404 + (1 << 13)) >> 14;
-    int t24a = (IN(29) *  2404 + IN(3)   * 16207 + (1 << 13)) >> 14;
+    int t0a  = ((IN(0) + IN(16)) * 11585 + (1 << 13)) >> 14;
+    int t1a  = ((IN(0) - IN(16)) * 11585 + (1 << 13)) >> 14;
+    int t2a  = (IN( 8) *  6270 - IN(24) * 15137 + (1 << 13)) >> 14;
+    int t3a  = (IN( 8) * 15137 + IN(24) *  6270 + (1 << 13)) >> 14;
+    int t4a  = (IN( 4) *  3196 - IN(28) * 16069 + (1 << 13)) >> 14;
+    int t7a  = (IN( 4) * 16069 + IN(28) *  3196 + (1 << 13)) >> 14;
+    int t5a  = (IN(20) * 13623 - IN(12) *  9102 + (1 << 13)) >> 14;
+    int t6a  = (IN(20) *  9102 + IN(12) * 13623 + (1 << 13)) >> 14;
+    int t8a  = (IN( 2) *  1606 - IN(30) * 16305 + (1 << 13)) >> 14;
+    int t15a = (IN( 2) * 16305 + IN(30) *  1606 + (1 << 13)) >> 14;
+    int t9a  = (IN(18) * 12665 - IN(14) * 10394 + (1 << 13)) >> 14;
+    int t14a = (IN(18) * 10394 + IN(14) * 12665 + (1 << 13)) >> 14;
+    int t10a = (IN(10) *  7723 - IN(22) * 14449 + (1 << 13)) >> 14;
+    int t13a = (IN(10) * 14449 + IN(22) *  7723 + (1 << 13)) >> 14;
+    int t11a = (IN(26) * 15679 - IN( 6) *  4756 + (1 << 13)) >> 14;
+    int t12a = (IN(26) *  4756 + IN( 6) * 15679 + (1 << 13)) >> 14;
+    int t16a = (IN( 1) *   804 - IN(31) * 16364 + (1 << 13)) >> 14;
+    int t31a = (IN( 1) * 16364 + IN(31) *   804 + (1 << 13)) >> 14;
+    int t17a = (IN(17) * 12140 - IN(15) * 11003 + (1 << 13)) >> 14;
+    int t30a = (IN(17) * 11003 + IN(15) * 12140 + (1 << 13)) >> 14;
+    int t18a = (IN( 9) *  7005 - IN(23) * 14811 + (1 << 13)) >> 14;
+    int t29a = (IN( 9) * 14811 + IN(23) *  7005 + (1 << 13)) >> 14;
+    int t19a = (IN(25) * 15426 - IN( 7) *  5520 + (1 << 13)) >> 14;
+    int t28a = (IN(25) *  5520 + IN( 7) * 15426 + (1 << 13)) >> 14;
+    int t20a = (IN( 5) *  3981 - IN(27) * 15893 + (1 << 13)) >> 14;
+    int t27a = (IN( 5) * 15893 + IN(27) *  3981 + (1 << 13)) >> 14;
+    int t21a = (IN(21) * 14053 - IN(11) *  8423 + (1 << 13)) >> 14;
+    int t26a = (IN(21) *  8423 + IN(11) * 14053 + (1 << 13)) >> 14;
+    int t22a = (IN(13) *  9760 - IN(19) * 13160 + (1 << 13)) >> 14;
+    int t25a = (IN(13) * 13160 + IN(19) *  9760 + (1 << 13)) >> 14;
+    int t23a = (IN(29) * 16207 - IN( 3) *  2404 + (1 << 13)) >> 14;
+    int t24a = (IN(29) *  2404 + IN( 3) * 16207 + (1 << 13)) >> 14;
 
     int t0  = t0a  + t3a;
     int t1  = t1a  + t2a;
@@ -1353,20 +1276,20 @@ static av_always_inline void idct32_1d(int16_t *out, const int16_t *in,
     int t30 = t31a - t30a;
     int t31 = t31a + t30a;
 
-    t5a  =   ((t6         - t5) * 11585  + (1 << 13)) >> 14;
-    t6a  =   ((t6         + t5) * 11585  + (1 << 13)) >> 14;
-    t9a  =   (t14 *  6270 - t9  * 15137  + (1 << 13)) >> 14;
-    t14a =   (t14 * 15137 + t9  *  6270  + (1 << 13)) >> 14;
+    t5a = ((t6 - t5) * 11585 + (1 << 13)) >> 14;
+    t6a = ((t6 + t5) * 11585 + (1 << 13)) >> 14;
+    t9a  = (  t14 *  6270 - t9  * 15137  + (1 << 13)) >> 14;
+    t14a = (  t14 * 15137 + t9  *  6270  + (1 << 13)) >> 14;
     t10a = (-(t13 * 15137 + t10 *  6270) + (1 << 13)) >> 14;
-    t13a =   (t13 *  6270 - t10 * 15137  + (1 << 13)) >> 14;
-    t17a =   (t30 *  3196 - t17 * 16069  + (1 << 13)) >> 14;
-    t30a =   (t30 * 16069 + t17 *  3196  + (1 << 13)) >> 14;
+    t13a = (  t13 *  6270 - t10 * 15137  + (1 << 13)) >> 14;
+    t17a = (  t30 *  3196 - t17 * 16069  + (1 << 13)) >> 14;
+    t30a = (  t30 * 16069 + t17 *  3196  + (1 << 13)) >> 14;
     t18a = (-(t29 * 16069 + t18 *  3196) + (1 << 13)) >> 14;
-    t29a =   (t29 *  3196 - t18 * 16069  + (1 << 13)) >> 14;
-    t21a =   (t26 * 13623 - t21 *  9102  + (1 << 13)) >> 14;
-    t26a =   (t26 *  9102 + t21 * 13623  + (1 << 13)) >> 14;
+    t29a = (  t29 *  3196 - t18 * 16069  + (1 << 13)) >> 14;
+    t21a = (  t26 * 13623 - t21 *  9102  + (1 << 13)) >> 14;
+    t26a = (  t26 *  9102 + t21 * 13623  + (1 << 13)) >> 14;
     t22a = (-(t25 *  9102 + t22 * 13623) + (1 << 13)) >> 14;
-    t25a =   (t25 * 13623 - t22 *  9102  + (1 << 13)) >> 14;
+    t25a = (  t25 * 13623 - t22 *  9102  + (1 << 13)) >> 14;
 
     t0a  = t0   + t7;
     t1a  = t1   + t6a;
@@ -1401,35 +1324,35 @@ static av_always_inline void idct32_1d(int16_t *out, const int16_t *in,
     t30  = t30a + t29a;
     t31a = t31  + t28;
 
-    t10a = ((t13           - t10)  * 11585  + (1 << 13)) >> 14;
-    t13a = ((t13           + t10)  * 11585  + (1 << 13)) >> 14;
-    t11  = ((t12a          - t11a) * 11585  + (1 << 13)) >> 14;
-    t12  = ((t12a          + t11a) * 11585  + (1 << 13)) >> 14;
-    t18a =   (t29  *  6270 - t18   * 15137  + (1 << 13)) >> 14;
-    t29a =   (t29  * 15137 + t18   *  6270  + (1 << 13)) >> 14;
-    t19  =   (t28a *  6270 - t19a  * 15137  + (1 << 13)) >> 14;
-    t28  =   (t28a * 15137 + t19a  *  6270  + (1 << 13)) >> 14;
-    t20  = (-(t27a * 15137 + t20a  *  6270) + (1 << 13)) >> 14;
-    t27  =   (t27a *  6270 - t20a  * 15137  + (1 << 13)) >> 14;
-    t21a = (-(t26  * 15137 + t21   *  6270) + (1 << 13)) >> 14;
-    t26a =   (t26  *  6270 - t21   * 15137  + (1 << 13)) >> 14;
-
-    t0   = t0a  + t15a;
-    t1   = t1a  + t14;
-    t2   = t2a  + t13a;
-    t3   = t3a  + t12;
-    t4   = t4a  + t11;
-    t5a  = t5   + t10a;
-    t6a  = t6   + t9;
-    t7   = t7a  + t8a;
-    t8   = t7a  - t8a;
-    t9a  = t6   - t9;
-    t10  = t5   - t10a;
-    t11a = t4a  - t11;
-    t12a = t3a  - t12;
-    t13  = t2a  - t13a;
-    t14a = t1a  - t14;
-    t15  = t0a  - t15a;
+    t10a = ((t13  - t10)  * 11585 + (1 << 13)) >> 14;
+    t13a = ((t13  + t10)  * 11585 + (1 << 13)) >> 14;
+    t11  = ((t12a - t11a) * 11585 + (1 << 13)) >> 14;
+    t12  = ((t12a + t11a) * 11585 + (1 << 13)) >> 14;
+    t18a = (  t29  *  6270 - t18  * 15137  + (1 << 13)) >> 14;
+    t29a = (  t29  * 15137 + t18  *  6270  + (1 << 13)) >> 14;
+    t19  = (  t28a *  6270 - t19a * 15137  + (1 << 13)) >> 14;
+    t28  = (  t28a * 15137 + t19a *  6270  + (1 << 13)) >> 14;
+    t20  = (-(t27a * 15137 + t20a *  6270) + (1 << 13)) >> 14;
+    t27  = (  t27a *  6270 - t20a * 15137  + (1 << 13)) >> 14;
+    t21a = (-(t26  * 15137 + t21  *  6270) + (1 << 13)) >> 14;
+    t26a = (  t26  *  6270 - t21  * 15137  + (1 << 13)) >> 14;
+
+    t0   = t0a + t15a;
+    t1   = t1a + t14;
+    t2   = t2a + t13a;
+    t3   = t3a + t12;
+    t4   = t4a + t11;
+    t5a  = t5  + t10a;
+    t6a  = t6  + t9;
+    t7   = t7a + t8a;
+    t8   = t7a - t8a;
+    t9a  = t6  - t9;
+    t10  = t5  - t10a;
+    t11a = t4a - t11;
+    t12a = t3a - t12;
+    t13  = t2a - t13a;
+    t14a = t1a - t14;
+    t15  = t0a - t15a;
     t16  = t16a + t23a;
     t17a = t17  + t22;
     t18  = t18a + t21a;
@@ -1449,23 +1372,23 @@ static av_always_inline void idct32_1d(int16_t *out, const int16_t *in,
 
     t20  = ((t27a - t20a) * 11585 + (1 << 13)) >> 14;
     t27  = ((t27a + t20a) * 11585 + (1 << 13)) >> 14;
-    t21a = ((t26  - t21)  * 11585 + (1 << 13)) >> 14;
-    t26a = ((t26  + t21)  * 11585 + (1 << 13)) >> 14;
+    t21a = ((t26  - t21 ) * 11585 + (1 << 13)) >> 14;
+    t26a = ((t26  + t21 ) * 11585 + (1 << 13)) >> 14;
     t22  = ((t25a - t22a) * 11585 + (1 << 13)) >> 14;
     t25  = ((t25a + t22a) * 11585 + (1 << 13)) >> 14;
-    t23a = ((t24  - t23)  * 11585 + (1 << 13)) >> 14;
-    t24a = ((t24  + t23)  * 11585 + (1 << 13)) >> 14;
-
-    out[0]  = t0   + t31;
-    out[1]  = t1   + t30a;
-    out[2]  = t2   + t29;
-    out[3]  = t3   + t28a;
-    out[4]  = t4   + t27;
-    out[5]  = t5a  + t26a;
-    out[6]  = t6a  + t25;
-    out[7]  = t7   + t24a;
-    out[8]  = t8   + t23a;
-    out[9]  = t9a  + t22;
+    t23a = ((t24  - t23 ) * 11585 + (1 << 13)) >> 14;
+    t24a = ((t24  + t23 ) * 11585 + (1 << 13)) >> 14;
+
+    out[ 0] = t0   + t31;
+    out[ 1] = t1   + t30a;
+    out[ 2] = t2   + t29;
+    out[ 3] = t3   + t28a;
+    out[ 4] = t4   + t27;
+    out[ 5] = t5a  + t26a;
+    out[ 6] = t6a  + t25;
+    out[ 7] = t7   + t24a;
+    out[ 8] = t8   + t23a;
+    out[ 9] = t9a  + t22;
     out[10] = t10  + t21a;
     out[11] = t11a + t20;
     out[12] = t12a + t19a;
@@ -1490,10 +1413,10 @@ static av_always_inline void idct32_1d(int16_t *out, const int16_t *in,
     out[31] = t0   - t31;
 }
 
-itxfm_wrapper(idct, idct, 32, 6)
+itxfm_wrapper(idct, idct, 32, 6, 1)
 
-static av_always_inline void iwht4_1d(int16_t *out, const int16_t *in,
-                                      ptrdiff_t stride, int pass)
+static av_always_inline void iwht4_1d(const int16_t *in, ptrdiff_t stride,
+                                      int16_t *out, int pass)
 {
     int t0, t1, t2, t3, t4;
 
@@ -1523,7 +1446,7 @@ static av_always_inline void iwht4_1d(int16_t *out, const int16_t *in,
     out[3] = t3;
 }
 
-itxfm_wrapper(iwht, iwht, 4, 0)
+itxfm_wrapper(iwht, iwht, 4, 0, 0)
 
 #undef IN
 #undef itxfm_wrapper
@@ -1531,30 +1454,29 @@ itxfm_wrapper(iwht, iwht, 4, 0)
 
 static av_cold void vp9dsp_itxfm_init(VP9DSPContext *dsp)
 {
-#define init_itxfm(tx, sz)                                        \
-    dsp->itxfm_add[tx][DCT_DCT]   = idct_idct_   ## sz ## _add_c; \
-    dsp->itxfm_add[tx][DCT_ADST]  = iadst_idct_  ## sz ## _add_c; \
-    dsp->itxfm_add[tx][ADST_DCT]  = idct_iadst_  ## sz ## _add_c; \
-    dsp->itxfm_add[tx][ADST_ADST] = iadst_iadst_ ## sz ## _add_c
-
-#define init_idct(tx, nm)                               \
-    dsp->itxfm_add[tx][DCT_DCT]   =                     \
-    dsp->itxfm_add[tx][ADST_DCT]  =                     \
-    dsp->itxfm_add[tx][DCT_ADST]  =                     \
-    dsp->itxfm_add[tx][ADST_ADST] = nm ## _add_c
-
-    init_itxfm(TX_4X4, 4x4);
-    init_itxfm(TX_8X8, 8x8);
+#define init_itxfm(tx, sz) \
+    dsp->itxfm_add[tx][DCT_DCT]   = idct_idct_##sz##_add_c; \
+    dsp->itxfm_add[tx][DCT_ADST]  = iadst_idct_##sz##_add_c; \
+    dsp->itxfm_add[tx][ADST_DCT]  = idct_iadst_##sz##_add_c; \
+    dsp->itxfm_add[tx][ADST_ADST] = iadst_iadst_##sz##_add_c
+
+#define init_idct(tx, nm) \
+    dsp->itxfm_add[tx][DCT_DCT]   = \
+    dsp->itxfm_add[tx][ADST_DCT]  = \
+    dsp->itxfm_add[tx][DCT_ADST]  = \
+    dsp->itxfm_add[tx][ADST_ADST] = nm##_add_c
+
+    init_itxfm(TX_4X4,   4x4);
+    init_itxfm(TX_8X8,   8x8);
     init_itxfm(TX_16X16, 16x16);
-    init_idct(TX_32X32, idct_idct_32x32);
+    init_idct(TX_32X32,  idct_idct_32x32);
     init_idct(4 /* lossless */, iwht_iwht_4x4);
 
 #undef init_itxfm
 #undef init_idct
 }
 
-static av_always_inline void loop_filter(uint8_t *dst, ptrdiff_t stride,
-                                         int E, int I, int H,
+static av_always_inline void loop_filter(uint8_t *dst, int E, int I, int H,
                                          ptrdiff_t stridea, ptrdiff_t strideb,
                                          int wd)
 {
@@ -1637,16 +1559,18 @@ static av_always_inline void loop_filter(uint8_t *dst, ptrdiff_t stride,
             int hev = FFABS(p1 - p0) > H || FFABS(q1 - q0) > H;
 
             if (hev) {
-                int f = av_clip_int8(3 * (q0 - p0) + av_clip_int8(p1 - q1));
-                int f1 = FFMIN(f + 4, 127) >> 3;
-                int f2 = FFMIN(f + 3, 127) >> 3;
+                int f = av_clip_int8(3 * (q0 - p0) + av_clip_int8(p1 - q1)), f1, f2;
+
+                f1 = FFMIN(f + 4, 127) >> 3;
+                f2 = FFMIN(f + 3, 127) >> 3;
 
                 dst[strideb * -1] = av_clip_uint8(p0 + f2);
                 dst[strideb * +0] = av_clip_uint8(q0 - f1);
             } else {
-                int f = av_clip_int8(3 * (q0 - p0));
-                int f1 = FFMIN(f + 4, 127) >> 3;
-                int f2 = FFMIN(f + 3, 127) >> 3;
+                int f = av_clip_int8(3 * (q0 - p0)), f1, f2;
+
+                f1 = FFMIN(f + 4, 127) >> 3;
+                f2 = FFMIN(f + 3, 127) >> 3;
 
                 dst[strideb * -1] = av_clip_uint8(p0 + f2);
                 dst[strideb * +0] = av_clip_uint8(q0 - f1);
@@ -1659,17 +1583,17 @@ static av_always_inline void loop_filter(uint8_t *dst, ptrdiff_t stride,
     }
 }
 
-#define lf_8_fn(dir, wd, stridea, strideb)                                  \
-static void loop_filter_ ## dir ## _ ## wd  ## _8_c(uint8_t *dst,           \
-                                                    ptrdiff_t stride,       \
-                                                    int E, int I, int H)    \
-{                                                                           \
-    loop_filter(dst, stride, E, I, H, stridea, strideb, wd);                \
+#define lf_8_fn(dir, wd, stridea, strideb) \
+static void loop_filter_##dir##_##wd##_8_c(uint8_t *dst, \
+                                           ptrdiff_t stride, \
+                                           int E, int I, int H) \
+{ \
+    loop_filter(dst, E, I, H, stridea, strideb, wd); \
 }
 
-#define lf_8_fns(wd)          \
-    lf_8_fn(h, wd, stride, 1) \
-    lf_8_fn(v, wd, 1, stride)
+#define lf_8_fns(wd) \
+lf_8_fn(h, wd, stride, 1) \
+lf_8_fn(v, wd, 1, stride)
 
 lf_8_fns(4)
 lf_8_fns(8)
@@ -1678,13 +1602,13 @@ lf_8_fns(16)
 #undef lf_8_fn
 #undef lf_8_fns
 
-#define lf_16_fn(dir, stridea)                                          \
-static void loop_filter_ ## dir ## _16_16_c(uint8_t *dst,               \
-                                            ptrdiff_t stride,           \
-                                            int E, int I, int H)        \
-{                                                                       \
-    loop_filter_ ## dir ## _16_8_c(dst, stride, E, I, H);               \
-    loop_filter_ ## dir ## _16_8_c(dst + 8 * stridea, stride, E, I, H); \
+#define lf_16_fn(dir, stridea) \
+static void loop_filter_##dir##_16_16_c(uint8_t *dst, \
+                                        ptrdiff_t stride, \
+                                        int E, int I, int H) \
+{ \
+    loop_filter_##dir##_16_8_c(dst, stride, E, I, H); \
+    loop_filter_##dir##_16_8_c(dst + 8 * stridea, stride, E, I, H); \
 }
 
 lf_16_fn(h, stride)
@@ -1692,21 +1616,18 @@ lf_16_fn(v, 1)
 
 #undef lf_16_fn
 
-#define lf_mix_fn(dir, wd1, wd2, stridea)                                     \
-static void loop_filter_ ## dir ## _ ## wd1 ## wd2 ## _16_c(uint8_t *dst,     \
-                                                            ptrdiff_t stride, \
-                                                            int E, int I,     \
-                                                            int H)            \
-{                                                                             \
-    loop_filter_ ## dir ## _ ## wd1 ## _8_c(dst, stride, E & 0xff,            \
-                                            I & 0xff, H & 0xff);              \
-    loop_filter_ ## dir ## _ ## wd2 ## _8_c(dst + 8 * stridea, stride,        \
-                                            E >> 8, I >> 8, H >> 8);          \
+#define lf_mix_fn(dir, wd1, wd2, stridea) \
+static void loop_filter_##dir##_##wd1##wd2##_16_c(uint8_t *dst, \
+                                                  ptrdiff_t stride, \
+                                                  int E, int I, int H) \
+{ \
+    loop_filter_##dir##_##wd1##_8_c(dst, stride, E & 0xff, I & 0xff, H & 0xff); \
+    loop_filter_##dir##_##wd2##_8_c(dst + 8 * stridea, stride, E >> 8, I >> 8, H >> 8); \
 }
 
-#define lf_mix_fns(wd1, wd2)       \
-    lf_mix_fn(h, wd1, wd2, stride) \
-    lf_mix_fn(v, wd1, wd2, 1)
+#define lf_mix_fns(wd1, wd2) \
+lf_mix_fn(h, wd1, wd2, stride) \
+lf_mix_fn(v, wd1, wd2, 1)
 
 lf_mix_fns(4, 4)
 lf_mix_fns(4, 8)
@@ -1738,9 +1659,8 @@ static av_cold void vp9dsp_loopfilter_init(VP9DSPContext *dsp)
     dsp->loop_filter_mix2[1][1][1] = loop_filter_v_88_16_c;
 }
 
-static av_always_inline void copy_c(uint8_t *dst, const uint8_t *src,
-                                    ptrdiff_t dst_stride,
-                                    ptrdiff_t src_stride,
+static av_always_inline void copy_c(uint8_t *dst, ptrdiff_t dst_stride,
+                                    const uint8_t *src, ptrdiff_t src_stride,
                                     int w, int h)
 {
     do {
@@ -1751,9 +1671,8 @@ static av_always_inline void copy_c(uint8_t *dst, const uint8_t *src,
     } while (--h);
 }
 
-static av_always_inline void avg_c(uint8_t *dst, const uint8_t *src,
-                                   ptrdiff_t dst_stride,
-                                   ptrdiff_t src_stride,
+static av_always_inline void avg_c(uint8_t *dst, ptrdiff_t dst_stride,
+                                   const uint8_t *src, ptrdiff_t src_stride,
                                    int w, int h)
 {
     do {
@@ -1767,18 +1686,17 @@ static av_always_inline void avg_c(uint8_t *dst, const uint8_t *src,
     } while (--h);
 }
 
-#define fpel_fn(type, sz)                                      \
-static void type ## sz ## _c(uint8_t *dst, const uint8_t *src, \
-                             ptrdiff_t dst_stride,             \
-                             ptrdiff_t src_stride,             \
-                             int h, int mx, int my)            \
-{                                                              \
-    type ## _c(dst, src, dst_stride, src_stride, sz, h);       \
+#define fpel_fn(type, sz) \
+static void type##sz##_c(uint8_t *dst, ptrdiff_t dst_stride, \
+                         const uint8_t *src, ptrdiff_t src_stride, \
+                         int h, int mx, int my) \
+{ \
+    type##_c(dst, dst_stride, src, src_stride, sz, h); \
 }
 
 #define copy_avg_fn(sz) \
-    fpel_fn(copy, sz)   \
-    fpel_fn(avg, sz)
+fpel_fn(copy, sz) \
+fpel_fn(avg,  sz)
 
 copy_avg_fn(64)
 copy_avg_fn(32)
@@ -1841,19 +1759,18 @@ static const int8_t vp9_subpel_filters[3][15][8] = {
     }
 };
 
-#define FILTER_8TAP(src, x, F, stride)              \
-    av_clip_uint8((F[0] * src[x + -3 * stride] +    \
-                   F[1] * src[x + -2 * stride] +    \
-                   F[2] * src[x + -1 * stride] +    \
-                   F[3] * src[x + +0 * stride] +    \
-                   F[4] * src[x + +1 * stride] +    \
-                   F[5] * src[x + +2 * stride] +    \
-                   F[6] * src[x + +3 * stride] +    \
+#define FILTER_8TAP(src, x, F, stride) \
+    av_clip_uint8((F[0] * src[x + -3 * stride] + \
+                   F[1] * src[x + -2 * stride] + \
+                   F[2] * src[x + -1 * stride] + \
+                   F[3] * src[x + +0 * stride] + \
+                   F[4] * src[x + +1 * stride] + \
+                   F[5] * src[x + +2 * stride] + \
+                   F[6] * src[x + +3 * stride] + \
                    F[7] * src[x + +4 * stride] + 64) >> 7)
 
-static av_always_inline void do_8tap_1d_c(uint8_t *dst, const uint8_t *src,
-                                          ptrdiff_t dst_stride,
-                                          ptrdiff_t src_stride,
+static av_always_inline void do_8tap_1d_c(uint8_t *dst, ptrdiff_t dst_stride,
+                                          const uint8_t *src, ptrdiff_t src_stride,
                                           int w, int h, ptrdiff_t ds,
                                           const int8_t *filter, int avg)
 {
@@ -1861,25 +1778,23 @@ static av_always_inline void do_8tap_1d_c(uint8_t *dst, const uint8_t *src,
         int x;
 
         for (x = 0; x < w; x++)
-            if (avg)
+            if (avg) {
                 dst[x] = (dst[x] + FILTER_8TAP(src, x, filter, ds) + 1) >> 1;
-            else
+            } else {
                 dst[x] = FILTER_8TAP(src, x, filter, ds);
+            }
 
         dst += dst_stride;
         src += src_stride;
     } while (--h);
 }
 
-#define filter_8tap_1d_fn(opn, opa, dir, ds)                                \
-static av_noinline void opn ## _8tap_1d_ ## dir ## _c(uint8_t *dst,         \
-                                                      const uint8_t *src,   \
-                                                      ptrdiff_t dst_stride, \
-                                                      ptrdiff_t src_stride, \
-                                                      int w, int h,         \
-                                                      const int8_t *filter) \
-{                                                                           \
-    do_8tap_1d_c(dst, src, dst_stride, src_stride, w, h, ds, filter, opa);  \
+#define filter_8tap_1d_fn(opn, opa, dir, ds) \
+static av_noinline void opn##_8tap_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
+                                                const uint8_t *src, ptrdiff_t src_stride, \
+                                                int w, int h, const int8_t *filter) \
+{ \
+    do_8tap_1d_c(dst, dst_stride, src, src_stride, w, h, ds, filter, opa); \
 }
 
 filter_8tap_1d_fn(put, 0, v, src_stride)
@@ -1889,9 +1804,8 @@ filter_8tap_1d_fn(avg, 1, h, 1)
 
 #undef filter_8tap_1d_fn
 
-static av_always_inline void do_8tap_2d_c(uint8_t *dst, const uint8_t *src,
-                                          ptrdiff_t dst_stride,
-                                          ptrdiff_t src_stride,
+static av_always_inline void do_8tap_2d_c(uint8_t *dst, ptrdiff_t dst_stride,
+                                          const uint8_t *src, ptrdiff_t src_stride,
                                           int w, int h, const int8_t *filterx,
                                           const int8_t *filtery, int avg)
 {
@@ -1906,7 +1820,7 @@ static av_always_inline void do_8tap_2d_c(uint8_t *dst, const uint8_t *src,
             tmp_ptr[x] = FILTER_8TAP(src, x, filterx, 1);
 
         tmp_ptr += 64;
-        src     += src_stride;
+        src += src_stride;
     } while (--tmp_h);
 
     tmp_ptr = tmp + 64 * 3;
@@ -1914,27 +1828,24 @@ static av_always_inline void do_8tap_2d_c(uint8_t *dst, const uint8_t *src,
         int x;
 
         for (x = 0; x < w; x++)
-            if (avg)
+            if (avg) {
                 dst[x] = (dst[x] + FILTER_8TAP(tmp_ptr, x, filtery, 64) + 1) >> 1;
-            else
+            } else {
                 dst[x] = FILTER_8TAP(tmp_ptr, x, filtery, 64);
+            }
 
         tmp_ptr += 64;
         dst += dst_stride;
     } while (--h);
 }
 
-#define filter_8tap_2d_fn(opn, opa)                                     \
-static av_noinline void opn ## _8tap_2d_hv_c(uint8_t *dst,              \
-                                             const uint8_t *src,        \
-                                             ptrdiff_t dst_stride,      \
-                                             ptrdiff_t src_stride,      \
-                                             int w, int h,              \
-                                             const int8_t *filterx,     \
-                                             const int8_t *filtery)     \
-{                                                                       \
-    do_8tap_2d_c(dst, src, dst_stride, src_stride,                      \
-                 w, h, filterx, filtery, opa);                          \
+#define filter_8tap_2d_fn(opn, opa) \
+static av_noinline void opn##_8tap_2d_hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
+                                           const uint8_t *src, ptrdiff_t src_stride, \
+                                           int w, int h, const int8_t *filterx, \
+                                           const int8_t *filtery) \
+{ \
+    do_8tap_2d_c(dst, dst_stride, src, src_stride, w, h, filterx, filtery, opa); \
 }
 
 filter_8tap_2d_fn(put, 0)
@@ -1944,62 +1855,53 @@ filter_8tap_2d_fn(avg, 1)
 
 #undef FILTER_8TAP
 
-#define filter_fn_1d(sz, dir, dir_m, type, type_idx, avg)                   \
-static void                                                                 \
-avg ## _8tap_ ## type ## _ ## sz ## dir ## _c(uint8_t *dst,                 \
-                                              const uint8_t *src,           \
-                                              ptrdiff_t dst_stride,         \
-                                              ptrdiff_t src_stride,         \
-                                              int h, int mx, int my)        \
-{                                                                           \
-    avg ## _8tap_1d_ ## dir ## _c(dst, src, dst_stride, src_stride, sz, h,  \
-                                  vp9_subpel_filters[type_idx][dir_m - 1]); \
-}
-
-#define filter_fn_2d(sz, type, type_idx, avg)                               \
-static void avg ## _8tap_ ## type ## _ ## sz ## hv_c(uint8_t *dst,          \
-                                                     const uint8_t *src,    \
-                                                     ptrdiff_t dst_stride,  \
-                                                     ptrdiff_t src_stride,  \
-                                                     int h, int mx, int my) \
-{                                                                           \
-    avg ## _8tap_2d_hv_c(dst, src, dst_stride, src_stride, sz, h,           \
-                         vp9_subpel_filters[type_idx][mx - 1],              \
-                         vp9_subpel_filters[type_idx][my - 1]);             \
-}
-
-#define FILTER_BILIN(src, x, mxy, stride)                       \
+#define filter_fn_1d(sz, dir, dir_m, type, type_idx, avg) \
+static void avg##_8tap_##type##_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
+                                              const uint8_t *src, ptrdiff_t src_stride, \
+                                              int h, int mx, int my) \
+{ \
+    avg##_8tap_1d_##dir##_c(dst, dst_stride, src, src_stride, sz, h, \
+                            vp9_subpel_filters[type_idx][dir_m - 1]); \
+}
+
+#define filter_fn_2d(sz, type, type_idx, avg) \
+static void avg##_8tap_##type##_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
+                                           const uint8_t *src, ptrdiff_t src_stride, \
+                                           int h, int mx, int my) \
+{ \
+    avg##_8tap_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, \
+                       vp9_subpel_filters[type_idx][mx - 1], \
+                       vp9_subpel_filters[type_idx][my - 1]); \
+}
+
+#define FILTER_BILIN(src, x, mxy, stride) \
     (src[x] + ((mxy * (src[x + stride] - src[x]) + 8) >> 4))
 
-static av_always_inline void do_bilin_1d_c(uint8_t *dst,
-                                           const uint8_t *src,
-                                           ptrdiff_t dst_stride,
-                                           ptrdiff_t src_stride,
-                                           int w, int h, ptrdiff_t ds,
-                                           int mxy, int avg)
+static av_always_inline void do_bilin_1d_c(uint8_t *dst, ptrdiff_t dst_stride,
+                                           const uint8_t *src, ptrdiff_t src_stride,
+                                           int w, int h, ptrdiff_t ds, int mxy, int avg)
 {
     do {
         int x;
 
         for (x = 0; x < w; x++)
-            if (avg)
+            if (avg) {
                 dst[x] = (dst[x] + FILTER_BILIN(src, x, mxy, ds) + 1) >> 1;
-            else
+            } else {
                 dst[x] = FILTER_BILIN(src, x, mxy, ds);
+            }
 
         dst += dst_stride;
         src += src_stride;
     } while (--h);
 }
 
-#define bilin_1d_fn(opn, opa, dir, ds)                                        \
-static av_noinline void opn ## _bilin_1d_ ## dir ## _c(uint8_t *dst,          \
-                                                       const uint8_t *src,    \
-                                                       ptrdiff_t dst_stride,  \
-                                                       ptrdiff_t src_stride,  \
-                                                       int w, int h, int mxy) \
-{                                                                             \
-    do_bilin_1d_c(dst, src, dst_stride, src_stride, w, h, ds, mxy, opa);      \
+#define bilin_1d_fn(opn, opa, dir, ds) \
+static av_noinline void opn##_bilin_1d_##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
+                                                 const uint8_t *src, ptrdiff_t src_stride, \
+                                                 int w, int h, int mxy) \
+{ \
+    do_bilin_1d_c(dst, dst_stride, src, src_stride, w, h, ds, mxy, opa); \
 }
 
 bilin_1d_fn(put, 0, v, src_stride)
@@ -2009,12 +1911,9 @@ bilin_1d_fn(avg, 1, h, 1)
 
 #undef bilin_1d_fn
 
-static av_always_inline void do_bilin_2d_c(uint8_t *dst,
-                                           const uint8_t *src,
-                                           ptrdiff_t dst_stride,
-                                           ptrdiff_t src_stride,
-                                           int w, int h, int mx, int my,
-                                           int avg)
+static av_always_inline void do_bilin_2d_c(uint8_t *dst, ptrdiff_t dst_stride,
+                                           const uint8_t *src, ptrdiff_t src_stride,
+                                           int w, int h, int mx, int my, int avg)
 {
     uint8_t tmp[64 * 65], *tmp_ptr = tmp;
     int tmp_h = h + 1;
@@ -2026,7 +1925,7 @@ static av_always_inline void do_bilin_2d_c(uint8_t *dst,
             tmp_ptr[x] = FILTER_BILIN(src, x, mx, 1);
 
         tmp_ptr += 64;
-        src     += src_stride;
+        src += src_stride;
     } while (--tmp_h);
 
     tmp_ptr = tmp;
@@ -2034,25 +1933,23 @@ static av_always_inline void do_bilin_2d_c(uint8_t *dst,
         int x;
 
         for (x = 0; x < w; x++)
-            if (avg)
+            if (avg) {
                 dst[x] = (dst[x] + FILTER_BILIN(tmp_ptr, x, my, 64) + 1) >> 1;
-            else
+            } else {
                 dst[x] = FILTER_BILIN(tmp_ptr, x, my, 64);
+            }
 
         tmp_ptr += 64;
         dst += dst_stride;
     } while (--h);
 }
 
-#define bilin_2d_fn(opn, opa)                                           \
-static av_noinline void opn ## _bilin_2d_hv_c(uint8_t *dst,             \
-                                              const uint8_t *src,       \
-                                              ptrdiff_t dst_stride,     \
-                                              ptrdiff_t src_stride,     \
-                                              int w, int h,             \
-                                              int mx, int my)           \
-{                                                                       \
-    do_bilin_2d_c(dst, src, dst_stride, src_stride, w, h, mx, my, opa); \
+#define bilin_2d_fn(opn, opa) \
+static av_noinline void opn##_bilin_2d_hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
+                                            const uint8_t *src, ptrdiff_t src_stride, \
+                                            int w, int h, int mx, int my) \
+{ \
+    do_bilin_2d_c(dst, dst_stride, src, src_stride, w, h, mx, my, opa); \
 }
 
 bilin_2d_fn(put, 0)
@@ -2062,48 +1959,42 @@ bilin_2d_fn(avg, 1)
 
 #undef FILTER_BILIN
 
-#define bilinf_fn_1d(sz, dir, dir_m, avg)                               \
-static void avg ## _bilin_ ## sz ## dir ## _c(uint8_t *dst,             \
-                                              const uint8_t *src,       \
-                                              ptrdiff_t dst_stride,     \
-                                              ptrdiff_t src_stride,     \
-                                              int h, int mx, int my)    \
-{                                                                       \
-    avg ## _bilin_1d_ ## dir ## _c(dst, src, dst_stride, src_stride,    \
-                                   sz, h, dir_m);                       \
-}
-
-#define bilinf_fn_2d(sz, avg)                                        \
-static void avg ## _bilin_ ## sz ## hv_c(uint8_t *dst,               \
-                                         const uint8_t *src,         \
-                                         ptrdiff_t dst_stride,       \
-                                         ptrdiff_t src_stride,       \
-                                         int h, int mx, int my)      \
-{                                                                    \
-    avg ## _bilin_2d_hv_c(dst, src, dst_stride, src_stride,          \
-                          sz, h, mx, my);                            \
-}
-
-#define filter_fn(sz, avg)                                     \
-    filter_fn_1d(sz, h, mx, regular, FILTER_8TAP_REGULAR, avg) \
-    filter_fn_1d(sz, v, my, regular, FILTER_8TAP_REGULAR, avg) \
-    filter_fn_2d(sz, regular, FILTER_8TAP_REGULAR, avg)        \
-    filter_fn_1d(sz, h, mx, smooth, FILTER_8TAP_SMOOTH, avg)   \
-    filter_fn_1d(sz, v, my, smooth, FILTER_8TAP_SMOOTH, avg)   \
-    filter_fn_2d(sz, smooth, FILTER_8TAP_SMOOTH, avg)          \
-    filter_fn_1d(sz, h, mx, sharp, FILTER_8TAP_SHARP, avg)     \
-    filter_fn_1d(sz, v, my, sharp, FILTER_8TAP_SHARP, avg)     \
-    filter_fn_2d(sz, sharp, FILTER_8TAP_SHARP, avg)            \
-    bilinf_fn_1d(sz, h, mx, avg)                               \
-    bilinf_fn_1d(sz, v, my, avg)                               \
-    bilinf_fn_2d(sz, avg)
+#define bilinf_fn_1d(sz, dir, dir_m, avg) \
+static void avg##_bilin_##sz##dir##_c(uint8_t *dst, ptrdiff_t dst_stride, \
+                                      const uint8_t *src, ptrdiff_t src_stride, \
+                                      int h, int mx, int my) \
+{ \
+    avg##_bilin_1d_##dir##_c(dst, dst_stride, src, src_stride, sz, h, dir_m); \
+}
+
+#define bilinf_fn_2d(sz, avg) \
+static void avg##_bilin_##sz##hv_c(uint8_t *dst, ptrdiff_t dst_stride, \
+                                   const uint8_t *src, ptrdiff_t src_stride, \
+                                   int h, int mx, int my) \
+{ \
+    avg##_bilin_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, mx, my); \
+}
+
+#define filter_fn(sz, avg) \
+filter_fn_1d(sz, h, mx, regular, FILTER_8TAP_REGULAR, avg) \
+filter_fn_1d(sz, v, my, regular, FILTER_8TAP_REGULAR, avg) \
+filter_fn_2d(sz,        regular, FILTER_8TAP_REGULAR, avg) \
+filter_fn_1d(sz, h, mx, smooth,  FILTER_8TAP_SMOOTH,  avg) \
+filter_fn_1d(sz, v, my, smooth,  FILTER_8TAP_SMOOTH,  avg) \
+filter_fn_2d(sz,        smooth,  FILTER_8TAP_SMOOTH,  avg) \
+filter_fn_1d(sz, h, mx, sharp,   FILTER_8TAP_SHARP,   avg) \
+filter_fn_1d(sz, v, my, sharp,   FILTER_8TAP_SHARP,   avg) \
+filter_fn_2d(sz,        sharp,   FILTER_8TAP_SHARP,   avg) \
+bilinf_fn_1d(sz, h, mx,                               avg) \
+bilinf_fn_1d(sz, v, my,                               avg) \
+bilinf_fn_2d(sz,                                      avg)
 
 #define filter_fn_set(avg) \
-    filter_fn(64, avg)     \
-    filter_fn(32, avg)     \
-    filter_fn(16, avg)     \
-    filter_fn(8, avg)      \
-    filter_fn(4, avg)
+filter_fn(64, avg) \
+filter_fn(32, avg) \
+filter_fn(16, avg) \
+filter_fn(8,  avg) \
+filter_fn(4,  avg)
 
 filter_fn_set(put)
 filter_fn_set(avg)
@@ -2117,14 +2008,14 @@ filter_fn_set(avg)
 
 static av_cold void vp9dsp_mc_init(VP9DSPContext *dsp)
 {
-#define init_fpel(idx1, idx2, sz, type)                                \
-    dsp->mc[idx1][FILTER_8TAP_SMOOTH][idx2][0][0]  = type ## sz ## _c; \
-    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = type ## sz ## _c; \
-    dsp->mc[idx1][FILTER_8TAP_SHARP][idx2][0][0]   = type ## sz ## _c; \
-    dsp->mc[idx1][FILTER_BILINEAR][idx2][0][0]     = type ## sz ## _c
-
-#define init_copy_avg(idx, sz)          \
-    init_fpel(idx, 0, sz, copy);        \
+#define init_fpel(idx1, idx2, sz, type) \
+    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = type##sz##_c; \
+    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = type##sz##_c; \
+    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] = type##sz##_c; \
+    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = type##sz##_c
+
+#define init_copy_avg(idx, sz) \
+    init_fpel(idx, 0, sz, copy); \
     init_fpel(idx, 1, sz, avg)
 
     init_copy_avg(0, 64);
@@ -2136,22 +2027,22 @@ static av_cold void vp9dsp_mc_init(VP9DSPContext *dsp)
 #undef init_copy_avg
 #undef init_fpel
 
-#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type)             \
-    dsp->mc[idx1][FILTER_8TAP_SMOOTH][idx2][idxh][idxv]  = type ## _8tap_smooth_  ## sz ## dir ## _c; \
-    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type ## _8tap_regular_ ## sz ## dir ## _c; \
-    dsp->mc[idx1][FILTER_8TAP_SHARP][idx2][idxh][idxv]   = type ## _8tap_sharp_   ## sz ## dir ## _c; \
-    dsp->mc[idx1][FILTER_BILINEAR][idx2][idxh][idxv]     = type ## _bilin_        ## sz ## dir ## _c
+#define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type) \
+    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = type##_8tap_smooth_##sz##dir##_c; \
+    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type##_8tap_regular_##sz##dir##_c; \
+    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][idxh][idxv] = type##_8tap_sharp_##sz##dir##_c; \
+    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][idxh][idxv] = type##_bilin_##sz##dir##_c
 
-#define init_subpel2(idx, idxh, idxv, dir, type)     \
+#define init_subpel2(idx, idxh, idxv, dir, type) \
     init_subpel1(0, idx, idxh, idxv, 64, dir, type); \
     init_subpel1(1, idx, idxh, idxv, 32, dir, type); \
     init_subpel1(2, idx, idxh, idxv, 16, dir, type); \
     init_subpel1(3, idx, idxh, idxv,  8, dir, type); \
     init_subpel1(4, idx, idxh, idxv,  4, dir, type)
 
-#define init_subpel3(idx, type)         \
-    init_subpel2(idx, 1, 1, hv, type);  \
-    init_subpel2(idx, 0, 1, v, type);   \
+#define init_subpel3(idx, type) \
+    init_subpel2(idx, 1, 1, hv, type); \
+    init_subpel2(idx, 0, 1, v, type); \
     init_subpel2(idx, 1, 0, h, type)
 
     init_subpel3(0, put);
@@ -2169,6 +2060,5 @@ av_cold void ff_vp9dsp_init(VP9DSPContext *dsp)
     vp9dsp_loopfilter_init(dsp);
     vp9dsp_mc_init(dsp);
 
-    if (ARCH_X86)
-        ff_vp9dsp_init_x86(dsp);
+    if (ARCH_X86) ff_vp9dsp_init_x86(dsp);
 }
diff --git a/libavcodec/vp9dsp.h b/libavcodec/vp9dsp.h
new file mode 100644
index 0000000..db0a92e
--- /dev/null
+++ b/libavcodec/vp9dsp.h
@@ -0,0 +1,118 @@
+/*
+ * VP9 compatible video decoder
+ *
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+ * Copyright (C) 2013 Clément Bœsch <u pkh me>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VP9DSP_H
+#define AVCODEC_VP9DSP_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "vp9.h"
+
+typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
+                            const uint8_t *ref, ptrdiff_t ref_stride,
+                            int h, int mx, int my);
+
+typedef struct VP9DSPContext {
+    /*
+     * dimension 1: 0=4x4, 1=8x8, 2=16x16, 3=32x32
+     * dimension 2: intra prediction modes
+     *
+     * dst/left/top is aligned by transform-size (i.e. 4, 8, 16 or 32 pixels)
+     * stride is aligned by 16 pixels
+     * top[-1] is top/left; top[4,7] is top-right for 4x4
+     */
+    // FIXME(rbultje) maybe replace left/top pointers with HAVE_TOP/
+    // HAVE_LEFT/HAVE_TOPRIGHT flags instead, and then handle it in-place?
+    // also needs to fit in with what h264/vp8/etc do
+    void (*intra_pred[N_TXFM_SIZES][N_INTRA_PRED_MODES])(uint8_t *dst,
+                                                         ptrdiff_t stride,
+                                                         const uint8_t *left,
+                                                         const uint8_t *top);
+
+    /*
+     * dimension 1: 0=4x4, 1=8x8, 2=16x16, 3=32x32, 4=lossless (3-4=dct only)
+     * dimension 2: 0=dct/dct, 1=dct/adst, 2=adst/dct, 3=adst/adst
+     *
+     * dst is aligned by transform-size (i.e. 4, 8, 16 or 32 pixels)
+     * stride is aligned by 16 pixels
+     * block is 16-byte aligned
+     * eob indicates the position (+1) of the last non-zero coefficient,
+     * in scan-order. This can be used to write faster versions, e.g. a
+     * dc-only 4x4/8x8/16x16/32x32, or a 4x4-only (eob<10) 8x8/16x16/32x32,
+     * etc.
+     */
+    // FIXME also write idct_add_block() versions for whole (inter) pred
+    // blocks, so we can do 2 4x4s at once
+    void (*itxfm_add[N_TXFM_SIZES + 1][N_TXFM_TYPES])(uint8_t *dst,
+                                                      ptrdiff_t stride,
+                                                      int16_t *block, int eob);
+
+    /*
+     * dimension 1: width of filter (0=4, 1=8, 2=16)
+     * dimension 2: 0=col-edge filter (h), 1=row-edge filter (v)
+     *
+     * dst/stride are aligned by 8
+     */
+    void (*loop_filter_8[3][2])(uint8_t *dst, ptrdiff_t stride,
+                                int mb_lim, int lim, int hev_thr);
+
+    /*
+     * dimension 1: 0=col-edge filter (h), 1=row-edge filter (v)
+     *
+     * The width of filter is assumed to be 16; dst/stride are aligned by 16
+     */
+    void (*loop_filter_16[2])(uint8_t *dst, ptrdiff_t stride,
+                              int mb_lim, int lim, int hev_thr);
+
+    /*
+     * dimension 1/2: width of filter (0=4, 1=8) for each filter half
+     * dimension 3: 0=col-edge filter (h), 1=row-edge filter (v)
+     *
+     * dst/stride are aligned by operation size
+     * this basically calls loop_filter_8[d1][d3](), followed by
+     * loop_filter_8[d2][d3]() on the next 8 pixels
+     * mb_lim/lim/hev_thr contain two values in the lowest two bytes of the
+     * integer.
+     */
+    // FIXME perhaps a mix4 that operates on 32px (for AVX2)
+    void (*loop_filter_mix2[2][2][2])(uint8_t *dst, ptrdiff_t stride,
+                                      int mb_lim, int lim, int hev_thr);
+
+    /*
+     * dimension 1: hsize (0: 64, 1: 32, 2: 16, 3: 8, 4: 4)
+     * dimension 2: filter type (0: smooth, 1: regular, 2: sharp, 3: bilin)
+     * dimension 3: averaging type (0: put, 1: avg)
+     * dimension 4: x subpel interpolation (0: none, 1: 8tap/bilin)
+     * dimension 5: y subpel interpolation (0: none, 1: 8tap/bilin)
+     *
+     * dst/stride are aligned by hsize
+     */
+    vp9_mc_func mc[5][4][2][2][2];
+} VP9DSPContext;
+
+void ff_vp9dsp_init(VP9DSPContext *dsp);
+
+void ff_vp9dsp_init_x86(VP9DSPContext *dsp);
+
+#endif /* AVCODEC_VP9DSP_H */
diff --git a/libavcodec/vp9mvs.c b/libavcodec/vp9mvs.c
deleted file mode 100644
index 1f65aaa..0000000
--- a/libavcodec/vp9mvs.c
+++ /dev/null
@@ -1,352 +0,0 @@
-/*
- * VP9 compatible video decoder
- *
- * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
- * Copyright (C) 2013 Clément Bœsch <u pkh me>
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "internal.h"
-#include "vp56.h"
-#include "vp9.h"
-#include "vp9data.h"
-
-static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
-                                      VP9Context *s)
-{
-    dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
-    dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
-}
-
-static void find_ref_mvs(VP9Context *s,
-                         VP56mv *pmv, int ref, int z, int idx, int sb)
-{
-    static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
-        [BS_64x64] = { {  3, -1 }, { -1,  3 }, {  4, -1 }, { -1,  4 },
-                       { -1, -1 }, {  0, -1 }, { -1,  0 }, {  6, -1 } },
-        [BS_64x32] = { {  0, -1 }, { -1,  0 }, {  4, -1 }, { -1,  2 },
-                       { -1, -1 }, {  0, -3 }, { -3,  0 }, {  2, -1 } },
-        [BS_32x64] = { { -1,  0 }, {  0, -1 }, { -1,  4 }, {  2, -1 },
-                       { -1, -1 }, { -3,  0 }, {  0, -3 }, { -1,  2 } },
-        [BS_32x32] = { {  1, -1 }, { -1,  1 }, {  2, -1 }, { -1,  2 },
-                       { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 } },
-        [BS_32x16] = { {  0, -1 }, { -1,  0 }, {  2, -1 }, { -1, -1 },
-                       { -1,  1 }, {  0, -3 }, { -3,  0 }, { -3, -3 } },
-        [BS_16x32] = { { -1,  0 }, {  0, -1 }, { -1,  2 }, { -1, -1 },
-                       {  1, -1 }, { -3,  0 }, {  0, -3 }, { -3, -3 } },
-        [BS_16x16] = { {  0, -1 }, { -1,  0 }, {  1, -1 }, { -1,  1 },
-                       { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 } },
-        [BS_16x8]  = { {  0, -1 }, { -1,  0 }, {  1, -1 }, { -1, -1 },
-                       {  0, -2 }, { -2,  0 }, { -2, -1 }, { -1, -2 } },
-        [BS_8x16]  = { { -1,  0 }, {  0, -1 }, { -1,  1 }, { -1, -1 },
-                       { -2,  0 }, {  0, -2 }, { -1, -2 }, { -2, -1 } },
-        [BS_8x8]   = { {  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
-                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } },
-        [BS_8x4]   = { {  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
-                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } },
-        [BS_4x8]   = { {  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
-                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } },
-        [BS_4x4]   = { {  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
-                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } },
-    };
-    VP9Block *const b = &s->b;
-    int row = b->row, col = b->col, row7 = b->row7;
-    const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
-#define INVALID_MV 0x80008000U
-    uint32_t mem = INVALID_MV;
-    int i;
-
-#define RETURN_DIRECT_MV(mv)                    \
-    do {                                        \
-        uint32_t m = AV_RN32A(&mv);             \
-        if (!idx) {                             \
-            AV_WN32A(pmv, m);                   \
-            return;                             \
-        } else if (mem == INVALID_MV) {         \
-            mem = m;                            \
-        } else if (m != mem) {                  \
-            AV_WN32A(pmv, m);                   \
-            return;                             \
-        }                                       \
-    } while (0)
-
-    if (sb >= 0) {
-        if (sb == 2 || sb == 1) {
-            RETURN_DIRECT_MV(b->mv[0][z]);
-        } else if (sb == 3) {
-            RETURN_DIRECT_MV(b->mv[2][z]);
-            RETURN_DIRECT_MV(b->mv[1][z]);
-            RETURN_DIRECT_MV(b->mv[0][z]);
-        }
-
-#define RETURN_MV(mv)                           \
-    do {                                        \
-        if (sb > 0) {                           \
-            VP56mv tmp;                         \
-            uint32_t m;                         \
-            clamp_mv(&tmp, &mv, s);             \
-            m = AV_RN32A(&tmp);                 \
-            if (!idx) {                         \
-                AV_WN32A(pmv, m);               \
-                return;                         \
-            } else if (mem == INVALID_MV) {     \
-                mem = m;                        \
-            } else if (m != mem) {              \
-                AV_WN32A(pmv, m);               \
-                return;                         \
-            }                                   \
-        } else {                                \
-            uint32_t m = AV_RN32A(&mv);         \
-            if (!idx) {                         \
-                clamp_mv(pmv, &mv, s);          \
-                return;                         \
-            } else if (mem == INVALID_MV) {     \
-                mem = m;                        \
-            } else if (m != mem) {              \
-                clamp_mv(pmv, &mv, s);          \
-                return;                         \
-            }                                   \
-        }                                       \
-    } while (0)
-
-        if (row > 0) {
-            VP9MVRefPair *mv = &s->mv[0][(row - 1) * s->sb_cols * 8 + col];
-
-            if (mv->ref[0] == ref)
-                RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
-            else if (mv->ref[1] == ref)
-                RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
-        }
-        if (col > s->tiling.tile_col_start) {
-            VP9MVRefPair *mv = &s->mv[0][row * s->sb_cols * 8 + col - 1];
-
-            if (mv->ref[0] == ref)
-                RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
-            else if (mv->ref[1] == ref)
-                RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
-        }
-        i = 2;
-    } else {
-        i = 0;
-    }
-
-    // previously coded MVs in the neighborhood, using same reference frame
-    for (; i < 8; i++) {
-        int c = p[i][0] + col, r = p[i][1] + row;
-
-        if (c >= s->tiling.tile_col_start && c < s->cols &&
-            r >= 0 && r < s->rows) {
-            VP9MVRefPair *mv = &s->mv[0][r * s->sb_cols * 8 + c];
-
-            if (mv->ref[0] == ref)
-                RETURN_MV(mv->mv[0]);
-            else if (mv->ref[1] == ref)
-                RETURN_MV(mv->mv[1]);
-        }
-    }
-
-    // MV at this position in previous frame, using same reference frame
-    if (s->use_last_frame_mvs) {
-        VP9MVRefPair *mv = &s->mv[1][row * s->sb_cols * 8 + col];
-
-        if (mv->ref[0] == ref)
-            RETURN_MV(mv->mv[0]);
-        else if (mv->ref[1] == ref)
-            RETURN_MV(mv->mv[1]);
-    }
-
-#define RETURN_SCALE_MV(mv, scale)              \
-    do {                                        \
-        if (scale) {                            \
-            VP56mv mv_temp = { -mv.x, -mv.y };  \
-            RETURN_MV(mv_temp);                 \
-        } else {                                \
-            RETURN_MV(mv);                      \
-        }                                       \
-    } while (0)
-
-    // previously coded MVs in the neighborhood, using different reference frame
-    for (i = 0; i < 8; i++) {
-        int c = p[i][0] + col, r = p[i][1] + row;
-
-        if (c >= s->tiling.tile_col_start && c < s->cols &&
-            r >= 0 && r < s->rows) {
-            VP9MVRefPair *mv = &s->mv[0][r * s->sb_cols * 8 + c];
-
-            if (mv->ref[0] != ref && mv->ref[0] >= 0)
-                RETURN_SCALE_MV(mv->mv[0],
-                                s->signbias[mv->ref[0]] != s->signbias[ref]);
-            if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
-                // BUG - libvpx has this condition regardless of whether
-                // we used the first ref MV and pre-scaling
-                AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
-                RETURN_SCALE_MV(mv->mv[1],
-                                s->signbias[mv->ref[1]] != s->signbias[ref]);
-            }
-        }
-    }
-
-    // MV at this position in previous frame, using different reference frame
-    if (s->use_last_frame_mvs) {
-        VP9MVRefPair *mv = &s->mv[1][row * s->sb_cols * 8 + col];
-
-        if (mv->ref[0] != ref && mv->ref[0] >= 0)
-            RETURN_SCALE_MV(mv->mv[0],
-                            s->signbias[mv->ref[0]] != s->signbias[ref]);
-        if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
-            // BUG - libvpx has this condition regardless of whether
-            // we used the first ref MV and pre-scaling
-            AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
-            RETURN_SCALE_MV(mv->mv[1],
-                            s->signbias[mv->ref[1]] != s->signbias[ref]);
-        }
-    }
-
-    AV_ZERO32(pmv);
-#undef INVALID_MV
-#undef RETURN_MV
-#undef RETURN_SCALE_MV
-}
-
-static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
-{
-    int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
-    int n, c = vp8_rac_get_tree(&s->c, ff_vp9_mv_class_tree,
-                                s->prob.p.mv_comp[idx].classes);
-
-    s->counts.mv_comp[idx].sign[sign]++;
-    s->counts.mv_comp[idx].classes[c]++;
-    if (c) {
-        int m;
-
-        for (n = 0, m = 0; m < c; m++) {
-            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
-            n  |= bit << m;
-            s->counts.mv_comp[idx].bits[m][bit]++;
-        }
-        n <<= 3;
-        bit = vp8_rac_get_tree(&s->c, ff_vp9_mv_fp_tree,
-                               s->prob.p.mv_comp[idx].fp);
-        n  |= bit << 1;
-        s->counts.mv_comp[idx].fp[bit]++;
-        if (hp) {
-            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
-            s->counts.mv_comp[idx].hp[bit]++;
-            n |= bit;
-        } else {
-            n |= 1;
-            // bug in libvpx - we count for bw entropy purposes even if the
-            // bit wasn't coded
-            s->counts.mv_comp[idx].hp[1]++;
-        }
-        n += 8 << c;
-    } else {
-        n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
-        s->counts.mv_comp[idx].class0[n]++;
-        bit = vp8_rac_get_tree(&s->c, ff_vp9_mv_fp_tree,
-                               s->prob.p.mv_comp[idx].class0_fp[n]);
-        s->counts.mv_comp[idx].class0_fp[n][bit]++;
-        n = (n << 3) | (bit << 1);
-        if (hp) {
-            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
-            s->counts.mv_comp[idx].class0_hp[bit]++;
-            n |= bit;
-        } else {
-            n |= 1;
-            // bug in libvpx - we count for bw entropy purposes even if the
-            // bit wasn't coded
-            s->counts.mv_comp[idx].class0_hp[1]++;
-        }
-    }
-
-    return sign ? -(n + 1) : (n + 1);
-}
-
-void ff_vp9_fill_mv(VP9Context *s, VP56mv *mv, int mode, int sb)
-{
-    VP9Block *const b = &s->b;
-
-    if (mode == ZEROMV) {
-        memset(mv, 0, sizeof(*mv) * 2);
-    } else {
-        int hp;
-
-        // FIXME cache this value and reuse for other subblocks
-        find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
-                     mode == NEWMV ? -1 : sb);
-        // FIXME maybe move this code into find_ref_mvs()
-        if ((mode == NEWMV || sb == -1) &&
-            !(hp = s->highprecisionmvs &&
-              abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
-            if (mv[0].y & 1) {
-                if (mv[0].y < 0)
-                    mv[0].y++;
-                else
-                    mv[0].y--;
-            }
-            if (mv[0].x & 1) {
-                if (mv[0].x < 0)
-                    mv[0].x++;
-                else
-                    mv[0].x--;
-            }
-        }
-        if (mode == NEWMV) {
-            enum MVJoint j = vp8_rac_get_tree(&s->c, ff_vp9_mv_joint_tree,
-                                              s->prob.p.mv_joint);
-
-            s->counts.mv_joint[j]++;
-            if (j >= MV_JOINT_V)
-                mv[0].y += read_mv_component(s, 0, hp);
-            if (j & 1)
-                mv[0].x += read_mv_component(s, 1, hp);
-        }
-
-        if (b->comp) {
-            // FIXME cache this value and reuse for other subblocks
-            find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
-                         mode == NEWMV ? -1 : sb);
-            if ((mode == NEWMV || sb == -1) &&
-                !(hp = s->highprecisionmvs &&
-                  abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
-                if (mv[1].y & 1) {
-                    if (mv[1].y < 0)
-                        mv[1].y++;
-                    else
-                        mv[1].y--;
-                }
-                if (mv[1].x & 1) {
-                    if (mv[1].x < 0)
-                        mv[1].x++;
-                    else
-                        mv[1].x--;
-                }
-            }
-            if (mode == NEWMV) {
-                enum MVJoint j = vp8_rac_get_tree(&s->c, ff_vp9_mv_joint_tree,
-                                                  s->prob.p.mv_joint);
-
-                s->counts.mv_joint[j]++;
-                if (j >= MV_JOINT_V)
-                    mv[1].y += read_mv_component(s, 0, hp);
-                if (j & 1)
-                    mv[1].x += read_mv_component(s, 1, hp);
-            }
-        }
-    }
-}
diff --git a/libavcodec/vp9prob.c b/libavcodec/vp9prob.c
deleted file mode 100644
index b8a7c22..0000000
--- a/libavcodec/vp9prob.c
+++ /dev/null
@@ -1,274 +0,0 @@
-/*
- * VP9 compatible video decoder
- *
- * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
- * Copyright (C) 2013 Clément Bœsch <u pkh me>
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "vp56.h"
-#include "vp9.h"
-#include "vp9data.h"
-
-static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
-                                        int max_count, int update_factor)
-{
-    unsigned ct = ct0 + ct1, p2, p1;
-
-    if (!ct)
-        return;
-
-    p1 = *p;
-    p2 = ((ct0 << 8) + (ct >> 1)) / ct;
-    p2 = av_clip(p2, 1, 255);
-    ct = FFMIN(ct, max_count);
-    update_factor = FASTDIV(update_factor * ct, max_count);
-
-    // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
-    *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
-}
-
-void ff_vp9_adapt_probs(VP9Context *s)
-{
-    int i, j, k, l, m;
-    ProbContext *p = &s->prob_ctx[s->framectxid].p;
-    int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
-
-    // coefficients
-    for (i = 0; i < 4; i++)
-        for (j = 0; j < 2; j++)
-            for (k = 0; k < 2; k++)
-                for (l = 0; l < 6; l++)
-                    for (m = 0; m < 6; m++) {
-                        uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
-                        unsigned *e = s->counts.eob[i][j][k][l][m];
-                        unsigned *c = s->counts.coef[i][j][k][l][m];
-
-                        if (l == 0 && m >= 3) // dc only has 3 pt
-                            break;
-
-                        adapt_prob(&pp[0], e[0], e[1], 24, uf);
-                        adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
-                        adapt_prob(&pp[2], c[1], c[2], 24, uf);
-                    }
-
-    if (s->keyframe || s->intraonly) {
-        memcpy(p->skip,  s->prob.p.skip,  sizeof(p->skip));
-        memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
-        memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
-        memcpy(p->tx8p,  s->prob.p.tx8p,  sizeof(p->tx8p));
-        return;
-    }
-
-    // skip flag
-    for (i = 0; i < 3; i++)
-        adapt_prob(&p->skip[i], s->counts.skip[i][0],
-                   s->counts.skip[i][1], 20, 128);
-
-    // intra/inter flag
-    for (i = 0; i < 4; i++)
-        adapt_prob(&p->intra[i], s->counts.intra[i][0],
-                   s->counts.intra[i][1], 20, 128);
-
-    // comppred flag
-    if (s->comppredmode == PRED_SWITCHABLE) {
-        for (i = 0; i < 5; i++)
-            adapt_prob(&p->comp[i], s->counts.comp[i][0],
-                       s->counts.comp[i][1], 20, 128);
-    }
-
-    // reference frames
-    if (s->comppredmode != PRED_SINGLEREF) {
-        for (i = 0; i < 5; i++)
-            adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
-                       s->counts.comp_ref[i][1], 20, 128);
-    }
-
-    if (s->comppredmode != PRED_COMPREF) {
-        for (i = 0; i < 5; i++) {
-            uint8_t *pp = p->single_ref[i];
-            unsigned (*c)[2] = s->counts.single_ref[i];
-
-            adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
-            adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
-        }
-    }
-
-    // block partitioning
-    for (i = 0; i < 4; i++)
-        for (j = 0; j < 4; j++) {
-            uint8_t *pp = p->partition[i][j];
-            unsigned *c = s->counts.partition[i][j];
-
-            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
-            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
-            adapt_prob(&pp[2], c[2], c[3], 20, 128);
-        }
-
-    // tx size
-    if (s->txfmmode == TX_SWITCHABLE) {
-        for (i = 0; i < 2; i++) {
-            unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
-
-            adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0],
-                       s->counts.tx8p[i][1], 20, 128);
-            adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
-            adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
-            adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
-            adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
-            adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
-        }
-    }
-
-    // interpolation filter
-    if (s->filtermode == FILTER_SWITCHABLE) {
-        for (i = 0; i < 4; i++) {
-            uint8_t *pp = p->filter[i];
-            unsigned *c = s->counts.filter[i];
-
-            adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
-            adapt_prob(&pp[1], c[1], c[2], 20, 128);
-        }
-    }
-
-    // inter modes
-    for (i = 0; i < 7; i++) {
-        uint8_t *pp = p->mv_mode[i];
-        unsigned *c = s->counts.mv_mode[i];
-
-        adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
-        adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
-        adapt_prob(&pp[2], c[1], c[3], 20, 128);
-    }
-
-    // mv joints
-    {
-        uint8_t *pp = p->mv_joint;
-        unsigned *c = s->counts.mv_joint;
-
-        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
-        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
-        adapt_prob(&pp[2], c[2], c[3], 20, 128);
-    }
-
-    // mv components
-    for (i = 0; i < 2; i++) {
-        uint8_t *pp;
-        unsigned *c, (*c2)[2], sum;
-
-        adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
-                   s->counts.mv_comp[i].sign[1], 20, 128);
-
-        pp  = p->mv_comp[i].classes;
-        c   = s->counts.mv_comp[i].classes;
-        sum = c[1] + c[2] + c[3] + c[4] + c[5] +
-              c[6] + c[7] + c[8] + c[9] + c[10];
-        adapt_prob(&pp[0], c[0], sum, 20, 128);
-        sum -= c[1];
-        adapt_prob(&pp[1], c[1], sum, 20, 128);
-        sum -= c[2] + c[3];
-        adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
-        adapt_prob(&pp[3], c[2], c[3], 20, 128);
-        sum -= c[4] + c[5];
-        adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
-        adapt_prob(&pp[5], c[4], c[5], 20, 128);
-        sum -= c[6];
-        adapt_prob(&pp[6], c[6], sum, 20, 128);
-        adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
-        adapt_prob(&pp[8], c[7], c[8], 20, 128);
-        adapt_prob(&pp[9], c[9], c[10], 20, 128);
-
-        adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
-                   s->counts.mv_comp[i].class0[1], 20, 128);
-        pp = p->mv_comp[i].bits;
-        c2 = s->counts.mv_comp[i].bits;
-        for (j = 0; j < 10; j++)
-            adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
-
-        for (j = 0; j < 2; j++) {
-            pp = p->mv_comp[i].class0_fp[j];
-            c  = s->counts.mv_comp[i].class0_fp[j];
-            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
-            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
-            adapt_prob(&pp[2], c[2], c[3], 20, 128);
-        }
-        pp = p->mv_comp[i].fp;
-        c  = s->counts.mv_comp[i].fp;
-        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
-        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
-        adapt_prob(&pp[2], c[2], c[3], 20, 128);
-
-        if (s->highprecisionmvs) {
-            adapt_prob(&p->mv_comp[i].class0_hp,
-                       s->counts.mv_comp[i].class0_hp[0],
-                       s->counts.mv_comp[i].class0_hp[1], 20, 128);
-            adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
-                       s->counts.mv_comp[i].hp[1], 20, 128);
-        }
-    }
-
-    // y intra modes
-    for (i = 0; i < 4; i++) {
-        uint8_t *pp = p->y_mode[i];
-        unsigned *c = s->counts.y_mode[i], sum, s2;
-
-        sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
-        adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
-        sum -= c[TM_VP8_PRED];
-        adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
-        sum -= c[VERT_PRED];
-        adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
-        s2   = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
-        sum -= s2;
-        adapt_prob(&pp[3], s2, sum, 20, 128);
-        s2 -= c[HOR_PRED];
-        adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
-        adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED],
-                   20, 128);
-        sum -= c[DIAG_DOWN_LEFT_PRED];
-        adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
-        sum -= c[VERT_LEFT_PRED];
-        adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
-        adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
-    }
-
-    // uv intra modes
-    for (i = 0; i < 10; i++) {
-        uint8_t *pp = p->uv_mode[i];
-        unsigned *c = s->counts.uv_mode[i], sum, s2;
-
-        sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
-        adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
-        sum -= c[TM_VP8_PRED];
-        adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
-        sum -= c[VERT_PRED];
-        adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
-        s2   = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
-        sum -= s2;
-        adapt_prob(&pp[3], s2, sum, 20, 128);
-        s2 -= c[HOR_PRED];
-        adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
-        adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED],
-                   20, 128);
-        sum -= c[DIAG_DOWN_LEFT_PRED];
-        adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
-        sum -= c[VERT_LEFT_PRED];
-        adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
-        adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
-    }
-}
diff --git a/libavcodec/vqavideo.c b/libavcodec/vqavideo.c
index 86f2fcf..c34849d5 100644
--- a/libavcodec/vqavideo.c
+++ b/libavcodec/vqavideo.c
@@ -2,20 +2,20 @@
  * Westwood Studios VQA Video Decoder
  * Copyright (C) 2003 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -128,7 +128,7 @@ static av_cold int vqa_decode_init(AVCodecContext *avctx)
 
     /* make sure the extradata made it */
     if (s->avctx->extradata_size != VQA_HEADER_SIZE) {
-        av_log(s->avctx, AV_LOG_ERROR, "  VQA video: expected extradata size of %d\n", VQA_HEADER_SIZE);
+        av_log(s->avctx, AV_LOG_ERROR, "expected extradata size of %d\n", VQA_HEADER_SIZE);
         return AVERROR(EINVAL);
     }
 
@@ -162,8 +162,7 @@ static av_cold int vqa_decode_init(AVCodecContext *avctx)
         return AVERROR_INVALIDDATA;
     }
 
-    if (s->width  & (s->vector_width  - 1) ||
-        s->height & (s->vector_height - 1)) {
+    if (s->width % s->vector_width || s->height % s->vector_height) {
         av_log(avctx, AV_LOG_ERROR, "Image size not multiple of block size\n");
         return AVERROR_INVALIDDATA;
     }
@@ -180,7 +179,7 @@ static av_cold int vqa_decode_init(AVCodecContext *avctx)
     /* allocate decode buffer */
     s->decode_buffer_size = (s->width / s->vector_width) *
         (s->height / s->vector_height) * 2;
-    s->decode_buffer = av_malloc(s->decode_buffer_size);
+    s->decode_buffer = av_mallocz(s->decode_buffer_size);
     if (!s->decode_buffer)
         goto fail;
 
@@ -208,22 +207,22 @@ fail:
 
 #define CHECK_COUNT() \
     if (dest_index + count > dest_size) { \
-        av_log(NULL, AV_LOG_ERROR, "  VQA video: decode_format80 problem: next op would overflow dest_index\n"); \
-        av_log(NULL, AV_LOG_ERROR, "  VQA video: current dest_index = %d, count = %d, dest_size = %d\n", \
+        av_log(s->avctx, AV_LOG_ERROR, "decode_format80 problem: next op would overflow dest_index\n"); \
+        av_log(s->avctx, AV_LOG_ERROR, "current dest_index = %d, count = %d, dest_size = %d\n", \
             dest_index, count, dest_size); \
         return AVERROR_INVALIDDATA; \
     }
 
 #define CHECK_COPY(idx) \
     if (idx < 0 || idx + count > dest_size) { \
-        av_log(NULL, AV_LOG_ERROR, "  VQA video: decode_format80 problem: next op would overflow dest_index\n"); \
-        av_log(NULL, AV_LOG_ERROR, "  VQA video: current src_pos = %d, count = %d, dest_size = %d\n", \
+        av_log(s->avctx, AV_LOG_ERROR, "decode_format80 problem: next op would overflow dest_index\n"); \
+        av_log(s->avctx, AV_LOG_ERROR, "current src_pos = %d, count = %d, dest_size = %d\n", \
             src_pos, count, dest_size); \
         return AVERROR_INVALIDDATA; \
     }
 
 
-static int decode_format80(GetByteContext *gb, int src_size,
+static int decode_format80(VqaContext *s, int src_size,
     unsigned char *dest, int dest_size, int check_size) {
 
     int dest_index = 0;
@@ -232,26 +231,26 @@ static int decode_format80(GetByteContext *gb, int src_size,
     unsigned char color;
     int i;
 
-    start = bytestream2_tell(gb);
-    while (bytestream2_tell(gb) - start < src_size) {
-        opcode = bytestream2_get_byte(gb);
-        av_dlog(NULL, "      opcode %02X: ", opcode);
+    start = bytestream2_tell(&s->gb);
+    while (bytestream2_tell(&s->gb) - start < src_size) {
+        opcode = bytestream2_get_byte(&s->gb);
+        av_dlog(s->avctx, "opcode %02X: ", opcode);
 
         /* 0x80 means that frame is finished */
         if (opcode == 0x80)
-            return 0;
+            break;
 
         if (dest_index >= dest_size) {
-            av_log(NULL, AV_LOG_ERROR, "  VQA video: decode_format80 problem: dest_index (%d) exceeded dest_size (%d)\n",
+            av_log(s->avctx, AV_LOG_ERROR, "decode_format80 problem: dest_index (%d) exceeded dest_size (%d)\n",
                 dest_index, dest_size);
             return AVERROR_INVALIDDATA;
         }
 
         if (opcode == 0xFF) {
 
-            count   = bytestream2_get_le16(gb);
-            src_pos = bytestream2_get_le16(gb);
-            av_dlog(NULL, "(1) copy %X bytes from absolute pos %X\n", count, src_pos);
+            count   = bytestream2_get_le16(&s->gb);
+            src_pos = bytestream2_get_le16(&s->gb);
+            av_dlog(s->avctx, "(1) copy %X bytes from absolute pos %X\n", count, src_pos);
             CHECK_COUNT();
             CHECK_COPY(src_pos);
             for (i = 0; i < count; i++)
@@ -260,9 +259,9 @@ static int decode_format80(GetByteContext *gb, int src_size,
 
         } else if (opcode == 0xFE) {
 
-            count = bytestream2_get_le16(gb);
-            color = bytestream2_get_byte(gb);
-            av_dlog(NULL, "(2) set %X bytes to %02X\n", count, color);
+            count = bytestream2_get_le16(&s->gb);
+            color = bytestream2_get_byte(&s->gb);
+            av_dlog(s->avctx, "(2) set %X bytes to %02X\n", count, color);
             CHECK_COUNT();
             memset(&dest[dest_index], color, count);
             dest_index += count;
@@ -270,8 +269,8 @@ static int decode_format80(GetByteContext *gb, int src_size,
         } else if ((opcode & 0xC0) == 0xC0) {
 
             count = (opcode & 0x3F) + 3;
-            src_pos = bytestream2_get_le16(gb);
-            av_dlog(NULL, "(3) copy %X bytes from absolute pos %X\n", count, src_pos);
+            src_pos = bytestream2_get_le16(&s->gb);
+            av_dlog(s->avctx, "(3) copy %X bytes from absolute pos %X\n", count, src_pos);
             CHECK_COUNT();
             CHECK_COPY(src_pos);
             for (i = 0; i < count; i++)
@@ -281,16 +280,16 @@ static int decode_format80(GetByteContext *gb, int src_size,
         } else if (opcode > 0x80) {
 
             count = opcode & 0x3F;
-            av_dlog(NULL, "(4) copy %X bytes from source to dest\n", count);
+            av_dlog(s->avctx, "(4) copy %X bytes from source to dest\n", count);
             CHECK_COUNT();
-            bytestream2_get_buffer(gb, &dest[dest_index], count);
+            bytestream2_get_buffer(&s->gb, &dest[dest_index], count);
             dest_index += count;
 
         } else {
 
             count = ((opcode & 0x70) >> 4) + 3;
-            src_pos = bytestream2_get_byte(gb) | ((opcode & 0x0F) << 8);
-            av_dlog(NULL, "(5) copy %X bytes from relpos %X\n", count, src_pos);
+            src_pos = bytestream2_get_byte(&s->gb) | ((opcode & 0x0F) << 8);
+            av_dlog(s->avctx, "(5) copy %X bytes from relpos %X\n", count, src_pos);
             CHECK_COUNT();
             CHECK_COPY(dest_index - src_pos);
             for (i = 0; i < count; i++)
@@ -304,9 +303,11 @@ static int decode_format80(GetByteContext *gb, int src_size,
      * codebook entry; it is not important for compressed codebooks because
      * not every entry needs to be filled */
     if (check_size)
-        if (dest_index < dest_size)
-            av_log(NULL, AV_LOG_ERROR, "  VQA video: decode_format80 problem: decode finished with dest_index (%d) < dest_size (%d)\n",
+        if (dest_index < dest_size) {
+            av_log(s->avctx, AV_LOG_ERROR, "decode_format80 problem: decode finished with dest_index (%d) < dest_size (%d)\n",
                 dest_index, dest_size);
+            memset(dest + dest_index, 0, dest_size - dest_index);
+        }
 
     return 0; // let's display what we decoded anyway
 }
@@ -377,7 +378,7 @@ static int vqa_decode_chunk(VqaContext *s, AVFrame *frame)
             break;
 
         default:
-            av_log(s->avctx, AV_LOG_ERROR, "  VQA video: Found unknown chunk type: %c%c%c%c (%08X)\n",
+            av_log(s->avctx, AV_LOG_ERROR, "Found unknown chunk type: %c%c%c%c (%08X)\n",
             (chunk_type >> 24) & 0xFF,
             (chunk_type >> 16) & 0xFF,
             (chunk_type >>  8) & 0xFF,
@@ -394,7 +395,7 @@ static int vqa_decode_chunk(VqaContext *s, AVFrame *frame)
     if ((cpl0_chunk != -1) && (cplz_chunk != -1)) {
 
         /* a chunk should not have both chunk types */
-        av_log(s->avctx, AV_LOG_ERROR, "  VQA video: problem: found both CPL0 and CPLZ chunks\n");
+        av_log(s->avctx, AV_LOG_ERROR, "problem: found both CPL0 and CPLZ chunks\n");
         return AVERROR_INVALIDDATA;
     }
 
@@ -412,7 +413,7 @@ static int vqa_decode_chunk(VqaContext *s, AVFrame *frame)
         chunk_size = bytestream2_get_be32(&s->gb);
         /* sanity check the palette size */
         if (chunk_size / 3 > 256 || chunk_size > bytestream2_get_bytes_left(&s->gb)) {
-            av_log(s->avctx, AV_LOG_ERROR, "  VQA video: problem: found a palette chunk with %d colors\n",
+            av_log(s->avctx, AV_LOG_ERROR, "problem: found a palette chunk with %d colors\n",
                 chunk_size / 3);
             return AVERROR_INVALIDDATA;
         }
@@ -421,7 +422,8 @@ static int vqa_decode_chunk(VqaContext *s, AVFrame *frame)
             r = bytestream2_get_byteu(&s->gb) * 4;
             g = bytestream2_get_byteu(&s->gb) * 4;
             b = bytestream2_get_byteu(&s->gb) * 4;
-            s->palette[i] = (r << 16) | (g << 8) | (b);
+            s->palette[i] = 0xFFU << 24 | r << 16 | g << 8 | b;
+            s->palette[i] |= s->palette[i] >> 6 & 0x30303;
         }
     }
 
@@ -429,7 +431,7 @@ static int vqa_decode_chunk(VqaContext *s, AVFrame *frame)
     if ((cbf0_chunk != -1) && (cbfz_chunk != -1)) {
 
         /* a chunk should not have both chunk types */
-        av_log(s->avctx, AV_LOG_ERROR, "  VQA video: problem: found both CBF0 and CBFZ chunks\n");
+        av_log(s->avctx, AV_LOG_ERROR, "problem: found both CBF0 and CBFZ chunks\n");
         return AVERROR_INVALIDDATA;
     }
 
@@ -438,7 +440,7 @@ static int vqa_decode_chunk(VqaContext *s, AVFrame *frame)
 
         bytestream2_seek(&s->gb, cbfz_chunk, SEEK_SET);
         chunk_size = bytestream2_get_be32(&s->gb);
-        if ((res = decode_format80(&s->gb, chunk_size, s->codebook,
+        if ((res = decode_format80(s, chunk_size, s->codebook,
                                    s->codebook_size, 0)) < 0)
             return res;
     }
@@ -450,7 +452,7 @@ static int vqa_decode_chunk(VqaContext *s, AVFrame *frame)
         chunk_size = bytestream2_get_be32(&s->gb);
         /* sanity check the full codebook size */
         if (chunk_size > MAX_CODEBOOK_SIZE) {
-            av_log(s->avctx, AV_LOG_ERROR, "  VQA video: problem: CBF0 chunk too large (0x%X bytes)\n",
+            av_log(s->avctx, AV_LOG_ERROR, "problem: CBF0 chunk too large (0x%X bytes)\n",
                 chunk_size);
             return AVERROR_INVALIDDATA;
         }
@@ -462,13 +464,13 @@ static int vqa_decode_chunk(VqaContext *s, AVFrame *frame)
     if (vptz_chunk == -1) {
 
         /* something is wrong if there is no VPTZ chunk */
-        av_log(s->avctx, AV_LOG_ERROR, "  VQA video: problem: no VPTZ chunk found\n");
+        av_log(s->avctx, AV_LOG_ERROR, "problem: no VPTZ chunk found\n");
         return AVERROR_INVALIDDATA;
     }
 
     bytestream2_seek(&s->gb, vptz_chunk, SEEK_SET);
     chunk_size = bytestream2_get_be32(&s->gb);
-    if ((res = decode_format80(&s->gb, chunk_size,
+    if ((res = decode_format80(s, chunk_size,
                                s->decode_buffer, s->decode_buffer_size, 1)) < 0)
         return res;
 
@@ -531,7 +533,7 @@ static int vqa_decode_chunk(VqaContext *s, AVFrame *frame)
     /* handle partial codebook */
     if ((cbp0_chunk != -1) && (cbpz_chunk != -1)) {
         /* a chunk should not have both chunk types */
-        av_log(s->avctx, AV_LOG_ERROR, "  VQA video: problem: found both CBP0 and CBPZ chunks\n");
+        av_log(s->avctx, AV_LOG_ERROR, "problem: found both CBP0 and CBPZ chunks\n");
         return AVERROR_INVALIDDATA;
     }
 
@@ -552,7 +554,7 @@ static int vqa_decode_chunk(VqaContext *s, AVFrame *frame)
         s->next_codebook_buffer_index += chunk_size;
 
         s->partial_countdown--;
-        if (s->partial_countdown == 0) {
+        if (s->partial_countdown <= 0) {
 
             /* time to replace codebook */
             memcpy(s->codebook, s->next_codebook_buffer,
@@ -581,12 +583,10 @@ static int vqa_decode_chunk(VqaContext *s, AVFrame *frame)
         s->next_codebook_buffer_index += chunk_size;
 
         s->partial_countdown--;
-        if (s->partial_countdown == 0) {
-            GetByteContext gb;
-
-            bytestream2_init(&gb, s->next_codebook_buffer, s->next_codebook_buffer_index);
+        if (s->partial_countdown <= 0) {
+            bytestream2_init(&s->gb, s->next_codebook_buffer, s->next_codebook_buffer_index);
             /* decompress codebook */
-            if ((res = decode_format80(&gb, s->next_codebook_buffer_index,
+            if ((res = decode_format80(s, s->next_codebook_buffer_index,
                                        s->codebook, s->codebook_size, 0)) < 0)
                 return res;
 
@@ -607,10 +607,8 @@ static int vqa_decode_frame(AVCodecContext *avctx,
     AVFrame *frame = data;
     int res;
 
-    if ((res = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(s->avctx, AV_LOG_ERROR, "  VQA Video: get_buffer() failed\n");
+    if ((res = ff_get_buffer(avctx, frame, 0)) < 0)
         return res;
-    }
 
     bytestream2_init(&s->gb, avpkt->data, avpkt->size);
     if ((res = vqa_decode_chunk(s, frame)) < 0)
diff --git a/libavcodec/wavpack.c b/libavcodec/wavpack.c
index cbc5b04..7c60f78 100644
--- a/libavcodec/wavpack.c
+++ b/libavcodec/wavpack.c
@@ -2,20 +2,20 @@
  * WavPack lossless audio decoder
  * Copyright (c) 2006,2011 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,60 +25,16 @@
 #include "avcodec.h"
 #include "get_bits.h"
 #include "internal.h"
+#include "thread.h"
 #include "unary.h"
 #include "bytestream.h"
+#include "wavpack.h"
 
 /**
  * @file
  * WavPack lossless audio decoder
  */
 
-#define WV_HEADER_SIZE    32
-
-#define WV_MONO           0x00000004
-#define WV_JOINT_STEREO   0x00000010
-#define WV_FALSE_STEREO   0x40000000
-
-#define WV_HYBRID_MODE    0x00000008
-#define WV_HYBRID_SHAPE   0x00000008
-#define WV_HYBRID_BITRATE 0x00000200
-#define WV_HYBRID_BALANCE 0x00000400
-#define WV_INITIAL_BLOCK  0x00000800
-#define WV_FINAL_BLOCK    0x00001000
-
-#define WV_SINGLE_BLOCK (WV_INITIAL_BLOCK | WV_FINAL_BLOCK)
-
-#define WV_FLT_SHIFT_ONES 0x01
-#define WV_FLT_SHIFT_SAME 0x02
-#define WV_FLT_SHIFT_SENT 0x04
-#define WV_FLT_ZERO_SENT  0x08
-#define WV_FLT_ZERO_SIGN  0x10
-
-enum WP_ID_Flags {
-    WP_IDF_MASK   = 0x3F,
-    WP_IDF_IGNORE = 0x20,
-    WP_IDF_ODD    = 0x40,
-    WP_IDF_LONG   = 0x80
-};
-
-enum WP_ID {
-    WP_ID_DUMMY = 0,
-    WP_ID_ENCINFO,
-    WP_ID_DECTERMS,
-    WP_ID_DECWEIGHTS,
-    WP_ID_DECSAMPLES,
-    WP_ID_ENTROPY,
-    WP_ID_HYBRID,
-    WP_ID_SHAPING,
-    WP_ID_FLOATINFO,
-    WP_ID_INT32INFO,
-    WP_ID_DATA,
-    WP_ID_CORR,
-    WP_ID_EXTRABITS,
-    WP_ID_CHANINFO,
-    WP_ID_SAMPLE_RATE = 0x27,
-};
-
 typedef struct SavedContext {
     int offset;
     int size;
@@ -86,23 +42,6 @@ typedef struct SavedContext {
     uint32_t crc;
 } SavedContext;
 
-#define MAX_TERMS 16
-
-typedef struct Decorr {
-    int delta;
-    int value;
-    int weightA;
-    int weightB;
-    int samplesA[8];
-    int samplesB[8];
-} Decorr;
-
-typedef struct WvChannel {
-    int median[3];
-    int slow_level, error_limit;
-    int bitrate_acc, bitrate_delta;
-} WvChannel;
-
 typedef struct WavpackFrameContext {
     AVCodecContext *avctx;
     int frame_flags;
@@ -144,101 +83,7 @@ typedef struct WavpackContext {
     int ch_offset;
 } WavpackContext;
 
-static const int wv_rates[16] = {
-     6000,  8000,  9600, 11025, 12000, 16000,  22050, 24000,
-    32000, 44100, 48000, 64000, 88200, 96000, 192000,     0
-};
-
-// exponent table copied from WavPack source
-static const uint8_t wp_exp2_table[256] = {
-    0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08, 0x09, 0x0a, 0x0b,
-    0x0b, 0x0c, 0x0d, 0x0e, 0x0e, 0x0f, 0x10, 0x10, 0x11, 0x12, 0x13, 0x13, 0x14, 0x15, 0x16, 0x16,
-    0x17, 0x18, 0x19, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1d, 0x1e, 0x1f, 0x20, 0x20, 0x21, 0x22, 0x23,
-    0x24, 0x24, 0x25, 0x26, 0x27, 0x28, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
-    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3a, 0x3b, 0x3c, 0x3d,
-    0x3e, 0x3f, 0x40, 0x41, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x48, 0x49, 0x4a, 0x4b,
-    0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a,
-    0x5b, 0x5c, 0x5d, 0x5e, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
-    0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
-    0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x87, 0x88, 0x89, 0x8a,
-    0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b,
-    0x9c, 0x9d, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad,
-    0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0,
-    0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc8, 0xc9, 0xca, 0xcb, 0xcd, 0xce, 0xcf, 0xd0, 0xd2, 0xd3, 0xd4,
-    0xd6, 0xd7, 0xd8, 0xd9, 0xdb, 0xdc, 0xdd, 0xde, 0xe0, 0xe1, 0xe2, 0xe4, 0xe5, 0xe6, 0xe8, 0xe9,
-    0xea, 0xec, 0xed, 0xee, 0xf0, 0xf1, 0xf2, 0xf4, 0xf5, 0xf6, 0xf8, 0xf9, 0xfa, 0xfc, 0xfd, 0xff
-};
-
-static const uint8_t wp_log2_table [] = {
-    0x00, 0x01, 0x03, 0x04, 0x06, 0x07, 0x09, 0x0a, 0x0b, 0x0d, 0x0e, 0x10, 0x11, 0x12, 0x14, 0x15,
-    0x16, 0x18, 0x19, 0x1a, 0x1c, 0x1d, 0x1e, 0x20, 0x21, 0x22, 0x24, 0x25, 0x26, 0x28, 0x29, 0x2a,
-    0x2c, 0x2d, 0x2e, 0x2f, 0x31, 0x32, 0x33, 0x34, 0x36, 0x37, 0x38, 0x39, 0x3b, 0x3c, 0x3d, 0x3e,
-    0x3f, 0x41, 0x42, 0x43, 0x44, 0x45, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4d, 0x4e, 0x4f, 0x50, 0x51,
-    0x52, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63,
-    0x64, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x74, 0x75,
-    0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85,
-    0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95,
-    0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
-    0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb2,
-    0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc0,
-    0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcb, 0xcc, 0xcd, 0xce,
-    0xcf, 0xd0, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd8, 0xd9, 0xda, 0xdb,
-    0xdc, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe4, 0xe5, 0xe6, 0xe7, 0xe7,
-    0xe8, 0xe9, 0xea, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xee, 0xef, 0xf0, 0xf1, 0xf1, 0xf2, 0xf3, 0xf4,
-    0xf4, 0xf5, 0xf6, 0xf7, 0xf7, 0xf8, 0xf9, 0xf9, 0xfa, 0xfb, 0xfc, 0xfc, 0xfd, 0xfe, 0xff, 0xff
-};
-
-static av_always_inline int wp_exp2(int16_t val)
-{
-    int res, neg = 0;
-
-    if (val < 0) {
-        val = -val;
-        neg = 1;
-    }
-
-    res   = wp_exp2_table[val & 0xFF] | 0x100;
-    val >>= 8;
-    res   = (val > 9) ? (res << (val - 9)) : (res >> (9 - val));
-    return neg ? -res : res;
-}
-
-static av_always_inline int wp_log2(int32_t val)
-{
-    int bits;
-
-    if (!val)
-        return 0;
-    if (val == 1)
-        return 256;
-    val += val >> 9;
-    bits = av_log2(val) + 1;
-    if (bits < 9)
-        return (bits << 8) + wp_log2_table[(val << (9 - bits)) & 0xFF];
-    else
-        return (bits << 8) + wp_log2_table[(val >> (bits - 9)) & 0xFF];
-}
-
-#define LEVEL_DECAY(a)  ((a + 0x80) >> 8)
-
-// macros for manipulating median values
-#define GET_MED(n) ((c->median[n] >> 4) + 1)
-#define DEC_MED(n) c->median[n] -= ((c->median[n] + (128 >> n) - 2) / (128 >> n)) * 2
-#define INC_MED(n) c->median[n] += ((c->median[n] + (128 >> n)    ) / (128 >> n)) * 5
-
-// macros for applying weight
-#define UPDATE_WEIGHT_CLIP(weight, delta, samples, in) \
-    if (samples && in) { \
-        if ((samples ^ in) < 0) { \
-            weight -= delta; \
-            if (weight < -1024) \
-                weight = -1024; \
-        } else { \
-            weight += delta; \
-            if (weight > 1024) \
-                weight = 1024; \
-        } \
-    }
+#define LEVEL_DECAY(a)  (((a) + 0x80) >> 8)
 
 static av_always_inline int get_tail(GetBitContext *gb, int k)
 {
@@ -381,6 +226,10 @@ static int wv_get_value(WavpackFrameContext *ctx, GetBitContext *gb,
         INC_MED(2);
     }
     if (!c->error_limit) {
+        if (add >= 0x2000000U) {
+            av_log(ctx->avctx, AV_LOG_ERROR, "k %d is too large\n", add);
+            goto error;
+        }
         ret = base + get_tail(gb, add);
         if (get_bits_left(gb) <= 0)
             goto error;
@@ -638,6 +487,13 @@ static inline int wv_unpack_stereo(WavpackFrameContext *s, GetBitContext *gb,
     } while (!last && count < s->samples);
 
     wv_reset_saved_context(s);
+
+    if (last && count < s->samples) {
+        int size = av_get_bytes_per_sample(type);
+        memset((uint8_t*)dst_l + count*size, 0, (s->samples-count)*size);
+        memset((uint8_t*)dst_r + count*size, 0, (s->samples-count)*size);
+    }
+
     if ((s->avctx->err_recognition & AV_EF_CRCCHECK) &&
         wv_check_crc(s, crc, crc_extra_bits))
         return AVERROR_INVALIDDATA;
@@ -699,6 +555,12 @@ static inline int wv_unpack_mono(WavpackFrameContext *s, GetBitContext *gb,
     } while (!last && count < s->samples);
 
     wv_reset_saved_context(s);
+
+    if (last && count < s->samples) {
+        int size = av_get_bytes_per_sample(type);
+        memset((uint8_t*)dst + count*size, 0, (s->samples-count)*size);
+    }
+
     if (s->avctx->err_recognition & AV_EF_CRCCHECK) {
         int ret = wv_check_crc(s, crc, crc_extra_bits);
         if (ret < 0 && s->avctx->err_recognition & AV_EF_EXPLODE)
@@ -723,6 +585,13 @@ static av_cold int wv_alloc_frame_context(WavpackContext *c)
     return 0;
 }
 
+static int init_thread_copy(AVCodecContext *avctx) /* registered as .init_thread_copy in ff_wavpack_decoder */
+{
+    WavpackContext *s = avctx->priv_data;
+    s->avctx = avctx; /* make the private context point back at the avctx it was reached through */
+    return 0; /* always reports success */
+}
+
 static av_cold int wavpack_decode_init(AVCodecContext *avctx)
 {
     WavpackContext *s = avctx->priv_data;
@@ -750,9 +619,10 @@ static int wavpack_decode_block(AVCodecContext *avctx, int block_no,
                                 AVFrame *frame, const uint8_t *buf, int buf_size)
 {
     WavpackContext *wc = avctx->priv_data;
+    ThreadFrame tframe = { .f = frame };
     WavpackFrameContext *s;
     GetByteContext gb;
-    void *samples_l, *samples_r;
+    void *samples_l = NULL, *samples_r = NULL;
     int ret;
     int got_terms   = 0, got_weights = 0, got_samples = 0,
         got_entropy = 0, got_bs      = 0, got_float   = 0, got_hybrid = 0;
@@ -910,7 +780,7 @@ static int wavpack_decode_block(AVCodecContext *avctx, int block_no,
         case WP_ID_ENTROPY:
             if (size != 6 * (s->stereo_in + 1)) {
                 av_log(avctx, AV_LOG_ERROR,
-                       "Entropy vars size should be %i, got %i",
+                       "Entropy vars size should be %i, got %i.\n",
                        6 * (s->stereo_in + 1), size);
                 bytestream2_skip(&gb, ssize);
                 continue;
@@ -990,7 +860,8 @@ static int wavpack_decode_block(AVCodecContext *avctx, int block_no,
         case WP_ID_DATA:
             s->sc.offset = bytestream2_tell(&gb);
             s->sc.size   = size * 8;
-            init_get_bits(&s->gb, gb.buffer, size * 8);
+            if ((ret = init_get_bits8(&s->gb, gb.buffer, size)) < 0)
+                return ret;
             s->data_size = size * 8;
             bytestream2_skip(&gb, size);
             got_bs       = 1;
@@ -1004,7 +875,8 @@ static int wavpack_decode_block(AVCodecContext *avctx, int block_no,
             }
             s->extra_sc.offset = bytestream2_tell(&gb);
             s->extra_sc.size   = size * 8;
-            init_get_bits(&s->gb_extra_bits, gb.buffer, size * 8);
+            if ((ret = init_get_bits8(&s->gb_extra_bits, gb.buffer, size)) < 0)
+                return ret;
             s->crc_extra_bits  = get_bits_long(&s->gb_extra_bits, 32);
             bytestream2_skip(&gb, size);
             s->got_extra_bits  = 1;
@@ -1027,7 +899,7 @@ static int wavpack_decode_block(AVCodecContext *avctx, int block_no,
                 chmask = bytestream2_get_le24(&gb);
                 break;
             case 3:
-                chmask = bytestream2_get_le32(&gb);;
+                chmask = bytestream2_get_le32(&gb);
                 break;
             case 5:
                 bytestream2_skip(&gb, 1);
@@ -1115,11 +987,10 @@ static int wavpack_decode_block(AVCodecContext *avctx, int block_no,
         }
 
         /* get output buffer */
-        frame->nb_samples = s->samples;
-        if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-            av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+        frame->nb_samples = s->samples + 1;
+        if ((ret = ff_thread_get_buffer(avctx, &tframe, 0)) < 0)
             return ret;
-        }
+        frame->nb_samples = s->samples;
     }
 
     if (wc->ch_offset + s->stereo >= avctx->channels) {
@@ -1176,7 +1047,7 @@ static int wavpack_decode_frame(AVCodecContext *avctx, void *data,
     /* determine number of samples */
     s->samples  = AV_RL32(buf + 20);
     frame_flags = AV_RL32(buf + 24);
-    if (s->samples <= 0) {
+    if (s->samples <= 0 || s->samples > WV_MAX_SAMPLES) {
         av_log(avctx, AV_LOG_ERROR, "Invalid number of samples: %d\n",
                s->samples);
         return AVERROR_INVALIDDATA;
@@ -1234,5 +1105,6 @@ AVCodec ff_wavpack_decoder = {
     .close          = wavpack_decode_end,
     .decode         = wavpack_decode_frame,
     .flush          = wavpack_decode_flush,
-    .capabilities   = CODEC_CAP_DR1,
+    .init_thread_copy = ONLY_IF_THREADS_ENABLED(init_thread_copy),
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
 };
diff --git a/libavcodec/wavpack.h b/libavcodec/wavpack.h
new file mode 100644
index 0000000..3a3e8db
--- /dev/null
+++ b/libavcodec/wavpack.h
@@ -0,0 +1,194 @@
+/*
+ * WavPack decoder/encoder common code
+ * Copyright (c) 2006,2011 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_WAVPACK_H
+#define AVCODEC_WAVPACK_H
+
+#include "libavutil/common.h"
+
+#define MAX_TERMS      16
+#define MAX_TERM        8
+
+#define WV_HEADER_SIZE    32
+
+#define WV_MONO           0x00000004
+#define WV_JOINT_STEREO   0x00000010
+#define WV_CROSS_DECORR   0x00000020
+#define WV_FLOAT_DATA     0x00000080
+#define WV_INT32_DATA     0x00000100
+#define WV_FALSE_STEREO   0x40000000
+
+#define WV_HYBRID_MODE    0x00000008
+#define WV_HYBRID_SHAPE   0x00000008
+#define WV_HYBRID_BITRATE 0x00000200
+#define WV_HYBRID_BALANCE 0x00000400
+#define WV_INITIAL_BLOCK  0x00000800
+#define WV_FINAL_BLOCK    0x00001000
+
+#define WV_MONO_DATA    (WV_MONO | WV_FALSE_STEREO)
+
+#define WV_SINGLE_BLOCK (WV_INITIAL_BLOCK | WV_FINAL_BLOCK)
+
+#define WV_FLT_SHIFT_ONES 0x01
+#define WV_FLT_SHIFT_SAME 0x02
+#define WV_FLT_SHIFT_SENT 0x04
+#define WV_FLT_ZERO_SENT  0x08
+#define WV_FLT_ZERO_SIGN  0x10
+
+#define WV_MAX_SAMPLES    131072
+
+enum WP_ID_Flags {
+    WP_IDF_MASK   = 0x3F,
+    WP_IDF_IGNORE = 0x20,
+    WP_IDF_ODD    = 0x40,
+    WP_IDF_LONG   = 0x80
+};
+
+enum WP_ID {
+    WP_ID_DUMMY = 0,
+    WP_ID_ENCINFO,
+    WP_ID_DECTERMS,
+    WP_ID_DECWEIGHTS,
+    WP_ID_DECSAMPLES,
+    WP_ID_ENTROPY,
+    WP_ID_HYBRID,
+    WP_ID_SHAPING,
+    WP_ID_FLOATINFO,
+    WP_ID_INT32INFO,
+    WP_ID_DATA,
+    WP_ID_CORR,
+    WP_ID_EXTRABITS,
+    WP_ID_CHANINFO,
+    WP_ID_SAMPLE_RATE = 0x27,
+};
+/* State of one decorrelation pass; the B members parallel the A ones (presumably second channel - confirm). */
+typedef struct Decorr {
+    int delta;              /* step handed to the UPDATE_WEIGHT* macros */
+    int value;              /* term selector: decorr_mono() branches on > MAX_TERM vs 1..MAX_TERM */
+    int weightA;            /* prediction weight applied to the A history */
+    int weightB;
+    int samplesA[MAX_TERM]; /* history of previous samples used for prediction */
+    int samplesB[MAX_TERM];
+    int sumA;               /* running sum of weightA, accumulated once per sample in decorr_mono() */
+    int sumB;
+} Decorr;
+/* Per-channel state; median[] is what the GET_MED/DEC_MED/INC_MED macros below read and adjust. */
+typedef struct WvChannel {
+    int median[3];
+    int slow_level, error_limit;
+    int bitrate_acc, bitrate_delta;
+} WvChannel;
+
+// macros for manipulating median values
+#define GET_MED(n) ((c->median[n] >> 4) + 1)
+#define DEC_MED(n) c->median[n] -= ((c->median[n] + (128 >> (n)) - 2) / (128 >> (n))) * 2
+#define INC_MED(n) c->median[n] += ((c->median[n] + (128 >> (n))    ) / (128 >> (n))) * 5
+
+// macros for applying weight
+#define UPDATE_WEIGHT_CLIP(weight, delta, samples, in) \
+    if (samples && in) { \
+        if ((samples ^ in) < 0) { \
+            weight -= delta; \
+            if (weight < -1024) \
+                weight = -1024; \
+        } else { \
+            weight += delta; \
+            if (weight > 1024) \
+                weight = 1024; \
+        } \
+    }
+
+static const int wv_rates[16] = {
+     6000,  8000,  9600, 11025, 12000, 16000,  22050, 24000,
+    32000, 44100, 48000, 64000, 88200, 96000, 192000,     0
+};
+
+// exponent table copied from WavPack source
+static const uint8_t wp_exp2_table[256] = {
+    0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08, 0x09, 0x0a, 0x0b,
+    0x0b, 0x0c, 0x0d, 0x0e, 0x0e, 0x0f, 0x10, 0x10, 0x11, 0x12, 0x13, 0x13, 0x14, 0x15, 0x16, 0x16,
+    0x17, 0x18, 0x19, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1d, 0x1e, 0x1f, 0x20, 0x20, 0x21, 0x22, 0x23,
+    0x24, 0x24, 0x25, 0x26, 0x27, 0x28, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
+    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3a, 0x3b, 0x3c, 0x3d,
+    0x3e, 0x3f, 0x40, 0x41, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x48, 0x49, 0x4a, 0x4b,
+    0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a,
+    0x5b, 0x5c, 0x5d, 0x5e, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
+    0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
+    0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x87, 0x88, 0x89, 0x8a,
+    0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b,
+    0x9c, 0x9d, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad,
+    0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0,
+    0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc8, 0xc9, 0xca, 0xcb, 0xcd, 0xce, 0xcf, 0xd0, 0xd2, 0xd3, 0xd4,
+    0xd6, 0xd7, 0xd8, 0xd9, 0xdb, 0xdc, 0xdd, 0xde, 0xe0, 0xe1, 0xe2, 0xe4, 0xe5, 0xe6, 0xe8, 0xe9,
+    0xea, 0xec, 0xed, 0xee, 0xf0, 0xf1, 0xf2, 0xf4, 0xf5, 0xf6, 0xf8, 0xf9, 0xfa, 0xfc, 0xfd, 0xff
+};
+
+static const uint8_t wp_log2_table [] = {
+    0x00, 0x01, 0x03, 0x04, 0x06, 0x07, 0x09, 0x0a, 0x0b, 0x0d, 0x0e, 0x10, 0x11, 0x12, 0x14, 0x15,
+    0x16, 0x18, 0x19, 0x1a, 0x1c, 0x1d, 0x1e, 0x20, 0x21, 0x22, 0x24, 0x25, 0x26, 0x28, 0x29, 0x2a,
+    0x2c, 0x2d, 0x2e, 0x2f, 0x31, 0x32, 0x33, 0x34, 0x36, 0x37, 0x38, 0x39, 0x3b, 0x3c, 0x3d, 0x3e,
+    0x3f, 0x41, 0x42, 0x43, 0x44, 0x45, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4d, 0x4e, 0x4f, 0x50, 0x51,
+    0x52, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63,
+    0x64, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x74, 0x75,
+    0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85,
+    0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95,
+    0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
+    0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb2,
+    0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc0,
+    0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcb, 0xcc, 0xcd, 0xce,
+    0xcf, 0xd0, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd8, 0xd9, 0xda, 0xdb,
+    0xdc, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe4, 0xe5, 0xe6, 0xe7, 0xe7,
+    0xe8, 0xe9, 0xea, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xee, 0xef, 0xf0, 0xf1, 0xf1, 0xf2, 0xf3, 0xf4,
+    0xf4, 0xf5, 0xf6, 0xf7, 0xf7, 0xf8, 0xf9, 0xf9, 0xfa, 0xfb, 0xfc, 0xfc, 0xfd, 0xfe, 0xff, 0xff
+};
+/* Inverse of wp_log2(): turn a signed log2 value with 8 fractional bits back into an integer. */
+static av_always_inline int wp_exp2(int16_t val)
+{
+    int res, neg = val < 0;
+
+    if (neg)
+        val = -val;
+    /* table supplies the fraction; OR-ing 0x100 restores the implicit leading one */
+    res   = wp_exp2_table[val & 0xFF] | 0x100;
+    val >>= 8;
+    if (val > 31U)  /* also catches -32768, which is still negative after the negation */
+        return INT_MIN;  /* shifting by that much would be undefined; report "unrepresentable" */
+    res   = (val > 9) ? (res << (val - 9)) : (res >> (9 - val));
+    return neg ? -res : res;
+}
+/* Table-driven log2: returns (bit count << 8) plus an 8-bit fraction from wp_log2_table; 0 for val == 0. */
+static av_always_inline int wp_log2(int32_t val)
+{
+    int bits;
+    /* 0 and 1 are special-cased to 0 and 256 */
+    if (!val)
+        return 0;
+    if (val == 1)
+        return 256;
+    val += val >> 9; /* NOTE(review): looks like a rounding nudge before the table lookup - confirm */
+    bits = av_log2(val) + 1; /* presumably av_log2() yields the highest set bit index, so this is the bit count */
+    if (bits < 9) /* normalise to 9 significant bits; the low 8 (below the leading one) index the table */
+        return (bits << 8) + wp_log2_table[(val << (9 - bits)) & 0xFF];
+    else
+        return (bits << 8) + wp_log2_table[(val >> (bits - 9)) & 0xFF];
+}
+
+#endif /* AVCODEC_WAVPACK_H */
diff --git a/libavcodec/wavpackenc.c b/libavcodec/wavpackenc.c
new file mode 100644
index 0000000..5b8973c
--- /dev/null
+++ b/libavcodec/wavpackenc.c
@@ -0,0 +1,2989 @@
+/*
+ * WavPack lossless audio encoder
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define BITSTREAM_WRITER_LE
+
+#include "libavutil/intreadwrite.h"
+#include "libavutil/opt.h"
+#include "avcodec.h"
+#include "internal.h"
+#include "put_bits.h"
+#include "bytestream.h"
+#include "wavpackenc.h"
+#include "wavpack.h"
+
+#define UPDATE_WEIGHT(weight, delta, source, result) \
+    if ((source) && (result)) { \
+        int32_t s = (int32_t) ((source) ^ (result)) >> 31; \
+        weight = ((delta) ^ s) + ((weight) - s); \
+    }
+
+#define APPLY_WEIGHT_F(weight, sample) ((((((sample) & 0xffff) * (weight)) >> 9) + \
+    ((((sample) & ~0xffff) >> 9) * (weight)) + 1) >> 1)
+
+#define APPLY_WEIGHT_I(weight, sample) (((weight) * (sample) + 512) >> 10)
+
+#define APPLY_WEIGHT(weight, sample) ((sample) != (short) (sample) ? \
+    APPLY_WEIGHT_F(weight, sample) : APPLY_WEIGHT_I (weight, sample))
+
+#define CLEAR(destin) memset(&destin, 0, sizeof(destin));
+
+#define SHIFT_LSB       13
+#define SHIFT_MASK      (0x1FU << SHIFT_LSB)
+
+#define MAG_LSB         18
+#define MAG_MASK        (0x1FU << MAG_LSB)
+
+#define SRATE_LSB       23
+#define SRATE_MASK      (0xFU << SRATE_LSB)
+
+#define EXTRA_TRY_DELTAS     1
+#define EXTRA_ADJUST_DELTAS  2
+#define EXTRA_SORT_FIRST     4
+#define EXTRA_BRANCHES       8
+#define EXTRA_SORT_LAST     16
+
+typedef struct WavPackExtraInfo {
+    struct Decorr dps[MAX_TERMS];
+    int nterms, log_limit, gt16bit;
+    uint32_t best_bits;
+} WavPackExtraInfo;
+
+typedef struct WavPackWords {
+    int pend_data, holding_one, zeros_acc;
+    int holding_zero, pend_count;
+    WvChannel c[2];
+} WavPackWords;
+
+typedef struct WavPackEncodeContext {
+    AVClass *class;
+    AVCodecContext *avctx;
+    PutBitContext pb;
+    int block_samples;
+    int buffer_size;
+    int sample_index;
+    int stereo, stereo_in;
+    int ch_offset;
+
+    int32_t *samples[2];
+    int samples_size[2];
+
+    int32_t *sampleptrs[MAX_TERMS+2][2];
+    int sampleptrs_size[MAX_TERMS+2][2];
+
+    int32_t *temp_buffer[2][2];
+    int temp_buffer_size[2][2];
+
+    int32_t *best_buffer[2];
+    int best_buffer_size[2];
+
+    int32_t *js_left, *js_right;
+    int js_left_size, js_right_size;
+
+    int32_t *orig_l, *orig_r;
+    int orig_l_size, orig_r_size;
+
+    unsigned extra_flags;
+    int optimize_mono;
+    int decorr_filter;
+    int joint;
+    int num_branches;
+
+    uint32_t flags;
+    uint32_t crc_x;
+    WavPackWords w;
+
+    uint8_t int32_sent_bits, int32_zeros, int32_ones, int32_dups;
+    uint8_t float_flags, float_shift, float_max_exp, max_exp;
+    int32_t shifted_ones, shifted_zeros, shifted_both;
+    int32_t false_zeros, neg_zeros, ordata;
+
+    int num_terms, shift, joint_stereo, false_stereo;
+    int num_decorrs, num_passes, best_decorr, mask_decorr;
+    struct Decorr decorr_passes[MAX_TERMS];
+    const WavPackDecorrSpec *decorr_specs;
+    float delta_decay;
+} WavPackEncodeContext;
+/* Encoder init: pick or validate the block size and map compression_level to search settings; 0 or AVERROR(EINVAL). */
+static av_cold int wavpack_encode_init(AVCodecContext *avctx)
+{
+    WavPackEncodeContext *s = avctx->priv_data;
+
+    s->avctx = avctx;
+
+    if (!avctx->frame_size) {
+        int block_samples;
+        if (avctx->sample_rate <= 0 || avctx->channels <= 0) {
+            av_log(avctx, AV_LOG_ERROR, "invalid sample rate or channel count\n");
+            return AVERROR(EINVAL);
+        } /* a zero or negative product would keep the doubling loop below from ever finishing */
+        block_samples = avctx->sample_rate / (avctx->sample_rate & 1 ? 1 : 2);
+        while (block_samples * avctx->channels > 150000)
+            block_samples /= 2;
+        /* halve/double until samples * channels sits between 40000 and 150000 */
+        while (block_samples * avctx->channels < 40000)
+            block_samples *= 2;
+        avctx->frame_size = block_samples;
+    } else if (avctx->frame_size < 128 ||
+               avctx->frame_size > WV_MAX_SAMPLES) {
+        av_log(avctx, AV_LOG_ERROR, "invalid block size: %d\n", avctx->frame_size);
+        return AVERROR(EINVAL);
+    }
+    /* a non-default compression_level sets decorr_filter, num_passes and, from level 4 up, the extra search flags */
+    if (avctx->compression_level != FF_COMPRESSION_DEFAULT) {
+        if (avctx->compression_level >= 3) {
+            s->decorr_filter = 3;
+            s->num_passes = 9;
+            if      (avctx->compression_level >= 8) {
+                s->num_branches = 4;
+                s->extra_flags = EXTRA_TRY_DELTAS|EXTRA_ADJUST_DELTAS|EXTRA_SORT_FIRST|EXTRA_SORT_LAST|EXTRA_BRANCHES;
+            } else if (avctx->compression_level >= 7) {
+                s->num_branches = 3;
+                s->extra_flags = EXTRA_TRY_DELTAS|EXTRA_ADJUST_DELTAS|EXTRA_SORT_FIRST|EXTRA_BRANCHES;
+            } else if (avctx->compression_level >= 6) {
+                s->num_branches = 2;
+                s->extra_flags = EXTRA_TRY_DELTAS|EXTRA_ADJUST_DELTAS|EXTRA_SORT_FIRST|EXTRA_BRANCHES;
+            } else if (avctx->compression_level >= 5) {
+                s->num_branches = 1;
+                s->extra_flags = EXTRA_TRY_DELTAS|EXTRA_ADJUST_DELTAS|EXTRA_SORT_FIRST|EXTRA_BRANCHES;
+            } else if (avctx->compression_level >= 4) {
+                s->num_branches = 1;
+                s->extra_flags = EXTRA_TRY_DELTAS|EXTRA_ADJUST_DELTAS|EXTRA_BRANCHES;
+            }
+        } else if (avctx->compression_level == 2) {
+            s->decorr_filter = 2;
+            s->num_passes = 4;
+        } else if (avctx->compression_level == 1) {
+            s->decorr_filter = 1;
+            s->num_passes = 2;
+        } else if (avctx->compression_level < 1) {
+            s->decorr_filter = 0;
+            s->num_passes = 0;
+        }
+    }
+    /* look up the decorrelation spec table (and its length) for the selected filter */
+    s->num_decorrs = decorr_filter_sizes[s->decorr_filter];
+    s->decorr_specs = decorr_filters[s->decorr_filter];
+
+    s->delta_decay = 2.0;
+
+    return 0;
+}
+/* Right-shift each of the nb_samples entries of samples by shift bits, in place. */
+static void shift_mono(int32_t *samples, int nb_samples, int shift)
+{
+    int i;
+    for (i = 0; i < nb_samples; i++)
+        samples[i] >>= shift;
+}
+/* Two-channel variant of shift_mono(): right-shift left[i] and right[i] by shift bits, in place. */
+static void shift_stereo(int32_t *left, int32_t *right,
+                         int nb_samples, int shift)
+{
+    int i;
+    for (i = 0; i < nb_samples; i++) {
+        left [i] >>= shift;
+        right[i] >>= shift;
+    }
+}
+
+#define FLOAT_SHIFT_ONES 1
+#define FLOAT_SHIFT_SAME 2
+#define FLOAT_SHIFT_SENT 4
+#define FLOAT_ZEROS_SENT 8
+#define FLOAT_NEG_ZEROS  0x10
+#define FLOAT_EXCEPTIONS 0x20
+
+#define get_mantissa(f)     ((f) & 0x7fffff)
+#define get_exponent(f)     (((f) >> 23) & 0xff)
+#define get_sign(f)         (((f) >> 31) & 0x1)
+/* Replace the float bit pattern in *sample by a signed integer aligned to s->max_exp, updating the statistics in s. */
+static void process_float(WavPackEncodeContext *s, int32_t *sample)
+{
+    int32_t shift_count, value, f = *sample;
+    /* exponent 255 is flagged as an exception; exponent 0 carries no implicit leading one */
+    if (get_exponent(f) == 255) {
+        s->float_flags |= FLOAT_EXCEPTIONS;
+        value = 0x1000000;
+        shift_count = 0;
+    } else if (get_exponent(f)) {
+        shift_count = s->max_exp - get_exponent(f);
+        value = 0x800000 + get_mantissa(f);
+    } else {
+        shift_count = s->max_exp ? s->max_exp - 1 : 0;
+        value = get_mantissa(f);
+    }
+    /* value has at most 25 bits, so 25 or more positions leave nothing; this also keeps the shift in range */
+    if (shift_count < 25)
+        value >>= shift_count;
+    else
+        value = 0;
+    /* classify what was lost: a non-zero float flushed to 0, a negative zero, or low bits all 0 / all 1 / mixed */
+    if (!value) {
+        if (get_exponent(f) || get_mantissa(f))
+            s->false_zeros++;
+        else if (get_sign(f))
+            s->neg_zeros++;
+    } else if (shift_count) {
+        int32_t mask = (1 << shift_count) - 1;
+
+        if (!(get_mantissa(f) & mask))
+            s->shifted_zeros++;
+        else if ((get_mantissa(f) & mask) == mask)
+            s->shifted_ones++;
+        else
+            s->shifted_both++;
+    }
+    /* collect every magnitude bit seen in the block, then store the integer with the float's sign */
+    s->ordata |= value;
+    *sample = get_sign(f) ? -value : value;
+}
+/* Convert float sample bits to integers in place and derive float_flags; returns the EXCEPTIONS/ZEROS_SENT/SHIFT_SENT/SHIFT_SAME subset. */
+static int scan_float(WavPackEncodeContext *s,
+                      int32_t *samples_l, int32_t *samples_r,
+                      int nb_samples)
+{
+    uint32_t crc = 0xffffffffu;
+    int i;
+    /* reset the per-block statistics */
+    s->shifted_ones = s->shifted_zeros = s->shifted_both = s->ordata = 0;
+    s->float_shift = s->float_flags = 0;
+    s->false_zeros = s->neg_zeros = 0;
+    s->max_exp = 0;
+    /* pass 1: checksum the raw float fields and find the largest exponent below 255 */
+    if (s->flags & WV_MONO_DATA) {
+        for (i = 0; i < nb_samples; i++) {
+            int32_t f = samples_l[i];
+            crc = crc * 27 + get_mantissa(f) * 9 + get_exponent(f) * 3 + get_sign(f);
+
+            if (get_exponent(f) > s->max_exp && get_exponent(f) < 255)
+                s->max_exp = get_exponent(f);
+        }
+    } else {
+        for (i = 0; i < nb_samples; i++) {
+            int32_t f;
+
+            f = samples_l[i];
+            crc = crc * 27 + get_mantissa(f) * 9 + get_exponent(f) * 3 + get_sign(f);
+            if (get_exponent(f) > s->max_exp && get_exponent(f) < 255)
+                s->max_exp = get_exponent(f);
+
+            f = samples_r[i];
+            crc = crc * 27 + get_mantissa(f) * 9 + get_exponent(f) * 3 + get_sign(f);
+
+            if (get_exponent(f) > s->max_exp && get_exponent(f) < 255)
+                s->max_exp = get_exponent(f);
+        }
+    }
+
+    s->crc_x = crc;
+    /* pass 2: convert every sample relative to max_exp (see process_float) */
+    if (s->flags & WV_MONO_DATA) {
+        for (i = 0; i < nb_samples; i++)
+            process_float(s, &samples_l[i]);
+    } else {
+        for (i = 0; i < nb_samples; i++) {
+            process_float(s, &samples_l[i]);
+            process_float(s, &samples_r[i]);
+        }
+    }
+
+    s->float_max_exp = s->max_exp;
+    /* choose the shift flag from the discarded-bit statistics; otherwise strip common trailing zero bits */
+    if (s->shifted_both)
+        s->float_flags |= FLOAT_SHIFT_SENT;
+    else if (s->shifted_ones && !s->shifted_zeros)
+        s->float_flags |= FLOAT_SHIFT_ONES;
+    else if (s->shifted_ones && s->shifted_zeros)
+        s->float_flags |= FLOAT_SHIFT_SAME;
+    else if (s->ordata && !(s->ordata & 1)) {
+        do {
+            s->float_shift++;
+            s->ordata >>= 1;
+        } while (!(s->ordata & 1));
+
+        if (s->flags & WV_MONO_DATA)
+            shift_mono(samples_l, nb_samples, s->float_shift);
+        else
+            shift_stereo(samples_l, samples_r, nb_samples, s->float_shift);
+    }
+    /* rebuild the magnitude field of flags: one increment per remaining significant bit of ordata */
+    s->flags &= ~MAG_MASK;
+
+    while (s->ordata) {
+        s->flags += 1 << MAG_LSB;
+        s->ordata >>= 1;
+    }
+
+    if (s->false_zeros || s->neg_zeros)
+        s->float_flags |= FLOAT_ZEROS_SENT;
+
+    if (s->neg_zeros)
+        s->float_flags |= FLOAT_NEG_ZEROS;
+
+    return s->float_flags & (FLOAT_EXCEPTIONS | FLOAT_ZEROS_SENT |
+                             FLOAT_SHIFT_SENT | FLOAT_SHIFT_SAME);
+}
+/* Scan integer samples: set the magnitude field of s->flags and shift out redundant low bits, in place. */
+static void scan_int23(WavPackEncodeContext *s,
+                       int32_t *samples_l, int32_t *samples_r,
+                       int nb_samples)
+{
+    uint32_t magdata = 0, ordata = 0, xordata = 0, anddata = ~0;
+    int i, total_shift = 0;
+
+    s->int32_sent_bits = s->int32_zeros = s->int32_ones = s->int32_dups = 0;
+    /* returns early, leaving flags untouched, once none of the three low-bit cases below can apply any more */
+    if (s->flags & WV_MONO_DATA) {
+        for (i = 0; i < nb_samples; i++) {
+            int32_t M = samples_l[i];
+
+            magdata |= (M < 0) ? ~M : M;
+            xordata |= M ^ -(M & 1);
+            anddata &= M;
+            ordata  |= M;
+
+            if ((ordata & 1) && !(anddata & 1) && (xordata & 2))
+                return;
+        }
+    } else {
+        for (i = 0; i < nb_samples; i++) {
+            int32_t L = samples_l[i];
+            int32_t R = samples_r[i];
+
+            magdata |= (L < 0) ? ~L : L;
+            magdata |= (R < 0) ? ~R : R;
+            xordata |= L ^ -(L & 1);
+            xordata |= R ^ -(R & 1);
+            anddata &= L & R;
+            ordata  |= L | R;
+
+            if ((ordata & 1) && !(anddata & 1) && (xordata & 2))
+                return;
+        }
+    }
+    /* magnitude field = number of significant bits in magdata */
+    s->flags &= ~MAG_MASK;
+
+    while (magdata) {
+        s->flags += 1 << MAG_LSB;
+        magdata >>= 1;
+    }
+
+    if (!(s->flags & MAG_MASK))
+        return;
+    /* low bits that are zero everywhere, one everywhere, or copies of the bit above them get shifted out */
+    if (!(ordata & 1)) {
+        do {
+            s->flags -= 1 << MAG_LSB;
+            s->int32_zeros++;
+            total_shift++;
+            ordata >>= 1;
+        } while (!(ordata & 1));
+    } else if (anddata & 1) {
+        do {
+            s->flags -= 1 << MAG_LSB;
+            s->int32_ones++;
+            total_shift++;
+            anddata >>= 1;
+        } while (anddata & 1);
+    } else if (!(xordata & 2)) {
+        do {
+            s->flags -= 1 << MAG_LSB;
+            s->int32_dups++;
+            total_shift++;
+            xordata >>= 1;
+        } while (!(xordata & 2));
+    }
+
+    if (total_shift) {
+        s->flags |= WV_INT32_DATA;
+
+        if (s->flags & WV_MONO_DATA)
+            shift_mono(samples_l, nb_samples, total_shift);
+        else
+            shift_stereo(samples_l, samples_r, nb_samples, total_shift);
+    }
+}
+/* Like scan_int23() but without early exit: also checksums into crc_x and caps the magnitude at 23 bits; returns int32_sent_bits. */
+static int scan_int32(WavPackEncodeContext *s,
+                      int32_t *samples_l, int32_t *samples_r,
+                      int nb_samples)
+{
+    uint32_t magdata = 0, ordata = 0, xordata = 0, anddata = ~0;
+    uint32_t crc = 0xffffffffu;
+    int i, total_shift = 0;
+
+    s->int32_sent_bits = s->int32_zeros = s->int32_ones = s->int32_dups = 0;
+    /* one pass gathers the checksum and the OR/AND/XOR summaries of all samples */
+    if (s->flags & WV_MONO_DATA) {
+        for (i = 0; i < nb_samples; i++) {
+            int32_t M = samples_l[i];
+
+            crc = crc * 9 + (M & 0xffff) * 3 + ((M >> 16) & 0xffff);
+            magdata |= (M < 0) ? ~M : M;
+            xordata |= M ^ -(M & 1);
+            anddata &= M;
+            ordata  |= M;
+        }
+    } else {
+        for (i = 0; i < nb_samples; i++) {
+            int32_t L = samples_l[i];
+            int32_t R = samples_r[i];
+
+            crc = crc * 9 + (L & 0xffff) * 3 + ((L >> 16) & 0xffff);
+            crc = crc * 9 + (R & 0xffff) * 3 + ((R >> 16) & 0xffff);
+            magdata |= (L < 0) ? ~L : L;
+            magdata |= (R < 0) ? ~R : R;
+            xordata |= L ^ -(L & 1);
+            xordata |= R ^ -(R & 1);
+            anddata &= L & R;
+            ordata  |= L | R;
+        }
+    }
+
+    s->crc_x = crc;
+    s->flags &= ~MAG_MASK;
+    /* magnitude field = number of significant bits in magdata */
+    while (magdata) {
+        s->flags += 1 << MAG_LSB;
+        magdata >>= 1;
+    }
+    /* zero magnitude: nothing to shift, and the INT32 flag is cleared */
+    if (!((s->flags & MAG_MASK) >> MAG_LSB)) {
+        s->flags &= ~WV_INT32_DATA;
+        return 0;
+    }
+    /* low bits that are zero everywhere, one everywhere, or copies of the bit above them get shifted out */
+    if (!(ordata & 1))
+        do {
+            s->flags -= 1 << MAG_LSB;
+            s->int32_zeros++;
+            total_shift++;
+            ordata >>= 1;
+        } while (!(ordata & 1));
+    else if (anddata & 1)
+        do {
+            s->flags -= 1 << MAG_LSB;
+            s->int32_ones++;
+            total_shift++;
+            anddata >>= 1;
+        } while (anddata & 1);
+    else if (!(xordata & 2))
+        do {
+            s->flags -= 1 << MAG_LSB;
+            s->int32_dups++;
+            total_shift++;
+            xordata >>= 1;
+        } while (!(xordata & 2));
+    /* anything beyond 23 magnitude bits is recorded in int32_sent_bits and shifted out as well */
+    if (((s->flags & MAG_MASK) >> MAG_LSB) > 23) {
+        s->int32_sent_bits = (uint8_t)(((s->flags & MAG_MASK) >> MAG_LSB) - 23);
+        total_shift += s->int32_sent_bits;
+        s->flags &= ~MAG_MASK;
+        s->flags += 23 << MAG_LSB;
+    }
+
+    if (total_shift) {
+        s->flags |= WV_INT32_DATA;
+
+        if (s->flags & WV_MONO_DATA)
+            shift_mono(samples_l, nb_samples, total_shift);
+        else
+            shift_stereo(samples_l, samples_r, nb_samples, total_shift);
+    }
+
+    return s->int32_sent_bits;
+}
+/* Pack a weight into int8_t: clip to [-1024, 1024], compress positive values slightly, then round-divide by 8. */
+static int8_t store_weight(int weight)
+{
+    weight = av_clip(weight, -1024, 1024);
+    if (weight > 0)
+        weight -= (weight + 64) >> 7;
+    /* +4 before >> 3 rounds to nearest; 1024 ends up as 127 */
+    return (weight + 4) >> 3;
+}
+/* Expand an int8_t weight as packed by store_weight(): multiply by 8, then re-stretch positive values. */
+static int restore_weight(int8_t weight)
+{
+    int result;
+
+    if ((result = (int) weight << 3) > 0)
+        result += (result + 64) >> 7;
+
+    return result;
+}
+/* Signed wrapper around wp_log2(): log of |value| carrying value's sign. NOTE(review): -value overflows for INT32_MIN. */
+static int log2s(int32_t value)
+{
+    return (value < 0) ? -wp_log2(-value) : wp_log2(value);
+}
+/* One decorrelation pass over a mono buffer: out = in - weighted prediction; dir is the per-sample step (negative walks backwards). */
+static void decorr_mono(int32_t *in_samples, int32_t *out_samples,
+                        int nb_samples, struct Decorr *dpp, int dir)
+{
+    int m = 0, i;
+
+    dpp->sumA = 0;
+    /* for a negative dir, start at the last sample of both buffers */
+    if (dir < 0) {
+        out_samples += (nb_samples - 1);
+        in_samples  += (nb_samples - 1);
+    }
+    /* round-trip weight and history through their compact forms (presumably to match what gets stored - confirm) */
+    dpp->weightA = restore_weight(store_weight(dpp->weightA));
+
+    for (i = 0; i < MAX_TERM; i++)
+        dpp->samplesA[i] = wp_exp2(log2s(dpp->samplesA[i]));
+    /* value > MAX_TERM: predict by extrapolating from the two previous samples (2a - b if odd, (3a - b) / 2 if even) */
+    if (dpp->value > MAX_TERM) {
+        while (nb_samples--) {
+            int32_t left, sam_A;
+
+            sam_A = ((3 - (dpp->value & 1)) * dpp->samplesA[0] - dpp->samplesA[1]) >> !(dpp->value & 1);
+
+            dpp->samplesA[1] = dpp->samplesA[0];
+            dpp->samplesA[0] = left = in_samples[0];
+            /* residual = input minus weighted prediction, then adapt the weight and track its sum */
+            left -= APPLY_WEIGHT(dpp->weightA, sam_A);
+            UPDATE_WEIGHT(dpp->weightA, dpp->delta, sam_A, left);
+            dpp->sumA += dpp->weightA;
+            out_samples[0] = left;
+            in_samples += dir;
+            out_samples += dir;
+        }
+    } else if (dpp->value > 0) {
+        while (nb_samples--) {
+            int k = (m + dpp->value) & (MAX_TERM - 1);
+            int32_t left, sam_A;
+            /* circular history: slot m was written dpp->value iterations ago, slot k receives the new input */
+            sam_A = dpp->samplesA[m];
+            dpp->samplesA[k] = left = in_samples[0];
+            m = (m + 1) & (MAX_TERM - 1);
+
+            left -= APPLY_WEIGHT(dpp->weightA, sam_A);
+            UPDATE_WEIGHT(dpp->weightA, dpp->delta, sam_A, left);
+            dpp->sumA += dpp->weightA;
+            out_samples[0] = left;
+            in_samples += dir;
+            out_samples += dir;
+        }
+    }
+    /* rotate the circular history so that index 0 is again the next entry to be read */
+    if (m && dpp->value > 0 && dpp->value <= MAX_TERM) {
+        int32_t temp_A[MAX_TERM];
+
+        memcpy(temp_A, dpp->samplesA, sizeof(dpp->samplesA));
+
+        for (i = 0; i < MAX_TERM; i++) {
+            dpp->samplesA[i] = temp_A[m];
+            m = (m + 1) & (MAX_TERM - 1);
+        }
+    }
+}
+/* Rework dpp->samplesA, presumably to prime it for a pass in the opposite direction - confirm with the callers. */
+static void reverse_mono_decorr(struct Decorr *dpp)
+{
+    if (dpp->value > MAX_TERM) {
+        int32_t sam_A;
+        /* extrapolate one step with the same predictor decorr_mono() uses, then shift the history */
+        if (dpp->value & 1)
+            sam_A = 2 * dpp->samplesA[0] - dpp->samplesA[1];
+        else
+            sam_A = (3 * dpp->samplesA[0] - dpp->samplesA[1]) >> 1;
+
+        dpp->samplesA[1] = dpp->samplesA[0];
+        dpp->samplesA[0] = sam_A;
+        /* extrapolate a second step from the updated history; it becomes the older entry */
+        if (dpp->value & 1)
+            sam_A = 2 * dpp->samplesA[0] - dpp->samplesA[1];
+        else
+            sam_A = (3 * dpp->samplesA[0] - dpp->samplesA[1]) >> 1;
+
+        dpp->samplesA[1] = sam_A;
+    } else if (dpp->value > 1) { /* reverse the first dpp->value history entries in place */
+        int i, j, k;
+
+        for (i = 0, j = dpp->value - 1, k = 0; k < dpp->value / 2; i++, j--, k++) {
+            i &= (MAX_TERM - 1);
+            j &= (MAX_TERM - 1);
+            dpp->samplesA[i] ^= dpp->samplesA[j]; /* three-step XOR swap of entries i and j */
+            dpp->samplesA[j] ^= dpp->samplesA[i];
+            dpp->samplesA[i] ^= dpp->samplesA[j];
+        }
+    }
+}
+
+/**
+ * Add the table-based log2 cost of one sample magnitude to *result.
+ *
+ * @param v      sample magnitude
+ * @param limit  if non-zero, upper bound for the cost of a single sample;
+ *               only checked for magnitudes that do not fit in 8 bits after
+ *               the initial v += v >> 9 adjustment
+ * @param result accumulator the cost is added to
+ * @return 1 if limit is non-zero and this sample's cost reached it,
+ *         0 otherwise
+ */
+static uint32_t log2sample(uint32_t v, int limit, uint32_t *result)
+{
+    uint32_t bits, cost;
+
+    v += v >> 9;
+
+    /* small magnitudes: direct lookup, never subject to the limit */
+    if (v < (1 << 8)) {
+        bits     = nbits_table[v];
+        *result += (bits << 8) + wp_log2_table[(v << (9 - bits)) & 0xff];
+        return 0;
+    }
+
+    /* locate the most significant byte to obtain the bit count */
+    if (v < (1L << 16))
+        bits = nbits_table[v >> 8] + 8;
+    else if (v < (1L << 24))
+        bits = nbits_table[v >> 16] + 16;
+    else
+        bits = nbits_table[v >> 24] + 24;
+
+    cost     = (bits << 8) + wp_log2_table[(v >> (bits - 9)) & 0xff];
+    *result += cost;
+
+    return limit && cost >= limit;
+}
+
+/**
+ * Sum the log2sample() cost of a mono buffer.
+ *
+ * @param samples    buffer to measure
+ * @param nb_samples number of samples in the buffer
+ * @param limit      per-sample cost limit passed on to log2sample()
+ *                   (0 disables the check)
+ * @return accumulated cost, or UINT32_MAX as soon as log2sample() reports
+ *         that a sample reached the limit
+ */
+static uint32_t log2mono(int32_t *samples, int nb_samples, int limit)
+{
+    uint32_t result = 0;
+    while (nb_samples--) {
+        const int32_t v = *samples++;
+        /* negate in unsigned arithmetic: abs(INT32_MIN) would be undefined */
+        const uint32_t mag = v < 0 ? 0U - (uint32_t)v : (uint32_t)v;
+
+        if (log2sample(mag, limit, &result))
+            return UINT32_MAX;
+    }
+    return result;
+}
+
+/**
+ * Sum the log2sample() cost of a pair of channel buffers.
+ *
+ * For every index the left sample is accounted for before the right one.
+ *
+ * @param samples_l  left channel buffer
+ * @param samples_r  right channel buffer
+ * @param nb_samples number of samples per channel
+ * @param limit      per-sample cost limit passed on to log2sample()
+ *                   (0 disables the check)
+ * @return accumulated cost, or UINT32_MAX as soon as log2sample() reports
+ *         that a sample reached the limit
+ */
+static uint32_t log2stereo(int32_t *samples_l, int32_t *samples_r,
+                           int nb_samples, int limit)
+{
+    uint32_t result = 0;
+    while (nb_samples--) {
+        const int32_t l = *samples_l++;
+        const int32_t r = *samples_r++;
+        /* negate in unsigned arithmetic: abs(INT32_MIN) would be undefined */
+        const uint32_t mag_l = l < 0 ? 0U - (uint32_t)l : (uint32_t)l;
+        const uint32_t mag_r = r < 0 ? 0U - (uint32_t)r : (uint32_t)r;
+
+        if (log2sample(mag_l, limit, &result) ||
+            log2sample(mag_r, limit, &result))
+            return UINT32_MAX;
+    }
+    return result;
+}
+
+/**
+ * Run decorrelation pass number tindex over a whole mono buffer.
+ *
+ * A scratch pass is first run backwards over at most the first 2048 samples
+ * with a raised delta to obtain a starting weight (and, for the first pass
+ * only, a starting history). That state is stored in dpp[tindex] and then
+ * used for the forward run that fills outsamples. When the requested delta
+ * is 0, an extra forward run with delta 1 is made and the fixed weight is
+ * set to the average weight seen during it.
+ *
+ * @param samples    input buffer
+ * @param outsamples output (residual) buffer
+ * @param nb_samples number of samples in both buffers
+ * @param dpp        array of passes; dpp[tindex] supplies value and delta
+ *                   and receives the initial weightA and samplesA
+ * @param tindex     index of the pass to run
+ */
+static void decorr_mono_buffer(int32_t *samples, int32_t *outsamples,
+                               int nb_samples, struct Decorr *dpp,
+                               int tindex)
+{
+    struct Decorr *const target = &dpp[tindex];
+    const int delta = target->delta;
+    struct Decorr work;
+
+    CLEAR(work);
+    work.value = target->value;
+    work.delta = delta == 7 ? 7 : delta < 2 ? 3 : delta + 1;
+
+    /* backward warm-up run over the head of the buffer */
+    decorr_mono(samples, outsamples, FFMIN(2048, nb_samples), &work, -1);
+    work.delta = delta;
+
+    /* only the first pass keeps a history; later passes start from zero */
+    if (!tindex)
+        reverse_mono_decorr(&work);
+    else
+        CLEAR(work.samplesA);
+
+    memcpy(target->samplesA, work.samplesA, sizeof(work.samplesA));
+    target->weightA = work.weightA;
+
+    if (!delta) {
+        /* delta 0 never adapts: use the mean weight of a delta 1 run */
+        work.delta = 1;
+        decorr_mono(samples, outsamples, nb_samples, &work, 1);
+        work.delta = 0;
+        memcpy(work.samplesA, target->samplesA, sizeof(work.samplesA));
+        work.weightA    = work.sumA / nb_samples;
+        target->weightA = work.weightA;
+    }
+
+    decorr_mono(samples, outsamples, nb_samples, &work, 1);
+}
+
+static void recurse_mono(WavPackEncodeContext *s, WavPackExtraInfo *info,
+                         int depth, int delta, uint32_t input_bits)
+{   /*
+     * Search for the decorrelation term to use at pass index `depth`.
+     *
+     * Every candidate term is run from s->sampleptrs[depth][0] into
+     * s->sampleptrs[depth + 1][0] and measured with log2mono(). Whenever a
+     * candidate beats info->best_bits, the chain info->dps[0..depth] is
+     * stored in s->decorr_passes and its output is copied to
+     * s->sampleptrs[info->nterms + 1][0]. Afterwards, while deeper passes
+     * are allowed, the best remaining candidates (at most `branches` of
+     * them, each required to cost less than input_bits) are re-run and
+     * the search recurses with depth + 1.
+     *
+     * s          encoder context (buffers, num_branches, extra_flags)
+     * info       search state (dps, nterms, best_bits, log_limit)
+     * depth      index of the pass being chosen
+     * delta      delta assigned to every candidate pass
+     * input_bits cost of the input buffer of this depth
+     */
+    int term, branches = s->num_branches - depth;
+    int32_t *samples, *outsamples;
+    uint32_t term_bits[22], bits;
+
+    if (branches < 1 || depth + 1 == info->nterms)
+        branches = 1;
+
+    CLEAR(term_bits);
+    samples = s->sampleptrs[depth][0];
+    outsamples = s->sampleptrs[depth + 1][0];
+
+    for (term = 1; term <= 18; term++) {
+        if (term == 17 && branches == 1 && depth + 1 < info->nterms)
+            continue;
+
+        if (term > 8 && term < 17) /* only 1..8, 17 and 18 are candidates */
+            continue;
+
+        if (!s->extra_flags && (term > 4 && term < 17))
+            continue;
+
+        info->dps[depth].value = term;
+        info->dps[depth].delta = delta;
+        decorr_mono_buffer(samples, outsamples, s->block_samples, info->dps, depth);
+        bits = log2mono(outsamples, s->block_samples, info->log_limit);
+
+        if (bits < info->best_bits) {
+            info->best_bits = bits;
+            CLEAR(s->decorr_passes);
+            memcpy(s->decorr_passes, info->dps, sizeof(info->dps[0]) * (depth + 1));
+            memcpy(s->sampleptrs[info->nterms + 1][0],
+                   s->sampleptrs[depth + 1][0], s->block_samples * 4);
+        }
+
+        term_bits[term + 3] = bits; /* table is indexed by term + 3 */
+    }
+
+    while (depth + 1 < info->nterms && branches--) {
+        uint32_t local_best_bits = input_bits;
+        int best_term = 0, i;
+
+        for (i = 0; i < 22; i++) /* cheapest candidate not yet consumed (0 = unused or consumed) */
+            if (term_bits[i] && term_bits[i] < local_best_bits) {
+                local_best_bits = term_bits[i];
+                best_term = i - 3;
+            }
+
+        if (!best_term)
+            break;
+
+        term_bits[best_term + 3] = 0; /* do not pick this term again */
+
+        info->dps[depth].value = best_term;
+        info->dps[depth].delta = delta;
+        decorr_mono_buffer(samples, outsamples, s->block_samples, info->dps, depth); /* regenerate the input of the next depth */
+
+        recurse_mono(s, info, depth + 1, delta, local_best_bits);
+    }
+}
+
+static void sort_mono(WavPackEncodeContext *s, WavPackExtraInfo *info)
+{   /*
+     * Try exchanging each pair of adjacent decorrelation passes.
+     *
+     * For every pair with different terms the swapped chain is re-run from
+     * the swap position to the end and measured with log2mono(). A swap is
+     * kept (written to s->decorr_passes, output copied to
+     * s->sampleptrs[info->nterms + 1][0]) only if it lowers
+     * info->best_bits; otherwise the pair is restored. Sweeps are repeated
+     * until one complete sweep keeps no swap.
+     */
+    int reversed = 1;
+    uint32_t bits;
+
+    while (reversed) {
+        int ri, i;
+
+        memcpy(info->dps, s->decorr_passes, sizeof(s->decorr_passes)); /* every sweep starts from the current best chain */
+        reversed = 0;
+
+        for (ri = 0; ri < info->nterms && s->decorr_passes[ri].value; ri++) {
+
+            if (ri + 1 >= info->nterms || !s->decorr_passes[ri+1].value) /* no successor to swap with */
+                break;
+
+            if (s->decorr_passes[ri].value == s->decorr_passes[ri+1].value) { /* equal terms: only refresh the buffer for the next position */
+                decorr_mono_buffer(s->sampleptrs[ri][0], s->sampleptrs[ri+1][0],
+                                   s->block_samples, info->dps, ri);
+                continue;
+            }
+
+            info->dps[ri  ] = s->decorr_passes[ri+1];
+            info->dps[ri+1] = s->decorr_passes[ri  ];
+
+            for (i = ri; i < info->nterms && s->decorr_passes[i].value; i++) /* re-run the chain from the swap onwards */
+                decorr_mono_buffer(s->sampleptrs[i][0], s->sampleptrs[i+1][0],
+                                   s->block_samples, info->dps, i);
+
+            bits = log2mono(s->sampleptrs[i][0], s->block_samples, info->log_limit);
+            if (bits < info->best_bits) {
+                reversed = 1;
+                info->best_bits = bits;
+                CLEAR(s->decorr_passes);
+                memcpy(s->decorr_passes, info->dps, sizeof(info->dps[0]) * i);
+                memcpy(s->sampleptrs[info->nterms + 1][0], s->sampleptrs[i][0],
+                       s->block_samples * 4);
+            } else { /* undo the swap and restore the buffer following position ri */
+                info->dps[ri  ] = s->decorr_passes[ri];
+                info->dps[ri+1] = s->decorr_passes[ri+1];
+                decorr_mono_buffer(s->sampleptrs[ri][0], s->sampleptrs[ri+1][0],
+                                   s->block_samples, info->dps, ri);
+            }
+        }
+    }
+}
+
+/**
+ * Re-run the current chain of passes with delta d and keep it if it is
+ * cheaper than info->best_bits.
+ *
+ * @return 1 if the chain was adopted (s->decorr_passes, info->best_bits and
+ *         s->sampleptrs[info->nterms + 1][0] updated), 0 otherwise
+ */
+static int delta_mono_try(WavPackEncodeContext *s, WavPackExtraInfo *info,
+                          int d)
+{
+    uint32_t cost;
+    int n;
+
+    for (n = 0; n < info->nterms && s->decorr_passes[n].value; n++) {
+        info->dps[n].value = s->decorr_passes[n].value;
+        info->dps[n].delta = d;
+        decorr_mono_buffer(s->sampleptrs[n][0], s->sampleptrs[n + 1][0],
+                           s->block_samples, info->dps, n);
+    }
+
+    cost = log2mono(s->sampleptrs[n][0], s->block_samples, info->log_limit);
+    if (cost >= info->best_bits)
+        return 0;
+
+    info->best_bits = cost;
+    CLEAR(s->decorr_passes);
+    memcpy(s->decorr_passes, info->dps, sizeof(info->dps[0]) * n);
+    memcpy(s->sampleptrs[info->nterms + 1][0], s->sampleptrs[n][0],
+           s->block_samples * 4);
+    return 1;
+}
+
+/**
+ * Look for a better delta for the current chain of passes.
+ *
+ * Starting from the delta of the first pass, smaller deltas are tried one
+ * by one for as long as each step lowers the cost. Only if no smaller delta
+ * helped are larger deltas (up to 7) tried in the same manner.
+ * Does nothing when the first pass has a zero term.
+ */
+static void delta_mono(WavPackEncodeContext *s, WavPackExtraInfo *info)
+{
+    int start, d, lowered = 0;
+
+    if (!s->decorr_passes[0].value)
+        return;
+    start = s->decorr_passes[0].delta;
+
+    for (d = start - 1; d >= 0; d--) {
+        if (!delta_mono_try(s, info, d))
+            break;
+        lowered = 1;
+    }
+
+    for (d = start + 1; !lowered && d <= 7; d++)
+        if (!delta_mono_try(s, info, d))
+            break;
+}
+
+/**
+ * Make sure s->sampleptrs[0 .. nterms + 1] can each hold one block
+ * (s->block_samples 32-bit samples) per channel. The second channel is
+ * only allocated when WV_MONO_DATA is not set in s->flags.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if an allocation failed
+ */
+static int allocate_buffers2(WavPackEncodeContext *s, int nterms)
+{
+    const int last_ch = !(s->flags & WV_MONO_DATA);
+    int idx, ch;
+
+    for (idx = 0; idx < nterms + 2; idx++) {
+        for (ch = 0; ch <= last_ch; ch++) {
+            av_fast_padded_malloc(&s->sampleptrs[idx][ch],
+                                  &s->sampleptrs_size[idx][ch],
+                                  s->block_samples * 4);
+            if (!s->sampleptrs[idx][ch])
+                return AVERROR(ENOMEM);
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Make sure s->best_buffer[] and both s->temp_buffer[][] sets can hold one
+ * block (s->block_samples 32-bit samples) per channel. The second channel
+ * is only allocated when WV_MONO_DATA is not set in s->flags.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if an allocation failed
+ */
+static int allocate_buffers(WavPackEncodeContext *s)
+{
+    const int nb_ch = (s->flags & WV_MONO_DATA) ? 1 : 2;
+    int i, ch;
+
+    /* best_buffer does not depend on the temp buffer index: request it once */
+    for (ch = 0; ch < nb_ch; ch++) {
+        av_fast_padded_malloc(&s->best_buffer[ch], &s->best_buffer_size[ch],
+                              s->block_samples * 4);
+        if (!s->best_buffer[ch])
+            return AVERROR(ENOMEM);
+    }
+
+    for (i = 0; i < 2; i++) {
+        for (ch = 0; ch < nb_ch; ch++) {
+            av_fast_padded_malloc(&s->temp_buffer[i][ch],
+                                  &s->temp_buffer_size[i][ch],
+                                  s->block_samples * 4);
+            if (!s->temp_buffer[i][ch])
+                return AVERROR(ENOMEM);
+        }
+    }
+
+    return 0;
+}
+
+static void analyze_mono(WavPackEncodeContext *s, int32_t *samples, int do_samples)
+{   /*
+     * Refine the decorrelation passes currently in s->decorr_passes for one
+     * mono block, using the search stages selected by s->extra_flags
+     * (recurse_mono(), sort_mono(), delta_mono()).
+     *
+     * The output of the best chain found is kept in
+     * s->sampleptrs[nterms + 1][0] and, when do_samples is non-zero, copied
+     * back over samples. On return s->num_terms is the number of leading
+     * passes with a non-zero term.
+     */
+    WavPackExtraInfo info;
+    int i;
+
+    info.log_limit = (((s->flags & MAG_MASK) >> MAG_LSB) + 4) * 256;
+    info.log_limit = FFMIN(6912, info.log_limit);
+
+    info.nterms = s->num_terms;
+
+    if (allocate_buffers2(s, s->num_terms)) /* NOTE(review): an allocation failure is not reported; the function just returns */
+        return;
+
+    memcpy(info.dps, s->decorr_passes, sizeof(info.dps));
+    memcpy(s->sampleptrs[0][0], samples, s->block_samples * 4);
+
+    for (i = 0; i < info.nterms && info.dps[i].value; i++) /* run the current chain to get the baseline output */
+        decorr_mono(s->sampleptrs[i][0], s->sampleptrs[i + 1][0],
+                    s->block_samples, info.dps + i, 1);
+
+    info.best_bits = log2mono(s->sampleptrs[info.nterms][0], s->block_samples, 0) * 1; /* NOTE(review): measures buffer [nterms] while the copy below uses [i]; the two differ if a pass has a zero term - confirm */
+    memcpy(s->sampleptrs[info.nterms + 1][0], s->sampleptrs[i][0], s->block_samples * 4);
+
+    if (s->extra_flags & EXTRA_BRANCHES)
+        recurse_mono(s, &info, 0, (int) floor(s->delta_decay + 0.5),
+                     log2mono(s->sampleptrs[0][0], s->block_samples, 0));
+
+    if (s->extra_flags & EXTRA_SORT_FIRST)
+        sort_mono(s, &info);
+
+    if (s->extra_flags & EXTRA_TRY_DELTAS) {
+        delta_mono(s, &info);
+
+        if ((s->extra_flags & EXTRA_ADJUST_DELTAS) && s->decorr_passes[0].value)
+            s->delta_decay = (float)((s->delta_decay * 2.0 + s->decorr_passes[0].delta) / 3.0); /* weighted 2:1 towards the previous value */
+        else
+            s->delta_decay = 2.0;
+    }
+
+    if (s->extra_flags & EXTRA_SORT_LAST)
+        sort_mono(s, &info);
+
+    if (do_samples)
+        memcpy(samples, s->sampleptrs[info.nterms + 1][0], s->block_samples * 4);
+
+    for (i = 0; i < info.nterms; i++) /* count the leading passes that are in use */
+        if (!s->decorr_passes[i].value)
+            break;
+
+    s->num_terms = i;
+}
+
+/**
+ * Feed a buffer of samples through the three median trackers of a channel.
+ *
+ * Each sample magnitude is compared against the medians in turn: a median
+ * is decreased when the remaining magnitude lies below it, otherwise it is
+ * increased and the comparison moves on to the next median with the
+ * magnitude reduced by the medians already passed.
+ *
+ * @param s          encoder context (not used here)
+ * @param c          channel whose medians are updated through the
+ *                   GET_MED/INC_MED/DEC_MED macros
+ * @param samples    buffer to scan
+ * @param nb_samples number of samples in the buffer
+ * @param dir        step between samples; with a negative value the scan
+ *                   starts at the last sample
+ */
+static void scan_word(WavPackEncodeContext *s, WvChannel *c,
+                      int32_t *samples, int nb_samples, int dir)
+{
+    if (dir < 0)
+        samples += nb_samples - 1;
+
+    for (; nb_samples--; samples += dir) {
+        uint32_t value = labs(samples[0]);
+        uint32_t low;
+
+        if (value < GET_MED(0)) {
+            DEC_MED(0);
+            continue;
+        }
+        low = GET_MED(0);
+        INC_MED(0);
+
+        if (value - low < GET_MED(1)) {
+            DEC_MED(1);
+            continue;
+        }
+        low += GET_MED(1);
+        INC_MED(1);
+
+        if (value - low < GET_MED(2)) {
+            DEC_MED(2);
+        } else {
+            INC_MED(2);
+        }
+    }
+}
+
+static int wv_mono(WavPackEncodeContext *s, int32_t *samples,
+                   int no_history, int do_samples)
+{   /*
+     * Choose the decorrelation passes for one mono block.
+     *
+     * Up to s->num_passes entries of s->decorr_specs are tried; the one
+     * whose output has the lowest log2mono() cost is stored in
+     * s->decorr_passes / s->num_terms / s->best_decorr and its output in
+     * s->best_buffer[0]. If s->extra_flags is set, analyze_mono() then
+     * refines the result.
+     *
+     * s          encoder context
+     * samples    block samples; overwritten with the decorrelated output
+     *            when do_samples is non-zero
+     * no_history non-zero to forget the previously selected spec and to
+     *            reset s->w before the median scan
+     * do_samples non-zero to write the result back to samples
+     *
+     * Returns 0 on success or a negative error code from
+     * allocate_buffers().
+     */
+    struct Decorr temp_decorr_pass, save_decorr_passes[MAX_TERMS] = {{0}};
+    int nb_samples = s->block_samples;
+    int buf_size = sizeof(int32_t) * nb_samples;
+    uint32_t best_size = UINT32_MAX, size;
+    int log_limit, pi, i, ret;
+
+    for (i = 0; i < nb_samples; i++)
+        if (samples[i])
+            break;
+
+    if (i == nb_samples) { /* all-zero block: no passes needed */
+        CLEAR(s->decorr_passes);
+        CLEAR(s->w);
+        s->num_terms = 0;
+        return 0;
+    }
+
+    log_limit = (((s->flags & MAG_MASK) >> MAG_LSB) + 4) * 256;
+    log_limit = FFMIN(6912, log_limit);
+
+    if ((ret = allocate_buffers(s)) < 0)
+        return ret;
+
+    if (no_history || s->num_passes >= 7)
+        s->best_decorr = s->mask_decorr = 0;
+
+    for (pi = 0; pi < s->num_passes;) {
+        const WavPackDecorrSpec *wpds;
+        int nterms, c, j;
+
+        if (!pi) { /* first try: the spec selected last time */
+            c = s->best_decorr;
+        } else {
+            if (s->mask_decorr == 0)
+                c = 0;
+            else
+                c = (s->best_decorr & (s->mask_decorr - 1)) | s->mask_decorr;
+
+            if (c == s->best_decorr) { /* already tried: advance the mask without consuming a try */
+                s->mask_decorr = s->mask_decorr ? ((s->mask_decorr << 1) & (s->num_decorrs - 1)) : 1;
+                continue;
+            }
+        }
+
+        wpds = &s->decorr_specs[c];
+        nterms = decorr_filter_nterms[s->decorr_filter];
+
+        while (1) { /* retried with half the terms while the cost hits the limit */
+        memcpy(s->temp_buffer[0][0], samples, buf_size);
+        CLEAR(save_decorr_passes);
+
+        for (j = 0; j < nterms; j++) {
+            CLEAR(temp_decorr_pass);
+            temp_decorr_pass.delta = wpds->delta;
+            temp_decorr_pass.value = wpds->terms[j];
+
+            if (temp_decorr_pass.value < 0) /* negative terms are replaced by term 1 for mono */
+                temp_decorr_pass.value = 1;
+
+            decorr_mono(s->temp_buffer[j&1][0], s->temp_buffer[~j&1][0],
+                        FFMIN(nb_samples, 2048), &temp_decorr_pass, -1); /* backward run over at most 2048 samples to obtain a starting state */
+
+            if (j) {
+                CLEAR(temp_decorr_pass.samplesA);
+            } else {
+                reverse_mono_decorr(&temp_decorr_pass);
+            }
+
+            memcpy(save_decorr_passes + j, &temp_decorr_pass, sizeof(struct Decorr));
+            decorr_mono(s->temp_buffer[j&1][0], s->temp_buffer[~j&1][0],
+                        nb_samples, &temp_decorr_pass, 1); /* the two temp buffers alternate as input and output */
+        }
+
+        size = log2mono(s->temp_buffer[j&1][0], nb_samples, log_limit);
+        if (size != UINT32_MAX || !nterms)
+            break;
+        nterms >>= 1;
+        }
+
+        if (size < best_size) {
+            memcpy(s->best_buffer[0], s->temp_buffer[j&1][0], buf_size);
+            memcpy(s->decorr_passes, save_decorr_passes, sizeof(struct Decorr) * MAX_TERMS);
+            s->num_terms = nterms;
+            s->best_decorr = c;
+            best_size = size;
+        }
+
+        if (pi++) /* the mask is not advanced after the very first try */
+            s->mask_decorr = s->mask_decorr ? ((s->mask_decorr << 1) & (s->num_decorrs - 1)) : 1;
+    }
+
+    if (s->extra_flags)
+        analyze_mono(s, samples, do_samples);
+    else if (do_samples)
+        memcpy(samples, s->best_buffer[0], buf_size);
+
+    if (no_history || s->extra_flags) {
+        CLEAR(s->w);
+        scan_word(s, &s->w.c[0], s->best_buffer[0], nb_samples, -1); /* backward median scan over best_buffer */
+    }
+    return 0;
+}
+
+static void decorr_stereo(int32_t *in_left, int32_t *in_right,
+                          int32_t *out_left, int32_t *out_right,
+                          int nb_samples, struct Decorr *dpp, int dir)
+{   /*
+     * Apply one stereo decorrelation pass.
+     *
+     * For every sample pair a prediction is formed from the history in
+     * dpp, the weighted prediction is subtracted from the input to give the
+     * output, and the weight is adapted. dpp->value selects the predictor:
+     * 2, 17, 18 and the remaining terms (default branch) predict each
+     * channel from its own history; -1, -2 and -3 predict across channels.
+     * The running sums of the weights are accumulated in dpp->sumA/sumB.
+     *
+     * in_left/in_right   input buffers
+     * out_left/out_right output buffers
+     * nb_samples         number of sample pairs
+     * dpp                pass state (value, delta, weights, histories),
+     *                    updated in place
+     * dir                step between samples; with a negative value
+     *                    processing starts at the last sample
+     */
+    int m = 0, i;
+
+    dpp->sumA = dpp->sumB = 0;
+
+    if (dir < 0) {
+        out_left  += nb_samples - 1;
+        out_right += nb_samples - 1;
+        in_left   += nb_samples - 1;
+        in_right  += nb_samples - 1;
+    }
+
+    dpp->weightA = restore_weight(store_weight(dpp->weightA)); /* round trip through the stored form; presumably matches the precision kept in the bitstream - TODO confirm */
+    dpp->weightB = restore_weight(store_weight(dpp->weightB));
+
+    for (i = 0; i < MAX_TERM; i++) { /* same round trip for the history samples */
+        dpp->samplesA[i] = wp_exp2(log2s(dpp->samplesA[i]));
+        dpp->samplesB[i] = wp_exp2(log2s(dpp->samplesB[i]));
+    }
+
+    switch (dpp->value) {
+    case 2: /* predict from the sample two positions back */
+        while (nb_samples--) {
+            int32_t sam, tmp;
+
+            sam = dpp->samplesA[0];
+            dpp->samplesA[0] = dpp->samplesA[1];
+            out_left[0] = tmp = (dpp->samplesA[1] = in_left[0]) - APPLY_WEIGHT(dpp->weightA, sam);
+            UPDATE_WEIGHT(dpp->weightA, dpp->delta, sam, tmp);
+            dpp->sumA += dpp->weightA;
+
+            sam = dpp->samplesB[0];
+            dpp->samplesB[0] = dpp->samplesB[1];
+            out_right[0] = tmp = (dpp->samplesB[1] = in_right[0]) - APPLY_WEIGHT(dpp->weightB, sam);
+            UPDATE_WEIGHT(dpp->weightB, dpp->delta, sam, tmp);
+            dpp->sumB += dpp->weightB;
+
+            in_left   += dir;
+            out_left  += dir;
+            in_right  += dir;
+            out_right += dir;
+        }
+        break;
+    case 17: /* predict 2 * s[0] - s[1] from the two previous samples */
+        while (nb_samples--) {
+            int32_t sam, tmp;
+
+            sam = 2 * dpp->samplesA[0] - dpp->samplesA[1];
+            dpp->samplesA[1] = dpp->samplesA[0];
+            out_left[0] = tmp = (dpp->samplesA[0] = in_left[0]) - APPLY_WEIGHT(dpp->weightA, sam);
+            UPDATE_WEIGHT(dpp->weightA, dpp->delta, sam, tmp);
+            dpp->sumA += dpp->weightA;
+
+            sam = 2 * dpp->samplesB[0] - dpp->samplesB[1];
+            dpp->samplesB[1] = dpp->samplesB[0];
+            out_right[0] = tmp = (dpp->samplesB[0] = in_right[0]) - APPLY_WEIGHT (dpp->weightB, sam);
+            UPDATE_WEIGHT(dpp->weightB, dpp->delta, sam, tmp);
+            dpp->sumB += dpp->weightB;
+
+            in_left   += dir;
+            out_left  += dir;
+            in_right  += dir;
+            out_right += dir;
+        }
+        break;
+    case 18: /* predict s[0] + ((s[0] - s[1]) >> 1) */
+        while (nb_samples--) {
+            int32_t sam, tmp;
+
+            sam = dpp->samplesA[0] + ((dpp->samplesA[0] - dpp->samplesA[1]) >> 1);
+            dpp->samplesA[1] = dpp->samplesA[0];
+            out_left[0] = tmp = (dpp->samplesA[0] = in_left[0]) - APPLY_WEIGHT(dpp->weightA, sam);
+            UPDATE_WEIGHT(dpp->weightA, dpp->delta, sam, tmp);
+            dpp->sumA += dpp->weightA;
+
+            sam = dpp->samplesB[0] + ((dpp->samplesB[0] - dpp->samplesB[1]) >> 1);
+            dpp->samplesB[1] = dpp->samplesB[0];
+            out_right[0] = tmp = (dpp->samplesB[0] = in_right[0]) - APPLY_WEIGHT(dpp->weightB, sam);
+            UPDATE_WEIGHT(dpp->weightB, dpp->delta, sam, tmp);
+            dpp->sumB += dpp->weightB;
+
+            in_left   += dir;
+            out_left  += dir;
+            in_right  += dir;
+            out_right += dir;
+        }
+        break;
+    default: { /* every other term: history used as a ring, m = read index, k = write index */
+        int k = dpp->value & (MAX_TERM - 1);
+
+        while (nb_samples--) {
+            int32_t sam, tmp;
+
+            sam = dpp->samplesA[m];
+            out_left[0] = tmp = (dpp->samplesA[k] = in_left[0]) - APPLY_WEIGHT(dpp->weightA, sam);
+            UPDATE_WEIGHT(dpp->weightA, dpp->delta, sam, tmp);
+            dpp->sumA += dpp->weightA;
+
+            sam = dpp->samplesB[m];
+            out_right[0] = tmp = (dpp->samplesB[k] = in_right[0]) - APPLY_WEIGHT(dpp->weightB, sam);
+            UPDATE_WEIGHT(dpp->weightB, dpp->delta, sam, tmp);
+            dpp->sumB += dpp->weightB;
+
+            in_left   += dir;
+            out_left  += dir;
+            in_right  += dir;
+            out_right += dir;
+            m = (m + 1) & (MAX_TERM - 1);
+            k = (k + 1) & (MAX_TERM - 1);
+        }
+
+        if (m) { /* rotate the ring so that the next entry to be read sits at index 0 */
+            int32_t temp_A[MAX_TERM], temp_B[MAX_TERM];
+            int k;
+
+            memcpy(temp_A, dpp->samplesA, sizeof(dpp->samplesA));
+            memcpy(temp_B, dpp->samplesB, sizeof(dpp->samplesB));
+
+            for (k = 0; k < MAX_TERM; k++) {
+                dpp->samplesA[k] = temp_A[m];
+                dpp->samplesB[k] = temp_B[m];
+                m = (m + 1) & (MAX_TERM - 1);
+            }
+        }
+        break;
+        }
+    case -1: /* left predicted from the previous right input, right from the current left input */
+        while (nb_samples--) {
+            int32_t sam_A, sam_B, tmp;
+
+            sam_A = dpp->samplesA[0];
+            out_left[0] = tmp = (sam_B = in_left[0]) - APPLY_WEIGHT(dpp->weightA, sam_A);
+            UPDATE_WEIGHT_CLIP(dpp->weightA, dpp->delta, sam_A, tmp);
+            dpp->sumA += dpp->weightA;
+
+            out_right[0] = tmp = (dpp->samplesA[0] = in_right[0]) - APPLY_WEIGHT(dpp->weightB, sam_B);
+            UPDATE_WEIGHT_CLIP(dpp->weightB, dpp->delta, sam_B, tmp);
+            dpp->sumB += dpp->weightB;
+
+            in_left   += dir;
+            out_left  += dir;
+            in_right  += dir;
+            out_right += dir;
+        }
+        break;
+    case -2: /* right predicted from the previous left input, left from the current right input */
+        while (nb_samples--) {
+            int32_t sam_A, sam_B, tmp;
+
+            sam_B = dpp->samplesB[0];
+            out_right[0] = tmp = (sam_A = in_right[0]) - APPLY_WEIGHT(dpp->weightB, sam_B);
+            UPDATE_WEIGHT_CLIP(dpp->weightB, dpp->delta, sam_B, tmp);
+            dpp->sumB += dpp->weightB;
+
+            out_left[0] = tmp = (dpp->samplesB[0] = in_left[0]) - APPLY_WEIGHT(dpp->weightA, sam_A);
+            UPDATE_WEIGHT_CLIP(dpp->weightA, dpp->delta, sam_A, tmp);
+            dpp->sumA += dpp->weightA;
+
+            in_left   += dir;
+            out_left  += dir;
+            in_right  += dir;
+            out_right += dir;
+        }
+        break;
+    case -3: /* each channel predicted from the previous input of the other channel */
+        while (nb_samples--) {
+            int32_t sam_A, sam_B, tmp;
+
+            sam_A = dpp->samplesA[0];
+            sam_B = dpp->samplesB[0];
+
+            dpp->samplesA[0] = tmp = in_right[0];
+            out_right[0] = tmp -= APPLY_WEIGHT(dpp->weightB, sam_B);
+            UPDATE_WEIGHT_CLIP(dpp->weightB, dpp->delta, sam_B, tmp);
+            dpp->sumB += dpp->weightB;
+
+            dpp->samplesB[0] = tmp = in_left[0];
+            out_left[0] = tmp -= APPLY_WEIGHT(dpp->weightA, sam_A);
+            UPDATE_WEIGHT_CLIP(dpp->weightA, dpp->delta, sam_A, tmp);
+            dpp->sumA += dpp->weightA;
+
+            in_left   += dir;
+            out_left  += dir;
+            in_right  += dir;
+            out_right += dir;
+        }
+        break;
+    }
+}
+
+/**
+ * Turn the sample histories left behind by one decorr_stereo() run into the
+ * starting histories for a run in the opposite direction.
+ *
+ * For terms above MAX_TERM both two-entry histories are advanced twice with
+ * the term's own two-tap predictor (2*a - b for odd terms, (3*a - b) >> 1
+ * for even ones). For terms 2..MAX_TERM the first `value` entries of each
+ * history are mirrored in place. Term 1 and non-positive terms are left
+ * unchanged.
+ *
+ * @param dpp decorrelation pass whose samplesA/samplesB are rewritten
+ */
+static void reverse_decorr(struct Decorr *dpp)
+{
+    const int term = dpp->value;
+
+    if (term > MAX_TERM) {
+        const int odd = term & 1;
+        int32_t pred_a, pred_b;
+
+        /* first step: predict one sample further and shift the histories */
+        if (odd) {
+            pred_a = 2 * dpp->samplesA[0] - dpp->samplesA[1];
+            pred_b = 2 * dpp->samplesB[0] - dpp->samplesB[1];
+        } else {
+            pred_a = (3 * dpp->samplesA[0] - dpp->samplesA[1]) >> 1;
+            pred_b = (3 * dpp->samplesB[0] - dpp->samplesB[1]) >> 1;
+        }
+        dpp->samplesA[1] = dpp->samplesA[0];
+        dpp->samplesA[0] = pred_a;
+        dpp->samplesB[1] = dpp->samplesB[0];
+        dpp->samplesB[0] = pred_b;
+
+        /* second step: only the older history entries are replaced */
+        if (odd) {
+            pred_a = 2 * dpp->samplesA[0] - dpp->samplesA[1];
+            pred_b = 2 * dpp->samplesB[0] - dpp->samplesB[1];
+        } else {
+            pred_a = (3 * dpp->samplesA[0] - dpp->samplesA[1]) >> 1;
+            pred_b = (3 * dpp->samplesB[0] - dpp->samplesB[1]) >> 1;
+        }
+        dpp->samplesA[1] = pred_a;
+        dpp->samplesB[1] = pred_b;
+        return;
+    }
+
+    if (term > 1) {
+        int lo = 0, hi = term - 1, swaps = term / 2;
+
+        /* lo stays strictly below hi, so a plain swap is sufficient */
+        while (swaps-- > 0) {
+            int32_t tmp;
+
+            lo &= MAX_TERM - 1;
+            hi &= MAX_TERM - 1;
+
+            tmp               = dpp->samplesA[lo];
+            dpp->samplesA[lo] = dpp->samplesA[hi];
+            dpp->samplesA[hi] = tmp;
+
+            tmp               = dpp->samplesB[lo];
+            dpp->samplesB[lo] = dpp->samplesB[hi];
+            dpp->samplesB[hi] = tmp;
+
+            lo++;
+            hi--;
+        }
+    }
+}
+
+static void decorr_stereo_quick(int32_t *in_left,  int32_t *in_right,
+                                int32_t *out_left, int32_t *out_right,
+                                int nb_samples, struct Decorr *dpp)
+{   /*
+     * Forward-only variant of decorr_stereo().
+     *
+     * The predictors per dpp->value are the same, but samples are always
+     * processed from index 0 upwards, the weight sums (sumA/sumB) are not
+     * maintained, and APPLY_WEIGHT_I is used in place of APPLY_WEIGHT.
+     * NOTE(review): the caller visible here selects this variant only when
+     * info->gt16bit is unset, so APPLY_WEIGHT_I presumably assumes small
+     * sample magnitudes - confirm against the macro definition.
+     *
+     * in_left/in_right   input buffers
+     * out_left/out_right output buffers
+     * nb_samples         number of sample pairs
+     * dpp                pass state (value, delta, weights, histories),
+     *                    updated in place
+     */
+    int m = 0, i;
+
+    dpp->weightA = restore_weight(store_weight(dpp->weightA)); /* round trip through the stored form, as in decorr_stereo() */
+    dpp->weightB = restore_weight(store_weight(dpp->weightB));
+
+    for (i = 0; i < MAX_TERM; i++) {
+        dpp->samplesA[i] = wp_exp2(log2s(dpp->samplesA[i]));
+        dpp->samplesB[i] = wp_exp2(log2s(dpp->samplesB[i]));
+    }
+
+    switch (dpp->value) {
+    case 2: /* predict from the sample two positions back */
+        for (i = 0; i < nb_samples; i++) {
+            int32_t sam, tmp;
+
+            sam = dpp->samplesA[0];
+            dpp->samplesA[0] = dpp->samplesA[1];
+            out_left[i] = tmp = (dpp->samplesA[1] = in_left[i]) - APPLY_WEIGHT_I(dpp->weightA, sam);
+            UPDATE_WEIGHT(dpp->weightA, dpp->delta, sam, tmp);
+
+            sam = dpp->samplesB[0];
+            dpp->samplesB[0] = dpp->samplesB[1];
+            out_right[i] = tmp = (dpp->samplesB[1] = in_right[i]) - APPLY_WEIGHT_I(dpp->weightB, sam);
+            UPDATE_WEIGHT(dpp->weightB, dpp->delta, sam, tmp);
+        }
+        break;
+    case 17: /* predict 2 * s[0] - s[1] from the two previous samples */
+        for (i = 0; i < nb_samples; i++) {
+            int32_t sam, tmp;
+
+            sam = 2 * dpp->samplesA[0] - dpp->samplesA[1];
+            dpp->samplesA[1] = dpp->samplesA[0];
+            out_left[i] = tmp = (dpp->samplesA[0] = in_left[i]) - APPLY_WEIGHT_I(dpp->weightA, sam);
+            UPDATE_WEIGHT(dpp->weightA, dpp->delta, sam, tmp);
+
+            sam = 2 * dpp->samplesB[0] - dpp->samplesB[1];
+            dpp->samplesB[1] = dpp->samplesB[0];
+            out_right[i] = tmp = (dpp->samplesB[0] = in_right[i]) - APPLY_WEIGHT_I(dpp->weightB, sam);
+            UPDATE_WEIGHT(dpp->weightB, dpp->delta, sam, tmp);
+        }
+        break;
+    case 18: /* predict s[0] + ((s[0] - s[1]) >> 1) */
+        for (i = 0; i < nb_samples; i++) {
+            int32_t sam, tmp;
+
+            sam = dpp->samplesA[0] + ((dpp->samplesA[0] - dpp->samplesA[1]) >> 1);
+            dpp->samplesA[1] = dpp->samplesA[0];
+            out_left[i] = tmp = (dpp->samplesA[0] = in_left[i]) - APPLY_WEIGHT_I(dpp->weightA, sam);
+            UPDATE_WEIGHT(dpp->weightA, dpp->delta, sam, tmp);
+
+            sam = dpp->samplesB[0] + ((dpp->samplesB[0] - dpp->samplesB[1]) >> 1);
+            dpp->samplesB[1] = dpp->samplesB[0];
+            out_right[i] = tmp = (dpp->samplesB[0] = in_right[i]) - APPLY_WEIGHT_I(dpp->weightB, sam);
+            UPDATE_WEIGHT(dpp->weightB, dpp->delta, sam, tmp);
+        }
+        break;
+    default: { /* every other term: history used as a ring, m = read index, k = write index */
+        int k = dpp->value & (MAX_TERM - 1);
+
+        for (i = 0; i < nb_samples; i++) {
+            int32_t sam, tmp;
+
+            sam = dpp->samplesA[m];
+            out_left[i] = tmp = (dpp->samplesA[k] = in_left[i]) - APPLY_WEIGHT_I(dpp->weightA, sam);
+            UPDATE_WEIGHT(dpp->weightA, dpp->delta, sam, tmp);
+
+            sam = dpp->samplesB[m];
+            out_right[i] = tmp = (dpp->samplesB[k] = in_right[i]) - APPLY_WEIGHT_I(dpp->weightB, sam);
+            UPDATE_WEIGHT(dpp->weightB, dpp->delta, sam, tmp);
+
+            m = (m + 1) & (MAX_TERM - 1);
+            k = (k + 1) & (MAX_TERM - 1);
+        }
+
+        if (m) { /* rotate the ring so that the next entry to be read sits at index 0 */
+            int32_t temp_A[MAX_TERM], temp_B[MAX_TERM];
+            int k;
+
+            memcpy(temp_A, dpp->samplesA, sizeof(dpp->samplesA));
+            memcpy(temp_B, dpp->samplesB, sizeof(dpp->samplesB));
+
+            for (k = 0; k < MAX_TERM; k++) {
+                dpp->samplesA[k] = temp_A[m];
+                dpp->samplesB[k] = temp_B[m];
+                m = (m + 1) & (MAX_TERM - 1);
+            }
+        }
+        break;
+    }
+    case -1: /* left predicted from the previous right input, right from the current left input */
+        for (i = 0; i < nb_samples; i++) {
+            int32_t sam_A, sam_B, tmp;
+
+            sam_A = dpp->samplesA[0];
+            out_left[i] = tmp = (sam_B = in_left[i]) - APPLY_WEIGHT_I(dpp->weightA, sam_A);
+            UPDATE_WEIGHT_CLIP(dpp->weightA, dpp->delta, sam_A, tmp);
+
+            out_right[i] = tmp = (dpp->samplesA[0] = in_right[i]) - APPLY_WEIGHT_I(dpp->weightB, sam_B);
+            UPDATE_WEIGHT_CLIP(dpp->weightB, dpp->delta, sam_B, tmp);
+        }
+        break;
+    case -2: /* right predicted from the previous left input, left from the current right input */
+        for (i = 0; i < nb_samples; i++) {
+            int32_t sam_A, sam_B, tmp;
+
+            sam_B = dpp->samplesB[0];
+            out_right[i] = tmp = (sam_A = in_right[i]) - APPLY_WEIGHT_I(dpp->weightB, sam_B);
+            UPDATE_WEIGHT_CLIP(dpp->weightB, dpp->delta, sam_B, tmp);
+
+            out_left[i] = tmp = (dpp->samplesB[0] = in_left[i]) - APPLY_WEIGHT_I(dpp->weightA, sam_A);
+            UPDATE_WEIGHT_CLIP(dpp->weightA, dpp->delta, sam_A, tmp);
+        }
+        break;
+    case -3: /* each channel predicted from the previous input of the other channel */
+        for (i = 0; i < nb_samples; i++) {
+            int32_t sam_A, sam_B, tmp;
+
+            sam_A = dpp->samplesA[0];
+            sam_B = dpp->samplesB[0];
+
+            dpp->samplesA[0] = tmp = in_right[i];
+            out_right[i] = tmp -= APPLY_WEIGHT_I(dpp->weightB, sam_B);
+            UPDATE_WEIGHT_CLIP(dpp->weightB, dpp->delta, sam_B, tmp);
+
+            dpp->samplesB[0] = tmp = in_left[i];
+            out_left[i] = tmp -= APPLY_WEIGHT_I(dpp->weightA, sam_A);
+            UPDATE_WEIGHT_CLIP(dpp->weightA, dpp->delta, sam_A, tmp);
+        }
+        break;
+    }
+}
+
+/**
+ * Run decorrelation pass number tindex of info->dps over a whole stereo
+ * buffer pair.
+ *
+ * A scratch pass is first run backwards over at most the first 2048 sample
+ * pairs with a raised delta to obtain starting weights (and, for the first
+ * pass only, starting histories). That state is stored in
+ * info->dps[tindex] and then used for the forward run that fills the output
+ * buffers. When the requested delta is 0, an extra forward run with delta 1
+ * is made and the fixed weights are set to the average weights seen during
+ * it. The final run uses decorr_stereo() when info->gt16bit is set and
+ * decorr_stereo_quick() otherwise.
+ *
+ * @param info       search state; dps[tindex] supplies value and delta and
+ *                   receives the initial weights and histories
+ * @param in_left    left input buffer
+ * @param in_right   right input buffer
+ * @param out_left   left output buffer
+ * @param out_right  right output buffer
+ * @param nb_samples number of sample pairs
+ * @param tindex     index of the pass to run
+ */
+static void decorr_stereo_buffer(WavPackExtraInfo *info,
+                                 int32_t *in_left,  int32_t *in_right,
+                                 int32_t *out_left, int32_t *out_right,
+                                 int nb_samples, int tindex)
+{
+    struct Decorr *const target = &info->dps[tindex];
+    const int delta = target->delta;
+    struct Decorr work = {0};
+
+    work.value = target->value;
+    work.delta = delta == 7 ? 7 : delta < 2 ? 3 : delta + 1;
+
+    /* backward warm-up run over the head of the buffers */
+    decorr_stereo(in_left, in_right, out_left, out_right,
+                  FFMIN(2048, nb_samples), &work, -1);
+    work.delta = delta;
+
+    /* only the first pass keeps a history; later passes start from zero */
+    if (!tindex) {
+        reverse_decorr(&work);
+    } else {
+        CLEAR(work.samplesA);
+        CLEAR(work.samplesB);
+    }
+
+    memcpy(target->samplesA, work.samplesA, sizeof(work.samplesA));
+    memcpy(target->samplesB, work.samplesB, sizeof(work.samplesB));
+    target->weightA = work.weightA;
+    target->weightB = work.weightB;
+
+    if (!delta) {
+        /* delta 0 never adapts: use the mean weights of a delta 1 run */
+        work.delta = 1;
+        decorr_stereo(in_left, in_right, out_left, out_right,
+                      nb_samples, &work, 1);
+        work.delta = 0;
+        memcpy(work.samplesA, target->samplesA, sizeof(work.samplesA));
+        memcpy(work.samplesB, target->samplesB, sizeof(work.samplesB));
+        work.weightA    = work.sumA / nb_samples;
+        target->weightA = work.weightA;
+        work.weightB    = work.sumB / nb_samples;
+        target->weightB = work.weightB;
+    }
+
+    if (info->gt16bit)
+        decorr_stereo(in_left, in_right, out_left, out_right,
+                      nb_samples, &work, 1);
+    else
+        decorr_stereo_quick(in_left, in_right, out_left, out_right,
+                            nb_samples, &work);
+}
+
+/*
+ * Try to lower the estimated block cost by swapping adjacent decorrelation
+ * terms.
+ *
+ * s->decorr_passes holds the best term list found so far and info->dps is
+ * the working copy under evaluation.  Sweeps over the list are repeated
+ * until one full sweep accepts no swap.  Whenever a swap wins, the resulting
+ * samples are saved in s->sampleptrs[info->nterms + 1] and the cost in
+ * info->best_bits.
+ */
+static void sort_stereo(WavPackEncodeContext *s, WavPackExtraInfo *info)
+{
+    int reversed = 1;
+    uint32_t bits;
+
+    while (reversed) {
+        int ri, i;
+
+        /* restart every sweep from the current best configuration */
+        memcpy(info->dps, s->decorr_passes, sizeof(s->decorr_passes));
+        reversed = 0;
+
+        for (ri = 0; ri < info->nterms && s->decorr_passes[ri].value; ri++) {
+
+            /* no following term left to swap with */
+            if (ri + 1 >= info->nterms || !s->decorr_passes[ri+1].value)
+                break;
+
+            /* swapping two equal terms changes nothing; just run the pass so
+             * that sampleptrs[ri + 1] is valid for the next position */
+            if (s->decorr_passes[ri].value == s->decorr_passes[ri+1].value) {
+                decorr_stereo_buffer(info,
+                                     s->sampleptrs[ri  ][0], s->sampleptrs[ri  ][1],
+                                     s->sampleptrs[ri+1][0], s->sampleptrs[ri+1][1],
+                                     s->block_samples, ri);
+                continue;
+            }
+
+            /* tentatively swap terms ri and ri + 1 in the working copy */
+            info->dps[ri  ] = s->decorr_passes[ri+1];
+            info->dps[ri+1] = s->decorr_passes[ri  ];
+
+            /* re-run every pass from the swap position to the end of the list */
+            for (i = ri; i < info->nterms && s->decorr_passes[i].value; i++)
+                decorr_stereo_buffer(info,
+                                     s->sampleptrs[i  ][0], s->sampleptrs[i  ][1],
+                                     s->sampleptrs[i+1][0], s->sampleptrs[i+1][1],
+                                     s->block_samples, i);
+
+            /* i now indexes the output of the last pass */
+            bits = log2stereo(s->sampleptrs[i][0], s->sampleptrs[i][1],
+                              s->block_samples, info->log_limit);
+
+            if (bits < info->best_bits) {
+                /* the swap wins: make it the new best and request another sweep */
+                reversed = 1;
+                info->best_bits = bits;
+                CLEAR(s->decorr_passes);
+                memcpy(s->decorr_passes, info->dps, sizeof(info->dps[0]) * i);
+                memcpy(s->sampleptrs[info->nterms + 1][0],
+                       s->sampleptrs[i][0], s->block_samples * 4);
+                memcpy(s->sampleptrs[info->nterms + 1][1],
+                       s->sampleptrs[i][1], s->block_samples * 4);
+            } else {
+                /* the swap loses: undo it and regenerate the output of pass ri */
+                info->dps[ri  ] = s->decorr_passes[ri  ];
+                info->dps[ri+1] = s->decorr_passes[ri+1];
+                decorr_stereo_buffer(info,
+                                     s->sampleptrs[ri  ][0], s->sampleptrs[ri  ][1],
+                                     s->sampleptrs[ri+1][0], s->sampleptrs[ri+1][1],
+                                     s->block_samples, ri);
+            }
+        }
+    }
+}
+
+/*
+ * Search for a better weight-adaptation delta for the current term list.
+ *
+ * Starting from the delta of the first pass, smaller deltas are tried one
+ * step at a time for as long as the estimated cost keeps dropping.  Only if
+ * no smaller delta helped are larger deltas (up to 7) tried the same way.
+ * Each improvement is stored in s->decorr_passes, info->best_bits and the
+ * "best" sample buffers s->sampleptrs[info->nterms + 1].
+ */
+static void delta_stereo(WavPackEncodeContext *s, WavPackExtraInfo *info)
+{
+    int went_lower = 0;
+    int base_delta, trial, t;
+    uint32_t cost;
+
+    /* nothing to tune without at least one active term */
+    if (s->decorr_passes[0].value == 0)
+        return;
+
+    base_delta = s->decorr_passes[0].delta;
+
+    /* phase 1: walk downwards from base_delta - 1 to 0 */
+    trial = base_delta - 1;
+    while (trial >= 0) {
+        t = 0;
+        while (t < info->nterms && s->decorr_passes[t].value) {
+            info->dps[t].value = s->decorr_passes[t].value;
+            info->dps[t].delta = trial;
+            decorr_stereo_buffer(info,
+                                 s->sampleptrs[t    ][0], s->sampleptrs[t    ][1],
+                                 s->sampleptrs[t + 1][0], s->sampleptrs[t + 1][1],
+                                 s->block_samples, t);
+            t++;
+        }
+
+        cost = log2stereo(s->sampleptrs[t][0], s->sampleptrs[t][1],
+                          s->block_samples, info->log_limit);
+
+        /* stop at the first delta that does not improve on the best */
+        if (cost >= info->best_bits)
+            break;
+
+        went_lower      = 1;
+        info->best_bits = cost;
+        CLEAR(s->decorr_passes);
+        memcpy(s->decorr_passes, info->dps, sizeof(info->dps[0]) * t);
+        memcpy(s->sampleptrs[info->nterms + 1][0], s->sampleptrs[t][0],
+               s->block_samples * 4);
+        memcpy(s->sampleptrs[info->nterms + 1][1], s->sampleptrs[t][1],
+               s->block_samples * 4);
+
+        trial--;
+    }
+
+    /* a smaller delta already won, so larger ones are not examined */
+    if (went_lower)
+        return;
+
+    /* phase 2: walk upwards from base_delta + 1 to 7 */
+    for (trial = base_delta + 1; trial <= 7; trial++) {
+        t = 0;
+        while (t < info->nterms && s->decorr_passes[t].value) {
+            info->dps[t].value = s->decorr_passes[t].value;
+            info->dps[t].delta = trial;
+            decorr_stereo_buffer(info,
+                                 s->sampleptrs[t    ][0], s->sampleptrs[t    ][1],
+                                 s->sampleptrs[t + 1][0], s->sampleptrs[t + 1][1],
+                                 s->block_samples, t);
+            t++;
+        }
+
+        cost = log2stereo(s->sampleptrs[t][0], s->sampleptrs[t][1],
+                          s->block_samples, info->log_limit);
+
+        if (!(cost < info->best_bits))
+            break;
+
+        info->best_bits = cost;
+        CLEAR(s->decorr_passes);
+        memcpy(s->decorr_passes, info->dps, sizeof(info->dps[0]) * t);
+        memcpy(s->sampleptrs[info->nterms + 1][0],
+               s->sampleptrs[t][0], s->block_samples * 4);
+        memcpy(s->sampleptrs[info->nterms + 1][1],
+               s->sampleptrs[t][1], s->block_samples * 4);
+    }
+}
+
+/*
+ * Recursive search for a good sequence of decorrelation terms.
+ *
+ * At the given depth every candidate term is applied to the samples in
+ * s->sampleptrs[depth], writing to s->sampleptrs[depth + 1], and its cost is
+ * measured with log2stereo().  Any candidate beating info->best_bits becomes
+ * the new overall best.  Afterwards, up to `branches` of the cheapest
+ * candidates (those cheaper than input_bits) are expanded by recursing to
+ * depth + 1.
+ *
+ * @param depth      index of the pass being chosen
+ * @param delta      delta assigned to every pass tried here
+ * @param input_bits cost of the samples entering this depth; a candidate
+ *                   must beat it to be expanded further
+ */
+static void recurse_stereo(WavPackEncodeContext *s, WavPackExtraInfo *info,
+                           int depth, int delta, uint32_t input_bits)
+{
+    int term, branches = s->num_branches - depth;
+    int32_t *in_left, *in_right, *out_left, *out_right;
+    uint32_t term_bits[22], bits;
+
+    /* always follow at least one branch; only one at the last depth */
+    if (branches < 1 || depth + 1 == info->nterms)
+        branches = 1;
+
+    /* term_bits[term + 3] holds the cost of each term; 0 means "not tried" */
+    CLEAR(term_bits);
+    in_left   = s->sampleptrs[depth    ][0];
+    in_right  = s->sampleptrs[depth    ][1];
+    out_left  = s->sampleptrs[depth + 1][0];
+    out_right = s->sampleptrs[depth + 1][1];
+
+    for (term = -3; term <= 18; term++) {
+        /* 0 and 9..16 are never tried */
+        if (!term || (term > 8 && term < 17))
+            continue;
+
+        /* 17 is skipped on single-branch levels that are not the last one */
+        if (term == 17 && branches == 1 && depth + 1 < info->nterms)
+            continue;
+
+        /* -1 and -2 are only tried when WV_CROSS_DECORR is set */
+        if (term == -1 || term == -2)
+            if (!(s->flags & WV_CROSS_DECORR))
+                continue;
+
+        /* without any extra flags, 5..8 are skipped as well */
+        if (!s->extra_flags && (term > 4 && term < 17))
+            continue;
+
+        info->dps[depth].value = term;
+        info->dps[depth].delta = delta;
+        decorr_stereo_buffer(info, in_left, in_right, out_left, out_right,
+                             s->block_samples, depth);
+        bits = log2stereo(out_left, out_right, s->block_samples, info->log_limit);
+
+        if (bits < info->best_bits) {
+            /* new overall best: keep passes 0..depth and the resulting samples */
+            info->best_bits = bits;
+            CLEAR(s->decorr_passes);
+            memcpy(s->decorr_passes, info->dps, sizeof(info->dps[0]) * (depth + 1));
+            memcpy(s->sampleptrs[info->nterms + 1][0], s->sampleptrs[depth + 1][0],
+                   s->block_samples * 4);
+            memcpy(s->sampleptrs[info->nterms + 1][1], s->sampleptrs[depth + 1][1],
+                   s->block_samples * 4);
+        }
+
+        term_bits[term + 3] = bits;
+    }
+
+    while (depth + 1 < info->nterms && branches--) {
+        uint32_t local_best_bits = input_bits;
+        int best_term = 0, i;
+
+        /* pick the cheapest remaining candidate that beats input_bits */
+        for (i = 0; i < 22; i++)
+            if (term_bits[i] && term_bits[i] < local_best_bits) {
+                local_best_bits = term_bits[i];
+                best_term = i - 3;
+            }
+
+        /* nothing improves on the input: stop expanding */
+        if (!best_term)
+            break;
+
+        /* mark as consumed so the next branch picks a different term */
+        term_bits[best_term + 3] = 0;
+
+        /* regenerate the output of this pass before descending, since the
+         * output buffers were overwritten by later candidates */
+        info->dps[depth].value = best_term;
+        info->dps[depth].delta = delta;
+        decorr_stereo_buffer(info, in_left, in_right, out_left, out_right,
+                             s->block_samples, depth);
+
+        recurse_stereo(s, info, depth + 1, delta, local_best_bits);
+    }
+}
+
+/*
+ * Refine the decorrelation passes of a stereo block using the optional
+ * "extra" search stages selected by s->extra_flags.
+ *
+ * The current s->decorr_passes are applied once to obtain a baseline cost,
+ * then the branch search, term sorting and delta search are run as enabled.
+ * If do_samples is non-zero the best decorrelated samples are written back
+ * to in_left / in_right.  s->num_terms is set to the number of leading
+ * non-zero terms left in s->decorr_passes.
+ *
+ * The function returns silently, leaving everything untouched, if
+ * allocate_buffers2() reports a failure.
+ */
+static void analyze_stereo(WavPackEncodeContext *s,
+                           int32_t *in_left, int32_t *in_right,
+                           int do_samples)
+{
+    WavPackExtraInfo info;
+    int pass, kept;
+
+    info.gt16bit   = ((s->flags & MAG_MASK) >> MAG_LSB) >= 16;
+    info.log_limit = (((s->flags & MAG_MASK) >> MAG_LSB) + 4) * 256;
+    info.log_limit = FFMIN(6912, info.log_limit);
+    info.nterms    = s->num_terms;
+
+    if (allocate_buffers2(s, s->num_terms) != 0)
+        return;
+
+    memcpy(info.dps, s->decorr_passes, sizeof(info.dps));
+    memcpy(s->sampleptrs[0][0], in_left,  s->block_samples * 4);
+    memcpy(s->sampleptrs[0][1], in_right, s->block_samples * 4);
+
+    /* baseline: run the passes as they are, stage by stage */
+    pass = 0;
+    while (pass < info.nterms && info.dps[pass].value) {
+        if (info.gt16bit) {
+            decorr_stereo(s->sampleptrs[pass    ][0], s->sampleptrs[pass    ][1],
+                          s->sampleptrs[pass + 1][0], s->sampleptrs[pass + 1][1],
+                          s->block_samples, info.dps + pass, 1);
+        } else {
+            decorr_stereo_quick(s->sampleptrs[pass    ][0], s->sampleptrs[pass    ][1],
+                                s->sampleptrs[pass + 1][0], s->sampleptrs[pass + 1][1],
+                                s->block_samples, info.dps + pass);
+        }
+        pass++;
+    }
+
+    info.best_bits = log2stereo(s->sampleptrs[info.nterms][0],
+                                s->sampleptrs[info.nterms][1],
+                                s->block_samples, 0);
+
+    /* slot nterms + 1 always holds the best result found so far */
+    memcpy(s->sampleptrs[info.nterms + 1][0], s->sampleptrs[pass][0], s->block_samples * 4);
+    memcpy(s->sampleptrs[info.nterms + 1][1], s->sampleptrs[pass][1], s->block_samples * 4);
+
+    if (s->extra_flags & EXTRA_BRANCHES) {
+        recurse_stereo(s, &info, 0, (int) floor(s->delta_decay + 0.5),
+                       log2stereo(s->sampleptrs[0][0], s->sampleptrs[0][1],
+                                  s->block_samples, 0));
+    }
+
+    if (s->extra_flags & EXTRA_SORT_FIRST) {
+        sort_stereo(s, &info);
+    }
+
+    if (s->extra_flags & EXTRA_TRY_DELTAS) {
+        delta_stereo(s, &info);
+
+        /* track the chosen delta as a running average, or reset it to 2 */
+        if ((s->extra_flags & EXTRA_ADJUST_DELTAS) && s->decorr_passes[0].value)
+            s->delta_decay = (float)((s->delta_decay * 2.0 + s->decorr_passes[0].delta) / 3.0);
+        else
+            s->delta_decay = 2.0;
+    }
+
+    if (s->extra_flags & EXTRA_SORT_LAST) {
+        sort_stereo(s, &info);
+    }
+
+    if (do_samples) {
+        memcpy(in_left,  s->sampleptrs[info.nterms + 1][0], s->block_samples * 4);
+        memcpy(in_right, s->sampleptrs[info.nterms + 1][1], s->block_samples * 4);
+    }
+
+    /* count the leading non-zero terms that survived the search */
+    for (kept = 0; kept < info.nterms && s->decorr_passes[kept].value; kept++)
+        ;
+
+    s->num_terms = kept;
+}
+
+/*
+ * Select the decorrelation setup for one stereo block.
+ *
+ * Up to s->num_passes candidate specs from s->decorr_specs are evaluated,
+ * starting with the previously best one (s->best_decorr).  For each
+ * candidate the block is decorrelated into s->temp_buffer and its cost is
+ * estimated with log2stereo(); the cheapest candidate's passes and samples
+ * are kept in s->decorr_passes and s->best_buffer.  If s->extra_flags is
+ * set, analyze_stereo() refines the result afterwards.
+ *
+ * @param samples_l  left channel samples of the block
+ * @param samples_r  right channel samples of the block
+ * @param no_history non-zero to forget the best spec of earlier blocks
+ * @param do_samples non-zero to overwrite samples_l / samples_r with the
+ *                   decorrelated samples
+ * @return 0 on success, a negative error code if an allocation fails
+ */
+static int wv_stereo(WavPackEncodeContext *s,
+                     int32_t *samples_l, int32_t *samples_r,
+                     int no_history, int do_samples)
+{
+    struct Decorr temp_decorr_pass, save_decorr_passes[MAX_TERMS] = {{0}};
+    int nb_samples = s->block_samples, ret;
+    int buf_size = sizeof(int32_t) * nb_samples;
+    int log_limit, force_js = 0, force_ts = 0, got_js = 0, pi, i;
+    uint32_t best_size = UINT32_MAX, size;
+
+    /* an all-zero block needs no decorrelation at all */
+    for (i = 0; i < nb_samples; i++)
+        if (samples_l[i] || samples_r[i])
+            break;
+
+    if (i == nb_samples) {
+        s->flags &= ~((uint32_t) WV_JOINT_STEREO);
+        CLEAR(s->decorr_passes);
+        CLEAR(s->w);
+        s->num_terms = 0;
+        return 0;
+    }
+
+    log_limit = (((s->flags & MAG_MASK) >> MAG_LSB) + 4) * 256;
+    log_limit = FFMIN(6912, log_limit);
+
+    /* s->joint > 0 forces joint stereo, s->joint < 0 forces true stereo */
+    if (s->joint) {
+        force_js = s->joint > 0;
+        force_ts = s->joint < 0;
+    }
+
+    if ((ret = allocate_buffers(s)) < 0)
+        return ret;
+
+    if (no_history || s->num_passes >= 7)
+        s->best_decorr = s->mask_decorr = 0;
+
+    for (pi = 0; pi < s->num_passes;) {
+        const WavPackDecorrSpec *wpds;
+        int nterms, c, j;
+
+        /* the first pass re-tries the previous best spec; later passes
+         * derive a neighbouring spec index from best_decorr and mask_decorr */
+        if (!pi)
+            c = s->best_decorr;
+        else {
+            if (s->mask_decorr == 0)
+                c = 0;
+            else
+                c = (s->best_decorr & (s->mask_decorr - 1)) | s->mask_decorr;
+
+            /* already evaluated: advance the mask without consuming a pass */
+            if (c == s->best_decorr) {
+                s->mask_decorr = s->mask_decorr ? ((s->mask_decorr << 1) & (s->num_decorrs - 1)) : 1;
+                continue;
+            }
+        }
+
+        wpds = &s->decorr_specs[c];
+        nterms = decorr_filter_nterms[s->decorr_filter];
+
+        while (1) {
+            if (force_js || (wpds->joint_stereo && !force_ts)) {
+                /* build the joint-stereo version of the input only once */
+                if (!got_js) {
+                    av_fast_padded_malloc(&s->js_left,  &s->js_left_size,  buf_size);
+                    av_fast_padded_malloc(&s->js_right, &s->js_right_size, buf_size);
+                    /* the pointers are checked here because the buffers are
+                     * written immediately below */
+                    if (!s->js_left || !s->js_right)
+                        return AVERROR(ENOMEM);
+                    memcpy(s->js_left,  samples_l, buf_size);
+                    memcpy(s->js_right, samples_r, buf_size);
+
+                    /* left becomes L - R, right becomes R + ((L - R) >> 1) */
+                    for (i = 0; i < nb_samples; i++)
+                        s->js_right[i] += ((s->js_left[i] -= s->js_right[i]) >> 1);
+                    got_js = 1;
+                }
+
+                memcpy(s->temp_buffer[0][0], s->js_left,  buf_size);
+                memcpy(s->temp_buffer[0][1], s->js_right, buf_size);
+            } else {
+                memcpy(s->temp_buffer[0][0], samples_l, buf_size);
+                memcpy(s->temp_buffer[0][1], samples_r, buf_size);
+            }
+
+            CLEAR(save_decorr_passes);
+
+            /* the passes ping-pong between temp_buffer[0] and temp_buffer[1] */
+            for (j = 0; j < nterms; j++) {
+                CLEAR(temp_decorr_pass);
+                temp_decorr_pass.delta = wpds->delta;
+                temp_decorr_pass.value = wpds->terms[j];
+
+                /* without cross decorrelation every negative term becomes -3 */
+                if (temp_decorr_pass.value < 0 && !(s->flags & WV_CROSS_DECORR))
+                    temp_decorr_pass.value = -3;
+
+                /* warm-up run (direction -1) over at most 2048 samples */
+                decorr_stereo(s->temp_buffer[ j&1][0], s->temp_buffer[ j&1][1],
+                              s->temp_buffer[~j&1][0], s->temp_buffer[~j&1][1],
+                              FFMIN(2048, nb_samples), &temp_decorr_pass, -1);
+
+                if (j) {
+                    CLEAR(temp_decorr_pass.samplesA);
+                    CLEAR(temp_decorr_pass.samplesB);
+                } else {
+                    reverse_decorr(&temp_decorr_pass);
+                }
+
+                /* remember the starting state of this pass */
+                memcpy(save_decorr_passes + j, &temp_decorr_pass, sizeof(struct Decorr));
+
+                if (((s->flags & MAG_MASK) >> MAG_LSB) >= 16)
+                    decorr_stereo(s->temp_buffer[ j&1][0], s->temp_buffer[ j&1][1],
+                                  s->temp_buffer[~j&1][0], s->temp_buffer[~j&1][1],
+                                  nb_samples, &temp_decorr_pass, 1);
+                else
+                    decorr_stereo_quick(s->temp_buffer[ j&1][0], s->temp_buffer[ j&1][1],
+                                        s->temp_buffer[~j&1][0], s->temp_buffer[~j&1][1],
+                                        nb_samples, &temp_decorr_pass);
+            }
+
+            size = log2stereo(s->temp_buffer[j&1][0], s->temp_buffer[j&1][1],
+                              nb_samples, log_limit);
+            /* a result of UINT32_MAX triggers a retry with half as many terms */
+            if (size != UINT32_MAX || !nterms)
+                break;
+            nterms >>= 1;
+        }
+
+        if (size < best_size) {
+            memcpy(s->best_buffer[0], s->temp_buffer[j&1][0], buf_size);
+            memcpy(s->best_buffer[1], s->temp_buffer[j&1][1], buf_size);
+            memcpy(s->decorr_passes, save_decorr_passes, sizeof(struct Decorr) * MAX_TERMS);
+            s->num_terms = nterms;
+            s->best_decorr = c;
+            best_size = size;
+        }
+
+        if (pi++)
+            s->mask_decorr = s->mask_decorr ? ((s->mask_decorr << 1) & (s->num_decorrs - 1)) : 1;
+    }
+
+    if (force_js || (s->decorr_specs[s->best_decorr].joint_stereo && !force_ts))
+        s->flags |= WV_JOINT_STEREO;
+    else
+        s->flags &= ~((uint32_t) WV_JOINT_STEREO);
+
+    if (s->extra_flags) {
+        if (s->flags & WV_JOINT_STEREO) {
+            analyze_stereo(s, s->js_left, s->js_right, do_samples);
+
+            if (do_samples) {
+                memcpy(samples_l, s->js_left,  buf_size);
+                memcpy(samples_r, s->js_right, buf_size);
+            }
+        } else
+            analyze_stereo(s, samples_l, samples_r, do_samples);
+    } else if (do_samples) {
+        memcpy(samples_l, s->best_buffer[0], buf_size);
+        memcpy(samples_r, s->best_buffer[1], buf_size);
+    }
+
+    /* re-initialise the entropy coder state when the setup may have changed */
+    if (s->extra_flags || no_history ||
+        s->joint_stereo != s->decorr_specs[s->best_decorr].joint_stereo) {
+        s->joint_stereo = s->decorr_specs[s->best_decorr].joint_stereo;
+        CLEAR(s->w);
+        scan_word(s, &s->w.c[0], s->best_buffer[0], nb_samples, -1);
+        scan_word(s, &s->w.c[1], s->best_buffer[1], nb_samples, -1);
+    }
+    return 0;
+}
+
+/*
+ * Number of bits needed for av, looked up one byte at a time in nbits_table.
+ * The argument is evaluated several times, so it must have no side effects.
+ */
+#define count_bits(av)                                    \
+    ((av) < (1  <<  8) ? nbits_table[av]                 : \
+     (av) < (1L << 16) ? nbits_table[(av) >>  8] +  8    : \
+     (av) < (1L << 24) ? nbits_table[(av) >> 16] + 16    : \
+                         nbits_table[(av) >> 24] + 24)
+
+/*
+ * Write all pending entropy-coder state in s->w to the bitstream s->pb and
+ * reset it.
+ *
+ * In order, this emits: the accumulated zero-run length (zeros_acc), the
+ * pending run of one bits (holding_one), the pending terminating zero bit
+ * (holding_zero) and the pending data bits (pend_data / pend_count).
+ *
+ * Run lengths are written as cbits one bits, a zero bit, and then the value's
+ * bits from least significant upwards, leaving out its most significant bit.
+ */
+static void encode_flush(WavPackEncodeContext *s)
+{
+    WavPackWords *w = &s->w;
+    PutBitContext *pb = &s->pb;
+
+    if (w->zeros_acc) {
+        int cbits = count_bits(w->zeros_acc);
+
+        /* put_bits() is called with at most 31 bits at a time */
+        do {
+            if (cbits > 31) {
+                put_bits(pb, 31, 0x7FFFFFFF);
+                cbits -= 31;
+            } else {
+                /* unsigned shift: 1 << 31 would overflow a signed int */
+                put_bits(pb, cbits, (1U << cbits) - 1);
+                cbits = 0;
+            }
+        } while (cbits);
+
+        put_bits(pb, 1, 0);
+
+        while (w->zeros_acc > 1) {
+            put_bits(pb, 1, w->zeros_acc & 1);
+            w->zeros_acc >>= 1;
+        }
+
+        w->zeros_acc = 0;
+    }
+
+    if (w->holding_one) {
+        if (w->holding_one >= 16) {
+            int cbits;
+
+            /* 16 ones and a zero mark the escape; the remainder of the run
+             * length follows in the same form as the zero-run length above */
+            put_bits(pb, 16, (1 << 16) - 1);
+            put_bits(pb, 1, 0);
+            w->holding_one -= 16;
+            cbits = count_bits(w->holding_one);
+
+            do {
+                if (cbits > 31) {
+                    put_bits(pb, 31, 0x7FFFFFFF);
+                    cbits -= 31;
+                } else {
+                    /* unsigned shift: 1 << 31 would overflow a signed int */
+                    put_bits(pb, cbits, (1U << cbits) - 1);
+                    cbits = 0;
+                }
+            } while (cbits);
+
+            put_bits(pb, 1, 0);
+
+            while (w->holding_one > 1) {
+                put_bits(pb, 1, w->holding_one & 1);
+                w->holding_one >>= 1;
+            }
+
+            /* the escape form carries its own terminator */
+            w->holding_zero = 0;
+        } else {
+            /* short runs are written literally; holding_one < 16 here */
+            put_bits(pb, w->holding_one, (1 << w->holding_one) - 1);
+        }
+
+        w->holding_one = 0;
+    }
+
+    if (w->holding_zero) {
+        put_bits(pb, 1, 0);
+        w->holding_zero = 0;
+    }
+
+    if (w->pend_count) {
+        put_bits(pb, w->pend_count, w->pend_data);
+        w->pend_data = w->pend_count = 0;
+    }
+}
+
+/*
+ * Entropy-code one residual sample of channel c.
+ *
+ * The sample is placed in one of the intervals delimited by the channel's
+ * three medians (read through GET_MED and adapted through INC_MED/DEC_MED);
+ * the interval index is kept as a pending run of ones and the offset inside
+ * the interval plus the sign bit are appended to w->pend_data.  Output is
+ * deferred through the holding_* / pend_* fields of s->w and written by
+ * encode_flush().
+ *
+ * NOTE(review): GET_MED/INC_MED/DEC_MED are defined outside this chunk and
+ * presumably operate on the local `c` - confirm.
+ */
+static void wavpack_encode_sample(WavPackEncodeContext *s, WvChannel *c, int32_t sample)
+{
+    WavPackWords *w = &s->w;
+    uint32_t ones_count, low, high;
+    int sign = sample < 0;
+
+    /* zero-run mode: both channels' first medians are below 2 and no zero bit
+     * is being held */
+    if (s->w.c[0].median[0] < 2 && !s->w.holding_zero && s->w.c[1].median[0] < 2) {
+        if (w->zeros_acc) {
+            /* inside a run: a non-zero sample ends it, a zero extends it */
+            if (sample)
+                encode_flush(s);
+            else {
+                w->zeros_acc++;
+                return;
+            }
+        } else if (sample) {
+            /* no run in progress and none starts here */
+            put_bits(&s->pb, 1, 0);
+        } else {
+            /* a zero sample starts a new run and resets both channels' medians */
+            CLEAR(s->w.c[0].median);
+            CLEAR(s->w.c[1].median);
+            w->zeros_acc = 1;
+            return;
+        }
+    }
+
+    /* code the magnitude of negative samples as the one's complement */
+    if (sign)
+        sample = ~sample;
+
+    /* locate the interval [low, high] containing the sample; ones_count is
+     * the index of that interval */
+    if (sample < (int32_t) GET_MED(0)) {
+        ones_count = low = 0;
+        high = GET_MED(0) - 1;
+        DEC_MED(0);
+    } else {
+        low = GET_MED(0);
+        INC_MED(0);
+
+        if (sample - low < GET_MED(1)) {
+            ones_count = 1;
+            high = low + GET_MED(1) - 1;
+            DEC_MED(1);
+        } else {
+            low += GET_MED(1);
+            INC_MED(1);
+
+            if (sample - low < GET_MED(2)) {
+                ones_count = 2;
+                high = low + GET_MED(2) - 1;
+                DEC_MED(2);
+            } else {
+                /* beyond the third median: intervals of width GET_MED(2) */
+                ones_count = 2 + (sample - low) / GET_MED(2);
+                low += (ones_count - 2) * GET_MED(2);
+                high = low + GET_MED(2) - 1;
+                INC_MED(2);
+            }
+        }
+    }
+
+    /* combine with the zero bit held back from the previous sample */
+    if (w->holding_zero) {
+        if (ones_count)
+            w->holding_one++;
+
+        encode_flush(s);
+
+        if (ones_count) {
+            w->holding_zero = 1;
+            ones_count--;
+        } else
+            w->holding_zero = 0;
+    } else
+        w->holding_zero = 1;
+
+    w->holding_one = ones_count * 2;
+
+    /* offset inside the interval: codes below `extras` take bitcount - 1
+     * bits, the others take bitcount bits */
+    if (high != low) {
+        uint32_t maxcode = high - low, code = sample - low;
+        int bitcount = count_bits(maxcode);
+        uint32_t extras = (1 << bitcount) - maxcode - 1;
+
+        if (code < extras) {
+            w->pend_data |= code << w->pend_count;
+            w->pend_count += bitcount - 1;
+        } else {
+            w->pend_data |= ((code + extras) >> 1) << w->pend_count;
+            w->pend_count += bitcount - 1;
+            w->pend_data |= ((code + extras) & 1) << w->pend_count++;
+        }
+    }
+
+    /* the sign bit follows the offset */
+    w->pend_data |= ((int32_t) sign << w->pend_count++);
+
+    if (!w->holding_zero)
+        encode_flush(s);
+}
+
+/*
+ * Write the bits of each sample that are sent separately for 32-bit integer
+ * audio.
+ *
+ * For every sample, s->int32_sent_bits bits are taken after shifting right
+ * by int32_zeros + int32_ones + int32_dups and written to s->pb; with stereo
+ * data the left and right samples are interleaved.  Nothing is written when
+ * int32_sent_bits is 0.
+ */
+static void pack_int32(WavPackEncodeContext *s,
+                       int32_t *samples_l, int32_t *samples_r,
+                       int nb_samples)
+{
+    const int sent_bits = s->int32_sent_bits;
+    const int pre_shift = s->int32_zeros + s->int32_ones + s->int32_dups;
+    const int stereo    = !(s->flags & WV_MONO_DATA);
+    PutBitContext *pb   = &s->pb;
+    int32_t mask;
+    int n;
+
+    if (sent_bits == 0)
+        return;
+
+    mask = (1 << sent_bits) - 1;
+
+    for (n = 0; n < nb_samples; n++) {
+        put_bits(pb, sent_bits, (samples_l[n] >> pre_shift) & mask);
+        if (stereo)
+            put_bits(pb, sent_bits, (samples_r[n] >> pre_shift) & mask);
+    }
+}
+
+/*
+ * Write the side information needed to restore one float sample exactly.
+ *
+ * The sample is inspected through get_exponent() / get_mantissa() /
+ * get_sign().  Depending on its exponent relative to s->float_max_exp and on
+ * s->float_flags, this emits the NaN/infinity marker, the mantissa bits
+ * lost to shifting, or the description of a value that became zero.
+ */
+static void pack_float_sample(WavPackEncodeContext *s, int32_t *sample)
+{
+    PutBitContext *pb = &s->pb;
+    const int max_exp = s->float_max_exp;
+    int32_t magnitude, shift;
+
+    /* exponent 255: one flag bit, plus the mantissa when it is non-zero;
+     * nothing else is ever written for such samples */
+    if (get_exponent(*sample) == 255) {
+        if (get_mantissa(*sample)) {
+            put_bits(pb, 1, 1);
+            put_bits(pb, 23, get_mantissa(*sample));
+        } else {
+            put_bits(pb, 1, 0);
+        }
+        return;
+    }
+
+    if (get_exponent(*sample)) {
+        shift     = max_exp - get_exponent(*sample);
+        magnitude = 0x800000 + get_mantissa(*sample);
+    } else {
+        shift     = max_exp ? max_exp - 1 : 0;
+        magnitude = get_mantissa(*sample);
+    }
+
+    magnitude = shift < 25 ? magnitude >> shift : 0;
+
+    if (magnitude) {
+        /* the value survives as an integer; send what the shift dropped */
+        if (!shift)
+            return;
+
+        if (s->float_flags & FLOAT_SHIFT_SENT)
+            put_bits(pb, shift, get_mantissa(*sample) & ((1 << shift) - 1));
+        else if (s->float_flags & FLOAT_SHIFT_SAME)
+            put_bits(pb, 1, get_mantissa(*sample) & 1);
+        return;
+    }
+
+    /* the value became zero */
+    if (!(s->float_flags & FLOAT_ZEROS_SENT))
+        return;
+
+    if (!get_exponent(*sample) && !get_mantissa(*sample)) {
+        /* a true zero: flag bit, plus the sign when negative zeros occur */
+        put_bits(pb, 1, 0);
+        if (s->float_flags & FLOAT_NEG_ZEROS)
+            put_bits(pb, 1, get_sign(*sample));
+        return;
+    }
+
+    /* a small non-zero value: send it in full */
+    put_bits(pb, 1, 1);
+    put_bits(pb, 23, get_mantissa(*sample));
+    if (max_exp >= 25)
+        put_bits(pb, 8, get_exponent(*sample));
+    put_bits(pb, 1, get_sign(*sample));
+}
+
+/*
+ * Call pack_float_sample() for every sample of the block: left samples
+ * only when WV_MONO_DATA is set, otherwise left and right interleaved.
+ */
+static void pack_float(WavPackEncodeContext *s,
+                       int32_t *samples_l, int32_t *samples_r,
+                       int nb_samples)
+{
+    const int mono = !!(s->flags & WV_MONO_DATA);
+    int n = 0;
+
+    while (n < nb_samples) {
+        pack_float_sample(s, samples_l + n);
+        if (!mono)
+            pack_float_sample(s, samples_r + n);
+        n++;
+    }
+}
+
+/*
+ * Apply one decorrelation pass in place to a stereo block.
+ *
+ * Every output sample is the input sample minus APPLY_WEIGHT(weight, source),
+ * where the prediction source depends on dpp->value.  After each sample the
+ * weight is adapted with UPDATE_WEIGHT (or UPDATE_WEIGHT_CLIP for the
+ * negative terms) using dpp->delta.  The history in dpp->samplesA/B and the
+ * weights are updated so that the state carries on into the next call.
+ *
+ * NOTE(review): the mask arithmetic in the default case assumes MAX_TERM is
+ * a power of two - confirm.
+ */
+static void decorr_stereo_pass2(struct Decorr *dpp,
+                                int32_t *samples_l, int32_t *samples_r,
+                                int nb_samples)
+{
+    int i, m, k;
+
+    switch (dpp->value) {
+    case 17:
+        /* source: 2 * previous input - the input before that */
+        for (i = 0; i < nb_samples; i++) {
+            int32_t sam, tmp;
+
+            sam = 2 * dpp->samplesA[0] - dpp->samplesA[1];
+            dpp->samplesA[1] = dpp->samplesA[0];
+            samples_l[i] = tmp = (dpp->samplesA[0] = samples_l[i]) - APPLY_WEIGHT(dpp->weightA, sam);
+            UPDATE_WEIGHT(dpp->weightA, dpp->delta, sam, tmp);
+
+            sam = 2 * dpp->samplesB[0] - dpp->samplesB[1];
+            dpp->samplesB[1] = dpp->samplesB[0];
+            samples_r[i] = tmp = (dpp->samplesB[0] = samples_r[i]) - APPLY_WEIGHT(dpp->weightB, sam);
+            UPDATE_WEIGHT(dpp->weightB, dpp->delta, sam, tmp);
+        }
+        break;
+    case 18:
+        /* source: previous input + half its difference to the one before */
+        for (i = 0; i < nb_samples; i++) {
+            int32_t sam, tmp;
+
+            sam = dpp->samplesA[0] + ((dpp->samplesA[0] - dpp->samplesA[1]) >> 1);
+            dpp->samplesA[1] = dpp->samplesA[0];
+            samples_l[i] = tmp = (dpp->samplesA[0] = samples_l[i]) - APPLY_WEIGHT(dpp->weightA, sam);
+            UPDATE_WEIGHT(dpp->weightA, dpp->delta, sam, tmp);
+
+            sam = dpp->samplesB[0] + ((dpp->samplesB[0] - dpp->samplesB[1]) >> 1);
+            dpp->samplesB[1] = dpp->samplesB[0];
+            samples_r[i] = tmp = (dpp->samplesB[0] = samples_r[i]) - APPLY_WEIGHT(dpp->weightB, sam);
+            UPDATE_WEIGHT(dpp->weightB, dpp->delta, sam, tmp);
+        }
+        break;
+    default:
+        /* all other values: the history is a ring of MAX_TERM entries read
+         * at m and written dpp->value slots ahead at k */
+        for (m = 0, k = dpp->value & (MAX_TERM - 1), i = 0; i < nb_samples; i++) {
+            int32_t sam, tmp;
+
+            sam = dpp->samplesA[m];
+            samples_l[i] = tmp = (dpp->samplesA[k] = samples_l[i]) - APPLY_WEIGHT(dpp->weightA, sam);
+            UPDATE_WEIGHT(dpp->weightA, dpp->delta, sam, tmp);
+
+            sam = dpp->samplesB[m];
+            samples_r[i] = tmp = (dpp->samplesB[k] = samples_r[i]) - APPLY_WEIGHT(dpp->weightB, sam);
+            UPDATE_WEIGHT(dpp->weightB, dpp->delta, sam, tmp);
+
+            m = (m + 1) & (MAX_TERM - 1);
+            k = (k + 1) & (MAX_TERM - 1);
+        }
+        /* rotate the ring so that the next entry to be read sits at index 0,
+         * which is where the next call starts reading */
+        if (m) {
+            int32_t temp_A[MAX_TERM], temp_B[MAX_TERM];
+
+            memcpy(temp_A, dpp->samplesA, sizeof (dpp->samplesA));
+            memcpy(temp_B, dpp->samplesB, sizeof (dpp->samplesB));
+
+            for (k = 0; k < MAX_TERM; k++) {
+                dpp->samplesA[k] = temp_A[m];
+                dpp->samplesB[k] = temp_B[m];
+                m = (m + 1) & (MAX_TERM - 1);
+            }
+        }
+        break;
+    case -1:
+        /* left is predicted from the previous right input (samplesA[0]),
+         * right from the current left input */
+        for (i = 0; i < nb_samples; i++) {
+            int32_t sam_A, sam_B, tmp;
+
+            sam_A = dpp->samplesA[0];
+            samples_l[i] = tmp = (sam_B = samples_l[i]) - APPLY_WEIGHT(dpp->weightA, sam_A);
+            UPDATE_WEIGHT_CLIP(dpp->weightA, dpp->delta, sam_A, tmp);
+
+            samples_r[i] = tmp = (dpp->samplesA[0] = samples_r[i]) - APPLY_WEIGHT(dpp->weightB, sam_B);
+            UPDATE_WEIGHT_CLIP(dpp->weightB, dpp->delta, sam_B, tmp);
+        }
+        break;
+    case -2:
+        /* right is predicted from the previous left input (samplesB[0]),
+         * left from the current right input */
+        for (i = 0; i < nb_samples; i++) {
+            int32_t sam_A, sam_B, tmp;
+
+            sam_B = dpp->samplesB[0];
+            samples_r[i] = tmp = (sam_A = samples_r[i]) - APPLY_WEIGHT(dpp->weightB, sam_B);
+            UPDATE_WEIGHT_CLIP(dpp->weightB, dpp->delta, sam_B, tmp);
+
+            samples_l[i] = tmp = (dpp->samplesB[0] = samples_l[i]) - APPLY_WEIGHT(dpp->weightA, sam_A);
+            UPDATE_WEIGHT_CLIP(dpp->weightA, dpp->delta, sam_A, tmp);
+        }
+        break;
+    case -3:
+        /* each channel is predicted from the previous input of the other:
+         * samplesA[0] holds the last right input, samplesB[0] the last left */
+        for (i = 0; i < nb_samples; i++) {
+            int32_t sam_A, sam_B, tmp;
+
+            sam_A = dpp->samplesA[0];
+            sam_B = dpp->samplesB[0];
+
+            dpp->samplesA[0] = tmp = samples_r[i];
+            samples_r[i] = tmp -= APPLY_WEIGHT(dpp->weightB, sam_B);
+            UPDATE_WEIGHT_CLIP(dpp->weightB, dpp->delta, sam_B, tmp);
+
+            dpp->samplesB[0] = tmp = samples_l[i];
+            samples_l[i] = tmp -= APPLY_WEIGHT(dpp->weightA, sam_A);
+            UPDATE_WEIGHT_CLIP(dpp->weightA, dpp->delta, sam_A, tmp);
+        }
+        break;
+    }
+}
+
+/*
+ * Weight update with a fixed step of 2; the delta argument is not used.
+ * When source and result are both non-zero, bit 31 of (source ^ result) is
+ * moved to bit 2, so weight is lowered by 2 if that bit is set and raised by
+ * 2 otherwise.  For 32-bit signed operands that bit tells whether the signs
+ * differ.
+ *
+ * The expansion is a bare `if` statement with unparenthesised arguments:
+ * use it only as a complete statement with simple operands.
+ */
+#define update_weight_d2(weight, delta, source, result) \
+    if (source && result) \
+        weight -= (((source ^ result) >> 29) & 4) - 2;
+
+/*
+ * Like update_weight_d2, but the weight is kept within [-1024, 1024].
+ * NOTE(review): relies on >> of a negative int32_t being an arithmetic shift
+ * so that s is 0 or -1 - confirm for the supported compilers.
+ *
+ * The expansion declares a local named `s`, so the arguments must not refer
+ * to a variable of that name.
+ */
+#define update_weight_clip_d2(weight, delta, source, result) \
+    if (source && result) { \
+        const int32_t s = (source ^ result) >> 31; \
+        if ((weight = (weight ^ s) + (2 - s)) > 1024) weight = 1024; \
+        weight = (weight ^ s) - s; \
+    }
+
+/*
+ * Apply one decorrelation pass in place to a stereo block, using
+ * APPLY_WEIGHT_I for the prediction and the fixed-step update_weight_d2 /
+ * update_weight_clip_d2 macros for the weight adaptation.  dpp->delta is
+ * passed to those macros but they do not use it.
+ *
+ * The prediction source depends on dpp->value; the history in
+ * dpp->samplesA/B and the weights are updated so that the state carries on
+ * into the next call.
+ *
+ * NOTE(review): the mask arithmetic in the default case assumes MAX_TERM is
+ * a power of two - confirm.
+ */
+static void decorr_stereo_pass_id2(struct Decorr *dpp,
+                                   int32_t *samples_l, int32_t *samples_r,
+                                   int nb_samples)
+{
+    int i, m, k;
+
+    switch (dpp->value) {
+    case 17:
+        /* source: 2 * previous input - the input before that */
+        for (i = 0; i < nb_samples; i++) {
+            int32_t sam, tmp;
+
+            sam = 2 * dpp->samplesA[0] - dpp->samplesA[1];
+            dpp->samplesA[1] = dpp->samplesA[0];
+            samples_l[i] = tmp = (dpp->samplesA[0] = samples_l[i]) - APPLY_WEIGHT_I(dpp->weightA, sam);
+            update_weight_d2(dpp->weightA, dpp->delta, sam, tmp);
+
+            sam = 2 * dpp->samplesB[0] - dpp->samplesB[1];
+            dpp->samplesB[1] = dpp->samplesB[0];
+            samples_r[i] = tmp = (dpp->samplesB[0] = samples_r[i]) - APPLY_WEIGHT_I(dpp->weightB, sam);
+            update_weight_d2(dpp->weightB, dpp->delta, sam, tmp);
+        }
+        break;
+    case 18:
+        /* source: previous input + half its difference to the one before */
+        for (i = 0; i < nb_samples; i++) {
+            int32_t sam, tmp;
+
+            sam = dpp->samplesA[0] + ((dpp->samplesA[0] - dpp->samplesA[1]) >> 1);
+            dpp->samplesA[1] = dpp->samplesA[0];
+            samples_l[i] = tmp = (dpp->samplesA[0] = samples_l[i]) - APPLY_WEIGHT_I(dpp->weightA, sam);
+            update_weight_d2(dpp->weightA, dpp->delta, sam, tmp);
+
+            sam = dpp->samplesB[0] + ((dpp->samplesB[0] - dpp->samplesB[1]) >> 1);
+            dpp->samplesB[1] = dpp->samplesB[0];
+            samples_r[i] = tmp = (dpp->samplesB[0] = samples_r[i]) - APPLY_WEIGHT_I(dpp->weightB, sam);
+            update_weight_d2(dpp->weightB, dpp->delta, sam, tmp);
+        }
+        break;
+    default:
+        /* all other values: the history is a ring of MAX_TERM entries read
+         * at m and written dpp->value slots ahead at k */
+        for (m = 0, k = dpp->value & (MAX_TERM - 1), i = 0; i < nb_samples; i++) {
+            int32_t sam, tmp;
+
+            sam = dpp->samplesA[m];
+            samples_l[i] = tmp = (dpp->samplesA[k] = samples_l[i]) - APPLY_WEIGHT_I(dpp->weightA, sam);
+            update_weight_d2(dpp->weightA, dpp->delta, sam, tmp);
+
+            sam = dpp->samplesB[m];
+            samples_r[i] = tmp = (dpp->samplesB[k] = samples_r[i]) - APPLY_WEIGHT_I(dpp->weightB, sam);
+            update_weight_d2(dpp->weightB, dpp->delta, sam, tmp);
+
+            m = (m + 1) & (MAX_TERM - 1);
+            k = (k + 1) & (MAX_TERM - 1);
+        }
+
+        /* rotate the ring so that the next entry to be read sits at index 0,
+         * which is where the next call starts reading */
+        if (m) {
+            int32_t temp_A[MAX_TERM], temp_B[MAX_TERM];
+
+            memcpy(temp_A, dpp->samplesA, sizeof(dpp->samplesA));
+            memcpy(temp_B, dpp->samplesB, sizeof(dpp->samplesB));
+
+            for (k = 0; k < MAX_TERM; k++) {
+                dpp->samplesA[k] = temp_A[m];
+                dpp->samplesB[k] = temp_B[m];
+                m = (m + 1) & (MAX_TERM - 1);
+            }
+        }
+        break;
+    case -1:
+        /* left is predicted from the previous right input (samplesA[0]),
+         * right from the current left input */
+        for (i = 0; i < nb_samples; i++) {
+            int32_t sam_A, sam_B, tmp;
+
+            sam_A = dpp->samplesA[0];
+            samples_l[i] = tmp = (sam_B = samples_l[i]) - APPLY_WEIGHT_I(dpp->weightA, sam_A);
+            update_weight_clip_d2(dpp->weightA, dpp->delta, sam_A, tmp);
+
+            samples_r[i] = tmp = (dpp->samplesA[0] = samples_r[i]) - APPLY_WEIGHT_I(dpp->weightB, sam_B);
+            update_weight_clip_d2(dpp->weightB, dpp->delta, sam_B, tmp);
+        }
+        break;
+    case -2:
+        /* right is predicted from the previous left input (samplesB[0]),
+         * left from the current right input */
+        for (i = 0; i < nb_samples; i++) {
+            int32_t sam_A, sam_B, tmp;
+
+            sam_B = dpp->samplesB[0];
+            samples_r[i] = tmp = (sam_A = samples_r[i]) - APPLY_WEIGHT_I(dpp->weightB, sam_B);
+            update_weight_clip_d2(dpp->weightB, dpp->delta, sam_B, tmp);
+
+            samples_l[i] = tmp = (dpp->samplesB[0] = samples_l[i]) - APPLY_WEIGHT_I(dpp->weightA, sam_A);
+            update_weight_clip_d2(dpp->weightA, dpp->delta, sam_A, tmp);
+        }
+        break;
+    case -3:
+        /* each channel is predicted from the previous input of the other:
+         * samplesA[0] holds the last right input, samplesB[0] the last left */
+        for (i = 0; i < nb_samples; i++) {
+            int32_t sam_A, sam_B, tmp;
+
+            sam_A = dpp->samplesA[0];
+            sam_B = dpp->samplesB[0];
+
+            dpp->samplesA[0] = tmp = samples_r[i];
+            samples_r[i] = tmp -= APPLY_WEIGHT_I(dpp->weightB, sam_B);
+            update_weight_clip_d2(dpp->weightB, dpp->delta, sam_B, tmp);
+
+            dpp->samplesB[0] = tmp = samples_l[i];
+            samples_l[i] = tmp -= APPLY_WEIGHT_I(dpp->weightA, sam_A);
+            update_weight_clip_d2(dpp->weightA, dpp->delta, sam_A, tmp);
+        }
+        break;
+    }
+}
+
+/**
+ * Write the two-byte header of a metadata sub-block.
+ *
+ * The first byte is the sub-block id in flags, with WP_IDF_ODD OR-ed in
+ * when size is odd; the second byte is size rounded up to an even value
+ * and halved.
+ *
+ * @param pb    byte writer receiving the header
+ * @param flags sub-block id (plus any id flags already set by the caller)
+ * @param size  payload size in bytes, before padding
+ */
+static void put_metadata_block(PutByteContext *pb, int flags, int size)
+{
+    const int id    = (size & 1) ? (flags | WP_IDF_ODD) : flags;
+    const int words = (size + 1) >> 1;
+
+    bytestream2_put_byte(pb, id);
+    bytestream2_put_byte(pb, words);
+}
+
+/**
+ * Encode one WavPack block (a mono channel or a stereo pair) into out.
+ *
+ * The function writes the block header, the metadata sub-blocks (channel
+ * info, sample rate, decorrelation terms / weights / samples, entropy
+ * medians, float or int32 info), the entropy-coded sample data and, when
+ * scan_float() / scan_int32() report extra bits, the extra-bits sub-block.
+ *
+ * @param s         encoder context; s->flags, s->ch_offset and the
+ *                  decorrelation / entropy state are updated
+ * @param samples_l left (or mono) samples, modified in place
+ * @param samples_r right samples (used for stereo data), modified in place
+ * @param out       destination buffer for the block
+ * @param out_size  size of out in bytes
+ * @return number of bytes written to out, or a negative AVERROR code
+ *         (AVERROR(ENOMEM) if a scratch buffer cannot be allocated, or the
+ *         error returned by wv_mono() / wv_stereo())
+ */
+static int wavpack_encode_block(WavPackEncodeContext *s,
+                                int32_t *samples_l, int32_t *samples_r,
+                                uint8_t *out, int out_size)
+{
+    int block_size, start, end, data_size, tcount, temp, m = 0;
+    int i, j, ret = 0, got_extra = 0, nb_samples = s->block_samples;
+    uint32_t crc = 0xffffffffu;
+    struct Decorr *dpp;
+    PutByteContext pb;
+
+    /* Optional "false stereo" detection: both channels identical and not
+     * all zero. Entering or leaving that mode resets the decorrelation
+     * terms and the entropy state. */
+    if (!(s->flags & WV_MONO) && s->optimize_mono) {
+        int32_t lor = 0, diff = 0;
+
+        for (i = 0; i < nb_samples; i++) {
+            lor  |= samples_l[i] | samples_r[i];
+            diff |= samples_l[i] - samples_r[i];
+
+            if (lor && diff)
+                break;
+        }
+
+        if (i == nb_samples && lor && !diff) {
+            s->flags &= ~(WV_JOINT_STEREO | WV_CROSS_DECORR);
+            s->flags |= WV_FALSE_STEREO;
+
+            if (!s->false_stereo) {
+                s->false_stereo = 1;
+                s->num_terms = 0;
+                CLEAR(s->w);
+            }
+        } else if (s->false_stereo) {
+            s->false_stereo = 0;
+            s->num_terms = 0;
+            CLEAR(s->w);
+        }
+    }
+
+    /* Apply the shift stored in the flags and reduce the magnitude field
+     * by the same amount (clearing it if it would go negative). */
+    if (s->flags & SHIFT_MASK) {
+        int shift = (s->flags & SHIFT_MASK) >> SHIFT_LSB;
+        int mag = (s->flags & MAG_MASK) >> MAG_LSB;
+
+        if (s->flags & WV_MONO_DATA)
+            shift_mono(samples_l, nb_samples, shift);
+        else
+            shift_stereo(samples_l, samples_r, nb_samples, shift);
+
+        if ((mag -= shift) < 0)
+            s->flags &= ~MAG_MASK;
+        else
+            s->flags -= (1 << MAG_LSB) * shift;
+    }
+
+    if ((s->flags & WV_FLOAT_DATA) || (s->flags & MAG_MASK) >> MAG_LSB >= 24) {
+        /* Keep an untouched copy of the input for pack_float()/pack_int32().
+         * av_fast_padded_malloc() leaves the pointer NULL on failure, so it
+         * must be checked before the copy. */
+        av_fast_padded_malloc(&s->orig_l, &s->orig_l_size, sizeof(int32_t) * nb_samples);
+        if (!s->orig_l)
+            return AVERROR(ENOMEM);
+        memcpy(s->orig_l, samples_l, sizeof(int32_t) * nb_samples);
+        if (!(s->flags & WV_MONO_DATA)) {
+            av_fast_padded_malloc(&s->orig_r, &s->orig_r_size, sizeof(int32_t) * nb_samples);
+            if (!s->orig_r)
+                return AVERROR(ENOMEM);
+            memcpy(s->orig_r, samples_r, sizeof(int32_t) * nb_samples);
+        }
+
+        if (s->flags & WV_FLOAT_DATA)
+            got_extra = scan_float(s, samples_l, samples_r, nb_samples);
+        else
+            got_extra = scan_int32(s, samples_l, samples_r, nb_samples);
+        s->num_terms = 0;
+    } else {
+        scan_int23(s, samples_l, samples_r, nb_samples);
+        if (s->shift != s->int32_zeros + s->int32_ones + s->int32_dups) {
+            s->shift = s->int32_zeros + s->int32_ones + s->int32_dups;
+            s->num_terms = 0;
+        }
+    }
+
+    /* Without analysis passes and without terms from a previous block, run
+     * a single temporary pass to obtain decorrelation terms. */
+    if (!s->num_passes && !s->num_terms) {
+        s->num_passes = 1;
+
+        if (s->flags & WV_MONO_DATA)
+            ret = wv_mono(s, samples_l, 1, 0);
+        else
+            ret = wv_stereo(s, samples_l, samples_r, 1, 0);
+
+        s->num_passes = 0;
+    }
+    /* The CRC is accumulated over the samples before wv_mono()/wv_stereo()
+     * process them for output. */
+    if (s->flags & WV_MONO_DATA) {
+        for (i = 0; i < nb_samples; i++)
+            crc += (crc << 1) + samples_l[i];
+
+        if (s->num_passes)
+            ret = wv_mono(s, samples_l, !s->num_terms, 1);
+    } else {
+        for (i = 0; i < nb_samples; i++)
+            crc += (crc << 3) + (samples_l[i] << 1) + samples_l[i] + samples_r[i];
+
+        if (s->num_passes)
+            ret = wv_stereo(s, samples_l, samples_r, !s->num_terms, 1);
+    }
+    if (ret < 0)
+        return ret;
+
+    if (!s->ch_offset)
+        s->flags |= WV_INITIAL_BLOCK;
+
+    /* Advance by one channel for a mono block, two for a stereo block. */
+    s->ch_offset += 1 + !(s->flags & WV_MONO);
+
+    if (s->ch_offset == s->avctx->channels)
+        s->flags |= WV_FINAL_BLOCK;
+
+    /* Block header; the 32-bit size field at offset 4 is patched at the end. */
+    bytestream2_init_writer(&pb, out, out_size);
+    bytestream2_put_le32(&pb, MKTAG('w', 'v', 'p', 'k'));
+    bytestream2_put_le32(&pb, 0);
+    bytestream2_put_le16(&pb, 0x410);
+    bytestream2_put_le16(&pb, 0);
+    bytestream2_put_le32(&pb, 0);
+    bytestream2_put_le32(&pb, s->sample_index);
+    bytestream2_put_le32(&pb, nb_samples);
+    bytestream2_put_le32(&pb, s->flags);
+    bytestream2_put_le32(&pb, crc);
+
+    /* Channel info is only written in the initial block and only for
+     * layouts other than plain mono or stereo. */
+    if (s->flags & WV_INITIAL_BLOCK &&
+        s->avctx->channel_layout != AV_CH_LAYOUT_MONO &&
+        s->avctx->channel_layout != AV_CH_LAYOUT_STEREO) {
+        put_metadata_block(&pb, WP_ID_CHANINFO, 5);
+        bytestream2_put_byte(&pb, s->avctx->channels);
+        bytestream2_put_le32(&pb, s->avctx->channel_layout);
+        bytestream2_put_byte(&pb, 0);
+    }
+
+    /* An all-ones sample rate field means the rate is stored explicitly. */
+    if ((s->flags & SRATE_MASK) == SRATE_MASK) {
+        put_metadata_block(&pb, WP_ID_SAMPLE_RATE, 3);
+        bytestream2_put_le24(&pb, s->avctx->sample_rate);
+        bytestream2_put_byte(&pb, 0);
+    }
+
+    put_metadata_block(&pb, WP_ID_DECTERMS, s->num_terms);
+    for (i = 0; i < s->num_terms; i++) {
+        struct Decorr *dpp = &s->decorr_passes[i];
+        bytestream2_put_byte(&pb, ((dpp->value + 5) & 0x1f) | ((dpp->delta << 5) & 0xe0));
+    }
+    if (s->num_terms & 1)
+        bytestream2_put_byte(&pb, 0);
+
+/* Write a weight as one byte and replace the in-memory weight with the
+ * value restore_weight() yields for that byte. */
+#define WRITE_DECWEIGHT(type) do {            \
+        temp = store_weight(type);    \
+        bytestream2_put_byte(&pb, temp);      \
+        type = restore_weight(temp);  \
+    } while (0)
+
+    /* The sub-block header is written as a placeholder and back-patched
+     * once the payload length is known. */
+    bytestream2_put_byte(&pb, WP_ID_DECWEIGHTS);
+    bytestream2_put_byte(&pb, 0);
+    start = bytestream2_tell_p(&pb);
+    /* Find the last term whose stored weight is non-zero; weights of the
+     * terms after it are not written and are reset to 0 below. */
+    for (i = s->num_terms - 1; i >= 0; --i) {
+        struct Decorr *dpp = &s->decorr_passes[i];
+
+        if (store_weight(dpp->weightA) ||
+            (!(s->flags & WV_MONO_DATA) && store_weight(dpp->weightB)))
+                break;
+    }
+    tcount = i + 1;
+    for (i = 0; i < s->num_terms; i++) {
+        struct Decorr *dpp = &s->decorr_passes[i];
+        if (i < tcount) {
+            WRITE_DECWEIGHT(dpp->weightA);
+            if (!(s->flags & WV_MONO_DATA))
+                WRITE_DECWEIGHT(dpp->weightB);
+        } else {
+            dpp->weightA = dpp->weightB = 0;
+        }
+    }
+    end = bytestream2_tell_p(&pb);
+    out[start - 2] = WP_ID_DECWEIGHTS | (((end - start) & 1) ? WP_IDF_ODD: 0);
+    out[start - 1] = (end - start + 1) >> 1;
+    if ((end - start) & 1)
+        bytestream2_put_byte(&pb, 0);
+
+/* Write a history sample in log2s() form and replace the in-memory sample
+ * with wp_exp2() of the written value. */
+#define WRITE_DECSAMPLE(type) do {        \
+        temp = log2s(type);               \
+        type = wp_exp2(temp);             \
+        bytestream2_put_le16(&pb, temp);  \
+    } while (0)
+
+    bytestream2_put_byte(&pb, WP_ID_DECSAMPLES);
+    bytestream2_put_byte(&pb, 0);
+    start = bytestream2_tell_p(&pb);
+    /* Only the first term's history is written; the history of every
+     * other term is cleared. */
+    for (i = 0; i < s->num_terms; i++) {
+        struct Decorr *dpp = &s->decorr_passes[i];
+        if (i == 0) {
+            if (dpp->value > MAX_TERM) {
+                WRITE_DECSAMPLE(dpp->samplesA[0]);
+                WRITE_DECSAMPLE(dpp->samplesA[1]);
+                if (!(s->flags & WV_MONO_DATA)) {
+                    WRITE_DECSAMPLE(dpp->samplesB[0]);
+                    WRITE_DECSAMPLE(dpp->samplesB[1]);
+                }
+            } else if (dpp->value < 0) {
+                WRITE_DECSAMPLE(dpp->samplesA[0]);
+                WRITE_DECSAMPLE(dpp->samplesB[0]);
+            } else {
+                for (j = 0; j < dpp->value; j++) {
+                    WRITE_DECSAMPLE(dpp->samplesA[j]);
+                    if (!(s->flags & WV_MONO_DATA))
+                        WRITE_DECSAMPLE(dpp->samplesB[j]);
+                }
+            }
+        } else {
+            CLEAR(dpp->samplesA);
+            CLEAR(dpp->samplesB);
+        }
+    }
+    end = bytestream2_tell_p(&pb);
+    /* Payload consists of 16-bit values only, so its length is always even. */
+    out[start - 1] = (end - start) >> 1;
+
+/* Write the three medians of one channel in wp_log2() form and replace the
+ * in-memory medians with wp_exp2() of the written values. */
+#define WRITE_CHAN_ENTROPY(chan) do {               \
+        for (i = 0; i < 3; i++) {                   \
+            temp = wp_log2(s->w.c[chan].median[i]); \
+            bytestream2_put_le16(&pb, temp);        \
+            s->w.c[chan].median[i] = wp_exp2(temp); \
+        }                                           \
+    } while (0)
+
+    put_metadata_block(&pb, WP_ID_ENTROPY, 6 * (1 + (!(s->flags & WV_MONO_DATA))));
+    WRITE_CHAN_ENTROPY(0);
+    if (!(s->flags & WV_MONO_DATA))
+        WRITE_CHAN_ENTROPY(1);
+
+    if (s->flags & WV_FLOAT_DATA) {
+        put_metadata_block(&pb, WP_ID_FLOATINFO, 4);
+        bytestream2_put_byte(&pb, s->float_flags);
+        bytestream2_put_byte(&pb, s->float_shift);
+        bytestream2_put_byte(&pb, s->float_max_exp);
+        bytestream2_put_byte(&pb, 127);
+    }
+
+    if (s->flags & WV_INT32_DATA) {
+        put_metadata_block(&pb, WP_ID_INT32INFO, 4);
+        bytestream2_put_byte(&pb, s->int32_sent_bits);
+        bytestream2_put_byte(&pb, s->int32_zeros);
+        bytestream2_put_byte(&pb, s->int32_ones);
+        bytestream2_put_byte(&pb, s->int32_dups);
+    }
+
+    /* When no analysis passes are configured, the decorrelation is applied
+     * here, in place, on samples_l / samples_r. */
+    if (s->flags & WV_MONO_DATA && !s->num_passes) {
+        for (i = 0; i < nb_samples; i++) {
+            int32_t code = samples_l[i];
+
+            for (tcount = s->num_terms, dpp = s->decorr_passes; tcount--; dpp++) {
+                int32_t sam;
+
+                if (dpp->value > MAX_TERM) {
+                    if (dpp->value & 1)
+                        sam = 2 * dpp->samplesA[0] - dpp->samplesA[1];
+                    else
+                        sam = (3 * dpp->samplesA[0] - dpp->samplesA[1]) >> 1;
+
+                    dpp->samplesA[1] = dpp->samplesA[0];
+                    dpp->samplesA[0] = code;
+                } else {
+                    sam = dpp->samplesA[m];
+                    dpp->samplesA[(m + dpp->value) & (MAX_TERM - 1)] = code;
+                }
+
+                code -= APPLY_WEIGHT(dpp->weightA, sam);
+                UPDATE_WEIGHT(dpp->weightA, dpp->delta, sam, code);
+            }
+
+            m = (m + 1) & (MAX_TERM - 1);
+            samples_l[i] = code;
+        }
+        /* Rotate the circular histories so that index 0 is the oldest
+         * entry again; m returns to its starting value after each full
+         * MAX_TERM-step rotation. */
+        if (m) {
+            for (tcount = s->num_terms, dpp = s->decorr_passes; tcount--; dpp++)
+                if (dpp->value > 0 && dpp->value <= MAX_TERM) {
+                int32_t temp_A[MAX_TERM], temp_B[MAX_TERM];
+                int k;
+
+                memcpy(temp_A, dpp->samplesA, sizeof(dpp->samplesA));
+                memcpy(temp_B, dpp->samplesB, sizeof(dpp->samplesB));
+
+                for (k = 0; k < MAX_TERM; k++) {
+                    dpp->samplesA[k] = temp_A[m];
+                    dpp->samplesB[k] = temp_B[m];
+                    m = (m + 1) & (MAX_TERM - 1);
+                }
+            }
+        }
+    } else if (!s->num_passes) {
+        if (s->flags & WV_JOINT_STEREO) {
+            /* left becomes the difference, right becomes right + diff/2 */
+            for (i = 0; i < nb_samples; i++)
+                samples_r[i] += ((samples_l[i] -= samples_r[i]) >> 1);
+        }
+
+        for (i = 0; i < s->num_terms; i++) {
+            struct Decorr *dpp = &s->decorr_passes[i];
+            if (((s->flags & MAG_MASK) >> MAG_LSB) >= 16 || dpp->delta != 2)
+                decorr_stereo_pass2(dpp, samples_l, samples_r, nb_samples);
+            else
+                decorr_stereo_pass_id2(dpp, samples_l, samples_r, nb_samples);
+        }
+    }
+
+    /* The bit writer starts 3 bytes past the id byte, leaving room for the
+     * 24-bit length that is written once the payload size is known.
+     * NOTE(review): the size handed to init_put_bits() does not subtract
+     * those 3 reserved bytes - confirm the output buffer always has slack. */
+    bytestream2_put_byte(&pb, WP_ID_DATA | WP_IDF_LONG);
+    init_put_bits(&s->pb, pb.buffer + 3, bytestream2_get_bytes_left_p(&pb));
+    if (s->flags & WV_MONO_DATA) {
+        for (i = 0; i < nb_samples; i++)
+            wavpack_encode_sample(s, &s->w.c[0], s->samples[0][i]);
+    } else {
+        for (i = 0; i < nb_samples; i++) {
+            wavpack_encode_sample(s, &s->w.c[0], s->samples[0][i]);
+            wavpack_encode_sample(s, &s->w.c[1], s->samples[1][i]);
+        }
+    }
+    encode_flush(s);
+    flush_put_bits(&s->pb);
+    data_size = put_bits_count(&s->pb) >> 3;
+    bytestream2_put_le24(&pb, (data_size + 1) >> 1);
+    bytestream2_skip_p(&pb, data_size);
+    if (data_size & 1)
+        bytestream2_put_byte(&pb, 0);
+
+    if (got_extra) {
+        /* Same layout as above, with 4 more reserved bytes for the CRC of
+         * the extra bits (3 length bytes + 4 CRC bytes = 7). */
+        bytestream2_put_byte(&pb, WP_ID_EXTRABITS | WP_IDF_LONG);
+        init_put_bits(&s->pb, pb.buffer + 7, bytestream2_get_bytes_left_p(&pb));
+        if (s->flags & WV_FLOAT_DATA)
+            pack_float(s, s->orig_l, s->orig_r, nb_samples);
+        else
+            pack_int32(s, s->orig_l, s->orig_r, nb_samples);
+        flush_put_bits(&s->pb);
+        data_size = put_bits_count(&s->pb) >> 3;
+        bytestream2_put_le24(&pb, (data_size + 5) >> 1);
+        bytestream2_put_le32(&pb, s->crc_x);
+        bytestream2_skip_p(&pb, data_size);
+        if (data_size & 1)
+            bytestream2_put_byte(&pb, 0);
+    }
+
+    /* Patch the header size field: total block size minus the first 8 bytes. */
+    block_size = bytestream2_tell_p(&pb);
+    AV_WL32(out + 4, block_size - 8);
+
+    return block_size;
+}
+
+/**
+ * Convert one planar input channel into the encoder's int32_t work buffer.
+ *
+ * - U8P:  the 0x80 bias is removed, giving values in -128..127
+ * - S16P: values are copied unchanged
+ * - S32P: shifted right by 8 when bits_per_raw_sample <= 24, otherwise
+ *         copied verbatim
+ * - FLTP: the 32-bit patterns are copied verbatim
+ *
+ * @param s          encoder context (sample format is read from s->avctx)
+ * @param src        input plane for one channel
+ * @param dst        destination buffer with room for nb_samples entries
+ * @param nb_samples number of samples to convert
+ */
+static void fill_buffer(WavPackEncodeContext *s,
+                        const int8_t *src, int32_t *dst,
+                        int nb_samples)
+{
+    int i;
+
+#define COPY_SAMPLES(type, offset, shift) do {            \
+        const type *sptr = (const type *)src;             \
+        for (i = 0; i < nb_samples; i++)                  \
+            dst[i] = (sptr[i] - (offset)) >> (shift);     \
+    } while (0)
+
+    switch (s->avctx->sample_fmt) {
+    case AV_SAMPLE_FMT_U8P:
+        /* U8 samples are unsigned; reading them as int8_t would turn
+         * 0x80..0xff into negative values before the bias is removed. */
+        COPY_SAMPLES(uint8_t, 0x80, 0);
+        break;
+    case AV_SAMPLE_FMT_S16P:
+        COPY_SAMPLES(int16_t, 0, 0);
+        break;
+    case AV_SAMPLE_FMT_S32P:
+        if (s->avctx->bits_per_raw_sample <= 24) {
+            COPY_SAMPLES(int32_t, 0, 8);
+            break;
+        }
+        /* fall through: full 32-bit integers are copied verbatim */
+    case AV_SAMPLE_FMT_FLTP:
+        memcpy(dst, src, nb_samples * 4);
+    }
+}
+
+/**
+ * Reset s->flags so that it holds only the sample rate index.
+ *
+ * The index is the position of avctx->sample_rate among the first 15
+ * entries of wv_rates, or 15 when the rate is not found there. All other
+ * bits of s->flags are overwritten.
+ */
+static void set_samplerate(WavPackEncodeContext *s)
+{
+    const int rate = s->avctx->sample_rate;
+    int idx = 0;
+
+    while (idx < 15 && wv_rates[idx] != rate)
+        idx++;
+
+    s->flags = idx << SRATE_LSB;
+}
+
+/**
+ * Encode one audio frame into a packet.
+ *
+ * The channels are consumed front to back; each call to
+ * wavpack_encode_block() advances s->ch_offset, and the resulting blocks
+ * are stored back to back in the packet.
+ *
+ * @param avctx          codec context
+ * @param avpkt          output packet; allocated here with room for
+ *                       nb_samples * channels * 8 bytes, then trimmed to
+ *                       the bytes actually written
+ * @param frame          input frame (planar samples)
+ * @param got_packet_ptr set to 1 on success
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int wavpack_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
+                                const AVFrame *frame, int *got_packet_ptr)
+{
+    WavPackEncodeContext *s = avctx->priv_data;
+    const int nb_bufs = avctx->channels > 1 ? 2 : 1;
+    uint8_t *wptr;
+    int space, written, ch;
+
+    s->block_samples = frame->nb_samples;
+
+    /* The second work buffer is only needed with more than one channel. */
+    for (ch = 0; ch < nb_bufs; ch++) {
+        av_fast_padded_malloc(&s->samples[ch], &s->samples_size[ch],
+                              sizeof(int32_t) * s->block_samples);
+        if (!s->samples[ch])
+            return AVERROR(ENOMEM);
+    }
+
+    written = ff_alloc_packet2(avctx, avpkt, s->block_samples * avctx->channels * 8);
+    if (written < 0)
+        return written;
+    wptr  = avpkt->data;
+    space = avpkt->size;
+
+    s->ch_offset = 0;
+    while (s->ch_offset < avctx->channels) {
+        const int remaining = avctx->channels - s->ch_offset;
+
+        /* Flags are rebuilt from scratch for every block. */
+        set_samplerate(s);
+
+        if (s->avctx->sample_fmt == AV_SAMPLE_FMT_S16P)
+            s->flags |= 1;
+        else if (s->avctx->sample_fmt == AV_SAMPLE_FMT_S32P)
+            s->flags |= 3 - (s->avctx->bits_per_raw_sample <= 24);
+        else if (s->avctx->sample_fmt == AV_SAMPLE_FMT_FLTP)
+            s->flags |= 3 | WV_FLOAT_DATA;
+
+        fill_buffer(s, frame->extended_data[s->ch_offset], s->samples[0], s->block_samples);
+        if (remaining != 1) {
+            /* At least two channels left: encode them as a stereo pair. */
+            s->flags |= WV_CROSS_DECORR;
+            fill_buffer(s, frame->extended_data[s->ch_offset + 1], s->samples[1], s->block_samples);
+        } else {
+            s->flags |= WV_MONO;
+        }
+
+        /* Magnitude field: low two flag bits * 8 + 7. */
+        s->flags += (1 << MAG_LSB) * ((s->flags & 3) * 8 + 7);
+
+        written = wavpack_encode_block(s, s->samples[0], s->samples[1],
+                                       wptr, space);
+        if (written < 0)
+            return written;
+
+        wptr  += written;
+        space -= written;
+    }
+    s->sample_index += frame->nb_samples;
+
+    avpkt->pts      = frame->pts;
+    avpkt->size     = wptr - avpkt->data;
+    avpkt->duration = ff_samples_to_time_base(avctx, frame->nb_samples);
+    *got_packet_ptr = 1;
+    return 0;
+}
+
+/**
+ * Free every buffer owned by the encoder context and zero the matching
+ * size fields.
+ *
+ * @param avctx codec context whose priv_data is the WavPackEncodeContext
+ * @return always 0
+ */
+static av_cold int wavpack_encode_close(AVCodecContext *avctx)
+{
+    WavPackEncodeContext *s = avctx->priv_data;
+    int n, ch, k;
+
+    for (n = 0; n < MAX_TERMS + 2; n++) {
+        for (ch = 0; ch < 2; ch++) {
+            av_freep(&s->sampleptrs[n][ch]);
+            s->sampleptrs_size[n][ch] = 0;
+        }
+    }
+
+    for (ch = 0; ch < 2; ch++) {
+        av_freep(&s->samples[ch]);
+        s->samples_size[ch] = 0;
+
+        av_freep(&s->best_buffer[ch]);
+        s->best_buffer_size[ch] = 0;
+
+        for (k = 0; k < 2; k++) {
+            av_freep(&s->temp_buffer[ch][k]);
+            s->temp_buffer_size[ch][k] = 0;
+        }
+    }
+
+    av_freep(&s->js_left);
+    s->js_left_size = 0;
+    av_freep(&s->js_right);
+    s->js_right_size = 0;
+
+    av_freep(&s->orig_l);
+    s->orig_l_size = 0;
+    av_freep(&s->orig_r);
+    s->orig_r_size = 0;
+
+    return 0;
+}
+
+/* Byte offset of an option's backing field inside WavPackEncodeContext. */
+#define OFFSET(x) offsetof(WavPackEncodeContext, x)
+/* Flag set shared by every entry of the option table below. */
+#define FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
+/*
+ * Private encoder options.
+ *
+ * - joint_stereo:  integer in [-1, 1], default 0; named values are
+ *                  on (1, "mid/side"), off (-1, "left/right") and auto (0).
+ * - optimize_mono: integer in [0, 1], default 0; named values are
+ *                  on (1) and off (0).
+ *
+ * The named constants are tied to their option through the trailing unit
+ * string ("joint" / "opt_mono"). The table ends with a NULL entry.
+ */
+static const AVOption options[] = {
+    { "joint_stereo",  "", OFFSET(joint), AV_OPT_TYPE_INT, {.i64=0},-1, 1, FLAGS, "joint" },
+    { "on",   "mid/side",   0, AV_OPT_TYPE_CONST, {.i64= 1}, 0, 0, FLAGS, "joint"},
+    { "off",  "left/right", 0, AV_OPT_TYPE_CONST, {.i64=-1}, 0, 0, FLAGS, "joint"},
+    { "auto", NULL, 0, AV_OPT_TYPE_CONST, {.i64= 0}, 0, 0, FLAGS, "joint"},
+    { "optimize_mono",        "", OFFSET(optimize_mono), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, FLAGS, "opt_mono" },
+    { "on",   NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "opt_mono"},
+    { "off",  NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "opt_mono"},
+    { NULL },
+};
+
+/* AVClass describing the encoder's private context; it exposes the
+ * options table to the AVOption system. */
+static const AVClass wavpack_encoder_class = {
+    .class_name = "WavPack encoder",
+    .item_name  = av_default_item_name,
+    .option     = options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+/*
+ * Codec registration entry for the WavPack encoder.
+ *
+ * Wires up the init / encode2 / close callbacks, declares the private
+ * context size and class, and lists the accepted sample formats (all
+ * planar: U8, S16, S32 and float), terminated by AV_SAMPLE_FMT_NONE.
+ */
+AVCodec ff_wavpack_encoder = {
+    .name           = "wavpack",
+    .long_name      = NULL_IF_CONFIG_SMALL("WavPack"),
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_WAVPACK,
+    .priv_data_size = sizeof(WavPackEncodeContext),
+    .priv_class     = &wavpack_encoder_class,
+    .init           = wavpack_encode_init,
+    .encode2        = wavpack_encode_frame,
+    .close          = wavpack_encode_close,
+    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME,
+    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_U8P,
+                                                     AV_SAMPLE_FMT_S16P,
+                                                     AV_SAMPLE_FMT_S32P,
+                                                     AV_SAMPLE_FMT_FLTP,
+                                                     AV_SAMPLE_FMT_NONE },
+};
diff --git a/libavcodec/wavpackenc.h b/libavcodec/wavpackenc.h
new file mode 100644
index 0000000..7a482ce
--- /dev/null
+++ b/libavcodec/wavpackenc.h
@@ -0,0 +1,664 @@
+/*
+ * WavPack lossless audio encoder
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_WAVPACKENC_H
+#define AVCODEC_WAVPACKENC_H
+
+#include "wavpack.h"
+
+/*
+ * One entry of the decorrelation spec tables in this header.
+ *
+ * Each initializer has the form { joint_stereo, delta, { terms... } }.
+ * The terms array has MAX_TERMS + 1 slots; initializers that list fewer
+ * values leave the remaining slots zero.
+ * NOTE(review): presumably a zero term marks the end of the list - confirm
+ * against the code that consumes these tables.
+ */
+typedef struct WavPackDecorrSpec {
+    int8_t joint_stereo, delta, terms[MAX_TERMS+1];
+} WavPackDecorrSpec;
+
+static const WavPackDecorrSpec fast_specs[] = {
+ { 1, 2, { 18,17 } }, { 1, 1, { 17,17 } }, { 0, 2, { 18,17 } },
+ { 0, 1, { 17,17 } }, { 1, 3, {  1,18 } }, { 1, 1, { 17, 1 } },
+ { 0, 1, {  1,17 } }, { 0, 1, { -2,17 } }, { 0, 2, { -1,17 } },
+ { 1, 1, { 17, 2 } }, { 0, 3, { 18,18 } }, { 0, 1, { 17, 1 } },
+ { 1, 6, {  1, 2 } }, { 1, 1, { 17, 3 } }, { 0, 1, { -2, 3 } },
+ { 0, 1, {  2,17 } }, { 0, 1, { 18,-2 } }, { 0, 1, { -1,17 } },
+ { 0, 1, { 18,17 } }, { 0, 1, { 17, 2 } }, { 1, 2, { 18,-2 } },
+ { 1, 1, {  1,17 } }, { 0, 3, { 18, 2 } }, { 0, 1, { 17,-2 } },
+ { 0, 1, { 18,-2 } }, { 1, 2, { 17,-3 } }, { 0, 1, { 18, 3 } },
+ { 0, 1, { 18,18 } }, { 1, 1, {  1, 3 } }, { 1, 1, { 18, 3 } },
+ { 1, 1, {  1, 3 } }, { 0, 2, { 18,17 } }, { 1, 1, {  1,17 } },
+ { 1, 1, { 17, 3 } }, { 0, 3, { 18,17 } }, { 0, 1, { 18,18 } },
+ { 1, 1, {  1, 3 } }, { 1, 1, {  1,18 } }, { 0, 1, { 18,-2 } },
+ { 0, 2, { 18,17 } }, { 0, 1, { -1,18 } }, { 1, 1, { 17, 3 } },
+ { 0, 1, { 17, 2 } }, { 0, 1, { 17, 3 } }, { 1, 1, { 18, 2 } },
+ { 1, 1, { 17,-2 } }, { 0, 1, {  1,-2 } }, { 0, 2, { 18,17 } },
+ { 0, 1, { 17,-2 } }, { 1, 1, { 17,-2 } }, { 0, 1, { 18, 3 } },
+ { 0, 1, {  2,17 } }, { 1, 2, { 18,-3 } }, { 1, 2, {  1,18 } },
+ { 1, 2, { 18, 2 } }, { 0, 1, { 17,-1 } }, { 0, 1, { 17,-2 } },
+ { 1, 1, { 17,-2 } }, { 1, 1, {  1, 3 } }, { 0, 1, {  1,17 } },
+ { 1, 2, { 18,-2 } }, { 1, 2, { 17,-3 } }, { 0, 2, { 18,17 } },
+ { 0, 2, { 18,17 } }, { 1, 1, { 17, 2 } }, { 1, 2, { 18,18 } },
+ { 0, 1, { 17, 2 } }, { 0, 1, { 18,17 } }, { 1, 1, {  1,17 } },
+ { 1, 1, { 17, 2 } }, { 0, 2, { 18,18 } }, { 0, 2, { 18,17 } },
+ { 1, 2, { 17,-3 } }, { 1, 6, {  1, 2 } }, { 0, 3, { 17,17 } },
+ { 0, 1, {  1,18 } }, { 0, 1, {  1,-2 } }, { 1, 1, { 17, 2 } },
+ { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } }, { 1, 1, { 18, 3 } },
+ { 1, 2, { 17,-3 } }, { 0, 1, { 17, 2 } }, { 0, 1, { 17, 3 } },
+ { 0, 1, { 18,-2 } }, { 1, 1, { 18,18 } }, { 1, 6, {  1, 2 } },
+ { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } }, { 0, 1, { -1,17 } },
+ { 1, 1, { 18, 3 } }, { 0, 1, { 17,18 } }, { 1, 1, { 17, 3 } },
+ { 0, 1, { 18, 3 } }, { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } },
+ { 1, 2, { 18, 2 } }, { 0, 1, { -2, 3 } }, { 0, 4, { 18,-1 } },
+ { 0, 2, { 18,18 } }, { 0, 1, { -2, 3 } }, { 1, 1, { 17,-2 } },
+ { 0, 1, { 17, 3 } }, { 0, 2, { 18,17 } }, { 0, 2, { -1,18 } },
+ { 1, 1, {  2,17 } }, { 0, 2, { 17,-2 } }, { 0, 1, { 17, 2 } },
+ { 1, 2, { 18,-3 } }, { 0, 1, { 17,-2 } }, { 0, 2, { 18,17 } },
+ { 0, 2, { 18,17 } }, { 1, 1, { 17,-2 } }, { 1, 2, { 17,-3 } },
+ { 1, 1, {  1, 3 } }, { 1, 1, {  2,17 } }, { 1, 2, { 18, 2 } },
+ { 1, 1, {  2,17 } }, { 1, 1, { 18, 2 } }, { 0, 2, { 18,17 } },
+ { 0, 2, { 18,17 } }, { 0, 1, { 17,-2 } }, { 0, 2, { 18,17 } },
+ { 0, 2, { 17,-1 } }, { 0, 2, { 18,-2 } }, { 0, 2, { 18,17 } },
+ { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } }, { 1, 1, {  1, 3 } },
+ { 0, 2, { -2,17 } }, { 0, 2, { 18,-2 } }, { 0, 2, { 17,-2 } },
+ { 1, 1, {  2,17 } }, { 1, 1, {  1, 3 } }, { 0, 1, {  2,17 } },
+ { 0, 2, { 18,17 } }, { 0, 3, { -1,17 } }, { 1, 1, {  2,17 } },
+ { 0, 2, { 18,18 } }, { 0, 1, { 17, 2 } }, { 1, 4, { 18,-3 } },
+ { 1, 1, { 18, 1 } }, { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } },
+ { 1, 2, { 18,-1 } }, { 0, 1, { -1,18 } }, { 1, 6, {  1, 2 } },
+ { 1, 1, { 17, 2 } }, { 1, 4, { 18, 3 } }, { 0, 1, {  1,17 } },
+ { 0, 1, { 18, 2 } }, { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } },
+ { 1, 2, { 17, 2 } }, { 0, 2, { 18,-2 } }, { 0, 1, {  1,18 } },
+ { 1, 2, { 18,-3 } }, { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } },
+ { 0, 2, { 18,17 } }, { 1, 2, { 18,18 } }, { 1, 3, { 17,17 } },
+ { 0, 1, { -2,17 } }, { 0, 1, { 17,18 } }, { 0, 1, { -1, 3 } },
+ { 1, 1, {  2,17 } }, { 0, 2, { 18,-1 } }, { 0, 2, { 18,17 } },
+ { 0, 2, { 18,17 } }, { 1, 1, { 17,-2 } }, { 1, 2, { 17, 2 } },
+ { 1, 1, { 18, 3 } }, { 0, 1, { 18, 2 } }, { 1, 2, { 17,-3 } },
+ { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } }, { 0, 1, { -2,17 } },
+ { 0, 1, { 17,-1 } }, { 0, 1, { 18,-1 } }, { 0, 2, { 18,17 } },
+ { 1, 2, { 17,-3 } }, { 1, 1, {  1,18 } }, { 1, 3, { 18, 2 } },
+ { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } },
+ { 0, 2, { 18,17 } }, { 0, 3, { 18,18 } }, { 0, 1, {  1,-2 } },
+ { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } },
+ { 1, 2, { 17,-3 } }, { 1, 1, { 18,18 } }, { 0, 2, { 18, 2 } },
+ { 0, 1, { 17,18 } }, { 1, 2, { 18, 2 } }, { 1, 1, { 17,-2 } },
+ { 0, 2, { 17,-1 } }, { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } },
+ { 0, 2, { 18,17 } }, { 0, 1, {  1,-2 } }, { 0, 1, { 18, 1 } },
+ { 1, 2, { 18,-2 } }, { 0, 1, { 17, 2 } }, { 0, 2, { 18,17 } },
+ { 0, 2, { 18,17 } }, { 1, 1, { 17, 3 } }, { 0, 1, { 17,-1 } },
+ { 0, 1, { 18, 2 } }, { 1, 1, { 17, 3 } }, { 1, 1, { 17,-2 } },
+ { 0, 1, { 18,18 } }, { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } },
+ { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } },
+ { 1, 1, { 17,18 } }, { 0, 1, { -2, 3 } }, { 0, 2, { 18,17 } },
+ { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } }, { 1, 2, { 18,-3 } },
+ { 0, 2, { 18,17 } }, { 0, 3, { 18, 2 } }, { 0, 1, {  1,18 } },
+ { 0, 2, { 18,17 } }, { 0, 1, { 17,-1 } }, { 0, 2, { 18,17 } },
+ { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } }, { 0, 1, { -2, 3 } },
+ { 0, 3, { 17,17 } }, { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } },
+ { 1, 1, { 17, 2 } }, { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } },
+ { 1, 1, { 17, 2 } }, { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } },
+ { 0, 2, { 18,17 } }, { 0, 2, { 18, 2 } }, { 0, 2, { 18,17 } },
+ { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } },
+ { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } },
+ { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } }, { 0, 2, { 18,17 } },
+ { 0, 2, { 18,17 } },
+};
+
+static const WavPackDecorrSpec default_specs[] = {
+ { 1, 2, { 18,18, 2,17, 3 } }, { 0, 2, { 18,17,-1, 3, 2 } },
+ { 1, 1, { 17,18,18,-2, 2 } }, { 0, 2, { 18,17, 3,-2,17 } },
+ { 1, 2, { 18,17, 2,17, 3 } }, { 0, 1, { 18,18,-1, 2,17 } },
+ { 0, 1, { 17,17,-2, 2, 3 } }, { 0, 1, { 18,-2,18, 2,17 } },
+ { 1, 2, { 18,18,-1, 2, 3 } }, { 0, 2, { 18,17, 3, 2, 5 } },
+ { 1, 1, { 18,17,18, 2, 5 } }, { 0, 1, { 17,17,-2, 2, 3 } },
+ { 0, 1, { 18,-2,18, 2, 5 } }, { 0, 1, { 17,-2,17, 2,-3 } },
+ { 1, 1, { 17,-2,17, 1, 2 } }, { 0, 1, { 17,17,-2, 2, 3 } },
+ { 1, 1, { 18, 3, 1, 5, 4 } }, { 1, 4, { 18,18, 2, 3,-2 } },
+ { 0, 1, {  1,-1,-1, 2,17 } }, { 0, 2, { 18,17, 3, 2, 5 } },
+ { 0, 1, { 18,18,18, 2,17 } }, { 0, 1, { 18,17,-1, 2,18 } },
+ { 1, 1, { 17, 3, 2, 1, 7 } }, { 0, 2, { 18,-2,18, 2, 3 } },
+ { 1, 3, { 18,-3,18, 2, 3 } }, { 0, 3, { 18,17, 2, 3,17 } },
+ { 1, 1, { 17,17, 2, 1, 4 } }, { 0, 1, { 17,18,-2, 2,17 } },
+ { 1, 1, { 18,18, 3, 5, 2 } }, { 0, 1, { 17,17, 2,18, 4 } },
+ { 0, 1, { 18,17, 1, 4, 6 } }, { 1, 1, {  3,17,18, 2,17 } },
+ { 1, 1, { 17, 3, 2, 1, 7 } }, { 0, 1, { 18,17,-1, 2, 3 } },
+ { 1, 1, { 17,17, 2, 1, 4 } }, { 1, 2, { 18,17,-1,17, 3 } },
+ { 1, 2, { 18,17, 2, 3,-1 } }, { 0, 2, { 18,18,-2, 2,17 } },
+ { 0, 1, { 17,17, 2,18, 4 } }, { 0, 5, { -2,18,18,18, 2 } },
+ { 1, 1, { 18,18,-1, 6, 3 } }, { 0, 1, { 17,17,-2, 2, 3 } },
+ { 1, 1, { 18,17,18, 2,17 } }, { 0, 1, { 18,17, 4, 3, 1 } },
+ { 0, 1, { -2,18, 2, 2,18 } }, { 1, 2, { 18,18,-2, 2,-1 } },
+ { 1, 1, { 17,17, 2, 1, 4 } }, { 0, 1, { 17,18,-2, 2,17 } },
+ { 1, 1, { 17, 3, 2, 1, 7 } }, { 1, 3, { 18,-3,18, 2, 3 } },
+ { 1, 2, { 18,18,-2, 2,-1 } }, { 1, 1, { 18,18, 3, 5, 2 } },
+ { 0, 2, { 18,18,-1, 2,17 } }, { 0, 1, { 18,-1,17,18, 2 } },
+ { 0, 1, { 17,-1, 2, 3, 6 } }, { 0, 1, { 18,-2,18, 2, 5 } },
+ { 1, 2, { 18,18,-2, 2,-1 } }, { 0, 3, { 18,18, 2, 3,17 } },
+ { 0, 1, { 17,17, 2,18, 4 } }, { 1, 1, { 17,-2,17, 1, 2 } },
+ { 0, 1, { -1, 3, 5, 4, 7 } }, { 0, 3, { 18,18, 3, 2, 5 } },
+ { 0, 1, { 17,17, 2,18, 4 } }, { 0, 1, { 18,17,-2,18, 3 } },
+ { 0, 2, { 18,18,-2, 2,17 } }, { 0, 3, { 18,17,-2, 2, 3 } },
+ { 1, 1, { 18,18,-2, 2,17 } }, { 0, 1, { 18,17, 4, 3, 1 } },
+ { 1, 2, {  3,18,17, 2,17 } }, { 1, 2, { 18,18, 2,-2,18 } },
+ { 1, 2, { 18,18,-1,18, 2 } }, { 0, 2, { 18,18,-2, 2,17 } },
+ { 1, 3, { 18,18, 2, 3,-2 } }, { 0, 3, { 18,18, 3, 2, 5 } },
+ { 0, 1, { 18,-2,18, 2, 5 } }, { 1, 1, { 17, 3, 2, 1, 7 } },
+ { 1, 3, { 18,18,-2, 2,18 } }, { 1, 1, { 17,18,18,-2, 2 } },
+ { 0, 1, { 18,-2,18, 2, 5 } }, { 0, 2, { 18,-2,18, 2, 3 } },
+ { 0, 1, { -1, 3, 4, 5, 7 } }, { 1, 1, { 17,17, 2,-1, 7 } },
+ { 0, 1, { 18,-1,-1, 2,-2 } }, { 0, 2, { 18,17, 2, 3,17 } },
+ { 0, 1, { 18,17, 2,18, 2 } }, { 0, 2, { 18,17,-1, 2,17 } },
+ { 0, 1, {  1,18, 3, 2, 5 } }, { 0, 2, { 18,-2, 4,18, 2 } },
+ { 1, 1, { 18, 3, 1, 5, 4 } }, { 0, 1, { 18,17,18, 2, 5 } },
+ { 1, 1, { 18, 3, 1, 5, 4 } }, { 0, 4, { 18,18,-2, 2,18 } },
+ { 1, 1, { 18,18, 3, 2, 5 } }, { 1, 1, { 17,17, 2, 1, 4 } },
+ { 0, 2, { 18,18,-2,18, 2 } }, { 0, 2, { 18,18,-2,18, 2 } },
+ { 1, 1, { 18,18, 2, 1, 3 } }, { 1, 1, { 17,17, 2, 1, 4 } },
+ { 1, 2, { 17,17, 2,18, 3 } }, { 0, 1, { 18,17, 1, 4, 6 } },
+ { 1, 2, { 18,18,-2, 2,-1 } }, { 0, 1, { 18,-2,18, 2, 5 } },
+ { 1, 1, { 17, 2,18, 2,17 } }, { 0, 2, { 18,18,-2,18, 2 } },
+ { 0, 1, { 18,18, 3, 6,-1 } }, { 0, 1, { 18,17, 2,18, 3 } },
+ { 0, 1, { 18,17,-2, 2,17 } }, { 1, 1, {  3,17,18, 2,17 } },
+ { 1, 3, { 18,-3,18, 2, 3 } }, { 1, 3, { 18,18,-3,18, 2 } },
+ { 1, 1, { 18, 3, 1, 5, 4 } }, { 0, 1, { 17,-2,17, 2,-3 } },
+ { 1, 1, { 18,18, 3, 5, 2 } }, { 1, 2, { 18,18,-2, 2,-1 } },
+ { 0, 1, { 18,-1,-1, 2,-2 } }, { 1, 1, { 18, 3, 1, 5, 4 } },
+ { 0, 3, { 18,17,-1, 2,17 } }, { 1, 3, { 18,17, 2,18,-2 } },
+ { 0, 2, { 18,18,-2,18, 2 } }, { 1, 2, { 18,18,-2, 2,-1 } },
+ { 1, 1, { 18, 3, 1, 5, 4 } }, { 0, 4, {  3,18,18, 2,17 } },
+ { 0, 2, { 18,18,-2,18, 2 } }, { 1, 1, { 18,17,-1,18, 2 } },
+ { 0, 2, { 18,18,-2,18, 2 } }, { 0, 2, { 18,18,-2,18, 2 } },
+ { 0, 2, { 18,18,-2,18, 2 } }, { 0, 2, { 18,18,-2,18, 2 } },
+ { 1, 1, { 18,18,18, 3, 2 } }, { 0, 1, { 17,-1, 2, 3, 6 } },
+ { 0, 1, { 17,-1, 2, 3, 6 } }, { 0, 2, { 18,17,-2, 3, 2 } },
+ { 1, 3, { 18,17, 2,-2,18 } }, { 0, 2, { 18,18, 2,17, 3 } },
+ { 0, 1, { 18,18, 2,18,-2 } }, { 0, 2, { 18,-2, 4,18, 2 } },
+ { 0, 1, { -2,18, 2, 2,18 } }, { 0, 2, { 18,17, 3, 6, 2 } },
+ { 0, 1, { 18,17,18, 2, 5 } }, { 0, 3, { 18,18,-2, 3, 2 } },
+ { 1, 1, { 18,18, 2,18, 5 } }, { 0, 1, { 17,-1, 2, 3, 6 } },
+ { 1, 4, { 18,18, 2, 3,-2 } }, { 0, 2, { 18,17,18, 2,-2 } },
+ { 0, 1, {  1,18, 3, 2, 5 } }, { 1, 4, { 18,-2,18, 2, 3 } },
+ { 1, 2, { 18, 2,18, 3,-2 } }, { 0, 2, { 18,18,18, 2, 4 } },
+ { 0, 2, {  3,17,18, 2,17 } }, { 1, 1, { 18,-1,18, 2,17 } },
+ { 1, 2, { 17,17, 2,18, 3 } }, { 0, 2, { 18,17,-2, 3, 2 } },
+ { 0, 1, {  1,-1,-1, 2,17 } }, { 0, 3, {  3,18,18, 2,17 } },
+ { 0, 1, { 18,-1,17,18, 2 } }, { 0, 1, { 18,17, 2,18, 3 } },
+ { 0, 2, { 18,18,-2,18, 2 } }, { 0, 1, { 18,17, 2,18, 2 } },
+ { 0, 2, { 18,18,-2,18, 2 } }, { 0, 2, { 18,18,-2,18, 2 } },
+ { 1, 2, { 17,17, 2,18, 3 } }, { 0, 1, { 18,17,-2, 2, 3 } },
+ { 0, 1, { 18,-2,18, 2, 5 } }, { 1, 4, { 18,-2,18, 2, 3 } },
+ { 1, 3, { 18,17, 2, 3, 6 } }, { 0, 2, { 18,18, 2,17, 3 } },
+ { 0, 2, { 18,17, 2,18, 2 } }, { 0, 2, { 18,18,-2,18, 2 } },
+ { 1, 1, { 18,18, 3, 5, 2 } }, { 0, 2, { 18,18,-2, 2, 3 } },
+ { 1, 2, { 18,17, 2,17, 3 } }, { 0, 1, { 18,17, 2, 3,18 } },
+ { 0, 2, { 18,18,-2,18, 2 } }, { 1, 4, { 18,18, 2, 3,-2 } },
+ { 0, 1, { 17,-2,17, 2,-3 } }, { 0, 1, { 17,17, 2,18, 4 } },
+ { 1, 1, { 18,18,18, 2, 4 } }, { 1, 2, { 18, 2,18, 3,-2 } },
+ { 1, 1, { 18,18,-2, 2,17 } }, { 0, 2, { 18,18,-2,18, 2 } },
+ { 0, 2, { 18,18, 2,17, 3 } }, { 0, 2, { 18,18,18, 2, 4 } },
+ { 0, 2, { 18,18,-2,18, 2 } }, { 0, 2, { 18,17,-2, 3, 2 } },
+ { 0, 1, {  1,-1,-1, 2,17 } }, { 1, 4, { 18,18, 2, 3,-2 } },
+ { 0, 2, { 18,18,-2,18, 2 } }, { 0, 1, { 18,-2,18, 3, 2 } },
+ { 0, 2, { 18,18,-2,18, 2 } }, { 0, 2, { 18,18,-2,18, 2 } },
+ { 0, 2, { 18,18,-2,18, 2 } }, { 0, 2, { 18,18,-2,18, 2 } },
+ { 0, 1, { 18,18,-2, 2,17 } }, { 0, 3, { 18,17, 2, 3,17 } },
+ { 1, 2, { 18,18, 2,-2,18 } }, { 0, 1, { -1, 3, 5, 4, 7 } },
+ { 1, 1, { 18, 3, 1, 5, 4 } }, { 1, 1, { 18,18,-2,18, 3 } },
+ { 0, 2, { 18,17,18, 2,-2 } }, { 0, 2, { 18,18, 2,17, 3 } },
+ { 1, 2, { 18, 2,18, 3,-2 } }, { 1, 4, { 18,18, 2, 3,-2 } },
+ { 1, 3, { 18,17, 2, 3, 6 } }, { 0, 2, { 18,18,-2,18, 2 } },
+ { 1, 2, { 18,17,-2,-1,17 } }, { 0, 1, { 17,-1, 2, 3, 6 } },
+ { 0, 2, { 18,18,-2,18, 2 } }, { 0, 2, { 18,18,-2, 2, 3 } },
+ { 1, 1, { 18,18,18, 2, 5 } }, { 0, 1, { 17,17,-2, 2, 3 } },
+ { 0, 2, { 18,18,-2,18, 2 } }, { 0, 2, { 18,17, 3, 6, 2 } },
+ { 0, 2, { 18,17,18, 2, 3 } }, { 0, 3, { 18,17,-3,18, 2 } },
+ { 0, 1, { 18,18,18, 2, 3 } }, { 0, 1, { 18,-2,-3, 2, 6 } },
+ { 0, 2, { 18,18,-2,18, 2 } }, { 1, 1, { 18,17,18, 2, 5 } },
+ { 0, 2, { 18,18,-2,18, 2 } }, { 0, 2, { 18,18,-2,18, 2 } },
+ { 1, 1, { 18,17,18, 2, 5 } }, { 0, 2, { 18,18,-2,18, 2 } },
+ { 0, 2, { 18,18,-2,18, 2 } }, { 0, 2, { 18,18,-2,18, 2 } },
+ { 0, 1, { 18,18,18, 2, 3 } }, { 1, 1, { 17,-2,17, 1, 2 } },
+ { 1, 1, { 17,17, 2,-1, 7 } }, { 0, 1, { 18,17, 4, 3, 1 } },
+ { 1, 3, { 18,-3,18, 2, 3 } }, { 0, 1, {  1,18, 3, 2, 5 } },
+ { 0, 2, { 18,18,-2,18, 2 } }, { 0, 2, { 18,18,-2,18, 2 } },
+ { 0, 1, { 18,18, 3, 6, 2 } }, { 0, 1, { 17,17, 2,18, 4 } },
+ { 0, 1, { 17,17, 2,18, 4 } }, { 0, 2, { 18,18,-2,18, 2 } },
+ { 0, 2, { 18,18,-2,18, 2 } }, { 0, 2, { 18,18,-2,18, 2 } },
+ { 1, 2, { 18,-2,18, 3, 2 } }, { 1, 1, { 17,-2,17, 1, 2 } },
+ { 1, 1, { 18,18, 3, 2, 5 } }, { 0, 1, { 18,18,-1, 2, 3 } },
+ { 0, 2, { 18,18,-2,18, 2 } }, { 0, 2, { 18,18,-2,18, 2 } },
+ { 0, 1, { 18,17,18, 2, 5 } }, { 0, 2, { 18,18,-2,18, 2 } },
+ { 0, 2, { 18,18,-2,18, 2 } }, { 0, 2, { 18,18,-2,18, 2 } },
+ { 0, 2, { 18,18,-2,18, 2 } }, { 0, 1, {  3,18,18, 2,17 } },
+ { 0, 2, { 18,18,-2,18, 2 } }, { 0, 2, { 18,18,-2,18, 2 } },
+ { 0, 2, { 18,18,-2,18, 2 } }, { 0, 2, { 18,18,-2,18, 2 } },
+ { 0, 2, { 18,18,-2,18, 2 } }, { 0, 2, { 18,18,-2,18, 2 } },
+};
+
+static const WavPackDecorrSpec high_specs[] = { /* decorr_filters[2]; each entry: two scalars + 10-value list (field meanings: see WavPackDecorrSpec, declared outside this excerpt) */
+ { 1, 2, { 18,18,18,-2, 2, 3, 5,-1,17, 4 } }, { 0, 1, { 18,17,-2, 2,18, 3, 7, 2, 5, 4 } },
+ { 1, 2, {  1,18, 3, 6,-2,18, 2, 3, 4, 5 } }, { 0, 2, { 18,18,-2, 2,18, 3, 6, 2,17, 4 } },
+ { 1, 2, { 18,18, 2,18, 3, 2,-1, 4,18, 5 } }, { 1, 1, {  7, 6, 5, 3, 4, 2, 5, 4, 3, 7 } },
+ { 1, 1, { 17, 3,18, 7, 2, 6, 1, 4, 3, 5 } }, { 1, 1, { -2,18,18,18, 3,-2, 6, 5, 2, 1 } },
+ { 1, 2, { 18,18,-1,18, 2, 3, 6,-2,17, 5 } }, { 0, 1, { 17,17,18, 3, 6, 4, 5, 2,18,-2 } },
+ { 1, 2, {  1,18,-2, 3, 5, 2, 4,-1, 6, 1 } }, { 0, 2, { 18,18, 3, 6,18, 2, 4, 8, 5, 3 } },
+ { 0, 1, { -2, 1,18, 2,-2, 7,18, 2,-1, 5 } }, { 1, 1, {  4, 3, 8, 1, 5, 2, 5, 6, 2, 8 } },
+ { 1, 1, { 17,18, 2, 6, 3, 4,-1, 1, 8, 6 } }, { 0, 1, { 18,18, 3, 6, 3,-2, 2, 5,-1, 1 } },
+ { 0, 1, { 18,18,17,-1, 2,-2,18, 3, 4, 5 } }, { 1, 2, { 18,17, 2,-2,18, 3, 5, 7, 2, 4 } },
+ { 1, 2, { 18,18, 3, 6,-2,18, 2, 5, 8, 3 } }, { 0, 1, { 18,17, 2,18,18, 2, 6, 5,17, 7 } },
+ { 1, 2, { 18,17, 2,18, 3, 2, 6,18,-1, 4 } }, { 1, 1, {  5, 3, 6, 5, 3, 4, 1, 2, 4, 7 } },
+ { 1, 1, {  5, 3, 6, 5, 3, 4, 1, 2, 4, 7 } }, { 0, 1, { -2,18,18,18,-2, 3, 2, 4, 6, 5 } },
+ { 1, 2, { 18,17,-3, 3,-1,18, 2, 3, 6, 5 } }, { 0, 1, { 17,18, 7, 3,-2, 7, 1, 2, 4, 5 } },
+ { 1, 1, {  2,18,18,-2, 2, 4,-1,18, 3, 6 } }, { 0, 3, {  1,18, 4, 3, 5, 2, 4,18, 2, 3 } },
+ { 0, 1, { -2,18, 2,18, 3, 7,18, 2, 6,-2 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 1, 1, { 18,18, 5, 4, 6, 4, 5, 1, 4, 3 } }, { 1, 1, { 18, 3, 6, 5, 7, 8, 2, 3, 1,-1 } },
+ { 1, 1, { 18,18,18, 2,-2, 3, 5,18, 2, 8 } }, { 0, 2, { 18,17,-2, 2, 3,18,-3, 5, 2, 7 } },
+ { 1, 1, {  1, 1,-1, 8,17, 3,-2, 2, 6,17 } }, { 0, 2, { 18,18,17, 2,-2, 3, 2, 4,18, 5 } },
+ { 1, 1, { 17,18, 2,-1, 5, 7,18, 3, 4, 6 } }, { 1, 1, {  5, 4, 5,17, 3, 6, 3, 4, 7, 2 } },
+ { 0, 1, { 17, 3, 1, 7, 4, 2, 5,-2,18, 6 } }, { 0, 1, { 17,18, 2,18, 4, 3, 5, 7,-3, 6 } },
+ { 1, 2, { 17,17,-3,-2, 2, 8,18,-1, 3, 5 } }, { 0, 1, { 17,17,18, 2, 3, 6,-2, 8, 1, 7 } },
+ { 1, 1, {  1, 2, 6,-2,18, 2, 5,-3, 7,-2 } }, { 0, 1, { 18,18, 3,18, 6, 8,-2, 2, 3, 5 } },
+ { 0, 1, { 18,17, 2,18,-2, 3, 7, 6, 2, 4 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 1, 1, { 18,18, 2,-1, 3, 6, 1, 3, 4, 8 } }, { 0, 1, { 18,18, 3, 6, 5, 3,-2, 2,18,-1 } },
+ { 0, 1, { 18,17,-3,18, 2, 4,-2, 3, 6,17 } }, { 1, 3, {  1, 2,17, 3,18, 7,-1, 5, 2, 4 } },
+ { 1, 1, { 18, 3,18, 6, 8,18,-2, 5, 7, 2 } }, { 0, 1, { 17, 2,18, 6, 3, 2, 5, 4, 8, 1 } },
+ { 0, 1, { 18,17,-1, 2, 3,18,18, 2, 3,17 } }, { 1, 1, { 18, 7, 6, 5, 5, 3, 1, 4, 2, 4 } },
+ { 1, 1, {  6,17, 3, 8, 1, 5, 7,-1, 2, 1 } }, { 1, 1, { 18,-2,18, 3,-2, 2, 7, 4, 6,18 } },
+ { 1, 3, { 18,-3,18, 2, 3,18,-1, 7, 2, 5 } }, { 0, 2, { 18,-2, 7, 1, 3, 2, 4, 6,-3, 7 } },
+ { 1, 1, { 18,-2, 2,-3,18,-2,17,-1, 4, 2 } }, { 0, 3, { 17,17, 2, 5, 3, 7,18, 6, 4, 2 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 1, 1, { 18,17, 4, 6, 6, 4, 5, 3, 4, 1 } }, { 0, 1, { 18, 5, 3, 6, 2, 3, 8, 1, 3, 7 } },
+ { 1, 2, { 18,17,-2, 2,18, 3, 5, 7,-1, 2 } }, { 0, 1, {  1,18,18, 3, 6,-1, 4, 8, 5, 2 } },
+ { 1, 1, {  1, 5, 3, 4, 1, 1, 3, 5, 7, 3 } }, { 0, 1, {  3,18,18, 2,18,18,-1, 2, 3,18 } },
+ { 1, 2, { 18,18,-1,18, 2, 3, 4, 6,18, 5 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 1, 1, { 18, 3, 1, 4, 5, 2, 7, 1, 3, 6 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 1, 2, { 18,18,-1,18, 2, 3, 5,-2, 6, 8 } }, { 1, 1, { 17,18, 4, 8, 3, 2, 5, 2, 7, 6 } },
+ { 1, 4, {  1, 2, 5,18,-2, 2, 3, 7,-1, 4 } }, { 0, 2, { 18,17,-1, 3, 6,18, 2, 3, 7, 5 } },
+ { 0, 1, { -2,18, 2,-3, 6,18, 4, 3,-2, 5 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 1, { 17,17, 6, 2, 4, 8, 3, 5,-1,17 } }, { 1, 1, { 18, 3,18, 6, 8,18,-2, 5, 7, 2 } },
+ { 1, 2, { 17,17,-3, 2,18,-2, 8, 3, 6,-1 } }, { 1, 1, { 18,-2,17,18, 2, 3,-2, 6, 5, 4 } },
+ { 1, 2, { 18,17,-1, 3,18, 2, 5, 3, 6,-3 } }, { 0, 1, { 18,17, 2,18, 7,18, 2, 4, 3,17 } },
+ { 1, 3, { 18,18, 5, 6, 4, 3, 4,18, 6, 5 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 1, 1, {  7, 6, 5, 3, 4, 2, 5, 4, 3, 7 } }, { 0, 1, { -2,18,18,18, 3, 6, 4, 2, 5, 2 } },
+ { 0, 3, { 18,17,-3,18, 3, 2, 5,-1,17, 3 } }, { 1, 1, { 17,18, 7, 3, 1, 7, 4, 2, 6, 5 } },
+ { 1, 1, { 18, 2,-2,-1,18, 5, 3,-2, 1, 2 } }, { 0, 3, { 18,18,-1, 3, 2, 7, 5,18, 4, 3 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 1, 1, { 17,18, 2,-2, 4, 8,18, 3, 6, 5 } }, { 0, 2, { 18,17, 3, 5,-2, 7, 2,18, 3,-1 } },
+ { 1, 1, { 18, 2,-2,-1,18, 5, 3,-2, 1, 2 } }, { 0, 2, {  3,17,18,18, 2, 5, 7, 6,18, 3 } },
+ { 1, 1, { 17,18,18, 4, 3, 2,18, 7, 8,-1 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 1, { 17, 1, 2, 3, 5, 6, 1, 4, 8,17 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 2, { 18,17,-1,18,-3, 2, 8, 3, 6,17 } }, { 1, 1, { 17,17, 1, 2, 4, 5,-1, 2, 1, 6 } },
+ { 1, 1, {  1, 2, 6,-2,18, 2,-3, 3,-2, 5 } }, { 0, 1, { 18, 3,18, 6,18, 5, 2, 4,-1, 8 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 1, 1, { 18,18,-1, 2,18, 3, 6, 4,-2, 7 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 2, { -1,18,18,18, 2,-2, 4, 7, 2, 3 } }, { 0, 3, {  3,17,-2, 5, 2, 7,18, 6, 4, 5 } },
+ { 0, 1, { 17, 6,18, 3, 8, 4, 5, 3, 8,18 } }, { 0, 2, { 18, 2, 6, 2,18, 3, 2, 4, 5, 8 } },
+ { 0, 1, {  3,18,18, 2,18,-1, 2,18, 2,17 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 1, {  3, 6,17,-2, 5, 1, 2, 7, 4, 8 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 1, 3, {  3,18,17, 5, 6, 2, 7,-2, 8,18 } }, { 1, 1, { 18,-1, 3, 1, 7, 2,-1, 4, 6,17 } },
+ { 1, 1, { 18, 2,-2,-1,18, 5, 3,-2, 1, 2 } }, { 0, 2, { 18, 1, 2,18, 3, 6, 5, 2, 4, 8 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 1, 1, { 17,-2, 2,18,18, 8, 5, 3, 2, 6 } }, { 0, 1, { 18,17, 2,18, 3, 2, 7,-2,18, 4 } },
+ { 1, 2, {  1,18, 2, 3,-1, 5, 6, 4, 7,17 } }, { 0, 2, { 18,17, 3, 6,-2, 2, 3, 8, 5,17 } },
+ { 0, 2, { 18,18, 3, 2,18,-1, 2, 4, 3,17 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 1, 2, { 17,-1,18, 2, 3,-2, 5,18, 2, 7 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 1, 2, { 18,-3,18, 2, 3,-2,18, 5, 6,-3 } }, { 0, 2, { 18,17, 3, 5,-2, 7, 2,18, 3,-1 } },
+ { 1, 1, {  1,18,-1, 2, 3, 1,-2, 8, 2, 5 } }, { 0, 1, { 18,18, 3, 6,18, 2, 3, 4, 8, 5 } },
+ { 0, 1, { -2, 1,18, 2,-2, 5, 7,18, 2,-1 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 1, 1, { 17,18,-1, 2, 8, 3, 4, 5, 1, 7 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 2, { 18,18,-1, 2,18, 3,-2, 5, 4, 2 } }, { 1, 1, { 18,17, 2,18, 3, 8, 5, 2, 7,17 } },
+ { 0, 1, { 18,18, 3,18, 6, 8,-2, 2, 3, 5 } }, { 0, 1, { 18,18, 2,18, 2, 6,18, 2,17, 7 } },
+ { 1, 3, { 18,17,18, 2, 8,18, 5,-1, 3, 6 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 1, 1, { 18, 7, 6, 5, 5, 3, 1, 4, 2, 4 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 1, 2, { 18,17,-1, 3, 6,18, 2, 5, 8, 3 } }, { 0, 1, { 17,18,18, 4, 7, 2, 3,-2,18, 5 } },
+ { 1, 2, { 18, 1, 2, 6, 2, 5,18, 2, 4, 8 } }, { 0, 4, { 18, 4, 1, 2, 3, 5, 4, 1, 2, 6 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } }, { 0, 2, { 18,17, 2,-1,18, 3,-3, 5, 2, 4 } },
+ { 0, 1, { 17,17, 3, 6, 3, 5,-2, 2,18,-1 } }, { 0, 2, { 18,18, 3,-2,18, 2,-3, 5, 3, 6 } },
+ { 1, 1, { 17,17, 2, 4, 1, 3, 5, 2, 6,-3 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 1, { 17, 1, 3, 2, 7, 1, 6, 3, 4, 8 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 1, { 17,-1,18, 2, 1, 5, 3, 8,-1,-2 } }, { 1, 1, { 17,18,-1, 8, 2, 5, 3, 4, 1, 6 } },
+ { 1, 2, {  1,18, 3,-1, 5, 1, 2, 4, 7, 6 } }, { 0, 1, { 18,18, 3, 6, 5, 3,-2, 2,18,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 1, {  1,18,-1, 3, 8, 5, 6, 1, 2, 3 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 2, { 18,18, 2, 3, 6,18,-1, 4, 2, 3 } }, { 1, 1, {  1, 3, 5,18, 2, 6, 7, 2, 3, 1 } },
+ { 1, 1, {  1, 3, 8,18, 5, 2, 7, 1, 3,-2 } }, { 0, 2, { 17, 2,18, 3, 6, 2, 4, 5, 8, 3 } },
+ { 0, 1, { 18,17, 2,18, 3, 2, 7,-2,18, 4 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 1, 2, { 18,-3,18,-1, 3,-2, 5, 7, 1, 2 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } }, { 0, 3, { 18,18, 2, 6,18, 5,18, 2, 3,17 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 1, 3, {  1,-1, 1, 3,-2, 2, 5, 7,-3,18 } }, { 1, 2, { 18, 7, 3,-3, 2, 8, 2, 5, 4,17 } },
+ { 1, 1, {  1, 4, 5, 1, 3, 4, 6, 7, 8, 3 } }, { 0, 1, { 18,17, 2,18,-1, 2, 3,18, 2, 4 } },
+ { 0, 2, { 18,18,-2,18, 2, 3, 4, 7, 5,17 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 1, 1, { 17,18, 2, 1, 3, 2, 5, 1, 2, 3 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 2, { 18,18,-1, 2, 3, 5, 8, 6, 1,-2 } }, { 0, 1, { 17,18, 8, 3, 4, 6, 5, 2, 8, 7 } },
+ { 1, 2, {  1, 3,-2,18, 2, 5, 1, 7,-1,-2 } }, { 0, 3, { 18,17,-1, 3,18, 2, 3, 6, 4,17 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 1, 2, { 18,18, 4,18, 6, 7, 8, 3,18, 2 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 2, { 17,-3,17, 2,-2, 8, 3,18, 4,-3 } }, { 1, 1, { 18,17, 3, 5, 6, 2, 8, 1, 3, 7 } },
+ { 0, 1, { 18,18, 3, 6, 5, 3,-2, 2,18,-1 } }, { 0, 3, { 18,18, 2, 6,18, 5,18, 2, 3,17 } },
+ { 1, 1, { 18,18, 5, 4, 6, 4, 5, 1, 4, 3 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 2, {  3,17,18,-3, 2, 5,18, 6,-1, 7 } }, { 1, 1, { 17,18, 3, 2, 5,-1, 6, 8, 4, 7 } },
+ { 1, 1, { 18, 1,-2, 3, 2, 1, 7, 6, 3, 4 } }, { 0, 3, {  1, 2,17, 3,18, 2, 7, 5, 4,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 1, 1, { 17,-2, 2,18,18, 8, 5, 3, 2, 6 } }, { 0, 2, { 18, 5,18, 2, 3, 7,-2, 1, 6, 8 } },
+ { 0, 1, {  2,-1,18,-1, 2, 4,-3, 5,18, 3 } }, { 0, 1, {  3,17,18, 5, 2,18, 7, 3, 6, 5 } },
+ { 1, 4, {  1, 2, 5,18,-2, 2, 3, 7,-1, 4 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 1, {  1,18, 2, 1, 3, 4, 1, 5, 2, 7 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } }, { 0, 1, { 17,17,18, 2, 4, 5,18,-2, 6, 3 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } }, { 0, 2, { 18,18,-1, 3, 5, 6, 8,18, 2, 3 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 1, { 18,18, 4, 6, 8,18, 7, 3, 2, 5 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 2, { -1,18,18,18, 2, 4,-2, 2, 3, 6 } }, { 0, 2, { 18,-2, 7, 1, 3, 2, 4, 6,-3, 7 } },
+ { 1, 1, { 17,18, 8, 3, 4, 6,-2, 5, 3, 8 } }, { 0, 2, { 18, 1, 2, 6, 2, 8, 3,18, 5, 4 } },
+ { 1, 1, {  3,18,18, 2,18, 2,18, 3, 2,18 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 1, 1, {  3,17,18, 5, 2, 6, 7, 1, 4, 8 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } }, { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2, 8 } },
+};
+
+static const WavPackDecorrSpec very_high_specs[] = { /* decorr_filters[3]; each entry: two scalars + 16-value list (field meanings: see WavPackDecorrSpec, declared outside this excerpt) */
+ { 1, 2, { 18,18, 2, 3,-2,18, 2, 4, 7, 5, 3, 6, 8,-1,18, 2 } },
+ { 0, 1, { 18,18,-1,18, 2, 3, 4, 6, 5, 7,18,-3, 8, 2,-1, 3 } },
+ { 1, 2, {  1,18,-2, 4,18, 2, 3, 6,-1, 7, 5,-2,18, 8, 2, 4 } },
+ { 0, 1, { 17,17, 2, 3, 4,18,-1, 5, 6, 7,18, 2, 8,17, 3,-2 } },
+ { 1, 1, { 18,18, 2,18, 3, 2,18, 4,-1, 3,18, 2, 6, 8,17, 5 } },
+ { 0, 2, { 18,17, 2, 3,-2, 5,18,-3, 2, 4, 7, 3, 6, 8, 5,17 } },
+ { 1, 1, { 18,-2, 2,-3,18, 5,-2,18, 2, 3, 6, 2,17, 4, 7,-1 } },
+ { 1, 1, { 17, 8,18, 3,-2, 2, 5, 4,18, 6, 3, 8, 7, 2, 5, 4 } },
+ { 0, 2, { 18,17,-2, 2,18, 3, 2, 5,-3, 4, 7,18, 3, 8, 6, 2 } },
+ { 1, 1, {  3, 6, 5, 5, 1, 3, 7, 4, 2, 6, 4,18, 3, 7, 5, 6 } },
+ { 1, 2, {  1,18, 3, 2,-2, 1, 5, 4, 6, 2, 7, 1, 8, 3,-1, 1 } },
+ { 0, 1, { 18,18, 2, 3, 6, 3, 5,-2, 2, 4,18, 3,-2,-1, 6, 7 } },
+ { 0, 1, { -2,18, 2,18, 7, 2, 6,-2, 3, 4,18,18, 2,-3, 8, 5 } },
+ { 0, 2, { 18,18,18, 2, 4, 3,18, 5, 3, 6,-2, 2, 4,18, 8, 7 } },
+ { 0, 1, { -2, 1,18, 2,-2,18,-1, 5, 7, 2, 3, 4,18, 2, 6, 2 } },
+ { 1, 1, { 17,18, 3, 2, 1, 7,-1, 2, 4, 3, 5, 6,-2,18, 7, 8 } },
+ { 1, 1, { 18,18, 2,18, 3, 4, 6,-2,18, 5, 8, 2, 3, 7, 4,-1 } },
+ { 0, 1, { 18,18,18,-1, 2, 3, 4, 6, 8,18, 3, 5, 2, 6, 7, 4 } },
+ { 1, 1, { 17,-2,18,18, 2, 5, 3, 8, 2,-1, 6, 1, 3, 4, 7, 5 } },
+ { 0, 1, { 17,17,18, 2, 3, 6,-2, 8, 1, 7, 5, 2, 3, 1, 4, 8 } },
+ { 1, 1, { 17,17, 3, 2, 7, 1, 4, 3, 6, 2, 5,-2, 8, 7,18, 6 } },
+ { 0, 1, { 18,17,-2, 2,18, 3,-3, 7, 6, 5, 2, 4,-1, 8, 3,17 } },
+ { 1, 1, {  2,18,18,-2, 2, 4,-1, 5,18, 3, 8, 6, 2, 7,17, 4 } },
+ { 0, 1, { 17, 3, 6, 8, 5, 4, 3, 8, 1,18, 7, 2, 4, 5, 6, 3 } },
+ { 1, 2, { 17,18, 4, 8, 3, 2, 5, 7, 6, 8, 2, 7,-2,18, 3, 4 } },
+ { 1, 1, {  6, 5, 5, 3, 4, 7, 3, 2, 4, 6, 3, 7, 1, 5, 2, 4 } },
+ { 1, 1, {  1,18,-1, 2, 1, 3, 8,-2, 2, 5, 6, 3, 8, 7,18, 4 } },
+ { 0, 1, {  1,17,-1,18, 3, 2, 5, 4, 6, 7, 8, 3, 4, 2, 1,-2 } },
+ { 0, 1, { 18, 2,18,18, 2,18, 6,-2,18, 7, 5, 4, 3, 2,18,-2 } },
+ { 0, 3, {  1, 4,18, 3, 2, 4, 1, 5, 2, 3, 6,18, 8, 7, 2, 4 } },
+ { 0, 1, { 17,-2, 1,-3, 2,18, 3,-2, 4,18, 3, 6, 7,-3, 2, 8 } },
+ { 1, 1, { 17,18,18, 4, 2, 3, 7, 6,18, 8, 5,-1, 4, 2, 3,17 } },
+ { 1, 2, { 18,-1,17,18, 2, 3,-2,18, 5, 8, 2, 4, 3, 7, 6,-1 } },
+ { 1, 1, { 18,18,18,-2, 4, 2, 3,18, 5, 8, 2, 4, 6, 7,-2, 3 } },
+ { 1, 2, { 18,18,-2,18,-1, 3, 2, 5,18,-2, 7, 2, 3, 4, 6, 8 } },
+ { 0, 1, { 17,18,-1, 2, 4,18, 8, 3, 6, 5, 7,-3, 2, 4, 3,17 } },
+ { 1, 1, { 18,18,17, 2,-1,18, 3, 2,18, 6, 5, 4,18, 7, 2,-1 } },
+ { 0, 2, {  1,18,-1,18, 3, 2, 4, 6,-3, 7,-1, 5, 1, 2, 3, 8 } },
+ { 1, 1, {  1,17,-2, 2,-3, 6, 3, 5, 1, 2, 7, 6, 8,-2, 4, 1 } },
+ { 0, 1, { 17,-1, 5, 1, 4, 3, 6, 2,-2,18, 3, 2, 4, 5, 8,-1 } },
+ { 0, 2, { 18,18,17, 2, 3,-2, 5,18, 2, 4, 7, 8, 6,17, 3, 5 } },
+ { 1, 1, {  1, 5, 1, 3, 4, 3, 7, 5, 1, 3, 6, 1, 2, 4, 3, 8 } },
+ { 1, 2, {  1,-1, 3, 2,18, 7,-2, 5, 2, 6, 4, 3,-1,18, 8, 7 } },
+ { 0, 2, { 18,17, 3,18, 2, 5, 4, 3, 6, 2, 7, 8,18, 3, 4, 5 } },
+ { 1, 1, {  3, 6,17, 8, 7, 5,18,-1, 1, 2, 3, 4, 2, 6, 8, 1 } },
+ { 0, 2, { 18,18, 3,-3,18, 2, 6, 5, 3, 7,18, 4,-2, 8, 2, 3 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 1, 1, { 17, 1, 7, 2, 3,18,-2, 3, 6, 4, 2, 7, 8, 5, 3,17 } },
+ { 1, 1, {  3, 6, 5, 5, 1, 3, 7, 4, 2, 6, 4,18, 3, 7, 5, 6 } },
+ { 0, 1, { 18,18,18, 2, 4,-1,18, 8,-1, 2, 3, 4, 6,-2, 1, 7 } },
+ { 1, 1, { 18,-2,17,18, 2, 6, 3,-2, 5, 4, 7, 1,-3, 8, 2, 6 } },
+ { 0, 1, { 17,18,18, 4, 2, 7, 3, 6,-2,18, 8, 4, 5, 2, 7,17 } },
+ { 1, 1, { 18,18, 5, 4, 6, 4, 1, 5, 4, 3, 2, 5, 6, 1, 4, 5 } },
+ { 0, 1, { 18,18,-2,18, 2,-3, 3, 8, 5,18, 6, 4, 3,-1, 7, 2 } },
+ { 1, 1, { 18, 2,-2,-3,18, 5, 2, 3,-2, 4, 6, 1,-3, 2, 7, 8 } },
+ { 0, 1, { 18, 3, 5, 8, 2, 6, 7, 3, 1, 5, 2,-1, 8, 6, 7, 4 } },
+ { 1, 1, {  4, 3, 8, 1, 5, 6, 2, 5, 8,-2, 2, 7, 3,18, 5, 4 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 1, 1, { 17, 3,18,18, 7, 2, 4,18, 6, 2, 3,-1, 8, 5,18,-3 } },
+ { 0, 1, {  3,17,18, 2,18, 6, 7,-3,18, 2, 5, 6, 3, 8, 7,-1 } },
+ { 1, 1, { 18,18, 2,18,18, 2,-1, 7, 3,18, 5, 2, 6, 4,-1,18 } },
+ { 0, 3, { 18, 3, 4, 1, 5, 2,18, 4, 2, 3,18, 7, 6, 1, 2, 4 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 1, 1, { 17, 1,18, 2, 3, 6, 4, 5, 7,18, 3, 8, 2, 4,-2,17 } },
+ { 1, 2, { 18,17, 2, 3, 5,18, 6,-2, 7, 3, 2, 4,18, 8,-1, 5 } },
+ { 0, 2, {  1,18,-1,18, 3, 2, 4, 6,-3, 7,-1, 5, 1, 2, 3, 8 } },
+ { 1, 1, {  1,18,-1, 8, 2, 6, 3,-2, 1, 2, 5, 4,-3, 8, 6, 3 } },
+ { 0, 1, { 18,18, 2,18, 2,18, 7, 6,18, 2,-2, 3, 5, 4,18, 8 } },
+ { 1, 2, { 18,17, 2, 3,18,-1, 2, 3, 6,18, 5, 4, 3, 7, 2, 8 } },
+ { 1, 2, { 18,18, 3,-2, 4,18, 5, 7, 6, 2, 4,-3, 8, 5,18, 3 } },
+ { 1, 1, { 17,-2,18,18, 2, 5, 3, 8, 2,-1, 6, 1, 3, 4, 7, 5 } },
+ { 1, 1, {  3,17,18, 5, 7, 2, 4, 6, 1, 8,-1, 3, 7, 4, 1, 2 } },
+ { 0, 2, {  1,-2, 2,18, 3, 5, 2, 4, 7,-1, 2, 3, 5,18,-2, 4 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 1, 1, {  1, 2,-2, 6,18,-3, 2, 7, 3,-2, 5, 6, 1, 8, 2, 4 } },
+ { 0, 1, { 18,18,18, 3,-2, 6,18, 2, 4, 3, 5, 8, 7, 6, 2,-2 } },
+ { 1, 1, {  1, 5, 1, 3, 4, 3, 7, 5, 1, 3, 6, 1, 2, 4, 3, 8 } },
+ { 0, 1, {  3,17,18, 2, 5,18, 6, 7, 5,-2, 2, 4,18, 3, 6, 8 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 2, { 17,-1,18, 2, 4,-1, 8, 3,18, 7,-3, 4, 5, 1, 2,-2 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 8, 6, 4, 5, 7,-1 } },
+ { 1, 1, { 18,18, 3, 6, 4, 8,-2, 2, 5, 3, 7,18, 6, 8, 4, 2 } },
+ { 1, 1, { 17,18,18,-2, 5, 2, 3, 1, 4,-1, 8, 6, 5, 3, 2,18 } },
+ { 1, 1, { 17,17, 1, 2, 4, 5, 2, 6,-1, 3, 1, 1,-2, 4, 2, 7 } },
+ { 1, 1, { 17, 1, 7, 2, 3,18,-2, 3, 6, 4, 2, 7, 8, 5, 3,17 } },
+ { 0, 1, { 18,17,-2,-3, 1, 2, 3, 2, 5, 4, 7,-3, 6,-2, 2, 1 } },
+ { 1, 1, {  1, 3, 5,18, 1, 2, 7, 3, 6, 2, 5, 8,-1, 1, 4, 7 } },
+ { 1, 1, { 17, 3, 6, 8, 1, 4, 5, 3,-2, 7, 2, 8, 5, 6,18, 3 } },
+ { 1, 1, { 17,18, 2, 4, 8,-2, 3, 1, 5, 6, 7, 1, 2, 3, 4, 7 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 1, 1, {  3, 1, 8,18, 5, 2, 3,18, 6, 7,-2, 4, 3, 2, 8,18 } },
+ { 0, 1, { 18,17, 2,18, 3, 4,-1,18, 7, 6, 2, 8, 4,18,18, 5 } },
+ { 0, 1, { 18,18, 2,18,18, 2, 7,-2, 6, 5, 4, 3,18, 3, 2,17 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 1, 1, { 17, 8,18, 3, 2, 1, 5, 4, 6,-1, 3,-3, 8,18, 7, 2 } },
+ { 1, 2, { 18,17,18, 2, 3, 5,-2,18, 6,-1, 2, 3, 7, 4, 8,17 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 8, 6, 4, 5, 7,-1 } },
+ { 1, 2, { 18,18,-2,17, 2,18, 3, 4,18, 8, 7,-1, 2, 4, 5,17 } },
+ { 0, 2, { 17,-3,17, 3, 2,-2,18, 8, 4,-3, 2,18, 5, 3,-2, 6 } },
+ { 0, 1, { 18,18, 2,18,18, 2, 7,-2, 6, 5, 4, 3,18, 3, 2,17 } },
+ { 0, 2, {  1,18,-1, 3, 5, 2,-3,18, 7, 3,-1, 6, 4, 2,17, 5 } },
+ { 1, 1, { 17,-2,17, 2,-3, 1, 5,-1, 4, 6, 3, 2, 8, 7,-2, 5 } },
+ { 1, 1, {  1,18, 1, 3, 5, 8, 6, 2, 3,-1, 7, 1, 4, 8, 5,-3 } },
+ { 0, 2, {  3,18,18, 2,18,-2, 6, 5, 7, 2, 4,18, 3, 6,-3, 5 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 1, 1, {  3, 6,17, 8, 7, 5,18,-1, 1, 2, 3, 4, 2, 6, 8, 1 } },
+ { 0, 4, { 18, 2,17, 3,18,-2, 2, 6,18, 2, 7, 3, 5, 4, 8,18 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { 18,18, 2, 3, 6, 3, 5,-2, 2, 4,18, 3,-2,-1, 6, 7 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 1, 1, { 17, 1, 2, 5, 3,-2, 1, 4, 3, 7, 6,-3, 2, 1, 1, 2 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 1, 1, { 18,18,-2,18,-2, 2, 3, 6,18, 4,-1, 2, 3, 8, 1, 4 } },
+ { 1, 1, { 17,-2,17, 2,-3, 1, 5,-1, 4, 6, 3, 2, 8, 7,-2, 5 } },
+ { 0, 1, { 17,17,18, 3, 2,18,18, 6, 8, 2,-2, 3, 5, 4,17,18 } },
+ { 1, 1, {  1, 5, 1, 3, 4, 3, 7, 5, 1, 3, 6, 1, 2, 4, 3, 8 } },
+ { 1, 1, {  1, 3,-3,18,18, 6, 5,18, 2,-1, 3, 8, 7,-3, 4,17 } },
+ { 1, 1, { 18, 1, 2, 1, 3, 8, 7, 4, 1, 5, 2,-1,-3,18, 6, 2 } },
+ { 0, 1, { 18, 3, 5, 2, 6, 8,18, 5, 7, 2, 3,-1, 6, 7, 8, 5 } },
+ { 0, 2, { 18, 3,-2, 7, 8, 2, 5, 4,-3, 8, 3, 2,18, 5, 4, 6 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 1, 3, {  1, 1, 2, 5, 2, 7, 4, 3,-1,18,-2, 8, 2, 1, 6, 7 } },
+ { 0, 1, {  3,17,18, 5, 2, 6, 7,18, 4, 5, 3, 6,18, 2, 7, 8 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, {  1,18, 1, 2, 3, 5, 1, 2, 6, 7, 4, 3, 8, 1,17, 5 } },
+ { 1, 2, { 17,-1,18,-2, 2, 3, 5,18, 2, 4, 6, 7, 3,-1, 5, 8 } },
+ { 1, 1, { 18,18,-3,18,-2, 2, 3,-2,18, 6, 4, 5, 8, 3,17,-3 } },
+ { 1, 1, { 18, 7, 6, 5, 5, 3, 1, 4, 2, 7, 3, 4,-3, 6,18, 8 } },
+ { 0, 2, { 18,18, 2, 3, 5,18, 2, 4, 3, 6,18, 7, 8,-1, 5, 2 } },
+ { 0, 1, { 18,17,-1, 2,18, 3, 2,18, 4, 3,18, 2, 6, 5, 8,17 } },
+ { 0, 2, { 18,17, 2, 3,18, 5,-1, 6, 7, 8, 2, 3, 4, 5,18, 6 } },
+ { 1, 2, { 18,-3,18, 2, 3,-2,-3, 5,18, 7, 6, 2, 4, 3, 8,-2 } },
+ { 1, 1, { 17,18,18,-2, 2, 3, 5, 4, 8,18,-1, 5, 3, 6,-2, 7 } },
+ { 1, 2, { 18,17, 2,-2,18, 3,-1, 4,18, 2, 7, 5, 3, 8, 6, 4 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 1, 1, {  1, 5, 1, 3, 4, 3, 7, 5, 1, 3, 6, 1, 2, 4, 3, 8 } },
+ { 0, 2, { 18,18, 3, 3,-2, 2, 5,18, 6, 3,-1, 4, 7,-1, 1, 2 } },
+ { 0, 1, { -2, 1,18, 2,-2, 5, 7,18, 3, 2, 6, 2,-1, 4,-2,17 } },
+ { 0, 2, { 18,18,18, 2, 3,-2,18, 5, 4, 2, 6, 8, 3,-2, 4,18 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 1, 1, { 17,18,-1, 3, 2, 5, 1, 3, 2, 8, 4, 7, 6, 2,-1, 5 } },
+ { 1, 1, { 17,18,18, 4, 2, 3, 7, 6,18, 8, 5,-1, 4, 2, 3,17 } },
+ { 0, 1, { 18,18,-2,18, 2, 3, 4, 5, 6,18, 8, 2, 3, 7,-2, 4 } },
+ { 0, 1, { 18,-2,18,18,-3,-2, 2, 3, 5, 8, 1, 2, 6, 4, 7,-1 } },
+ { 0, 1, { 18,17, 2,18, 3,-2, 2, 7, 6, 4,18, 3, 8, 7, 4, 2 } },
+ { 1, 1, { 17,18,18, 4, 2, 3, 7, 6,18, 8, 5,-1, 4, 2, 3,17 } },
+ { 1, 1, { 18,17,18, 2, 5, 3,-2,18, 6, 2, 3, 4, 8, 7, 5,-1 } },
+ { 0, 1, {  2,-1,18,-1, 2, 4,-3,18, 5, 3, 6,18, 2, 4, 7, 8 } },
+ { 1, 1, { 17,18, 8, 3, 6, 4,-1, 5, 2, 7, 3, 8, 6, 5,18, 4 } },
+ { 0, 2, { 18, 3,-2, 7, 8, 2, 5, 4,-3, 8, 3, 2,18, 5, 4, 6 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 1, 1, {  1,18,-1, 8, 2, 6, 3,-2, 1, 2, 5, 4,-3, 8, 6, 3 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { 17,18,18, 4, 2, 7, 3, 6,-2,18, 8, 4, 5, 2, 7,17 } },
+ { 1, 2, { 18,-1,18, 3,-2,18, 2, 5, 3, 6, 7, 2,-1,18, 8, 4 } },
+ { 1, 2, {  1,18,-2, 4,18, 2, 3, 6,-1, 7, 5,-2,18, 8, 2, 4 } },
+ { 1, 2, {  1,18,-3, 2, 3,18,-1, 5, 6, 2, 8, 3, 4, 1,-2, 7 } },
+ { 0, 1, {  1,17,-1,18, 3, 2, 5, 4, 6, 7, 8, 3, 4, 2, 1,-2 } },
+ { 1, 1, { 18,17,18, 4, 3, 5, 1, 2, 6, 3, 4, 7, 1, 8, 5, 2 } },
+ { 0, 1, { 18,-2, 7, 1, 3, 2,-3, 4, 6,-2, 7, 8, 1, 5, 4, 3 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 2, { 18,18,18,-2, 2, 5, 3, 7,18, 2, 4,-3, 5, 6, 3, 8 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 3, {  3,18,-1, 5, 2, 7,18, 6, 5, 2, 4, 3,-1, 7,18, 6 } },
+ { 0, 2, { 18,18,18, 4, 3, 2, 6, 4, 8,18, 5, 3, 2, 7,-2, 6 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 2, { 18,18,18, 2, 3,-2,18, 5, 4, 2, 6, 8, 3,-2, 4,18 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 1, 1, { 17, 8,18, 3, 2, 1, 5, 4, 6,-1, 3,-3, 8,18, 7, 2 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18,18, 2, 4, 6,-2, 2, 8, 3, 4,18, 7,-1, 6 } },
+ { 0, 1, { 18, 1,-2, 2, 4, 1, 3,-1, 2, 5, 7, 1, 6, 8,-2,17 } },
+ { 0, 1, { 17,17,18, 2, 5, 4,18, 3, 8, 7, 4, 6, 8, 1, 5, 2 } },
+ { 1, 2, { 18,18, 5, 4, 6, 3, 4,18, 8, 4,-1, 7, 5, 3, 6, 2 } },
+ { 0, 1, { 18,18,-3,18, 3, 6, 2, 5, 7,18, 3, 8,-1, 4, 5, 2 } },
+ { 1, 1, { 18, 2,-2,-3,18, 5, 2,-2, 4, 3, 6,18, 8,-1, 2, 7 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 1, 1, { 17, 1, 7, 2, 3,18,-2, 3, 6, 4, 2, 7, 8, 5, 3,17 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { 17,18, 3,18, 2, 5, 4, 7,-3, 6, 3, 2,18, 4, 7, 3 } },
+ { 1, 1, {  1, 7, 4, 5, 3, 4, 5, 1, 3, 6, 3, 2, 4, 8,-2, 7 } },
+ { 0, 1, {  1,18,-1,-2,18, 3, 2,-1, 6, 7, 4, 5, 3,18, 2,-3 } },
+ { 1, 1, { 18,18,-1, 3, 6,18, 5, 4, 8, 2, 3, 6,18, 7, 4,-2 } },
+ { 0, 2, { 18,18, 2, 6,18, 2,18, 5, 3,18, 2, 4, 7, 8, 3,18 } },
+ { 1, 1, {  3,18,18, 5,18, 6, 2, 4, 7,-2,18, 5, 8, 6, 3, 2 } },
+ { 0, 1, { 18,-2, 7, 1, 3, 2,-3, 4, 6,-2, 7, 8, 1, 5, 4, 3 } },
+ { 1, 1, { 18,-2,18, 2, 5,18, 3,-2, 4, 7, 2,-1, 8, 6, 5, 1 } },
+ { 1, 1, { 17,17, 5,18, 4, 1, 2, 8, 6, 4,-2, 3, 5,-1, 1, 8 } },
+ { 0, 2, {  1, 2,17, 3, 7,18, 2,-1, 4, 5,18, 2, 7, 3, 6, 8 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 1, 1, {  3, 6,17, 8, 7, 5,18,-1, 1, 2, 3, 4, 2, 6, 8, 1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 2, { 18,18,18, 2,-2, 3, 6, 4, 8,18, 2, 5, 7, 4, 3, 6 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 1, 1, { 18, 1, 8, 3, 5, 6, 4,-1, 8, 3, 7,18, 2, 5, 8, 4 } },
+ { 1, 1, { 17,18, 5, 2, 4, 3, 1, 6,-2, 1, 3, 2, 4, 5,-1,17 } },
+ { 1, 1, { 18,17, 2,18, 3,-3, 7, 2, 6, 4, 3, 5,18, 8, 2,-2 } },
+ { 1, 1, { 18,17,18, 4, 3, 5,-1,18, 2, 7, 8, 4, 6, 3,18, 5 } },
+ { 0, 1, { 18,17,18,-2, 2,-3, 3, 4, 8, 5, 2,18, 6, 3, 7,-2 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 1, 1, { 17,18, 8, 3, 4, 6,18, 5,-2, 3, 8, 5, 2, 4, 7, 6 } },
+ { 0, 1, { 18,-2, 3, 5, 1, 7, 3, 2, 6,-3, 4, 1, 5, 8, 3,-2 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 1, 1, {  3,17,18, 5,-1,18, 2, 6, 7,18, 5, 3,-3,-1, 6, 2 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 1, 3, { 18,17,-2, 3,-1,18, 2, 5, 3, 7, 6, 2, 4, 8,18, 5 } },
+ { 0, 1, { 18,-1,18, 2,18, 3, 5,18, 2, 8,18, 5, 4,-1, 6, 2 } },
+ { 1, 2, { 18,-2,18,18, 2, 3, 4,-3, 2, 5,18, 7, 4, 3, 8, 6 } },
+ { 0, 2, { 17,-1,18, 2,-1, 1, 7, 3, 8, 5,-2, 4, 1, 2,-3, 6 } },
+ { 0, 1, { 18,17, 2,18, 2,18, 6, 7, 4, 3,18, 5, 2,-2,17, 8 } },
+ { 0, 3, { 18,17, 2, 3,-3,-1,18, 2, 4, 5,18, 7, 3, 2,-3, 6 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 2, {  3,18,18,18, 2, 6, 5,18, 7, 2, 4, 6,18, 5, 3, 8 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { 18,18, 3, 6, 3,-2, 2,18, 5,-1, 7, 3, 4,-2, 2, 6 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 1, 1, { 18,17,18,18,-2, 2, 3,-3,18, 6, 4, 2,-2, 8, 3, 7 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { 18,18,18, 4, 2, 7, 8,18, 3, 2,-2, 4, 7, 6,17, 5 } },
+ { 1, 1, { 18,18,-1,-2, 8, 3,18, 6, 3, 5, 8, 2, 4, 7, 1, 6 } },
+ { 1, 1, {  1,-3, 3,18,18, 2,-1, 3, 6, 5,18, 4, 7,-2, 8, 3 } },
+ { 1, 1, {  1,18, 4, 2, 5,18, 1, 3,-1, 6, 1, 4, 8, 2, 5, 1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+ { 0, 1, { -1,18,18, 2,18, 3, 5,18, 2,18, 6, 8, 4, 5, 7,-1 } },
+};
+
+static const WavPackDecorrSpec * const decorr_filters[] = { /* one spec table per preset, same order as decorr_filter_sizes; pointers are const so the array itself is read-only */
+    &fast_specs[0], &default_specs[0], &high_specs[0], &very_high_specs[0],
+};
+
+static const uint16_t decorr_filter_sizes[] = { /* FF_ARRAY_ELEMS of each spec table, listed in the same order as decorr_filters */
+    FF_ARRAY_ELEMS(fast_specs),
+    FF_ARRAY_ELEMS(default_specs),
+    FF_ARRAY_ELEMS(high_specs),
+    FF_ARRAY_ELEMS(very_high_specs),
+};
+
+static const uint8_t decorr_filter_nterms[] = { 2, 5, 10, 16 }; /* 10 and 16 equal the inner-list length of high_specs / very_high_specs entries; presumably indexed like decorr_filters -- confirm against the encoder */
+
+static const int8_t nbits_table[] = { /* nbits_table[i] = number of significant bits in i: 0 for i == 0, up to 8 for i in 128..255 */
+ 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8
+};
+
+#endif /* AVCODEC_WAVPACKENC_H */
diff --git a/libavcodec/webp.c b/libavcodec/webp.c
index b98fa4d..c737f54 100644
--- a/libavcodec/webp.c
+++ b/libavcodec/webp.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2013 Aneesh Dogra <aneesh@sugarlabs.org>
  * Copyright (c) 2013 Justin Ruggles <justin.ruggles@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -31,16 +31,20 @@
  * Lossless decoder
  * Compressed alpha for lossy
  *
+ * @author James Almer <jamrial@gmail.com>
+ * Exif metadata
+ *
  * Unimplemented:
  *   - Animation
  *   - ICC profile
- *   - Exif and XMP metadata
+ *   - XMP metadata
  */
 
 #define BITSTREAM_READER_LE
 #include "libavutil/imgutils.h"
 #include "avcodec.h"
 #include "bytestream.h"
+#include "exif.h"
 #include "internal.h"
 #include "get_bits.h"
 #include "thread.h"
@@ -191,6 +195,8 @@ typedef struct WebPContext {
     enum AlphaFilter alpha_filter;      /* filtering method for alpha chunk */
     uint8_t *alpha_data;                /* alpha chunk data */
     int alpha_data_size;                /* alpha chunk data size */
+    int has_exif;                       /* set after an EXIF chunk has been processed */
+    AVDictionary *exif_metadata;        /* EXIF chunk data */
     int width;                          /* image width */
     int height;                         /* image height */
     int lossless;                       /* indicates lossless or lossy */
@@ -303,7 +309,7 @@ static int huff_reader_build_canonical(HuffReader *r, int *code_lengths,
     if (max_code_length == 0 || max_code_length > MAX_HUFFMAN_CODE_LENGTH)
         return AVERROR(EINVAL);
 
-    codes = av_malloc(alphabet_size * sizeof(*codes));
+    codes = av_malloc_array(alphabet_size, sizeof(*codes));
     if (!codes)
         return AVERROR(ENOMEM);
 
@@ -1330,6 +1336,7 @@ static int webp_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     s->height    = 0;
     *got_frame   = 0;
     s->has_alpha = 0;
+    s->has_exif  = 0;
     bytestream2_init(&gb, avpkt->data, avpkt->size);
 
     if (bytestream2_get_bytes_left(&gb) < 12)
@@ -1349,6 +1356,7 @@ static int webp_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         return AVERROR_INVALIDDATA;
     }
 
+    av_dict_free(&s->exif_metadata);
     while (bytestream2_get_bytes_left(&gb) > 0) {
         char chunk_str[5] = { 0 };
 
@@ -1422,10 +1430,44 @@ static int webp_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
 
             break;
         }
+        case MKTAG('E', 'X', 'I', 'F'): {
+            int le, ifd_offset, exif_offset = bytestream2_tell(&gb);
+            GetByteContext exif_gb;
+
+            if (s->has_exif) {
+                av_log(avctx, AV_LOG_VERBOSE, "Ignoring extra EXIF chunk\n");
+                goto exif_end;
+            }
+            if (!(vp8x_flags & VP8X_FLAG_EXIF_METADATA))
+                av_log(avctx, AV_LOG_WARNING,
+                       "EXIF chunk present, but Exif bit not set in the "
+                       "VP8X header\n");
+
+            s->has_exif = 1;
+            bytestream2_init(&exif_gb, avpkt->data + exif_offset,
+                             avpkt->size - exif_offset);
+            if (ff_tdecode_header(&exif_gb, &le, &ifd_offset) < 0) {
+                av_log(avctx, AV_LOG_ERROR, "invalid TIFF header "
+                       "in Exif data\n");
+                goto exif_end;
+            }
+
+            bytestream2_seek(&exif_gb, ifd_offset, SEEK_SET);
+            if (avpriv_exif_decode_ifd(avctx, &exif_gb, le, 0, &s->exif_metadata) < 0) {
+                av_log(avctx, AV_LOG_ERROR, "error decoding Exif data\n");
+                goto exif_end;
+            }
+
+            av_dict_copy(avpriv_frame_get_metadatap(data), s->exif_metadata, 0);
+
+exif_end:
+            av_dict_free(&s->exif_metadata);
+            bytestream2_skip(&gb, chunk_size);
+            break;
+        }
         case MKTAG('I', 'C', 'C', 'P'):
         case MKTAG('A', 'N', 'I', 'M'):
         case MKTAG('A', 'N', 'M', 'F'):
-        case MKTAG('E', 'X', 'I', 'F'):
         case MKTAG('X', 'M', 'P', ' '):
             AV_WL32(chunk_str, chunk_type);
             av_log(avctx, AV_LOG_VERBOSE, "skipping unsupported chunk: %s\n",
diff --git a/libavcodec/webvttdec.c b/libavcodec/webvttdec.c
new file mode 100644
index 0000000..6b86bed
--- /dev/null
+++ b/libavcodec/webvttdec.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2012 Clément Bœsch
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * WebVTT subtitle decoder
+ * @see http://dev.w3.org/html5/webvtt/
+ * @todo need to support extended markups and cue settings
+ */
+
+#include "avcodec.h"
+#include "ass.h"
+#include "libavutil/bprint.h"
+
+static const struct {
+    const char *from;   /* text looked for in the WebVTT cue */
+    const char *to;     /* ASS text written in its place */
+} webvtt_tag_replace[] = {
+    {"<i>", "{\\i1}"}, {"</i>", "{\\i0}"},
+    {"<b>", "{\\b1}"}, {"</b>", "{\\b0}"},
+    {"<u>", "{\\u1}"}, {"</u>", "{\\u0}"},
+    {"{", "\\{"}, {"}", "\\}"}, // escape to avoid ASS markup conflicts
+};
+
+static int webvtt_event_to_ass(AVBPrint *buf, const char *p)
+{
+    int i, skip = 0; /* skip: inside a <...> tag that has no replacement; its text is dropped */
+
+    while (*p) {
+        /* Rewrite known tags and braces from webvtt_tag_replace into ASS markup. */
+        for (i = 0; i < FF_ARRAY_ELEMS(webvtt_tag_replace); i++) {
+            const char *from = webvtt_tag_replace[i].from;
+            const size_t len = strlen(from);
+            if (!strncmp(p, from, len)) {
+                av_bprintf(buf, "%s", webvtt_tag_replace[i].to);
+                p += len;
+                break;
+            }
+        }
+        if (i < FF_ARRAY_ELEMS(webvtt_tag_replace))
+            continue; /* rescan after a match: falling through lost <b> in "<i><b>" and left "{{" half-escaped */
+
+        if (*p == '<')
+            skip = 1;
+        else if (*p == '>')
+            skip = 0;
+        else if (p[0] == '\n' && p[1])
+            av_bprintf(buf, "\\N"); /* newline followed by more text becomes ASS \N */
+        else if (!skip && *p != '\r')
+            av_bprint_chars(buf, *p, 1);
+        p++;
+    }
+    av_bprintf(buf, "\r\n");
+    return 0;
+}
+
+static int webvtt_decode_frame(AVCodecContext *avctx,
+                               void *data, int *got_sub_ptr, AVPacket *avpkt)
+{
+    const AVRational centisec = { 1, 100 }; /* pts and duration are rescaled to 1/100 s */
+    const char *text = avpkt->data;
+    AVSubtitle *sub  = data;
+    AVBPrint ass;
+    av_bprint_init(&ass, 0, AV_BPRINT_SIZE_UNLIMITED);
+    if (text && avpkt->size > 0 && !webvtt_event_to_ass(&ass, text)) {
+        int ts_start    = av_rescale_q(avpkt->pts, avctx->time_base, centisec);
+        int ts_duration = avpkt->duration == -1 ? -1 :
+                          av_rescale_q(avpkt->duration, avctx->time_base, centisec);
+        ff_ass_add_rect(sub, ass.str, ts_start, ts_duration, 0);
+    }
+    *got_sub_ptr = sub->num_rects > 0;
+    av_bprint_finalize(&ass, NULL);
+    return avpkt->size;
+}
+
+AVCodec ff_webvtt_decoder = {
+    .type           = AVMEDIA_TYPE_SUBTITLE,
+    .id             = AV_CODEC_ID_WEBVTT,
+    .name           = "webvtt",
+    .long_name      = NULL_IF_CONFIG_SMALL("WebVTT subtitle"),
+    .init           = ff_ass_subtitle_header_default,
+    .decode         = webvtt_decode_frame,
+};
diff --git a/libavcodec/webvttenc.c b/libavcodec/webvttenc.c
new file mode 100644
index 0000000..9f67a2e
--- /dev/null
+++ b/libavcodec/webvttenc.c
@@ -0,0 +1,219 @@
+/*
+ * WebVTT subtitle encoder
+ * Copyright (c) 2010  Aurelien Jacobs <aurel@gnuage.org>
+ * Copyright (c) 2014  Aman Gupta <ffmpeg@tmm1.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdarg.h>
+#include "avcodec.h"
+#include "libavutil/avstring.h"
+#include "libavutil/bprint.h"
+#include "ass_split.h"
+#include "ass.h"
+
+#define WEBVTT_STACK_SIZE 64
+typedef struct {
+    AVCodecContext *avctx;      /* owning codec context, used for logging */
+    ASSSplitContext *ass_ctx;   /* result of ff_ass_split() on the subtitle header */
+    AVBPrint buffer;            /* output text accumulated for the current subtitle */
+    unsigned timestamp_end;
+    int count;
+    char stack[WEBVTT_STACK_SIZE]; /* letters of the tags currently open */
+    int stack_ptr;              /* number of entries in use in stack */
+} WebVTTContext;
+
+#ifdef __GNUC__
+__attribute__ ((__format__ (__printf__, 2, 3)))
+#endif
+static void webvtt_print(WebVTTContext *s, const char *str, ...)
+{
+    va_list ap; /* printf-style arguments; the formatted text is appended to s->buffer */
+    va_start(ap, str);
+    av_vbprintf(&s->buffer, str, ap);
+    va_end(ap);
+}
+
+static int webvtt_stack_push(WebVTTContext *s, const char c)
+{
+    const int full = s->stack_ptr >= WEBVTT_STACK_SIZE;
+    if (!full) /* room left: store the tag letter on top */
+        s->stack[s->stack_ptr++] = c;
+    return full ? -1 : 0;
+}
+
+static char webvtt_stack_pop(WebVTTContext *s)
+{
+    /* Remove and return the most recently pushed tag letter;
+     * an empty stack yields 0 and is left untouched. */
+    return s->stack_ptr > 0 ? s->stack[--s->stack_ptr] : 0;
+}
+
+static int webvtt_stack_find(WebVTTContext *s, const char c)
+{
+    int pos = s->stack_ptr;
+    /* Walk down from the top; pos reaches -1 when c is not on the stack. */
+    while (--pos >= 0 && s->stack[pos] != c)
+        ;
+    return pos;
+}
+
+static void webvtt_close_tag(WebVTTContext *s, char tag)
+{
+    av_bprintf(&s->buffer, "</%c>", tag); /* append the closing form of a one-letter tag */
+}
+
+static void webvtt_stack_push_pop(WebVTTContext *s, const char c, int close)
+{
+    if (!close) { /* opening tag: only record it */
+        if (webvtt_stack_push(s, c) < 0)
+            av_log(s->avctx, AV_LOG_ERROR, "tag stack overflow\n");
+    } else { /* c == 0 unwinds the whole stack; a tag that is not found closes nothing */
+        const int target = c ? webvtt_stack_find(s, c) : 0;
+        while (target >= 0 && s->stack_ptr != target)
+            webvtt_close_tag(s, webvtt_stack_pop(s));
+    }
+}
+
+static void webvtt_style_apply(WebVTTContext *s, const char *style)
+{
+    ASSStyle *st = ff_ass_style_get(s->ass_ctx, style);
+    if (!st) /* no style returned for this name: open nothing */
+        return;
+    if (st->bold != ASS_DEFAULT_BOLD) { /* each non-default attribute opens a tag ... */
+        webvtt_print(s, "<b>");
+        webvtt_stack_push(s, 'b');      /* ... and is recorded on the tag stack */
+    }
+    if (st->italic != ASS_DEFAULT_ITALIC) {
+        webvtt_print(s, "<i>");
+        webvtt_stack_push(s, 'i');
+    }
+    if (st->underline != ASS_DEFAULT_UNDERLINE) {
+        webvtt_print(s, "<u>");
+        webvtt_stack_push(s, 'u');
+    }
+}
+
+static void webvtt_text_cb(void *priv, const char *text, int len)
+{
+    /* priv is the WebVTTContext; the text is appended to its buffer unchanged. */
+    av_bprint_append_data(&((WebVTTContext *)priv)->buffer, text, len);
+}
+
+static void webvtt_new_line_cb(void *priv, int forced)
+{
+    av_bprintf(&((WebVTTContext *)priv)->buffer, "\n"); /* forced is not consulted */
+}
+
+static void webvtt_style_cb(void *priv, char style, int close)
+{
+    /* 's' (strikethrough) is unsupported and ignored entirely. */
+    if (style != 's') {
+        webvtt_stack_push_pop(priv, style, close);
+        if (!close)
+            webvtt_print(priv, "<%c>", style);
+    }
+}
+
+static void webvtt_cancel_overrides_cb(void *priv, const char *style)
+{
+    webvtt_stack_push_pop(priv, 0, 1); /* close every tag that is still open */
+    webvtt_style_apply(priv, style);   /* then open the tags implied by the named style */
+}
+
+static void webvtt_end_cb(void *priv)
+{
+    webvtt_stack_push_pop(priv, 0, 1); /* c == 0 with close set: close all still-open tags */
+}
+
+static const ASSCodesCallbacks webvtt_callbacks = { /* NULL: no handler supplied for that ASS code */
+    .text             = webvtt_text_cb,
+    .new_line         = webvtt_new_line_cb,
+    .style            = webvtt_style_cb,
+    .color            = NULL,
+    .font_name        = NULL,
+    .font_size        = NULL,
+    .alignment        = NULL,
+    .cancel_overrides = webvtt_cancel_overrides_cb,
+    .move             = NULL,
+    .end              = webvtt_end_cb,
+};
+
+static int webvtt_encode_frame(AVCodecContext *avctx,
+                               unsigned char *buf, int bufsize, const AVSubtitle *sub)
+{
+    WebVTTContext *s = avctx->priv_data;
+    ASSDialog *dialog;
+    int i, num;
+
+    av_bprint_clear(&s->buffer); /* each call starts from an empty output buffer */
+    /* Convert every ASS rect of sub; returns bytes written to buf, 0 if empty, or <0 on error. */
+    for (i=0; i<sub->num_rects; i++) {
+        if (sub->rects[i]->type != SUBTITLE_ASS) {
+            av_log(avctx, AV_LOG_ERROR, "Only SUBTITLE_ASS type supported.\n");
+            return AVERROR(ENOSYS);
+        }
+
+        dialog = ff_ass_split_dialog(s->ass_ctx, sub->rects[i]->ass, 0, &num);
+        for (; dialog && num--; dialog++) {
+            webvtt_style_apply(s, dialog->style);
+            ff_ass_split_override_codes(&webvtt_callbacks, s, dialog->text);
+        }
+    }
+
+    if (!av_bprint_is_complete(&s->buffer))
+        return AVERROR(ENOMEM);
+    if (!s->buffer.len)
+        return 0;
+
+    if (s->buffer.len > bufsize) {
+        av_log(avctx, AV_LOG_ERROR, "Buffer too small for ASS event.\n");
+        return AVERROR_BUFFER_TOO_SMALL; /* was a bare -1, indistinguishable from AVERROR(EPERM) */
+    }
+    memcpy(buf, s->buffer.str, s->buffer.len);
+
+    return s->buffer.len;
+}
+
+static int webvtt_encode_close(AVCodecContext *avctx)
+{
+    WebVTTContext *ctx = avctx->priv_data;
+    av_bprint_finalize(&ctx->buffer, NULL); /* release the output buffer */
+    ff_ass_split_free(ctx->ass_ctx);
+    return 0;
+}
+
+static av_cold int webvtt_encode_init(AVCodecContext *avctx)
+{
+    WebVTTContext *ctx = avctx->priv_data;
+    av_bprint_init(&ctx->buffer, 0, AV_BPRINT_SIZE_UNLIMITED);
+    ctx->avctx   = avctx;
+    ctx->ass_ctx = ff_ass_split(avctx->subtitle_header); /* NULL result is reported as invalid data */
+    return !ctx->ass_ctx ? AVERROR_INVALIDDATA : 0;
+}
+
+AVCodec ff_webvtt_encoder = {
+    .type           = AVMEDIA_TYPE_SUBTITLE,
+    .id             = AV_CODEC_ID_WEBVTT,
+    .name           = "webvtt",
+    .long_name      = NULL_IF_CONFIG_SMALL("WebVTT subtitle"),
+    .priv_data_size = sizeof(WebVTTContext),
+    .init           = webvtt_encode_init,
+    .close          = webvtt_encode_close,
+    .encode_sub     = webvtt_encode_frame,
+};
diff --git a/libavcodec/wma.c b/libavcodec/wma.c
index f5ea64a..f2519f5 100644
--- a/libavcodec/wma.c
+++ b/libavcodec/wma.c
@@ -1,21 +1,21 @@
 /*
  * WMA compatible codec
- * Copyright (c) 2002-2007 The Libav Project
+ * Copyright (c) 2002-2007 The FFmpeg Project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -45,10 +45,10 @@ static av_cold void init_coef_vlc(VLC *vlc, uint16_t **prun_table,
 
     init_vlc(vlc, VLCBITS, n, table_bits, 1, 1, table_codes, 4, 4, 0);
 
-    run_table   = av_malloc(n * sizeof(uint16_t));
-    level_table = av_malloc(n * sizeof(uint16_t));
-    flevel_table= av_malloc(n * sizeof(*flevel_table));
-    int_table   = av_malloc(n * sizeof(uint16_t));
+    run_table   = av_malloc_array(n, sizeof(uint16_t));
+    level_table = av_malloc_array(n, sizeof(uint16_t));
+    flevel_table= av_malloc_array(n, sizeof(*flevel_table));
+    int_table   = av_malloc_array(n, sizeof(uint16_t));
     i = 2;
     level = 1;
     k = 0;
@@ -135,6 +135,10 @@ av_cold int ff_wma_init(AVCodecContext *avctx, int flags2)
 
     bps = (float)avctx->bit_rate / (float)(avctx->channels * avctx->sample_rate);
     s->byte_offset_bits = av_log2((int)(bps * s->frame_len / 8.0 + 0.5)) + 2;
+    if (s->byte_offset_bits + 3 > MIN_CACHE_BITS) {
+        av_log(avctx, AV_LOG_ERROR, "byte_offset_bits %d is too large\n", s->byte_offset_bits);
+        return AVERROR_PATCHWELCOME;
+    }
 
     /* compute high frequency value and choose if noise coding should
        be activated */
@@ -382,9 +386,9 @@ int ff_wma_end(AVCodecContext *avctx)
     }
     for (i = 0; i < 2; i++) {
         ff_free_vlc(&s->coef_vlc[i]);
-        av_free(s->run_table[i]);
-        av_free(s->level_table[i]);
-        av_free(s->int_table[i]);
+        av_freep(&s->run_table[i]);
+        av_freep(&s->level_table[i]);
+        av_freep(&s->int_table[i]);
     }
 
     return 0;
diff --git a/libavcodec/wma.h b/libavcodec/wma.h
index 513ba3f..c4056ec 100644
--- a/libavcodec/wma.h
+++ b/libavcodec/wma.h
@@ -1,21 +1,21 @@
 /*
  * WMA compatible codec
- * Copyright (c) 2002-2007 The Libav Project
+ * Copyright (c) 2002-2007 The FFmpeg Project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/wma_common.c b/libavcodec/wma_common.c
index d1d8045..51467b2 100644
--- a/libavcodec/wma_common.c
+++ b/libavcodec/wma_common.c
@@ -1,20 +1,20 @@
 /*
  * common code shared by all WMA variants
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/wma_common.h b/libavcodec/wma_common.h
index 61b1a35..55404af 100644
--- a/libavcodec/wma_common.h
+++ b/libavcodec/wma_common.h
@@ -1,20 +1,20 @@
 /*
  * common code shared by all WMA variants
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/wmadata.h b/libavcodec/wmadata.h
index 07a1afe..381f182 100644
--- a/libavcodec/wmadata.h
+++ b/libavcodec/wmadata.h
@@ -1,21 +1,21 @@
 /*
  * WMA compatible decoder
- * copyright (c) 2002 The Libav Project
+ * copyright (c) 2002 The FFmpeg Project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/wmadec.c b/libavcodec/wmadec.c
index 2dd5898..466c1a9 100644
--- a/libavcodec/wmadec.c
+++ b/libavcodec/wmadec.c
@@ -1,21 +1,21 @@
 /*
  * WMA compatible decoder
- * Copyright (c) 2002 The Libav Project
+ * Copyright (c) 2002 The FFmpeg Project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -93,6 +93,16 @@ static av_cold int wma_decode_init(AVCodecContext * avctx)
     s->use_bit_reservoir = flags2 & 0x0002;
     s->use_variable_block_len = flags2 & 0x0004;
 
+    if(avctx->codec->id == AV_CODEC_ID_WMAV2 && avctx->extradata_size >= 8){
+        if(AV_RL16(extradata+4)==0xd && s->use_variable_block_len){
+            av_log(avctx, AV_LOG_WARNING, "Disabling use_variable_block_len, if this fails contact the ffmpeg developers and send us the file\n");
+            s->use_variable_block_len= 0; // this fixes issue1503
+        }
+    }
+
+    for (i=0; i<MAX_CHANNELS; i++)
+        s->max_exponent[i] = 1.0;
+
     if(ff_wma_init(avctx, flags2)<0)
         return -1;
 
@@ -472,6 +482,11 @@ static int wma_decode_block(WMACodecContext *s)
         s->block_len_bits = s->frame_len_bits;
     }
 
+    if (s->frame_len_bits - s->block_len_bits >= s->nb_block_sizes){
+        av_log(s->avctx, AV_LOG_ERROR, "block_len_bits not initialized to a valid value\n");
+        return -1;
+    }
+
     /* now check if the block length is coherent with the frame length */
     s->block_len = 1 << s->block_len_bits;
     if ((s->block_pos + s->block_len) > s->frame_len){
@@ -500,6 +515,10 @@ static int wma_decode_block(WMACodecContext *s)
        coef escape coding */
     total_gain = 1;
     for(;;) {
+        if (get_bits_left(&s->gb) < 7) {
+            av_log(s->avctx, AV_LOG_ERROR, "total_gain overread\n");
+            return AVERROR_INVALIDDATA;
+        }
         a = get_bits(&s->gb, 7);
         total_gain += a;
         if (a != 127)
@@ -812,7 +831,8 @@ static int wma_decode_superframe(AVCodecContext *avctx, void *data,
                buf_size, avctx->block_align);
         return AVERROR_INVALIDDATA;
     }
-    buf_size = avctx->block_align;
+    if(avctx->block_align)
+        buf_size = avctx->block_align;
 
     init_get_bits(&s->gb, buf, buf_size*8);
 
@@ -820,16 +840,18 @@ static int wma_decode_superframe(AVCodecContext *avctx, void *data,
         /* read super frame header */
         skip_bits(&s->gb, 4); /* super frame index */
         nb_frames = get_bits(&s->gb, 4) - (s->last_superframe_len <= 0);
+        if (nb_frames <= 0) {
+            av_log(avctx, AV_LOG_ERROR, "nb_frames is %d\n", nb_frames);
+            return AVERROR_INVALIDDATA;
+        }
     } else {
         nb_frames = 1;
     }
 
     /* get output buffer */
     frame->nb_samples = nb_frames * s->frame_len;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     samples = (float **)frame->extended_data;
     samples_offset = 0;
 
@@ -905,13 +927,13 @@ static int wma_decode_superframe(AVCodecContext *avctx, void *data,
         samples_offset += s->frame_len;
     }
 
-    av_dlog(s->avctx, "%d %d %d %d outbytes:%td eaten:%d\n",
+    av_dlog(s->avctx, "%d %d %d %d outbytes:%"PTRDIFF_SPECIFIER" eaten:%d\n",
             s->frame_len_bits, s->block_len_bits, s->frame_len, s->block_len,
             (int8_t *)samples - (int8_t *)data, avctx->block_align);
 
     *got_frame_ptr = 1;
 
-    return avctx->block_align;
+    return buf_size;
  fail:
     /* when error, we reset the bit reservoir */
     s->last_superframe_len = 0;
@@ -926,6 +948,7 @@ static av_cold void flush(AVCodecContext *avctx)
     s->last_superframe_len= 0;
 }
 
+#if CONFIG_WMAV1_DECODER
 AVCodec ff_wmav1_decoder = {
     .name           = "wmav1",
     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Audio 1"),
@@ -940,7 +963,8 @@ AVCodec ff_wmav1_decoder = {
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
 };
-
+#endif
+#if CONFIG_WMAV2_DECODER
 AVCodec ff_wmav2_decoder = {
     .name           = "wmav2",
     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Audio 2"),
@@ -955,3 +979,4 @@ AVCodec ff_wmav2_decoder = {
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
 };
+#endif
diff --git a/libavcodec/wmaenc.c b/libavcodec/wmaenc.c
index 899cae0..895a180 100644
--- a/libavcodec/wmaenc.c
+++ b/libavcodec/wmaenc.c
@@ -2,20 +2,20 @@
  * WMA compatible encoder
  * Copyright (c) 2007 Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -23,9 +23,7 @@
 #include "avcodec.h"
 #include "internal.h"
 #include "wma.h"
-
-#undef NDEBUG
-#include <assert.h>
+#include "libavutil/avassert.h"
 
 
 static av_cold int encode_init(AVCodecContext *avctx)
@@ -37,13 +35,13 @@ static av_cold int encode_init(AVCodecContext *avctx)
     s->avctx = avctx;
 
     if(avctx->channels > MAX_CHANNELS) {
-        av_log(avctx, AV_LOG_ERROR, "too many channels: got %i, need %i or fewer",
+        av_log(avctx, AV_LOG_ERROR, "too many channels: got %i, need %i or fewer\n",
                avctx->channels, MAX_CHANNELS);
         return AVERROR(EINVAL);
     }
 
     if (avctx->sample_rate > 48000) {
-        av_log(avctx, AV_LOG_ERROR, "sample rate is too high: %d > 48kHz",
+        av_log(avctx, AV_LOG_ERROR, "sample rate is too high: %d > 48kHz\n",
                avctx->sample_rate);
         return AVERROR(EINVAL);
     }
@@ -68,7 +66,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
         AV_WL32(extradata, flags1);
         AV_WL16(extradata+4, flags2);
     }else
-        assert(0);
+        av_assert0(0);
     avctx->extradata= extradata;
     s->use_exp_vlc = flags2 & 0x0001;
     s->use_bit_reservoir = flags2 & 0x0002;
@@ -86,8 +84,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
                          (avctx->sample_rate * 8);
     block_align        = FFMIN(block_align, MAX_CODED_SUPERFRAME_SIZE);
     avctx->block_align = block_align;
-    avctx->bit_rate    = avctx->block_align * 8LL * avctx->sample_rate /
-                         s->frame_len;
+
     avctx->frame_size = avctx->delay = s->frame_len;
 
     return 0;
@@ -147,7 +144,7 @@ static void encode_exp_vlc(WMACodecContext *s, int ch, const int *exp_param){
     q_end = q + s->block_len;
     if (s->version == 1) {
         last_exp= *exp_param++;
-        assert(last_exp-10 >= 0 && last_exp-10 < 32);
+        av_assert0(last_exp-10 >= 0 && last_exp-10 < 32);
         put_bits(&s->pb, 5, last_exp - 10);
         q+= *ptr++;
     }else
@@ -155,7 +152,7 @@ static void encode_exp_vlc(WMACodecContext *s, int ch, const int *exp_param){
     while (q < q_end) {
         int exp = *exp_param++;
         int code = exp - last_exp + 60;
-        assert(code >= 0 && code < 120);
+        av_assert1(code >= 0 && code < 120);
         put_bits(&s->pb, ff_aac_scalefactor_bits[code], ff_aac_scalefactor_code[code]);
         /* XXX: use a table */
         q+= *ptr++;
@@ -171,7 +168,7 @@ static int encode_block(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE],
 
     //FIXME remove duplication relative to decoder
     if (s->use_variable_block_len) {
-        assert(0); //FIXME not implemented
+        av_assert0(0); //FIXME not implemented
     }else{
         /* fixed block len */
         s->next_block_len_bits = s->frame_len_bits;
@@ -180,7 +177,7 @@ static int encode_block(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE],
     }
 
     s->block_len = 1 << s->block_len_bits;
-//     assert((s->block_pos + s->block_len) <= s->frame_len);
+//     av_assert0((s->block_pos + s->block_len) <= s->frame_len);
     bsize = s->frame_len_bits - s->block_len_bits;
 
     //FIXME factor
@@ -218,7 +215,7 @@ static int encode_block(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE],
             mult *= mdct_norm;
             coefs = src_coefs[ch];
             if (s->use_noise_coding && 0) {
-                assert(0); //FIXME not implemented
+                av_assert0(0); //FIXME not implemented
             } else {
                 coefs += s->coefs_start;
                 n = nb_coefs[ch];
@@ -274,13 +271,13 @@ static int encode_block(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE],
                 if (s->use_exp_vlc) {
                     encode_exp_vlc(s, ch, fixed_exp);
                 } else {
-                    assert(0); //FIXME not implemented
+                    av_assert0(0); //FIXME not implemented
 //                    encode_exp_lsp(s, ch);
                 }
             }
         }
     } else {
-        assert(0); //FIXME not implemented
+        av_assert0(0); //FIXME not implemented
     }
 
     for (ch = 0; ch < s->avctx->channels; ch++) {
@@ -302,7 +299,7 @@ static int encode_block(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE],
                             code= run + s->int_table[tindex][abs_level-1];
                     }
 
-                    assert(code < s->coef_vlcs[tindex]->n);
+                    av_assert2(code < s->coef_vlcs[tindex]->n);
                     put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[code], s->coef_vlcs[tindex]->huffcodes[code]);
 
                     if(code == 0){
@@ -332,7 +329,7 @@ static int encode_frame(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE],
     init_put_bits(&s->pb, buf, buf_size);
 
     if (s->use_bit_reservoir) {
-        assert(0);//FIXME not implemented
+        av_assert0(0);//FIXME not implemented
     }else{
         if(encode_block(s, src_coefs, total_gain) < 0)
             return INT_MAX;
@@ -347,7 +344,7 @@ static int encode_superframe(AVCodecContext *avctx, AVPacket *avpkt,
                              const AVFrame *frame, int *got_packet_ptr)
 {
     WMACodecContext *s = avctx->priv_data;
-    int i, total_gain, ret;
+    int i, total_gain, ret, error;
 
     s->block_len_bits= s->frame_len_bits; //required by non variable block len
     s->block_len = 1 << s->block_len_bits;
@@ -366,46 +363,32 @@ static int encode_superframe(AVCodecContext *avctx, AVPacket *avpkt,
         }
     }
 
-    if ((ret = ff_alloc_packet(avpkt, 2 * MAX_CODED_SUPERFRAME_SIZE))) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
+    if ((ret = ff_alloc_packet2(avctx, avpkt, 2 * MAX_CODED_SUPERFRAME_SIZE)) < 0)
         return ret;
-    }
 
-#if 1
     total_gain= 128;
     for(i=64; i; i>>=1){
-        int error = encode_frame(s, s->coefs, avpkt->data, avpkt->size,
+        error = encode_frame(s, s->coefs, avpkt->data, avpkt->size,
                                  total_gain - i);
-        if(error<0)
+        if(error<=0)
             total_gain-= i;
     }
-#else
-    total_gain= 90;
-    best = encode_frame(s, s->coefs, avpkt->data, avpkt->size, total_gain);
-    for(i=32; i; i>>=1){
-        int scoreL = encode_frame(s, s->coefs, avpkt->data, avpkt->size, total_gain - i);
-        int scoreR = encode_frame(s, s->coefs, avpkt->data, avpkt->size, total_gain + i);
-        av_log(NULL, AV_LOG_ERROR, "%d %d %d (%d)\n", scoreL, best, scoreR, total_gain);
-        if(scoreL < FFMIN(best, scoreR)){
-            best = scoreL;
-            total_gain -= i;
-        }else if(scoreR < best){
-            best = scoreR;
-            total_gain += i;
-        }
-    }
-#endif
 
-    if ((i = encode_frame(s, s->coefs, avpkt->data, avpkt->size, total_gain)) >= 0) {
-        av_log(avctx, AV_LOG_ERROR, "required frame size too large. please "
-               "use a higher bit rate.\n");
+    while(total_gain <= 128 && error > 0)
+        error = encode_frame(s, s->coefs, avpkt->data, avpkt->size, total_gain++);
+    if (error > 0) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid input data or requested bitrate too low, cannot encode\n");
+        avpkt->size = 0;
         return AVERROR(EINVAL);
     }
-    assert((put_bits_count(&s->pb) & 7) == 0);
-    while (i++)
+    av_assert0((put_bits_count(&s->pb) & 7) == 0);
+    i= avctx->block_align - (put_bits_count(&s->pb)+7)/8;
+    av_assert0(i>=0);
+    while(i--)
         put_bits(&s->pb, 8, 'N');
 
     flush_put_bits(&s->pb);
+    av_assert0(put_bits_ptr(&s->pb) - s->pb.buf == avctx->block_align);
 
     if (frame->pts != AV_NOPTS_VALUE)
         avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
@@ -415,6 +398,7 @@ static int encode_superframe(AVCodecContext *avctx, AVPacket *avpkt,
     return 0;
 }
 
+#if CONFIG_WMAV1_ENCODER
 AVCodec ff_wmav1_encoder = {
     .name           = "wmav1",
     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Audio 1"),
@@ -427,7 +411,8 @@ AVCodec ff_wmav1_encoder = {
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
                                                      AV_SAMPLE_FMT_NONE },
 };
-
+#endif
+#if CONFIG_WMAV2_ENCODER
 AVCodec ff_wmav2_encoder = {
     .name           = "wmav2",
     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Audio 2"),
@@ -440,3 +425,4 @@ AVCodec ff_wmav2_encoder = {
     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
                                                      AV_SAMPLE_FMT_NONE },
 };
+#endif
diff --git a/libavcodec/wmalosslessdec.c b/libavcodec/wmalosslessdec.c
index 6ee27d3..8e2ac5f 100644
--- a/libavcodec/wmalosslessdec.c
+++ b/libavcodec/wmalosslessdec.c
@@ -5,20 +5,20 @@
  * Copyright (c) 2011 Andreas Öman
  * Copyright (c) 2011 - 2012 Mashiat Sarker Shakkhar
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -31,6 +31,7 @@
 #include "internal.h"
 #include "get_bits.h"
 #include "put_bits.h"
+#include "lossless_audiodsp.h"
 #include "wma.h"
 #include "wma_common.h"
 
@@ -46,6 +47,7 @@
 #define WMALL_BLOCK_MAX_SIZE (1 << WMALL_BLOCK_MAX_BITS)    ///< maximum block size
 #define WMALL_BLOCK_SIZES    (WMALL_BLOCK_MAX_BITS - WMALL_BLOCK_MIN_BITS + 1) ///< possible block sizes
 
+#define WMALL_COEFF_PAD_SIZE   16                       ///< pad coef buffers with 0 for use with SIMD
 
 /**
  * @brief frame-specific decoder context for a single channel
@@ -69,6 +71,7 @@ typedef struct WmallDecodeCtx {
     /* generic decoder variables */
     AVCodecContext  *avctx;
     AVFrame         *frame;
+    LLAudDSPContext dsp;                           ///< accelerated DSP functions
     uint8_t         frame_data[MAX_FRAMESIZE + FF_INPUT_BUFFER_PADDING_SIZE];  ///< compressed frame data
     PutBitContext   pb;                             ///< context for filling the frame_data buffer
 
@@ -125,12 +128,12 @@ typedef struct WmallDecodeCtx {
     int8_t  acfilter_order;
     int8_t  acfilter_scaling;
     int64_t acfilter_coeffs[16];
-    int     acfilter_prevvalues[2][16];
+    int     acfilter_prevvalues[WMALL_MAX_CHANNELS][16];
 
     int8_t  mclms_order;
     int8_t  mclms_scaling;
-    int16_t mclms_coeffs[128];
-    int16_t mclms_coeffs_cur[4];
+    int16_t mclms_coeffs[WMALL_MAX_CHANNELS * WMALL_MAX_CHANNELS * 32];
+    int16_t mclms_coeffs_cur[WMALL_MAX_CHANNELS * WMALL_MAX_CHANNELS];
     int16_t mclms_prevvalues[WMALL_MAX_CHANNELS * 2 * 32];
     int16_t mclms_updates[WMALL_MAX_CHANNELS * 2 * 32];
     int     mclms_recent;
@@ -143,33 +146,33 @@ typedef struct WmallDecodeCtx {
         int scaling;
         int coefsend;
         int bitsend;
-        int16_t coefs[MAX_ORDER];
-        int16_t lms_prevvalues[MAX_ORDER * 2];
-        int16_t lms_updates[MAX_ORDER * 2];
+        DECLARE_ALIGNED(16, int16_t, coefs)[MAX_ORDER + WMALL_COEFF_PAD_SIZE/sizeof(int16_t)];
+        DECLARE_ALIGNED(16, int16_t, lms_prevvalues)[MAX_ORDER * 2];
+        DECLARE_ALIGNED(16, int16_t, lms_updates)[MAX_ORDER * 2];
         int recent;
-    } cdlms[2][9];
+    } cdlms[WMALL_MAX_CHANNELS][9];
 
-    int cdlms_ttl[2];
+    int cdlms_ttl[WMALL_MAX_CHANNELS];
 
     int bV3RTM;
 
-    int is_channel_coded[2];
-    int update_speed[2];
+    int is_channel_coded[WMALL_MAX_CHANNELS];
+    int update_speed[WMALL_MAX_CHANNELS];
 
-    int transient[2];
-    int transient_pos[2];
+    int transient[WMALL_MAX_CHANNELS];
+    int transient_pos[WMALL_MAX_CHANNELS];
     int seekable_tile;
 
-    int ave_sum[2];
+    int ave_sum[WMALL_MAX_CHANNELS];
 
-    int channel_residues[2][WMALL_BLOCK_MAX_SIZE];
+    int channel_residues[WMALL_MAX_CHANNELS][WMALL_BLOCK_MAX_SIZE];
 
-    int lpc_coefs[2][40];
+    int lpc_coefs[WMALL_MAX_CHANNELS][40];
     int lpc_order;
     int lpc_scaling;
     int lpc_intbits;
 
-    int channel_coeffs[2][WMALL_BLOCK_MAX_SIZE];
+    int channel_coeffs[WMALL_MAX_CHANNELS][WMALL_BLOCK_MAX_SIZE];
 } WmallDecodeCtx;
 
 
@@ -180,7 +183,13 @@ static av_cold int decode_init(AVCodecContext *avctx)
     unsigned int channel_mask;
     int i, log2_max_num_subframes;
 
+    if (!avctx->block_align) {
+        av_log(avctx, AV_LOG_ERROR, "block_align is not set\n");
+        return AVERROR(EINVAL);
+    }
+
     s->avctx = avctx;
+    ff_llauddsp_init(&s->dsp);
     init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
 
     if (avctx->extradata_size >= 18) {
@@ -345,11 +354,11 @@ static int decode_tilehdr(WmallDecodeCtx *s)
             if (num_samples[c] == min_channel_len) {
                 if (fixed_channel_layout || channels_for_cur_subframe == 1 ||
                    (min_channel_len == s->samples_per_frame - s->min_samples_per_subframe)) {
-                    contains_subframe[c] = in_use = 1;
+                    contains_subframe[c] = 1;
                 } else {
-                    if (get_bits1(&s->gb))
-                        contains_subframe[c] = in_use = 1;
+                    contains_subframe[c] = get_bits1(&s->gb);
                 }
+                in_use |= contains_subframe[c];
             } else
                 contains_subframe[c] = 0;
         }
@@ -454,6 +463,13 @@ static int decode_cdlms(WmallDecodeCtx *s)
                 s->cdlms[0][0].order = 0;
                 return AVERROR_INVALIDDATA;
             }
+            if(s->cdlms[c][i].order & 8) {
+                static int warned;
+                if(!warned)
+                    avpriv_request_sample(s->avctx, "CDLMS of order %d",
+                                          s->cdlms[c][i].order);
+                warned = 1;
+            }
         }
 
         for (i = 0; i < s->cdlms_ttl[c]; i++)
@@ -479,6 +495,10 @@ static int decode_cdlms(WmallDecodeCtx *s)
                         (get_bits(&s->gb, s->cdlms[c][i].bitsend) << shift_l) >> shift_r;
             }
         }
+
+        for (i = 0; i < s->cdlms_ttl[c]; i++)
+            memset(s->cdlms[c][i].coefs + s->cdlms[c][i].order,
+                   0, WMALL_COEFF_PAD_SIZE);
     }
 
     return 0;
@@ -505,9 +525,9 @@ static int decode_channel_residues(WmallDecodeCtx *s, int ch, int tile_size)
 
     if (s->seekable_tile) {
         if (s->do_inter_ch_decorr)
-            s->channel_residues[ch][0] = get_sbits(&s->gb, s->bits_per_sample + 1);
+            s->channel_residues[ch][0] = get_sbits_long(&s->gb, s->bits_per_sample + 1);
         else
-            s->channel_residues[ch][0] = get_sbits(&s->gb, s->bits_per_sample);
+            s->channel_residues[ch][0] = get_sbits_long(&s->gb, s->bits_per_sample);
         i++;
     }
     for (; i < tile_size; i++) {
@@ -525,7 +545,7 @@ static int decode_channel_residues(WmallDecodeCtx *s, int ch, int tile_size)
             residue = quo;
         else {
             rem_bits = av_ceil_log2(ave_mean);
-            rem      = rem_bits ? get_bits_long(&s->gb, rem_bits) : 0;
+            rem      = get_bits_long(&s->gb, rem_bits);
             residue  = (quo << rem_bits) + rem;
         }
 
@@ -649,10 +669,10 @@ static void mclms_update(WmallDecodeCtx *s, int icoef, int *pred)
     if (s->mclms_recent == 0) {
         memcpy(&s->mclms_prevvalues[order * num_channels],
                s->mclms_prevvalues,
-               2 * order * num_channels);
+               sizeof(int16_t) * order * num_channels);
         memcpy(&s->mclms_updates[order * num_channels],
                s->mclms_updates,
-               2 * order * num_channels);
+               sizeof(int16_t) * order * num_channels);
         s->mclms_recent = num_channels * order;
     }
 }
@@ -688,35 +708,11 @@ static void revert_mclms(WmallDecodeCtx *s, int tile_size)
     }
 }
 
-static int lms_predict(WmallDecodeCtx *s, int ich, int ilms)
-{
-    int pred = 0, icoef;
-    int recent = s->cdlms[ich][ilms].recent;
-
-    for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
-        pred += s->cdlms[ich][ilms].coefs[icoef] *
-                s->cdlms[ich][ilms].lms_prevvalues[icoef + recent];
-
-    return pred;
-}
-
-static void lms_update(WmallDecodeCtx *s, int ich, int ilms,
-                       int input, int residue)
+static void lms_update(WmallDecodeCtx *s, int ich, int ilms, int input)
 {
-    int icoef;
     int recent = s->cdlms[ich][ilms].recent;
     int range  = 1 << s->bits_per_sample - 1;
 
-    if (residue < 0) {
-        for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
-            s->cdlms[ich][ilms].coefs[icoef] -=
-                s->cdlms[ich][ilms].lms_updates[icoef + recent];
-    } else if (residue > 0) {
-        for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
-            s->cdlms[ich][ilms].coefs[icoef] +=
-                s->cdlms[ich][ilms].lms_updates[icoef + recent];
-    }
-
     if (recent)
         recent--;
     else {
@@ -777,6 +773,9 @@ static void use_normal_update_speed(WmallDecodeCtx *s, int ich)
     s->update_speed[ich] = 8;
 }
 
+/** Get sign of integer (1 for positive, -1 for negative and 0 for zero); note: x is evaluated twice */
+#define WMASIGN(x) (((x) > 0) - ((x) < 0))
+
 static void revert_cdlms(WmallDecodeCtx *s, int ch,
                          int coef_begin, int coef_end)
 {
@@ -787,12 +786,19 @@ static void revert_cdlms(WmallDecodeCtx *s, int ch,
         for (icoef = coef_begin; icoef < coef_end; icoef++) {
             pred = 1 << (s->cdlms[ch][ilms].scaling - 1);
             residue = s->channel_residues[ch][icoef];
-            pred += lms_predict(s, ch, ilms);
+            pred += s->dsp.scalarproduct_and_madd_int16(s->cdlms[ch][ilms].coefs,
+                                                        s->cdlms[ch][ilms].lms_prevvalues
+                                                            + s->cdlms[ch][ilms].recent,
+                                                        s->cdlms[ch][ilms].lms_updates
+                                                            + s->cdlms[ch][ilms].recent,
+                                                        s->cdlms[ch][ilms].order,
+                                                        WMASIGN(residue));
             input = residue + (pred >> s->cdlms[ch][ilms].scaling);
-            lms_update(s, ch, ilms, input, residue);
+            lms_update(s, ch, ilms, input);
             s->channel_residues[ch][icoef] = input;
         }
     }
+    emms_c();
 }
 
 static void revert_inter_ch_decorr(WmallDecodeCtx *s, int tile_size)
@@ -955,7 +961,7 @@ static int decode_subframe(WmallDecodeCtx *s)
                 bits * s->num_channels * subframe_len, get_bits_count(&s->gb));
         for (i = 0; i < s->num_channels; i++)
             for (j = 0; j < subframe_len; j++)
-                s->channel_coeffs[i][j] = get_sbits(&s->gb, bits);
+                s->channel_coeffs[i][j] = get_sbits_long(&s->gb, bits);
     } else {
         for (i = 0; i < s->num_channels; i++)
             if (s->is_channel_coded[i]) {
@@ -1022,8 +1028,6 @@ static int decode_frame(WmallDecodeCtx *s)
     s->frame->nb_samples = s->samples_per_frame;
     if ((ret = ff_get_buffer(s->avctx, s->frame, 0)) < 0) {
         /* return an error if no frame could be decoded at all */
-        av_log(s->avctx, AV_LOG_ERROR,
-               "not enough space for the output samples\n");
         s->packet_loss = 1;
         return ret;
     }
@@ -1037,9 +1041,10 @@ static int decode_frame(WmallDecodeCtx *s)
         len = get_bits(gb, s->log2_frame_size);
 
     /* decode tile information */
-    if (decode_tilehdr(s)) {
+    if ((ret = decode_tilehdr(s))) {
         s->packet_loss = 1;
-        return 0;
+        av_frame_unref(s->frame);
+        return ret;
     }
 
     /* read drc info */
@@ -1074,8 +1079,11 @@ static int decode_frame(WmallDecodeCtx *s)
 
     /* decode all subframes */
     while (!s->parsed_all_subframes) {
+        int decoded_samples = s->channel[0].decoded_samples;
         if (decode_subframe(s) < 0) {
             s->packet_loss = 1;
+            if (s->frame->nb_samples)
+                s->frame->nb_samples = decoded_samples;
             return 0;
         }
     }
@@ -1182,9 +1190,13 @@ static int decode_packet(AVCodecContext *avctx, void *data, int *got_frame_ptr,
     if (s->packet_done || s->packet_loss) {
         s->packet_done = 0;
 
-        /* sanity check for the buffer length */
-        if (buf_size < avctx->block_align)
+        if (!buf_size)
             return 0;
+        /* sanity check for the buffer length */
+        if (buf_size < avctx->block_align) {
+            av_log(avctx, AV_LOG_ERROR, "buf size %d invalid\n", buf_size);
+            return AVERROR_INVALIDDATA;
+        }
 
         s->next_packet_start = buf_size - avctx->block_align;
         buf_size             = avctx->block_align;
diff --git a/libavcodec/wmaprodata.h b/libavcodec/wmaprodata.h
index f8a52bf..5382479 100644
--- a/libavcodec/wmaprodata.h
+++ b/libavcodec/wmaprodata.h
@@ -3,20 +3,20 @@
  * Copyright (c) 2007 Baptiste Coudurier, Benjamin Larsson, Ulion
  * Copyright (c) 2008 - 2009 Sascha Sommer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/wmaprodec.c b/libavcodec/wmaprodec.c
index afe6ac2..2f6c485 100644
--- a/libavcodec/wmaprodec.c
+++ b/libavcodec/wmaprodec.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2007 Baptiste Coudurier, Benjamin Larsson, Ulion
  * Copyright (c) 2008 - 2011 Sascha Sommer, Benjamin Larsson
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -308,6 +308,10 @@ static av_cold int decode_init(AVCodecContext *avctx)
 
     /** generic init */
     s->log2_frame_size = av_log2(avctx->block_align) + 4;
+    if (s->log2_frame_size > 25) {
+        avpriv_request_sample(avctx, "Large block align");
+        return AVERROR_PATCHWELCOME;
+    }
 
     /** frame info */
     s->skip_frame  = 1; /* skip first frame */
@@ -340,8 +344,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
     }
 
     if (s->min_samples_per_subframe < WMAPRO_BLOCK_MIN_SIZE) {
-        av_log(avctx, AV_LOG_ERROR, "Invalid minimum block size %"PRId8"\n",
-               s->max_num_subframes);
+        av_log(avctx, AV_LOG_ERROR, "min_samples_per_subframe of %d too small\n",
+               s->min_samples_per_subframe);
         return AVERROR_INVALIDDATA;
     }
 
@@ -421,6 +425,10 @@ static av_cold int decode_init(AVCodecContext *avctx)
         }
         s->sfb_offsets[i][band - 1] = subframe_len;
         s->num_sfb[i]               = band - 1;
+        if (s->num_sfb[i] <= 0) {
+            av_log(avctx, AV_LOG_ERROR, "num_sfb invalid\n");
+            return AVERROR_INVALIDDATA;
+        }
     }
 
 
@@ -437,9 +445,10 @@ static av_cold int decode_init(AVCodecContext *avctx)
                            + s->sfb_offsets[i][b + 1] - 1) << i) >> 1;
             for (x = 0; x < num_possible_block_sizes; x++) {
                 int v = 0;
-                while (s->sfb_offsets[x][v + 1] << x < offset)
-                    if (++v >= MAX_BANDS)
-                        return AVERROR_INVALIDDATA;
+                while (s->sfb_offsets[x][v + 1] << x < offset) {
+                    v++;
+                    av_assert0(v < MAX_BANDS);
+                }
                 s->sf_offsets[i][x][b] = v;
             }
         }
@@ -493,6 +502,9 @@ static int decode_subframe_length(WMAProDecodeCtx *s, int offset)
     if (offset == s->samples_per_frame - s->min_samples_per_subframe)
         return s->min_samples_per_subframe;
 
+    if (get_bits_left(&s->gb) < 1)
+        return AVERROR_INVALIDDATA;
+
     /** 1 bit indicates if the subframe is of maximum length */
     if (s->max_subframe_len_bit) {
         if (get_bits1(&s->gb))
@@ -671,7 +683,7 @@ static void decode_decorrelation_matrix(WMAProDecodeCtx *s,
 /**
  *@brief Decode channel transformation parameters
  *@param s codec context
- *@return 0 in case of success, < 0 in case of bitstream errors
+ *@return >= 0 in case of success, < 0 in case of bitstream errors
  */
 static int decode_channel_transform(WMAProDecodeCtx* s)
 {
@@ -1145,7 +1157,7 @@ static int decode_subframe(WMAProDecodeCtx *s)
         int num_fill_bits;
         if (!(num_fill_bits = get_bits(&s->gb, 2))) {
             int len = get_bits(&s->gb, 4);
-            num_fill_bits = get_bits(&s->gb, len) + 1;
+            num_fill_bits = (len ? get_bits(&s->gb, len) : 0) + 1;
         }
 
         if (num_fill_bits >= 0) {
@@ -1175,6 +1187,7 @@ static int decode_subframe(WMAProDecodeCtx *s)
             transmit_coeffs = 1;
     }
 
+    av_assert0(s->subframe_len <= WMAPRO_BLOCK_MAX_SIZE);
     if (transmit_coeffs) {
         int step;
         int quant_step = 90 * s->bits_per_sample >> 4;
@@ -1185,10 +1198,11 @@ static int decode_subframe(WMAProDecodeCtx *s)
             for (i = 0; i < s->channels_for_cur_subframe; i++) {
                 int c = s->channel_indexes_for_cur_subframe[i];
                 int num_vec_coeffs = get_bits(&s->gb, num_bits) << 2;
-                if (num_vec_coeffs + offset > FF_ARRAY_ELEMS(s->channel[c].out)) {
+                if (num_vec_coeffs > s->subframe_len) {
                     av_log(s->avctx, AV_LOG_ERROR, "num_vec_coeffs %d is too large\n", num_vec_coeffs);
                     return AVERROR_INVALIDDATA;
                 }
+                av_assert0(num_vec_coeffs + offset <= FF_ARRAY_ELEMS(s->channel[c].out));
                 s->channel[c].num_vec_coeffs = num_vec_coeffs;
             }
         } else {
@@ -1381,7 +1395,6 @@ static int decode_frame(WMAProDecodeCtx *s, AVFrame *frame, int *got_frame_ptr)
     /* get output buffer */
     frame->nb_samples = s->samples_per_frame;
     if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
         s->packet_loss = 1;
         return 0;
     }
@@ -1455,7 +1468,7 @@ static void save_bits(WMAProDecodeCtx *s, GetBitContext* gb, int len,
     int buflen;
 
     /** when the frame data does not need to be concatenated, the input buffer
-        is resetted and additional bits from the previous frame are copyed
+        is reset and additional bits from the previous frame are copied
         and skipped later so that a fast byte copy is possible */
 
     if (!append) {
@@ -1464,7 +1477,7 @@ static void save_bits(WMAProDecodeCtx *s, GetBitContext* gb, int len,
         init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
     }
 
-    buflen = (s->num_saved_bits + len + 8) >> 3;
+    buflen = (put_bits_count(&s->pb) + len + 8) >> 3;
 
     if (len <= 0 || buflen > MAX_FRAMESIZE) {
         avpriv_request_sample(s->avctx, "Too small input buffer");
@@ -1472,13 +1485,7 @@ static void save_bits(WMAProDecodeCtx *s, GetBitContext* gb, int len,
         return;
     }
 
-    if (len > put_bits_left(&s->pb)) {
-        av_log(s->avctx, AV_LOG_ERROR,
-               "Cannot append %d bits, only %d bits available.\n",
-               len, put_bits_left(&s->pb));
-        s->packet_loss = 1;
-        return;
-    }
+    av_assert0(len <= put_bits_left(&s->pb));
 
     s->num_saved_bits += len;
     if (!append) {
@@ -1593,7 +1600,8 @@ static int decode_packet(AVCodecContext *avctx, void *data,
             (frame_size = show_bits(gb, s->log2_frame_size)) &&
             frame_size <= remaining_bits(s, gb)) {
             save_bits(s, gb, frame_size, 0);
-            s->packet_done = !decode_frame(s, data, got_frame_ptr);
+            if (!s->packet_loss)
+                s->packet_done = !decode_frame(s, data, got_frame_ptr);
         } else if (!s->len_prefix
                    && s->num_saved_bits > get_bits_count(&s->gb)) {
             /** when the frames do not have a length prefix, we don't know
diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index 0d91077..c2737ab 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -2,20 +2,20 @@
  * Windows Media Audio Voice decoder.
  * Copyright (c) 2009 Ronald S. Bultje
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,8 +25,6 @@
  * @author Ronald S. Bultje <rsbultje@gmail.com>
  */
 
-#define UNCHECKED_BITSTREAM_READER 1
-
 #include <math.h>
 
 #include "libavutil/channel_layout.h"
@@ -520,7 +518,7 @@ static int kalman_smoothen(WMAVoiceContext *s, int pitch,
     float optimal_gain = 0, dot;
     const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
                 *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
-                *best_hist_ptr;
+                *best_hist_ptr = NULL;
 
     /* find best fitting point in history */
     do {
@@ -780,7 +778,7 @@ static void postfilter(WMAVoiceContext *s, const float *synth,
           *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
           *synth_filter_in = zero_exc_pf;
 
-    assert(size <= MAX_FRAMESIZE / 2);
+    av_assert0(size <= MAX_FRAMESIZE / 2);
 
     /* generate excitation from input signal */
     ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
@@ -1249,7 +1247,7 @@ static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb,
     float gain;
     int n, r_idx;
 
-    assert(size <= MAX_FRAMESIZE);
+    av_assert0(size <= MAX_FRAMESIZE);
 
     /* Set the offset from which we start reading wmavoice_std_codebook */
     if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
@@ -1285,7 +1283,7 @@ static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
     int n, idx, gain_weight;
     AMRFixed fcb;
 
-    assert(size <= MAX_FRAMESIZE / 2);
+    av_assert0(size <= MAX_FRAMESIZE / 2);
     memset(pulses, 0, sizeof(*pulses) * size);
 
     fcb.pitch_lag      = block_pitch_sh2 >> 2;
@@ -1456,8 +1454,8 @@ static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
                        float *excitation, float *synth)
 {
     WMAVoiceContext *s = ctx->priv_data;
-    int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;
-    int pitch[MAX_BLOCKS], last_block_pitch;
+    int n, n_blocks_x2, log_n_blocks_x2, av_uninit(cur_pitch_val);
+    int pitch[MAX_BLOCKS], av_uninit(last_block_pitch);
 
     /* Parse frame type ("frame header"), see frame_descs */
     int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)], block_nsamples;
@@ -1674,7 +1672,7 @@ static int check_bits_for_superframe(GetBitContext *orig_gb,
     /* initialize a copy */
     init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
     skip_bits_long(gb, get_bits_count(orig_gb));
-    assert(get_bits_left(gb) == get_bits_left(orig_gb));
+    av_assert1(get_bits_left(gb) == get_bits_left(orig_gb));
 
     /* superframe header */
     if (get_bits_left(gb) < 14)
@@ -1820,10 +1818,8 @@ static int synth_superframe(AVCodecContext *ctx, AVFrame *frame,
 
     /* get output buffer */
     frame->nb_samples = 480;
-    if ((res = ff_get_buffer(ctx, frame, 0)) < 0) {
-        av_log(ctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((res = ff_get_buffer(ctx, frame, 0)) < 0)
         return res;
-    }
     frame->nb_samples = n_samples;
     samples = (float *)frame->data[0];
 
@@ -1955,7 +1951,7 @@ static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
     int size, res, pos;
 
     /* Packets are sometimes a multiple of ctx->block_align, with a packet
-     * header at each ctx->block_align bytes. However, Libav's ASF demuxer
+     * header at each ctx->block_align bytes. However, FFmpeg's ASF demuxer
      * feeds us ASF packets, which may concatenate multiple "codec" packets
      * in a single "muxer" packet, so we artificially emulate that by
      * capping the packet size at ctx->block_align. */
@@ -2010,7 +2006,7 @@ static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
         /* rewind bit reader to start of last (incomplete) superframe... */
         init_get_bits(gb, avpkt->data, size << 3);
         skip_bits_long(gb, (size << 3) - pos);
-        assert(get_bits_left(gb) == pos);
+        av_assert1(get_bits_left(gb) == pos);
 
         /* ...and cache it for spillover in next packet */
         init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
diff --git a/libavcodec/wmavoice_data.h b/libavcodec/wmavoice_data.h
index 7f14fb8..cbf65b0 100644
--- a/libavcodec/wmavoice_data.h
+++ b/libavcodec/wmavoice_data.h
@@ -2,20 +2,20 @@
  * Windows Media Voice (WMAVoice) tables.
  * Copyright (c) 2009 Ronald S. Bultje
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/wmv2.c b/libavcodec/wmv2.c
index 7b1ea57..0fe5603 100644
--- a/libavcodec/wmv2.c
+++ b/libavcodec/wmv2.c
@@ -1,20 +1,20 @@
 /*
- * Copyright (c) 2002 The Libav Project
+ * Copyright (c) 2002 The FFmpeg Project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -96,8 +96,8 @@ void ff_mspel_motion(MpegEncContext *s,
 {
     Wmv2Context * const w= (Wmv2Context*)s;
     uint8_t *ptr;
-    int dxy, offset, mx, my, src_x, src_y, v_edge_pos;
-    ptrdiff_t linesize, uvlinesize;
+    int dxy, mx, my, src_x, src_y, v_edge_pos;
+    ptrdiff_t offset, linesize, uvlinesize;
     int emu=0;
 
     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
@@ -138,21 +138,13 @@ void ff_mspel_motion(MpegEncContext *s,
 
     if(s->flags&CODEC_FLAG_GRAY) return;
 
-    if (s->out_format == FMT_H263) {
-        dxy = 0;
-        if ((motion_x & 3) != 0)
-            dxy |= 1;
-        if ((motion_y & 3) != 0)
-            dxy |= 2;
-        mx = motion_x >> 2;
-        my = motion_y >> 2;
-    } else {
-        mx = motion_x / 2;
-        my = motion_y / 2;
-        dxy = ((my & 1) << 1) | (mx & 1);
-        mx >>= 1;
-        my >>= 1;
-    }
+    dxy = 0;
+    if ((motion_x & 3) != 0)
+        dxy |= 1;
+    if ((motion_y & 3) != 0)
+        dxy |= 2;
+    mx = motion_x >> 2;
+    my = motion_y >> 2;
 
     src_x = s->mb_x * 8 + mx;
     src_y = s->mb_y * 8 + my;
diff --git a/libavcodec/wmv2.h b/libavcodec/wmv2.h
index e01f6c1..52739c1 100644
--- a/libavcodec/wmv2.h
+++ b/libavcodec/wmv2.h
@@ -1,20 +1,20 @@
 /*
- * Copyright (c) 2002 The Libav Project
+ * Copyright (c) 2002 The FFmpeg Project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/wmv2dec.c b/libavcodec/wmv2dec.c
index 4ebc801..54ae165 100644
--- a/libavcodec/wmv2dec.c
+++ b/libavcodec/wmv2dec.c
@@ -1,20 +1,20 @@
 /*
- * Copyright (c) 2002 The Libav Project
+ * Copyright (c) 2002 The FFmpeg Project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -447,6 +447,8 @@ int ff_wmv2_decode_mb(MpegEncContext *s, int16_t block[6][64])
 static av_cold int wmv2_decode_init(AVCodecContext *avctx){
     Wmv2Context * const w= avctx->priv_data;
 
+    avctx->flags |= CODEC_FLAG_EMU_EDGE;
+
     if(ff_msmpeg4_decode_init(avctx) < 0)
         return -1;
 
diff --git a/libavcodec/wmv2dsp.c b/libavcodec/wmv2dsp.c
index 2e3a3ff..40e0bef 100644
--- a/libavcodec/wmv2dsp.c
+++ b/libavcodec/wmv2dsp.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/wmv2dsp.h b/libavcodec/wmv2dsp.h
index f2f258e..0bf9489 100644
--- a/libavcodec/wmv2dsp.h
+++ b/libavcodec/wmv2dsp.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/wmv2enc.c b/libavcodec/wmv2enc.c
index 256c4e5..869b3c7 100644
--- a/libavcodec/wmv2enc.c
+++ b/libavcodec/wmv2enc.c
@@ -1,20 +1,20 @@
 /*
- * Copyright (c) 2002 The Libav Project
+ * Copyright (c) 2002 The FFmpeg Project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -60,7 +60,9 @@ static av_cold int wmv2_encode_init(AVCodecContext *avctx){
     ff_wmv2_common_init(w);
 
     avctx->extradata_size= 4;
-    avctx->extradata= av_mallocz(avctx->extradata_size + 10);
+    avctx->extradata= av_mallocz(avctx->extradata_size + FF_INPUT_BUFFER_PADDING_SIZE);
+    if (!avctx->extradata)
+        return AVERROR(ENOMEM);
     encode_ext_header(w);
 
     return 0;
@@ -84,10 +86,10 @@ int ff_wmv2_encode_picture_header(MpegEncContext * s, int picture_number)
     w->abt_type=0;
     w->j_type=0;
 
-    assert(s->flipflop_rounding);
+    av_assert0(s->flipflop_rounding);
 
     if (s->pict_type == AV_PICTURE_TYPE_I) {
-        assert(s->no_rounding==1);
+        av_assert0(s->no_rounding==1);
         if(w->j_type_bit) put_bits(&s->pb, 1, w->j_type);
 
         if(w->per_mb_rl_bit) put_bits(&s->pb, 1, s->per_mb_rl_table);
@@ -169,10 +171,12 @@ void ff_wmv2_encode_mb(MpegEncContext * s,
                  ff_wmv2_inter_table[w->cbp_table_index][cbp + 64][1],
                  ff_wmv2_inter_table[w->cbp_table_index][cbp + 64][0]);
 
+        s->misc_bits += get_bits_diff(s);
         /* motion vector */
         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
         ff_msmpeg4_encode_motion(s, motion_x - pred_x,
                               motion_y - pred_y);
+        s->mv_bits += get_bits_diff(s);
     } else {
         /* compute cbp */
         cbp = 0;
@@ -203,11 +207,16 @@ void ff_wmv2_encode_mb(MpegEncContext * s,
             s->h263_aic_dir=0;
             put_bits(&s->pb, ff_table_inter_intra[s->h263_aic_dir][1], ff_table_inter_intra[s->h263_aic_dir][0]);
         }
+        s->misc_bits += get_bits_diff(s);
     }
 
     for (i = 0; i < 6; i++) {
         ff_msmpeg4_encode_block(s, block[i], i);
     }
+    if (s->mb_intra)
+        s->i_tex_bits += get_bits_diff(s);
+    else
+        s->p_tex_bits += get_bits_diff(s);
 }
 
 AVCodec ff_wmv2_encoder = {
diff --git a/libavcodec/wnv1.c b/libavcodec/wnv1.c
index 7676c89..99aee3c 100644
--- a/libavcodec/wnv1.c
+++ b/libavcodec/wnv1.c
@@ -2,20 +2,20 @@
  * Winnov WNV1 codec
  * Copyright (c) 2005 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -31,8 +31,6 @@
 
 
 typedef struct WNV1Context {
-    AVCodecContext *avctx;
-
     int shift;
     GetBitContext gb;
 } WNV1Context;
@@ -70,8 +68,8 @@ static int decode_frame(AVCodecContext *avctx,
     int prev_y = 0, prev_u = 0, prev_v = 0;
     uint8_t *rbuf;
 
-    if (buf_size < 8) {
-        av_log(avctx, AV_LOG_ERROR, "Packet is too short\n");
+    if (buf_size <= 8) {
+        av_log(avctx, AV_LOG_ERROR, "Packet size %d is too small\n", buf_size);
         return AVERROR_INVALIDDATA;
     }
 
@@ -80,9 +78,9 @@ static int decode_frame(AVCodecContext *avctx,
         av_log(avctx, AV_LOG_ERROR, "Cannot allocate temporary buffer\n");
         return AVERROR(ENOMEM);
     }
+    memset(rbuf + buf_size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
 
     if ((ret = ff_get_buffer(avctx, p, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
         av_free(rbuf);
         return ret;
     }
@@ -134,10 +132,8 @@ static int decode_frame(AVCodecContext *avctx,
 
 static av_cold int decode_init(AVCodecContext *avctx)
 {
-    WNV1Context * const l = avctx->priv_data;
     static VLC_TYPE code_table[1 << CODE_VLC_BITS][2];
 
-    l->avctx       = avctx;
     avctx->pix_fmt = AV_PIX_FMT_YUV422P;
 
     code_vlc.table           = code_table;
diff --git a/libavcodec/ws-snd1.c b/libavcodec/ws-snd1.c
index fe6f812..6929cbf 100644
--- a/libavcodec/ws-snd1.c
+++ b/libavcodec/ws-snd1.c
@@ -2,20 +2,20 @@
  * Westwood SNDx codecs
  * Copyright (c) 2005 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -76,15 +76,13 @@ static int ws_snd_decode_frame(AVCodecContext *avctx, void *data,
 
     if (in_size > buf_size) {
         av_log(avctx, AV_LOG_ERROR, "Frame data is larger than input buffer\n");
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
 
     /* get output buffer */
     frame->nb_samples = out_size;
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
         return ret;
-    }
     samples     = frame->data[0];
     samples_end = samples + out_size;
 
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index f4e2c1d..53e643b 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -3,17 +3,21 @@ OBJS                                   += x86/constants.o               \
 
 OBJS-$(CONFIG_AC3DSP)                  += x86/ac3dsp_init.o
 OBJS-$(CONFIG_AUDIODSP)                += x86/audiodsp_init.o
-OBJS-$(CONFIG_BLOCKDSP)                += x86/blockdsp.o
+OBJS-$(CONFIG_BLOCKDSP)                += x86/blockdsp_init.o
 OBJS-$(CONFIG_BSWAPDSP)                += x86/bswapdsp_init.o
 OBJS-$(CONFIG_DCT)                     += x86/dct_init.o
 OBJS-$(CONFIG_FDCTDSP)                 += x86/fdctdsp_init.o
 OBJS-$(CONFIG_FFT)                     += x86/fft_init.o
+OBJS-$(CONFIG_FLAC_DECODER)            += x86/flacdsp_init.o
+OBJS-$(CONFIG_FLAC_ENCODER)            += x86/flacdsp_init.o
 OBJS-$(CONFIG_H263DSP)                 += x86/h263dsp_init.o
 OBJS-$(CONFIG_H264CHROMA)              += x86/h264chroma_init.o
 OBJS-$(CONFIG_H264DSP)                 += x86/h264dsp_init.o
 OBJS-$(CONFIG_H264PRED)                += x86/h264_intrapred_init.o
 OBJS-$(CONFIG_H264QPEL)                += x86/h264_qpel.o
 OBJS-$(CONFIG_HPELDSP)                 += x86/hpeldsp_init.o
+OBJS-$(CONFIG_LLAUDDSP)                += x86/lossless_audiodsp_init.o
+OBJS-$(CONFIG_LLVIDDSP)                += x86/lossless_videodsp_init.o
 OBJS-$(CONFIG_HUFFYUVDSP)              += x86/huffyuvdsp_init.o
 OBJS-$(CONFIG_HUFFYUVENCDSP)           += x86/huffyuvencdsp_mmx.o
 OBJS-$(CONFIG_IDCTDSP)                 += x86/idctdsp_init.o
@@ -31,7 +35,6 @@ OBJS-$(CONFIG_VP3DSP)                  += x86/vp3dsp_init.o
 OBJS-$(CONFIG_XMM_CLOBBER_TEST)        += x86/w64xmmtest.o
 
 OBJS-$(CONFIG_AAC_DECODER)             += x86/sbrdsp_init.o
-OBJS-$(CONFIG_APE_DECODER)             += x86/apedsp_init.o
 OBJS-$(CONFIG_CAVS_DECODER)            += x86/cavsdsp.o
 OBJS-$(CONFIG_DCA_DECODER)             += x86/dcadsp_init.o
 OBJS-$(CONFIG_DNXHD_ENCODER)           += x86/dnxhdenc_init.o
@@ -40,10 +43,13 @@ OBJS-$(CONFIG_MLP_DECODER)             += x86/mlpdsp.o
 OBJS-$(CONFIG_MPEG4_DECODER)           += x86/xvididct_init.o
 OBJS-$(CONFIG_PNG_DECODER)             += x86/pngdsp_init.o
 OBJS-$(CONFIG_PRORES_DECODER)          += x86/proresdsp_init.o
+OBJS-$(CONFIG_PRORES_LGPL_DECODER)     += x86/proresdsp_init.o
 OBJS-$(CONFIG_RV30_DECODER)            += x86/rv34dsp_init.o
 OBJS-$(CONFIG_RV40_DECODER)            += x86/rv34dsp_init.o            \
                                           x86/rv40dsp_init.o
-OBJS-$(CONFIG_SVQ1_ENCODER)            += x86/svq1enc.o
+OBJS-$(CONFIG_SVQ1_ENCODER)            += x86/svq1enc_init.o
+OBJS-$(CONFIG_V210_DECODER)            += x86/v210-init.o
+OBJS-$(CONFIG_TTA_DECODER)             += x86/ttadsp_init.o
 OBJS-$(CONFIG_TRUEHD_DECODER)          += x86/mlpdsp.o
 OBJS-$(CONFIG_VC1_DECODER)             += x86/vc1dsp_init.o
 OBJS-$(CONFIG_VORBIS_DECODER)          += x86/vorbisdsp_init.o
@@ -51,17 +57,17 @@ OBJS-$(CONFIG_VP6_DECODER)             += x86/vp6dsp_init.o
 OBJS-$(CONFIG_VP7_DECODER)             += x86/vp8dsp_init.o
 OBJS-$(CONFIG_VP8_DECODER)             += x86/vp8dsp_init.o
 OBJS-$(CONFIG_VP9_DECODER)             += x86/vp9dsp_init.o
+OBJS-$(CONFIG_WEBP_DECODER)            += x86/vp8dsp_init.o
 
-MMX-OBJS-$(CONFIG_AUDIODSP)            += x86/audiodsp_mmx.o
-MMX-OBJS-$(CONFIG_HPELDSP)             += x86/fpel_mmx.o                \
-                                          x86/hpeldsp_mmx.o
+MMX-OBJS-$(CONFIG_DIRAC_DECODER)       += x86/dirac_dwt.o
 MMX-OBJS-$(CONFIG_FDCTDSP)             += x86/fdct.o
 MMX-OBJS-$(CONFIG_IDCTDSP)             += x86/idctdsp_mmx.o             \
                                           x86/simple_idct.o
-MMX-OBJS-$(CONFIG_QPELDSP)             += x86/fpel_mmx.o
 
 MMX-OBJS-$(CONFIG_MPEG4_DECODER)       += x86/idct_mmx_xvid.o           \
                                           x86/idct_sse2_xvid.o
+MMX-OBJS-$(CONFIG_SNOW_DECODER)        += x86/snowdsp.o
+MMX-OBJS-$(CONFIG_SNOW_ENCODER)        += x86/snowdsp.o
 MMX-OBJS-$(CONFIG_VC1_DECODER)         += x86/vc1dsp_mmx.o
 
 YASM-OBJS                              += x86/deinterlace.o             \
@@ -69,10 +75,14 @@ YASM-OBJS                              += x86/deinterlace.o             \
 
 YASM-OBJS-$(CONFIG_AC3DSP)             += x86/ac3dsp.o
 YASM-OBJS-$(CONFIG_AUDIODSP)           += x86/audiodsp.o
+YASM-OBJS-$(CONFIG_BLOCKDSP)           += x86/blockdsp.o
 YASM-OBJS-$(CONFIG_BSWAPDSP)           += x86/bswapdsp.o
 YASM-OBJS-$(CONFIG_DCT)                += x86/dct32.o
+YASM-OBJS-$(CONFIG_DIRAC_DECODER)      += x86/diracdsp_mmx.o x86/diracdsp_yasm.o\
+                                          x86/dwt_yasm.o
 YASM-OBJS-$(CONFIG_DNXHD_ENCODER)      += x86/dnxhdenc.o
 YASM-OBJS-$(CONFIG_FFT)                += x86/fft.o
+YASM-OBJS-$(CONFIG_FLAC_DECODER)       += x86/flacdsp.o
 YASM-OBJS-$(CONFIG_H263DSP)            += x86/h263_loopfilter.o
 YASM-OBJS-$(CONFIG_H264CHROMA)         += x86/h264_chromamc.o           \
                                           x86/h264_chromamc_10bit.o
@@ -91,6 +101,9 @@ YASM-OBJS-$(CONFIG_H264QPEL)           += x86/h264_qpel_8bit.o          \
 YASM-OBJS-$(CONFIG_HPELDSP)            += x86/fpel.o                    \
                                           x86/hpeldsp.o
 YASM-OBJS-$(CONFIG_HUFFYUVDSP)         += x86/huffyuvdsp.o
+YASM-OBJS-$(CONFIG_IDCTDSP)            += x86/idctdsp.o
+YASM-OBJS-$(CONFIG_LLAUDDSP)           += x86/lossless_audiodsp.o
+YASM-OBJS-$(CONFIG_LLVIDDSP)           += x86/lossless_videodsp.o
 YASM-OBJS-$(CONFIG_ME_CMP)             += x86/me_cmp.o
 YASM-OBJS-$(CONFIG_MPEGAUDIODSP)       += x86/imdct36.o
 YASM-OBJS-$(CONFIG_MPEGVIDEOENC)       += x86/mpegvideoencdsp.o
@@ -102,14 +115,19 @@ YASM-OBJS-$(CONFIG_VIDEODSP)           += x86/videodsp.o
 YASM-OBJS-$(CONFIG_VP3DSP)             += x86/vp3dsp.o
 
 YASM-OBJS-$(CONFIG_AAC_DECODER)        += x86/sbrdsp.o
-YASM-OBJS-$(CONFIG_APE_DECODER)        += x86/apedsp.o
 YASM-OBJS-$(CONFIG_DCA_DECODER)        += x86/dcadsp.o
-YASM-OBJS-$(CONFIG_HEVC_DECODER)       += x86/hevc_deblock.o
+YASM-OBJS-$(CONFIG_HEVC_DECODER)       += x86/hevc_mc.o                 \
+                                          x86/hevc_deblock.o            \
+                                          x86/hevc_idct.o
 YASM-OBJS-$(CONFIG_PNG_DECODER)        += x86/pngdsp.o
 YASM-OBJS-$(CONFIG_PRORES_DECODER)     += x86/proresdsp.o
+YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
 YASM-OBJS-$(CONFIG_RV30_DECODER)       += x86/rv34dsp.o
 YASM-OBJS-$(CONFIG_RV40_DECODER)       += x86/rv34dsp.o                 \
                                           x86/rv40dsp.o
+YASM-OBJS-$(CONFIG_SVQ1_ENCODER)       += x86/svq1enc.o
+YASM-OBJS-$(CONFIG_TTA_DECODER)        += x86/ttadsp.o
+YASM-OBJS-$(CONFIG_V210_DECODER)       += x86/v210.o
 YASM-OBJS-$(CONFIG_VC1_DECODER)        += x86/vc1dsp.o
 YASM-OBJS-$(CONFIG_VORBIS_DECODER)     += x86/vorbisdsp.o
 YASM-OBJS-$(CONFIG_VP6_DECODER)        += x86/vp6dsp.o
@@ -117,4 +135,8 @@ YASM-OBJS-$(CONFIG_VP7_DECODER)        += x86/vp8dsp.o                  \
                                           x86/vp8dsp_loopfilter.o
 YASM-OBJS-$(CONFIG_VP8_DECODER)        += x86/vp8dsp.o                  \
                                           x86/vp8dsp_loopfilter.o
-YASM-OBJS-$(CONFIG_VP9_DECODER)        += x86/vp9dsp.o
+YASM-OBJS-$(CONFIG_VP9_DECODER)        += x86/vp9intrapred.o            \
+                                          x86/vp9itxfm.o                \
+                                          x86/vp9lpf.o                  \
+                                          x86/vp9mc.o
+YASM-OBJS-$(CONFIG_WEBP_DECODER)       += x86/vp8dsp.o
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index 817d5a3..b244416 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -2,20 +2,20 @@
 ;* x86-optimized AC-3 DSP functions
 ;* Copyright (c) 2011 Justin Ruggles
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
diff --git a/libavcodec/x86/ac3dsp_init.c b/libavcodec/x86/ac3dsp_init.c
index cd638b9..30a85f9 100644
--- a/libavcodec/x86/ac3dsp_init.c
+++ b/libavcodec/x86/ac3dsp_init.c
@@ -2,20 +2,20 @@
  * x86-optimized AC-3 DSP functions
  * Copyright (c) 2011 Justin Ruggles
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -64,6 +64,11 @@ void ff_apply_window_int16_ssse3(int16_t *output, const int16_t *input,
 void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input,
                                       const int16_t *window, unsigned int len);
 
+#if ARCH_X86_32 && defined(__INTEL_COMPILER)
+#       undef HAVE_7REGS
+#       define HAVE_7REGS 0
+#endif
+
 #if HAVE_SSE_INLINE && HAVE_7REGS
 
 #define IF1(x) x
diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index f2e831d..273b9ef 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -2,20 +2,20 @@
 ;* optimized audio functions
 ;* Copyright (c) 2008 Loren Merritt
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
@@ -40,15 +40,11 @@ cglobal scalarproduct_int16, 3,3,3, v1, v2, order
     paddd   m2, m1
     add     orderq, mmsize*2
     jl .loop
-%if mmsize == 16
-    movhlps m0, m2
-    paddd   m2, m0
-    pshuflw m0, m2, 0x4e
-%else
-    pshufw  m0, m2, 0x4e
-%endif
-    paddd   m2, m0
+    HADDD   m2, m0
     movd   eax, m2
+%if mmsize == 8
+    emms
+%endif
     RET
 %endmacro
 
@@ -80,17 +76,17 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
     SPLATD    m4
     SPLATD    m5
 .loop:
-%assign %%i 1
+%assign %%i 0
 %rep %2
-    mova      m0,  [srcq+mmsize*0*%%i]
-    mova      m1,  [srcq+mmsize*1*%%i]
-    mova      m2,  [srcq+mmsize*2*%%i]
-    mova      m3,  [srcq+mmsize*3*%%i]
+    mova      m0,  [srcq+mmsize*(0+%%i)]
+    mova      m1,  [srcq+mmsize*(1+%%i)]
+    mova      m2,  [srcq+mmsize*(2+%%i)]
+    mova      m3,  [srcq+mmsize*(3+%%i)]
 %if %3
-    mova      m7,  [srcq+mmsize*4*%%i]
-    mova      m8,  [srcq+mmsize*5*%%i]
-    mova      m9,  [srcq+mmsize*6*%%i]
-    mova      m10, [srcq+mmsize*7*%%i]
+    mova      m7,  [srcq+mmsize*(4+%%i)]
+    mova      m8,  [srcq+mmsize*(5+%%i)]
+    mova      m9,  [srcq+mmsize*(6+%%i)]
+    mova      m10, [srcq+mmsize*(7+%%i)]
 %endif
     CLIPD  m0,  m4, m5, m6
     CLIPD  m1,  m4, m5, m6
@@ -102,17 +98,17 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
     CLIPD  m9,  m4, m5, m6
     CLIPD  m10, m4, m5, m6
 %endif
-    mova  [dstq+mmsize*0*%%i], m0
-    mova  [dstq+mmsize*1*%%i], m1
-    mova  [dstq+mmsize*2*%%i], m2
-    mova  [dstq+mmsize*3*%%i], m3
+    mova  [dstq+mmsize*(0+%%i)], m0
+    mova  [dstq+mmsize*(1+%%i)], m1
+    mova  [dstq+mmsize*(2+%%i)], m2
+    mova  [dstq+mmsize*(3+%%i)], m3
 %if %3
-    mova  [dstq+mmsize*4*%%i], m7
-    mova  [dstq+mmsize*5*%%i], m8
-    mova  [dstq+mmsize*6*%%i], m9
-    mova  [dstq+mmsize*7*%%i], m10
+    mova  [dstq+mmsize*(4+%%i)], m7
+    mova  [dstq+mmsize*(5+%%i)], m8
+    mova  [dstq+mmsize*(6+%%i)], m9
+    mova  [dstq+mmsize*(7+%%i)], m10
 %endif
-%assign %%i %%i+1
+%assign %%i %%i+4*(%3+1)
 %endrep
     add     srcq, mmsize*4*(%2+%3)
     add     dstq, mmsize*4*(%2+%3)
@@ -135,3 +131,47 @@ VECTOR_CLIP_INT32 11, 1, 1, 0
 %else
 VECTOR_CLIP_INT32 6, 1, 0, 0
 %endif
+
+;-----------------------------------------------------
+;void ff_vector_clipf(float *dst, const float *src,
+;                     float min, float max, int len)
+;-----------------------------------------------------
+INIT_XMM sse
+%if UNIX64
+cglobal vector_clipf, 3,3,6, dst, src, len
+%else
+cglobal vector_clipf, 5,5,6, dst, src, min, max, len
+%endif ; UNIX64 declares only dst/src/len and never loads min/max, so m0/m1 must already hold them on entry
+%if WIN64
+    SWAP 0, 2                           ; from here on m0 names the register previously called m2
+    SWAP 1, 3                           ; and m1 the one previously called m3 (presumably where min/max arrive; confirm against the Win64 ABI)
+%elif ARCH_X86_32
+    movss   m0, minm                    ; load scalar min from its argument location
+    movss   m1, maxm                    ; load scalar max from its argument location
+%endif
+    SPLATD  m0                          ; replicate min into every dword lane of m0
+    SPLATD  m1                          ; replicate max into every dword lane of m1
+        shl lend, 2                     ; element count -> byte count (4 bytes per float)
+        add srcq, lenq                  ; srcq now points one past the last input byte
+        add dstq, lenq                  ; dstq now points one past the last output byte
+        neg lenq                        ; negative byte offset that counts up towards zero
+.loop:                                  ; body runs at least once and consumes 4*mmsize bytes; len is presumably a positive multiple of 16 -- confirm with callers
+    mova    m2,  [srcq+lenq+mmsize*0]   ; aligned loads of four consecutive vectors
+    mova    m3,  [srcq+lenq+mmsize*1]
+    mova    m4,  [srcq+lenq+mmsize*2]
+    mova    m5,  [srcq+lenq+mmsize*3]
+    maxps   m2, m0                      ; lower bound: lanes below min become min
+    maxps   m3, m0
+    maxps   m4, m0
+    maxps   m5, m0
+    minps   m2, m1                      ; upper bound: lanes above max become max
+    minps   m3, m1
+    minps   m4, m1
+    minps   m5, m1
+    mova    [dstq+lenq+mmsize*0], m2    ; aligned stores of the clipped vectors
+    mova    [dstq+lenq+mmsize*1], m3
+    mova    [dstq+lenq+mmsize*2], m4
+    mova    [dstq+lenq+mmsize*3], m5
+    add     lenq, mmsize*4              ; step past the four vectors just processed
+    jl .loop                            ; keep going while the offset is still negative
+    REP_RET
diff --git a/libavcodec/x86/audiodsp_init.c b/libavcodec/x86/audiodsp_init.c
index 743f5a3..a2ce231 100644
--- a/libavcodec/x86/audiodsp_init.c
+++ b/libavcodec/x86/audiodsp_init.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -24,7 +24,6 @@
 #include "libavutil/x86/asm.h"
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/audiodsp.h"
-#include "audiodsp.h"
 
 int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2,
                                       int order);
@@ -39,6 +38,8 @@ void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src,
                                    int32_t min, int32_t max, unsigned int len);
 void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src,
                                int32_t min, int32_t max, unsigned int len);
+void ff_vector_clipf_sse(float *dst, const float *src,
+                         float min, float max, int len);
 
 av_cold void ff_audiodsp_init_x86(AudioDSPContext *c)
 {
@@ -50,7 +51,7 @@ av_cold void ff_audiodsp_init_x86(AudioDSPContext *c)
     if (EXTERNAL_MMXEXT(cpu_flags))
         c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext;
 
-    if (INLINE_SSE(cpu_flags))
+    if (EXTERNAL_SSE(cpu_flags))
         c->vector_clipf = ff_vector_clipf_sse;
 
     if (EXTERNAL_SSE2(cpu_flags)) {
diff --git a/libavcodec/x86/audiodsp_mmx.c b/libavcodec/x86/audiodsp_mmx.c
deleted file mode 100644
index cb55059..0000000
--- a/libavcodec/x86/audiodsp_mmx.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "config.h"
-#include "libavutil/x86/asm.h"
-#include "audiodsp.h"
-
-#if HAVE_INLINE_ASM
-
-void ff_vector_clipf_sse(float *dst, const float *src,
-                         float min, float max, int len)
-{
-    x86_reg i = (len - 16) * 4;
-    __asm__ volatile (
-        "movss          %3, %%xmm4      \n\t"
-        "movss          %4, %%xmm5      \n\t"
-        "shufps $0, %%xmm4, %%xmm4      \n\t"
-        "shufps $0, %%xmm5, %%xmm5      \n\t"
-        "1:                             \n\t"
-        "movaps   (%2, %0), %%xmm0      \n\t" // 3/1 on intel
-        "movaps 16(%2, %0), %%xmm1      \n\t"
-        "movaps 32(%2, %0), %%xmm2      \n\t"
-        "movaps 48(%2, %0), %%xmm3      \n\t"
-        "maxps      %%xmm4, %%xmm0      \n\t"
-        "maxps      %%xmm4, %%xmm1      \n\t"
-        "maxps      %%xmm4, %%xmm2      \n\t"
-        "maxps      %%xmm4, %%xmm3      \n\t"
-        "minps      %%xmm5, %%xmm0      \n\t"
-        "minps      %%xmm5, %%xmm1      \n\t"
-        "minps      %%xmm5, %%xmm2      \n\t"
-        "minps      %%xmm5, %%xmm3      \n\t"
-        "movaps     %%xmm0,   (%1, %0)  \n\t"
-        "movaps     %%xmm1, 16(%1, %0)  \n\t"
-        "movaps     %%xmm2, 32(%1, %0)  \n\t"
-        "movaps     %%xmm3, 48(%1, %0)  \n\t"
-        "sub           $64, %0          \n\t"
-        "jge            1b              \n\t"
-        : "+&r" (i)
-        : "r" (dst), "r" (src), "m" (min), "m" (max)
-        : "memory");
-}
-
-#endif /* HAVE_INLINE_ASM */
diff --git a/libavcodec/x86/blockdsp.asm b/libavcodec/x86/blockdsp.asm
new file mode 100644
index 0000000..c793858
--- /dev/null
+++ b/libavcodec/x86/blockdsp.asm
@@ -0,0 +1,86 @@
+;******************************************************************************
+;* SIMD-optimized clear block functions
+;* Copyright (c) 2002 Michael Niedermayer
+;* Copyright (c) 2008 Loren Merritt
+;* Copyright (c) 2009 Fiona Glaser
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_TEXT
+
+;----------------------------------------
+; void ff_clear_block(int16_t *blocks);
+;----------------------------------------
+; %1 = number of xmm registers used
+; %2 = number of inline store loops
+%macro CLEAR_BLOCK 2
+cglobal clear_block, 1, 1, %1, blocks
+    ZERO  m0, m0                        ; m0 = 0; ZERO is %defined to pxor/xorps before each instantiation
+%assign %%i 0
+%rep %2
+    mova  [blocksq+mmsize*(0+%%i)], m0  ; each repetition zeroes 8*mmsize consecutive bytes
+    mova  [blocksq+mmsize*(1+%%i)], m0
+    mova  [blocksq+mmsize*(2+%%i)], m0
+    mova  [blocksq+mmsize*(3+%%i)], m0
+    mova  [blocksq+mmsize*(4+%%i)], m0
+    mova  [blocksq+mmsize*(5+%%i)], m0
+    mova  [blocksq+mmsize*(6+%%i)], m0
+    mova  [blocksq+mmsize*(7+%%i)], m0  ; %%i then advances by 8 so the next repetition continues right after this store
+%assign %%i %%i+8
+%endrep
+    RET
+%endmacro
+
+INIT_MMX mmx
+%define ZERO pxor
+CLEAR_BLOCK 0, 2                        ; no xmm registers; 2 repetitions x 8 stores x 8 bytes = 128 bytes
+INIT_XMM sse
+%define ZERO xorps
+CLEAR_BLOCK 1, 1                        ; one xmm register; 1 repetition x 8 stores x 16 bytes = 128 bytes
+
+;-----------------------------------------
+; void ff_clear_blocks(int16_t *blocks);
+;-----------------------------------------
+; %1 = number of xmm registers used
+%macro CLEAR_BLOCKS 1
+cglobal clear_blocks, 1, 2, %1, blocks, len
+    add   blocksq, 768                  ; blocksq -> end of the 768-byte region to clear (6 blocks of 128 bytes)
+    mov      lenq, -768                 ; len is a scratch register here: negative byte offset counting up to zero
+    ZERO       m0, m0                   ; m0 = 0; ZERO is %defined to pxor/xorps before each instantiation
+.loop:
+    mova  [blocksq+lenq+mmsize*0], m0   ; eight stores clear 8*mmsize bytes per iteration
+    mova  [blocksq+lenq+mmsize*1], m0
+    mova  [blocksq+lenq+mmsize*2], m0
+    mova  [blocksq+lenq+mmsize*3], m0
+    mova  [blocksq+lenq+mmsize*4], m0
+    mova  [blocksq+lenq+mmsize*5], m0
+    mova  [blocksq+lenq+mmsize*6], m0
+    mova  [blocksq+lenq+mmsize*7], m0
+    add   lenq, mmsize*8                ; advance by the bytes just cleared
+    js .loop                            ; repeat while the offset is still negative
+    RET
+%endmacro
+
+INIT_MMX mmx
+%define ZERO pxor
+CLEAR_BLOCKS 0                          ; MMX variant: no xmm registers declared
+INIT_XMM sse
+%define ZERO xorps
+CLEAR_BLOCKS 1                          ; SSE variant: one xmm register declared
diff --git a/libavcodec/x86/blockdsp.c b/libavcodec/x86/blockdsp.c
deleted file mode 100644
index b529424..0000000
--- a/libavcodec/x86/blockdsp.c
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdint.h>
-
-#include "config.h"
-#include "libavutil/attributes.h"
-#include "libavutil/internal.h"
-#include "libavutil/cpu.h"
-#include "libavutil/x86/asm.h"
-#include "libavutil/x86/cpu.h"
-#include "libavcodec/blockdsp.h"
-#include "libavcodec/version.h"
-
-#if HAVE_INLINE_ASM
-
-#define CLEAR_BLOCKS(name, n)                           \
-static void name(int16_t *blocks)                       \
-{                                                       \
-    __asm__ volatile (                                  \
-        "pxor %%mm7, %%mm7              \n\t"           \
-        "mov     %1,        %%"REG_a"   \n\t"           \
-        "1:                             \n\t"           \
-        "movq %%mm7,   (%0, %%"REG_a")  \n\t"           \
-        "movq %%mm7,  8(%0, %%"REG_a")  \n\t"           \
-        "movq %%mm7, 16(%0, %%"REG_a")  \n\t"           \
-        "movq %%mm7, 24(%0, %%"REG_a")  \n\t"           \
-        "add    $32, %%"REG_a"          \n\t"           \
-        "js      1b                     \n\t"           \
-        :: "r"(((uint8_t *) blocks) + 128 * n),         \
-           "i"(-128 * n)                                \
-        : "%"REG_a);                                    \
-}
-CLEAR_BLOCKS(clear_blocks_mmx, 6)
-CLEAR_BLOCKS(clear_block_mmx, 1)
-
-static void clear_block_sse(int16_t *block)
-{
-    __asm__ volatile (
-        "xorps  %%xmm0, %%xmm0          \n"
-        "movaps %%xmm0,    (%0)         \n"
-        "movaps %%xmm0,  16(%0)         \n"
-        "movaps %%xmm0,  32(%0)         \n"
-        "movaps %%xmm0,  48(%0)         \n"
-        "movaps %%xmm0,  64(%0)         \n"
-        "movaps %%xmm0,  80(%0)         \n"
-        "movaps %%xmm0,  96(%0)         \n"
-        "movaps %%xmm0, 112(%0)         \n"
-        :: "r" (block)
-        : "memory");
-}
-
-static void clear_blocks_sse(int16_t *blocks)
-{
-    __asm__ volatile (
-        "xorps  %%xmm0, %%xmm0              \n"
-        "mov        %1,         %%"REG_a"   \n"
-        "1:                                 \n"
-        "movaps %%xmm0,    (%0, %%"REG_a")  \n"
-        "movaps %%xmm0,  16(%0, %%"REG_a")  \n"
-        "movaps %%xmm0,  32(%0, %%"REG_a")  \n"
-        "movaps %%xmm0,  48(%0, %%"REG_a")  \n"
-        "movaps %%xmm0,  64(%0, %%"REG_a")  \n"
-        "movaps %%xmm0,  80(%0, %%"REG_a")  \n"
-        "movaps %%xmm0,  96(%0, %%"REG_a")  \n"
-        "movaps %%xmm0, 112(%0, %%"REG_a")  \n"
-        "add      $128,         %%"REG_a"   \n"
-        "js         1b                      \n"
-        :: "r"(((uint8_t *) blocks) + 128 * 6), "i"(-128 * 6)
-        : "%"REG_a);
-}
-
-#endif /* HAVE_INLINE_ASM */
-
-#if FF_API_XVMC
-av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth,
-                                  AVCodecContext *avctx)
-#else
-av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth)
-#endif /* FF_API_XVMC */
-{
-#if HAVE_INLINE_ASM
-    int cpu_flags = av_get_cpu_flags();
-
-    if (!high_bit_depth) {
-        if (INLINE_MMX(cpu_flags)) {
-            c->clear_block  = clear_block_mmx;
-            c->clear_blocks = clear_blocks_mmx;
-        }
-
-#if FF_API_XVMC
-FF_DISABLE_DEPRECATION_WARNINGS
-    /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */
-    if (CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1)
-        return;
-FF_ENABLE_DEPRECATION_WARNINGS
-#endif /* FF_API_XVMC */
-
-        if (INLINE_SSE(cpu_flags)) {
-            c->clear_block  = clear_block_sse;
-            c->clear_blocks = clear_blocks_sse;
-        }
-    }
-#endif /* HAVE_INLINE_ASM */
-}
diff --git a/libavcodec/x86/blockdsp_init.c b/libavcodec/x86/blockdsp_init.c
new file mode 100644
index 0000000..7780184
--- /dev/null
+++ b/libavcodec/x86/blockdsp_init.c
@@ -0,0 +1,60 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/internal.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/blockdsp.h"
+#include "libavcodec/version.h"
+
+void ff_clear_block_mmx(int16_t *block);
+void ff_clear_block_sse(int16_t *block);
+void ff_clear_blocks_mmx(int16_t *blocks);
+void ff_clear_blocks_sse(int16_t *blocks);
+
+#if FF_API_XVMC
+av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth,
+                                  AVCodecContext *avctx)
+#else
+av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth)
+#endif /* FF_API_XVMC */
+{
+#if HAVE_YASM
+    int cpu_flags = av_get_cpu_flags();
+    /* Install the external-asm clear_block/clear_blocks; nothing is changed when high_bit_depth is set. */
+    if (!high_bit_depth) {
+        if (EXTERNAL_MMX(cpu_flags)) {
+            c->clear_block  = ff_clear_block_mmx;
+            c->clear_blocks = ff_clear_blocks_mmx;
+        }
+#if FF_API_XVMC /* avctx is only a parameter in this configuration */
+        /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */
+        if (CONFIG_XVMC && avctx->hwaccel && avctx->hwaccel->decode_mb)
+            return; /* keep the MMX pointers set above, skip the SSE ones */
+#endif /* FF_API_XVMC */
+        if (EXTERNAL_SSE(cpu_flags)) { /* when available, SSE replaces the MMX pointers */
+            c->clear_block  = ff_clear_block_sse;
+            c->clear_blocks = ff_clear_blocks_sse;
+        }
+    }
+#endif /* HAVE_YASM */
+}
diff --git a/libavcodec/x86/bswapdsp.asm b/libavcodec/x86/bswapdsp.asm
index 17a6cb1..ec060c9 100644
--- a/libavcodec/x86/bswapdsp.asm
+++ b/libavcodec/x86/bswapdsp.asm
@@ -1,21 +1,23 @@
 ;******************************************************************************
 ;* optimized bswap buffer functions
 ;* Copyright (c) 2008 Loren Merritt
+;* Copyright (c) 2003-2013 Michael Niedermayer
+;* Copyright (c) 2013 Daniel Kang
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
@@ -24,6 +26,8 @@
 SECTION_RODATA
 pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
 
+cextern pb_80
+
 SECTION_TEXT
 
 ; %1 = aligned/unaligned
@@ -90,6 +94,7 @@ cglobal bswap32_buf, 3,4,3
 cglobal bswap32_buf, 3,4,5
     mov      r3, r1
 %endif
+    or       r3, r0
     and      r3, 15
     jz       .start_align
     BSWAP_LOOPS  u
diff --git a/libavcodec/x86/bswapdsp_init.c b/libavcodec/x86/bswapdsp_init.c
index ba40f2d..c042e56 100644
--- a/libavcodec/x86/bswapdsp_init.c
+++ b/libavcodec/x86/bswapdsp_init.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/x86/cabac.h b/libavcodec/x86/cabac.h
index d1701bf..3510336 100644
--- a/libavcodec/x86/cabac.h
+++ b/libavcodec/x86/cabac.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,8 +27,28 @@
 #include "libavutil/x86/asm.h"
 #include "config.h"
 
+#if   (defined(__i386) && defined(__clang__) && (__clang_major__<2 || (__clang_major__==2 && __clang_minor__<10)))\
+   || (                  !defined(__clang__) && defined(__llvm__) && __GNUC__==4 && __GNUC_MINOR__==2 && __GNUC_PATCHLEVEL__<=1)\
+   || (defined(__INTEL_COMPILER) && defined(_MSC_VER))
+#       define BROKEN_COMPILER 1 /* i386 clang older than 2.10, a non-clang LLVM compiler reporting GCC 4.2.0/4.2.1, or an Intel compiler that defines _MSC_VER */
+#else
+#       define BROKEN_COMPILER 0 /* the inline-asm CABAC overrides in this header are only compiled in this case */
+#endif
+
 #if HAVE_INLINE_ASM
 
+#ifndef UNCHECKED_BITSTREAM_READER
+#define UNCHECKED_BITSTREAM_READER !CONFIG_SAFE_BITSTREAM_READER /* default, used only when the including file did not define it */
+#endif
+
+#if UNCHECKED_BITSTREAM_READER
+#define END_CHECK(end) "" /* unchecked reader: contributes no instructions to the asm template */
+#else
+#define END_CHECK(end) \
+        "cmp    "end"       , %%"REG_c"                                 \n\t"\
+        "jge    1f                                                      \n\t"
+#endif /* checked reader: compares REG_c with end and jumps to local label 1, skipping the pointer advance, once REG_c >= end */
+
 #ifdef BROKEN_RELOCATIONS
 #define TABLES_ARG , "r"(tables)
 
@@ -73,8 +93,7 @@
         "test   "lowword"   , "lowword"                                 \n\t"\
         "jnz    2f                                                      \n\t"\
         "mov    "byte"      , %%"REG_c"                                 \n\t"\
-        "cmp    "end"       , %%"REG_c"                                 \n\t"\
-        "jge    1f                                                      \n\t"\
+        END_CHECK(end)\
         "add"OPSIZE" $2     , "byte"                                    \n\t"\
         "1:                                                             \n\t"\
         "movzwl (%%"REG_c") , "tmp"                                     \n\t"\
@@ -92,7 +111,8 @@
         "2:                                                             \n\t"
 
 #else /* BROKEN_RELOCATIONS */
-#define TABLES_ARG
+#define TABLES_ARG NAMED_CONSTRAINTS_ARRAY_ADD(ff_h264_cabac_tables)
+#define RIP_ARG
 
 #if HAVE_FAST_CMOV
 #define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\
@@ -134,8 +154,7 @@
         "test   "lowword"   , "lowword"                                 \n\t"\
         " jnz   2f                                                      \n\t"\
         "mov    "byte"      , %%"REG_c"                                 \n\t"\
-        "cmp    "end"       , %%"REG_c"                                 \n\t"\
-        "jge    1f                                                      \n\t"\
+        END_CHECK(end)\
         "add"OPSIZE" $2     , "byte"                                    \n\t"\
         "1:                                                             \n\t"\
         "movzwl (%%"REG_c")     , "tmp"                                 \n\t"\
@@ -154,8 +173,7 @@
 
 #endif /* BROKEN_RELOCATIONS */
 
-
-#if HAVE_7REGS
+#if HAVE_7REGS && !BROKEN_COMPILER
 #define get_cabac_inline get_cabac_inline_x86
 static av_always_inline int get_cabac_inline_x86(CABACContext *c,
                                                  uint8_t *const state)
@@ -167,6 +185,7 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c,
     __asm__ volatile(
         "lea    "MANGLE(ff_h264_cabac_tables)", %0      \n\t"
         : "=&r"(tables)
+        : NAMED_CONSTRAINTS_ARRAY(ff_h264_cabac_tables)
     );
 #endif
 
@@ -178,17 +197,19 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c,
                              AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
                              AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
                              "%8")
-        : "=&r"(bit), "+&r"(c->low), "+&r"(c->range), "=&q"(tmp)
+        : "=&r"(bit), "=&r"(c->low), "=&r"(c->range), "=&q"(tmp)
         : "r"(state), "r"(c),
           "i"(offsetof(CABACContext, bytestream)),
           "i"(offsetof(CABACContext, bytestream_end))
           TABLES_ARG
+          ,"1"(c->low), "2"(c->range)
         : "%"REG_c, "memory"
     );
     return bit & 1;
 }
-#endif /* HAVE_7REGS */
+#endif /* HAVE_7REGS && !BROKEN_COMPILER */
 
+#if !BROKEN_COMPILER
 #define get_cabac_bypass_sign get_cabac_bypass_sign_x86
 static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
 {
@@ -199,7 +220,7 @@ static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
         "shl             $17, %k1       \n\t"
         "add           %%eax, %%eax     \n\t"
         "sub             %k1, %%eax     \n\t"
-        "cltd                           \n\t"
+        "cdq                            \n\t"
         "and           %%edx, %k1       \n\t"
         "add             %k1, %%eax     \n\t"
         "xor           %%edx, %%ecx     \n\t"
@@ -211,10 +232,16 @@ static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
         "movzwl         (%1), %%edx     \n\t"
         "bswap         %%edx            \n\t"
         "shrl            $15, %%edx     \n\t"
+#if UNCHECKED_BITSTREAM_READER
+        "add              $2, %1        \n\t"
+        "addl          %%edx, %%eax     \n\t"
+        "mov              %1, %c4(%2)   \n\t"
+#else
         "addl          %%edx, %%eax     \n\t"
         "cmp         %c5(%2), %1        \n\t"
         "jge              1f            \n\t"
         "add"OPSIZE"      $2, %c4(%2)   \n\t"
+#endif
         "1:                             \n\t"
         "movl          %%eax, %c3(%2)   \n\t"
 
@@ -240,7 +267,7 @@ static av_always_inline int get_cabac_bypass_x86(CABACContext *c)
         "shl             $17, %k1       \n\t"
         "add           %%eax, %%eax     \n\t"
         "sub             %k1, %%eax     \n\t"
-        "cltd                           \n\t"
+        "cdq                            \n\t"
         "and           %%edx, %k1       \n\t"
         "add             %k1, %%eax     \n\t"
         "inc           %%edx            \n\t"
@@ -268,6 +295,7 @@ static av_always_inline int get_cabac_bypass_x86(CABACContext *c)
     );
     return res;
 }
+#endif /* !BROKEN_COMPILER */
 
 #endif /* HAVE_INLINE_ASM */
 #endif /* AVCODEC_X86_CABAC_H */
diff --git a/libavcodec/x86/cavsdsp.c b/libavcodec/x86/cavsdsp.c
index b323a10..d155fb2 100644
--- a/libavcodec/x86/cavsdsp.c
+++ b/libavcodec/x86/cavsdsp.c
@@ -5,20 +5,20 @@
  * MMX-optimized DSP functions, based on H.264 optimizations by
  * Michael Niedermayer and Loren Merritt
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -212,10 +212,10 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
  ****************************************************************************/
 
 /* vertical filter [-1 -2 96 42 -7  0]  */
-#define QPEL_CAVSV1(A,B,C,D,E,F,OP,MUL2) \
+#define QPEL_CAVSV1(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
         "movd (%0), "#F"            \n\t"\
         "movq "#C", %%mm6           \n\t"\
-        "pmullw %5, %%mm6           \n\t"\
+        "pmullw "MANGLE(MUL1)", %%mm6\n\t"\
         "movq "#D", %%mm7           \n\t"\
         "pmullw "MANGLE(MUL2)", %%mm7\n\t"\
         "psllw $3, "#E"             \n\t"\
@@ -230,35 +230,35 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
         "psubw "#B", %%mm6          \n\t"\
         "psraw $1, "#B"             \n\t"\
         "psubw "#A", %%mm6          \n\t"\
-        "paddw %4, %%mm6            \n\t"\
+        "paddw "MANGLE(ADD)", %%mm6 \n\t"\
         "psraw $7, %%mm6            \n\t"\
         "packuswb %%mm6, %%mm6      \n\t"\
         OP(%%mm6, (%1), A, d)            \
         "add %3, %1                 \n\t"
 
 /* vertical filter [ 0 -1  5  5 -1  0]  */
-#define QPEL_CAVSV2(A,B,C,D,E,F,OP,MUL2) \
+#define QPEL_CAVSV2(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
         "movd (%0), "#F"            \n\t"\
         "movq "#C", %%mm6           \n\t"\
         "paddw "#D", %%mm6          \n\t"\
-        "pmullw %5, %%mm6           \n\t"\
+        "pmullw "MANGLE(MUL1)", %%mm6\n\t"\
         "add %2, %0                 \n\t"\
         "punpcklbw %%mm7, "#F"      \n\t"\
         "psubw "#B", %%mm6          \n\t"\
         "psubw "#E", %%mm6          \n\t"\
-        "paddw %4, %%mm6            \n\t"\
+        "paddw "MANGLE(ADD)", %%mm6 \n\t"\
         "psraw $3, %%mm6            \n\t"\
         "packuswb %%mm6, %%mm6      \n\t"\
         OP(%%mm6, (%1), A, d)            \
         "add %3, %1                 \n\t"
 
 /* vertical filter [ 0 -7 42 96 -2 -1]  */
-#define QPEL_CAVSV3(A,B,C,D,E,F,OP,MUL2) \
+#define QPEL_CAVSV3(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
         "movd (%0), "#F"            \n\t"\
         "movq "#C", %%mm6           \n\t"\
         "pmullw "MANGLE(MUL2)", %%mm6\n\t"\
         "movq "#D", %%mm7           \n\t"\
-        "pmullw %5, %%mm7           \n\t"\
+        "pmullw "MANGLE(MUL1)", %%mm7\n\t"\
         "psllw $3, "#B"             \n\t"\
         "psubw "#B", %%mm6          \n\t"\
         "psraw $3, "#B"             \n\t"\
@@ -271,7 +271,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
         "psubw "#E", %%mm6          \n\t"\
         "psraw $1, "#E"             \n\t"\
         "psubw "#F", %%mm6          \n\t"\
-        "paddw %4, %%mm6            \n\t"\
+        "paddw "MANGLE(ADD)", %%mm6 \n\t"\
         "psraw $7, %%mm6            \n\t"\
         "packuswb %%mm6, %%mm6      \n\t"\
         OP(%%mm6, (%1), A, d)            \
@@ -300,32 +300,34 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
         "punpcklbw %%mm7, %%mm2     \n\t"\
         "punpcklbw %%mm7, %%mm3     \n\t"\
         "punpcklbw %%mm7, %%mm4     \n\t"\
-        VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, MUL2)\
-        VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, MUL2)\
-        VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, MUL2)\
-        VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, MUL2)\
-        VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, MUL2)\
-        VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, MUL2)\
-        VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, MUL2)\
-        VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, MUL2)\
+        VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
+        VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
+        VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
+        VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
+        VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\
+        VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\
+        VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
+        VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
         \
         : "+a"(src), "+c"(dst)\
-        : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1)\
+        : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\
+          NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\
         : "memory"\
      );\
      if(h==16){\
         __asm__ volatile(\
-            VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, MUL2)\
-            VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, MUL2)\
-            VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, MUL2)\
-            VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, MUL2)\
-            VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, MUL2)\
-            VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, MUL2)\
-            VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, MUL2)\
-            VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, MUL2)\
+            VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
+            VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
+            VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\
+            VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\
+            VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
+            VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
+            VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
+            VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
             \
            : "+a"(src), "+c"(dst)\
-           : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD),  "m"(MUL1)\
+           : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\
+             NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\
            : "memory"\
         );\
      }\
@@ -338,7 +340,7 @@ static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, const uint8_t *src, int
     int h=8;\
     __asm__ volatile(\
         "pxor %%mm7, %%mm7          \n\t"\
-        "movq %5, %%mm6             \n\t"\
+        "movq "MANGLE(ff_pw_5)", %%mm6\n\t"\
         "1:                         \n\t"\
         "movq    (%0), %%mm0        \n\t"\
         "movq   1(%0), %%mm2        \n\t"\
@@ -364,7 +366,7 @@ static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, const uint8_t *src, int
         "paddw %%mm3, %%mm5         \n\t"\
         "psubw %%mm2, %%mm0         \n\t"\
         "psubw %%mm5, %%mm1         \n\t"\
-        "movq %6, %%mm5             \n\t"\
+        "movq "MANGLE(ff_pw_4)", %%mm5\n\t"\
         "paddw %%mm5, %%mm0         \n\t"\
         "paddw %%mm5, %%mm1         \n\t"\
         "psraw $3, %%mm0            \n\t"\
@@ -376,7 +378,8 @@ static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, const uint8_t *src, int
         "decl %2                    \n\t"\
         " jnz 1b                    \n\t"\
         : "+a"(src), "+c"(dst), "+m"(h)\
-        : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_4)\
+        : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\
+          NAMED_CONSTRAINTS_ADD(ff_pw_4,ff_pw_5)\
         : "memory"\
     );\
 }\
@@ -386,7 +389,7 @@ static inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t *dst, const uint8
 }\
 \
 static inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h){\
-  QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,ff_pw_5)         \
+  QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,ff_pw_42)        \
 }\
 \
 static inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h){\
@@ -459,7 +462,7 @@ static void OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uin
 
 #endif /* (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE) */
 
-#if HAVE_MMX_INLINE
+#if HAVE_MMX_EXTERNAL
 static void put_cavs_qpel8_mc00_mmx(uint8_t *dst, const uint8_t *src,
                                     ptrdiff_t stride)
 {
@@ -472,6 +475,12 @@ static void avg_cavs_qpel8_mc00_mmx(uint8_t *dst, const uint8_t *src,
     ff_avg_pixels8_mmx(dst, src, stride, 8);
 }
 
+static void avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, const uint8_t *src,
+                                       ptrdiff_t stride)
+{
+    ff_avg_pixels8_mmxext(dst, src, stride, 8);
+}
+
 static void put_cavs_qpel16_mc00_mmx(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t stride)
 {
@@ -484,18 +493,40 @@ static void avg_cavs_qpel16_mc00_mmx(uint8_t *dst, const uint8_t *src,
     ff_avg_pixels16_mmx(dst, src, stride, 16);
 }
 
+static void avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, const uint8_t *src,
+                                        ptrdiff_t stride)
+{
+    ff_avg_pixels16_mmxext(dst, src, stride, 16);
+}
+
+static void put_cavs_qpel16_mc00_sse2(uint8_t *dst, const uint8_t *src,
+                                      ptrdiff_t stride)
+{
+    ff_put_pixels16_sse2(dst, src, stride, 16);
+}
+
+static void avg_cavs_qpel16_mc00_sse2(uint8_t *dst, const uint8_t *src,
+                                      ptrdiff_t stride)
+{
+    ff_avg_pixels16_sse2(dst, src, stride, 16);
+}
+#endif
+
 static av_cold void cavsdsp_init_mmx(CAVSDSPContext *c,
                                      AVCodecContext *avctx)
 {
+#if HAVE_MMX_EXTERNAL
     c->put_cavs_qpel_pixels_tab[0][0] = put_cavs_qpel16_mc00_mmx;
     c->put_cavs_qpel_pixels_tab[1][0] = put_cavs_qpel8_mc00_mmx;
     c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_mmx;
     c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmx;
+#endif
 
+#if HAVE_MMX_INLINE
     c->cavs_idct8_add = cavs_idct8_add_mmx;
     c->idct_perm      = FF_IDCT_PERM_TRANSPOSE;
-}
 #endif /* HAVE_MMX_INLINE */
+}
 
 #define DSPFUNC(PFX, IDX, NUM, EXT)                                                       \
     c->PFX ## _cavs_qpel_pixels_tab[IDX][ 2] = PFX ## _cavs_qpel ## NUM ## _mc20_ ## EXT; \
@@ -511,15 +542,6 @@ CAVS_MC(put_,  8, mmxext)
 CAVS_MC(put_, 16, mmxext)
 CAVS_MC(avg_,  8, mmxext)
 CAVS_MC(avg_, 16, mmxext)
-
-static av_cold void cavsdsp_init_mmxext(CAVSDSPContext *c,
-                                        AVCodecContext *avctx)
-{
-    DSPFUNC(put, 0, 16, mmxext);
-    DSPFUNC(put, 1,  8, mmxext);
-    DSPFUNC(avg, 0, 16, mmxext);
-    DSPFUNC(avg, 1,  8, mmxext);
-}
 #endif /* HAVE_MMXEXT_INLINE */
 
 #if HAVE_AMD3DNOW_INLINE
@@ -543,18 +565,45 @@ static av_cold void cavsdsp_init_3dnow(CAVSDSPContext *c,
 
+/**
+ * Install the x86-optimized CAVS DSP functions into the context.
+ *
+ * Each group of function pointers is only installed when the matching
+ * instruction set was enabled at build time and is reported by
+ * av_get_cpu_flags() at run time.
+ *
+ * @param c     DSP context whose function pointers are overridden
+ * @param avctx codec context, forwarded to the per-ISA init helpers
+ */
 av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx)
 {
-#if HAVE_MMX_INLINE
     int cpu_flags = av_get_cpu_flags();
 
-    if (INLINE_MMX(cpu_flags))
-        cavsdsp_init_mmx(c, avctx);
-#endif /* HAVE_MMX_INLINE */
+    /* cavsdsp_init_mmx() is no longer wrapped in HAVE_MMX_INLINE, but it must
+     * still not be used when the run-time CPU flags do not include MMX. */
+    if (cpu_flags & AV_CPU_FLAG_MMX)
+        cavsdsp_init_mmx(c, avctx);
 #if HAVE_AMD3DNOW_INLINE
     if (INLINE_AMD3DNOW(cpu_flags))
         cavsdsp_init_3dnow(c, avctx);
 #endif /* HAVE_AMD3DNOW_INLINE */
 #if HAVE_MMXEXT_INLINE
-    if (INLINE_MMXEXT(cpu_flags))
-        cavsdsp_init_mmxext(c, avctx);
-#endif /* HAVE_MMXEXT_INLINE */
+    if (INLINE_MMXEXT(cpu_flags)) {
+        DSPFUNC(put, 0, 16, mmxext);
+        DSPFUNC(put, 1,  8, mmxext);
+        DSPFUNC(avg, 0, 16, mmxext);
+        DSPFUNC(avg, 1,  8, mmxext);
+    }
+#endif
+#if HAVE_MMX_EXTERNAL
+    /* these override the [*][0] (mc00) entries set by cavsdsp_init_mmx() */
+    if (EXTERNAL_MMXEXT(cpu_flags)) {
+        c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_mmxext;
+        c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmxext;
+    }
+#endif
+#if HAVE_SSE2_EXTERNAL
+    if (EXTERNAL_SSE2(cpu_flags)) {
+        c->put_cavs_qpel_pixels_tab[0][0] = put_cavs_qpel16_mc00_sse2;
+        c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_sse2;
+    }
+#endif
 }
diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c
index 5b8d1b2..7608bb3 100644
--- a/libavcodec/x86/constants.c
+++ b/libavcodec/x86/constants.c
@@ -1,20 +1,20 @@
 /*
- * MMX/SSE constants used across x86 dsp optimizations.
+ * MMX/SSE/AVX constants used across x86 dsp optimizations.
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -47,7 +47,9 @@ DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_512)  = { 0x0200020002000200ULL, 0x020
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_1019) = { 0x03FB03FB03FB03FBULL, 0x03FB03FB03FB03FBULL };
 
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_0)    = { 0x0000000000000000ULL, 0x0000000000000000ULL };
-DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_1)    = { 0x0101010101010101ULL, 0x0101010101010101ULL };
-DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_3)    = { 0x0303030303030303ULL, 0x0303030303030303ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pb_1)    = { 0x0101010101010101ULL, 0x0101010101010101ULL,
+                                                    0x0101010101010101ULL, 0x0101010101010101ULL };
+DECLARE_ALIGNED(32, const ymm_reg,  ff_pb_3)    = { 0x0303030303030303ULL, 0x0303030303030303ULL,
+                                                    0x0303030303030303ULL, 0x0303030303030303ULL };
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_80)   = { 0x8080808080808080ULL, 0x8080808080808080ULL };
 DECLARE_ALIGNED(8,  const uint64_t, ff_pb_FC)   =   0xFCFCFCFCFCFCFCFCULL;
diff --git a/libavcodec/x86/constants.h b/libavcodec/x86/constants.h
index f38fbe3..3ebf171 100644
--- a/libavcodec/x86/constants.h
+++ b/libavcodec/x86/constants.h
@@ -1,20 +1,20 @@
 /*
  * MMX/SSE constants used across x86 dsp optimizations.
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -31,6 +31,7 @@ extern const xmm_reg  ff_pw_3;
 extern const xmm_reg  ff_pw_4;
 extern const xmm_reg  ff_pw_5;
 extern const xmm_reg  ff_pw_8;
+extern const xmm_reg  ff_pw_9;
 extern const uint64_t ff_pw_15;
 extern const xmm_reg  ff_pw_16;
 extern const xmm_reg  ff_pw_18;
@@ -43,8 +44,9 @@ extern const uint64_t ff_pw_96;
 extern const uint64_t ff_pw_128;
 extern const uint64_t ff_pw_255;
 
-extern const xmm_reg  ff_pb_1;
-extern const xmm_reg  ff_pb_3;
+extern const ymm_reg  ff_pb_1;
+extern const ymm_reg  ff_pb_3;
+extern const xmm_reg  ff_pb_80;
 extern const xmm_reg  ff_pb_F8;
 extern const uint64_t ff_pb_FC;
 
diff --git a/libavcodec/x86/dca.h b/libavcodec/x86/dca.h
index 11d45ae..c9be50d 100644
--- a/libavcodec/x86/dca.h
+++ b/libavcodec/x86/dca.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2012-2014 Christophe Gisquet <christophe.gisquet@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
index c42ee23..1ac2378 100644
--- a/libavcodec/x86/dcadsp.asm
+++ b/libavcodec/x86/dcadsp.asm
@@ -2,20 +2,20 @@
 ;* SSE-optimized functions for the DCA decoder
 ;* Copyright (C) 2012-2014 Christophe Gisquet <christophe.gisquet@gmail.com>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
@@ -132,11 +132,16 @@ DECODE_HF
     mulps       va, %2
     mulps       vb, %2
 %if %0 == 3
+%if cpuflag(fma3)
+    fmaddps     va, m4, %3, va
+    fmaddps     vb, m0, %3, vb
+%else
     mulps       m4, %3
     mulps       m0, %3
     addps       va, m4
     addps       vb, m0
 %endif
+%endif
     ; va = va1 va2 va3 va4
     ; vb = vb1 vb2 vb3 vb4
 %if %1
@@ -148,7 +153,7 @@ DECODE_HF
     addps       m4, va ; va1+3 vb1+3 va2+4 vb2+4
     movhlps     vb, m4 ; va1+3  vb1+3
     addps       vb, m4 ; va0..4 vb0..4
-    movh    [outq + count], vb
+    movlps  [outq + count], vb
 %if %1
     sub       cf0q, 8*NUM_COEF
 %endif
@@ -198,6 +203,10 @@ cglobal dca_lfe_fir%1, 3,3,6-%1, out, in, cf0
 INIT_XMM sse
 DCA_LFE_FIR 0
 DCA_LFE_FIR 1
+%if HAVE_FMA3_EXTERNAL
+INIT_XMM fma3
+DCA_LFE_FIR 0
+%endif
 
 %macro SETZERO 1
 %if cpuflag(sse2) && notcpuflag(avx)
diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c
index 9acb818..bb86c26 100644
--- a/libavcodec/x86/dcadsp_init.c
+++ b/libavcodec/x86/dcadsp_init.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2012-2014 Christophe Gisquet <christophe.gisquet@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -34,6 +34,7 @@ void ff_decode_hf_sse4(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS
                        int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end);
 void ff_dca_lfe_fir0_sse(float *out, const float *in, const float *coefs);
 void ff_dca_lfe_fir1_sse(float *out, const float *in, const float *coefs);
+void ff_dca_lfe_fir0_fma3(float *out, const float *in, const float *coefs);
 
 av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
 {
@@ -54,6 +55,10 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
     if (EXTERNAL_SSE4(cpu_flags)) {
         s->decode_hf = ff_decode_hf_sse4;
     }
+
+    if (EXTERNAL_FMA3(cpu_flags)) {
+        s->lfe_fir[0]        = ff_dca_lfe_fir0_fma3;
+    }
 }
 
 
diff --git a/libavcodec/x86/dct-test.c b/libavcodec/x86/dct-test.c
index d97c53c..3ade1f3 100644
--- a/libavcodec/x86/dct-test.c
+++ b/libavcodec/x86/dct-test.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -22,6 +22,27 @@
 #include "idct_xvid.h"
 #include "simple_idct.h"
 
+#if ARCH_X86_64 && HAVE_MMX && HAVE_YASM
+void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize,
+                                int16_t *block, int16_t *qmat);
+/* Adapter giving the ProRes 10-bit SSE2 IDCT the in-place int16_t[64] signature of the idct test table. */
+static void ff_prores_idct_put_10_sse2_wrap(int16_t *dst){
+    DECLARE_ALIGNED(16, static int16_t, qmat)[64];
+    DECLARE_ALIGNED(16, static int16_t, tmp)[64];
+    int i;
+    /* flat quantiser matrix (all 4); dst is copied to tmp, which is passed as the coefficient block */
+    for(i=0; i<64; i++){
+        qmat[i]=4;
+        tmp[i]= dst[i];
+    }
+    ff_prores_idct_put_10_sse2(dst, 16, tmp, qmat);
+    /* NOTE(review): presumably removes the mid-level offset the put routine adds to its output -- confirm */
+    for(i=0; i<64; i++) {
+         dst[i] -= 512;
+    }
+}
+#endif
+
 static const struct algo fdct_tab_arch[] = {
 #if HAVE_MMX_INLINE
     { "MMX",    ff_fdct_mmx,    FF_IDCT_PERM_NONE, AV_CPU_FLAG_MMX },
@@ -48,6 +69,9 @@ static const struct algo idct_tab_arch[] = {
 #endif
 #if HAVE_SSE2_INLINE
     { "XVID-SSE2",   ff_idct_xvid_sse2,   FF_IDCT_PERM_SSE2,   AV_CPU_FLAG_SSE2,   1 },
+#if ARCH_X86_64 && HAVE_YASM
+    { "PR-SSE2",     ff_prores_idct_put_10_sse2_wrap, FF_IDCT_PERM_TRANSPOSE, AV_CPU_FLAG_SSE2, 1 },
+#endif
 #endif
 #endif /* CONFIG_MPEG4_DECODER */
     { 0 }
diff --git a/libavcodec/x86/dct32.asm b/libavcodec/x86/dct32.asm
index 9c147b9..c70f6c9 100644
--- a/libavcodec/x86/dct32.asm
+++ b/libavcodec/x86/dct32.asm
@@ -2,20 +2,20 @@
 ;* 32 point SSE-optimized DCT transform
 ;* Copyright (c) 2010 Vitor Sessak
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
@@ -192,6 +192,7 @@ ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000
 
 INIT_YMM avx
 SECTION_TEXT
+%if HAVE_AVX_EXTERNAL
 ; void ff_dct32_float_avx(FFTSample *out, const FFTSample *in)
 cglobal dct32_float, 2,3,8, out, in, tmp
     ; pass 1
@@ -264,6 +265,7 @@ cglobal dct32_float, 2,3,8, out, in, tmp
 INIT_XMM
     PASS6_AND_PERMUTE
     RET
+%endif
 
 %if ARCH_X86_64
 %define SPILL SWAP
@@ -482,7 +484,9 @@ cglobal dct32_float, 2, 3, 16, out, in, tmp
 %endif
 %endmacro
 
+%if ARCH_X86_32
 INIT_XMM sse
 DCT32_FUNC
+%endif
 INIT_XMM sse2
 DCT32_FUNC
diff --git a/libavcodec/x86/dct_init.c b/libavcodec/x86/dct_init.c
index 7bda5e8..30c8f12 100644
--- a/libavcodec/x86/dct_init.c
+++ b/libavcodec/x86/dct_init.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -30,7 +30,7 @@ av_cold void ff_dct_init_x86(DCTContext *s)
 {
     int cpu_flags = av_get_cpu_flags();
 
-    if (EXTERNAL_SSE(cpu_flags))
+    if (ARCH_X86_32 && EXTERNAL_SSE(cpu_flags))
         s->dct32 = ff_dct32_float_sse;
     if (EXTERNAL_SSE2(cpu_flags))
         s->dct32 = ff_dct32_float_sse2;
diff --git a/libavcodec/x86/deinterlace.asm b/libavcodec/x86/deinterlace.asm
index 70d000e..baa9249 100644
--- a/libavcodec/x86/deinterlace.asm
+++ b/libavcodec/x86/deinterlace.asm
@@ -3,20 +3,20 @@
 ;* Copyright (c) 2010 Vitor Sessak
 ;* Copyright (c) 2002 Michael Niedermayer
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
diff --git a/libavcodec/x86/dirac_dwt.c b/libavcodec/x86/dirac_dwt.c
new file mode 100644
index 0000000..3c51ea6
--- /dev/null
+++ b/libavcodec/x86/dirac_dwt.c
@@ -0,0 +1,202 @@
+/*
+ * MMX optimized discrete wavelet transform
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2010 David Conrad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/x86/asm.h"
+#include "libavutil/x86/cpu.h"
+#include "dirac_dwt.h"
+/* Declare the external ff_*##ext kernels and define static wrappers that also cover widths not a multiple of align. */
+#define COMPOSE_VERTICAL(ext, align) \
+void ff_vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width); \
+void ff_vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width); \
+void ff_vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \
+void ff_vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \
+void ff_vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width); \
+void ff_horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w);\
+void ff_horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w);\
+/* vertical wrappers: C handles columns [width_align, width), the kernel the first width_align columns */ \
+static void vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \
+{ \
+    int i, width_align = width&~(align-1); \
+\
+    for(i=width_align; i<width; i++) \
+        b1[i] = COMPOSE_53iL0(b0[i], b1[i], b2[i]); \
+\
+    ff_vertical_compose53iL0##ext(b0, b1, b2, width_align); \
+} \
+\
+static void vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \
+{ \
+    int i, width_align = width&~(align-1); \
+\
+    for(i=width_align; i<width; i++) \
+        b1[i] = COMPOSE_DIRAC53iH0(b0[i], b1[i], b2[i]); \
+\
+    ff_vertical_compose_dirac53iH0##ext(b0, b1, b2, width_align); \
+} \
+\
+static void vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, \
+                                           IDWTELEM *b3, IDWTELEM *b4, int width) \
+{ \
+    int i, width_align = width&~(align-1); \
+\
+    for(i=width_align; i<width; i++) \
+        b2[i] = COMPOSE_DD137iL0(b0[i], b1[i], b2[i], b3[i], b4[i]); \
+\
+    ff_vertical_compose_dd137iL0##ext(b0, b1, b2, b3, b4, width_align); \
+} \
+\
+static void vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, \
+                                          IDWTELEM *b3, IDWTELEM *b4, int width) \
+{ \
+    int i, width_align = width&~(align-1); \
+\
+    for(i=width_align; i<width; i++) \
+        b2[i] = COMPOSE_DD97iH0(b0[i], b1[i], b2[i], b3[i], b4[i]); \
+\
+    ff_vertical_compose_dd97iH0##ext(b0, b1, b2, b3, b4, width_align); \
+} \
+static void vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width) \
+{ \
+    int i, width_align = width&~(align-1); \
+    /* b1[i] is computed from the b0[i] value updated on the line before it */ \
+    for(i=width_align; i<width; i++) { \
+        b0[i] = COMPOSE_HAARiL0(b0[i], b1[i]); \
+        b1[i] = COMPOSE_HAARiH0(b1[i], b0[i]); \
+    } \
+\
+    ff_vertical_compose_haar##ext(b0, b1, width_align); \
+} \
+static void horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w)\
+{\
+    int w2= w>>1;\
+    int x= w2 - (w2&(align-1));\
+    ff_horizontal_compose_haar0i##ext(b, tmp, w);\
+    /* finish the last (w2 & (align-1)) output pairs in C */ \
+    for (; x < w2; x++) {\
+        b[2*x  ] = tmp[x];\
+        b[2*x+1] = COMPOSE_HAARiH0(b[x+w2], tmp[x]);\
+    }\
+}\
+static void horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w)\
+{\
+    int w2= w>>1;\
+    int x= w2 - (w2&(align-1));\
+    ff_horizontal_compose_haar1i##ext(b, tmp, w);\
+    /* finish the last (w2 & (align-1)) output pairs in C, with rounding shift */ \
+    for (; x < w2; x++) {\
+        b[2*x  ] = (tmp[x] + 1)>>1;\
+        b[2*x+1] = (COMPOSE_HAARiH0(b[x+w2], tmp[x]) + 1)>>1;\
+    }\
+}\
+\
+
+#if HAVE_YASM
+#if !ARCH_X86_64
+COMPOSE_VERTICAL(_mmx, 4)
+#endif
+COMPOSE_VERTICAL(_sse2, 8)
+
+
+void ff_horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w);
+/* Call the SSSE3 kernel on the full row, then write the last (w2 & 7) output pairs in C. */
+static void horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w)
+{
+    int w2= w>>1;
+    int x= w2 - (w2&7);
+    ff_horizontal_compose_dd97i_ssse3(b, tmp, w);
+    /* the tail reads tmp[x-1]..tmp[x+2]; NOTE(review): presumably tmp was filled (with padding) by the kernel -- confirm */
+    for (; x < w2; x++) {
+        b[2*x  ] = (tmp[x] + 1)>>1;
+        b[2*x+1] = (COMPOSE_DD97iH0(tmp[x-1], tmp[x], b[x+w2], tmp[x+1], tmp[x+2]) + 1)>>1;
+    }
+}
+#endif
+/* Replace the DWT compose callbacks of d with x86 versions selected by type and the CPU flags (no-op without HAVE_YASM). */
+void ff_spatial_idwt_init_mmx(DWTContext *d, enum dwt_type type)
+{
+#if HAVE_YASM
+  int mm_flags = av_get_cpu_flags();
+    /* MMX wrappers only exist in 32-bit builds; types not listed in a switch keep their current callbacks */
+#if !ARCH_X86_64
+    if (!(mm_flags & AV_CPU_FLAG_MMX))
+        return;
+
+    switch (type) {
+    case DWT_DIRAC_DD9_7:
+        d->vertical_compose_l0 = (void*)vertical_compose53iL0_mmx;
+        d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_mmx;
+        break;
+    case DWT_DIRAC_LEGALL5_3:
+        d->vertical_compose_l0 = (void*)vertical_compose53iL0_mmx;
+        d->vertical_compose_h0 = (void*)vertical_compose_dirac53iH0_mmx;
+        break;
+    case DWT_DIRAC_DD13_7:
+        d->vertical_compose_l0 = (void*)vertical_compose_dd137iL0_mmx;
+        d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_mmx;
+        break;
+    case DWT_DIRAC_HAAR0:
+        d->vertical_compose   = (void*)vertical_compose_haar_mmx;
+        d->horizontal_compose = horizontal_compose_haar0i_mmx;
+        break;
+    case DWT_DIRAC_HAAR1:
+        d->vertical_compose   = (void*)vertical_compose_haar_mmx;
+        d->horizontal_compose = horizontal_compose_haar1i_mmx;
+        break;
+    }
+#endif
+    /* SSE2 versions, when the CPU has them, replace whatever was installed above */
+    if (!(mm_flags & AV_CPU_FLAG_SSE2))
+        return;
+
+    switch (type) {
+    case DWT_DIRAC_DD9_7:
+        d->vertical_compose_l0 = (void*)vertical_compose53iL0_sse2;
+        d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_sse2;
+        break;
+    case DWT_DIRAC_LEGALL5_3:
+        d->vertical_compose_l0 = (void*)vertical_compose53iL0_sse2;
+        d->vertical_compose_h0 = (void*)vertical_compose_dirac53iH0_sse2;
+        break;
+    case DWT_DIRAC_DD13_7:
+        d->vertical_compose_l0 = (void*)vertical_compose_dd137iL0_sse2;
+        d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_sse2;
+        break;
+    case DWT_DIRAC_HAAR0:
+        d->vertical_compose   = (void*)vertical_compose_haar_sse2;
+        d->horizontal_compose = horizontal_compose_haar0i_sse2;
+        break;
+    case DWT_DIRAC_HAAR1:
+        d->vertical_compose   = (void*)vertical_compose_haar_sse2;
+        d->horizontal_compose = horizontal_compose_haar1i_sse2;
+        break;
+    }
+    /* only DWT_DIRAC_DD9_7 has an SSSE3 routine, and only for the horizontal pass */
+    if (!(mm_flags & AV_CPU_FLAG_SSSE3))
+        return;
+
+    switch (type) {
+    case DWT_DIRAC_DD9_7:
+        d->horizontal_compose = horizontal_compose_dd97i_ssse3;
+        break;
+    }
+#endif // HAVE_YASM
+}
diff --git a/libavcodec/x86/dirac_dwt.h b/libavcodec/x86/dirac_dwt.h
new file mode 100644
index 0000000..126b290
--- /dev/null
+++ b/libavcodec/x86/dirac_dwt.h
@@ -0,0 +1,30 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_DIRAC_DWT_H
+#define AVCODEC_X86_DIRAC_DWT_H
+
+#include "libavcodec/dirac_dwt.h"
+/* NOTE(review): C tail helpers, presumably for the asm kernels; not defined or called in the x86 C file -- confirm they are still needed */
+void ff_horizontal_compose_dd97i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x);
+void ff_horizontal_compose_haar1i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x);
+void ff_horizontal_compose_haar0i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x);
+/* Install the x86 inverse-DWT compose routines for the given transform type into d. */
+void ff_spatial_idwt_init_mmx(DWTContext *d, enum dwt_type type);
+
+#endif /* AVCODEC_X86_DIRAC_DWT_H */
diff --git a/libavcodec/x86/diracdsp_mmx.c b/libavcodec/x86/diracdsp_mmx.c
new file mode 100644
index 0000000..11df5e3
--- /dev/null
+++ b/libavcodec/x86/diracdsp_mmx.c
@@ -0,0 +1,156 @@
+/*
+ * Copyright (C) 2010 David Conrad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/x86/cpu.h"
+#include "diracdsp_mmx.h"
+#include "fpel.h"
+
+void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
+void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
+void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
+void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
+
+#define HPEL_FILTER(MMSIZE, EXT)                                                             \
+    void ff_dirac_hpel_filter_v_ ## EXT(uint8_t *, const uint8_t *, int, int);               \
+    void ff_dirac_hpel_filter_h_ ## EXT(uint8_t *, const uint8_t *, int);                    \
+    /* Row-by-row driver: vertical filter src -> dstv, horizontal filter src -> dsth, */     \
+    /* then the horizontal filter again over the vertical result, dstv -> dstc.      */     \
+    static void dirac_hpel_filter_ ## EXT(uint8_t *dsth, uint8_t *dstv, uint8_t *dstc,       \
+                                          const uint8_t *src, int stride, int width, int height)   \
+    {                                                                                        \
+        for (; height; height--) {                                                           \
+            /* the vertical pass starts MMSIZE bytes early and is given MMSIZE+5 extra width */ \
+            ff_dirac_hpel_filter_v_ ## EXT(dstv - MMSIZE, src - MMSIZE, stride, width + MMSIZE + 5); \
+            ff_dirac_hpel_filter_h_ ## EXT(dsth, src,  width);                               \
+            ff_dirac_hpel_filter_h_ ## EXT(dstc, dstv, width);                               \
+            src  += stride;                                                                  \
+            dsth += stride;                                                                  \
+            dstv += stride;                                                                  \
+            dstc += stride;                                                                  \
+        }                                                                                    \
+    }
+
+#if !ARCH_X86_64
+HPEL_FILTER(8, mmx)
+#endif
+HPEL_FILTER(16, sse2)
+
+#define PIXFUNC(PFX, IDX, EXT) /* sets c->PFX_dirac_pixels_tab[1][IDX] and [2][IDX]; expands to two statements, so use it inside braces */ \
+    /* the pixels8 entry [0] is commented out: MMXDISABLEDc->PFX ## _dirac_pixels_tab[0][IDX] = ff_ ## PFX ## _dirac_pixels8_ ## EXT;*/  \
+    c->PFX ## _dirac_pixels_tab[1][IDX] = ff_ ## PFX ## _dirac_pixels16_ ## EXT; \
+    c->PFX ## _dirac_pixels_tab[2][IDX] = ff_ ## PFX ## _dirac_pixels32_ ## EXT
+
+#define DIRAC_PIXOP(OPNAME2, OPNAME, EXT)\
+void ff_ ## OPNAME2 ## _dirac_pixels8_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{   /* h a multiple of 4: EXT routine on src[0]; any other h: the C version */\
+    if (!(h & 3))\
+        OPNAME ## _pixels8_ ## EXT(dst, src[0], stride, h);\
+    else\
+        ff_ ## OPNAME2 ## _dirac_pixels8_c(dst, src, stride, h);\
+}\
+void ff_ ## OPNAME2 ## _dirac_pixels16_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{   /* same dispatch as the 8-wide variant, 16 pixels wide */\
+    if (!(h & 3))\
+        OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h);\
+    else\
+        ff_ ## OPNAME2 ## _dirac_pixels16_c(dst, src, stride, h);\
+}\
+void ff_ ## OPNAME2 ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{   /* 32 wide is done as two 16-wide calls, the second 16 bytes further right */\
+    if (!(h & 3)) {\
+        OPNAME ## _pixels16_ ## EXT(dst,      src[0],      stride, h);\
+        OPNAME ## _pixels16_ ## EXT(dst + 16, src[0] + 16, stride, h);\
+    } else {\
+        ff_ ## OPNAME2 ## _dirac_pixels32_c(dst, src, stride, h);\
+    }\
+}
+
+DIRAC_PIXOP(put, ff_put, mmx)
+DIRAC_PIXOP(avg, ff_avg, mmx)
+DIRAC_PIXOP(avg, ff_avg, mmxext)
+
+void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
+{   /* SSE2 put routine on src[0] when h is a multiple of 4, C version otherwise */
+    if (!(h & 3))
+        ff_put_pixels16_sse2(dst, src[0], stride, h);
+    else
+        ff_put_dirac_pixels16_c(dst, src, stride, h);
+}
+void ff_avg_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
+{   /* SSE2 avg routine on src[0] when h is a multiple of 4, C version otherwise */
+    if (!(h & 3))
+        ff_avg_pixels16_sse2(dst, src[0], stride, h);
+    else
+        ff_avg_dirac_pixels16_c(dst, src, stride, h);
+}
+void ff_put_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
+{   /* 32 wide = two 16-wide SSE2 calls; the C version handles h not a multiple of 4 */
+    if (!(h & 3)) {
+        ff_put_pixels16_sse2(dst,      src[0],      stride, h);
+        ff_put_pixels16_sse2(dst + 16, src[0] + 16, stride, h);
+    } else {
+        ff_put_dirac_pixels32_c(dst, src, stride, h);
+    }
+}
+void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
+{   /* 32 wide = two 16-wide SSE2 calls; the C version handles h not a multiple of 4 */
+    if (!(h & 3)) {
+        ff_avg_pixels16_sse2(dst,      src[0],      stride, h);
+        ff_avg_pixels16_sse2(dst + 16, src[0] + 16, stride, h);
+    } else {
+        ff_avg_dirac_pixels32_c(dst, src, stride, h);
+    }
+}
+
+/* Fill in the DiracDSP function pointers for which an EXTERNAL_* CPU check passes. */
+void ff_diracdsp_init_mmx(DiracDSPContext* c)
+{
+    const int cpu_flags = av_get_cpu_flags();
+
+    /* The blocks run in order, so a later one overrides pointers set earlier. */
+    if (EXTERNAL_MMX(cpu_flags)) {
+        c->add_dirac_obmc[0]       = ff_add_dirac_obmc8_mmx;
+#if !ARCH_X86_64
+        c->add_dirac_obmc[1]       = ff_add_dirac_obmc16_mmx;
+        c->add_dirac_obmc[2]       = ff_add_dirac_obmc32_mmx;
+        c->dirac_hpel_filter       = dirac_hpel_filter_mmx;
+        c->add_rect_clamped        = ff_add_rect_clamped_mmx;
+        c->put_signed_rect_clamped = ff_put_signed_rect_clamped_mmx;
+#endif
+        PIXFUNC(put, 0, mmx);
+        PIXFUNC(avg, 0, mmx);
+    }
+    /* PIXFUNC expands to more than one statement: keep the braces. */
+    if (EXTERNAL_MMXEXT(cpu_flags)) {
+        PIXFUNC(avg, 0, mmxext);
+    }
+
+    if (EXTERNAL_SSE2(cpu_flags)) {
+        c->dirac_hpel_filter       = dirac_hpel_filter_sse2;
+        c->add_rect_clamped        = ff_add_rect_clamped_sse2;
+        c->put_signed_rect_clamped = ff_put_signed_rect_clamped_sse2;
+        c->add_dirac_obmc[1]       = ff_add_dirac_obmc16_sse2;
+        c->add_dirac_obmc[2]       = ff_add_dirac_obmc32_sse2;
+        c->put_dirac_pixels_tab[1][0] = ff_put_dirac_pixels16_sse2;
+        c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2;
+        c->avg_dirac_pixels_tab[1][0] = ff_avg_dirac_pixels16_sse2;
+        c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2;
+    }
+}
diff --git a/libavcodec/x86/diracdsp_mmx.h b/libavcodec/x86/diracdsp_mmx.h
new file mode 100644
index 0000000..8985854
--- /dev/null
+++ b/libavcodec/x86/diracdsp_mmx.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2010 David Conrad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_DIRACDSP_H
+#define AVCODEC_X86_DIRACDSP_H
+
+#include "libavcodec/diracdsp.h"
+
+void ff_diracdsp_init_mmx(DiracDSPContext* c);
+
+DECL_DIRAC_PIXOP(put, mmx);
+DECL_DIRAC_PIXOP(avg, mmx);
+DECL_DIRAC_PIXOP(avg, mmxext);
+/* hand-written SSE2 wrappers, defined in diracdsp_mmx.c */
+void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h);
+void ff_avg_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h);
+void ff_put_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h);
+void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h);
+/* NOTE(review): the routines below are presumably the ones emitted by diracdsp_yasm.asm (ADD_RECT / ADD_OBMC) -- confirm the symbol prefix */
+void ff_add_rect_clamped_mmx(uint8_t *, const uint16_t *, int, const int16_t *, int, int, int);
+void ff_add_rect_clamped_sse2(uint8_t *, const uint16_t *, int, const int16_t *, int, int, int);
+
+void ff_add_dirac_obmc8_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
+void ff_add_dirac_obmc16_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
+void ff_add_dirac_obmc32_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
+
+void ff_add_dirac_obmc16_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
+void ff_add_dirac_obmc32_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
+
+#endif /* AVCODEC_X86_DIRACDSP_H */
diff --git a/libavcodec/x86/diracdsp_yasm.asm b/libavcodec/x86/diracdsp_yasm.asm
new file mode 100644
index 0000000..3e9765b
--- /dev/null
+++ b/libavcodec/x86/diracdsp_yasm.asm
@@ -0,0 +1,264 @@
+;******************************************************************************
+;* Copyright (c) 2010 David Conrad
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+pw_3: times 8 dw 3
+pw_7: times 8 dw 7
+pw_16: times 8 dw 16
+pw_32: times 8 dw 32
+pb_128: times 16 db 128
+
+section .text
+
+%macro UNPACK_ADD 6 ; args: out lo, out hi, source a, source b, mov suffix for a, mov suffix for b
+    mov%5   %1, %3
+    mov%6   m5, %4
+    mova    m4, %1
+    mova    %2, m5
+    punpcklbw %1, m7 ; the callers zero m7 first, so these interleaves widen bytes to words
+    punpcklbw m5, m7
+    punpckhbw m4, m7
+    punpckhbw %2, m7
+    paddw   %1, m5 ; out lo = low-half bytes of a + b, as words
+    paddw   %2, m4 ; out hi = high-half bytes of a + b, as words (m4 and m5 are scratch)
+%endmacro
+
+%macro HPEL_FILTER 1
+; dirac_hpel_filter_v_sse2(uint8_t *dst, uint8_t *src, int stride, int width);  in this function src[k] below means the byte k rows (k*stride) from src
+cglobal dirac_hpel_filter_v_%1, 4,6,8, dst, src, stride, width, src0, stridex3
+    mov     src0q, srcq
+    lea     stridex3q, [3*strideq]
+    sub     src0q, stridex3q ; src0 = src - 3*stride, used to reach rows -3, -2 and -1
+    pxor    m7, m7 ; zero, needed by UNPACK_ADD to widen bytes to words
+.loop:
+    ; 7*(src[0] + src[1])
+    UNPACK_ADD m0, m1, [srcq], [srcq + strideq], a,a
+    pmullw  m0, [pw_7]
+    pmullw  m1, [pw_7]
+
+    ; 3*( ... + src[-2] + src[3])
+    UNPACK_ADD m2, m3, [src0q + strideq], [srcq + stridex3q], a,a
+    paddw   m0, m2
+    paddw   m1, m3
+    pmullw  m0, [pw_3]
+    pmullw  m1, [pw_3]
+
+    ; ... - 7*(src[-1] + src[2])
+    UNPACK_ADD m2, m3, [src0q + strideq*2], [srcq + strideq*2], a,a
+    pmullw  m2, [pw_7]
+    pmullw  m3, [pw_7]
+    psubw   m0, m2
+    psubw   m1, m3
+
+    ; ... - (src[-3] + src[4])
+    UNPACK_ADD m2, m3, [src0q], [srcq + strideq*4], a,a
+    psubw   m0, m2
+    psubw   m1, m3
+
+    paddw   m0, [pw_16]
+    paddw   m1, [pw_16]
+    psraw   m0, 5
+    psraw   m1, 5
+    packuswb m0, m1 ; total: (21*(s[0]+s[1]) - 7*(s[-1]+s[2]) + 3*(s[-2]+s[3]) - (s[-3]+s[4]) + 16) >> 5, saturated to bytes
+    mova    [dstq], m0
+    add     dstq, mmsize
+    add     srcq, mmsize
+    add     src0q, mmsize
+    sub     widthd, mmsize
+    jg      .loop ; one vector of mmsize output bytes per iteration while width remains
+    RET
+
+; dirac_hpel_filter_h_sse2(uint8_t *dst, uint8_t *src, int width);  same taps as above, src[k] here is the byte k columns from the current one
+cglobal dirac_hpel_filter_h_%1, 3,3,8, dst, src, width
+    dec     widthd
+    pxor    m7, m7
+    and     widthd, ~(mmsize-1) ; width becomes the offset of the last mmsize-sized block
+.loop:
+    ; 7*(src[0] + src[1])
+    UNPACK_ADD m0, m1, [srcq + widthq], [srcq + widthq + 1], u,u
+    pmullw  m0, [pw_7]
+    pmullw  m1, [pw_7]
+
+    ; 3*( ... + src[-2] + src[3])
+    UNPACK_ADD m2, m3, [srcq + widthq - 2], [srcq + widthq + 3], u,u
+    paddw   m0, m2
+    paddw   m1, m3
+    pmullw  m0, [pw_3]
+    pmullw  m1, [pw_3]
+
+    ; ... - 7*(src[-1] + src[2])
+    UNPACK_ADD m2, m3, [srcq + widthq - 1], [srcq + widthq + 2], u,u
+    pmullw  m2, [pw_7]
+    pmullw  m3, [pw_7]
+    psubw   m0, m2
+    psubw   m1, m3
+
+    ; ... - (src[-3] + src[4])
+    UNPACK_ADD m2, m3, [srcq + widthq - 3], [srcq + widthq + 4], u,u
+    psubw   m0, m2
+    psubw   m1, m3
+
+    paddw   m0, [pw_16]
+    paddw   m1, [pw_16]
+    psraw   m0, 5
+    psraw   m1, 5
+    packuswb m0, m1
+    mova    [dstq + widthq], m0
+    sub     widthd, mmsize
+    jge     .loop ; walks the row backwards, block by block, down to offset 0
+    RET
+%endmacro
+
+%macro PUT_RECT 1
+; void put_signed_rect_clamped(uint8_t *dst, int dst_stride, int16_t *src, int src_stride, int width, int height)
+cglobal put_signed_rect_clamped_%1, 5,9,3, dst, dst_stride, src, src_stride, w, dst2, src2
+    mova    m0, [pb_128] ; bias added after the signed saturation: dst = clip_int8(src) + 128
+    add     wd, (mmsize-1)
+    and     wd, ~(mmsize-1) ; round the width up to a whole number of output vectors
+
+%if ARCH_X86_64
+    movsxd   dst_strideq, dst_strided
+    movsxd   src_strideq, src_strided
+    mov   r7d, r5m ; r5m is the sixth argument, height in the prototype above
+    mov   r8d, wd
+    %define wspill r8d
+    %define hd r7d
+%else
+    mov    r4m, wd
+    %define wspill r4m
+    %define hd r5mp
+%endif
+
+.loopy: ; each pass handles two rows: (src, dst) and the next ones (src2, dst2)
+    lea     src2q, [srcq+src_strideq*2]
+    lea     dst2q, [dstq+dst_strideq]
+.loopx:
+    sub      wd, mmsize
+    mova     m1, [srcq +2*wq]
+    mova     m2, [src2q+2*wq]
+    packsswb m1, [srcq +2*wq+mmsize]
+    packsswb m2, [src2q+2*wq+mmsize]
+    paddb    m1, m0
+    paddb    m2, m0
+    mova    [dstq +wq], m1
+    mova    [dst2q+wq], m2
+    jg      .loopx ; tests the flags of "sub wd, mmsize"; the SIMD ops in between leave them alone
+
+    lea   srcq, [srcq+src_strideq*4]
+    lea   dstq, [dstq+dst_strideq*2]
+    sub     hd, 2
+    mov     wd, wspill ; restore the rounded width (mov does not change the flags of the sub)
+    jg      .loopy
+    RET
+%endm
+
+%macro ADD_RECT 1
+; void add_rect_clamped(uint8_t *dst, uint16_t *src, int stride, int16_t *idwt, int idwt_stride, int width, int height)   dst = clip_uint8(((src + 32) >> 6) + idwt)
+cglobal add_rect_clamped_%1, 7,9,3, dst, src, stride, idwt, idwt_stride, w, h
+    mova    m0, [pw_32] ; rounding term for the >> 6 below
+    add     wd, (mmsize-1)
+    and     wd, ~(mmsize-1) ; round the width up to a whole number of output vectors
+
+%if ARCH_X86_64
+    movsxd   strideq, strided
+    movsxd   idwt_strideq, idwt_strided
+    mov   r8d, wd
+    %define wspill r8d
+%else
+    mov    r5m, wd
+    %define wspill r5m
+%endif
+
+.loop: ; shared by the column loop and the row loop: a new row re-enters here with w restored
+    sub     wd, mmsize
+    movu    m1, [srcq +2*wq] ; FIXME: ensure alignment
+    paddw   m1, m0
+    psraw   m1, 6
+    movu    m2, [srcq +2*wq+mmsize] ; FIXME: ensure alignment
+    paddw   m2, m0
+    psraw   m2, 6
+    paddw   m1, [idwtq+2*wq]
+    paddw   m2, [idwtq+2*wq+mmsize]
+    packuswb m1, m2
+    mova    [dstq +wq], m1
+    jg      .loop ; tests the flags of "sub wd, mmsize"
+
+    lea   srcq, [srcq + 2*strideq]
+    add   dstq, strideq
+    lea  idwtq, [idwtq+ 2*idwt_strideq]
+    sub     hd, 1
+    mov     wd, wspill ; restore the rounded width; the jg below still tests "sub hd, 1"
+    jg      .loop
+    RET
+%endm
+
+%macro ADD_OBMC 2
+; void add_obmc(uint16_t *dst, uint8_t *src, int stride, uint8_t *obmc_weight, int yblen)   per row, over the block width given as the first macro argument: dst[x] += src[x] * obmc_weight[x]
+cglobal add_dirac_obmc%1_%2, 6,6,5, dst, src, stride, obmc, yblen
+    pxor        m4, m4 ; zero, used to widen bytes to words
+.loop:
+%assign i 0
+%rep %1 / mmsize
+    mova        m0, [srcq+i]
+    mova        m1, m0
+    punpcklbw   m0, m4
+    punpckhbw   m1, m4
+    mova        m2, [obmcq+i]
+    mova        m3, m2
+   punpcklbw   m2, m4
+    punpckhbw   m3, m4
+    pmullw      m0, m2
+    pmullw      m1, m3
+    movu        m2, [dstq+2*i]
+    movu        m3, [dstq+2*i+mmsize]
+    paddw       m0, m2
+    paddw       m1, m3
+    movu        [dstq+2*i], m0
+    movu        [dstq+2*i+mmsize], m1
+%assign i i+mmsize
+%endrep
+    lea         srcq, [srcq+strideq] ; NOTE(review): strideq is used without the movsxd that PUT_RECT/ADD_RECT apply to their int strides -- confirm this is safe on x86-64
+    lea         dstq, [dstq+2*strideq]
+    add         obmcq, 32 ; the weight rows are 32 bytes apart whatever the block width
+    sub         yblend, 1
+    jg          .loop
+    RET
+%endm
+
+INIT_MMX
+%if ARCH_X86_64 == 0 ; these MMX versions are only assembled for 32-bit builds
+PUT_RECT mmx
+ADD_RECT mmx
+
+HPEL_FILTER mmx
+ADD_OBMC 32, mmx
+ADD_OBMC 16, mmx
+%endif
+ADD_OBMC 8, mmx ; the 8-wide OBMC add has no SSE2 counterpart below, so it is built everywhere
+
+INIT_XMM
+PUT_RECT sse2
+ADD_RECT sse2
+
+HPEL_FILTER sse2
+ADD_OBMC 32, sse2
+ADD_OBMC 16, sse2
diff --git a/libavcodec/x86/dnxhdenc.asm b/libavcodec/x86/dnxhdenc.asm
index d39b07b..9dd6d51 100644
--- a/libavcodec/x86/dnxhdenc.asm
+++ b/libavcodec/x86/dnxhdenc.asm
@@ -3,20 +3,20 @@
 ;* Copyright (c) 2007 Baptiste Coudurier <baptiste dot coudurier at smartjog dot com>
 ;* Copyright (c) 2014 Tiancheng "Timothy" Gu <timothygu99@gmail.com>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* 51, Inc., Foundation Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
diff --git a/libavcodec/x86/dnxhdenc_init.c b/libavcodec/x86/dnxhdenc_init.c
index f1ff7bd..fd6f150 100644
--- a/libavcodec/x86/dnxhdenc_init.c
+++ b/libavcodec/x86/dnxhdenc_init.c
@@ -4,20 +4,20 @@
  *
  * VC-3 encoder funded by the British Broadcasting Corporation
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/x86/dwt_yasm.asm b/libavcodec/x86/dwt_yasm.asm
new file mode 100644
index 0000000..5253abc
--- /dev/null
+++ b/libavcodec/x86/dwt_yasm.asm
@@ -0,0 +1,306 @@
+;******************************************************************************
+;* MMX optimized discrete wavelet transform
+;* Copyright (c) 2010 David Conrad
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+pw_1: times 8 dw 1
+pw_2: times 8 dw 2
+pw_8: times 8 dw 8
+pw_16: times 8 dw 16
+pw_1991: times 4 dw 9,-1
+
+section .text
+
+; %1 -= (%2 + %3 + 2)>>2     %4 is pw_2     (%2 is used as scratch and clobbered)
+%macro COMPOSE_53iL0 4
+    paddw   %2, %3
+    paddw   %2, %4
+    psraw   %2, 2
+    psubw   %1, %2
+%endm
+
+; m1 = %1 + (-m0 + 9*m1 + 9*%2 -%3 + 8)>>4
+; if %4 is supplied, %1 is loaded unaligned from there
+; m0, m2: clobbered  m3: pw_8  m4: pw_1991 (the word pair 9,-1 repeated)
+%macro COMPOSE_DD97iH0 3-4
+    paddw   m0, %3
+    paddw   m1, %2
+    psubw   m0, m3 ; subtracting 8 here adds +8 once m0 is weighted by -1 below
+    mova    m2, m1
+    punpcklwd m1, m0 ; pair each m1 word with its m0 word ...
+    punpckhwd m2, m0
+    pmaddwd m1, m4 ; ... so pmaddwd with (9,-1) yields 9*m1 - m0 as dwords
+    pmaddwd m2, m4
+%if %0 > 3
+    movu    %1, %4
+%endif
+    psrad   m1, 4
+    psrad   m2, 4
+    packssdw m1, m2
+    paddw   m1, %1
+%endm
+
+%macro COMPOSE_VERTICAL 1
+; void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
+;                                  int width)        b1[i] -= (b0[i] + b2[i] + 2) >> 2
+cglobal vertical_compose53iL0_%1, 4,4,1, b0, b1, b2, width
+    mova    m2, [pw_2]
+%if ARCH_X86_64
+    mov     widthd, widthd ; a 32-bit write clears the upper half, so widthq is usable as an index
+%endif
+.loop:
+    sub     widthq, mmsize/2 ; step back one vector of 16-bit elements; the jg below tests this result
+    mova    m1, [b0q+2*widthq]
+    mova    m0, [b1q+2*widthq]
+    COMPOSE_53iL0 m0, m1, [b2q+2*widthq], m2
+    mova    [b1q+2*widthq], m0
+    jg      .loop
+    REP_RET
+
+; void vertical_compose_dirac53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
+;                                  int width)        b1[i] += (b0[i] + b2[i] + 1) >> 1
+cglobal vertical_compose_dirac53iH0_%1, 4,4,1, b0, b1, b2, width
+    mova    m1, [pw_1]
+%if ARCH_X86_64
+    mov     widthd, widthd
+%endif
+.loop:
+    sub     widthq, mmsize/2
+    mova    m0, [b0q+2*widthq]
+    paddw   m0, [b2q+2*widthq]
+    paddw   m0, m1
+    psraw   m0, 1
+    paddw   m0, [b1q+2*widthq]
+    mova    [b1q+2*widthq], m0
+    jg      .loop
+    REP_RET
+
+; void vertical_compose_dd97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
+;                               IDWTELEM *b3, IDWTELEM *b4, int width)        b2[i] += (-b0[i] + 9*b1[i] + 9*b3[i] - b4[i] + 8) >> 4
+cglobal vertical_compose_dd97iH0_%1, 6,6,5, b0, b1, b2, b3, b4, width
+    mova    m3, [pw_8]
+    mova    m4, [pw_1991]
+%if ARCH_X86_64
+    mov     widthd, widthd
+%endif
+.loop:
+    sub     widthq, mmsize/2
+    mova    m0, [b0q+2*widthq]
+    mova    m1, [b1q+2*widthq]
+    COMPOSE_DD97iH0 [b2q+2*widthq], [b3q+2*widthq], [b4q+2*widthq]
+    mova    [b2q+2*widthq], m1
+    jg      .loop
+    REP_RET
+
+; void vertical_compose_dd137iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
+;                                IDWTELEM *b3, IDWTELEM *b4, int width)        b2[i] -= (-b0[i] + 9*b1[i] + 9*b3[i] - b4[i] + 16) >> 5
+cglobal vertical_compose_dd137iL0_%1, 6,6,6, b0, b1, b2, b3, b4, width
+    mova    m3, [pw_16]
+    mova    m4, [pw_1991]
+%if ARCH_X86_64
+    mov     widthd, widthd
+%endif
+.loop:
+    sub     widthq, mmsize/2
+    mova    m0, [b0q+2*widthq]
+    mova    m1, [b1q+2*widthq]
+    mova    m5, [b2q+2*widthq]
+    paddw   m0, [b4q+2*widthq]
+    paddw   m1, [b3q+2*widthq]
+    psubw   m0, m3 ; -16 on the term that gets weight -1, i.e. +16 rounding
+    mova    m2, m1
+    punpcklwd m1, m0
+    punpckhwd m2, m0
+    pmaddwd m1, m4 ; 9*(b1+b3) - (b0+b4-16) as dwords
+    pmaddwd m2, m4
+    psrad   m1, 5
+    psrad   m2, 5
+    packssdw m1, m2
+    psubw   m5, m1
+    mova    [b2q+2*widthq], m5
+    jg      .loop
+    REP_RET
+
+; void vertical_compose_haar(IDWTELEM *b0, IDWTELEM *b1, int width)        b0[i] -= (b1[i] + 1) >> 1, then b1[i] += the new b0[i]
+cglobal vertical_compose_haar_%1, 3,4,3, b0, b1, width
+    mova    m3, [pw_1]
+%if ARCH_X86_64
+    mov     widthd, widthd
+%endif
+.loop:
+    sub     widthq, mmsize/2
+    mova    m1, [b1q+2*widthq]
+    mova    m0, [b0q+2*widthq]
+    mova    m2, m1 ; keep the unmodified b1 for the second update
+    paddw   m1, m3
+    psraw   m1, 1
+    psubw   m0, m1
+    mova    [b0q+2*widthq], m0
+    paddw   m2, m0
+    mova    [b1q+2*widthq], m2
+    jg      .loop
+    REP_RET
+%endmacro
+
+; extend the left and right edges of the tmp array by %1 and %2 respectively; %3 is a scratch register (16-bit at the visible call site)
+%macro EDGE_EXTENSION 3
+    mov     %3, [tmpq] ; first element, replicated to the left
+%assign %%i 1
+%rep %1
+    mov     [tmpq-2*%%i], %3
+    %assign %%i %%i+1
+%endrep
+    mov     %3, [tmpq+2*w2q-2] ; last element (index w2-1), replicated to the right
+%assign %%i 0
+%rep %2
+    mov     [tmpq+2*w2q+2*%%i], %3
+    %assign %%i %%i+1
+%endrep
+%endmacro
+
+
+%macro HAAR_HORIZONTAL 2
+; void horizontal_compose_haari(IDWTELEM *b, IDWTELEM *tmp, int width)   the second macro argument (0 or 1) selects whether the output gets the extra (v+1)>>1 step
+cglobal horizontal_compose_haar%2i_%1, 3,6,4, b, tmp, w, x, w2, b_w2
+    mov    w2d, wd
+    xor     xq, xq
+    shr    w2d, 1 ; w2 = w / 2
+    lea  b_w2q, [bq+wq] ; b + w bytes, which is 16-bit element w/2 for even w. NOTE(review): wq is used without zero-extending wd -- confirm
+    mova    m3, [pw_1]
+.lowpass_loop: ; tmp[x] = b[x] - ((b_w2[x] + 1) >> 1)
+    movu    m1, [b_w2q + 2*xq]
+    mova    m0, [bq    + 2*xq]
+    paddw   m1, m3
+    psraw   m1, 1
+    psubw   m0, m1
+    mova    [tmpq + 2*xq], m0
+    add     xq, mmsize/2
+    cmp     xq, w2q
+    jl      .lowpass_loop
+
+    xor     xq, xq
+    and    w2q, ~(mmsize/2 - 1) ; only whole vectors are interleaved below
+    cmp    w2q, mmsize/2
+    jl      .end
+
+.highpass_loop: ; writes the pairs (tmp[x], b_w2[x] + tmp[x]) to b[2x], b[2x+1]
+    movu    m1, [b_w2q + 2*xq]
+    mova    m0, [tmpq  + 2*xq]
+    paddw   m1, m0
+
+    ; shift and interleave
+%if %2 == 1
+    paddw   m0, m3
+    paddw   m1, m3
+    psraw   m0, 1
+    psraw   m1, 1
+%endif
+    mova    m2, m0
+    punpcklwd m0, m1
+    punpckhwd m2, m1
+    mova    [bq+4*xq], m0
+    mova    [bq+4*xq+mmsize], m2
+
+    add     xq, mmsize/2
+    cmp     xq, w2q
+    jl      .highpass_loop
+.end:
+    REP_RET
+%endmacro
+
+
+INIT_XMM
+; void horizontal_compose_dd97i(IDWTELEM *b, IDWTELEM *tmp, int width)
+cglobal horizontal_compose_dd97i_ssse3, 3,6,8, b, tmp, w, x, w2, b_w2
+    mov    w2d, wd
+    xor     xd, xd
+    shr    w2d, 1 ; w2 = w / 2
+    lea  b_w2q, [bq+wq] ; b + w bytes: the second half of the row for even w (16-bit elements)
+    movu    m4, [bq+wq]
+    mova    m7, [pw_2]
+    pslldq  m4, 14 ; move b_w2[0] into the top word so the first palignr sees it as "element -1"
+.lowpass_loop: ; tmp[x] = b[x] - ((b_w2[x-1] + b_w2[x] + 2) >> 2)
+    movu    m1, [b_w2q + 2*xq]
+    mova    m0, [bq    + 2*xq]
+    mova    m2, m1
+    palignr m1, m4, 14 ; m1 = this vector shifted by one element, with the previous vector's last word in front
+    mova    m4, m2
+    COMPOSE_53iL0 m0, m1, m2, m7
+    mova    [tmpq + 2*xq], m0
+    add     xd, mmsize/2
+    cmp     xd, w2d
+    jl      .lowpass_loop
+
+    EDGE_EXTENSION 1, 2, xw
+    ; leave the last up to 7 (sse) or 3 (mmx) values for C
+    xor     xd, xd
+    and    w2d, ~(mmsize/2 - 1)
+    cmp    w2d, mmsize/2
+    jl      .end
+
+    mova    m7, [tmpq-mmsize] ; vector just before tmp; its top word is the tmp[-1] written by EDGE_EXTENSION
+    mova    m0, [tmpq]
+    mova    m5, [pw_1]
+    mova    m3, [pw_8]
+    mova    m4, [pw_1991]
+.highpass_loop: ; on entry m0 = tmp[x..x+7], m7 = the vector before it
+    mova    m6, m0
+    palignr m0, m7, 14 ; tmp[x-1 ..]
+    mova    m7, [tmpq + 2*xq + 16]
+    mova    m1, m7
+    mova    m2, m7
+    palignr m1, m6, 2 ; tmp[x+1 ..]
+    palignr m2, m6, 4 ; tmp[x+2 ..]
+    COMPOSE_DD97iH0 m0, m6, m2, [b_w2q + 2*xq]
+    mova    m0, m7 ; the next vector becomes the current one
+    mova    m7, m6 ; and the current (still unrounded) one becomes the previous
+
+    ; shift and interleave
+    paddw   m6, m5
+    paddw   m1, m5
+    psraw   m6, 1
+    psraw   m1, 1
+    mova    m2, m6
+    punpcklwd m6, m1
+    punpckhwd m2, m1
+    mova    [bq+4*xq], m6
+    mova    [bq+4*xq+mmsize], m2
+
+    add     xd, mmsize/2
+    cmp     xd, w2d
+    jl      .highpass_loop
+.end:
+    REP_RET
+
+
+%if ARCH_X86_64 == 0 ; the MMX versions are only assembled for 32-bit builds
+INIT_MMX
+COMPOSE_VERTICAL mmx
+HAAR_HORIZONTAL mmx, 0
+HAAR_HORIZONTAL mmx, 1
+%endif
+
+; the SSE2 versions are assembled for every x86 target
+INIT_XMM
+COMPOSE_VERTICAL sse2
+HAAR_HORIZONTAL sse2, 0
+HAAR_HORIZONTAL sse2, 1
diff --git a/libavcodec/x86/fdct.c b/libavcodec/x86/fdct.c
index 6528b57..112566d 100644
--- a/libavcodec/x86/fdct.c
+++ b/libavcodec/x86/fdct.c
@@ -13,20 +13,20 @@
  * a page about fdct at http://www.geocities.com/ssavekar/dct.htm
  * Skal's fdct at http://skal.planet-d.net/coding/dct.html
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -70,7 +70,7 @@ DECLARE_ALIGNED(16, static const int16_t, fdct_one_corr)[8] = { X8(1) };
 
 DECLARE_ALIGNED(8, static const int32_t, fdct_r_row)[2] = {RND_FRW_ROW, RND_FRW_ROW };
 
-static struct
+static const struct
 {
  DECLARE_ALIGNED(16, const int32_t, fdct_r_row_sse2)[4];
 } fdct_r_row_sse2 =
@@ -153,7 +153,7 @@ DECLARE_ALIGNED(8, static const int16_t, tab_frw_01234567)[] = {  // forward_dct
   29692,  -12299,   26722,  -31521,
 };
 
-static struct
+static const struct
 {
  DECLARE_ALIGNED(16, const int16_t, tab_frw_01234567_sse2)[256];
 } tab_frw_01234567_sse2 =
diff --git a/libavcodec/x86/fdct.h b/libavcodec/x86/fdct.h
index c94a977..648cdc5 100644
--- a/libavcodec/x86/fdct.h
+++ b/libavcodec/x86/fdct.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/x86/fdctdsp_init.c b/libavcodec/x86/fdctdsp_init.c
index 4e8e4eb..0cb5fd6 100644
--- a/libavcodec/x86/fdctdsp_init.c
+++ b/libavcodec/x86/fdctdsp_init.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/x86/fft.asm b/libavcodec/x86/fft.asm
index e4744a3..cae404c 100644
--- a/libavcodec/x86/fft.asm
+++ b/libavcodec/x86/fft.asm
@@ -6,20 +6,20 @@
 ;* This algorithm (though not any of the implementation details) is
 ;* based on libdjbfft by D. J. Bernstein.
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
@@ -36,6 +36,8 @@
 %define pointer resd
 %endif
 
+SECTION_RODATA 32
+
 struc FFTContext
     .nbits:    resd 1
     .reverse:  resd 1
@@ -51,13 +53,10 @@ struc FFTContext
     .imdcthalf:pointer 1
 endstruc
 
-SECTION_RODATA
-
 %define M_SQRT1_2 0.70710678118654752440
 %define M_COS_PI_1_8 0.923879532511287
 %define M_COS_PI_3_8 0.38268343236509
 
-align 32
 ps_cos16_1: dd 1.0, M_COS_PI_1_8, M_SQRT1_2, M_COS_PI_3_8, 1.0, M_COS_PI_1_8, M_SQRT1_2, M_COS_PI_3_8
 ps_cos16_2: dd 0, M_COS_PI_3_8, M_SQRT1_2, M_COS_PI_1_8, 0, -M_COS_PI_3_8, -M_SQRT1_2, -M_COS_PI_1_8
 
@@ -305,6 +304,7 @@ IF%1 mova  Z(1), m5
 
 INIT_YMM avx
 
+%if HAVE_AVX_EXTERNAL
 align 16
 fft8_avx:
     mova      m0, Z(0)
@@ -394,6 +394,8 @@ fft32_interleave_avx:
     jg .deint_loop
     ret
 
+%endif
+
 INIT_XMM sse
 
 align 16
@@ -537,6 +539,7 @@ DEFINE_ARGS zc, w, n, o1, o3
 
 INIT_YMM avx
 
+%if HAVE_AVX_EXTERNAL
 %macro INTERL_AVX 5
     vunpckhps      %3, %2, %1
     vunpcklps      %2, %2, %1
@@ -558,6 +561,7 @@ cglobal fft_calc, 2,5,8
     FFT_DISPATCH _interleave %+ SUFFIX, r1
     REP_RET
 
+%endif
 
 INIT_XMM sse
 
@@ -776,9 +780,11 @@ align 8
 dispatch_tab %+ fullsuffix: pointer list_of_fft
 %endmacro ; DECL_FFT
 
+%if HAVE_AVX_EXTERNAL
 INIT_YMM avx
 DECL_FFT 6
 DECL_FFT 6, _interleave
+%endif
 INIT_XMM sse
 DECL_FFT 5
 DECL_FFT 5, _interleave
@@ -1080,4 +1086,7 @@ DECL_IMDCT POSROTATESHUF_3DNOW
 %endif
 
 INIT_YMM avx
+
+%if HAVE_AVX_EXTERNAL
 DECL_IMDCT POSROTATESHUF_AVX
+%endif
diff --git a/libavcodec/x86/fft.h b/libavcodec/x86/fft.h
index a604956..398091e 100644
--- a/libavcodec/x86/fft.h
+++ b/libavcodec/x86/fft.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/x86/fft_init.c b/libavcodec/x86/fft_init.c
index 7ca72c5..5682230 100644
--- a/libavcodec/x86/fft_init.c
+++ b/libavcodec/x86/fft_init.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/x86/flacdsp.asm b/libavcodec/x86/flacdsp.asm
new file mode 100644
index 0000000..37ee87b
--- /dev/null
+++ b/libavcodec/x86/flacdsp.asm
@@ -0,0 +1,74 @@
+;******************************************************************************
+;* FLAC DSP SIMD optimizations
+;*
+;* Copyright (C) 2014 Loren Merritt
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION .text
+
+%macro LPC_32 1 ; defines flac_lpc_32 for one instruction set; the single argument is handed to INIT_XMM
+INIT_XMM %1
+cglobal flac_lpc_32, 5,6,5, decoded, coeffs, pred_order, qlevel, len, j
+    sub    lend, pred_orderd ; len -= pred_order
+    jle .ret ; nothing to do when len <= pred_order
+    lea    decodedq, [decodedq+pred_orderq*4-8] ; -8 offsets the "add decodedq, 8" at the top of the loop; NOTE(review): pred_orderq is used as a 64-bit index while the C prototype passes order as int - confirm the upper half is clean on x86-64
+    lea    coeffsq, [coeffsq+pred_orderq*4] ; advance coeffs by pred_order dwords
+    neg    pred_orderq ; negative index: [base+pred_orderq*4] now addresses the first element of each window
+    movd   m4, qlevelm ; shift count for the psrlq instructions below
+ALIGN 16
+.loop_sample: ; one iteration produces the sample at [decodedq] and, if len allows, the one at [decodedq+4]
+    movd   m0, [decodedq+pred_orderq*4+8] ; oldest sample of the window (the +8 is undone by the add on the next line)
+    add    decodedq, 8 ; step two dwords forward
+    movd   m1, [coeffsq+pred_orderq*4] ; first coefficient
+    pxor   m2, m2 ; accumulator for the sample at [decodedq]
+    pxor   m3, m3 ; accumulator for the sample at [decodedq+4]
+    lea    jq, [pred_orderq+1] ; j = 1 - pred_order, counts up towards zero
+    test   jq, jq
+    jz .end_order ; pred_order == 1: no inner iterations needed
+.loop_order: ; presumably PMACSDQL is a widening multiply-accumulate (dst += a*b) - defined outside this file, confirm in x86util.asm
+    PMACSDQL m2, m0, m1, m2, m0
+    movd   m0, [decodedq+jq*4] ; next sample
+    PMACSDQL m3, m1, m0, m3, m1 ; same coefficient paired with the following sample, feeding the second accumulator
+    movd   m1, [coeffsq+jq*4] ; next coefficient
+    inc    jq
+    jl .loop_order ; loop while j is still negative
+.end_order:
+    PMACSDQL m2, m0, m1, m2, m0 ; last product for the first accumulator
+    psrlq  m2, m4 ; accumulator >> qlevel
+    movd   m0, [decodedq] ; current value stored at this position
+    paddd  m0, m2 ; add the shifted prediction (low dword)
+    movd   [decodedq], m0 ; write the result back in place
+    sub  lend, 2 ; two positions handled per iteration; the flags set here drive both jl and jg below
+    jl .ret ; len was 1: there is no second position to write
+    PMACSDQL m3, m1, m0, m3, m1 ; last product for the second accumulator uses the value just written
+    psrlq  m3, m4
+    movd   m1, [decodedq+4]
+    paddd  m1, m3
+    movd   [decodedq+4], m1
+    jg .loop_sample ; relies on the flags from "sub lend, 2" - assumes PMACSDQL expands only to SIMD instructions that leave EFLAGS alone
+.ret:
+    REP_RET
+%endmacro
+
+%if HAVE_XOP_EXTERNAL
+LPC_32 xop
+%endif
+LPC_32 sse4
diff --git a/libavcodec/x86/flacdsp_init.c b/libavcodec/x86/flacdsp_init.c
new file mode 100644
index 0000000..a071b3d
--- /dev/null
+++ b/libavcodec/x86/flacdsp_init.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2014 James Almer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/flacdsp.h"
+#include "libavutil/x86/cpu.h"
+#include "config.h"
+
+void ff_flac_lpc_32_sse4(int32_t *samples, const int coeffs[32], int order,
+                         int qlevel, int len);
+void ff_flac_lpc_32_xop(int32_t *samples, const int coeffs[32], int order,
+                        int qlevel, int len);
+
+av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt,
+                                 int bps)
+{ /* Replaces c->lpc with an x86 SIMD routine when bps > 16; fmt is not used. */
+#if HAVE_YASM /* without yasm the function body is empty and c is left untouched */
+    int cpu_flags = av_get_cpu_flags();
+
+    if (EXTERNAL_SSE4(cpu_flags)) {
+        if (bps > 16 && CONFIG_FLAC_DECODER) /* only installed for > 16 bps with the FLAC decoder enabled */
+            c->lpc = ff_flac_lpc_32_sse4;
+    }
+    if (EXTERNAL_XOP(cpu_flags)) { /* tested after SSE4, so the XOP version wins when both are available */
+        if (bps > 16 && CONFIG_FLAC_DECODER)
+            c->lpc = ff_flac_lpc_32_xop;
+    }
+#endif /* HAVE_YASM */
+}
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index 8184376..0d3f821 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -2,20 +2,20 @@
 ;* x86 optimized Format Conversion Utils
 ;* Copyright (c) 2008 Loren Merritt
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
diff --git a/libavcodec/x86/fmtconvert_init.c b/libavcodec/x86/fmtconvert_init.c
index 3d75df9..d300dfd 100644
--- a/libavcodec/x86/fmtconvert_init.c
+++ b/libavcodec/x86/fmtconvert_init.c
@@ -5,20 +5,20 @@
  *
  * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/x86/fpel.asm b/libavcodec/x86/fpel.asm
index b581471..0e3b444 100644
--- a/libavcodec/x86/fpel.asm
+++ b/libavcodec/x86/fpel.asm
@@ -4,20 +4,20 @@
 ;* Copyright (c) 2003-2013 Michael Niedermayer
 ;* Copyright (c) 2013 Daniel Kang
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
@@ -25,85 +25,83 @@
 
 SECTION .text
 
-INIT_MMX mmxext
+%macro PAVGB_MMX 4 ; per-byte average, rounded up, without pavgb; args: 1 = operand a, 2 = operand b (ends up naming the result), 3 = scratch, 4 = mask
+    LOAD   %3, %1 ; scratch = a (LOAD is whatever load instruction the surrounding code has defined)
+    por    %3, %2 ; scratch = a | b
+    pxor   %2, %1 ; b = a ^ b
+    pand   %2, %4 ; apply the mask; callers in this file build 0xFE bytes, clearing bit 0 so the 64-bit shift cannot leak into the neighbouring byte
+    psrlq  %2, 1 ; (a ^ b) >> 1 within each byte
+    psubb  %3, %2 ; (a | b) - ((a ^ b) >> 1), which equals (a + b + 1) >> 1
+    SWAP   %2, %3 ; result is then referred to by arg 2 - assumes SWAP renames registers as in x86inc; confirm
+%endmacro
+
 ; void ff_put/avg_pixels(uint8_t *block, const uint8_t *pixels,
 ;                        ptrdiff_t line_size, int h)
-%macro PIXELS48 2
-%if %2 == 4
-%define OP movh
+%macro OP_PIXELS 2
+%if %2 == mmsize/2
+%define LOAD movh
+%define SAVE movh
+%define LEN  mmsize
 %else
-%define OP mova
+%define LOAD movu
+%define SAVE mova
+%define LEN  %2
 %endif
-cglobal %1_pixels%2, 4,5
+cglobal %1_pixels%2, 4,5,4
     movsxdifnidn r2, r2d
     lea          r4, [r2*3]
+%ifidn %1, avg
+%if notcpuflag(mmxext)
+    pcmpeqd      m6, m6
+    paddb        m6, m6
+%endif
+%endif
 .loop:
-    OP           m0, [r1]
-    OP           m1, [r1+r2]
-    OP           m2, [r1+r2*2]
-    OP           m3, [r1+r4]
-    lea          r1, [r1+r2*4]
+%assign %%i 0
+%rep LEN/mmsize
+    LOAD         m0, [r1 + %%i]
+    LOAD         m1, [r1+r2 + %%i]
+    LOAD         m2, [r1+r2*2 + %%i]
+    LOAD         m3, [r1+r4 + %%i]
 %ifidn %1, avg
-    pavgb        m0, [r0]
-    pavgb        m1, [r0+r2]
-    pavgb        m2, [r0+r2*2]
-    pavgb        m3, [r0+r4]
+%if notcpuflag(mmxext)
+    PAVGB_MMX    [r0 + %%i], m0, m4, m6
+    PAVGB_MMX    [r0+r2 + %%i], m1, m5, m6
+    PAVGB_MMX    [r0+r2*2 + %%i], m2, m4, m6
+    PAVGB_MMX    [r0+r4 + %%i], m3, m5, m6
+%else
+    pavgb        m0, [r0 + %%i]
+    pavgb        m1, [r0+r2 + %%i]
+    pavgb        m2, [r0+r2*2 + %%i]
+    pavgb        m3, [r0+r4 + %%i]
+%endif
 %endif
-    OP         [r0], m0
-    OP      [r0+r2], m1
-    OP    [r0+r2*2], m2
-    OP      [r0+r4], m3
+    SAVE       [r0 + %%i], m0
+    SAVE    [r0+r2 + %%i], m1
+    SAVE  [r0+r2*2 + %%i], m2
+    SAVE    [r0+r4 + %%i], m3
+%assign %%i %%i+mmsize
+%endrep
     sub         r3d, 4
+    lea          r1, [r1+r2*4]
     lea          r0, [r0+r2*4]
     jne       .loop
     RET
 %endmacro
 
-PIXELS48 put, 4
-PIXELS48 avg, 4
-PIXELS48 put, 8
-PIXELS48 avg, 8
+INIT_MMX mmx
+OP_PIXELS put, 4
+OP_PIXELS avg, 4
+OP_PIXELS put, 8
+OP_PIXELS avg, 8
+OP_PIXELS put, 16
+OP_PIXELS avg, 16
 
+INIT_MMX mmxext
+OP_PIXELS avg, 4
+OP_PIXELS avg, 8
+OP_PIXELS avg, 16
 
 INIT_XMM sse2
-; void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
-;                           ptrdiff_t line_size, int h)
-cglobal put_pixels16, 4,5,4
-    lea          r4, [r2*3]
-.loop:
-    movu         m0, [r1]
-    movu         m1, [r1+r2]
-    movu         m2, [r1+r2*2]
-    movu         m3, [r1+r4]
-    lea          r1, [r1+r2*4]
-    mova       [r0], m0
-    mova    [r0+r2], m1
-    mova  [r0+r2*2], m2
-    mova    [r0+r4], m3
-    sub         r3d, 4
-    lea          r0, [r0+r2*4]
-    jnz       .loop
-    REP_RET
-
-; void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
-;                           ptrdiff_t line_size, int h)
-cglobal avg_pixels16, 4,5,4
-    lea          r4, [r2*3]
-.loop:
-    movu         m0, [r1]
-    movu         m1, [r1+r2]
-    movu         m2, [r1+r2*2]
-    movu         m3, [r1+r4]
-    lea          r1, [r1+r2*4]
-    pavgb        m0, [r0]
-    pavgb        m1, [r0+r2]
-    pavgb        m2, [r0+r2*2]
-    pavgb        m3, [r0+r4]
-    mova       [r0], m0
-    mova    [r0+r2], m1
-    mova  [r0+r2*2], m2
-    mova    [r0+r4], m3
-    sub         r3d, 4
-    lea          r0, [r0+r2*4]
-    jnz       .loop
-    REP_RET
+OP_PIXELS put, 16
+OP_PIXELS avg, 16
diff --git a/libavcodec/x86/fpel.h b/libavcodec/x86/fpel.h
index 88d1415..4d93959 100644
--- a/libavcodec/x86/fpel.h
+++ b/libavcodec/x86/fpel.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -28,6 +28,8 @@ void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,
                            ptrdiff_t line_size, int h);
 void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
                          ptrdiff_t line_size, int h);
+void ff_avg_pixels16_mmxext(uint8_t *block, const uint8_t *pixels,
+                            ptrdiff_t line_size, int h);
 void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
                           ptrdiff_t line_size, int h);
 void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
diff --git a/libavcodec/x86/fpel_mmx.c b/libavcodec/x86/fpel_mmx.c
deleted file mode 100644
index eef05ec..0000000
--- a/libavcodec/x86/fpel_mmx.c
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * MMX-optimized avg/put pixel routines
- *
- * Copyright (c) 2000, 2001 Fabrice Bellard
- * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stddef.h>
-#include <stdint.h>
-
-#include "config.h"
-#include "fpel.h"
-#include "inline_asm.h"
-
-#if HAVE_MMX_INLINE
-
-// in case more speed is needed - unrolling would certainly help
-void ff_avg_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
-                        ptrdiff_t line_size, int h)
-{
-    MOVQ_BFE(mm6);
-    JUMPALIGN();
-    do {
-        __asm__ volatile(
-             "movq  %0, %%mm0           \n\t"
-             "movq  %1, %%mm1           \n\t"
-             PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6)
-             "movq  %%mm2, %0           \n\t"
-             :"+m"(*block)
-             :"m"(*pixels)
-             :"memory");
-        pixels += line_size;
-        block += line_size;
-    }
-    while (--h);
-}
-
-void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
-                         ptrdiff_t line_size, int h)
-{
-    MOVQ_BFE(mm6);
-    JUMPALIGN();
-    do {
-        __asm__ volatile(
-             "movq  %0, %%mm0           \n\t"
-             "movq  %1, %%mm1           \n\t"
-             PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6)
-             "movq  %%mm2, %0           \n\t"
-             "movq  8%0, %%mm0          \n\t"
-             "movq  8%1, %%mm1          \n\t"
-             PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6)
-             "movq  %%mm2, 8%0          \n\t"
-             :"+m"(*block)
-             :"m"(*pixels)
-             :"memory");
-        pixels += line_size;
-        block += line_size;
-    }
-    while (--h);
-}
-
-void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
-                        ptrdiff_t line_size, int h)
-{
-    __asm__ volatile (
-        "lea   (%3, %3), %%"REG_a"      \n\t"
-        ".p2align     3                 \n\t"
-        "1:                             \n\t"
-        "movq  (%1    ), %%mm0          \n\t"
-        "movq  (%1, %3), %%mm1          \n\t"
-        "movq     %%mm0, (%2)           \n\t"
-        "movq     %%mm1, (%2, %3)       \n\t"
-        "add  %%"REG_a", %1             \n\t"
-        "add  %%"REG_a", %2             \n\t"
-        "movq  (%1    ), %%mm0          \n\t"
-        "movq  (%1, %3), %%mm1          \n\t"
-        "movq     %%mm0, (%2)           \n\t"
-        "movq     %%mm1, (%2, %3)       \n\t"
-        "add  %%"REG_a", %1             \n\t"
-        "add  %%"REG_a", %2             \n\t"
-        "subl        $4, %0             \n\t"
-        "jnz         1b                 \n\t"
-        : "+g"(h), "+r"(pixels),  "+r"(block)
-        : "r"((x86_reg)line_size)
-        : "%"REG_a, "memory"
-        );
-}
-
-void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
-                         ptrdiff_t line_size, int h)
-{
-    __asm__ volatile (
-        "lea   (%3, %3), %%"REG_a"      \n\t"
-        ".p2align     3                 \n\t"
-        "1:                             \n\t"
-        "movq  (%1    ), %%mm0          \n\t"
-        "movq 8(%1    ), %%mm4          \n\t"
-        "movq  (%1, %3), %%mm1          \n\t"
-        "movq 8(%1, %3), %%mm5          \n\t"
-        "movq     %%mm0,  (%2)          \n\t"
-        "movq     %%mm4, 8(%2)          \n\t"
-        "movq     %%mm1,  (%2, %3)      \n\t"
-        "movq     %%mm5, 8(%2, %3)      \n\t"
-        "add  %%"REG_a", %1             \n\t"
-        "add  %%"REG_a", %2             \n\t"
-        "movq  (%1    ), %%mm0          \n\t"
-        "movq 8(%1    ), %%mm4          \n\t"
-        "movq  (%1, %3), %%mm1          \n\t"
-        "movq 8(%1, %3), %%mm5          \n\t"
-        "movq     %%mm0,  (%2)          \n\t"
-        "movq     %%mm4, 8(%2)          \n\t"
-        "movq     %%mm1,  (%2, %3)      \n\t"
-        "movq     %%mm5, 8(%2, %3)      \n\t"
-        "add  %%"REG_a", %1             \n\t"
-        "add  %%"REG_a", %2             \n\t"
-        "subl        $4, %0             \n\t"
-        "jnz         1b                 \n\t"
-        : "+g"(h), "+r"(pixels),  "+r"(block)
-        : "r"((x86_reg)line_size)
-        : "%"REG_a, "memory"
-        );
-}
-
-#endif /* HAVE_MMX_INLINE */
diff --git a/libavcodec/x86/h263_loopfilter.asm b/libavcodec/x86/h263_loopfilter.asm
index 673f795..2fcd1a2 100644
--- a/libavcodec/x86/h263_loopfilter.asm
+++ b/libavcodec/x86/h263_loopfilter.asm
@@ -1,20 +1,22 @@
 ;******************************************************************************
 ;* MMX-optimized H.263 loop filter
+;* Copyright (c) 2003-2013 Michael Niedermayer
+;* Copyright (c) 2013 Daniel Kang
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
diff --git a/libavcodec/x86/h263dsp_init.c b/libavcodec/x86/h263dsp_init.c
index d4fab98..ab81063 100644
--- a/libavcodec/x86/h263dsp_init.c
+++ b/libavcodec/x86/h263dsp_init.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2013 Diego Biurrun <diego@biurrun.de>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/x86/h264_chromamc.asm b/libavcodec/x86/h264_chromamc.asm
index cc41f00..107ae51 100644
--- a/libavcodec/x86/h264_chromamc.asm
+++ b/libavcodec/x86/h264_chromamc.asm
@@ -3,20 +3,20 @@
 ;* Copyright (c) 2005 Zoltan Hidvegi <hzoli -a- hzoli -d- com>,
 ;*               2005-2008 Loren Merritt
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
diff --git a/libavcodec/x86/h264_chromamc_10bit.asm b/libavcodec/x86/h264_chromamc_10bit.asm
index 7b00351..c358482 100644
--- a/libavcodec/x86/h264_chromamc_10bit.asm
+++ b/libavcodec/x86/h264_chromamc_10bit.asm
@@ -5,20 +5,20 @@
 ;*
 ;* Authors: Daniel Kang <daniel.d.kang@gmail.com>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
@@ -252,8 +252,10 @@ cglobal %1_h264_chroma_mc2_10, 6,7
 %define CHROMAMC_AVG  NOTHING
 INIT_XMM sse2
 CHROMA_MC8 put
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 CHROMA_MC8 put
+%endif
 INIT_MMX mmxext
 CHROMA_MC4 put
 CHROMA_MC2 put
@@ -261,8 +263,10 @@ CHROMA_MC2 put
 %define CHROMAMC_AVG  AVG
 INIT_XMM sse2
 CHROMA_MC8 avg
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 CHROMA_MC8 avg
+%endif
 INIT_MMX mmxext
 CHROMA_MC4 avg
 CHROMA_MC2 avg
diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index d2067c8..14c8205 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -7,20 +7,20 @@
 ;*          Fiona Glaser <fiona@x264.com>
 ;*          Oskar Arvidsson <oskar@irock.se>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
@@ -384,8 +384,10 @@ cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64
 
 INIT_XMM sse2
 DEBLOCK_LUMA
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 DEBLOCK_LUMA
+%endif
 
 %else
 
@@ -499,8 +501,10 @@ INIT_MMX mmxext
 DEBLOCK_LUMA v8, 8
 INIT_XMM sse2
 DEBLOCK_LUMA v, 16
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 DEBLOCK_LUMA v, 16
+%endif
 
 %endif ; ARCH
 
@@ -772,8 +776,10 @@ cglobal deblock_h_luma_intra_8, 2,4,8,0x80
 
 INIT_XMM sse2
 DEBLOCK_LUMA_INTRA v
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 DEBLOCK_LUMA_INTRA v
+%endif
 %if ARCH_X86_64 == 0
 INIT_MMX mmxext
 DEBLOCK_LUMA_INTRA v8
@@ -836,7 +842,11 @@ cglobal deblock_h_chroma_8, 5,7
     TRANSPOSE4x8_LOAD  bw, wd, dq, PASS8ROWS(t5, r0, r1, t6)
     movq  buf0, m0
     movq  buf1, m3
-    call ff_chroma_inter_body_mmxext
+    LOAD_MASK  r2d, r3d
+    movd       m6, [r4] ; tc0
+    punpcklbw  m6, m6
+    pand       m7, m6
+    DEBLOCK_P0_Q0
     movq  m0, buf0
     movq  m3, buf1
     TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm
index d049c62..d8ace17 100644
--- a/libavcodec/x86/h264_deblock_10bit.asm
+++ b/libavcodec/x86/h264_deblock_10bit.asm
@@ -7,20 +7,20 @@
 ;*          Loren Merritt <lorenm@u.washington.edu>
 ;*          Fiona Glaser <fiona@x264.com>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
@@ -418,9 +418,11 @@ cglobal deblock_h_luma_10, 5,7,15
 
 INIT_XMM sse2
 DEBLOCK_LUMA_64
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 DEBLOCK_LUMA_64
 %endif
+%endif
 
 %macro SWAPMOVA 2
 %ifid %1
@@ -715,8 +717,10 @@ cglobal deblock_h_luma_intra_10, 4,7,16
 
 INIT_XMM sse2
 DEBLOCK_LUMA_INTRA_64
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 DEBLOCK_LUMA_INTRA_64
+%endif
 
 %endif
 
@@ -802,10 +806,12 @@ DEBLOCK_LUMA_INTRA
 INIT_XMM sse2
 DEBLOCK_LUMA
 DEBLOCK_LUMA_INTRA
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 DEBLOCK_LUMA
 DEBLOCK_LUMA_INTRA
 %endif
+%endif
 
 ; in: %1=p0, %2=q0, %3=p1, %4=q1, %5=mask, %6=tmp, %7=tmp
 ; out: %1=p0', %2=q0'
@@ -918,5 +924,7 @@ DEBLOCK_CHROMA
 %endif
 INIT_XMM sse2
 DEBLOCK_CHROMA
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 DEBLOCK_CHROMA
+%endif
diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h
index bb881c3..ef65cf8 100644
--- a/libavcodec/x86/h264_i386.h
+++ b/libavcodec/x86/h264_i386.h
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -38,7 +38,7 @@
 
 //FIXME use some macros to avoid duplicating get_cabac (cannot be done yet
 //as that would make optimization work hard)
-#if HAVE_7REGS
+#if HAVE_7REGS && !BROKEN_COMPILER
 #define decode_significance decode_significance_x86
 static int decode_significance_x86(CABACContext *c, int max_coeff,
                                    uint8_t *significant_coeff_ctx_base,
@@ -55,6 +55,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
     __asm__ volatile(
         "lea   "MANGLE(ff_h264_cabac_tables)", %0      \n\t"
         : "=&r"(tables)
+        : NAMED_CONSTRAINTS_ARRAY(ff_h264_cabac_tables)
     );
 #endif
 
@@ -130,6 +131,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
     __asm__ volatile(
         "lea    "MANGLE(ff_h264_cabac_tables)", %0      \n\t"
         : "=&r"(tables)
+        : NAMED_CONSTRAINTS_ARRAY(ff_h264_cabac_tables)
     );
 #endif
 
@@ -198,7 +200,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
     );
     return coeff_count;
 }
-#endif /* HAVE_7REGS && !defined(BROKEN_RELOCATIONS) */
+#endif /* HAVE_7REGS && !BROKEN_COMPILER */
 
 #endif /* HAVE_INLINE_ASM */
 #endif /* AVCODEC_X86_H264_I386_H */
diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm
index 313791a..7fafe19 100644
--- a/libavcodec/x86/h264_idct.asm
+++ b/libavcodec/x86/h264_idct.asm
@@ -9,20 +9,20 @@
 ;*          Holger Lubitz <hal@duncan.ol.sub.de>
 ;*          Min Chen <chenm001.163.com>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;*****************************************************************************
 
diff --git a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm
index b7d5105..5c3acb1 100644
--- a/libavcodec/x86/h264_idct_10bit.asm
+++ b/libavcodec/x86/h264_idct_10bit.asm
@@ -5,20 +5,20 @@
 ;*
 ;* Authors: Daniel Kang <daniel.d.kang@gmail.com>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
@@ -83,8 +83,10 @@ cglobal h264_idct_add_10, 3,3
 
 INIT_XMM sse2
 IDCT_ADD_10
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 IDCT_ADD_10
+%endif
 
 ;-----------------------------------------------------------------------------
 ; void ff_h264_idct_add16_10(pixel *dst, const int *block_offset,
@@ -117,9 +119,11 @@ add4x4_idct %+ SUFFIX:
 INIT_XMM sse2
 ALIGN 16
 ADD4x4IDCT
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 ALIGN 16
 ADD4x4IDCT
+%endif
 
 %macro ADD16_OP 2
     cmp          byte [r4+%2], 0
@@ -155,8 +159,10 @@ cglobal h264_idct_add16_10, 5,6
 
 INIT_XMM sse2
 IDCT_ADD16_10
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 IDCT_ADD16_10
+%endif
 
 ;-----------------------------------------------------------------------------
 ; void ff_h264_idct_dc_add_10(pixel *dst, int16_t *block, int stride)
@@ -220,8 +226,10 @@ cglobal h264_idct8_dc_add_10,3,4,7
 
 INIT_XMM sse2
 IDCT8_DC_ADD
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 IDCT8_DC_ADD
+%endif
 
 ;-----------------------------------------------------------------------------
 ; void ff_h264_idct_add16intra_10(pixel *dst, const int *block_offset,
@@ -293,8 +301,10 @@ cglobal h264_idct_add16intra_10,5,7,8
 
 INIT_XMM sse2
 IDCT_ADD16INTRA_10
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 IDCT_ADD16INTRA_10
+%endif
 
 %assign last_block 36
 ;-----------------------------------------------------------------------------
@@ -330,8 +340,10 @@ cglobal h264_idct_add8_10,5,8,7
 
 INIT_XMM sse2
 IDCT_ADD8
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 IDCT_ADD8
+%endif
 
 ;-----------------------------------------------------------------------------
 ; void ff_h264_idct8_add_10(pixel *dst, int16_t *block, int stride)
@@ -537,8 +549,10 @@ h264_idct8_add1_10 %+ SUFFIX:
 
 INIT_XMM sse2
 IDCT8_ADD
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 IDCT8_ADD
+%endif
 
 ;-----------------------------------------------------------------------------
 ; void ff_h264_idct8_add4_10(pixel **dst, const int *block_offset,
@@ -577,5 +591,7 @@ cglobal h264_idct8_add4_10, 0,7,16
 
 INIT_XMM sse2
 IDCT8_ADD4
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 IDCT8_ADD4
+%endif
diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index 4a4fa10..88ba597 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -5,20 +5,20 @@
 ;* Copyright (c) 2010 Loren Merritt
 ;* Copyright (c) 2010 Ronald S. Bultje
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
@@ -2497,10 +2497,7 @@ cglobal pred4x4_tm_vp8_8, 3,3
     pshufb     mm3, mm6
     pshufb     mm4, mm6
     pshufb     mm5, mm6
-    psubw      mm2, mm7
-    psubw      mm3, mm7
-    psubw      mm4, mm7
-    psubw      mm5, mm7
+    psubw      mm0, mm7
     paddw      mm2, mm0
     paddw      mm3, mm0
     paddw      mm4, mm0
diff --git a/libavcodec/x86/h264_intrapred_10bit.asm b/libavcodec/x86/h264_intrapred_10bit.asm
index 55790a9..9dee577 100644
--- a/libavcodec/x86/h264_intrapred_10bit.asm
+++ b/libavcodec/x86/h264_intrapred_10bit.asm
@@ -5,20 +5,20 @@
 ;*
 ;* Authors: Daniel Kang <daniel.d.kang@gmail.com>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
@@ -82,8 +82,10 @@ INIT_XMM sse2
 PRED4x4_DR
 INIT_XMM ssse3
 PRED4x4_DR
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 PRED4x4_DR
+%endif
 
 ;------------------------------------------------------------------------------
 ; void ff_pred4x4_vertical_right(pixel *src, const pixel *topright, int stride)
@@ -119,8 +121,10 @@ INIT_XMM sse2
 PRED4x4_VR
 INIT_XMM ssse3
 PRED4x4_VR
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 PRED4x4_VR
+%endif
 
 ;-------------------------------------------------------------------------------
 ; void ff_pred4x4_horizontal_down(pixel *src, const pixel *topright, int stride)
@@ -159,28 +163,14 @@ INIT_XMM sse2
 PRED4x4_HD
 INIT_XMM ssse3
 PRED4x4_HD
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 PRED4x4_HD
+%endif
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred4x4_dc(pixel *src, const pixel *topright, int stride)
 ;-----------------------------------------------------------------------------
-%macro HADDD 2 ; sum junk
-%if mmsize == 16
-    movhlps %2, %1
-    paddd   %1, %2
-    pshuflw %2, %1, 0xE
-    paddd   %1, %2
-%else
-    pshufw  %2, %1, 0xE
-    paddd   %1, %2
-%endif
-%endmacro
-
-%macro HADDW 2
-    pmaddwd %1, [pw_1]
-    HADDD   %1, %2
-%endmacro
 
 INIT_MMX mmxext
 cglobal pred4x4_dc_10, 3, 3
@@ -228,8 +218,10 @@ cglobal pred4x4_down_left_10, 3, 3
 
 INIT_XMM sse2
 PRED4x4_DL
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 PRED4x4_DL
+%endif
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred4x4_vertical_left(pixel *src, const pixel *topright, int stride)
@@ -255,8 +247,10 @@ cglobal pred4x4_vertical_left_10, 3, 3
 
 INIT_XMM sse2
 PRED4x4_VL
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 PRED4x4_VL
+%endif
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred4x4_horizontal_up(pixel *src, const pixel *topright, int stride)
@@ -565,8 +559,10 @@ cglobal pred8x8l_top_dc_10, 4, 4, 6
 
 INIT_XMM sse2
 PRED8x8L_TOP_DC
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 PRED8x8L_TOP_DC
+%endif
 
 ;-------------------------------------------------------------------------------
 ; void ff_pred8x8l_dc(pixel *src, int has_topleft, int has_topright, int stride)
@@ -622,8 +618,10 @@ cglobal pred8x8l_dc_10, 4, 6, 6
 
 INIT_XMM sse2
 PRED8x8L_DC
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 PRED8x8L_DC
+%endif
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred8x8l_vertical(pixel *src, int has_topleft, int has_topright,
@@ -656,8 +654,10 @@ cglobal pred8x8l_vertical_10, 4, 4, 6
 
 INIT_XMM sse2
 PRED8x8L_VERTICAL
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 PRED8x8L_VERTICAL
+%endif
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred8x8l_horizontal(uint8_t *src, int has_topleft, int has_topright,
@@ -711,8 +711,10 @@ INIT_XMM sse2
 PRED8x8L_HORIZONTAL
 INIT_XMM ssse3
 PRED8x8L_HORIZONTAL
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 PRED8x8L_HORIZONTAL
+%endif
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred8x8l_down_left(pixel *src, int has_topleft, int has_topright,
@@ -778,8 +780,10 @@ INIT_XMM sse2
 PRED8x8L_DOWN_LEFT
 INIT_XMM ssse3
 PRED8x8L_DOWN_LEFT
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 PRED8x8L_DOWN_LEFT
+%endif
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred8x8l_down_right(pixel *src, int has_topleft, int has_topright,
@@ -851,8 +855,10 @@ INIT_XMM sse2
 PRED8x8L_DOWN_RIGHT
 INIT_XMM ssse3
 PRED8x8L_DOWN_RIGHT
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 PRED8x8L_DOWN_RIGHT
+%endif
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred8x8l_vertical_right(pixel *src, int has_topleft,
@@ -920,8 +926,10 @@ INIT_XMM sse2
 PRED8x8L_VERTICAL_RIGHT
 INIT_XMM ssse3
 PRED8x8L_VERTICAL_RIGHT
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 PRED8x8L_VERTICAL_RIGHT
+%endif
 
 ;-----------------------------------------------------------------------------
 ; void ff_pred8x8l_horizontal_up(pixel *src, int has_topleft,
@@ -980,8 +988,10 @@ INIT_XMM sse2
 PRED8x8L_HORIZONTAL_UP
 INIT_XMM ssse3
 PRED8x8L_HORIZONTAL_UP
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 PRED8x8L_HORIZONTAL_UP
+%endif
 
 
 ;-----------------------------------------------------------------------------
diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
index 0e572b1..528b92e 100644
--- a/libavcodec/x86/h264_intrapred_init.c
+++ b/libavcodec/x86/h264_intrapred_init.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2010 Fiona Glaser <fiona@x264.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c
index 9ca6d7e..b4cb9b1 100644
--- a/libavcodec/x86/h264_qpel.c
+++ b/libavcodec/x86/h264_qpel.c
@@ -2,20 +2,20 @@
  * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
  * Copyright (c) 2011 Daniel Kang
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -29,8 +29,8 @@
 #include "fpel.h"
 
 #if HAVE_YASM
-void ff_put_pixels4_mmxext(uint8_t *block, const uint8_t *pixels,
-                           ptrdiff_t line_size, int h);
+void ff_put_pixels4_mmx(uint8_t *block, const uint8_t *pixels,
+                        ptrdiff_t line_size, int h);
 void ff_avg_pixels4_mmxext(uint8_t *block, const uint8_t *pixels,
                            ptrdiff_t line_size, int h);
 void ff_put_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2,
@@ -49,9 +49,9 @@ void ff_avg_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t
 #define ff_avg_pixels8_l2_sse2  ff_avg_pixels8_l2_mmxext
 #define ff_put_pixels16_l2_sse2 ff_put_pixels16_l2_mmxext
 #define ff_avg_pixels16_l2_sse2 ff_avg_pixels16_l2_mmxext
-
-CALL_2X_PIXELS(ff_avg_pixels16_mmxext, ff_avg_pixels8_mmxext, 8)
-CALL_2X_PIXELS(ff_put_pixels16_mmxext, ff_put_pixels8_mmxext, 8)
+#define ff_put_pixels16_mmxext  ff_put_pixels16_mmx
+#define ff_put_pixels8_mmxext   ff_put_pixels8_mmx
+#define ff_put_pixels4_mmxext   ff_put_pixels4_mmx
 
 #define DEF_QPEL(OPNAME)\
 void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\
@@ -339,7 +339,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, const uin
     DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
     uint8_t * const halfHV= temp;\
     int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
-    assert(((int)temp & 7) == 0);\
+    av_assert2(((int)temp & 7) == 0);\
     ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\
 }\
@@ -349,7 +349,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, const uin
     DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
     uint8_t * const halfHV= temp;\
     int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
-    assert(((int)temp & 7) == 0);\
+    av_assert2(((int)temp & 7) == 0);\
     ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
     ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\
 }\
@@ -359,7 +359,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, const uin
     DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
     uint8_t * const halfHV= temp;\
     int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
-    assert(((int)temp & 7) == 0);\
+    av_assert2(((int)temp & 7) == 0);\
     ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
     ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+2, halfHV, stride, SIZE, SIZE);\
 }\
@@ -369,7 +369,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, const uin
     DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
     uint8_t * const halfHV= temp;\
     int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
-    assert(((int)temp & 7) == 0);\
+    av_assert2(((int)temp & 7) == 0);\
     ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
     ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+3, halfHV, stride, SIZE, SIZE);\
 }\
diff --git a/libavcodec/x86/h264_qpel_10bit.asm b/libavcodec/x86/h264_qpel_10bit.asm
index f92c4aa..d65660d 100644
--- a/libavcodec/x86/h264_qpel_10bit.asm
+++ b/libavcodec/x86/h264_qpel_10bit.asm
@@ -5,20 +5,20 @@
 ;*
 ;* Authors: Daniel Kang <daniel.d.kang@gmail.com>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
@@ -28,7 +28,7 @@ SECTION_RODATA 32
 
 cextern pw_16
 cextern pw_1
-cextern pb_0
+pb_0: times 32 db 0 ; we do not use cextern here as old llvm-gcc fails to align it correctly
 
 pw_pixel_max: times 8 dw ((1 << 10)-1)
 
diff --git a/libavcodec/x86/h264_qpel_8bit.asm b/libavcodec/x86/h264_qpel_8bit.asm
index bc6c725..2d287ba 100644
--- a/libavcodec/x86/h264_qpel_8bit.asm
+++ b/libavcodec/x86/h264_qpel_8bit.asm
@@ -6,20 +6,20 @@
 ;*
 ;* Authors: Daniel Kang <daniel.d.kang@gmail.com>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
diff --git a/libavcodec/x86/h264_weight.asm b/libavcodec/x86/h264_weight.asm
index d1873af..b4fb9db 100644
--- a/libavcodec/x86/h264_weight.asm
+++ b/libavcodec/x86/h264_weight.asm
@@ -4,20 +4,20 @@
 ;* Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
 ;* Copyright (C) 2010 Eli Friedman <eli.friedman@gmail.com>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
@@ -135,6 +135,13 @@ WEIGHT_FUNC_HALF_MM 8, 8
     add  off_regd, 1
     or   off_regd, 1
     add        r4, 1
+    cmp        r5, 128
+     jne .normal
+    sar        r5, 1
+    sar        r6, 1
+    sar  off_regd, 1
+    sub        r4, 1
+.normal
 %if cpuflag(ssse3)
     movd       m4, r5d
     movd       m0, r6d
diff --git a/libavcodec/x86/h264_weight_10bit.asm b/libavcodec/x86/h264_weight_10bit.asm
index 961ec8c..5d94962 100644
--- a/libavcodec/x86/h264_weight_10bit.asm
+++ b/libavcodec/x86/h264_weight_10bit.asm
@@ -5,20 +5,20 @@
 ;*
 ;* Authors: Daniel Kang <daniel.d.kang@gmail.com>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
diff --git a/libavcodec/x86/h264chroma_init.c b/libavcodec/x86/h264chroma_init.c
index 8ec8a79..e08af27 100644
--- a/libavcodec/x86/h264chroma_init.c
+++ b/libavcodec/x86/h264chroma_init.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
index 134d594..35db200 100644
--- a/libavcodec/x86/h264dsp_init.c
+++ b/libavcodec/x86/h264dsp_init.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -210,6 +210,7 @@ H264_BIWEIGHT_10_SSE(4,  10)
 av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
                                  const int chroma_format_idc)
 {
+#if HAVE_YASM
     int cpu_flags = av_get_cpu_flags();
 
     if (EXTERNAL_MMXEXT(cpu_flags) && chroma_format_idc <= 1)
@@ -365,4 +366,5 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
 #endif /* HAVE_ALIGNED_STACK */
         }
     }
+#endif
 }
diff --git a/libavcodec/x86/hevc_deblock.asm b/libavcodec/x86/hevc_deblock.asm
index 45b8703..7fa0803 100644
--- a/libavcodec/x86/hevc_deblock.asm
+++ b/libavcodec/x86/hevc_deblock.asm
@@ -5,20 +5,20 @@
 ;*
 ;* Authors: Seppo Tomperi <seppo.tomperi@vtt.fi>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
@@ -26,10 +26,11 @@
 
 SECTION_RODATA
 
-pw_pixel_max: times 8 dw ((1 << 10)-1)
-pw_m1:        times 8 dw -1
-pw_m2:        times 8 dw -2
-pd_1 :        times 4 dd  1
+pw_pixel_max_12: times 8 dw ((1 << 12)-1) ; 4095 in every word: CLIPW upper bound used by the 12-bit functions
+pw_pixel_max_10: times 8 dw ((1 << 10)-1) ; 1023 in every word: CLIPW upper bound used by the 10-bit functions
+pw_m1:           times 8 dw -1 ; pmullw/psignw operand used to turn tc into -tc
+pw_m2:           times 8 dw -2 ; pmullw operand used to form -tc * 2 in the luma body
+pd_1 :           times 4 dd  1 ; pcmpeqd operand used when building per-dword filtering masks
 
 cextern pw_4
 cextern pw_8
@@ -57,10 +58,10 @@ INIT_XMM sse2
     movd             m4, %5
     movd             m6, %6
     movd             m5, %7
-    movd             m7, %8
+    movd             m3, %8
 
     punpcklbw        m4, m6
-    punpcklbw        m5, m7
+    punpcklbw        m5, m3
     punpcklwd        m4, m5
 
     punpckhdq        m2, m0, m4
@@ -76,16 +77,10 @@ INIT_XMM sse2
 ; in: 4 rows of 8 words in m0..m3
 ; out: 8 rows of 4 bytes in %1..%8
 %macro TRANSPOSE8x4B_STORE 8
-    packuswb         m0, m0
-    packuswb         m1, m1
-    packuswb         m2, m2
-    packuswb         m3, m3
-
-    punpcklbw        m0, m1
-    punpcklbw        m2, m3
-
-    punpckhwd        m6, m0, m2
-    punpcklwd        m0, m2
+    packuswb         m0, m2
+    packuswb         m1, m3
+    SBUTTERFLY bw, 0, 1, 2
+    SBUTTERFLY wd, 0, 1, 2
 
     movd             %1, m0
     pshufd           m0, m0, 0x39
@@ -95,13 +90,13 @@ INIT_XMM sse2
     pshufd           m0, m0, 0x39
     movd             %4, m0
 
-    movd             %5, m6
-    pshufd           m6, m6, 0x39
-    movd             %6, m6
-    pshufd           m6, m6, 0x39
-    movd             %7, m6
-    pshufd           m6, m6, 0x39
-    movd             %8, m6
+    movd             %5, m1
+    pshufd           m1, m1, 0x39
+    movd             %6, m1
+    pshufd           m1, m1, 0x39
+    movd             %7, m1
+    pshufd           m1, m1, 0x39
+    movd             %8, m1
 %endmacro
 
 ; in: 8 rows of 4 words in %4..%11
@@ -120,10 +115,10 @@ INIT_XMM sse2
     movq             m4, %5
     movq             m6, %6
     movq             m5, %7
-    movq             m7, %8
+    movq             m3, %8
 
     punpcklwd        m4, m6
-    punpcklwd        m5, m7
+    punpcklwd        m5, m3
     punpckhdq        m6, m4, m5
     punpckldq        m4, m5
 
@@ -136,32 +131,23 @@ INIT_XMM sse2
 
 ; in: 4 rows of 8 words in m0..m3
 ; out: 8 rows of 4 words in %1..%8
-%macro TRANSPOSE8x4W_STORE 8
-    pxor             m5, m5; zeros reg
-    CLIPW            m0, m5, [pw_pixel_max]
-    CLIPW            m1, m5, [pw_pixel_max]
-    CLIPW            m2, m5, [pw_pixel_max]
-    CLIPW            m3, m5, [pw_pixel_max]
+%macro TRANSPOSE8x4W_STORE 9 ; args 1-8: destination rows, arg 9: upper bound handed to CLIPW
+    TRANSPOSE4x4W     0, 1, 2, 3, 4 ; x86util helper, not visible here; NOTE(review): presumably transposes m0..m3 with m4 as scratch - confirm
 
-    punpckhwd        m4, m0, m1
-    punpcklwd        m0, m1
-    punpckhwd        m5, m2, m3
-    punpcklwd        m2, m3
-    punpckhdq        m6, m0, m2
-    punpckldq        m0, m2
+    pxor             m5, m5; zeros reg
+    CLIPW            m0, m5, %9 ; clipping now runs after the transpose, with zero as the lower bound
+    CLIPW            m1, m5, %9
+    CLIPW            m2, m5, %9
+    CLIPW            m3, m5, %9 ; callers pass [pw_pixel_max_10] or [pw_pixel_max_12] as the bound
 
     movq             %1, m0
     movhps           %2, m0
-    movq             %3, m6
-    movhps           %4, m6
-
-    punpckhdq        m6, m4, m5
-    punpckldq        m4, m5
-
-    movq             %5, m4
-    movhps           %6, m4
-    movq             %7, m6
-    movhps           %8, m6
+    movq             %3, m1 ; low 8 bytes of m1 -> third row
+    movhps           %4, m1 ; high 8 bytes of m1 -> fourth row
+    movq             %5, m2 ; low 8 bytes of m2 -> fifth row
+    movhps           %6, m2 ; high 8 bytes of m2 -> sixth row
+    movq             %7, m3 ; low 8 bytes of m3 -> seventh row
+    movhps           %8, m3 ; high 8 bytes of m3 -> eighth row
 %endmacro
 
 ; in: 8 rows of 8 bytes in %1..%8
@@ -212,40 +198,20 @@ INIT_XMM sse2
 ; in: 8 rows of 8 words in m0..m8
 ; out: 8 rows of 8 bytes in %1..%8
 %macro TRANSPOSE8x8B_STORE 8
-    packuswb         m0, m0
-    packuswb         m1, m1
-    packuswb         m2, m2
-    packuswb         m3, m3
-    packuswb         m4, m4
-    packuswb         m5, m5
-    packuswb         m6, m6
-    packuswb         m7, m7
-
-    punpcklbw        m0, m1
-    punpcklbw        m2, m3
-
-    punpckhwd        m8, m0, m2
-    punpcklwd        m0, m2
-
-    punpcklbw        m4, m5
-    punpcklbw        m6, m7
-
-    punpckhwd        m9, m4, m6
-    punpcklwd        m4, m6
+    packuswb         m0, m4 ; words -> unsigned-saturated bytes: m0 supplies the low 8 bytes, m4 the high 8 bytes
+    packuswb         m1, m5 ; same pairing for m1 and m5
+    packuswb         m2, m6 ; same pairing for m2 and m6
+    packuswb         m3, m7 ; same pairing for m3 and m7; only m0..m4 are used from here on
+    TRANSPOSE2x4x4B   0, 1, 2, 3, 4 ; x86util helper, not visible here; NOTE(review): presumably byte-transposes m0..m3 with m4 as scratch - confirm
 
-    punpckhdq       m10, m0, m4; 2, 3
-    punpckldq        m0, m4;   0, 1
-
-    punpckldq       m11, m8, m9;  4, 5
-    punpckhdq        m8, m9;   6, 7
     movq             %1, m0
     movhps           %2, m0
-    movq             %3, m10
-    movhps           %4, m10
-    movq             %5, m11
-    movhps           %6, m11
-    movq             %7, m8
-    movhps           %8, m8
+    movq             %3, m1 ; low 8 bytes of m1 -> third row
+    movhps           %4, m1 ; high 8 bytes of m1 -> fourth row
+    movq             %5, m2 ; low 8 bytes of m2 -> fifth row
+    movhps           %6, m2 ; high 8 bytes of m2 -> sixth row
+    movq             %7, m3 ; low 8 bytes of m3 -> seventh row
+    movhps           %8, m3 ; high 8 bytes of m3 -> eighth row
 %endmacro
 
 ; in: 8 rows of 8 words in %1..%8
@@ -264,18 +230,18 @@ INIT_XMM sse2
 
 ; in: 8 rows of 8 words in m0..m8
 ; out: 8 rows of 8 words in %1..%8
-%macro TRANSPOSE8x8W_STORE 8
+%macro TRANSPOSE8x8W_STORE 9
     TRANSPOSE8x8W     0, 1, 2, 3, 4, 5, 6, 7, 8
 
     pxor             m8, m8
-    CLIPW            m0, m8, [pw_pixel_max]
-    CLIPW            m1, m8, [pw_pixel_max]
-    CLIPW            m2, m8, [pw_pixel_max]
-    CLIPW            m3, m8, [pw_pixel_max]
-    CLIPW            m4, m8, [pw_pixel_max]
-    CLIPW            m5, m8, [pw_pixel_max]
-    CLIPW            m6, m8, [pw_pixel_max]
-    CLIPW            m7, m8, [pw_pixel_max]
+    CLIPW            m0, m8, %9
+    CLIPW            m1, m8, %9
+    CLIPW            m2, m8, %9
+    CLIPW            m3, m8, %9
+    CLIPW            m4, m8, %9
+    CLIPW            m5, m8, %9
+    CLIPW            m6, m8, %9
+    CLIPW            m7, m8, %9
 
     movdqu           %1, m0
     movdqu           %2, m1
@@ -318,13 +284,14 @@ ALIGN 16
     paddw            m5, m4;
 
     ;tc calculations
-    movd             m6, [r2]; tc0
-    add              r2, 4;
+    movq             m6, [tcq]; tc0
     punpcklwd        m6, m6
-    movd             m7, [r2]; tc1
-    punpcklwd        m7, m7
-    shufps           m6, m7, 0; tc0, tc1
+    pshufd           m6, m6, 0xA0; tc0, tc1
+%if cpuflag(ssse3)
+    psignw           m4, m6, [pw_m1]; -tc0, -tc1
+%else
     pmullw           m4, m6, [pw_m1]; -tc0, -tc1
+%endif
     ;end tc calculations
 
     paddw            m5, [pw_4]; +4
@@ -356,17 +323,17 @@ ALIGN 16
 %if %1 > 8
     shl             betaq, %1 - 8
 %endif
-    movd            m13, betaq
+    movd            m13, betad
     SPLATW          m13, m13, 0
     ;end beta calculations
 
     paddw            m9, m10, m11;   0d0, 0d3  ,  1d0, 1d3
 
-    pshufhw         m14, m9,  q0033 ;0b00001111;  0d3 0d3 0d0 0d0 in high
-    pshuflw         m14, m14, q0033 ;0b00001111;  1d3 1d3 1d0 1d0 in low
+    pshufhw         m14, m9, 0x0f ;0b00001111;  0d3 0d3 0d0 0d0 in high
+    pshuflw         m14, m14, 0x0f ;0b00001111;  1d3 1d3 1d0 1d0 in low
 
-    pshufhw          m9, m9, q3300 ;0b11110000; 0d0 0d0 0d3 0d3
-    pshuflw          m9, m9, q3300 ;0b11110000; 1d0 1d0 1d3 1d3
+    pshufhw          m9, m9, 0xf0 ;0b11110000; 0d0 0d0 0d3 0d3
+    pshuflw          m9, m9, 0xf0 ;0b11110000; 1d0 1d0 1d3 1d3
 
     paddw           m14, m9; 0d0+0d3, 1d0+1d3
 
@@ -380,7 +347,7 @@ ALIGN 16
     psraw           m15, m13, 2;   beta >> 2
     psllw            m8, m9, 1;
     pcmpgtw         m15, m8; (d0 << 1) < beta_2, (d3 << 1) < beta_2
-    movmskps        r14, m15;
+    movmskps        r6, m15;
     ;end weak / strong decision
 
     ; weak filter nd_p/q calculation
@@ -388,19 +355,15 @@ ALIGN 16
     psrld            m8, 16
     paddw            m8, m10
     movd            r7d, m8
-    and              r7, 0xffff; 1dp0 + 1dp3
     pshufd           m8, m8, 0x4E
     movd            r8d, m8
-    and              r8, 0xffff; 0dp0 + 0dp3
 
     pshufd           m8, m11, 0x31
     psrld            m8, 16
     paddw            m8, m11
     movd            r9d, m8
-    and              r9, 0xffff; 1dq0 + 1dq3
     pshufd           m8, m8, 0x4E
     movd           r10d, m8
-    and             r10, 0xffff; 0dq0 + 0dq3
     ; end calc for weak filter
 
     ; filtering mask
@@ -422,14 +385,13 @@ ALIGN 16
     shl             r11, %1 - 8
 %endif
     movd             m8, r11d; tc0
-    add             tcq, 4;
-    mov             r3d, [tcq];
+    mov             r3d, [tcq+4];
 %if %1 > 8
     shl              r3, %1 - 8
 %endif
-    movd             m9, r3d; tc1
     add            r11d, r3d; tc0 + tc1
     jz             .bypassluma
+    movd             m9, r3d; tc1
     punpcklwd        m8, m8
     punpcklwd        m9, m9
     shufps           m8, m9, 0; tc0, tc1
@@ -453,7 +415,7 @@ ALIGN 16
     psraw           m13, 3; beta >> 3
     pcmpgtw         m13, m12;
     movmskps        r11, m13;
-    and             r14, r11; strong mask , beta_2 and beta_3 comparisons
+    and             r6, r11; strong mask , beta_2 and beta_3 comparisons
     ;----beta_3 comparison end-----
     ;----tc25 comparison---
     psubw           m12, m3, m4;      p0 - q0
@@ -464,23 +426,23 @@ ALIGN 16
 
     pcmpgtw          m8, m12; tc25 comparisons
     movmskps        r11, m8;
-    and             r14, r11; strong mask, beta_2, beta_3 and tc25 comparisons
+    and             r6, r11; strong mask, beta_2, beta_3 and tc25 comparisons
     ;----tc25 comparison end---
-    mov             r11, r14;
+    mov             r11, r6;
     shr             r11, 1;
-    and             r14, r11; strong mask, bits 2 and 0
+    and             r6, r11; strong mask, bits 2 and 0
 
     pmullw          m14, m9, [pw_m2]; -tc * 2
     paddw            m9, m9
 
-    and             r14, 5; 0b101
-    mov             r11, r14; strong mask
-    shr             r14, 2;
-    movd            m12, r14d; store to xmm for mask generation
-    shl             r14, 1
+    and             r6, 5; 0b101
+    mov             r11, r6; strong mask
+    shr             r6, 2;
+    movd            m12, r6d; store to xmm for mask generation
+    shl             r6, 1
     and             r11, 1
     movd            m10, r11d; store to xmm for mask generation
-    or              r14, r11; final strong mask, bits 1 and 0
+    or              r6, r11; final strong mask, bits 1 and 0
     jz      .weakfilter
 
     shufps          m10, m12, 0
@@ -565,16 +527,16 @@ ALIGN 16
     MASKED_COPY      m3, m12
 
 .weakfilter:
-    not             r14; strong mask -> weak mask
-    and             r14, r13; final weak filtering mask, bits 0 and 1
+    not             r6; strong mask -> weak mask
+    and             r6, r13; final weak filtering mask, bits 0 and 1
     jz             .store
 
     ; weak filtering mask
-    mov             r11, r14
+    mov             r11, r6
     shr             r11, 1
     movd            m12, r11d
-    and             r14, 1
-    movd            m11, r14d
+    and             r6, 1
+    movd            m11, r6d
     shufps          m11, m12, 0
     pcmpeqd         m11, [pd_1]; filtering mask
 
@@ -609,7 +571,11 @@ ALIGN 16
     pminsw          m12, m9;  av_clip(delta0, -tc, tc)
 
     psraw            m9, 1;   tc -> tc / 2
+%if cpuflag(ssse3)
+    psignw          m14, m9, [pw_m1]; -tc / 2
+%else
     pmullw          m14, m9, [pw_m1]; -tc / 2
+%endif
 
     pavgw           m15, m1, m3;   (p2 + p0 + 1) >> 1
     psubw           m15, m2;  ((p2 + p0 + 1) >> 1) - p1
@@ -620,7 +586,7 @@ ALIGN 16
     paddw           m15, m2; p1'
 
     ;beta calculations
-    movd            m10, betaq
+    movd            m10, betad
     SPLATW          m10, m10, 0
 
     movd            m13, r7d; 1dp0 + 1dp3
@@ -658,97 +624,141 @@ ALIGN 16
     MASKED_COPY      m4, m8
 %endmacro
 
-INIT_XMM sse2
 ;-----------------------------------------------------------------------------
 ; void ff_hevc_v_loop_filter_chroma(uint8_t *_pix, ptrdiff_t _stride, int *_tc,
 ;                                   uint8_t *_no_p, uint8_t *_no_q);
 ;-----------------------------------------------------------------------------
-cglobal hevc_v_loop_filter_chroma_8, 3, 6, 8
-    sub              r0, 2
-    lea              r5, [3 * r1]
-    mov              r4, r0
-    add              r0, r5
-    TRANSPOSE4x8B_LOAD  PASS8ROWS(r4, r0, r1, r5)
+%macro LOOP_FILTER_CHROMA 0 ; emits the six chroma entry points below; instantiated once per INIT_XMM target after the macro
+cglobal hevc_v_loop_filter_chroma_8, 3, 5, 7, pix, stride, tc, pix0, r3stride ; x86inc counts: 3 args, 5 GPRs, 7 vector regs
+    sub            pixq, 2 ; start 2 bytes before the pointer passed in
+    lea       r3strideq, [3*strideq] ; r3stride = 3 * stride
+    mov           pix0q, pixq ; pix0 = first of the 8 rows
+    add            pixq, r3strideq ; pix  = pix0 + 3 * stride
+    TRANSPOSE4x8B_LOAD  PASS8ROWS(pix0q, pixq, strideq, r3strideq) ; PASS8ROWS is an x86util helper not shown here; NOTE(review): presumably expands to the 8 row operands
     CHROMA_DEBLOCK_BODY 8
-    TRANSPOSE8x4B_STORE PASS8ROWS(r4, r0, r1, r5)
+    TRANSPOSE8x4B_STORE PASS8ROWS(pix0q, pixq, strideq, r3strideq) ; same row operands as the load, now by name
     RET
 
-cglobal hevc_v_loop_filter_chroma_10, 3, 6, 8
-    sub              r0, 4
-    lea              r5, [3 * r1]
-    mov              r4, r0
-    add              r0, r5
-    TRANSPOSE4x8W_LOAD  PASS8ROWS(r4, r0, r1, r5)
+cglobal hevc_v_loop_filter_chroma_10, 3, 5, 7, pix, stride, tc, pix0, r3stride ; same register budget as the 8-bit version
+    sub            pixq, 4 ; start 4 bytes before the pointer passed in (word-sized samples)
+    lea       r3strideq, [3*strideq] ; r3stride = 3 * stride
+    mov           pix0q, pixq ; pix0 = first of the 8 rows
+    add            pixq, r3strideq ; pix  = pix0 + 3 * stride
+    TRANSPOSE4x8W_LOAD  PASS8ROWS(pix0q, pixq, strideq, r3strideq) ; word variant of the load
     CHROMA_DEBLOCK_BODY 10
-    TRANSPOSE8x4W_STORE PASS8ROWS(r4, r0, r1, r5)
+    TRANSPOSE8x4W_STORE PASS8ROWS(pix0q, pixq, strideq, r3strideq), [pw_pixel_max_10] ; last operand is the clip ceiling (1023)
+    RET
+
+cglobal hevc_v_loop_filter_chroma_12, 3, 5, 7, pix, stride, tc, pix0, r3stride ; 12-bit twin of the 10-bit function
+    sub            pixq, 4 ; start 4 bytes before the pointer passed in (word-sized samples)
+    lea       r3strideq, [3*strideq] ; r3stride = 3 * stride
+    mov           pix0q, pixq ; pix0 = first of the 8 rows
+    add            pixq, r3strideq ; pix  = pix0 + 3 * stride
+    TRANSPOSE4x8W_LOAD  PASS8ROWS(pix0q, pixq, strideq, r3strideq)
+    CHROMA_DEBLOCK_BODY 12
+    TRANSPOSE8x4W_STORE PASS8ROWS(pix0q, pixq, strideq, r3strideq), [pw_pixel_max_12] ; last operand is the clip ceiling (4095)
     RET
 
 ;-----------------------------------------------------------------------------
 ; void ff_hevc_h_loop_filter_chroma(uint8_t *_pix, ptrdiff_t _stride, int *_tc,
 ;                                   uint8_t *_no_p, uint8_t *_no_q);
 ;-----------------------------------------------------------------------------
-cglobal hevc_h_loop_filter_chroma_8, 3, 6, 8
-    mov              r5, r0; pix
-    sub              r5, r1
-    sub              r5, r1
-    movh             m0, [r5];      p1
-    movh             m1, [r5 + r1]; p0
-    movh             m2, [r0];      q0
-    movh             m3, [r0 + r1]; q1
+cglobal hevc_h_loop_filter_chroma_8, 3, 4, 7, pix, stride, tc, pix0 ; x86inc counts: 3 args, 4 GPRs, 7 vector regs
+    mov           pix0q, pixq
+    sub           pix0q, strideq
+    sub           pix0q, strideq ; pix0 = pix - 2 * stride
+    movq             m0, [pix0q];    p1
+    movq             m1, [pix0q+strideq]; p0
+    movq             m2, [pixq];    q0
+    movq             m3, [pixq+strideq]; q1
     pxor             m5, m5; zeros reg
     punpcklbw        m0, m5
     punpcklbw        m1, m5
     punpcklbw        m2, m5
     punpcklbw        m3, m5
     CHROMA_DEBLOCK_BODY  8
-    packuswb          m1, m2
-    movh       [r5 + r1], m1
-    movhps          [r0], m1
+    packuswb         m1, m2 ; words -> saturated bytes: p0 in the low 8 bytes, q0 in the high 8 bytes
+    movh[pix0q+strideq], m1 ; write back the p0 row
+    movhps       [pixq], m1 ; write back the q0 row
     RET
 
-cglobal hevc_h_loop_filter_chroma_10, 3, 6, 8
-    mov             r5, r0; pix
-    sub             r5, r1
-    sub             r5, r1
-    movdqu          m0, [r5];      p1
-    movdqu          m1, [r5+r1];   p0
-    movdqu          m2, [r0];      q0
-    movdqu          m3, [r0 + r1]; q1
+cglobal hevc_h_loop_filter_chroma_10, 3, 4, 7, pix, stride, tc, pix0 ; same register budget as the 8-bit version
+    mov          pix0q, pixq
+    sub          pix0q, strideq
+    sub          pix0q, strideq ; pix0 = pix - 2 * stride
+    movu            m0, [pix0q];    p1
+    movu            m1, [pix0q+strideq]; p0
+    movu            m2, [pixq];    q0
+    movu            m3, [pixq+strideq]; q1
     CHROMA_DEBLOCK_BODY 10
     pxor            m5, m5; zeros reg
-    CLIPW           m1, m5, [pw_pixel_max]
-    CLIPW           m2, m5, [pw_pixel_max]
-    movdqu   [r5 + r1], m1
-    movdqu        [r0], m2
+    CLIPW           m1, m5, [pw_pixel_max_10] ; bound p0 between zero (m5) and 1023
+    CLIPW           m2, m5, [pw_pixel_max_10] ; bound q0 between zero (m5) and 1023
+    movu [pix0q+strideq], m1 ; write back the p0 row
+    movu        [pixq], m2 ; write back the q0 row
+    RET
+
+cglobal hevc_h_loop_filter_chroma_12, 3, 4, 7, pix, stride, tc, pix0 ; 12-bit twin of the 10-bit function
+    mov          pix0q, pixq
+    sub          pix0q, strideq
+    sub          pix0q, strideq ; pix0 = pix - 2 * stride
+    movu            m0, [pix0q];    p1
+    movu            m1, [pix0q+strideq]; p0
+    movu            m2, [pixq];    q0
+    movu            m3, [pixq+strideq]; q1
+    CHROMA_DEBLOCK_BODY 12
+    pxor            m5, m5; zeros reg
+    CLIPW           m1, m5, [pw_pixel_max_12] ; bound p0 between zero (m5) and 4095
+    CLIPW           m2, m5, [pw_pixel_max_12] ; bound q0 between zero (m5) and 4095
+    movu [pix0q+strideq], m1 ; write back the p0 row
+    movu        [pixq], m2 ; write back the q0 row
     RET
+%endmacro
+
+INIT_XMM sse2
+LOOP_FILTER_CHROMA
+INIT_XMM avx
+LOOP_FILTER_CHROMA
 
 %if ARCH_X86_64
-INIT_XMM ssse3
+%macro LOOP_FILTER_LUMA 0
 ;-----------------------------------------------------------------------------
 ; void ff_hevc_v_loop_filter_luma(uint8_t *_pix, ptrdiff_t _stride, int beta,
 ;                                 int *_tc, uint8_t *_no_p, uint8_t *_no_q);
 ;-----------------------------------------------------------------------------
-cglobal hevc_v_loop_filter_luma_8, 4, 15, 16, pix, stride, beta, tc
-    sub              r0, 4
-    lea              r5, [3 * r1]
-    mov              r6, r0
-    add              r0, r5
-    TRANSPOSE8x8B_LOAD  PASS8ROWS(r6, r0, r1, r5)
+cglobal hevc_v_loop_filter_luma_8, 4, 14, 16, pix, stride, beta, tc, pix0, src3stride ; x86inc counts: 4 args, 14 GPRs, 16 vector regs
+    sub            pixq, 4 ; start 4 bytes before the pointer passed in
+    lea           pix0q, [3 * r1] ; NOTE: despite its name, pix0 holds 3 * stride here (r1 is the stride argument)
+    mov     src3strideq, pixq ; NOTE: despite its name, src3stride holds the first-row pointer here
+    add            pixq, pix0q ; pix = first row + 3 * stride
+    TRANSPOSE8x8B_LOAD  PASS8ROWS(src3strideq, pixq, r1, pix0q) ; operand order: base, base + 3 * stride, stride, 3 * stride
     LUMA_DEBLOCK_BODY 8, v
 .store:
-    TRANSPOSE8x8B_STORE PASS8ROWS(r6, r0, r1, r5)
+    TRANSPOSE8x8B_STORE PASS8ROWS(src3strideq, pixq, r1, pix0q) ; r6 is now used by the deblock body for the strong mask, so rows are addressed via the named registers
 .bypassluma:
     RET
 
-cglobal hevc_v_loop_filter_luma_10, 4, 15, 16, pix, stride, beta, tc
+cglobal hevc_v_loop_filter_luma_10, 4, 14, 16, pix, stride, beta, tc, pix0, src3stride ; x86inc counts: 4 args, 14 GPRs, 16 vector regs
     sub            pixq, 8
-    lea              r5, [3 * strideq]
-    mov              r6, pixq
-    add            pixq, r5
-    TRANSPOSE8x8W_LOAD  PASS8ROWS(r6, pixq, strideq, r5)
+    lea           pix0q, [3 * strideq] ; NOTE: despite its name, pix0 holds 3 * stride here
+    mov     src3strideq, pixq ; NOTE: despite its name, src3stride holds the first-row pointer here
+    add            pixq, pix0q ; pix = first row + 3 * stride
+    TRANSPOSE8x8W_LOAD  PASS8ROWS(src3strideq, pixq, strideq, pix0q) ; operand order: base, base + 3 * stride, stride, 3 * stride
     LUMA_DEBLOCK_BODY 10, v
 .store:
-    TRANSPOSE8x8W_STORE PASS8ROWS(r6, r0, r1, r5)
+    TRANSPOSE8x8W_STORE PASS8ROWS(src3strideq, pixq, r1, pix0q), [pw_pixel_max_10] ; last operand is the clip ceiling (1023); r1 is the stride argument
+.bypassluma: ; jump target used by the deblock body when tc0 + tc1 is zero; nothing is stored
+    RET
+
+cglobal hevc_v_loop_filter_luma_12, 4, 14, 16, pix, stride, beta, tc, pix0, src3stride ; 12-bit twin of the 10-bit function
+    sub            pixq, 8 ; start 8 bytes before the pointer passed in
+    lea           pix0q, [3 * strideq] ; NOTE: despite its name, pix0 holds 3 * stride here
+    mov     src3strideq, pixq ; NOTE: despite its name, src3stride holds the first-row pointer here
+    add            pixq, pix0q ; pix = first row + 3 * stride
+    TRANSPOSE8x8W_LOAD  PASS8ROWS(src3strideq, pixq, strideq, pix0q) ; operand order: base, base + 3 * stride, stride, 3 * stride
+    LUMA_DEBLOCK_BODY 12, v
+.store: ; jump target used by the deblock body when the weak-filter mask is empty
+    TRANSPOSE8x8W_STORE PASS8ROWS(src3strideq, pixq, r1, pix0q), [pw_pixel_max_12] ; last operand is the clip ceiling (4095); r1 is the stride argument
 .bypassluma:
     RET
 
@@ -756,19 +766,19 @@ cglobal hevc_v_loop_filter_luma_10, 4, 15, 16, pix, stride, beta, tc
 ; void ff_hevc_h_loop_filter_luma(uint8_t *_pix, ptrdiff_t _stride, int beta,
 ;                                 int *_tc, uint8_t *_no_p, uint8_t *_no_q);
 ;-----------------------------------------------------------------------------
-cglobal hevc_h_loop_filter_luma_8, 4, 15, 16, pix, stride, beta, tc, count, pix0, src3stride
+cglobal hevc_h_loop_filter_luma_8, 4, 14, 16, pix, stride, beta, tc, pix0, src3stride
     lea     src3strideq, [3 * strideq]
     mov           pix0q, pixq
     sub           pix0q, src3strideq
     sub           pix0q, strideq
-    movdqu           m0, [pix0q];               p3
-    movdqu           m1, [pix0q +     strideq]; p2
-    movdqu           m2, [pix0q + 2 * strideq]; p1
-    movdqu           m3, [pix0q + src3strideq]; p0
-    movdqu           m4, [pixq];                q0
-    movdqu           m5, [pixq +     strideq];  q1
-    movdqu           m6, [pixq + 2 * strideq];  q2
-    movdqu           m7, [pixq + src3strideq];  q3
+    movq             m0, [pix0q];               p3
+    movq             m1, [pix0q +     strideq]; p2
+    movq             m2, [pix0q + 2 * strideq]; p1
+    movq             m3, [pix0q + src3strideq]; p0
+    movq             m4, [pixq];                q0
+    movq             m5, [pixq +     strideq];  q1
+    movq             m6, [pixq + 2 * strideq];  q2
+    movq             m7, [pixq + src3strideq];  q3
     pxor             m8, m8
     punpcklbw        m0, m8
     punpcklbw        m1, m8
@@ -783,16 +793,16 @@ cglobal hevc_h_loop_filter_luma_8, 4, 15, 16, pix, stride, beta, tc, count, pix0
     packuswb          m1, m2
     packuswb          m3, m4
     packuswb          m5, m6
-    movh   [r5 +     r1], m1
-    movhps [r5 + 2 * r1], m1
-    movh   [r5 +     r6], m3
-    movhps [r0         ], m3
-    movh   [r0 +     r1], m5
-    movhps [r0 + 2 * r1], m5
+    movh   [pix0q +     strideq], m1
+    movhps [pix0q + 2 * strideq], m1
+    movh   [pix0q + src3strideq], m3
+    movhps [pixq               ], m3
+    movh   [pixq  +     strideq], m5
+    movhps [pixq  + 2 * strideq], m5
 .bypassluma:
     RET
 
-cglobal hevc_h_loop_filter_luma_10, 4, 15, 16, pix, stride, beta, tc, count, pix0, src3stride
+cglobal hevc_h_loop_filter_luma_10, 4, 14, 16, pix, stride, beta, tc, pix0, src3stride
     lea                  src3strideq, [3 * strideq]
     mov                        pix0q, pixq
     sub                        pix0q, src3strideq
@@ -808,12 +818,43 @@ cglobal hevc_h_loop_filter_luma_10, 4, 15, 16, pix, stride, beta, tc, count, pix
     LUMA_DEBLOCK_BODY             10, h
 .store:
     pxor                          m8, m8; zeros reg
-    CLIPW                         m1, m8, [pw_pixel_max]
-    CLIPW                         m2, m8, [pw_pixel_max]
-    CLIPW                         m3, m8, [pw_pixel_max]
-    CLIPW                         m4, m8, [pw_pixel_max]
-    CLIPW                         m5, m8, [pw_pixel_max]
-    CLIPW                         m6, m8, [pw_pixel_max]
+    CLIPW                         m1, m8, [pw_pixel_max_10]
+    CLIPW                         m2, m8, [pw_pixel_max_10]
+    CLIPW                         m3, m8, [pw_pixel_max_10]
+    CLIPW                         m4, m8, [pw_pixel_max_10]
+    CLIPW                         m5, m8, [pw_pixel_max_10]
+    CLIPW                         m6, m8, [pw_pixel_max_10]
+    movdqu     [pix0q +     strideq], m1;  p2
+    movdqu     [pix0q + 2 * strideq], m2;  p1
+    movdqu     [pix0q + src3strideq], m3;  p0
+    movdqu     [pixq               ], m4;  q0
+    movdqu     [pixq  +     strideq], m5;  q1
+    movdqu     [pixq  + 2 * strideq], m6;  q2
+.bypassluma:
+    RET
+
+cglobal hevc_h_loop_filter_luma_12, 4, 14, 16, pix, stride, beta, tc, pix0, src3stride
+    lea                  src3strideq, [3 * strideq]
+    mov                        pix0q, pixq
+    sub                        pix0q, src3strideq
+    sub                        pix0q, strideq
+    movdqu                        m0, [pix0q];               p3
+    movdqu                        m1, [pix0q +     strideq]; p2
+    movdqu                        m2, [pix0q + 2 * strideq]; p1
+    movdqu                        m3, [pix0q + src3strideq]; p0
+    movdqu                        m4, [pixq];                q0
+    movdqu                        m5, [pixq  +     strideq]; q1
+    movdqu                        m6, [pixq  + 2 * strideq]; q2
+    movdqu                        m7, [pixq  + src3strideq]; q3
+    LUMA_DEBLOCK_BODY             12, h
+.store:
+    pxor                          m8, m8; zeros reg
+    CLIPW                         m1, m8, [pw_pixel_max_12]
+    CLIPW                         m2, m8, [pw_pixel_max_12]
+    CLIPW                         m3, m8, [pw_pixel_max_12]
+    CLIPW                         m4, m8, [pw_pixel_max_12]
+    CLIPW                         m5, m8, [pw_pixel_max_12]
+    CLIPW                         m6, m8, [pw_pixel_max_12]
     movdqu     [pix0q +     strideq], m1;  p2
     movdqu     [pix0q + 2 * strideq], m2;  p1
     movdqu     [pix0q + src3strideq], m3;  p0
@@ -822,4 +863,13 @@ cglobal hevc_h_loop_filter_luma_10, 4, 15, 16, pix, stride, beta, tc, count, pix
     movdqu     [pixq  + 2 * strideq], m6;  q2
 .bypassluma:
     RET
+
+%endmacro
+
+INIT_XMM sse2
+LOOP_FILTER_LUMA
+INIT_XMM ssse3
+LOOP_FILTER_LUMA
+INIT_XMM avx
+LOOP_FILTER_LUMA
 %endif
diff --git a/libavcodec/x86/hevc_idct.asm b/libavcodec/x86/hevc_idct.asm
new file mode 100644
index 0000000..481726d
--- /dev/null
+++ b/libavcodec/x86/hevc_idct.asm
@@ -0,0 +1,122 @@
+; /*
+; * SIMD optimized idct functions for HEVC decoding
+; * Copyright (c) 2014 Pierre-Edouard LEPERE
+; * Copyright (c) 2014 James Almer
+; *
+; * This file is part of FFmpeg.
+; *
+; * FFmpeg is free software; you can redistribute it and/or
+; * modify it under the terms of the GNU Lesser General Public
+; * License as published by the Free Software Foundation; either
+; * version 2.1 of the License, or (at your option) any later version.
+; *
+; * FFmpeg is distributed in the hope that it will be useful,
+; * but WITHOUT ANY WARRANTY; without even the implied warranty of
+; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+; * Lesser General Public License for more details.
+; *
+; * You should have received a copy of the GNU Lesser General Public
+; * License along with FFmpeg; if not, write to the Free Software
+; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+; */
+%include "libavutil/x86/x86util.asm"
+
+SECTION_TEXT 32
+
+; void ff_hevc_idctHxW_dc_{8,10,12}_<opt>(int16_t *coeffs)
+; %1 = block size (used for both dimensions of the emitted name)
+; %2 = number of loop iterations; each iteration stores 8 * mmsize bytes
+; %3 = bitdepth
+%macro IDCT_DC 3
+cglobal hevc_idct%1x%1_dc_%3, 1, 2, 1, coeff, tmp ; x86inc counts: 1 arg, 2 GPRs, 1 vector reg
+    movsx             tmpq, word [coeffq] ; sign-extend the first coefficient
+    add               tmpw, ((1 << 14-%3) + 1) ; NASM binds - tighter than <<, so this adds (1 << (14 - bitdepth)) + 1, in 16 bits
+    sar               tmpw, (15-%3) ; arithmetic shift right by 15 - bitdepth, still on the 16-bit register
+    movd               xm0, tmpd
+    SPLATW              m0, xm0 ; x86util helper; NOTE(review): presumably broadcasts the low word to every word of m0
+    DEFINE_ARGS coeff, cnt ; rename the second register from tmp to cnt
+    mov               cntd, %2
+.loop:
+    mova [coeffq+mmsize*0], m0 ; NOTE(review): mova is the aligned move, so coeffs is assumed suitably aligned - confirm at callers
+    mova [coeffq+mmsize*1], m0
+    mova [coeffq+mmsize*2], m0
+    mova [coeffq+mmsize*3], m0
+    mova [coeffq+mmsize*4], m0
+    mova [coeffq+mmsize*5], m0
+    mova [coeffq+mmsize*6], m0
+    mova [coeffq+mmsize*7], m0
+    add  coeffq, mmsize*8 ; advance past the 8 registers just written
+    dec  cntd
+    jg  .loop ; repeat while the counter is still positive
+    RET
+%endmacro
+
+; %1 = block size (used for both dimensions of the emitted name)
+; %2 = bitdepth
+%macro IDCT_DC_NL 2 ; No loop: the whole block is written with straight-line stores
+cglobal hevc_idct%1x%1_dc_%2, 1, 2, 1, coeff, tmp ; x86inc counts: 1 arg, 2 GPRs, 1 vector reg
+    movsx             tmpq, word [coeffq] ; sign-extend the first coefficient
+    add               tmpw, ((1 << 14-%2) + 1) ; NASM binds - tighter than <<, so this adds (1 << (14 - bitdepth)) + 1, in 16 bits
+    sar               tmpw, (15-%2) ; arithmetic shift right by 15 - bitdepth, still on the 16-bit register
+    movd                m0, tmpd
+    SPLATW              m0, xm0 ; x86util helper; NOTE(review): presumably broadcasts the low word to every word of m0
+    mova [coeffq+mmsize*0], m0 ; first four stores are emitted for every register width
+    mova [coeffq+mmsize*1], m0
+    mova [coeffq+mmsize*2], m0
+    mova [coeffq+mmsize*3], m0
+%if mmsize == 16
+    mova [coeffq+mmsize*4], m0 ; four extra stores only when assembled for 16-byte registers
+    mova [coeffq+mmsize*5], m0
+    mova [coeffq+mmsize*6], m0
+    mova [coeffq+mmsize*7], m0
+%endif
+    RET
+%endmacro
+
+; 8-bit (bytes written per function = block size squared * 2, since coefficients are int16_t)
+INIT_MMX mmxext
+IDCT_DC_NL  4,      8 ; 4x4: 4 stores of 8 bytes = 32 bytes
+IDCT_DC     8,  2,  8 ; 8x8: 2 iterations * 8 stores * 8 bytes = 128 bytes
+
+INIT_XMM sse2
+IDCT_DC_NL  8,      8 ; 8x8: 8 stores of 16 bytes = 128 bytes
+IDCT_DC    16,  4,  8 ; 16x16: 4 iterations * 8 stores * 16 bytes = 512 bytes
+IDCT_DC    32, 16,  8 ; 32x32: 16 iterations * 8 stores * 16 bytes = 2048 bytes
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+IDCT_DC    16,  2,  8 ; 16x16: 2 iterations * 8 stores * 32 bytes = 512 bytes
+IDCT_DC    32,  8,  8 ; 32x32: 8 iterations * 8 stores * 32 bytes = 2048 bytes
+%endif ;HAVE_AVX2_EXTERNAL
+
+; 10-bit (same block sizes and loop counts as the 8-bit group; only the bitdepth argument differs)
+INIT_MMX mmxext
+IDCT_DC_NL  4,     10
+IDCT_DC     8,  2, 10
+
+INIT_XMM sse2
+IDCT_DC_NL  8,     10
+IDCT_DC    16,  4, 10
+IDCT_DC    32, 16, 10
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+IDCT_DC    16,  2, 10
+IDCT_DC    32,  8, 10
+%endif ;HAVE_AVX2_EXTERNAL
+
+; 12-bit (same block sizes and loop counts as the 8-bit group; only the bitdepth argument differs)
+INIT_MMX mmxext
+IDCT_DC_NL  4,     12
+IDCT_DC     8,  2, 12
+
+INIT_XMM sse2
+IDCT_DC_NL  8,     12
+IDCT_DC    16,  4, 12
+IDCT_DC    32, 16, 12
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+IDCT_DC    16,  2, 12
+IDCT_DC    32,  8, 12
+%endif ;HAVE_AVX2_EXTERNAL
diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm
new file mode 100644
index 0000000..c525078
--- /dev/null
+++ b/libavcodec/x86/hevc_mc.asm
@@ -0,0 +1,1381 @@
+; /*
+; * Provide SSE luma and chroma mc functions for HEVC decoding
+; * Copyright (c) 2013 Pierre-Edouard LEPERE
+; *
+; * This file is part of FFmpeg.
+; *
+; * FFmpeg is free software; you can redistribute it and/or
+; * modify it under the terms of the GNU Lesser General Public
+; * License as published by the Free Software Foundation; either
+; * version 2.1 of the License, or (at your option) any later version.
+; *
+; * FFmpeg is distributed in the hope that it will be useful,
+; * but WITHOUT ANY WARRANTY; without even the implied warranty of
+; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+; * Lesser General Public License for more details.
+; *
+; * You should have received a copy of the GNU Lesser General Public
+; * License along with FFmpeg; if not, write to the Free Software
+; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+; */
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+pw_8:                   times 8 dw (1 << 9)      ; pmulhrsw factor for uni_* output: rounded >> 6 (8-bit)
+pw_10:                  times 8 dw (1 << 11)     ; rounded >> 4 (10-bit)
+pw_12:                  times 8 dw (1 << 13)     ; rounded >> 2 (12-bit)
+pw_bi_8:                times 8 dw (1 << 8)      ; pmulhrsw factor for bi_* output: rounded >> 7 (8-bit)
+pw_bi_10:               times 8 dw (1 << 10)     ; rounded >> 5 (10-bit)
+pw_bi_12:               times 8 dw (1 << 12)     ; rounded >> 3 (12-bit)
+max_pixels_10:          times 8  dw ((1 << 10)-1) ; upper clamp used by UNI_COMPUTE for 10-bit
+max_pixels_12:          times 8  dw ((1 << 12)-1) ; upper clamp used by UNI_COMPUTE for 12-bit
+zero:                   times 4  dd 0             ; lower clamp used by UNI_COMPUTE
+one_per_32:             times 4  dd 1             ; not referenced in the visible part of this file
+
+SECTION .text
+%macro EPEL_TABLE 4 ; bitdepth, repeat count, element size letter (b/w), cpu suffix -- emitted after SECTION .text
+hevc_epel_filters_%4_%1 times %2 d%3 -2, 58     ; filter 1: taps (c1,c2); each row is 16 bytes
+                        times %2 d%3 10, -2     ; filter 1: taps (c3,c4)
+                        times %2 d%3 -4, 54     ; filter 2
+                        times %2 d%3 16, -2
+                        times %2 d%3 -6, 46     ; filter 3
+                        times %2 d%3 28, -4
+                        times %2 d%3 -4, 36     ; filter 4
+                        times %2 d%3 36, -4
+                        times %2 d%3 -4, 28     ; filter 5
+                        times %2 d%3 46, -6
+                        times %2 d%3 -2, 16     ; filter 6
+                        times %2 d%3 54, -4
+                        times %2 d%3 -2, 10     ; filter 7
+                        times %2 d%3 58, -2
+%endmacro
+
+
+
+EPEL_TABLE  8, 8, b, sse4                       ; 8 byte pairs per row
+EPEL_TABLE 10, 4, w, sse4                       ; 4 word pairs per row
+EPEL_TABLE 12, 4, w, sse4
+
+%macro QPEL_TABLE 4 ; bitdepth, repeat count, element size letter (b/w), cpu suffix
+hevc_qpel_filters_%4_%1 times %2 d%3  -1,  4    ; filter 1: taps (c1,c2); four 16-byte rows per filter
+                        times %2 d%3 -10, 58    ; filter 1: taps (c3,c4)
+                        times %2 d%3  17, -5    ; filter 1: taps (c5,c6)
+                        times %2 d%3   1,  0    ; filter 1: taps (c7,c8)
+                        times %2 d%3  -1,  4    ; filter 2
+                        times %2 d%3 -11, 40
+                        times %2 d%3  40,-11
+                        times %2 d%3   4, -1
+                        times %2 d%3   0,  1    ; filter 3
+                        times %2 d%3  -5, 17
+                        times %2 d%3  58,-10
+                        times %2 d%3   4, -1
+%endmacro
+
+QPEL_TABLE  8, 8, b, sse4
+QPEL_TABLE 10, 4, w, sse4
+QPEL_TABLE 12, 4, w, sse4
+; bitdepth "14" is an alias of the 16-bit-coefficient table (QPEL_HV_COMPUTE is invoked with 14)
+%define hevc_qpel_filters_sse4_14 hevc_qpel_filters_sse4_10
+
+%if ARCH_X86_64
+
+%macro SIMPLE_BILOAD 4   ;width, tab, r1, r2 -- load one row of 16-bit words from [tab] into r1 (and r2 past 8 words)
+%if %1 <= 4
+    movq              %3, [%2]                                              ; up to 4 words: 8 bytes from source2
+%elif %1 <= 8
+    movdqa            %3, [%2]                                              ; up to 8 words: 16 bytes, aligned load
+%elif %1 <= 12
+    movdqa            %3, [%2]                                              ; words 0-7 from source2
+    movq              %4, [%2+16]                                           ; words 8-11 from source2
+%else
+    movdqa            %3, [%2]                                              ; words 0-7 from source2
+    movdqa            %4, [%2+16]                                           ; words 8-15 from source2
+%endif
+%endmacro
+
+%macro SIMPLE_LOAD 4    ;width, bitd, tab, r1 -- unaligned load of one pixel row, sized by width and bitdepth
+%if %1 == 2 || (%2 == 8 && %1 <= 4)
+    movd              %4, [%3]                                               ; 4 bytes from source
+%elif %1 == 4 || (%2 == 8 && %1 <= 8)
+    movq              %4, [%3]                                               ; 8 bytes from source
+%else
+    movdqu            %4, [%3]                                               ; 16 bytes from source
+%endif
+%endmacro
+
+%macro SIMPLE_8LOAD 5    ;width, bitd, tab, r1, r2 -- not invoked in the visible part of this file
+%if %1 == 2 || (%2 == 8 && %1 <= 4)
+    movq              %4, [%3]                                              ; 8 bytes from source2
+%elif %1 == 4 || (%2 == 8 && %1 <= 8)
+    movdqa            %4, [%3]                                              ; 16 bytes, aligned load
+%elif %1 <= 12
+    movdqa            %4, [%3]                                              ; first 16 bytes from source2
+    movq              %5, [%3+16]                                           ; next 8 bytes from source2
+%else
+    movdqa            %4, [%3]                                              ; first 16 bytes from source2
+    movdqa            %5, [%3+16]                                           ; next 16 bytes from source2
+%endif
+%endmacro
+
+%macro EPEL_FILTER 2-4                            ; bit depth, filter index[, dst reg 1, dst reg 2]; clobbers the index register
+%ifdef PIC
+    lea         rfilterq, [hevc_epel_filters_sse4_%1]
+%else
+    %define rfilterq hevc_epel_filters_sse4_%1
+%endif
+    sub              %2q, 1                      ; filter indices are 1-based
+    shl              %2q, 5                      ; multiply by 32 (two 16-byte rows per filter)
+%if %0 == 2
+    movdqa           m14, [rfilterq + %2q]        ; get 2 first values of filters (default destination m14)
+    movdqa           m15, [rfilterq + %2q+16]     ; get 2 last values of filters (default destination m15)
+%else
+    movdqa           %3, [rfilterq + %2q]        ; get 2 first values of filters
+    movdqa           %4, [rfilterq + %2q+16]     ; get 2 last values of filters
+%endif
+%endmacro
+
+%macro EPEL_HV_FILTER 1 ; bitdepth -- loads horizontal taps into m14/m15, vertical taps into m12/m13, sets r3srcq
+%ifdef PIC
+    lea         rfilterq, [hevc_epel_filters_sse4_%1]
+%else
+    %define rfilterq hevc_epel_filters_sse4_%1
+%endif
+    sub              mxq, 1                      ; both indices are 1-based and are clobbered here
+    sub              myq, 1
+    shl              mxq, 5                      ; multiply by 32
+    shl              myq, 5                      ; multiply by 32
+    movdqa           m14, [rfilterq + mxq]        ; get 2 first values of filters (horizontal)
+    movdqa           m15, [rfilterq + mxq+16]     ; get 2 last values of filters (horizontal)
+    lea           r3srcq, [srcstrideq*3]
+
+%ifdef PIC
+    lea         rfilterq, [hevc_epel_filters_sse4_10]
+%else
+    %define rfilterq hevc_epel_filters_sse4_10
+%endif
+    movdqa           m12, [rfilterq + myq]        ; vertical taps always come from the word-sized (_10) table
+    movdqa           m13, [rfilterq + myq+16]     ; get 2 last values of filters (vertical)
+%endmacro
+
+%macro QPEL_FILTER 2 ; bitdepth, filter index -- loads the four tap-pair rows into m12-m15; clobbers the index register
+%ifdef PIC
+    lea         rfilterq, [hevc_qpel_filters_sse4_%1]
+%else
+    %define rfilterq hevc_qpel_filters_sse4_%1
+%endif
+    lea              %2q, [%2q*8-8]                ; (index-1)*8; scaled by 8 again below: 64 bytes per filter
+    movdqa           m12, [rfilterq + %2q*8]       ; taps (c1,c2)
+    movdqa           m13, [rfilterq + %2q*8 + 16]  ; taps (c3,c4)
+    movdqa           m14, [rfilterq + %2q*8 + 32]  ; taps (c5,c6)
+    movdqa           m15, [rfilterq + %2q*8 + 48]  ; taps (c7,c8)
+%endmacro
+
+%macro EPEL_LOAD 4 ; bitdepth, source address, stride (number or register name), width -- fills m0-m3, then interleaves
+%ifdef PIC
+    lea rfilterq, [%2]
+%else
+    %define rfilterq %2
+%endif
+%if (%1 == 8 && %4 <= 4)
+%define %%load movd
+%elif (%1 == 8 && %4 <= 8) || (%1 > 8 && %4 <= 4)
+%define %%load movq
+%else
+%define %%load movdqu
+%endif
+
+    %%load            m0, [rfilterq ]             ; rfilterq is reused here as the source pointer
+%ifnum %3
+    %%load            m1, [rfilterq+  %3]         ; numeric stride: constant byte offsets
+    %%load            m2, [rfilterq+2*%3]
+    %%load            m3, [rfilterq+3*%3]
+%else
+    %%load            m1, [rfilterq+  %3q]        ; register stride
+    %%load            m2, [rfilterq+2*%3q]
+    %%load            m3, [rfilterq+r3srcq]       ; caller must have set r3srcq (callers in this file load 3*srcstride)
+%endif
+
+%if %1 == 8
+%if %4 > 8
+    SBUTTERFLY        bw, 0, 1, 10                ; m10 is passed as the scratch register
+    SBUTTERFLY        bw, 2, 3, 10
+%else
+    punpcklbw         m0, m1                      ; low halves only: byte pairs for the (c1,c2) and (c3,c4) rows
+    punpcklbw         m2, m3
+%endif
+%else
+%if %4 > 4
+    SBUTTERFLY        wd, 0, 1, 10                ; m10 is passed as the scratch register
+    SBUTTERFLY        wd, 2, 3, 10
+%else
+    punpcklwd         m0, m1
+    punpcklwd         m2, m3
+%endif
+%endif
+%endmacro
+
+
+%macro QPEL_H_LOAD 4 ; bitdepth, source address, width, scratch register number -- fills m0-m7, then interleaves
+%assign %%stride (%1+7)/8
+%if %1 == 8
+%if %3 <= 4
+%define %%load movd
+%elif %3 == 8
+%define %%load movq
+%else
+%define %%load movdqu
+%endif
+%else
+%if %3 == 2
+%define %%load movd
+%elif %3 == 4
+%define %%load movq
+%else
+%define %%load movdqu
+%endif
+%endif
+    %%load            m0, [%2-3*%%stride]        ;load data from source, starting 3 pixels to the left
+    %%load            m1, [%2-2*%%stride]
+    %%load            m2, [%2-%%stride  ]
+    %%load            m3, [%2           ]
+    %%load            m4, [%2+%%stride  ]
+    %%load            m5, [%2+2*%%stride]
+    %%load            m6, [%2+3*%%stride]
+    %%load            m7, [%2+4*%%stride]        ; last of the 8 loads, 4 pixels to the right
+
+%if %1 == 8
+%if %3 > 8
+    SBUTTERFLY        wd, 0, 1, %4               ; 8-bit: byte pairs are interleaved as words
+    SBUTTERFLY        wd, 2, 3, %4
+    SBUTTERFLY        wd, 4, 5, %4
+    SBUTTERFLY        wd, 6, 7, %4
+%else
+    punpcklwd         m0, m1
+    punpcklwd         m2, m3
+    punpcklwd         m4, m5
+    punpcklwd         m6, m7
+%endif
+%else
+%if %3 > 4
+    SBUTTERFLY        dq, 0, 1, %4               ; >8-bit: word pairs are interleaved as dwords
+    SBUTTERFLY        dq, 2, 3, %4
+    SBUTTERFLY        dq, 4, 5, %4
+    SBUTTERFLY        dq, 6, 7, %4
+%else
+    punpckldq         m0, m1
+    punpckldq         m2, m3
+    punpckldq         m4, m5
+    punpckldq         m6, m7
+%endif
+%endif
+%endmacro
+
+%macro QPEL_V_LOAD 5 ; bitdepth, source address, stride register name, width, scratch GPR name -- fills m0-m7
+    lea              %5q, [%2]
+    sub              %5q, r3srcq                 ; scratch = source - r3srcq (callers in this file set r3srcq = 3*srcstride)
+    movdqu            m0, [%5q            ]      ;load x- 3*srcstride
+    movdqu            m1, [%5q+   %3q     ]      ;load x- 2*srcstride
+    movdqu            m2, [%5q+ 2*%3q     ]      ;load x-srcstride
+    movdqu            m3, [%2       ]      ;load x
+    movdqu            m4, [%2+   %3q]      ;load x+stride
+    movdqu            m5, [%2+ 2*%3q]      ;load x+2*stride
+    movdqu            m6, [%2+r3srcq]      ;load x+3*stride
+    movdqu            m7, [%2+ 4*%3q]      ;load x+4*stride
+%if %1 == 8
+%if %4 > 8
+    SBUTTERFLY        bw, 0, 1, 8                ; m8 is passed as the scratch register
+    SBUTTERFLY        bw, 2, 3, 8
+    SBUTTERFLY        bw, 4, 5, 8
+    SBUTTERFLY        bw, 6, 7, 8
+%else
+    punpcklbw         m0, m1                     ; interleave vertically adjacent rows byte by byte
+    punpcklbw         m2, m3
+    punpcklbw         m4, m5
+    punpcklbw         m6, m7
+%endif
+%else
+%if %4 > 4
+    SBUTTERFLY        wd, 0, 1, 8                ; m8 is passed as the scratch register
+    SBUTTERFLY        wd, 2, 3, 8
+    SBUTTERFLY        wd, 4, 5, 8
+    SBUTTERFLY        wd, 6, 7, 8
+%else
+    punpcklwd         m0, m1                     ; interleave vertically adjacent rows word by word
+    punpcklwd         m2, m3
+    punpcklwd         m4, m5
+    punpcklwd         m6, m7
+%endif
+%endif
+%endmacro
+
+%macro PEL_12STORE2 3 ; address, low register, high register -- PEL_12STOREn writes n 16-bit words
+    movd           [%1], %2
+%endmacro
+%macro PEL_12STORE4 3
+    movq           [%1], %2
+%endmacro
+%macro PEL_12STORE6 3 ; destroys %2 (shifted right by 8 bytes)
+    movq           [%1], %2
+    psrldq            %2, 8
+    movd         [%1+8], %2
+%endmacro
+%macro PEL_12STORE8 3 ; aligned 16-byte store
+    movdqa         [%1], %2
+%endmacro
+%macro PEL_12STORE12 3 ; words 8-11 come from %3
+    movdqa         [%1], %2
+    movq        [%1+16], %3
+%endmacro
+%macro PEL_12STORE16 3 ; words 8-15 come from %3
+    PEL_12STORE8      %1, %2, %3
+    movdqa       [%1+16], %3
+%endmacro
+
+%macro PEL_10STORE2 3 ; address, low register, high register -- PEL_10STOREn writes n 16-bit words
+    movd           [%1], %2
+%endmacro
+%macro PEL_10STORE4 3
+    movq           [%1], %2
+%endmacro
+%macro PEL_10STORE6 3 ; destroys %2 (shifted right by 8 bytes)
+    movq           [%1], %2
+    psrldq            %2, 8
+    movd         [%1+8], %2
+%endmacro
+%macro PEL_10STORE8 3 ; aligned 16-byte store
+    movdqa         [%1], %2
+%endmacro
+%macro PEL_10STORE12 3 ; words 8-11 come from %3
+    movdqa         [%1], %2
+    movq        [%1+16], %3
+%endmacro
+%macro PEL_10STORE16 3 ; words 8-15 come from %3
+    PEL_10STORE8      %1, %2, %3
+    movdqa       [%1+16], %3
+%endmacro
+
+%macro PEL_8STORE2 3 ; address, register, unused register -- PEL_8STOREn writes n bytes
+    pextrw          [%1], %2, 0
+%endmacro
+%macro PEL_8STORE4 3
+    movd            [%1], %2
+%endmacro
+%macro PEL_8STORE6 3 ; bytes 4-5 are word 2 of %2
+    movd            [%1], %2
+    pextrw        [%1+4], %2, 2
+%endmacro
+%macro PEL_8STORE8 3
+    movq           [%1], %2
+%endmacro
+%macro PEL_8STORE12 3 ; destroys %2 (shifted right by 8 bytes)
+    movq            [%1], %2
+    psrldq            %2, 8
+    movd          [%1+8], %2
+%endmacro
+%macro PEL_8STORE16 3 ; aligned 16-byte store
+    movdqa          [%1], %2
+%endmacro
+
+%macro LOOP_END 4 ; dst, dststride, src, srcstride (register names) -- row loop tail; needs heightd and a .loop label
+    lea              %1q, [%1q+2*%2q]            ; dst += 2*dststride (stride is doubled here)
+    add              %3q, %4q                    ; src += srcstride
+    dec          heightd                         ; one row done
+    jnz               .loop                      ; height loop
+%endmacro
+
+
+%macro MC_PIXEL_COMPUTE 2 ;width, bitdepth -- expects pixels in m0 and, for 8-bit, a zeroed m2
+%if %2 == 8
+%if %1 > 8
+    punpckhbw         m1, m0, m2                 ; upper 8 bytes widened to words
+    psllw             m1, 14-%2
+%endif
+    punpcklbw         m0, m2                     ; lower 8 bytes widened to words
+%endif
+    psllw             m0, 14-%2                  ; scale each word by 1 << (14-bitdepth)
+%endmacro
+
+
+%macro EPEL_COMPUTE 4 ; bitdepth, width, filter1, filter2 -- 4-tap sum of m0..m3, result words in m0 (and m1)
+%if %1 == 8
+    pmaddubsw         m0, %3   ;x1*c1+x2*c2
+    pmaddubsw         m2, %4   ;x3*c3+x4*c4
+    paddw             m0, m2
+%if %2 > 8
+    pmaddubsw         m1, %3   ; second half of the row
+    pmaddubsw         m3, %4
+    paddw             m1, m3
+%endif
+%else
+    pmaddwd           m0, %3   ; word inputs: 32-bit products
+    pmaddwd           m2, %4
+    paddd             m0, m2
+%if %2 > 4
+    pmaddwd           m1, %3
+    pmaddwd           m3, %4
+    paddd             m1, m3
+%endif
+%if %1 != 8
+    psrad             m0, %1-8 ; always taken here: this is already the non-8-bit branch
+    psrad             m1, %1-8 ; m1 is shifted and packed even when width <= 4 left it uncomputed
+%endif
+    packssdw          m0, m1   ; back to signed 16-bit words
+%endif
+%endmacro
+
+%macro QPEL_HV_COMPUTE 4     ; width, bitdepth, filter idx, pack mnemonic without its leading 'p'
+%ifdef PIC
+    lea         rfilterq, [hevc_qpel_filters_sse4_%2]
+%else
+    %define rfilterq hevc_qpel_filters_sse4_%2
+%endif
+
+%if %2 == 8
+    pmaddubsw         m0, [rfilterq + %3q*8   ]   ;x1*c1+x2*c2
+    pmaddubsw         m2, [rfilterq + %3q*8+16]   ;x3*c3+x4*c4
+    pmaddubsw         m4, [rfilterq + %3q*8+32]   ;x5*c5+x6*c6
+    pmaddubsw         m6, [rfilterq + %3q*8+48]   ;x7*c7+x8*c8
+    paddw             m0, m2
+    paddw             m4, m6
+    paddw             m0, m4                      ; 8-bit path produces m0 only; width and %4 are not used
+%else
+    pmaddwd           m0, [rfilterq + %3q*8   ]   ; taps are read from memory, not from m12-m15
+    pmaddwd           m2, [rfilterq + %3q*8+16]
+    pmaddwd           m4, [rfilterq + %3q*8+32]
+    pmaddwd           m6, [rfilterq + %3q*8+48]
+    paddd             m0, m2
+    paddd             m4, m6
+    paddd             m0, m4
+%if %2 != 8
+    psrad             m0, %2-8                    ; always taken here: this is already the non-8-bit branch
+%endif
+%if %1 > 4
+    pmaddwd           m1, [rfilterq + %3q*8   ]
+    pmaddwd           m3, [rfilterq + %3q*8+16]
+    pmaddwd           m5, [rfilterq + %3q*8+32]
+    pmaddwd           m7, [rfilterq + %3q*8+48]
+    paddd             m1, m3
+    paddd             m5, m7
+    paddd             m1, m5
+%if %2 != 8
+    psrad             m1, %2-8
+%endif
+%endif
+    p%4               m0, m1                      ; e.g. %4 = ackssdw expands to packssdw
+%endif
+%endmacro
+
+%macro QPEL_COMPUTE 2     ; width, bitdepth -- 8-tap sum of m0..m7 with the taps held in m12-m15
+%if %2 == 8
+    pmaddubsw         m0, m12   ;x1*c1+x2*c2
+    pmaddubsw         m2, m13   ;x3*c3+x4*c4
+    pmaddubsw         m4, m14   ;x5*c5+x6*c6
+    pmaddubsw         m6, m15   ;x7*c7+x8*c8
+    paddw             m0, m2
+    paddw             m4, m6
+    paddw             m0, m4    ; result words in m0
+%if %1 > 8
+    pmaddubsw         m1, m12   ; second half of the row, result words in m1
+    pmaddubsw         m3, m13
+    pmaddubsw         m5, m14
+    pmaddubsw         m7, m15
+    paddw             m1, m3
+    paddw             m5, m7
+    paddw             m1, m5
+%endif
+%else
+    pmaddwd           m0, m12   ; word inputs: 32-bit sums, left unpacked for the caller
+    pmaddwd           m2, m13
+    pmaddwd           m4, m14
+    pmaddwd           m6, m15
+    paddd             m0, m2
+    paddd             m4, m6
+    paddd             m0, m4
+%if %2 != 8
+    psrad             m0, %2-8  ; always taken here: this is already the non-8-bit branch
+%endif
+%if %1 > 4
+    pmaddwd           m1, m12
+    pmaddwd           m3, m13
+    pmaddwd           m5, m14
+    pmaddwd           m7, m15
+    paddd             m1, m3
+    paddd             m5, m7
+    paddd             m1, m5
+%if %2 != 8
+    psrad             m1, %2-8
+%endif
+%endif
+%endif
+%endmacro
+
+%macro BI_COMPUTE 7     ; width, bitd, src1l, src1h, scr2l, scr2h, pw -- saturating add of src2, then UNI_COMPUTE
+    paddsw            %3, %5
+%if %1 > 8
+    paddsw            %4, %6                      ; high half only for widths above 8
+%endif
+    UNI_COMPUTE       %1, %2, %3, %4, %7          ; round, then pack or clamp to the pixel range
+%endmacro
+
+%macro UNI_COMPUTE 5 ; width, bitdepth, low register, high register, pmulhrsw factor (register or memory)
+    pmulhrsw          %3, %5                      ; rounded scaling by the factor
+%if %1 > 8 || (%2 > 8 && %1 > 4)
+    pmulhrsw          %4, %5
+%endif
+%if %2 == 8
+    packuswb          %3, %4                      ; 8-bit: pack to unsigned bytes with saturation
+%else
+    pminsw            %3, [max_pixels_%2]         ; >8-bit: clamp words to [0, (1 << bitdepth) - 1]
+    pmaxsw            %3, [zero]
+%if %1 > 8
+    pminsw            %4, [max_pixels_%2]
+    pmaxsw            %4, [zero]
+%endif
+%endif
+%endmacro
+
+INIT_XMM sse4                                    ; adds ff_ and _sse4 to function name
+; ******************************
+; void put_hevc_mc_pixels(int16_t *dst, ptrdiff_t dststride,
+;                         uint8_t *_src, ptrdiff_t _srcstride,
+;                         int height, int mx, int my)
+; ******************************
+
+%macro HEVC_PUT_HEVC_PEL_PIXELS 2 ; width, bitdepth -- emits the plain, uni_ and bi_ full-pel functions
+cglobal hevc_put_hevc_pel_pixels%1_%2, 5, 5, 3, dst, dststride, src, srcstride,height
+    pxor               m2, m2                    ; zero register for the byte -> word unpack
+.loop:
+    SIMPLE_LOAD       %1, %2, srcq, m0
+    MC_PIXEL_COMPUTE  %1, %2                     ; widen (8-bit) and shift left by 14-bitdepth
+    PEL_10STORE%1     dstq, m0, m1               ; 16-bit intermediate store for every bitdepth
+    LOOP_END         dst, dststride, src, srcstride
+    RET
+
+cglobal hevc_put_hevc_uni_pel_pixels%1_%2, 5, 5, 2, dst, dststride, src, srcstride,height
+.loop:
+    SIMPLE_LOAD       %1, %2, srcq, m0           ; plain copy: pixels are stored unmodified
+    PEL_%2STORE%1   dstq, m0, m1
+    add             dstq, dststrideq             ; dst += dststride
+    add             srcq, srcstrideq             ; src += srcstride
+    dec          heightd                         ; cmp height
+    jnz               .loop                      ; height loop
+    RET
+
+cglobal hevc_put_hevc_bi_pel_pixels%1_%2, 7, 7, 6, dst, dststride, src, srcstride, src2, src2stride,height
+    pxor              m2, m2                     ; zero register for the byte -> word unpack
+    movdqa            m5, [pw_bi_%2]             ; rounding factor for the bi-prediction average
+.loop:
+    SIMPLE_LOAD       %1, %2, srcq, m0
+    SIMPLE_BILOAD     %1, src2q, m3, m4          ; 16-bit words of the second prediction
+    MC_PIXEL_COMPUTE  %1, %2
+    BI_COMPUTE        %1, %2, m0, m1, m3, m4, m5
+    PEL_%2STORE%1   dstq, m0, m1
+    add             dstq, dststrideq             ; dst += dststride
+    add             srcq, srcstrideq             ; src += srcstride
+    lea            src2q, [src2q+2*src2strideq]  ; src2 += 2*src2stride
+    dec          heightd                         ; cmp height
+    jnz               .loop                      ; height loop
+    RET
+
+%endmacro
+
+
+; ******************************
+; void put_hevc_epel_hX(int16_t *dst, ptrdiff_t dststride,
+;                       uint8_t *_src, ptrdiff_t _srcstride,
+;                       int height, int mx, int my)
+; (argument order as loaded by the cglobal lines below; no width or mcbuffer register is used)
+; ******************************
+
+
+%macro HEVC_PUT_HEVC_EPEL 2 ; width, bitdepth -- emits the h/v 4-tap functions in plain, uni_ and bi_ flavours
+cglobal hevc_put_hevc_epel_h%1_%2, 6, 7, 11, dst, dststride, src, srcstride, height, mx, rfilter
+%assign %%stride ((%2 + 7)/8)
+    EPEL_FILTER       %2, mx, m4, m5             ; taps in m4/m5; EPEL_LOAD may use m10 as scratch, hence 11 XMM regs
+.loop:
+    EPEL_LOAD         %2, srcq-%%stride, %%stride, %1 ; four columns starting one pixel to the left
+    EPEL_COMPUTE      %2, %1, m4, m5
+    PEL_10STORE%1      dstq, m0, m1
+    LOOP_END         dst, dststride, src, srcstride
+    RET
+
+cglobal hevc_put_hevc_uni_epel_h%1_%2, 6, 7, 11, dst, dststride, src, srcstride, height, mx, rfilter
+%assign %%stride ((%2 + 7)/8)
+    movdqa            m6, [pw_%2]                ; rounding factor for the final pixel output
+    EPEL_FILTER       %2, mx, m4, m5
+.loop:
+    EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
+    EPEL_COMPUTE      %2, %1, m4, m5
+    UNI_COMPUTE       %1, %2, m0, m1, m6
+    PEL_%2STORE%1   dstq, m0, m1
+    add             dstq, dststrideq             ; dst += dststride
+    add             srcq, srcstrideq             ; src += srcstride
+    dec          heightd                         ; cmp height
+    jnz               .loop                      ; height loop
+    RET
+
+cglobal hevc_put_hevc_bi_epel_h%1_%2, 8, 9, 11, dst, dststride, src, srcstride, src2, src2stride,height, mx, rfilter
+    movdqa            m6, [pw_bi_%2]             ; %%stride is still set from the %assign above
+    EPEL_FILTER       %2, mx, m4, m5
+.loop:
+    EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
+    EPEL_COMPUTE      %2, %1, m4, m5
+    SIMPLE_BILOAD     %1, src2q, m2, m3
+    BI_COMPUTE        %1, %2, m0, m1, m2, m3, m6
+    PEL_%2STORE%1   dstq, m0, m1
+    add             dstq, dststrideq             ; dst += dststride
+    add             srcq, srcstrideq             ; src += srcstride
+    lea            src2q, [src2q+2*src2strideq]  ; src2 += 2*src2stride
+    dec          heightd                         ; cmp height
+    jnz               .loop                      ; height loop
+    RET
+
+; ******************************
+; void put_hevc_epel_v(int16_t *dst, ptrdiff_t dststride,
+;                      uint8_t *_src, ptrdiff_t _srcstride,
+;                      int height, int mx, int my)
+; (the sixth argument lands in the register named r3src and is overwritten with 3*srcstride)
+; ******************************
+
+cglobal hevc_put_hevc_epel_v%1_%2, 7, 8, 11, dst, dststride, src, srcstride, height, r3src, my, rfilter
+    lea           r3srcq, [srcstrideq*3]
+    sub             srcq, srcstrideq             ; start one row above
+    EPEL_FILTER       %2, my, m4, m5
+.loop:
+    EPEL_LOAD         %2, srcq, srcstride, %1    ; four rows: src, +stride, +2*stride, +r3src
+    EPEL_COMPUTE      %2, %1, m4, m5
+    PEL_10STORE%1     dstq, m0, m1
+    LOOP_END          dst, dststride, src, srcstride
+    RET
+
+cglobal hevc_put_hevc_uni_epel_v%1_%2, 7, 8, 11, dst, dststride, src, srcstride, height, r3src, my, rfilter
+    lea           r3srcq, [srcstrideq*3]
+    movdqa            m6, [pw_%2]
+    sub             srcq, srcstrideq             ; start one row above
+    EPEL_FILTER       %2, my, m4, m5
+.loop:
+    EPEL_LOAD         %2, srcq, srcstride, %1
+    EPEL_COMPUTE      %2, %1, m4, m5
+    UNI_COMPUTE       %1, %2, m0, m1, m6
+    PEL_%2STORE%1   dstq, m0, m1
+    add             dstq, dststrideq             ; dst += dststride
+    add             srcq, srcstrideq             ; src += srcstride
+    dec          heightd                         ; cmp height
+    jnz               .loop                      ; height loop
+    RET
+
+
+cglobal hevc_put_hevc_bi_epel_v%1_%2, 9, 10, 11, dst, dststride, src, srcstride, src2, src2stride,height, r3src, my, rfilter
+    lea           r3srcq, [srcstrideq*3]
+    movdqa            m6, [pw_bi_%2]
+    sub             srcq, srcstrideq             ; start one row above
+    EPEL_FILTER       %2, my, m4, m5
+.loop:
+    EPEL_LOAD         %2, srcq, srcstride, %1
+    EPEL_COMPUTE      %2, %1, m4, m5
+    SIMPLE_BILOAD     %1, src2q, m2, m3
+    BI_COMPUTE        %1, %2, m0, m1, m2, m3, m6
+    PEL_%2STORE%1   dstq, m0, m1
+    add             dstq, dststrideq             ; dst += dststride
+    add             srcq, srcstrideq             ; src += srcstride
+    lea            src2q, [src2q+2*src2strideq]  ; src2 += 2*src2stride
+    dec          heightd                         ; cmp height
+    jnz               .loop                      ; height loop
+    RET
+%endmacro
+
+
+; ******************************
+; void put_hevc_epel_hv(int16_t *dst, ptrdiff_t dststride,
+;                       uint8_t *_src, ptrdiff_t _srcstride,
+;                       int height, int mx, int my)
+; ******************************
+
+%macro HEVC_PUT_HEVC_EPEL_HV 2 ; width, bitdepth -- 4-tap horizontal pass per row, then 4-tap vertical pass over 4 rows
+cglobal hevc_put_hevc_epel_hv%1_%2, 7, 9, 16 , dst, dststride, src, srcstride, height, mx, my, r3src, rfilter
+%assign %%stride ((%2 + 7)/8)
+    sub             srcq, srcstrideq             ; start one row above
+    EPEL_HV_FILTER    %2                         ; writes m12-m15, hence 16 XMM registers are declared
+    EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
+    EPEL_COMPUTE      %2, %1, m14, m15
+    SWAP              m4, m0                     ; filtered row -1 kept in m4
+    add             srcq, srcstrideq
+    EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
+    EPEL_COMPUTE      %2, %1, m14, m15
+    SWAP              m5, m0                     ; filtered row 0 kept in m5
+    add             srcq, srcstrideq
+    EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
+    EPEL_COMPUTE      %2, %1, m14, m15
+    SWAP              m6, m0                     ; filtered row +1 kept in m6
+    add             srcq, srcstrideq
+.loop:
+    EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
+    EPEL_COMPUTE      %2, %1, m14, m15
+    SWAP              m7, m0                     ; newest filtered row in m7
+    punpcklwd         m0, m4, m5                 ; interleave rows pairwise for the vertical pmaddwd
+    punpcklwd         m2, m6, m7
+%if %1 > 4
+    punpckhwd         m1, m4, m5
+    punpckhwd         m3, m6, m7
+%endif
+    EPEL_COMPUTE      14, %1, m12, m13           ; vertical pass on 16-bit rows (bitdepth argument 14)
+    PEL_10STORE%1     dstq, m0, m1
+    movdqa            m4, m5                     ; slide the 4-row window down by one row
+    movdqa            m5, m6
+    movdqa            m6, m7
+    LOOP_END         dst, dststride, src, srcstride
+    RET
+
+cglobal hevc_put_hevc_uni_epel_hv%1_%2, 7, 9, 16 , dst, dststride, src, srcstride, height, mx, my, r3src, rfilter
+%assign %%stride ((%2 + 7)/8)
+    sub             srcq, srcstrideq             ; start one row above
+    EPEL_HV_FILTER    %2                         ; writes m12-m15, hence 16 XMM registers are declared
+    EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
+    EPEL_COMPUTE      %2, %1, m14, m15
+    SWAP              m4, m0
+    add             srcq, srcstrideq
+    EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
+    EPEL_COMPUTE      %2, %1, m14, m15
+    SWAP              m5, m0
+    add             srcq, srcstrideq
+    EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
+    EPEL_COMPUTE      %2, %1, m14, m15
+    SWAP              m6, m0
+    add             srcq, srcstrideq
+.loop:
+    EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
+    EPEL_COMPUTE      %2, %1, m14, m15
+    SWAP              m7, m0
+    punpcklwd         m0, m4, m5
+    punpcklwd         m2, m6, m7
+%if %1 > 4
+    punpckhwd         m1, m4, m5
+    punpckhwd         m3, m6, m7
+%endif
+    EPEL_COMPUTE      14, %1, m12, m13
+    UNI_COMPUTE       %1, %2, m0, m1, [pw_%2]    ; rounding factor is read from memory on every row
+    PEL_%2STORE%1   dstq, m0, m1
+    movdqa            m4, m5                     ; slide the 4-row window down by one row
+    movdqa            m5, m6
+    movdqa            m6, m7
+    add             dstq, dststrideq             ; dst += dststride
+    add             srcq, srcstrideq             ; src += srcstride
+    dec          heightd                         ; cmp height
+    jnz               .loop                      ; height loop
+    RET
+
+
+cglobal hevc_put_hevc_bi_epel_hv%1_%2, 9, 11, 16, dst, dststride, src, srcstride, src2, src2stride, height, mx, my, r3src, rfilter
+%assign %%stride ((%2 + 7)/8)
+    sub             srcq, srcstrideq             ; start one row above
+    EPEL_HV_FILTER    %2
+    EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
+    EPEL_COMPUTE      %2, %1, m14, m15
+    SWAP              m4, m0
+    add             srcq, srcstrideq
+    EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
+    EPEL_COMPUTE      %2, %1, m14, m15
+    SWAP              m5, m0
+    add             srcq, srcstrideq
+    EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
+    EPEL_COMPUTE      %2, %1, m14, m15
+    SWAP              m6, m0
+    add             srcq, srcstrideq
+.loop:
+    EPEL_LOAD         %2, srcq-%%stride, %%stride, %1
+    EPEL_COMPUTE      %2, %1, m14, m15
+    SWAP              m7, m0
+    punpcklwd         m0, m4, m5
+    punpcklwd         m2, m6, m7
+%if %1 > 4
+    punpckhwd         m1, m4, m5
+    punpckhwd         m3, m6, m7
+%endif
+    EPEL_COMPUTE      14, %1, m12, m13
+    SIMPLE_BILOAD     %1, src2q, m8, m9          ; 16-bit words of the second prediction
+    BI_COMPUTE        %1, %2, m0, m1, m8, m9, [pw_bi_%2]
+    PEL_%2STORE%1   dstq, m0, m1
+    movdqa            m4, m5                     ; slide the 4-row window down by one row
+    movdqa            m5, m6
+    movdqa            m6, m7
+    add             dstq, dststrideq             ; dst += dststride
+    add             srcq, srcstrideq             ; src += srcstride
+    lea            src2q, [src2q+2*src2strideq]  ; src2 += 2*src2stride
+    dec          heightd                         ; cmp height
+    jnz               .loop                      ; height loop
+    RET
+%endmacro
+
+; ******************************
+; void put_hevc_qpel_hX_X_X(int16_t *dst, ptrdiff_t dststride,
+;                       uint8_t *_src, ptrdiff_t _srcstride,
+;                       int height, int mx, int my)
+; ******************************
+
+%macro HEVC_PUT_HEVC_QPEL 2 ; width, bitdepth -- emits the h/v 8-tap functions in plain, uni_ and bi_ flavours
+cglobal hevc_put_hevc_qpel_h%1_%2, 6, 7, 16 , dst, dststride, src, srcstride, height, mx, rfilter
+    QPEL_FILTER       %2, mx                     ; writes m12-m15, hence 16 XMM registers are declared
+.loop:
+    QPEL_H_LOAD       %2, srcq, %1, 10           ; m10 is the interleave scratch register
+    QPEL_COMPUTE      %1, %2
+%if %2 > 8
+    packssdw          m0, m1                     ; >8-bit sums are 32-bit: pack back to words
+%endif
+    PEL_10STORE%1     dstq, m0, m1
+    LOOP_END          dst, dststride, src, srcstride
+    RET
+
+cglobal hevc_put_hevc_uni_qpel_h%1_%2, 6, 7, 16 , dst, dststride, src, srcstride, height, mx, rfilter
+    movdqa            m9, [pw_%2]                ; rounding factor for the final pixel output
+    QPEL_FILTER       %2, mx
+.loop:
+    QPEL_H_LOAD       %2, srcq, %1, 10
+    QPEL_COMPUTE      %1, %2
+%if %2 > 8
+    packssdw          m0, m1
+%endif
+    UNI_COMPUTE       %1, %2, m0, m1, m9
+    PEL_%2STORE%1   dstq, m0, m1
+    add             dstq, dststrideq             ; dst += dststride
+    add             srcq, srcstrideq             ; src += srcstride
+    dec          heightd                         ; cmp height
+    jnz               .loop                      ; height loop
+    RET
+
+cglobal hevc_put_hevc_bi_qpel_h%1_%2, 8, 9, 16 , dst, dststride, src, srcstride, src2, src2stride, height, mx, rfilter
+    movdqa            m9, [pw_bi_%2]             ; rounding factor for the bi-prediction average
+    QPEL_FILTER       %2, mx
+.loop:
+    QPEL_H_LOAD       %2, srcq, %1, 10
+    QPEL_COMPUTE      %1, %2
+%if %2 > 8
+    packssdw          m0, m1
+%endif
+    SIMPLE_BILOAD     %1, src2q, m10, m11        ; m10 is free again once QPEL_H_LOAD has finished
+    BI_COMPUTE        %1, %2, m0, m1, m10, m11, m9
+    PEL_%2STORE%1   dstq, m0, m1
+    add             dstq, dststrideq             ; dst += dststride
+    add             srcq, srcstrideq             ; src += srcstride
+    lea            src2q, [src2q+2*src2strideq]  ; src2 += 2*src2stride
+    dec          heightd                         ; cmp height
+    jnz               .loop                      ; height loop
+    RET
+
+
+; ******************************
+; void put_hevc_qpel_vX_X_X(int16_t *dst, ptrdiff_t dststride,
+;                       uint8_t *_src, ptrdiff_t _srcstride,
+;                       int height, int mx, int my)
+; ******************************
+
+cglobal hevc_put_hevc_qpel_v%1_%2, 7, 9, 16, dst, dststride, src, srcstride, height, r3src, my, rfilter
+    lea           r3srcq, [srcstrideq*3]         ; reuses the register of the sixth argument
+    QPEL_FILTER       %2, my                     ; writes m12-m15, hence 16 XMM registers are declared
+.loop:
+    QPEL_V_LOAD       %2, srcq, srcstride, %1, r8 ; r8 is the scratch pointer for the rows above src
+    QPEL_COMPUTE      %1, %2
+%if %2 > 8
+    packssdw          m0, m1
+%endif
+    PEL_10STORE%1     dstq, m0, m1
+    LOOP_END         dst, dststride, src, srcstride
+    RET
+
+cglobal hevc_put_hevc_uni_qpel_v%1_%2, 7, 9, 16, dst, dststride, src, srcstride, height, r3src, my, rfilter
+    movdqa            m9, [pw_%2]
+    lea           r3srcq, [srcstrideq*3]
+    QPEL_FILTER       %2, my
+.loop:
+    QPEL_V_LOAD       %2, srcq, srcstride, %1, r8
+    QPEL_COMPUTE      %1, %2
+%if %2 > 8
+    packssdw          m0, m1                     ; signed pack like the sibling functions; pmulhrsw below is signed
+%endif
+    UNI_COMPUTE       %1, %2, m0, m1, m9
+    PEL_%2STORE%1   dstq, m0, m1
+    add             dstq, dststrideq             ; dst += dststride
+    add             srcq, srcstrideq             ; src += srcstride
+    dec          heightd                         ; cmp height
+    jnz               .loop                      ; height loop
+    RET
+
+cglobal hevc_put_hevc_bi_qpel_v%1_%2, 9, 11, 16, dst, dststride, src, srcstride, src2, src2stride, height, r3src, my, rfilter
+    movdqa            m9, [pw_bi_%2]
+    lea           r3srcq, [srcstrideq*3]
+    QPEL_FILTER       %2, my
+.loop:
+    SIMPLE_BILOAD     %1, src2q, m10, m11        ; loaded first; QPEL_V_LOAD uses m8, not m10/m11, as scratch
+    QPEL_V_LOAD       %2, srcq, srcstride, %1, r10
+    QPEL_COMPUTE      %1, %2
+%if %2 > 8
+    packssdw          m0, m1
+%endif
+    BI_COMPUTE        %1, %2, m0, m1, m10, m11, m9
+    PEL_%2STORE%1   dstq, m0, m1
+    add             dstq, dststrideq             ; dst += dststride
+    add             srcq, srcstrideq             ; src += srcstride
+    lea            src2q, [src2q+2*src2strideq]  ; src2 += 2*src2stride
+    dec          heightd                         ; cmp height
+    jnz               .loop                      ; height loop
+    RET
+%endmacro
+
+
+; ******************************
+; void put_hevc_qpel_hvX_X(int16_t *dst, ptrdiff_t dststride,
+;                       uint8_t *_src, ptrdiff_t _srcstride,
+;                       int height, int mx, int my)
+; ******************************
+%macro HEVC_PUT_HEVC_QPEL_HV 2
+cglobal hevc_put_hevc_qpel_hv%1_%2, 7, 9, 16 , dst, dststride, src, srcstride, height, mx, my, r3src, rfilter ; 16 XMM regs: m8-m15 hold the row window
+    lea              mxq, [mxq*8-8]              ; mx = (mx - 1) * 8
+    lea              myq, [myq*8-8]              ; my = (my - 1) * 8
+    lea           r3srcq, [srcstrideq*3]         ; r3src = 3 * srcstride
+    sub             srcq, r3srcq                 ; src -= 3 * srcstride
+    QPEL_H_LOAD       %2, srcq, %1, 15
+    QPEL_HV_COMPUTE   %1, %2, mx, ackssdw        ; mx filter on row 0
+    SWAP              m8, m0                     ; row 0 -> m8
+    add             srcq, srcstrideq
+    QPEL_H_LOAD       %2, srcq, %1, 15
+    QPEL_HV_COMPUTE   %1, %2, mx, ackssdw
+    SWAP              m9, m0                     ; row 1 -> m9
+    add             srcq, srcstrideq
+    QPEL_H_LOAD       %2, srcq, %1, 15
+    QPEL_HV_COMPUTE   %1, %2, mx, ackssdw
+    SWAP             m10, m0                     ; row 2 -> m10
+    add             srcq, srcstrideq
+    QPEL_H_LOAD       %2, srcq, %1, 15
+    QPEL_HV_COMPUTE   %1, %2, mx, ackssdw
+    SWAP             m11, m0                     ; row 3 -> m11
+    add             srcq, srcstrideq
+    QPEL_H_LOAD       %2, srcq, %1, 15
+    QPEL_HV_COMPUTE   %1, %2, mx, ackssdw
+    SWAP             m12, m0                     ; row 4 -> m12
+    add             srcq, srcstrideq
+    QPEL_H_LOAD       %2, srcq, %1, 15
+    QPEL_HV_COMPUTE   %1, %2, mx, ackssdw
+    SWAP             m13, m0                     ; row 5 -> m13
+    add             srcq, srcstrideq
+    QPEL_H_LOAD       %2, srcq, %1, 15
+    QPEL_HV_COMPUTE   %1, %2, mx, ackssdw
+    SWAP             m14, m0                     ; row 6 -> m14
+    add             srcq, srcstrideq
+.loop
+    QPEL_H_LOAD       %2, srcq, %1, 15
+    QPEL_HV_COMPUTE   %1, %2, mx, ackssdw        ; mx filter on the newest row
+    SWAP             m15, m0                     ; row 7 -> m15
+    punpcklwd         m0, m8, m9                 ; interleave low words of each row pair
+    punpcklwd         m2, m10, m11
+    punpcklwd         m4, m12, m13
+    punpcklwd         m6, m14, m15
+%if %1 > 4
+    punpckhwd         m1, m8, m9                 ; width > 4: high words as well
+    punpckhwd         m3, m10, m11
+    punpckhwd         m5, m12, m13
+    punpckhwd         m7, m14, m15
+%endif
+    QPEL_HV_COMPUTE   %1, 14, my, ackssdw        ; second pass over the eight rows, using my
+    PEL_10STORE%1     dstq, m0, m1
+%if %1 <= 4
+    movq              m8, m9                     ; slide the row window down by one row
+    movq              m9, m10
+    movq             m10, m11
+    movq             m11, m12
+    movq             m12, m13
+    movq             m13, m14
+    movq             m14, m15
+%else
+    movdqa            m8, m9                     ; same slide, full 128-bit registers
+    movdqa            m9, m10
+    movdqa           m10, m11
+    movdqa           m11, m12
+    movdqa           m12, m13
+    movdqa           m13, m14
+    movdqa           m14, m15
+%endif
+    LOOP_END         dst, dststride, src, srcstride
+    RET
+
+cglobal hevc_put_hevc_uni_qpel_hv%1_%2, 7, 9, 16 , dst, dststride, src, srcstride, height, mx, my, r3src, rfilter ; 16 XMM regs: m8-m15 hold the row window
+    lea              mxq, [mxq*8-8]              ; mx = (mx - 1) * 8
+    lea              myq, [myq*8-8]              ; my = (my - 1) * 8
+    lea           r3srcq, [srcstrideq*3]         ; r3src = 3 * srcstride
+    sub             srcq, r3srcq                 ; src -= 3 * srcstride
+    QPEL_H_LOAD       %2, srcq, %1, 15
+    QPEL_HV_COMPUTE   %1, %2, mx, ackssdw        ; mx filter on row 0
+    SWAP              m8, m0                     ; row 0 -> m8
+    add             srcq, srcstrideq
+    QPEL_H_LOAD       %2, srcq, %1, 15
+    QPEL_HV_COMPUTE   %1, %2, mx, ackssdw
+    SWAP              m9, m0                     ; row 1 -> m9
+    add             srcq, srcstrideq
+    QPEL_H_LOAD       %2, srcq, %1, 15
+    QPEL_HV_COMPUTE   %1, %2, mx, ackssdw
+    SWAP             m10, m0                     ; row 2 -> m10
+    add             srcq, srcstrideq
+    QPEL_H_LOAD       %2, srcq, %1, 15
+    QPEL_HV_COMPUTE   %1, %2, mx, ackssdw
+    SWAP             m11, m0                     ; row 3 -> m11
+    add             srcq, srcstrideq
+    QPEL_H_LOAD       %2, srcq, %1, 15
+    QPEL_HV_COMPUTE   %1, %2, mx, ackssdw
+    SWAP             m12, m0                     ; row 4 -> m12
+    add             srcq, srcstrideq
+    QPEL_H_LOAD       %2, srcq, %1, 15
+    QPEL_HV_COMPUTE   %1, %2, mx, ackssdw
+    SWAP             m13, m0                     ; row 5 -> m13
+    add             srcq, srcstrideq
+    QPEL_H_LOAD       %2, srcq, %1, 15
+    QPEL_HV_COMPUTE   %1, %2, mx, ackssdw
+    SWAP             m14, m0                     ; row 6 -> m14
+    add             srcq, srcstrideq
+.loop
+    QPEL_H_LOAD       %2, srcq, %1, 15
+    QPEL_HV_COMPUTE   %1, %2, mx, ackssdw        ; mx filter on the newest row
+    SWAP             m15, m0                     ; row 7 -> m15
+    punpcklwd         m0, m8, m9                 ; interleave low words of each row pair
+    punpcklwd         m2, m10, m11
+    punpcklwd         m4, m12, m13
+    punpcklwd         m6, m14, m15
+%if %1 > 4
+    punpckhwd         m1, m8, m9                 ; width > 4: high words as well
+    punpckhwd         m3, m10, m11
+    punpckhwd         m5, m12, m13
+    punpckhwd         m7, m14, m15
+%endif
+    QPEL_HV_COMPUTE   %1, 14, my, ackusdw        ; second pass over the eight rows, using my
+    UNI_COMPUTE       %1, %2, m0, m1, [pw_%2]
+    PEL_%2STORE%1   dstq, m0, m1
+
+%if %1 <= 4
+    movq              m8, m9                     ; slide the row window down by one row
+    movq              m9, m10
+    movq             m10, m11
+    movq             m11, m12
+    movq             m12, m13
+    movq             m13, m14
+    movq             m14, m15
+%else
+    movdqa            m8, m9                     ; same slide, full 128-bit registers
+    movdqa            m9, m10
+    movdqa           m10, m11
+    movdqa           m11, m12
+    movdqa           m12, m13
+    movdqa           m13, m14
+    movdqa           m14, m15
+%endif
+    add             dstq, dststrideq             ; dst += dststride
+    add             srcq, srcstrideq             ; src += srcstride
+    dec          heightd                         ; height--
+    jnz               .loop                      ; height loop
+    RET
+
+cglobal hevc_put_hevc_bi_qpel_hv%1_%2, 9, 11, 16, dst, dststride, src, srcstride, src2, src2stride, height, mx, my, r3src, rfilter
+    lea              mxq, [mxq*8-8]              ; mx = (mx - 1) * 8
+    lea              myq, [myq*8-8]              ; my = (my - 1) * 8
+    lea           r3srcq, [srcstrideq*3]         ; r3src = 3 * srcstride
+    sub             srcq, r3srcq                 ; src -= 3 * srcstride
+    QPEL_H_LOAD       %2, srcq, %1, 15
+    QPEL_HV_COMPUTE   %1, %2, mx, ackssdw        ; mx filter on row 0
+    SWAP              m8, m0                     ; row 0 -> m8
+    add             srcq, srcstrideq
+    QPEL_H_LOAD       %2, srcq, %1, 15
+    QPEL_HV_COMPUTE   %1, %2, mx, ackssdw
+    SWAP              m9, m0                     ; row 1 -> m9
+    add             srcq, srcstrideq
+    QPEL_H_LOAD       %2, srcq, %1, 15
+    QPEL_HV_COMPUTE   %1, %2, mx, ackssdw
+    SWAP             m10, m0                     ; row 2 -> m10
+    add             srcq, srcstrideq
+    QPEL_H_LOAD       %2, srcq, %1, 15
+    QPEL_HV_COMPUTE   %1, %2, mx, ackssdw
+    SWAP             m11, m0                     ; row 3 -> m11
+    add             srcq, srcstrideq
+    QPEL_H_LOAD       %2, srcq, %1, 15
+    QPEL_HV_COMPUTE   %1, %2, mx, ackssdw
+    SWAP             m12, m0                     ; row 4 -> m12
+    add             srcq, srcstrideq
+    QPEL_H_LOAD       %2, srcq, %1, 15
+    QPEL_HV_COMPUTE   %1, %2, mx, ackssdw
+    SWAP             m13, m0                     ; row 5 -> m13
+    add             srcq, srcstrideq
+    QPEL_H_LOAD       %2, srcq, %1, 15
+    QPEL_HV_COMPUTE   %1, %2, mx, ackssdw
+    SWAP             m14, m0                     ; row 6 -> m14
+    add             srcq, srcstrideq
+.loop
+    QPEL_H_LOAD       %2, srcq, %1, 15
+    QPEL_HV_COMPUTE   %1, %2, mx, ackssdw        ; mx filter on the newest row
+    SWAP             m15, m0                     ; row 7 -> m15
+    punpcklwd         m0, m8, m9                 ; interleave low words of each row pair
+    punpcklwd         m2, m10, m11
+    punpcklwd         m4, m12, m13
+    punpcklwd         m6, m14, m15
+%if %1 > 4
+    punpckhwd         m1, m8, m9                 ; width > 4: high words as well
+    punpckhwd         m3, m10, m11
+    punpckhwd         m5, m12, m13
+    punpckhwd         m7, m14, m15
+%endif
+    QPEL_HV_COMPUTE   %1, 14, my, ackssdw        ; second pass over the eight rows, using my
+    SIMPLE_BILOAD     %1, src2q, m8, m9 ; m9 not used in this case; m8 (oldest row) is free to overwrite, it is replaced by the slide below
+    BI_COMPUTE        %1, %2, m0, m1, m8, m9, [pw_bi_%2]
+    PEL_%2STORE%1   dstq, m0, m1
+
+%if %1 <= 4
+    movq              m8, m9                     ; slide the row window down by one row
+    movq              m9, m10
+    movq             m10, m11
+    movq             m11, m12
+    movq             m12, m13
+    movq             m13, m14
+    movq             m14, m15
+%else
+    movdqa            m8, m9                     ; same slide, full 128-bit registers
+    movdqa            m9, m10
+    movdqa           m10, m11
+    movdqa           m11, m12
+    movdqa           m12, m13
+    movdqa           m13, m14
+    movdqa           m14, m15
+%endif
+    add             dstq, dststrideq             ; dst += dststride
+    add             srcq, srcstrideq             ; src += srcstride
+    lea            src2q, [src2q+2*src2strideq]  ; src2 += 2 * src2stride
+    dec          heightd                         ; height--
+    jnz               .loop                      ; height loop
+    RET
+%endmacro
+
+%macro WEIGHTING_FUNCS 2
+%if WIN64 || ARCH_X86_32
+cglobal hevc_put_hevc_uni_w%1_%2, 4, 5, 7, dst, dststride, src, srcstride, height, denom, wx, ox
+    mov             r4d, denomm                 ; only 4 args are auto-loaded: fetch denom by hand
+%define SHIFT  r4d
+%else
+cglobal hevc_put_hevc_uni_w%1_%2, 6, 6, 7, dst, dststride, src, srcstride, height, denom, wx, ox
+%define SHIFT  denomd
+%endif
+    lea           SHIFT, [SHIFT+14-%2]          ; shift = 14 - bitd + denom
+%if %1 <= 4
+    pxor             m1, m1         ; m1 = 0
+%endif
+    movd             m2, wxm        ; WX
+    movd             m4, SHIFT      ; shift
+%if %1 <= 4
+    punpcklwd        m2, m1         ; pair the wx word with 0 (pmaddwd path)
+%else
+    punpcklwd        m2, m2         ; duplicate the wx word (pmullw/pmulhw path)
+%endif
+    dec           SHIFT             ; shift - 1
+    movdqu           m5, [one_per_32]
+    movd             m6, SHIFT      ; m6 = shift - 1
+    pshufd           m2, m2, 0      ; broadcast low dword of m2
+    mov           SHIFT, oxm        ; SHIFT register now reused for ox
+    pslld            m5, m6         ; m5 = one_per_32 << (shift - 1), added before the shift below
+%if %2 != 8
+    shl           SHIFT, %2-8       ; ox << (bitd - 8)
+%endif
+    movd             m3, SHIFT      ; OX
+    pshufd           m3, m3, 0      ; broadcast ox to all dwords
+%if WIN64 || ARCH_X86_32
+    mov           SHIFT, heightm    ; r4d is also heightd: load the row count for the loop
+%endif
+.loop
+   SIMPLE_LOAD        %1, 10, srcq, m0
+%if %1 <= 4
+    punpcklwd         m0, m1                     ; pair each src word with 0
+    pmaddwd           m0, m2                     ; src * wx as dwords
+    paddd             m0, m5                     ; + m5
+    psrad             m0, m4                     ; >> shift
+    paddd             m0, m3                     ; + ox
+%else
+    pmulhw            m6, m0, m2                 ; high 16 bits of src * wx
+    pmullw            m0, m2                     ; low 16 bits of src * wx
+    punpckhwd         m1, m0, m6                 ; upper four 32-bit products
+    punpcklwd         m0, m6                     ; lower four 32-bit products
+    paddd             m0, m5                     ; + m5
+    paddd             m1, m5
+    psrad             m0, m4                     ; >> shift
+    psrad             m1, m4
+    paddd             m0, m3                     ; + ox
+    paddd             m1, m3
+%endif
+    packusdw          m0, m1                     ; dwords -> words, unsigned saturation
+%if %2 == 8
+    packuswb          m0, m0                     ; words -> bytes, unsigned saturation
+%else
+    pminsw            m0, [max_pixels_%2]        ; clamp against max_pixels_%2
+%endif
+    PEL_%2STORE%1   dstq, m0, m1
+    add             dstq, dststrideq             ; dst += dststride
+    lea             srcq, [srcq+2*srcstrideq]      ; src += 2 * srcstride
+    dec          heightd                         ; height--
+    jnz               .loop                      ; height loop
+    RET
+
+cglobal hevc_put_hevc_bi_w%1_%2, 6, 7, 10, dst, dststride, src, srcstride, src2, src2stride, height, denom, wx0, wx1, ox0, ox1
+    mov              r6d, denomm       ; r6 is used as scratch until it receives height below
+%if %1 <= 4
+    pxor              m1, m1           ; m1 = 0
+%endif
+    movd              m2, wx0m         ; WX0
+    lea              r6d, [r6d+14-%2]  ; shift = 14 - bitd + denom
+    movd              m3, wx1m         ; WX1
+    movd              m0, r6d          ; shift
+%if %1 <= 4
+    punpcklwd         m2, m1           ; pair each weight word with 0 (pmaddwd path)
+    punpcklwd         m3, m1
+%else
+    punpcklwd         m2, m2           ; duplicate each weight word (pmullw/pmulhw path)
+    punpcklwd         m3, m3
+%endif
+    inc              r6d
+    movd              m5, r6d          ; shift+1
+    pshufd            m2, m2, 0        ; broadcast wx0
+    mov              r6d, ox0m
+    pshufd            m3, m3, 0        ; broadcast wx1
+    add              r6d, ox1m         ; ox0 + ox1
+%if %2 != 8
+    shl              r6d, %2-8         ; ox << (bitd - 8)
+%endif
+    inc              r6d               ; + 1
+    movd              m4, r6d          ; offset
+    pshufd            m4, m4, 0        ; broadcast offset
+    mov              r6d, heightm      ; r6d = row counter for the loop
+    pslld             m4, m0           ; offset <<= shift
+
+.loop
+   SIMPLE_LOAD        %1, 10, srcq,  m0
+   SIMPLE_LOAD        %1, 10, src2q, m8
+%if %1 <= 4
+    punpcklwd         m0, m1                     ; pair each sample word with 0
+    punpcklwd         m8, m1
+    pmaddwd           m0, m3                     ; src  * wx1
+    pmaddwd           m8, m2                     ; src2 * wx0
+    paddd             m0, m4                     ; + offset
+    paddd             m0, m8                     ; sum both weighted sources
+    psrad             m0, m5                     ; >> (shift + 1)
+%else
+    pmulhw            m6, m0, m3                 ; high 16 bits of src * wx1
+    pmullw            m0, m3                     ; low 16 bits of src * wx1
+    pmulhw            m7, m8, m2                 ; high 16 bits of src2 * wx0
+    pmullw            m8, m2                     ; low 16 bits of src2 * wx0
+    punpckhwd         m1, m0, m6                 ; 32-bit products, upper four (src)
+    punpcklwd         m0, m6                     ; 32-bit products, lower four (src)
+    punpckhwd         m9, m8, m7                 ; 32-bit products, upper four (src2)
+    punpcklwd         m8, m7                     ; 32-bit products, lower four (src2)
+    paddd             m0, m8                     ; sum both weighted sources
+    paddd             m1, m9
+    paddd             m0, m4                     ; + offset
+    paddd             m1, m4
+    psrad             m0, m5                     ; >> (shift + 1)
+    psrad             m1, m5
+%endif
+    packusdw          m0, m1                     ; dwords -> words, unsigned saturation
+%if %2 == 8
+    packuswb          m0, m0                     ; words -> bytes, unsigned saturation
+%else
+    pminsw            m0, [max_pixels_%2]        ; clamp against max_pixels_%2
+%endif
+    PEL_%2STORE%1   dstq, m0, m1
+    add             dstq, dststrideq             ; dst += dststride
+    lea             srcq, [srcq+2*srcstrideq]      ; src += 2 * srcstride
+    lea            src2q, [src2q+2*src2strideq]      ; src2 += 2 * src2stride
+    dec              r6d                         ; height--
+    jnz               .loop                      ; height loop
+    RET
+%endmacro
+
+WEIGHTING_FUNCS 2, 8                   ; arguments: block width (%1), bit depth (%2)
+WEIGHTING_FUNCS 4, 8
+WEIGHTING_FUNCS 6, 8
+WEIGHTING_FUNCS 8, 8
+
+WEIGHTING_FUNCS 2, 10
+WEIGHTING_FUNCS 4, 10
+WEIGHTING_FUNCS 6, 10
+WEIGHTING_FUNCS 8, 10
+
+WEIGHTING_FUNCS 2, 12
+WEIGHTING_FUNCS 4, 12
+WEIGHTING_FUNCS 6, 12
+WEIGHTING_FUNCS 8, 12
+
+HEVC_PUT_HEVC_PEL_PIXELS  2, 8
+HEVC_PUT_HEVC_PEL_PIXELS  4, 8
+HEVC_PUT_HEVC_PEL_PIXELS  6, 8
+HEVC_PUT_HEVC_PEL_PIXELS  8, 8
+HEVC_PUT_HEVC_PEL_PIXELS 12, 8
+HEVC_PUT_HEVC_PEL_PIXELS 16, 8
+
+HEVC_PUT_HEVC_PEL_PIXELS 2, 10
+HEVC_PUT_HEVC_PEL_PIXELS 4, 10
+HEVC_PUT_HEVC_PEL_PIXELS 6, 10
+HEVC_PUT_HEVC_PEL_PIXELS 8, 10
+
+HEVC_PUT_HEVC_PEL_PIXELS 2, 12
+HEVC_PUT_HEVC_PEL_PIXELS 4, 12
+HEVC_PUT_HEVC_PEL_PIXELS 6, 12
+HEVC_PUT_HEVC_PEL_PIXELS 8, 12
+
+HEVC_PUT_HEVC_EPEL 2,  8
+HEVC_PUT_HEVC_EPEL 4,  8
+HEVC_PUT_HEVC_EPEL 6,  8
+HEVC_PUT_HEVC_EPEL 8,  8
+HEVC_PUT_HEVC_EPEL 12, 8
+HEVC_PUT_HEVC_EPEL 16, 8
+
+
+HEVC_PUT_HEVC_EPEL 2, 10
+HEVC_PUT_HEVC_EPEL 4, 10
+HEVC_PUT_HEVC_EPEL 6, 10
+HEVC_PUT_HEVC_EPEL 8, 10
+
+HEVC_PUT_HEVC_EPEL 2, 12
+HEVC_PUT_HEVC_EPEL 4, 12
+HEVC_PUT_HEVC_EPEL 6, 12
+HEVC_PUT_HEVC_EPEL 8, 12
+
+HEVC_PUT_HEVC_EPEL_HV 2,  8
+HEVC_PUT_HEVC_EPEL_HV 4,  8
+HEVC_PUT_HEVC_EPEL_HV 6,  8
+HEVC_PUT_HEVC_EPEL_HV 8,  8
+
+HEVC_PUT_HEVC_EPEL_HV 2, 10
+HEVC_PUT_HEVC_EPEL_HV 4, 10
+HEVC_PUT_HEVC_EPEL_HV 6, 10
+HEVC_PUT_HEVC_EPEL_HV 8, 10
+
+HEVC_PUT_HEVC_EPEL_HV 2, 12
+HEVC_PUT_HEVC_EPEL_HV 4, 12
+HEVC_PUT_HEVC_EPEL_HV 6, 12
+HEVC_PUT_HEVC_EPEL_HV 8, 12
+
+HEVC_PUT_HEVC_QPEL 4,  8
+HEVC_PUT_HEVC_QPEL 8,  8
+HEVC_PUT_HEVC_QPEL 12, 8
+HEVC_PUT_HEVC_QPEL 16, 8
+
+HEVC_PUT_HEVC_QPEL 4, 10
+HEVC_PUT_HEVC_QPEL 8, 10
+
+HEVC_PUT_HEVC_QPEL 4, 12
+HEVC_PUT_HEVC_QPEL 8, 12
+
+HEVC_PUT_HEVC_QPEL_HV 2, 8             ; each call emits the plain, uni and bi qpel_hv functions
+HEVC_PUT_HEVC_QPEL_HV 4, 8
+HEVC_PUT_HEVC_QPEL_HV 6, 8
+HEVC_PUT_HEVC_QPEL_HV 8, 8
+
+HEVC_PUT_HEVC_QPEL_HV 2, 10
+HEVC_PUT_HEVC_QPEL_HV 4, 10
+HEVC_PUT_HEVC_QPEL_HV 6, 10
+HEVC_PUT_HEVC_QPEL_HV 8, 10
+
+HEVC_PUT_HEVC_QPEL_HV 2, 12
+HEVC_PUT_HEVC_QPEL_HV 4, 12
+HEVC_PUT_HEVC_QPEL_HV 6, 12
+HEVC_PUT_HEVC_QPEL_HV 8, 12
+
+%endif ; ARCH_X86_64
diff --git a/libavcodec/x86/hevcdsp.h b/libavcodec/x86/hevcdsp.h
new file mode 100644
index 0000000..4bcc8dc
--- /dev/null
+++ b/libavcodec/x86/hevcdsp.h
@@ -0,0 +1,134 @@
+/*
+ * HEVC video decoder
+ *
+ * Copyright (C) 2012 - 2013 Guillaume Martres
+ * Copyright (C) 2013 - 2014 Pierre-Edouard Lepere
+ *
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_HEVCDSP_H
+#define AVCODEC_X86_HEVCDSP_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+
+#define idct_dc_proto(size, bitd, opt) /* declares ff_hevc_idct<size>_dc_add_<bitd>_<opt>(dst, coeffs, stride) */ \
+                void ff_hevc_idct##size##_dc_add_##bitd##_##opt(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
+
+#define PEL_LINK(dst, idx1, idx2, idx3, name, D, opt) /* store the five variants of kernel <name>_<D>_<opt> in slot [idx1][idx2][idx3] of dst, dst_bi, dst_uni, dst_uni_w and dst_bi_w */ \
+dst[idx1][idx2][idx3] = ff_hevc_put_hevc_ ## name ## _ ## D ## _##opt; \
+dst ## _bi[idx1][idx2][idx3] = ff_hevc_put_hevc_bi_ ## name ## _ ## D ## _##opt; \
+dst ## _uni[idx1][idx2][idx3] = ff_hevc_put_hevc_uni_ ## name ## _ ## D ## _##opt; \
+dst ## _uni_w[idx1][idx2][idx3] = ff_hevc_put_hevc_uni_w_ ## name ## _ ## D ## _##opt; \
+dst ## _bi_w[idx1][idx2][idx3] = ff_hevc_put_hevc_bi_w_ ## name ## _ ## D ## _##opt
+
+
+#define PEL_PROTOTYPE(name, D, opt) /* prototypes for the plain, bi, uni, uni_w and bi_w variants of kernel <name>_<D>_<opt>; no trailing semicolon */ \
+void ff_hevc_put_hevc_ ## name ## _ ## D ## _##opt(int16_t *dst, ptrdiff_t dststride,uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,int width); \
+void ff_hevc_put_hevc_bi_ ## name ## _ ## D ## _##opt(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, ptrdiff_t src2stride, int height, intptr_t mx, intptr_t my, int width); \
+void ff_hevc_put_hevc_uni_ ## name ## _ ## D ## _##opt(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width); \
+void ff_hevc_put_hevc_uni_w_ ## name ## _ ## D ## _##opt(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width); \
+void ff_hevc_put_hevc_bi_w_ ## name ## _ ## D ## _##opt(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride, int16_t *src2, ptrdiff_t src2stride, int height, int denom, int wx0, int wx1, int ox0, int ox1, intptr_t mx, intptr_t my, int width)
+
+
+///////////////////////////////////////////////////////////////////////////////
+// MC functions
+///////////////////////////////////////////////////////////////////////////////
+
+#define EPEL_PROTOTYPES(fname, bitd, opt) /* PEL_PROTOTYPE for widths 4, 6, 8, 12, 16, 24, 32, 48 and 64 */ \
+        PEL_PROTOTYPE(fname##4,  bitd, opt); \
+        PEL_PROTOTYPE(fname##6,  bitd, opt); \
+        PEL_PROTOTYPE(fname##8,  bitd, opt); \
+        PEL_PROTOTYPE(fname##12, bitd, opt); \
+        PEL_PROTOTYPE(fname##16, bitd, opt); \
+        PEL_PROTOTYPE(fname##24, bitd, opt); \
+        PEL_PROTOTYPE(fname##32, bitd, opt); \
+        PEL_PROTOTYPE(fname##48, bitd, opt); \
+        PEL_PROTOTYPE(fname##64, bitd, opt)
+
+#define QPEL_PROTOTYPES(fname, bitd, opt) /* PEL_PROTOTYPE for widths 4, 8, 12, 16, 24, 32, 48 and 64 (no width 6) */ \
+        PEL_PROTOTYPE(fname##4,  bitd, opt); \
+        PEL_PROTOTYPE(fname##8,  bitd, opt); \
+        PEL_PROTOTYPE(fname##12, bitd, opt); \
+        PEL_PROTOTYPE(fname##16, bitd, opt); \
+        PEL_PROTOTYPE(fname##24, bitd, opt); \
+        PEL_PROTOTYPE(fname##32, bitd, opt); \
+        PEL_PROTOTYPE(fname##48, bitd, opt); \
+        PEL_PROTOTYPE(fname##64, bitd, opt)
+
+#define WEIGHTING_PROTOTYPE(width, bitd, opt) /* prototypes for ff_hevc_put_hevc_uni_w<width>_<bitd>_<opt> and the bi_w counterpart; sources are int16_t */ \
+void ff_hevc_put_hevc_uni_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, int16_t *_src, ptrdiff_t _srcstride, int height, int denom,  int _wx, int _ox); \
+void ff_hevc_put_hevc_bi_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, int16_t *_src, ptrdiff_t _srcstride, int16_t *_src2, ptrdiff_t _src2stride, int height, int denom,  int _wx0,  int _wx1, int _ox0, int _ox1)
+
+#define WEIGHTING_PROTOTYPES(bitd, opt) /* WEIGHTING_PROTOTYPE for widths 2, 4, 6, 8, 12, 16, 24, 32, 48 and 64 */ \
+        WEIGHTING_PROTOTYPE(2, bitd, opt); \
+        WEIGHTING_PROTOTYPE(4, bitd, opt); \
+        WEIGHTING_PROTOTYPE(6, bitd, opt); \
+        WEIGHTING_PROTOTYPE(8, bitd, opt); \
+        WEIGHTING_PROTOTYPE(12, bitd, opt); \
+        WEIGHTING_PROTOTYPE(16, bitd, opt); \
+        WEIGHTING_PROTOTYPE(24, bitd, opt); \
+        WEIGHTING_PROTOTYPE(32, bitd, opt); \
+        WEIGHTING_PROTOTYPE(48, bitd, opt); \
+        WEIGHTING_PROTOTYPE(64, bitd, opt)
+
+
+///////////////////////////////////////////////////////////////////////////////
+// QPEL_PIXELS EPEL_PIXELS
+///////////////////////////////////////////////////////////////////////////////
+EPEL_PROTOTYPES(pel_pixels ,  8, sse4); /* pel_pixels uses the EPEL width list (includes width 6) */
+EPEL_PROTOTYPES(pel_pixels , 10, sse4);
+EPEL_PROTOTYPES(pel_pixels , 12, sse4);
+///////////////////////////////////////////////////////////////////////////////
+// EPEL
+///////////////////////////////////////////////////////////////////////////////
+EPEL_PROTOTYPES(epel_h ,  8, sse4);
+EPEL_PROTOTYPES(epel_h , 10, sse4);
+EPEL_PROTOTYPES(epel_h , 12, sse4);
+
+EPEL_PROTOTYPES(epel_v ,  8, sse4);
+EPEL_PROTOTYPES(epel_v , 10, sse4);
+EPEL_PROTOTYPES(epel_v , 12, sse4);
+
+EPEL_PROTOTYPES(epel_hv ,  8, sse4);
+EPEL_PROTOTYPES(epel_hv , 10, sse4);
+EPEL_PROTOTYPES(epel_hv , 12, sse4);
+
+///////////////////////////////////////////////////////////////////////////////
+// QPEL
+///////////////////////////////////////////////////////////////////////////////
+QPEL_PROTOTYPES(qpel_h ,  8, sse4);
+QPEL_PROTOTYPES(qpel_h , 10, sse4);
+QPEL_PROTOTYPES(qpel_h , 12, sse4);
+
+QPEL_PROTOTYPES(qpel_v,  8, sse4);
+QPEL_PROTOTYPES(qpel_v, 10, sse4);
+QPEL_PROTOTYPES(qpel_v, 12, sse4);
+
+QPEL_PROTOTYPES(qpel_hv,  8, sse4);
+QPEL_PROTOTYPES(qpel_hv, 10, sse4);
+QPEL_PROTOTYPES(qpel_hv, 12, sse4);
+
+
+WEIGHTING_PROTOTYPES(8, sse4);  /* uni_w / bi_w weighting kernels, one set per bit depth */
+WEIGHTING_PROTOTYPES(10, sse4);
+WEIGHTING_PROTOTYPES(12, sse4);
+
+#endif // AVCODEC_X86_HEVCDSP_H
diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c
index 04203c2..828c081 100644
--- a/libavcodec/x86/hevcdsp_init.c
+++ b/libavcodec/x86/hevcdsp_init.c
@@ -2,29 +2,31 @@
  * Copyright (c) 2013 Seppo Tomperi
  * Copyright (c) 2013 - 2014 Pierre-Edouard Lepere
  *
- * This file is part of Libav.
  *
- * Libav is free software; you can redistribute it and/or
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "config.h"
-
 #include "libavutil/cpu.h"
+#include "libavutil/x86/asm.h"
 #include "libavutil/x86/cpu.h"
-
+#include "libavcodec/get_bits.h" /* required for hevcdsp.h GetBitContext */
 #include "libavcodec/hevcdsp.h"
+#include "libavcodec/x86/hevcdsp.h"
 
 #define LFC_FUNC(DIR, DEPTH, OPT) \
 void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int *tc, uint8_t *no_p, uint8_t *no_q);
@@ -32,40 +34,570 @@ void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *pix,
 #define LFL_FUNC(DIR, DEPTH, OPT) \
 void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int beta, int *tc, uint8_t *no_p, uint8_t *no_q);
 
-#define LFC_FUNCS(type, depth) \
-    LFC_FUNC(h, depth, sse2)   \
-    LFC_FUNC(v, depth, sse2)
+#define LFC_FUNCS(type, depth, opt) /* declares the h and v chroma loop filters; `type` is not used */ \
+    LFC_FUNC(h, depth, opt)  \
+    LFC_FUNC(v, depth, opt)
+
+#define LFL_FUNCS(type, depth, opt) /* declares the h and v luma loop filters; `type` is not used */ \
+    LFL_FUNC(h, depth, opt)  \
+    LFL_FUNC(v, depth, opt)
+
+LFC_FUNCS(uint8_t,   8, sse2)  /* chroma loop-filter prototypes: sse2 and avx, 8/10/12 bit */
+LFC_FUNCS(uint8_t,  10, sse2)
+LFC_FUNCS(uint8_t,  12, sse2)
+LFC_FUNCS(uint8_t,   8, avx)
+LFC_FUNCS(uint8_t,  10, avx)
+LFC_FUNCS(uint8_t,  12, avx)
+LFL_FUNCS(uint8_t,   8, sse2)  /* luma loop-filter prototypes: sse2, ssse3 and avx, 8/10/12 bit */
+LFL_FUNCS(uint8_t,  10, sse2)
+LFL_FUNCS(uint8_t,  12, sse2)
+LFL_FUNCS(uint8_t,   8, ssse3)
+LFL_FUNCS(uint8_t,  10, ssse3)
+LFL_FUNCS(uint8_t,  12, ssse3)
+LFL_FUNCS(uint8_t,   8, avx)
+LFL_FUNCS(uint8_t,  10, avx)
+LFL_FUNCS(uint8_t,  12, avx)
+
+#define IDCT_FUNCS(W, opt) /* declares ff_hevc_idct<W>_dc_{8,10,12}_<opt>(int16_t *coeffs); no trailing semicolon */ \
+void ff_hevc_idct##W##_dc_8_##opt(int16_t *coeffs); \
+void ff_hevc_idct##W##_dc_10_##opt(int16_t *coeffs); \
+void ff_hevc_idct##W##_dc_12_##opt(int16_t *coeffs)
+
+IDCT_FUNCS(4x4,   mmxext); /* each line declares the 8-, 10- and 12-bit DC function for that size and instruction set */
+IDCT_FUNCS(8x8,   mmxext);
+IDCT_FUNCS(8x8,   sse2);
+IDCT_FUNCS(16x16, sse2);
+IDCT_FUNCS(32x32, sse2);
+IDCT_FUNCS(16x16, avx2);
+IDCT_FUNCS(32x32, avx2);
+
+#define mc_rep_func(name, bitd, step, W, opt) \
+void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *_dst, ptrdiff_t dststride,                            \
+                                                uint8_t *_src, ptrdiff_t _srcstride, int height,                \
+                                                intptr_t mx, intptr_t my, int width)                            \
+{                                                                                                               \
+    /* Cover the W-wide block with step-wide columns, one step-wide kernel call per column. */                  \
+    const int px_bytes = (bitd + 7) / 8; /* bytes per source sample */                                          \
+    int col = 0;                                                                                                \
+    while (col < W) {                                                                                           \
+        ff_hevc_put_hevc_##name##step##_##bitd##_##opt(_dst + col, dststride, _src + col * px_bytes,            \
+                                                      _srcstride, height, mx, my, width);                       \
+        col += step;                                                                                            \
+    }                                                                                                           \
+}
+#define mc_rep_uni_func(name, bitd, step, W, opt) \
+void ff_hevc_put_hevc_uni_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride,                        \
+                                                    uint8_t *_src, ptrdiff_t _srcstride, int height,            \
+                                                    intptr_t mx, intptr_t my, int width)                        \
+{                                                                                                               \
+    /* Cover the W-wide block with step-wide columns; dst and src advance by the same byte offset. */           \
+    const int px_bytes = (bitd + 7) / 8; /* bytes per sample */                                                 \
+    int col;                                                                                                    \
+    for (col = 0; col < W; col += step) {                                                                       \
+        const int byte_off = col * px_bytes;                                                                    \
+        ff_hevc_put_hevc_uni_##name##step##_##bitd##_##opt(_dst + byte_off, dststride,                          \
+                                                          _src + byte_off, _srcstride,                          \
+                                                          height, mx, my, width);                               \
+    }                                                                                                           \
+}
+#define mc_rep_bi_func(name, bitd, step, W, opt) \
+void ff_hevc_put_hevc_bi_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, uint8_t *_src,          \
+                                                   ptrdiff_t _srcstride, int16_t* _src2, ptrdiff_t _src2stride, \
+                                                   int height, intptr_t mx, intptr_t my, int width)             \
+{                                                                                                               \
+    /* Cover the W-wide block with step-wide columns; _src2 is int16_t and advances per element. */             \
+    const int px_bytes = (bitd + 7) / 8; /* bytes per dst/src sample */                                         \
+    int col;                                                                                                    \
+    for (col = 0; col < W; col += step) {                                                                       \
+        const int byte_off = col * px_bytes;                                                                    \
+        uint8_t *dst_col  = _dst  + byte_off;                                                                   \
+        uint8_t *src_col  = _src  + byte_off;                                                                   \
+        int16_t *src2_col = _src2 + col;                                                                        \
+        ff_hevc_put_hevc_bi_##name##step##_##bitd##_##opt(dst_col, dststride, src_col, _srcstride, src2_col,    \
+                                                         _src2stride, height, mx, my, width);                   \
+    }                                                                                                           \
+}
+
+#define mc_rep_funcs(name, bitd, step, W, opt) /* defines the plain, uni and bi W-wide wrappers in one go */ \
+    mc_rep_func(name, bitd, step, W, opt);            \
+    mc_rep_uni_func(name, bitd, step, W, opt);        \
+    mc_rep_bi_func(name, bitd, step, W, opt)
+
+
+#if ARCH_X86_64 && HAVE_SSE4_EXTERNAL
+
+mc_rep_funcs(pel_pixels, 8, 16, 64, sse4); /* arguments: kernel, bit depth, step (width of the kernel called), W (width defined), opt */
+mc_rep_funcs(pel_pixels, 8, 16, 48, sse4);
+mc_rep_funcs(pel_pixels, 8, 16, 32, sse4);
+mc_rep_funcs(pel_pixels, 8,  8, 24, sse4);
+mc_rep_funcs(pel_pixels,10,  8, 64, sse4);
+mc_rep_funcs(pel_pixels,10,  8, 48, sse4);
+mc_rep_funcs(pel_pixels,10,  8, 32, sse4);
+mc_rep_funcs(pel_pixels,10,  8, 24, sse4);
+mc_rep_funcs(pel_pixels,10,  8, 16, sse4);
+mc_rep_funcs(pel_pixels,10,  4, 12, sse4);
+mc_rep_funcs(pel_pixels,12,  8, 64, sse4);
+mc_rep_funcs(pel_pixels,12,  8, 48, sse4);
+mc_rep_funcs(pel_pixels,12,  8, 32, sse4);
+mc_rep_funcs(pel_pixels,12,  8, 24, sse4);
+mc_rep_funcs(pel_pixels,12,  8, 16, sse4);
+mc_rep_funcs(pel_pixels,12,  4, 12, sse4);
+
+mc_rep_funcs(epel_h, 8, 16, 64, sse4);
+mc_rep_funcs(epel_h, 8, 16, 48, sse4);
+mc_rep_funcs(epel_h, 8, 16, 32, sse4);
+mc_rep_funcs(epel_h, 8,  8, 24, sse4);
+mc_rep_funcs(epel_h,10,  8, 64, sse4);
+mc_rep_funcs(epel_h,10,  8, 48, sse4);
+mc_rep_funcs(epel_h,10,  8, 32, sse4);
+mc_rep_funcs(epel_h,10,  8, 24, sse4);
+mc_rep_funcs(epel_h,10,  8, 16, sse4);
+mc_rep_funcs(epel_h,10,  4, 12, sse4);
+mc_rep_funcs(epel_h,12,  8, 64, sse4);
+mc_rep_funcs(epel_h,12,  8, 48, sse4);
+mc_rep_funcs(epel_h,12,  8, 32, sse4);
+mc_rep_funcs(epel_h,12,  8, 24, sse4);
+mc_rep_funcs(epel_h,12,  8, 16, sse4);
+mc_rep_funcs(epel_h,12,  4, 12, sse4);
+mc_rep_funcs(epel_v, 8, 16, 64, sse4);
+mc_rep_funcs(epel_v, 8, 16, 48, sse4);
+mc_rep_funcs(epel_v, 8, 16, 32, sse4);
+mc_rep_funcs(epel_v, 8,  8, 24, sse4);
+mc_rep_funcs(epel_v,10,  8, 64, sse4);
+mc_rep_funcs(epel_v,10,  8, 48, sse4);
+mc_rep_funcs(epel_v,10,  8, 32, sse4);
+mc_rep_funcs(epel_v,10,  8, 24, sse4);
+mc_rep_funcs(epel_v,10,  8, 16, sse4);
+mc_rep_funcs(epel_v,10,  4, 12, sse4);
+mc_rep_funcs(epel_v,12,  8, 64, sse4);
+mc_rep_funcs(epel_v,12,  8, 48, sse4);
+mc_rep_funcs(epel_v,12,  8, 32, sse4);
+mc_rep_funcs(epel_v,12,  8, 24, sse4);
+mc_rep_funcs(epel_v,12,  8, 16, sse4);
+mc_rep_funcs(epel_v,12,  4, 12, sse4);
+mc_rep_funcs(epel_hv, 8,  8, 64, sse4); /* the hv kernels are tiled in steps of 8 (4 for width 12) even at 8 bit */
+mc_rep_funcs(epel_hv, 8,  8, 48, sse4);
+mc_rep_funcs(epel_hv, 8,  8, 32, sse4);
+mc_rep_funcs(epel_hv, 8,  8, 24, sse4);
+mc_rep_funcs(epel_hv, 8,  8, 16, sse4);
+mc_rep_funcs(epel_hv, 8,  4, 12, sse4);
+mc_rep_funcs(epel_hv,10,  8, 64, sse4);
+mc_rep_funcs(epel_hv,10,  8, 48, sse4);
+mc_rep_funcs(epel_hv,10,  8, 32, sse4);
+mc_rep_funcs(epel_hv,10,  8, 24, sse4);
+mc_rep_funcs(epel_hv,10,  8, 16, sse4);
+mc_rep_funcs(epel_hv,10,  4, 12, sse4);
+mc_rep_funcs(epel_hv,12,  8, 64, sse4);
+mc_rep_funcs(epel_hv,12,  8, 48, sse4);
+mc_rep_funcs(epel_hv,12,  8, 32, sse4);
+mc_rep_funcs(epel_hv,12,  8, 24, sse4);
+mc_rep_funcs(epel_hv,12,  8, 16, sse4);
+mc_rep_funcs(epel_hv,12,  4, 12, sse4);
+
+mc_rep_funcs(qpel_h, 8, 16, 64, sse4);
+mc_rep_funcs(qpel_h, 8, 16, 48, sse4);
+mc_rep_funcs(qpel_h, 8, 16, 32, sse4);
+mc_rep_funcs(qpel_h, 8,  8, 24, sse4);
+mc_rep_funcs(qpel_h,10,  8, 64, sse4);
+mc_rep_funcs(qpel_h,10,  8, 48, sse4);
+mc_rep_funcs(qpel_h,10,  8, 32, sse4);
+mc_rep_funcs(qpel_h,10,  8, 24, sse4);
+mc_rep_funcs(qpel_h,10,  8, 16, sse4);
+mc_rep_funcs(qpel_h,10,  4, 12, sse4);
+mc_rep_funcs(qpel_h,12,  8, 64, sse4);
+mc_rep_funcs(qpel_h,12,  8, 48, sse4);
+mc_rep_funcs(qpel_h,12,  8, 32, sse4);
+mc_rep_funcs(qpel_h,12,  8, 24, sse4);
+mc_rep_funcs(qpel_h,12,  8, 16, sse4);
+mc_rep_funcs(qpel_h,12,  4, 12, sse4);
+mc_rep_funcs(qpel_v, 8, 16, 64, sse4);
+mc_rep_funcs(qpel_v, 8, 16, 48, sse4);
+mc_rep_funcs(qpel_v, 8, 16, 32, sse4);
+mc_rep_funcs(qpel_v, 8,  8, 24, sse4);
+mc_rep_funcs(qpel_v,10,  8, 64, sse4);
+mc_rep_funcs(qpel_v,10,  8, 48, sse4);
+mc_rep_funcs(qpel_v,10,  8, 32, sse4);
+mc_rep_funcs(qpel_v,10,  8, 24, sse4);
+mc_rep_funcs(qpel_v,10,  8, 16, sse4);
+mc_rep_funcs(qpel_v,10,  4, 12, sse4);
+mc_rep_funcs(qpel_v,12,  8, 64, sse4);
+mc_rep_funcs(qpel_v,12,  8, 48, sse4);
+mc_rep_funcs(qpel_v,12,  8, 32, sse4);
+mc_rep_funcs(qpel_v,12,  8, 24, sse4);
+mc_rep_funcs(qpel_v,12,  8, 16, sse4);
+mc_rep_funcs(qpel_v,12,  4, 12, sse4);
+mc_rep_funcs(qpel_hv, 8,  8, 64, sse4);
+mc_rep_funcs(qpel_hv, 8,  8, 48, sse4);
+mc_rep_funcs(qpel_hv, 8,  8, 32, sse4);
+mc_rep_funcs(qpel_hv, 8,  8, 24, sse4);
+mc_rep_funcs(qpel_hv, 8,  8, 16, sse4);
+mc_rep_funcs(qpel_hv, 8,  4, 12, sse4);
+mc_rep_funcs(qpel_hv,10,  8, 64, sse4);
+mc_rep_funcs(qpel_hv,10,  8, 48, sse4);
+mc_rep_funcs(qpel_hv,10,  8, 32, sse4);
+mc_rep_funcs(qpel_hv,10,  8, 24, sse4);
+mc_rep_funcs(qpel_hv,10,  8, 16, sse4);
+mc_rep_funcs(qpel_hv,10,  4, 12, sse4);
+mc_rep_funcs(qpel_hv,12,  8, 64, sse4);
+mc_rep_funcs(qpel_hv,12,  8, 48, sse4);
+mc_rep_funcs(qpel_hv,12,  8, 32, sse4);
+mc_rep_funcs(qpel_hv,12,  8, 24, sse4);
+mc_rep_funcs(qpel_hv,12,  8, 16, sse4);
+mc_rep_funcs(qpel_hv,12,  4, 12, sse4);
+
+#define mc_rep_uni_w(bitd, step, W, opt) \
+void ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, ptrdiff_t _srcstride,\
+                                               int height, int denom,  int _wx, int _ox)                                \
+{                                                                                                                       \
+    int i;                                                                                                              \
+    int16_t *src;                                                                                                       \
+    uint8_t *dst;                                                                                                       \
+    for (i = 0; i < W; i += step) {                                                                                     \
+        src= _src + i;                                                                                                  \
+        dst= _dst + (i * ((bitd + 7) / 8));                                                                             \
+        ff_hevc_put_hevc_uni_w##step##_##bitd##_##opt(dst, dststride, src, _srcstride,                                  \
+                                                     height, denom, _wx, _ox);                                          \
+    }                                                                                                                   \
+}
+
+mc_rep_uni_w(8, 6, 12, sse4);
+mc_rep_uni_w(8, 8, 16, sse4);
+mc_rep_uni_w(8, 8, 24, sse4);
+mc_rep_uni_w(8, 8, 32, sse4);
+mc_rep_uni_w(8, 8, 48, sse4);
+mc_rep_uni_w(8, 8, 64, sse4);
+
+mc_rep_uni_w(10, 6, 12, sse4);
+mc_rep_uni_w(10, 8, 16, sse4);
+mc_rep_uni_w(10, 8, 24, sse4);
+mc_rep_uni_w(10, 8, 32, sse4);
+mc_rep_uni_w(10, 8, 48, sse4);
+mc_rep_uni_w(10, 8, 64, sse4);
+
+mc_rep_uni_w(12, 6, 12, sse4);
+mc_rep_uni_w(12, 8, 16, sse4);
+mc_rep_uni_w(12, 8, 24, sse4);
+mc_rep_uni_w(12, 8, 32, sse4);
+mc_rep_uni_w(12, 8, 48, sse4);
+mc_rep_uni_w(12, 8, 64, sse4);
+
+#define mc_rep_bi_w(bitd, step, W, opt) \
+void ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, int16_t *_src, ptrdiff_t _srcstride, \
+                                              int16_t *_src2, ptrdiff_t _src2stride, int height,                        \
+                                              int denom,  int _wx0,  int _wx1, int _ox0, int _ox1)                      \
+{                                                                                                                       \
+    int i;                                                                                                              \
+    int16_t *src;                                                                                                       \
+    int16_t *src2;                                                                                                      \
+    uint8_t *dst;                                                                                                       \
+    for (i = 0; i < W; i += step) {                                                                                     \
+        src  = _src  + i;                                                                                               \
+        src2 = _src2 + i;                                                                                               \
+        dst  = _dst  + (i * ((bitd + 7) / 8));                                                                          \
+        ff_hevc_put_hevc_bi_w##step##_##bitd##_##opt(dst, dststride, src, _srcstride, src2, _src2stride,                \
+                                                    height, denom, _wx0, _wx1, _ox0, _ox1);                             \
+    }                                                                                                                   \
+}
+
+mc_rep_bi_w(8, 6, 12, sse4);
+mc_rep_bi_w(8, 8, 16, sse4);
+mc_rep_bi_w(8, 8, 24, sse4);
+mc_rep_bi_w(8, 8, 32, sse4);
+mc_rep_bi_w(8, 8, 48, sse4);
+mc_rep_bi_w(8, 8, 64, sse4);
+
+mc_rep_bi_w(10, 6, 12, sse4);
+mc_rep_bi_w(10, 8, 16, sse4);
+mc_rep_bi_w(10, 8, 24, sse4);
+mc_rep_bi_w(10, 8, 32, sse4);
+mc_rep_bi_w(10, 8, 48, sse4);
+mc_rep_bi_w(10, 8, 64, sse4);
+
+mc_rep_bi_w(12, 6, 12, sse4);
+mc_rep_bi_w(12, 8, 16, sse4);
+mc_rep_bi_w(12, 8, 24, sse4);
+mc_rep_bi_w(12, 8, 32, sse4);
+mc_rep_bi_w(12, 8, 48, sse4);
+mc_rep_bi_w(12, 8, 64, sse4);
+
+#define mc_uni_w_func(name, bitd, W, opt) \
+void ff_hevc_put_hevc_uni_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride,         \
+                                                      uint8_t *_src, ptrdiff_t _srcstride,          \
+                                                      int height, int denom,                        \
+                                                      int _wx, int _ox,                             \
+                                                      intptr_t mx, intptr_t my, int width)          \
+{                                                                                                   \
+    LOCAL_ALIGNED_16(int16_t, temp, [71 * 64]);                                                     \
+    ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, 64, _src, _srcstride, height, mx, my, width); \
+    ff_hevc_put_hevc_uni_w##W##_##bitd##_##opt(_dst, _dststride, temp, 64, height, denom, _wx, _ox);\
+}
+
+#define mc_uni_w_funcs(name, bitd, opt)       \
+        mc_uni_w_func(name, bitd, 4, opt);    \
+        mc_uni_w_func(name, bitd, 8, opt);    \
+        mc_uni_w_func(name, bitd, 12, opt);   \
+        mc_uni_w_func(name, bitd, 16, opt);   \
+        mc_uni_w_func(name, bitd, 24, opt);   \
+        mc_uni_w_func(name, bitd, 32, opt);   \
+        mc_uni_w_func(name, bitd, 48, opt);   \
+        mc_uni_w_func(name, bitd, 64, opt)
+
+mc_uni_w_funcs(pel_pixels, 8, sse4);
+mc_uni_w_func(pel_pixels, 8, 6, sse4);
+mc_uni_w_funcs(epel_h, 8, sse4);
+mc_uni_w_func(epel_h, 8, 6, sse4);
+mc_uni_w_funcs(epel_v, 8, sse4);
+mc_uni_w_func(epel_v, 8, 6, sse4);
+mc_uni_w_funcs(epel_hv, 8, sse4);
+mc_uni_w_func(epel_hv, 8, 6, sse4);
+mc_uni_w_funcs(qpel_h, 8, sse4);
+mc_uni_w_funcs(qpel_v, 8, sse4);
+mc_uni_w_funcs(qpel_hv, 8, sse4);
+
+mc_uni_w_funcs(pel_pixels, 10, sse4);
+mc_uni_w_func(pel_pixels, 10, 6, sse4);
+mc_uni_w_funcs(epel_h, 10, sse4);
+mc_uni_w_func(epel_h, 10, 6, sse4);
+mc_uni_w_funcs(epel_v, 10, sse4);
+mc_uni_w_func(epel_v, 10, 6, sse4);
+mc_uni_w_funcs(epel_hv, 10, sse4);
+mc_uni_w_func(epel_hv, 10, 6, sse4);
+mc_uni_w_funcs(qpel_h, 10, sse4);
+mc_uni_w_funcs(qpel_v, 10, sse4);
+mc_uni_w_funcs(qpel_hv, 10, sse4);
 
-#define LFL_FUNCS(type, depth) \
-    LFL_FUNC(h, depth, ssse3)  \
-    LFL_FUNC(v, depth, ssse3)
+mc_uni_w_funcs(pel_pixels, 12, sse4);
+mc_uni_w_func(pel_pixels, 12, 6, sse4);
+mc_uni_w_funcs(epel_h, 12, sse4);
+mc_uni_w_func(epel_h, 12, 6, sse4);
+mc_uni_w_funcs(epel_v, 12, sse4);
+mc_uni_w_func(epel_v, 12, 6, sse4);
+mc_uni_w_funcs(epel_hv, 12, sse4);
+mc_uni_w_func(epel_hv, 12, 6, sse4);
+mc_uni_w_funcs(qpel_h, 12, sse4);
+mc_uni_w_funcs(qpel_v, 12, sse4);
+mc_uni_w_funcs(qpel_hv, 12, sse4);
+
+#define mc_bi_w_func(name, bitd, W, opt) \
+void ff_hevc_put_hevc_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _dststride,           \
+                                                     uint8_t *_src, ptrdiff_t _srcstride,            \
+                                                     int16_t *_src2, ptrdiff_t _src2stride,          \
+                                                     int height, int denom,                          \
+                                                     int _wx0, int _wx1, int _ox0, int _ox1,         \
+                                                     intptr_t mx, intptr_t my, int width)            \
+{                                                                                                    \
+    LOCAL_ALIGNED_16(int16_t, temp, [71 * 64]);                                                      \
+    ff_hevc_put_hevc_##name##W##_##bitd##_##opt(temp, 64, _src, _srcstride, height, mx, my, width);  \
+    ff_hevc_put_hevc_bi_w##W##_##bitd##_##opt(_dst, _dststride, temp, 64, _src2, _src2stride,        \
+                                             height, denom, _wx0, _wx1, _ox0, _ox1);                 \
+}
+
+#define mc_bi_w_funcs(name, bitd, opt)       \
+        mc_bi_w_func(name, bitd, 4, opt);    \
+        mc_bi_w_func(name, bitd, 8, opt);    \
+        mc_bi_w_func(name, bitd, 12, opt);   \
+        mc_bi_w_func(name, bitd, 16, opt);   \
+        mc_bi_w_func(name, bitd, 24, opt);   \
+        mc_bi_w_func(name, bitd, 32, opt);   \
+        mc_bi_w_func(name, bitd, 48, opt);   \
+        mc_bi_w_func(name, bitd, 64, opt)
+
+mc_bi_w_funcs(pel_pixels, 8, sse4);
+mc_bi_w_func(pel_pixels, 8, 6, sse4);
+mc_bi_w_funcs(epel_h, 8, sse4);
+mc_bi_w_func(epel_h, 8, 6, sse4);
+mc_bi_w_funcs(epel_v, 8, sse4);
+mc_bi_w_func(epel_v, 8, 6, sse4);
+mc_bi_w_funcs(epel_hv, 8, sse4);
+mc_bi_w_func(epel_hv, 8, 6, sse4);
+mc_bi_w_funcs(qpel_h, 8, sse4);
+mc_bi_w_funcs(qpel_v, 8, sse4);
+mc_bi_w_funcs(qpel_hv, 8, sse4);
+
+mc_bi_w_funcs(pel_pixels, 10, sse4);
+mc_bi_w_func(pel_pixels, 10, 6, sse4);
+mc_bi_w_funcs(epel_h, 10, sse4);
+mc_bi_w_func(epel_h, 10, 6, sse4);
+mc_bi_w_funcs(epel_v, 10, sse4);
+mc_bi_w_func(epel_v, 10, 6, sse4);
+mc_bi_w_funcs(epel_hv, 10, sse4);
+mc_bi_w_func(epel_hv, 10, 6, sse4);
+mc_bi_w_funcs(qpel_h, 10, sse4);
+mc_bi_w_funcs(qpel_v, 10, sse4);
+mc_bi_w_funcs(qpel_hv, 10, sse4);
+
+mc_bi_w_funcs(pel_pixels, 12, sse4);
+mc_bi_w_func(pel_pixels, 12, 6, sse4);
+mc_bi_w_funcs(epel_h, 12, sse4);
+mc_bi_w_func(epel_h, 12, 6, sse4);
+mc_bi_w_funcs(epel_v, 12, sse4);
+mc_bi_w_func(epel_v, 12, 6, sse4);
+mc_bi_w_funcs(epel_hv, 12, sse4);
+mc_bi_w_func(epel_hv, 12, 6, sse4);
+mc_bi_w_funcs(qpel_h, 12, sse4);
+mc_bi_w_funcs(qpel_v, 12, sse4);
+mc_bi_w_funcs(qpel_hv, 12, sse4);
+#endif //ARCH_X86_64 && HAVE_SSE4_EXTERNAL
+
+
+#define EPEL_LINKS(pointer, my, mx, fname, bitd, opt )           \
+        PEL_LINK(pointer, 1, my , mx , fname##4 ,  bitd, opt ); \
+        PEL_LINK(pointer, 2, my , mx , fname##6 ,  bitd, opt ); \
+        PEL_LINK(pointer, 3, my , mx , fname##8 ,  bitd, opt ); \
+        PEL_LINK(pointer, 4, my , mx , fname##12,  bitd, opt ); \
+        PEL_LINK(pointer, 5, my , mx , fname##16,  bitd, opt ); \
+        PEL_LINK(pointer, 6, my , mx , fname##24,  bitd, opt ); \
+        PEL_LINK(pointer, 7, my , mx , fname##32,  bitd, opt ); \
+        PEL_LINK(pointer, 8, my , mx , fname##48,  bitd, opt ); \
+        PEL_LINK(pointer, 9, my , mx , fname##64,  bitd, opt )
+#define QPEL_LINKS(pointer, my, mx, fname, bitd, opt)           \
+        PEL_LINK(pointer, 1, my , mx , fname##4 ,  bitd, opt ); \
+        PEL_LINK(pointer, 3, my , mx , fname##8 ,  bitd, opt ); \
+        PEL_LINK(pointer, 4, my , mx , fname##12,  bitd, opt ); \
+        PEL_LINK(pointer, 5, my , mx , fname##16,  bitd, opt ); \
+        PEL_LINK(pointer, 6, my , mx , fname##24,  bitd, opt ); \
+        PEL_LINK(pointer, 7, my , mx , fname##32,  bitd, opt ); \
+        PEL_LINK(pointer, 8, my , mx , fname##48,  bitd, opt ); \
+        PEL_LINK(pointer, 9, my , mx , fname##64,  bitd, opt )
 
-LFC_FUNCS(uint8_t, 8)
-LFC_FUNCS(uint8_t, 10)
-LFL_FUNCS(uint8_t, 8)
-LFL_FUNCS(uint8_t, 10)
 
 void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
 {
     int cpu_flags = av_get_cpu_flags();
 
     if (bit_depth == 8) {
+        if (EXTERNAL_MMXEXT(cpu_flags)) {
+            c->idct_dc[0] = ff_hevc_idct4x4_dc_8_mmxext;
+            c->idct_dc[1] = ff_hevc_idct8x8_dc_8_mmxext;
+        }
         if (EXTERNAL_SSE2(cpu_flags)) {
             c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
             c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;
+            if (ARCH_X86_64) {
+                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_sse2;
+                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2;
+            }
+
+            c->idct_dc[1] = ff_hevc_idct8x8_dc_8_sse2;
+            c->idct_dc[2] = ff_hevc_idct16x16_dc_8_sse2;
+            c->idct_dc[3] = ff_hevc_idct32x32_dc_8_sse2;
         }
         if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
             c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
             c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
         }
+        if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
+
+            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels,  8, sse4);
+            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,      8, sse4);
+            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,      8, sse4);
+            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,     8, sse4);
+
+            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 8, sse4);
+            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     8, sse4);
+            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     8, sse4);
+            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    8, sse4);
+        }
+        if (EXTERNAL_AVX(cpu_flags)) {
+            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_avx;
+            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_avx;
+            if (ARCH_X86_64) {
+                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx;
+                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx;
+            }
+        }
+        if (EXTERNAL_AVX2(cpu_flags)) {
+            c->idct_dc[2] = ff_hevc_idct16x16_dc_8_avx2;
+            c->idct_dc[3] = ff_hevc_idct32x32_dc_8_avx2;
+        }
     } else if (bit_depth == 10) {
+        if (EXTERNAL_MMXEXT(cpu_flags)) {
+            c->idct_dc[0] = ff_hevc_idct4x4_dc_10_mmxext;
+            c->idct_dc[1] = ff_hevc_idct8x8_dc_10_mmxext;
+        }
         if (EXTERNAL_SSE2(cpu_flags)) {
             c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
             c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;
+            if (ARCH_X86_64) {
+                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_sse2;
+                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2;
+            }
+
+            c->idct_dc[1] = ff_hevc_idct8x8_dc_10_sse2;
+            c->idct_dc[2] = ff_hevc_idct16x16_dc_10_sse2;
+            c->idct_dc[3] = ff_hevc_idct32x32_dc_10_sse2;
         }
         if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
             c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
             c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
         }
+        if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
+            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 10, sse4);
+            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,     10, sse4);
+            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,     10, sse4);
+            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,    10, sse4);
+
+            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 10, sse4);
+            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     10, sse4);
+            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     10, sse4);
+            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    10, sse4);
+        }
+        if (EXTERNAL_AVX(cpu_flags)) {
+            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_avx;
+            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_avx;
+            if (ARCH_X86_64) {
+                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx;
+                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx;
+            }
+        }
+        if (EXTERNAL_AVX2(cpu_flags)) {
+            c->idct_dc[2] = ff_hevc_idct16x16_dc_10_avx2;
+            c->idct_dc[3] = ff_hevc_idct32x32_dc_10_avx2;
+
+        }
+    } else if (bit_depth == 12) {
+        if (EXTERNAL_MMXEXT(cpu_flags)) {
+            c->idct_dc[0] = ff_hevc_idct4x4_dc_12_mmxext;
+            c->idct_dc[1] = ff_hevc_idct8x8_dc_12_mmxext;
+        }
+        if (EXTERNAL_SSE2(cpu_flags)) {
+            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2;
+            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2;
+            if (ARCH_X86_64) {
+                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2;
+                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2;
+            }
+
+            c->idct_dc[1] = ff_hevc_idct8x8_dc_12_sse2;
+            c->idct_dc[2] = ff_hevc_idct16x16_dc_12_sse2;
+            c->idct_dc[3] = ff_hevc_idct32x32_dc_12_sse2;
+        }
+        if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) {
+            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3;
+            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_ssse3;
+        }
+        if (EXTERNAL_SSE4(cpu_flags) && ARCH_X86_64) {
+            EPEL_LINKS(c->put_hevc_epel, 0, 0, pel_pixels, 12, sse4);
+            EPEL_LINKS(c->put_hevc_epel, 0, 1, epel_h,     12, sse4);
+            EPEL_LINKS(c->put_hevc_epel, 1, 0, epel_v,     12, sse4);
+            EPEL_LINKS(c->put_hevc_epel, 1, 1, epel_hv,    12, sse4);
+
+            QPEL_LINKS(c->put_hevc_qpel, 0, 0, pel_pixels, 12, sse4);
+            QPEL_LINKS(c->put_hevc_qpel, 0, 1, qpel_h,     12, sse4);
+            QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v,     12, sse4);
+            QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv,    12, sse4);
+        }
+        if (EXTERNAL_AVX(cpu_flags)) {
+            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_avx;
+            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_avx;
+            if (ARCH_X86_64) {
+                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx;
+                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx;
+            }
+        }
+        if (EXTERNAL_AVX2(cpu_flags)) {
+            c->idct_dc[2] = ff_hevc_idct16x16_dc_12_avx2;
+            c->idct_dc[3] = ff_hevc_idct32x32_dc_12_avx2;
+        }
     }
 }
diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm
index 073f7f9..a702b8b 100644
--- a/libavcodec/x86/hpeldsp.asm
+++ b/libavcodec/x86/hpeldsp.asm
@@ -1,20 +1,27 @@
 ;******************************************************************************
+;*
+;* Copyright (c) 2000-2001 Fabrice Bellard <fabrice@bellard.org>
+;* Copyright (c)      Nick Kurshev <nickols_k@mail.ru>
+;* Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at>
+;* Copyright (c) 2002 Zdenek Kabelac <kabi@informatics.muni.cz>
+;* Copyright (c) 2013 Daniel Kang
+;*
 ;* SIMD-optimized halfpel functions
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
@@ -22,26 +29,48 @@
 
 SECTION_RODATA
 cextern pb_1
+cextern pw_2
+pw_8192: times 8 dw (1<<13)
+pb_interleave16: db 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15
+pb_interleave8:  db 0, 4, 1, 5, 2, 6, 3, 7
 
 SECTION_TEXT
 
 ; void ff_put_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro PUT_PIXELS8_X2 0
+%if cpuflag(sse2)
+cglobal put_pixels16_x2, 4,5,4
+%else
 cglobal put_pixels8_x2, 4,5
+%endif
     lea          r4, [r2*2]
 .loop:
-    mova         m0, [r1]
-    mova         m1, [r1+r2]
-    PAVGB        m0, [r1+1]
-    PAVGB        m1, [r1+r2+1]
+    movu         m0, [r1+1]
+    movu         m1, [r1+r2+1]
+%if cpuflag(sse2)
+    movu         m2, [r1]
+    movu         m3, [r1+r2]
+    pavgb        m0, m2
+    pavgb        m1, m3
+%else
+    PAVGB        m0, [r1]
+    PAVGB        m1, [r1+r2]
+%endif
     mova       [r0], m0
     mova    [r0+r2], m1
     add          r1, r4
     add          r0, r4
-    mova         m0, [r1]
-    mova         m1, [r1+r2]
-    PAVGB        m0, [r1+1]
-    PAVGB        m1, [r1+r2+1]
+    movu         m0, [r1+1]
+    movu         m1, [r1+r2+1]
+%if cpuflag(sse2)
+    movu         m2, [r1]
+    movu         m3, [r1+r2]
+    pavgb        m0, m2
+    pavgb        m1, m3
+%else
+    PAVGB        m0, [r1]
+    PAVGB        m1, [r1+r2]
+%endif
     add          r1, r4
     mova       [r0], m0
     mova    [r0+r2], m1
@@ -99,6 +128,9 @@ INIT_MMX mmxext
 PUT_PIXELS_16
 INIT_MMX 3dnow
 PUT_PIXELS_16
+; Under INIT_XMM sse2 the 8_X2 macro emits put_pixels16_x2 (see its cpuflag(sse2) branch)
+INIT_XMM sse2
+PUT_PIXELS8_X2
 
 
 ; void ff_put_no_rnd_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
@@ -191,20 +223,24 @@ PUT_NO_RND_PIXELS8_X2_EXACT
 
 ; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro PUT_PIXELS8_Y2 0
+%if cpuflag(sse2)
+cglobal put_pixels16_y2, 4,5,3
+%else
 cglobal put_pixels8_y2, 4,5
+%endif
     lea          r4, [r2*2]
-    mova         m0, [r1]
+    movu         m0, [r1]
     sub          r0, r2
 .loop:
-    mova         m1, [r1+r2]
-    mova         m2, [r1+r4]
+    movu         m1, [r1+r2]
+    movu         m2, [r1+r4]
     add          r1, r4
     PAVGB        m0, m1
     PAVGB        m1, m2
     mova    [r0+r2], m0
     mova    [r0+r4], m1
-    mova         m1, [r1+r2]
-    mova         m0, [r1+r4]
+    movu         m1, [r1+r2]
+    movu         m0, [r1+r4]
     add          r0, r4
     add          r1, r4
     PAVGB        m2, m1
@@ -221,6 +257,9 @@ INIT_MMX mmxext
 PUT_PIXELS8_Y2
 INIT_MMX 3dnow
 PUT_PIXELS8_Y2
+; actually, put_pixels16_y2_sse2
+INIT_XMM sse2
+PUT_PIXELS8_Y2
 
 
 ; void ff_put_no_rnd_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
@@ -334,26 +373,48 @@ AVG_PIXELS8
 
 ; void ff_avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro AVG_PIXELS8_X2 0
+%if cpuflag(sse2)
+cglobal avg_pixels16_x2, 4,5,4
+%else
 cglobal avg_pixels8_x2, 4,5
+%endif
     lea          r4, [r2*2]
+%if notcpuflag(mmxext)
+    pcmpeqd      m5, m5
+    paddb        m5, m5
+%endif
 .loop:
-    mova         m0, [r1]
-    mova         m2, [r1+r2]
-    PAVGB        m0, [r1+1]
-    PAVGB        m2, [r1+r2+1]
-    PAVGB        m0, [r0]
-    PAVGB        m2, [r0+r2]
+    movu         m0, [r1]
+    movu         m2, [r1+r2]
+%if cpuflag(sse2)
+    movu         m1, [r1+1]
+    movu         m3, [r1+r2+1]
+    pavgb        m0, m1
+    pavgb        m2, m3
+%else
+    PAVGB        m0, [r1+1], m3, m5
+    PAVGB        m2, [r1+r2+1], m4, m5
+%endif
+    PAVGB        m0, [r0], m3, m5
+    PAVGB        m2, [r0+r2], m4, m5
     add          r1, r4
     mova       [r0], m0
     mova    [r0+r2], m2
-    mova         m0, [r1]
-    mova         m2, [r1+r2]
-    PAVGB        m0, [r1+1]
-    PAVGB        m2, [r1+r2+1]
+    movu         m0, [r1]
+    movu         m2, [r1+r2]
+%if cpuflag(sse2)
+    movu         m1, [r1+1]
+    movu         m3, [r1+r2+1]
+    pavgb        m0, m1
+    pavgb        m2, m3
+%else
+    PAVGB        m0, [r1+1], m3, m5
+    PAVGB        m2, [r1+r2+1], m4, m5
+%endif
     add          r0, r4
     add          r1, r4
-    PAVGB        m0, [r0]
-    PAVGB        m2, [r0+r2]
+    PAVGB        m0, [r0], m3, m5
+    PAVGB        m2, [r0+r2], m4, m5
     mova       [r0], m0
     mova    [r0+r2], m2
     add          r0, r4
@@ -362,40 +423,45 @@ cglobal avg_pixels8_x2, 4,5
     REP_RET
 %endmacro
 
+INIT_MMX mmx
+AVG_PIXELS8_X2
 INIT_MMX mmxext
 AVG_PIXELS8_X2
 INIT_MMX 3dnow
 AVG_PIXELS8_X2
+; actually avg_pixels16_x2
+INIT_XMM sse2
+AVG_PIXELS8_X2
 
 
 ; void ff_avg_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro AVG_PIXELS8_Y2 0
+%if cpuflag(sse2)
+cglobal avg_pixels16_y2, 4,5,3
+%else
 cglobal avg_pixels8_y2, 4,5
+%endif
     lea          r4, [r2*2]
-    mova         m0, [r1]
+    movu         m0, [r1]
     sub          r0, r2
 .loop:
-    mova         m1, [r1+r2]
-    mova         m2, [r1+r4]
+    movu         m1, [r1+r2]
+    movu         m2, [r1+r4]
     add          r1, r4
     PAVGB        m0, m1
     PAVGB        m1, m2
-    mova         m3, [r0+r2]
-    mova         m4, [r0+r4]
-    PAVGB        m0, m3
-    PAVGB        m1, m4
+    PAVGB        m0, [r0+r2]
+    PAVGB        m1, [r0+r4]
     mova    [r0+r2], m0
     mova    [r0+r4], m1
-    mova         m1, [r1+r2]
-    mova         m0, [r1+r4]
+    movu         m1, [r1+r2]
+    movu         m0, [r1+r4]
     PAVGB        m2, m1
     PAVGB        m1, m0
     add          r0, r4
     add          r1, r4
-    mova         m3, [r0+r2]
-    mova         m4, [r0+r4]
-    PAVGB        m2, m3
-    PAVGB        m1, m4
+    PAVGB        m2, [r0+r2]
+    PAVGB        m1, [r0+r4]
     mova    [r0+r2], m2
     mova    [r0+r4], m1
     add          r0, r4
@@ -408,11 +474,16 @@ INIT_MMX mmxext
 AVG_PIXELS8_Y2
 INIT_MMX 3dnow
 AVG_PIXELS8_Y2
+; actually avg_pixels16_y2
+INIT_XMM sse2
+AVG_PIXELS8_Y2
 
 
 ; void ff_avg_pixels8_xy2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
-%macro AVG_PIXELS8_XY2 0
-cglobal avg_pixels8_xy2, 4,5
+; Note: this is not correctly rounded, and is therefore used for
+; non-bitexact output.
+%macro AVG_APPROX_PIXELS8_XY2 0
+cglobal avg_approx_pixels8_xy2, 4,5
     mova         m6, [pb_1]
     lea          r4, [r2*2]
     mova         m0, [r1]
@@ -449,6 +520,160 @@ cglobal avg_pixels8_xy2, 4,5
 %endmacro
 
 INIT_MMX mmxext
-AVG_PIXELS8_XY2
+AVG_APPROX_PIXELS8_XY2
+INIT_MMX 3dnow
+AVG_APPROX_PIXELS8_XY2
+
+
+; void ff_<%1>_pixels16_xy2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); %1 = put|avg, emitted as <%1>_pixels8_xy2 without SSE2
+%macro SET_PIXELS_XY2 1
+%if cpuflag(sse2)
+cglobal %1_pixels16_xy2, 4,5,8
+%else
+cglobal %1_pixels8_xy2, 4,5
+%endif
+    pxor        m7, m7
+    mova        m6, [pw_2]
+    movu        m0, [r1]
+    movu        m4, [r1+1]
+    mova        m1, m0
+    mova        m5, m4
+    punpcklbw   m0, m7
+    punpcklbw   m4, m7
+    punpckhbw   m1, m7
+    punpckhbw   m5, m7
+    paddusw     m4, m0
+    paddusw     m5, m1
+    xor         r4, r4
+    add         r1, r2
+.loop:
+    movu        m0, [r1+r4]
+    movu        m2, [r1+r4+1]
+    mova        m1, m0
+    mova        m3, m2
+    punpcklbw   m0, m7
+    punpcklbw   m2, m7
+    punpckhbw   m1, m7
+    punpckhbw   m3, m7
+    paddusw     m0, m2
+    paddusw     m1, m3
+    paddusw     m4, m6
+    paddusw     m5, m6
+    paddusw     m4, m0
+    paddusw     m5, m1
+    psrlw       m4, 2
+    psrlw       m5, 2
+%ifidn %1, avg
+    mova        m3, [r0+r4]
+    packuswb    m4, m5
+    PAVGB       m4, m3
+%else
+    packuswb    m4, m5
+%endif
+    mova   [r0+r4], m4
+    add         r4, r2
+
+    movu        m2, [r1+r4]
+    movu        m4, [r1+r4+1]
+    mova        m3, m2
+    mova        m5, m4
+    punpcklbw   m2, m7
+    punpcklbw   m4, m7
+    punpckhbw   m3, m7
+    punpckhbw   m5, m7
+    paddusw     m4, m2
+    paddusw     m5, m3
+    paddusw     m0, m6
+    paddusw     m1, m6
+    paddusw     m0, m4
+    paddusw     m1, m5
+    psrlw       m0, 2
+    psrlw       m1, 2
+%ifidn %1, avg
+    mova        m3, [r0+r4]
+    packuswb    m0, m1
+    PAVGB       m0, m3
+%else
+    packuswb    m0, m1
+%endif
+    mova   [r0+r4], m0
+    add         r4, r2
+    sub        r3d, 2
+    jnz .loop
+    REP_RET
+%endmacro
+
+INIT_MMX mmxext
+SET_PIXELS_XY2 avg
 INIT_MMX 3dnow
-AVG_PIXELS8_XY2
+SET_PIXELS_XY2 avg
+INIT_XMM sse2
+SET_PIXELS_XY2 put
+SET_PIXELS_XY2 avg
+
+%macro SSSE3_PIXELS_XY2 1-2
+%if %0 == 2 ; XMM (16-pixel) variant: %2 is the XMM register count passed to cglobal
+cglobal %1_pixels16_xy2, 4,5,%2
+    mova        m4, [pb_interleave16]
+%else
+cglobal %1_pixels8_xy2, 4,5
+    mova        m4, [pb_interleave8]
+%endif
+    mova        m5, [pb_1]
+    movu        m0, [r1]
+    movu        m1, [r1+1]
+    pmaddubsw   m0, m5
+    pmaddubsw   m1, m5
+    xor         r4, r4
+    add         r1, r2
+.loop:
+    movu        m2, [r1+r4]
+    movu        m3, [r1+r4+1]
+    pmaddubsw   m2, m5
+    pmaddubsw   m3, m5
+    paddusw     m0, m2
+    paddusw     m1, m3
+    pmulhrsw    m0, [pw_8192]
+    pmulhrsw    m1, [pw_8192]
+%ifidn %1, avg
+    mova        m6, [r0+r4]
+    packuswb    m0, m1
+    pshufb      m0, m4
+    pavgb       m0, m6
+%else
+    packuswb    m0, m1
+    pshufb      m0, m4
+%endif
+    mova   [r0+r4], m0
+    add         r4, r2
+
+    movu        m0, [r1+r4]
+    movu        m1, [r1+r4+1]
+    pmaddubsw   m0, m5
+    pmaddubsw   m1, m5
+    paddusw     m2, m0
+    paddusw     m3, m1
+    pmulhrsw    m2, [pw_8192]
+    pmulhrsw    m3, [pw_8192]
+%ifidn %1, avg
+    mova        m6, [r0+r4]
+    packuswb    m2, m3
+    pshufb      m2, m4
+    pavgb       m2, m6
+%else
+    packuswb    m2, m3
+    pshufb      m2, m4
+%endif
+    mova   [r0+r4], m2
+    add         r4, r2
+    sub        r3d, 2
+    jnz .loop
+    REP_RET
+%endmacro
+
+INIT_MMX ssse3
+SSSE3_PIXELS_XY2 put
+SSSE3_PIXELS_XY2 avg
+INIT_XMM ssse3
+SSSE3_PIXELS_XY2 put, 6
+SSSE3_PIXELS_XY2 avg, 7
diff --git a/libavcodec/x86/hpeldsp.h b/libavcodec/x86/hpeldsp.h
index 47b0b8b..5fae990 100644
--- a/libavcodec/x86/hpeldsp.h
+++ b/libavcodec/x86/hpeldsp.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,12 +27,27 @@ void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels,
 
 void ff_avg_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
                             ptrdiff_t line_size, int h);
+void ff_avg_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels,
+                               ptrdiff_t line_size, int h);
+void ff_avg_pixels8_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
+                               ptrdiff_t line_size, int h);
+
 void ff_avg_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h);
+void ff_avg_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void ff_avg_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
+                               ptrdiff_t line_size, int h);
 
 void ff_put_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
                             ptrdiff_t line_size, int h);
+void ff_put_pixels8_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
 void ff_put_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h);
+void ff_put_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels,
+                              ptrdiff_t line_size, int h);
+void ff_put_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels,
+                               ptrdiff_t line_size, int h);
 
 #endif /* AVCODEC_X86_HPELDSP_H */
diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c
index 1cc3bac..bcae22f 100644
--- a/libavcodec/x86/hpeldsp_init.c
+++ b/libavcodec/x86/hpeldsp_init.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2000, 2001 Fabrice Bellard
  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
@@ -40,6 +40,14 @@ void ff_put_pixels16_x2_mmxext(uint8_t *block, const uint8_t *pixels,
                                ptrdiff_t line_size, int h);
 void ff_put_pixels16_x2_3dnow(uint8_t *block, const uint8_t *pixels,
                               ptrdiff_t line_size, int h);
+void ff_put_pixels16_x2_sse2(uint8_t *block, const uint8_t *pixels,
+                             ptrdiff_t line_size, int h);
+void ff_avg_pixels16_x2_sse2(uint8_t *block, const uint8_t *pixels,
+                             ptrdiff_t line_size, int h);
+void ff_put_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels,
+                             ptrdiff_t line_size, int h);
+void ff_avg_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels,
+                             ptrdiff_t line_size, int h);
 void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
                                      ptrdiff_t line_size, int h);
 void ff_put_no_rnd_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels,
@@ -74,10 +82,12 @@ void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
                               ptrdiff_t line_size, int h);
 void ff_avg_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h);
-void ff_avg_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels,
-                               ptrdiff_t line_size, int h);
 void ff_avg_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels,
                               ptrdiff_t line_size, int h);
+void ff_avg_approx_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels,
+                                      ptrdiff_t line_size, int h);
+void ff_avg_approx_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels,
+                                     ptrdiff_t line_size, int h);
 
 #define avg_pixels8_mmx         ff_avg_pixels8_mmx
 #define avg_pixels8_x2_mmx      ff_avg_pixels8_x2_mmx
@@ -156,32 +166,49 @@ CALL_2X_PIXELS_EXPORT(ff_put_pixels16_xy2_mmx, ff_put_pixels8_xy2_mmx, 8)
     CALL_2X_PIXELS(avg_pixels16           ## CPUEXT, ff_avg_pixels8           ## CPUEXT, 8) \
     CALL_2X_PIXELS(avg_pixels16_x2        ## CPUEXT, ff_avg_pixels8_x2        ## CPUEXT, 8) \
     CALL_2X_PIXELS(avg_pixels16_y2        ## CPUEXT, ff_avg_pixels8_y2        ## CPUEXT, 8) \
-    CALL_2X_PIXELS(avg_pixels16_xy2       ## CPUEXT, ff_avg_pixels8_xy2       ## CPUEXT, 8)
+    CALL_2X_PIXELS(avg_pixels16_xy2       ## CPUEXT, ff_avg_pixels8_xy2       ## CPUEXT, 8) \
+    CALL_2X_PIXELS(avg_approx_pixels16_xy2## CPUEXT, ff_avg_approx_pixels8_xy2## CPUEXT, 8)
 
 HPELDSP_AVG_PIXELS16(_3dnow)
 HPELDSP_AVG_PIXELS16(_mmxext)
 
 #endif /* HAVE_YASM */
 
+#define SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU)                             \
+    if (HAVE_MMX_EXTERNAL)                                                  \
+    c->PFX ## _pixels_tab IDX [0] = PFX ## _pixels ## SIZE ## _     ## CPU;
+
+#if HAVE_MMX_INLINE
 #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU)                                     \
     do {                                                                        \
-        c->PFX ## _pixels_tab IDX [0] = PFX ## _pixels ## SIZE ## _     ## CPU; \
+        SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU)                                 \
         c->PFX ## _pixels_tab IDX [1] = PFX ## _pixels ## SIZE ## _x2_  ## CPU; \
         c->PFX ## _pixels_tab IDX [2] = PFX ## _pixels ## SIZE ## _y2_  ## CPU; \
         c->PFX ## _pixels_tab IDX [3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU; \
     } while (0)
+#else
+#define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU)                                     \
+    do {                                                                        \
+        SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU)                                 \
+    } while (0)
+#endif
 
 static void hpeldsp_init_mmx(HpelDSPContext *c, int flags, int cpu_flags)
 {
-#if HAVE_MMX_INLINE
     SET_HPEL_FUNCS(put,        [0], 16, mmx);
     SET_HPEL_FUNCS(put_no_rnd, [0], 16, mmx);
     SET_HPEL_FUNCS(avg,        [0], 16, mmx);
     SET_HPEL_FUNCS(avg_no_rnd,    , 16, mmx);
     SET_HPEL_FUNCS(put,        [1],  8, mmx);
     SET_HPEL_FUNCS(put_no_rnd, [1],  8, mmx);
-    SET_HPEL_FUNCS(avg,        [1],  8, mmx);
-#endif /* HAVE_MMX_INLINE */
+    if (HAVE_MMX_EXTERNAL) {
+        c->avg_pixels_tab[1][0] = ff_avg_pixels8_mmx;
+        c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_mmx;
+    }
+#if HAVE_MMX_INLINE
+    c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx;
+    c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmx;
+#endif
 }
 
 static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int cpu_flags)
@@ -193,6 +220,7 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int cpu_flags)
     c->avg_pixels_tab[0][0] = avg_pixels16_mmxext;
     c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmxext;
     c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmxext;
+    c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmxext;
 
     c->put_pixels_tab[1][1] = ff_put_pixels8_x2_mmxext;
     c->put_pixels_tab[1][2] = ff_put_pixels8_y2_mmxext;
@@ -200,6 +228,7 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int cpu_flags)
     c->avg_pixels_tab[1][0] = ff_avg_pixels8_mmxext;
     c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_mmxext;
     c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_mmxext;
+    c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext;
 
     if (!(flags & CODEC_FLAG_BITEXACT)) {
         c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmxext;
@@ -207,8 +236,8 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags, int cpu_flags)
         c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_mmxext;
         c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_mmxext;
 
-        c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmxext;
-        c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext;
+        c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_mmxext;
+        c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_mmxext;
     }
 
     if (flags & CODEC_FLAG_BITEXACT && CONFIG_VP3_DECODER) {
@@ -227,6 +256,7 @@ static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags, int cpu_flags)
     c->avg_pixels_tab[0][0] = avg_pixels16_3dnow;
     c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;
     c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow;
+    c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
 
     c->put_pixels_tab[1][1] = ff_put_pixels8_x2_3dnow;
     c->put_pixels_tab[1][2] = ff_put_pixels8_y2_3dnow;
@@ -234,6 +264,7 @@ static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags, int cpu_flags)
     c->avg_pixels_tab[1][0] = ff_avg_pixels8_3dnow;
     c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_3dnow;
     c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_3dnow;
+    c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_3dnow;
 
     if (!(flags & CODEC_FLAG_BITEXACT)){
         c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
@@ -241,8 +272,8 @@ static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags, int cpu_flags)
         c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_3dnow;
         c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_3dnow;
 
-        c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
-        c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_3dnow;
+        c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_3dnow;
+        c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_3dnow;
     }
 
     if (flags & CODEC_FLAG_BITEXACT && CONFIG_VP3_DECODER) {
@@ -259,11 +290,27 @@ static void hpeldsp_init_sse2(HpelDSPContext *c, int flags, int cpu_flags)
         // these functions are slower than mmx on AMD, but faster on Intel
         c->put_pixels_tab[0][0]        = ff_put_pixels16_sse2;
         c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_sse2;
+        c->put_pixels_tab[0][1]        = ff_put_pixels16_x2_sse2;
+        c->put_pixels_tab[0][2]        = ff_put_pixels16_y2_sse2;
+        c->put_pixels_tab[0][3]        = ff_put_pixels16_xy2_sse2;
         c->avg_pixels_tab[0][0]        = ff_avg_pixels16_sse2;
+        c->avg_pixels_tab[0][1]        = ff_avg_pixels16_x2_sse2;
+        c->avg_pixels_tab[0][2]        = ff_avg_pixels16_y2_sse2;
+        c->avg_pixels_tab[0][3]        = ff_avg_pixels16_xy2_sse2;
     }
 #endif /* HAVE_SSE2_EXTERNAL */
 }
 
+static void hpeldsp_init_ssse3(HpelDSPContext *c, int flags, int cpu_flags)
+{   /* flags and cpu_flags are accepted but not read in this function */
+#if HAVE_SSSE3_EXTERNAL /* only slot [3] of the put/avg tables is overridden */
+    c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_ssse3; /* put, 16-wide */
+    c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_ssse3;  /* put,  8-wide */
+    c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_ssse3; /* avg, 16-wide */
+    c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_ssse3;  /* avg,  8-wide */
+#endif
+}
+
 av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)
 {
     int cpu_flags = av_get_cpu_flags();
@@ -279,4 +326,7 @@ av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)
 
     if (EXTERNAL_SSE2(cpu_flags))
         hpeldsp_init_sse2(c, flags, cpu_flags);
+
+    if (EXTERNAL_SSSE3(cpu_flags))
+        hpeldsp_init_ssse3(c, flags, cpu_flags);
 }
diff --git a/libavcodec/x86/hpeldsp_mmx.c b/libavcodec/x86/hpeldsp_mmx.c
deleted file mode 100644
index c93c78e..0000000
--- a/libavcodec/x86/hpeldsp_mmx.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * MMX-optimized avg/put pixel routines
- *
- * Copyright (c) 2001 Fabrice Bellard
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stddef.h>
-#include <stdint.h>
-
-#include "config.h"
-#include "hpeldsp.h"
-#include "inline_asm.h"
-
-#if HAVE_MMX_INLINE
-
-void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels,
-                           ptrdiff_t line_size, int h)
-{
-    MOVQ_BFE(mm6);
-    JUMPALIGN();
-    do {
-        __asm__ volatile(
-            "movq  %1, %%mm0            \n\t"
-            "movq  1%1, %%mm1           \n\t"
-            "movq  %0, %%mm3            \n\t"
-            PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6)
-            PAVGB_MMX(%%mm3, %%mm2, %%mm0, %%mm6)
-            "movq  %%mm0, %0            \n\t"
-            :"+m"(*block)
-            :"m"(*pixels)
-            :"memory");
-        pixels += line_size;
-        block += line_size;
-    } while (--h);
-}
-
-#endif /* HAVE_MMX_INLINE */
diff --git a/libavcodec/x86/hpeldsp_rnd_template.c b/libavcodec/x86/hpeldsp_rnd_template.c
index d854e8a..c8a68fd 100644
--- a/libavcodec/x86/hpeldsp_rnd_template.c
+++ b/libavcodec/x86/hpeldsp_rnd_template.c
@@ -7,20 +7,20 @@
  * mostly rewritten by Michael Niedermayer <michaelni@gmx.at>
  * and improved by Zdenek Kabelac <kabi@users.sf.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/x86/huffyuvdsp.asm b/libavcodec/x86/huffyuvdsp.asm
index 436abc8..cc48556 100644
--- a/libavcodec/x86/huffyuvdsp.asm
+++ b/libavcodec/x86/huffyuvdsp.asm
@@ -1,21 +1,22 @@
 ;******************************************************************************
 ;* SIMD-optimized HuffYUV functions
 ;* Copyright (c) 2008 Loren Merritt
+;* Copyright (c) 2014 Christophe Gisquet
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
@@ -33,64 +34,72 @@ SECTION_TEXT
 ; void ff_add_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
 ;                                     const uint8_t *diff, int w,
 ;                                     int *left, int *left_top)
-INIT_MMX mmxext
-cglobal add_hfyu_median_pred, 6,6,0, dst, top, diff, w, left, left_top
-    movq    mm0, [topq]
-    movq    mm2, mm0
-    movd    mm4, [left_topq]
-    psllq   mm2, 8
-    movq    mm1, mm0
-    por     mm4, mm2
-    movd    mm3, [leftq]
-    psubb   mm0, mm4 ; t-tl
+%macro HFYU_MEDIAN 0
+cglobal add_hfyu_median_pred, 6,6,8, dst, top, diff, w, left, left_top
+    movu    m0, [topq]
+    mova    m2, m0
+    movd    m4, [left_topq]
+    LSHIFT  m2, 1
+    mova    m1, m0
+    por     m4, m2
+    movd    m3, [leftq]
+    psubb   m0, m4 ; t-tl
     add    dstq, wq
     add    topq, wq
     add   diffq, wq
     neg      wq
     jmp .skip
 .loop:
-    movq    mm4, [topq+wq]
-    movq    mm0, mm4
-    psllq   mm4, 8
-    por     mm4, mm1
-    movq    mm1, mm0 ; t
-    psubb   mm0, mm4 ; t-tl
+    movu    m4, [topq+wq]
+    mova    m0, m4
+    LSHIFT  m4, 1
+    por     m4, m1
+    mova    m1, m0 ; t
+    psubb   m0, m4 ; t-tl
 .skip:
-    movq    mm2, [diffq+wq]
+    movu    m2, [diffq+wq]
 %assign i 0
-%rep 8
-    movq    mm4, mm0
-    paddb   mm4, mm3 ; t-tl+l
-    movq    mm5, mm3
-    pmaxub  mm3, mm1
-    pminub  mm5, mm1
-    pminub  mm3, mm4
-    pmaxub  mm3, mm5 ; median
-    paddb   mm3, mm2 ; +residual
+%rep mmsize
+    mova    m4, m0
+    paddb   m4, m3 ; t-tl+l
+    mova    m5, m3
+    pmaxub  m3, m1
+    pminub  m5, m1
+    pminub  m3, m4
+    pmaxub  m3, m5 ; median
+    paddb   m3, m2 ; +residual
 %if i==0
-    movq    mm7, mm3
-    psllq   mm7, 56
+    mova    m7, m3
+    LSHIFT  m7, mmsize-1
 %else
-    movq    mm6, mm3
-    psrlq   mm7, 8
-    psllq   mm6, 56
-    por     mm7, mm6
+    mova    m6, m3
+    RSHIFT  m7, 1
+    LSHIFT  m6, mmsize-1
+    por     m7, m6
 %endif
-%if i<7
-    psrlq   mm0, 8
-    psrlq   mm1, 8
-    psrlq   mm2, 8
+%if i<mmsize-1
+    RSHIFT  m0, 1
+    RSHIFT  m1, 1
+    RSHIFT  m2, 1
 %endif
 %assign i i+1
 %endrep
-    movq [dstq+wq], mm7
-    add      wq, 8
+    movu [dstq+wq], m7
+    add      wq, mmsize
     jl .loop
     movzx   r2d, byte [dstq-1]
     mov [leftq], r2d
     movzx   r2d, byte [topq-1]
     mov [left_topq], r2d
     RET
+%endmacro
+
+%if ARCH_X86_32
+INIT_MMX mmxext
+HFYU_MEDIAN
+%endif
+INIT_XMM sse2
+HFYU_MEDIAN
 
 
 %macro ADD_HFYU_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned
@@ -163,3 +172,82 @@ cglobal add_hfyu_left_pred, 3,3,7, dst, src, w, left
     ADD_HFYU_LEFT_LOOP 0, 1
 .src_unaligned:
     ADD_HFYU_LEFT_LOOP 0, 0
+
+%macro ADD_BYTES 0 ; dst[i] += src[i] (bytewise, wrapping) for i in [0, w)
+cglobal add_bytes, 3,4,2, dst, src, w, size
+    mov  sizeq, wq
+    and  sizeq, -2*mmsize          ; size = w rounded down to a multiple of 2*mmsize
+    jz  .2                         ; no full 2*mmsize chunk: only the byte loop runs
+    add   dstq, sizeq              ; point dst/src just past the vector part
+    add   srcq, sizeq
+    neg  sizeq                     ; negative offset that counts up to zero
+.1:                                ; vector loop: 2*mmsize bytes per pass
+    mova    m0, [srcq + sizeq]     ; NOTE(review): mova presumably requires aligned src/dst in the XMM build - confirm callers
+    mova    m1, [srcq + sizeq + mmsize]
+    paddb   m0, [dstq + sizeq]
+    paddb   m1, [dstq + sizeq + mmsize]
+    mova   [dstq + sizeq], m0
+    mova   [dstq + sizeq + mmsize], m1
+    add  sizeq, 2*mmsize
+    jl .1
+.2:
+    and     wq, 2*mmsize-1         ; w = bytes left over after the vector part
+    jz    .end
+    add   dstq, wq                 ; same end-pointer / negative-offset scheme
+    add   srcq, wq
+    neg     wq
+.3:                                ; byte loop for the remainder
+    mov  sizeb, [srcq + wq]        ; byte part of the size register reused as scratch
+    add [dstq + wq], sizeb
+    inc     wq
+    jl .3
+.end:
+    REP_RET
+%endmacro
+
+%if ARCH_X86_32
+INIT_MMX mmx
+ADD_BYTES
+%endif
+INIT_XMM sse2
+ADD_BYTES
+
+; void ff_add_hfyu_left_pred_bgr32_{mmx,sse2}(uint8_t *dst, const uint8_t *src,
+;                                             intptr_t w, uint8_t *left)
+%macro LEFT_BGR32 0 ; running bytewise sum over 4-byte pixels, seeded from *left
+cglobal add_hfyu_left_pred_bgr32, 4,4,3, dst, src, w, left
+    shl           wq, 2            ; scale w by 4: one unit of w is one 4-byte pixel
+    movd          m0, [leftq]      ; 4-byte carry-in pixel
+    lea         dstq, [dstq + wq]  ; point dst/src at the end of the row
+    lea         srcq, [srcq + wq]
+    LSHIFT        m0, mmsize-4     ; move the carry-in to the highest dword of m0
+    neg           wq               ; negative byte offset that counts up to zero
+.loop:
+    movu          m1, [srcq+wq]
+    mova          m2, m1
+%if mmsize == 8
+    punpckhdq     m0, m0           ; replicate the highest dword (last output pixel)
+%endif
+    LSHIFT        m1, 4            ; shift up by one pixel ...
+    paddb         m1, m2           ; ... so each pixel gains its predecessor
+%if mmsize == 16
+    pshufd        m0, m0, q3333    ; replicate the highest dword (last output pixel)
+    mova          m2, m1
+    LSHIFT        m1, 8            ; second step: add the partial sums two pixels back,
+    paddb         m1, m2           ; completing the running sum across all 4 pixels
+%endif
+    paddb         m0, m1           ; add the carried-in pixel to every lane
+    movu   [dstq+wq], m0
+    add           wq, mmsize       ; NOTE(review): whole vectors only; if 4*w is not a multiple of mmsize the last pass appears to touch bytes past the row end - confirm buffers are padded
+    jl         .loop
+    movd          m0, [dstq-4]     ; last pixel of the row just written
+    movd     [leftq], m0           ; hand it back through *left
+    REP_RET
+%endmacro
+
+%if ARCH_X86_32
+INIT_MMX mmx
+LEFT_BGR32
+%endif
+INIT_XMM sse2
+LEFT_BGR32
diff --git a/libavcodec/x86/huffyuvdsp_init.c b/libavcodec/x86/huffyuvdsp_init.c
index 75537d7..3ced3c0 100644
--- a/libavcodec/x86/huffyuvdsp_init.c
+++ b/libavcodec/x86/huffyuvdsp_init.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (c) 2009 Loren Merritt <lorenm@u.washington.edu>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,20 +25,29 @@
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/huffyuvdsp.h"
 
+void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, intptr_t w);
+void ff_add_bytes_sse2(uint8_t *dst, uint8_t *src, intptr_t w);
+
 void ff_add_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
-                                    const uint8_t *diff, int w,
+                                    const uint8_t *diff, intptr_t w,
                                     int *left, int *left_top);
+void ff_add_hfyu_median_pred_sse2(uint8_t *dst, const uint8_t *top,
+                                  const uint8_t *diff, intptr_t w,
+                                  int *left, int *left_top);
 
 int  ff_add_hfyu_left_pred_ssse3(uint8_t *dst, const uint8_t *src,
-                                 int w, int left);
+                                 intptr_t w, int left);
 int  ff_add_hfyu_left_pred_sse4(uint8_t *dst, const uint8_t *src,
-                                int w, int left);
+                                intptr_t w, int left);
 
-#if HAVE_INLINE_ASM
+void ff_add_hfyu_left_pred_bgr32_mmx(uint8_t *dst, const uint8_t *src,
+                                     intptr_t w, uint8_t *left);
+void ff_add_hfyu_left_pred_bgr32_sse2(uint8_t *dst, const uint8_t *src,
+                                      intptr_t w, uint8_t *left);
 
-#if HAVE_7REGS
+#if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32
 static void add_hfyu_median_pred_cmov(uint8_t *dst, const uint8_t *top,
-                                      const uint8_t *diff, int w,
+                                      const uint8_t *diff, intptr_t w,
                                       int *left, int *left_top)
 {
     x86_reg w2 = -w;
@@ -72,56 +81,34 @@ static void add_hfyu_median_pred_cmov(uint8_t *dst, const uint8_t *top,
     *left     = l;
     *left_top = tl;
 }
-#endif /* HAVE_7REGS */
-
-static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w)
-{
-    x86_reg i = 0;
-
-    __asm__ volatile (
-        "jmp          2f                \n\t"
-        "1:                             \n\t"
-        "movq   (%1, %0), %%mm0         \n\t"
-        "movq   (%2, %0), %%mm1         \n\t"
-        "paddb     %%mm0, %%mm1         \n\t"
-        "movq      %%mm1, (%2, %0)      \n\t"
-        "movq  8(%1, %0), %%mm0         \n\t"
-        "movq  8(%2, %0), %%mm1         \n\t"
-        "paddb     %%mm0, %%mm1         \n\t"
-        "movq      %%mm1, 8(%2, %0)     \n\t"
-        "add         $16, %0            \n\t"
-        "2:                             \n\t"
-        "cmp          %3, %0            \n\t"
-        "js           1b                \n\t"
-        : "+r" (i)
-        : "r" (src), "r" (dst), "r" ((x86_reg) w - 15));
-
-    for (; i < w; i++)
-        dst[i + 0] += src[i + 0];
-}
-
-#endif /* HAVE_INLINE_ASM */
+#endif
 
 av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
 {
     int cpu_flags = av_get_cpu_flags();
 
-#if HAVE_INLINE_ASM
-#if HAVE_7REGS
+#if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32
     if (cpu_flags & AV_CPU_FLAG_CMOV)
         c->add_hfyu_median_pred = add_hfyu_median_pred_cmov;
-#endif /* HAVE_7REGS */
+#endif
 
-    if (INLINE_MMX(cpu_flags))
-        c->add_bytes = add_bytes_mmx;
-#endif /* HAVE_INLINE_ASM */
+    if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) {
+        c->add_bytes = ff_add_bytes_mmx;
+        c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_mmx;
+    }
 
-    if (EXTERNAL_MMXEXT(cpu_flags)) {
+    if (ARCH_X86_32 && EXTERNAL_MMXEXT(cpu_flags)) {
         /* slower than cmov version on AMD */
         if (!(cpu_flags & AV_CPU_FLAG_3DNOW))
             c->add_hfyu_median_pred = ff_add_hfyu_median_pred_mmxext;
     }
 
+    if (EXTERNAL_SSE2(cpu_flags)) {
+        c->add_bytes            = ff_add_bytes_sse2;
+        c->add_hfyu_median_pred = ff_add_hfyu_median_pred_sse2;
+        c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_sse2;
+    }
+
     if (EXTERNAL_SSSE3(cpu_flags)) {
         c->add_hfyu_left_pred = ff_add_hfyu_left_pred_ssse3;
         if (cpu_flags & AV_CPU_FLAG_SSE4) // not really SSE4, just slow on Conroe
diff --git a/libavcodec/x86/huffyuvencdsp_mmx.c b/libavcodec/x86/huffyuvencdsp_mmx.c
index 8ffaced..63d8e3c 100644
--- a/libavcodec/x86/huffyuvencdsp_mmx.c
+++ b/libavcodec/x86/huffyuvencdsp_mmx.c
@@ -5,20 +5,20 @@
  *
  * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -31,10 +31,11 @@
 
 #if HAVE_INLINE_ASM
 
-static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w)
+static void diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w)
 {
     x86_reg i = 0;
 
+    if (w >= 16)
     __asm__ volatile (
         "1:                             \n\t"
         "movq  (%2, %0), %%mm0          \n\t"
diff --git a/libavcodec/x86/idct_mmx_xvid.c b/libavcodec/x86/idct_mmx_xvid.c
index 920ea4c..289120d 100644
--- a/libavcodec/x86/idct_mmx_xvid.c
+++ b/libavcodec/x86/idct_mmx_xvid.c
@@ -22,20 +22,20 @@
  *
  * conversion to gcc syntax by Michael Niedermayer
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public License
- * along with Libav; if not, write to the Free Software Foundation,
+ * along with FFmpeg; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/x86/idct_sse2_xvid.c b/libavcodec/x86/idct_sse2_xvid.c
index aadeb12..ce2abe4 100644
--- a/libavcodec/x86/idct_sse2_xvid.c
+++ b/libavcodec/x86/idct_sse2_xvid.c
@@ -9,7 +9,7 @@
  *
  * Originally from dct/x86_asm/fdct_sse2_skal.asm in Xvid.
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
  * Vertical pass is an implementation of the scheme:
  *  Loeffler C., Ligtenberg A., and Moschytz C.S.:
@@ -23,22 +23,21 @@
  *
  * More details at http://skal.planet-d.net/coding/dct.html
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public License
- * along with Libav; if not, write to the Free Software Foundation,
+ * along with FFmpeg; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "libavutil/internal.h"
 #include "libavutil/mem.h"
 #include "libavutil/x86/asm.h"
 #include "idct_xvid.h"
@@ -148,7 +147,7 @@ DECLARE_ASM_CONST(16, int32_t, walkenIdctRounders)[] = {
 
 #endif
 
-#define ROUND(x) "paddd   "MANGLE(x)
+#define ROUND(x) "paddd   "x
 
 #define JZ(reg, to)                         \
     "testl     "reg","reg"            \n\t" \
@@ -344,17 +343,17 @@ DECLARE_ASM_CONST(16, int32_t, walkenIdctRounders)[] = {
     "movdqa   %%xmm6, 4*16("dct")     \n\t" \
     "movdqa   "SREG2", 7*16("dct")    \n\t"
 
-inline void ff_idct_xvid_sse2(short *block)
+av_extern_inline void ff_idct_xvid_sse2(short *block)
 {
     __asm__ volatile(
     "movq     "MANGLE(m127)", %%mm0                              \n\t"
-    iMTX_MULT("(%0)",     MANGLE(iTab1), ROUND(walkenIdctRounders),      PUT_EVEN(ROW0))
-    iMTX_MULT("1*16(%0)", MANGLE(iTab2), ROUND(walkenIdctRounders+1*16), PUT_ODD(ROW1))
-    iMTX_MULT("2*16(%0)", MANGLE(iTab3), ROUND(walkenIdctRounders+2*16), PUT_EVEN(ROW2))
+    iMTX_MULT("(%0)",     MANGLE(iTab1), ROUND(MANGLE(walkenIdctRounders)),      PUT_EVEN(ROW0))
+    iMTX_MULT("1*16(%0)", MANGLE(iTab2), ROUND("1*16+"MANGLE(walkenIdctRounders)), PUT_ODD(ROW1))
+    iMTX_MULT("2*16(%0)", MANGLE(iTab3), ROUND("2*16+"MANGLE(walkenIdctRounders)), PUT_EVEN(ROW2))
 
     TEST_TWO_ROWS("3*16(%0)", "4*16(%0)", "%%eax", "%%ecx", CLEAR_ODD(ROW3), CLEAR_EVEN(ROW4))
     JZ("%%eax", "1f")
-    iMTX_MULT("3*16(%0)", MANGLE(iTab4), ROUND(walkenIdctRounders+3*16), PUT_ODD(ROW3))
+    iMTX_MULT("3*16(%0)", MANGLE(iTab4), ROUND("3*16+"MANGLE(walkenIdctRounders)), PUT_ODD(ROW3))
 
     TEST_TWO_ROWS("5*16(%0)", "6*16(%0)", "%%eax", "%%edx", CLEAR_ODD(ROW5), CLEAR_EVEN(ROW6))
     TEST_ONE_ROW("7*16(%0)", "%%esi", CLEAR_ODD(ROW7))
@@ -369,20 +368,20 @@ inline void ff_idct_xvid_sse2(short *block)
     "2:                                                          \n\t"
     iMTX_MULT("4*16(%0)", MANGLE(iTab1), "#", PUT_EVEN(ROW4))
     "3:                                                          \n\t"
-    iMTX_MULT("5*16(%0)", MANGLE(iTab4), ROUND(walkenIdctRounders+4*16), PUT_ODD(ROW5))
+    iMTX_MULT("5*16(%0)", MANGLE(iTab4), ROUND("4*16+"MANGLE(walkenIdctRounders)), PUT_ODD(ROW5))
     JZ("%%edx", "1f")
     "4:                                                          \n\t"
-    iMTX_MULT("6*16(%0)", MANGLE(iTab3), ROUND(walkenIdctRounders+5*16), PUT_EVEN(ROW6))
+    iMTX_MULT("6*16(%0)", MANGLE(iTab3), ROUND("5*16+"MANGLE(walkenIdctRounders)), PUT_EVEN(ROW6))
     JZ("%%esi", "1f")
     "5:                                                          \n\t"
-    iMTX_MULT("7*16(%0)", MANGLE(iTab2), ROUND(walkenIdctRounders+5*16), PUT_ODD(ROW7))
+    iMTX_MULT("7*16(%0)", MANGLE(iTab2), ROUND("5*16+"MANGLE(walkenIdctRounders)), PUT_ODD(ROW7))
 #if ARCH_X86_32
     iLLM_HEAD
 #endif
     iLLM_PASS("%0")
     "6:                                                          \n\t"
     : "+r"(block)
-    :
+    : NAMED_CONSTRAINTS_ARRAY(m127,iTab1,walkenIdctRounders,iTab2,iTab3,iTab4,tan3,tan1,tan2,sqrt2)
     : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" ,
                    "%xmm4" , "%xmm5" , "%xmm6" , "%xmm7" ,)
 #if ARCH_X86_64
diff --git a/libavcodec/x86/idct_xvid.h b/libavcodec/x86/idct_xvid.h
index aea28ba..7a2847b 100644
--- a/libavcodec/x86/idct_xvid.h
+++ b/libavcodec/x86/idct_xvid.h
@@ -1,20 +1,20 @@
 /*
  * XVID MPEG-4 VIDEO CODEC
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/x86/idctdsp.asm b/libavcodec/x86/idctdsp.asm
new file mode 100644
index 0000000..44a1a6e
--- /dev/null
+++ b/libavcodec/x86/idctdsp.asm
@@ -0,0 +1,80 @@
+;******************************************************************************
+;* SIMD-optimized IDCT-related routines
+;* Copyright (c) 2008 Loren Merritt
+;* Copyright (c) 2003-2013 Michael Niedermayer
+;* Copyright (c) 2013 Daniel Kang
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+cextern pb_80
+
+SECTION_TEXT
+
+;--------------------------------------------------------------------------
+;void ff_put_signed_pixels_clamped(const int16_t *block, uint8_t *pixels,
+;                                  int line_size)
+;--------------------------------------------------------------------------
+
+%macro PUT_SIGNED_PIXELS_CLAMPED_HALF 1 ; %1 = byte offset into block; reads 64 bytes, writes four 8-byte rows
+    mova     m1, [blockq+mmsize*0+%1]   ; load 16-bit coefficients
+    mova     m2, [blockq+mmsize*2+%1]
+%if mmsize == 8
+    mova     m3, [blockq+mmsize*4+%1]   ; 8-byte registers: four of them are needed to cover the 64 input bytes
+    mova     m4, [blockq+mmsize*6+%1]
+%endif
+    packsswb m1, [blockq+mmsize*1+%1]   ; words -> bytes with signed saturation (this is the clamp)
+    packsswb m2, [blockq+mmsize*3+%1]
+%if mmsize == 8
+    packsswb m3, [blockq+mmsize*5+%1]
+    packsswb m4, [blockq+mmsize*7+%1]
+%endif
+    paddb    m1, m0                     ; add the per-byte constant held in m0 (loaded by the invoking code)
+    paddb    m2, m0
+%if mmsize == 8
+    paddb    m3, m0
+    paddb    m4, m0
+    movq     [pixelsq+lsizeq*0], m1     ; one register per output row
+    movq     [pixelsq+lsizeq*1], m2
+    movq     [pixelsq+lsizeq*2], m3
+    movq     [pixelsq+lsize3q ], m4
+%else
+    movq     [pixelsq+lsizeq*0], m1     ; low  half of m1 -> row 0
+    movhps   [pixelsq+lsizeq*1], m1     ; high half of m1 -> row 1
+    movq     [pixelsq+lsizeq*2], m2     ; low  half of m2 -> row 2
+    movhps   [pixelsq+lsize3q ], m2     ; high half of m2 -> row 3
+%endif
+%endmacro
+
+%macro PUT_SIGNED_PIXELS_CLAMPED 1 ; %1 is forwarded as cglobal's third numeric argument
+cglobal put_signed_pixels_clamped, 3, 4, %1, block, pixels, lsize, lsize3
+    mova     m0, [pb_80]                ; m0 = external constant pb_80, added to every output byte
+    lea      lsize3q, [lsizeq*3]        ; NOTE(review): the C prototype declares line_size as int, yet lsizeq is used full-width without sign extension -- confirm on x86-64
+    PUT_SIGNED_PIXELS_CLAMPED_HALF 0    ; first 64 bytes of block -> rows 0-3
+    lea      pixelsq, [pixelsq+lsizeq*4] ; advance the destination by four rows
+    PUT_SIGNED_PIXELS_CLAMPED_HALF 64   ; next 64 bytes of block -> rows 4-7
+    RET
+%endmacro
+
+INIT_MMX mmx
+PUT_SIGNED_PIXELS_CLAMPED 0
+INIT_XMM sse2
+PUT_SIGNED_PIXELS_CLAMPED 3
diff --git a/libavcodec/x86/idctdsp.h b/libavcodec/x86/idctdsp.h
index 22df3dd..9b7177a 100644
--- a/libavcodec/x86/idctdsp.h
+++ b/libavcodec/x86/idctdsp.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,5 +27,7 @@ void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
                                int line_size);
 void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
                                       int line_size);
+void ff_put_signed_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels,
+                                       int line_size);
 
 #endif /* AVCODEC_X86_IDCTDSP_H */
diff --git a/libavcodec/x86/idctdsp_init.c b/libavcodec/x86/idctdsp_init.c
index a0d681a..6f54d80 100644
--- a/libavcodec/x86/idctdsp_init.c
+++ b/libavcodec/x86/idctdsp_init.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -59,11 +59,12 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
 
     if (INLINE_MMX(cpu_flags)) {
         c->put_pixels_clamped        = ff_put_pixels_clamped_mmx;
-        c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
         c->add_pixels_clamped        = ff_add_pixels_clamped_mmx;
 
         if (!high_bit_depth &&
+            avctx->lowres == 0 &&
             (avctx->idct_algo == FF_IDCT_AUTO ||
+             avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
              avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
                 c->idct_put  = ff_simple_idct_put_mmx;
                 c->idct_add  = ff_simple_idct_add_mmx;
@@ -71,4 +72,10 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
                 c->perm_type = FF_IDCT_PERM_SIMPLE;
         }
     }
+    if (EXTERNAL_MMX(cpu_flags)) {
+        c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
+    }
+    if (EXTERNAL_SSE2(cpu_flags)) {
+        c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2;
+    }
 }
diff --git a/libavcodec/x86/idctdsp_mmx.c b/libavcodec/x86/idctdsp_mmx.c
index 7285b1d..a72b941 100644
--- a/libavcodec/x86/idctdsp_mmx.c
+++ b/libavcodec/x86/idctdsp_mmx.c
@@ -5,20 +5,20 @@
  *
  * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -88,41 +88,6 @@ void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
         : "memory");
 }
 
-#define put_signed_pixels_clamped_mmx_half(off)             \
-    "movq          "#off"(%2), %%mm1        \n\t"           \
-    "movq     16 + "#off"(%2), %%mm2        \n\t"           \
-    "movq     32 + "#off"(%2), %%mm3        \n\t"           \
-    "movq     48 + "#off"(%2), %%mm4        \n\t"           \
-    "packsswb  8 + "#off"(%2), %%mm1        \n\t"           \
-    "packsswb 24 + "#off"(%2), %%mm2        \n\t"           \
-    "packsswb 40 + "#off"(%2), %%mm3        \n\t"           \
-    "packsswb 56 + "#off"(%2), %%mm4        \n\t"           \
-    "paddb              %%mm0, %%mm1        \n\t"           \
-    "paddb              %%mm0, %%mm2        \n\t"           \
-    "paddb              %%mm0, %%mm3        \n\t"           \
-    "paddb              %%mm0, %%mm4        \n\t"           \
-    "movq               %%mm1, (%0)         \n\t"           \
-    "movq               %%mm2, (%0, %3)     \n\t"           \
-    "movq               %%mm3, (%0, %3, 2)  \n\t"           \
-    "movq               %%mm4, (%0, %1)     \n\t"
-
-void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
-                                      int line_size)
-{
-    x86_reg line_skip = line_size;
-    x86_reg line_skip3;
-
-    __asm__ volatile (
-        "movq "MANGLE(ff_pb_80)", %%mm0     \n\t"
-        "lea         (%3, %3, 2), %1        \n\t"
-        put_signed_pixels_clamped_mmx_half(0)
-        "lea         (%0, %3, 4), %0        \n\t"
-        put_signed_pixels_clamped_mmx_half(64)
-        : "+&r" (pixels), "=&r" (line_skip3)
-        : "r" (block), "r" (line_skip)
-        : "memory");
-}
-
 void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
                                int line_size)
 {
diff --git a/libavcodec/x86/imdct36.asm b/libavcodec/x86/imdct36.asm
index 633fcd9..ce30b42 100644
--- a/libavcodec/x86/imdct36.asm
+++ b/libavcodec/x86/imdct36.asm
@@ -2,20 +2,20 @@
 ;* 36 point SSE-optimized IMDCT transform
 ;* Copyright (c) 2011 Vitor Sessak
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
@@ -50,7 +50,7 @@ ps_cosh_sse3:  dd 1.0, -0.50190991877167369479,  1.0, -5.73685662283492756461
                dd 1.0, -0.51763809020504152469,  1.0, -1.93185165257813657349
                dd 1.0, -0.55168895948124587824, -1.0,  1.18310079157624925896
                dd 1.0, -0.61038729438072803416, -1.0,  0.87172339781054900991
-               dd 1.0,  0.70710678118654752439,  0.0,  0.0
+               dd 1.0, -0.70710678118654752439,  0.0,  0.0
 
 costabs:  times 4 dd  0.98480773
           times 4 dd  0.93969262
@@ -129,6 +129,19 @@ SECTION_TEXT
 %endif
 %endmacro
 
+%macro BUTTERF2 3 ; %1 = in/out register, %2 = scratch register, %3 = byte offset into the coefficient table
+%if cpuflag(sse3)
+    mulps    %1, %1, [ps_cosh_sse3 + %3] ; scale by the SSE3 variant of the table
+    PSHUFD   %2, %1, 0xe1                ; %2 = %1 with dwords 0 and 1 swapped (dwords 2 and 3 unchanged)
+    addsubps %1, %1, %2                  ; subtract in even lanes, add in odd lanes
+%else
+    mulps    %1, [ps_cosh + %3]
+    PSHUFD   %2, %1, 0xe1                ; same swap of dwords 0 and 1
+    xorps    %1, [ps_p1m1p1m1]           ; presumably a +,-,+,- sign mask (defined outside this hunk) -- TODO confirm
+    addps    %1, %2
+%endif
+%endmacro
+
 %macro STORE 4
     movhlps %2, %1
     movss   [%3       ], %1
@@ -279,11 +292,7 @@ cglobal imdct36_float, 4,4,9, out, buf, in, win
     BUTTERF  m7, m2, 16
     BUTTERF  m3, m6, 32
     BUTTERF  m4, m1, 48
-
-    mulps   m5, m5, [ps_cosh + 64]
-    PSHUFD  m1, m5, 0xe1
-    xorps   m5, m5, [ps_p1m1p1m1]
-    addps   m5, m5, m1
+    BUTTERF2 m5, m1, 64
 
     ; permutates:
     ; m0    0  1  2  3     =>     2  6 10 14   m1
@@ -358,8 +367,10 @@ cglobal imdct36_float, 4,4,9, out, buf, in, win
     RET
 %endmacro
 
+%if ARCH_X86_32
 INIT_XMM sse
 DEFINE_IMDCT
+%endif
 
 INIT_XMM sse2
 DEFINE_IMDCT
@@ -370,8 +381,10 @@ DEFINE_IMDCT
 INIT_XMM ssse3
 DEFINE_IMDCT
 
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 DEFINE_IMDCT
+%endif
 
 INIT_XMM sse
 
@@ -716,5 +729,7 @@ cglobal four_imdct36_float, 5,5,16, out, buf, in, win, tmp
 INIT_XMM sse
 DEFINE_FOUR_IMDCT
 
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 DEFINE_FOUR_IMDCT
+%endif
diff --git a/libavcodec/x86/inline_asm.h b/libavcodec/x86/inline_asm.h
index e4affab..c2f1bf0 100644
--- a/libavcodec/x86/inline_asm.h
+++ b/libavcodec/x86/inline_asm.h
@@ -1,20 +1,20 @@
 /*
  * inline assembly helper macros
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/x86/apedsp.asm b/libavcodec/x86/lossless_audiodsp.asm
index d721ebd..64b769f 100644
--- a/libavcodec/x86/apedsp.asm
+++ b/libavcodec/x86/lossless_audiodsp.asm
@@ -1,20 +1,20 @@
 ;******************************************************************************
 ;* Copyright (c) 2008 Loren Merritt
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
@@ -58,14 +58,7 @@ cglobal scalarproduct_and_madd_int16, 4,4,8, v1, v2, v3, order, mul
     mova    [v1q + orderq + mmsize], m3
     add     orderq, mmsize*2
     jl .loop
-%if mmsize == 16
-    movhlps m0, m6
-    paddd   m6, m0
-    pshuflw m0, m6, 0x4e
-%else
-    pshufw  m0, m6, 0x4e
-%endif
-    paddd   m6, m0
+    HADDD   m6, m0
     movd   eax, m6
     RET
 %endmacro
@@ -159,9 +152,6 @@ SCALARPRODUCT_LOOP 4
 SCALARPRODUCT_LOOP 2
 SCALARPRODUCT_LOOP 0
 .end:
-    movhlps m0, m6
-    paddd   m6, m0
-    pshuflw m0, m6, 0x4e
-    paddd   m6, m0
+    HADDD   m6, m0
     movd   eax, m6
     RET
diff --git a/libavcodec/x86/apedsp_init.c b/libavcodec/x86/lossless_audiodsp_init.c
index f692c2b..4879dff 100644
--- a/libavcodec/x86/apedsp_init.c
+++ b/libavcodec/x86/lossless_audiodsp_init.c
@@ -1,25 +1,25 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "libavutil/attributes.h"
 #include "libavutil/cpu.h"
 #include "libavutil/x86/cpu.h"
-#include "libavcodec/apedsp.h"
+#include "libavcodec/lossless_audiodsp.h"
 
 int32_t ff_scalarproduct_and_madd_int16_mmxext(int16_t *v1, const int16_t *v2,
                                                const int16_t *v3,
@@ -31,7 +31,7 @@ int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2,
                                               const int16_t *v3,
                                               int order, int mul);
 
-av_cold void ff_apedsp_init_x86(APEDSPContext *c)
+av_cold void ff_llauddsp_init_x86(LLAudDSPContext *c)
 {
     int cpu_flags = av_get_cpu_flags();
 
diff --git a/libavcodec/x86/lossless_videodsp.asm b/libavcodec/x86/lossless_videodsp.asm
new file mode 100644
index 0000000..e6c23e7
--- /dev/null
+++ b/libavcodec/x86/lossless_videodsp.asm
@@ -0,0 +1,294 @@
+;******************************************************************************
+;* SIMD lossless video DSP utils
+;* Copyright (c) 2008 Loren Merritt
+;* Copyright (c) 2014 Michael Niedermayer
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+pb_ef: times 8 db 14,15 ; pshufb control: every word <- word 7 (bytes 14,15)
+pb_67: times 8 db  6, 7 ; pshufb control: every word <- word 3 (bytes 6,7)
+pb_zzzz2323zzzzabab: db -1,-1,-1,-1, 2, 3, 2, 3,-1,-1,-1,-1,10,11,10,11 ; per 8 bytes: words 0,1 zeroed, words 2,3 <- word 1 of that half
+pb_zzzzzzzz67676767: db -1,-1,-1,-1,-1,-1,-1,-1, 6, 7, 6, 7, 6, 7, 6, 7 ; low 8 bytes zeroed, upper four words <- word 3
+
+SECTION_TEXT
+
+%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
+    movd    m4, maskd
+    SPLATW  m4, m4                  ; mask into every 16-bit lane of m4 (SPLATW comes from x86util.asm)
+    add     wd, wd                  ; sample count -> byte count
+    test    wq, 2*mmsize - 1
+    jz %%.tomainloop                ; byte count already a multiple of 2*mmsize: no scalar tail
+    push  tmpq
+%%.wordloop:                        ; scalar loop: peel one word at a time off the end
+    sub     wq, 2
+%ifidn %2, add
+    mov   tmpw, [srcq+wq]
+    add   tmpw, [dstq+wq]
+%else
+    mov   tmpw, [src1q+wq]
+    sub   tmpw, [src2q+wq]
+%endif
+    and   tmpw, maskw
+    mov     [dstq+wq], tmpw
+    test    wq, 2*mmsize - 1
+    jnz %%.wordloop                 ; repeat until the remaining byte count is a multiple of 2*mmsize
+    pop   tmpq
+%%.tomainloop:
+%ifidn %2, add
+    add     srcq, wq                ; point just past the remaining data ...
+%else
+    add     src1q, wq
+    add     src2q, wq
+%endif
+    add     dstq, wq
+    neg     wq                      ; ... and index it with a negative offset counting up to zero
+    jz      %%.end                  ; nothing left for the vector loop
+%%.loop:                            ; vector loop: two registers (2*mmsize bytes) per iteration
+%ifidn %2, add
+    mov%1   m0, [srcq+wq]
+    mov%1   m1, [dstq+wq]
+    mov%1   m2, [srcq+wq+mmsize]
+    mov%1   m3, [dstq+wq+mmsize]
+%else
+    mov%1   m0, [src1q+wq]
+    mov%1   m1, [src2q+wq]
+    mov%1   m2, [src1q+wq+mmsize]
+    mov%1   m3, [src2q+wq+mmsize]
+%endif
+    p%2w    m0, m1                  ; paddw or psubw, depending on %2
+    p%2w    m2, m3
+    pand    m0, m4                  ; apply the mask to every word
+    pand    m2, m4
+    mov%1   [dstq+wq]       , m0
+    mov%1   [dstq+wq+mmsize], m2
+    add     wq, 2*mmsize
+    jl %%.loop
+%%.end:
+    RET
+%endmacro
+
+INIT_MMX mmx
+cglobal add_int16, 4,4,5, dst, src, mask, w, tmp
+    INT16_LOOP a, add               ; dst[i] = (dst[i] + src[i]) & mask
+
+INIT_XMM sse2
+cglobal add_int16, 4,4,5, dst, src, mask, w, tmp
+    test srcq, mmsize-1
+    jnz .unaligned                  ; src is not mmsize-aligned
+    test dstq, mmsize-1
+    jnz .unaligned                  ; dst is not mmsize-aligned
+    INT16_LOOP a, add               ; both aligned: aligned loads/stores; the expansion ends in RET
+.unaligned:
+    INT16_LOOP u, add               ; reached only via the jumps above: unaligned loads/stores
+
+INIT_MMX mmx
+cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
+    INT16_LOOP a, sub               ; dst[i] = (src1[i] - src2[i]) & mask
+
+INIT_XMM sse2
+cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w, tmp
+    test src1q, mmsize-1
+    jnz .unaligned                  ; src1 is not mmsize-aligned
+    test src2q, mmsize-1
+    jnz .unaligned                  ; src2 is not mmsize-aligned
+    test dstq, mmsize-1
+    jnz .unaligned                  ; dst is not mmsize-aligned
+    INT16_LOOP a, sub               ; all three aligned: aligned loads/stores; the expansion ends in RET
+.unaligned:
+    INT16_LOOP u, sub               ; reached only via the jumps above: unaligned loads/stores
+
+
+%macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst alignment (a/u), %2 = src alignment (a/u)
+    add     wd, wd                  ; sample count -> byte count
+    add     srcq, wq
+    add     dstq, wq
+    neg     wq                      ; negative byte offset counting up towards zero
+%%.loop:
+    mov%2   m1, [srcq+wq]
+    mova    m2, m1
+    pslld   m1, 16
+    paddw   m1, m2                  ; each odd word now holds the sum of its word pair
+    mova    m2, m1
+
+    pshufb  m1, m3                  ; m3 = caller-loaded control (pb_zzzz2323zzzzabab in the entry points of this file)
+    paddw   m1, m2                  ; with that control: running sums within each group of four words
+    pshufb  m0, m5                  ; m5 = caller-loaded control; spreads the carried-in word of m0 over all words
+%if mmsize == 16
+    mova    m2, m1
+    pshufb  m1, m4                  ; m4 = caller-loaded control, needed only for 16-byte registers
+    paddw   m1, m2                  ; add the total of the low four words to the upper four
+%endif
+    paddw   m0, m1                  ; add the carried-in value to the running sums
+    pand    m0, m7                  ; m7 = mask register prepared by the caller
+%ifidn %1, a
+    mova    [dstq+wq], m0
+%else
+    movq    [dstq+wq], m0           ; unaligned dst: store the two 8-byte halves separately
+    movhps  [dstq+wq+8], m0
+%endif
+    add     wq, mmsize
+    jl %%.loop
+    mov     eax, mmsize-1           ; from here on: build a pshufb control from the final value of w
+    sub     eax, wd
+    mov     wd, eax
+    shl     wd, 8
+    lea     eax, [wd+eax-1]         ; low byte = eax-1, second byte = eax: two consecutive byte indices
+    movd    m1, eax
+    pshufb  m0, m1                  ; bring the selected word of the last result vector into the low word
+    movd    eax, m0                 ; return value; NOTE(review): bytes 2-3 are copies of byte 0 of m0, presumably ignored by callers -- confirm
+    RET
+%endmacro
+
+; int add_hfyu_left_pred_int16(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int left)
+INIT_MMX ssse3
+cglobal add_hfyu_left_pred_int16, 4,4,8, dst, src, mask, w, left
+.skip_prologue:
+    mova    m5, [pb_67]                 ; control: copy word 3 (bytes 6,7) to every word
+    mova    m3, [pb_zzzz2323zzzzabab]   ; MMX register: only the first 8 bytes of the table are loaded
+    movd    m0, leftm
+    psllq   m0, 48                      ; move left into word 3, the word that pb_67 replicates
+    movd    m7, maskm
+    SPLATW  m7 ,m7                      ; m7 = mask in every word
+    ADD_HFYU_LEFT_LOOP_INT16 a, a       ; the macro expansion ends in RET
+
+INIT_XMM sse4
+cglobal add_hfyu_left_pred_int16, 4,4,8, dst, src, mask, w, left
+    mova    m5, [pb_ef]                 ; control: copy word 7 (bytes 14,15) to every word
+    mova    m4, [pb_zzzzzzzz67676767]   ; control: zero the low half, copy word 3 into the upper four words
+    mova    m3, [pb_zzzz2323zzzzabab]
+    movd    m0, leftm
+    pslldq  m0, 14                      ; move left into word 7, the word that pb_ef replicates
+    movd    m7, maskm
+    SPLATW  m7 ,m7                      ; m7 = mask in every word
+    test    srcq, 15
+    jnz .src_unaligned                  ; src not 16-byte aligned
+    test    dstq, 15
+    jnz .dst_unaligned                  ; src aligned, dst not
+    ADD_HFYU_LEFT_LOOP_INT16 a, a       ; each expansion ends in RET, so the labels below are not fallen into
+.dst_unaligned:
+    ADD_HFYU_LEFT_LOOP_INT16 u, a
+.src_unaligned:
+    ADD_HFYU_LEFT_LOOP_INT16 u, u       ; src unaligned: dst is also stored with the unaligned sequence
+
+; void add_hfyu_median_pred_int16(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top)
+INIT_MMX mmxext
+cglobal add_hfyu_median_pred_int16, 7,7,0, dst, top, diff, mask, w, left, left_top
+    add      wd, wd                 ; sample count -> byte count
+    movd    mm6, maskd
+    SPLATW  mm6, mm6                ; mm6 = mask in every word
+    movq    mm0, [topq]
+    movq    mm2, mm0
+    movd    mm4, [left_topq]
+    psllq   mm2, 16
+    movq    mm1, mm0                ; mm1 = t (four top samples)
+    por     mm4, mm2                ; mm4 = tl: top shifted up one word with *left_top in word 0
+    movd    mm3, [leftq]            ; mm3 = l (only the low word is consumed per step)
+    psubw   mm0, mm4 ; t-tl
+    add    dstq, wq
+    add    topq, wq
+    add   diffq, wq
+    neg      wq                     ; negative byte offset counting up towards zero
+    jmp .skip                       ; first group: t and tl are already set up above
+.loop:
+    movq    mm4, [topq+wq]
+    movq    mm0, mm4
+    psllq   mm4, 16
+    por     mm4, mm1                ; word 0 of mm1 holds the previous group's last top sample after its three shifts
+    movq    mm1, mm0 ; t
+    psubw   mm0, mm4 ; t-tl
+.skip:
+    movq    mm2, [diffq+wq]         ; four residuals
+%assign i 0
+%rep 4
+    movq    mm4, mm0
+    paddw   mm4, mm3 ; t-tl+l
+    pand    mm4, mm6
+    movq    mm5, mm3
+    pmaxsw  mm3, mm1                ; max(l, t), signed word compare
+    pminsw  mm5, mm1                ; min(l, t)
+    pminsw  mm3, mm4
+    pmaxsw  mm3, mm5 ; median
+    paddw   mm3, mm2 ; +residual
+    pand    mm3, mm6                ; low word of mm3 is the new sample and the next step's l
+%if i==0
+    movq    mm7, mm3
+    psllq   mm7, 48                 ; start the output vector with sample 0 in the top word
+%else
+    movq    mm4, mm3
+    psrlq   mm7, 16                 ; shift earlier samples down one word ...
+    psllq   mm4, 48
+    por     mm7, mm4                ; ... and insert the new sample in the top word
+%endif
+%if i<3
+    psrlq   mm0, 16                 ; bring the next sample's t-tl, t and residual into word 0
+    psrlq   mm1, 16
+    psrlq   mm2, 16
+%endif
+%assign i i+1
+%endrep
+    movq [dstq+wq], mm7             ; store four reconstructed samples
+    add      wq, 8
+    jl .loop
+    movzx   r2d, word [dstq-2]      ; last written sample; r2 was the diff argument register and is reused as scratch
+    mov [leftq], r2d                ; *left = last output sample
+    movzx   r2d, word [topq-2]
+    mov [left_topq], r2d            ; *left_top = last top sample
+    RET
+
+cglobal sub_hfyu_median_pred_int16, 7,7,0, dst, src1, src2, mask, w, left, left_top
+    add      wd, wd                 ; sample count -> byte count
+    movd    mm7, maskd
+    SPLATW  mm7, mm7                ; mm7 = mask in every word
+    movq    mm0, [src1q]
+    movq    mm2, [src2q]
+    psllq   mm0, 16
+    psllq   mm2, 16
+    movd    mm6, [left_topq]
+    por     mm0, mm6                ; mm0 = src1 shifted up one word with *left_top in word 0
+    movd    mm6, [leftq]
+    por     mm2, mm6                ; mm2 = src2 shifted up one word with *left in word 0
+    xor     maskq, maskq            ; mask now lives in mm7; its register is reused as the byte offset
+.loop:
+    movq    mm1, [src1q + maskq]    ; four src1 samples at the current offset
+    movq    mm3, [src2q + maskq]    ; four src2 samples at the current offset
+    movq    mm4, mm2
+    psubw   mm2, mm0
+    paddw   mm2, mm1                ; mm2 = mm2 - mm0 + mm1 (gradient term)
+    pand    mm2, mm7
+    movq    mm5, mm4
+    pmaxsw  mm4, mm1                ; signed word max/min ...
+    pminsw  mm1, mm5
+    pminsw  mm4, mm2
+    pmaxsw  mm4, mm1                ; ... leaves the median of the three candidates in mm4
+    psubw   mm3, mm4                ; src2 sample minus the median
+    pand    mm3, mm7
+    movq    [dstq + maskq], mm3
+    add     maskq, 8
+    movq    mm0, [src1q + maskq - 2] ; reload the one-word-earlier vectors for the next group
+    movq    mm2, [src2q + maskq - 2]
+    cmp     maskq, wq
+        jb .loop
+    movzx maskd, word [src1q + wq - 2]
+    mov [left_topq], maskd          ; *left_top = last src1 sample
+    movzx maskd, word [src2q + wq - 2]
+    mov [leftq], maskd              ; *left = last src2 sample
+    RET
diff --git a/libavcodec/x86/lossless_videodsp_init.c b/libavcodec/x86/lossless_videodsp_init.c
new file mode 100644
index 0000000..6589024
--- /dev/null
+++ b/libavcodec/x86/lossless_videodsp_init.c
@@ -0,0 +1,62 @@
+/*
+ * Lossless video DSP utils
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "../lossless_videodsp.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/x86/cpu.h"
+
+void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
+void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
+void ff_diff_int16_mmx (uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w);
+void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w);
+int ff_add_hfyu_left_pred_int16_ssse3(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned acc);
+int ff_add_hfyu_left_pred_int16_sse4(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned acc);
+void ff_add_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top);
+void ff_sub_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w, int *left, int *left_top);
+
+/* Install the x86 SIMD implementations into c, chosen from the runtime CPU flags and avctx->pix_fmt. */
+void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx)
+{
+    int cpu_flags = av_get_cpu_flags();
+    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(avctx->pix_fmt); /* NULL if pix_fmt is unknown */
+
+    if (EXTERNAL_MMX(cpu_flags)) {
+        c->add_int16 = ff_add_int16_mmx;
+        c->diff_int16 = ff_diff_int16_mmx;
+    }
+    /* Median predictors use signed 16-bit min/max: need a known format with fewer than 16 bits per sample. */
+    if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc && pix_desc->comp[0].depth_minus1<15) {
+        c->add_hfyu_median_pred_int16 = ff_add_hfyu_median_pred_int16_mmxext;
+        c->sub_hfyu_median_pred_int16 = ff_sub_hfyu_median_pred_int16_mmxext;
+    }
+
+    if (EXTERNAL_SSE2(cpu_flags)) { /* later checks override earlier assignments */
+        c->add_int16 = ff_add_int16_sse2;
+        c->diff_int16 = ff_diff_int16_sse2;
+    }
+
+    if (EXTERNAL_SSSE3(cpu_flags)) {
+        c->add_hfyu_left_pred_int16 = ff_add_hfyu_left_pred_int16_ssse3;
+    }
+
+    if (EXTERNAL_SSE4(cpu_flags)) {
+        c->add_hfyu_left_pred_int16 = ff_add_hfyu_left_pred_int16_sse4;
+    }
+}
diff --git a/libavcodec/x86/lpc.c b/libavcodec/x86/lpc.c
index ea5d2ea..3a9493f 100644
--- a/libavcodec/x86/lpc.c
+++ b/libavcodec/x86/lpc.c
@@ -2,26 +2,25 @@
  * SIMD-optimized LPC functions
  * Copyright (c) 2007 Loren Merritt
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "libavutil/attributes.h"
 #include "libavutil/cpu.h"
-#include "libavutil/internal.h"
 #include "libavutil/mem.h"
 #include "libavutil/x86/asm.h"
 #include "libavutil/x86/cpu.h"
@@ -73,6 +72,7 @@ static void lpc_apply_welch_window_sse2(const int32_t *data, int len,
         "3:                                    \n\t"
         :"+&r"(i), "+&r"(j)
         :"r"(w_data+n2), "r"(data+n2), "m"(c), "r"(len)
+         NAMED_CONSTRAINTS_ARRAY_ADD(pd_1,pd_2)
          XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3",
                                     "%xmm5", "%xmm6", "%xmm7")
     );
@@ -117,6 +117,7 @@ static void lpc_compute_autocorr_sse2(const double *data, int len, int lag,
                 "movsd     %%xmm2, 16(%1)           \n\t"
                 :"+&r"(i)
                 :"r"(autoc+j), "r"(data+len), "r"(data+len-j)
+                 NAMED_CONSTRAINTS_ARRAY_ADD(pd_1)
                 :"memory"
             );
         } else {
@@ -140,6 +141,7 @@ static void lpc_compute_autocorr_sse2(const double *data, int len, int lag,
                 "movsd     %%xmm1, %2               \n\t"
                 :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1])
                 :"r"(data+len), "r"(data+len-j)
+                 NAMED_CONSTRAINTS_ARRAY_ADD(pd_1)
             );
         }
     }
@@ -152,7 +154,7 @@ av_cold void ff_lpc_init_x86(LPCContext *c)
 #if HAVE_SSE2_INLINE
     int cpu_flags = av_get_cpu_flags();
 
-    if (INLINE_SSE2(cpu_flags) && (cpu_flags & AV_CPU_FLAG_SSE2SLOW)) {
+    if (HAVE_SSE2_INLINE && cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) {
         c->lpc_apply_welch_window = lpc_apply_welch_window_sse2;
         c->lpc_compute_autocorr   = lpc_compute_autocorr_sse2;
     }
diff --git a/libavcodec/x86/mathops.h b/libavcodec/x86/mathops.h
index a62094e..1cca05d 100644
--- a/libavcodec/x86/mathops.h
+++ b/libavcodec/x86/mathops.h
@@ -2,20 +2,20 @@
  * simple math operations
  * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -24,6 +24,7 @@
 
 #include "config.h"
 #include "libavutil/common.h"
+#include "libavutil/x86/asm.h"
 
 #if HAVE_INLINE_ASM
 
@@ -88,6 +89,7 @@ static inline av_const int mid_pred(int a, int b, int c)
     return i;
 }
 
+#if HAVE_6REGS
 #define COPY3_IF_LT(x, y, a, b, c, d)\
 __asm__ volatile(\
     "cmpl  %0, %3       \n\t"\
@@ -97,10 +99,11 @@ __asm__ volatile(\
     : "+&r" (x), "+&r" (a), "+r" (c)\
     : "r" (y), "r" (b), "r" (d)\
 );
+#endif /* HAVE_6REGS */
 #endif /* HAVE_I686 */
 
 #define MASK_ABS(mask, level)                   \
-    __asm__ ("cltd                   \n\t"      \
+    __asm__ ("cdq                    \n\t"      \
              "xorl %1, %0            \n\t"      \
              "subl %1, %0            \n\t"      \
              : "+a"(level), "=&d"(mask))
diff --git a/libavcodec/x86/me_cmp.asm b/libavcodec/x86/me_cmp.asm
index 1a87f37..b0741f3 100644
--- a/libavcodec/x86/me_cmp.asm
+++ b/libavcodec/x86/me_cmp.asm
@@ -4,20 +4,20 @@
 ;* Copyright (c) 2000, 2001 Fabrice Bellard
 ;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;*****************************************************************************
 
@@ -274,19 +274,27 @@ INIT_XMM ssse3
 %define ABS_SUM_8x8 ABS_SUM_8x8_64
 HADAMARD8_DIFF 9
 
-INIT_XMM sse2
-; int ff_sse16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-;                   int line_size, int h);
-cglobal sse16, 5, 5, 8
-    shr      r4d, 1
+; int ff_sse*_*(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+;               int line_size, int h)
+
+%macro SUM_SQUARED_ERRORS 1
+cglobal sse%1, 5,5,8, v, pix1, pix2, lsize, h
+%if %1 == mmsize
+    shr       hd, 1
+%endif
     pxor      m0, m0         ; mm0 = 0
     pxor      m7, m7         ; mm7 holds the sum
 
 .next2lines: ; FIXME why are these unaligned movs? pix1[] is aligned
-    movu      m1, [r1   ]    ; mm1 = pix1[0][0-15]
-    movu      m2, [r2   ]    ; mm2 = pix2[0][0-15]
-    movu      m3, [r1+r3]    ; mm3 = pix1[1][0-15]
-    movu      m4, [r2+r3]    ; mm4 = pix2[1][0-15]
+    movu      m1, [pix1q]    ; m1 = pix1[0][0-15], [0-7] for mmx
+    movu      m2, [pix2q]    ; m2 = pix2[0][0-15], [0-7] for mmx
+%if %1 == mmsize
+    movu      m3, [pix1q+lsizeq] ; m3 = pix1[1][0-15], [0-7] for mmx
+    movu      m4, [pix2q+lsizeq] ; m4 = pix2[1][0-15], [0-7] for mmx
+%else  ; %1 / 2 == mmsize; mmx only
+    mova      m3, [pix1q+8]  ; m3 = pix1[0][8-15]
+    mova      m4, [pix2q+8]  ; m4 = pix2[0][8-15]
+%endif
 
     ; todo: mm1-mm2, mm3-mm4
     ; algo: subtract mm1 from mm2 with saturation and vice versa
@@ -315,22 +323,145 @@ cglobal sse16, 5, 5, 8
     pmaddwd   m1, m1
     pmaddwd   m3, m3
 
-    lea       r1, [r1+r3*2]  ; pix1 += 2*line_size
-    lea       r2, [r2+r3*2]  ; pix2 += 2*line_size
-
     paddd     m1, m2
     paddd     m3, m4
     paddd     m7, m1
     paddd     m7, m3
 
-    dec       r4
+%if %1 == mmsize
+    lea    pix1q, [pix1q + 2*lsizeq]
+    lea    pix2q, [pix2q + 2*lsizeq]
+%else
+    add    pix1q, lsizeq
+    add    pix2q, lsizeq
+%endif
+    dec       hd
     jnz .next2lines
 
-    mova      m1, m7
-    psrldq    m7, 8          ; shift hi qword to lo
-    paddd     m7, m1
-    mova      m1, m7
-    psrldq    m7, 4          ; shift hi dword to lo
-    paddd     m7, m1
+    HADDD     m7, m1
     movd     eax, m7         ; return value
     RET
+%endmacro
+
+INIT_MMX mmx
+SUM_SQUARED_ERRORS 8
+
+INIT_MMX mmx
+SUM_SQUARED_ERRORS 16
+
+INIT_XMM sse2
+SUM_SQUARED_ERRORS 16
+
+;-----------------------------------------------
+;int ff_sum_abs_dctelem(int16_t *block)
+;-----------------------------------------------
+; %1 = number of xmm registers used
+; %2 = number of inline loops
+
+%macro SUM_ABS_DCTELEM 2 ; emits sum_abs_dctelem: reads 4*%2 vectors of mmsize bytes from block
+cglobal sum_abs_dctelem, 1, 1, %1, block
+    pxor    m0, m0                        ; m0, m1 = two independent accumulators, start at 0
+    pxor    m1, m1
+%assign %%i 0
+%rep %2
+    mova      m2, [blockq+mmsize*(0+%%i)] ; load four consecutive vectors (aligned loads)
+    mova      m3, [blockq+mmsize*(1+%%i)]
+    mova      m4, [blockq+mmsize*(2+%%i)]
+    mova      m5, [blockq+mmsize*(3+%%i)]
+    ABS1_SUM  m2, m6, m0                  ; presumably m0 += |m2| with m6 as scratch (ABS1_SUM is defined outside this chunk - confirm)
+    ABS1_SUM  m3, m6, m1
+    ABS1_SUM  m4, m6, m0
+    ABS1_SUM  m5, m6, m1
+%assign %%i %%i+4
+%endrep
+    paddusw m0, m1                        ; merge both accumulators; unsigned word add with saturation
+    HSUM    m0, m1, eax                   ; presumably a horizontal word sum of m0 into eax (HSUM is defined outside this chunk - confirm)
+    and     eax, 0xFFFF                   ; return only the low 16 bits
+    RET
+%endmacro
+
+INIT_MMX mmx
+SUM_ABS_DCTELEM 0, 4
+INIT_MMX mmxext
+SUM_ABS_DCTELEM 0, 4
+INIT_XMM sse2
+SUM_ABS_DCTELEM 7, 2
+INIT_XMM ssse3
+SUM_ABS_DCTELEM 6, 2
+
+;------------------------------------------------------------------------------
+; int ff_hf_noise*_mmx(uint8_t *pix1, int lsize, int h)
+;------------------------------------------------------------------------------
+; %1 = 8/16. %2-5 = m#: on exit m%2/m%4 = low/high word differences pix[x]-pix[x+1]; m%3/m%5 are scratch; needs m7 == 0
+%macro HF_NOISE_PART1 5
+    mova      m%2, [pix1q]      ; 8 pixels of the current row
+%if %1 == 8
+    mova      m%3, m%2
+    psllq     m%2, 8            ; shift left then right by one byte: clears pixel 7 ...
+    psrlq     m%3, 8            ; m%3 = row shifted down one pixel (right neighbours, last lane 0)
+    psrlq     m%2, 8            ; ... so the last lane's difference is 0 - 0
+%else
+    mova      m%3, [pix1q+1]    ; right neighbours loaded directly from pix1 + 1
+%endif
+    mova      m%4, m%2
+    mova      m%5, m%3
+    punpcklbw m%2, m7           ; zero-extend bytes to words (m7 is the zero register)
+    punpcklbw m%3, m7
+    punpckhbw m%4, m7
+    punpckhbw m%5, m7
+    psubw     m%2, m%3          ; low four differences
+    psubw     m%4, m%5          ; high four differences
+%endmacro
+
+; %1-4 = m#: adds |m%1 - m%3| + |m%2 - m%4| (per word) to m6; clobbers m%1, m%2, m1 and m3
+%macro HF_NOISE_PART2 4
+    psubw     m%1, m%3
+    psubw     m%2, m%4
+    pxor       m3, m3
+    pxor       m1, m1
+    pcmpgtw    m3, m%1          ; m3 = all-ones in every word lane where m%1 is negative
+    pcmpgtw    m1, m%2
+    pxor      m%1, m3           ; (x ^ mask) - mask == abs(x)
+    pxor      m%2, m1
+    psubw     m%1, m3
+    psubw     m%2, m1
+    paddw     m%2, m%1
+    paddw      m6, m%2          ; accumulate into the running word sums in m6
+%endmacro
+
+; %1 = 8/16; emits hf_noise%1(pix1, lsize, h), which walks h rows starting at pix1
+%macro HF_NOISE 1
+cglobal hf_noise%1, 3,3,0, pix1, lsize, h
+    movsxdifnidn lsizeq, lsized
+    sub        hd, 2                ; the first two rows are handled before the loop
+    pxor       m7, m7               ; m7 = 0, used for byte->word unpacking
+    pxor       m6, m6               ; m6 = running word sums
+    HF_NOISE_PART1 %1, 0, 1, 2, 3   ; row 0 differences -> m0/m2
+    add     pix1q, lsizeq
+    HF_NOISE_PART1 %1, 4, 1, 5, 3   ; row 1 differences -> m4/m5
+    HF_NOISE_PART2     0, 2, 4, 5   ; m6 += |row0 - row1|
+    add     pix1q, lsizeq
+.loop:                              ; two rows per iteration, alternating m0/m2 and m4/m5
+    HF_NOISE_PART1 %1, 0, 1, 2, 3
+    HF_NOISE_PART2     4, 5, 0, 2
+    add     pix1q, lsizeq
+    HF_NOISE_PART1 %1, 4, 1, 5, 3
+    HF_NOISE_PART2     0, 2, 4, 5
+    add     pix1q, lsizeq
+    sub        hd, 2
+        jne .loop                   ; NOTE(review): exits only when the counter hits exactly 0, so h looks required to be even and >= 4 - confirm callers
+
+    mova       m0, m6
+    punpcklwd  m0, m7               ; widen the word sums to dwords
+    punpckhwd  m6, m7
+    paddd      m6, m0
+    mova       m0, m6
+    psrlq      m6, 32               ; fold the high dword onto the low one
+    paddd      m0, m6
+    movd      eax, m0   ; eax = total of the accumulated sums
+    REP_RET                 ; return eax;
+%endmacro
+
+INIT_MMX mmx
+HF_NOISE 8
+HF_NOISE 16
diff --git a/libavcodec/x86/me_cmp_init.c b/libavcodec/x86/me_cmp_init.c
index e93b67b..21db221 100644
--- a/libavcodec/x86/me_cmp_init.c
+++ b/libavcodec/x86/me_cmp_init.c
@@ -5,20 +5,20 @@
  *
  * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -29,382 +29,31 @@
 #include "libavcodec/me_cmp.h"
 #include "libavcodec/mpegvideo.h"
 
-#if HAVE_INLINE_ASM
-
-static int sse8_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                    int line_size, int h)
-{
-    int tmp;
-
-    __asm__ volatile (
-        "movl         %4, %%ecx          \n"
-        "shr          $1, %%ecx          \n"
-        "pxor      %%mm0, %%mm0          \n" /* mm0 = 0 */
-        "pxor      %%mm7, %%mm7          \n" /* mm7 holds the sum */
-        "1:                              \n"
-        "movq       (%0), %%mm1          \n" /* mm1 = pix1[0][0 - 7] */
-        "movq       (%1), %%mm2          \n" /* mm2 = pix2[0][0 - 7] */
-        "movq   (%0, %3), %%mm3          \n" /* mm3 = pix1[1][0 - 7] */
-        "movq   (%1, %3), %%mm4          \n" /* mm4 = pix2[1][0 - 7] */
-
-        /* todo: mm1-mm2, mm3-mm4 */
-        /* algo: subtract mm1 from mm2 with saturation and vice versa */
-        /*       OR the results to get absolute difference */
-        "movq      %%mm1, %%mm5          \n"
-        "movq      %%mm3, %%mm6          \n"
-        "psubusb   %%mm2, %%mm1          \n"
-        "psubusb   %%mm4, %%mm3          \n"
-        "psubusb   %%mm5, %%mm2          \n"
-        "psubusb   %%mm6, %%mm4          \n"
-
-        "por       %%mm1, %%mm2          \n"
-        "por       %%mm3, %%mm4          \n"
-
-        /* now convert to 16-bit vectors so we can square them */
-        "movq      %%mm2, %%mm1          \n"
-        "movq      %%mm4, %%mm3          \n"
-
-        "punpckhbw %%mm0, %%mm2          \n"
-        "punpckhbw %%mm0, %%mm4          \n"
-        "punpcklbw %%mm0, %%mm1          \n" /* mm1 now spread over (mm1, mm2) */
-        "punpcklbw %%mm0, %%mm3          \n" /* mm4 now spread over (mm3, mm4) */
-
-        "pmaddwd   %%mm2, %%mm2          \n"
-        "pmaddwd   %%mm4, %%mm4          \n"
-        "pmaddwd   %%mm1, %%mm1          \n"
-        "pmaddwd   %%mm3, %%mm3          \n"
-
-        "lea (%0, %3, 2), %0             \n" /* pix1 += 2 * line_size */
-        "lea (%1, %3, 2), %1             \n" /* pix2 += 2 * line_size */
-
-        "paddd     %%mm2, %%mm1          \n"
-        "paddd     %%mm4, %%mm3          \n"
-        "paddd     %%mm1, %%mm7          \n"
-        "paddd     %%mm3, %%mm7          \n"
-
-        "decl      %%ecx                 \n"
-        "jnz       1b                    \n"
-
-        "movq      %%mm7, %%mm1          \n"
-        "psrlq       $32, %%mm7          \n" /* shift hi dword to lo */
-        "paddd     %%mm7, %%mm1          \n"
-        "movd      %%mm1, %2             \n"
-        : "+r" (pix1), "+r" (pix2), "=r" (tmp)
-        : "r" ((x86_reg) line_size), "m" (h)
-        : "%ecx");
-
-    return tmp;
-}
-
-static int sse16_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                     int line_size, int h)
-{
-    int tmp;
-
-    __asm__ volatile (
-        "movl %4, %%ecx\n"
-        "pxor %%mm0, %%mm0\n"    /* mm0 = 0 */
-        "pxor %%mm7, %%mm7\n"    /* mm7 holds the sum */
-        "1:\n"
-        "movq (%0), %%mm1\n"     /* mm1 = pix1[0 -  7] */
-        "movq (%1), %%mm2\n"     /* mm2 = pix2[0 -  7] */
-        "movq 8(%0), %%mm3\n"    /* mm3 = pix1[8 - 15] */
-        "movq 8(%1), %%mm4\n"    /* mm4 = pix2[8 - 15] */
-
-        /* todo: mm1-mm2, mm3-mm4 */
-        /* algo: subtract mm1 from mm2 with saturation and vice versa */
-        /*       OR the results to get absolute difference */
-        "movq %%mm1, %%mm5\n"
-        "movq %%mm3, %%mm6\n"
-        "psubusb %%mm2, %%mm1\n"
-        "psubusb %%mm4, %%mm3\n"
-        "psubusb %%mm5, %%mm2\n"
-        "psubusb %%mm6, %%mm4\n"
-
-        "por %%mm1, %%mm2\n"
-        "por %%mm3, %%mm4\n"
-
-        /* now convert to 16-bit vectors so we can square them */
-        "movq %%mm2, %%mm1\n"
-        "movq %%mm4, %%mm3\n"
-
-        "punpckhbw %%mm0, %%mm2\n"
-        "punpckhbw %%mm0, %%mm4\n"
-        "punpcklbw %%mm0, %%mm1\n" /* mm1 now spread over (mm1, mm2) */
-        "punpcklbw %%mm0, %%mm3\n" /* mm4 now spread over (mm3, mm4) */
-
-        "pmaddwd %%mm2, %%mm2\n"
-        "pmaddwd %%mm4, %%mm4\n"
-        "pmaddwd %%mm1, %%mm1\n"
-        "pmaddwd %%mm3, %%mm3\n"
-
-        "add %3, %0\n"
-        "add %3, %1\n"
-
-        "paddd %%mm2, %%mm1\n"
-        "paddd %%mm4, %%mm3\n"
-        "paddd %%mm1, %%mm7\n"
-        "paddd %%mm3, %%mm7\n"
-
-        "decl %%ecx\n"
-        "jnz 1b\n"
-
-        "movq %%mm7, %%mm1\n"
-        "psrlq $32, %%mm7\n"    /* shift hi dword to lo */
-        "paddd %%mm7, %%mm1\n"
-        "movd %%mm1, %2\n"
-        : "+r" (pix1), "+r" (pix2), "=r" (tmp)
-        : "r" ((x86_reg) line_size), "m" (h)
-        : "%ecx");
-
-    return tmp;
-}
-
-static int hf_noise8_mmx(uint8_t *pix1, int line_size, int h)
-{
-    int tmp;
-
-    __asm__ volatile (
-        "movl %3, %%ecx\n"
-        "pxor %%mm7, %%mm7\n"
-        "pxor %%mm6, %%mm6\n"
-
-        "movq (%0), %%mm0\n"
-        "movq %%mm0, %%mm1\n"
-        "psllq $8, %%mm0\n"
-        "psrlq $8, %%mm1\n"
-        "psrlq $8, %%mm0\n"
-        "movq %%mm0, %%mm2\n"
-        "movq %%mm1, %%mm3\n"
-        "punpcklbw %%mm7, %%mm0\n"
-        "punpcklbw %%mm7, %%mm1\n"
-        "punpckhbw %%mm7, %%mm2\n"
-        "punpckhbw %%mm7, %%mm3\n"
-        "psubw %%mm1, %%mm0\n"
-        "psubw %%mm3, %%mm2\n"
-
-        "add %2, %0\n"
-
-        "movq (%0), %%mm4\n"
-        "movq %%mm4, %%mm1\n"
-        "psllq $8, %%mm4\n"
-        "psrlq $8, %%mm1\n"
-        "psrlq $8, %%mm4\n"
-        "movq %%mm4, %%mm5\n"
-        "movq %%mm1, %%mm3\n"
-        "punpcklbw %%mm7, %%mm4\n"
-        "punpcklbw %%mm7, %%mm1\n"
-        "punpckhbw %%mm7, %%mm5\n"
-        "punpckhbw %%mm7, %%mm3\n"
-        "psubw %%mm1, %%mm4\n"
-        "psubw %%mm3, %%mm5\n"
-        "psubw %%mm4, %%mm0\n"
-        "psubw %%mm5, %%mm2\n"
-        "pxor %%mm3, %%mm3\n"
-        "pxor %%mm1, %%mm1\n"
-        "pcmpgtw %%mm0, %%mm3\n\t"
-        "pcmpgtw %%mm2, %%mm1\n\t"
-        "pxor %%mm3, %%mm0\n"
-        "pxor %%mm1, %%mm2\n"
-        "psubw %%mm3, %%mm0\n"
-        "psubw %%mm1, %%mm2\n"
-        "paddw %%mm0, %%mm2\n"
-        "paddw %%mm2, %%mm6\n"
-
-        "add %2, %0\n"
-        "1:\n"
-
-        "movq (%0), %%mm0\n"
-        "movq %%mm0, %%mm1\n"
-        "psllq $8, %%mm0\n"
-        "psrlq $8, %%mm1\n"
-        "psrlq $8, %%mm0\n"
-        "movq %%mm0, %%mm2\n"
-        "movq %%mm1, %%mm3\n"
-        "punpcklbw %%mm7, %%mm0\n"
-        "punpcklbw %%mm7, %%mm1\n"
-        "punpckhbw %%mm7, %%mm2\n"
-        "punpckhbw %%mm7, %%mm3\n"
-        "psubw %%mm1, %%mm0\n"
-        "psubw %%mm3, %%mm2\n"
-        "psubw %%mm0, %%mm4\n"
-        "psubw %%mm2, %%mm5\n"
-        "pxor  %%mm3, %%mm3\n"
-        "pxor  %%mm1, %%mm1\n"
-        "pcmpgtw %%mm4, %%mm3\n\t"
-        "pcmpgtw %%mm5, %%mm1\n\t"
-        "pxor  %%mm3, %%mm4\n"
-        "pxor  %%mm1, %%mm5\n"
-        "psubw %%mm3, %%mm4\n"
-        "psubw %%mm1, %%mm5\n"
-        "paddw %%mm4, %%mm5\n"
-        "paddw %%mm5, %%mm6\n"
-
-        "add %2, %0\n"
-
-        "movq (%0), %%mm4\n"
-        "movq      %%mm4, %%mm1\n"
-        "psllq $8, %%mm4\n"
-        "psrlq $8, %%mm1\n"
-        "psrlq $8, %%mm4\n"
-        "movq      %%mm4, %%mm5\n"
-        "movq      %%mm1, %%mm3\n"
-        "punpcklbw %%mm7, %%mm4\n"
-        "punpcklbw %%mm7, %%mm1\n"
-        "punpckhbw %%mm7, %%mm5\n"
-        "punpckhbw %%mm7, %%mm3\n"
-        "psubw     %%mm1, %%mm4\n"
-        "psubw     %%mm3, %%mm5\n"
-        "psubw     %%mm4, %%mm0\n"
-        "psubw     %%mm5, %%mm2\n"
-        "pxor      %%mm3, %%mm3\n"
-        "pxor      %%mm1, %%mm1\n"
-        "pcmpgtw   %%mm0, %%mm3\n\t"
-        "pcmpgtw   %%mm2, %%mm1\n\t"
-        "pxor      %%mm3, %%mm0\n"
-        "pxor      %%mm1, %%mm2\n"
-        "psubw     %%mm3, %%mm0\n"
-        "psubw     %%mm1, %%mm2\n"
-        "paddw     %%mm0, %%mm2\n"
-        "paddw     %%mm2, %%mm6\n"
-
-        "add  %2, %0\n"
-        "subl $2, %%ecx\n"
-        " jnz 1b\n"
-
-        "movq      %%mm6, %%mm0\n"
-        "punpcklwd %%mm7, %%mm0\n"
-        "punpckhwd %%mm7, %%mm6\n"
-        "paddd     %%mm0, %%mm6\n"
-
-        "movq  %%mm6, %%mm0\n"
-        "psrlq $32,   %%mm6\n"
-        "paddd %%mm6, %%mm0\n"
-        "movd  %%mm0, %1\n"
-        : "+r" (pix1), "=r" (tmp)
-        : "r" ((x86_reg) line_size), "g" (h - 2)
-        : "%ecx");
-
-    return tmp;
-}
-
-static int hf_noise16_mmx(uint8_t *pix1, int line_size, int h)
-{
-    int tmp;
-    uint8_t *pix = pix1;
-
-    __asm__ volatile (
-        "movl %3, %%ecx\n"
-        "pxor %%mm7, %%mm7\n"
-        "pxor %%mm6, %%mm6\n"
-
-        "movq (%0), %%mm0\n"
-        "movq 1(%0), %%mm1\n"
-        "movq %%mm0, %%mm2\n"
-        "movq %%mm1, %%mm3\n"
-        "punpcklbw %%mm7, %%mm0\n"
-        "punpcklbw %%mm7, %%mm1\n"
-        "punpckhbw %%mm7, %%mm2\n"
-        "punpckhbw %%mm7, %%mm3\n"
-        "psubw %%mm1, %%mm0\n"
-        "psubw %%mm3, %%mm2\n"
-
-        "add %2, %0\n"
-
-        "movq (%0), %%mm4\n"
-        "movq 1(%0), %%mm1\n"
-        "movq %%mm4, %%mm5\n"
-        "movq %%mm1, %%mm3\n"
-        "punpcklbw %%mm7, %%mm4\n"
-        "punpcklbw %%mm7, %%mm1\n"
-        "punpckhbw %%mm7, %%mm5\n"
-        "punpckhbw %%mm7, %%mm3\n"
-        "psubw %%mm1, %%mm4\n"
-        "psubw %%mm3, %%mm5\n"
-        "psubw %%mm4, %%mm0\n"
-        "psubw %%mm5, %%mm2\n"
-        "pxor %%mm3, %%mm3\n"
-        "pxor %%mm1, %%mm1\n"
-        "pcmpgtw %%mm0, %%mm3\n\t"
-        "pcmpgtw %%mm2, %%mm1\n\t"
-        "pxor %%mm3, %%mm0\n"
-        "pxor %%mm1, %%mm2\n"
-        "psubw %%mm3, %%mm0\n"
-        "psubw %%mm1, %%mm2\n"
-        "paddw %%mm0, %%mm2\n"
-        "paddw %%mm2, %%mm6\n"
-
-        "add %2, %0\n"
-        "1:\n"
-
-        "movq (%0), %%mm0\n"
-        "movq 1(%0), %%mm1\n"
-        "movq %%mm0, %%mm2\n"
-        "movq %%mm1, %%mm3\n"
-        "punpcklbw %%mm7, %%mm0\n"
-        "punpcklbw %%mm7, %%mm1\n"
-        "punpckhbw %%mm7, %%mm2\n"
-        "punpckhbw %%mm7, %%mm3\n"
-        "psubw %%mm1, %%mm0\n"
-        "psubw %%mm3, %%mm2\n"
-        "psubw %%mm0, %%mm4\n"
-        "psubw %%mm2, %%mm5\n"
-        "pxor %%mm3, %%mm3\n"
-        "pxor %%mm1, %%mm1\n"
-        "pcmpgtw %%mm4, %%mm3\n\t"
-        "pcmpgtw %%mm5, %%mm1\n\t"
-        "pxor %%mm3, %%mm4\n"
-        "pxor %%mm1, %%mm5\n"
-        "psubw %%mm3, %%mm4\n"
-        "psubw %%mm1, %%mm5\n"
-        "paddw %%mm4, %%mm5\n"
-        "paddw %%mm5, %%mm6\n"
-
-        "add %2, %0\n"
-
-        "movq (%0), %%mm4\n"
-        "movq 1(%0), %%mm1\n"
-        "movq %%mm4, %%mm5\n"
-        "movq %%mm1, %%mm3\n"
-        "punpcklbw %%mm7, %%mm4\n"
-        "punpcklbw %%mm7, %%mm1\n"
-        "punpckhbw %%mm7, %%mm5\n"
-        "punpckhbw %%mm7, %%mm3\n"
-        "psubw %%mm1, %%mm4\n"
-        "psubw %%mm3, %%mm5\n"
-        "psubw %%mm4, %%mm0\n"
-        "psubw %%mm5, %%mm2\n"
-        "pxor %%mm3, %%mm3\n"
-        "pxor %%mm1, %%mm1\n"
-        "pcmpgtw %%mm0, %%mm3\n\t"
-        "pcmpgtw %%mm2, %%mm1\n\t"
-        "pxor %%mm3, %%mm0\n"
-        "pxor %%mm1, %%mm2\n"
-        "psubw %%mm3, %%mm0\n"
-        "psubw %%mm1, %%mm2\n"
-        "paddw %%mm0, %%mm2\n"
-        "paddw %%mm2, %%mm6\n"
-
-        "add %2, %0\n"
-        "subl $2, %%ecx\n"
-        " jnz 1b\n"
-
-        "movq %%mm6, %%mm0\n"
-        "punpcklwd %%mm7, %%mm0\n"
-        "punpckhwd %%mm7, %%mm6\n"
-        "paddd %%mm0, %%mm6\n"
+int ff_sum_abs_dctelem_mmx(int16_t *block);
+int ff_sum_abs_dctelem_mmxext(int16_t *block);
+int ff_sum_abs_dctelem_sse2(int16_t *block);
+int ff_sum_abs_dctelem_ssse3(int16_t *block);
+int ff_sse8_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+                int line_size, int h);
+int ff_sse16_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+                 int line_size, int h);
+int ff_sse16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
+                  int line_size, int h);
+int ff_hf_noise8_mmx(uint8_t *pix1, int lsize, int h);
+int ff_hf_noise16_mmx(uint8_t *pix1, int lsize, int h);
 
-        "movq %%mm6, %%mm0\n"
-        "psrlq $32, %%mm6\n"
-        "paddd %%mm6, %%mm0\n"
-        "movd %%mm0, %1\n"
-        : "+r" (pix1), "=r" (tmp)
-        : "r" ((x86_reg) line_size), "g" (h - 2)
-        : "%ecx");
+#define hadamard_func(cpu)                                              \
+    int ff_hadamard8_diff_ ## cpu(MpegEncContext *s, uint8_t *src1,     \
+                                  uint8_t *src2, int stride, int h);    \
+    int ff_hadamard8_diff16_ ## cpu(MpegEncContext *s, uint8_t *src1,   \
+                                    uint8_t *src2, int stride, int h);
 
-    return tmp + hf_noise8_mmx(pix + 8, line_size, h);
-}
+hadamard_func(mmx)
+hadamard_func(mmxext)
+hadamard_func(sse2)
+hadamard_func(ssse3)
 
+#if HAVE_YASM
 static int nsse16_mmx(MpegEncContext *c, uint8_t *pix1, uint8_t *pix2,
                       int line_size, int h)
 {
@@ -413,9 +62,9 @@ static int nsse16_mmx(MpegEncContext *c, uint8_t *pix1, uint8_t *pix2,
     if (c)
         score1 = c->mecc.sse[0](c, pix1, pix2, line_size, h);
     else
-        score1 = sse16_mmx(c, pix1, pix2, line_size, h);
-    score2 = hf_noise16_mmx(pix1, line_size, h) -
-             hf_noise16_mmx(pix2, line_size, h);
+        score1 = ff_sse16_mmx(c, pix1, pix2, line_size, h);
+    score2 = ff_hf_noise16_mmx(pix1, line_size, h) + ff_hf_noise8_mmx(pix1+8, line_size, h)
+           - ff_hf_noise16_mmx(pix2, line_size, h) - ff_hf_noise8_mmx(pix2+8, line_size, h);
 
     if (c)
         return score1 + FFABS(score2) * c->avctx->nsse_weight;
@@ -426,9 +75,9 @@ static int nsse16_mmx(MpegEncContext *c, uint8_t *pix1, uint8_t *pix2,
 static int nsse8_mmx(MpegEncContext *c, uint8_t *pix1, uint8_t *pix2,
                      int line_size, int h)
 {
-    int score1 = sse8_mmx(c, pix1, pix2, line_size, h);
-    int score2 = hf_noise8_mmx(pix1, line_size, h) -
-                 hf_noise8_mmx(pix2, line_size, h);
+    int score1 = ff_sse8_mmx(c, pix1, pix2, line_size, h);
+    int score2 = ff_hf_noise8_mmx(pix1, line_size, h) -
+                 ff_hf_noise8_mmx(pix2, line_size, h);
 
     if (c)
         return score1 + FFABS(score2) * c->avctx->nsse_weight;
@@ -436,13 +85,17 @@ static int nsse8_mmx(MpegEncContext *c, uint8_t *pix1, uint8_t *pix2,
         return score1 + FFABS(score2) * 8;
 }
 
+#endif /* HAVE_YASM */
+
+#if HAVE_INLINE_ASM
+
 static int vsad_intra16_mmx(MpegEncContext *v, uint8_t *pix, uint8_t *dummy,
                             int line_size, int h)
 {
     int tmp;
 
-    assert((((int) pix) & 7) == 0);
-    assert((line_size & 7) == 0);
+    av_assert2((((int) pix) & 7) == 0);
+    av_assert2((line_size & 7) == 0);
 
 #define SUM(in0, in1, out0, out1)               \
     "movq (%0), %%mm2\n"                        \
@@ -505,8 +158,8 @@ static int vsad_intra16_mmxext(MpegEncContext *v, uint8_t *pix, uint8_t *dummy,
 {
     int tmp;
 
-    assert((((int) pix) & 7) == 0);
-    assert((line_size & 7) == 0);
+    av_assert2((((int) pix) & 7) == 0);
+    av_assert2((line_size & 7) == 0);
 
 #define SUM(in0, in1, out0, out1)               \
     "movq (%0), " #out0 "\n"                    \
@@ -548,9 +201,9 @@ static int vsad16_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
 {
     int tmp;
 
-    assert((((int) pix1) & 7) == 0);
-    assert((((int) pix2) & 7) == 0);
-    assert((line_size & 7) == 0);
+    av_assert2((((int) pix1) & 7) == 0);
+    av_assert2((((int) pix2) & 7) == 0);
+    av_assert2((line_size & 7) == 0);
 
 #define SUM(in0, in1, out0, out1)       \
     "movq (%0), %%mm2\n"                \
@@ -629,9 +282,9 @@ static int vsad16_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
 {
     int tmp;
 
-    assert((((int) pix1) & 7) == 0);
-    assert((((int) pix2) & 7) == 0);
-    assert((line_size & 7) == 0);
+    av_assert2((((int) pix1) & 7) == 0);
+    av_assert2((((int) pix2) & 7) == 0);
+    av_assert2((line_size & 7) == 0);
 
 #define SUM(in0, in1, out0, out1)               \
     "movq (%0), " #out0 "\n"                    \
@@ -684,117 +337,6 @@ static int vsad16_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
 }
 #undef SUM
 
-#define MMABS_MMX(a,z)                          \
-    "pxor "    #z ", " #z "             \n\t"   \
-    "pcmpgtw " #a ", " #z "             \n\t"   \
-    "pxor "    #z ", " #a "             \n\t"   \
-    "psubw "   #z ", " #a "             \n\t"
-
-#define MMABS_MMXEXT(a, z)                      \
-    "pxor "    #z ", " #z "             \n\t"   \
-    "psubw "   #a ", " #z "             \n\t"   \
-    "pmaxsw "  #z ", " #a "             \n\t"
-
-#define MMABS_SSSE3(a,z)                        \
-    "pabsw "   #a ", " #a "             \n\t"
-
-#define MMABS_SUM(a,z, sum)                     \
-    MMABS(a,z)                                  \
-    "paddusw " #a ", " #sum "           \n\t"
-
-/* FIXME: HSUM_* saturates at 64k, while an 8x8 hadamard or dct block can get
- * up to about 100k on extreme inputs. But that's very unlikely to occur in
- * natural video, and it's even more unlikely to not have any alternative
- * mvs/modes with lower cost. */
-#define HSUM_MMX(a, t, dst)                     \
-    "movq    " #a ", " #t "             \n\t"   \
-    "psrlq      $32, " #a "             \n\t"   \
-    "paddusw " #t ", " #a "             \n\t"   \
-    "movq    " #a ", " #t "             \n\t"   \
-    "psrlq      $16, " #a "             \n\t"   \
-    "paddusw " #t ", " #a "             \n\t"   \
-    "movd    " #a ", " #dst "           \n\t"   \
-
-#define HSUM_MMXEXT(a, t, dst)                  \
-    "pshufw   $0x0E, " #a ", " #t "     \n\t"   \
-    "paddusw " #t ", " #a "             \n\t"   \
-    "pshufw   $0x01, " #a ", " #t "     \n\t"   \
-    "paddusw " #t ", " #a "             \n\t"   \
-    "movd    " #a ", " #dst "           \n\t"   \
-
-#define HSUM_SSE2(a, t, dst)                    \
-    "movhlps " #a ", " #t "             \n\t"   \
-    "paddusw " #t ", " #a "             \n\t"   \
-    "pshuflw  $0x0E, " #a ", " #t "     \n\t"   \
-    "paddusw " #t ", " #a "             \n\t"   \
-    "pshuflw  $0x01, " #a ", " #t "     \n\t"   \
-    "paddusw " #t ", " #a "             \n\t"   \
-    "movd    " #a ", " #dst "           \n\t"   \
-
-#define DCT_SAD4(m, mm, o)                      \
-    "mov"#m" "#o" +  0(%1), " #mm "2    \n\t"   \
-    "mov"#m" "#o" + 16(%1), " #mm "3    \n\t"   \
-    "mov"#m" "#o" + 32(%1), " #mm "4    \n\t"   \
-    "mov"#m" "#o" + 48(%1), " #mm "5    \n\t"   \
-    MMABS_SUM(mm ## 2, mm ## 6, mm ## 0)        \
-    MMABS_SUM(mm ## 3, mm ## 7, mm ## 1)        \
-    MMABS_SUM(mm ## 4, mm ## 6, mm ## 0)        \
-    MMABS_SUM(mm ## 5, mm ## 7, mm ## 1)        \
-
-#define DCT_SAD_MMX                             \
-    "pxor    %%mm0, %%mm0               \n\t"   \
-    "pxor    %%mm1, %%mm1               \n\t"   \
-    DCT_SAD4(q, %%mm, 0)                        \
-    DCT_SAD4(q, %%mm, 8)                        \
-    DCT_SAD4(q, %%mm, 64)                       \
-    DCT_SAD4(q, %%mm, 72)                       \
-    "paddusw %%mm1, %%mm0               \n\t"   \
-    HSUM(%%mm0, %%mm1, %0)
-
-#define DCT_SAD_SSE2                            \
-    "pxor    %%xmm0, %%xmm0             \n\t"   \
-    "pxor    %%xmm1, %%xmm1             \n\t"   \
-    DCT_SAD4(dqa, %%xmm, 0)                     \
-    DCT_SAD4(dqa, %%xmm, 64)                    \
-    "paddusw %%xmm1, %%xmm0             \n\t"   \
-    HSUM(%%xmm0, %%xmm1, %0)
-
-#define DCT_SAD_FUNC(cpu)                           \
-static int sum_abs_dctelem_ ## cpu(int16_t *block)  \
-{                                                   \
-    int sum;                                        \
-    __asm__ volatile (                              \
-        DCT_SAD                                     \
-        :"=r"(sum)                                  \
-        :"r"(block));                               \
-    return sum & 0xFFFF;                            \
-}
-
-#define DCT_SAD         DCT_SAD_MMX
-#define HSUM(a, t, dst) HSUM_MMX(a, t, dst)
-#define MMABS(a, z)     MMABS_MMX(a, z)
-DCT_SAD_FUNC(mmx)
-#undef MMABS
-#undef HSUM
-
-#define HSUM(a, t, dst) HSUM_MMXEXT(a, t, dst)
-#define MMABS(a, z)     MMABS_MMXEXT(a, z)
-DCT_SAD_FUNC(mmxext)
-#undef HSUM
-#undef DCT_SAD
-
-#define DCT_SAD         DCT_SAD_SSE2
-#define HSUM(a, t, dst) HSUM_SSE2(a, t, dst)
-DCT_SAD_FUNC(sse2)
-#undef MMABS
-
-#if HAVE_SSSE3_INLINE
-#define MMABS(a, z)     MMABS_SSSE3(a, z)
-DCT_SAD_FUNC(ssse3)
-#undef MMABS
-#endif
-#undef HSUM
-#undef DCT_SAD
 
 
 DECLARE_ASM_CONST(8, uint64_t, round_tab)[3] = {
@@ -807,7 +349,7 @@ DECLARE_ASM_CONST(8, uint64_t, bone) = 0x0101010101010101LL;
 
 static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 {
-    x86_reg len = -(stride * h);
+    x86_reg len = -(x86_reg)stride * h;
     __asm__ volatile (
         ".p2align 4                     \n\t"
         "1:                             \n\t"
@@ -960,13 +502,14 @@ static inline void sad8_4_mmxext(uint8_t *blk1, uint8_t *blk2,
         "sub $2, %0                     \n\t"
         " jg 1b                         \n\t"
         : "+r" (h), "+r" (blk1), "+r" (blk2)
-        : "r" ((x86_reg) stride));
+        : "r" ((x86_reg) stride)
+          NAMED_CONSTRAINTS_ADD(bone));
 }
 
 static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2,
                               int stride, int h)
 {
-    x86_reg len = -(stride * h);
+    x86_reg len = -(x86_reg)stride * h;
     __asm__ volatile (
         ".p2align 4                     \n\t"
         "1:                             \n\t"
@@ -1004,7 +547,7 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2,
 
 static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 {
-    x86_reg len = -(stride * h);
+    x86_reg len = -(x86_reg)stride * h;
     __asm__ volatile (
         "movq  (%1, %%"REG_a"), %%mm0   \n\t"
         "movq 1(%1, %%"REG_a"), %%mm2   \n\t"
@@ -1028,7 +571,7 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
         "punpckhbw %%mm7, %%mm5         \n\t"
         "paddw %%mm4, %%mm2             \n\t"
         "paddw %%mm5, %%mm3             \n\t"
-        "movq 16+"MANGLE(round_tab)", %%mm5 \n\t"
+        "movq %5, %%mm5                 \n\t"
         "paddw %%mm2, %%mm0             \n\t"
         "paddw %%mm3, %%mm1             \n\t"
         "paddw %%mm5, %%mm0             \n\t"
@@ -1052,7 +595,7 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
         " js 1b                         \n\t"
         : "+a" (len)
         : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len),
-          "r" ((x86_reg) stride));
+          "r" ((x86_reg) stride), "m" (round_tab[2]));
 }
 
 static inline int sum_mmx(void)
@@ -1093,7 +636,7 @@ static inline void sad8_y2a_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 static int sad8_ ## suf(MpegEncContext *v, uint8_t *blk2,               \
                         uint8_t *blk1, int stride, int h)               \
 {                                                                       \
-    assert(h == 8);                                                     \
+    av_assert2(h == 8);                                                     \
     __asm__ volatile (                                                  \
         "pxor %%mm7, %%mm7     \n\t"                                    \
         "pxor %%mm6, %%mm6     \n\t"                                    \
@@ -1107,7 +650,7 @@ static int sad8_ ## suf(MpegEncContext *v, uint8_t *blk2,               \
 static int sad8_x2_ ## suf(MpegEncContext *v, uint8_t *blk2,            \
                            uint8_t *blk1, int stride, int h)            \
 {                                                                       \
-    assert(h == 8);                                                     \
+    av_assert2(h == 8);                                                     \
     __asm__ volatile (                                                  \
         "pxor %%mm7, %%mm7     \n\t"                                    \
         "pxor %%mm6, %%mm6     \n\t"                                    \
@@ -1122,7 +665,7 @@ static int sad8_x2_ ## suf(MpegEncContext *v, uint8_t *blk2,            \
 static int sad8_y2_ ## suf(MpegEncContext *v, uint8_t *blk2,            \
                            uint8_t *blk1, int stride, int h)            \
 {                                                                       \
-    assert(h == 8);                                                     \
+    av_assert2(h == 8);                                                     \
     __asm__ volatile (                                                  \
         "pxor %%mm7, %%mm7     \n\t"                                    \
         "pxor %%mm6, %%mm6     \n\t"                                    \
@@ -1137,7 +680,7 @@ static int sad8_y2_ ## suf(MpegEncContext *v, uint8_t *blk2,            \
 static int sad8_xy2_ ## suf(MpegEncContext *v, uint8_t *blk2,           \
                             uint8_t *blk1, int stride, int h)           \
 {                                                                       \
-    assert(h == 8);                                                     \
+    av_assert2(h == 8);                                                     \
     __asm__ volatile (                                                  \
         "pxor %%mm7, %%mm7     \n\t"                                    \
         "pxor %%mm6, %%mm6     \n\t"                                    \
@@ -1211,28 +754,12 @@ PIX_SAD(mmxext)
 
 #endif /* HAVE_INLINE_ASM */
 
-int ff_sse16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                  int line_size, int h);
-
-#define hadamard_func(cpu)                                              \
-    int ff_hadamard8_diff_ ## cpu(MpegEncContext *s, uint8_t *src1,     \
-                                  uint8_t *src2, int stride, int h);    \
-    int ff_hadamard8_diff16_ ## cpu(MpegEncContext *s, uint8_t *src1,   \
-                                    uint8_t *src2, int stride, int h);
-
-hadamard_func(mmx)
-hadamard_func(mmxext)
-hadamard_func(sse2)
-hadamard_func(ssse3)
-
 av_cold void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx)
 {
     int cpu_flags = av_get_cpu_flags();
 
 #if HAVE_INLINE_ASM
     if (INLINE_MMX(cpu_flags)) {
-        c->sum_abs_dctelem = sum_abs_dctelem_mmx;
-
         c->pix_abs[0][0] = sad16_mmx;
         c->pix_abs[0][1] = sad16_x2_mmx;
         c->pix_abs[0][2] = sad16_y2_mmx;
@@ -1245,21 +772,14 @@ av_cold void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx)
         c->sad[0] = sad16_mmx;
         c->sad[1] = sad8_mmx;
 
-        c->sse[0]  = sse16_mmx;
-        c->sse[1]  = sse8_mmx;
         c->vsad[4] = vsad_intra16_mmx;
 
-        c->nsse[0] = nsse16_mmx;
-        c->nsse[1] = nsse8_mmx;
-
         if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
             c->vsad[0] = vsad16_mmx;
         }
     }
 
     if (INLINE_MMXEXT(cpu_flags)) {
-        c->sum_abs_dctelem = sum_abs_dctelem_mmxext;
-
         c->vsad[4] = vsad_intra16_mmxext;
 
         c->pix_abs[0][0] = sad16_mmxext;
@@ -1268,45 +788,46 @@ av_cold void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx)
         c->sad[0] = sad16_mmxext;
         c->sad[1] = sad8_mmxext;
 
+        c->pix_abs[0][1] = sad16_x2_mmxext;
+        c->pix_abs[0][2] = sad16_y2_mmxext;
+        c->pix_abs[1][1] = sad8_x2_mmxext;
+        c->pix_abs[1][2] = sad8_y2_mmxext;
+
         if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
-            c->pix_abs[0][1] = sad16_x2_mmxext;
-            c->pix_abs[0][2] = sad16_y2_mmxext;
             c->pix_abs[0][3] = sad16_xy2_mmxext;
-            c->pix_abs[1][1] = sad8_x2_mmxext;
-            c->pix_abs[1][2] = sad8_y2_mmxext;
             c->pix_abs[1][3] = sad8_xy2_mmxext;
 
             c->vsad[0] = vsad16_mmxext;
         }
     }
 
-    if (INLINE_SSE2(cpu_flags)) {
-        c->sum_abs_dctelem = sum_abs_dctelem_sse2;
-    }
-
-    if (INLINE_SSE2(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_3DNOW)) {
+    if (INLINE_SSE2(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_SSE2SLOW) && avctx->codec_id != AV_CODEC_ID_SNOW) {
         c->sad[0] = sad16_sse2;
     }
 
-#if HAVE_SSSE3_INLINE
-    if (INLINE_SSSE3(cpu_flags)) {
-        c->sum_abs_dctelem = sum_abs_dctelem_ssse3;
-    }
-#endif
 #endif /* HAVE_INLINE_ASM */
 
     if (EXTERNAL_MMX(cpu_flags)) {
         c->hadamard8_diff[0] = ff_hadamard8_diff16_mmx;
         c->hadamard8_diff[1] = ff_hadamard8_diff_mmx;
+        c->sum_abs_dctelem   = ff_sum_abs_dctelem_mmx;
+        c->sse[0]            = ff_sse16_mmx;
+        c->sse[1]            = ff_sse8_mmx;
+#if HAVE_YASM
+        c->nsse[0]           = nsse16_mmx;
+        c->nsse[1]           = nsse8_mmx;
+#endif
     }
 
     if (EXTERNAL_MMXEXT(cpu_flags)) {
         c->hadamard8_diff[0] = ff_hadamard8_diff16_mmxext;
         c->hadamard8_diff[1] = ff_hadamard8_diff_mmxext;
+        c->sum_abs_dctelem   = ff_sum_abs_dctelem_mmxext;
     }
 
     if (EXTERNAL_SSE2(cpu_flags)) {
         c->sse[0] = ff_sse16_sse2;
+        c->sum_abs_dctelem   = ff_sum_abs_dctelem_sse2;
 
 #if HAVE_ALIGNED_STACK
         c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2;
@@ -1314,8 +835,11 @@ av_cold void ff_me_cmp_init_x86(MECmpContext *c, AVCodecContext *avctx)
 #endif
     }
 
-    if (EXTERNAL_SSSE3(cpu_flags) && HAVE_ALIGNED_STACK) {
+    if (EXTERNAL_SSSE3(cpu_flags)) {
+        c->sum_abs_dctelem   = ff_sum_abs_dctelem_ssse3;
+#if HAVE_ALIGNED_STACK
         c->hadamard8_diff[0] = ff_hadamard8_diff16_ssse3;
         c->hadamard8_diff[1] = ff_hadamard8_diff_ssse3;
+#endif
     }
 }
diff --git a/libavcodec/x86/mlpdsp.c b/libavcodec/x86/mlpdsp.c
index 72fc637..b473625 100644
--- a/libavcodec/x86/mlpdsp.c
+++ b/libavcodec/x86/mlpdsp.c
@@ -2,32 +2,31 @@
  * MLP DSP functions x86-optimized
  * Copyright (c) 2009 Ramiro Polla
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "libavutil/attributes.h"
-#include "libavutil/internal.h"
 #include "libavutil/cpu.h"
 #include "libavutil/x86/asm.h"
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/mlpdsp.h"
 #include "libavcodec/mlp.h"
 
-#if HAVE_7REGS && HAVE_INLINE_ASM
+#if HAVE_7REGS && HAVE_INLINE_ASM && HAVE_INLINE_ASM_NONLOCAL_LABELS
 
 extern char ff_mlp_firorder_8;
 extern char ff_mlp_firorder_7;
@@ -179,7 +178,7 @@ static void mlp_filter_channel_x86(int32_t *state, const int32_t *coeff,
 
 av_cold void ff_mlpdsp_init_x86(MLPDSPContext *c)
 {
-#if HAVE_7REGS && HAVE_INLINE_ASM
+#if HAVE_7REGS && HAVE_INLINE_ASM && HAVE_INLINE_ASM_NONLOCAL_LABELS
     int cpu_flags = av_get_cpu_flags();
     if (INLINE_MMX(cpu_flags))
         c->mlp_filter_channel = mlp_filter_channel_x86;
diff --git a/libavcodec/x86/mpegaudiodsp.c b/libavcodec/x86/mpegaudiodsp.c
index 533b4a7..2723167 100644
--- a/libavcodec/x86/mpegaudiodsp.c
+++ b/libavcodec/x86/mpegaudiodsp.c
@@ -2,20 +2,20 @@
  * SIMD-optimized MP3 decoding functions
  * Copyright (c) 2010 Vitor Sessak
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,11 +26,18 @@
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/mpegaudiodsp.h"
 
-void ff_imdct36_float_sse(float *out, float *buf, float *in, float *win);
-void ff_imdct36_float_sse2(float *out, float *buf, float *in, float *win);
-void ff_imdct36_float_sse3(float *out, float *buf, float *in, float *win);
-void ff_imdct36_float_ssse3(float *out, float *buf, float *in, float *win);
-void ff_imdct36_float_avx(float *out, float *buf, float *in, float *win);
+#define DECL(CPU)\
+static void imdct36_blocks_ ## CPU(float *out, float *buf, float *in, int count, int switch_point, int block_type);\
+void ff_imdct36_float_ ## CPU(float *out, float *buf, float *in, float *win);
+
+#if ARCH_X86_32
+DECL(sse)
+#endif
+DECL(sse2)
+DECL(sse3)
+DECL(ssse3)
+DECL(avx)
+
 void ff_four_imdct36_float_sse(float *out, float *buf, float *in, float *win,
                                float *tmpbuf);
 void ff_four_imdct36_float_avx(float *out, float *buf, float *in, float *win,
@@ -38,7 +45,7 @@ void ff_four_imdct36_float_avx(float *out, float *buf, float *in, float *win,
 
 DECLARE_ALIGNED(16, static float, mdct_win_sse)[2][4][4*40];
 
-#if HAVE_SSE2_INLINE
+#if HAVE_6REGS && HAVE_SSE_INLINE
 
 #define MACS(rt, ra, rb) rt+=(ra)*(rb)
 #define MLSS(rt, ra, rb) rt-=(ra)*(rb)
@@ -182,7 +189,7 @@ static void apply_window_mp3(float *in, float *win, int *unused, float *out,
     *out = sum;
 }
 
-#endif /* HAVE_SSE2_INLINE */
+#endif /* HAVE_6REGS && HAVE_SSE_INLINE */
 
 #if HAVE_YASM
 #define DECL_IMDCT_BLOCKS(CPU1, CPU2)                                       \
@@ -217,11 +224,17 @@ static void imdct36_blocks_ ## CPU1(float *out, float *buf, float *in,      \
     }                                                                   \
 }
 
+#if HAVE_SSE
+#if ARCH_X86_32
 DECL_IMDCT_BLOCKS(sse,sse)
+#endif
 DECL_IMDCT_BLOCKS(sse2,sse)
 DECL_IMDCT_BLOCKS(sse3,sse)
 DECL_IMDCT_BLOCKS(ssse3,sse)
+#endif
+#if HAVE_AVX_EXTERNAL
 DECL_IMDCT_BLOCKS(avx,avx)
+#endif
 #endif /* HAVE_YASM */
 
 av_cold void ff_mpadsp_init_x86(MPADSPContext *s)
@@ -242,16 +255,19 @@ av_cold void ff_mpadsp_init_x86(MPADSPContext *s)
         }
     }
 
-#if HAVE_SSE2_INLINE
-    if (INLINE_SSE2(cpu_flags)) {
+#if HAVE_6REGS && HAVE_SSE_INLINE
+    if (INLINE_SSE(cpu_flags)) {
         s->apply_window_float = apply_window_mp3;
     }
-#endif /* HAVE_SSE2_INLINE */
+#endif /* HAVE_SSE_INLINE */
 
 #if HAVE_YASM
+#if HAVE_SSE
+#if ARCH_X86_32
     if (EXTERNAL_SSE(cpu_flags)) {
         s->imdct36_blocks_float = imdct36_blocks_sse;
     }
+#endif
     if (EXTERNAL_SSE2(cpu_flags)) {
         s->imdct36_blocks_float = imdct36_blocks_sse2;
     }
@@ -261,8 +277,11 @@ av_cold void ff_mpadsp_init_x86(MPADSPContext *s)
     if (EXTERNAL_SSSE3(cpu_flags)) {
         s->imdct36_blocks_float = imdct36_blocks_ssse3;
     }
+#endif
+#if HAVE_AVX_EXTERNAL
     if (EXTERNAL_AVX(cpu_flags)) {
         s->imdct36_blocks_float = imdct36_blocks_avx;
     }
+#endif
 #endif /* HAVE_YASM */
 }
diff --git a/libavcodec/x86/mpegvideo.c b/libavcodec/x86/mpegvideo.c
index db94ffa..9353a82 100644
--- a/libavcodec/x86/mpegvideo.c
+++ b/libavcodec/x86/mpegvideo.c
@@ -2,20 +2,20 @@
  * Optimized for ia32 CPUs by Nick Kurshev <nickols_k@mail.ru>
  * h263, mpeg1, mpeg2 dequantizer & draw_edges by Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,7 +26,7 @@
 #include "libavcodec/avcodec.h"
 #include "libavcodec/mpegvideo.h"
 
-#if HAVE_INLINE_ASM
+#if HAVE_MMX_INLINE
 
 static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
                                   int16_t *block, int n, int qscale)
@@ -35,7 +35,7 @@ static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
 
     qmul = qscale << 1;
 
-    assert(s->block_last_index[n]>=0 || s->h263_aic);
+    av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
 
     if (!s->h263_aic) {
         if (n < 4)
@@ -111,7 +111,7 @@ static void dct_unquantize_h263_inter_mmx(MpegEncContext *s,
     qmul = qscale << 1;
     qadd = (qscale - 1) | 1;
 
-    assert(s->block_last_index[n]>=0 || s->h263_aic);
+    av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
 
     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
 
@@ -171,7 +171,7 @@ static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s,
     const uint16_t *quant_matrix;
     int block0;
 
-    assert(s->block_last_index[n]>=0);
+    av_assert2(s->block_last_index[n]>=0);
 
     nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
 
@@ -239,7 +239,7 @@ static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s,
     x86_reg nCoeffs;
     const uint16_t *quant_matrix;
 
-    assert(s->block_last_index[n]>=0);
+    av_assert2(s->block_last_index[n]>=0);
 
     nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
 
@@ -306,7 +306,7 @@ static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
     const uint16_t *quant_matrix;
     int block0;
 
-    assert(s->block_last_index[n]>=0);
+    av_assert2(s->block_last_index[n]>=0);
 
     if(s->alternate_scan) nCoeffs= 63; //FIXME
     else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
@@ -371,7 +371,7 @@ static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
     x86_reg nCoeffs;
     const uint16_t *quant_matrix;
 
-    assert(s->block_last_index[n]>=0);
+    av_assert2(s->block_last_index[n]>=0);
 
     if(s->alternate_scan) nCoeffs= 63; //FIXME
     else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
@@ -442,11 +442,11 @@ __asm__ volatile(
         );
 }
 
-#endif /* HAVE_INLINE_ASM */
+#endif /* HAVE_MMX_INLINE */
 
 av_cold void ff_MPV_common_init_x86(MpegEncContext *s)
 {
-#if HAVE_INLINE_ASM
+#if HAVE_MMX_INLINE
     int cpu_flags = av_get_cpu_flags();
 
     if (INLINE_MMX(cpu_flags)) {
@@ -458,5 +458,5 @@ av_cold void ff_MPV_common_init_x86(MpegEncContext *s)
             s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx;
         s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
     }
-#endif /* HAVE_INLINE_ASM */
+#endif /* HAVE_MMX_INLINE */
 }
diff --git a/libavcodec/x86/mpegvideodsp.c b/libavcodec/x86/mpegvideodsp.c
index 0e5dd0f..941a8e2 100644
--- a/libavcodec/x86/mpegvideodsp.c
+++ b/libavcodec/x86/mpegvideodsp.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -22,6 +22,7 @@
 #include "libavutil/x86/asm.h"
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/mpegvideodsp.h"
+#include "libavcodec/videodsp.h"
 
 #if HAVE_INLINE_ASM
 
@@ -43,20 +44,24 @@ static void gmc_mmx(uint8_t *dst, uint8_t *src,
     const uint16_t dxy4[4] = { dxys, dxys, dxys, dxys };
     const uint16_t dyy4[4] = { dyys, dyys, dyys, dyys };
     const uint64_t shift2  = 2 * shift;
+#define MAX_STRIDE 4096U
+#define MAX_H 8U
+    uint8_t edge_buf[(MAX_H + 1) * MAX_STRIDE];
     int x, y;
 
     const int dxw = (dxx - (1 << (16 + shift))) * (w - 1);
     const int dyh = (dyy - (1 << (16 + shift))) * (h - 1);
     const int dxh = dxy * (h - 1);
     const int dyw = dyx * (w - 1);
+    int need_emu  =  (unsigned) ix >= width  - w ||
+                     (unsigned) iy >= height - h;
 
     if ( // non-constant fullpel offset (3% of blocks)
         ((ox ^ (ox + dxw)) | (ox ^ (ox + dxh)) | (ox ^ (ox + dxw + dxh)) |
          (oy ^ (oy + dyw)) | (oy ^ (oy + dyh)) | (oy ^ (oy + dyw + dyh))) >> (16 + shift) ||
         // uses more than 16 bits of subpel mv (only at huge resolution)
         (dxx | dxy | dyx | dyy) & 15 ||
-        (unsigned) ix >= width  - w ||
-        (unsigned) iy >= height - h) {
+        (need_emu && (h > MAX_H || stride > MAX_STRIDE))) {
         // FIXME could still use mmx for some of the rows
         ff_gmc_c(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy,
                  shift, r, width, height);
@@ -64,6 +69,10 @@ static void gmc_mmx(uint8_t *dst, uint8_t *src,
     }
 
     src += ix + iy * stride;
+    if (need_emu) {
+        ff_emulated_edge_mc_8(edge_buf, src, stride, stride, w + 1, h + 1, ix, iy, width, height);
+        src = edge_buf;
+    }
 
     __asm__ volatile (
         "movd         %0, %%mm6         \n\t"
@@ -150,4 +159,3 @@ av_cold void ff_mpegvideodsp_init_x86(MpegVideoDSPContext *c)
         c->gmc = gmc_mmx;
 #endif /* HAVE_INLINE_ASM */
 }
-
diff --git a/libavcodec/x86/mpegvideoenc.c b/libavcodec/x86/mpegvideoenc.c
index 2e4f06c..b410511 100644
--- a/libavcodec/x86/mpegvideoenc.c
+++ b/libavcodec/x86/mpegvideoenc.c
@@ -2,20 +2,20 @@
  * The simplest mpeg encoder (well, it was the simplest!)
  * Copyright (c) 2000,2001 Fabrice Bellard
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -30,6 +30,8 @@
 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
 DECLARE_ALIGNED(16, static uint16_t, inv_zigzag_direct16)[64];
 
+#if HAVE_6REGS
+
 #if HAVE_MMX_INLINE
 #define COMPILE_TEMPLATE_MMXEXT 0
 #define COMPILE_TEMPLATE_SSE2   0
@@ -81,6 +83,8 @@ DECLARE_ALIGNED(16, static uint16_t, inv_zigzag_direct16)[64];
 #include "mpegvideoenc_template.c"
 #endif /* HAVE_SSSE3_INLINE */
 
+#endif /* HAVE_6REGS */
+
 #if HAVE_INLINE_ASM
 static void  denoise_dct_mmx(MpegEncContext *s, int16_t *block){
     const int intra= s->mb_intra;
@@ -193,7 +197,7 @@ static void  denoise_dct_sse2(MpegEncContext *s, int16_t *block){
 }
 #endif /* HAVE_INLINE_ASM */
 
-av_cold void ff_MPV_encode_init_x86(MpegEncContext *s)
+av_cold void ff_dct_encode_init_x86(MpegEncContext *s)
 {
     const int dct_algo = s->avctx->dct_algo;
     int i;
@@ -205,21 +209,25 @@ av_cold void ff_MPV_encode_init_x86(MpegEncContext *s)
 #if HAVE_MMX_INLINE
         int cpu_flags = av_get_cpu_flags();
         if (INLINE_MMX(cpu_flags)) {
+#if HAVE_6REGS
             s->dct_quantize = dct_quantize_mmx;
+#endif
             s->denoise_dct  = denoise_dct_mmx;
         }
 #endif
-#if HAVE_MMXEXT_INLINE
+#if HAVE_6REGS && HAVE_MMXEXT_INLINE
         if (INLINE_MMXEXT(cpu_flags))
             s->dct_quantize = dct_quantize_mmxext;
 #endif
 #if HAVE_SSE2_INLINE
         if (INLINE_SSE2(cpu_flags)) {
+#if HAVE_6REGS
             s->dct_quantize = dct_quantize_sse2;
+#endif
             s->denoise_dct  = denoise_dct_sse2;
         }
 #endif
-#if HAVE_SSSE3_INLINE
+#if HAVE_6REGS && HAVE_SSSE3_INLINE
         if (INLINE_SSSE3(cpu_flags))
             s->dct_quantize = dct_quantize_ssse3;
 #endif
diff --git a/libavcodec/x86/mpegvideoenc_qns_template.c b/libavcodec/x86/mpegvideoenc_qns_template.c
index 8d8d687..882d486 100644
--- a/libavcodec/x86/mpegvideoenc_qns_template.c
+++ b/libavcodec/x86/mpegvideoenc_qns_template.c
@@ -5,26 +5,26 @@
  * MMX optimization by Michael Niedermayer <michaelni@gmx.at>
  * 3DNow! and SSSE3 optimization by Zuxy Meng <zuxy.meng@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include <assert.h>
 #include <stdint.h>
 
+#include "libavutil/avassert.h"
 #include "libavutil/common.h"
 #include "libavutil/x86/asm.h"
 
@@ -36,7 +36,7 @@ static int DEF(try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[
 {
     x86_reg i=0;
 
-    assert(FFABS(scale) < MAX_ABS);
+    av_assert2(FFABS(scale) < MAX_ABS);
     scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT;
 
     SET_RND(mm6);
diff --git a/libavcodec/x86/mpegvideoenc_template.c b/libavcodec/x86/mpegvideoenc_template.c
index 1274c13..1899ba2 100644
--- a/libavcodec/x86/mpegvideoenc_template.c
+++ b/libavcodec/x86/mpegvideoenc_template.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -107,7 +107,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
     const uint16_t *qmat, *bias;
     LOCAL_ALIGNED_16(int16_t, temp_block, [64]);
 
-    assert((7&(int)(&temp_block[0])) == 0); //did gcc align it correctly?
+    av_assert2((7&(int)(&temp_block[0])) == 0); //did gcc align it correctly?
 
     //s->fdct (block);
     RENAME_FDCT(ff_fdct)(block); // cannot be anything else ...
@@ -117,10 +117,15 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
 
     if (s->mb_intra) {
         int dummy;
-        if (n < 4)
+        if (n < 4){
             q = s->y_dc_scale;
-        else
+            bias = s->q_intra_matrix16[qscale][1];
+            qmat = s->q_intra_matrix16[qscale][0];
+        }else{
             q = s->c_dc_scale;
+            bias = s->q_chroma_intra_matrix16[qscale][1];
+            qmat = s->q_chroma_intra_matrix16[qscale][0];
+        }
         /* note: block[0] is assumed to be positive */
         if (!s->h263_aic) {
         __asm__ volatile (
@@ -135,8 +140,6 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
         block[0]=0; //avoid fake overflow
 //        temp_block[0] = (block[0] + (q >> 1)) / q;
         last_non_zero_p1 = 1;
-        bias = s->q_intra_matrix16[qscale][1];
-        qmat = s->q_intra_matrix16[qscale][0];
     } else {
         last_non_zero_p1 = 0;
         bias = s->q_inter_matrix16[qscale][1];
@@ -172,7 +175,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
             " js 1b                             \n\t"
             PMAX(MM"3", MM"0")
             "movd "MM"3, %%"REG_a"              \n\t"
-            "movzb %%al, %%"REG_a"              \n\t" // last_non_zero_p1
+            "movzbl %%al, %%eax                 \n\t" // last_non_zero_p1
             : "+a" (last_non_zero_p1)
             : "r" (block+64), "r" (qmat), "r" (bias),
               "r" (inv_zigzag_direct16 + 64), "r" (temp_block + 64)
@@ -206,7 +209,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
             " js 1b                             \n\t"
             PMAX(MM"3", MM"0")
             "movd "MM"3, %%"REG_a"              \n\t"
-            "movzb %%al, %%"REG_a"              \n\t" // last_non_zero_p1
+            "movzbl %%al, %%eax                 \n\t" // last_non_zero_p1
             : "+a" (last_non_zero_p1)
             : "r" (block+64), "r" (qmat+64), "r" (bias+64),
               "r" (inv_zigzag_direct16 + 64), "r" (temp_block + 64)
@@ -220,7 +223,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
         "psubusw "MM"1, "MM"4               \n\t"
         "packuswb "MM"4, "MM"4              \n\t"
 #if COMPILE_TEMPLATE_SSE2
-        "packuswb "MM"4, "MM"4              \n\t"
+        "packsswb "MM"4, "MM"4              \n\t"
 #endif
         "movd "MM"4, %0                     \n\t" // *overflow
         : "=g" (*overflow)
@@ -274,6 +277,50 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
         block[0x3E] = temp_block[0x3D]; block[0x27] = temp_block[0x36];
         block[0x3D] = temp_block[0x2F]; block[0x2F] = temp_block[0x37];
         block[0x37] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
+    }else if(s->idsp.perm_type == FF_IDCT_PERM_LIBMPEG2){
+        if(last_non_zero_p1 <= 1) goto end;
+        block[0x04] = temp_block[0x01];
+        block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10];
+        if(last_non_zero_p1 <= 4) goto end;
+        block[0x0C] = temp_block[0x09]; block[0x01] = temp_block[0x02];
+        block[0x05] = temp_block[0x03];
+        if(last_non_zero_p1 <= 7) goto end;
+        block[0x09] = temp_block[0x0A]; block[0x14] = temp_block[0x11];
+        block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20];
+        if(last_non_zero_p1 <= 11) goto end;
+        block[0x1C] = temp_block[0x19];
+        block[0x11] = temp_block[0x12]; block[0x0D] = temp_block[0x0B];
+        block[0x02] = temp_block[0x04]; block[0x06] = temp_block[0x05];
+        if(last_non_zero_p1 <= 16) goto end;
+        block[0x0A] = temp_block[0x0C]; block[0x15] = temp_block[0x13];
+        block[0x19] = temp_block[0x1A]; block[0x24] = temp_block[0x21];
+        block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30];
+        block[0x2C] = temp_block[0x29]; block[0x21] = temp_block[0x22];
+        if(last_non_zero_p1 <= 24) goto end;
+        block[0x1D] = temp_block[0x1B]; block[0x12] = temp_block[0x14];
+        block[0x0E] = temp_block[0x0D]; block[0x03] = temp_block[0x06];
+        block[0x07] = temp_block[0x07]; block[0x0B] = temp_block[0x0E];
+        block[0x16] = temp_block[0x15]; block[0x1A] = temp_block[0x1C];
+        if(last_non_zero_p1 <= 32) goto end;
+        block[0x25] = temp_block[0x23]; block[0x29] = temp_block[0x2A];
+        block[0x34] = temp_block[0x31]; block[0x38] = temp_block[0x38];
+        block[0x3C] = temp_block[0x39]; block[0x31] = temp_block[0x32];
+        block[0x2D] = temp_block[0x2B]; block[0x22] = temp_block[0x24];
+        if(last_non_zero_p1 <= 40) goto end;
+        block[0x1E] = temp_block[0x1D]; block[0x13] = temp_block[0x16];
+        block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17];
+        block[0x1B] = temp_block[0x1E]; block[0x26] = temp_block[0x25];
+        block[0x2A] = temp_block[0x2C]; block[0x35] = temp_block[0x33];
+        if(last_non_zero_p1 <= 48) goto end;
+        block[0x39] = temp_block[0x3A]; block[0x3D] = temp_block[0x3B];
+        block[0x32] = temp_block[0x34]; block[0x2E] = temp_block[0x2D];
+            block[0x23] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
+        block[0x27] = temp_block[0x27]; block[0x2B] = temp_block[0x2E];
+        if(last_non_zero_p1 <= 56) goto end;
+        block[0x36] = temp_block[0x35]; block[0x3A] = temp_block[0x3C];
+        block[0x3E] = temp_block[0x3D]; block[0x33] = temp_block[0x36];
+        block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37];
+        block[0x3B] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
     }else{
         if(last_non_zero_p1 <= 1) goto end;
         block[0x01] = temp_block[0x01];
diff --git a/libavcodec/x86/mpegvideoencdsp.asm b/libavcodec/x86/mpegvideoencdsp.asm
index 9326ee7..4fe6cfe 100644
--- a/libavcodec/x86/mpegvideoencdsp.asm
+++ b/libavcodec/x86/mpegvideoencdsp.asm
@@ -4,92 +4,134 @@
 ;* Copyright (c) 2000, 2001 Fabrice Bellard
 ;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;*****************************************************************************
 
 %include "libavutil/x86/x86util.asm"
 
-SECTION .text
+SECTION_RODATA
 
-INIT_MMX mmx
+cextern pw_1
+
+SECTION .text
 ; int ff_pix_sum16_mmx(uint8_t *pix, int line_size)
-cglobal pix_sum16, 2, 3
+; %1 = number of xmm registers used
+; %2 = number of loops
+; %3 = number of GPRs used; %4 = line_size multiplier used to advance pix each loop (ignored when mmsize == 8)
+%macro PIX_SUM16 4
+cglobal pix_sum16, 2, %3, %1
     movsxdifnidn r1, r1d
-    mov          r2, r1
-    neg          r2
-    shl          r2, 4
-    sub          r0, r2
-    pxor         m7, m7
-    pxor         m6, m6
+    mov          r2, %2
+%if cpuflag(xop)
+    lea          r3, [r1*3]
+%else
+    pxor         m5, m5
+%endif
+    pxor         m4, m4
 .loop:
-    mova         m0, [r0+r2+0]
-    mova         m1, [r0+r2+0]
-    mova         m2, [r0+r2+8]
-    mova         m3, [r0+r2+8]
-    punpcklbw    m0, m7
-    punpckhbw    m1, m7
-    punpcklbw    m2, m7
-    punpckhbw    m3, m7
+%if cpuflag(xop)
+    vphaddubq    m0, [r0]
+    vphaddubq    m1, [r0+r1]
+    vphaddubq    m2, [r0+r1*2]
+    vphaddubq    m3, [r0+r3]
+%else
+    mova         m0, [r0]
+%if mmsize == 8
+    mova         m1, [r0+8]
+%else
+    mova         m1, [r0+r1]
+%endif
+    punpckhbw    m2, m0, m5
+    punpcklbw    m0, m5
+    punpckhbw    m3, m1, m5
+    punpcklbw    m1, m5
+%endif ; cpuflag(xop)
     paddw        m1, m0
     paddw        m3, m2
     paddw        m3, m1
-    paddw        m6, m3
-    add          r2, r1
-    js .loop
-    mova         m5, m6
-    psrlq        m6, 32
-    paddw        m6, m5
-    mova         m5, m6
-    psrlq        m6, 16
-    paddw        m6, m5
-    movd        eax, m6
-    and         eax, 0xffff
+    paddw        m4, m3
+%if mmsize == 8
+    add          r0, r1
+%else
+    lea          r0, [r0+r1*%4]
+%endif
+    dec r2
+    jne .loop
+%if cpuflag(xop)
+    pshufd       m0, m4, q0032
+    paddd        m4, m0
+%else
+    HADDW        m4, m5
+%endif
+    movd        eax, m4
     RET
+%endmacro
 
 INIT_MMX mmx
+PIX_SUM16 0, 16, 3, 0
+INIT_XMM sse2
+PIX_SUM16 6, 8,  3, 2
+%if HAVE_XOP_EXTERNAL
+INIT_XMM xop
+PIX_SUM16 5, 4,  4, 4
+%endif
+
 ; int ff_pix_norm1_mmx(uint8_t *pix, int line_size)
-cglobal pix_norm1, 2, 4
+; %1 = number of xmm registers used
+; %2 = number of loops
+%macro PIX_NORM1 2
+cglobal pix_norm1, 2, 3, %1
     movsxdifnidn r1, r1d
-    mov          r2, 16
+    mov          r2, %2
     pxor         m0, m0
-    pxor         m7, m7
+    pxor         m5, m5
 .loop:
     mova         m2, [r0+0]
+%if mmsize == 8
     mova         m3, [r0+8]
-    mova         m1, m2
-    punpckhbw    m1, m0
+%else
+    mova         m3, [r0+r1]
+%endif
+    punpckhbw    m1, m2, m0
     punpcklbw    m2, m0
-    mova         m4, m3
-    punpckhbw    m3, m0
-    punpcklbw    m4, m0
+    punpckhbw    m4, m3, m0
+    punpcklbw    m3, m0
     pmaddwd      m1, m1
     pmaddwd      m2, m2
     pmaddwd      m3, m3
     pmaddwd      m4, m4
     paddd        m2, m1
     paddd        m4, m3
-    paddd        m7, m2
+    paddd        m5, m2
+    paddd        m5, m4
+%if mmsize == 8
     add          r0, r1
-    paddd        m7, m4
+%else
+    lea          r0, [r0+r1*2]
+%endif
     dec r2
     jne .loop
-    mova         m1, m7
-    psrlq        m7, 32
-    paddd        m1, m7
-    movd        eax, m1
+    HADDD        m5, m1
+    movd        eax, m5
     RET
+%endmacro
+
+INIT_MMX mmx
+PIX_NORM1 0, 16
+INIT_XMM sse2
+PIX_NORM1 6, 8
 
diff --git a/libavcodec/x86/mpegvideoencdsp_init.c b/libavcodec/x86/mpegvideoencdsp_init.c
index 7732e73..d91b902 100644
--- a/libavcodec/x86/mpegvideoencdsp_init.c
+++ b/libavcodec/x86/mpegvideoencdsp_init.c
@@ -1,29 +1,33 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
 #include "libavutil/cpu.h"
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/avcodec.h"
 #include "libavcodec/mpegvideoencdsp.h"
 
 int ff_pix_sum16_mmx(uint8_t *pix, int line_size);
+int ff_pix_sum16_sse2(uint8_t *pix, int line_size);
+int ff_pix_sum16_xop(uint8_t *pix, int line_size);
 int ff_pix_norm1_mmx(uint8_t *pix, int line_size);
+int ff_pix_norm1_sse2(uint8_t *pix, int line_size);
 
 #if HAVE_INLINE_ASM
 
@@ -123,7 +127,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
             : "+r" (ptr)
             : "r" ((x86_reg) wrap), "r" ((x86_reg) width),
               "r" (ptr + wrap * height));
-    } else {
+    } else if (w == 16) {
         __asm__ volatile (
             "1:                                 \n\t"
             "movd            (%0), %%mm0        \n\t"
@@ -141,6 +145,25 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
             "add               %1, %0           \n\t"
             "cmp               %3, %0           \n\t"
             "jb                1b               \n\t"
+            : "+r"(ptr)
+            : "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height)
+            );
+    } else {
+        av_assert1(w == 4);
+        __asm__ volatile (
+            "1:                             \n\t"
+            "movd            (%0), %%mm0    \n\t"
+            "punpcklbw      %%mm0, %%mm0    \n\t"
+            "punpcklwd      %%mm0, %%mm0    \n\t"
+            "movd           %%mm0, -4(%0)   \n\t"
+            "movd      -4(%0, %2), %%mm1    \n\t"
+            "punpcklbw      %%mm1, %%mm1    \n\t"
+            "punpckhwd      %%mm1, %%mm1    \n\t"
+            "punpckhdq      %%mm1, %%mm1    \n\t"
+            "movd           %%mm1, (%0, %2) \n\t"
+            "add               %1, %0       \n\t"
+            "cmp               %3, %0       \n\t"
+            "jb                1b           \n\t"
             : "+r" (ptr)
             : "r" ((x86_reg) wrap), "r" ((x86_reg) width),
               "r" (ptr + wrap * height));
@@ -200,6 +223,15 @@ av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
         c->pix_norm1 = ff_pix_norm1_mmx;
     }
 
+    if (EXTERNAL_SSE2(cpu_flags)) {
+        c->pix_sum     = ff_pix_sum16_sse2;
+        c->pix_norm1   = ff_pix_norm1_sse2;
+    }
+
+    if (EXTERNAL_XOP(cpu_flags)) {
+        c->pix_sum     = ff_pix_sum16_xop;
+    }
+
 #if HAVE_INLINE_ASM
 
     if (INLINE_MMX(cpu_flags)) {
diff --git a/libavcodec/x86/pixblockdsp.asm b/libavcodec/x86/pixblockdsp.asm
index c8fd1b2..00ee9b4 100644
--- a/libavcodec/x86/pixblockdsp.asm
+++ b/libavcodec/x86/pixblockdsp.asm
@@ -4,20 +4,20 @@
 ;* Copyright (c) 2000, 2001 Fabrice Bellard
 ;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;*****************************************************************************
 
@@ -51,7 +51,7 @@ cglobal get_pixels, 3,4
     REP_RET
 
 INIT_XMM sse2
-cglobal get_pixels, 3, 4
+cglobal get_pixels, 3, 4, 5
     movsxdifnidn r2, r2d
     lea          r3, [r2*3]
     pxor         m4, m4
@@ -108,3 +108,28 @@ cglobal diff_pixels, 4,5
     add          r4, 16
     jne .loop
     REP_RET
+
+INIT_XMM sse2
+cglobal diff_pixels, 4, 5, 5
+    movsxdifnidn r3, r3d
+    pxor         m4, m4
+    add          r0,  128
+    mov          r4, -128
+.loop:
+    movh         m0, [r1]
+    movh         m2, [r2]
+    movh         m1, [r1+r3]
+    movh         m3, [r2+r3]
+    punpcklbw    m0, m4
+    punpcklbw    m1, m4
+    punpcklbw    m2, m4
+    punpcklbw    m3, m4
+    psubw        m0, m2
+    psubw        m1, m3
+    mova [r0+r4+0 ], m0
+    mova [r0+r4+16], m1
+    lea          r1, [r1+r3*2]
+    lea          r2, [r2+r3*2]
+    add          r4, 32
+    jne .loop
+    RET
diff --git a/libavcodec/x86/pixblockdsp_init.c b/libavcodec/x86/pixblockdsp_init.c
index 9582e0b..4c31b80 100644
--- a/libavcodec/x86/pixblockdsp_init.c
+++ b/libavcodec/x86/pixblockdsp_init.c
@@ -1,20 +1,20 @@
 /*
  * SIMD-optimized pixel operations
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -27,6 +27,8 @@ void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size);
 void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size);
 void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
                         int stride);
+void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2,
+                         int stride);
 
 av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c,
                                      AVCodecContext *avctx,
@@ -43,5 +45,6 @@ av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c,
     if (EXTERNAL_SSE2(cpu_flags)) {
         if (!high_bit_depth)
             c->get_pixels = ff_get_pixels_sse2;
+        c->diff_pixels = ff_diff_pixels_sse2;
     }
 }
diff --git a/libavcodec/x86/pngdsp.asm b/libavcodec/x86/pngdsp.asm
index c05f3da..8e23ccf 100644
--- a/libavcodec/x86/pngdsp.asm
+++ b/libavcodec/x86/pngdsp.asm
@@ -4,20 +4,20 @@
 ;* Copyright (c) 2008 Loren Merritt <lorenm@u.washington.edu>
 ;* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* 51, Inc., Foundation Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
diff --git a/libavcodec/x86/pngdsp_init.c b/libavcodec/x86/pngdsp_init.c
index 34a3da3..7dca62c 100644
--- a/libavcodec/x86/pngdsp_init.c
+++ b/libavcodec/x86/pngdsp_init.c
@@ -2,20 +2,20 @@
  * x86 PNG optimizations.
  * Copyright (c) 2008 Loren Merrit <lorenm@u.washington.edu>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/x86/proresdsp.asm b/libavcodec/x86/proresdsp.asm
index a0e97b3..255eb24 100644
--- a/libavcodec/x86/proresdsp.asm
+++ b/libavcodec/x86/proresdsp.asm
@@ -1,23 +1,24 @@
 ;******************************************************************************
 ;* x86-SIMD-optimized IDCT for prores
-;* this is identical to "simple" IDCT except for the clip range
+;* this is identical to "simple" IDCT written by Michael Niedermayer
+;* except for the clip range
 ;*
 ;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* 51, Inc., Foundation Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
@@ -47,10 +48,10 @@ w1_plus_w5: times 4 dw W1sh2, +W5sh2
 w5_min_w1:  times 4 dw W5sh2, -W1sh2
 w5_plus_w7: times 4 dw W5sh2, +W7sh2
 w7_min_w5:  times 4 dw W7sh2, -W5sh2
-row_round:  times 8 dw (1<<14)
+pw_88:      times 8 dw 0x2008
 
+cextern pw_1
 cextern pw_4
-cextern pw_8
 cextern pw_512
 cextern pw_1019
 
@@ -91,14 +92,12 @@ section .text align=16
     ; a2 -= W6 * row[2];
     ; a3 -= W2 * row[2];
 %ifidn %1, col
-    paddw       m10,[pw_8]
+    paddw       m10,[pw_88]
 %endif
-    SBUTTERFLY3 wd,  0,  1, 10,  8 ; { row[0], row[2] }[0-3]/[4-7]
 %ifidn %1, row
-    psubw       m10,[row_round]
+    paddw       m10,[pw_1]
 %endif
-    SIGNEXTEND  m8,  m9,  m14      ; { row[2] }[0-3] / [4-7]
-    SIGNEXTEND  m10, m11, m14      ; { row[0] }[0-3] / [4-7]
+    SBUTTERFLY3 wd,  0,  1, 10,  8 ; { row[0], row[2] }[0-3]/[4-7]
     pmaddwd     m2,  m0, [w4_plus_w6]
     pmaddwd     m3,  m1, [w4_plus_w6]
     pmaddwd     m4,  m0, [w4_min_w6]
@@ -107,75 +106,33 @@ section .text align=16
     pmaddwd     m7,  m1, [w4_min_w2]
     pmaddwd     m0, [w4_plus_w2]
     pmaddwd     m1, [w4_plus_w2]
-    pslld       m2,  2
-    pslld       m3,  2
-    pslld       m4,  2
-    pslld       m5,  2
-    pslld       m6,  2
-    pslld       m7,  2
-    pslld       m0,  2
-    pslld       m1,  2
 
     ; a0: -1*row[0]-1*row[2]
     ; a1: -1*row[0]
     ; a2: -1*row[0]
     ; a3: -1*row[0]+1*row[2]
-    psubd       m2,  m10           ; a1[0-3]
-    psubd       m3,  m11           ; a1[4-7]
-    psubd       m4,  m10           ; a2[0-3]
-    psubd       m5,  m11           ; a2[4-7]
-    psubd       m0,  m10
-    psubd       m1,  m11
-    psubd       m6,  m10
-    psubd       m7,  m11
-    psubd       m0,  m8            ; a0[0-3]
-    psubd       m1,  m9            ; a0[4-7]
-    paddd       m6,  m8            ; a3[0-3]
-    paddd       m7,  m9            ; a3[4-7]
 
     ; a0 +=   W4*row[4] + W6*row[6]; i.e. -1*row[4]
     ; a1 -=   W4*row[4] + W2*row[6]; i.e. -1*row[4]-1*row[6]
     ; a2 -=   W4*row[4] - W2*row[6]; i.e. -1*row[4]+1*row[6]
     ; a3 +=   W4*row[4] - W6*row[6]; i.e. -1*row[4]
     SBUTTERFLY3 wd,  8,  9, 13, 12 ; { row[4], row[6] }[0-3]/[4-7]
-    SIGNEXTEND  m13, m14, m10      ; { row[4] }[0-3] / [4-7]
     pmaddwd     m10, m8, [w4_plus_w6]
     pmaddwd     m11, m9, [w4_plus_w6]
-    pslld       m10, 2
-    pslld       m11, 2
-    psubd       m10,  m13
-    psubd       m11,  m14
     paddd       m0,  m10            ; a0[0-3]
     paddd       m1,  m11            ; a0[4-7]
     pmaddwd     m10, m8, [w4_min_w6]
     pmaddwd     m11, m9, [w4_min_w6]
-    pslld       m10, 2
-    pslld       m11, 2
-    psubd       m10, m13
-    psubd       m11, m14
     paddd       m6,  m10           ; a3[0-3]
     paddd       m7,  m11           ; a3[4-7]
     pmaddwd     m10, m8, [w4_min_w2]
     pmaddwd     m11, m9, [w4_min_w2]
     pmaddwd     m8, [w4_plus_w2]
     pmaddwd     m9, [w4_plus_w2]
-    pslld       m10, 2
-    pslld       m11, 2
-    pslld       m8,  2
-    pslld       m9,  2
-    psubd       m10, m13
-    psubd       m11, m14
-    psubd       m8,  m13
-    psubd       m9,  m14
     psubd       m4,  m10           ; a2[0-3] intermediate
     psubd       m5,  m11           ; a2[4-7] intermediate
     psubd       m2,  m8            ; a1[0-3] intermediate
     psubd       m3,  m9            ; a1[4-7] intermediate
-    SIGNEXTEND  m12, m13, m10      ; { row[6] }[0-3] / [4-7]
-    psubd       m4,  m12           ; a2[0-3]
-    psubd       m5,  m13           ; a2[4-7]
-    paddd       m2,  m12           ; a1[0-3]
-    paddd       m3,  m13           ; a1[4-7]
 
     ; load/store
     mova   [r2+  0], m0
@@ -206,8 +163,6 @@ section .text align=16
     ; b3 = MUL(W7, row[1]);
     ; MAC(b3, -W5, row[3]);
     SBUTTERFLY3 wd,  0,  1, 10, 8  ; { row[1], row[3] }[0-3]/[4-7]
-    SIGNEXTEND  m10, m11, m12      ; { row[1] }[0-3] / [4-7]
-    SIGNEXTEND  m8,  m9,  m12      ; { row[3] }[0-3] / [4-7]
     pmaddwd     m2,  m0, [w3_min_w7]
     pmaddwd     m3,  m1, [w3_min_w7]
     pmaddwd     m4,  m0, [w5_min_w1]
@@ -216,35 +171,11 @@ section .text align=16
     pmaddwd     m7,  m1, [w7_min_w5]
     pmaddwd     m0, [w1_plus_w3]
     pmaddwd     m1, [w1_plus_w3]
-    pslld       m2,  2
-    pslld       m3,  2
-    pslld       m4,  2
-    pslld       m5,  2
-    pslld       m6,  2
-    pslld       m7,  2
-    pslld       m0,  2
-    pslld       m1,  2
 
     ; b0: +1*row[1]+2*row[3]
     ; b1: +2*row[1]-1*row[3]
     ; b2: -1*row[1]-1*row[3]
     ; b3: +1*row[1]+1*row[3]
-    psubd       m2,  m8
-    psubd       m3,  m9
-    paddd       m0,  m8
-    paddd       m1,  m9
-    paddd       m8,  m10           ; { row[1] + row[3] }[0-3]
-    paddd       m9,  m11           ; { row[1] + row[3] }[4-7]
-    paddd       m10, m10
-    paddd       m11, m11
-    paddd       m0,  m8            ; b0[0-3]
-    paddd       m1,  m9            ; b0[4-7]
-    paddd       m2,  m10           ; b1[0-3]
-    paddd       m3,  m11           ; b2[4-7]
-    psubd       m4,  m8            ; b2[0-3]
-    psubd       m5,  m9            ; b2[4-7]
-    paddd       m6,  m8            ; b3[0-3]
-    paddd       m7,  m9            ; b3[4-7]
 
     ; MAC(b0,  W5, row[5]);
     ; MAC(b0,  W7, row[7]);
@@ -255,38 +186,16 @@ section .text align=16
     ; MAC(b3,  W3, row[5]);
     ; MAC(b3, -W1, row[7]);
     SBUTTERFLY3 wd,  8,  9, 13, 14 ; { row[5], row[7] }[0-3]/[4-7]
-    SIGNEXTEND  m13, m12, m11      ; { row[5] }[0-3] / [4-7]
-    SIGNEXTEND  m14, m11, m10      ; { row[7] }[0-3] / [4-7]
 
     ; b0: -1*row[5]+1*row[7]
     ; b1: -1*row[5]+1*row[7]
     ; b2: +1*row[5]+2*row[7]
     ; b3: +2*row[5]-1*row[7]
-    paddd       m4,  m13
-    paddd       m5,  m12
-    paddd       m6,  m13
-    paddd       m7,  m12
-    psubd       m13, m14           ; { row[5] - row[7] }[0-3]
-    psubd       m12, m11           ; { row[5] - row[7] }[4-7]
-    paddd       m14, m14
-    paddd       m11, m11
-    psubd       m0,  m13
-    psubd       m1,  m12
-    psubd       m2,  m13
-    psubd       m3,  m12
-    paddd       m4,  m14
-    paddd       m5,  m11
-    paddd       m6,  m13
-    paddd       m7,  m12
 
     pmaddwd     m10, m8, [w1_plus_w5]
     pmaddwd     m11, m9, [w1_plus_w5]
     pmaddwd     m12, m8, [w5_plus_w7]
     pmaddwd     m13, m9, [w5_plus_w7]
-    pslld       m10, 2
-    pslld       m11, 2
-    pslld       m12,  2
-    pslld       m13,  2
     psubd       m2,  m10           ; b1[0-3]
     psubd       m3,  m11           ; b1[4-7]
     paddd       m0,  m12            ; b0[0-3]
@@ -295,10 +204,6 @@ section .text align=16
     pmaddwd     m13, m9, [w7_plus_w3]
     pmaddwd     m8, [w3_min_w1]
     pmaddwd     m9, [w3_min_w1]
-    pslld       m12, 2
-    pslld       m13, 2
-    pslld       m8,  2
-    pslld       m9,  2
     paddd       m4,  m12           ; b2[0-3]
     paddd       m5,  m13           ; b2[4-7]
     paddd       m6,  m8            ; b3[0-3]
@@ -345,7 +250,7 @@ cglobal prores_idct_put_10, 4, 4, %1
     pmullw      m13,[r3+64]
     pmullw      m12,[r3+96]
 
-    IDCT_1D     row, 17
+    IDCT_1D     row, 15
 
     ; transpose for second part of IDCT
     TRANSPOSE8x8W 8, 0, 1, 2, 4, 11, 9, 10, 3
@@ -360,20 +265,11 @@ cglobal prores_idct_put_10, 4, 4, %1
 
     ; for (i = 0; i < 8; i++)
     ;     idctSparseColAdd(dest + i, line_size, block + i);
-    IDCT_1D     col, 20
+    IDCT_1D     col, 18
 
     ; clip/store
-    mova        m6, [pw_512]
     mova        m3, [pw_4]
     mova        m5, [pw_1019]
-    paddw       m8,  m6
-    paddw       m0,  m6
-    paddw       m1,  m6
-    paddw       m2,  m6
-    paddw       m4,  m6
-    paddw       m11, m6
-    paddw       m9,  m6
-    paddw       m10, m6
     pmaxsw      m8,  m3
     pmaxsw      m0,  m3
     pmaxsw      m1,  m3
@@ -422,7 +318,9 @@ INIT_XMM sse2
 idct_put_fn 16
 INIT_XMM sse4
 idct_put_fn 16
+%if HAVE_AVX_EXTERNAL
 INIT_XMM avx
 idct_put_fn 16
+%endif
 
 %endif
diff --git a/libavcodec/x86/proresdsp_init.c b/libavcodec/x86/proresdsp_init.c
index e82dac0..d647788 100644
--- a/libavcodec/x86/proresdsp_init.c
+++ b/libavcodec/x86/proresdsp_init.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2010-2011 Maxim Poliakovski
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -32,7 +32,7 @@ void ff_prores_idct_put_10_sse4(uint16_t *dst, int linesize,
 void ff_prores_idct_put_10_avx (uint16_t *dst, int linesize,
                                 int16_t *block, const int16_t *qmat);
 
-av_cold void ff_proresdsp_init_x86(ProresDSPContext *dsp)
+av_cold void ff_proresdsp_init_x86(ProresDSPContext *dsp, AVCodecContext *avctx)
 {
 #if ARCH_X86_64
     int cpu_flags = av_get_cpu_flags();
diff --git a/libavcodec/x86/qpel.asm b/libavcodec/x86/qpel.asm
index 27a1c63..4e72d50 100644
--- a/libavcodec/x86/qpel.asm
+++ b/libavcodec/x86/qpel.asm
@@ -4,20 +4,20 @@
 ;* Copyright (c) 2003-2013 Michael Niedermayer
 ;* Copyright (c) 2013 Daniel Kang
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
diff --git a/libavcodec/x86/qpeldsp.asm b/libavcodec/x86/qpeldsp.asm
index 8f65550..dc0f900 100644
--- a/libavcodec/x86/qpeldsp.asm
+++ b/libavcodec/x86/qpeldsp.asm
@@ -1,22 +1,23 @@
 ;******************************************************************************
-;* quarterpel DSP functions
-;*
+;* mpeg4 qpel
+;* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 ;* Copyright (c) 2008 Loren Merritt
+;* Copyright (c) 2013 Daniel Kang
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
diff --git a/libavcodec/x86/qpeldsp_init.c b/libavcodec/x86/qpeldsp_init.c
index cdefe50..3268d90 100644
--- a/libavcodec/x86/qpeldsp_init.c
+++ b/libavcodec/x86/qpeldsp_init.c
@@ -1,20 +1,22 @@
 /*
  * quarterpel DSP functions
+ * Copyright (c) 2000, 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -77,13 +79,13 @@ void ff_avg_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst, const uint8_t *src,
 void ff_put_no_rnd_mpeg4_qpel8_v_lowpass_mmxext(uint8_t *dst,
                                                 const uint8_t *src,
                                                 int dstStride, int srcStride);
-#define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmxext
-#define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmxext
+#define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmx
+#define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmx
 
 #if HAVE_YASM
 
-CALL_2X_PIXELS(ff_avg_pixels16_mmxext, ff_avg_pixels8_mmxext, 8)
-CALL_2X_PIXELS(ff_put_pixels16_mmxext, ff_put_pixels8_mmxext, 8)
+#define ff_put_pixels16_mmxext ff_put_pixels16_mmx
+#define ff_put_pixels8_mmxext  ff_put_pixels8_mmx
 
 #define QPEL_OP(OPNAME, RND, MMX)                                       \
 static void OPNAME ## qpel8_mc00_ ## MMX(uint8_t *dst,                  \
diff --git a/libavcodec/x86/rnd_template.c b/libavcodec/x86/rnd_template.c
index a9fb132..c9fd71e 100644
--- a/libavcodec/x86/rnd_template.c
+++ b/libavcodec/x86/rnd_template.c
@@ -7,20 +7,20 @@
  * mostly rewritten by Michael Niedermayer <michaelni@gmx.at>
  * and improved by Zdenek Kabelac <kabi@users.sf.net>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/x86/rv34dsp.asm b/libavcodec/x86/rv34dsp.asm
index 4d9c35b..7732d65 100644
--- a/libavcodec/x86/rv34dsp.asm
+++ b/libavcodec/x86/rv34dsp.asm
@@ -2,20 +2,20 @@
 ;* MMX/SSE2-optimized functions for the RV30 and RV40 decoders
 ;* Copyright (C) 2012 Christophe Gisquet <christophe.gisquet@gmail.com>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
diff --git a/libavcodec/x86/rv34dsp_init.c b/libavcodec/x86/rv34dsp_init.c
index 586e4e9..99c56f9 100644
--- a/libavcodec/x86/rv34dsp_init.c
+++ b/libavcodec/x86/rv34dsp_init.c
@@ -2,20 +2,20 @@
  * RV30/40 MMX/SSE2 optimizations
  * Copyright (C) 2012 Christophe Gisquet <christophe.gisquet@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm
index 0a242b5..fdd81a0 100644
--- a/libavcodec/x86/rv40dsp.asm
+++ b/libavcodec/x86/rv40dsp.asm
@@ -4,20 +4,20 @@
 ;* Copyright (c) 2010 Fiona Glaser <fiona@x264.com>
 ;* Copyright (C) 2012 Christophe Gisquet <christophe.gisquet@gmail.com>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
diff --git a/libavcodec/x86/rv40dsp_init.c b/libavcodec/x86/rv40dsp_init.c
index e006c76..2900e2d 100644
--- a/libavcodec/x86/rv40dsp_init.c
+++ b/libavcodec/x86/rv40dsp_init.c
@@ -2,20 +2,20 @@
  * RV40 decoder motion compensation functions x86-optimised
  * Copyright (c) 2008 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -32,6 +32,13 @@
 #include "libavutil/x86/cpu.h"
 #include "hpeldsp.h"
 
+#define DEFINE_FN(op, size, insn) /* defines static op_rv40_qpel<size>_mc33_<insn>() */ \
+static void op##_rv40_qpel##size##_mc33_##insn(uint8_t *dst, const uint8_t *src, \
+                                               ptrdiff_t stride) \
+{ /* thin wrapper: forwards to ff_<op>_pixels<size>_xy2_<insn>() */ \
+    ff_##op##_pixels##size##_xy2_##insn(dst, src, stride, size); /* size is also passed as the last argument (presumably the block height -- confirm) */ \
+}
+
 #if HAVE_YASM
 void ff_put_rv40_chroma_mc8_mmx  (uint8_t *dst, uint8_t *src,
                                   int stride, int h, int x, int y);
@@ -127,8 +134,8 @@ QPEL_FUNCS_DECL(OP, 3, 2, OPT)
 /** @} */
 
 #define LOOPSIZE  8
-#define HCOFF(x)  (32 * (x - 1))
-#define VCOFF(x)  (32 * (x - 1))
+#define HCOFF(x)  (32 * ((x) - 1))
+#define VCOFF(x)  (32 * ((x) - 1))
 QPEL_MC_DECL(put_, _ssse3)
 QPEL_MC_DECL(avg_, _ssse3)
 
@@ -136,8 +143,8 @@ QPEL_MC_DECL(avg_, _ssse3)
 #undef HCOFF
 #undef VCOFF
 #define LOOPSIZE  8
-#define HCOFF(x)  (64 * (x - 1))
-#define VCOFF(x)  (64 * (x - 1))
+#define HCOFF(x)  (64 * ((x) - 1))
+#define VCOFF(x)  (64 * ((x) - 1))
 QPEL_MC_DECL(put_, _sse2)
 QPEL_MC_DECL(avg_, _sse2)
 
@@ -146,8 +153,8 @@ QPEL_MC_DECL(avg_, _sse2)
 #undef HCOFF
 #undef VCOFF
 #define LOOPSIZE  4
-#define HCOFF(x)  (64 * (x - 1))
-#define VCOFF(x)  (64 * (x - 1))
+#define HCOFF(x)  (64 * ((x) - 1))
+#define VCOFF(x)  (64 * ((x) - 1))
 
 QPEL_MC_DECL(put_, _mmx)
 
@@ -186,30 +193,24 @@ QPEL_FUNCS_SET (OP, 3, 1, OPT) \
 QPEL_FUNCS_SET (OP, 3, 2, OPT)
 /** @} */
 
+DEFINE_FN(put, 8, ssse3)
+
+DEFINE_FN(put, 16, sse2)
+DEFINE_FN(put, 16, ssse3)
+
+DEFINE_FN(avg, 8, mmxext)
+DEFINE_FN(avg, 8, ssse3)
+
+DEFINE_FN(avg, 16, sse2)
+DEFINE_FN(avg, 16, ssse3)
 #endif /* HAVE_YASM */
 
 #if HAVE_MMX_INLINE
-static void put_rv40_qpel8_mc33_mmx(uint8_t *dst, const uint8_t *src,
-                                    ptrdiff_t stride)
-{
-    ff_put_pixels8_xy2_mmx(dst, src, stride, 8);
-}
-static void put_rv40_qpel16_mc33_mmx(uint8_t *dst, const uint8_t *src,
-                                     ptrdiff_t stride)
-{
-    ff_put_pixels16_xy2_mmx(dst, src, stride, 16);
-}
-static void avg_rv40_qpel8_mc33_mmx(uint8_t *dst, const uint8_t *src,
-                                    ptrdiff_t stride)
-{
-    ff_avg_pixels8_xy2_mmx(dst, src, stride, 8);
-}
-static void avg_rv40_qpel16_mc33_mmx(uint8_t *dst, const uint8_t *src,
-                                     ptrdiff_t stride)
-{
-    ff_avg_pixels16_xy2_mmx(dst, src, stride, 16);
-}
-#endif /* HAVE_MMX_INLINE */
+DEFINE_FN(put, 8, mmx)
+DEFINE_FN(avg, 8, mmx)
+DEFINE_FN(put, 16, mmx)
+DEFINE_FN(avg, 16, mmx)
+#endif
 
 av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c)
 {
@@ -240,6 +241,7 @@ av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c)
 #endif
     }
     if (EXTERNAL_MMXEXT(cpu_flags)) {
+        c->avg_pixels_tab[1][15]        = avg_rv40_qpel8_mc33_mmxext;
         c->avg_chroma_pixels_tab[0]     = ff_avg_rv40_chroma_mc8_mmxext;
         c->avg_chroma_pixels_tab[1]     = ff_avg_rv40_chroma_mc4_mmxext;
         c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_mmxext;
@@ -251,6 +253,8 @@ av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c)
 #endif
     }
     if (EXTERNAL_SSE2(cpu_flags)) {
+        c->put_pixels_tab[0][15]        = put_rv40_qpel16_mc33_sse2;
+        c->avg_pixels_tab[0][15]        = avg_rv40_qpel16_mc33_sse2;
         c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_sse2;
         c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_sse2;
         c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_sse2;
@@ -259,6 +263,10 @@ av_cold void ff_rv40dsp_init_x86(RV34DSPContext *c)
         QPEL_MC_SET(avg_, _sse2)
     }
     if (EXTERNAL_SSSE3(cpu_flags)) {
+        c->put_pixels_tab[0][15]        = put_rv40_qpel16_mc33_ssse3;
+        c->put_pixels_tab[1][15]        = put_rv40_qpel8_mc33_ssse3;
+        c->avg_pixels_tab[0][15]        = avg_rv40_qpel16_mc33_ssse3;
+        c->avg_pixels_tab[1][15]        = avg_rv40_qpel8_mc33_ssse3;
         c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_ssse3;
         c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_ssse3;
         c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_ssse3;
diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index d7164b6..d556f27 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -2,20 +2,20 @@
 ;* AAC Spectral Band Replication decoding functions
 ;* Copyright (C) 2012 Christophe Gisquet <christophe.gisquet@gmail.com>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
@@ -26,6 +26,12 @@ SECTION_RODATA
 ps_mask         times 2 dd 1<<31, 0
 ps_mask2        times 2 dd 0, 1<<31
 ps_neg          times 4 dd 1<<31
+ps_noise0       times 2 dd  1.0,  0.0,
+ps_noise2       times 2 dd -1.0,  0.0
+ps_noise13      dd  0.0,  1.0, 0.0, -1.0
+                dd  0.0, -1.0, 0.0,  1.0
+                dd  0.0,  1.0, 0.0, -1.0
+cextern         sbr_noise_table
 
 SECTION_TEXT
 
@@ -136,7 +142,6 @@ cglobal sbr_hf_gen, 4,4,8, X_high, X_low, alpha0, alpha1, BW, S, E
     mulps      m2, bw             ; (a1[0] a1[1])*bw*bw = (a0 a1)
     mova       m3, m1
     mova       m4, m2
-    mova       m7, [ps_mask]
 
     ; Set pointers
 %if ARCH_X86_64 == 0 || WIN64
@@ -156,30 +161,28 @@ cglobal sbr_hf_gen, 4,4,8, X_high, X_low, alpha0, alpha1, BW, S, E
     shl      start, 3            ; offset from num loops
 
     mova        m0, [X_lowq + start]
-    movlhps     m1, m1           ; (a2 a3 a2 a3)
-    movlhps     m2, m2           ; (a0 a1 a0 a1)
-    shufps      m3, m3, q0101    ; (a3 a2 a3 a2)
-    shufps      m4, m4, q0101    ; (a1 a0 a1 a0)
-    xorps       m3, m7           ; (-a3 a2 -a3 a2)
-    xorps       m4, m7           ; (-a1 a0 -a1 a0)
+    shufps      m3, m3, q1111
+    shufps      m4, m4, q1111
+    xorps       m3, [ps_mask]
+    shufps      m1, m1, q0000
+    shufps      m2, m2, q0000
+    xorps       m4, [ps_mask]
 .loop2:
-    mova        m5, m0
+    movu        m7, [X_lowq + start + 8]        ; BbCc
     mova        m6, m0
-    shufps      m0, m0, q2200    ; {Xl[-2][0],",Xl[-1][0],"}
-    shufps      m5, m5, q3311    ; {Xl[-2][1],",Xl[-1][1],"}
-    mulps       m0, m2
-    mulps       m5, m4
-    mova        m7, m6
-    addps       m5, m0
-    mova        m0, [X_lowq + start + 2*2*4]
-    shufps      m6, m0, q0022    ; {Xl[-1][0],",Xl[0][0],"}
-    shufps      m7, m0, q1133    ; {Xl[-1][1],",Xl[1][1],"}
-    mulps       m6, m1
+    mova        m5, m7
+    shufps      m0, m0, q2301                   ; aAbB
+    shufps      m7, m7, q2301                   ; bBcC
+    mulps       m0, m4
     mulps       m7, m3
-    addps       m5, m6
+    mulps       m6, m2
+    mulps       m5, m1
+    addps       m7, m0
+    mova        m0, [X_lowq + start +16]        ; CcDd
     addps       m7, m0
-    addps       m5, m7
-    mova  [X_highq + start], m5
+    addps       m6, m5
+    addps       m7, m6
+    mova  [X_highq + start], m7
     add     start, 16
     jnz         .loop2
     RET
@@ -246,33 +249,47 @@ cglobal sbr_neg_odd_64, 1,2,4,z
     jne      .loop
     REP_RET
 
-INIT_XMM sse2
 ; void ff_sbr_qmf_deint_bfly_sse2(float *v, const float *src0, const float *src1)
+%macro SBR_QMF_DEINT_BFLY  0
 cglobal sbr_qmf_deint_bfly, 3,5,8, v,src0,src1,vrev,c
     mov               cq, 64*4-2*mmsize
     lea            vrevq, [vq + 64*4]
 .loop:
     mova              m0, [src0q+cq]
     mova              m1, [src1q]
-    mova              m2, [src0q+cq+mmsize]
-    mova              m3, [src1q+mmsize]
-    pshufd            m4, m0, q0123
-    pshufd            m5, m1, q0123
-    pshufd            m6, m2, q0123
-    pshufd            m7, m3, q0123
-    addps             m3, m4
+    mova              m4, [src0q+cq+mmsize]
+    mova              m5, [src1q+mmsize]
+%if cpuflag(sse2)
+    pshufd            m2, m0, q0123
+    pshufd            m3, m1, q0123
+    pshufd            m6, m4, q0123
+    pshufd            m7, m5, q0123
+%else
+    shufps            m2, m0, m0, q0123
+    shufps            m3, m1, m1, q0123
+    shufps            m6, m4, m4, q0123
+    shufps            m7, m5, m5, q0123
+%endif
+    addps             m5, m2
     subps             m0, m7
     addps             m1, m6
-    subps             m2, m5
+    subps             m4, m3
     mova         [vrevq], m1
-    mova  [vrevq+mmsize], m3
+    mova  [vrevq+mmsize], m5
     mova         [vq+cq], m0
-    mova  [vq+cq+mmsize], m2
+    mova  [vq+cq+mmsize], m4
     add            src1q, 2*mmsize
     add            vrevq, 2*mmsize
     sub               cq, 2*mmsize
     jge            .loop
     REP_RET
+%endmacro
+
+INIT_XMM sse
+SBR_QMF_DEINT_BFLY
+
+INIT_XMM sse2
+SBR_QMF_DEINT_BFLY
 
 INIT_XMM sse2
 cglobal sbr_qmf_pre_shuffle, 1,4,6,z
@@ -303,3 +320,128 @@ cglobal sbr_qmf_pre_shuffle, 1,4,6,z
     movq       m2, [zq]
     movq    [r2q], m2
     REP_RET
+
+%ifdef PIC
+%define NREGS 1
+%if UNIX64
+%define NOISE_TABLE r6q ; r5q is m_max
+%else
+%define NOISE_TABLE r5q
+%endif
+%else
+%define NREGS 0
+%define NOISE_TABLE sbr_noise_table
+%endif
+
+%macro LOAD_NST  1
+%ifdef PIC
+    lea  NOISE_TABLE, [%1]                 ; PIC: take the table address with lea; this overwrites the NOISE_TABLE register
+    mova          m0, [kxq + NOISE_TABLE]  ; m0 = 16 bytes at (macro argument + kxq)
+%else
+    mova          m0, [kxq + %1]           ; non-PIC: address the table directly; m0 = 16 bytes at (macro argument + kxq)
+%endif
+%endmacro
+
+INIT_XMM sse2
+; sbr_hf_apply_noise_0(float (*Y)[2], const float *s_m,
+;                      const float *q_filt, int noise,
+;                      int kx, int m_max)
+cglobal sbr_hf_apply_noise_0, 5,5+NREGS+UNIX64,8, Y,s_m,q_filt,noise,kx,m_max
+    mova       m0, [ps_noise0]  ; m0 = phi_sign vector (1.0, 0.0, 1.0, 0.0)
+    jmp apply_noise_main        ; continue in the tail shared by all four variants
+
+; sbr_hf_apply_noise_1(float (*Y)[2], const float *s_m,
+;                      const float *q_filt, int noise,
+;                      int kx, int m_max)
+cglobal sbr_hf_apply_noise_1, 5,5+NREGS+UNIX64,8, Y,s_m,q_filt,noise,kx,m_max
+    and       kxq, 1            ; keep only the parity of kx
+    shl       kxq, 4            ; parity * 16 bytes: selects row 0 or row 1 of ps_noise13 as phi_sign
+    LOAD_NST  ps_noise13
+    jmp apply_noise_main        ; continue in the shared tail
+
+; sbr_hf_apply_noise_2(float (*Y)[2], const float *s_m,
+;                      const float *q_filt, int noise,
+;                      int kx, int m_max)
+cglobal sbr_hf_apply_noise_2, 5,5+NREGS+UNIX64,8, Y,s_m,q_filt,noise,kx,m_max
+    mova       m0, [ps_noise2]  ; m0 = phi_sign vector (-1.0, 0.0, -1.0, 0.0)
+    jmp apply_noise_main        ; continue in the shared tail
+
+; sbr_hf_apply_noise_3(float (*Y)[2], const float *s_m,
+;                      const float *q_filt, int noise,
+;                      int kx, int m_max)
+cglobal sbr_hf_apply_noise_3, 5,5+NREGS+UNIX64,8, Y,s_m,q_filt,noise,kx,m_max
+    and       kxq, 1            ; keep only the parity of kx
+    shl       kxq, 4            ; parity * 16 bytes: selects row 1 or row 2 of ps_noise13 as phi_sign
+    LOAD_NST  ps_noise13+16
+
+apply_noise_main:
+%if ARCH_X86_64 == 0 || WIN64
+    mov       kxd, m_maxm       ; fetch m_max into kx's register, which then serves as the counter (m_maxm: presumably x86inc's argument-location name)
+%define count kxq
+%else
+%define count m_maxq
+%endif
+    dec    noiseq               ; noise - 1; with the +1*mmsize displacement below the first entry read is noise + 1
+    shl    count, 2             ; m_max floats -> bytes
+%ifdef PIC
+    lea NOISE_TABLE, [sbr_noise_table] ; (re)load the table base; LOAD_NST may have used this register
+%endif
+    lea        Yq, [Yq + 2*count] ; point Y, s_m and q_filt at their ends (Y advances 8 bytes per element) ...
+    add      s_mq, count
+    add   q_filtq, count
+    shl    noiseq, 3            ; noise index -> byte offset, 8 bytes per table entry
+    pxor       m5, m5           ; m5 = all zero, used for the s_m == 0 test
+    neg    count                ; ... and index them with a negative offset that counts up to 0
+.loop:
+    mova       m1, [q_filtq + count] ; four q_filt values
+    movu       m3, [noiseq + NOISE_TABLE + 1*mmsize] ; 16 bytes of noise table; NOTE(review): loaded before the wrap below and spans 4 entries -- confirm the table is readable past entry 0x1ff
+    movu       m4, [noiseq + NOISE_TABLE + 2*mmsize] ; the following 16 bytes of noise table
+    add    noiseq, 2*mmsize     ; advance by 32 bytes of table
+    and    noiseq, 0x1ff<<3     ; wrap the byte offset at 512 eight-byte entries
+    punpckhdq  m2, m1, m1       ; m2 = upper two q_filt values, each duplicated
+    punpckldq  m1, m1           ; m1 = lower two q_filt values, each duplicated
+    mulps      m1, m3 ; m1 = q_filt[m] * ff_sbr_noise_table[noise]
+    mulps      m2, m4 ; m2 = q_filt[m] * ff_sbr_noise_table[noise]
+    mova       m3, [s_mq + count] ; four s_m values
+    ; TODO: replace by a vpermd in AVX2
+    punpckhdq  m4, m3, m3       ; m4 = upper two s_m values, each duplicated
+    punpckldq  m3, m3           ; m3 = lower two s_m values, each duplicated
+    pcmpeqd    m6, m3, m5 ; m6 = all-ones in lanes where s_m is bitwise zero
+    pcmpeqd    m7, m4, m5 ; m7 = all-ones in lanes where s_m is bitwise zero
+    mulps      m3, m0 ; s_m[m] * phi_sign
+    mulps      m4, m0 ; s_m[m] * phi_sign
+    pand       m1, m6           ; keep the noise term only in lanes where s_m is zero
+    pand       m2, m7
+    movu       m6, [Yq + 2*count] ; current Y values (8 floats over two registers)
+    movu       m7, [Yq + 2*count + mmsize]
+    addps      m3, m1           ; s_m * phi_sign + masked noise term
+    addps      m4, m2
+    addps      m6, m3           ; accumulate into Y
+    addps      m7, m4
+    movu    [Yq + 2*count], m6
+    movu    [Yq + 2*count + mmsize], m7
+    add    count, mmsize        ; next four elements
+    jl      .loop               ; until the negative offset reaches 0
+    RET
+
+INIT_XMM sse
+cglobal sbr_qmf_deint_neg, 2,4,4,v,src,vrev,c
+%define COUNT  32*4
+%define OFFSET 32*4
+    mov        cq, -COUNT                   ; c = negative byte offset that counts up to 0
+    lea     vrevq, [vq + OFFSET + COUNT]    ; vrev = v + 256 bytes; indexed with c it walks the second 128 bytes of v
+    add        vq, OFFSET-mmsize            ; v = last 16-byte vector of the first 128 bytes
+    add      srcq, 2*COUNT                  ; src = end of the 256 input bytes; indexed with 2*c
+    mova       m3, [ps_neg]                 ; sign-bit mask for all four lanes
+.loop:
+    mova       m0, [srcq + 2*cq + 0*mmsize] ; next eight input floats: the low four ...
+    mova       m1, [srcq + 2*cq + 1*mmsize] ; ... and the high four
+    shufps     m2, m0, m1, q2020            ; m2 = even-indexed inputs (0, 2, 4, 6)
+    shufps     m1, m0, q1313                ; m1 = odd-indexed inputs in reverse order (7, 5, 3, 1)
+    xorps      m2, m3                       ; negate the even-indexed inputs
+    mova     [vq], m1                       ; first half of v is filled from its end downwards
+    mova  [vrevq + cq], m2                  ; second half of v is filled upwards
+    sub        vq, mmsize
+    add        cq, mmsize
+    jl      .loop                           ; until c reaches 0
+    REP_RET
diff --git a/libavcodec/x86/sbrdsp_init.c b/libavcodec/x86/sbrdsp_init.c
index 9600852..a2aca74 100644
--- a/libavcodec/x86/sbrdsp_init.c
+++ b/libavcodec/x86/sbrdsp_init.c
@@ -2,20 +2,20 @@
  * AAC Spectral Band Replication decoding functions
  * Copyright (c) 2012 Christophe Gisquet <christophe.gisquet@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -34,9 +34,25 @@ void ff_sbr_hf_gen_sse(float (*X_high)[2], const float (*X_low)[2],
                        float bw, int start, int end);
 void ff_sbr_neg_odd_64_sse(float *z);
 void ff_sbr_qmf_post_shuffle_sse(float W[32][2], const float *z);
+void ff_sbr_qmf_deint_bfly_sse(float *v, const float *src0, const float *src1);
 void ff_sbr_qmf_deint_bfly_sse2(float *v, const float *src0, const float *src1);
 void ff_sbr_qmf_pre_shuffle_sse2(float *z);
 
+void ff_sbr_hf_apply_noise_0_sse2(float (*Y)[2], const float *s_m,
+                                  const float *q_filt, int noise,
+                                  int kx, int m_max);
+void ff_sbr_hf_apply_noise_1_sse2(float (*Y)[2], const float *s_m,
+                                  const float *q_filt, int noise,
+                                  int kx, int m_max);
+void ff_sbr_hf_apply_noise_2_sse2(float (*Y)[2], const float *s_m,
+                                  const float *q_filt, int noise,
+                                  int kx, int m_max);
+void ff_sbr_hf_apply_noise_3_sse2(float (*Y)[2], const float *s_m,
+                                  const float *q_filt, int noise,
+                                  int kx, int m_max);
+
+void ff_sbr_qmf_deint_neg_sse(float *v, const float *src);
+
 av_cold void ff_sbrdsp_init_x86(SBRDSPContext *s)
 {
     int cpu_flags = av_get_cpu_flags();
@@ -48,10 +64,16 @@ av_cold void ff_sbrdsp_init_x86(SBRDSPContext *s)
         s->hf_g_filt  = ff_sbr_hf_g_filt_sse;
         s->hf_gen     = ff_sbr_hf_gen_sse;
         s->qmf_post_shuffle = ff_sbr_qmf_post_shuffle_sse;
+        s->qmf_deint_bfly   = ff_sbr_qmf_deint_bfly_sse;
+        s->qmf_deint_neg    = ff_sbr_qmf_deint_neg_sse;
     }
 
     if (EXTERNAL_SSE2(cpu_flags)) {
         s->qmf_deint_bfly   = ff_sbr_qmf_deint_bfly_sse2;
         s->qmf_pre_shuffle  = ff_sbr_qmf_pre_shuffle_sse2;
+        s->hf_apply_noise[0] = ff_sbr_hf_apply_noise_0_sse2;
+        s->hf_apply_noise[1] = ff_sbr_hf_apply_noise_1_sse2;
+        s->hf_apply_noise[2] = ff_sbr_hf_apply_noise_2_sse2;
+        s->hf_apply_noise[3] = ff_sbr_hf_apply_noise_3_sse2;
     }
 }
diff --git a/libavcodec/x86/simple_idct.c b/libavcodec/x86/simple_idct.c
index 1002a78..6b4bd18 100644
--- a/libavcodec/x86/simple_idct.c
+++ b/libavcodec/x86/simple_idct.c
@@ -3,24 +3,23 @@
  *
  * Copyright (c) 2001, 2002 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "libavutil/internal.h"
 #include "libavutil/mem.h"
 #include "libavutil/x86/asm.h"
 #include "idctdsp.h"
@@ -83,7 +82,7 @@ DECLARE_ALIGNED(8, static const int16_t, coeffs)[]= {
 
 static inline void idct(int16_t *block)
 {
-        DECLARE_ALIGNED(8, int64_t, align_tmp)[16];
+        LOCAL_ALIGNED_8(int64_t, align_tmp, [16]);
         int16_t * const temp= (int16_t*)align_tmp;
 
         __asm__ volatile(
@@ -1145,6 +1144,7 @@ Temp
 
 "9: \n\t"
                 :: "r" (block), "r" (temp), "r" (coeffs)
+                   NAMED_CONSTRAINTS_ADD(wm1010,d40000)
                 : "%eax"
         );
 }
diff --git a/libavcodec/x86/simple_idct.h b/libavcodec/x86/simple_idct.h
index 4fc2914..4a98732 100644
--- a/libavcodec/x86/simple_idct.h
+++ b/libavcodec/x86/simple_idct.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/x86/snowdsp.c b/libavcodec/x86/snowdsp.c
new file mode 100644
index 0000000..2778489
--- /dev/null
+++ b/libavcodec/x86/snowdsp.c
@@ -0,0 +1,908 @@
+/*
+ * MMX and SSE2 optimized snow DSP utils
+ * Copyright (c) 2005-2006 Robert Edele <yartrebo@earthlink.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/cpu.h"
+#include "libavutil/x86/asm.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/snow.h"
+#include "libavcodec/snow_dwt.h"
+
+#if HAVE_INLINE_ASM
+/* SSE2 horizontal compose97i of one line of `width` coefficients in b: four lifting passes, then interleave of b and temp. */
+static void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, IDWTELEM *temp, int width){
+    const int w2= (width+1)>>1; /* width/2 rounded up */
+    const int w_l= (width>>1); /* width/2 rounded down */
+    const int w_r= w2 - 1;
+    int i;
+    /* NOTE(review): xmm3/xmm6/xmm7 are set in one asm statement and read in later ones; no xmm clobbers are declared. */
+    { // Lift 0
+        IDWTELEM * const ref = b + w2 - 1;
+        IDWTELEM b_0 = b[0]; //By allowing the first entry in b[0] to be calculated twice
+        // (the first time erroneously), we allow the SSE2 code to run an extra pass.
+        // The savings in code and time are well worth having to store this value and
+        // calculate b[0] correctly afterwards.
+        /* The asm uses word ops and a 2-byte step for the next element: assumes IDWTELEM is 16 bits wide — TODO confirm. */
+        i = 0;
+        __asm__ volatile(
+            "pcmpeqd   %%xmm7, %%xmm7         \n\t" /* xmm7 = all ones (-1 in every word) */
+            "pcmpeqd   %%xmm3, %%xmm3         \n\t"
+            "psllw         $1, %%xmm3         \n\t"
+            "paddw     %%xmm7, %%xmm3         \n\t"
+            "psllw        $13, %%xmm3         \n\t" /* xmm3 = 0xA000 (-3 << 13) in every word */
+        ::);
+        for(; i<w_l-15; i+=16){ /* 32 bytes (two xmm registers) of b per iteration */
+            __asm__ volatile(
+                "movdqu   (%1), %%xmm1        \n\t"
+                "movdqu 16(%1), %%xmm5        \n\t"
+                "movdqu  2(%1), %%xmm2        \n\t" /* same data shifted by one element (ref[i+1]) */
+                "movdqu 18(%1), %%xmm6        \n\t"
+                "paddw  %%xmm1, %%xmm2        \n\t"
+                "paddw  %%xmm5, %%xmm6        \n\t"
+                "paddw  %%xmm7, %%xmm2        \n\t" /* ref[i] + ref[i+1] - 1 */
+                "paddw  %%xmm7, %%xmm6        \n\t"
+                "pmulhw %%xmm3, %%xmm2        \n\t" /* high word of the signed product with xmm3 */
+                "pmulhw %%xmm3, %%xmm6        \n\t"
+                "paddw    (%0), %%xmm2        \n\t" /* += b[i]; this memory operand and the movdqa stores need &b[i] 16-byte aligned */
+                "paddw  16(%0), %%xmm6        \n\t"
+                "movdqa %%xmm2, (%0)          \n\t"
+                "movdqa %%xmm6, 16(%0)        \n\t"
+                :: "r"(&b[i]), "r"(&ref[i])
+                : "memory"
+            );
+        }
+        snow_horizontal_compose_lift_lead_out(i, b, b, ref, width, w_l, 0, W_DM, W_DO, W_DS); /* helper defined elsewhere; presumably finishes the elements from i on in C — confirm */
+        b[0] = b_0 - ((W_DM * 2 * ref[1]+W_DO)>>W_DS); /* recompute b[0] from its saved input value */
+    }
+
+    { // Lift 1
+        IDWTELEM * const dst = b+w2;
+        /* Scalar steps until &dst[i] is 32-byte aligned (or i reaches w_r), then 16 elements per asm iteration. */
+        i = 0;
+        for(; (((x86_reg)&dst[i]) & 0x1F) && i<w_r; i++){
+            dst[i] = dst[i] - (b[i] + b[i + 1]);
+        }
+        for(; i<w_r-15; i+=16){
+            __asm__ volatile(
+                "movdqu   (%1), %%xmm1        \n\t"
+                "movdqu 16(%1), %%xmm5        \n\t"
+                "movdqu  2(%1), %%xmm2        \n\t"
+                "movdqu 18(%1), %%xmm6        \n\t"
+                "paddw  %%xmm1, %%xmm2        \n\t" /* b[i] + b[i+1] */
+                "paddw  %%xmm5, %%xmm6        \n\t"
+                "movdqa   (%0), %%xmm0        \n\t"
+                "movdqa 16(%0), %%xmm4        \n\t"
+                "psubw  %%xmm2, %%xmm0        \n\t" /* dst[i] - (b[i] + b[i+1]) */
+                "psubw  %%xmm6, %%xmm4        \n\t"
+                "movdqa %%xmm0, (%0)          \n\t"
+                "movdqa %%xmm4, 16(%0)        \n\t"
+                :: "r"(&dst[i]), "r"(&b[i])
+                : "memory"
+            );
+        }
+        snow_horizontal_compose_lift_lead_out(i, dst, dst, b, width, w_r, 1, W_CM, W_CO, W_CS);
+    }
+
+    { // Lift 2
+        IDWTELEM * const ref = b+w2 - 1;
+        IDWTELEM b_0 = b[0];
+        /* Relies on xmm7 still being all ones from Lift 0: it becomes 0x8000, and xmm6 becomes 0x8007, in every word. */
+        i = 0;
+        __asm__ volatile(
+            "psllw         $15, %%xmm7        \n\t"
+            "pcmpeqw    %%xmm6, %%xmm6        \n\t"
+            "psrlw         $13, %%xmm6        \n\t"
+            "paddw      %%xmm7, %%xmm6        \n\t"
+        ::);
+        for(; i<w_l-15; i+=16){
+            __asm__ volatile(
+                "movdqu   (%1), %%xmm0        \n\t"
+                "movdqu 16(%1), %%xmm4        \n\t"
+                "movdqu  2(%1), %%xmm1        \n\t"
+                "movdqu 18(%1), %%xmm5        \n\t" //FIXME try aligned reads and shifts
+                "paddw  %%xmm6, %%xmm0        \n\t"
+                "paddw  %%xmm6, %%xmm4        \n\t"
+                "paddw  %%xmm7, %%xmm1        \n\t"
+                "paddw  %%xmm7, %%xmm5        \n\t"
+                "pavgw  %%xmm1, %%xmm0        \n\t" /* unsigned rounded average of the two biased operands */
+                "pavgw  %%xmm5, %%xmm4        \n\t"
+                "psubw  %%xmm7, %%xmm0        \n\t" /* subtract the 0x8000 constant again */
+                "psubw  %%xmm7, %%xmm4        \n\t"
+                "psraw      $1, %%xmm0        \n\t"
+                "psraw      $1, %%xmm4        \n\t"
+                "movdqa   (%0), %%xmm1        \n\t" /* b[i]; aligned load */
+                "movdqa 16(%0), %%xmm5        \n\t"
+                "paddw  %%xmm1, %%xmm0        \n\t"
+                "paddw  %%xmm5, %%xmm4        \n\t"
+                "psraw      $2, %%xmm0        \n\t"
+                "psraw      $2, %%xmm4        \n\t"
+                "paddw  %%xmm1, %%xmm0        \n\t" /* b[i] + shifted term */
+                "paddw  %%xmm5, %%xmm4        \n\t"
+                "movdqa %%xmm0, (%0)          \n\t"
+                "movdqa %%xmm4, 16(%0)        \n\t"
+                :: "r"(&b[i]), "r"(&ref[i])
+                : "memory"
+            );
+        }
+        snow_horizontal_compose_liftS_lead_out(i, b, b, ref, width, w_l);
+        b[0] = b_0 + ((2 * ref[1] + W_BO-1 + 4 * b_0) >> W_BS); /* NOTE(review): W_BO-1 here, but ff_snow_horizontal_compose97i_mmx uses W_BO for this update — confirm */
+    }
+
+    { // Lift 3
+        IDWTELEM * const src = b+w2;
+        /* Scalar steps until &temp[i] is 32-byte aligned (or i reaches w_r); results go to temp, not back into b. */
+        i = 0;
+        for(; (((x86_reg)&temp[i]) & 0x1F) && i<w_r; i++){
+            temp[i] = src[i] - ((-W_AM*(b[i] + b[i+1]))>>W_AS); /* NOTE(review): no W_AO+1 term here, unlike the lead-out call below — confirm */
+        }
+        for(; i<w_r-7; i+=8){ /* NOTE(review): i advances by 8 elements but the asm touches 32 bytes at each pointer — confirm */
+            __asm__ volatile(
+                "movdqu  2(%1), %%xmm2        \n\t"
+                "movdqu 18(%1), %%xmm6        \n\t"
+                "paddw    (%1), %%xmm2        \n\t" /* s = b[i] + b[i+1]; memory operand needs &b[i] 16-byte aligned */
+                "paddw  16(%1), %%xmm6        \n\t"
+                "movdqu   (%0), %%xmm0        \n\t"
+                "movdqu 16(%0), %%xmm4        \n\t"
+                "paddw  %%xmm2, %%xmm0        \n\t" /* src[i] + s */
+                "paddw  %%xmm6, %%xmm4        \n\t"
+                "psraw      $1, %%xmm2        \n\t" /* s >> 1 */
+                "psraw      $1, %%xmm6        \n\t"
+                "paddw  %%xmm0, %%xmm2        \n\t" /* src[i] + s + (s >> 1) */
+                "paddw  %%xmm4, %%xmm6        \n\t"
+                "movdqa %%xmm2, (%2)          \n\t" /* store to temp[i] */
+                "movdqa %%xmm6, 16(%2)        \n\t"
+                :: "r"(&src[i]), "r"(&b[i]), "r"(&temp[i])
+                 : "memory"
+               );
+        }
+        snow_horizontal_compose_lift_lead_out(i, temp, src, b, width, w_r, 1, -W_AM, W_AO+1, W_AS);
+    }
+
+    {
+        snow_interleave_line_header(&i, width, b, temp); /* helper defined elsewhere; sets i — details not visible here */
+        /* Scalar interleave, stepping i down by 2, until (i & 0x3E) == 0x3E; the asm loop then writes 64 elements per pass. */
+        for (; (i & 0x3E) != 0x3E; i-=2){
+            b[i+1] = temp[i>>1];
+            b[i] = b[i>>1];
+        }
+        for (i-=62; i>=0; i-=64){ /* b[i..i+63] = word-interleave of b[i/2..i/2+31] and temp[i/2..i/2+31] */
+            __asm__ volatile(
+                "movdqa      (%1), %%xmm0       \n\t" /* all 64 source bytes of b are loaded before any store */
+                "movdqa    16(%1), %%xmm2       \n\t"
+                "movdqa    32(%1), %%xmm4       \n\t"
+                "movdqa    48(%1), %%xmm6       \n\t"
+                "movdqa      (%1), %%xmm1       \n\t"
+                "movdqa    16(%1), %%xmm3       \n\t"
+                "movdqa    32(%1), %%xmm5       \n\t"
+                "movdqa    48(%1), %%xmm7       \n\t"
+                "punpcklwd   (%2), %%xmm0       \n\t" /* low four words of b interleaved with low four words of temp */
+                "punpcklwd 16(%2), %%xmm2       \n\t"
+                "punpcklwd 32(%2), %%xmm4       \n\t"
+                "punpcklwd 48(%2), %%xmm6       \n\t"
+                "movdqa    %%xmm0, (%0)         \n\t"
+                "movdqa    %%xmm2, 32(%0)       \n\t"
+                "movdqa    %%xmm4, 64(%0)       \n\t"
+                "movdqa    %%xmm6, 96(%0)       \n\t"
+                "punpckhwd   (%2), %%xmm1       \n\t" /* high four words likewise */
+                "punpckhwd 16(%2), %%xmm3       \n\t"
+                "punpckhwd 32(%2), %%xmm5       \n\t"
+                "punpckhwd 48(%2), %%xmm7       \n\t"
+                "movdqa    %%xmm1, 16(%0)       \n\t"
+                "movdqa    %%xmm3, 48(%0)       \n\t"
+                "movdqa    %%xmm5, 80(%0)       \n\t"
+                "movdqa    %%xmm7, 112(%0)      \n\t"
+                :: "r"(&(b)[i]), "r"(&(b)[i>>1]), "r"(&(temp)[i>>1])
+                 : "memory"
+               );
+        }
+    }
+}
+/* MMX horizontal compose97i of one line of `width` coefficients in b: four lifting passes, then interleave of b and temp. */
+static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, IDWTELEM *temp, int width){
+    const int w2= (width+1)>>1; /* width/2 rounded up */
+    const int w_l= (width>>1); /* width/2 rounded down */
+    const int w_r= w2 - 1;
+    int i;
+    /* NOTE(review): no emms is issued in this function; presumably the caller restores FPU state — confirm. */
+    { // Lift 0
+        IDWTELEM * const ref = b + w2 - 1;
+        /* b[0] is computed in C first, so the asm loop starts at i = 1. */
+        i = 1;
+        b[0] = b[0] - ((W_DM * 2 * ref[1]+W_DO)>>W_DS);
+        __asm__ volatile(
+            "pcmpeqw    %%mm7, %%mm7         \n\t" /* mm7 = all ones (-1 in every word) */
+            "pcmpeqw    %%mm3, %%mm3         \n\t"
+            "psllw         $1, %%mm3         \n\t"
+            "paddw      %%mm7, %%mm3         \n\t"
+            "psllw        $13, %%mm3         \n\t" /* mm3 = 0xA000 (-3 << 13) in every word */
+           ::);
+        for(; i<w_l-7; i+=8){ /* 16 bytes (two mm registers) of b per iteration */
+            __asm__ volatile(
+                "movq     (%1), %%mm2        \n\t"
+                "movq    8(%1), %%mm6        \n\t"
+                "paddw   2(%1), %%mm2        \n\t" /* ref[i] + ref[i+1] (2-byte step to the next element) */
+                "paddw  10(%1), %%mm6        \n\t"
+                "paddw   %%mm7, %%mm2        \n\t" /* - 1 */
+                "paddw   %%mm7, %%mm6        \n\t"
+                "pmulhw  %%mm3, %%mm2        \n\t" /* high word of the signed product with mm3 */
+                "pmulhw  %%mm3, %%mm6        \n\t"
+                "paddw    (%0), %%mm2        \n\t" /* += b[i] */
+                "paddw   8(%0), %%mm6        \n\t"
+                "movq    %%mm2, (%0)         \n\t"
+                "movq    %%mm6, 8(%0)        \n\t"
+                :: "r"(&b[i]), "r"(&ref[i])
+                 : "memory"
+               );
+        }
+        snow_horizontal_compose_lift_lead_out(i, b, b, ref, width, w_l, 0, W_DM, W_DO, W_DS); /* helper defined elsewhere; presumably finishes the elements from i on in C — confirm */
+    }
+
+    { // Lift 1
+        IDWTELEM * const dst = b+w2;
+        /* dst[i] -= b[i] + b[i+1], 8 elements per asm iteration. */
+        i = 0;
+        for(; i<w_r-7; i+=8){
+            __asm__ volatile(
+                "movq     (%1), %%mm2        \n\t"
+                "movq    8(%1), %%mm6        \n\t"
+                "paddw   2(%1), %%mm2        \n\t"
+                "paddw  10(%1), %%mm6        \n\t"
+                "movq     (%0), %%mm0        \n\t"
+                "movq    8(%0), %%mm4        \n\t"
+                "psubw   %%mm2, %%mm0        \n\t"
+                "psubw   %%mm6, %%mm4        \n\t"
+                "movq    %%mm0, (%0)         \n\t"
+                "movq    %%mm4, 8(%0)        \n\t"
+                :: "r"(&dst[i]), "r"(&b[i])
+                 : "memory"
+               );
+        }
+        snow_horizontal_compose_lift_lead_out(i, dst, dst, b, width, w_r, 1, W_CM, W_CO, W_CS);
+    }
+
+    { // Lift 2
+        IDWTELEM * const ref = b+w2 - 1;
+        /* NOTE(review): pavgw on MMX registers is an SSE/MMXEXT instruction, not baseline MMX — confirm the CPU-flag gating. */
+        i = 1;
+        b[0] = b[0] + (((2 * ref[1] + W_BO) + 4 * b[0]) >> W_BS);
+        __asm__ volatile(
+            "psllw         $15, %%mm7        \n\t" /* relies on mm7 still being all ones from Lift 0: now 0x8000 per word */
+            "pcmpeqw     %%mm6, %%mm6        \n\t"
+            "psrlw         $13, %%mm6        \n\t"
+            "paddw       %%mm7, %%mm6        \n\t" /* mm6 = 0x8007 in every word */
+           ::);
+        for(; i<w_l-7; i+=8){
+            __asm__ volatile(
+                "movq     (%1), %%mm0        \n\t"
+                "movq    8(%1), %%mm4        \n\t"
+                "movq    2(%1), %%mm1        \n\t"
+                "movq   10(%1), %%mm5        \n\t"
+                "paddw   %%mm6, %%mm0        \n\t"
+                "paddw   %%mm6, %%mm4        \n\t"
+                "paddw   %%mm7, %%mm1        \n\t"
+                "paddw   %%mm7, %%mm5        \n\t"
+                "pavgw   %%mm1, %%mm0        \n\t" /* unsigned rounded average of the two biased operands */
+                "pavgw   %%mm5, %%mm4        \n\t"
+                "psubw   %%mm7, %%mm0        \n\t" /* subtract the 0x8000 constant again */
+                "psubw   %%mm7, %%mm4        \n\t"
+                "psraw      $1, %%mm0        \n\t"
+                "psraw      $1, %%mm4        \n\t"
+                "movq     (%0), %%mm1        \n\t" /* b[i] */
+                "movq    8(%0), %%mm5        \n\t"
+                "paddw   %%mm1, %%mm0        \n\t"
+                "paddw   %%mm5, %%mm4        \n\t"
+                "psraw      $2, %%mm0        \n\t"
+                "psraw      $2, %%mm4        \n\t"
+                "paddw   %%mm1, %%mm0        \n\t" /* b[i] + shifted term */
+                "paddw   %%mm5, %%mm4        \n\t"
+                "movq    %%mm0, (%0)         \n\t"
+                "movq    %%mm4, 8(%0)        \n\t"
+                :: "r"(&b[i]), "r"(&ref[i])
+                 : "memory"
+               );
+        }
+        snow_horizontal_compose_liftS_lead_out(i, b, b, ref, width, w_l);
+    }
+
+    { // Lift 3
+        IDWTELEM * const src = b+w2;
+        i = 0;
+        /* temp[i] = src[i] + s + (s >> 1) with s = b[i] + b[i+1], 8 elements per asm iteration. */
+        for(; i<w_r-7; i+=8){
+            __asm__ volatile(
+                "movq    2(%1), %%mm2        \n\t"
+                "movq   10(%1), %%mm6        \n\t"
+                "paddw    (%1), %%mm2        \n\t" /* s = b[i] + b[i+1] */
+                "paddw   8(%1), %%mm6        \n\t"
+                "movq     (%0), %%mm0        \n\t"
+                "movq    8(%0), %%mm4        \n\t"
+                "paddw   %%mm2, %%mm0        \n\t" /* src[i] + s */
+                "paddw   %%mm6, %%mm4        \n\t"
+                "psraw      $1, %%mm2        \n\t" /* s >> 1 */
+                "psraw      $1, %%mm6        \n\t"
+                "paddw   %%mm0, %%mm2        \n\t"
+                "paddw   %%mm4, %%mm6        \n\t"
+                "movq    %%mm2, (%2)         \n\t" /* store to temp[i] */
+                "movq    %%mm6, 8(%2)        \n\t"
+                :: "r"(&src[i]), "r"(&b[i]), "r"(&temp[i])
+                 : "memory"
+               );
+        }
+        snow_horizontal_compose_lift_lead_out(i, temp, src, b, width, w_r, 1, -W_AM, W_AO+1, W_AS);
+    }
+
+    {
+        snow_interleave_line_header(&i, width, b, temp); /* helper defined elsewhere; sets i — details not visible here */
+        /* Scalar interleave, stepping i down by 2, until (i & 0x1E) == 0x1E; the asm loop then writes 32 elements per pass. */
+        for (; (i & 0x1E) != 0x1E; i-=2){
+            b[i+1] = temp[i>>1];
+            b[i] = b[i>>1];
+        }
+        for (i-=30; i>=0; i-=32){ /* b[i..i+31] = word-interleave of b[i/2..i/2+15] and temp[i/2..i/2+15] */
+            __asm__ volatile(
+                "movq        (%1), %%mm0       \n\t" /* all 32 source bytes of b are loaded before any store */
+                "movq       8(%1), %%mm2       \n\t"
+                "movq      16(%1), %%mm4       \n\t"
+                "movq      24(%1), %%mm6       \n\t"
+                "movq        (%1), %%mm1       \n\t"
+                "movq       8(%1), %%mm3       \n\t"
+                "movq      16(%1), %%mm5       \n\t"
+                "movq      24(%1), %%mm7       \n\t"
+                "punpcklwd   (%2), %%mm0       \n\t" /* low two words of b interleaved with low two words of temp */
+                "punpcklwd  8(%2), %%mm2       \n\t"
+                "punpcklwd 16(%2), %%mm4       \n\t"
+                "punpcklwd 24(%2), %%mm6       \n\t"
+                "movq       %%mm0, (%0)        \n\t"
+                "movq       %%mm2, 16(%0)      \n\t"
+                "movq       %%mm4, 32(%0)      \n\t"
+                "movq       %%mm6, 48(%0)      \n\t"
+                "punpckhwd   (%2), %%mm1       \n\t" /* high two words likewise */
+                "punpckhwd  8(%2), %%mm3       \n\t"
+                "punpckhwd 16(%2), %%mm5       \n\t"
+                "punpckhwd 24(%2), %%mm7       \n\t"
+                "movq       %%mm1, 8(%0)       \n\t"
+                "movq       %%mm3, 24(%0)      \n\t"
+                "movq       %%mm5, 40(%0)      \n\t"
+                "movq       %%mm7, 56(%0)      \n\t"
+                :: "r"(&b[i]), "r"(&b[i>>1]), "r"(&temp[i>>1])
+                 : "memory"
+               );
+        }
+    }
+}
+
+#if HAVE_7REGS
+#define snow_vertical_compose_sse2_load_add(op,r,t0,t1,t2,t3) /* asm text: apply op from r+REG_d at byte offsets 0,16,32,48 to t0..t3 */\
+        ""op" ("r",%%"REG_d"), %%"t0"      \n\t"\
+        ""op" 16("r",%%"REG_d"), %%"t1"    \n\t"\
+        ""op" 32("r",%%"REG_d"), %%"t2"    \n\t"\
+        ""op" 48("r",%%"REG_d"), %%"t3"    \n\t"
+/* Load t0..t3 with movdqa from the 64 bytes at r+REG_d. */
+#define snow_vertical_compose_sse2_load(r,t0,t1,t2,t3)\
+        snow_vertical_compose_sse2_load_add("movdqa",r,t0,t1,t2,t3)
+/* Word-wise add (paddw) of the 64 bytes at r+REG_d into t0..t3. */
+#define snow_vertical_compose_sse2_add(r,t0,t1,t2,t3)\
+        snow_vertical_compose_sse2_load_add("paddw",r,t0,t1,t2,t3)
+/* Register-to-register word subtract: t0..t3 -= s0..s3. Register names are plain strings, so MMX code reuses this too. */
+#define snow_vertical_compose_r2r_sub(s0,s1,s2,s3,t0,t1,t2,t3)\
+        "psubw %%"s0", %%"t0" \n\t"\
+        "psubw %%"s1", %%"t1" \n\t"\
+        "psubw %%"s2", %%"t2" \n\t"\
+        "psubw %%"s3", %%"t3" \n\t"
+/* Store s0..s3 with movdqa to the 64 bytes at w+REG_d. */
+#define snow_vertical_compose_sse2_store(w,s0,s1,s2,s3)\
+        "movdqa %%"s0", ("w",%%"REG_d")      \n\t"\
+        "movdqa %%"s1", 16("w",%%"REG_d")    \n\t"\
+        "movdqa %%"s2", 32("w",%%"REG_d")    \n\t"\
+        "movdqa %%"s3", 48("w",%%"REG_d")    \n\t"
+/* Arithmetic right shift of every word in t0..t3 by the immediate n (given as a string). */
+#define snow_vertical_compose_sra(n,t0,t1,t2,t3)\
+        "psraw $"n", %%"t0" \n\t"\
+        "psraw $"n", %%"t1" \n\t"\
+        "psraw $"n", %%"t2" \n\t"\
+        "psraw $"n", %%"t3" \n\t"
+/* Register-to-register word add: t0..t3 += s0..s3. */
+#define snow_vertical_compose_r2r_add(s0,s1,s2,s3,t0,t1,t2,t3)\
+        "paddw %%"s0", %%"t0" \n\t"\
+        "paddw %%"s1", %%"t1" \n\t"\
+        "paddw %%"s2", %%"t2" \n\t"\
+        "paddw %%"s3", %%"t3" \n\t"
+/* Signed multiply keeping the high word: t0..t3 = high 16 bits of t*s. */
+#define snow_vertical_compose_r2r_pmulhw(s0,s1,s2,s3,t0,t1,t2,t3)\
+        "pmulhw %%"s0", %%"t0" \n\t"\
+        "pmulhw %%"s1", %%"t1" \n\t"\
+        "pmulhw %%"s2", %%"t2" \n\t"\
+        "pmulhw %%"s3", %%"t3" \n\t"
+/* Register copy: t0..t3 = s0..s3 (movdqa). */
+#define snow_vertical_compose_sse2_move(s0,s1,s2,s3,t0,t1,t2,t3)\
+        "movdqa %%"s0", %%"t0" \n\t"\
+        "movdqa %%"s1", %%"t1" \n\t"\
+        "movdqa %%"s2", %%"t2" \n\t"\
+        "movdqa %%"s3", %%"t3" \n\t"
+/* SSE2 vertical compose97i: updates rows b4, b3, b2 and b1 in place from b0..b5, column by column (see the scalar loop). */
+static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
+    x86_reg i = width;
+    /* Do the last (width & 0x1F) columns in C so the asm loop is left with whole 32-element (64-byte) groups. */
+    while(i & 0x1F)
+    {
+        i--;
+        b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
+        b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
+        b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
+        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
+    }
+    i+=i; /* element count -> byte offset; the asm treats elements as 2 bytes wide */
+    /* Operands: %0 = byte offset in REG_d, %1..%6 = b0..b5. movdqa/paddw memory operands assume 16-byte aligned rows. */
+         __asm__ volatile (
+        "jmp 2f                                      \n\t"
+        "1:                                          \n\t"
+        snow_vertical_compose_sse2_load("%4","xmm1","xmm3","xmm5","xmm7") /* b3 */
+        snow_vertical_compose_sse2_add("%6","xmm1","xmm3","xmm5","xmm7") /* b3 + b5 */
+        /* The sum is kept in xmm1/3/5/7 because xmm0 and xmm2 are overwritten with constants next. */
+        /* xmm0 = -1 and xmm2 = -3 << 13 in every word. */
+        "pcmpeqw    %%xmm0, %%xmm0                   \n\t"
+        "pcmpeqw    %%xmm2, %%xmm2                   \n\t"
+        "paddw      %%xmm2, %%xmm2                   \n\t"
+        "paddw      %%xmm0, %%xmm2                   \n\t"
+        "psllw         $13, %%xmm2                   \n\t"
+        snow_vertical_compose_r2r_add("xmm0","xmm0","xmm0","xmm0","xmm1","xmm3","xmm5","xmm7")
+        snow_vertical_compose_r2r_pmulhw("xmm2","xmm2","xmm2","xmm2","xmm1","xmm3","xmm5","xmm7")
+        snow_vertical_compose_sse2_add("%5","xmm1","xmm3","xmm5","xmm7") /* + b4 */
+        snow_vertical_compose_sse2_store("%5","xmm1","xmm3","xmm5","xmm7") /* b4 updated */
+        snow_vertical_compose_sse2_load("%4","xmm0","xmm2","xmm4","xmm6") /* b3 */
+        snow_vertical_compose_sse2_add("%3","xmm1","xmm3","xmm5","xmm7") /* new b4 + b2 */
+        snow_vertical_compose_r2r_sub("xmm1","xmm3","xmm5","xmm7","xmm0","xmm2","xmm4","xmm6") /* b3 - (b2 + b4) */
+        snow_vertical_compose_sse2_store("%4","xmm0","xmm2","xmm4","xmm6") /* b3 updated */
+        /* xmm7 = 0x8000 and xmm5 = 0x8007 in every word; they are added before, and xmm7 subtracted after, the unsigned pavgw. */
+        "pcmpeqw %%xmm7, %%xmm7                      \n\t"
+        "pcmpeqw %%xmm5, %%xmm5                      \n\t"
+        "psllw $15, %%xmm7                           \n\t"
+        "psrlw $13, %%xmm5                           \n\t"
+        "paddw %%xmm7, %%xmm5                        \n\t"
+        snow_vertical_compose_r2r_add("xmm5","xmm5","xmm5","xmm5","xmm0","xmm2","xmm4","xmm6")
+        "movdqa   (%2,%%"REG_d"), %%xmm1      \n\t" /* b1, full 16 bytes per register to match xmm0/xmm2 (offsets 0 and 16) */
+        "movdqa 16(%2,%%"REG_d"), %%xmm3      \n\t"
+        "paddw %%xmm7, %%xmm1                        \n\t"
+        "paddw %%xmm7, %%xmm3                        \n\t"
+        "pavgw %%xmm1, %%xmm0                        \n\t"
+        "pavgw %%xmm3, %%xmm2                        \n\t"
+        "movdqa 32(%2,%%"REG_d"), %%xmm1      \n\t" /* b1 at offsets 32 and 48, matching xmm4/xmm6 */
+        "movdqa 48(%2,%%"REG_d"), %%xmm3      \n\t"
+        "paddw %%xmm7, %%xmm1                        \n\t"
+        "paddw %%xmm7, %%xmm3                        \n\t"
+        "pavgw %%xmm1, %%xmm4                        \n\t"
+        "pavgw %%xmm3, %%xmm6                        \n\t"
+        snow_vertical_compose_r2r_sub("xmm7","xmm7","xmm7","xmm7","xmm0","xmm2","xmm4","xmm6")
+        snow_vertical_compose_sra("1","xmm0","xmm2","xmm4","xmm6")
+        snow_vertical_compose_sse2_add("%3","xmm0","xmm2","xmm4","xmm6") /* + b2 */
+
+        snow_vertical_compose_sra("2","xmm0","xmm2","xmm4","xmm6")
+        snow_vertical_compose_sse2_add("%3","xmm0","xmm2","xmm4","xmm6") /* + b2 */
+        snow_vertical_compose_sse2_store("%3","xmm0","xmm2","xmm4","xmm6") /* b2 updated */
+        snow_vertical_compose_sse2_add("%1","xmm0","xmm2","xmm4","xmm6") /* s = new b2 + b0 */
+        snow_vertical_compose_sse2_move("xmm0","xmm2","xmm4","xmm6","xmm1","xmm3","xmm5","xmm7")
+        snow_vertical_compose_sra("1","xmm0","xmm2","xmm4","xmm6")
+        snow_vertical_compose_r2r_add("xmm1","xmm3","xmm5","xmm7","xmm0","xmm2","xmm4","xmm6") /* s + (s >> 1) */
+        snow_vertical_compose_sse2_add("%2","xmm0","xmm2","xmm4","xmm6") /* + b1 */
+        snow_vertical_compose_sse2_store("%2","xmm0","xmm2","xmm4","xmm6") /* b1 updated */
+        /* Loop control: step the byte offset back by one 64-byte group and repeat while it stays >= 0. */
+        "2:                                          \n\t"
+        "sub $64, %%"REG_d"                          \n\t"
+        "jge 1b                                      \n\t"
+        :"+d"(i)
+        :"r"(b0),"r"(b1),"r"(b2),"r"(b3),"r"(b4),"r"(b5):"memory"); /* "memory": rows are written through the pointer operands */
+}
+/* MMX asm-text helpers: each covers four 8-byte mm registers, i.e. the 32 bytes at r+REG_d (offsets 0,8,16,24). */
+#define snow_vertical_compose_mmx_load_add(op,r,t0,t1,t2,t3)\
+        ""op" ("r",%%"REG_d"), %%"t0"   \n\t"\
+        ""op" 8("r",%%"REG_d"), %%"t1"  \n\t"\
+        ""op" 16("r",%%"REG_d"), %%"t2" \n\t"\
+        ""op" 24("r",%%"REG_d"), %%"t3" \n\t"
+/* Load t0..t3 with movq from r+REG_d. */
+#define snow_vertical_compose_mmx_load(r,t0,t1,t2,t3)\
+        snow_vertical_compose_mmx_load_add("movq",r,t0,t1,t2,t3)
+/* Word-wise add (paddw) of the data at r+REG_d into t0..t3. */
+#define snow_vertical_compose_mmx_add(r,t0,t1,t2,t3)\
+        snow_vertical_compose_mmx_load_add("paddw",r,t0,t1,t2,t3)
+/* Store s0..s3 with movq to w+REG_d. */
+#define snow_vertical_compose_mmx_store(w,s0,s1,s2,s3)\
+        "movq %%"s0", ("w",%%"REG_d")   \n\t"\
+        "movq %%"s1", 8("w",%%"REG_d")  \n\t"\
+        "movq %%"s2", 16("w",%%"REG_d") \n\t"\
+        "movq %%"s3", 24("w",%%"REG_d") \n\t"
+/* Register copy: t0..t3 = s0..s3 (movq). */
+#define snow_vertical_compose_mmx_move(s0,s1,s2,s3,t0,t1,t2,t3)\
+        "movq %%"s0", %%"t0" \n\t"\
+        "movq %%"s1", %%"t1" \n\t"\
+        "movq %%"s2", %%"t2" \n\t"\
+        "movq %%"s3", %%"t3" \n\t"
+/* MMX vertical compose97i: updates rows b4, b3, b2 and b1 in place from b0..b5, column by column (see the scalar loop). */
+/* NOTE(review): uses pavgw on mm registers (SSE/MMXEXT) and issues no emms; presumably gated/handled by callers — confirm. */
+static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
+    x86_reg i = width;
+    while(i & 15) /* last (width & 15) columns in C, leaving whole 16-element (32-byte) groups for the asm loop */
+    {
+        i--;
+        b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
+        b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
+        b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
+        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
+    }
+    i+=i; /* element count -> byte offset; the asm treats elements as 2 bytes wide */
+    __asm__ volatile(
+        "jmp 2f                                      \n\t"
+        "1:                                          \n\t"
+        /* Operands: %0 = byte offset in REG_d, %1..%6 = b0..b5. */
+        snow_vertical_compose_mmx_load("%4","mm1","mm3","mm5","mm7") /* b3 */
+        snow_vertical_compose_mmx_add("%6","mm1","mm3","mm5","mm7") /* b3 + b5 */
+        "pcmpeqw    %%mm0, %%mm0                     \n\t" /* mm0 = -1 in every word */
+        "pcmpeqw    %%mm2, %%mm2                     \n\t"
+        "paddw      %%mm2, %%mm2                     \n\t"
+        "paddw      %%mm0, %%mm2                     \n\t"
+        "psllw        $13, %%mm2                     \n\t" /* mm2 = -3 << 13 in every word */
+        snow_vertical_compose_r2r_add("mm0","mm0","mm0","mm0","mm1","mm3","mm5","mm7")
+        snow_vertical_compose_r2r_pmulhw("mm2","mm2","mm2","mm2","mm1","mm3","mm5","mm7")
+        snow_vertical_compose_mmx_add("%5","mm1","mm3","mm5","mm7") /* + b4 */
+        snow_vertical_compose_mmx_store("%5","mm1","mm3","mm5","mm7") /* b4 updated */
+        snow_vertical_compose_mmx_load("%4","mm0","mm2","mm4","mm6") /* b3 */
+        snow_vertical_compose_mmx_add("%3","mm1","mm3","mm5","mm7") /* new b4 + b2 */
+        snow_vertical_compose_r2r_sub("mm1","mm3","mm5","mm7","mm0","mm2","mm4","mm6") /* b3 - (b2 + b4) */
+        snow_vertical_compose_mmx_store("%4","mm0","mm2","mm4","mm6") /* b3 updated */
+        "pcmpeqw %%mm7, %%mm7                        \n\t"
+        "pcmpeqw %%mm5, %%mm5                        \n\t"
+        "psllw $15, %%mm7                            \n\t" /* mm7 = 0x8000 in every word */
+        "psrlw $13, %%mm5                            \n\t"
+        "paddw %%mm7, %%mm5                          \n\t" /* mm5 = 0x8007 in every word */
+        snow_vertical_compose_r2r_add("mm5","mm5","mm5","mm5","mm0","mm2","mm4","mm6")
+        "movq   (%2,%%"REG_d"), %%mm1         \n\t" /* b1 at offsets 0 and 8, matching mm0/mm2 */
+        "movq  8(%2,%%"REG_d"), %%mm3         \n\t"
+        "paddw %%mm7, %%mm1                          \n\t"
+        "paddw %%mm7, %%mm3                          \n\t"
+        "pavgw %%mm1, %%mm0                          \n\t"
+        "pavgw %%mm3, %%mm2                          \n\t"
+        "movq 16(%2,%%"REG_d"), %%mm1         \n\t" /* b1 at offsets 16 and 24, matching mm4/mm6 */
+        "movq 24(%2,%%"REG_d"), %%mm3         \n\t"
+        "paddw %%mm7, %%mm1                          \n\t"
+        "paddw %%mm7, %%mm3                          \n\t"
+        "pavgw %%mm1, %%mm4                          \n\t"
+        "pavgw %%mm3, %%mm6                          \n\t"
+        snow_vertical_compose_r2r_sub("mm7","mm7","mm7","mm7","mm0","mm2","mm4","mm6")
+        snow_vertical_compose_sra("1","mm0","mm2","mm4","mm6")
+        snow_vertical_compose_mmx_add("%3","mm0","mm2","mm4","mm6") /* + b2 */
+
+        snow_vertical_compose_sra("2","mm0","mm2","mm4","mm6")
+        snow_vertical_compose_mmx_add("%3","mm0","mm2","mm4","mm6") /* + b2 */
+        snow_vertical_compose_mmx_store("%3","mm0","mm2","mm4","mm6") /* b2 updated */
+        snow_vertical_compose_mmx_add("%1","mm0","mm2","mm4","mm6") /* s = new b2 + b0 */
+        snow_vertical_compose_mmx_move("mm0","mm2","mm4","mm6","mm1","mm3","mm5","mm7")
+        snow_vertical_compose_sra("1","mm0","mm2","mm4","mm6")
+        snow_vertical_compose_r2r_add("mm1","mm3","mm5","mm7","mm0","mm2","mm4","mm6") /* s + (s >> 1) */
+        snow_vertical_compose_mmx_add("%2","mm0","mm2","mm4","mm6") /* + b1 */
+        snow_vertical_compose_mmx_store("%2","mm0","mm2","mm4","mm6") /* b1 updated */
+        /* Loop control: step the byte offset back by one 32-byte group and repeat while it stays >= 0. */
+        "2:                                          \n\t"
+        "sub $32, %%"REG_d"                          \n\t"
+        "jge 1b                                      \n\t"
+        :"+d"(i)
+        :"r"(b0),"r"(b1),"r"(b2),"r"(b3),"r"(b4),"r"(b5):"memory"); /* "memory": rows are written through the pointer operands */
+}
+#endif //HAVE_7REGS
+
+#if HAVE_6REGS
+#define snow_inner_add_yblock_sse2_header /* declares dst_array and tmp, opens the asm statement, sets constants and loop label 1 */ \
+    IDWTELEM * * dst_array = sb->line + src_y;\
+    x86_reg tmp;\
+    __asm__ volatile(\
+             "mov  %7, %%"REG_c"             \n\t" /* REG_c = src_stride */\
+             "mov  %6, %2                    \n\t" /* tmp = b_h (row counter) */\
+             "mov  %4, %%"REG_S"             \n\t" /* REG_S = obmc */\
+             "pxor %%xmm7, %%xmm7            \n\t" /* 0 */\
+             "pcmpeqd %%xmm3, %%xmm3         \n\t"\
+             "psllw $15, %%xmm3              \n\t"\
+             "psrlw $12, %%xmm3              \n\t" /* FRAC_BITS >> 1 */\
+             "1:                             \n\t"\
+             "mov %1, %%"REG_D"              \n\t" /* REG_D = dst_array */\
+             "mov (%%"REG_D"), %%"REG_D"     \n\t" /* REG_D = dst_array[0] */\
+             "add %3, %%"REG_D"              \n\t"
+/* start_8: 8 bytes at block[ptr_offset] and at +REG_c, widened to words, times the obmc bytes at s_offset and s_offset+16. */
+#define snow_inner_add_yblock_sse2_start_8(out_reg1, out_reg2, ptr_offset, s_offset)\
+             "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
+             "movq (%%"REG_d"), %%"out_reg1" \n\t"\
+             "movq (%%"REG_d", %%"REG_c"), %%"out_reg2" \n\t"\
+             "punpcklbw %%xmm7, %%"out_reg1" \n\t"\
+             "punpcklbw %%xmm7, %%"out_reg2" \n\t"\
+             "movq "s_offset"(%%"REG_S"), %%xmm0 \n\t"\
+             "movq "s_offset"+16(%%"REG_S"), %%xmm4 \n\t"\
+             "punpcklbw %%xmm7, %%xmm0       \n\t"\
+             "punpcklbw %%xmm7, %%xmm4       \n\t"\
+             "pmullw %%xmm0, %%"out_reg1"    \n\t"\
+             "pmullw %%xmm4, %%"out_reg2"    \n\t"
+/* start_16: 8 bytes at block[ptr_offset] and at +8, widened to words, times the obmc bytes at s_offset and s_offset+8. */
+#define snow_inner_add_yblock_sse2_start_16(out_reg1, out_reg2, ptr_offset, s_offset)\
+             "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
+             "movq (%%"REG_d"), %%"out_reg1" \n\t"\
+             "movq 8(%%"REG_d"), %%"out_reg2" \n\t"\
+             "punpcklbw %%xmm7, %%"out_reg1" \n\t"\
+             "punpcklbw %%xmm7, %%"out_reg2" \n\t"\
+             "movq "s_offset"(%%"REG_S"), %%xmm0 \n\t"\
+             "movq "s_offset"+8(%%"REG_S"), %%xmm4 \n\t"\
+             "punpcklbw %%xmm7, %%xmm0       \n\t"\
+             "punpcklbw %%xmm7, %%xmm4       \n\t"\
+             "pmullw %%xmm0, %%"out_reg1"    \n\t"\
+             "pmullw %%xmm4, %%"out_reg2"    \n\t"
+/* accum_8: start_8 into xmm2/xmm6, then unsigned saturating add (paddusw) into the accumulators xmm1/xmm5. */
+#define snow_inner_add_yblock_sse2_accum_8(ptr_offset, s_offset) \
+             snow_inner_add_yblock_sse2_start_8("xmm2", "xmm6", ptr_offset, s_offset)\
+             "paddusw %%xmm2, %%xmm1         \n\t"\
+             "paddusw %%xmm6, %%xmm5         \n\t"
+/* accum_16: start_16 into xmm2/xmm6, then unsigned saturating add (paddusw) into the accumulators xmm1/xmm5. */
+#define snow_inner_add_yblock_sse2_accum_16(ptr_offset, s_offset) \
+             snow_inner_add_yblock_sse2_start_16("xmm2", "xmm6", ptr_offset, s_offset)\
+             "paddusw %%xmm2, %%xmm1         \n\t"\
+             "paddusw %%xmm6, %%xmm5         \n\t"
+/* end_common1: advance REG_S (obmc) by 32 bytes and add REG_c to %0 (dst8) and to the four pointers stored at REG_a (block[0..3]). */
+#define snow_inner_add_yblock_sse2_end_common1\
+             "add $32, %%"REG_S"             \n\t"\
+             "add %%"REG_c", %0              \n\t"\
+             "add %%"REG_c", "PTR_SIZE"*3(%%"REG_a");\n\t"\
+             "add %%"REG_c", "PTR_SIZE"*2(%%"REG_a");\n\t"\
+             "add %%"REG_c", "PTR_SIZE"*1(%%"REG_a");\n\t"\
+             "add %%"REG_c", (%%"REG_a")     \n\t"
+/* end_common2: branch back to label 1 while the preceding counter update is non-zero, then close the asm with operands and clobbers. */
+#define snow_inner_add_yblock_sse2_end_common2\
+             "jnz 1b                         \n\t"\
+             :"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\
+             :\
+             "rm"((x86_reg)(src_x<<1)),"m"(obmc),"a"(block),"m"(b_h),"m"(src_stride):\
+             XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", )\
+             "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
+/* end_8: two rows per pass — REG_c is doubled around end_common1, dst_array (%1) advances two entries, tmp (%2) drops by 2. */
+#define snow_inner_add_yblock_sse2_end_8\
+             "sal $1, %%"REG_c"              \n\t"\
+             "add"OPSIZE" $"PTR_SIZE"*2, %1  \n\t"\
+             snow_inner_add_yblock_sse2_end_common1\
+             "sar $1, %%"REG_c"              \n\t"\
+             "sub $2, %2                     \n\t"\
+             snow_inner_add_yblock_sse2_end_common2
+/* end_16: one row per pass — dst_array (%1) advances one entry and tmp (%2) is decremented. */
+#define snow_inner_add_yblock_sse2_end_16\
+             "add"OPSIZE" $"PTR_SIZE"*1, %1  \n\t"\
+             snow_inner_add_yblock_sse2_end_common1\
+             "dec %2                         \n\t"\
+             snow_inner_add_yblock_sse2_end_common2
+/* Per pass: obmc-weights four 8-byte block rows (two rows each), adds two lines from sb->line, writes 2x8 packed bytes to dst8. */
+static void inner_add_yblock_bw_8_obmc_16_bh_even_sse2(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h,
+                      int src_x, int src_y, x86_reg src_stride, slice_buffer * sb, int add, uint8_t * dst8){
+snow_inner_add_yblock_sse2_header /* opens the asm statement; obmc_stride, b_w and add are not referenced by the visible macro bodies */
+snow_inner_add_yblock_sse2_start_8("xmm1", "xmm5", "3", "0") /* block[3], obmc offset 0: xmm1 = first row, xmm5 = second row */
+snow_inner_add_yblock_sse2_accum_8("2", "8") /* + block[2], obmc offset 8 */
+snow_inner_add_yblock_sse2_accum_8("1", "128") /* + block[1], obmc offset 128 */
+snow_inner_add_yblock_sse2_accum_8("0", "136") /* + block[0], obmc offset 136 */
+             /* First row: widen the 8 accumulated words in xmm1 to dwords and add 32 bytes read from the line at REG_D. */
+             "mov %0, %%"REG_d"              \n\t" /* REG_d = dst8 */
+             "movdqa (%%"REG_D"), %%xmm0     \n\t"
+             "movdqa %%xmm1, %%xmm2          \n\t"
+             /* NOTE(review): xmm3 holds 0x0008 in each 16-bit word but is added below with paddd to 32-bit lanes — confirm. */
+             "punpckhwd %%xmm7, %%xmm1       \n\t"
+             "punpcklwd %%xmm7, %%xmm2       \n\t"
+             "paddd %%xmm2, %%xmm0           \n\t"
+             "movdqa 16(%%"REG_D"), %%xmm2   \n\t"
+             "paddd %%xmm1, %%xmm2           \n\t"
+             "paddd %%xmm3, %%xmm0           \n\t"
+             "paddd %%xmm3, %%xmm2           \n\t"
+             /* Second row: REG_D = dst_array[1] + (src_x<<1), then the same widening and adds for xmm5. */
+             "mov %1, %%"REG_D"              \n\t"
+             "mov "PTR_SIZE"(%%"REG_D"), %%"REG_D";\n\t"
+             "add %3, %%"REG_D"              \n\t"
+
+             "movdqa (%%"REG_D"), %%xmm4     \n\t"
+             "movdqa %%xmm5, %%xmm6          \n\t"
+             "punpckhwd %%xmm7, %%xmm5       \n\t"
+             "punpcklwd %%xmm7, %%xmm6       \n\t"
+             "paddd %%xmm6, %%xmm4           \n\t"
+             "movdqa 16(%%"REG_D"), %%xmm6   \n\t"
+             "paddd %%xmm5, %%xmm6           \n\t"
+             "paddd %%xmm3, %%xmm4           \n\t"
+             "paddd %%xmm3, %%xmm6           \n\t"
+             /* Shift right by 8, pack dwords -> words -> bytes with saturation, store 8 bytes at dst8 and at dst8 + REG_c. */
+             "psrad $8, %%xmm0               \n\t" /* FRAC_BITS. */
+             "psrad $8, %%xmm2               \n\t" /* FRAC_BITS. */
+             "packssdw %%xmm2, %%xmm0        \n\t"
+             "packuswb %%xmm7, %%xmm0        \n\t"
+             "movq %%xmm0, (%%"REG_d")       \n\t"
+
+             "psrad $8, %%xmm4               \n\t" /* FRAC_BITS. */
+             "psrad $8, %%xmm6               \n\t" /* FRAC_BITS. */
+             "packssdw %%xmm6, %%xmm4        \n\t"
+             "packuswb %%xmm7, %%xmm4        \n\t"
+             "movq %%xmm4, (%%"REG_d",%%"REG_c");\n\t"
+snow_inner_add_yblock_sse2_end_8 /* advances pointers by two rows, loops while rows remain, closes the asm statement */
+}
+
+static void inner_add_yblock_bw_16_obmc_32_sse2(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h,
+                      int src_x, int src_y, x86_reg src_stride, slice_buffer * sb, int add, uint8_t * dst8){ /* SSE2 kernel assembled from the snow_inner_add_yblock_sse2_* macros; writes one 16-byte output row per loop iteration. */
+snow_inner_add_yblock_sse2_header
+snow_inner_add_yblock_sse2_start_16("xmm1", "xmm5", "3", "0")
+snow_inner_add_yblock_sse2_accum_16("2", "16")
+snow_inner_add_yblock_sse2_accum_16("1", "512")
+snow_inner_add_yblock_sse2_accum_16("0", "528")
+
+             "mov %0, %%"REG_d"              \n\t" /* REG_d = dst8 (%0) */
+             "psrlw $4, %%xmm1               \n\t" /* logical shift of both word accumulators right by 4 */
+             "psrlw $4, %%xmm5               \n\t"
+             "paddw   (%%"REG_D"), %%xmm1    \n\t" /* add 8 words read from REG_D (presumably set by the header macro, not visible here) */
+             "paddw 16(%%"REG_D"), %%xmm5    \n\t" /* and the following 8 words */
+             "paddw %%xmm3, %%xmm1           \n\t" /* add xmm3 (presumably a rounding constant from the header - TODO confirm) */
+             "paddw %%xmm3, %%xmm5           \n\t"
+             "psraw $4, %%xmm1               \n\t" /* FRAC_BITS. */
+             "psraw $4, %%xmm5               \n\t" /* FRAC_BITS. */
+             "packuswb %%xmm5, %%xmm1        \n\t" /* words -> 16 bytes with unsigned saturation */
+
+             "movdqu %%xmm1, (%%"REG_d")       \n\t" /* unaligned 16-byte store to dst8 */
+
+snow_inner_add_yblock_sse2_end_16
+}
+
+#define snow_inner_add_yblock_mmx_header /* Declares locals, opens the asm statement, loads loop state and emits the loop label for the MMX kernels. */ \
+    IDWTELEM * * dst_array = sb->line + src_y; /* pointer into sb->line, starting at index src_y */\
+    x86_reg tmp; /* bound to operand %2 by snow_inner_add_yblock_mmx_end */\
+    __asm__ volatile(\
+             "mov  %7, %%"REG_c"             \n\t" /* REG_c = src_stride (%7) */\
+             "mov  %6, %2                    \n\t" /* tmp = b_h (%6): loop counter */\
+             "mov  %4, %%"REG_S"             \n\t" /* REG_S = obmc (%4) */\
+             "pxor %%mm7, %%mm7              \n\t" /* 0 */\
+             "pcmpeqd %%mm3, %%mm3           \n\t" /* mm3 = all ones */\
+             "psllw $15, %%mm3               \n\t" /* each word = 0x8000 */\
+             "psrlw $12, %%mm3               \n\t" /* FRAC_BITS >> 1 */ /* each word = 0x0008 */\
+             "1:                             \n\t" /* loop entry, target of the jnz in the end macro */\
+             "mov %1, %%"REG_D"              \n\t" /* REG_D = dst_array (%1) */\
+             "mov (%%"REG_D"), %%"REG_D"     \n\t" /* REG_D = *dst_array */\
+             "add %3, %%"REG_D"              \n\t" /* add the byte offset src_x<<1 (%3) */
+
+#define snow_inner_add_yblock_mmx_start(out_reg1, out_reg2, ptr_offset, s_offset, d_offset) /* out_reg1/out_reg2 = 8 bytes of block[ptr_offset] at d_offset times 8 bytes of obmc at s_offset, as 16-bit words. */\
+             "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t" /* REG_d = block[ptr_offset] (block is in REG_a) */\
+             "movd "d_offset"(%%"REG_d"), %%"out_reg1" \n\t" /* 4 bytes at d_offset */\
+             "movd "d_offset"+4(%%"REG_d"), %%"out_reg2" \n\t" /* following 4 bytes */\
+             "punpcklbw %%mm7, %%"out_reg1" \n\t" /* widen bytes to words using the zero register mm7 */\
+             "punpcklbw %%mm7, %%"out_reg2" \n\t"\
+             "movd "s_offset"(%%"REG_S"), %%mm0 \n\t" /* 4 bytes at REG_S + s_offset */\
+             "movd "s_offset"+4(%%"REG_S"), %%mm4 \n\t" /* following 4 bytes */\
+             "punpcklbw %%mm7, %%mm0       \n\t"\
+             "punpcklbw %%mm7, %%mm4       \n\t"\
+             "pmullw %%mm0, %%"out_reg1"    \n\t" /* word-wise multiply, low 16 bits of each product kept */\
+             "pmullw %%mm4, %%"out_reg2"    \n\t"
+
+#define snow_inner_add_yblock_mmx_accum(ptr_offset, s_offset, d_offset) /* Computes another product pair in mm2/mm6 and adds it to the accumulators mm1/mm5. */ \
+             snow_inner_add_yblock_mmx_start("mm2", "mm6", ptr_offset, s_offset, d_offset)\
+             "paddusw %%mm2, %%mm1         \n\t" /* word add with unsigned saturation */\
+             "paddusw %%mm6, %%mm5         \n\t"
+
+#define snow_inner_add_yblock_mmx_mix(read_offset, write_offset) /* Combines accumulators mm1/mm5 with 8 words read at REG_D + read_offset and stores 8 bytes at dst8 + write_offset. */\
+             "mov %0, %%"REG_d"              \n\t" /* REG_d = dst8 (%0) */\
+             "psrlw $4, %%mm1                \n\t" /* logical shift of the accumulated products right by 4 */\
+             "psrlw $4, %%mm5                \n\t"\
+             "paddw "read_offset"(%%"REG_D"), %%mm1 \n\t" /* add 4 words from REG_D + read_offset */\
+             "paddw "read_offset"+8(%%"REG_D"), %%mm5 \n\t" /* add the next 4 words */\
+             "paddw %%mm3, %%mm1             \n\t" /* add mm3 (each word 0x0008, set in the header) */\
+             "paddw %%mm3, %%mm5             \n\t"\
+             "psraw $4, %%mm1                \n\t" /* arithmetic shift right by 4 */\
+             "psraw $4, %%mm5                \n\t"\
+             "packuswb %%mm5, %%mm1          \n\t" /* words -> 8 bytes with unsigned saturation */\
+             "movq %%mm1, "write_offset"(%%"REG_d") \n\t" /* store 8 bytes at dst8 + write_offset */
+
+#define snow_inner_add_yblock_mmx_end(s_step) /* Advances all pointers by one row, loops on the counter, then closes the asm statement. */\
+             "add $"s_step", %%"REG_S"             \n\t" /* advance the obmc pointer in REG_S by s_step bytes */\
+             "add %%"REG_c", "PTR_SIZE"*3(%%"REG_a");\n\t" /* block[3] += src_stride (REG_c); the pointer array itself is updated in memory */\
+             "add %%"REG_c", "PTR_SIZE"*2(%%"REG_a");\n\t" /* block[2] += src_stride */\
+             "add %%"REG_c", "PTR_SIZE"*1(%%"REG_a");\n\t" /* block[1] += src_stride */\
+             "add %%"REG_c", (%%"REG_a")     \n\t" /* block[0] += src_stride */\
+             "add"OPSIZE " $"PTR_SIZE"*1, %1 \n\t" /* advance dst_array (%1) by one pointer */\
+             "add %%"REG_c", %0              \n\t" /* dst8 (%0) += src_stride */\
+             "dec %2                         \n\t" /* tmp (%2) counts down from b_h */\
+             "jnz 1b                         \n\t" /* back to label 1 in the header until tmp reaches zero */\
+             :"+m"(dst8),"+m"(dst_array),"=&r"(tmp) /* outputs: %0 = dst8, %1 = dst_array, %2 = tmp */\
+             : /* inputs follow: %3 = src_x<<1, %4 = obmc, %5 = block (in the a register), %6 = b_h, %7 = src_stride */\
+             "rm"((x86_reg)(src_x<<1)),"m"(obmc),"a"(block),"m"(b_h),"m"(src_stride):\
+             "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d""); /* clobbered general-purpose registers; no "memory" or MMX clobbers are listed */
+
+static void inner_add_yblock_bw_8_obmc_16_mmx(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h,
+                      int src_x, int src_y, x86_reg src_stride, slice_buffer * sb, int add, uint8_t * dst8){ /* MMX kernel producing 8 output bytes per row for b_h rows; obmc_stride, b_w and add are not referenced by the expanded macros. */
+snow_inner_add_yblock_mmx_header
+snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0") /* block[3] * obmc bytes at offset 0 */
+snow_inner_add_yblock_mmx_accum("2", "8", "0") /* + block[2] * obmc bytes at offset 8 */
+snow_inner_add_yblock_mmx_accum("1", "128", "0") /* + block[1] * obmc bytes at offset 128 */
+snow_inner_add_yblock_mmx_accum("0", "136", "0") /* + block[0] * obmc bytes at offset 136 */
+snow_inner_add_yblock_mmx_mix("0", "0") /* combine with the line buffer and store 8 bytes at dst8 */
+snow_inner_add_yblock_mmx_end("16") /* obmc pointer advances 16 bytes per row */
+}
+
+static void inner_add_yblock_bw_16_obmc_32_mmx(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h,
+                      int src_x, int src_y, x86_reg src_stride, slice_buffer * sb, int add, uint8_t * dst8){ /* MMX kernel producing 16 output bytes per row (two 8-byte halves) for b_h rows; obmc_stride, b_w and add are not referenced by the expanded macros. */
+snow_inner_add_yblock_mmx_header
+snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0") /* first half: block[3] * obmc bytes at offset 0 */
+snow_inner_add_yblock_mmx_accum("2", "16", "0") /* + block[2] * obmc bytes at offset 16 */
+snow_inner_add_yblock_mmx_accum("1", "512", "0") /* + block[1] * obmc bytes at offset 512 */
+snow_inner_add_yblock_mmx_accum("0", "528", "0") /* + block[0] * obmc bytes at offset 528 */
+snow_inner_add_yblock_mmx_mix("0", "0") /* store bytes 0..7 of the row */
+
+snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "8", "8") /* second half: every block and obmc offset shifted by 8 bytes */
+snow_inner_add_yblock_mmx_accum("2", "24", "8")
+snow_inner_add_yblock_mmx_accum("1", "520", "8")
+snow_inner_add_yblock_mmx_accum("0", "536", "8")
+snow_inner_add_yblock_mmx_mix("16", "8") /* read the line buffer at byte offset 16, store bytes 8..15 of the row */
+snow_inner_add_yblock_mmx_end("32") /* obmc pointer advances 32 bytes per row */
+}
+
+static void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+                           int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ /* Dispatcher: picks a specialised kernel from b_w / obmc_stride / b_h, otherwise calls ff_snow_inner_add_yblock. */
+
+    if (b_w == 16) /* 16-wide kernel; obmc_stride is not checked on this path */
+        inner_add_yblock_bw_16_obmc_32_sse2(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
+    else if (b_w == 8 && obmc_stride == 16) {
+        if (!(b_h & 1)) /* the SSE2 8-wide kernel consumes two rows per iteration, so it is only used for even b_h */
+            inner_add_yblock_bw_8_obmc_16_bh_even_sse2(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
+        else /* odd b_h: use the one-row-per-iteration MMX kernel */
+            inner_add_yblock_bw_8_obmc_16_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
+    } else /* any other geometry: generic implementation */
+         ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
+}
+
+static void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+                          int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ /* Dispatcher: picks an MMX kernel from b_w / obmc_stride, otherwise calls ff_snow_inner_add_yblock. */
+    if (b_w == 16) /* 16-wide kernel; obmc_stride is not checked on this path */
+        inner_add_yblock_bw_16_obmc_32_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
+    else if (b_w == 8 && obmc_stride == 16) /* 8-wide kernel, any b_h */
+        inner_add_yblock_bw_8_obmc_16_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
+    else /* any other geometry: generic implementation */
+        ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
+}
+#endif /* HAVE_6REGS */
+
+#endif /* HAVE_INLINE_ASM */
+
+void ff_dwt_init_x86(SnowDWTContext *c) /* Installs x86 inline-asm implementations into *c according to the runtime CPU flags. */
+{
+#if HAVE_INLINE_ASM
+    int mm_flags = av_get_cpu_flags();
+
+    if (mm_flags & AV_CPU_FLAG_MMX) {
+        if(mm_flags & AV_CPU_FLAG_SSE2 & 0){ /* the trailing "& 0" makes this condition always false, so the SSE2 branch is never taken; NOTE(review): looks like a deliberate disable - confirm before removing */
+            c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2;
+#if HAVE_7REGS
+            c->vertical_compose97i = ff_snow_vertical_compose97i_sse2;
+#endif
+#if HAVE_6REGS
+            c->inner_add_yblock = ff_snow_inner_add_yblock_sse2;
+#endif
+        }
+        else{
+            if (mm_flags & AV_CPU_FLAG_MMXEXT) { /* the *_mmx compose functions are only installed when MMXEXT is reported */
+            c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx;
+#if HAVE_7REGS
+            c->vertical_compose97i = ff_snow_vertical_compose97i_mmx;
+#endif
+            }
+#if HAVE_6REGS
+            c->inner_add_yblock = ff_snow_inner_add_yblock_mmx; /* installed whenever MMX is reported, independent of the MMXEXT check above */
+#endif
+        }
+    }
+#endif /* HAVE_INLINE_ASM */
+}
diff --git a/libavcodec/x86/svq1enc.asm b/libavcodec/x86/svq1enc.asm
new file mode 100644
index 0000000..5fb3361
--- /dev/null
+++ b/libavcodec/x86/svq1enc.asm
@@ -0,0 +1,61 @@
+;******************************************************************************
+;* SIMD-optimized SVQ1 encoder functions
+;* Copyright (c) 2007 Loren Merritt
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_TEXT
+
+%macro SSD_INT8_VS_INT16 0 ; emits ssd_int8_vs_int16 for the current INIT_* register width
+cglobal ssd_int8_vs_int16, 3, 3, 3, pix1, pix2, size
+    pxor m0, m0                         ; m0 = running sum of squared differences (dword lanes)
+.loop
+    sub       sizeq, 8                  ; walk backwards, 8 elements per iteration; sets the flags used by jg below
+    movq      m1, [pix1q + sizeq]       ; 8 int8 values from pix1
+    movu      m2, [pix2q + sizeq*2]     ; int16 values from pix2 (one full register)
+%if mmsize == 8
+    movq      m3, [pix2q + sizeq*2 + mmsize] ; MMX: second group of 4 int16 values
+    punpckhbw m4, m1                    ; high 4 bytes of m1 placed in the upper byte of each word
+    punpcklbw m1, m1                    ; low 4 bytes of m1 duplicated into each word
+    psraw     m4, 8                     ; arithmetic shift leaves the sign-extended int8 in each word
+    psraw     m1, 8
+    psubw     m3, m4                    ; pix2 - pix1 for elements 4..7
+    psubw     m2, m1                    ; pix2 - pix1 for elements 0..3
+    pmaddwd   m3, m3                    ; square each difference and add adjacent pairs into dwords
+    pmaddwd   m2, m2
+    paddd     m0, m3
+    paddd     m0, m2
+%else
+    punpcklbw m1, m1                    ; 8 bytes duplicated into 8 words
+    psraw     m1, 8                     ; sign-extend int8 to int16
+    psubw     m2, m1                    ; pix2 - pix1 for all 8 elements
+    pmaddwd   m2, m2                    ; square each difference and add adjacent pairs into dwords
+    paddd     m0, m2
+%endif
+    jg .loop                            ; flags still come from "sub sizeq, 8": loop while size remains > 0
+    HADDD     m0, m1                    ; horizontal add of the dword lanes (macro from x86util.asm)
+    movd     eax, m0                    ; return the 32-bit sum
+    RET
+%endmacro
+
+INIT_MMX mmx
+SSD_INT8_VS_INT16
+INIT_XMM sse2
+SSD_INT8_VS_INT16
diff --git a/libavcodec/x86/svq1enc.c b/libavcodec/x86/svq1enc.c
deleted file mode 100644
index 02b0a84..0000000
--- a/libavcodec/x86/svq1enc.c
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "config.h"
-#include "libavutil/attributes.h"
-#include "libavutil/cpu.h"
-#include "libavutil/x86/asm.h"
-#include "libavutil/x86/cpu.h"
-#include "libavcodec/svq1enc.h"
-
-#if HAVE_INLINE_ASM
-
-static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2,
-                                 int size)
-{
-    int sum;
-    x86_reg i = size;
-
-    __asm__ volatile (
-        "pxor %%mm4, %%mm4 \n"
-        "1: \n"
-        "sub $8, %0 \n"
-        "movq (%2, %0), %%mm2 \n"
-        "movq (%3, %0, 2), %%mm0 \n"
-        "movq 8(%3, %0, 2), %%mm1 \n"
-        "punpckhbw %%mm2, %%mm3 \n"
-        "punpcklbw %%mm2, %%mm2 \n"
-        "psraw $8, %%mm3 \n"
-        "psraw $8, %%mm2 \n"
-        "psubw %%mm3, %%mm1 \n"
-        "psubw %%mm2, %%mm0 \n"
-        "pmaddwd %%mm1, %%mm1 \n"
-        "pmaddwd %%mm0, %%mm0 \n"
-        "paddd %%mm1, %%mm4 \n"
-        "paddd %%mm0, %%mm4 \n"
-        "jg 1b \n"
-        "movq %%mm4, %%mm3 \n"
-        "psrlq $32, %%mm3 \n"
-        "paddd %%mm3, %%mm4 \n"
-        "movd %%mm4, %1 \n"
-        : "+r" (i), "=r" (sum)
-        : "r" (pix1), "r" (pix2));
-
-    return sum;
-}
-
-#endif /* HAVE_INLINE_ASM */
-
-av_cold void ff_svq1enc_init_x86(SVQ1EncContext *c)
-{
-#if HAVE_INLINE_ASM
-    int cpu_flags = av_get_cpu_flags();
-
-    if (INLINE_MMX(cpu_flags)) {
-        c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
-    }
-#endif /* HAVE_INLINE_ASM */
-}
diff --git a/libavcodec/x86/svq1enc_init.c b/libavcodec/x86/svq1enc_init.c
new file mode 100644
index 0000000..40b4b0e
--- /dev/null
+++ b/libavcodec/x86/svq1enc_init.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2007 Loren Merritt
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/svq1enc.h"
+
+int ff_ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2,
+                             intptr_t size);
+int ff_ssd_int8_vs_int16_sse2(const int8_t *pix1, const int16_t *pix2,
+                              intptr_t size);
+
+av_cold void ff_svq1enc_init_x86(SVQ1EncContext *c) /* Installs the external-asm ssd_int8_vs_int16 implementation matching the runtime CPU flags. */
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (EXTERNAL_MMX(cpu_flags)) {
+        c->ssd_int8_vs_int16 = ff_ssd_int8_vs_int16_mmx;
+    }
+    if (EXTERNAL_SSE2(cpu_flags)) { /* checked second, so the SSE2 version replaces the MMX one when both apply */
+        c->ssd_int8_vs_int16 = ff_ssd_int8_vs_int16_sse2;
+    }
+}
diff --git a/libavcodec/x86/ttadsp.asm b/libavcodec/x86/ttadsp.asm
new file mode 100644
index 0000000..8346cab
--- /dev/null
+++ b/libavcodec/x86/ttadsp.asm
@@ -0,0 +1,119 @@
+;******************************************************************************
+;* TTA DSP SIMD optimizations
+;*
+;* Copyright (C) 2014 James Almer
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+pd_n0113: dd ~0, ~1, ~1, ~3
+pd_1224:  dd 1, 2, 2, 4
+
+SECTION .text
+
+%macro TTA_FILTER 2 ; %1 = instruction-set name passed to INIT_XMM, %2 = XMM register count passed to cglobal
+INIT_XMM %1
+cglobal ttafilter_process_dec, 5,5,%2, qm, dx, dl, error, in, shift, round
+    mova       m2, [qmq       ]     ; m2 = qm[0..3]
+    mova       m3, [qmq + 0x10]     ; m3 = qm[4..7]
+    mova       m4, [dxq       ]     ; m4 = dx[0..3]
+    mova       m5, [dxq + 0x10]     ; m5 = dx[4..7]
+
+    movd       m6, [errorq]         ; if (filter->error < 0) {
+    SPLATD     m6                   ;     for (int i = 0; i < 8; i++)
+    psignd     m0, m4, m6           ;         filter->qm[i] -= filter->dx[i];
+    psignd     m1, m5, m6           ; } else if (filter->error > 0) {
+    paddd      m2, m0               ;     for (int i = 0; i < 8; i++)
+    paddd      m3, m1               ;         filter->qm[i] += filter->dx[i];
+    mova       [qmq       ], m2     ; }
+    mova       [qmq + 0x10], m3     ;
+
+    mova       m0, [dlq       ]     ; m0 = dl[0..3]
+    mova       m1, [dlq + 0x10]     ; m1 = dl[4..7]
+
+%if cpuflag(sse4)
+    pmulld     m2, m0               ; 32-bit lane-wise qm[i] * dl[i]
+    pmulld     m3, m1
+%else
+    pshufd     m6, m0, 0xb1         ; without pmulld: swap adjacent dwords so lanes 1 and 3 sit in even positions
+    pshufd     m7, m2, 0xb1
+    pmuludq    m6, m7               ; 64-bit products of the original lanes 1 and 3
+    pshufd     m6, m6, 0xd8         ; move the low 32 bits of both products into dwords 0 and 1
+    pmuludq    m2, m0               ; 64-bit products of lanes 0 and 2
+    pshufd     m2, m2, 0xd8         ; low 32 bits of both products into dwords 0 and 1
+    punpckldq  m2, m6               ; interleave: products of lanes 0, 1, 2, 3
+
+    pshufd     m6, m1, 0xb1         ; same sequence for the upper four taps
+    pshufd     m7, m3, 0xb1
+    pmuludq    m6, m7
+    pshufd     m6, m6, 0xd8
+    pmuludq    m3, m1
+    pshufd     m3, m3, 0xd8
+    punpckldq  m3, m6
+%endif
+    ; Using horizontal add (phaddd) seems to be slower than shuffling stuff around
+    paddd      m2, m3               ; int sum = filter->round +
+                                    ;           filter->dl[0] * filter->qm[0] +
+    punpckhqdq m3, m2, m2           ;           filter->dl[1] * filter->qm[1] +
+    paddd      m2, m3               ;           filter->dl[2] * filter->qm[2] +
+                                    ;           filter->dl[3] * filter->qm[3] +
+    movd       m6, roundm           ;           filter->dl[4] * filter->qm[4] +
+    paddd      m6, m2               ;           filter->dl[5] * filter->qm[5] +
+    pshufd     m2, m2, 0x1          ;           filter->dl[6] * filter->qm[6] +
+    paddd      m6, m2               ;           filter->dl[7] * filter->qm[7];
+
+    palignr    m5, m4, 4            ; filter->dx[0] = filter->dx[1]; filter->dx[1] = filter->dx[2];
+                                    ; filter->dx[2] = filter->dx[3]; filter->dx[3] = filter->dx[4];
+
+    palignr    m2, m1, m0, 4        ; filter->dl[0] = filter->dl[1]; filter->dl[1] = filter->dl[2];
+                                    ; filter->dl[2] = filter->dl[3]; filter->dl[3] = filter->dl[4];
+
+    psrad      m4, m1, 30           ; filter->dx[4] = ((filter->dl[4] >> 30) | 1);
+    por        m4, [pd_1224 ]       ; filter->dx[5] = ((filter->dl[5] >> 30) | 2) & ~1;
+    pand       m4, [pd_n0113]       ; filter->dx[6] = ((filter->dl[6] >> 30) | 2) & ~1;
+                                    ; filter->dx[7] = ((filter->dl[7] >> 30) | 4) & ~3;
+
+    mova       [dlq       ], m2     ; store shifted dl[0..3]
+    mova       [dxq       ], m5     ; store shifted dx[0..3]
+    mova       [dxq + 0x10], m4     ; store new dx[4..7]
+    movd       m0, [inq]            ; filter->error = *in;
+    movd       [errorq], m0         ;
+
+    movd       m2, shiftm           ; *in += (sum >> filter->shift);
+    psrad      m6, m2               ;
+    paddd      m0, m6               ;
+    movd       [inq], m0            ;
+
+    psrldq     m1, 4                ;
+    pslldq     m0, 12               ; filter->dl[4] = -filter->dl[5];
+    pshufd     m0, m0, 0xf0         ; filter->dl[5] = -filter->dl[6];
+    psubd      m0, m1               ; filter->dl[6] = *in - filter->dl[7];
+    psrldq     m1, m0, 4            ; filter->dl[7] = *in;
+    pshufd     m1, m1, 0xf4         ; filter->dl[5] += filter->dl[6];
+    paddd      m0, m1               ; filter->dl[4] += filter->dl[5];
+    psrldq     m1, 4                ;
+    paddd      m0, m1               ;
+    mova       [dlq + 0x10], m0     ; store new dl[4..7]
+    RET
+%endmacro
+
+TTA_FILTER ssse3, 8
+TTA_FILTER sse4,  7
diff --git a/libavcodec/x86/ttadsp_init.c b/libavcodec/x86/ttadsp_init.c
new file mode 100644
index 0000000..47dc87f
--- /dev/null
+++ b/libavcodec/x86/ttadsp_init.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2014 James Almer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/ttadsp.h"
+#include "libavutil/x86/cpu.h"
+#include "config.h"
+
+void ff_ttafilter_process_dec_ssse3(int32_t *qm, int32_t *dx, int32_t *dl,
+                                    int32_t *error, int32_t *in, int32_t shift,
+                                    int32_t round);
+void ff_ttafilter_process_dec_sse4(int32_t *qm, int32_t *dx, int32_t *dl,
+                                   int32_t *error, int32_t *in, int32_t shift,
+                                   int32_t round);
+
+av_cold void ff_ttadsp_init_x86(TTADSPContext *c) /* Installs the external-asm TTA filter matching the runtime CPU flags; does nothing when HAVE_YASM is 0. */
+{
+#if HAVE_YASM
+    int cpu_flags = av_get_cpu_flags();
+
+    if (EXTERNAL_SSSE3(cpu_flags))
+        c->ttafilter_process_dec = ff_ttafilter_process_dec_ssse3;
+    if (EXTERNAL_SSE4(cpu_flags)) /* checked second, so the SSE4 version replaces the SSSE3 one when both apply */
+        c->ttafilter_process_dec = ff_ttafilter_process_dec_sse4;
+#endif
+}
diff --git a/libavcodec/x86/v210-init.c b/libavcodec/x86/v210-init.c
new file mode 100644
index 0000000..02c5eaa
--- /dev/null
+++ b/libavcodec/x86/v210-init.c
@@ -0,0 +1,48 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/cpu.h"
+#include "libavcodec/v210dec.h"
+
+extern void ff_v210_planar_unpack_unaligned_ssse3(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
+extern void ff_v210_planar_unpack_unaligned_avx(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
+
+extern void ff_v210_planar_unpack_aligned_ssse3(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
+extern void ff_v210_planar_unpack_aligned_avx(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
+
+av_cold void v210_x86_init(V210DecContext *s) /* Selects the x86 SIMD unpack_frame routine from s->aligned_input and the runtime CPU flags. */
+{
+#if HAVE_YASM
+    int cpu_flags = av_get_cpu_flags(); /* declared inside the guard: it is only used when the external asm is built */
+
+    if (s->aligned_input) {
+        if (cpu_flags & AV_CPU_FLAG_SSSE3)
+            s->unpack_frame = ff_v210_planar_unpack_aligned_ssse3;
+
+        if (HAVE_AVX_EXTERNAL && cpu_flags & AV_CPU_FLAG_AVX) /* checked second, so AVX replaces SSSE3 when both apply */
+            s->unpack_frame = ff_v210_planar_unpack_aligned_avx;
+    }
+    else {
+        if (cpu_flags & AV_CPU_FLAG_SSSE3)
+            s->unpack_frame = ff_v210_planar_unpack_unaligned_ssse3;
+
+        if (HAVE_AVX_EXTERNAL && cpu_flags & AV_CPU_FLAG_AVX) /* checked second, so AVX replaces SSSE3 when both apply */
+            s->unpack_frame = ff_v210_planar_unpack_unaligned_avx;
+    }
+#endif
+}
diff --git a/libavcodec/x86/v210.asm b/libavcodec/x86/v210.asm
new file mode 100644
index 0000000..6554a43
--- /dev/null
+++ b/libavcodec/x86/v210.asm
@@ -0,0 +1,88 @@
+;******************************************************************************
+;* V210 SIMD unpack
+;* Copyright (c) 2011 Loren Merritt <lorenm@u.washington.edu>
+;* Copyright (c) 2011 Kieran Kunhya <kieran@kunhya.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+v210_mask: times 4 dd 0x3ff
+v210_mult: dw 64,4,64,4,64,4,64,4
+v210_luma_shuf: db 8,9,0,1,2,3,12,13,4,5,6,7,-1,-1,-1,-1
+v210_chroma_shuf: db 0,1,8,9,6,7,-1,-1,2,3,4,5,12,13,-1,-1
+
+SECTION .text
+
+%macro v210_planar_unpack 2 ; %1 = aligned/unaligned (selects the input load), %2 = suffix used in the function name
+
+; v210_planar_unpack(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width)
+cglobal v210_planar_unpack_%1_%2, 5, 5, 7
+    movsxdifnidn r4, r4d            ; sign-extend the int width argument to pointer size where needed
+    lea    r1, [r1+2*r4]            ; r1 = y + width (uint16_t elements)
+    add    r2, r4                   ; r2 = u + width bytes, i.e. width/2 uint16_t elements
+    add    r3, r4                   ; r3 = v + width bytes
+    neg    r4                       ; r4 = -width: negative index that counts up towards zero
+
+    mova   m3, [v210_mult]
+    mova   m4, [v210_mask]
+    mova   m5, [v210_luma_shuf]
+    mova   m6, [v210_chroma_shuf]
+.loop
+%ifidn %1, unaligned
+    movu   m0, [r0]                 ; 16 input bytes, no alignment requirement
+%else
+    mova   m0, [r0]                 ; 16 input bytes, address must be 16-byte aligned
+%endif
+
+    pmullw m1, m0, m3               ; per-word multiply by 64 or 4 (v210_mult), i.e. a left shift by 6 or 2
+    psrld  m0, 10                   ; shift each dword right by 10 to reach its middle 10-bit field
+    psrlw  m1, 6  ; u0 v0 y1 y2 v1 u2 y4 y5
+    pand   m0, m4 ; y0 __ u1 __ y3 __ v2 __
+
+    shufps m2, m1, m0, 0x8d ; y1 y2 y4 y5 y0 __ y3 __
+    pshufb m2, m5 ; y0 y1 y2 y3 y4 y5 __ __
+    movu   [r1+2*r4], m2            ; 16-byte store; the comment above shows only the first 6 words holding luma
+
+    shufps m1, m0, 0xd8 ; u0 v0 v1 u2 u1 __ v2 __
+    pshufb m1, m6 ; u0 u1 u2 __ v0 v1 v2 __
+    movq   [r2+r4], m1              ; low 8 bytes (u words) to the u plane
+    movhps [r3+r4], m1              ; high 8 bytes (v words) to the v plane
+
+    add r0, mmsize                  ; next input block
+    add r4, 6                       ; 6 luma samples consumed per iteration
+    jl  .loop                       ; continue while the index is still negative
+
+    REP_RET
+%endmacro
+
+INIT_XMM
+v210_planar_unpack unaligned, ssse3
+%if HAVE_AVX_EXTERNAL
+INIT_AVX
+v210_planar_unpack unaligned, avx
+%endif
+
+INIT_XMM
+v210_planar_unpack aligned, ssse3
+%if HAVE_AVX_EXTERNAL
+INIT_AVX
+v210_planar_unpack aligned, avx
+%endif
diff --git a/libavcodec/x86/vc1dsp.asm b/libavcodec/x86/vc1dsp.asm
index adf08d7d..546688c 100644
--- a/libavcodec/x86/vc1dsp.asm
+++ b/libavcodec/x86/vc1dsp.asm
@@ -2,20 +2,20 @@
 ;* VC1 deblocking optimizations
 ;* Copyright (c) 2009 David Conrad
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
diff --git a/libavcodec/x86/vc1dsp.h b/libavcodec/x86/vc1dsp.h
index 9b6c8ad..fdd4de1 100644
--- a/libavcodec/x86/vc1dsp.h
+++ b/libavcodec/x86/vc1dsp.h
@@ -1,20 +1,20 @@
 /*
  * VC-1 and WMV3 decoder - X86 DSP init functions
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/x86/vc1dsp_init.c b/libavcodec/x86/vc1dsp_init.c
index aff4b26..2bef5f5 100644
--- a/libavcodec/x86/vc1dsp_init.c
+++ b/libavcodec/x86/vc1dsp_init.c
@@ -27,6 +27,7 @@
 #include "libavutil/attributes.h"
 #include "libavutil/cpu.h"
 #include "libavutil/x86/cpu.h"
+#include "libavutil/x86/asm.h"
 #include "libavcodec/vc1dsp.h"
 #include "fpel.h"
 #include "vc1dsp.h"
@@ -62,12 +63,17 @@ static void vc1_h_loop_filter16_sse4(uint8_t *src, int stride, int pq)
     ff_vc1_h_loop_filter8_sse4(src,          stride, pq);
     ff_vc1_h_loop_filter8_sse4(src+8*stride, stride, pq);
 }
-
 static void avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src,
                                       ptrdiff_t stride, int rnd)
 {
     ff_avg_pixels8_mmxext(dst, src, stride, 8);
 }
+static void avg_vc1_mspel_mc00_16_sse2(uint8_t *dst, const uint8_t *src,
+                                       ptrdiff_t stride, int rnd) /* Adapter to ff_avg_pixels16_sse2 with a fixed height argument of 16; rnd is ignored. */
+{
+    ff_avg_pixels16_sse2(dst, src, stride, 16);
+}
+
 #endif /* HAVE_YASM */
 
 void ff_put_vc1_chroma_mc8_nornd_mmx  (uint8_t *dst, uint8_t *src,
@@ -86,10 +92,10 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
 {
     int cpu_flags = av_get_cpu_flags();
 
-    if (INLINE_MMX(cpu_flags))
+    if (HAVE_6REGS && INLINE_MMX(cpu_flags))
         ff_vc1dsp_init_mmx(dsp);
 
-    if (INLINE_MMXEXT(cpu_flags))
+    if (HAVE_6REGS && INLINE_MMXEXT(cpu_flags))
         ff_vc1dsp_init_mmxext(dsp);
 
 #define ASSIGN_LF(EXT) \
@@ -111,13 +117,14 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
         ASSIGN_LF(mmxext);
         dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_mmxext;
 
-        dsp->avg_vc1_mspel_pixels_tab[0]         = avg_vc1_mspel_mc00_mmxext;
+        dsp->avg_vc1_mspel_pixels_tab[1][0]      = avg_vc1_mspel_mc00_mmxext;
     }
     if (EXTERNAL_SSE2(cpu_flags)) {
         dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_sse2;
         dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse2;
         dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_sse2;
         dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_sse2;
+        dsp->avg_vc1_mspel_pixels_tab[0][0]      = avg_vc1_mspel_mc00_16_sse2;
     }
     if (EXTERNAL_SSSE3(cpu_flags)) {
         ASSIGN_LF(ssse3);
diff --git a/libavcodec/x86/vc1dsp_mmx.c b/libavcodec/x86/vc1dsp_mmx.c
index 046affb..77a8e35 100644
--- a/libavcodec/x86/vc1dsp_mmx.c
+++ b/libavcodec/x86/vc1dsp_mmx.c
@@ -25,7 +25,6 @@
  */
 
 #include "libavutil/cpu.h"
-#include "libavutil/internal.h"
 #include "libavutil/mem.h"
 #include "libavutil/x86/asm.h"
 #include "libavutil/x86/cpu.h"
@@ -34,7 +33,7 @@
 #include "fpel.h"
 #include "vc1dsp.h"
 
-#if HAVE_INLINE_ASM
+#if HAVE_6REGS && HAVE_INLINE_ASM
 
 #define OP_PUT(S,D)
 #define OP_AVG(S,D) "pavgb " #S ", " #D " \n\t"
@@ -111,6 +110,7 @@ static void vc1_put_ver_16b_shift2_mmx(int16_t *dst,
         : "+r"(src), "+r"(dst)
         : "r"(stride), "r"(-2*stride),
           "m"(shift), "m"(rnd), "r"(9*stride-4)
+          NAMED_CONSTRAINTS_ADD(ff_pw_9)
         : "%"REG_c, "memory"
     );
 }
@@ -155,6 +155,7 @@ static void OPNAME ## vc1_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride,\
         "jnz 1b                            \n\t"\
         : "+r"(h), "+r" (src),  "+r" (dst)\
         : "r"(stride), "m"(rnd)\
+          NAMED_CONSTRAINTS_ADD(ff_pw_128,ff_pw_9)\
         : "memory"\
     );\
 }
@@ -213,6 +214,7 @@ static void OPNAME ## vc1_shift2_mmx(uint8_t *dst, const uint8_t *src,\
         : "+r"(src),  "+r"(dst)\
         : "r"(offset), "r"(-2*offset), "g"(stride), "m"(rnd),\
           "g"(stride-offset)\
+          NAMED_CONSTRAINTS_ADD(ff_pw_9)\
         : "%"REG_c, "memory"\
     );\
 }
@@ -315,6 +317,7 @@ vc1_put_ver_16b_ ## NAME ## _mmx(int16_t *dst, const uint8_t *src,      \
         : "+r"(h), "+r" (src),  "+r" (dst)                              \
         : "r"(src_stride), "r"(3*src_stride),                           \
           "m"(rnd), "m"(shift)                                          \
+          NAMED_CONSTRAINTS_ADD(ff_pw_3,ff_pw_53,ff_pw_18)              \
         : "memory"                                                      \
     );                                                                  \
 }
@@ -352,6 +355,7 @@ OPNAME ## vc1_hor_16b_ ## NAME ## _mmx(uint8_t *dst, x86_reg stride,    \
         "jnz 1b                    \n\t"                                \
         : "+r"(h), "+r" (src),  "+r" (dst)                              \
         : "r"(stride), "m"(rnd)                                         \
+          NAMED_CONSTRAINTS_ADD(ff_pw_3,ff_pw_18,ff_pw_53,ff_pw_128)    \
         : "memory"                                                      \
     );                                                                  \
 }
@@ -387,6 +391,7 @@ OPNAME ## vc1_## NAME ## _mmx(uint8_t *dst, const uint8_t *src,         \
         "jnz 1b                    \n\t"                                \
         : "+r"(h), "+r" (src),  "+r" (dst)                              \
         : "r"(offset), "r"(3*offset), "g"(stride), "m"(rnd)             \
+          NAMED_CONSTRAINTS_ADD(ff_pw_53,ff_pw_18,ff_pw_3)              \
         : "memory"                                                      \
     );                                                                  \
 }
@@ -457,6 +462,15 @@ static void OP ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride,\
 \
     /* Horizontal mode with no vertical mode */\
     vc1_put_shift_8bits[hmode](dst, src, stride, rnd, 1);\
+} \
+static void OP ## vc1_mspel_mc_16(uint8_t *dst, const uint8_t *src, \
+                                  int stride, int hmode, int vmode, int rnd)\
+{ \
+    OP ## vc1_mspel_mc(dst + 0, src + 0, stride, hmode, vmode, rnd); \
+    OP ## vc1_mspel_mc(dst + 8, src + 8, stride, hmode, vmode, rnd); \
+    dst += 8*stride; src += 8*stride; \
+    OP ## vc1_mspel_mc(dst + 0, src + 0, stride, hmode, vmode, rnd); \
+    OP ## vc1_mspel_mc(dst + 8, src + 8, stride, hmode, vmode, rnd); \
 }
 
 VC1_MSPEL_MC(put_)
@@ -477,6 +491,20 @@ static void avg_vc1_mspel_mc ## a ## b ## _mmxext(uint8_t *dst,         \
                                                   int rnd)              \
 {                                                                       \
      avg_vc1_mspel_mc(dst, src, stride, a, b, rnd);                     \
+}\
+static void put_vc1_mspel_mc ## a ## b ## _16_mmx(uint8_t *dst,         \
+                                                  const uint8_t *src,   \
+                                                  ptrdiff_t stride,     \
+                                                  int rnd)              \
+{                                                                       \
+     put_vc1_mspel_mc_16(dst, src, stride, a, b, rnd);                  \
+}\
+static void avg_vc1_mspel_mc ## a ## b ## _16_mmxext(uint8_t *dst,      \
+                                                     const uint8_t *src,\
+                                                     ptrdiff_t stride,  \
+                                                     int rnd)           \
+{                                                                       \
+     avg_vc1_mspel_mc_16(dst, src, stride, a, b, rnd);                  \
 }
 
 DECLARE_FUNCTION(0, 1)
@@ -700,59 +728,83 @@ static void vc1_inv_trans_8x8_dc_mmxext(uint8_t *dest, int linesize,
     );
 }
 
+#if HAVE_MMX_EXTERNAL
 static void put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src,
                                    ptrdiff_t stride, int rnd)
 {
     ff_put_pixels8_mmx(dst, src, stride, 8);
 }
+static void put_vc1_mspel_mc00_16_mmx(uint8_t *dst, const uint8_t *src,
+                                      ptrdiff_t stride, int rnd)
+{
+    ff_put_pixels16_mmx(dst, src, stride, 16);
+}
+static void avg_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src,
+                                   ptrdiff_t stride, int rnd)
+{
+    ff_avg_pixels8_mmx(dst, src, stride, 8);
+}
+static void avg_vc1_mspel_mc00_16_mmx(uint8_t *dst, const uint8_t *src,
+                                      ptrdiff_t stride, int rnd)
+{
+    ff_avg_pixels16_mmx(dst, src, stride, 16);
+}
+#endif
+
+#define FN_ASSIGN(OP, X, Y, INSN) \
+    dsp->OP##vc1_mspel_pixels_tab[1][X+4*Y] = OP##vc1_mspel_mc##X##Y##INSN; \
+    dsp->OP##vc1_mspel_pixels_tab[0][X+4*Y] = OP##vc1_mspel_mc##X##Y##_16##INSN
 
 av_cold void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
 {
-    dsp->put_vc1_mspel_pixels_tab[ 0] = put_vc1_mspel_mc00_mmx;
-    dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_mmx;
-    dsp->put_vc1_mspel_pixels_tab[ 8] = put_vc1_mspel_mc02_mmx;
-    dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_mmx;
-
-    dsp->put_vc1_mspel_pixels_tab[ 1] = put_vc1_mspel_mc10_mmx;
-    dsp->put_vc1_mspel_pixels_tab[ 5] = put_vc1_mspel_mc11_mmx;
-    dsp->put_vc1_mspel_pixels_tab[ 9] = put_vc1_mspel_mc12_mmx;
-    dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_mmx;
-
-    dsp->put_vc1_mspel_pixels_tab[ 2] = put_vc1_mspel_mc20_mmx;
-    dsp->put_vc1_mspel_pixels_tab[ 6] = put_vc1_mspel_mc21_mmx;
-    dsp->put_vc1_mspel_pixels_tab[10] = put_vc1_mspel_mc22_mmx;
-    dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_mmx;
-
-    dsp->put_vc1_mspel_pixels_tab[ 3] = put_vc1_mspel_mc30_mmx;
-    dsp->put_vc1_mspel_pixels_tab[ 7] = put_vc1_mspel_mc31_mmx;
-    dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_mmx;
-    dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_mmx;
+#if HAVE_MMX_EXTERNAL
+    FN_ASSIGN(put_, 0, 0, _mmx);
+    FN_ASSIGN(avg_, 0, 0, _mmx);
+#endif
+    FN_ASSIGN(put_, 0, 1, _mmx);
+    FN_ASSIGN(put_, 0, 2, _mmx);
+    FN_ASSIGN(put_, 0, 3, _mmx);
+
+    FN_ASSIGN(put_, 1, 0, _mmx);
+    FN_ASSIGN(put_, 1, 1, _mmx);
+    FN_ASSIGN(put_, 1, 2, _mmx);
+    FN_ASSIGN(put_, 1, 3, _mmx);
+
+    FN_ASSIGN(put_, 2, 0, _mmx);
+    FN_ASSIGN(put_, 2, 1, _mmx);
+    FN_ASSIGN(put_, 2, 2, _mmx);
+    FN_ASSIGN(put_, 2, 3, _mmx);
+
+    FN_ASSIGN(put_, 3, 0, _mmx);
+    FN_ASSIGN(put_, 3, 1, _mmx);
+    FN_ASSIGN(put_, 3, 2, _mmx);
+    FN_ASSIGN(put_, 3, 3, _mmx);
 }
 
 av_cold void ff_vc1dsp_init_mmxext(VC1DSPContext *dsp)
 {
-    dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_mmxext;
-    dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_mmxext;
-    dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_mmxext;
+    FN_ASSIGN(avg_, 0, 1, _mmxext);
+    FN_ASSIGN(avg_, 0, 2, _mmxext);
+    FN_ASSIGN(avg_, 0, 3, _mmxext);
 
-    dsp->avg_vc1_mspel_pixels_tab[ 1] = avg_vc1_mspel_mc10_mmxext;
-    dsp->avg_vc1_mspel_pixels_tab[ 5] = avg_vc1_mspel_mc11_mmxext;
-    dsp->avg_vc1_mspel_pixels_tab[ 9] = avg_vc1_mspel_mc12_mmxext;
-    dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_mmxext;
+    FN_ASSIGN(avg_, 1, 0, _mmxext);
+    FN_ASSIGN(avg_, 1, 1, _mmxext);
+    FN_ASSIGN(avg_, 1, 2, _mmxext);
+    FN_ASSIGN(avg_, 1, 3, _mmxext);
 
-    dsp->avg_vc1_mspel_pixels_tab[ 2] = avg_vc1_mspel_mc20_mmxext;
-    dsp->avg_vc1_mspel_pixels_tab[ 6] = avg_vc1_mspel_mc21_mmxext;
-    dsp->avg_vc1_mspel_pixels_tab[10] = avg_vc1_mspel_mc22_mmxext;
-    dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_mmxext;
+    FN_ASSIGN(avg_, 2, 0, _mmxext);
+    FN_ASSIGN(avg_, 2, 1, _mmxext);
+    FN_ASSIGN(avg_, 2, 2, _mmxext);
+    FN_ASSIGN(avg_, 2, 3, _mmxext);
 
-    dsp->avg_vc1_mspel_pixels_tab[ 3] = avg_vc1_mspel_mc30_mmxext;
-    dsp->avg_vc1_mspel_pixels_tab[ 7] = avg_vc1_mspel_mc31_mmxext;
-    dsp->avg_vc1_mspel_pixels_tab[11] = avg_vc1_mspel_mc32_mmxext;
-    dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_mmxext;
+    FN_ASSIGN(avg_, 3, 0, _mmxext);
+    FN_ASSIGN(avg_, 3, 1, _mmxext);
+    FN_ASSIGN(avg_, 3, 2, _mmxext);
+    FN_ASSIGN(avg_, 3, 3, _mmxext);
 
     dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_mmxext;
     dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_mmxext;
     dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_mmxext;
     dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_mmxext;
 }
-#endif /* HAVE_INLINE_ASM */
+#endif /* HAVE_6REGS && HAVE_INLINE_ASM */
diff --git a/libavcodec/x86/videodsp.asm b/libavcodec/x86/videodsp.asm
index 53b9e82..1ac0257 100644
--- a/libavcodec/x86/videodsp.asm
+++ b/libavcodec/x86/videodsp.asm
@@ -2,20 +2,20 @@
 ;* Core video DSP functions
 ;* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
@@ -54,13 +54,13 @@ SECTION .text
 ; |    |    <- bottom is copied from last line in body of source
 ; '----' <- bh
 %if ARCH_X86_64
-cglobal emu_edge_vvar, 7, 8, 1, dst, src, dst_stride, src_stride, \
+cglobal emu_edge_vvar, 7, 8, 1, dst, dst_stride, src, src_stride, \
                                 start_y, end_y, bh, w
 %else ; x86-32
 cglobal emu_edge_vvar, 1, 6, 1, dst, src, start_y, end_y, bh, w
 %define src_strideq r3mp
-%define dst_strideq r2mp
-    mov            srcq, r1mp
+%define dst_strideq r1mp
+    mov            srcq, r2mp
     mov        start_yq, r4mp
     mov          end_yq, r5mp
     mov             bhq, r6mp
@@ -262,30 +262,30 @@ hvar_fn
 %rep 1+%2-%1
 %if %%n <= 3
 %if ARCH_X86_64
-cglobal emu_edge_vfix %+ %%n, 6, 8, 0, dst, src, dst_stride, src_stride, \
+cglobal emu_edge_vfix %+ %%n, 6, 8, 0, dst, dst_stride, src, src_stride, \
                                        start_y, end_y, val, bh
     mov             bhq, r6mp                   ; r6mp = bhmp
 %else ; x86-32
 cglobal emu_edge_vfix %+ %%n, 0, 6, 0, val, dst, src, start_y, end_y, bh
     mov            dstq, r0mp
-    mov            srcq, r1mp
+    mov            srcq, r2mp
     mov        start_yq, r4mp
     mov          end_yq, r5mp
     mov             bhq, r6mp
-%define dst_strideq r2mp
+%define dst_strideq r1mp
 %define src_strideq r3mp
 %endif ; x86-64/32
 %else
 %if ARCH_X86_64
-cglobal emu_edge_vfix %+ %%n, 7, 7, 1, dst, src, dst_stride, src_stride, \
+cglobal emu_edge_vfix %+ %%n, 7, 7, 1, dst, dst_stride, src, src_stride, \
                                        start_y, end_y, bh
 %else ; x86-32
 cglobal emu_edge_vfix %+ %%n, 1, 5, 1, dst, src, start_y, end_y, bh
-    mov            srcq, r1mp
+    mov            srcq, r2mp
     mov        start_yq, r4mp
     mov          end_yq, r5mp
     mov             bhq, r6mp
-%define dst_strideq r2mp
+%define dst_strideq r1mp
 %define src_strideq r3mp
 %endif ; x86-64/32
 %endif
@@ -344,10 +344,6 @@ VERTICAL_EXTEND 16, 22
 ; obviously not the same on both sides.
 
 %macro READ_V_PIXEL 2
-%if %1 == 2
-    movzx          valw, byte %2
-    imul           valw, 0x0101
-%else
     movzx          vald, byte %2
     imul           vald, 0x01010101
 %if %1 >= 8
@@ -356,13 +352,15 @@ VERTICAL_EXTEND 16, 22
     pshufd           m0, m0, q0000
 %else
     punpckldq        m0, m0
-%endif
-%endif ; %1 >= 8
-%endif
+%endif ; mmsize == 16
+%endif ; %1 >= 8
 %endmacro ; READ_V_PIXEL
 
 %macro WRITE_V_PIXEL 2
 %assign %%off 0
+
+%if %1 >= 8
+
 %rep %1/mmsize
     movu     [%2+%%off], m0
 %assign %%off %%off+mmsize
@@ -378,27 +376,29 @@ VERTICAL_EXTEND 16, 22
 %assign %%off %%off+8
 %endif
 %endif ; %1-%%off >= 8
-%endif
+%endif ; mmsize == 16
 
 %if %1-%%off >= 4
 %if %1 > 8 && %1-%%off > 4
     movq      [%2+%1-8], m0
 %assign %%off %1
-%elif %1 >= 8 && %1-%%off >= 4
-    movd     [%2+%%off], m0
-%assign %%off %%off+4
 %else
-    mov      [%2+%%off], vald
+    movd     [%2+%%off], m0
 %assign %%off %%off+4
 %endif
 %endif ; %1-%%off >= 4
 
-%if %1-%%off >= 2
-%if %1 >= 8
-    movd      [%2+%1-4], m0
-%else
+%else ; %1 < 8
+
+%rep %1/4
+    mov      [%2+%%off], vald
+%assign %%off %%off+4
+%endrep ; %1/4
+
+%endif ; %1 >=/< 8
+
+%if %1-%%off == 2
     mov      [%2+%%off], valw
-%endif
 %endif ; (%1-%%off)/2
 %endmacro ; WRITE_V_PIXEL
 
diff --git a/libavcodec/x86/videodsp_init.c b/libavcodec/x86/videodsp_init.c
index 8ee8370..3218abd 100644
--- a/libavcodec/x86/videodsp_init.c
+++ b/libavcodec/x86/videodsp_init.c
@@ -1,25 +1,27 @@
 /*
+ * Copyright (C) 2002-2012 Michael Niedermayer
  * Copyright (C) 2012 Ronald S. Bultje
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "config.h"
 #include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
 #include "libavutil/common.h"
 #include "libavutil/cpu.h"
 #include "libavutil/mem.h"
@@ -28,11 +30,11 @@
 #include "libavcodec/videodsp.h"
 
 #if HAVE_YASM
-typedef void emu_edge_vfix_func(uint8_t *dst, const uint8_t *src,
-                                x86_reg dst_stride, x86_reg src_stride,
+typedef void emu_edge_vfix_func(uint8_t *dst, x86_reg dst_stride,
+                                const uint8_t *src, x86_reg src_stride,
                                 x86_reg start_y, x86_reg end_y, x86_reg bh);
-typedef void emu_edge_vvar_func(uint8_t *dst, const uint8_t *src,
-                                x86_reg dst_stride, x86_reg src_stride,
+typedef void emu_edge_vvar_func(uint8_t *dst, x86_reg dst_stride,
+                                const uint8_t *src, x86_reg src_stride,
                                 x86_reg start_y, x86_reg end_y, x86_reg bh,
                                 x86_reg w);
 
@@ -141,14 +143,16 @@ static av_always_inline void emulated_edge_mc(uint8_t *dst, const uint8_t *src,
     x86_reg start_y, start_x, end_y, end_x, src_y_add = 0, p;
 
     if (!w || !h)
-         return;
+        return;
 
     if (src_y >= h) {
-        src  -= src_y * src_stride;
-        src_y = src_y_add = h - 1;
+        src -= src_y*src_stride;
+        src_y_add = h - 1;
+        src_y     = h - 1;
     } else if (src_y <= -block_h) {
-        src  -= src_y*src_stride;
-        src_y = src_y_add = 1 - block_h;
+        src -= src_y*src_stride;
+        src_y_add = 1 - block_h;
+        src_y     = 1 - block_h;
     }
     if (src_x >= w) {
         src   += w - 1 - src_x;
@@ -162,18 +166,17 @@ static av_always_inline void emulated_edge_mc(uint8_t *dst, const uint8_t *src,
     start_x = FFMAX(0, -src_x);
     end_y   = FFMIN(block_h, h-src_y);
     end_x   = FFMIN(block_w, w-src_x);
-    assert(start_x < end_x && block_w > 0);
-    assert(start_y < end_y && block_h > 0);
+    av_assert2(start_x < end_x && block_w > 0);
+    av_assert2(start_y < end_y && block_h > 0);
 
     // fill in the to-be-copied part plus all above/below
     src += (src_y_add + start_y) * src_stride + start_x;
     w = end_x - start_x;
     if (w <= 22) {
-        vfix_tbl[w - 1](dst + start_x, src,
-                        dst_stride, src_stride,
+        vfix_tbl[w - 1](dst + start_x, dst_stride, src, src_stride,
                         start_y, end_y, block_h);
     } else {
-        v_extend_var(dst + start_x, src, dst_stride, src_stride,
+        v_extend_var(dst + start_x, dst_stride, src, src_stride,
                      start_y, end_y, block_h, w);
     }
 
@@ -212,7 +215,7 @@ static av_noinline void emulated_edge_mc_mmx(uint8_t *buf, const uint8_t *src,
                      hfixtbl_mmx, &ff_emu_edge_hvar_mmx);
 }
 
-static av_noinline void emulated_edge_mc_sse(uint8_t * buf,const uint8_t *src,
+static av_noinline void emulated_edge_mc_sse(uint8_t *buf, const uint8_t *src,
                                              ptrdiff_t buf_stride,
                                              ptrdiff_t src_stride,
                                              int block_w, int block_h,
@@ -231,8 +234,8 @@ static av_noinline void emulated_edge_mc_sse2(uint8_t *buf, const uint8_t *src,
                                               int src_x, int src_y, int w,
                                               int h)
 {
-    emulated_edge_mc(buf, src, buf_stride, src_stride, block_w, block_h, src_x,
-                     src_y, w, h, vfixtbl_sse, &ff_emu_edge_vvar_sse,
+    emulated_edge_mc(buf, src, buf_stride, src_stride, block_w, block_h,
+                     src_x, src_y, w, h, vfixtbl_sse, &ff_emu_edge_vvar_sse,
                      hfixtbl_sse2, &ff_emu_edge_hvar_sse2);
 }
 #endif /* HAVE_YASM */
diff --git a/libavcodec/x86/vorbisdsp.asm b/libavcodec/x86/vorbisdsp.asm
index c54650e..b25d838 100644
--- a/libavcodec/x86/vorbisdsp.asm
+++ b/libavcodec/x86/vorbisdsp.asm
@@ -2,20 +2,20 @@
 ;* Vorbis x86 optimizations
 ;* Copyright (C) 2006 Loren Merritt <lorenm@u.washington.edu>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
diff --git a/libavcodec/x86/vorbisdsp_init.c b/libavcodec/x86/vorbisdsp_init.c
index bbd8319..bc1cc43 100644
--- a/libavcodec/x86/vorbisdsp_init.c
+++ b/libavcodec/x86/vorbisdsp_init.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2006 Loren Merritt <lorenm@u.washington.edu>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/x86/vp3dsp.asm b/libavcodec/x86/vp3dsp.asm
index fc8a047..24496ae 100644
--- a/libavcodec/x86/vp3dsp.asm
+++ b/libavcodec/x86/vp3dsp.asm
@@ -2,20 +2,20 @@
 ;* MMX/SSE2-optimized functions for the VP3 decoder
 ;* Copyright (c) 2007 Aurelien Jacobs <aurel@gnuage.org>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
diff --git a/libavcodec/x86/vp3dsp_init.c b/libavcodec/x86/vp3dsp_init.c
index ed38a8e..cc3eba4 100644
--- a/libavcodec/x86/vp3dsp_init.c
+++ b/libavcodec/x86/vp3dsp_init.c
@@ -1,18 +1,20 @@
 /*
- * This file is part of Libav.
+ * Copyright (c) 2009 David Conrad <lessen42@gmail.com>
  *
- * Libav is free software; you can redistribute it and/or
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -21,6 +23,7 @@
 #include "libavutil/attributes.h"
 #include "libavutil/cpu.h"
 #include "libavutil/x86/cpu.h"
+#include "libavutil/x86/asm.h"
 #include "libavcodec/avcodec.h"
 #include "libavcodec/vp3dsp.h"
 #include "config.h"
@@ -39,10 +42,70 @@ void ff_vp3_v_loop_filter_mmxext(uint8_t *src, int stride,
 void ff_vp3_h_loop_filter_mmxext(uint8_t *src, int stride,
                                  int *bounding_values);
 
+#if HAVE_MMX_INLINE
+
+#define MOVQ_BFE(regd)                                  \
+    __asm__ volatile (                                  \
+        "pcmpeqd %%"#regd", %%"#regd"   \n\t"           \
+        "paddb   %%"#regd", %%"#regd"   \n\t" ::)
+
+#define PAVGBP_MMX_NO_RND(rega, regb, regr,  regc, regd, regp)   \
+    "movq  "#rega", "#regr"             \n\t"                    \
+    "movq  "#regc", "#regp"             \n\t"                    \
+    "pand  "#regb", "#regr"             \n\t"                    \
+    "pand  "#regd", "#regp"             \n\t"                    \
+    "pxor  "#rega", "#regb"             \n\t"                    \
+    "pxor  "#regc", "#regd"             \n\t"                    \
+    "pand    %%mm6, "#regb"             \n\t"                    \
+    "pand    %%mm6, "#regd"             \n\t"                    \
+    "psrlq      $1, "#regb"             \n\t"                    \
+    "psrlq      $1, "#regd"             \n\t"                    \
+    "paddb "#regb", "#regr"             \n\t"                    \
+    "paddb "#regd", "#regp"             \n\t"
+
+#if HAVE_6REGS
+static void put_vp_no_rnd_pixels8_l2_mmx(uint8_t *dst, const uint8_t *a, const uint8_t *b, ptrdiff_t stride, int h)
+{ /* Writes to dst the truncating (no-rounding) per-byte average of a and b, 8 bytes per row, 4 rows per loop pass. */
+//    START_TIMER
+    MOVQ_BFE(mm6); /* mm6 = 0xFE in every byte; NOTE(review): set by a separate asm statement, not declared as an input below */
+    __asm__ volatile( /* operands: %0 = h, %1 = a, %2 = b, %3 = dst, %4 = stride, %5 = 3*stride */
+        "1:                             \n\t" /* loop head: one pass handles rows 0..3 */
+        "movq   (%1), %%mm0             \n\t" /* a, row 0 */
+        "movq   (%2), %%mm1             \n\t" /* b, row 0 */
+        "movq   (%1,%4), %%mm2          \n\t" /* a, row 1 */
+        "movq   (%2,%4), %%mm3          \n\t" /* b, row 1 */
+        PAVGBP_MMX_NO_RND(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5) /* mm4/mm5 = (x & y) + (((x ^ y) & mm6) >> 1); overwrites mm1 and mm3 */
+        "movq   %%mm4, (%3)             \n\t" /* store row 0 */
+        "movq   %%mm5, (%3,%4)          \n\t" /* store row 1 */
+
+        "movq   (%1,%4,2), %%mm0        \n\t" /* a, row 2 */
+        "movq   (%2,%4,2), %%mm1        \n\t" /* b, row 2 */
+        "movq   (%1,%5), %%mm2          \n\t" /* a, row 3 */
+        "movq   (%2,%5), %%mm3          \n\t" /* b, row 3 */
+        "lea    (%1,%4,4), %1           \n\t" /* a += 4*stride */
+        "lea    (%2,%4,4), %2           \n\t" /* b += 4*stride */
+        PAVGBP_MMX_NO_RND(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5) /* same averaging for rows 2 and 3 */
+        "movq   %%mm4, (%3,%4,2)        \n\t" /* store row 2 */
+        "movq   %%mm5, (%3,%5)          \n\t" /* store row 3 */
+        "lea    (%3,%4,4), %3           \n\t" /* dst += 4*stride */
+        "subl   $4, %0                  \n\t" /* h -= 4 */
+        "jnz    1b                      \n\t" /* repeat until h hits exactly 0; presumably h is a positive multiple of 4 -- confirm against callers */
+        :"+r"(h), "+r"(a), "+r"(b), "+r"(dst) /* read-write: row counter and the three pointers advance inside the loop */
+        :"r"((x86_reg)stride), "r"((x86_reg)3L*stride) /* read-only: stride and 3*stride as register-width values */
+        :"memory"); /* only "memory" is clobbered; the MMX registers used above are not listed */
+//    STOP_TIMER("put_vp_no_rnd_pixels8_l2_mmx")
+}
+#endif /* HAVE_6REGS */
+#endif /* HAVE_MMX_INLINE */
+
 av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
 {
     int cpu_flags = av_get_cpu_flags();
 
+#if HAVE_6REGS && HAVE_MMX_INLINE
+    c->put_no_rnd_pixels_l2 = put_vp_no_rnd_pixels8_l2_mmx;
+#endif /* HAVE_6REGS && HAVE_MMX_INLINE */
+
 #if ARCH_X86_32
     if (EXTERNAL_MMX(cpu_flags)) {
         c->idct_put  = ff_vp3_idct_put_mmx;
diff --git a/libavcodec/x86/vp56_arith.h b/libavcodec/x86/vp56_arith.h
index 0a69368..810cc8d 100644
--- a/libavcodec/x86/vp56_arith.h
+++ b/libavcodec/x86/vp56_arith.h
@@ -4,49 +4,46 @@
  * Copyright (C) 2006  Aurelien Jacobs <aurel@gnuage.org>
  * Copyright (C) 2010  Eli Friedman
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #ifndef AVCODEC_X86_VP56_ARITH_H
 #define AVCODEC_X86_VP56_ARITH_H
 
-#if HAVE_INLINE_ASM && HAVE_FAST_CMOV
+#if HAVE_INLINE_ASM && HAVE_FAST_CMOV && HAVE_6REGS
 #define vp56_rac_get_prob vp56_rac_get_prob
 static av_always_inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob)
 {
     unsigned int code_word = vp56_rac_renorm(c);
-    unsigned int high = c->high;
-    unsigned int low = 1 + (((high - 1) * prob) >> 8);
+    unsigned int low = 1 + (((c->high - 1) * prob) >> 8);
     unsigned int low_shift = low << 16;
     int bit = 0;
+    c->code_word = code_word;
 
     __asm__(
         "subl  %4, %1      \n\t"
         "subl  %3, %2      \n\t"
-        "leal (%2, %3), %3 \n\t"
         "setae %b0         \n\t"
         "cmovb %4, %1      \n\t"
-        "cmovb %3, %2      \n\t"
-        : "+q"(bit), "+r"(high), "+r"(code_word), "+r"(low_shift)
-        : "r"(low)
+        "cmovb %5, %2      \n\t"
+        : "+q"(bit), "+&r"(c->high), "+&r"(c->code_word)
+        : "r"(low_shift), "r"(low), "r"(code_word)
     );
 
-    c->high      = high;
-    c->code_word = code_word;
     return bit;
 }
 #endif
diff --git a/libavcodec/x86/vp6dsp.asm b/libavcodec/x86/vp6dsp.asm
index 80f8ca5..3d874ea 100644
--- a/libavcodec/x86/vp6dsp.asm
+++ b/libavcodec/x86/vp6dsp.asm
@@ -3,20 +3,20 @@
 ;* Copyright (C) 2009  Sebastien Lucas <sebastien.lucas@gmail.com>
 ;* Copyright (C) 2009  Zuxy Meng <zuxy.meng@gmail.com>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
diff --git a/libavcodec/x86/vp6dsp_init.c b/libavcodec/x86/vp6dsp_init.c
index cd94f3e..82baee7 100644
--- a/libavcodec/x86/vp6dsp_init.c
+++ b/libavcodec/x86/vp6dsp_init.c
@@ -3,20 +3,20 @@
  * Copyright (C) 2009  Sebastien Lucas <sebastien.lucas@gmail.com>
  * Copyright (C) 2009  Zuxy Meng <zuxy.meng@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index adc9730..77cc2f3 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -3,20 +3,20 @@
 ;* Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com>
 ;* Copyright (c) 2010 Fiona Glaser <fiona@x264.com>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
diff --git a/libavcodec/x86/vp8dsp_init.c b/libavcodec/x86/vp8dsp_init.c
index e5afd49..6668f91 100644
--- a/libavcodec/x86/vp8dsp_init.c
+++ b/libavcodec/x86/vp8dsp_init.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com>
  * Copyright (c) 2010 Fiona Glaser <fiona@x264.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -347,7 +347,7 @@ av_cold void ff_vp78dsp_init_x86(VP8DSPContext *c)
         c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
     }
 
-    if (EXTERNAL_SSE2(cpu_flags) && (cpu_flags & AV_CPU_FLAG_SSE2SLOW)) {
+    if (HAVE_SSE2_EXTERNAL && cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) {
         VP8_LUMA_MC_FUNC(0, 16, sse2);
         VP8_MC_FUNC(1, 8, sse2);
         VP8_BILINEAR_MC_FUNC(0, 16, sse2);
@@ -417,7 +417,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext *c)
         c->vp8_luma_dc_wht                      = ff_vp8_luma_dc_wht_sse;
     }
 
-    if (EXTERNAL_SSE2(cpu_flags) && (cpu_flags & AV_CPU_FLAG_SSE2SLOW)) {
+    if (HAVE_SSE2_EXTERNAL && cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW)) {
         c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;
 
         c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
@@ -430,7 +430,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext *c)
     if (EXTERNAL_SSE2(cpu_flags)) {
         c->vp8_idct_dc_add4y          = ff_vp8_idct_dc_add4y_sse2;
 
-        c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2;
+        c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_sse2;
 
         c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
         c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;
@@ -455,7 +455,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext *c)
     }
 
     if (EXTERNAL_SSE4(cpu_flags)) {
-        c->vp8_idct_dc_add                  = ff_vp8_idct_dc_add_sse4;
+        c->vp8_idct_dc_add            = ff_vp8_idct_dc_add_sse4;
 
         c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_sse4;
         c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_sse4;
diff --git a/libavcodec/x86/vp8dsp_loopfilter.asm b/libavcodec/x86/vp8dsp_loopfilter.asm
index 5d792e8..98bb669 100644
--- a/libavcodec/x86/vp8dsp_loopfilter.asm
+++ b/libavcodec/x86/vp8dsp_loopfilter.asm
@@ -3,20 +3,20 @@
 ;* Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com>
 ;* Copyright (c) 2010 Fiona Glaser <fiona@x264.com>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index ce58c08..b04e678 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -1,102 +1,90 @@
 /*
  * VP9 SIMD optimizations
  *
- * Copyright (c) 2013 Ronald S. Bultje <rsbultje@gmail.com>
+ * Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "libavutil/attributes.h"
 #include "libavutil/cpu.h"
-#include "libavutil/internal.h"
 #include "libavutil/mem.h"
 #include "libavutil/x86/asm.h"
 #include "libavutil/x86/cpu.h"
-#include "libavcodec/vp9.h"
+#include "libavcodec/vp9dsp.h"
 
 #if HAVE_YASM
 
-#define fpel_func(avg, sz, opt)                                         \
-void ff_ ## avg ## sz ## _ ## opt(uint8_t *dst, const uint8_t *src,     \
-                                  ptrdiff_t dst_stride,                 \
-                                  ptrdiff_t src_stride,                 \
-                                  int h, int mx, int my)
-
+#define fpel_func(avg, sz, opt) \
+void ff_vp9_##avg##sz##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
+                              const uint8_t *src, ptrdiff_t src_stride, \
+                              int h, int mx, int my)
 fpel_func(put,  4, mmx);
 fpel_func(put,  8, mmx);
 fpel_func(put, 16, sse);
 fpel_func(put, 32, sse);
 fpel_func(put, 64, sse);
-fpel_func(avg,  4, sse);
-fpel_func(avg,  8, sse);
+fpel_func(avg,  4, mmxext);
+fpel_func(avg,  8, mmxext);
 fpel_func(avg, 16, sse2);
 fpel_func(avg, 32, sse2);
 fpel_func(avg, 64, sse2);
 #undef fpel_func
 
-#define mc_func(avg, sz, dir, opt)                                          \
-void                                                                        \
-ff_ ## avg ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(uint8_t *dst,         \
-                                                      const uint8_t *src,   \
-                                                      ptrdiff_t dst_stride, \
-                                                      ptrdiff_t src_stride, \
-                                                      int h,                \
-                                                      const int8_t (*filter)[16])
-
-#define mc_funcs(sz)            \
-    mc_func(put, sz, h, ssse3); \
-    mc_func(avg, sz, h, ssse3); \
-    mc_func(put, sz, v, ssse3); \
-    mc_func(avg, sz, v, ssse3)
+#define mc_func(avg, sz, dir, opt) \
+void ff_vp9_##avg##_8tap_1d_##dir##_##sz##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
+                                                 const uint8_t *src, ptrdiff_t src_stride, \
+                                                 int h, const int8_t (*filter)[16])
+#define mc_funcs(sz) \
+mc_func(put, sz, h, ssse3); \
+mc_func(avg, sz, h, ssse3); \
+mc_func(put, sz, v, ssse3); \
+mc_func(avg, sz, v, ssse3)
 
 mc_funcs(4);
 mc_funcs(8);
+#if ARCH_X86_64
+mc_funcs(16);
+#endif
 
 #undef mc_funcs
 #undef mc_func
 
-#define mc_rep_func(avg, sz, hsz, dir, opt)                                 \
-static av_always_inline void                                                \
-ff_ ## avg ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(uint8_t *dst,         \
-                                                      const uint8_t *src,   \
-                                                      ptrdiff_t dst_stride, \
-                                                      ptrdiff_t src_stride, \
-                                                      int h,                \
-                                                      const int8_t (*filter)[16]) \
-{                                                                           \
-    ff_ ## avg ## _8tap_1d_ ## dir ## _ ## hsz ## _ ## opt(dst, src,        \
-                                                           dst_stride,      \
-                                                           src_stride,      \
-                                                           h,               \
-                                                           filter);         \
-    ff_ ## avg ## _8tap_1d_ ## dir ## _ ## hsz ## _ ## opt(dst + hsz,       \
-                                                           src + hsz,       \
-                                                           dst_stride,      \
-                                                           src_stride,      \
-                                                           h, filter);      \
+#define mc_rep_func(avg, sz, hsz, dir, opt) \
+static av_always_inline void \
+ff_vp9_##avg##_8tap_1d_##dir##_##sz##_##opt(uint8_t *dst, ptrdiff_t dst_stride, \
+                                            const uint8_t *src, ptrdiff_t src_stride, \
+                                            int h, const int8_t (*filter)[16]) \
+{ \
+    ff_vp9_##avg##_8tap_1d_##dir##_##hsz##_##opt(dst,       dst_stride, src, \
+                                                 src_stride, h, filter); \
+    ff_vp9_##avg##_8tap_1d_##dir##_##hsz##_##opt(dst + hsz, dst_stride, src + hsz, \
+                                                 src_stride, h, filter); \
 }
 
-#define mc_rep_funcs(sz, hsz)            \
-    mc_rep_func(put, sz, hsz, h, ssse3); \
-    mc_rep_func(avg, sz, hsz, h, ssse3); \
-    mc_rep_func(put, sz, hsz, v, ssse3); \
-    mc_rep_func(avg, sz, hsz, v, ssse3)
+#define mc_rep_funcs(sz, hsz) \
+mc_rep_func(put, sz, hsz, h, ssse3); \
+mc_rep_func(avg, sz, hsz, h, ssse3); \
+mc_rep_func(put, sz, hsz, v, ssse3); \
+mc_rep_func(avg, sz, hsz, v, ssse3)
 
+#if ARCH_X86_32
 mc_rep_funcs(16, 8);
+#endif
 mc_rep_funcs(32, 16);
 mc_rep_funcs(64, 32);
 
@@ -105,36 +93,29 @@ mc_rep_funcs(64, 32);
 
 extern const int8_t ff_filters_ssse3[3][15][4][16];
 
-#define filter_8tap_2d_fn(op, sz, f, fname)                             \
-static void                                                             \
-op ## _8tap_ ## fname ## _ ## sz ## hv_ssse3(uint8_t *dst,              \
-                                             const uint8_t *src,        \
-                                             ptrdiff_t dst_stride,      \
-                                             ptrdiff_t src_stride,      \
-                                             int h, int mx, int my)     \
-{                                                                       \
-    LOCAL_ALIGNED_16(uint8_t, temp, [71 * 64]);                         \
-    ff_put_8tap_1d_h_ ## sz ## _ssse3(temp, src - 3 * src_stride,       \
-                                      64, src_stride,                   \
-                                      h + 7,                            \
-                                      ff_filters_ssse3[f][mx - 1]);     \
-    ff_ ## op ## _8tap_1d_v_ ## sz ## _ssse3(dst, temp + 3 * 64,        \
-                                             dst_stride, 64,            \
-                                             h,                         \
-                                             ff_filters_ssse3[f][my - 1]); \
+#define filter_8tap_2d_fn(op, sz, f, fname) \
+static void op##_8tap_##fname##_##sz##hv_ssse3(uint8_t *dst, ptrdiff_t dst_stride, \
+                                               const uint8_t *src, ptrdiff_t src_stride, \
+                                               int h, int mx, int my) \
+{ \
+    LOCAL_ALIGNED_16(uint8_t, temp, [71 * 64]); \
+    ff_vp9_put_8tap_1d_h_##sz##_ssse3(temp, 64, src - 3 * src_stride, src_stride, \
+                                      h + 7, ff_filters_ssse3[f][mx - 1]); \
+    ff_vp9_##op##_8tap_1d_v_##sz##_ssse3(dst, dst_stride, temp + 3 * 64, 64, \
+                                         h, ff_filters_ssse3[f][my - 1]); \
 }
 
-#define filters_8tap_2d_fn(op, sz)                          \
-    filter_8tap_2d_fn(op, sz, FILTER_8TAP_REGULAR, regular) \
-    filter_8tap_2d_fn(op, sz, FILTER_8TAP_SHARP, sharp)     \
-    filter_8tap_2d_fn(op, sz, FILTER_8TAP_SMOOTH, smooth)
+#define filters_8tap_2d_fn(op, sz) \
+filter_8tap_2d_fn(op, sz, FILTER_8TAP_REGULAR, regular) \
+filter_8tap_2d_fn(op, sz, FILTER_8TAP_SHARP,   sharp) \
+filter_8tap_2d_fn(op, sz, FILTER_8TAP_SMOOTH,  smooth)
 
 #define filters_8tap_2d_fn2(op) \
-    filters_8tap_2d_fn(op, 64)  \
-    filters_8tap_2d_fn(op, 32)  \
-    filters_8tap_2d_fn(op, 16)  \
-    filters_8tap_2d_fn(op, 8)   \
-    filters_8tap_2d_fn(op, 4)
+filters_8tap_2d_fn(op, 64) \
+filters_8tap_2d_fn(op, 32) \
+filters_8tap_2d_fn(op, 16) \
+filters_8tap_2d_fn(op, 8) \
+filters_8tap_2d_fn(op, 4)
 
 filters_8tap_2d_fn2(put)
 filters_8tap_2d_fn2(avg)
@@ -143,36 +124,30 @@ filters_8tap_2d_fn2(avg)
 #undef filters_8tap_2d_fn
 #undef filter_8tap_2d_fn
 
-#define filter_8tap_1d_fn(op, sz, f, fname, dir, dvar)                  \
-static void                                                             \
-op ## _8tap_ ## fname ## _ ## sz ## dir ## _ssse3(uint8_t *dst,         \
-                                                  const uint8_t *src,   \
-                                                  ptrdiff_t dst_stride, \
-                                                  ptrdiff_t src_stride, \
-                                                  int h, int mx,        \
-                                                  int my)               \
-{                                                                       \
-    ff_ ## op ## _8tap_1d_ ## dir ## _ ## sz ## _ssse3(dst, src,        \
-                                                       dst_stride,      \
-                                                       src_stride, h,   \
-                                                       ff_filters_ssse3[f][dvar - 1]); \
+#define filter_8tap_1d_fn(op, sz, f, fname, dir, dvar) \
+static void op##_8tap_##fname##_##sz##dir##_ssse3(uint8_t *dst, ptrdiff_t dst_stride, \
+                                                  const uint8_t *src, ptrdiff_t src_stride, \
+                                                  int h, int mx, int my) \
+{ \
+    ff_vp9_##op##_8tap_1d_##dir##_##sz##_ssse3(dst, dst_stride, src, src_stride, \
+                                               h, ff_filters_ssse3[f][dvar - 1]); \
 }
 
-#define filters_8tap_1d_fn(op, sz, dir, dvar)                          \
-    filter_8tap_1d_fn(op, sz, FILTER_8TAP_REGULAR, regular, dir, dvar) \
-    filter_8tap_1d_fn(op, sz, FILTER_8TAP_SHARP, sharp, dir, dvar)     \
-    filter_8tap_1d_fn(op, sz, FILTER_8TAP_SMOOTH, smooth, dir, dvar)
+#define filters_8tap_1d_fn(op, sz, dir, dvar) \
+filter_8tap_1d_fn(op, sz, FILTER_8TAP_REGULAR, regular, dir, dvar) \
+filter_8tap_1d_fn(op, sz, FILTER_8TAP_SHARP,   sharp,   dir, dvar) \
+filter_8tap_1d_fn(op, sz, FILTER_8TAP_SMOOTH,  smooth,  dir, dvar)
 
-#define filters_8tap_1d_fn2(op, sz)             \
-    filters_8tap_1d_fn(op, sz, h, mx)           \
-    filters_8tap_1d_fn(op, sz, v, my)
+#define filters_8tap_1d_fn2(op, sz) \
+filters_8tap_1d_fn(op, sz, h, mx) \
+filters_8tap_1d_fn(op, sz, v, my)
 
 #define filters_8tap_1d_fn3(op) \
-    filters_8tap_1d_fn2(op, 64) \
-    filters_8tap_1d_fn2(op, 32) \
-    filters_8tap_1d_fn2(op, 16) \
-    filters_8tap_1d_fn2(op,  8) \
-    filters_8tap_1d_fn2(op,  4)
+filters_8tap_1d_fn2(op, 64) \
+filters_8tap_1d_fn2(op, 32) \
+filters_8tap_1d_fn2(op, 16) \
+filters_8tap_1d_fn2(op, 8) \
+filters_8tap_1d_fn2(op, 4)
 
 filters_8tap_1d_fn3(put)
 filters_8tap_1d_fn3(avg)
@@ -182,6 +157,101 @@ filters_8tap_1d_fn3(avg)
 #undef filters_8tap_1d_fn3
 #undef filter_8tap_1d_fn
 
+#define itxfm_func(typea, typeb, size, opt) \
+void ff_vp9_##typea##_##typeb##_##size##x##size##_add_##opt(uint8_t *dst, ptrdiff_t stride, \
+                                                            int16_t *block, int eob)
+#define itxfm_funcs(size, opt) \
+itxfm_func(idct,  idct,  size, opt); \
+itxfm_func(iadst, idct,  size, opt); \
+itxfm_func(idct,  iadst, size, opt); \
+itxfm_func(iadst, iadst, size, opt)
+
+itxfm_funcs(4, ssse3);
+itxfm_funcs(8, ssse3);
+itxfm_funcs(8, avx);
+itxfm_funcs(16, ssse3);
+itxfm_funcs(16, avx);
+itxfm_func(idct, idct, 32, ssse3);
+itxfm_func(idct, idct, 32, avx);
+itxfm_func(iwht, iwht, 4, mmx);
+
+#undef itxfm_func
+#undef itxfm_funcs
+
+#define lpf_funcs(size1, size2, opt) \
+void ff_vp9_loop_filter_v_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stride, \
+                                                    int E, int I, int H); \
+void ff_vp9_loop_filter_h_##size1##_##size2##_##opt(uint8_t *dst, ptrdiff_t stride, \
+                                                    int E, int I, int H)
+
+lpf_funcs(16, 16, sse2);
+lpf_funcs(16, 16, ssse3);
+lpf_funcs(16, 16, avx);
+lpf_funcs(44, 16, sse2);
+lpf_funcs(44, 16, ssse3);
+lpf_funcs(44, 16, avx);
+lpf_funcs(84, 16, sse2);
+lpf_funcs(84, 16, ssse3);
+lpf_funcs(84, 16, avx);
+lpf_funcs(48, 16, sse2);
+lpf_funcs(48, 16, ssse3);
+lpf_funcs(48, 16, avx);
+lpf_funcs(88, 16, sse2);
+lpf_funcs(88, 16, ssse3);
+lpf_funcs(88, 16, avx);
+
+#undef lpf_funcs
+
+#define ipred_func(size, type, opt) \
+void ff_vp9_ipred_##type##_##size##x##size##_##opt(uint8_t *dst, ptrdiff_t stride, \
+                                                   const uint8_t *l, const uint8_t *a)
+
+#define ipred_funcs(type, opt) \
+ipred_func(4, type, opt); \
+ipred_func(8, type, opt); \
+ipred_func(16, type, opt); \
+ipred_func(32, type, opt)
+
+ipred_funcs(dc, ssse3);
+ipred_funcs(dc_left, ssse3);
+ipred_funcs(dc_top, ssse3);
+
+#undef ipred_funcs
+
+ipred_func(8, v, mmx);
+ipred_func(16, v, sse2);
+ipred_func(32, v, sse2);
+
+#define ipred_func_set(size, type, opt1, opt2) \
+ipred_func(size, type, opt1); \
+ipred_func(size, type, opt2)
+
+#define ipred_funcs(type, opt1, opt2) \
+ipred_func(4, type, opt1); \
+ipred_func_set(8, type, opt1, opt2); \
+ipred_func_set(16, type, opt1, opt2); \
+ipred_func_set(32, type, opt1, opt2)
+
+ipred_funcs(h, ssse3, avx);
+ipred_funcs(tm, ssse3, avx);
+ipred_funcs(dl, ssse3, avx);
+ipred_funcs(dr, ssse3, avx);
+ipred_funcs(hu, ssse3, avx);
+ipred_funcs(hd, ssse3, avx);
+ipred_funcs(vl, ssse3, avx);
+ipred_funcs(vr, ssse3, avx);
+
+ipred_func(32, dc, avx2);
+ipred_func(32, dc_left, avx2);
+ipred_func(32, dc_top, avx2);
+ipred_func(32, v, avx2);
+ipred_func(32, h, avx2);
+ipred_func(32, tm, avx2);
+
+#undef ipred_funcs
+#undef ipred_func_set
+#undef ipred_func
+
 #endif /* HAVE_YASM */
 
 av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
@@ -189,52 +259,149 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
 #if HAVE_YASM
     int cpu_flags = av_get_cpu_flags();
 
-#define init_fpel(idx1, idx2, sz, type, opt)                            \
-    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] =                    \
-    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] =                    \
-    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] =                    \
-    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = ff_ ## type ## sz ## _ ## opt
-
+#define init_fpel(idx1, idx2, sz, type, opt) \
+    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][0][0] = \
+    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][0][0] = \
+    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][0][0] = \
+    dsp->mc[idx1][FILTER_BILINEAR    ][idx2][0][0] = ff_vp9_##type##sz##_##opt
 
 #define init_subpel1(idx1, idx2, idxh, idxv, sz, dir, type, opt) \
-    dsp->mc[idx1][FILTER_8TAP_SMOOTH][idx2][idxh][idxv]  = type ## _8tap_smooth_  ## sz ## dir ## _ ## opt; \
-    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type ## _8tap_regular_ ## sz ## dir ## _ ## opt; \
-    dsp->mc[idx1][FILTER_8TAP_SHARP][idx2][idxh][idxv]   = type ## _8tap_sharp_   ## sz ## dir ## _ ## opt
+    dsp->mc[idx1][FILTER_8TAP_SMOOTH ][idx2][idxh][idxv] = type##_8tap_smooth_##sz##dir##_##opt; \
+    dsp->mc[idx1][FILTER_8TAP_REGULAR][idx2][idxh][idxv] = type##_8tap_regular_##sz##dir##_##opt; \
+    dsp->mc[idx1][FILTER_8TAP_SHARP  ][idx2][idxh][idxv] = type##_8tap_sharp_##sz##dir##_##opt
 
-#define init_subpel2(idx, idxh, idxv, dir, type, opt)     \
+#define init_subpel2(idx, idxh, idxv, dir, type, opt) \
     init_subpel1(0, idx, idxh, idxv, 64, dir, type, opt); \
     init_subpel1(1, idx, idxh, idxv, 32, dir, type, opt); \
     init_subpel1(2, idx, idxh, idxv, 16, dir, type, opt); \
     init_subpel1(3, idx, idxh, idxv,  8, dir, type, opt); \
     init_subpel1(4, idx, idxh, idxv,  4, dir, type, opt)
 
-#define init_subpel3(idx, type, opt)        \
+#define init_subpel3(idx, type, opt) \
     init_subpel2(idx, 1, 1, hv, type, opt); \
-    init_subpel2(idx, 0, 1,  v, type, opt); \
-    init_subpel2(idx, 1, 0,  h, type, opt)
+    init_subpel2(idx, 0, 1, v, type, opt); \
+    init_subpel2(idx, 1, 0, h, type, opt)
+
+#define init_lpf(opt) do { \
+    if (ARCH_X86_64) { \
+        dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
+        dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
+        dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
+        dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
+        dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
+        dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \
+        dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
+        dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \
+        dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \
+        dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \
+    } \
+} while (0)
+
+#define init_ipred(tx, sz, opt) do { \
+    dsp->intra_pred[tx][HOR_PRED]             = ff_vp9_ipred_h_##sz##x##sz##_##opt; \
+    dsp->intra_pred[tx][DIAG_DOWN_LEFT_PRED]  = ff_vp9_ipred_dl_##sz##x##sz##_##opt; \
+    dsp->intra_pred[tx][DIAG_DOWN_RIGHT_PRED] = ff_vp9_ipred_dr_##sz##x##sz##_##opt; \
+    dsp->intra_pred[tx][HOR_DOWN_PRED]        = ff_vp9_ipred_hd_##sz##x##sz##_##opt; \
+    dsp->intra_pred[tx][VERT_LEFT_PRED]       = ff_vp9_ipred_vl_##sz##x##sz##_##opt; \
+    dsp->intra_pred[tx][HOR_UP_PRED]          = ff_vp9_ipred_hu_##sz##x##sz##_##opt; \
+    if (ARCH_X86_64 || tx != TX_32X32) { \
+        dsp->intra_pred[tx][VERT_RIGHT_PRED]      = ff_vp9_ipred_vr_##sz##x##sz##_##opt; \
+        dsp->intra_pred[tx][TM_VP8_PRED]          = ff_vp9_ipred_tm_##sz##x##sz##_##opt; \
+    } \
+} while (0)
+#define init_dc_ipred(tx, sz, opt) do { \
+    init_ipred(tx, sz, opt); \
+    dsp->intra_pred[tx][DC_PRED]              = ff_vp9_ipred_dc_##sz##x##sz##_##opt; \
+    dsp->intra_pred[tx][LEFT_DC_PRED]         = ff_vp9_ipred_dc_left_##sz##x##sz##_##opt; \
+    dsp->intra_pred[tx][TOP_DC_PRED]          = ff_vp9_ipred_dc_top_##sz##x##sz##_##opt; \
+} while (0)
 
     if (EXTERNAL_MMX(cpu_flags)) {
         init_fpel(4, 0,  4, put, mmx);
         init_fpel(3, 0,  8, put, mmx);
+        dsp->itxfm_add[4 /* lossless */][DCT_DCT] =
+        dsp->itxfm_add[4 /* lossless */][ADST_DCT] =
+        dsp->itxfm_add[4 /* lossless */][DCT_ADST] =
+        dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx;
+        dsp->intra_pred[TX_8X8][VERT_PRED] = ff_vp9_ipred_v_8x8_mmx;
+    }
+
+    if (EXTERNAL_MMXEXT(cpu_flags)) {
+        init_fpel(4, 1,  4, avg, mmxext);
+        init_fpel(3, 1,  8, avg, mmxext);
     }
 
     if (EXTERNAL_SSE(cpu_flags)) {
         init_fpel(2, 0, 16, put, sse);
         init_fpel(1, 0, 32, put, sse);
         init_fpel(0, 0, 64, put, sse);
-        init_fpel(4, 1,  4, avg, sse);
-        init_fpel(3, 1,  8, avg, sse);
     }
 
     if (EXTERNAL_SSE2(cpu_flags)) {
         init_fpel(2, 1, 16, avg, sse2);
         init_fpel(1, 1, 32, avg, sse2);
         init_fpel(0, 1, 64, avg, sse2);
+        init_lpf(sse2);
+        dsp->intra_pred[TX_16X16][VERT_PRED] = ff_vp9_ipred_v_16x16_sse2;
+        dsp->intra_pred[TX_32X32][VERT_PRED] = ff_vp9_ipred_v_32x32_sse2;
     }
 
     if (EXTERNAL_SSSE3(cpu_flags)) {
         init_subpel3(0, put, ssse3);
         init_subpel3(1, avg, ssse3);
+        dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_ssse3;
+        dsp->itxfm_add[TX_4X4][ADST_DCT]  = ff_vp9_idct_iadst_4x4_add_ssse3;
+        dsp->itxfm_add[TX_4X4][DCT_ADST]  = ff_vp9_iadst_idct_4x4_add_ssse3;
+        dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_ssse3;
+        if (ARCH_X86_64) {
+            dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3;
+            dsp->itxfm_add[TX_8X8][ADST_DCT]  = ff_vp9_idct_iadst_8x8_add_ssse3;
+            dsp->itxfm_add[TX_8X8][DCT_ADST]  = ff_vp9_iadst_idct_8x8_add_ssse3;
+            dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_ssse3;
+            dsp->itxfm_add[TX_16X16][DCT_DCT]   = ff_vp9_idct_idct_16x16_add_ssse3;
+            dsp->itxfm_add[TX_16X16][ADST_DCT]  = ff_vp9_idct_iadst_16x16_add_ssse3;
+            dsp->itxfm_add[TX_16X16][DCT_ADST]  = ff_vp9_iadst_idct_16x16_add_ssse3;
+            dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_ssse3;
+            dsp->itxfm_add[TX_32X32][ADST_ADST] =
+            dsp->itxfm_add[TX_32X32][ADST_DCT] =
+            dsp->itxfm_add[TX_32X32][DCT_ADST] =
+            dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_ssse3;
+        }
+        init_lpf(ssse3);
+        init_dc_ipred(TX_4X4,    4, ssse3);
+        init_dc_ipred(TX_8X8,    8, ssse3);
+        init_dc_ipred(TX_16X16, 16, ssse3);
+        init_dc_ipred(TX_32X32, 32, ssse3);
+    }
+
+    if (EXTERNAL_AVX(cpu_flags)) {
+        if (ARCH_X86_64) {
+            dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_avx;
+            dsp->itxfm_add[TX_8X8][ADST_DCT]  = ff_vp9_idct_iadst_8x8_add_avx;
+            dsp->itxfm_add[TX_8X8][DCT_ADST]  = ff_vp9_iadst_idct_8x8_add_avx;
+            dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_avx;
+            dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx;
+            dsp->itxfm_add[TX_16X16][ADST_DCT]  = ff_vp9_idct_iadst_16x16_add_avx;
+            dsp->itxfm_add[TX_16X16][DCT_ADST]  = ff_vp9_iadst_idct_16x16_add_avx;
+            dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_avx;
+            dsp->itxfm_add[TX_32X32][ADST_ADST] =
+            dsp->itxfm_add[TX_32X32][ADST_DCT] =
+            dsp->itxfm_add[TX_32X32][DCT_ADST] =
+            dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx;
+        }
+        init_lpf(avx);
+        init_ipred(TX_8X8,    8, avx);
+        init_ipred(TX_16X16, 16, avx);
+        init_ipred(TX_32X32, 32, avx);
+    }
+
+    if (EXTERNAL_AVX2(cpu_flags)) {
+        dsp->intra_pred[TX_32X32][DC_PRED] = ff_vp9_ipred_dc_32x32_avx2;
+        dsp->intra_pred[TX_32X32][LEFT_DC_PRED] = ff_vp9_ipred_dc_left_32x32_avx2;
+        dsp->intra_pred[TX_32X32][TOP_DC_PRED] = ff_vp9_ipred_dc_top_32x32_avx2;
+        dsp->intra_pred[TX_32X32][VERT_PRED] = ff_vp9_ipred_v_32x32_avx2;
+        dsp->intra_pred[TX_32X32][HOR_PRED] = ff_vp9_ipred_h_32x32_avx2;
+        dsp->intra_pred[TX_32X32][TM_VP8_PRED] = ff_vp9_ipred_tm_32x32_avx2;
     }
 
 #undef init_fpel
diff --git a/libavcodec/x86/vp9intrapred.asm b/libavcodec/x86/vp9intrapred.asm
new file mode 100644
index 0000000..1d8d219
--- /dev/null
+++ b/libavcodec/x86/vp9intrapred.asm
@@ -0,0 +1,1554 @@
+;******************************************************************************
+;* VP9 Intra prediction SIMD optimizations
+;*
+;* Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
+;*
+;* Parts based on:
+;* H.264 intra prediction asm optimizations
+;* Copyright (c) 2010 Fiona Glaser
+;* Copyright (c) 2010 Holger Lubitz
+;* Copyright (c) 2010 Loren Merritt
+;* Copyright (c) 2010 Ronald S. Bultje
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA 32  ; read-only constants; the argument is the section alignment in bytes
+
+pw_m256: times 16 dw -256  ; 0xFF00 per word; as a pshufb mask: byte 0 zero-extended into every word
+pw_m255: times 16 dw -255  ; 0xFF01 per word; as a pshufb mask: byte 1 zero-extended into every word
+pw_512:  times 8 dw  512  ; pmulhrsw multiplier: (x + 32) >> 6
+pw_1024: times 8 dw 1024  ; pmulhrsw multiplier: (x + 16) >> 5
+pw_2048: times 8 dw 2048  ; pmulhrsw multiplier: (x + 8) >> 4
+pw_4096: times 8 dw 4096  ; pmulhrsw multiplier: (x + 4) >> 3
+pw_8192: times 8 dw 8192  ; pmulhrsw multiplier: (x + 2) >> 2
+
+pb_4x3_4x2_4x1_4x0: times 4 db 3  ; byte-index tables from here on; names spell out the index pattern
+                    times 4 db 2
+                    times 4 db 1
+                    times 4 db 0
+pb_8x1_8x0:   times 8 db 1
+              times 8 db 0
+pb_8x3_8x2:   times 8 db 3
+              times 8 db 2
+pb_0to5_2x7:  db 0, 1, 2, 3, 4, 5, 7, 7
+              times 8 db -1  ; -1 has the top bit set, which makes pshufb write zero
+pb_0to6_9x7:  db 0, 1, 2, 3, 4, 5, 6
+              times 9 db 7
+pb_1to6_10x7: db 1, 2, 3, 4, 5, 6
+              times 10 db 7
+pb_2to6_3x7:  ; alias of the table below: its first 8 bytes are 2..6 followed by three 7s
+pb_2to6_11x7: db 2, 3, 4, 5, 6
+              times 11 db 7
+pb_1toE_2xF:  db 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15  ; shift down one byte, repeating the last byte
+pb_2toE_3xF:  db 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 15  ; shift down two bytes, repeating the last byte
+pb_13456_3xm1: db 1, 3, 4, 5, 6
+               times 3 db -1
+pb_6012_4xm1: db 6, 0, 1, 2
+              times 4 db -1
+pb_6xm1_246_8toE: times 6 db -1
+                  db 2, 4, 6, 8, 9, 10, 11, 12, 13, 14
+pb_6xm1_BDF_0to6: times 6 db -1
+                  db 11, 13, 15, 0, 1, 2, 3, 4, 5, 6
+pb_02468ACE_13579BDF: db 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15
+pb_7to1_9x0:  db 7, 6, 5, 4  ; deliberately runs on into pb_3to1_5x0: 7..1 followed by nine zeros
+pb_3to1_5x0:  db 3, 2, 1
+              times 9 db 0
+pb_Fto0:      db 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+pb_2:  times 32 db 2
+pb_15: times 16 db 15
+
+cextern pb_1  ; declared external: the data lives in another object
+cextern pb_3  ; declared external: the data lives in another object
+
+SECTION .text
+
+; dc_NxN(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
+
+INIT_MMX ssse3
+cglobal vp9_ipred_dc_4x4, 4, 4, 0, dst, stride, l, a  ; fill a 4x4 block with the rounded mean of 4 bytes at l and 4 bytes at a
+    movd                    m0, [lq]
+    punpckldq               m0, [aq]  ; m0 = 4 bytes from l followed by 4 bytes from a
+    pxor                    m1, m1
+    psadbw                  m0, m1  ; SAD against zero = sum of the 8 bytes
+    pmulhrsw                m0, [pw_4096]  ; (sum + 4) >> 3
+    pshufb                  m0, m1  ; all-zero mask broadcasts byte 0
+    movd      [dstq+strideq*0], m0
+    movd      [dstq+strideq*1], m0
+    lea                   dstq, [dstq+strideq*2]
+    movd      [dstq+strideq*0], m0
+    movd      [dstq+strideq*1], m0
+    RET
+
+INIT_MMX ssse3
+cglobal vp9_ipred_dc_8x8, 4, 4, 0, dst, stride, l, a  ; fill an 8x8 block with the rounded mean of 8 bytes at l and 8 bytes at a
+    movq                    m0, [lq]
+    movq                    m1, [aq]
+    DEFINE_ARGS dst, stride, stride3  ; l is no longer needed; its register is renamed stride3
+    lea               stride3q, [strideq*3]
+    pxor                    m2, m2
+    psadbw                  m0, m2  ; sum of the 8 bytes from l
+    psadbw                  m1, m2  ; sum of the 8 bytes from a
+    paddw                   m0, m1
+    pmulhrsw                m0, [pw_2048]  ; (sum + 8) >> 4
+    pshufb                  m0, m2  ; all-zero mask broadcasts byte 0
+    movq      [dstq+strideq*0], m0
+    movq      [dstq+strideq*1], m0
+    movq      [dstq+strideq*2], m0
+    movq      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]
+    movq      [dstq+strideq*0], m0
+    movq      [dstq+strideq*1], m0
+    movq      [dstq+strideq*2], m0
+    movq      [dstq+stride3q ], m0
+    RET
+
+INIT_XMM ssse3
+cglobal vp9_ipred_dc_16x16, 4, 4, 3, dst, stride, l, a  ; fill a 16x16 block with the rounded mean of 16 bytes at l and 16 bytes at a
+    mova                    m0, [lq]  ; aligned loads: l and a must be 16-byte aligned
+    mova                    m1, [aq]
+    DEFINE_ARGS dst, stride, stride3, cnt  ; l and a are consumed; their registers become stride3 and cnt
+    lea               stride3q, [strideq*3]
+    pxor                    m2, m2
+    psadbw                  m0, m2  ; two partial sums, one per 8-byte half
+    psadbw                  m1, m2
+    paddw                   m0, m1
+    movhlps                 m1, m0  ; bring the upper-half sum down
+    paddw                   m0, m1  ; low word = sum of all 32 bytes
+    pmulhrsw                m0, [pw_1024]  ; (sum + 16) >> 5
+    pshufb                  m0, m2  ; all-zero mask broadcasts byte 0
+    mov                   cntd, 4  ; 4 iterations x 4 rows
+.loop:
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]
+    dec                   cntd
+    jg .loop
+    RET
+
+INIT_XMM ssse3
+cglobal vp9_ipred_dc_32x32, 4, 4, 5, dst, stride, l, a  ; fill a 32x32 block with the rounded mean of 32 bytes at l and 32 bytes at a
+    mova                    m0, [lq]
+    mova                    m1, [lq+16]
+    mova                    m2, [aq]
+    mova                    m3, [aq+16]
+    DEFINE_ARGS dst, stride, stride3, cnt  ; l and a are consumed; their registers become stride3 and cnt
+    lea               stride3q, [strideq*3]
+    pxor                    m4, m4
+    psadbw                  m0, m4  ; each psadbw yields two partial sums (one per 8-byte half)
+    psadbw                  m1, m4
+    psadbw                  m2, m4
+    psadbw                  m3, m4
+    paddw                   m0, m1
+    paddw                   m2, m3
+    paddw                   m0, m2
+    movhlps                 m1, m0  ; bring the upper-half sum down
+    paddw                   m0, m1  ; low word = sum of all 64 bytes
+    pmulhrsw                m0, [pw_512]  ; (sum + 32) >> 6
+    pshufb                  m0, m4  ; all-zero mask broadcasts byte 0
+    mov                   cntd, 8  ; 8 iterations x 4 rows
+.loop:
+    mova   [dstq+strideq*0+ 0], m0
+    mova   [dstq+strideq*0+16], m0
+    mova   [dstq+strideq*1+ 0], m0
+    mova   [dstq+strideq*1+16], m0
+    mova   [dstq+strideq*2+ 0], m0
+    mova   [dstq+strideq*2+16], m0
+    mova   [dstq+stride3q + 0], m0
+    mova   [dstq+stride3q +16], m0
+    lea                   dstq, [dstq+strideq*4]
+    dec                   cntd
+    jg .loop
+    RET
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+cglobal vp9_ipred_dc_32x32, 4, 4, 3, dst, stride, l, a  ; 32x32 DC fill using 32-byte registers: one store per row
+    mova                    m0, [lq]  ; 32-byte aligned loads
+    mova                    m1, [aq]
+    DEFINE_ARGS dst, stride, stride3, cnt  ; l and a are consumed; their registers become stride3 and cnt
+    lea               stride3q, [strideq*3]
+    pxor                    m2, m2
+    psadbw                  m0, m2  ; four partial sums, one per 8-byte group
+    psadbw                  m1, m2
+    paddw                   m0, m1
+    vextracti128           xm1, m0, 1  ; fold the upper 128-bit lane onto the lower one
+    paddw                  xm0, xm1
+    movhlps                xm1, xm0  ; then fold the upper 8 bytes onto the lower 8
+    paddw                  xm0, xm1  ; low word = sum of all 64 bytes
+    pmulhrsw               xm0, [pw_512]  ; (sum + 32) >> 6
+    vpbroadcastb            m0, xm0  ; replicate byte 0 across all 32 bytes
+    mov                   cntd, 4  ; 4 iterations x 8 rows
+.loop:
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]
+    dec                   cntd
+    jg .loop
+    RET
+%endif
+
+; dc_top/left_NxN(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
+
+%macro DC_1D_FUNCS 2 ; dir (top or left), arg (a or l)
+INIT_MMX ssse3
+cglobal vp9_ipred_dc_%1_4x4, 4, 4, 0, dst, stride, l, a  ; fill 4x4 with the rounded mean of the 4 bytes at %2 only
+    movd                    m0, [%2q]
+    pxor                    m1, m1
+    psadbw                  m0, m1  ; sum of the 4 loaded bytes (upper bytes are zero)
+    pmulhrsw                m0, [pw_8192]  ; (sum + 2) >> 2
+    pshufb                  m0, m1  ; all-zero mask broadcasts byte 0
+    movd      [dstq+strideq*0], m0
+    movd      [dstq+strideq*1], m0
+    lea                   dstq, [dstq+strideq*2]
+    movd      [dstq+strideq*0], m0
+    movd      [dstq+strideq*1], m0
+    RET
+
+INIT_MMX ssse3
+cglobal vp9_ipred_dc_%1_8x8, 4, 4, 0, dst, stride, l, a  ; fill 8x8 with the rounded mean of the 8 bytes at %2 only
+    movq                    m0, [%2q]
+    DEFINE_ARGS dst, stride, stride3  ; the edge pointers are consumed; l's register becomes stride3
+    lea               stride3q, [strideq*3]
+    pxor                    m1, m1
+    psadbw                  m0, m1  ; sum of the 8 bytes
+    pmulhrsw                m0, [pw_4096]  ; (sum + 4) >> 3
+    pshufb                  m0, m1  ; all-zero mask broadcasts byte 0
+    movq      [dstq+strideq*0], m0
+    movq      [dstq+strideq*1], m0
+    movq      [dstq+strideq*2], m0
+    movq      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]
+    movq      [dstq+strideq*0], m0
+    movq      [dstq+strideq*1], m0
+    movq      [dstq+strideq*2], m0
+    movq      [dstq+stride3q ], m0
+    RET
+
+INIT_XMM ssse3
+cglobal vp9_ipred_dc_%1_16x16, 4, 4, 3, dst, stride, l, a  ; fill 16x16 with the rounded mean of the 16 bytes at %2 only
+    mova                    m0, [%2q]
+    DEFINE_ARGS dst, stride, stride3, cnt  ; edge pointers are consumed; their registers become stride3 and cnt
+    lea               stride3q, [strideq*3]
+    pxor                    m2, m2
+    psadbw                  m0, m2  ; two partial sums, one per 8-byte half
+    movhlps                 m1, m0  ; only the low half of m1 is used afterwards
+    paddw                   m0, m1  ; low word = sum of all 16 bytes
+    pmulhrsw                m0, [pw_2048]  ; (sum + 8) >> 4
+    pshufb                  m0, m2  ; all-zero mask broadcasts byte 0
+    mov                   cntd, 4  ; 4 iterations x 4 rows
+.loop:
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]
+    dec                   cntd
+    jg .loop
+    RET
+
+INIT_XMM ssse3
+cglobal vp9_ipred_dc_%1_32x32, 4, 4, 3, dst, stride, l, a  ; fill 32x32 with the rounded mean of the 32 bytes at %2 only
+    mova                    m0, [%2q]
+    mova                    m1, [%2q+16]
+    DEFINE_ARGS dst, stride, stride3, cnt  ; edge pointers are consumed; their registers become stride3 and cnt
+    lea               stride3q, [strideq*3]
+    pxor                    m2, m2
+    psadbw                  m0, m2
+    psadbw                  m1, m2
+    paddw                   m0, m1
+    movhlps                 m1, m0  ; bring the upper-half sum down
+    paddw                   m0, m1  ; low word = sum of all 32 bytes
+    pmulhrsw                m0, [pw_1024]  ; (sum + 16) >> 5
+    pshufb                  m0, m2  ; all-zero mask broadcasts byte 0
+    mov                   cntd, 8  ; 8 iterations x 4 rows
+.loop:
+    mova   [dstq+strideq*0+ 0], m0
+    mova   [dstq+strideq*0+16], m0
+    mova   [dstq+strideq*1+ 0], m0
+    mova   [dstq+strideq*1+16], m0
+    mova   [dstq+strideq*2+ 0], m0
+    mova   [dstq+strideq*2+16], m0
+    mova   [dstq+stride3q + 0], m0
+    mova   [dstq+stride3q +16], m0
+    lea                   dstq, [dstq+strideq*4]
+    dec                   cntd
+    jg .loop
+    RET
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+cglobal vp9_ipred_dc_%1_32x32, 4, 4, 3, dst, stride, l, a  ; same 32x32 fill, using 32-byte registers
+    mova                    m0, [%2q]
+    DEFINE_ARGS dst, stride, stride3, cnt  ; edge pointers are consumed; their registers become stride3 and cnt
+    lea               stride3q, [strideq*3]
+    pxor                    m2, m2
+    psadbw                  m0, m2  ; four partial sums, one per 8-byte group
+    vextracti128           xm1, m0, 1  ; fold the upper 128-bit lane onto the lower one
+    paddw                  xm0, xm1
+    movhlps                xm1, xm0  ; then fold the upper 8 bytes onto the lower 8
+    paddw                  xm0, xm1  ; low word = sum of all 32 bytes
+    pmulhrsw               xm0, [pw_1024]  ; (sum + 16) >> 5
+    vpbroadcastb            m0, xm0  ; replicate byte 0 across all 32 bytes
+    mov                   cntd, 4  ; 4 iterations x 8 rows
+.loop:
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]
+    dec                   cntd
+    jg .loop
+    RET
+%endif
+%endmacro
+
+DC_1D_FUNCS top,  a  ; emits vp9_ipred_dc_top_NxN, averaging the bytes at a
+DC_1D_FUNCS left, l  ; emits vp9_ipred_dc_left_NxN, averaging the bytes at l
+
+; v: every output row is a copy of the top edge (a); l is not read
+
+INIT_MMX mmx
+cglobal vp9_ipred_v_8x8, 4, 4, 0, dst, stride, l, a  ; copy the 8 bytes at a into each of 8 rows
+    movq                    m0, [aq]
+    DEFINE_ARGS dst, stride, stride3  ; l is never read; its register becomes stride3
+    lea               stride3q, [strideq*3]
+    movq      [dstq+strideq*0], m0
+    movq      [dstq+strideq*1], m0
+    movq      [dstq+strideq*2], m0
+    movq      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]
+    movq      [dstq+strideq*0], m0
+    movq      [dstq+strideq*1], m0
+    movq      [dstq+strideq*2], m0
+    movq      [dstq+stride3q ], m0
+    RET
+
+INIT_XMM sse2
+cglobal vp9_ipred_v_16x16, 4, 4, 1, dst, stride, l, a  ; copy the 16 bytes at a into each of 16 rows
+    mova                    m0, [aq]  ; aligned load: a must be 16-byte aligned
+    DEFINE_ARGS dst, stride, stride3, cnt  ; l and a registers become stride3 and cnt
+    lea               stride3q, [strideq*3]
+    mov                   cntd, 4  ; 4 iterations x 4 rows
+.loop:
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]
+    dec                   cntd
+    jg .loop
+    RET
+
+INIT_XMM sse2
+cglobal vp9_ipred_v_32x32, 4, 4, 2, dst, stride, l, a  ; copy the 32 bytes at a into each of 32 rows
+    mova                    m0, [aq]  ; left 16 bytes of the row
+    mova                    m1, [aq+16]  ; right 16 bytes of the row
+    DEFINE_ARGS dst, stride, stride3, cnt  ; l and a registers become stride3 and cnt
+    lea               stride3q, [strideq*3]
+    mov                   cntd, 8  ; 8 iterations x 4 rows
+.loop:
+    mova   [dstq+strideq*0+ 0], m0
+    mova   [dstq+strideq*0+16], m1
+    mova   [dstq+strideq*1+ 0], m0
+    mova   [dstq+strideq*1+16], m1
+    mova   [dstq+strideq*2+ 0], m0
+    mova   [dstq+strideq*2+16], m1
+    mova   [dstq+stride3q + 0], m0
+    mova   [dstq+stride3q +16], m1
+    lea                   dstq, [dstq+strideq*4]
+    dec                   cntd
+    jg .loop
+    RET
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+cglobal vp9_ipred_v_32x32, 4, 4, 1, dst, stride, l, a  ; copy the 32 bytes at a into each of 32 rows, one 32-byte store per row
+    mova                    m0, [aq]  ; 32-byte aligned load
+    DEFINE_ARGS dst, stride, stride3, cnt  ; l and a registers become stride3 and cnt
+    lea               stride3q, [strideq*3]
+    mov                   cntd, 4  ; 4 iterations x 8 rows
+.loop:
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m0
+    mova      [dstq+strideq*2], m0
+    mova      [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]
+    dec                   cntd
+    jg .loop
+    RET
+%endif
+
+; h: every output row is one byte of the left edge (l) replicated across the row; a is not read
+
+INIT_XMM ssse3
+cglobal vp9_ipred_h_4x4, 3, 4, 1, dst, stride, l, stride3  ; only 3 arguments are loaded; the 4th register is scratch
+    movd                    m0, [lq]
+    pshufb                  m0, [pb_4x3_4x2_4x1_4x0]  ; dwords become 4x l[3], 4x l[2], 4x l[1], 4x l[0]
+    lea               stride3q, [strideq*3]
+    movd      [dstq+strideq*0], m0  ; top row is filled with l[3]
+    psrldq                  m0, 4
+    movd      [dstq+strideq*1], m0
+    psrldq                  m0, 4
+    movd      [dstq+strideq*2], m0
+    psrldq                  m0, 4
+    movd      [dstq+stride3q ], m0  ; bottom row is filled with l[0]
+    RET
+
+%macro H_XMM_FUNCS 1  ; %1 = instruction set passed to INIT_XMM
+INIT_XMM %1
+cglobal vp9_ipred_h_8x8, 3, 5, 4, dst, stride, l, stride3, cnt  ; 8x8: each row is one byte of l repeated 8 times
+    mova                    m2, [pb_8x1_8x0]
+    mova                    m3, [pb_8x3_8x2]
+    lea               stride3q, [strideq*3]
+    mov                   cntq, 1  ; counts down 1,0: l is consumed 4 bytes at a time from the highest index
+.loop:
+    movd                    m0, [lq+cntq*4]
+    pshufb                  m1, m0, m3  ; 8x byte 3 | 8x byte 2
+    pshufb                  m0, m2  ; 8x byte 1 | 8x byte 0
+    movq      [dstq+strideq*0], m1
+    movhps    [dstq+strideq*1], m1
+    movq      [dstq+strideq*2], m0
+    movhps    [dstq+stride3q ], m0
+    lea                   dstq, [dstq+strideq*4]
+    dec                   cntq
+    jge .loop
+    RET
+
+INIT_XMM %1
+cglobal vp9_ipred_h_16x16, 3, 5, 8, dst, stride, l, stride3, cnt  ; 16x16: each row is one byte of l repeated 16 times
+    mova                    m5, [pb_1]  ; pshufb masks selecting byte 1, 2, 3 for every lane
+    mova                    m6, [pb_2]
+    mova                    m7, [pb_3]
+    pxor                    m4, m4  ; all-zero mask selects byte 0
+    lea               stride3q, [strideq*3]
+    mov                   cntq, 3  ; counts down 3..0: l is consumed 4 bytes at a time from the highest index
+.loop:
+    movd                    m3, [lq+cntq*4]
+    pshufb                  m0, m3, m7
+    pshufb                  m1, m3, m6
+    mova      [dstq+strideq*0], m0  ; row from byte 3 of the group
+    mova      [dstq+strideq*1], m1  ; row from byte 2
+    pshufb                  m2, m3, m5
+    pshufb                  m3, m4
+    mova      [dstq+strideq*2], m2  ; row from byte 1
+    mova      [dstq+stride3q ], m3  ; row from byte 0
+    lea                   dstq, [dstq+strideq*4]
+    dec                   cntq
+    jge .loop
+    RET
+
+INIT_XMM %1
+cglobal vp9_ipred_h_32x32, 3, 5, 8, dst, stride, l, stride3, cnt  ; 32x32: each row is one byte of l repeated 32 times
+    mova                    m5, [pb_1]  ; pshufb masks selecting byte 1, 2, 3 for every lane
+    mova                    m6, [pb_2]
+    mova                    m7, [pb_3]
+    pxor                    m4, m4  ; all-zero mask selects byte 0
+    lea               stride3q, [strideq*3]
+    mov                   cntq, 7  ; counts down 7..0: l is consumed 4 bytes at a time from the highest index
+.loop:
+    movd                    m3, [lq+cntq*4]
+    pshufb                  m0, m3, m7
+    pshufb                  m1, m3, m6
+    mova   [dstq+strideq*0+ 0], m0  ; each 32-byte row is two identical 16-byte stores
+    mova   [dstq+strideq*0+16], m0
+    mova   [dstq+strideq*1+ 0], m1
+    mova   [dstq+strideq*1+16], m1
+    pshufb                  m2, m3, m5
+    pshufb                  m3, m4
+    mova   [dstq+strideq*2+ 0], m2
+    mova   [dstq+strideq*2+16], m2
+    mova   [dstq+stride3q + 0], m3
+    mova   [dstq+stride3q +16], m3
+    lea                   dstq, [dstq+strideq*4]
+    dec                   cntq
+    jge .loop
+    RET
+%endmacro
+
+H_XMM_FUNCS ssse3  ; emits the _ssse3 variants
+H_XMM_FUNCS avx  ; emits the _avx variants
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+cglobal vp9_ipred_h_32x32, 3, 5, 8, dst, stride, l, stride3, cnt  ; 32x32: each row is one byte of l repeated, one 32-byte store per row
+    mova                    m5, [pb_1]  ; 32-byte loads of the byte-select masks
+    mova                    m6, [pb_2]
+    mova                    m7, [pb_3]
+    pxor                    m4, m4  ; all-zero mask selects byte 0
+    lea               stride3q, [strideq*3]
+    mov                   cntq, 7  ; counts down 7..0: l is consumed 4 bytes at a time from the highest index
+.loop:
+    movd                   xm3, [lq+cntq*4]
+    vinserti128             m3, m3, xm3, 1  ; copy into the upper lane too: pshufb shuffles each 128-bit lane separately
+    pshufb                  m0, m3, m7
+    pshufb                  m1, m3, m6
+    mova      [dstq+strideq*0], m0  ; row from byte 3 of the group
+    mova      [dstq+strideq*1], m1  ; row from byte 2
+    pshufb                  m2, m3, m5
+    pshufb                  m3, m4
+    mova      [dstq+strideq*2], m2  ; row from byte 1
+    mova      [dstq+stride3q ], m3  ; row from byte 0
+    lea                   dstq, [dstq+strideq*4]
+    dec                   cntq
+    jge .loop
+    RET
+%endif
+
+; tm: each pixel = left byte + top byte - a[-1], saturated to 8 bits
+
+INIT_MMX ssse3
+cglobal vp9_ipred_tm_4x4, 4, 4, 0, dst, stride, l, a  ; 4x4: pixel = l byte + a[x] - a[-1], saturated to 0..255
+    pxor                    m1, m1
+    pinsrw                  m2, [aq-1], 0  ; word 0 = bytes a[-1], a[0]; only byte 0 is used below
+    movd                    m0, [aq]
+    DEFINE_ARGS dst, stride, l, cnt  ; a is consumed; its register becomes cnt
+    mova                    m3, [pw_m256]  ; mask: byte 0 -> every word
+    mova                    m4, [pw_m255]  ; mask: byte 1 -> every word
+    pshufb                  m2, m3  ; m2 = a[-1] in every word
+    punpcklbw               m0, m1  ; widen a[0..3] to words
+    psubw                   m0, m2  ; m0 = a[x] - a[-1], loop invariant
+    mov                   cntq, 1  ; counts down 1,0: l is consumed two bytes at a time from the highest index
+.loop:
+    pinsrw                  m2, [lq+cntq*2], 0  ; load two left bytes
+    pshufb                  m1, m2, m4  ; m1 = l[2*cnt+1] in every word
+    pshufb                  m2, m3  ; m2 = l[2*cnt] in every word
+    paddw                   m1, m0
+    paddw                   m2, m0
+    packuswb                m1, m1  ; saturate to unsigned bytes
+    packuswb                m2, m2
+    movd      [dstq+strideq*0], m1  ; upper row of the pair uses the higher-indexed left byte
+    movd      [dstq+strideq*1], m2
+    lea                   dstq, [dstq+strideq*2]
+    dec                   cntq
+    jge .loop
+    RET
+
+%macro TM_XMM_FUNCS 1  ; %1 = instruction set passed to INIT_XMM
+INIT_XMM %1
+cglobal vp9_ipred_tm_8x8, 4, 4, 5, dst, stride, l, a  ; 8x8: pixel = l byte + a[x] - a[-1], saturated to 0..255
+    pxor                    m1, m1
+    pinsrw                  m2, [aq-1], 0  ; word 0 = bytes a[-1], a[0]; only byte 0 is used below
+    movh                    m0, [aq]
+    DEFINE_ARGS dst, stride, l, cnt  ; a is consumed; its register becomes cnt
+    mova                    m3, [pw_m256]  ; mask: byte 0 -> every word
+    mova                    m4, [pw_m255]  ; mask: byte 1 -> every word
+    pshufb                  m2, m3  ; m2 = a[-1] in every word
+    punpcklbw               m0, m1  ; widen a[0..7] to words
+    psubw                   m0, m2  ; m0 = a[x] - a[-1], loop invariant
+    mov                   cntq, 3  ; counts down 3..0: two rows per iteration, l read from the highest index
+.loop:
+    pinsrw                  m2, [lq+cntq*2], 0  ; load two left bytes
+    pshufb                  m1, m2, m4  ; m1 = l[2*cnt+1] in every word
+    pshufb                  m2, m3  ; m2 = l[2*cnt] in every word
+    paddw                   m1, m0
+    paddw                   m2, m0
+    packuswb                m1, m2  ; low 8 bytes = upper row, high 8 bytes = lower row
+    movh      [dstq+strideq*0], m1
+    movhps    [dstq+strideq*1], m1
+    lea                   dstq, [dstq+strideq*2]
+    dec                   cntq
+    jge .loop
+    RET
+
+INIT_XMM %1
+cglobal vp9_ipred_tm_16x16, 4, 4, 8, dst, stride, l, a  ; 16x16: pixel = l byte + a[x] - a[-1], saturated to 0..255
+    pxor                    m3, m3
+    pinsrw                  m2, [aq-1], 0  ; word 0 = bytes a[-1], a[0]; only byte 0 is used below
+    mova                    m0, [aq]
+    DEFINE_ARGS dst, stride, l, cnt  ; a is consumed; its register becomes cnt
+    mova                    m4, [pw_m256]  ; mask: byte 0 -> every word
+    mova                    m5, [pw_m255]  ; mask: byte 1 -> every word
+    pshufb                  m2, m4  ; m2 = a[-1] in every word
+    punpckhbw               m1, m0, m3  ; m1 = a[8..15] as words
+    punpcklbw               m0, m3  ; m0 = a[0..7] as words
+    psubw                   m1, m2  ; subtract a[-1] once, outside the loop
+    psubw                   m0, m2
+    mov                   cntq, 7  ; counts down 7..0: two rows per iteration, l read from the highest index
+.loop:
+    pinsrw                  m7, [lq+cntq*2], 0  ; load two left bytes
+    pshufb                  m3, m7, m5  ; m3 = l[2*cnt+1] in every word
+    pshufb                  m7, m4  ; m7 = l[2*cnt] in every word
+    paddw                   m2, m3, m0
+    paddw                   m3, m1
+    paddw                   m6, m7, m0
+    paddw                   m7, m1
+    packuswb                m2, m3  ; upper row of the pair, saturated
+    packuswb                m6, m7  ; lower row of the pair, saturated
+    mova      [dstq+strideq*0], m2
+    mova      [dstq+strideq*1], m6
+    lea                   dstq, [dstq+strideq*2]
+    dec                   cntq
+    jge .loop
+    RET
+
+%if ARCH_X86_64
+INIT_XMM %1
+cglobal vp9_ipred_tm_32x32, 4, 4, 14, dst, stride, l, a  ; 32x32 variant; declares 14 vector registers, built only when ARCH_X86_64
+    pxor                    m5, m5
+    pinsrw                  m4, [aq-1], 0  ; word 0 = bytes a[-1], a[0]; only byte 0 is used below
+    mova                    m0, [aq]
+    mova                    m2, [aq+16]
+    DEFINE_ARGS dst, stride, l, cnt  ; a is consumed; its register becomes cnt
+    mova                    m8, [pw_m256]  ; mask: byte 0 -> every word
+    mova                    m9, [pw_m255]  ; mask: byte 1 -> every word
+    pshufb                  m4, m8  ; m4 = a[-1] in every word
+    punpckhbw               m1, m0,  m5  ; m0..m3 = a[0..31] widened to words, 8 per register
+    punpckhbw               m3, m2,  m5
+    punpcklbw               m0, m5
+    punpcklbw               m2, m5
+    psubw                   m1, m4  ; subtract a[-1] once, outside the loop
+    psubw                   m0, m4
+    psubw                   m3, m4
+    psubw                   m2, m4
+    mov                   cntq, 15  ; counts down 15..0: two rows per iteration, l read from the highest index
+.loop:
+    pinsrw                 m13, [lq+cntq*2], 0  ; load two left bytes
+    pshufb                  m7, m13, m9  ; m7 = l[2*cnt+1] in every word
+    pshufb                 m13, m8  ; m13 = l[2*cnt] in every word
+    paddw                   m4, m7,  m0
+    paddw                   m5, m7,  m1
+    paddw                   m6, m7,  m2
+    paddw                   m7, m3
+    paddw                  m10, m13, m0
+    paddw                  m11, m13, m1
+    paddw                  m12, m13, m2
+    paddw                  m13, m3
+    packuswb                m4, m5  ; upper row, bytes 0..15
+    packuswb                m6, m7  ; upper row, bytes 16..31
+    packuswb               m10, m11  ; lower row, bytes 0..15
+    packuswb               m12, m13  ; lower row, bytes 16..31
+    mova   [dstq+strideq*0+ 0], m4
+    mova   [dstq+strideq*0+16], m6
+    mova   [dstq+strideq*1+ 0], m10
+    mova   [dstq+strideq*1+16], m12
+    lea                   dstq, [dstq+strideq*2]
+    dec                   cntq
+    jge .loop
+    RET
+%endif
+%endmacro
+
+TM_XMM_FUNCS ssse3  ; emits the _ssse3 variants
+TM_XMM_FUNCS avx  ; emits the _avx variants
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+cglobal vp9_ipred_tm_32x32, 4, 4, 8, dst, stride, l, a  ; 32x32: pixel = l byte + a[x] - a[-1], saturated; one 32-byte store per row
+    pxor                    m3, m3
+    pinsrw                 xm2, [aq-1], 0  ; word 0 = bytes a[-1], a[0]; only byte 0 is used below
+    vinserti128             m2, m2, xm2, 1  ; duplicate into the upper lane: pshufb shuffles each 128-bit lane separately
+    mova                    m0, [aq]
+    DEFINE_ARGS dst, stride, l, cnt  ; a is consumed; its register becomes cnt
+    mova                    m4, [pw_m256]  ; mask: byte 0 -> every word
+    mova                    m5, [pw_m255]  ; mask: byte 1 -> every word
+    pshufb                  m2, m4  ; m2 = a[-1] in every word
+    punpckhbw               m1, m0, m3  ; per-lane widening; the per-lane packuswb below restores byte order
+    punpcklbw               m0, m3
+    psubw                   m1, m2  ; subtract a[-1] once, outside the loop
+    psubw                   m0, m2
+    mov                   cntq, 15  ; counts down 15..0: two rows per iteration, l read from the highest index
+.loop:
+    pinsrw                 xm7, [lq+cntq*2], 0  ; load two left bytes
+    vinserti128             m7, m7, xm7, 1  ; duplicate into the upper lane for the per-lane pshufb
+    pshufb                  m3, m7, m5  ; m3 = l[2*cnt+1] in every word
+    pshufb                  m7, m4  ; m7 = l[2*cnt] in every word
+    paddw                   m2, m3, m0
+    paddw                   m3, m1
+    paddw                   m6, m7, m0
+    paddw                   m7, m1
+    packuswb                m2, m3  ; upper row of the pair, saturated
+    packuswb                m6, m7  ; lower row of the pair, saturated
+    mova      [dstq+strideq*0], m2
+    mova      [dstq+strideq*1], m6
+    lea                   dstq, [dstq+strideq*2]
+    dec                   cntq
+    jge .loop
+    RET
+%endif
+
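+; dl: 3-tap low-pass of the top edge (a); each row starts one pixel further along the edge (presumably "diagonal down-left")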
+
+%macro LOWPASS 4 ; left [dst], center, right, tmp
+    pxor                   m%4, m%1, m%3
+    pand                   m%4, [pb_1]  ; tmp = low bit of left^right, i.e. 1 where pavgb would round up
+    pavgb                  m%1, m%3  ; (left + right + 1) >> 1
+    psubusb                m%1, m%4  ; undo the round-up: (left + right) >> 1
+    pavgb                  m%1, m%2  ; dst = (left + 2*center + right + 2) >> 2, per byte
+%endmacro
+
+INIT_MMX ssse3
+cglobal vp9_ipred_dl_4x4, 4, 4, 0, dst, stride, l, a  ; 4x4: low-pass 8 bytes at a, then row y = filtered bytes y..y+3
+    movq                    m1, [aq]
+    pshufb                  m0, m1, [pb_0to5_2x7]  ; left taps: a0..a5, a7, a7
+    pshufb                  m2, m1, [pb_2to6_3x7]  ; right taps: a2..a7, a7, a7
+    psrlq                   m1, 8  ; center taps: a1..a7, 0
+    LOWPASS                  0, 1, 2, 3  ; m0 = filtered edge; byte 7 is never stored
+
+    pshufw                  m1, m0, q3321  ; m1 = m0 shifted down by one word (2 bytes)
+    movd      [dstq+strideq*0], m0  ; row 0
+    movd      [dstq+strideq*2], m1  ; row 2
+    psrlq                   m0, 8
+    psrlq                   m1, 8
+    add                   dstq, strideq
+    movd      [dstq+strideq*0], m0  ; row 1
+    movd      [dstq+strideq*2], m1  ; row 3
+    RET
+
+%macro DL_XMM_FUNCS 1  ; %1 = instruction set passed to INIT_XMM
+INIT_XMM %1
+cglobal vp9_ipred_dl_8x8, 4, 4, 4, dst, stride, stride5, a  ; third argument (l elsewhere) is not read; its register holds stride*5
+    movq                    m0, [aq]
+    lea               stride5q, [strideq*5]
+    pshufb                  m1, m0, [pb_1to6_10x7]  ; center taps: a1..a6, then a7 repeated
+    psrldq                  m2, m1, 1  ; right taps: center shifted down one more byte
+    shufps                  m0, m1, q3210  ; left taps: a0..a7 in the low half, a7 repeated in the high half
+    LOWPASS                  0, 1, 2, 3  ; m0 = filtered edge
+
+    pshufd                  m1, m0, q3321  ; m1 = m0 shifted down 4 bytes, used for rows 4..7
+    movq      [dstq+strideq*0], m0  ; row 0
+    movq      [dstq+strideq*4], m1  ; row 4
+    psrldq                  m0, 1
+    psrldq                  m1, 1
+    movq      [dstq+strideq*1], m0  ; row 1
+    movq      [dstq+stride5q ], m1  ; row 5
+    lea                   dstq, [dstq+strideq*2]
+    psrldq                  m0, 1
+    psrldq                  m1, 1
+    movq      [dstq+strideq*0], m0  ; row 2
+    movq      [dstq+strideq*4], m1  ; row 6
+    psrldq                  m0, 1
+    psrldq                  m1, 1
+    movq      [dstq+strideq*1], m0  ; row 3
+    movq      [dstq+stride5q ], m1  ; row 7
+    RET
+
+INIT_XMM %1
+cglobal vp9_ipred_dl_16x16, 4, 4, 6, dst, stride, l, a  ; 16x16: low-pass 16 bytes at a, each row shifted one byte further
+    mova                    m5, [pb_1toE_2xF]  ; mask: shift down one byte, repeating byte 15
+    mova                    m0, [aq]
+    pshufb                  m1, m0, m5  ; center taps
+    pshufb                  m2, m1, m5  ; right taps
+    pshufb                  m4, m0, [pb_15]  ; m4 = a[15] in every byte
+    LOWPASS                  0, 1, 2, 3  ; m0 = filtered edge
+    DEFINE_ARGS dst, stride, cnt, stride9  ; l and a registers become cnt and stride9
+    lea               stride9q, [strideq*3]
+    mov                   cntd, 4  ; 4 iterations, each writing rows y, y+1, y+8, y+9
+    lea               stride9q, [stride9q*3]  ; stride*9, built in two steps
+
+.loop:
+    movhlps                 m4, m0  ; m4 = high half of m0 | a[15] repeated
+    mova      [dstq+strideq*0], m0
+    pshufb                  m0, m5  ; advance one byte for the next row
+    mova      [dstq+strideq*8], m4  ; row y+8 is row y advanced by 8 bytes
+    movhlps                 m4, m0
+    mova      [dstq+strideq*1], m0
+    pshufb                  m0, m5
+    mova      [dstq+stride9q ], m4
+    lea                   dstq, [dstq+strideq*2]
+    dec                   cntd
+    jg .loop
+    RET
+
+INIT_XMM %1
+cglobal vp9_ipred_dl_32x32, 4, 5, 8, dst, stride, cnt, a, dst16  ; third argument (l elsewhere) is not read; its register is the loop counter
+    mova                    m5, [pb_1toE_2xF]  ; mask: shift down one byte, repeating byte 15
+    mova                    m0, [aq]
+    mova                    m1, [aq+16]
+    palignr                 m2, m1, m0, 1  ; center taps for the first 16 bytes
+    palignr                 m3, m1, m0, 2  ; right taps for the first 16 bytes
+    LOWPASS                  0, 2, 3, 4  ; m0 = filtered bytes 0..15
+    pshufb                  m2, m1, m5
+    pshufb                  m3, m2, m5
+    pshufb                  m6, m1, [pb_15]  ; m6 = a[31] in every byte, taken before m1 is filtered
+    LOWPASS                  1, 2, 3, 4  ; m1 = filtered bytes 16..31
+    mova                    m7, m6
+    lea                 dst16q, [dstq  +strideq*8]
+    mov                   cntd, 8  ; 8 iterations, each writing rows y, y+8, y+16, y+24
+    lea                 dst16q, [dst16q+strideq*8]  ; dst16 = dst + 16 rows
+.loop:
+    movhlps                 m7, m1  ; m7 = high half of m1 | a[31] repeated
+    mova [dstq  +strideq*0+ 0], m0
+    mova [dstq  +strideq*0+16], m1
+    movhps [dstq+strideq*8+ 0], m0  ; row y+8 is row y advanced by 8 bytes
+    movq [dstq  +strideq*8+ 8], m1
+    mova [dstq  +strideq*8+16], m7
+    mova [dst16q+strideq*0+ 0], m1  ; row y+16 is row y advanced by 16 bytes
+    mova [dst16q+strideq*0+16], m6
+    mova [dst16q+strideq*8+ 0], m7  ; row y+24 is row y advanced by 24 bytes
+    mova [dst16q+strideq*8+16], m6
+%if cpuflag(avx)
+    vpalignr                m0, m1, m0, 1  ; advance m0:m1 by one byte (3-operand form)
+    pshufb                  m1, m5
+%else
+    palignr                 m2, m1, m0, 1  ; same advance, via a temporary
+    pshufb                  m1, m5
+    mova                    m0, m2
+%endif
+    add                   dstq, strideq
+    add                 dst16q, strideq
+    dec                   cntd
+    jg .loop
+    RET
+%endmacro
+
+DL_XMM_FUNCS ssse3  ; emits the _ssse3 variants
+DL_XMM_FUNCS avx  ; emits the _avx variants
+
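+; dr: 3-tap low-pass over the left edge (l), a[-1] and the top edge (a); each row is the row above shifted right by one pixel (presumably "diagonal down-right")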
+
+INIT_MMX ssse3
+cglobal vp9_ipred_dr_4x4, 4, 4, 0, dst, stride, l, a  ; 4x4: low-pass the sequence l[0..3], a[-1..6]; rows are 4-byte windows of it
+    movd                    m0, [lq]
+    punpckldq               m0, [aq-1]  ; left taps: l0..l3, a[-1], a0..a2
+    movd                    m1, [aq+3]
+    DEFINE_ARGS dst, stride, stride3  ; l is consumed; its register becomes stride3
+    lea               stride3q, [strideq*3]
+    palignr                 m1, m0, 1  ; center taps: the sequence advanced one byte
+    psrlq                   m2, m1, 8  ; right taps: advanced two bytes
+    LOWPASS                  0, 1, 2, 3
+
+    movd      [dstq+stride3q ], m0  ; bottom row first; each row above starts one byte later
+    psrlq                   m0, 8
+    movd      [dstq+strideq*2], m0
+    psrlq                   m0, 8
+    movd      [dstq+strideq*1], m0
+    psrlq                   m0, 8
+    movd      [dstq+strideq*0], m0
+    RET
+
+%macro DR_XMM_FUNCS 1  ; %1 = instruction set passed to INIT_XMM
+INIT_XMM %1
+cglobal vp9_ipred_dr_8x8, 4, 4, 4, dst, stride, l, a  ; 8x8: low-pass the sequence l[0..7], a[-1..]; rows are 8-byte windows of it
+    movq                    m1, [lq]
+    movhps                  m1, [aq-1]  ; center taps: l0..l7, a[-1], a0..a6
+    movd                    m2, [aq+7]
+    DEFINE_ARGS dst, stride, stride3  ; l is consumed; its register becomes stride3
+    lea               stride3q, [strideq*3]
+    pslldq                  m0, m1, 1  ; left taps: sequence delayed one byte
+    palignr                 m2, m1, 1  ; right taps: sequence advanced one byte, a7 shifted in
+    LOWPASS                  0, 1, 2, 3
+
+    movhps    [dstq+strideq*0], m0  ; top row = high 8 bytes
+    pslldq                  m0, 1  ; each following row starts one byte earlier in the sequence
+    movhps    [dstq+strideq*1], m0
+    pslldq                  m0, 1
+    movhps    [dstq+strideq*2], m0
+    pslldq                  m0, 1
+    movhps    [dstq+stride3q ], m0
+    pslldq                  m0, 1
+    lea                   dstq, [dstq+strideq*4]
+    movhps    [dstq+strideq*0], m0
+    pslldq                  m0, 1
+    movhps    [dstq+strideq*1], m0
+    pslldq                  m0, 1
+    movhps    [dstq+strideq*2], m0
+    pslldq                  m0, 1
+    movhps    [dstq+stride3q ], m0
+    RET
+
+INIT_XMM %1
+cglobal vp9_ipred_dr_16x16, 4, 4, 6, dst, stride, l, a  ; 16x16: low-pass the sequence l[0..15], a[-1..]; rows are 16-byte windows of it
+    mova                    m1, [lq]
+    movu                    m2, [aq-1]  ; unaligned: starts one byte before a
+    movd                    m4, [aq+15]
+    DEFINE_ARGS dst, stride, stride9, cnt  ; l and a registers become stride9 and cnt
+    lea               stride9q, [strideq *3]
+    mov                   cntd, 4  ; 4 iterations, each writing rows y, y+1, y+8, y+9
+    lea               stride9q, [stride9q*3]  ; stride*9, built in two steps
+    palignr                 m4, m2, 1  ; right taps for the top part: a0..a15
+    palignr                 m3, m2, m1, 15  ; left taps for the top part: l15, a[-1]..a13
+    LOWPASS                  3,  2, 4, 5  ; m3 = filtered top part of the sequence
+    pslldq                  m0, m1, 1  ; left taps for the left part
+    palignr                 m2, m1, 1  ; right taps for the left part
+    LOWPASS                  0,  1, 2, 4  ; m0 = filtered left part of the sequence
+
+.loop:
+    mova    [dstq+strideq*0  ], m3
+    movhps  [dstq+strideq*8+0], m0  ; row y+8 = window starting 8 bytes earlier in the m0:m3 sequence
+    movq    [dstq+strideq*8+8], m3
+    palignr                 m3, m0, 15  ; slide the window back one byte, pulling in the top byte of m0
+    pslldq                  m0, 1
+    mova    [dstq+strideq*1  ], m3
+    movhps  [dstq+stride9q +0], m0
+    movq    [dstq+stride9q +8], m3
+    palignr                 m3, m0, 15
+    pslldq                  m0, 1
+    lea                   dstq, [dstq+strideq*2]
+    dec                   cntd
+    jg .loop
+    RET
+
+INIT_XMM %1
+cglobal vp9_ipred_dr_32x32, 4, 4, 8, dst, stride, l, a  ; 32x32: low-pass the sequence l[0..31], a[-1..]; rows are 32-byte windows of it
+    mova                    m1, [lq]
+    mova                    m2, [lq+16]
+    movu                    m3, [aq-1]  ; unaligned: starts one byte before a
+    movu                    m4, [aq+15]
+    movd                    m5, [aq+31]
+    DEFINE_ARGS dst, stride, stride8, cnt  ; l and a registers become stride8 and cnt
+    lea               stride8q, [strideq*8]
+    palignr                 m5, m4, 1  ; filter the four 16-byte pieces from the end of the sequence backwards
+    palignr                 m6, m4, m3, 15
+    LOWPASS                  5,  4,  6,  7
+    palignr                 m4, m3, 1
+    palignr                 m6, m3, m2, 15
+    LOWPASS                  4,  3,  6,  7
+    palignr                 m3, m2, 1
+    palignr                 m6, m2, m1, 15
+    LOWPASS                  3,  2,  6,  7
+    palignr                 m2, m1, 1
+    pslldq                  m0, m1, 1
+    LOWPASS                  2,  1,  0,  6
+    mov                   cntd, 16  ; 16 iterations, each writing rows y and y+16
+
+    ; out=m2/m3/m4/m5
+.loop:
+    mova  [dstq+stride8q*0+ 0], m4
+    mova  [dstq+stride8q*0+16], m5
+    mova  [dstq+stride8q*2+ 0], m3  ; stride8*2 = 16 rows down: window starts 16 bytes earlier
+    mova  [dstq+stride8q*2+16], m4
+    palignr                 m5, m4, 15  ; slide the whole m2:m3:m4:m5 sequence by one byte
+    palignr                 m4, m3, 15
+    palignr                 m3, m2, 15
+    pslldq                  m2, 1
+    add                   dstq, strideq
+    dec                   cntd
+    jg .loop
+    RET
+%endmacro
+
+DR_XMM_FUNCS ssse3  ; emits the _ssse3 variants
+DR_XMM_FUNCS avx  ; emits the _avx variants
+
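+; vl: even rows are 2-tap averages and odd rows 3-tap low-passes of the top edge (a), advancing one pixel every two rows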
+
+INIT_MMX ssse3
+cglobal vp9_ipred_vl_4x4, 4, 4, 0, dst, stride, l, a  ; 4x4 from the 8 bytes at a; l is not read
+    movq                    m0, [aq]
+    psrlq                   m1, m0, 8  ; a advanced one byte
+    psrlq                   m2, m1, 8  ; a advanced two bytes
+    LOWPASS                  2,  1, 0, 3  ; m2 = (a[i] + 2*a[i+1] + a[i+2] + 2) >> 2
+    pavgb                   m1, m0  ; m1 = (a[i] + a[i+1] + 1) >> 1
+    movd      [dstq+strideq*0], m1  ; row 0: 2-tap averages
+    movd      [dstq+strideq*1], m2  ; row 1: 3-tap low-pass
+    lea                   dstq, [dstq+strideq*2]
+    psrlq                   m1, 8  ; rows 2 and 3 repeat the pattern one byte further along
+    psrlq                   m2, 8
+    movd      [dstq+strideq*0], m1
+    movd      [dstq+strideq*1], m2
+    RET
+
+%macro VL_XMM_FUNCS 1
+INIT_XMM %1
+cglobal vp9_ipred_vl_8x8, 4, 4, 4, dst, stride, l, a ; 8x8 "vl" predictor; only a[0..7] is read, l is unused
+    movq                    m0, [aq] ; a[0..7] in the low half
+    pshufb                  m0, [pb_0to6_9x7] ; shuffle table defined elsewhere (name suggests bytes 0..6 then 7 repeated; confirm)
+    DEFINE_ARGS dst, stride, stride3
+    lea               stride3q, [strideq*3]
+    psrldq                  m1, m0, 1 ; m0 advanced by one byte
+    psrldq                  m2, m0, 2 ; m0 advanced by two bytes
+    LOWPASS                  2,  1,  0,  3 ; LOWPASS is defined outside this chunk; result is used from m2 below
+    pavgb                   m1, m0 ; rounded average of each byte with its right neighbour
+
+    movq      [dstq+strideq*0], m1 ; even rows are taken from m1
+    movq      [dstq+strideq*1], m2 ; odd rows are taken from m2
+    psrldq                  m1, 1 ; every following row pair is the previous pair advanced by one byte
+    psrldq                  m2, 1
+    movq      [dstq+strideq*2], m1
+    movq      [dstq+stride3q ], m2
+    lea                   dstq, [dstq+strideq*4] ; rows 4..7
+    psrldq                  m1, 1
+    psrldq                  m2, 1
+    movq      [dstq+strideq*0], m1
+    movq      [dstq+strideq*1], m2
+    psrldq                  m1, 1
+    psrldq                  m2, 1
+    movq      [dstq+strideq*2], m1
+    movq      [dstq+stride3q ], m2
+    RET
+
+INIT_XMM %1
+cglobal vp9_ipred_vl_16x16, 4, 4, 5, dst, stride, l, a ; 16x16 "vl" predictor; only a[0..15] is read, l is unused
+    mova                    m0, [aq] ; a[0..15]
+    mova                    m4, [pb_1toE_2xF] ; shuffle table defined elsewhere (name suggests "advance one byte, repeat the last"; confirm)
+    DEFINE_ARGS dst, stride, stride3, cnt
+    lea               stride3q, [strideq*3]
+    pshufb                  m1, m0, m4 ; m0 shuffled once
+    pshufb                  m2, m1, m4 ; m0 shuffled twice
+    LOWPASS                  2,  1,  0, 3 ; LOWPASS is defined outside this chunk; result is used from m2 below
+    pavgb                   m1, m0 ; rounded byte average of m0 and its shuffled copy
+    mov                   cntd, 4 ; 4 iterations x 4 rows
+.loop:
+    mova      [dstq+strideq*0], m1 ; even rows are taken from m1
+    mova      [dstq+strideq*1], m2 ; odd rows are taken from m2
+    pshufb                  m1, m4 ; step both vectors for the next row pair
+    pshufb                  m2, m4
+    mova      [dstq+strideq*2], m1
+    mova      [dstq+stride3q ], m2
+    pshufb                  m1, m4
+    pshufb                  m2, m4
+    lea                   dstq, [dstq+strideq*4]
+    dec                   cntd
+    jg .loop
+    RET
+
+INIT_XMM %1
+cglobal vp9_ipred_vl_32x32, 4, 4, 7, dst, stride, l, a ; 32x32 "vl" predictor; only a[0..31] is read, l is unused
+    mova                    m0, [aq] ; a[0..15]
+    mova                    m5, [aq+16] ; a[16..31]
+    mova                    m4, [pb_1toE_2xF] ; shuffle table defined elsewhere
+    DEFINE_ARGS dst, stride, dst16, cnt
+    palignr                 m2, m5, m0, 1 ; a[1..16]
+    palignr                 m3, m5, m0, 2 ; a[2..17]
+    lea                 dst16q, [dstq  +strideq*8] ; first half of dst + 16*stride, interleaved with the SIMD work
+    LOWPASS                  3,  2,  0, 6 ; LOWPASS is defined outside this chunk; result is used from m3 below
+    pavgb                   m2, m0 ; (a[i] + a[i+1] + 1) >> 1 for i = 0..15
+    pshufb                  m0, m5, m4 ; upper half shuffled once
+    pshufb                  m1, m0, m4 ; upper half shuffled twice
+    lea                 dst16q, [dst16q+strideq*8] ; dst16q = dstq + 16*stride
+    LOWPASS                  1,  0,  5, 6
+    pavgb                   m0, m5
+    pshufb                  m5, [pb_15] ; table defined elsewhere (name suggests a broadcast of byte 15; confirm)
+    mov                   cntd, 8 ; 8 iterations, each writing 2 rows at dstq and 2 rows at dst16q
+
+.loop:
+%macro %%write 3
+    mova    [dstq+stride%1+ 0], %2 ; upper-half row, bytes 0..15
+    mova    [dstq+stride%1+16], %3 ; upper-half row, bytes 16..31
+    movhps  [dst16q+stride%1 ], %2 ; row 16 further down: high 8 bytes of %2 ...
+    movu  [dst16q+stride%1+ 8], %3 ; ... followed by all of %3 ...
+    movq  [dst16q+stride%1+24], m5 ; ... padded with 8 bytes of m5
+%if cpuflag(avx)
+    palignr                 %2, %3, %2, 1 ; advance the %2:%3 pair by one byte
+    pshufb                  %3, m4
+%else
+    palignr                 m6, %3, %2, 1 ; same as the avx branch, through temporary m6
+    pshufb                  %3, m4
+    mova                    %2, m6
+%endif
+%endmacro
+
+    %%write                q*0, m2, m0 ; first row of the pair uses the pavgb results
+    %%write                q*1, m3, m1 ; second row uses the LOWPASS results
+    lea                   dstq, [dstq  +strideq*2]
+    lea                 dst16q, [dst16q+strideq*2]
+    dec                   cntd
+    jg .loop
+    RET
+%endmacro
+
+VL_XMM_FUNCS ssse3 ; emit vp9_ipred_vl_8x8/16x16/32x32 with %1 = ssse3
+VL_XMM_FUNCS avx ; emit the same three functions with %1 = avx
+
+; vr
+
+INIT_MMX ssse3
+cglobal vp9_ipred_vr_4x4, 4, 4, 0, dst, stride, l, a ; 4x4 "vr" predictor; reads a[-1..6] and l[0..3]
+    movq                    m1, [aq-1] ; a[-1..6]
+    punpckldq               m2, [lq] ; high dword = l[0..3]; low dword keeps whatever m2 held
+    movd                    m0, [aq] ; a[0..3]
+    DEFINE_ARGS dst, stride, stride3
+    lea               stride3q, [strideq*3]
+    pavgb                   m0, m1 ; low 4 bytes: (a[i] + a[i-1] + 1) >> 1
+    palignr                 m1, m2, 5 ; m1 = l[1..3], a[-1..3]
+    psrlq                   m2, m1, 8 ; m1 advanced by one byte
+    psllq                   m3, m1, 8 ; m1 delayed by one byte
+    LOWPASS                  2,  1, 3, 4 ; LOWPASS is defined outside this chunk; result is used from m2 below
+
+    ; ABCD <- for the following predictor:
+    ; EFGH
+    ; IABC  | m0 contains ABCDxxxx
+    ; JEFG  | m2 contains xJIEFGHx
+
+    punpckldq               m0, m2 ; m0 = low dword of m0, then low dword of m2
+    pshufb                  m2, [pb_13456_3xm1] ; shuffle table defined elsewhere
+    movd      [dstq+strideq*0], m0 ; row 0
+    pshufb                  m0, [pb_6012_4xm1] ; shuffle table defined elsewhere
+    movd      [dstq+stride3q ], m2 ; row 3
+    psrlq                   m2, 8
+    movd      [dstq+strideq*2], m0 ; row 2
+    movd      [dstq+strideq*1], m2 ; row 1
+    RET
+
+%macro VR_XMM_FUNCS 1
+INIT_XMM %1
+cglobal vp9_ipred_vr_8x8, 4, 4, 5, dst, stride, l, a ; 8x8 "vr" predictor; reads a[-1..14] and l[0..7]
+    movu                    m1, [aq-1] ; a[-1..14]
+    movhps                  m2, [lq] ; l[0..7] into the high half; low half keeps whatever m2 held
+    movq                    m0, [aq] ; a[0..7]
+    DEFINE_ARGS dst, stride, stride3
+    lea               stride3q, [strideq*3]
+    pavgb                   m0, m1 ; low 8 bytes: (a[i] + a[i-1] + 1) >> 1
+    palignr                 m1, m2, 9 ; m1 = l[1..7], a[-1..7]
+    pslldq                  m2, m1, 1 ; m1 delayed by one byte
+    pslldq                  m3, m1, 2 ; m1 delayed by two bytes
+    LOWPASS                  1,  2, 3, 4 ; LOWPASS is defined outside this chunk; result is used from m1 below
+
+    ; ABCDEFGH <- for the following predictor:
+    ; IJKLMNOP
+    ; QABCDEFG  | m0 contains ABCDEFGHxxxxxxxx
+    ; RIJKLMNO  | m1 contains xxVUTSRQIJKLMNOP
+    ; SQABCDEF
+    ; TRIJKLMN
+    ; USQABCDE
+    ; VTRIJKLM
+
+    punpcklqdq              m0, m1 ; ABCDEFGHxxVUTSRQ
+    movq      [dstq+strideq*0], m0 ; row 0
+    pshufb                  m0, [pb_6xm1_BDF_0to6] ; xxxxxxUSQABCDEFG
+    movhps    [dstq+strideq*1], m1 ; row 1
+    pshufb                  m1, [pb_6xm1_246_8toE] ; xxxxxxVTRIJKLMNO
+    movhps    [dstq+strideq*2], m0 ; row 2
+    pslldq                  m0, 1 ; each later row pulls one more byte of the left column into the high half
+    movhps    [dstq+stride3q ], m1 ; row 3
+    lea                   dstq, [dstq+strideq*4]
+    pslldq                  m1, 1
+    movhps    [dstq+strideq*0], m0 ; row 4
+    pslldq                  m0, 1
+    movhps    [dstq+strideq*1], m1 ; row 5
+    pslldq                  m1, 1
+    movhps    [dstq+strideq*2], m0 ; row 6
+    movhps    [dstq+stride3q ], m1 ; row 7
+    RET
+
+INIT_XMM %1
+cglobal vp9_ipred_vr_16x16, 4, 4, 6, dst, stride, l, a ; 16x16 "vr" predictor; reads a[-1..15] and l[0..15]
+    mova                    m0, [aq] ; a[0..15]
+    movu                    m1, [aq-1] ; a[-1..14]
+    mova                    m2, [lq] ; l[0..15]
+    DEFINE_ARGS dst, stride, stride3, cnt
+    lea               stride3q, [strideq*3]
+    palignr                 m3, m1, m2, 15 ; l[15], a[-1..13]
+    LOWPASS                  3,  1,  0,  4 ; LOWPASS is defined outside this chunk; result is used from m3 below
+    pavgb                   m0, m1 ; (a[i] + a[i-1] + 1) >> 1
+    palignr                 m1, m2,  1 ; l[1..15], a[-1]
+    pslldq                  m4, m2,  1 ; 0, l[0..14]
+    LOWPASS                  1,  2,  4,  5
+    pshufb                  m1, [pb_02468ACE_13579BDF] ; table defined elsewhere (name suggests even bytes first, then odd bytes; confirm)
+    mov                   cntd, 4 ; 4 iterations x 4 rows
+
+.loop:
+    movlhps                 m2, m1 ; high half of m2 = low half of m1
+    mova      [dstq+strideq*0], m0
+    mova      [dstq+strideq*1], m3
+    palignr                 m4, m0, m1, 15 ; rows 2/3: rows 0/1 delayed by one byte, refilled from the top of m1/m2
+    palignr                 m5, m3, m2, 15
+    mova      [dstq+strideq*2], m4
+    mova      [dstq+stride3q ], m5
+    lea                   dstq, [dstq+strideq*4]
+    palignr                 m0, m1, 14 ; the next 4-row group starts two bytes further along
+    palignr                 m3, m2, 14
+    pslldq                  m1, 2 ; drop the two bytes of m1 just consumed
+    dec                   cntd
+    jg .loop
+    RET
+
+%if ARCH_X86_64
+INIT_XMM %1
+cglobal vp9_ipred_vr_32x32, 4, 4, 9, dst, stride, l, a ; 32x32 "vr" predictor; uses m8, hence only built when ARCH_X86_64 is set
+    mova                    m0, [aq] ; a[0..15]
+    mova                    m2, [aq+16] ; a[16..31]
+    movu                    m1, [aq-1] ; a[-1..14]
+    palignr                 m3, m2, m0, 15 ; a[15..30]
+    palignr                 m4, m2, m0, 14 ; a[14..29]
+    LOWPASS                  4,  3,  2,  5 ; LOWPASS is defined outside this chunk; result is used from m4 below
+    pavgb                   m3, m2 ; (a[i] + a[i-1] + 1) >> 1 for i = 16..31
+    mova                    m2, [lq+16] ; l[16..31]
+    palignr                 m5, m1, m2, 15 ; l[31], a[-1..13]
+    LOWPASS                  5,  1,  0,  6
+    pavgb                   m0, m1 ; (a[i] + a[i-1] + 1) >> 1 for i = 0..15
+    mova                    m6, [lq] ; l[0..15]
+    palignr                 m1, m2,  1 ; l[17..31], a[-1]
+    palignr                 m7, m2, m6, 15 ; l[15..30]
+    LOWPASS                  1,  2,  7,  8
+    palignr                 m2, m6,  1 ; l[1..16]
+    pslldq                  m7, m6,  1 ; 0, l[0..14]
+    LOWPASS                  2,  6,  7,  8
+    pshufb                  m1, [pb_02468ACE_13579BDF] ; shuffle table defined elsewhere
+    pshufb                  m2, [pb_02468ACE_13579BDF]
+    DEFINE_ARGS dst, stride, dst16, cnt
+    lea                 dst16q, [dstq  +strideq*8]
+    lea                 dst16q, [dst16q+strideq*8] ; dst16q = dstq + 16*stride
+    SBUTTERFLY             qdq,  2,  1,  6 ; SBUTTERFLY is defined outside this chunk
+    mov                   cntd, 8 ; 8 iterations, each writing 2 rows at dstq and 2 rows at dst16q
+
+.loop:
+    ; even lines (0, 2, 4, ...): m1 | m0, m3
+    ;  odd lines (1, 3, 5, ...): m2 | m5, m4
+%macro %%write 4
+    mova    [dstq+stride%1+ 0], %3 ; upper-half row, bytes 0..15
+    mova    [dstq+stride%1+16], %4 ; upper-half row, bytes 16..31
+    movhps  [dst16q+stride%1 ], %2 ; row 16 further down: high 8 bytes of %2 ...
+    movu  [dst16q+stride%1+ 8], %3 ; ... followed by all of %3 ...
+    movq  [dst16q+stride%1+24], %4 ; ... and the low 8 bytes of %4
+    palignr                 %4, %3, 15 ; slide the %2:%3:%4 byte chain by one position
+    palignr                 %3, %2, 15
+    pslldq                  %2,  1
+%endmacro
+
+    %%write                q*0, m1, m0, m3
+    %%write                q*1, m2, m5, m4
+    lea                   dstq, [dstq  +strideq*2]
+    lea                 dst16q, [dst16q+strideq*2]
+    dec                   cntd
+    jg .loop
+    RET
+%endif
+%endmacro
+
+VR_XMM_FUNCS ssse3 ; emit vp9_ipred_vr_8x8/16x16 (and 32x32 when ARCH_X86_64) with %1 = ssse3
+VR_XMM_FUNCS avx ; emit the same functions with %1 = avx
+
+; hd
+
+INIT_MMX ssse3
+cglobal vp9_ipred_hd_4x4, 4, 4, 0, dst, stride, l, a ; 4x4 "hd" predictor; reads l[0..3] and a[-1..2]
+    movd                    m0, [lq] ; l[0..3]
+    punpckldq               m0, [aq-1] ; m0 = l[0..3], a[-1..2]
+    DEFINE_ARGS dst, stride, stride3
+    lea               stride3q, [strideq*3]
+    psrlq                   m1, m0, 8 ; m0 advanced by one byte
+    psrlq                   m2, m1, 8 ; m0 advanced by two bytes
+    LOWPASS                  2,  1, 0,  3 ; LOWPASS is defined outside this chunk; result is used from m2 below
+    pavgb                   m1, m0 ; rounded average of each byte with its successor
+
+    ; DHIJ <- for the following predictor:
+    ; CGDH
+    ; BFCG  | m1 contains ABCDxxxx
+    ; AEBF  | m2 contains EFGHIJxx
+
+    punpcklbw               m1, m2 ; interleave averages with LOWPASS bytes
+    punpckhdq               m0, m1, m2 ; high dword of m1 followed by high dword of m2
+
+    ; m1 contains AEBFCGDH
+    ; m0 contains CGDHIJxx
+
+    movd      [dstq+stride3q ], m1 ; row 3
+    movd      [dstq+strideq*1], m0 ; row 1
+    psrlq                   m1, 16 ; rows 2 and 0 are two bytes further along
+    psrlq                   m0, 16
+    movd      [dstq+strideq*2], m1 ; row 2
+    movd      [dstq+strideq*0], m0 ; row 0
+    RET
+
+%macro HD_XMM_FUNCS 1
+INIT_XMM %1
+cglobal vp9_ipred_hd_8x8, 4, 4, 4, dst, stride, l, a ; 8x8 "hd" predictor; reads l[0..7] and a[-1..6]
+    movq                    m0, [lq] ; l[0..7]
+    movhps                  m0, [aq-1] ; m0 = l[0..7], a[-1..6]
+    DEFINE_ARGS dst, stride, stride3, dst4
+    lea               stride3q, [strideq*3]
+    lea                  dst4q, [dstq+strideq*4] ; rows 4..7
+    psrldq                  m1, m0, 1 ; m0 advanced by one byte
+    psrldq                  m2, m1, 1 ; m0 advanced by two bytes
+    LOWPASS                  2,  1,  0,  3 ; LOWPASS is defined outside this chunk; result is used from m2 below
+    pavgb                   m1, m0 ; rounded average of each byte with its successor
+
+    ; HPQRSTUV <- for the following predictor
+    ; GOHPQRST
+    ; FNGOHPQR  | m1 contains ABCDEFGHxxxxxxxx
+    ; EMFNGOHP  | m2 contains IJKLMNOPQRSTUVxx
+    ; DLEMFNGO
+    ; CKDLEMFN
+    ; BJCKDLEM
+    ; AIBJCKDL
+
+    punpcklbw               m1, m2 ; interleave averages with LOWPASS bytes
+    movhlps                 m2, m2 ; bring the high half of m2 down
+
+    ; m1 contains AIBJCKDLEMFNGOHP
+    ; m2 contains QRSTUVxxxxxxxxxx
+
+    movhps   [dstq +stride3q ], m1 ; row 3
+    movq     [dst4q+stride3q ], m1 ; row 7
+    palignr                 m3, m2, m1, 2 ; each step up the block is two bytes further along m2:m1
+    movhps   [dstq +strideq*2], m3 ; row 2
+    movq     [dst4q+strideq*2], m3 ; row 6
+    palignr                 m3, m2, m1, 4
+    movhps   [dstq +strideq*1], m3 ; row 1
+    movq     [dst4q+strideq*1], m3 ; row 5
+    palignr                 m2, m1, 6
+    movhps   [dstq +strideq*0], m2 ; row 0
+    movq     [dst4q+strideq*0], m2 ; row 4
+    RET
+
+INIT_XMM %1
+cglobal vp9_ipred_hd_16x16, 4, 6, 7, dst, stride, l, a ; 16x16 "hd" predictor; reads l[0..15] and a[-1..14]
+    mova                    m0, [lq] ; l[0..15]
+    movu                    m3, [aq-1] ; a[-1..14]
+    DEFINE_ARGS dst, stride, stride4, dst4, dst8, dst12
+    lea               stride4q, [strideq*4] ; also serves as the loop counter, see .loop
+    lea                  dst4q, [dstq +stride4q]
+    lea                  dst8q, [dst4q+stride4q]
+    lea                 dst12q, [dst8q+stride4q]
+    psrldq                  m4, m3,  1 ; a[0..14], 0
+    psrldq                  m5, m3,  2 ; a[1..14], 0, 0
+    LOWPASS                  5,  4,  3,  6 ; LOWPASS is defined outside this chunk; result is used from m5 below
+    palignr                 m1, m3, m0,  1 ; l[1..15], a[-1]
+    palignr                 m2, m3, m0,  2 ; l[2..15], a[-1..0]
+    LOWPASS                  2,  1,  0,  6
+    pavgb                   m1, m0 ; (l[i] + l[i+1] + 1) >> 1, with a[-1] following l[15]
+    SBUTTERFLY              bw,  1,  2,  6 ; SBUTTERFLY is defined outside this chunk
+
+    ; I PROBABLY INVERTED L0 and L16 here
+    ; m1, m2, m5
+.loop:
+    sub               stride4q, strideq ; row offset steps 3,2,1,0 x stride; these flags feed the jg at the bottom
+    movhps [dstq +stride4q +0], m2 ; rows 0..3: high half of m2, then low half of m5
+    movq   [dstq +stride4q +8], m5
+    mova   [dst4q+stride4q   ], m2 ; rows 4..7
+    movhps [dst8q+stride4q +0], m1 ; rows 8..11: high half of m1, then low half of m2
+    movq   [dst8q+stride4q +8], m2
+    mova  [dst12q+stride4q   ], m1 ; rows 12..15
+%if cpuflag(avx)
+    palignr                 m1, m2, m1, 2 ; slide the m1:m2:m5 byte chain by two positions
+    palignr                 m2, m5, m2, 2
+%else
+    palignr                 m3, m2, m1, 2 ; same as the avx branch, through temporaries m3/m0
+    palignr                 m0, m5, m2, 2
+    mova                    m1, m3
+    mova                    m2, m0
+%endif
+    psrldq                  m5, 2
+    jg .loop ; no SIMD instruction above writes EFLAGS, so this tests the sub at the loop top
+    RET
+
+INIT_XMM %1
+cglobal vp9_ipred_hd_32x32, 4, 6, 8, dst, stride, l, a ; 32x32 "hd" predictor; reads l[0..31] and a[-1..30]
+    mova                    m0, [lq] ; l[0..15]
+    mova                    m1, [lq+16] ; l[16..31]
+    movu                    m2, [aq-1] ; a[-1..14]
+    movu                    m3, [aq+15] ; a[15..30]
+    DEFINE_ARGS dst, stride, stride8, dst8, dst16, dst24
+    lea               stride8q, [strideq*8] ; also serves as the loop counter, see .loop
+    lea                  dst8q, [dstq  +stride8q]
+    lea                 dst16q, [dst8q +stride8q]
+    lea                 dst24q, [dst16q+stride8q]
+    psrldq                  m4, m3,  1 ; a[16..30], 0
+    psrldq                  m5, m3,  2 ; a[17..30], 0, 0
+    LOWPASS                  5,  4,  3,  6 ; LOWPASS is defined outside this chunk; result is used from m5 below
+    palignr                 m4, m3, m2,  2 ; a[1..16]
+    palignr                 m3, m2,  1 ; a[0..15]
+    LOWPASS                  4,  3,  2,  6
+    palignr                 m3, m2, m1,  2 ; l[18..31], a[-1..0]
+    palignr                 m2, m1,  1 ; l[17..31], a[-1]
+    LOWPASS                  3,  2,  1,  6
+    pavgb                   m2, m1 ; (l[i] + l[i+1] + 1) >> 1 for i = 16..31
+    palignr                 m6, m1, m0,  1 ; l[1..16]
+    palignr                 m1, m0,  2 ; l[2..17]
+    LOWPASS                  1,  6,  0,  7
+    pavgb                   m0, m6 ; (l[i] + l[i+1] + 1) >> 1 for i = 0..15; must pair with m6, not the LOWPASS output in m1
+    SBUTTERFLY              bw,  2,  3,  6 ; SBUTTERFLY is defined outside this chunk
+    SBUTTERFLY              bw,  0,  1,  6
+
+    ; m0, m1, m2, m3, m4, m5
+.loop:
+    sub               stride8q, strideq ; row offset steps 7..0 x stride; these flags feed the jg at the bottom
+    mova  [dstq  +stride8q+ 0], m3 ; rows 0..7
+    mova  [dstq  +stride8q+16], m4
+    mova  [dst8q +stride8q+ 0], m2 ; rows 8..15
+    mova  [dst8q +stride8q+16], m3
+    mova  [dst16q+stride8q+ 0], m1 ; rows 16..23
+    mova  [dst16q+stride8q+16], m2
+    mova  [dst24q+stride8q+ 0], m0 ; rows 24..31
+    mova  [dst24q+stride8q+16], m1
+%if cpuflag(avx)
+    palignr                 m0, m1, m0, 2 ; slide the m0..m5 byte chain by two positions
+    palignr                 m1, m2, m1, 2
+    palignr                 m2, m3, m2, 2
+    palignr                 m3, m4, m3, 2
+    palignr                 m4, m5, m4, 2
+    psrldq                  m5, 2
+%else
+    psrldq                  m6, m5, 2 ; same slide without 3-operand palignr: shift top-down, then rotate names
+    palignr                 m5, m4, 2
+    palignr                 m4, m3, 2
+    palignr                 m3, m2, 2
+    palignr                 m2, m1, 2
+    palignr                 m1, m0, 2
+    mova                    m0, m1
+    mova                    m1, m2
+    mova                    m2, m3
+    mova                    m3, m4
+    mova                    m4, m5
+    mova                    m5, m6
+%endif
+    jg .loop ; no SIMD instruction above writes EFLAGS, so this tests the sub at the loop top
+    RET
+%endmacro
+
+HD_XMM_FUNCS ssse3 ; emit vp9_ipred_hd_8x8/16x16/32x32 with %1 = ssse3
+HD_XMM_FUNCS avx ; emit the same three functions with %1 = avx
+
+INIT_MMX ssse3
+cglobal vp9_ipred_hu_4x4, 3, 3, 0, dst, stride, l ; 4x4 "hu" predictor; reads l[0..3] only
+    movd                    m0, [lq] ; l[0..3]
+    pshufb                  m0, [pb_3to1_5x0] ; shuffle table defined elsewhere (name suggests bytes 3,2,1 then 0 repeated; confirm)
+    psrlq                   m1, m0, 8 ; m0 advanced by one byte
+    psrlq                   m2, m1, 8 ; m0 advanced by two bytes
+    LOWPASS                  2,  1, 0, 3 ; LOWPASS is defined outside this chunk; result is used from m2 below
+    pavgb                   m1, m0 ; rounded average of each byte with its successor
+    DEFINE_ARGS dst, stride, stride3
+    lea               stride3q, [strideq*3]
+    SBUTTERFLY              bw,  1, 2, 0 ; SBUTTERFLY is defined outside this chunk
+    palignr                 m2, m1, 2 ; m2:m1 advanced by two bytes
+    movd      [dstq+strideq*0], m1 ; row 0
+    movd      [dstq+strideq*1], m2 ; row 1
+    punpckhdq               m1, m1 ; bring the high dwords down for rows 2 and 3
+    punpckhdq               m2, m2
+    movd      [dstq+strideq*2], m1 ; row 2
+    movd      [dstq+stride3q ], m2 ; row 3
+    RET
+
+%macro HU_XMM_FUNCS 1
+INIT_XMM %1
+cglobal vp9_ipred_hu_8x8, 3, 4, 4, dst, stride, l ; 8x8 "hu" predictor; reads l[0..7] only
+    movq                    m0, [lq] ; l[0..7]
+    pshufb                  m0, [pb_7to1_9x0] ; shuffle table defined elsewhere (name suggests bytes 7..1 then 0 repeated; confirm)
+    psrldq                  m1, m0, 1 ; m0 advanced by one byte
+    psrldq                  m2, m1, 1 ; m0 advanced by two bytes
+    LOWPASS                  2,  1, 0, 3 ; LOWPASS is defined outside this chunk; result is used from m2 below
+    pavgb                   m1, m0 ; rounded average of each byte with its successor
+    DEFINE_ARGS dst, stride, stride3, dst4
+    lea               stride3q, [strideq*3]
+    lea                  dst4q, [dstq+strideq*4] ; rows 4..7
+    SBUTTERFLY              bw,  1, 2, 0 ; SBUTTERFLY is defined outside this chunk
+    movq     [dstq +strideq*0], m1 ; row 0
+    movhps   [dst4q+strideq*0], m1 ; row 4
+    palignr                 m0, m2, m1, 2 ; each following row is two bytes further along m2:m1
+    movq     [dstq +strideq*1], m0 ; row 1
+    movhps   [dst4q+strideq*1], m0 ; row 5
+    palignr                 m0, m2, m1, 4
+    movq     [dstq +strideq*2], m0 ; row 2
+    movhps   [dst4q+strideq*2], m0 ; row 6
+    palignr                 m2, m1, 6
+    movq     [dstq +stride3q ], m2 ; row 3
+    movhps   [dst4q+stride3q ], m2 ; row 7
+    RET
+
+INIT_XMM %1
+cglobal vp9_ipred_hu_16x16, 3, 4, 5, dst, stride, l ; 16x16 "hu" predictor; reads l[0..15] only
+    mova                    m0, [lq] ; l[0..15]
+    pshufb                  m0, [pb_Fto0] ; shuffle table defined elsewhere (name suggests a byte reversal; confirm)
+    mova                    m3, [pb_2toE_3xF] ; shuffle table defined elsewhere, reused inside the loop
+    pshufb                  m1, m0, [pb_1toE_2xF]
+    pshufb                  m2, m0, m3
+    LOWPASS                  2,  1,  0,  4 ; LOWPASS is defined outside this chunk; result is used from m2 below
+    pavgb                   m1, m0
+    DEFINE_ARGS dst, stride, stride9, cnt
+    lea                stride9q, [strideq *3] ; 3*stride ...
+    mov                   cntd,  4 ; 4 iterations, each writing rows y, y+1, y+8 and y+9
+    lea                stride9q, [stride9q*3] ; ... times 3 = 9*stride
+    SBUTTERFLY              bw,  1,  2,  0 ; SBUTTERFLY is defined outside this chunk
+
+.loop:
+    mova      [dstq+strideq*0], m1 ; row y
+    mova      [dstq+strideq*8], m2 ; row y+8
+    palignr                 m0, m2, m1, 2 ; m2:m1 advanced by two bytes
+    pshufb                  m2, m3
+    mova      [dstq+strideq*1], m0 ; row y+1
+    mova      [dstq+stride9q ], m2 ; row y+9
+    palignr                 m1, m2, m0, 2
+    pshufb                  m2, m3
+    lea                   dstq, [dstq+strideq*2]
+    dec                   cntd
+    jg .loop
+    RET
+
+INIT_XMM %1
+cglobal vp9_ipred_hu_32x32, 3, 7, 7, dst, stride, l ; 32x32 "hu" predictor; reads l[0..31] only
+    mova                    m0, [lq] ; l[0..15]
+    mova                    m1, [lq+16] ; l[16..31]
+    mova                    m2, [pb_Fto0] ; shuffle table defined elsewhere (name suggests a byte reversal; confirm)
+    mova                    m4, [pb_2toE_3xF] ; shuffle table defined elsewhere, reused inside the loop
+    pshufb                  m0, m2
+    pshufb                  m1, m2
+    palignr                 m2, m0, m1,  1 ; m0:m1 advanced by one byte
+    palignr                 m3, m0, m1,  2 ; m0:m1 advanced by two bytes
+    LOWPASS                  3,  2,  1,  5 ; LOWPASS is defined outside this chunk; result is used from m3 below
+    pavgb                   m2, m1
+    pshufb                  m1, m0, m4
+    pshufb                  m5, m0, [pb_1toE_2xF]
+    LOWPASS                  1,  5,  0,  6
+    pavgb                   m0, m5
+    DEFINE_ARGS dst, stride, cnt, stride0, dst8, dst16, dst24
+    mov                   cntd,  8 ; 8 iterations, one row in each of the four 8-row bands
+    xor               stride0q, stride0q ; running row offset, starts at 0
+    lea                  dst8q, [dstq  +strideq*8]
+    lea                 dst16q, [dst8q +strideq*8]
+    lea                 dst24q, [dst16q+strideq*8]
+    SBUTTERFLY              bw,  0,  1,  5 ; SBUTTERFLY is defined outside this chunk
+    SBUTTERFLY              bw,  2,  3,  5
+    pshufb                  m6, m1, [pb_15] ; table defined elsewhere (name suggests a broadcast of byte 15; confirm)
+
+.loop:
+    mova  [dstq  +stride0q+ 0], m2 ; rows 0..7
+    mova  [dstq  +stride0q+16], m3
+    mova  [dst8q +stride0q+ 0], m3 ; rows 8..15
+    mova  [dst8q +stride0q+16], m0
+    mova  [dst16q+stride0q+ 0], m0 ; rows 16..23
+    mova  [dst16q+stride0q+16], m1
+    mova  [dst24q+stride0q+ 0], m1 ; rows 24..31
+    mova  [dst24q+stride0q+16], m6
+%if cpuflag(avx)
+    palignr                 m2, m3, m2, 2 ; slide the m2:m3:m0:m1 byte chain by two positions
+    palignr                 m3, m0, m3, 2
+    palignr                 m0, m1, m0, 2
+    pshufb                  m1, m4
+%else
+    pshufb                  m5, m1, m4 ; same slide without 3-operand palignr: shift top-down, then rotate names
+    palignr                 m1, m0, 2
+    palignr                 m0, m3, 2
+    palignr                 m3, m2, 2
+    mova                    m2, m3
+    mova                    m3, m0
+    mova                    m0, m1
+    mova                    m1, m5
+%endif
+    add               stride0q, strideq ; next row within each band
+    dec                   cntd
+    jg .loop
+    RET
+%endmacro
+
+HU_XMM_FUNCS ssse3 ; emit vp9_ipred_hu_8x8/16x16/32x32 with %1 = ssse3
+HU_XMM_FUNCS avx ; emit the same three functions with %1 = avx
+
+; FIXME 127, 128, 129 ?
diff --git a/libavcodec/x86/vp9itxfm.asm b/libavcodec/x86/vp9itxfm.asm
new file mode 100644
index 0000000..8087c2e
--- /dev/null
+++ b/libavcodec/x86/vp9itxfm.asm
@@ -0,0 +1,1669 @@
+;******************************************************************************
+;* VP9 IDCT SIMD optimizations
+;*
+;* Copyright (C) 2013 Clément Bœsch <u pkh me>
+;* Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+pw_11585x2:  times 8 dw 23170 ; 8 words of 2*11585, used as a pmulhrsw multiplier
+pw_m11585x2: times 8 dw -23170 ; 8 words of -2*11585
+
+%macro VP9_IDCT_COEFFS 2-3 0 ; coef1, coef2, emit the two extra pair tables (0/1, default 0)
+pw_%1x2:    times 8 dw  %1*2 ; doubled coefficients as 8 words each
+pw_m%1x2:   times 8 dw -%1*2
+pw_%2x2:    times 8 dw  %2*2
+pw_m%2x2:   times 8 dw -%2*2
+pw_m%1_%2:  times 4 dw -%1,  %2 ; interleaved word pairs, 4 repeats each (pmaddwd operands)
+pw_%2_%1:   times 4 dw  %2,  %1
+pw_m%2_m%1: times 4 dw -%2, -%1
+%if %3 == 1
+pw_m%2_%1:  times 4 dw -%2,  %1 ; additional pairs, only emitted when the third argument is 1
+pw_%1_%2:   times 4 dw  %1,  %2
+%endif
+%endmacro
+
+VP9_IDCT_COEFFS 15137,  6270, 1 ; the first three pairs also get the extra pw_m<b>_<a> / pw_<a>_<b> tables
+VP9_IDCT_COEFFS 16069,  3196, 1
+VP9_IDCT_COEFFS  9102, 13623, 1
+VP9_IDCT_COEFFS 16305,  1606 ; the remaining pairs only get the default table set
+VP9_IDCT_COEFFS 10394, 12665
+VP9_IDCT_COEFFS 14449,  7723
+VP9_IDCT_COEFFS  4756, 15679
+VP9_IDCT_COEFFS 16364,   804
+VP9_IDCT_COEFFS 11003, 12140
+VP9_IDCT_COEFFS 14811,  7005
+VP9_IDCT_COEFFS  5520, 15426
+VP9_IDCT_COEFFS 15893,  3981
+VP9_IDCT_COEFFS  8423, 14053
+VP9_IDCT_COEFFS 13160,  9760
+VP9_IDCT_COEFFS  2404, 16207
+
+pw_5283_13377: times 4 dw 5283, 13377 ; pmaddwd word pairs used by VP9_IADST4_1D
+pw_9929_13377: times 4 dw 9929, 13377
+pw_15212_m13377: times 4 dw 15212, -13377
+pw_15212_9929: times 4 dw 15212, 9929
+pw_m5283_m15212: times 4 dw -5283, -15212
+pw_13377x2: times 8 dw 13377*2 ; pmulhrsw multiplier used by VP9_IADST4_1D
+
+pd_8192: times 4 dd 8192 ; 1 << 13, the rounding term added before the >> 14 shifts
+pw_2048: times 8 dw 2048 ; pmulhrsw by 2048 equals (x + 8) >> 4
+pw_1024: times 8 dw 1024
+pw_512:  times 8 dw 512
+pw_m1:   times 8 dw -1
+
+SECTION .text
+
+; (a*x + b*y + round) >> shift, with shift fixed at 14; two dword results per call
+%macro VP9_MULSUB_2W_2X 5 ; dst1, dst2/src, round, coefs1, coefs2
+    pmaddwd            m%1, m%2, %4 ; dst1 = word pairs of src multiplied by coefs1 and summed to dwords
+    pmaddwd            m%2,  %5 ; dst2 = the same source pairs multiplied by coefs2 (overwrites src)
+    paddd              m%1,  %3 ; add the rounding term
+    paddd              m%2,  %3
+    psrad              m%1,  14 ; arithmetic shift back down
+    psrad              m%2,  14
+%endmacro
+
+%macro VP9_MULSUB_2W_4X 7 ; dst1, dst2, coef1, coef2, rnd, tmp1/src, tmp2
+    VP9_MULSUB_2W_2X    %7,  %6,  %5, [pw_m%3_%4], [pw_%4_%3] ; second set of interleaved pairs (held in tmp1)
+    VP9_MULSUB_2W_2X    %1,  %2,  %5, [pw_m%3_%4], [pw_%4_%3] ; first set of interleaved pairs (held in dst2)
+    packssdw           m%1, m%7 ; saturate both dword halves back to one register of words
+    packssdw           m%2, m%6
+%endmacro
+
+%macro VP9_UNPACK_MULSUB_2W_4X 7-9 ; dst1, dst2, (src1, src2,) coef1, coef2, rnd, tmp1, tmp2
+%if %0 == 7
+    punpckhwd          m%6, m%2, m%1 ; 7-argument form: dst1/dst2 are also the sources; interleave their high words
+    punpcklwd          m%2, m%1 ; and their low words
+    VP9_MULSUB_2W_4X   %1, %2, %3, %4, %5, %6, %7
+%else
+    punpckhwd          m%8, m%4, m%3 ; 9-argument form: sources are separate registers and are not overwritten here
+    punpcklwd          m%2, m%4, m%3
+    VP9_MULSUB_2W_4X   %1, %2, %5, %6, %7, %8, %9
+%endif
+%endmacro
+
+%macro VP9_UNPACK_MULSUB_2D_4X 6 ; dst1 [src1], dst2 [src2], dst3, dst4, mul1, mul2
+    punpckhwd          m%4, m%2, m%1 ; interleave the high words of src2/src1
+    punpcklwd          m%2, m%1 ; interleave the low words
+    pmaddwd            m%3, m%4, [pw_m%5_%6] ; results stay as unrounded dwords; no shift or pack in this macro
+    pmaddwd            m%4, [pw_%6_%5]
+    pmaddwd            m%1, m%2, [pw_m%5_%6]
+    pmaddwd            m%2, [pw_%6_%5]
+%endmacro
+
+%macro VP9_RND_SH_SUMSUB_BA 6 ; dst1 [src1], dst2 [src2], src3, src4, tmp, round
+    SUMSUB_BA            d, %1, %2, %5 ; dword sum/difference butterfly (SUMSUB_BA comes from x86util.asm, not shown here)
+    SUMSUB_BA            d, %3, %4, %5
+    paddd              m%1, %6 ; add the rounding term to all four dword registers
+    paddd              m%2, %6
+    paddd              m%3, %6
+    paddd              m%4, %6
+    psrad              m%1, 14 ; scale back down by 14 bits
+    psrad              m%2, 14
+    psrad              m%3, 14
+    psrad              m%4, 14
+    packssdw           m%1, m%3 ; saturating pack: dst1 = words of %1 then %3
+    packssdw           m%2, m%4 ; dst2 = words of %2 then %4
+%endmacro
+
+%macro VP9_STORE_2X 5-6 dstq ; reg1, reg2, tmp1, tmp2, zero, dst
+    movh               m%3, [%6] ; load the existing pixels of two consecutive rows
+    movh               m%4, [%6+strideq]
+    punpcklbw          m%3, m%5 ; widen bytes to words using the zero register
+    punpcklbw          m%4, m%5
+    paddw              m%3, m%1 ; add the residual words
+    paddw              m%4, m%2
+    packuswb           m%3, m%5 ; saturate back to unsigned bytes
+    packuswb           m%4, m%5
+    movh              [%6], m%3 ; write both rows back
+    movh      [%6+strideq], m%4
+%endmacro
+
+%macro ZERO_BLOCK 4 ; mem, stride (bytes between rows), nnzcpl (rows, and 16-bit entries per row), zero_reg
+%assign %%y 0
+%rep %3
+%assign %%x 0
+%rep %3*2/mmsize
+    mova      [%1+%%y+%%x], %4 ; fully unrolled: %3 rows of %3*2 bytes, one register-wide store at a time
+%assign %%x (%%x+mmsize)
+%endrep
+%assign %%y (%%y+%2)
+%endrep
+%endmacro
+
+;-------------------------------------------------------------------------------------------
+; void vp9_iwht_iwht_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;-------------------------------------------------------------------------------------------
+
+%macro VP9_IWHT4_1D 0
+    SWAP                 1, 2, 3 ; assembly-time register renaming (x86inc SWAP, not shown here); no instruction emitted, presumably
+    paddw               m0, m2
+    psubw               m3, m1
+    psubw               m4, m0, m3
+    psraw               m4, 1 ; halve the difference (arithmetic shift)
+    psubw               m5, m4, m1
+    SWAP                 5, 1 ; from here on "m1" names the value just computed in m5
+    psubw               m4, m2
+    SWAP                 4, 2
+    psubw               m0, m1
+    paddw               m3, m2
+    SWAP                 3, 2, 1 ; final renaming so the outputs sit in m0..m3 for the caller
+%endmacro
+
+INIT_MMX mmx
+cglobal vp9_iwht_iwht_4x4_add, 3, 3, 0, dst, stride, block, eob ; only 3 arguments are loaded, so eob is never read
+    mova                m0, [blockq+0*8] ; four rows of four 16-bit coefficients
+    mova                m1, [blockq+1*8]
+    mova                m2, [blockq+2*8]
+    mova                m3, [blockq+3*8]
+    psraw               m0, 2 ; scale every coefficient down by 2 bits before the transform
+    psraw               m1, 2
+    psraw               m2, 2
+    psraw               m3, 2
+
+    VP9_IWHT4_1D ; first 1-D pass
+    TRANSPOSE4x4W        0, 1, 2, 3, 4 ; from x86util.asm (not shown here)
+    VP9_IWHT4_1D ; second 1-D pass
+
+    pxor                m4, m4 ; zero register for VP9_STORE_2X and ZERO_BLOCK
+    VP9_STORE_2X         0, 1, 5, 6, 4 ; add rows 0 and 1 to dst
+    lea               dstq, [dstq+strideq*2]
+    VP9_STORE_2X         2, 3, 5, 6, 4 ; add rows 2 and 3 to dst
+    ZERO_BLOCK      blockq, 8, 4, m4 ; clear the 4x4 coefficient block
+    RET
+
+;-------------------------------------------------------------------------------------------
+; void vp9_idct_idct_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;-------------------------------------------------------------------------------------------
+
+%macro VP9_IDCT4_1D_FINALIZE 0 ; final butterflies of the 4-point IDCT; expects t1 in m0, t2 in m1, t0 in m2, t3 in m3, uses m4 as scratch
+    SUMSUB_BA            w, 3, 2, 4                         ; m3=t3+t0, m2=-t3+t0
+    SUMSUB_BA            w, 1, 0, 4                         ; m1=t2+t1, m0=-t2+t1
+    SWAP                 0, 3, 2                            ; 3102 -> 0123
+%endmacro
+
+%macro VP9_IDCT4_1D 0 ; one 4-point IDCT pass over m0..m3; needs m6 = pw_11585x2 and m7 = rounding (pd_8192) loaded by the caller
+    SUMSUB_BA            w, 2, 0, 4                         ; m2=IN(0)+IN(2) m0=IN(0)-IN(2)
+    pmulhrsw            m2, m6                              ; m2=t0
+    pmulhrsw            m0, m6                              ; m0=t1
+    VP9_UNPACK_MULSUB_2W_4X 1, 3, 15137, 6270, m7, 4, 5     ; m1=t2, m3=t3
+    VP9_IDCT4_1D_FINALIZE
+%endmacro
+
+; 2x2 top left corner: only m0 and m1 carry input; needs m5 = pw_11585x2, m6 = pw_6270x2, m7 = pw_15137x2
+%macro VP9_IDCT4_2x2_1D 0
+    pmulhrsw            m0, m5                              ; m0=t1
+    mova                m2, m0                              ; m2=t0
+    mova                m3, m1
+    pmulhrsw            m1, m6                              ; m1=t2
+    pmulhrsw            m3, m7                              ; m3=t3
+    VP9_IDCT4_1D_FINALIZE
+%endmacro
+
+%macro VP9_IDCT4_WRITEOUT 0 ; round m0..m3 and add them to four dst rows; expects m4 = 0, clobbers m5..m7 and advances dstq by 2 rows
+    mova                m5, [pw_2048]
+    pmulhrsw            m0, m5              ; (x*2048 + (1<<14))>>15 <=> (x+8)>>4
+    pmulhrsw            m1, m5
+    VP9_STORE_2X         0,  1,  6,  7,  4 ; rows 0 and 1
+    lea               dstq, [dstq+2*strideq]
+    pmulhrsw            m2, m5
+    pmulhrsw            m3, m5
+    VP9_STORE_2X         2,  3,  6,  7,  4 ; rows 2 and 3
+%endmacro
+
+INIT_MMX ssse3
+cglobal vp9_idct_idct_4x4_add, 4,4,0, dst, stride, block, eob ; three paths chosen by eob: DC only, top-left 2x2, full 4x4
+
+    cmp eobd, 4 ; 2x2 or smaller
+    jg .idctfull
+
+    cmp eobd, 1 ; faster path for when only DC is set
+    jne .idct2x2
+
+    movd                m0, [blockq] ; DC coefficient in the low word
+    mova                m5, [pw_11585x2]
+    pmulhrsw            m0, m5 ; one scaling per 1-D pass
+    pmulhrsw            m0, m5
+    pshufw              m0, m0, 0 ; broadcast the low word to all four lanes
+    pxor                m4, m4
+    movh          [blockq], m4 ; clear the coefficient that was consumed
+    pmulhrsw            m0, [pw_2048]       ; (x*2048 + (1<<14))>>15 <=> (x+8)>>4
+    VP9_STORE_2X         0,  0,  6,  7,  4 ; the same value is added to every pixel of rows 0 and 1
+    lea               dstq, [dstq+2*strideq]
+    VP9_STORE_2X         0,  0,  6,  7,  4 ; and of rows 2 and 3
+    RET
+
+; faster path for when only top left 2x2 block is set
+.idct2x2:
+    movd                m0, [blockq+0] ; first two coefficients of row 0
+    movd                m1, [blockq+8] ; first two coefficients of row 1
+    mova                m5, [pw_11585x2]
+    mova                m6, [pw_6270x2]
+    mova                m7, [pw_15137x2]
+    VP9_IDCT4_2x2_1D
+    TRANSPOSE4x4W  0, 1, 2, 3, 4
+    VP9_IDCT4_2x2_1D
+    pxor                m4, m4  ; used for the block reset, and VP9_STORE_2X
+    movh       [blockq+ 0], m4 ; clear only the four coefficients that were read
+    movh       [blockq+ 8], m4
+    VP9_IDCT4_WRITEOUT
+    RET
+
+.idctfull: ; generic full 4x4 idct/idct
+    mova                m0, [blockq+ 0]
+    mova                m1, [blockq+ 8]
+    mova                m2, [blockq+16]
+    mova                m3, [blockq+24]
+    mova                m6, [pw_11585x2]
+    mova                m7, [pd_8192]       ; rounding
+    VP9_IDCT4_1D
+    TRANSPOSE4x4W  0, 1, 2, 3, 4
+    VP9_IDCT4_1D
+    pxor                m4, m4  ; used for the block reset, and VP9_STORE_2X
+    mova       [blockq+ 0], m4
+    mova       [blockq+ 8], m4
+    mova       [blockq+16], m4
+    mova       [blockq+24], m4
+    VP9_IDCT4_WRITEOUT
+    RET
+
+;-------------------------------------------------------------------------------------------
+; void vp9_iadst_iadst_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;-------------------------------------------------------------------------------------------
+
+%macro VP9_IADST4_1D 0 ; one 4-point IADST pass over m0..m3; mixes MMX registers with explicitly named xmm0..xmm4
+    movq2dq           xmm0, m0 ; copy the four MMX inputs into the low halves of xmm0..xmm3
+    movq2dq           xmm1, m1
+    movq2dq           xmm2, m2
+    movq2dq           xmm3, m3
+    paddw               m3, m0 ; the out2 term is accumulated in the MMX domain: in3 + in0 ...
+    punpcklwd         xmm0, xmm1 ; interleave in0/in1 words for pmaddwd
+    punpcklwd         xmm2, xmm3 ; interleave in2/in3 words for pmaddwd
+    pmaddwd           xmm1, xmm0, [pw_5283_13377]
+    pmaddwd           xmm4, xmm0, [pw_9929_13377]
+    pmaddwd           xmm0, [pw_15212_m13377]
+    pmaddwd           xmm3, xmm2, [pw_15212_9929]
+    pmaddwd           xmm2, [pw_m5283_m15212]
+    psubw               m3, m2 ; ... minus in2
+    paddd             xmm0, xmm2
+    paddd             xmm3, [pd_8192] ; fold the rounding term into the shared partial sums
+    paddd             xmm2, [pd_8192]
+    paddd             xmm1, xmm3
+    paddd             xmm0, xmm3
+    paddd             xmm4, xmm2
+    psrad             xmm1, 14
+    psrad             xmm0, 14
+    psrad             xmm4, 14
+    pmulhrsw            m3, [pw_13377x2]        ; out2
+    packssdw          xmm0, xmm0 ; saturate dwords to words; the low 64 bits hold the four results
+    packssdw          xmm1, xmm1
+    packssdw          xmm4, xmm4
+    movdq2q             m0, xmm0                ; out3
+    movdq2q             m1, xmm1                ; out0
+    movdq2q             m2, xmm4                ; out1
+    SWAP                 0, 1, 2, 3 ; rename registers so the outputs end up in m0..m3 order (x86inc SWAP, not shown here)
+%endmacro
+
+%macro IADST4_FN 5 ; name1, first-pass 1-D macro, name2, second-pass 1-D macro, cpu flag -> defines vp9_<name1>_<name2>_4x4_add
+INIT_MMX %5
+cglobal vp9_%1_%3_4x4_add, 3, 3, 8, dst, stride, block, eob ; only 3 arguments are loaded, so eob is never read
+    mova                m0, [blockq+ 0] ; four rows of four 16-bit coefficients
+    mova                m1, [blockq+ 8]
+    mova                m2, [blockq+16]
+    mova                m3, [blockq+24]
+    mova                m6, [pw_11585x2] ; constant expected by VP9_IDCT4_1D
+    mova                m7, [pd_8192]       ; rounding
+    VP9_%2_1D ; first 1-D pass
+    TRANSPOSE4x4W  0, 1, 2, 3, 4
+    VP9_%4_1D ; second 1-D pass
+    pxor                m4, m4  ; used for the block reset, and VP9_STORE_2X
+    mova       [blockq+ 0], m4
+    mova       [blockq+ 8], m4
+    mova       [blockq+16], m4
+    mova       [blockq+24], m4
+    VP9_IDCT4_WRITEOUT
+    RET
+%endmacro
+
+IADST4_FN idct,  IDCT4,  iadst, IADST4, ssse3 ; vp9_idct_iadst_4x4_add: IDCT4 pass, transpose, IADST4 pass
+IADST4_FN iadst, IADST4, idct,  IDCT4,  ssse3 ; vp9_iadst_idct_4x4_add: IADST4 pass, transpose, IDCT4 pass
+IADST4_FN iadst, IADST4, iadst, IADST4, ssse3 ; vp9_iadst_iadst_4x4_add: IADST4 in both passes
+
+%if ARCH_X86_64 ; TODO: 32-bit? (32-bit limited to 8 xmm reg, we use more)
+
+;-------------------------------------------------------------------------------------------
+; void vp9_idct_idct_8x8_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;-------------------------------------------------------------------------------------------
+
+%macro VP9_IDCT8_1D_FINALIZE 0 ; final butterflies of the 8-point IDCT; m4 is the scratch register
+    SUMSUB_BA            w,  3, 10, 4                       ;  m3=t0+t7, m10=t0-t7
+    SUMSUB_BA            w,  1,  2, 4                       ;  m1=t1+t6,  m2=t1-t6
+    SUMSUB_BA            w, 11,  0, 4                       ; m11=t2+t5,  m0=t2-t5
+    SUMSUB_BA            w,  9,  8, 4                       ;  m9=t3+t4,  m8=t3-t4
+    SWAP                11, 10, 2 ; assembly-time renaming to restore output order (x86inc SWAP, not shown here)
+    SWAP                 3,  9, 0
+%endmacro
+
+%macro VP9_IDCT8_1D 0 ; 8-input 1D IDCT; in: IN(0..7)=m0/1/2/3/8/9/10/11, m12=pw_11585x2, m7=rounding; m4/m5 scratch
+    SUMSUB_BA            w, 8, 0, 4                         ; m8=IN(0)+IN(4) m0=IN(0)-IN(4)
+    pmulhrsw            m8, m12                             ; m8=t0a
+    pmulhrsw            m0, m12                             ; m0=t1a
+    VP9_UNPACK_MULSUB_2W_4X 2, 10, 15137,  6270, m7, 4, 5   ; m2=t2a, m10=t3a
+    VP9_UNPACK_MULSUB_2W_4X 1, 11, 16069,  3196, m7, 4, 5   ; m1=t4a, m11=t7a
+    VP9_UNPACK_MULSUB_2W_4X 9,  3,  9102, 13623, m7, 4, 5   ; m9=t5a,  m3=t6a
+    SUMSUB_BA            w, 10,  8, 4                       ; m10=t0a+t3a (t0),  m8=t0a-t3a (t3)
+    SUMSUB_BA            w,  2,  0, 4                       ;  m2=t1a+t2a (t1),  m0=t1a-t2a (t2)
+    SUMSUB_BA            w,  9,  1, 4                       ;  m9=t4a+t5a (t4),  m1=t4a-t5a (t5a)
+    SUMSUB_BA            w,  3, 11, 4                       ;  m3=t7a+t6a (t7), m11=t7a-t6a (t6a)
+    SUMSUB_BA            w,  1, 11, 4                       ;  m1=t6a+t5a (t6), m11=t6a-t5a (t5)
+    pmulhrsw            m1, m12                             ; m1=t6
+    pmulhrsw           m11, m12                             ; m11=t5
+    VP9_IDCT8_1D_FINALIZE                                   ; final butterflies + register reordering
+%endmacro
+
+%macro VP9_IDCT8_4x4_1D 0 ; 1D IDCT reading only m0-m3 (used by the .idcthalf path); m12=pw_11585x2, m4 scratch
+    pmulhrsw            m0, m12                             ; m0=t1a/t0a
+    pmulhrsw           m10, m2, [pw_15137x2]                ; m10=t3a
+    pmulhrsw            m2, [pw_6270x2]                     ; m2=t2a
+    pmulhrsw           m11, m1, [pw_16069x2]                ; m11=t7a
+    pmulhrsw            m1, [pw_3196x2]                     ; m1=t4a
+    pmulhrsw            m9, m3, [pw_9102x2]                 ; m9=-t5a
+    pmulhrsw            m3, [pw_13623x2]                    ; m3=t6a
+    psubw               m8, m0, m10                         ; m8=t0a-t3a (t3)
+    paddw              m10, m0                              ; m10=t0a+t3a (t0)
+    SUMSUB_BA            w,  2,  0, 4                       ;  m2=t1a+t2a (t1),  m0=t1a-t2a (t2)
+    SUMSUB_BA            w,  9,  1, 4                       ;  m1=t4a+t5a (t4),  m9=t4a-t5a (t5a)
+    SWAP                 1,  9                              ; -> m9=t4, m1=t5a, same layout as VP9_IDCT8_1D
+    SUMSUB_BA            w,  3, 11, 4                       ;  m3=t7a+t6a (t7), m11=t7a-t6a (t6a)
+    SUMSUB_BA            w,  1, 11, 4                       ;  m1=t6a+t5a (t6), m11=t6a-t5a (t5)
+    pmulhrsw            m1, m12                             ; m1=t6
+    pmulhrsw           m11, m12                             ; m11=t5
+    VP9_IDCT8_1D_FINALIZE                                   ; final butterflies + register reordering
+%endmacro
+
+; TODO: a lot of t* copies can probably be removed and merged with
+; following SUMSUBs from VP9_IDCT8_1D_FINALIZE with AVX
+%macro VP9_IDCT8_2x2_1D 0 ; 1D IDCT reading only m0/m1; caller sets m6=pw_3196x2, m7=pw_16069x2, m12=pw_11585x2
+    pmulhrsw            m0, m12                             ;  m0=t0
+    mova                m3, m1                              ;  copy of the second input for the t7 product
+    pmulhrsw            m1, m6                              ;  m1=t4
+    pmulhrsw            m3, m7                              ;  m3=t7
+    mova                m2, m0                              ;  m2=t1
+    mova               m10, m0                              ; m10=t2
+    mova                m8, m0                              ;  m8=t3
+    mova               m11, m3                              ; t5 = t7a ...
+    mova                m9, m3                              ; t6 = t7a ...
+    psubw              m11, m1                              ; t5 = t7a - t4a
+    paddw               m9, m1                              ; t6 = t7a + t4a
+    pmulhrsw           m11, m12                             ; m11=t5
+    pmulhrsw            m9, m12                             ;  m9=t6
+    SWAP                 0, 10                              ; t0 <-> t2 (equal copies): matches FINALIZE's t0=m10, t2=m0
+    SWAP                 9,  1                              ; -> m9=t4, m1=t6 as VP9_IDCT8_1D_FINALIZE expects
+    VP9_IDCT8_1D_FINALIZE                                   ; final butterflies + register reordering
+%endmacro
+
+%macro VP9_IDCT8_WRITEOUT 0 ; round m0-3/m8-11 and pass them to VP9_STORE_2X, two at a time; callers zero m4 first
+    mova                m5, [pw_1024]
+    pmulhrsw            m0, m5              ; (x*1024 + (1<<14))>>15 <=> (x+16)>>5
+    pmulhrsw            m1, m5
+    VP9_STORE_2X         0,  1,  6,  7,  4  ; m6/m7 presumably scratch, m4 = zero (macro defined outside this section)
+    lea               dstq, [dstq+2*strideq] ; advance dst by two rows
+    pmulhrsw            m2, m5
+    pmulhrsw            m3, m5
+    VP9_STORE_2X         2,  3,  6,  7,  4
+    lea               dstq, [dstq+2*strideq]
+    pmulhrsw            m8, m5
+    pmulhrsw            m9, m5
+    VP9_STORE_2X         8,  9,  6,  7,  4
+    lea               dstq, [dstq+2*strideq]
+    pmulhrsw           m10, m5
+    pmulhrsw           m11, m5
+    VP9_STORE_2X        10, 11,  6,  7,  4  ; dstq is left advanced by 6 rows on exit
+%endmacro
+
+%macro VP9_IDCT_IDCT_8x8_ADD_XMM 1 ; opt; emits vp9_idct_idct_8x8_add with four eob-selected code paths
+INIT_XMM %1
+cglobal vp9_idct_idct_8x8_add, 4,4,13, dst, stride, block, eob
+
+    mova               m12, [pw_11585x2]    ; often used
+
+    cmp eobd, 12 ; top left half or less
+    jg .idctfull
+
+    cmp eobd, 3  ; top left corner or less
+    jg .idcthalf
+
+    cmp eobd, 1 ; faster path for when only DC is set
+    jne .idcttopleftcorner
+
+    movd                m0, [blockq]        ; 4-byte load; only word 0 (DC) is used below
+    pmulhrsw            m0, m12             ; scale by pw_11585x2 once per 1D pass
+    pmulhrsw            m0, m12
+    SPLATW              m0, m0, 0           ; broadcast word 0 to all lanes
+    pxor                m4, m4              ; zero for the block reset and VP9_STORE_2X
+    movd          [blockq], m4              ; clear the 4 bytes loaded above
+    mova                m5, [pw_1024]
+    pmulhrsw            m0, m5              ; (x*1024 + (1<<14))>>15 <=> (x+16)>>5
+    VP9_STORE_2X         0,  0,  6,  7,  4  ; same DC value for both rows of each pair
+    lea               dstq, [dstq+2*strideq]
+    VP9_STORE_2X         0,  0,  6,  7,  4
+    lea               dstq, [dstq+2*strideq]
+    VP9_STORE_2X         0,  0,  6,  7,  4
+    lea               dstq, [dstq+2*strideq]
+    VP9_STORE_2X         0,  0,  6,  7,  4
+    RET
+
+; faster path for when only top left corner is set (3 input: DC, right to DC,
+; below DC). Note: also working with a 2x2 block
+.idcttopleftcorner:
+    movd                m0, [blockq+0]      ; first two coefficients of row 0
+    movd                m1, [blockq+16]     ; first two coefficients of row 1 (rows are 16 bytes apart)
+    mova                m6, [pw_3196x2]     ; constants read by VP9_IDCT8_2x2_1D
+    mova                m7, [pw_16069x2]
+    VP9_IDCT8_2x2_1D
+    TRANSPOSE8x8W  0, 1, 2, 3, 8, 9, 10, 11, 4
+    VP9_IDCT8_2x2_1D
+    pxor                m4, m4  ; used for the block reset, and VP9_STORE_2X
+    movd       [blockq+ 0], m4              ; clear exactly the bytes that were loaded
+    movd       [blockq+16], m4
+    VP9_IDCT8_WRITEOUT
+    RET
+
+.idcthalf:
+    movh                m0, [blockq + 0]    ; low 8 bytes (4 coefficients) of rows 0-3
+    movh                m1, [blockq +16]
+    movh                m2, [blockq +32]
+    movh                m3, [blockq +48]
+    VP9_IDCT8_4x4_1D
+    TRANSPOSE8x8W  0, 1, 2, 3, 8, 9, 10, 11, 4
+    VP9_IDCT8_4x4_1D
+    pxor                m4, m4              ; zero for the block reset and VP9_STORE_2X
+    movh       [blockq+ 0], m4              ; clear exactly the bytes that were loaded
+    movh       [blockq+16], m4
+    movh       [blockq+32], m4
+    movh       [blockq+48], m4
+    VP9_IDCT8_WRITEOUT
+    RET
+
+.idctfull: ; generic full 8x8 idct/idct
+    mova                m0, [blockq+  0]    ; IN(0)
+    mova                m1, [blockq+ 16]    ; IN(1)
+    mova                m2, [blockq+ 32]    ; IN(2)
+    mova                m3, [blockq+ 48]    ; IN(3)
+    mova                m8, [blockq+ 64]    ; IN(4)
+    mova                m9, [blockq+ 80]    ; IN(5)
+    mova               m10, [blockq+ 96]    ; IN(6)
+    mova               m11, [blockq+112]    ; IN(7)
+    mova                m7, [pd_8192]       ; rounding
+    VP9_IDCT8_1D
+    TRANSPOSE8x8W  0, 1, 2, 3, 8, 9, 10, 11, 4
+    VP9_IDCT8_1D
+
+    pxor                m4, m4  ; used for the block reset, and VP9_STORE_2X
+    ZERO_BLOCK      blockq, 16, 8, m4       ; presumably clears 8 rows of 16 bytes (macro defined outside this section)
+    VP9_IDCT8_WRITEOUT
+    RET
+%endmacro
+
+VP9_IDCT_IDCT_8x8_ADD_XMM ssse3
+VP9_IDCT_IDCT_8x8_ADD_XMM avx
+
+;---------------------------------------------------------------------------------------------
+; void vp9_iadst_iadst_8x8_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;---------------------------------------------------------------------------------------------
+
+%macro VP9_IADST8_1D 0 ; input/output=m0/1/2/3/8/9/10/11; needs m7=rounding, m12=pw_11585x2; uses m4-6/m13/m14 as scratch
+    VP9_UNPACK_MULSUB_2D_4X 11,  0,  4,  5, 16305,  1606    ; m11/4=t1[d], m0/5=t0[d]
+    VP9_UNPACK_MULSUB_2D_4X  3,  8,  6, 13, 10394, 12665    ; m3/6=t5[d], m8/13=t4[d]
+    VP9_RND_SH_SUMSUB_BA     8,  0, 13,  5, 14, m7          ; m8=t0[w], m0=t4[w]
+    VP9_RND_SH_SUMSUB_BA     3, 11,  6,  4, 14, m7          ; m3=t1[w], m11=t5[w]
+
+    VP9_UNPACK_MULSUB_2D_4X  9,  2,  4,  5, 14449,  7723    ; m9/4=t3[d], m2/5=t2[d]
+    VP9_UNPACK_MULSUB_2D_4X  1, 10,  6, 13,  4756, 15679    ; m1/6=t7[d], m10/13=t6[d]
+    VP9_RND_SH_SUMSUB_BA    10,  2, 13,  5, 14, m7          ; m10=t2[w], m2=t6[w]
+    VP9_RND_SH_SUMSUB_BA     1,  9,  6,  4, 14, m7          ; m1=t3[w], m9=t7[w]
+
+    ; m8=t0, m3=t1, m10=t2, m1=t3, m0=t4, m11=t5, m2=t6, m9=t7
+
+    VP9_UNPACK_MULSUB_2D_4X  0, 11,  4,  5, 15137,  6270    ; m0/4=t5[d], m11/5=t4[d]
+    VP9_UNPACK_MULSUB_2D_4X  9,  2,  6, 13,  6270, 15137    ; m9/6=t6[d], m2/13=t7[d]
+    VP9_RND_SH_SUMSUB_BA     9, 11,  6,  5, 14, m7
+    psignw                  m9, [pw_m1]                     ; m9=out1[w], m11=t6[w]
+    VP9_RND_SH_SUMSUB_BA     2,  0, 13,  4, 14, m7          ; m2=out6[w], m0=t7[w]
+
+    SUMSUB_BA                w, 10,  8, 14                  ; m10=out0[w], m8=t2[w]
+    SUMSUB_BA                w,  1,  3, 14
+    psignw                  m1, [pw_m1]                     ; m1=out7[w], m3=t3[w]
+
+    ; m10=out0, m9=out1, m8=t2, m3=t3, m11=t6, m0=t7, m2=out6, m1=out7
+
+    SUMSUB_BA                w,  3,  8,  4
+    SUMSUB_BA                w,  0, 11,  5
+    pmulhrsw                m3, m12
+    pmulhrsw               m11, m12
+    pmulhrsw                m8, m12                         ; out4
+    pmulhrsw                m0, m12                         ; out2
+    psignw                  m3, [pw_m1]                     ; out3
+    psignw                 m11, [pw_m1]                     ; out5
+
+    ; m10=out0, m9=out1, m0=out2, m3=out3, m8=out4, m11=out5, m2=out6, m1=out7
+
+    SWAP                     0, 10, 2                       ; -> m0=out0, m2=out2, m10=out6
+    SWAP                    11,  1, 9                       ; -> m1=out1, m9=out5, m11=out7
+%endmacro
+
+%macro IADST8_FN 5 ; name1, 1D-macro1, name2, 1D-macro2, opt
+INIT_XMM %5
+cglobal vp9_%1_%3_8x8_add, 3, 3, 15, dst, stride, block, eob ; only 3 args are loaded; eob is named but unused here
+    mova                m0, [blockq+  0]    ; IN(0)
+    mova                m1, [blockq+ 16]    ; IN(1)
+    mova                m2, [blockq+ 32]    ; IN(2)
+    mova                m3, [blockq+ 48]    ; IN(3)
+    mova                m8, [blockq+ 64]    ; IN(4)
+    mova                m9, [blockq+ 80]    ; IN(5)
+    mova               m10, [blockq+ 96]    ; IN(6)
+    mova               m11, [blockq+112]    ; IN(7)
+
+    mova               m12, [pw_11585x2]    ; often used
+    mova                m7, [pd_8192]       ; rounding
+    VP9_%2_1D                               ; first 1D transform (macro selected by %2)
+    TRANSPOSE8x8W  0, 1, 2, 3, 8, 9, 10, 11, 4
+    VP9_%4_1D                               ; second 1D transform (macro selected by %4)
+
+    pxor                m4, m4  ; used for the block reset, and VP9_STORE_2X
+    ZERO_BLOCK      blockq, 16, 8, m4       ; presumably clears 8 rows of 16 bytes (macro defined outside this section)
+    VP9_IDCT8_WRITEOUT
+    RET
+%endmacro
+
+IADST8_FN idct,  IDCT8,  iadst, IADST8, ssse3 ; vp9_idct_iadst_8x8_add
+IADST8_FN idct,  IDCT8,  iadst, IADST8, avx
+IADST8_FN iadst, IADST8, idct,  IDCT8,  ssse3 ; vp9_iadst_idct_8x8_add
+IADST8_FN iadst, IADST8, idct,  IDCT8,  avx
+IADST8_FN iadst, IADST8, iadst, IADST8, ssse3 ; vp9_iadst_iadst_8x8_add
+IADST8_FN iadst, IADST8, iadst, IADST8, avx
+
+;---------------------------------------------------------------------------------------------
+; void vp9_idct_idct_16x16_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;---------------------------------------------------------------------------------------------
+
+; at the end of this macro, m7 is stored in stack_scratch
+; everything else (t0-6 and t8-15) is stored in m0-6 and m8-15
+; the following sumsubs have not been done yet:
+;    SUMSUB_BA            w,  6,  9, 15      ; t6, t9
+;    SUMSUB_BA            w,  7,  8, 15      ; t7, t8
+%macro VP9_IDCT16_1D_START 4 ; src, nnzc, stride, stack_scratch
+%if %2 <= 4                                ; this branch reads only IN(1)-IN(3) here (IN(0) is loaded further down)
+    mova                m3, [%1+ 1*%3]      ; IN(1)
+    mova               m12, [%1+ 2*%3]      ; IN(2)
+    mova                m0, [%1+ 3*%3]      ; IN(3)
+
+    pmulhrsw           m15, m12, [pw_16069x2]       ; t6-7
+    pmulhrsw           m12, [pw_3196x2]             ; t4-5
+    pmulhrsw            m4, m3,  [pw_16305x2]       ; t14-15
+    pmulhrsw            m3, [pw_1606x2]             ; t8-9
+    pmulhrsw            m7, m0,  [pw_m4756x2]       ; t10-11
+    pmulhrsw            m0, [pw_15679x2]            ; t12-13
+
+    ; m8=t0, m9=t1, m10=t2, m11=t3, m12=t4, m14=t5, m13=t6, m15=t7
+    ; m3=t8, m5=t9, m1=t10, m7=t11, m0=t12, m6=t13, m2=t14, m4=t15
+
+    paddw              m14, m15, m12
+    psubw              m13, m15, m12
+    pmulhrsw           m13, [pw_11585x2]            ; t5
+    pmulhrsw           m14, [pw_11585x2]            ; t6
+
+    VP9_UNPACK_MULSUB_2W_4X 2, 5, 4, 3, 15137,  6270, [pd_8192], 10, 11 ; t9,  t14
+    VP9_UNPACK_MULSUB_2W_4X 6, 1, 0, 7, 6270, m15137, [pd_8192], 10, 11 ; t10, t13
+
+    ; m15=t0, m14=t1, m13=t2, m12=t3, m11=t4, m10=t5, m9=t6, m8=t7
+    ; m7=t8, m6=t9, m2=t10, m3=t11, m4=t12, m5=t13, m1=t14, m0=t15
+%else
+    mova                m5, [%1+ 1*%3]     ; IN(1)
+    mova               m14, [%1+ 2*%3]     ; IN(2)
+    mova                m6, [%1+ 3*%3]     ; IN(3)
+    mova                m9, [%1+ 4*%3]     ; IN(4)
+    mova                m7, [%1+ 5*%3]     ; IN(5)
+    mova               m15, [%1+ 6*%3]     ; IN(6)
+    mova                m4, [%1+ 7*%3]     ; IN(7)
+%if %2 <= 8                                ; IN(8)-IN(15) are not read: one pmulhrsw per output replaces each rotation
+    pmulhrsw            m8, m9,  [pw_15137x2]       ; t3
+    pmulhrsw            m9, [pw_6270x2]             ; t2
+    pmulhrsw           m13, m14, [pw_16069x2]       ; t7
+    pmulhrsw           m14, [pw_3196x2]             ; t4
+    pmulhrsw           m12, m15, [pw_m9102x2]       ; t5
+    pmulhrsw           m15, [pw_13623x2]            ; t6
+    pmulhrsw            m2, m5,  [pw_16305x2]       ; t15
+    pmulhrsw            m5, [pw_1606x2]             ; t8
+    pmulhrsw            m3, m4,  [pw_m10394x2]      ; t9
+    pmulhrsw            m4, [pw_12665x2]            ; t14
+    pmulhrsw            m0, m7,  [pw_14449x2]       ; t13
+    pmulhrsw            m7, [pw_7723x2]             ; t10
+    pmulhrsw            m1, m6,  [pw_m4756x2]       ; t11
+    pmulhrsw            m6, [pw_15679x2]            ; t12
+%else                                      ; all 16 inputs are read (IN(0) and IN(8) further down)
+    mova                m3, [%1+ 9*%3]     ; IN(9)
+    mova               m12, [%1+10*%3]     ; IN(10)
+    mova                m0, [%1+11*%3]     ; IN(11)
+    mova                m8, [%1+12*%3]     ; IN(12)
+    mova                m1, [%1+13*%3]     ; IN(13)
+    mova               m13, [%1+14*%3]     ; IN(14)
+    mova                m2, [%1+15*%3]     ; IN(15)
+
+    ; m10=in0, m5=in1, m14=in2, m6=in3, m9=in4, m7=in5, m15=in6, m4=in7
+    ; m11=in8, m3=in9, m12=in10 m0=in11, m8=in12, m1=in13, m13=in14, m2=in15
+
+    VP9_UNPACK_MULSUB_2W_4X   9,   8, 15137,  6270, [pd_8192], 10, 11 ; t2,  t3
+    VP9_UNPACK_MULSUB_2W_4X  14,  13, 16069,  3196, [pd_8192], 10, 11 ; t4,  t7
+    VP9_UNPACK_MULSUB_2W_4X  12,  15,  9102, 13623, [pd_8192], 10, 11 ; t5,  t6
+    VP9_UNPACK_MULSUB_2W_4X   5,   2, 16305,  1606, [pd_8192], 10, 11 ; t8,  t15
+    VP9_UNPACK_MULSUB_2W_4X   3,   4, 10394, 12665, [pd_8192], 10, 11 ; t9,  t14
+    VP9_UNPACK_MULSUB_2W_4X   7,   0, 14449,  7723, [pd_8192], 10, 11 ; t10, t13
+    VP9_UNPACK_MULSUB_2W_4X   1,   6,  4756, 15679, [pd_8192], 10, 11 ; t11, t12
+%endif
+
+    ; m11=t0, m10=t1, m9=t2, m8=t3, m14=t4, m12=t5, m15=t6, m13=t7
+    ; m5=t8, m3=t9, m7=t10, m1=t11, m6=t12, m0=t13, m4=t14, m2=t15
+
+    SUMSUB_BA            w, 12, 14, 10      ; t4,  t5
+    SUMSUB_BA            w, 15, 13, 10      ; t7,  t6
+    SUMSUB_BA            w,  3,  5, 10      ; t8,  t9
+    SUMSUB_BA            w,  7,  1, 10      ; t11, t10
+    SUMSUB_BA            w,  0,  6, 10      ; t12, t13
+    SUMSUB_BA            w,  4,  2, 10      ; t15, t14
+
+    ; m8=t0, m9=t1, m10=t2, m11=t3, m12=t4, m14=t5, m13=t6, m15=t7
+    ; m3=t8, m5=t9, m1=t10, m7=t11, m0=t12, m6=t13, m2=t14, m4=t15
+
+    SUMSUB_BA            w, 14, 13, 10
+    pmulhrsw           m13, [pw_11585x2]                              ; t5
+    pmulhrsw           m14, [pw_11585x2]                              ; t6
+    VP9_UNPACK_MULSUB_2W_4X   2,   5, 15137,  6270, [pd_8192], 10, 11 ; t9,  t14
+    VP9_UNPACK_MULSUB_2W_4X   6,   1, 6270, m15137, [pd_8192], 10, 11 ; t10, t13
+%endif
+
+    ; m8=t0, m9=t1, m10=t2, m11=t3, m12=t4, m13=t5, m14=t6, m15=t7
+    ; m3=t8, m2=t9, m6=t10, m7=t11, m0=t12, m1=t13, m5=t14, m4=t15
+
+    SUMSUB_BA            w,  7,  3, 10      ; t8,  t11
+    SUMSUB_BA            w,  6,  2, 10      ; t9,  t10
+    SUMSUB_BA            w,  0,  4, 10      ; t15, t12
+    SUMSUB_BA            w,  1,  5, 10      ; t14, t13
+
+    ; m15=t0, m14=t1, m13=t2, m12=t3, m11=t4, m10=t5, m9=t6, m8=t7
+    ; m7=t8, m6=t9, m2=t10, m3=t11, m4=t12, m5=t13, m1=t14, m0=t15
+
+    SUMSUB_BA            w,  2,  5, 10
+    SUMSUB_BA            w,  3,  4, 10
+    pmulhrsw            m5, [pw_11585x2]    ; t10
+    pmulhrsw            m4, [pw_11585x2]    ; t11
+    pmulhrsw            m3, [pw_11585x2]    ; t12
+    pmulhrsw            m2, [pw_11585x2]    ; t13
+
+    ; backup first register
+    mova              [%4], m7              ; m7 is passed as the scratch register to the SUMSUB_BAs below
+
+    ; m15=t0, m14=t1, m13=t2, m12=t3, m11=t4, m10=t5, m9=t6, m8=t7
+    ; m7=t8, m6=t9, m5=t10, m4=t11, m3=t12, m2=t13, m1=t14, m0=t15
+
+    ; from load/start
+%if %2 <= 4                                ; t0-t3 all equal the scaled IN(0) in this branch
+    mova               m11, [%1+ 0*%3]      ; IN(0)
+    pmulhrsw           m11, [pw_11585x2]    ; t0-t3
+
+    psubw               m8, m11, m15
+    paddw              m15, m11
+    psubw               m9, m11, m14
+    paddw              m14, m11
+    psubw              m10, m11, m13
+    paddw              m13, m11
+%else
+    mova               m10, [%1+ 0*%3]      ; IN(0)
+%if %2 <= 8                                ; IN(8) is not read in this branch
+    pmulhrsw           m10, [pw_11585x2]    ; t0 and t1
+    psubw              m11, m10, m8
+    paddw               m8, m10
+%else
+    mova               m11, [%1+ 8*%3]      ; IN(8)
+
+    ; from 3 stages back
+    SUMSUB_BA            w, 11, 10, 7
+    pmulhrsw           m11, [pw_11585x2]    ; t0
+    pmulhrsw           m10, [pw_11585x2]    ; t1
+
+    ; from 2 stages back
+    SUMSUB_BA            w,  8, 11, 7       ; t0,  t3
+%endif
+    SUMSUB_BA            w,  9, 10, 7       ; t1,  t2
+
+    ; from 1 stage back
+    SUMSUB_BA            w, 15,  8, 7       ; t0,  t7
+    SUMSUB_BA            w, 14,  9, 7       ; t1,  t6
+    SUMSUB_BA            w, 13, 10, 7       ; t2,  t5
+%endif
+    SUMSUB_BA            w, 12, 11, 7       ; t3,  t4
+
+    SUMSUB_BA            w,  0, 15, 7       ; t0, t15
+    SUMSUB_BA            w,  1, 14, 7       ; t1, t14
+    SUMSUB_BA            w,  2, 13, 7       ; t2, t13
+    SUMSUB_BA            w,  3, 12, 7       ; t3, t12
+    SUMSUB_BA            w,  4, 11, 7       ; t4, t11
+    SUMSUB_BA            w,  5, 10, 7       ; t5, t10
+%endmacro
+
+%macro VP9_IDCT16_1D 2-3 16 ; src, pass, nnzc
+    VP9_IDCT16_1D_START %1, %3, 32, tmpq+32 ; src row stride 32 bytes; m7 is spilled to [tmpq+32]
+
+%if %2 == 1                                ; pass 1: finish the butterflies, transpose, store to tmpq
+    ; backup a different register
+    mova         [tmpq+16], m15
+    mova                m7, [tmpq+32]       ; reload the m7 value spilled by VP9_IDCT16_1D_START
+
+    SUMSUB_BA            w,  6,  9, 15      ; t6, t9
+    SUMSUB_BA            w,  7,  8, 15      ; t7, t8
+
+    TRANSPOSE8x8W        0, 1, 2, 3, 4, 5, 6, 7, 15
+    mova        [tmpq+  0], m0              ; m0-m7 go to the first 16 bytes of each 32-byte row
+    mova        [tmpq+ 32], m1
+    mova        [tmpq+ 64], m2
+    mova        [tmpq+ 96], m3
+    mova        [tmpq+128], m4
+    mova        [tmpq+160], m5
+    mova        [tmpq+192], m6
+    mova        [tmpq+224], m7
+
+    mova               m15, [tmpq+16]       ; restore m15 saved above
+    TRANSPOSE8x8W        8, 9, 10, 11, 12, 13, 14, 15, 0
+    mova        [tmpq+ 16], m8              ; m8-m15 go to the second 16 bytes of each 32-byte row
+    mova        [tmpq+ 48], m9
+    mova        [tmpq+ 80], m10
+    mova        [tmpq+112], m11
+    mova        [tmpq+144], m12
+    mova        [tmpq+176], m13
+    mova        [tmpq+208], m14
+    mova        [tmpq+240], m15
+%else ; %2 == 2
+    ; backup more registers
+    mova         [tmpq+64], m8
+    mova         [tmpq+96], m9
+
+    pxor                m7, m7              ; zero register for VP9_STORE_2X
+    pmulhrsw            m0, [pw_512]        ; rounding multiply before each store
+    pmulhrsw            m1, [pw_512]
+    VP9_STORE_2X         0,  1,  8,  9,  7
+    lea               dstq, [dstq+strideq*2] ; advance dst by two rows
+    pmulhrsw            m2, [pw_512]
+    pmulhrsw            m3, [pw_512]
+    VP9_STORE_2X         2,  3,  8,  9,  7
+    lea               dstq, [dstq+strideq*2]
+    pmulhrsw            m4, [pw_512]
+    pmulhrsw            m5, [pw_512]
+    VP9_STORE_2X         4,  5,  8,  9,  7
+    lea               dstq, [dstq+strideq*2]
+
+    ; restore from cache
+    SWAP                 0, 7               ; move zero from m7 to m0
+    mova                m7, [tmpq+32]
+    mova                m8, [tmpq+64]
+    mova                m9, [tmpq+96]
+
+    SUMSUB_BA            w,  6,  9, 1       ; t6, t9
+    SUMSUB_BA            w,  7,  8, 1       ; t7, t8
+
+    pmulhrsw            m6, [pw_512]
+    pmulhrsw            m7, [pw_512]
+    VP9_STORE_2X         6,  7,  1,  2,  0
+    lea               dstq, [dstq+strideq*2]
+    pmulhrsw            m8, [pw_512]
+    pmulhrsw            m9, [pw_512]
+    VP9_STORE_2X         8,  9,  1,  2,  0
+    lea               dstq, [dstq+strideq*2]
+    pmulhrsw           m10, [pw_512]
+    pmulhrsw           m11, [pw_512]
+    VP9_STORE_2X        10, 11,  1,  2,  0
+    lea               dstq, [dstq+strideq*2]
+    pmulhrsw           m12, [pw_512]
+    pmulhrsw           m13, [pw_512]
+    VP9_STORE_2X        12, 13,  1,  2,  0
+    lea               dstq, [dstq+strideq*2]
+    pmulhrsw           m14, [pw_512]
+    pmulhrsw           m15, [pw_512]
+    VP9_STORE_2X        14, 15,  1,  2,  0  ; m0 is still zero on exit; dstq is left advanced by 14 rows
+%endif ; %2 == 1/2
+%endmacro
+
+%macro VP9_STORE_2XFULL 6-7 strideq; dc, tmp1, tmp2, tmp3, tmp4, zero, stride
+    mova               m%3, [dstq]          ; full-register load of the first dst row
+    mova               m%5, [dstq+%7]       ; full-register load of the row one stride below
+    punpcklbw          m%2, m%3, m%6        ; widen low-half bytes to words by interleaving with zero
+    punpckhbw          m%3, m%6             ; widen high-half bytes to words
+    punpcklbw          m%4, m%5, m%6
+    punpckhbw          m%5, m%6
+    paddw              m%2, m%1             ; add the dc words to all four halves
+    paddw              m%3, m%1
+    paddw              m%4, m%1
+    paddw              m%5, m%1
+    packuswb           m%2, m%3             ; pack back to bytes with unsigned saturation
+    packuswb           m%4, m%5
+    mova            [dstq], m%2             ; write both rows back
+    mova         [dstq+%7], m%4
+%endmacro
+
+%macro VP9_IDCT_IDCT_16x16_ADD_XMM 1 ; opt; emits vp9_idct_idct_16x16_add with three eob-selected code paths
+INIT_XMM %1
+cglobal vp9_idct_idct_16x16_add, 4, 6, 16, 512, dst, stride, block, eob ; 512 = stack bytes used as tmp buffer
+    ; 2x2=eob=3, 4x4=eob=10
+    cmp eobd, 38                            ; eob > 38: full 16x16 path
+    jg .idctfull
+    cmp eobd, 1 ; faster path for when only DC is set
+    jne .idct8x8
+
+    ; dc-only
+    movd                m0, [blockq]        ; 4-byte load; only word 0 (DC) is used below
+    mova                m1, [pw_11585x2]
+    pmulhrsw            m0, m1              ; scale once per 1D pass
+    pmulhrsw            m0, m1
+    SPLATW              m0, m0, q0000       ; broadcast word 0 to all lanes
+    pmulhrsw            m0, [pw_512]        ; final rounding multiply
+    pxor                m5, m5              ; zero for the block reset and VP9_STORE_2XFULL
+    movd          [blockq], m5              ; clear the 4 bytes loaded above
+%rep 7
+    VP9_STORE_2XFULL    0, 1, 2, 3, 4, 5    ; two rows per invocation, 8 invocations in total
+    lea               dstq, [dstq+2*strideq]
+%endrep
+    VP9_STORE_2XFULL    0, 1, 2, 3, 4, 5
+    RET
+
+    DEFINE_ARGS dst, stride, block, cnt, dst_bak, tmp ; eob's register is reused as cnt from here on
+.idct8x8:
+    mov               tmpq, rsp             ; tmp buffer = the stack area requested in cglobal
+    VP9_IDCT16_1D   blockq, 1, 8            ; pass 1, run once with nnzc=8
+
+    mov               cntd, 2
+    mov           dst_bakq, dstq
+.loop2_8x8:
+    VP9_IDCT16_1D     tmpq, 2, 8            ; pass 2 for one 8-column half
+    lea               dstq, [dst_bakq+8]    ; second iteration writes 8 bytes to the right of the original dst
+    add               tmpq, 16              ; advance 16 bytes within each tmp row
+    dec               cntd
+    jg .loop2_8x8
+
+    ; at the end of the loop, m0 should still be zero
+    ; use that to zero out block coefficients
+    ZERO_BLOCK      blockq, 32, 8, m0
+    RET
+
+.idctfull:
+    mov               cntd, 2
+    mov               tmpq, rsp             ; tmp buffer = the stack area requested in cglobal
+.loop1_full:
+    VP9_IDCT16_1D   blockq, 1               ; pass 1 on one half of the block (nnzc defaults to 16)
+    add             blockq, 16              ; next 16 bytes of each block row
+    add               tmpq, 256             ; second half of the 512-byte tmp buffer
+    dec               cntd
+    jg .loop1_full
+    sub             blockq, 32              ; undo the two 'add blockq, 16' so ZERO_BLOCK gets the block base
+
+    mov               cntd, 2
+    mov               tmpq, rsp
+    mov           dst_bakq, dstq
+.loop2_full:
+    VP9_IDCT16_1D     tmpq, 2               ; pass 2 for one 8-column half
+    lea               dstq, [dst_bakq+8]    ; second iteration writes 8 bytes to the right of the original dst
+    add               tmpq, 16              ; advance 16 bytes within each tmp row
+    dec               cntd
+    jg .loop2_full
+
+    ; at the end of the loop, m0 should still be zero
+    ; use that to zero out block coefficients
+    ZERO_BLOCK      blockq, 32, 16, m0
+    RET
+%endmacro
+
+VP9_IDCT_IDCT_16x16_ADD_XMM ssse3
+VP9_IDCT_IDCT_16x16_ADD_XMM avx
+
+;---------------------------------------------------------------------------------------------
+; void vp9_iadst_iadst_16x16_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;---------------------------------------------------------------------------------------------
+
+%macro VP9_IADST16_1D 2 ; src, pass
+%assign %%str 16*%2                        ; scratch slot size in tmpq: 16 bytes in pass 1, 32 in pass 2
+    mova                m0, [%1+ 0*32]  ; in0
+    mova                m1, [%1+15*32]  ; in15
+    mova                m8, [%1+ 7*32]  ; in7
+    mova                m9, [%1+ 8*32]  ; in8
+
+    VP9_UNPACK_MULSUB_2D_4X  1,  0,  2,  3, 16364,   804    ; m1/2=t1[d], m0/3=t0[d]
+    VP9_UNPACK_MULSUB_2D_4X  8,  9, 11, 10, 11003, 12140    ; m8/11=t9[d], m9/10=t8[d]
+    VP9_RND_SH_SUMSUB_BA     9,  0, 10,  3,  4, [pd_8192]   ; m9=t0[w], m0=t8[w]
+    VP9_RND_SH_SUMSUB_BA     8,  1, 11,  2,  4, [pd_8192]   ; m8=t1[w], m1=t9[w]
+
+    mova               m11, [%1+ 2*32]  ; in2
+    mova               m10, [%1+13*32]  ; in13
+    mova                m3, [%1+ 5*32]  ; in5
+    mova                m2, [%1+10*32]  ; in10
+
+    VP9_UNPACK_MULSUB_2D_4X 10, 11,  6,  7, 15893,  3981    ; m10/6=t3[d], m11/7=t2[d]
+    VP9_UNPACK_MULSUB_2D_4X  3,  2,  4,  5,  8423, 14053    ; m3/4=t11[d], m2/5=t10[d]
+    VP9_RND_SH_SUMSUB_BA     2, 11,  5,  7, 12, [pd_8192]   ; m2=t2[w], m11=t10[w]
+    VP9_RND_SH_SUMSUB_BA     3, 10,  4,  6, 12, [pd_8192]   ; m3=t3[w], m10=t11[w]
+
+    mova   [tmpq+ 0*%%str], m9          ; make some scratch space (t0:m9->r0)
+    mova                m4, [%1+ 4*32]  ; in4
+    mova                m5, [%1+11*32]  ; in11
+    mova               m12, [%1+ 3*32]  ; in3
+    mova               m13, [%1+12*32]  ; in12
+
+    VP9_UNPACK_MULSUB_2D_4X  5,  4,  7,  6, 14811,  7005    ; m5/7=t5[d], m4/6=t4[d]
+    VP9_UNPACK_MULSUB_2D_4X 12, 13, 14, 15,  5520, 15426    ; m12/14=t13[d], m13/15=t12[d]
+    VP9_RND_SH_SUMSUB_BA    13,  4, 15,  6,  9, [pd_8192]   ; m13=t4[w], m4=t12[w]
+    VP9_RND_SH_SUMSUB_BA    12,  5, 14,  7,  9, [pd_8192]   ; m12=t5[w], m5=t13[w]
+
+    mova   [tmpq+ 2*%%str], m8          ; t1:m8->r2
+    mova   [tmpq+ 3*%%str], m2          ; t2:m2->r3
+    mova   [tmpq+ 4*%%str], m3          ; t3:m3->r4
+    mova   [tmpq+ 5*%%str], m13         ; t4:m13->r5
+    mova                m2, [%1+ 6*32]  ; in6
+    mova                m3, [%1+ 9*32]  ; in9
+    mova                m8, [%1+ 1*32]  ; in1
+    mova                m9, [%1+14*32]  ; in14
+
+    VP9_UNPACK_MULSUB_2D_4X  3,  2,  7,  6, 13160,  9760    ; m3/7=t7[d], m2/6=t6[d]
+    VP9_UNPACK_MULSUB_2D_4X  8,  9, 13, 14,  2404, 16207    ; m8/13=t15[d], m9/14=t14[d]
+    VP9_RND_SH_SUMSUB_BA     9,  2, 14,  6, 15, [pd_8192]   ; m9=t6[w], m2=t14[w]
+    VP9_RND_SH_SUMSUB_BA     8,  3, 13,  7, 15, [pd_8192]   ; m8=t7[w], m3=t15[w]
+
+    ; r0=t0, r2=t1, r3=t2, r4=t3, r5=t4, m12=t5, m9=t6, m8=t7
+    ; m0=t8, m1=t9, m11=t10, m10=t11, m4=t12, m5=t13, m2=t14, m3=t15
+
+    ; handle t8-15 first
+    VP9_UNPACK_MULSUB_2D_4X  0,  1,  6,  7, 16069,  3196    ; m1/7=t8[d], m0/6=t9[d]
+    VP9_UNPACK_MULSUB_2D_4X  5,  4, 13, 14,  3196, 16069    ; m5/13=t12[d], m4/14=t13[d]
+    VP9_RND_SH_SUMSUB_BA     5,  1, 13,  7, 15, [pd_8192]   ; m5=t8[w], m1=t12[w]
+    VP9_RND_SH_SUMSUB_BA     4,  0, 14,  6, 15, [pd_8192]   ; m4=t9[w], m0=t13[w]
+
+    VP9_UNPACK_MULSUB_2D_4X 11, 10,  6,  7,  9102, 13623    ; m11/6=t11[d], m10/7=t10[d]
+    VP9_UNPACK_MULSUB_2D_4X  3,  2, 13, 14, 13623,  9102    ; m3/13=t14[d], m2/14=t15[d]
+    VP9_RND_SH_SUMSUB_BA     3, 10, 13,  7, 15, [pd_8192]   ; m3=t10[w], m10=t14[w]
+    VP9_RND_SH_SUMSUB_BA     2, 11, 14,  6, 15, [pd_8192]   ; m2=t11[w], m11=t15[w]
+
+    ; m5=t8, m4=t9, m3=t10, m2=t11, m1=t12, m0=t13, m10=t14, m11=t15
+
+    VP9_UNPACK_MULSUB_2D_4X  1,  0,  6,  7, 15137,  6270    ; m1/6=t13[d], m0/7=t12[d]
+    VP9_UNPACK_MULSUB_2D_4X 11, 10, 13, 14,  6270, 15137    ; m11/13=t14[d], m10/14=t15[d]
+    VP9_RND_SH_SUMSUB_BA    11,  0, 13,  7, 15, [pd_8192]   ; m11=out2[w], m0=t14[w]
+    VP9_RND_SH_SUMSUB_BA    10,  1, 14,  6, 15, [pd_8192]
+    psignw                 m10, [pw_m1]                     ; m10=out13[w], m1=t15[w]
+
+    SUMSUB_BA                w,  3,  5, 15
+    psignw                  m3, [pw_m1]                     ; m3=out1[w], m5=t10[w]
+    SUMSUB_BA                w,  2,  4, 15                  ; m2=out14[w], m4=t11[w]
+
+    SUMSUB_BA                w,  5,  4, 15
+    pmulhrsw                m5, [pw_11585x2]                ; m5=out6[w]
+    pmulhrsw                m4, [pw_11585x2]                ; m4=out9[w]
+    SUMSUB_BA                w,  1,  0, 15
+    pmulhrsw                m1, [pw_m11585x2]               ; m1=out5[w]
+    pmulhrsw                m0, [pw_11585x2]                ; m0=out10[w]
+
+    ; m3=out1, m11=out2, m1=out5, m5=out6, m4=out9, m0=out10, m10=out13, m2=out14
+
+    mova                    m6, [tmpq+ 0*%%str]
+    mova                    m7, [tmpq+ 2*%%str]
+    mova                   m13, [tmpq+ 3*%%str]
+    mova                   m14, [tmpq+ 4*%%str]
+    mova                   m15, [tmpq+ 5*%%str]
+    mova       [tmpq+ 8*%%str], m5
+    mova       [tmpq+ 9*%%str], m4
+    mova       [tmpq+10*%%str], m0
+    mova       [tmpq+11*%%str], m10
+    mova       [tmpq+12*%%str], m2
+
+    ; m6=t0, m7=t1, m13=t2, m14=t3, m15=t4, m12=t5, m9=t6, m8=t7
+    ; m3=out1, m11=out2, m1=out5, r8=out6, r9=out9, r10=out10, r11=out13, r12=out14
+
+    SUMSUB_BA                w, 15,  6,  0                  ; m15=t0[w], m6=t4[w]
+    SUMSUB_BA                w, 12,  7,  0                  ; m12=t1[w], m7=t5[w]
+    SUMSUB_BA                w,  9, 13,  0                  ; m9=t2[w], m13=t6[w]
+    SUMSUB_BA                w,  8, 14,  0                  ; m8=t3[w], m14=t7[w]
+
+    VP9_UNPACK_MULSUB_2D_4X  6,  7,  0,  2, 15137,  6270    ; m6/0=t5[d], m7/2=t4[d]
+    VP9_UNPACK_MULSUB_2D_4X 14, 13,  4,  5,  6270, 15137    ; m14/4=t6[d], m13/5=t7[d]
+    VP9_RND_SH_SUMSUB_BA    14,  7,  4,  2, 10, [pd_8192]
+    psignw                 m14, [pw_m1]                     ; m14=out3[w], m7=t6[w]
+    VP9_RND_SH_SUMSUB_BA    13,  6,  5,  0, 10, [pd_8192]   ; m13=out12[w], m6=t7[w]
+    SUMSUB_BA                w,  9, 15, 10                  ; m9=out0[w], m15=t2[w]
+    SUMSUB_BA                w,  8, 12, 10
+    psignw                  m8, [pw_m1]                     ; m8=out15[w], m12=t3[w]
+
+    SUMSUB_BA                w, 12, 15, 10
+    pmulhrsw               m12, [pw_m11585x2]               ; m12=out7[w]
+    pmulhrsw               m15, [pw_11585x2]                ; m15=out8[w]
+    SUMSUB_BA                w,  7,  6, 10
+    pmulhrsw                m7, [pw_11585x2]                ; m7=out4[w]
+    pmulhrsw                m6, [pw_11585x2]                ; m6=out11[w]
+
+    ; m9=out0, m14=out3, m7=out4, m12=out7, m15=out8, m6=out11, m13=out12, m8=out15
+    ; m3=out1, m11=out2, m1=out5, r8=out6, r9=out9, r10=out10, r11=out13, r12=out14
+
+%if %2 == 1                                ; pass 1: transpose both 8x8 halves and store to tmpq
+    mova                    m0, [tmpq+ 8*%%str]             ; reload out6 from r8
+    TRANSPOSE8x8W            9, 3, 11, 14, 7, 1, 0, 12, 2   ; out0-out7, m2 is scratch
+    mova          [tmpq+ 0*16], m9
+    mova          [tmpq+ 2*16], m3
+    mova          [tmpq+ 4*16], m11
+    mova          [tmpq+ 6*16], m14
+    mova                    m9, [tmpq+ 9*%%str]             ; reload out9/10/13/14 before slots 8-14 are overwritten
+    mova                    m3, [tmpq+10*%%str]
+    mova                   m11, [tmpq+11*%%str]
+    mova                   m14, [tmpq+12*%%str]
+    mova          [tmpq+ 8*16], m7
+    mova          [tmpq+10*16], m1
+    mova          [tmpq+12*16], m0
+    mova          [tmpq+14*16], m12
+
+    TRANSPOSE8x8W           15, 9, 3, 6, 13, 11, 14, 8, 2   ; out8-out15, m2 is scratch
+    mova          [tmpq+ 1*16], m15
+    mova          [tmpq+ 3*16], m9
+    mova          [tmpq+ 5*16], m3
+    mova          [tmpq+ 7*16], m6
+    mova          [tmpq+ 9*16], m13
+    mova          [tmpq+11*16], m11
+    mova          [tmpq+13*16], m14
+    mova          [tmpq+15*16], m8
+%else                                      ; pass 2: round with pw_512 and hand row pairs to VP9_STORE_2X
+    mova                    m5, [tmpq+ 8*%%str]             ; reload out6 from r8
+    pxor                    m0, m0                          ; zero register for VP9_STORE_2X
+
+    pmulhrsw                m9, [pw_512]
+    pmulhrsw                m3, [pw_512]
+    VP9_STORE_2X             9,  3, 2, 4, 0
+    lea                   dstq, [dstq+strideq*2]
+    pmulhrsw               m11, [pw_512]
+    pmulhrsw               m14, [pw_512]
+    VP9_STORE_2X            11, 14, 2, 4, 0
+    lea                   dstq, [dstq+strideq*2]
+    pmulhrsw                m7, [pw_512]
+    pmulhrsw                m1, [pw_512]
+    VP9_STORE_2X             7,  1, 2, 4, 0
+    lea                   dstq, [dstq+strideq*2]
+    pmulhrsw                m5, [pw_512]
+    pmulhrsw               m12, [pw_512]
+    VP9_STORE_2X             5, 12, 2, 4, 0
+    lea                   dstq, [dstq+strideq*2]
+
+    mova                    m9, [tmpq+ 9*%%str]             ; reload out9/10/13/14 from r9-r12
+    mova                    m3, [tmpq+10*%%str]
+    mova                   m11, [tmpq+11*%%str]
+    mova                   m14, [tmpq+12*%%str]
+
+    pmulhrsw               m15, [pw_512]
+    pmulhrsw                m9, [pw_512]
+    VP9_STORE_2X            15,  9, 2, 4, 0
+    lea                   dstq, [dstq+strideq*2]
+    pmulhrsw                m3, [pw_512]
+    pmulhrsw                m6, [pw_512]
+    VP9_STORE_2X             3,  6, 2, 4, 0
+    lea                   dstq, [dstq+strideq*2]
+    pmulhrsw               m13, [pw_512]
+    pmulhrsw               m11, [pw_512]
+    VP9_STORE_2X            13, 11, 2, 4, 0
+    lea                   dstq, [dstq+strideq*2]
+    pmulhrsw               m14, [pw_512]
+    pmulhrsw                m8, [pw_512]
+    VP9_STORE_2X            14,  8, 2, 4, 0                 ; dstq is left advanced by 14 rows on exit
+%endif
+%endmacro
+
+%macro IADST16_FN 5 ; %1/%3 = names of the 1st/2nd pass transforms, %2/%4 = their VP9_*_1D macros, %5 = cpu flags for INIT_XMM
+INIT_XMM %5
+cglobal vp9_%1_%3_16x16_add, 3, 6, 16, 512, dst, stride, block, cnt, dst_bak, tmp
+    mov               cntd, 2                   ; pass 1 is run twice, 16 bytes of block apart
+    mov               tmpq, rsp                 ; pass 1 writes its output to the buffer at rsp
+.loop1_full:
+    VP9_%2_1D       blockq, 1
+    add             blockq, 16
+    add               tmpq, 256
+    dec               cntd
+    jg .loop1_full
+    sub             blockq, 32                  ; undo the two 16-byte advances above
+
+    mov               cntd, 2
+    mov               tmpq, rsp
+    mov           dst_bakq, dstq                ; pass 2 moves dstq, keep the original around
+.loop2_full:
+    VP9_%4_1D         tmpq, 2
+    lea               dstq, [dst_bakq+8]        ; second iteration starts 8 bytes right of the original dst
+    add               tmpq, 16
+    dec               cntd
+    jg .loop2_full
+
+    ; at the end of the loop, m0 should still be zero
+    ; use that to zero out block coefficients
+    ZERO_BLOCK      blockq, 32, 16, m0
+    RET
+%endmacro
+
+IADST16_FN idct,  IDCT16,  iadst, IADST16, ssse3 ; vp9_idct_iadst_16x16_add
+IADST16_FN idct,  IDCT16,  iadst, IADST16, avx
+IADST16_FN iadst, IADST16, idct,  IDCT16,  ssse3 ; vp9_iadst_idct_16x16_add
+IADST16_FN iadst, IADST16, idct,  IDCT16,  avx
+IADST16_FN iadst, IADST16, iadst, IADST16, ssse3 ; vp9_iadst_iadst_16x16_add
+IADST16_FN iadst, IADST16, iadst, IADST16, avx
+
+;---------------------------------------------------------------------------------------------
+; void vp9_idct_idct_32x32_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;---------------------------------------------------------------------------------------------
+
+%macro VP9_IDCT32_1D 2-3 32 ; src, pass (1 or 2), nnzc (8, 16 or default 32: selects the reduced-input paths below)
+%assign %%str 16*%2*%2 ; tmp slot stride in bytes: 16 in pass 1, 64 in pass 2
+    ; first do t0-15, this can be done identical to idct16x16
+    VP9_IDCT16_1D_START %1, %3/2, 64*2, tmpq+ 4*%%str
+
+    ; backup a different register
+    mova    [tmpq+30*%%str], m15    ; t15
+    mova                m7, [tmpq+ 4*%%str]
+
+    SUMSUB_BA            w,  6,  9, 15      ; t6, t9
+    SUMSUB_BA            w,  7,  8, 15      ; t7, t8
+
+    ; store everything on stack to make space available for t16-31
+    ; we store interleaved with the output of the second half (t16-31)
+    ; so we don't need to allocate extra stack space
+    mova    [tmpq+ 0*%%str], m0     ; t0
+    mova    [tmpq+ 4*%%str], m1     ; t1
+    mova    [tmpq+ 8*%%str], m2     ; t2
+    mova    [tmpq+12*%%str], m3     ; t3
+    mova    [tmpq+16*%%str], m4     ; t4
+    mova    [tmpq+20*%%str], m5     ; t5
+    mova    [tmpq+24*%%str], m6     ; t6
+    mova    [tmpq+28*%%str], m7     ; t7
+    mova    [tmpq+ 2*%%str], m8     ; t8
+    mova    [tmpq+ 6*%%str], m9     ; t9
+    mova    [tmpq+10*%%str], m10    ; t10
+    mova    [tmpq+14*%%str], m11    ; t11
+    mova    [tmpq+18*%%str], m12    ; t12
+    mova    [tmpq+22*%%str], m13    ; t13
+    mova    [tmpq+26*%%str], m14    ; t14
+
+    ; then, secondly, do t16-31
+%if %3 <= 8 ; only odd inputs 1, 3, 5 and 7 are read on this path
+    mova                 m4, [%1+ 1*64]
+    mova                 m3, [%1+ 3*64]
+    mova                 m0, [%1+ 5*64]
+    mova                 m7, [%1+ 7*64]
+
+    pmulhrsw            m11,  m4, [pw_16364x2] ;t31
+    pmulhrsw             m4, [pw_804x2] ;t16
+    pmulhrsw             m8,  m7, [pw_m5520x2] ;t19
+    pmulhrsw             m7, [pw_15426x2] ;t28
+    pmulhrsw            m15,  m0, [pw_15893x2] ;t27
+    pmulhrsw             m0, [pw_3981x2] ;t20
+    pmulhrsw            m12,  m3, [pw_m2404x2] ;t23
+    pmulhrsw             m3, [pw_16207x2] ;t24
+
+    ; m4=t16/17, m8=t18/19, m0=t20/21, m12=t22/23,
+    ; m3=t24/25, m15=t26/27, m7=t28/29, m11=t30/31
+
+    VP9_UNPACK_MULSUB_2W_4X   5, 10, 11,  4, 16069,  3196, [pd_8192], 6,  9 ; t17, t30
+    VP9_UNPACK_MULSUB_2W_4X   9,  6,  7,  8, 3196, m16069, [pd_8192], 1, 14 ; t18, t29
+    ; from 1 stage forward
+    SUMSUB_BA                 w,  8,  4,  1
+    ; temporary storage
+    mova    [tmpq+17*%%str], m8             ; t16
+    mova    [tmpq+21*%%str], m4             ; t19
+    VP9_UNPACK_MULSUB_2W_4X   1, 14, 15,  0,  9102, 13623, [pd_8192], 4,  8 ; t21, t26
+    VP9_UNPACK_MULSUB_2W_4X  13,  2,  3, 12, 13623, m9102, [pd_8192], 4,  8 ; t22, t25
+
+    ; m4=t16, m5=t17, m9=t18, m8=t19, m0=t20, m1=t21, m13=t22, m12=t23,
+    ; m3=t24, m2=t25, m14=t26, m15=t27, m7=t28, m6=t29, m10=t30, m11=t31
+%else
+    mova                m10, [%1+ 1*64]
+    mova                m13, [%1+ 3*64]
+    mova                m14, [%1+ 5*64]
+    mova                 m9, [%1+ 7*64]
+    mova                 m8, [%1+ 9*64]
+    mova                m15, [%1+11*64]
+    mova                m12, [%1+13*64]
+    mova                m11, [%1+15*64]
+%if %3 <= 16 ; only odd inputs 1..15 are read on this path
+    pmulhrsw             m5, m10, [pw_16364x2]
+    pmulhrsw            m10, [pw_804x2]
+    pmulhrsw             m4, m11, [pw_m11003x2]
+    pmulhrsw            m11, [pw_12140x2]
+    pmulhrsw             m7,  m8, [pw_14811x2]
+    pmulhrsw             m8, [pw_7005x2]
+    pmulhrsw             m6,  m9, [pw_m5520x2]
+    pmulhrsw             m9, [pw_15426x2]
+    pmulhrsw             m1, m14, [pw_15893x2]
+    pmulhrsw            m14, [pw_3981x2]
+    pmulhrsw             m0, m15, [pw_m8423x2]
+    pmulhrsw            m15, [pw_14053x2]
+%else
+    mova                 m4, [%1+17*64]
+    mova                 m0, [%1+21*64]
+    mova                 m7, [%1+23*64]
+    mova                 m6, [%1+25*64]
+    mova                 m1, [%1+27*64]
+    mova                 m5, [%1+31*64]
+
+    ; m10=in1, m4=in17, m8=in9, m6=in25, m14=in5, m0=in21, m12=in13, m2=in29,
+    ; m13=in3, m3=in19, m15=in11, m1=in27, m9=in7, m7=in23, m11=in15, m5=in31
+
+    VP9_UNPACK_MULSUB_2W_4X  10,  5, 16364,   804, [pd_8192], 2, 3 ; t16, t31
+    VP9_UNPACK_MULSUB_2W_4X   4, 11, 11003, 12140, [pd_8192], 2, 3 ; t17, t30
+    VP9_UNPACK_MULSUB_2W_4X   8,  7, 14811,  7005, [pd_8192], 2, 3 ; t18, t29
+    VP9_UNPACK_MULSUB_2W_4X   6,  9,  5520, 15426, [pd_8192], 2, 3 ; t19, t28
+    VP9_UNPACK_MULSUB_2W_4X  14,  1, 15893,  3981, [pd_8192], 2, 3 ; t20, t27
+    VP9_UNPACK_MULSUB_2W_4X   0, 15,  8423, 14053, [pd_8192], 2, 3 ; t21, t26
+%endif
+
+    ; from 1 stage forward
+    SUMSUB_BA             w,  4, 10,  2
+    SUMSUB_BA             w,  8,  6,  2
+    ; from 2 stages forward
+    SUMSUB_BA             w,  8,  4,  2
+    ; temporary storage
+    mova    [tmpq+17*%%str], m8             ; t16
+    mova    [tmpq+21*%%str], m4             ; t19
+%if %3 <= 16
+    pmulhrsw             m3, m12, [pw_13160x2]
+    pmulhrsw            m12, [pw_9760x2]
+    pmulhrsw             m2, m13, [pw_m2404x2]
+    pmulhrsw            m13, [pw_16207x2]
+%else
+    mova                 m2, [%1+29*64]
+    mova                 m3, [%1+19*64]
+    VP9_UNPACK_MULSUB_2W_4X  12,  3, 13160,  9760, [pd_8192], 4, 8 ; t22, t25
+    VP9_UNPACK_MULSUB_2W_4X   2, 13,  2404, 16207, [pd_8192], 4, 8 ; t23, t24
+%endif
+
+    ; m10=t16, m4=t17, m8=t18, m6=t19, m14=t20, m0=t21, m12=t22, m2=t23,
+    ; m13=t24, m3=t25, m15=t26, m1=t27, m9=t28, m7=t29, m11=t30, m5=t31
+
+    SUMSUB_BA             w,  0, 14,  4
+    SUMSUB_BA             w, 12,  2,  4
+    SUMSUB_BA             w,  3, 13,  4
+    SUMSUB_BA             w, 15,  1,  4
+    SUMSUB_BA             w,  7,  9,  4
+    SUMSUB_BA             w, 11,  5,  4
+
+    ; m4=t16, m10=t17, m6=t18, m8=t19, m0=t20, m14=t21, m2=t22, m12=t23,
+    ; m3=t24, m13=t25, m1=t26, m15=t27, m7=t28, m9=t29, m5=t30, m11=t31
+
+    VP9_UNPACK_MULSUB_2W_4X   5, 10, 16069,  3196, [pd_8192], 4, 8 ; t17, t30
+    VP9_UNPACK_MULSUB_2W_4X   9,  6, 3196, m16069, [pd_8192], 4, 8 ; t18, t29
+    VP9_UNPACK_MULSUB_2W_4X   1, 14,  9102, 13623, [pd_8192], 4, 8 ; t21, t26
+    VP9_UNPACK_MULSUB_2W_4X  13,  2, 13623, m9102, [pd_8192], 4, 8 ; t22, t25
+%endif
+
+    ; m4=t16, m5=t17, m9=t18, m8=t19, m0=t20, m1=t21, m13=t22, m12=t23,
+    ; m3=t24, m2=t25, m14=t26, m15=t27, m7=t28, m6=t29, m10=t30, m11=t31
+
+    SUMSUB_BA             w,  9,  5,  4
+    SUMSUB_BA             w,  1, 13,  4
+    SUMSUB_BA             w,  0, 12,  4
+    SUMSUB_BA             w, 15,  3,  4
+    SUMSUB_BA             w, 14,  2,  4
+    SUMSUB_BA             w,  6, 10,  4
+    SUMSUB_BA             w,  7, 11,  4
+
+    ; m8[s]=t16, m9=t17, m5=t18, m4[s]=t19, m12=t20, m13=t21, m1=t22, m0=t23,
+    ; m15=t24, m14=t25, m2=t26, m3=t27, m11=t28, m10=t29, m6=t30, m7=t31
+
+    mova                 m8, [tmpq+17*%%str] ; t16
+    ; from 2 stages forward
+    SUMSUB_BA             w,  0,  8,  4
+    SUMSUB_BA             w, 15,  7,  4
+    ; from 3 stages forward
+    SUMSUB_BA             w,  8,  7,  4
+    pmulhrsw             m7, [pw_11585x2]
+    pmulhrsw             m8, [pw_11585x2]
+    ; store t16/t23
+    mova    [tmpq+ 1*%%str], m0     ; t16
+    mova    [tmpq+29*%%str], m7     ; t23
+
+    mova                 m4, [tmpq+21*%%str] ; t19
+    VP9_UNPACK_MULSUB_2W_4X  10,  5, 15137,  6270, [pd_8192], 0, 7 ; t18, t29
+    VP9_UNPACK_MULSUB_2W_4X  11,  4, 15137,  6270, [pd_8192], 0, 7 ; t19, t28
+    VP9_UNPACK_MULSUB_2W_4X   3, 12, 6270, m15137, [pd_8192], 0, 7 ; t20, t27
+    VP9_UNPACK_MULSUB_2W_4X   2, 13, 6270, m15137, [pd_8192], 0, 7 ; t21, t26
+
+    ; m8=t16, m9=t17, m10=t18, m11=t19, m3=t20, m2=t21, m1=t22, m0=t23,
+    ; m15=t24, m14=t25, m13=t26, m12=t27, m4=t28, m5=t29, m6=t30, m7=t31
+
+    SUMSUB_BA             w,  1,  9,  0
+    SUMSUB_BA             w,  2, 10,  0
+    SUMSUB_BA             w,  3, 11,  0
+    SUMSUB_BA             w, 12,  4,  0
+    SUMSUB_BA             w, 13,  5,  0
+    SUMSUB_BA             w, 14,  6,  0
+
+    ; m0=t16, m1=t17, m2=t18, m3=t19, m11=t20, m10=t21, m9=t22, m8=t23,
+    ; m7=t24, m6=t25, m5=t26, m4=t27, m12=t28, m13=t29, m14=t30, m15=t31
+
+    SUMSUB_BA             w,  9,  6,  0
+    SUMSUB_BA             w, 10,  5,  0
+    SUMSUB_BA             w, 11,  4,  0
+
+    pmulhrsw             m6, [pw_11585x2]
+    pmulhrsw             m9, [pw_11585x2]
+    pmulhrsw             m5, [pw_11585x2]
+    pmulhrsw            m10, [pw_11585x2]
+    pmulhrsw             m4, [pw_11585x2]
+    pmulhrsw            m11, [pw_11585x2]
+
+    ; m0=t16, m1=t17, m2=t18, m3=t19, m4=t20, m5=t21, m6=t22, m7=t23,
+    ; m8=t24, m9=t25, m10=t26, m11=t27, m12=t28, m13=t29, m14=t30, m15=t31
+
+    ; store t17-19 (and t20-22 for pass 1) - keep t24-31 in registers for
+    ; final sumsub in pass 1, or keep t20-22 and t24-31 in registers for
+    ; final sumsub of pass 2
+    mova    [tmpq+ 5*%%str], m1     ; t17
+    mova    [tmpq+ 9*%%str], m2     ; t18
+    mova    [tmpq+13*%%str], m3     ; t19
+
+    ; then do final pass to sumsub+store the two halves
+%if %2 == 1 ; pass 1: final butterflies, 8x8 transposes, results written back to tmp
+    mova    [tmpq+17*%%str], m4     ; t20
+    mova    [tmpq+21*%%str], m5     ; t21
+    mova    [tmpq+25*%%str], m6     ; t22
+
+    mova                 m0, [tmpq+ 0*%%str] ; t0
+    mova                 m1, [tmpq+ 4*%%str] ; t1
+    mova                 m2, [tmpq+ 8*%%str] ; t2
+    mova                 m3, [tmpq+12*%%str] ; t3
+    mova                 m4, [tmpq+16*%%str] ; t4
+    mova                 m5, [tmpq+20*%%str] ; t5
+    mova                 m6, [tmpq+24*%%str] ; t6
+
+    SUMSUB_BA             w, 15,  0, 7
+    mova    [tmpq+ 3*%%str], m0              ; t15
+    mova                 m7, [tmpq+28*%%str] ; t7
+    SUMSUB_BA             w, 14,  1, 0
+    SUMSUB_BA             w, 13,  2, 0
+    SUMSUB_BA             w, 12,  3, 0
+    SUMSUB_BA             w, 11,  4, 0
+    SUMSUB_BA             w, 10,  5, 0
+    SUMSUB_BA             w,  9,  6, 0
+    SUMSUB_BA             w,  8,  7, 0
+
+    TRANSPOSE8x8W        15, 14, 13, 12, 11, 10, 9, 8, 0
+    mova    [tmpq+ 0*%%str], m15
+    mova    [tmpq+ 4*%%str], m14
+    mova    [tmpq+ 8*%%str], m13
+    mova    [tmpq+12*%%str], m12
+    mova    [tmpq+16*%%str], m11
+    mova    [tmpq+20*%%str], m10
+    mova    [tmpq+24*%%str], m9
+    mova    [tmpq+28*%%str], m8
+
+    mova                  m0, [tmpq+ 3*%%str] ; t15
+    TRANSPOSE8x8W          7, 6, 5, 4, 3, 2, 1, 0, 8
+    mova    [tmpq+ 3*%%str], m7
+    mova    [tmpq+ 7*%%str], m6
+    mova    [tmpq+11*%%str], m5
+    mova    [tmpq+15*%%str], m4
+    mova    [tmpq+19*%%str], m3
+    mova    [tmpq+23*%%str], m2
+    mova    [tmpq+27*%%str], m1
+    mova    [tmpq+31*%%str], m0
+
+    mova                m15, [tmpq+ 2*%%str] ; t8
+    mova                m14, [tmpq+ 6*%%str] ; t9
+    mova                m13, [tmpq+10*%%str] ; t10
+    mova                m12, [tmpq+14*%%str] ; t11
+    mova                m11, [tmpq+18*%%str] ; t12
+    mova                m10, [tmpq+22*%%str] ; t13
+    mova                 m9, [tmpq+26*%%str] ; t14
+    mova                 m8, [tmpq+30*%%str] ; t15
+    mova                 m7, [tmpq+ 1*%%str] ; t16
+    mova                 m6, [tmpq+ 5*%%str] ; t17
+    mova                 m5, [tmpq+ 9*%%str] ; t18
+    mova                 m4, [tmpq+13*%%str] ; t19
+    mova                 m3, [tmpq+17*%%str] ; t20
+    mova                 m2, [tmpq+21*%%str] ; t21
+    mova                 m1, [tmpq+25*%%str] ; t22
+
+    SUMSUB_BA             w,  7,  8, 0
+    mova    [tmpq+ 2*%%str], m8
+    mova                 m0, [tmpq+29*%%str] ; t23
+    SUMSUB_BA             w,  6,  9, 8
+    SUMSUB_BA             w,  5, 10, 8
+    SUMSUB_BA             w,  4, 11, 8
+    SUMSUB_BA             w,  3, 12, 8
+    SUMSUB_BA             w,  2, 13, 8
+    SUMSUB_BA             w,  1, 14, 8
+    SUMSUB_BA             w,  0, 15, 8
+
+    TRANSPOSE8x8W         0, 1, 2, 3, 4, 5, 6, 7, 8
+    mova    [tmpq+ 1*%%str], m0
+    mova    [tmpq+ 5*%%str], m1
+    mova    [tmpq+ 9*%%str], m2
+    mova    [tmpq+13*%%str], m3
+    mova    [tmpq+17*%%str], m4
+    mova    [tmpq+21*%%str], m5
+    mova    [tmpq+25*%%str], m6
+    mova    [tmpq+29*%%str], m7
+
+    mova                 m8, [tmpq+ 2*%%str]
+    TRANSPOSE8x8W         8, 9, 10, 11, 12, 13, 14, 15, 0
+    mova    [tmpq+ 2*%%str], m8
+    mova    [tmpq+ 6*%%str], m9
+    mova    [tmpq+10*%%str], m10
+    mova    [tmpq+14*%%str], m11
+    mova    [tmpq+18*%%str], m12
+    mova    [tmpq+22*%%str], m13
+    mova    [tmpq+26*%%str], m14
+    mova    [tmpq+30*%%str], m15
+%else ; pass 2: final butterflies, pw_512 rounding, then store via dstq (from the top) and dst_endq (from the bottom)
+    ; t0-7 is in [tmpq+{0,4,8,12,16,20,24,28}*%%str]
+    ; t8-15 is in [tmpq+{2,6,10,14,18,22,26,30}*%%str]
+    ; t16-19 and t23 is in [tmpq+{1,5,9,13,29}*%%str]
+    ; t20-22 is in m4-6
+    ; t24-31 is in m8-15
+    pxor                m7, m7
+
+%macro %%STORE_2X2 7-8 1 ; src[1-4], tmp[1-2], zero, inc_dst_ptrs
+    SUMSUB_BA            w, %4, %1, %5
+    SUMSUB_BA            w, %3, %2, %5
+    pmulhrsw           m%4, [pw_512]
+    pmulhrsw           m%3, [pw_512]
+    VP9_STORE_2X        %4, %3, %5, %6, %7
+%if %8 == 1
+    add               dstq, stride2q
+%endif
+    pmulhrsw           m%2, [pw_512]
+    pmulhrsw           m%1, [pw_512]
+    VP9_STORE_2X        %2, %1, %5, %6, %7, dst_endq
+%if %8 == 1
+    sub           dst_endq, stride2q
+%endif
+%endmacro
+
+    ; store t0-1 and t30-31
+    mova                m0, [tmpq+ 0*%%str]
+    mova                m1, [tmpq+ 4*%%str]
+    %%STORE_2X2          0,  1, 14, 15, 2, 3, 7
+
+    ; store t2-3 and t28-29
+    mova                m0, [tmpq+ 8*%%str]
+    mova                m1, [tmpq+12*%%str]
+    %%STORE_2X2          0,  1, 12, 13, 2, 3, 7
+
+    ; store t4-5 and t26-27
+    mova                m0, [tmpq+16*%%str]
+    mova                m1, [tmpq+20*%%str]
+    %%STORE_2X2          0,  1, 10, 11, 2, 3, 7
+
+    ; store t6-7 and t24-25
+    mova                m0, [tmpq+24*%%str]
+    mova                m1, [tmpq+28*%%str]
+    %%STORE_2X2          0,  1,  8,  9, 2, 3, 7
+
+    ; store t8-9 and t22-23
+    mova                m0, [tmpq+ 2*%%str]
+    mova                m1, [tmpq+ 6*%%str]
+    mova                m8, [tmpq+29*%%str]
+    %%STORE_2X2          0,  1,  6,  8, 2, 3, 7
+
+    ; store t10-11 and t20-21
+    mova                m0, [tmpq+10*%%str]
+    mova                m1, [tmpq+14*%%str]
+    %%STORE_2X2          0,  1,  4,  5, 2, 3, 7
+
+    ; store t12-13 and t18-19
+    mova                m0, [tmpq+18*%%str]
+    mova                m1, [tmpq+22*%%str]
+    mova                m5, [tmpq+13*%%str]
+    mova                m4, [tmpq+ 9*%%str]
+    %%STORE_2X2          0,  1,  4,  5, 2, 3, 7
+
+    ; store t14-17
+    mova                m0, [tmpq+26*%%str]
+    mova                m1, [tmpq+30*%%str]
+    mova                m5, [tmpq+ 5*%%str]
+    mova                m4, [tmpq+ 1*%%str]
+    %%STORE_2X2          0,  1,  4,  5, 2, 3, 7, 0
+%endif
+%endmacro
+
+%macro VP9_IDCT_IDCT_32x32_ADD_XMM 1 ; %1 = cpu flags for INIT_XMM; emits vp9_idct_idct_32x32_add with four eob-selected paths
+INIT_XMM %1
+cglobal vp9_idct_idct_32x32_add, 4, 9, 16, 2048, dst, stride, block, eob
+    cmp eobd, 135
+    jg .idctfull                                ; eob > 135: full 32x32 path
+    cmp eobd, 34
+    jg .idct16x16                               ; 34 < eob <= 135: nnzc=16 path
+    cmp eobd, 1
+    jg .idct8x8                                 ; 1 < eob <= 34: nnzc=8 path
+
+    ; dc-only case
+    movd                m0, [blockq]
+    mova                m1, [pw_11585x2]
+    pmulhrsw            m0, m1
+    pmulhrsw            m0, m1
+    SPLATW              m0, m0, q0000
+    pmulhrsw            m0, [pw_512]
+    pxor                m5, m5
+    movd          [blockq], m5                  ; clear the dc coefficient that was just consumed
+    DEFINE_ARGS        dst, stride, block, cnt
+%rep 31
+    VP9_STORE_2XFULL    0, 1, 2, 3, 4, 5, mmsize
+    add               dstq, strideq
+%endrep
+    VP9_STORE_2XFULL    0, 1, 2, 3, 4, 5, mmsize
+    RET
+
+    DEFINE_ARGS dst_bak, stride, block, cnt, dst, stride30, dst_end, stride2, tmp
+.idct8x8:
+    mov               tmpq, rsp
+    VP9_IDCT32_1D   blockq, 1, 8
+
+    mov          stride30q, strideq         ; stride
+    lea           stride2q, [strideq*2]     ; stride*2
+    shl          stride30q, 5               ; stride*32
+    mov               cntd, 4
+    sub          stride30q, stride2q        ; stride*30
+.loop2_8x8:
+    mov               dstq, dst_bakq
+    lea           dst_endq, [dst_bakq+stride30q]
+    VP9_IDCT32_1D     tmpq, 2, 8
+    add           dst_bakq, 8
+    add               tmpq, 16
+    dec               cntd
+    jg .loop2_8x8
+
+    ; at the end of the loop, m7 should still be zero
+    ; use that to zero out block coefficients
+    ZERO_BLOCK      blockq, 64,  8, m7
+    RET
+
+.idct16x16:
+    mov               cntd, 2
+    mov               tmpq, rsp
+.loop1_16x16:
+    VP9_IDCT32_1D   blockq, 1, 16
+    add             blockq, 16
+    add               tmpq, 512
+    dec               cntd
+    jg .loop1_16x16
+    sub             blockq, 32              ; undo the two 16-byte advances above
+
+    mov          stride30q, strideq         ; stride
+    lea           stride2q, [strideq*2]     ; stride*2
+    shl          stride30q, 5               ; stride*32
+    mov               cntd, 4
+    mov               tmpq, rsp
+    sub          stride30q, stride2q        ; stride*30
+.loop2_16x16:
+    mov               dstq, dst_bakq
+    lea           dst_endq, [dst_bakq+stride30q]
+    VP9_IDCT32_1D     tmpq, 2, 16
+    add           dst_bakq, 8
+    add               tmpq, 16
+    dec               cntd
+    jg .loop2_16x16
+
+    ; at the end of the loop, m7 should still be zero
+    ; use that to zero out block coefficients
+    ZERO_BLOCK      blockq, 64, 16, m7
+    RET
+
+.idctfull:
+    mov               cntd, 4
+    mov               tmpq, rsp
+.loop1_full:
+    VP9_IDCT32_1D   blockq, 1
+    add             blockq, 16
+    add               tmpq, 512
+    dec               cntd
+    jg .loop1_full
+    sub             blockq, 64              ; undo the four 16-byte advances above
+
+    mov          stride30q, strideq         ; stride
+    lea           stride2q, [strideq*2]     ; stride*2
+    shl          stride30q, 5               ; stride*32
+    mov               cntd, 4
+    mov               tmpq, rsp
+    sub          stride30q, stride2q        ; stride*30
+.loop2_full:
+    mov               dstq, dst_bakq
+    lea           dst_endq, [dst_bakq+stride30q]
+    VP9_IDCT32_1D     tmpq, 2
+    add           dst_bakq, 8
+    add               tmpq, 16
+    dec               cntd
+    jg .loop2_full
+
+    ; at the end of the loop, m7 should still be zero
+    ; use that to zero out block coefficients
+    ZERO_BLOCK      blockq, 64, 32, m7
+    RET
+%endmacro
+
+VP9_IDCT_IDCT_32x32_ADD_XMM ssse3
+VP9_IDCT_IDCT_32x32_ADD_XMM avx
+
+%endif ; x86-64
diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm
new file mode 100644
index 0000000..def7d5a
--- /dev/null
+++ b/libavcodec/x86/vp9lpf.asm
@@ -0,0 +1,816 @@
+;******************************************************************************
+;* VP9 loop filter SIMD optimizations
+;*
+;* Copyright (C) 2013-2014 Clément Bœsch <u pkh me>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%if ARCH_X86_64
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+cextern pb_3
+cextern pb_80
+
+pb_4:   times 16 db 0x04 ; pb_XX: one 16-byte vector with every byte set to 0xXX
+pb_10:  times 16 db 0x10
+pb_40:  times 16 db 0x40
+pb_81:  times 16 db 0x81
+pb_f8:  times 16 db 0xf8
+pb_fe:  times 16 db 0xfe
+
+pw_4:   times  8 dw 4 ; pw_N: one 16-byte vector with every word set to N
+pw_8:   times  8 dw 8
+
+; with mix functions, two 8-bit thresholds are stored in a 16-bit storage,
+; the following mask is used to splat both in the same register
+mask_mix: times 8 db 0
+          times 8 db 1
+
+mask_mix84: times 8 db 0xff ; low 8 bytes selected, high 8 bytes cleared
+            times 8 db 0x00
+mask_mix48: times 8 db 0x00 ; low 8 bytes cleared, high 8 bytes selected
+            times 8 db 0xff
+
+SECTION .text
+
+; %1 = abs(%2-%3) per unsigned byte, built from two saturating subtractions
+%macro ABSSUB 4 ; dst, src1 (RO), src2 (RO), tmp
+    psubusb             %1, %3, %2          ; dst = max(src2 - src1, 0)
+    psubusb             %4, %2, %3          ; tmp = max(src1 - src2, 0)
+    por                 %1, %4              ; one of the two is zero, so OR yields |src1 - src2|
+%endmacro
+
+; %1 = %1<=%2 as a per-byte mask; when a 4th argument is given, %1 is XORed with it first
+%macro CMP_LTE 3-4 ; src/dst, cmp, tmp, pb_80
+%if %0 == 4
+    pxor                %1, %4              ; NOTE(review): only src is flipped here; cmp is presumably pre-biased by the caller
+%endif
+    pcmpgtb             %3, %2, %1          ; cmp > src?
+    pcmpeqb             %1, %2              ; cmp == src? XXX: avoid this with a -1/+1 well placed?
+    por                 %1, %3              ; cmp >= src?
+%endmacro
+
+; %1 = abs(%2-%3) <= %4 (note: tmp1/%5 is accepted but never used, tmp2/%6 serves both helpers)
+%macro ABSSUB_CMP 6-7 [pb_80]; dst, src1, src2, cmp, tmp1, tmp2, [pb_80]
+    ABSSUB              %1, %2, %3, %6      ; dst = abs(src1-src2)
+    CMP_LTE             %1, %4, %6, %7      ; dst <= cmp
+%endmacro
+
+%macro MASK_APPLY 4 ; %1=new_data/dst %2=old_data %3=mask %4=tmp ; bitwise select: dst = mask ? new : old
+    pand                %1, %3              ; new &= mask
+    pandn               %4, %3, %2          ; tmp = ~mask & old
+    por                 %1, %4              ; new&mask | old&~mask
+%endmacro
+
+%macro FILTER_SUBx2_ADDx2 8 ; %1=dst %2=h/l %3=cache %4=sub1 %5=sub2 %6=add1 %7=add2 %8=rshift
+    punpck%2bw          %1, %4, m0                      ; sub1: B->W (dst doubles as scratch, m0 must be zero)
+    psubw               %3, %1                          ; cache -= sub1
+    punpck%2bw          %1, %5, m0                      ; sub2: B->W
+    psubw               %3, %1                          ; cache -= sub2
+    punpck%2bw          %1, %6, m0                      ; add1: B->W
+    paddw               %3, %1                          ; cache += add1
+    punpck%2bw          %1, %7, m0                      ; add2: B->W
+    paddw               %3, %1                          ; cache += add2; cache keeps the running sum for the next update
+    psraw               %1, %3, %8                      ; dst = cache >> rshift
+%endmacro
+
+%macro FILTER_INIT 8 ; tmp1, tmp2, cacheL, cacheH, dstp, filterid, mask, source
+    FILTER%6_INIT       %1, l, %3           ; low 8 bytes: word sums in cacheL, shifted result in tmp1
+    FILTER%6_INIT       %2, h, %4           ; high 8 bytes: word sums in cacheH, shifted result in tmp2
+    packuswb            %1, %2              ; words back to 16 unsigned bytes
+    MASK_APPLY          %1, %8, %7, %2      ; keep source where the mask is not set
+    mova                %5, %1              ; write the result to dstp
+%endmacro
+
+%macro FILTER_UPDATE 11-14 ; tmp1, tmp2, cacheL, cacheH, dstp, -, -, +, +, rshift, mask, [source], [preload reg + value]
+%if %0 == 13 ; no source + preload
+    mova                %12, %13
+%elif %0 == 14 ; source + preload
+    mova                %13, %14
+%endif
+    FILTER_SUBx2_ADDx2  %1, l, %3, %6, %7, %8, %9, %10 ; low half: update cacheL, result in tmp1
+    FILTER_SUBx2_ADDx2  %2, h, %4, %6, %7, %8, %9, %10 ; high half: update cacheH, result in tmp2
+    packuswb            %1, %2
+%if %0 == 12 || %0 == 14
+    MASK_APPLY          %1, %12, %11, %2    ; unmasked bytes come from the explicit source
+%else
+    MASK_APPLY          %1, %5, %11, %2     ; unmasked bytes come from dstp itself
+%endif
+    mova                %5, %1
+%endmacro
+
+%macro SRSHIFT3B_2X 4 ; reg1, reg2, [pb_10], tmp ; per-byte signed >>3 of two registers (assuming %3 is pb_10)
+    mova                %4, [pb_f8]
+    pand                %1, %4              ; clear the low 3 bits of each byte so the qword shift
+    pand                %2, %4              ; below cannot leak bits into the neighbouring byte
+    psrlq               %1, 3
+    psrlq               %2, 3
+    pxor                %1, %3              ; (x ^ %3) - %3: with %3 = 0x10 per byte this
+    pxor                %2, %3              ; sign-extends the 5-bit value left by the shift
+    psubb               %1, %3
+    psubb               %2, %3
+%endmacro
+
+%macro EXTRACT_POS_NEG 3 ; i8, neg, pos ; split signed bytes into magnitude-of-negatives and positives
+    pxor                %3, %3
+    pxor                %2, %2
+    pcmpgtb             %3, %1                          ; i8 < 0 mask
+    psubb               %2, %1                          ; -i8 for every byte (only originally negative ones are kept below)
+    pand                %2, %3                          ; negative values of i8 (but stored as +)
+    pandn               %3, %1                          ; positive values of i8
+%endmacro
+
+; dst = clip_u8(u8 + i8), done as a saturating subtract of the negatives then a saturating add of the positives
+%macro SIGN_ADD 5 ; dst, u8, i8, tmp1, tmp2
+    EXTRACT_POS_NEG     %3, %4, %5                      ; tmp1 = -i8 where i8 < 0, tmp2 = i8 where i8 >= 0
+    psubusb             %1, %2, %4                      ; sub the negatives
+    paddusb             %1, %5                          ; add the positives
+%endmacro
+
+; dst = clip_u8(u8 - i8), done as a saturating subtract of the positives then a saturating add of the negatives
+%macro SIGN_SUB 5 ; dst, u8, i8, tmp1, tmp2
+    EXTRACT_POS_NEG     %3, %4, %5                      ; tmp1 = -i8 where i8 < 0, tmp2 = i8 where i8 >= 0
+    psubusb             %1, %2, %5                      ; sub the positives
+    paddusb             %1, %4                          ; add the negatives
+%endmacro
+
+%macro FILTER6_INIT 3 ; %1=dst %2=h/l %3=cache ; uses m0 as the zero for B->W and m14/m15/m10/m11/m12 as p3/p2/p1/p0/q0
+    punpck%2bw          %1, m14, m0                     ; p3: B->W
+    paddw               %3, %1, %1                      ; p3*2
+    paddw               %3, %1                          ; p3*3
+    punpck%2bw          %1, m15, m0                     ; p2: B->W
+    paddw               %3, %1                          ; p3*3 + p2
+    paddw               %3, %1                          ; p3*3 + p2*2
+    punpck%2bw          %1, m10, m0                     ; p1: B->W
+    paddw               %3, %1                          ; p3*3 + p2*2 + p1
+    punpck%2bw          %1, m11, m0                     ; p0: B->W
+    paddw               %3, %1                          ; p3*3 + p2*2 + p1 + p0
+    punpck%2bw          %1, m12, m0                     ; q0: B->W
+    paddw               %3, %1                          ; p3*3 + p2*2 + p1 + p0 + q0
+    paddw               %3, [pw_4]                      ; p3*3 + p2*2 + p1 + p0 + q0 + 4
+    psraw               %1, %3, 3                       ; (p3*3 + p2*2 + p1 + p0 + q0 + 4) >> 3
+%endmacro
+
+%macro FILTER14_INIT 3 ; %1=dst %2=h/l %3=cache ; uses m0 as the zero for B->W; rows come from the registers named below
+    punpck%2bw          %1, m2, m0                      ; p7: B->W
+    psllw               %3, %1, 3                       ; p7*8
+    psubw               %3, %1                          ; p7*7
+    punpck%2bw          %1, m3, m0                      ; p6: B->W
+    paddw               %3, %1                          ; p7*7 + p6
+    paddw               %3, %1                          ; p7*7 + p6*2
+    punpck%2bw          %1, m8, m0                      ; p5: B->W
+    paddw               %3, %1                          ; p7*7 + p6*2 + p5
+    punpck%2bw          %1, m9, m0                      ; p4: B->W
+    paddw               %3, %1                          ; p7*7 + p6*2 + p5 + p4
+    punpck%2bw          %1, m14, m0                     ; p3: B->W
+    paddw               %3, %1                          ; p7*7 + p6*2 + p5 + p4 + p3
+    punpck%2bw          %1, m15, m0                     ; p2: B->W
+    paddw               %3, %1                          ; p7*7 + p6*2 + p5 + .. + p2
+    punpck%2bw          %1, m10, m0                     ; p1: B->W
+    paddw               %3, %1                          ; p7*7 + p6*2 + p5 + .. + p1
+    punpck%2bw          %1, m11, m0                     ; p0: B->W
+    paddw               %3, %1                          ; p7*7 + p6*2 + p5 + .. + p0
+    punpck%2bw          %1, m12, m0                     ; q0: B->W
+    paddw               %3, %1                          ; p7*7 + p6*2 + p5 + .. + p0 + q0
+    paddw               %3, [pw_8]                      ; p7*7 + p6*2 + p5 + .. + p0 + q0 + 8
+    psraw               %1, %3, 4                       ; (p7*7 + p6*2 + p5 + .. + p0 + q0 + 8) >> 4
+%endmacro
+
+%macro TRANSPOSE16x16B 17 ; %1-%16 = register numbers of the 16 rows, %17 = scratch operand used to spill one register at a time
+    mova %17, m%16 ; all 16 registers are live, so spill row 16 and use its register as the SBUTTERFLY temp
+    SBUTTERFLY bw,  %1,  %2,  %16
+    SBUTTERFLY bw,  %3,  %4,  %16
+    SBUTTERFLY bw,  %5,  %6,  %16
+    SBUTTERFLY bw,  %7,  %8,  %16
+    SBUTTERFLY bw,  %9,  %10, %16
+    SBUTTERFLY bw,  %11, %12, %16
+    SBUTTERFLY bw,  %13, %14, %16
+    mova m%16,  %17 ; restore row 16, then spill %14 so it can serve as the temp
+    mova  %17, m%14
+    SBUTTERFLY bw,  %15, %16, %14
+    SBUTTERFLY wd,  %1,  %3,  %14
+    SBUTTERFLY wd,  %2,  %4,  %14
+    SBUTTERFLY wd,  %5,  %7,  %14
+    SBUTTERFLY wd,  %6,  %8,  %14
+    SBUTTERFLY wd,  %9,  %11, %14
+    SBUTTERFLY wd,  %10, %12, %14
+    SBUTTERFLY wd,  %13, %15, %14
+    mova m%14,  %17 ; same swap of the spilled register, now with %12
+    mova  %17, m%12
+    SBUTTERFLY wd,  %14, %16, %12
+    SBUTTERFLY dq,  %1,  %5,  %12
+    SBUTTERFLY dq,  %2,  %6,  %12
+    SBUTTERFLY dq,  %3,  %7,  %12
+    SBUTTERFLY dq,  %4,  %8,  %12
+    SBUTTERFLY dq,  %9,  %13, %12
+    SBUTTERFLY dq,  %10, %14, %12
+    SBUTTERFLY dq,  %11, %15, %12
+    mova m%12, %17 ; same swap of the spilled register, now with %8
+    mova  %17, m%8
+    SBUTTERFLY dq,  %12, %16, %8
+    SBUTTERFLY qdq, %1,  %9,  %8
+    SBUTTERFLY qdq, %2,  %10, %8
+    SBUTTERFLY qdq, %3,  %11, %8
+    SBUTTERFLY qdq, %4,  %12, %8
+    SBUTTERFLY qdq, %5,  %13, %8
+    SBUTTERFLY qdq, %6,  %14, %8
+    SBUTTERFLY qdq, %7,  %15, %8
+    mova m%8, %17 ; same swap of the spilled register, now with %1
+    mova %17, m%1
+    SBUTTERFLY qdq, %8,  %16, %1
+    mova m%1, %17
+    SWAP %2,  %9 ; rename registers so the rows end up in order
+    SWAP %3,  %5
+    SWAP %4,  %13
+    SWAP %6,  %11
+    SWAP %8,  %15
+    SWAP %12, %14
+%endmacro
+
+; transpose 16 half lines (high part) to 8 full centered lines
+%macro TRANSPOSE16x8B 16 ; %1-%16 = register numbers of the 16 input rows
+    punpcklbw   m%1,  m%2 ; interleave the low bytes of each row pair into the odd-numbered argument
+    punpcklbw   m%3,  m%4
+    punpcklbw   m%5,  m%6
+    punpcklbw   m%7,  m%8
+    punpcklbw   m%9,  m%10
+    punpcklbw   m%11, m%12
+    punpcklbw   m%13, m%14
+    punpcklbw   m%15, m%16
+    SBUTTERFLY  wd,  %1,  %3,  %2 ; %2 was consumed by the first punpcklbw, so it is free to act as the temp
+    SBUTTERFLY  wd,  %5,  %7,  %2
+    SBUTTERFLY  wd,  %9,  %11, %2
+    SBUTTERFLY  wd,  %13, %15, %2
+    SBUTTERFLY  dq,  %1,  %5,  %2
+    SBUTTERFLY  dq,  %3,  %7,  %2
+    SBUTTERFLY  dq,  %9,  %13, %2
+    SBUTTERFLY  dq,  %11, %15, %2
+    SBUTTERFLY  qdq, %1,  %9,  %2
+    SBUTTERFLY  qdq, %3,  %11, %2
+    SBUTTERFLY  qdq, %5,  %13, %2
+    SBUTTERFLY  qdq, %7,  %15, %2
+    SWAP %5, %1 ; register renaming only; the caller depends on this exact final mapping
+    SWAP %6, %9
+    SWAP %7, %1
+    SWAP %8, %13
+    SWAP %9, %3
+    SWAP %10, %11
+    SWAP %11, %1
+    SWAP %12, %15
+%endmacro
+
+%macro DEFINE_REAL_P7_TO_Q7 0-1 0
+%define P7 dst1q + 2*mstrideq  + %1
+%define P6 dst1q +   mstrideq  + %1
+%define P5 dst1q               + %1
+%define P4 dst1q +    strideq  + %1
+%define P3 dstq  + 4*mstrideq  + %1
+%define P2 dstq  +   mstride3q + %1
+%define P1 dstq  + 2*mstrideq  + %1
+%define P0 dstq  +   mstrideq  + %1
+%define Q0 dstq                + %1
+%define Q1 dstq  +   strideq   + %1
+%define Q2 dstq  + 2*strideq   + %1
+%define Q3 dstq  +   stride3q  + %1
+%define Q4 dstq  + 4*strideq   + %1
+%define Q5 dst2q + mstrideq    + %1
+%define Q6 dst2q               + %1
+%define Q7 dst2q +  strideq    + %1
+%endmacro
+
+; ..............AB -> AAAAAAAABBBBBBBB
+%macro SPLATB_MIX 1-2 [mask_mix]
+%if cpuflag(ssse3)
+    pshufb     %1, %2  ; single byte shuffle driven by the mask in %2 (defaults to [mask_mix])
+%else
+    punpcklbw  %1, %1  ; ............AABB
+    punpcklwd  %1, %1  ; ........AAAABBBB
+    punpckldq  %1, %1  ; AAAAAAAABBBBBBBB
+%endif
+%endmacro
+
+%macro LOOPFILTER 2 ; %1=v/h %2=size1
+    lea mstrideq, [strideq]
+    neg mstrideq
+
+    lea stride3q, [strideq+2*strideq]
+    mov mstride3q, stride3q
+    neg mstride3q
+
+%ifidn %1, h
+%if %2 > 16
+%define movx movh
+    lea dstq, [dstq + 8*strideq - 4]
+%else
+%define movx movu
+    lea dstq, [dstq + 8*strideq - 8] ; go from top center (h pos) to center left (v pos)
+%endif
+%endif
+
+    lea dst1q, [dstq + 2*mstride3q]                         ; dst1q = &dst[stride * -6]
+    lea dst2q, [dstq + 2* stride3q]                         ; dst2q = &dst[stride * +6]
+
+    DEFINE_REAL_P7_TO_Q7
+
+%ifidn %1, h
+    movx                    m0, [P7]
+    movx                    m1, [P6]
+    movx                    m2, [P5]
+    movx                    m3, [P4]
+    movx                    m4, [P3]
+    movx                    m5, [P2]
+    movx                    m6, [P1]
+    movx                    m7, [P0]
+    movx                    m8, [Q0]
+    movx                    m9, [Q1]
+    movx                   m10, [Q2]
+    movx                   m11, [Q3]
+    movx                   m12, [Q4]
+    movx                   m13, [Q5]
+    movx                   m14, [Q6]
+    movx                   m15, [Q7]
+%define P7 rsp +   0
+%define P6 rsp +  16
+%define P5 rsp +  32
+%define P4 rsp +  48
+%define P3 rsp +  64
+%define P2 rsp +  80
+%define P1 rsp +  96
+%define P0 rsp + 112
+%define Q0 rsp + 128
+%define Q1 rsp + 144
+%define Q2 rsp + 160
+%define Q3 rsp + 176
+%define Q4 rsp + 192
+%define Q5 rsp + 208
+%define Q6 rsp + 224
+%define Q7 rsp + 240
+
+%if %2 == 16
+    TRANSPOSE16x16B 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, [rsp]
+    mova           [P7],  m0
+    mova           [P6],  m1
+    mova           [P5],  m2
+    mova           [P4],  m3
+%else
+    TRANSPOSE16x8B 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+%endif
+    mova           [P3],  m4
+    mova           [P2],  m5
+    mova           [P1],  m6
+    mova           [P0],  m7
+    mova           [Q0],  m8
+    mova           [Q1],  m9
+    mova           [Q2], m10
+    mova           [Q3], m11
+%if %2 == 16
+    mova           [Q4], m12
+    mova           [Q5], m13
+    mova           [Q6], m14
+    mova           [Q7], m15
+%endif
+%endif
+
+    ; calc fm mask
+%if %2 == 16
+%if cpuflag(ssse3)
+    pxor                m0, m0
+%endif
+    SPLATB_REG          m2, I, m0                       ; I I I I ...
+    SPLATB_REG          m3, E, m0                       ; E E E E ...
+%else
+%if cpuflag(ssse3)
+    mova                m0, [mask_mix]
+%endif
+    movd                m2, Id
+    movd                m3, Ed
+    SPLATB_MIX          m2, m0
+    SPLATB_MIX          m3, m0
+%endif
+    mova                m0, [pb_80]
+    pxor                m2, m0
+    pxor                m3, m0
+%ifidn %1, v
+    mova                m8, [P3]
+    mova                m9, [P2]
+    mova               m10, [P1]
+    mova               m11, [P0]
+    mova               m12, [Q0]
+    mova               m13, [Q1]
+    mova               m14, [Q2]
+    mova               m15, [Q3]
+%else
+    ; In case of horizontal, P3..Q3 are already present in some registers due
+    ; to the previous transpose, so we just swap registers.
+    SWAP                 8,  4, 12
+    SWAP                 9,  5, 13
+    SWAP                10,  6, 14
+    SWAP                11,  7, 15
+%endif
+    ABSSUB_CMP          m5,  m8,  m9, m2, m6, m7, m0    ; m5 = abs(p3-p2) <= I
+    ABSSUB_CMP          m1,  m9, m10, m2, m6, m7, m0    ; m1 = abs(p2-p1) <= I
+    pand                m5, m1
+    ABSSUB_CMP          m1, m10, m11, m2, m6, m7, m0    ; m1 = abs(p1-p0) <= I
+    pand                m5, m1
+    ABSSUB_CMP          m1, m12, m13, m2, m6, m7, m0    ; m1 = abs(q1-q0) <= I
+    pand                m5, m1
+    ABSSUB_CMP          m1, m13, m14, m2, m6, m7, m0    ; m1 = abs(q2-q1) <= I
+    pand                m5, m1
+    ABSSUB_CMP          m1, m14, m15, m2, m6, m7, m0    ; m1 = abs(q3-q2) <= I
+    pand                m5, m1
+    ABSSUB              m1, m11, m12, m7                ; abs(p0-q0)
+    paddusb             m1, m1                          ; abs(p0-q0) * 2
+    ABSSUB              m2, m10, m13, m7                ; abs(p1-q1)
+    pand                m2, [pb_fe]                     ; drop lsb so shift can work
+    psrlq               m2, 1                           ; abs(p1-q1)/2
+    paddusb             m1, m2                          ; abs(p0-q0)*2 + abs(p1-q1)/2
+    pxor                m1, m0
+    pcmpgtb             m4, m3, m1                      ; E > X?
+    pcmpeqb             m3, m1                          ; E == X?
+    por                 m3, m4                          ; E >= X?
+    pand                m3, m5                          ; fm final value
+
+    ; (m3: fm, m8..15: p3 p2 p1 p0 q0 q1 q2 q3)
+    ; calc flat8in (if not 44_16) and hev masks
+    mova                m6, [pb_81]                     ; [1 1 1 1 ...] ^ 0x80
+%if %2 != 44
+    ABSSUB_CMP          m2, m8, m11, m6, m4, m5         ; abs(p3 - p0) <= 1
+    mova                m8, [pb_80]
+    ABSSUB_CMP          m1, m9, m11, m6, m4, m5, m8     ; abs(p2 - p0) <= 1
+    pand                m2, m1
+    ABSSUB              m4, m10, m11, m5                ; abs(p1 - p0)
+%if %2 == 16
+%if cpuflag(ssse3)
+    pxor                m0, m0
+%endif
+    SPLATB_REG          m7, H, m0                       ; H H H H ...
+%else
+    movd                m7, Hd
+    SPLATB_MIX          m7
+%endif
+    pxor                m7, m8
+    pxor                m4, m8
+    pcmpgtb             m0, m4, m7                      ; abs(p1 - p0) > H (1/2 hev condition)
+    CMP_LTE             m4, m6, m5                      ; abs(p1 - p0) <= 1
+    pand                m2, m4                          ; (flat8in)
+    ABSSUB              m4, m13, m12, m1                ; abs(q1 - q0)
+    pxor                m4, m8
+    pcmpgtb             m5, m4, m7                      ; abs(q1 - q0) > H (2/2 hev condition)
+    por                 m0, m5                          ; hev final value
+    CMP_LTE             m4, m6, m5                      ; abs(q1 - q0) <= 1
+    pand                m2, m4                          ; (flat8in)
+    ABSSUB_CMP          m1, m14, m12, m6, m4, m5, m8    ; abs(q2 - q0) <= 1
+    pand                m2, m1
+    ABSSUB_CMP          m1, m15, m12, m6, m4, m5, m8    ; abs(q3 - q0) <= 1
+    pand                m2, m1                          ; flat8in final value
+%if %2 == 84 || %2 == 48
+    pand                m2, [mask_mix%2]
+%endif
+%else
+    mova                m6, [pb_80]
+    movd                m7, Hd
+    SPLATB_MIX          m7
+    pxor                m7, m6
+    ABSSUB              m4, m10, m11, m1                ; abs(p1 - p0)
+    pxor                m4, m6
+    pcmpgtb             m0, m4, m7                      ; abs(p1 - p0) > H (1/2 hev condition)
+    ABSSUB              m4, m13, m12, m1                ; abs(q1 - q0)
+    pxor                m4, m6
+    pcmpgtb             m5, m4, m7                      ; abs(q1 - q0) > H (2/2 hev condition)
+    por                 m0, m5                          ; hev final value
+%endif
+
+%if %2 == 16
+    ; (m0: hev, m2: flat8in, m3: fm, m6: pb_81, m9..15: p2 p1 p0 q0 q1 q2 q3)
+    ; calc flat8out mask
+    mova                m8, [P7]
+    mova                m9, [P6]
+    ABSSUB_CMP          m1, m8, m11, m6, m4, m5         ; abs(p7 - p0) <= 1
+    ABSSUB_CMP          m7, m9, m11, m6, m4, m5         ; abs(p6 - p0) <= 1
+    pand                m1, m7
+    mova                m8, [P5]
+    mova                m9, [P4]
+    ABSSUB_CMP          m7, m8, m11, m6, m4, m5         ; abs(p5 - p0) <= 1
+    pand                m1, m7
+    ABSSUB_CMP          m7, m9, m11, m6, m4, m5         ; abs(p4 - p0) <= 1
+    pand                m1, m7
+    mova                m14, [Q4]
+    mova                m15, [Q5]
+    ABSSUB_CMP          m7, m14, m12, m6, m4, m5        ; abs(q4 - q0) <= 1
+    pand                m1, m7
+    ABSSUB_CMP          m7, m15, m12, m6, m4, m5        ; abs(q5 - q0) <= 1
+    pand                m1, m7
+    mova                m14, [Q6]
+    mova                m15, [Q7]
+    ABSSUB_CMP          m7, m14, m12, m6, m4, m5        ; abs(q6 - q0) <= 1
+    pand                m1, m7
+    ABSSUB_CMP          m7, m15, m12, m6, m4, m5        ; abs(q7 - q0) <= 1
+    pand                m1, m7                          ; flat8out final value
+%endif
+
+    ; if (fm) {
+    ;     if (out && in) filter_14()
+    ;     else if (in)   filter_6()
+    ;     else if (hev)  filter_2()
+    ;     else           filter_4()
+    ; }
+    ;
+    ; f14:                                                                            fm &  out &  in
+    ; f6:  fm & ~f14 & in        => fm & ~(out & in) & in                          => fm & ~out &  in
+    ; f2:  fm & ~f14 & ~f6 & hev => fm & ~(out & in) & ~(~out & in) & hev          => fm &  ~in &  hev
+    ; f4:  fm & ~f14 & ~f6 & ~f2 => fm & ~(out & in) & ~(~out & in) & ~(~in & hev) => fm &  ~in & ~hev
+
+    ; (m0: hev, [m1: flat8out], [m2: flat8in], m3: fm, m8..15: p5 p4 p1 p0 q0 q1 q6 q7)
+    ; filter2()
+%if %2 != 44
+    mova                m6, [pb_80]                     ; already in m6 if 44_16
+%endif
+    pxor                m15, m12, m6                    ; q0 ^ 0x80
+    pxor                m14, m11, m6                    ; p0 ^ 0x80
+    psubsb              m15, m14                        ; (signed) q0 - p0
+    pxor                m4, m10, m6                     ; p1 ^ 0x80
+    pxor                m5, m13, m6                     ; q1 ^ 0x80
+    psubsb              m4, m5                          ; (signed) p1 - q1
+    paddsb              m4, m15                         ;   (q0 - p0) + (p1 - q1)
+    paddsb              m4, m15                         ; 2*(q0 - p0) + (p1 - q1)
+    paddsb              m4, m15                         ; 3*(q0 - p0) + (p1 - q1)
+    paddsb              m6, m4, [pb_4]                  ; m6: f1 = clip(f + 4, 127)
+    paddsb              m4, [pb_3]                      ; m4: f2 = clip(f + 3, 127)
+    mova                m14, [pb_10]                    ; will be reused in filter4()
+    SRSHIFT3B_2X        m6, m4, m14, m7                 ; f1 and f2 sign byte shift by 3
+    SIGN_SUB            m7, m12, m6, m5, m9             ; m7 = q0 - f1
+    SIGN_ADD            m8, m11, m4, m5, m9             ; m8 = p0 + f2
+%if %2 != 44
+    pandn               m6, m2, m3                      ;  ~mask(in) & mask(fm)
+    pand                m6, m0                          ; (~mask(in) & mask(fm)) & mask(hev)
+%else
+    pand                m6, m3, m0
+%endif
+    MASK_APPLY          m7, m12, m6, m5                 ; m7 = filter2(q0) & mask / we write it in filter4()
+    MASK_APPLY          m8, m11, m6, m5                 ; m8 = filter2(p0) & mask / we write it in filter4()
+
+    ; (m0: hev, [m1: flat8out], [m2: flat8in], m3: fm, m7..m8: q0' p0', m10..13: p1 p0 q0 q1, m14: pb_10, m15: q0-p0)
+    ; filter4()
+    mova                m4, m15
+    paddsb              m15, m4                         ; 2 * (q0 - p0)
+    paddsb              m15, m4                         ; 3 * (q0 - p0)
+    paddsb              m6, m15, [pb_4]                 ; m6:  f1 = clip(f + 4, 127)
+    paddsb              m15, [pb_3]                     ; m15: f2 = clip(f + 3, 127)
+    SRSHIFT3B_2X        m6, m15, m14, m9                ; f1 and f2 sign byte shift by 3
+%if %2 != 44
+%define p0tmp m7
+%define q0tmp m9
+    pandn               m5, m2, m3                      ;               ~mask(in) & mask(fm)
+    pandn               m0, m5                          ; ~mask(hev) & (~mask(in) & mask(fm))
+%else
+%define p0tmp m1
+%define q0tmp m2
+    pandn               m0, m3
+%endif
+    SIGN_SUB            q0tmp, m12, m6, m4, m14         ; q0 - f1
+    MASK_APPLY          q0tmp, m7, m0, m5               ; filter4(q0) & mask
+    mova                [Q0], q0tmp
+    SIGN_ADD            p0tmp, m11, m15, m4, m14        ; p0 + f2
+    MASK_APPLY          p0tmp, m8, m0, m5               ; filter4(p0) & mask
+    mova                [P0], p0tmp
+    paddb               m6, [pb_80]                     ;
+    pxor                m8, m8                          ;   f=(f1+1)>>1
+    pavgb               m6, m8                          ;
+    psubb               m6, [pb_40]                     ;
+    SIGN_ADD            m7, m10, m6, m8, m9             ; p1 + f
+    SIGN_SUB            m4, m13, m6, m8, m9             ; q1 - f
+    MASK_APPLY          m7, m10, m0, m14                ; m7 = filter4(p1)
+    MASK_APPLY          m4, m13, m0, m14                ; m4 = filter4(q1)
+    mova                [P1], m7
+    mova                [Q1], m4
+
+    ; ([m1: flat8out], m2: flat8in, m3: fm, m10..13: p1 p0 q0 q1)
+    ; filter6()
+%if %2 != 44
+    pxor                m0, m0
+%if %2 > 16
+    pand                m3, m2
+%else
+    pand                m2, m3                          ;               mask(fm) & mask(in)
+    pandn               m3, m1, m2                      ; ~mask(out) & (mask(fm) & mask(in))
+%endif
+    mova               m14, [P3]
+    mova               m15, [P2]
+    mova                m8, [Q2]
+    mova                m9, [Q3]
+    FILTER_INIT         m4, m5, m6, m7, [P2], 6,                     m3, m15    ; [p2]
+    FILTER_UPDATE       m6, m7, m4, m5, [P1], m14, m15, m10, m13, 3, m3         ; [p1] -p3 -p2 +p1 +q1
+    FILTER_UPDATE       m4, m5, m6, m7, [P0], m14, m10, m11,  m8, 3, m3         ; [p0] -p3 -p1 +p0 +q2
+    FILTER_UPDATE       m6, m7, m4, m5, [Q0], m14, m11, m12,  m9, 3, m3         ; [q0] -p3 -p0 +q0 +q3
+    FILTER_UPDATE       m4, m5, m6, m7, [Q1], m15, m12, m13,  m9, 3, m3         ; [q1] -p2 -q0 +q1 +q3
+    FILTER_UPDATE       m6, m7, m4, m5, [Q2], m10, m13,  m8,  m9, 3, m3,  m8    ; [q2] -p1 -q1 +q2 +q3
+%endif
+
+    ; (m0: 0, [m1: flat8out], m2: fm & flat8in, m8..15: q2 q3 p1 p0 q0 q1 p3 p2)
+    ; filter14()
+    ;
+    ;                            m2  m3  m8  m9 m14 m15 m10 m11 m12 m13
+    ;
+    ;                                    q2  q3  p3  p2  p1  p0  q0  q1
+    ; p6  -7                     p7  p6  p5  p4   .   .   .   .   .
+    ; p5  -6  -p7 -p6 +p5 +q1     .   .   .                           .
+    ; p4  -5  -p7 -p5 +p4 +q2     .       .   .                      q2
+    ; p3  -4  -p7 -p4 +p3 +q3     .           .   .                  q3
+    ; p2  -3  -p7 -p3 +p2 +q4     .               .   .              q4
+    ; p1  -2  -p7 -p2 +p1 +q5     .                   .   .          q5
+    ; p0  -1  -p7 -p1 +p0 +q6     .                       .   .      q6
+    ; q0  +0  -p7 -p0 +q0 +q7     .                           .   .  q7
+    ; q1  +1  -p6 -q0 +q1 +q7    q1   .                           .   .
+    ; q2  +2  -p5 -q1 +q2 +q7     .  q2   .                           .
+    ; q3  +3  -p4 -q2 +q3 +q7         .  q3   .                       .
+    ; q4  +4  -p3 -q3 +q4 +q7             .  q4   .                   .
+    ; q5  +5  -p2 -q4 +q5 +q7                 .  q5   .               .
+    ; q6  +6  -p1 -q5 +q6 +q7                     .  q6   .           .
+
+%if %2 == 16
+    pand            m1, m2                                                              ; mask(out) & (mask(fm) & mask(in))
+    mova            m2, [P7]
+    mova            m3, [P6]
+    mova            m8, [P5]
+    mova            m9, [P4]
+    FILTER_INIT     m4, m5, m6, m7, [P6],  14,                   m1,  m3
+    FILTER_UPDATE   m6, m7, m4, m5, [P5],  m2,  m3,  m8, m13, 4, m1,  m8                ; [p5] -p7 -p6 +p5 +q1
+    FILTER_UPDATE   m4, m5, m6, m7, [P4],  m2,  m8,  m9, m13, 4, m1,  m9, m13, [Q2]     ; [p4] -p7 -p5 +p4 +q2
+    FILTER_UPDATE   m6, m7, m4, m5, [P3],  m2,  m9, m14, m13, 4, m1, m14, m13, [Q3]     ; [p3] -p7 -p4 +p3 +q3
+    FILTER_UPDATE   m4, m5, m6, m7, [P2],  m2, m14, m15, m13, 4, m1,      m13, [Q4]     ; [p2] -p7 -p3 +p2 +q4
+    FILTER_UPDATE   m6, m7, m4, m5, [P1],  m2, m15, m10, m13, 4, m1,      m13, [Q5]     ; [p1] -p7 -p2 +p1 +q5
+    FILTER_UPDATE   m4, m5, m6, m7, [P0],  m2, m10, m11, m13, 4, m1,      m13, [Q6]     ; [p0] -p7 -p1 +p0 +q6
+    FILTER_UPDATE   m6, m7, m4, m5, [Q0],  m2, m11, m12, m13, 4, m1,      m13, [Q7]     ; [q0] -p7 -p0 +q0 +q7
+    FILTER_UPDATE   m4, m5, m6, m7, [Q1],  m3, m12,  m2, m13, 4, m1,       m2, [Q1]     ; [q1] -p6 -q0 +q1 +q7
+    FILTER_UPDATE   m6, m7, m4, m5, [Q2],  m8,  m2,  m3, m13, 4, m1,       m3, [Q2]     ; [q2] -p5 -q1 +q2 +q7
+    FILTER_UPDATE   m4, m5, m6, m7, [Q3],  m9,  m3,  m8, m13, 4, m1,  m8,  m8, [Q3]     ; [q3] -p4 -q2 +q3 +q7
+    FILTER_UPDATE   m6, m7, m4, m5, [Q4], m14,  m8,  m9, m13, 4, m1,  m9,  m9, [Q4]     ; [q4] -p3 -q3 +q4 +q7
+    FILTER_UPDATE   m4, m5, m6, m7, [Q5], m15,  m9, m14, m13, 4, m1, m14, m14, [Q5]     ; [q5] -p2 -q4 +q5 +q7
+    FILTER_UPDATE   m6, m7, m4, m5, [Q6], m10, m14, m15, m13, 4, m1, m15, m15, [Q6]     ; [q6] -p1 -q5 +q6 +q7
+%endif
+
+%ifidn %1, h
+%if %2 == 16
+    mova                    m0, [P7]
+    mova                    m1, [P6]
+    mova                    m2, [P5]
+    mova                    m3, [P4]
+    mova                    m4, [P3]
+    mova                    m5, [P2]
+    mova                    m6, [P1]
+    mova                    m7, [P0]
+    mova                    m8, [Q0]
+    mova                    m9, [Q1]
+    mova                   m10, [Q2]
+    mova                   m11, [Q3]
+    mova                   m12, [Q4]
+    mova                   m13, [Q5]
+    mova                   m14, [Q6]
+    mova                   m15, [Q7]
+    TRANSPOSE16x16B 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, [rsp]
+    DEFINE_REAL_P7_TO_Q7
+    movu  [P7],  m0
+    movu  [P6],  m1
+    movu  [P5],  m2
+    movu  [P4],  m3
+    movu  [P3],  m4
+    movu  [P2],  m5
+    movu  [P1],  m6
+    movu  [P0],  m7
+    movu  [Q0],  m8
+    movu  [Q1],  m9
+    movu  [Q2], m10
+    movu  [Q3], m11
+    movu  [Q4], m12
+    movu  [Q5], m13
+    movu  [Q6], m14
+    movu  [Q7], m15
+%elif %2 == 44
+    SWAP 0, 7   ; m0 = p1
+    SWAP 3, 4   ; m3 = q1
+    DEFINE_REAL_P7_TO_Q7 2
+    SBUTTERFLY  bw, 0, 1, 8
+    SBUTTERFLY  bw, 2, 3, 8
+    SBUTTERFLY  wd, 0, 2, 8
+    SBUTTERFLY  wd, 1, 3, 8
+    SBUTTERFLY  dq, 0, 4, 8
+    SBUTTERFLY  dq, 1, 5, 8
+    SBUTTERFLY  dq, 2, 6, 8
+    SBUTTERFLY  dq, 3, 7, 8
+    movd  [P7], m0
+    punpckhqdq m0, m8
+    movd  [P6], m0
+    movd  [Q0], m1
+    punpckhqdq  m1, m9
+    movd  [Q1], m1
+    movd  [P3], m2
+    punpckhqdq  m2, m10
+    movd  [P2], m2
+    movd  [Q4], m3
+    punpckhqdq m3, m11
+    movd  [Q5], m3
+    movd  [P5], m4
+    punpckhqdq m4, m12
+    movd  [P4], m4
+    movd  [Q2], m5
+    punpckhqdq m5, m13
+    movd  [Q3], m5
+    movd  [P1], m6
+    punpckhqdq m6, m14
+    movd  [P0], m6
+    movd  [Q6], m7
+    punpckhqdq m7, m8
+    movd  [Q7], m7
+%else
+    ; the following code does a transpose of 8 full lines to 16 half
+    ; lines (high part). It is inlined to avoid the need for a staging area
+    mova                    m0, [P3]
+    mova                    m1, [P2]
+    mova                    m2, [P1]
+    mova                    m3, [P0]
+    mova                    m4, [Q0]
+    mova                    m5, [Q1]
+    mova                    m6, [Q2]
+    mova                    m7, [Q3]
+    DEFINE_REAL_P7_TO_Q7
+    SBUTTERFLY  bw,  0,  1, 8
+    SBUTTERFLY  bw,  2,  3, 8
+    SBUTTERFLY  bw,  4,  5, 8
+    SBUTTERFLY  bw,  6,  7, 8
+    SBUTTERFLY  wd,  0,  2, 8
+    SBUTTERFLY  wd,  1,  3, 8
+    SBUTTERFLY  wd,  4,  6, 8
+    SBUTTERFLY  wd,  5,  7, 8
+    SBUTTERFLY  dq,  0,  4, 8
+    SBUTTERFLY  dq,  1,  5, 8
+    SBUTTERFLY  dq,  2,  6, 8
+    SBUTTERFLY  dq,  3,  7, 8
+    movh  [P7], m0
+    punpckhqdq m0, m8
+    movh  [P6], m0
+    movh  [Q0], m1
+    punpckhqdq  m1, m9
+    movh  [Q1], m1
+    movh  [P3], m2
+    punpckhqdq  m2, m10
+    movh  [P2], m2
+    movh  [Q4], m3
+    punpckhqdq m3, m11
+    movh  [Q5], m3
+    movh  [P5], m4
+    punpckhqdq m4, m12
+    movh  [P4], m4
+    movh  [Q2], m5
+    punpckhqdq m5, m13
+    movh  [Q3], m5
+    movh  [P1], m6
+    punpckhqdq m6, m14
+    movh  [P0], m6
+    movh  [Q6], m7
+    punpckhqdq m7, m8
+    movh  [Q7], m7
+%endif
+%endif
+
+    RET
+%endmacro
+
+%macro LPF_16_VH 2 ; %1 = filter size (16/44/48/84/88), %2 = instruction set passed to INIT_XMM
+INIT_XMM %2
+cglobal vp9_loop_filter_v_%1_16, 5,10,16,      dst, stride, E, I, H, mstride, dst1, dst2, stride3, mstride3
+    LOOPFILTER v, %1 ; v variant addresses P7..Q7 directly through dst/dst1/dst2, no stack area requested
+cglobal vp9_loop_filter_h_%1_16, 5,10,16, 256, dst, stride, E, I, H, mstride, dst1, dst2, stride3, mstride3
+    LOOPFILTER h, %1 ; h variant keeps the transposed rows at [rsp + 0..255], hence the 256 in cglobal above
+%endmacro
+
+%macro LPF_16_VH_ALL_OPTS 1 ; %1 = filter size; emits the v and h functions once per instruction set
+LPF_16_VH %1, sse2
+LPF_16_VH %1, ssse3
+LPF_16_VH %1, avx
+%endmacro
+
+LPF_16_VH_ALL_OPTS 16
+LPF_16_VH_ALL_OPTS 44
+LPF_16_VH_ALL_OPTS 48
+LPF_16_VH_ALL_OPTS 84
+LPF_16_VH_ALL_OPTS 88
+
+%endif ; x86-64
diff --git a/libavcodec/x86/vp9dsp.asm b/libavcodec/x86/vp9mc.asm
index 6488f30..7c2a38c 100644
--- a/libavcodec/x86/vp9dsp.asm
+++ b/libavcodec/x86/vp9mc.asm
@@ -1,22 +1,22 @@
 ;******************************************************************************
-;* VP9 SIMD optimizations
+;* VP9 MC SIMD optimizations
 ;*
 ;* Copyright (c) 2013 Ronald S. Bultje <rsbultje gmail com>
 ;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
 ;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
 ;* modify it under the terms of the GNU Lesser General Public
 ;* License as published by the Free Software Foundation; either
 ;* version 2.1 of the License, or (at your option) any later version.
 ;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 ;* Lesser General Public License for more details.
 ;*
 ;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ;******************************************************************************
 
@@ -87,7 +87,7 @@ SECTION .text
 
 %macro filter_h_fn 1
 %assign %%px mmsize/2
-cglobal %1_8tap_1d_h_ %+ %%px, 6, 6, 11, dst, src, dstride, sstride, h, filtery
+cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 11, dst, dstride, src, sstride, h, filtery
     mova        m6, [pw_256]
     mova        m7, [filteryq+ 0]
 %if ARCH_X86_64 && mmsize > 8
@@ -145,30 +145,85 @@ INIT_XMM ssse3
 filter_h_fn put
 filter_h_fn avg
 
+%if ARCH_X86_64
+%macro filter_hx2_fn 1 ; %1 = put or avg (see the %ifidn below and the two invocations)
+%assign %%px mmsize
+cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 14, dst, dstride, src, sstride, h, filtery
+    mova       m13, [pw_256]       ; pmulhrsw multiplier; assuming words of 256 this gives (x + 64) >> 7
+    mova        m8, [filteryq+ 0]  ; four 16-byte coefficient vectors, one per pair of taps
+    mova        m9, [filteryq+16]
+    mova       m10, [filteryq+32]
+    mova       m11, [filteryq+48]
+.loop:
+    movu        m0, [srcq-3]       ; eight unaligned loads: the taps at x-3 .. x+4
+    movu        m1, [srcq-2]
+    movu        m2, [srcq-1]
+    movu        m3, [srcq+0]
+    movu        m4, [srcq+1]
+    movu        m5, [srcq+2]
+    movu        m6, [srcq+3]
+    movu        m7, [srcq+4]
+    add       srcq, sstrideq       ; advance to the next source row
+    SBUTTERFLY  bw, 0, 1, 12       ; interleave bytes of neighbouring taps (m12 is the 4th arg, presumably scratch)
+    SBUTTERFLY  bw, 2, 3, 12
+    SBUTTERFLY  bw, 4, 5, 12
+    SBUTTERFLY  bw, 6, 7, 12
+    pmaddubsw   m0, m8             ; unsigned pixels * signed coefficients, adjacent products summed to words
+    pmaddubsw   m1, m8
+    pmaddubsw   m2, m9
+    pmaddubsw   m3, m9
+    pmaddubsw   m4, m10
+    pmaddubsw   m5, m10
+    pmaddubsw   m6, m11
+    pmaddubsw   m7, m11
+    paddw       m0, m2             ; combine the partial sums of the four tap pairs
+    paddw       m1, m3
+    paddw       m4, m6
+    paddw       m5, m7
+    paddsw      m0, m4             ; the last addition saturates
+    paddsw      m1, m5
+    pmulhrsw    m0, m13            ; rounded scale-down by the m13 multiplier
+    pmulhrsw    m1, m13
+    packuswb    m0, m1             ; clamp to unsigned bytes and merge both halves into one row
+%ifidn %1, avg
+    pavgb       m0, [dstq]         ; avg variant: rounded average with the bytes already in dst
+%endif
+    mova    [dstq], m0             ; aligned store of mmsize bytes
+    add       dstq, dstrideq       ; advance to the next destination row
+    dec         hd                 ; one row per iteration
+    jg .loop
+    RET
+%endmacro
+
+INIT_XMM ssse3
+filter_hx2_fn put
+filter_hx2_fn avg
+
+%endif ; ARCH_X86_64
+
 %macro filter_v_fn 1
 %assign %%px mmsize/2
 %if ARCH_X86_64
-cglobal %1_8tap_1d_v_ %+ %%px, 6, 8, 11, dst, src, dstride, sstride, h, filtery, src4, sstride3
+cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 11, dst, dstride, src, sstride, h, filtery, src4, sstride3
 %else
-cglobal %1_8tap_1d_v_ %+ %%px, 4, 7, 11, dst, src, dstride, sstride, filtery, src4, sstride3
+cglobal vp9_%1_8tap_1d_v_ %+ %%px, 4, 7, 11, dst, dstride, src, sstride, filtery, src4, sstride3
     mov   filteryq, r5mp
 %define hd r4mp
 %endif
-    sub       srcq, sstrideq
-    lea  sstride3q, [sstrideq*3]
-    sub       srcq, sstrideq
     mova        m6, [pw_256]
-    sub       srcq, sstrideq
+    lea  sstride3q, [sstrideq*3]
+    lea      src4q, [srcq+sstrideq]
+    sub       srcq, sstride3q
     mova        m7, [filteryq+ 0]
-    lea      src4q, [srcq+sstrideq*4]
 %if ARCH_X86_64 && mmsize > 8
     mova        m8, [filteryq+16]
     mova        m9, [filteryq+32]
     mova       m10, [filteryq+48]
 %endif
 .loop:
-    ; FIXME maybe reuse loads from previous rows, or just more generally
-    ; unroll this to prevent multiple loads of the same data?
+    ; FIXME maybe reuse loads from previous rows, or just
+    ; more generally unroll this to prevent multiple loads of
+    ; the same data?
     movh        m0, [srcq]
     movh        m1, [srcq+sstrideq]
     movh        m2, [srcq+sstrideq*2]
@@ -219,6 +274,70 @@ INIT_XMM ssse3
 filter_v_fn put
 filter_v_fn avg
 
+%if ARCH_X86_64
+
+%macro filter_vx2_fn 1 ; %1 = put or avg (see the %ifidn below and the two invocations)
+%assign %%px mmsize
+cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 14, dst, dstride, src, sstride, h, filtery, src4, sstride3
+    mova       m13, [pw_256]       ; pmulhrsw multiplier; assuming words of 256 this gives (x + 64) >> 7
+    lea  sstride3q, [sstrideq*3]
+    lea      src4q, [srcq+sstrideq] ; src4 = row y+1; rows y+1 .. y+4 are read relative to it
+    sub       srcq, sstride3q       ; src  = row y-3; rows y-3 .. y   are read relative to it
+    mova        m8, [filteryq+ 0]   ; four 16-byte coefficient vectors, one per pair of taps
+    mova        m9, [filteryq+16]
+    mova       m10, [filteryq+32]
+    mova       m11, [filteryq+48]
+.loop:
+    ; FIXME maybe reuse loads from previous rows, or just
+    ; more generally unroll this to prevent multiple loads of
+    ; the same data?
+    movu        m0, [srcq]
+    movu        m1, [srcq+sstrideq]
+    movu        m2, [srcq+sstrideq*2]
+    movu        m3, [srcq+sstride3q]
+    movu        m4, [src4q]
+    movu        m5, [src4q+sstrideq]
+    movu        m6, [src4q+sstrideq*2]
+    movu        m7, [src4q+sstride3q]
+    add       srcq, sstrideq       ; both row pointers move down one source row
+    add      src4q, sstrideq
+    SBUTTERFLY  bw, 0, 1, 12       ; interleave bytes of neighbouring rows (m12 is the 4th arg, presumably scratch)
+    SBUTTERFLY  bw, 2, 3, 12
+    SBUTTERFLY  bw, 4, 5, 12
+    SBUTTERFLY  bw, 6, 7, 12
+    pmaddubsw   m0, m8             ; unsigned pixels * signed coefficients, adjacent products summed to words
+    pmaddubsw   m1, m8
+    pmaddubsw   m2, m9
+    pmaddubsw   m3, m9
+    pmaddubsw   m4, m10
+    pmaddubsw   m5, m10
+    pmaddubsw   m6, m11
+    pmaddubsw   m7, m11
+    paddw       m0, m2             ; combine the partial sums of the four tap pairs
+    paddw       m1, m3
+    paddw       m4, m6
+    paddw       m5, m7
+    paddsw      m0, m4             ; the last addition saturates
+    paddsw      m1, m5
+    pmulhrsw    m0, m13            ; rounded scale-down by the m13 multiplier
+    pmulhrsw    m1, m13
+    packuswb    m0, m1             ; clamp to unsigned bytes and merge both halves into one row
+%ifidn %1, avg
+    pavgb       m0, [dstq]         ; avg variant: rounded average with the bytes already in dst
+%endif
+    mova    [dstq], m0             ; aligned store of mmsize bytes
+    add       dstq, dstrideq       ; advance to the next destination row
+    dec         hd                 ; one row per iteration
+    jg .loop
+    RET
+%endmacro
+
+INIT_XMM ssse3
+filter_vx2_fn put
+filter_vx2_fn avg
+
+%endif ; ARCH_X86_64
+
 %macro fpel_fn 6
 %if %2 == 4
 %define %%srcfn movh
@@ -229,11 +348,11 @@ filter_v_fn avg
 %endif
 
 %if %2 <= 16
-cglobal %1%2, 5, 7, 4, dst, src, dstride, sstride, h, dstride3, sstride3
+cglobal vp9_%1%2, 5, 7, 4, dst, dstride, src, sstride, h, dstride3, sstride3
     lea  sstride3q, [sstrideq*3]
     lea  dstride3q, [dstrideq*3]
 %else
-cglobal %1%2, 5, 5, 4, dst, src, dstride, sstride, h
+cglobal vp9_%1%2, 5, 5, 4, dst, dstride, src, sstride, h
 %endif
 .loop:
     %%srcfn     m0, [srcq]
@@ -262,7 +381,7 @@ cglobal %1%2, 5, 5, 4, dst, src, dstride, sstride, h
 INIT_MMX mmx
 fpel_fn put, 4,  strideq, strideq*2, stride3q, 4
 fpel_fn put, 8,  strideq, strideq*2, stride3q, 4
-INIT_MMX sse
+INIT_MMX mmxext
 fpel_fn avg, 4,  strideq, strideq*2, stride3q, 4
 fpel_fn avg, 8,  strideq, strideq*2, stride3q, 4
 INIT_XMM sse
diff --git a/libavcodec/x86/w64xmmtest.c b/libavcodec/x86/w64xmmtest.c
index 2f064ca..25e833f 100644
--- a/libavcodec/x86/w64xmmtest.c
+++ b/libavcodec/x86/w64xmmtest.c
@@ -2,20 +2,20 @@
  * check XMM registers for clobbers on Win64
  * Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -65,6 +65,13 @@ wrap(avcodec_encode_audio2(AVCodecContext *avctx,
                     got_packet_ptr);
 }
 
+wrap(avcodec_encode_video(AVCodecContext *avctx,
+                          uint8_t *buf, int buf_size,
+                          const AVFrame *pict))
+{
+    testxmmclobbers(avcodec_encode_video, avctx, buf, buf_size, pict); /* forwards all four arguments unchanged */
+}
+
 wrap(avcodec_encode_subtitle(AVCodecContext *avctx,
                              uint8_t *buf, int buf_size,
                              const AVSubtitle *sub))
diff --git a/libavcodec/x86/xvididct_init.c b/libavcodec/x86/xvididct_init.c
index 3112fb5..2ea4810 100644
--- a/libavcodec/x86/xvididct_init.c
+++ b/libavcodec/x86/xvididct_init.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/xan.c b/libavcodec/xan.c
index 4bf1d87..7489113 100644
--- a/libavcodec/xan.c
+++ b/libavcodec/xan.c
@@ -2,20 +2,20 @@
  * Wing Commander/Xan Video Decoder
  * Copyright (C) 2003 the ffmpeg project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -54,13 +54,13 @@ typedef struct XanContext {
     AVCodecContext *avctx;
     AVFrame *last_frame;
 
-    const unsigned char *buf;
+    const uint8_t *buf;
     int size;
 
     /* scratch space */
-    unsigned char *buffer1;
+    uint8_t *buffer1;
     int buffer1_size;
-    unsigned char *buffer2;
+    uint8_t *buffer2;
     int buffer2_size;
 
     unsigned *palettes;
@@ -113,22 +113,21 @@ static av_cold int xan_decode_init(AVCodecContext *avctx)
     return 0;
 }
 
-static int xan_huffman_decode(unsigned char *dest, int dest_len,
-                              const unsigned char *src, int src_len)
+static int xan_huffman_decode(uint8_t *dest, int dest_len,
+                              const uint8_t *src, int src_len)
 {
-    unsigned char byte = *src++;
-    unsigned char ival = byte + 0x16;
-    const unsigned char * ptr = src + byte*2;
+    uint8_t byte = *src++;
+    uint8_t ival = byte + 0x16;
+    const uint8_t * ptr = src + byte*2;
     int ptr_len = src_len - 1 - byte*2;
-    unsigned char val = ival;
-    unsigned char *dest_end = dest + dest_len;
-    unsigned char *dest_start = dest;
+    uint8_t val = ival;
+    uint8_t *dest_end = dest + dest_len;
+    uint8_t *dest_start = dest;
+    int ret;
     GetBitContext gb;
 
-    if (ptr_len < 0)
-        return AVERROR_INVALIDDATA;
-
-    init_get_bits(&gb, ptr, ptr_len * 8);
+    if ((ret = init_get_bits8(&gb, ptr, ptr_len)) < 0)
+        return ret;
 
     while (val != 0x16) {
         unsigned idx = val - 0x17 + get_bits1(&gb) * byte;
@@ -152,13 +151,13 @@ static int xan_huffman_decode(unsigned char *dest, int dest_len,
  *
  * @param dest destination buffer of dest_len, must be padded with at least 130 bytes
  */
-static void xan_unpack(unsigned char *dest, int dest_len,
-                       const unsigned char *src, int src_len)
+static void xan_unpack(uint8_t *dest, int dest_len,
+                       const uint8_t *src, int src_len)
 {
-    unsigned char opcode;
+    uint8_t opcode;
     int size;
-    unsigned char *dest_org = dest;
-    unsigned char *dest_end = dest + dest_len;
+    uint8_t *dest_org = dest;
+    uint8_t *dest_end = dest + dest_len;
     GetByteContext ctx;
 
     bytestream2_init(&ctx, src, src_len);
@@ -207,14 +206,14 @@ static void xan_unpack(unsigned char *dest, int dest_len,
 }
 
 static inline void xan_wc3_output_pixel_run(XanContext *s, AVFrame *frame,
-    const unsigned char *pixel_buffer, int x, int y, int pixel_count)
+    const uint8_t *pixel_buffer, int x, int y, int pixel_count)
 {
     int stride;
     int line_inc;
     int index;
     int current_x;
     int width = s->avctx->width;
-    unsigned char *palette_plane;
+    uint8_t *palette_plane;
 
     palette_plane = frame->data[0];
     stride = frame->linesize[0];
@@ -246,7 +245,7 @@ static inline void xan_wc3_copy_pixel_run(XanContext *s, AVFrame *frame,
     int curframe_index, prevframe_index;
     int curframe_x, prevframe_x;
     int width = s->avctx->width;
-    unsigned char *palette_plane, *prev_palette_plane;
+    uint8_t *palette_plane, *prev_palette_plane;
 
     if (y + motion_y < 0 || y + motion_y >= s->avctx->height ||
         x + motion_x < 0 || x + motion_x >= s->avctx->width)
@@ -262,6 +261,12 @@ static inline void xan_wc3_copy_pixel_run(XanContext *s, AVFrame *frame,
     curframe_x = x;
     prevframe_index = (y + motion_y) * stride + x + motion_x;
     prevframe_x = x + motion_x;
+
+    if (prev_palette_plane == palette_plane && FFABS(curframe_index - prevframe_index) < pixel_count) {
+         avpriv_request_sample(s->avctx, "Overlapping copy\n");
+         return ;
+    }
+
     while (pixel_count &&
            curframe_index  < s->frame_size &&
            prevframe_index < s->frame_size) {
@@ -294,22 +299,22 @@ static int xan_wc3_decode_frame(XanContext *s, AVFrame *frame)
     int width  = s->avctx->width;
     int height = s->avctx->height;
     int total_pixels = width * height;
-    unsigned char opcode;
-    unsigned char flag = 0;
+    uint8_t opcode;
+    uint8_t flag = 0;
     int size = 0;
     int motion_x, motion_y;
     int x, y, ret;
 
-    unsigned char *opcode_buffer = s->buffer1;
-    unsigned char *opcode_buffer_end = s->buffer1 + s->buffer1_size;
+    uint8_t *opcode_buffer = s->buffer1;
+    uint8_t *opcode_buffer_end = s->buffer1 + s->buffer1_size;
     int opcode_buffer_size = s->buffer1_size;
-    const unsigned char *imagedata_buffer = s->buffer2;
+    const uint8_t *imagedata_buffer = s->buffer2;
 
     /* pointers to segments inside the compressed chunk */
-    const unsigned char *huffman_segment;
+    const uint8_t *huffman_segment;
     GetByteContext       size_segment;
     GetByteContext       vector_segment;
-    const unsigned char *imagedata_segment;
+    const uint8_t *imagedata_segment;
     int huffman_offset, size_offset, vector_offset, imagedata_offset,
         imagedata_size;
 
@@ -382,16 +387,28 @@ static int xan_wc3_decode_frame(XanContext *s, AVFrame *frame)
 
         case 9:
         case 19:
+            if (bytestream2_get_bytes_left(&size_segment) < 1) {
+                av_log(s->avctx, AV_LOG_ERROR, "size_segment overread\n");
+                return AVERROR_INVALIDDATA;
+            }
             size = bytestream2_get_byte(&size_segment);
             break;
 
         case 10:
         case 20:
+            if (bytestream2_get_bytes_left(&size_segment) < 2) {
+                av_log(s->avctx, AV_LOG_ERROR, "size_segment overread\n");
+                return AVERROR_INVALIDDATA;
+            }
             size = bytestream2_get_be16(&size_segment);
             break;
 
         case 11:
         case 21:
+            if (bytestream2_get_bytes_left(&size_segment) < 3) {
+                av_log(s->avctx, AV_LOG_ERROR, "size_segment overread\n");
+                return AVERROR_INVALIDDATA;
+            }
             size = bytestream2_get_be24(&size_segment);
             break;
         }
@@ -413,8 +430,13 @@ static int xan_wc3_decode_frame(XanContext *s, AVFrame *frame)
                 imagedata_size -= size;
             }
         } else {
+            uint8_t vector;
+            if (bytestream2_get_bytes_left(&vector_segment) <= 0) {
+                av_log(s->avctx, AV_LOG_ERROR, "vector_segment overread\n");
+                return AVERROR_INVALIDDATA;
+            }
             /* run-based motion compensation from last frame */
-            uint8_t vector = bytestream2_get_byte(&vector_segment);
+            vector = bytestream2_get_byte(&vector_segment);
             motion_x = sign_extend(vector >> 4,  4);
             motion_y = sign_extend(vector & 0xF, 4);
 
@@ -534,6 +556,10 @@ static int xan_decode_frame(AVCodecContext *avctx,
         int i;
         tag  = bytestream2_get_le32(&ctx);
         size = bytestream2_get_be32(&ctx);
+        if(size < 0) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid tag size %d\n", size);
+            return AVERROR_INVALIDDATA;
+        }
         size = FFMIN(size, bytestream2_get_bytes_left(&ctx));
         switch (tag) {
         case PALT_TAG:
@@ -557,7 +583,7 @@ static int xan_decode_frame(AVCodecContext *avctx,
                 int g = gamma_lookup[bytestream2_get_byteu(&ctx)];
                 int b = gamma_lookup[bytestream2_get_byteu(&ctx)];
 #endif
-                *tmpptr++ = (r << 16) | (g << 8) | b;
+                *tmpptr++ = (0xFFU << 24) | (r << 16) | (g << 8) | b;
             }
             s->palettes_count++;
             break;
@@ -584,10 +610,8 @@ static int xan_decode_frame(AVCodecContext *avctx,
         return AVERROR_INVALIDDATA;
     }
 
-    if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF))) {
-        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, frame, AV_GET_BUFFER_FLAG_REF)) < 0)
         return ret;
-    }
 
     if (!s->frame_size)
         s->frame_size = frame->linesize[0] * s->avctx->height;
diff --git a/libavcodec/xbmdec.c b/libavcodec/xbmdec.c
index c26f343..143e3a2 100644
--- a/libavcodec/xbmdec.c
+++ b/libavcodec/xbmdec.c
@@ -1,20 +1,22 @@
 /*
  * XBM image format
  *
- * This file is part of Libav.
+ * Copyright (c) 2012 Paul B Mahol
  *
- * Libav is free software; you can redistribute it and/or
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -24,43 +26,54 @@
 #include "internal.h"
 #include "mathops.h"
 
+/* Map an ASCII hex digit to 0..15; other bytes give a meaningless value in 0..255. */
+static int convert(uint8_t x)
+{
+    uint8_t base = '0';
+    if (x >= 'a')
+        base = 87; /* 'a' - 10 in ASCII */
+    else if (x >= 'A')
+        base = 55; /* 'A' - 10 in ASCII */
+    return (uint8_t)(x - base);
+}
+
+/* Return the first integer strtol() parses after `key` in p[0..len), else INT_MIN. */
+static int parse_str_int(const uint8_t *p, int len, const uint8_t *key)
+{
+    const uint8_t *end = p + len;
+    const size_t keylen = strlen(key); /* hoisted out of the scan loop */
+    /* too short for key plus a value; also never forms end - keylen before p */
+    if (len < 0 || (size_t)len <= keylen)
+        return INT_MIN;
+    for (; p < end - keylen; p++)
+        if (!memcmp(p, key, keylen))
+            break;
+    p += keylen; /* key not found: p == end, so the scan below is skipped */
+    for (; p < end; p++) {
+        char *eptr;
+        int64_t ret = strtol(p, &eptr, 10); /* NOTE(review): relies on a terminator past end - confirm */
+        if ((const uint8_t *)eptr != p)
+            return ret;
+    }
+    return INT_MIN;
+}
+
 static int xbm_decode_frame(AVCodecContext *avctx, void *data,
                             int *got_frame, AVPacket *avpkt)
 {
     AVFrame *p = data;
-    int ret, linesize, i;
+    int ret, linesize, i, j;
     int width  = 0;
     int height = 0;
-    const uint8_t *ptr = avpkt->data;
+    const uint8_t *end, *ptr = avpkt->data;
+    const uint8_t *next;
     uint8_t *dst;
 
     avctx->pix_fmt = AV_PIX_FMT_MONOWHITE;
-    while (!width || !height) {
-        ptr += strcspn(ptr, "#");
-        if (ptr >= avpkt->data + avpkt->size) {
-            av_log(avctx, AV_LOG_ERROR, "End of file reached.\n");
-            return AVERROR_INVALIDDATA;
-        }
-        if (strncmp(ptr, "#define", 7) != 0) {
-            av_log(avctx, AV_LOG_ERROR,
-                   "Unexpected preprocessor directive.\n");
-            return AVERROR_INVALIDDATA;
-        }
-        // skip the name
-        ptr += strcspn(ptr, "_") + 1;
-        // get width or height
-        if (strncmp(ptr, "width", 5) == 0) {
-            ptr += strcspn(ptr, " ");
-            width = strtol(ptr, NULL, 10);
-        } else if (strncmp(ptr, "height", 6) == 0) {
-            ptr += strcspn(ptr, " ");
-            height = strtol(ptr, NULL, 10);
-        } else {
-            // skip offset and unknown variables
-            av_log(avctx, AV_LOG_VERBOSE,
-                   "Ignoring preprocessor directive.\n");
-        }
-    }
+    end = avpkt->data + avpkt->size;
+
+    width  = parse_str_int(avpkt->data, avpkt->size, "_width");
+    height = parse_str_int(avpkt->data, avpkt->size, "_height");
 
     if ((ret = ff_set_dimensions(avctx, width, height)) < 0)
         return ret;
@@ -68,46 +81,48 @@ static int xbm_decode_frame(AVCodecContext *avctx, void *data,
     if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
         return ret;
 
-    // go to start of image data
-    ptr += strcspn(ptr, "{");
+    // goto start of image data
+    next = memchr(ptr, '{', avpkt->size);
+    if (!next)
+        next = memchr(ptr, '(', avpkt->size);
+    if (!next)
+        return AVERROR_INVALIDDATA;
+    ptr = next + 1;
 
     linesize = (avctx->width + 7) / 8;
     for (i = 0; i < avctx->height; i++) {
-        int eol = 0, e = 0;
         dst = p->data[0] + i * p->linesize[0];
-        if (ptr >= avpkt->data + avpkt->size) {
-            av_log(avctx, AV_LOG_ERROR, "End of file reached.\n");
-            return AVERROR_INVALIDDATA;
-        }
-        do {
-            int val;
-            uint8_t *endptr;
+        for (j = 0; j < linesize; j++) {
+            uint8_t val;
 
-            ptr += strcspn(ptr, "x") - 1; // -1 to get 0x
-            val = strtol(ptr, (char **)&endptr, 16);
+            while (ptr < end && *ptr != 'x' && *ptr != '$')
+                ptr++;
 
-            if (endptr - ptr == 4) {
-                // XBM X11 format
+            ptr ++;
+            if (ptr < end && av_isxdigit(*ptr)) {
+                val = convert(*ptr++);
+                if (av_isxdigit(*ptr))
+                    val = (val << 4) + convert(*ptr++);
                 *dst++ = ff_reverse[val];
-                eol = linesize;
-            } else if (endptr - ptr == 6) {
-                // XBM X10 format
-                *dst++ = ff_reverse[val >> 8];
-                *dst++ = ff_reverse[val & 0xFF];
-                eol = linesize / 2; // 2 bytes read
+                if (av_isxdigit(*ptr) && j+1 < linesize) {
+                    j++;
+                    val = convert(*ptr++);
+                    if (av_isxdigit(*ptr))
+                        val = (val << 4) + convert(*ptr++);
+                    *dst++ = ff_reverse[val];
+                }
             } else {
                 av_log(avctx, AV_LOG_ERROR,
                        "Unexpected data at %.8s.\n", ptr);
                 return AVERROR_INVALIDDATA;
             }
-            ptr = endptr;
-        } while (++e < eol);
+        }
     }
 
     p->key_frame = 1;
     p->pict_type = AV_PICTURE_TYPE_I;
 
-    *got_frame = 1;
+    *got_frame       = 1;
 
     return avpkt->size;
 }
diff --git a/libavcodec/xbmenc.c b/libavcodec/xbmenc.c
index 517e569..a752bdf 100644
--- a/libavcodec/xbmenc.c
+++ b/libavcodec/xbmenc.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2012 Paul B Mahol
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -24,16 +24,6 @@
 #include "internal.h"
 #include "mathops.h"
 
-static av_cold int xbm_encode_init(AVCodecContext *avctx)
-{
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
-
-    return 0;
-}
-
 static int xbm_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                             const AVFrame *p, int *got_packet)
 {
@@ -42,10 +32,8 @@ static int xbm_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
 
     linesize = (avctx->width + 7) / 8;
     size     = avctx->height * (linesize * 7 + 2) + 110;
-    if ((ret = ff_alloc_packet(pkt, size)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting output packet.\n");
+    if ((ret = ff_alloc_packet2(avctx, pkt, size)) < 0)
         return ret;
-    }
 
     buf = pkt->data;
     ptr = p->data[0];
@@ -67,21 +55,12 @@ static int xbm_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     return 0;
 }
 
-static av_cold int xbm_encode_close(AVCodecContext *avctx)
-{
-    av_frame_free(&avctx->coded_frame);
-
-    return 0;
-}
-
 AVCodec ff_xbm_encoder = {
     .name         = "xbm",
     .long_name    = NULL_IF_CONFIG_SMALL("XBM (X BitMap) image"),
     .type         = AVMEDIA_TYPE_VIDEO,
     .id           = AV_CODEC_ID_XBM,
-    .init         = xbm_encode_init,
     .encode2      = xbm_encode_frame,
-    .close        = xbm_encode_close,
     .pix_fmts     = (const enum AVPixelFormat[]) { AV_PIX_FMT_MONOWHITE,
-                                                 AV_PIX_FMT_NONE },
+                                                   AV_PIX_FMT_NONE },
 };
diff --git a/libavcodec/xface.c b/libavcodec/xface.c
new file mode 100644
index 0000000..0ebf2f2
--- /dev/null
+++ b/libavcodec/xface.c
@@ -0,0 +1,381 @@
+/*
+ * Copyright (c) 1990 James Ashton - Sydney University
+ * Copyright (c) 2012 Stefano Sabatini
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * X-Face common data and utilities definition.
+ */
+
+#include "xface.h"
+
+/*
+ * Add the small value a to the multi-word integer b in place.
+ * words[0] is the least significant word; a carry out of the last
+ * used word is stored as a new word and nb_words is incremented.
+ */
+void ff_big_add(BigInt *b, uint8_t a)
+{
+    uint16_t carry = a & XFACE_WORDMASK;
+    int pos = 0;
+
+    /* ripple the carry upwards until it dies out or the words run out */
+    while (pos < b->nb_words && carry) {
+        carry += b->words[pos];
+        b->words[pos++] = carry & XFACE_WORDMASK;
+        carry >>= XFACE_BITSPERWORD;
+    }
+    if (carry && pos == b->nb_words) {
+        b->nb_words++;
+        b->words[pos] = carry & XFACE_WORDMASK;
+    }
+}
+
+/*
+ * Divide the multi-word integer b by a in place; the remainder goes to *r.
+ * a == 0 stands for XFACE_WORDCARRY: the words are shifted down by one and
+ * the dropped low word is the remainder. For a == 1 or an empty b nothing
+ * is changed and *r is set to 0.
+ */
+void ff_big_div(BigInt *b, uint8_t a, uint8_t *r)
+{
+    uint16_t rem = 0;
+    int pos;
+
+    a &= XFACE_WORDMASK;
+    if (a == 1 || b->nb_words == 0) {
+        *r = 0;
+        return;
+    }
+
+    if (a == 0) {
+        /* division by the word base: drop words[0], move the rest down */
+        uint8_t *w = b->words;
+        pos = --b->nb_words;
+        *r = w[0];
+        for (; pos--; w++)
+            w[0] = w[1];
+        *w = 0;
+        return;
+    }
+
+    /* long division, starting from the most significant word */
+    for (pos = b->nb_words; pos--; ) {
+        rem = (rem << XFACE_BITSPERWORD) + b->words[pos];
+        b->words[pos] = (rem / a) & XFACE_WORDMASK;
+        rem %= a;
+    }
+    *r = rem;
+    if (b->words[b->nb_words - 1] == 0)
+        b->nb_words--;
+}
+
+/*
+ * Multiply the multi-word integer b by a in place.
+ * a == 0 stands for XFACE_WORDCARRY: the words are shifted up by one and
+ * words[0] is cleared. For a == 1 or an empty b nothing is changed.
+ */
+void ff_big_mul(BigInt *b, uint8_t a)
+{
+    uint16_t carry = 0;
+    int pos, left;
+
+    a &= XFACE_WORDMASK;
+    if (a == 1 || b->nb_words == 0)
+        return;
+    if (a == 0) {
+        /* multiplication by the word base: move every word up one slot */
+        for (pos = b->nb_words++; pos; pos--)
+            b->words[pos] = b->words[pos - 1];
+        b->words[0] = 0;
+        return;
+    }
+
+    /* multiply word by word, least significant first, rippling the carry */
+    for (pos = 0, left = b->nb_words; left--; pos++) {
+        carry += b->words[pos] * a;
+        b->words[pos] = carry & XFACE_WORDMASK;
+        carry >>= XFACE_BITSPERWORD;
+    }
+    /* a non-zero final carry becomes a new most significant word */
+    if (carry) {
+        b->nb_words++;
+        b->words[pos] = carry & XFACE_WORDMASK;
+    }
+}
+
+const ProbRange ff_xface_probranges_per_level[4][3] = {
+    //  black      grey       white
+    { {  1, 255}, {251, 0}, {  4, 251} }, /* Top of tree almost always grey */
+    { {  1, 255}, {200, 0}, { 55, 200} },
+    { { 33, 223}, {159, 0}, { 64, 159} },
+    { {131,   0}, {  0, 0}, {125, 131} }, /* Grey disallowed at bottom */
+};
+
+const ProbRange ff_xface_probranges_2x2[16] = {
+    { 0,   0},  {38,   0}, {38,  38},  {13, 152},
+    {38,  76},  {13, 165}, {13, 178},  { 6, 230},
+    {38, 114},  {13, 191}, {13, 204},  { 6, 236},
+    {13, 217},  { 6, 242}, { 5, 248},  { 3, 253},
+};
+
+/*
+ * The "guess the next pixel" tables follow. Normally there are 12
+ * neighbour pixels used, giving 1<<12 cases; as we get closer to the
+ * upper left corner, fewer neighbours are available.
+ *
+ * Each byte in the tables represents 8 boolean values starting from
+ * the most significant bit.
+ */
+
+static const uint8_t g_00[] = {
+    0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0xe3, 0xdf, 0x05, 0x17,
+    0x05, 0x0f, 0x00, 0x1b, 0x0f, 0xdf, 0x00, 0x04, 0x00, 0x00,
+    0x0d, 0x0f, 0x03, 0x7f, 0x00, 0x00, 0x00, 0x01, 0x00, 0x1d,
+    0x45, 0x2f, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x0a, 0xff, 0xff,
+    0x00, 0x04, 0x00, 0x05, 0x01, 0x3f, 0xcf, 0xff, 0x10, 0x01,
+    0x80, 0xc9, 0x0f, 0x0f, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
+    0x1b, 0x1f, 0xff, 0xff, 0x4f, 0x54, 0x07, 0x1f, 0x57, 0x47,
+    0xd7, 0x3d, 0xff, 0xff, 0x5f, 0x1f, 0x7f, 0xff, 0x7f, 0x7f,
+    0x05, 0x0f, 0x01, 0x0f, 0x0f, 0x5f, 0x9b, 0xdf, 0x7f, 0xff,
+    0x5f, 0x1d, 0x5f, 0xff, 0x0f, 0x1f, 0x0f, 0x5f, 0x03, 0x1f,
+    0x4f, 0x5f, 0xf7, 0x7f, 0x7f, 0xff, 0x0d, 0x0f, 0xfb, 0xff,
+    0xf7, 0xbf, 0x0f, 0x4f, 0xd7, 0x3f, 0x4f, 0x7f, 0xff, 0xff,
+    0x67, 0xbf, 0x56, 0x25, 0x1f, 0x7f, 0x9f, 0xff, 0x00, 0x00,
+    0x00, 0x05, 0x5f, 0x7f, 0x01, 0xdf, 0x14, 0x00, 0x05, 0x0f,
+    0x07, 0xa2, 0x09, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x5f,
+    0x18, 0xd7, 0x94, 0x71, 0x00, 0x05, 0x1f, 0xb7, 0x0c, 0x07,
+    0x0f, 0x0f, 0x00, 0x0f, 0x0f, 0x1f, 0x84, 0x8f, 0x05, 0x15,
+    0x05, 0x0f, 0x4f, 0xff, 0x87, 0xdf, 0x05, 0x01, 0x10, 0x00,
+    0x0f, 0x0f, 0x00, 0x08, 0x05, 0x04, 0x04, 0x01, 0x4f, 0xff,
+    0x9f, 0x8f, 0x4a, 0x40, 0x5f, 0x5f, 0xff, 0xfe, 0xdf, 0xff,
+    0x7f, 0xf7, 0xff, 0x7f, 0xff, 0xff, 0x7b, 0xff, 0x0f, 0xfd,
+    0xd7, 0x5f, 0x4f, 0x7f, 0x7f, 0xdf, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0x77, 0xdf, 0x7f, 0x4f, 0xef, 0xff, 0xff, 0x77, 0xff,
+    0xff, 0xff, 0x6f, 0xff, 0x0f, 0x4f, 0xff, 0xff, 0x9d, 0xff,
+    0x0f, 0xef, 0xff, 0xdf, 0x6f, 0xff, 0xff, 0xff, 0x4f, 0xff,
+    0xcd, 0x0f, 0x4f, 0xff, 0xff, 0xdf, 0x00, 0x00, 0x00, 0x0b,
+    0x05, 0x02, 0x02, 0x0f, 0x04, 0x00, 0x00, 0x0c, 0x01, 0x06,
+    0x00, 0x0f, 0x20, 0x03, 0x00, 0x00, 0x05, 0x0f, 0x40, 0x08,
+    0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x0c, 0x0f, 0x01, 0x00,
+    0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x14, 0x01, 0x05,
+    0x01, 0x15, 0xaf, 0x0f, 0x00, 0x01, 0x10, 0x00, 0x08, 0x00,
+    0x46, 0x0c, 0x20, 0x00, 0x88, 0x00, 0x0f, 0x15, 0xff, 0xdf,
+    0x02, 0x00, 0x00, 0x0f, 0x7f, 0x5f, 0xdb, 0xff, 0x4f, 0x3e,
+    0x05, 0x0f, 0x7f, 0xf7, 0x95, 0x4f, 0x0d, 0x0f, 0x01, 0x0f,
+    0x4f, 0x5f, 0x9f, 0xdf, 0x25, 0x0e, 0x0d, 0x0d, 0x4f, 0x7f,
+    0x8f, 0x0f, 0x0f, 0xfa, 0x04, 0x4f, 0x4f, 0xff, 0xf7, 0x77,
+    0x47, 0xed, 0x05, 0x0f, 0xff, 0xff, 0xdf, 0xff, 0x4f, 0x6f,
+    0xd8, 0x5f, 0x0f, 0x7f, 0xdf, 0x5f, 0x07, 0x0f, 0x94, 0x0d,
+    0x1f, 0xff, 0xff, 0xff, 0x00, 0x02, 0x00, 0x03, 0x46, 0x57,
+    0x01, 0x0d, 0x01, 0x08, 0x01, 0x0f, 0x47, 0x6c, 0x0d, 0x0f,
+    0x02, 0x00, 0x00, 0x00, 0x0b, 0x4f, 0x00, 0x08, 0x05, 0x00,
+    0x95, 0x01, 0x0f, 0x7f, 0x0c, 0x0f, 0x01, 0x0e, 0x00, 0x00,
+    0x0f, 0x41, 0x00, 0x00, 0x04, 0x24, 0x0d, 0x0f, 0x0f, 0x7f,
+    0xcf, 0xdf, 0x00, 0x00, 0x00, 0x00, 0x04, 0x40, 0x00, 0x00,
+    0x06, 0x26, 0xcf, 0x05, 0xcf, 0x7f, 0xdf, 0xdf, 0x00, 0x00,
+    0x17, 0x5f, 0xff, 0xfd, 0xff, 0xff, 0x46, 0x09, 0x4f, 0x5f,
+    0x7f, 0xfd, 0xdf, 0xff, 0x0a, 0x88, 0xa7, 0x7f, 0x7f, 0xff,
+    0xff, 0xff, 0x0f, 0x04, 0xdf, 0x7f, 0x4f, 0xff, 0x9f, 0xff,
+    0x0e, 0xe6, 0xdf, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x0f, 0xec,
+    0x8f, 0x4f, 0x7f, 0xff, 0xdf, 0xff, 0x0f, 0xcf, 0xdf, 0xff,
+    0x6f, 0x7f, 0xff, 0xff, 0x03, 0x0c, 0x9d, 0x0f, 0x7f, 0xff,
+    0xff, 0xff,
+};
+
+static const uint8_t g_01[] = {
+    0x37, 0x73, 0x00, 0x19, 0x57, 0x7f, 0xf5, 0xfb, 0x70, 0x33,
+    0xf0, 0xf9, 0x7f, 0xff, 0xff, 0xff,
+};
+
+static const uint8_t g_02[] = {
+    0x50,
+};
+
+static const uint8_t g_10[] = {
+    0x00, 0x00, 0x00, 0x00, 0x50, 0x00, 0xf3, 0x5f, 0x84, 0x04,
+    0x17, 0x9f, 0x04, 0x23, 0x05, 0xff, 0x00, 0x00, 0x00, 0x02,
+    0x03, 0x03, 0x33, 0xd7, 0x05, 0x03, 0x5f, 0x3f, 0x17, 0x33,
+    0xff, 0xff, 0x00, 0x80, 0x02, 0x04, 0x12, 0x00, 0x11, 0x57,
+    0x05, 0x25, 0x05, 0x03, 0x35, 0xbf, 0x9f, 0xff, 0x07, 0x6f,
+    0x20, 0x40, 0x17, 0x06, 0xfa, 0xe8, 0x01, 0x07, 0x1f, 0x9f,
+    0x1f, 0xff, 0xff, 0xff,
+};
+
+static const uint8_t g_20[] = {
+    0x04, 0x00, 0x01, 0x01, 0x43, 0x2e, 0xff, 0x3f,
+};
+
+static const uint8_t g_30[] = {
+    0x11, 0x11, 0x11, 0x11, 0x51, 0x11, 0x13, 0x11, 0x11, 0x11,
+    0x13, 0x11, 0x11, 0x11, 0x33, 0x11, 0x13, 0x11, 0x13, 0x13,
+    0x13, 0x13, 0x31, 0x31, 0x11, 0x01, 0x11, 0x11, 0x71, 0x11,
+    0x11, 0x75,
+};
+
+static const uint8_t g_40[] = {
+    0x00, 0x0f, 0x00, 0x09, 0x00, 0x0d, 0x00, 0x0d, 0x00, 0x0f,
+    0x00, 0x4e, 0xe4, 0x0d, 0x10, 0x0f, 0x00, 0x0f, 0x44, 0x4f,
+    0x00, 0x1e, 0x0f, 0x0f, 0xae, 0xaf, 0x45, 0x7f, 0xef, 0xff,
+    0x0f, 0xff, 0x00, 0x09, 0x01, 0x11, 0x00, 0x01, 0x1c, 0xdd,
+    0x00, 0x15, 0x00, 0xff, 0x00, 0x10, 0x00, 0xfd, 0x00, 0x0f,
+    0x4f, 0x5f, 0x3d, 0xff, 0xff, 0xff, 0x4f, 0xff, 0x1c, 0xff,
+    0xdf, 0xff, 0x8f, 0xff, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x15,
+    0x01, 0x07, 0x00, 0x01, 0x02, 0x1f, 0x01, 0x11, 0x05, 0x7f,
+    0x00, 0x1f, 0x41, 0x57, 0x1f, 0xff, 0x05, 0x77, 0x0d, 0x5f,
+    0x4d, 0xff, 0x4f, 0xff, 0x0f, 0xff, 0x00, 0x00, 0x02, 0x05,
+    0x00, 0x11, 0x05, 0x7d, 0x10, 0x15, 0x2f, 0xff, 0x40, 0x50,
+    0x0d, 0xfd, 0x04, 0x0f, 0x07, 0x1f, 0x07, 0x7f, 0x0f, 0xbf,
+    0x0d, 0x7f, 0x0f, 0xff, 0x4d, 0x7d, 0x0f, 0xff,
+};
+
+static const uint8_t g_11[] = {
+    0x01, 0x13, 0x03, 0x7f,
+};
+
+static const uint8_t g_21[] = {
+    0x17,
+};
+
+static const uint8_t g_31[] = {
+    0x55, 0x57, 0x57, 0x7f,
+};
+
+static const uint8_t g_41[] = {
+    0x01, 0x01, 0x01, 0x1f, 0x03, 0x1f, 0x3f, 0xff,
+};
+
+static const uint8_t g_12[] = {
+    0x40,
+};
+
+static const uint8_t g_22[] = {
+    0x00,
+};
+
+static const uint8_t g_32[] = {
+    0x10,
+};
+
+static const uint8_t g_42[] = {
+    0x10,
+};
+
+void ff_xface_generate_face(uint8_t *dst, uint8_t * const src)
+{
+    int h, i, j, k, l, m;
+    /* For every pixel, XOR into dst[h] the bit guessed from the neighbouring pixels of src. */
+    for (j = 0; j < XFACE_HEIGHT; j++) {
+        for (i = 0; i < XFACE_WIDTH; i++) {
+            h = i + j * XFACE_WIDTH;
+            k = 0;
+
+            /*
+               Compute k, encoding the bits *before* the current one, contained in the
+               image buffer. That is, given the grid:
+
+                l      i
+                |      |
+                v      v
+               +--+--+--+--+--+
+          m -> | 1| 2| 3| 4| 5|
+               +--+--+--+--+--+
+               | 6| 7| 8| 9|10|
+               +--+--+--+--+--+
+          j -> |11|12| *|  |  |
+               +--+--+--+--+--+
+
+               the value k for the pixel marked as "*" will contain the bit encoding of
+               the values in the matrix marked from "1" to "12". In case the pixel is
+               near the border of the grid, the number of values contained within the
+               grid will be less than 12.
+             */
+
+            for (l = i - 2; l <= i + 2; l++) {
+                for (m = j - 2; m <= j; m++) {
+                    if (l >= i && m == j)
+                        continue;
+                    if (l > 0 && l <= XFACE_WIDTH && m > 0)
+                        k = 2*k + src[l + m * XFACE_WIDTH];
+                }
+            }
+
+            /*
+              Use the guess for the given position and the computed value of k.
+
+              The following table shows the number of digits in k, depending on
+              the position of the pixel, and shows the corresponding guess table
+              to use:
+
+                 i=1  i=2  i=3       i=w-1 i=w
+               +----+----+----+ ... +----+----+
+           j=1 |  0 |  1 |  2 |     |  2 |  2 |
+               |g22 |g12 |g02 |     |g42 |g32 |
+               +----+----+----+ ... +----+----+
+           j=2 |  3 |  5 |  7 |     |  6 |  5 |
+               |g21 |g11 |g01 |     |g41 |g31 |
+               +----+----+----+ ... +----+----+
+           j=3 |  5 |  9 | 12 |     | 10 |  8 |
+               |g20 |g10 |g00 |     |g40 |g30 |
+               +----+----+----+ ... +----+----+
+            */
+            /* GEN: XOR bit number k of the table (MSB first within each byte) into dst[h] */
+#define GEN(table) dst[h] ^= (table[k>>3]>>(7-(k&7)))&1
+
+            switch (i) {
+            case 1:
+                switch (j) {
+                case 1:  GEN(g_22); break;
+                case 2:  GEN(g_21); break;
+                default: GEN(g_20); break;
+                }
+                break;
+            case 2:
+                switch (j) {
+                case 1:  GEN(g_12); break;
+                case 2:  GEN(g_11); break;
+                default: GEN(g_10); break;
+                }
+                break;
+            case XFACE_WIDTH - 1:
+                switch (j) {
+                case 1:  GEN(g_42); break;
+                case 2:  GEN(g_41); break;
+                default: GEN(g_40); break;
+                }
+                break;
+            case XFACE_WIDTH: /* NOTE(review): the loop keeps i < XFACE_WIDTH, so this case looks unreachable - confirm before changing */
+                switch (j) {
+                case 1:  GEN(g_32); break;
+                case 2:  GEN(g_31); break;
+                default: GEN(g_30); break;
+                }
+                break;
+            default:
+                switch (j) {
+                case 1:  GEN(g_02); break;
+                case 2:  GEN(g_01); break;
+                default: GEN(g_00); break;
+                }
+                break;
+            }
+        }
+    }
+}
diff --git a/libavcodec/xface.h b/libavcodec/xface.h
new file mode 100644
index 0000000..cd59ba0
--- /dev/null
+++ b/libavcodec/xface.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 1990 James Ashton - Sydney University
+ * Copyright (c) 2012 Stefano Sabatini
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * X-Face common definitions.
+ */
+
+#include <stdint.h>
+
+/* define the face size - 48x48x1 */
+#define XFACE_WIDTH  48
+#define XFACE_HEIGHT 48
+#define XFACE_PIXELS (XFACE_WIDTH * XFACE_HEIGHT)
+
+/* compressed output uses the full range of printable characters.
+ * In ASCII these are in a contiguous block so we just need to know
+ * the first and last. The total number of printables is needed too. */
+#define XFACE_FIRST_PRINT '!'
+#define XFACE_LAST_PRINT '~'
+#define XFACE_PRINTS (XFACE_LAST_PRINT - XFACE_FIRST_PRINT + 1)
+
+/*
+ * Image is encoded as a big integer, using characters from '~' to
+ * '!', for a total of 94 symbols (XFACE_PRINTS). In order to express
+ * 48x48=2304 bits, ceil(lg_94(2^2304)) = 352 digits are needed, so
+ * the 354 digits allowed below are enough for that many bits.
+ */
+#define XFACE_MAX_DIGITS 354
+
+#define XFACE_BITSPERWORD 8
+#define XFACE_WORDCARRY (1 << XFACE_BITSPERWORD)
+#define XFACE_WORDMASK (XFACE_WORDCARRY - 1)
+
+#define XFACE_MAX_WORDS ((XFACE_PIXELS * 2 + XFACE_BITSPERWORD - 1) / XFACE_BITSPERWORD)
+
+/* Portable, very large unsigned integer arithmetic is needed.
+ * Implementation uses arrays of WORDs. */
+typedef struct {
+    int nb_words;                   ///< presumably the number of words in use; the encoder divides until it is 0
+    uint8_t words[XFACE_MAX_WORDS]; ///< word storage; layout is defined by the ff_big_* helpers
+} BigInt;
+
+/**
+ * Add a to b storing the result in b.
+ */
+void ff_big_add(BigInt *b, uint8_t a);
+
+/**
+ * Divide b by a storing the result in b and the remainder in the word
+ * pointed to by r. Callers pass a = 0 to move the last byte of b into r.
+ */
+void ff_big_div(BigInt *b, uint8_t a, uint8_t *r);
+
+/**
+ * Multiply a by b storing the result in b.
+ */
+void ff_big_mul(BigInt *b, uint8_t a);
+
+/* Each face is encoded using 9 octrees of 16x16 each. Each level of the
+ * trees has varying probabilities of being white, grey or black.
+ * The table below is based on sampling many faces */
+enum XFaceColor { XFACE_COLOR_BLACK = 0, XFACE_COLOR_GREY, XFACE_COLOR_WHITE };
+
+/* Data of varying probabilities are encoded by a value in the range 0 - 255.
+ * The probability of the data determines the range of possible encodings.
+ * Offset gives the first possible encoding of the range. */
+typedef struct {
+    int range;
+    int offset;
+} ProbRange;
+
+extern const ProbRange ff_xface_probranges_per_level[4][3];
+
+extern const ProbRange ff_xface_probranges_2x2[16];
+
+void ff_xface_generate_face(uint8_t *dst, uint8_t * const src);
diff --git a/libavcodec/xfacedec.c b/libavcodec/xfacedec.c
new file mode 100644
index 0000000..d045cb6
--- /dev/null
+++ b/libavcodec/xfacedec.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 1990 James Ashton - Sydney University
+ * Copyright (c) 2012 Stefano Sabatini
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * X-Face decoder, based on libcompface, by James Ashton.
+ */
+
+#include "libavutil/pixdesc.h"
+#include "avcodec.h"
+#include "bytestream.h"
+#include "internal.h"
+#include "xface.h"
+
+/* Pop one symbol from b and return the index of the pranges entry coding it.
+ * The scan has no end check: pranges must cover every possible byte value. */
+static int pop_integer(BigInt *b, const ProbRange *pranges)
+{
+    const ProbRange *pr = pranges;
+    uint8_t byte;
+
+    /* a divisor of 0 extracts the last byte of b and shifts b right by 8 bits */
+    ff_big_div(b, 0, &byte);
+
+    /* advance to the entry whose [offset, offset + range) interval holds byte */
+    while (!(byte >= pr->offset && byte < pr->range + pr->offset))
+        pr++;
+    ff_big_mul(b, pr->range);
+    ff_big_add(b, byte - pr->offset);
+    return (int)(pr - pranges);
+}
+
+/* Fill a block pixel by pixel: recurse by quadrants down to 2x2 cells, each coded as a 4-bit mask. */
+static void pop_greys(BigInt *b, char *bitmap, int w, int h)
+{
+    const int hw = w / 2, hh = h / 2;
+    if (w <= 3) {
+        const int mask = pop_integer(b, ff_xface_probranges_2x2);
+        if (mask & 1) bitmap[0]               = 1;
+        if (mask & 2) bitmap[1]               = 1;
+        if (mask & 4) bitmap[XFACE_WIDTH]     = 1;
+        if (mask & 8) bitmap[XFACE_WIDTH + 1] = 1;
+        return;
+    }
+    pop_greys(b, bitmap,                         hw, hh);
+    pop_greys(b, bitmap + hw,                    hw, hh);
+    pop_greys(b, bitmap + XFACE_WIDTH * hh,      hw, hh);
+    pop_greys(b, bitmap + XFACE_WIDTH * hh + hw, hw, hh);
+}
+
+/* Decode one block: WHITE leaves it untouched, BLACK reads its pixels with
+ * pop_greys(), any other value (GREY) recurses into the four quadrants. */
+static void decode_block(BigInt *b, char *bitmap, int w, int h, int level)
+{
+    const int color = pop_integer(b, &ff_xface_probranges_per_level[level][0]);
+    const int hw = w / 2, hh = h / 2;
+
+    if (color == XFACE_COLOR_WHITE)
+        return;
+    if (color == XFACE_COLOR_BLACK) {
+        pop_greys(b, bitmap, w, h);
+        return;
+    }
+
+    decode_block(b, bitmap,                         hw, hh, level + 1);
+    decode_block(b, bitmap + hw,                    hw, hh, level + 1);
+    decode_block(b, bitmap + hh * XFACE_WIDTH,      hw, hh, level + 1);
+    decode_block(b, bitmap + hw + hh * XFACE_WIDTH, hw, hh, level + 1);
+}
+
+typedef struct XFaceContext {
+    uint8_t bitmap[XFACE_PIXELS]; ///< image used internally for decoding, one byte per pixel (1=black, 0=white)
+} XFaceContext;
+
+/* Accept only an unset (0x0) or XFACE_WIDTH x XFACE_HEIGHT size, then force
+ * that size and the MONOWHITE pixel format on the context. */
+static av_cold int xface_decode_init(AVCodecContext *avctx)
+{
+    if ((avctx->width || avctx->height) &&
+        (avctx->width != XFACE_WIDTH || avctx->height != XFACE_HEIGHT)) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Size value %dx%d not supported, only accepts a size of %dx%d\n",
+               avctx->width, avctx->height, XFACE_WIDTH, XFACE_HEIGHT);
+        return AVERROR(EINVAL);
+    }
+
+    avctx->pix_fmt = AV_PIX_FMT_MONOWHITE;
+    avctx->width   = XFACE_WIDTH;
+    avctx->height  = XFACE_HEIGHT;
+    return 0;
+}
+
+/* Decode one packet (a big integer written with printable ASCII digits) into
+ * a MONOWHITE frame. Returns avpkt->size, or the ff_get_buffer() error code. */
+static int xface_decode_frame(AVCodecContext *avctx, void *data,
+                              int *got_frame, AVPacket *avpkt)
+{
+    XFaceContext *xface = avctx->priv_data;
+    int ret, i, j, k;
+    uint8_t byte;
+    BigInt b = {0};
+    char *buf;
+    AVFrame *frame = data;
+
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+
+    /* read digits until NUL or end of packet; the bound is tested before data[i] is read */
+    for (i = 0, k = 0; i < avpkt->size && avpkt->data[i]; i++) {
+        int64_t c = avpkt->data[i];
+        /* ignore invalid digits */
+        if (c < XFACE_FIRST_PRINT || c > XFACE_LAST_PRINT)
+            continue;
+
+        if (++k > XFACE_MAX_DIGITS) {
+            av_log(avctx, AV_LOG_WARNING,
+                   "Buffer is longer than expected, truncating at byte %d\n", i);
+            break;
+        }
+        ff_big_mul(&b, XFACE_PRINTS);
+        ff_big_add(&b, c - XFACE_FIRST_PRINT);
+    }
+
+    /* decode image and put it in bitmap: nine 16x16 blocks, row by row */
+    memset(xface->bitmap, 0, XFACE_PIXELS);
+    buf = xface->bitmap;
+    decode_block(&b, buf,                         16, 16, 0);
+    decode_block(&b, buf + 16,                    16, 16, 0);
+    decode_block(&b, buf + 32,                    16, 16, 0);
+    decode_block(&b, buf + XFACE_WIDTH * 16,      16, 16, 0);
+    decode_block(&b, buf + XFACE_WIDTH * 16 + 16, 16, 16, 0);
+    decode_block(&b, buf + XFACE_WIDTH * 16 + 32, 16, 16, 0);
+    decode_block(&b, buf + XFACE_WIDTH * 32     , 16, 16, 0);
+    decode_block(&b, buf + XFACE_WIDTH * 32 + 16, 16, 16, 0);
+    decode_block(&b, buf + XFACE_WIDTH * 32 + 32, 16, 16, 0);
+
+    ff_xface_generate_face(xface->bitmap, xface->bitmap);
+
+    /* convert image from 1=black 0=white bitmap to MONOWHITE */
+    buf = frame->data[0];
+    for (i = 0, j = 0, k = 0, byte = 0; i < XFACE_PIXELS; i++) {
+        byte += xface->bitmap[i];
+        if (k == 7) {
+            buf[j++] = byte; /* eight pixels collected, first pixel in the top bit */
+            byte = k = 0;
+        } else {
+            k++;
+            byte <<= 1;
+        }
+        if (j == XFACE_WIDTH/8) { /* row complete: move to the next output line */
+            j = 0;
+            buf += frame->linesize[0];
+        }
+    }
+
+    *got_frame = 1;
+
+    return avpkt->size;
+}
+
+AVCodec ff_xface_decoder = {
+    .name           = "xface",
+    .long_name      = NULL_IF_CONFIG_SMALL("X-face image"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_XFACE,
+    .priv_data_size = sizeof(XFaceContext),
+    .init           = xface_decode_init,
+    .decode         = xface_decode_frame,
+    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_MONOWHITE, AV_PIX_FMT_NONE },
+};
diff --git a/libavcodec/xfaceenc.c b/libavcodec/xfaceenc.c
new file mode 100644
index 0000000..e213c9d
--- /dev/null
+++ b/libavcodec/xfaceenc.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 1990 James Ashton - Sydney University
+ * Copyright (c) 2012 Stefano Sabatini
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * X-Face encoder, based on libcompface, by James Ashton.
+ */
+
+#include "xface.h"
+#include "avcodec.h"
+#include "internal.h"
+
+typedef struct XFaceContext {
+    AVClass *class;
+    uint8_t bitmap[XFACE_PIXELS]; ///< image used internally for encoding
+    int max_line_len;             ///< max line length for compressed data (not referenced elsewhere in this file)
+    int set_header;               ///< set X-Face header in the output (not referenced elsewhere in this file)
+} XFaceContext;
+
+/* Return 1 if every pixel of the w x h block at bitmap equals the block's
+ * first pixel, 0 otherwise. Consecutive rows are XFACE_WIDTH bytes apart. */
+static int all_same(char *bitmap, int w, int h)
+{
+    const char first = *bitmap;
+    int row, col;
+
+    for (row = 0; row < h; row++) {
+        const char *line = bitmap + row * XFACE_WIDTH;
+        for (col = 0; col < w; col++)
+            if (line[col] != first)
+                return 0;
+    }
+
+    return 1;
+}
+
+/* A block counts as "black" when every 2x2 cell in it has a non-zero pixel. */
+static int all_black(char *bitmap, int w, int h)
+{
+    const int hw = w / 2, hh = h / 2;
+
+    if (w <= 3) /* 2x2 cell: at least one pixel is non-zero */
+        return bitmap[0] || bitmap[1] ||
+               bitmap[XFACE_WIDTH] || bitmap[XFACE_WIDTH + 1];
+
+    return all_black(bitmap,                         hw, hh) &&
+           all_black(bitmap + hw,                    hw, hh) &&
+           all_black(bitmap + XFACE_WIDTH * hh,      hw, hh) &&
+           all_black(bitmap + XFACE_WIDTH * hh + hw, hw, hh);
+}
+
+static int all_white(char *bitmap, int w, int h)
+{
+    return *bitmap == 0 && all_same(bitmap, w, h); /* first pixel is 0 and every other pixel equals it */
+}
+
+typedef struct {
+    const ProbRange *prob_ranges[XFACE_PIXELS*2]; ///< pushed ranges, in push order
+    int prob_ranges_idx;                          ///< number of entries pushed so far
+} ProbRangesQueue;
+
+static inline int pq_push(ProbRangesQueue *pq, const ProbRange *p)
+{
+    if (pq->prob_ranges_idx >= XFACE_PIXELS * 2 - 1) /* full; the last array slot is never used */
+        return -1;                                   /* note: callers in this file ignore this result */
+    pq->prob_ranges[pq->prob_ranges_idx++] = p;      /* append p and advance the count */
+    return 0;
+}
+
+/* Queue the pixels of a block: recurse by quadrants down to 2x2 cells and
+ * push the 2x2 range selected by each cell's four pixels. */
+static void push_greys(ProbRangesQueue *pq, char *bitmap, int w, int h)
+{
+    const int hw = w / 2, hh = h / 2;
+
+    if (w <= 3) {
+        /* (x,y) -> bit: (0,0) -> 0, (1,0) -> 1, (0,1) -> 2, (1,1) -> 3 */
+        const int cell = bitmap[0] + 2 * bitmap[1] +
+                         4 * bitmap[XFACE_WIDTH] + 8 * bitmap[XFACE_WIDTH + 1];
+        pq_push(pq, ff_xface_probranges_2x2 + cell);
+        return;
+    }
+    push_greys(pq, bitmap,                         hw, hh);
+    push_greys(pq, bitmap + hw,                    hw, hh);
+    push_greys(pq, bitmap + XFACE_WIDTH * hh,      hw, hh);
+    push_greys(pq, bitmap + XFACE_WIDTH * hh + hw, hw, hh);
+}
+
+/* Queue one block's code: WHITE, or BLACK plus its 2x2 cells, or GREY plus its four quadrants. */
+static void encode_block(char *bitmap, int w, int h, int level, ProbRangesQueue *pq)
+{
+    const ProbRange *ranges = ff_xface_probranges_per_level[level];
+    const int hw = w / 2, hh = h / 2;
+    if (all_white(bitmap, w, h)) {
+        pq_push(pq, ranges + XFACE_COLOR_WHITE);
+    } else if (all_black(bitmap, w, h)) {
+        pq_push(pq, ranges + XFACE_COLOR_BLACK);
+        push_greys(pq, bitmap, w, h);
+    } else {
+        pq_push(pq, ranges + XFACE_COLOR_GREY);
+        encode_block(bitmap,                         hw, hh, level + 1, pq);
+        encode_block(bitmap + hw,                    hw, hh, level + 1, pq);
+        encode_block(bitmap + hh * XFACE_WIDTH,      hw, hh, level + 1, pq);
+        encode_block(bitmap + hw + hh * XFACE_WIDTH, hw, hh, level + 1, pq);
+    }
+}
+
+static av_cold int xface_encode_init(AVCodecContext *avctx)
+{
+    avctx->coded_frame = av_frame_alloc(); /* released again in xface_encode_close() */
+    if (!avctx->coded_frame)
+        return AVERROR(ENOMEM);
+    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I; /* every coded frame is marked as an I picture */
+
+    return 0;
+}
+
+static void push_integer(BigInt *b, const ProbRange *prange)
+{
+    uint8_t r;
+    /* Push one symbol onto b: divide out its range, then append its code byte. */
+    ff_big_div(b, prange->range, &r);  /* b = b / range, r = remainder */
+    ff_big_mul(b, 0);                  /* NOTE(review): 0 presumably means "times 256", mirroring ff_big_div(b, 0, ...) -- confirm */
+    ff_big_add(b, r + prange->offset); /* code byte lies in [offset, offset + range) */
+}
+
+/* Encode one 48x48 MONOWHITE frame as X-Face text: printable ASCII digits
+ * followed by '\n' and a NUL byte. Returns 0 or a negative AVERROR code. */
+static int xface_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
+                              const AVFrame *frame, int *got_packet)
+{
+    XFaceContext *xface = avctx->priv_data;
+    ProbRangesQueue pq = {{ 0 }, 0};
+    uint8_t bitmap_copy[XFACE_PIXELS];
+    BigInt b = {0};
+    int i, j, k, ret = 0;
+    const uint8_t *buf;
+    uint8_t *p, r;
+    char intbuf[XFACE_MAX_DIGITS];
+
+    if (avctx->width || avctx->height) {
+        if (avctx->width != XFACE_WIDTH || avctx->height != XFACE_HEIGHT) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Size value %dx%d not supported, only accepts a size of %dx%d\n",
+                   avctx->width, avctx->height, XFACE_WIDTH, XFACE_HEIGHT);
+            return AVERROR(EINVAL);
+        }
+    }
+    avctx->width  = XFACE_WIDTH;
+    avctx->height = XFACE_HEIGHT;
+
+    /* convert image from MONOWHITE to 1=black 0=white bitmap */
+    buf = frame->data[0];
+    for (i = 0, j = 0; j < XFACE_HEIGHT; j++, buf += frame->linesize[0])
+        for (k = 0; k < XFACE_WIDTH; k++)
+            xface->bitmap[i++] = (buf[k >> 3] >> (7 - (k & 7))) & 1;
+
+    /* create a copy of bitmap */
+    memcpy(bitmap_copy, xface->bitmap, XFACE_PIXELS);
+    ff_xface_generate_face(xface->bitmap, bitmap_copy);
+
+    encode_block(xface->bitmap,                         16, 16, 0, &pq);
+    encode_block(xface->bitmap + 16,                    16, 16, 0, &pq);
+    encode_block(xface->bitmap + 32,                    16, 16, 0, &pq);
+    encode_block(xface->bitmap + XFACE_WIDTH * 16,      16, 16, 0, &pq);
+    encode_block(xface->bitmap + XFACE_WIDTH * 16 + 16, 16, 16, 0, &pq);
+    encode_block(xface->bitmap + XFACE_WIDTH * 16 + 32, 16, 16, 0, &pq);
+    encode_block(xface->bitmap + XFACE_WIDTH * 32,      16, 16, 0, &pq);
+    encode_block(xface->bitmap + XFACE_WIDTH * 32 + 16, 16, 16, 0, &pq);
+    encode_block(xface->bitmap + XFACE_WIDTH * 32 + 32, 16, 16, 0, &pq);
+
+    while (pq.prob_ranges_idx > 0)
+        push_integer(&b, pq.prob_ranges[--pq.prob_ranges_idx]);
+
+    /* write the inverted big integer in b to intbuf; stop at the end of
+     * intbuf instead of writing past it when b needs too many digits */
+    for (i = 0; b.nb_words && i < XFACE_MAX_DIGITS; i++) {
+        ff_big_div(&b, XFACE_PRINTS, &r);
+        intbuf[i] = r + XFACE_FIRST_PRINT;
+    }
+    if (b.nb_words) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Encoded image needs more than %d digits\n", XFACE_MAX_DIGITS);
+        return AVERROR(EINVAL);
+    }
+
+    if ((ret = ff_alloc_packet2(avctx, pkt, i+2)) < 0)
+        return ret;
+
+    /* revert the number, and close the buffer */
+    p = pkt->data;
+    while (--i >= 0)
+        *(p++) = intbuf[i];
+    *(p++) = '\n';
+    *(p++) = 0;
+
+    pkt->flags |= AV_PKT_FLAG_KEY;
+    *got_packet = 1;
+
+    return 0;
+}
+
+static av_cold int xface_encode_close(AVCodecContext *avctx)
+{
+    /* coded_frame comes from av_frame_alloc(), so release it with av_frame_free() */
+    av_frame_free(&avctx->coded_frame);
+    return 0;
+}
+
+AVCodec ff_xface_encoder = {
+    .name           = "xface",
+    .long_name      = NULL_IF_CONFIG_SMALL("X-face image"),
+    .type           = AVMEDIA_TYPE_VIDEO,
+    .id             = AV_CODEC_ID_XFACE,
+    .priv_data_size = sizeof(XFaceContext),
+    .init           = xface_encode_init,
+    .close          = xface_encode_close,
+    .encode2        = xface_encode_frame,
+    .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_MONOWHITE, AV_PIX_FMT_NONE }, /* AVPixelFormat, as in the decoder table */
+};
diff --git a/libavcodec/xiph.c b/libavcodec/xiph.c
index 7c3c710..0636f8e 100644
--- a/libavcodec/xiph.c
+++ b/libavcodec/xiph.c
@@ -1,20 +1,20 @@
 /*
- * Copyright (C) 2007  Libav Project
+ * Copyright (C) 2007  FFmpeg Project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/xiph.h b/libavcodec/xiph.h
index afaece7..cd8caa4 100644
--- a/libavcodec/xiph.h
+++ b/libavcodec/xiph.h
@@ -1,20 +1,20 @@
 /*
- * Copyright (C) 2007  Libav Project
+ * Copyright (C) 2007  FFmpeg Project
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/xl.c b/libavcodec/xl.c
index 8e9bdc6..2d1da1d 100644
--- a/libavcodec/xl.c
+++ b/libavcodec/xl.c
@@ -2,20 +2,20 @@
  * Miro VideoXL codec
  * Copyright (c) 2004 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -50,19 +50,16 @@ static int decode_frame(AVCodecContext *avctx,
     int y0, y1, y2, y3 = 0, c0 = 0, c1 = 0;
 
     if (avctx->width % 4) {
-        av_log(avctx, AV_LOG_ERROR, "Width not a multiple of 4.\n");
+        av_log(avctx, AV_LOG_ERROR, "width is not a multiple of 4\n");
         return AVERROR_INVALIDDATA;
     }
-
     if (buf_size < avctx->width * avctx->height) {
         av_log(avctx, AV_LOG_ERROR, "Packet is too small\n");
         return AVERROR_INVALIDDATA;
     }
 
-    if ((ret = ff_get_buffer(avctx, p, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
         return ret;
-    }
     p->pict_type = AV_PICTURE_TYPE_I;
     p->key_frame = 1;
 
diff --git a/libavcodec/xsubdec.c b/libavcodec/xsubdec.c
index 3d85973..174d74e 100644
--- a/libavcodec/xsubdec.c
+++ b/libavcodec/xsubdec.c
@@ -2,20 +2,20 @@
  * XSUB subtitle decoder
  * Copyright (c) 2007 Reimar Döffinger
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -56,12 +56,11 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size,
     int w, h, x, y, i;
     int64_t packet_time = 0;
     GetBitContext gb;
-
-    memset(sub, 0, sizeof(*sub));
+    int has_alpha = avctx->codec_tag == MKTAG('D','X','S','A');
 
     // check that at least header fits
     if (buf_size < 27 + 7 * 2 + 4 * 3) {
-        av_log(avctx, AV_LOG_ERROR, "coded frame too small\n");
+        av_log(avctx, AV_LOG_ERROR, "coded frame size %d too small\n", buf_size);
         return -1;
     }
 
@@ -108,8 +107,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size,
     for (i = 0; i < sub->rects[0]->nb_colors; i++)
         ((uint32_t*)sub->rects[0]->pict.data[1])[i] = bytestream_get_be24(&buf);
     // make all except background (first entry) non-transparent
-    for (i = 1; i < sub->rects[0]->nb_colors; i++)
-        ((uint32_t*)sub->rects[0]->pict.data[1])[i] |= 0xff000000;
+    for (i = 0; i < sub->rects[0]->nb_colors; i++)
+        ((uint32_t*)sub->rects[0]->pict.data[1])[i] |= (has_alpha ? *buf++ : (i ? 0xff : 0)) << 24;
 
     // process RLE-compressed data
     init_get_bits(&gb, buf, (buf_end - buf) * 8);
diff --git a/libavcodec/xsubenc.c b/libavcodec/xsubenc.c
index fc46fb8..7070854 100644
--- a/libavcodec/xsubenc.c
+++ b/libavcodec/xsubenc.c
@@ -3,20 +3,20 @@
  * Copyright (c) 2005 DivX, Inc.
  * Copyright (c) 2009 Bjorn Axelsson
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -128,7 +128,7 @@ static int xsub_encode(AVCodecContext *avctx, unsigned char *buf,
     }
 
     // TODO: support multiple rects
-    if (h->num_rects > 1)
+    if (h->num_rects != 1)
         av_log(avctx, AV_LOG_WARNING, "Only single rects supported (%d in subtitle.)\n", h->num_rects);
 
     // TODO: render text-based subtitles into bitmaps
@@ -142,7 +142,7 @@ static int xsub_encode(AVCodecContext *avctx, unsigned char *buf,
         av_log(avctx, AV_LOG_WARNING, "No more than 4 subtitle colors supported (%d found.)\n", h->rects[0]->nb_colors);
 
     // TODO: Palette swapping if color zero is not transparent
-    if (((uint32_t *)h->rects[0]->pict.data[1])[0] & 0xff)
+    if (((uint32_t *)h->rects[0]->pict.data[1])[0] & 0xff000000)
         av_log(avctx, AV_LOG_WARNING, "Color index 0 is not transparent. Transparency will be messed up.\n");
 
     if (make_tc(startTime, start_tc) || make_tc(endTime, end_tc)) {
@@ -166,8 +166,8 @@ static int xsub_encode(AVCodecContext *avctx, unsigned char *buf,
     bytestream_put_le16(&hdr, height);
     bytestream_put_le16(&hdr, h->rects[0]->x);
     bytestream_put_le16(&hdr, h->rects[0]->y);
-    bytestream_put_le16(&hdr, h->rects[0]->x + width);
-    bytestream_put_le16(&hdr, h->rects[0]->y + height);
+    bytestream_put_le16(&hdr, h->rects[0]->x + width -1);
+    bytestream_put_le16(&hdr, h->rects[0]->y + height -1);
 
     rlelenptr = hdr; // Will store length of first field here later.
     hdr+=2;
@@ -190,7 +190,7 @@ static int xsub_encode(AVCodecContext *avctx, unsigned char *buf,
                         h->rects[0]->w, h->rects[0]->h >> 1))
         return -1;
 
-    // Enforce total height to be be multiple of 2
+    // Enforce total height to be a multiple of 2
     if (h->rects[0]->h & 1) {
         put_xsub_rle(&pb, h->rects[0]->w, PADDING_COLOR);
         avpriv_align_put_bits(&pb);
@@ -206,6 +206,8 @@ static av_cold int xsub_encoder_init(AVCodecContext *avctx)
     if (!avctx->codec_tag)
         avctx->codec_tag = MKTAG('D','X','S','B');
 
+    avctx->bits_per_coded_sample = 4;
+
     return 0;
 }
 
diff --git a/libavcodec/xvididct.c b/libavcodec/xvididct.c
index 1453e51..36f65a6 100644
--- a/libavcodec/xvididct.c
+++ b/libavcodec/xvididct.c
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,7 +26,7 @@ av_cold void ff_xvididct_init(IDCTDSPContext *c, AVCodecContext *avctx)
 {
     const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;
 
-    if (high_bit_depth ||
+    if (high_bit_depth || avctx->lowres ||
         !(avctx->idct_algo == FF_IDCT_AUTO ||
           avctx->idct_algo == FF_IDCT_XVIDMMX))
         return;
diff --git a/libavcodec/xvididct.h b/libavcodec/xvididct.h
index a688bc8..6678329 100644
--- a/libavcodec/xvididct.h
+++ b/libavcodec/xvididct.h
@@ -1,18 +1,18 @@
 /*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/xvmc.h b/libavcodec/xvmc.h
index 950ed18..c2e187c 100644
--- a/libavcodec/xvmc.h
+++ b/libavcodec/xvmc.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2003 Ivan Kalvachev
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -33,8 +33,6 @@
 #include "version.h"
 #include "avcodec.h"
 
-#if FF_API_XVMC
-
 /**
  * @defgroup lavc_codec_hwaccel_xvmc XvMC
  * @ingroup lavc_codec_hwaccel
@@ -169,6 +167,4 @@ attribute_deprecated struct xvmc_pix_fmt {
  * @}
  */
 
-#endif /* FF_API_XVMC */
-
 #endif /* AVCODEC_XVMC_H */
diff --git a/libavcodec/xvmc_internal.h b/libavcodec/xvmc_internal.h
index 9018e4a..d365ef0 100644
--- a/libavcodec/xvmc_internal.h
+++ b/libavcodec/xvmc_internal.h
@@ -1,20 +1,20 @@
 /*
  * XVideo Motion Compensation internal functions
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -25,14 +25,7 @@
 #include "mpegvideo.h"
 #include "version.h"
 
-#if FF_API_XVMC
-
 void ff_xvmc_init_block(MpegEncContext *s);
 void ff_xvmc_pack_pblocks(MpegEncContext *s, int cbp);
-int  ff_xvmc_field_start(MpegEncContext*s, AVCodecContext *avctx);
-void ff_xvmc_field_end(MpegEncContext *s);
-void ff_xvmc_decode_mb(MpegEncContext *s);
-
-#endif /* FF_API_XVMC */
 
 #endif /* AVCODEC_XVMC_INTERNAL_H */
diff --git a/libavcodec/xwd.h b/libavcodec/xwd.h
index f41e2cd..d046046 100644
--- a/libavcodec/xwd.h
+++ b/libavcodec/xwd.h
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2012 Paul B Mahol
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libavcodec/xwddec.c b/libavcodec/xwddec.c
index f6ccb96..62dfdac 100644
--- a/libavcodec/xwddec.c
+++ b/libavcodec/xwddec.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2012 Paul B Mahol
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -147,7 +147,7 @@ static int xwd_decode_frame(AVCodecContext *avctx, void *data,
     }
 
     if (pixformat != XWD_Z_PIXMAP) {
-        av_log(avctx, AV_LOG_ERROR, "pixmap format %"PRIu32" unsupported\n", pixformat);
+        avpriv_report_missing_feature(avctx, "Pixmap format %"PRIu32, pixformat);
         return AVERROR_PATCHWELCOME;
     }
 
@@ -155,10 +155,13 @@ static int xwd_decode_frame(AVCodecContext *avctx, void *data,
     switch (vclass) {
     case XWD_STATIC_GRAY:
     case XWD_GRAY_SCALE:
-        if (bpp != 1)
+        if (bpp != 1 && bpp != 8)
             return AVERROR_INVALIDDATA;
-        if (pixdepth == 1)
+        if (pixdepth == 1) {
             avctx->pix_fmt = AV_PIX_FMT_MONOWHITE;
+        } else if (pixdepth == 8) {
+            avctx->pix_fmt = AV_PIX_FMT_GRAY8;
+        }
         break;
     case XWD_STATIC_COLOR:
     case XWD_PSEUDO_COLOR:
@@ -204,10 +207,8 @@ static int xwd_decode_frame(AVCodecContext *avctx, void *data,
         return AVERROR_PATCHWELCOME;
     }
 
-    if ((ret = ff_get_buffer(avctx, p, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
         return ret;
-    }
 
     p->key_frame = 1;
     p->pict_type = AV_PICTURE_TYPE_I;
diff --git a/libavcodec/xwdenc.c b/libavcodec/xwdenc.c
index 54599a0..06fa4a0 100644
--- a/libavcodec/xwdenc.c
+++ b/libavcodec/xwdenc.c
@@ -3,20 +3,20 @@
  *
  * Copyright (c) 2012 Paul B Mahol
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -30,17 +30,8 @@
 #define WINDOW_NAME         "lavcxwdenc"
 #define WINDOW_NAME_SIZE    11
 
-static av_cold int xwd_encode_init(AVCodecContext *avctx)
-{
-    avctx->coded_frame = av_frame_alloc();
-    if (!avctx->coded_frame)
-        return AVERROR(ENOMEM);
-
-    return 0;
-}
-
 static int xwd_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
-                            const AVFrame *p, int *got_packet)
+                            const AVFrame *pict, int *got_packet)
 {
     enum AVPixelFormat pix_fmt = avctx->pix_fmt;
     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
@@ -49,6 +40,7 @@ static int xwd_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     uint32_t header_size;
     int i, out_size, ret;
     uint8_t *ptr, *buf;
+    AVFrame * const p = (AVFrame *)pict;
 
     pixdepth = av_get_bits_per_pixel(desc);
     if (desc->flags & AV_PIX_FMT_FLAG_BE)
@@ -133,6 +125,11 @@ static int xwd_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         bpad     = 8;
         ncolors  = 256;
         break;
+    case AV_PIX_FMT_GRAY8:
+        bpp      = 8;
+        bpad     = 8;
+        vclass   = XWD_STATIC_GRAY;
+        break;
     case AV_PIX_FMT_MONOWHITE:
         be       = 1;
         bitorder = 1;
@@ -141,7 +138,7 @@ static int xwd_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
         vclass   = XWD_STATIC_GRAY;
         break;
     default:
-        av_log(avctx, AV_LOG_INFO, "unsupported pixel format\n");
+        av_log(avctx, AV_LOG_ERROR, "unsupported pixel format\n");
         return AVERROR(EINVAL);
     }
 
@@ -149,14 +146,12 @@ static int xwd_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     header_size = XWD_HEADER_SIZE + WINDOW_NAME_SIZE;
     out_size    = header_size + ncolors * XWD_CMAP_SIZE + avctx->height * lsize;
 
-    if ((ret = ff_alloc_packet(pkt, out_size)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "output buffer too small\n");
+    if ((ret = ff_alloc_packet2(avctx, pkt, out_size)) < 0)
         return ret;
-    }
     buf = pkt->data;
 
-    avctx->coded_frame->key_frame = 1;
-    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+    p->key_frame = 1;
+    p->pict_type = AV_PICTURE_TYPE_I;
 
     bytestream_put_be32(&buf, header_size);
     bytestream_put_be32(&buf, XWD_VERSION);   // file version
@@ -213,21 +208,12 @@ static int xwd_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     return 0;
 }
 
-static av_cold int xwd_encode_close(AVCodecContext *avctx)
-{
-    av_freep(&avctx->coded_frame);
-
-    return 0;
-}
-
 AVCodec ff_xwd_encoder = {
     .name         = "xwd",
     .long_name    = NULL_IF_CONFIG_SMALL("XWD (X Window Dump) image"),
     .type         = AVMEDIA_TYPE_VIDEO,
     .id           = AV_CODEC_ID_XWD,
-    .init         = xwd_encode_init,
     .encode2      = xwd_encode_frame,
-    .close        = xwd_encode_close,
     .pix_fmts     = (const enum AVPixelFormat[]) { AV_PIX_FMT_BGRA,
                                                  AV_PIX_FMT_RGBA,
                                                  AV_PIX_FMT_ARGB,
@@ -247,6 +233,7 @@ AVCodec ff_xwd_encoder = {
                                                  AV_PIX_FMT_RGB4_BYTE,
                                                  AV_PIX_FMT_BGR4_BYTE,
                                                  AV_PIX_FMT_PAL8,
+                                                 AV_PIX_FMT_GRAY8,
                                                  AV_PIX_FMT_MONOWHITE,
                                                  AV_PIX_FMT_NONE },
 };
diff --git a/libavcodec/xxan.c b/libavcodec/xxan.c
index d77a50f..b261cdf 100644
--- a/libavcodec/xxan.c
+++ b/libavcodec/xxan.c
@@ -3,20 +3,20 @@
  * Copyright (C) 2011 Konstantin Shishkov
  * based on work by Mike Melanson
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -73,7 +73,7 @@ static av_cold int xan_decode_init(AVCodecContext *avctx)
         return AVERROR(ENOMEM);
     s->scratch_buffer = av_malloc(s->buffer_size + 130);
     if (!s->scratch_buffer) {
-        av_freep(&s->y_buffer);
+        xan_decode_end(avctx);
         return AVERROR(ENOMEM);
     }
 
@@ -224,16 +224,18 @@ static int xan_decode_chroma(AVCodecContext *avctx, unsigned chroma_off)
     if (mode) {
         for (j = 0; j < avctx->height >> 1; j++) {
             for (i = 0; i < avctx->width >> 1; i++) {
+                if (src_end - src < 1)
+                    return 0;
                 val = *src++;
-                if (val && val < table_size) {
+                if (val) {
+                    if (val >= table_size)
+                        return AVERROR_INVALIDDATA;
                     val  = AV_RL16(table + (val << 1));
                     uval = (val >> 3) & 0xF8;
                     vval = (val >> 8) & 0xF8;
                     U[i] = uval | (uval >> 5);
                     V[i] = vval | (vval >> 5);
                 }
-                if (src == src_end)
-                    return 0;
             }
             U += s->pic->linesize[1];
             V += s->pic->linesize[2];
@@ -248,8 +250,12 @@ static int xan_decode_chroma(AVCodecContext *avctx, unsigned chroma_off)
 
         for (j = 0; j < avctx->height >> 2; j++) {
             for (i = 0; i < avctx->width >> 1; i += 2) {
+                if (src_end - src < 1)
+                    return 0;
                 val = *src++;
-                if (val && val < table_size) {
+                if (val) {
+                    if (val >= table_size)
+                        return AVERROR_INVALIDDATA;
                     val  = AV_RL16(table + (val << 1));
                     uval = (val >> 3) & 0xF8;
                     vval = (val >> 8) & 0xF8;
@@ -288,7 +294,7 @@ static int xan_decode_frame_type0(AVCodecContext *avctx)
     if ((ret = xan_decode_chroma(avctx, chroma_off)) != 0)
         return ret;
 
-    if (corr_off >= (s->gb.buffer_end - s->gb.buffer_start)) {
+    if (corr_off >= bytestream2_size(&s->gb)) {
         av_log(avctx, AV_LOG_WARNING, "Ignoring invalid correction block position\n");
         corr_off = 0;
     }
@@ -333,6 +339,9 @@ static int xan_decode_frame_type0(AVCodecContext *avctx)
         dec_size = xan_unpack(s, s->scratch_buffer, s->buffer_size / 2);
         if (dec_size < 0)
             dec_size = 0;
+        else
+            dec_size = FFMIN(dec_size, s->buffer_size/2 - 1);
+
         for (i = 0; i < dec_size; i++)
             s->y_buffer[i*2+1] = (s->y_buffer[i*2+1] + (s->scratch_buffer[i] << 1)) & 0x3F;
     }
@@ -402,10 +411,8 @@ static int xan_decode_frame(AVCodecContext *avctx,
     int ftype;
     int ret;
 
-    if ((ret = ff_reget_buffer(avctx, s->pic))) {
-        av_log(s->avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, s->pic)) < 0)
         return ret;
-    }
 
     bytestream2_init(&s->gb, avpkt->data, avpkt->size);
     ftype = bytestream2_get_le32(&s->gb);
diff --git a/libavcodec/y41pdec.c b/libavcodec/y41pdec.c
new file mode 100644
index 0000000..9d1e531
--- /dev/null
+++ b/libavcodec/y41pdec.c
@@ -0,0 +1,92 @@
+/*
+ * y41p decoder
+ *
+ * Copyright (c) 2012 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "internal.h"
+
+/* Init callback: fixed YUV411P output; a width not divisible by 8 only warns. */
+static av_cold int y41p_decode_init(AVCodecContext *avctx)
+{
+    avctx->bits_per_raw_sample = 12;
+    avctx->pix_fmt             = AV_PIX_FMT_YUV411P;
+
+    if (avctx->width & 7)
+        av_log(avctx, AV_LOG_WARNING, "y41p requires width to be divisible by 8.\n");
+
+    return 0;
+}
+
+/* Unpack one y41p packet: each 12-byte group (U Y V Y U Y V Y Y Y Y Y) yields
+ * 8 pixels, and the first packed row lands in the bottom picture row. */
+static int y41p_decode_frame(AVCodecContext *avctx, void *data,
+                             int *got_frame, AVPacket *avpkt)
+{
+    AVFrame *pic = data;
+    const uint8_t *src = avpkt->data;
+    uint8_t *y, *u, *v;
+    int i, j, ret;
+
+    /* Rows are consumed in whole 8-pixel groups, so size-check the padded width. */
+    if (avpkt->size < 3LL * avctx->height * FFALIGN(avctx->width, 8) / 2) {
+        av_log(avctx, AV_LOG_ERROR, "Insufficient input data.\n");
+        return AVERROR(EINVAL);
+    }
+
+    if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
+        return ret;
+
+    pic->key_frame = 1;
+    pic->pict_type = AV_PICTURE_TYPE_I;
+
+    for (i = avctx->height - 1; i >= 0; i--) {
+        y = &pic->data[0][i * pic->linesize[0]];
+        u = &pic->data[1][i * pic->linesize[1]];
+        v = &pic->data[2][i * pic->linesize[2]];
+        for (j = 0; j < avctx->width; j += 8) {
+            *(u++) = *src++;
+            *(y++) = *src++;
+            *(v++) = *src++;
+            *(y++) = *src++;
+            *(u++) = *src++;
+            *(y++) = *src++;
+            *(v++) = *src++;
+            *(y++) = *src++;
+            *(y++) = *src++;
+            *(y++) = *src++;
+            *(y++) = *src++;
+            *(y++) = *src++;
+        }
+    }
+
+    *got_frame = 1;
+    return avpkt->size;
+}
+
+AVCodec ff_y41p_decoder = {
+    .type         = AVMEDIA_TYPE_VIDEO,
+    .id           = AV_CODEC_ID_Y41P,
+    .name         = "y41p",
+    .long_name    = NULL_IF_CONFIG_SMALL("Uncompressed YUV 4:1:1 12-bit"),
+    .capabilities = CODEC_CAP_DR1,
+    .init         = y41p_decode_init,  /* sets the output pixel format */
+    .decode       = y41p_decode_frame, /* one packet in, one picture out */
+};
diff --git a/libavcodec/y41penc.c b/libavcodec/y41penc.c
new file mode 100644
index 0000000..8f67944
--- /dev/null
+++ b/libavcodec/y41penc.c
@@ -0,0 +1,102 @@
+/*
+ * y41p encoder
+ *
+ * Copyright (c) 2012 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "internal.h"
+
+/* Init callback: require width % 8 == 0 and allocate avctx->coded_frame. */
+static av_cold int y41p_encode_init(AVCodecContext *avctx)
+{
+    if (avctx->width & 7) {
+        av_log(avctx, AV_LOG_ERROR, "y41p requires width to be divisible by 8.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    avctx->coded_frame           = av_frame_alloc();
+    avctx->bits_per_coded_sample = 12;
+    if (avctx->coded_frame)
+        return 0;
+
+    /* bits_per_coded_sample stays set even when the allocation failed */
+    av_log(avctx, AV_LOG_ERROR, "Could not allocate frame.\n");
+    return AVERROR(ENOMEM);
+}
+
+/* Pack one YUV411P picture into y41p: every 8 pixels become the 12 bytes
+ * U0 Y0 V0 Y1 U1 Y2 V1 Y3 Y4 Y5 Y6 Y7; picture rows are emitted last-to-first. */
+static int y41p_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
+                             const AVFrame *pic, int *got_packet)
+{
+    const uint8_t *luma, *cb, *cr;
+    uint8_t *out;
+    int row, x, ret;
+
+    ret = ff_alloc_packet2(avctx, pkt, avctx->width * avctx->height * 1.5);
+    if (ret < 0)
+        return ret;
+    out = pkt->data;
+
+    avctx->coded_frame->key_frame = 1;
+    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+
+    for (row = avctx->height - 1; row >= 0; row--) {
+        luma = pic->data[0] + row * pic->linesize[0];
+        cb   = pic->data[1] + row * pic->linesize[1];
+        cr   = pic->data[2] + row * pic->linesize[2];
+        for (x = 0; x < avctx->width; x += 8) {
+            out[0]  = cb[0];   out[1]  = luma[0];
+            out[2]  = cr[0];   out[3]  = luma[1];
+            out[4]  = cb[1];   out[5]  = luma[2];
+            out[6]  = cr[1];   out[7]  = luma[3];
+            out[8]  = luma[4]; out[9]  = luma[5];
+            out[10] = luma[6]; out[11] = luma[7];
+
+            out  += 12;
+            luma += 8;
+            cb   += 2;
+            cr   += 2;
+        }
+    }
+
+    *got_packet = 1;
+    pkt->flags |= AV_PKT_FLAG_KEY;
+    return 0;
+}
+
+/* Close callback: coded_frame comes from av_frame_alloc(), so pair it with av_frame_free(). */
+static av_cold int y41p_encode_close(AVCodecContext *avctx)
+{
+    av_frame_free(&avctx->coded_frame);
+    return 0;
+}
+
+/* Encoder table entry; AV_PIX_FMT_YUV411P is the only listed input format. */
+AVCodec ff_y41p_encoder = {
+    .type         = AVMEDIA_TYPE_VIDEO,
+    .id           = AV_CODEC_ID_Y41P,
+    .name         = "y41p",
+    .long_name    = NULL_IF_CONFIG_SMALL("Uncompressed YUV 4:1:1 12-bit"),
+    .pix_fmts     = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUV411P, AV_PIX_FMT_NONE },
+    .init         = y41p_encode_init,
+    .encode2      = y41p_encode_frame,
+    .close        = y41p_encode_close,
+};
diff --git a/libavcodec/yop.c b/libavcodec/yop.c
index 3434fd9..c6b19ec 100644
--- a/libavcodec/yop.c
+++ b/libavcodec/yop.c
@@ -5,20 +5,20 @@
  * derived from the code by
  * Copyright (C) 2009 Thomas P. Higdon <thomas.p.higdon@gmail.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -31,6 +31,7 @@
 
 typedef struct YopDecContext {
     AVCodecContext *avctx;
+    AVFrame *frame;
 
     int num_pal_colors;
     int first_color[2];
@@ -78,6 +79,15 @@ static const int8_t motion_vector[16][2] =
      { 4, -2}, {-2,  0},
     };
 
+/* Close callback: release the AVFrame held in the decoder context. */
+static av_cold int yop_decode_close(AVCodecContext *avctx)
+{
+    YopDecContext *ctx = avctx->priv_data;
+
+    av_frame_free(&ctx->frame);
+    return 0;
+}
+
 static av_cold int yop_decode_init(AVCodecContext *avctx)
 {
     YopDecContext *s = avctx->priv_data;
@@ -103,10 +113,14 @@ static av_cold int yop_decode_init(AVCodecContext *avctx)
     if (s->num_pal_colors + s->first_color[0] > 256 ||
         s->num_pal_colors + s->first_color[1] > 256) {
         av_log(avctx, AV_LOG_ERROR,
-               "YOP: palette parameters invalid, header probably corrupt\n");
+               "Palette parameters invalid, header probably corrupt\n");
         return AVERROR_INVALIDDATA;
     }
 
+    s->frame = av_frame_alloc();
+    if (!s->frame)
+        return AVERROR(ENOMEM);
+
     return 0;
 }
 
@@ -144,8 +158,7 @@ static int yop_copy_previous_block(YopDecContext *s, int linesize, int copy_tag)
     bufptr = s->dstptr + motion_vector[copy_tag][0] +
              linesize * motion_vector[copy_tag][1];
     if (bufptr < s->dstbuf) {
-        av_log(s->avctx, AV_LOG_ERROR,
-               "YOP: cannot decode, file probably corrupt\n");
+        av_log(s->avctx, AV_LOG_ERROR, "File probably corrupt\n");
         return AVERROR_INVALIDDATA;
     }
 
@@ -179,7 +192,7 @@ static int yop_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                             AVPacket *avpkt)
 {
     YopDecContext *s = avctx->priv_data;
-    AVFrame *frame = data;
+    AVFrame *frame = s->frame;
     int tag, firstcolor, is_odd_frame;
     int ret, i, x, y;
     uint32_t *palette;
@@ -189,11 +202,8 @@ static int yop_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         return AVERROR_INVALIDDATA;
     }
 
-    ret = ff_get_buffer(avctx, frame, 0);
-    if (ret < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+    if ((ret = ff_reget_buffer(avctx, frame)) < 0)
         return ret;
-    }
 
     if (!avctx->frame_number)
         memset(frame->data[1], 0, AVPALETTE_SIZE);
@@ -205,13 +215,20 @@ static int yop_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     s->low_nibble = NULL;
 
     is_odd_frame = avpkt->data[0];
+    if(is_odd_frame>1){
+        av_log(avctx, AV_LOG_ERROR, "frame is too odd %d\n", is_odd_frame);
+        return AVERROR_INVALIDDATA;
+    }
     firstcolor   = s->first_color[is_odd_frame];
     palette      = (uint32_t *)frame->data[1];
 
-    for (i = 0; i < s->num_pal_colors; i++, s->srcptr += 3)
+    for (i = 0; i < s->num_pal_colors; i++, s->srcptr += 3) {
         palette[i + firstcolor] = (s->srcptr[0] << 18) |
                                   (s->srcptr[1] << 10) |
                                   (s->srcptr[2] << 2);
+        palette[i + firstcolor] |= 0xFFU << 24 |
+                                   (palette[i + firstcolor] >> 6) & 0x30303;
+    }
 
     frame->palette_has_changed = 1;
 
@@ -239,6 +256,9 @@ static int yop_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         s->dstptr += 2*frame->linesize[0] - x;
     }
 
+    if ((ret = av_frame_ref(data, s->frame)) < 0)
+        return ret;
+
     *got_frame = 1;
     return avpkt->size;
 }
@@ -250,6 +270,6 @@ AVCodec ff_yop_decoder = {
     .id             = AV_CODEC_ID_YOP,
     .priv_data_size = sizeof(YopDecContext),
     .init           = yop_decode_init,
+    .close          = yop_decode_close,
     .decode         = yop_decode_frame,
-    .capabilities   = CODEC_CAP_DR1,
 };
diff --git a/libavcodec/yuv4dec.c b/libavcodec/yuv4dec.c
new file mode 100644
index 0000000..00ccf58
--- /dev/null
+++ b/libavcodec/yuv4dec.c
@@ -0,0 +1,84 @@
+/*
+ * libquicktime yuv4 decoder
+ *
+ * Copyright (c) 2011 Carl Eugen Hoyos
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "internal.h"
+
+/* Init callback: the decoder always outputs AV_PIX_FMT_YUV420P. */
+static av_cold int yuv4_decode_init(AVCodecContext *avctx)
+{
+    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
+    return 0;
+}
+
+/* Unpack a yuv4 packet: each 2x2 pixel block is stored as 6 bytes (U, V, then
+ * four Y samples); U and V are XORed with 0x80 before being stored. */
+static int yuv4_decode_frame(AVCodecContext *avctx, void *data,
+                             int *got_frame, AVPacket *avpkt)
+{
+    const int blocks_w = (avctx->width  + 1) >> 1;
+    const int blocks_h = (avctx->height + 1) >> 1;
+    const uint8_t *in  = avpkt->data;
+    AVFrame *pic = data;
+    uint8_t *top, *bottom, *cb, *cr;
+    int bx, by, ret;
+
+    if (avpkt->size < 6 * blocks_w * blocks_h) {
+        av_log(avctx, AV_LOG_ERROR, "Insufficient input data.\n");
+        return AVERROR(EINVAL);
+    }
+
+    if ((ret = ff_get_buffer(avctx, pic, 0)) < 0)
+        return ret;
+
+    pic->key_frame = 1;
+    pic->pict_type = AV_PICTURE_TYPE_I;
+
+    /* NOTE(review): odd dimensions write one row/column past the visible picture - confirm padding. */
+    for (by = 0; by < blocks_h; by++) {
+        top    = pic->data[0] + 2 * by * pic->linesize[0];
+        bottom = top + pic->linesize[0];
+        cb     = pic->data[1] + by * pic->linesize[1];
+        cr     = pic->data[2] + by * pic->linesize[2];
+        for (bx = 0; bx < blocks_w; bx++, in += 6) {
+            cb[bx]             = in[0] ^ 0x80;
+            cr[bx]             = in[1] ^ 0x80;
+            top[2 * bx]        = in[2];
+            top[2 * bx + 1]    = in[3];
+            bottom[2 * bx]     = in[4];
+            bottom[2 * bx + 1] = in[5];
+        }
+    }
+
+    *got_frame = 1;
+    return avpkt->size;
+}
+
+AVCodec ff_yuv4_decoder = {
+    .type         = AVMEDIA_TYPE_VIDEO,
+    .id           = AV_CODEC_ID_YUV4,
+    .name         = "yuv4",
+    .long_name    = NULL_IF_CONFIG_SMALL("Uncompressed packed 4:2:0"),
+    .capabilities = CODEC_CAP_DR1,
+    .init         = yuv4_decode_init,  /* sets the output pixel format */
+    .decode       = yuv4_decode_frame, /* one packet in, one picture out */
+};
diff --git a/libavcodec/yuv4enc.c b/libavcodec/yuv4enc.c
new file mode 100644
index 0000000..ed0fc77
--- /dev/null
+++ b/libavcodec/yuv4enc.c
@@ -0,0 +1,91 @@
+/*
+ * libquicktime yuv4 encoder
+ *
+ * Copyright (c) 2011 Carl Eugen Hoyos
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "internal.h"
+
+/* Init callback: allocate the coded_frame that yuv4_encode_frame() marks as a key frame. */
+static av_cold int yuv4_encode_init(AVCodecContext *avctx)
+{
+    avctx->coded_frame = av_frame_alloc();
+
+    if (avctx->coded_frame)
+        return 0;
+
+    av_log(avctx, AV_LOG_ERROR, "Could not allocate frame.\n");
+    return AVERROR(ENOMEM);
+}
+
+/* Pack a YUV420P picture into yuv4: 6 bytes (U^0x80, V^0x80, 4 Y) per 2x2 block. */
+static int yuv4_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
+                             const AVFrame *pic, int *got_packet)
+{
+    const uint8_t *y = pic->data[0], *u = pic->data[1], *v = pic->data[2], *y2;
+    uint8_t *dst;
+    int i, j, ret;
+
+    if ((ret = ff_alloc_packet2(avctx, pkt, 6 * (avctx->width + 1 >> 1) * (avctx->height + 1 >> 1))) < 0)
+        return ret;
+    dst = pkt->data;
+
+    avctx->coded_frame->key_frame = 1;
+    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+
+    for (i = 0; i < avctx->height + 1 >> 1; i++) {
+        /* Odd height: the last block row has no second luma line, so repeat
+         * the first one instead of reading past the picture. */
+        y2 = 2 * i + 1 < avctx->height ? y + pic->linesize[0] : y;
+        for (j = 0; j < avctx->width + 1 >> 1; j++) {
+            *dst++ = u[j] ^ 0x80;
+            *dst++ = v[j] ^ 0x80;
+            *dst++ = y [2 * j    ];
+            *dst++ = y [2 * j + 1];
+            *dst++ = y2[2 * j    ];
+            *dst++ = y2[2 * j + 1];
+        }
+        y += 2 * pic->linesize[0];
+        u +=     pic->linesize[1];
+        v +=     pic->linesize[2];
+    }
+
+    pkt->flags |= AV_PKT_FLAG_KEY;
+    *got_packet = 1;
+    return 0;
+}
+
+/* Close callback: coded_frame comes from av_frame_alloc(), so pair it with av_frame_free(). */
+static av_cold int yuv4_encode_close(AVCodecContext *avctx)
+{
+    av_frame_free(&avctx->coded_frame);
+    return 0;
+}
+
+AVCodec ff_yuv4_encoder = {
+    .type         = AVMEDIA_TYPE_VIDEO,
+    .id           = AV_CODEC_ID_YUV4,
+    .name         = "yuv4",
+    .long_name    = NULL_IF_CONFIG_SMALL("Uncompressed packed 4:2:0"),
+    .pix_fmts     = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE }, /* only listed format */
+    .init         = yuv4_encode_init,
+    .encode2      = yuv4_encode_frame,
+    .close        = yuv4_encode_close,
+};
diff --git a/libavcodec/zerocodec.c b/libavcodec/zerocodec.c
index eeba2de..9f6c37c 100644
--- a/libavcodec/zerocodec.c
+++ b/libavcodec/zerocodec.c
@@ -59,10 +59,8 @@ static int zerocodec_decode_frame(AVCodecContext *avctx, void *data,
         return AVERROR_INVALIDDATA;
     }
 
-    if (ff_get_buffer(avctx, pic, AV_GET_BUFFER_FLAG_REF) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Could not allocate buffer.\n");
-        return AVERROR(ENOMEM);
-    }
+    if ((ret = ff_get_buffer(avctx, pic, AV_GET_BUFFER_FLAG_REF)) < 0)
+        return ret;
 
     zstream->next_in  = avpkt->data;
     zstream->avail_in = avpkt->size;
diff --git a/libavcodec/zmbv.c b/libavcodec/zmbv.c
index d17f37a..21a9e35 100644
--- a/libavcodec/zmbv.c
+++ b/libavcodec/zmbv.c
@@ -2,20 +2,20 @@
  * Zip Motion Blocks Video (ZMBV) decoder
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -28,6 +28,7 @@
 #include <stdlib.h>
 
 #include "libavutil/common.h"
+#include "libavutil/imgutils.h"
 #include "libavutil/intreadwrite.h"
 #include "avcodec.h"
 #include "internal.h"
@@ -64,6 +65,7 @@ typedef struct ZmbvContext {
     int fmt;
     int comp;
     int flags;
+    int stride;
     int bw, bh, bx, by;
     int decomp_len;
     z_stream zstream;
@@ -143,7 +145,7 @@ static int zmbv_decode_xor_8(ZmbvContext *c)
         prev += c->width * c->bh;
     }
     if (src - c->decomp_buf != c->decomp_len)
-        av_log(c->avctx, AV_LOG_ERROR, "Used %ti of %i bytes\n",
+        av_log(c->avctx, AV_LOG_ERROR, "Used %"PTRDIFF_SPECIFIER" of %i bytes\n",
                src-c->decomp_buf, c->decomp_len);
     return 0;
 }
@@ -217,7 +219,7 @@ static int zmbv_decode_xor_16(ZmbvContext *c)
         prev += c->width * c->bh;
     }
     if (src - c->decomp_buf != c->decomp_len)
-        av_log(c->avctx, AV_LOG_ERROR, "Used %ti of %i bytes\n",
+        av_log(c->avctx, AV_LOG_ERROR, "Used %"PTRDIFF_SPECIFIER" of %i bytes\n",
                src-c->decomp_buf, c->decomp_len);
     return 0;
 }
@@ -375,7 +377,7 @@ static int zmbv_decode_xor_32(ZmbvContext *c)
         prev   += c->width * c->bh;
     }
     if (src - c->decomp_buf != c->decomp_len)
-        av_log(c->avctx, AV_LOG_ERROR, "Used %ti of %i bytes\n",
+        av_log(c->avctx, AV_LOG_ERROR, "Used %"PTRDIFF_SPECIFIER" of %i bytes\n",
                src-c->decomp_buf, c->decomp_len);
     return 0;
 }
@@ -406,17 +408,13 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
     int zret = Z_OK; // Zlib return code
     int len = buf_size;
     int hi_ver, lo_ver, ret;
-    uint8_t *tmp;
-
-    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
-        return ret;
-    }
 
     /* parse header */
     c->flags = buf[0];
     buf++; len--;
     if (c->flags & ZMBV_KEYFRAME) {
+        void *decode_intra = NULL;
+        c->decode_intra= NULL;
         hi_ver = buf[0];
         lo_ver = buf[1];
         c->comp = buf[2];
@@ -447,29 +445,39 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
         switch (c->fmt) {
         case ZMBV_FMT_8BPP:
             c->bpp = 8;
-            c->decode_intra = zmbv_decode_intra;
+            decode_intra = zmbv_decode_intra;
             c->decode_xor = zmbv_decode_xor_8;
+            avctx->pix_fmt = AV_PIX_FMT_PAL8;
+            c->stride = c->width;
             break;
         case ZMBV_FMT_15BPP:
         case ZMBV_FMT_16BPP:
             c->bpp = 16;
-            c->decode_intra = zmbv_decode_intra;
+            decode_intra = zmbv_decode_intra;
             c->decode_xor = zmbv_decode_xor_16;
+            if (c->fmt == ZMBV_FMT_15BPP)
+                avctx->pix_fmt = AV_PIX_FMT_RGB555LE;
+            else
+                avctx->pix_fmt = AV_PIX_FMT_RGB565LE;
+            c->stride = c->width * 2;
             break;
 #ifdef ZMBV_ENABLE_24BPP
         case ZMBV_FMT_24BPP:
             c->bpp = 24;
-            c->decode_intra = zmbv_decode_intra;
+            decode_intra = zmbv_decode_intra;
             c->decode_xor = zmbv_decode_xor_24;
+            avctx->pix_fmt = AV_PIX_FMT_RGB24;
+            c->stride = c->width * 3;
             break;
 #endif //ZMBV_ENABLE_24BPP
         case ZMBV_FMT_32BPP:
             c->bpp = 32;
-            c->decode_intra = zmbv_decode_intra;
+            decode_intra = zmbv_decode_intra;
             c->decode_xor = zmbv_decode_xor_32;
+            avctx->pix_fmt = AV_PIX_FMT_BGR0;
+            c->stride = c->width * 4;
             break;
         default:
-            c->decode_intra = NULL;
             c->decode_xor = NULL;
             avpriv_request_sample(avctx, "Format %i", c->fmt);
             return AVERROR_PATCHWELCOME;
@@ -481,16 +489,15 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
             return AVERROR_UNKNOWN;
         }
 
-        tmp = av_realloc(c->cur,  avctx->width * avctx->height * (c->bpp / 8));
-        if (!tmp)
-            return AVERROR(ENOMEM);
-        c->cur = tmp;
-        tmp = av_realloc(c->prev, avctx->width * avctx->height * (c->bpp / 8));
-        if (!tmp)
+        c->cur  = av_realloc_f(c->cur, avctx->width * avctx->height,  (c->bpp / 8));
+        c->prev = av_realloc_f(c->prev, avctx->width * avctx->height,  (c->bpp / 8));
+        c->bx = (c->width + c->bw - 1) / c->bw;
+        c->by = (c->height+ c->bh - 1) / c->bh;
+        if (!c->cur || !c->prev)
             return AVERROR(ENOMEM);
-        c->prev = tmp;
-        c->bx   = (c->width  + c->bw - 1) / c->bw;
-        c->by   = (c->height + c->bh - 1) / c->bh;
+        memset(c->cur, 0, avctx->width * avctx->height * (c->bpp / 8));
+        memset(c->prev, 0, avctx->width * avctx->height * (c->bpp / 8));
+        c->decode_intra= decode_intra;
     }
 
     if (c->decode_intra == NULL) {
@@ -498,6 +505,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
         return AVERROR_INVALIDDATA;
     }
 
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+
     if (c->comp == 0) { //Uncompressed data
         if (c->decomp_size < len) {
             av_log(avctx, AV_LOG_ERROR, "Buffer too small\n");
@@ -506,7 +516,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
         memcpy(c->decomp_buf, buf, len);
     } else { // ZLIB-compressed data
         c->zstream.total_in = c->zstream.total_out = 0;
-        c->zstream.next_in = buf;
+        c->zstream.next_in = (uint8_t*)buf;
         c->zstream.avail_in = len;
         c->zstream.next_out = c->decomp_buf;
         c->zstream.avail_out = c->decomp_size;
@@ -531,64 +541,22 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, AVPac
     /* update frames */
     {
         uint8_t *out, *src;
-        int i, j;
+        int j;
 
         out = frame->data[0];
         src = c->cur;
         switch (c->fmt) {
         case ZMBV_FMT_8BPP:
-            for (j = 0; j < c->height; j++) {
-                for (i = 0; i < c->width; i++) {
-                    out[i * 3 + 0] = c->pal[(*src) * 3 + 0];
-                    out[i * 3 + 1] = c->pal[(*src) * 3 + 1];
-                    out[i * 3 + 2] = c->pal[(*src) * 3 + 2];
-                    src++;
-                }
-                out += frame->linesize[0];
-            }
-            break;
+            for (j = 0; j < 256; j++)
+                AV_WN32(&frame->data[1][j * 4], 0xFFU << 24 | AV_RB24(&c->pal[j * 3]));
         case ZMBV_FMT_15BPP:
-            for (j = 0; j < c->height; j++) {
-                for (i = 0; i < c->width; i++) {
-                    uint16_t tmp = AV_RL16(src);
-                    src += 2;
-                    out[i * 3 + 0] = (tmp & 0x7C00) >> 7;
-                    out[i * 3 + 1] = (tmp & 0x03E0) >> 2;
-                    out[i * 3 + 2] = (tmp & 0x001F) << 3;
-                }
-                out += frame->linesize[0];
-            }
-            break;
         case ZMBV_FMT_16BPP:
-            for (j = 0; j < c->height; j++) {
-                for (i = 0; i < c->width; i++) {
-                    uint16_t tmp = AV_RL16(src);
-                    src += 2;
-                    out[i * 3 + 0] = (tmp & 0xF800) >> 8;
-                    out[i * 3 + 1] = (tmp & 0x07E0) >> 3;
-                    out[i * 3 + 2] = (tmp & 0x001F) << 3;
-                }
-                out += frame->linesize[0];
-            }
-            break;
 #ifdef ZMBV_ENABLE_24BPP
         case ZMBV_FMT_24BPP:
-            for (j = 0; j < c->height; j++) {
-                memcpy(out, src, c->width * 3);
-                src += c->width * 3;
-                out += frame->linesize[0];
-            }
-            break;
-#endif //ZMBV_ENABLE_24BPP
+#endif
         case ZMBV_FMT_32BPP:
-            for (j = 0; j < c->height; j++) {
-                for (i = 0; i < c->width; i++) {
-                    uint32_t tmp = AV_RL32(src);
-                    src += 4;
-                    AV_WB24(out+(i*3), tmp);
-                }
-                out += frame->linesize[0];
-            }
+            av_image_copy_plane(out, frame->linesize[0], src, c->stride,
+                                c->stride, c->height);
             break;
         default:
             av_log(avctx, AV_LOG_ERROR, "Cannot handle format %i\n", c->fmt);
@@ -616,12 +584,11 @@ static av_cold int decode_init(AVCodecContext *avctx)
     // Needed if zlib unused or init aborted before inflateInit
     memset(&c->zstream, 0, sizeof(z_stream));
 
-    avctx->pix_fmt = AV_PIX_FMT_RGB24;
     c->decomp_size = (avctx->width + 255) * 4 * (avctx->height + 64);
 
     /* Allocate decompression buffer */
     if (c->decomp_size) {
-        if ((c->decomp_buf = av_malloc(c->decomp_size)) == NULL) {
+        if ((c->decomp_buf = av_mallocz(c->decomp_size)) == NULL) {
             av_log(avctx, AV_LOG_ERROR,
                    "Can't allocate decompression buffer.\n");
             return AVERROR(ENOMEM);
diff --git a/libavcodec/zmbvenc.c b/libavcodec/zmbvenc.c
index 785ee0a..28dbe20 100644
--- a/libavcodec/zmbvenc.c
+++ b/libavcodec/zmbvenc.c
@@ -2,20 +2,20 @@
  * Zip Motion Blocks Video (ZMBV) encoder
  * Copyright (c) 2006 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -227,10 +227,8 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
     }
 
     pkt_size = c->zstream.total_out + 1 + 6*keyframe;
-    if ((ret = ff_alloc_packet(pkt, pkt_size)) < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Error getting packet of size %d.\n", pkt_size);
+    if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
         return ret;
-    }
     buf = pkt->data;
 
     fl = (keyframe ? ZMBV_KEYFRAME : 0) | (chpal ? ZMBV_DELTAPAL : 0);